Kaggle X-Ray
Tue Aug 02 2022 01:02:44 GMT+0000 (Coordinated Universal Time)
Saved by @mnis00014
# https://www.kaggle.com/code/bishwajitnayak/deep-learning-biswajit-n
#
# Binary "ok" / "nok" image classification with a frozen VGG19 backbone:
# dataset discovery -> exploratory plots -> Keras generators -> training ->
# evaluation (accuracy, confusion matrix, classification report, ROC).
#
# NOTE(review): the saved snippet did not include its import cell. The imports
# below are the conventional sources of the names the script uses -- confirm
# against the original notebook.
import glob
import os
from math import floor
from timeit import default_timer as timer

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.metrics as metrics
import tensorflow as tf
from sklearn.metrics import accuracy_score
from tensorflow.keras.callbacks import (
    EarlyStopping,
    LearningRateScheduler,
    ModelCheckpoint,
    TensorBoard,
)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# ---------------------------------------------------------------------------
# Dataset discovery
# ---------------------------------------------------------------------------
main_path = "final_dataset"
train_path = os.path.join(main_path, "training")
val_path = os.path.join(main_path, "validation")
test_path = os.path.join(main_path, "testing")

# Single source of truth for the label encoding. The generators below are told
# to use exactly this order; left to themselves they index classes
# alphanumerically ('nok' -> 0, 'ok' -> 1), which would silently invert every
# metric computed against this mapping.
num_label = {'ok': 0, 'nok': 1}
CLASS_NAMES = sorted(num_label, key=num_label.get)  # ['ok', 'nok']


def _list_images(split_dir, label):
    """Return the sorted ``*.jpg`` paths found in ``split_dir/label``."""
    # Sorted so that runs are reproducible; glob order is filesystem-dependent.
    return sorted(glob.glob(os.path.join(split_dir, label, "*.jpg")))


def _build_frame(ok_paths, nok_paths):
    """Return ``(paths, frame)`` for one split, 'ok' rows first, then 'nok'.

    ``frame`` has the columns ``class`` (label string) and ``image`` (path).
    """
    paths = list(ok_paths) + list(nok_paths)
    labels = ['ok'] * len(ok_paths) + ['nok'] * len(nok_paths)
    return paths, pd.DataFrame({'class': labels, 'image': paths})


train_ok = _list_images(train_path, "ok")
train_nok = _list_images(train_path, "nok")
val_ok = _list_images(val_path, "ok")
val_nok = _list_images(val_path, "nok")
test_ok = _list_images(test_path, "ok")
test_nok = _list_images(test_path, "nok")

train_list, df_train = _build_frame(train_ok, train_nok)
val_list, df_val = _build_frame(val_ok, val_nok)
test_list, df_test = _build_frame(test_ok, test_nok)

for _name, _frame in (("df_train", df_train), ("df_test", df_test), ("df_val", df_val)):
    print(_name, _frame.shape)
    print(_frame.head())


# ---------------------------------------------------------------------------
# Exploratory plots
# ---------------------------------------------------------------------------
def plot_class_counts(df, y_max, label_dx, label_dy):
    """Draw a bar chart of samples per class, annotating each bar with its count.

    ``y_max`` is the upper y-axis limit; ``label_dx`` / ``label_dy`` offset the
    count annotation from the bar's left edge and top.
    """
    plt.figure(figsize=(6, 4))
    # Explicit order keeps the bars aligned with the tick labels set below.
    ax = sns.countplot(x='class', data=df, order=CLASS_NAMES, palette="mako")
    plt.xlabel("Class", fontsize=12)
    plt.ylabel("# of Samples", fontsize=12)
    plt.ylim(0, y_max)
    plt.xticks(range(len(CLASS_NAMES)), CLASS_NAMES, fontsize=11)
    for p in ax.patches:
        ax.annotate(
            p.get_height(),
            (p.get_x() + label_dx, p.get_height() + label_dy),
            fontsize=13,
        )
    plt.show()


def plot_class_share(df):
    """Draw a pie chart of the class distribution of ``df``."""
    counts = df['class'].value_counts()
    plt.figure(figsize=(7, 5))
    counts.plot(
        kind='pie',
        labels=[''] * len(counts),
        autopct='%1.1f%%',
        colors=['darkcyan', 'blue'],
        explode=[0.05 if i else 0 for i in range(len(counts))],
        textprops={"fontsize": 15},
    )
    # value_counts() orders by frequency, so take the legend from its index
    # rather than hard-coding which class is the larger one.
    plt.legend(labels=list(counts.index))
    plt.show()


IMG_SIZE = 150


def show_image_grid(paths, title, rows=3, cols=4):
    """Print ``title`` and show up to ``rows * cols`` images from ``paths``."""
    print(title)
    plt.figure(figsize=(12, 12))
    # Slicing (instead of indexing 0..11) tolerates folders with fewer images.
    for i, path in enumerate(paths[:rows * cols]):
        plt.subplot(rows, cols, i + 1)
        img = cv2.imread(path)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        # OpenCV loads BGR; matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis("off")
    plt.tight_layout()
    plt.show()


plot_class_counts(df_train, y_max=500, label_dx=0.30, label_dy=100)
plot_class_share(df_train)
plot_class_counts(df_test, y_max=150, label_dx=0.32, label_dy=20)
plot_class_share(df_test)

show_image_grid(train_ok, 'Train Set - ok')
show_image_grid(train_nok, 'Train Set - nok')
show_image_grid(val_ok, 'Validation Set - ok')
show_image_grid(val_nok, 'Validation Set - nok')

# ---------------------------------------------------------------------------
# Data generators
# https://vijayabhaskar96.medium.com/tutorial-on-keras-flow-from-dataframe-1fd4493d237c
# ---------------------------------------------------------------------------
EPOCHS = 100
PATIENCE = 10
INPUT_SHAPE = (256, 256, 3)
IMG_SHAPE = (256, 256)
BATCH_SIZE = 32
SEED = 42

tf.random.set_seed(42)

model_name = 'Transfer_learning.h5'


def _flow(datagen, df, batch_size=BATCH_SIZE, shuffle=True):
    """Create a binary-label iterator over ``df`` with the shared settings."""
    return datagen.flow_from_dataframe(
        df,
        x_col='image',
        y_col='class',
        classes=CLASS_NAMES,  # pins ok -> 0, nok -> 1 (see num_label)
        target_size=IMG_SHAPE,
        class_mode='binary',
        batch_size=batch_size,
        shuffle=shuffle,
        seed=SEED if shuffle else None,
    )


train_datagen = ImageDataGenerator(rescale=1/255.)
test_datagen = ImageDataGenerator(rescale=1/255.)
val_datagen = ImageDataGenerator(rescale=1/255.)

train_g = _flow(train_datagen, df_train)
val_g = _flow(val_datagen, df_val)
# Batch size 1 and no shuffling so predictions line up with the label order.
test_g = _flow(test_datagen, df_test, batch_size=1, shuffle=False)

data_aug_train = ImageDataGenerator(
    rescale=1/255.,
    rotation_range=20,
    brightness_range=[0.2, 0.7],
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
)
# Augmented alternative to train_g; not used by the fit call below.
train_g1 = _flow(data_aug_train, df_train)


# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
def get_model(input_shape=INPUT_SHAPE, weights="imagenet"):
    """Build a frozen-VGG19 binary classifier.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
        weights: Passed to ``VGG19``. Defaults to ``"imagenet"`` because the
            backbone is frozen; a frozen, randomly initialised backbone
            (``weights=None``) cannot provide useful features.

    Returns:
        An uncompiled ``tf.keras.Model`` with a single sigmoid output.
    """
    base_model = tf.keras.applications.vgg19.VGG19(include_top=False, weights=weights)
    base_model.trainable = False

    inputs = tf.keras.layers.Input(shape=input_shape, name="input_layer")
    x = base_model(inputs)
    x = tf.keras.layers.GlobalAveragePooling2D(name="global_average_pooling_layer")(x)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid", name="output_layer")(x)
    return tf.keras.Model(inputs, outputs)


tf.keras.backend.clear_session()

model = get_model()

# NOTE(review): `decay` is rejected by the optimizers in newer Keras releases;
# kept as in the original -- confirm against the installed TensorFlow version.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6),
    metrics=['accuracy', 'Recall', 'Precision'],
)

# Directory for TensorBoard logs.
logdir = os.path.join('C:/Users/mnis9/Seal Eng Image/CV-gpu/Thesis Writing/logs', model_name)

N_FLODS = 5
INIT_LR = 3e-4
T_BS = 16
V_BS = 16
decay_rate = 0.95
decay_step = 1

# Callbacks: early stopping, best-model checkpoint, TensorBoard logging.
cp = EarlyStopping(monitor='val_loss', mode='min', verbose=2,
                   patience=PATIENCE, restore_best_weights=True)
mc = ModelCheckpoint(model_name, monitor='val_loss', mode='min',
                     verbose=2, save_best_only=True)
tsb = TensorBoard(log_dir=logdir)
# Exponential step decay. Defined but, as in the original, not passed to fit();
# add `lrs` to the callbacks list to enable it.
lrs = LearningRateScheduler(
    lambda epoch: INIT_LR * pow(decay_rate, floor(epoch / decay_step))
)

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
start = timer()

history_00 = model.fit(
    train_g,
    epochs=EPOCHS,
    steps_per_epoch=len(train_g),
    validation_data=val_g,
    validation_steps=len(val_g),
    callbacks=[cp, mc, tsb],
)
# The plotting code refers to the run as `history`.
history = history_00

end = timer()
elapsed = end - start
print('Total Time Elapsed: ', int(elapsed//60), ' minutes ', (round(elapsed%60)), ' seconds')


# ---------------------------------------------------------------------------
# Learning curves
# ---------------------------------------------------------------------------
def plot_learning_curve(hist, metric, title, ylabel):
    """Plot the train and validation series of ``metric`` from a Keras History."""
    fig, ax = plt.subplots(figsize=(20, 8))
    sns.lineplot(x=hist.epoch, y=hist.history[metric])
    sns.lineplot(x=hist.epoch, y=hist.history['val_' + metric])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(['train', 'val'], loc='best')
    plt.show()


plot_learning_curve(history, 'loss', 'Learning Curve (Loss)', 'Loss')
plot_learning_curve(history, 'accuracy', 'Learning Curve (Accuracy)', 'Accuracy')

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
score = model.evaluate(val_g, steps=len(val_g), verbose=0)
print('Val loss:', score[0])
print('Val accuracy:', score[1])

score = model.evaluate(test_g, steps=len(test_g), verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Ground truth straight from the generator: same encoding and same (unshuffled)
# order as the predictions, and it already excludes any rows the generator
# dropped as invalid files.
Y_test = np.asarray(test_g.classes)

test_g.reset()
predictions = model.predict(test_g, steps=len(test_g), verbose=0).ravel()
pred_labels = np.where(predictions > 0.5, 1, 0)

print("Test Accuracy: ", accuracy_score(Y_test, pred_labels))

confusion_matrix = metrics.confusion_matrix(Y_test, pred_labels)
sns.heatmap(confusion_matrix, annot=True, fmt="d",
            xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES)
plt.xlabel("Predicted Label", fontsize=12)
plt.ylabel("True Label", fontsize=12)
plt.show()

print(metrics.classification_report(Y_test, pred_labels, labels=[0, 1]))

roc_auc = metrics.roc_auc_score(Y_test, predictions)
print('ROC_AUC: ', roc_auc)

fpr, tpr, thresholds = metrics.roc_curve(Y_test, predictions)

plt.plot(fpr, tpr, label='ROC_AUC = %0.3f' % roc_auc)
plt.xlabel("False Positive Rate", fontsize=12)
plt.ylabel("True Positive Rate", fontsize=12)
plt.legend(loc="lower right")
plt.show()
Import dataset as df
https://www.kaggle.com/code/jonaspalucibarbosa/chest-x-ray-pneumonia-cnn-transfer-learning
Comments