Kaggle X-Ray

PHOTO EMBED

Tue Aug 02 2022 01:02:44 GMT+0000 (Coordinated Universal Time)

Saved by @mnis00014

#https://www.kaggle.com/code/bishwajitnayak/deep-learning-biswajit-n
import glob
import os

import numpy as np
import pandas as pd
# Root folder that holds the training / validation / testing splits.
main_path = "final_dataset"


train_path = os.path.join(main_path, "training")
val_path = os.path.join(main_path, "validation")
test_path = os.path.join(main_path, "testing")

# Every split has an "ok" and a "nok" sub-folder of .jpg files.
# glob returns matches in arbitrary order, so the lists are sorted to keep
# the row order of the frames built from them stable between runs/machines.
train_ok = sorted(glob.glob(os.path.join(train_path, "ok", "*.jpg")))
train_nok = sorted(glob.glob(os.path.join(train_path, "nok", "*.jpg")))

val_ok = sorted(glob.glob(os.path.join(val_path, "ok", "*.jpg")))
val_nok = sorted(glob.glob(os.path.join(val_path, "nok", "*.jpg")))

test_ok = sorted(glob.glob(os.path.join(test_path, "ok", "*.jpg")))
test_nok = sorted(glob.glob(os.path.join(test_path, "nok", "*.jpg")))

------------------------------------------------------------------------------------

# One frame per split with two columns: 'class' ('ok' / 'nok') and 'image'
# (the file path). Rows list every 'ok' file first, then every 'nok' file.

# --- training split ---
train_list = [*train_ok, *train_nok]
df_train = pd.DataFrame(
    np.concatenate([len(train_ok) * ['ok'], len(train_nok) * ['nok']]),
    columns=['class'],
)
df_train['image'] = list(train_list)

# --- validation split ---
val_list = [*val_ok, *val_nok]
df_val = pd.DataFrame(
    np.concatenate([len(val_ok) * ['ok'], len(val_nok) * ['nok']]),
    columns=['class'],
)
df_val['image'] = list(val_list)

# --- testing split ---
test_list = [*test_ok, *test_nok]
df_test = pd.DataFrame(
    np.concatenate([len(test_ok) * ['ok'], len(test_nok) * ['nok']]),
    columns=['class'],
)
df_test['image'] = list(test_list)

-----------------------------------------------------------------------
  
# Bare expressions that name the three split frames; they have no effect when
# run as a script and only produce output in an interactive session.
df_train
df_test
df_val

-------

  plt.figure(figsize=(6,4))

ax = sns.countplot(x='class', data=df_train, palette="mako")

plt.xlabel("Class", fontsize= 12)
plt.ylabel("# of Samples", fontsize= 12)
plt.ylim(0,500)
plt.xticks([0,1], ['ok', 'nok'], fontsize = 11)

for p in ax.patches:
    ax.annotate((p.get_height()), (p.get_x()+0.30, p.get_height()+100), fontsize = 13)
    
plt.show()  

------------

# Pie chart of the class shares in the training split.
plt.figure(figsize=(7,5))

# value_counts() orders the classes by frequency, so the wedge order depends
# on the data. Keep the counts around so the legend can follow that order.
train_class_counts = df_train['class'].value_counts()
train_class_counts.plot(kind='pie', labels=['', ''], autopct='%1.1f%%',
                        colors=['darkcyan', 'blue'], explode=[0, 0.05],
                        textprops={"fontsize": 15})

# Legend entries come from the counted index instead of a hard-coded
# ['nok', 'ok'], which mislabels the wedges whenever 'ok' is the larger class.
plt.legend(labels=list(train_class_counts.index))
plt.show()

------------

# Bar chart of the class counts in the test split, with each bar annotated
# by its height.
plt.figure(figsize=(6, 4))
ax = sns.countplot(data=df_test, x='class', palette="mako")

# Axis cosmetics.
plt.xlabel("Class", fontsize=12)
plt.ylabel("# of Samples", fontsize=12)
plt.ylim(0, 150)
plt.xticks([0, 1], ['ok', 'nok'], fontsize=11)

# Place the count 20 data units above the top of every bar.
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(bar_height, (bar.get_x() + 0.32, bar_height + 20), fontsize=13)

plt.show()

--------------

# Pie chart of the class shares in the test split.
plt.figure(figsize=(7,5))

# value_counts() orders the classes by frequency, so the wedge order depends
# on the data. Keep the counts around so the legend can follow that order.
test_class_counts = df_test['class'].value_counts()
test_class_counts.plot(kind='pie', labels=['', ''], autopct='%1.1f%%',
                       colors=['darkcyan', 'blue'], explode=[0, 0.05],
                       textprops={"fontsize": 15})

# Legend entries come from the counted index instead of a hard-coded
# ['nok', 'ok'], which mislabels the wedges whenever 'ok' is the larger class.
plt.legend(labels=list(test_class_counts.index))
plt.show()

---------------
# Side length (pixels) the preview images are resized to.
IMG_SIZE = 150
print('Train Set - ok')

plt.figure(figsize=(12,12))

# Preview up to 12 'ok' training images on a 3x4 grid. Slicing instead of a
# fixed range(12) avoids an IndexError when fewer than 12 files were found.
for i, image_path in enumerate(train_ok[:12]):
    plt.subplot(3, 4, i + 1)
    img = cv2.imread(image_path)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # cv2.imread yields BGR channel order while plt.imshow expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis("off")

plt.tight_layout()

plt.show()

-----------------
  
print('Train Set - nok')

plt.figure(figsize=(12,12))

# Preview up to 12 'nok' training images on a 3x4 grid. Slicing instead of a
# fixed range(12) avoids an IndexError when fewer than 12 files were found.
for i, image_path in enumerate(train_nok[:12]):
    plt.subplot(3, 4, i + 1)
    img = cv2.imread(image_path)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # cv2.imread yields BGR channel order while plt.imshow expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis("off")

plt.tight_layout()

plt.show()

----------------

print('Validation Set - ok')

plt.figure(figsize=(12,12))

# Preview up to 12 'ok' validation images on a 3x4 grid. Slicing instead of a
# fixed range(12) avoids an IndexError when fewer than 12 files were found.
for i, image_path in enumerate(val_ok[:12]):
    plt.subplot(3, 4, i + 1)
    img = cv2.imread(image_path)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # cv2.imread yields BGR channel order while plt.imshow expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis("off")

plt.tight_layout()

plt.show()

--------------

print('Validation Set - nok')

plt.figure(figsize=(12,12))

# Preview up to 12 'nok' validation images on a 3x4 grid. Slicing instead of
# a fixed range(12) avoids an IndexError when fewer than 12 files were found.
for i, image_path in enumerate(val_nok[:12]):
    plt.subplot(3, 4, i + 1)
    img = cv2.imread(image_path)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # cv2.imread yields BGR channel order while plt.imshow expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis("off")
    # The argument-less plt.title() call that used to sit here raised a
    # TypeError (the label is required); the other preview grids in this file
    # draw no per-image title, so it is dropped rather than given a label.

plt.tight_layout()

plt.show()

---------------
  
# https://vijayabhaskar96.medium.com/tutorial-on-keras-flow-from-dataframe-1fd4493d237c
# Training configuration shared by the cells below.
EPOCHS = 100
PATIENCE = 10
INPUT_SHAPE = (256, 256, 3)
IMG_SHAPE = (256, 256)
BATCH_SIZE = 32
SEED = 42

# Seed TensorFlow from the same constant the generators use.
tf.random.set_seed(SEED)

model_name = 'Transfer_learning.h5'

# One generator per split; each only rescales pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale = 1/255.)
test_datagen = ImageDataGenerator(rescale = 1/255.)
val_datagen = ImageDataGenerator (rescale = 1/255.)


# Shuffled, batched iterator over the training frame.
train_g = train_datagen.flow_from_dataframe(df_train,
                                            x_col = 'image',
                                            y_col = 'class',
                                            target_size = IMG_SHAPE,
                                            class_mode = 'binary',
                                            batch_size = BATCH_SIZE,
                                            shuffle = True,
                                            seed = SEED)

# Shuffled, batched iterator over the validation frame.
val_g = val_datagen.flow_from_dataframe(df_val,
                                        x_col = 'image',
                                        y_col = 'class',
                                        target_size = IMG_SHAPE,
                                        class_mode = 'binary',
                                        shuffle = True,
                                        batch_size = BATCH_SIZE,
                                        seed = SEED)

# Test iterator: built from test_datagen (it was previously built from
# val_datagen, leaving test_datagen unused). One image per batch and no
# shuffling, so outputs stay in the row order of df_test.
test_g = test_datagen.flow_from_dataframe(df_test,
                                          x_col = 'image',
                                          y_col = 'class',
                                          target_size = IMG_SHAPE,
                                          class_mode = 'binary',
                                          batch_size = 1,
                                          shuffle = False)
                                            
-----
  
# Training-time augmentation: besides the 1/255 rescale, every image gets the
# random rotation / brightness / shift / shear / zoom / flip configured here.
data_aug_train = ImageDataGenerator(
    rescale=1/255.,
    rotation_range=20,
    brightness_range=[0.2, 0.7],
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
)

# Shuffled, batched iterator over the training frame that applies the
# augmentation above.
train_g1 = data_aug_train.flow_from_dataframe(
    df_train,
    x_col='image',
    y_col='class',
    class_mode='binary',
    target_size=IMG_SHAPE,
    batch_size=BATCH_SIZE,
    seed=SEED,
    shuffle=True,
)
----

def get_model(input_shape=(256, 256, 3), weights="imagenet"):
    """Build a binary classifier on top of a frozen VGG19 backbone.

    The backbone is created without its classification head and marked
    non-trainable, so only the final dense layer is fitted.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        weights: Forwarded to ``VGG19(weights=...)``. The default loads the
            pretrained ImageNet weights (fetched on first use). Pass ``None``
            for the previous behaviour of a randomly initialised backbone;
            because the backbone is frozen, that leaves the head training on
            random features.

    Returns:
        An uncompiled ``tf.keras.Model`` with a single sigmoid output unit.
    """
    base_model = tf.keras.applications.vgg19.VGG19(include_top=False, weights=weights)
    # Freeze the backbone: only the layers added below are updated in fit().
    base_model.trainable = False

    inputs = tf.keras.layers.Input(shape=input_shape, name="input_layer")
    x = base_model(inputs)
    # Collapse the spatial feature map to one vector per image.
    x = tf.keras.layers.GlobalAveragePooling2D(name="global_average_pooling_layer")(x)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid", name="output_layer")(x)
    model = tf.keras.Model(inputs, outputs)

    return model

----

# Start from a clean Keras state before building the model.
tf.keras.backend.clear_session()

model = get_model()

# compile model
# NOTE(review): `decay` is a legacy Adam argument -- confirm the installed
# Keras version still accepts it.
model.compile(loss='binary_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate = 0.001, decay=1e-6),
              metrics=['accuracy', 'Recall', 'Precision'])


# make directory for logs
logdir = os.path.join('C:/Users/mnis9/Seal Eng Image/CV-gpu/Thesis Writing/logs', model_name)
#os.mkdir(logdir)

from math import floor
# Only INIT_LR, decay_rate and decay_step are read below (by the scheduler
# lambda); N_FLODS, T_BS and V_BS are not used in this cell.
N_FLODS = 5
INIT_LR = 3e-4
T_BS = 16
V_BS = 16
decay_rate = 0.95
decay_step = 1

# early stopping: stop after PATIENCE epochs without a lower val_loss and
# roll the model back to its best weights.
cp = EarlyStopping(monitor ='val_loss', mode = 'min', verbose = 2, patience = PATIENCE, restore_best_weights=True)

# Save the model to `model_name` whenever val_loss improves.
mc = ModelCheckpoint(model_name, monitor = 'val_loss', mode = 'min', verbose = 2, save_best_only = True)

tsb = TensorBoard(log_dir=logdir)

# Step-wise exponential decay: INIT_LR * decay_rate ** floor(epoch / decay_step).
# NOTE(review): `lrs` is built but not passed to fit() below, so this schedule
# is currently inactive -- confirm whether that is intentional.
lrs = LearningRateScheduler(lambda epoch : INIT_LR * pow(decay_rate, floor(epoch / decay_step)))

# training
start = timer()

# Fit the model
# epochs comes from the shared EPOCHS constant instead of a repeated literal.
history_00= model.fit(train_g,
                   epochs=EPOCHS,
                   steps_per_epoch=len(train_g),
                   validation_data=val_g,
                   validation_steps=len(val_g),
                   callbacks= [cp, mc, tsb])


end = timer()
elapsed = end - start
print('Total Time Elapsed: ', int(elapsed//60), ' minutes ', (round(elapsed%60)), ' seconds')

----

# Training vs. validation loss per epoch.
# The fit cell stores its result in `history_00`; the undefined name `history`
# that was used here raised a NameError.
fig, ax = plt.subplots(figsize=(20,8))
sns.lineplot(x = history_00.epoch, y = history_00.history['loss'], ax = ax)
sns.lineplot(x = history_00.epoch, y = history_00.history['val_loss'], ax = ax)
ax.set_title('Learning Curve (Loss)')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
# ax.set_ylim(0, 0.5)
# Legend order follows the order the two lines were drawn in.
ax.legend(['train', 'val'], loc='best')
plt.show()

-----------
  
# Training vs. validation accuracy per epoch.
# The fit cell stores its result in `history_00`; the undefined name `history`
# that was used here raised a NameError.
fig, ax = plt.subplots(figsize=(20,8))
sns.lineplot(x = history_00.epoch, y = history_00.history['accuracy'], ax = ax)
sns.lineplot(x = history_00.epoch, y = history_00.history['val_accuracy'], ax = ax)
ax.set_title('Learning Curve (Accuracy)')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
# ax.set_ylim(0.80, 1.0)
# Legend order follows the order the two lines were drawn in.
ax.legend(['train', 'val'], loc='best')
plt.show()

--------

# Evaluate on the validation iterator. `ds_val` and `BATCH` are not defined
# anywhere in this file; the validation iterator is `val_g`, and its length is
# already a whole number of batches (the old len(df_val)/BATCH was a float).
score = model.evaluate(val_g, steps = len(val_g), verbose = 0)
# score follows the compile() order: loss first, then the listed metrics.
print('Val loss:', score[0])
print('Val accuracy:', score[1])

--------

# Evaluate on the test iterator. `ds_test` is not defined anywhere in this
# file; the test iterator is `test_g`, and its own length is the step count.
score = model.evaluate(test_g, steps = len(test_g), verbose = 0)

# score follows the compile() order: loss first, then the listed metrics.
print('Test loss:', score[0])
print('Test accuracy:', score[1])

----------

# Integer ground-truth labels for the test frame.
# The mapping is taken from the test iterator rather than written by hand:
# flow_from_dataframe assigns class indices in alphanumeric order, so the
# previous {'ok': 0, 'nok': 1} was the reverse of what the model was trained
# and evaluated on, which inverted every metric computed from Y_test.
num_label = dict(test_g.class_indices)
Y_test = df_test['class'].map(num_label).astype('int')

---------
  
# Predict on the test iterator. `ds_test` is not defined anywhere in this
# file; the unshuffled test iterator is `test_g`.
# Rewind first so predictions start at the first row of the test frame.
test_g.reset()
predictions = model.predict(test_g, steps=len(test_g), verbose=0)
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels.
pred_labels= np.where(predictions>0.5, 1, 0)

------

# accuracy_score was used without ever being imported (only
# `sklearn.metrics as metrics` is imported, and only further down).
from sklearn.metrics import accuracy_score

# Share of test images whose thresholded prediction equals the true label.
print("Test Accuracy: ", accuracy_score(Y_test, pred_labels))

----------

import sklearn.metrics as metrics

# Confusion matrix of true vs. predicted test labels, drawn as an annotated
# heatmap with integer cell counts.
confusion_matrix = metrics.confusion_matrix(Y_test, pred_labels)
sns.heatmap(confusion_matrix, fmt="d", annot=True)

# Axis captions.
plt.ylabel("True Label", fontsize=12)
plt.xlabel("Predicted Label", fontsize=12)

plt.show()

-----------
  
# Text report of the per-class metrics for labels 0 and 1 on the test set.
print(
    metrics.classification_report(
        Y_test,
        pred_labels,
        labels=[0, 1],
    )
)

-------
  
  
# ROC analysis on the raw (un-thresholded) test predictions.
roc_auc = metrics.roc_auc_score(Y_test, predictions)
print('ROC_AUC: ', roc_auc)

# Points of the ROC curve; the thresholds are returned but not used below.
fpr, tpr, thresholds = metrics.roc_curve(Y_test, predictions)

# Draw the curve and carry the AUC (3 decimals) in the legend entry.
plt.plot(fpr, tpr, label=f'ROC_AUC = {roc_auc:0.3f}')
plt.xlabel("False Positive Rate", fontsize=12)
plt.ylabel("True Positive Rate", fontsize=12)
plt.legend(loc="lower right")

plt.show()
content_copyCOPY

Import dataset as df

https://www.kaggle.com/code/jonaspalucibarbosa/chest-x-ray-pneumonia-cnn-transfer-learning