#https://www.kaggle.com/code/bishwajitnayak/deep-learning-biswajit-n
import glob
# Dataset root; each split directory is expected to hold "ok" and "nok"
# sub-folders containing .jpg files.
main_path = "final_dataset"
train_path, val_path, test_path = (
    os.path.join(main_path, split_dir)
    for split_dir in ("training", "validation", "testing")
)

# Image paths per split and class, in whatever order glob returns them.
train_ok = glob.glob(f"{train_path}/ok/*.jpg")
train_nok = glob.glob(f"{train_path}/nok/*.jpg")
val_ok = glob.glob(f"{val_path}/ok/*.jpg")
val_nok = glob.glob(f"{val_path}/nok/*.jpg")
test_ok = glob.glob(f"{test_path}/ok/*.jpg")
test_nok = glob.glob(f"{test_path}/nok/*.jpg")
------------------------------------------------------------------------------------
def _split_frame(ok_paths, nok_paths):
    """Return ``(paths, frame)`` for one dataset split.

    ``paths`` lists the "ok" files followed by the "nok" files; ``frame`` has
    a 'class' column ('ok' / 'nok') and an 'image' column with those paths in
    the same order.
    """
    paths = list(ok_paths)
    paths.extend(nok_paths)
    labels = np.concatenate([['ok'] * len(ok_paths), ['nok'] * len(nok_paths)])
    frame = pd.DataFrame(labels, columns=['class'])
    frame['image'] = paths
    return paths, frame


train_list, df_train = _split_frame(train_ok, train_nok)
val_list, df_val = _split_frame(val_ok, val_nok)
test_list, df_test = _split_frame(test_ok, test_nok)
-----------------------------------------------------------------------
# Bare expressions naming the three split frames; they have no effect when the
# file runs as a plain script (presumably left over as notebook cell previews).
df_train
df_test
df_val
-------
# Bar chart of how many training samples each class has.
plt.figure(figsize=(6, 4))
ax = sns.countplot(data=df_train, x='class', palette="mako")
plt.xlabel("Class", fontsize=12)
plt.ylabel("# of Samples", fontsize=12)
plt.ylim(0, 500)
plt.xticks([0, 1], ['ok', 'nok'], fontsize=11)
# Write each bar's height as text, offset from the bar's left edge and top.
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(bar_height, (bar.get_x() + 0.30, bar_height + 100), fontsize=13)
plt.show()
------------
# Pie chart of the class balance in the training set.
plt.figure(figsize=(7, 5))
train_class_counts = df_train['class'].value_counts()
train_class_counts.plot(
    kind='pie',
    # Slice labels are blanked; class names are shown in the legend instead.
    labels=[''] * len(train_class_counts),
    autopct='%1.1f%%',
    colors=['darkcyan', 'blue'],
    # Pull every slice after the first slightly out of the pie.
    explode=[0.05 if position else 0 for position in range(len(train_class_counts))],
    textprops={"fontsize": 15},
)
# value_counts() orders classes by frequency, so the legend is taken from its
# index rather than hard-coded; a fixed ['nok', 'ok'] mislabels the slices
# whenever 'ok' is the more frequent class.
plt.legend(labels=list(train_class_counts.index))
plt.show()
------------
# Bar chart of how many test samples each class has.
plt.figure(figsize=(6, 4))
ax = sns.countplot(data=df_test, x='class', palette="mako")
plt.xlabel("Class", fontsize=12)
plt.ylabel("# of Samples", fontsize=12)
plt.ylim(0, 150)
plt.xticks([0, 1], ['ok', 'nok'], fontsize=11)
# Write each bar's height as text, offset from the bar's left edge and top.
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(bar_height, (bar.get_x() + 0.32, bar_height + 20), fontsize=13)
plt.show()
--------------
# Pie chart of the class balance in the test set.
plt.figure(figsize=(7, 5))
test_class_counts = df_test['class'].value_counts()
test_class_counts.plot(
    kind='pie',
    # Slice labels are blanked; class names are shown in the legend instead.
    labels=[''] * len(test_class_counts),
    autopct='%1.1f%%',
    colors=['darkcyan', 'blue'],
    # Pull every slice after the first slightly out of the pie.
    explode=[0.05 if position else 0 for position in range(len(test_class_counts))],
    textprops={"fontsize": 15},
)
# value_counts() orders classes by frequency, so the legend is taken from its
# index rather than hard-coded; a fixed ['nok', 'ok'] mislabels the slices
# whenever 'ok' is the more frequent class.
plt.legend(labels=list(test_class_counts.index))
plt.show()
---------------
# Side length (pixels) the preview images are resized to.
IMG_SIZE = 150

# 3x4 grid with up to twelve "ok" images from the training set.
print('Train Set - ok')
plt.figure(figsize=(12, 12))
# Slice instead of indexing 0..11 so fewer than twelve files cannot raise
# IndexError.
for i, image_path in enumerate(train_ok[:12]):
    plt.subplot(3, 4, i + 1)
    plt.axis("off")
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for a file it cannot decode; leave the cell blank.
        continue
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # OpenCV decodes to BGR channel order while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.tight_layout()
plt.show()
-----------------
# 3x4 grid with up to twelve "nok" images from the training set.
print('Train Set - nok')
plt.figure(figsize=(12, 12))
# Slice instead of indexing 0..11 so fewer than twelve files cannot raise
# IndexError.
for i, image_path in enumerate(train_nok[:12]):
    plt.subplot(3, 4, i + 1)
    plt.axis("off")
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for a file it cannot decode; leave the cell blank.
        continue
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # OpenCV decodes to BGR channel order while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.tight_layout()
plt.show()
----------------
# 3x4 grid with up to twelve "ok" images from the validation set.
print('Validation Set - ok')
plt.figure(figsize=(12, 12))
# Slice instead of indexing 0..11 so fewer than twelve files cannot raise
# IndexError.
for i, image_path in enumerate(val_ok[:12]):
    plt.subplot(3, 4, i + 1)
    plt.axis("off")
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for a file it cannot decode; leave the cell blank.
        continue
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # OpenCV decodes to BGR channel order while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.tight_layout()
plt.show()
--------------
# 3x4 grid with up to twelve "nok" images from the validation set.
print('Validation Set - nok')
plt.figure(figsize=(12, 12))
# Slice instead of indexing 0..11 so fewer than twelve files cannot raise
# IndexError.
for i, image_path in enumerate(val_nok[:12]):
    plt.subplot(3, 4, i + 1)
    plt.axis("off")
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None for a file it cannot decode; leave the cell blank.
        continue
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    # OpenCV decodes to BGR channel order while matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# The argument-less plt.title() call was dropped: title() requires a label and
# raised TypeError, and the other preview grids use no per-image title.
plt.tight_layout()
plt.show()
---------------
# https://vijayabhaskar96.medium.com/tutorial-on-keras-flow-from-dataframe-1fd4493d237c
# Training hyper-parameters.
EPOCHS = 100
PATIENCE = 10
BATCH_SIZE = 32

# Image geometry: (height, width) for the generators, plus 3 channels for the
# model input.
IMG_SHAPE = (256, 256)
INPUT_SHAPE = (*IMG_SHAPE, 3)

# One seed for TensorFlow and for the generators' shuffling.
SEED = 42
tf.random.set_seed(SEED)

# File name used for the saved model and for the log sub-directory.
model_name = 'Transfer_learning.h5'
# One generator per split; each only rescales pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1/255.)
test_datagen = ImageDataGenerator(rescale=1/255.)
val_datagen = ImageDataGenerator(rescale=1/255.)

# NOTE(review): no explicit `classes` is passed, so Keras derives the
# label -> index mapping itself (see `<iterator>.class_indices`); confirm that
# any manual label mapping elsewhere agrees with it.
train_g = train_datagen.flow_from_dataframe(
    df_train,
    x_col='image',
    y_col='class',
    target_size=IMG_SHAPE,
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED,
)
val_g = val_datagen.flow_from_dataframe(
    df_val,
    x_col='image',
    y_col='class',
    target_size=IMG_SHAPE,
    class_mode='binary',
    shuffle=True,
    batch_size=BATCH_SIZE,
    seed=SEED,
)
# Built from test_datagen (previously val_datagen, leaving test_datagen unused).
# Batch size 1 and no shuffling keep predictions in df_test row order.
test_g = test_datagen.flow_from_dataframe(
    df_test,
    x_col='image',
    y_col='class',
    target_size=IMG_SHAPE,
    class_mode='binary',
    batch_size=1,
    shuffle=False,
)
-----
# Augmenting generator for the training set: rescaling plus random rotation,
# brightness, shift, shear, zoom and horizontal flip.
# NOTE(review): both brightness bounds are below 1.0, which presumably only
# ever darkens images — confirm this is intended.
augmentation_settings = {
    "rescale": 1/255.,
    "rotation_range": 20,
    "brightness_range": [0.2, 0.7],
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "shear_range": 0.1,
    "zoom_range": 0.1,
    "horizontal_flip": True,
}
data_aug_train = ImageDataGenerator(**augmentation_settings)

# Same dataframe and batching options as the plain training generator.
train_g1 = data_aug_train.flow_from_dataframe(
    df_train,
    x_col='image',
    y_col='class',
    target_size=IMG_SHAPE,
    class_mode='binary',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED,
)
----
def get_model(input_shape=(256, 256, 3), weights="imagenet"):
    """Build a binary classifier on top of a frozen VGG19 convolutional base.

    Args:
        input_shape: (height, width, channels) of the model input.
        weights: Forwarded to VGG19's ``weights`` argument. Defaults to
            "imagenet"; pass None to get a randomly initialised base.

    Returns:
        An uncompiled ``tf.keras.Model`` with a single sigmoid output unit.
    """
    # The base is frozen below, so it must carry pretrained weights: a frozen,
    # randomly initialised base (weights=None) can never learn useful features
    # and leaves only the final dense layer trainable.
    # NOTE(review): the ImageNet VGG19 weights were trained with
    # `vgg19.preprocess_input`; the generators in this file rescale by 1/255 —
    # confirm which preprocessing is intended.
    base_model = tf.keras.applications.vgg19.VGG19(include_top=False, weights=weights)
    base_model.trainable = False

    inputs = tf.keras.layers.Input(shape=input_shape, name="input_layer")
    x = base_model(inputs)
    # Collapse the spatial feature map into one vector per image.
    x = tf.keras.layers.GlobalAveragePooling2D(name="global_average_pooling_layer")(x)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid", name="output_layer")(x)
    return tf.keras.Model(inputs, outputs)
----
# Reset Keras' global state, then build a fresh model.
tf.keras.backend.clear_session()
model = get_model()

# Compile for binary classification, tracking accuracy, recall and precision.
# NOTE(review): newer Keras optimizers may reject the `decay` argument —
# confirm against the installed TensorFlow version.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)
tracked_metrics = ['accuracy', 'Recall', 'Precision']
model.compile(
    optimizer=adam_optimizer,
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)
from math import floor

# TensorBoard log directory: one sub-directory per model name.
logdir = os.path.join('C:/Users/mnis9/Seal Eng Image/CV-gpu/Thesis Writing/logs', model_name)
#os.mkdir(logdir)

# Additional hyper-parameters; only INIT_LR, decay_rate and decay_step are
# read in this section (by the learning-rate schedule below).
N_FLODS = 5
INIT_LR = 3e-4
T_BS = 16
V_BS = 16
decay_rate = 0.95
decay_step = 1


def _stepwise_decayed_lr(epoch):
    """Return INIT_LR multiplied by decay_rate once per completed decay_step."""
    return INIT_LR * pow(decay_rate, floor(epoch / decay_step))


# Stop once val_loss has not improved for PATIENCE epochs and restore the best weights.
cp = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=2,
    patience=PATIENCE,
    restore_best_weights=True,
)
# Save the model to `model_name` whenever val_loss reaches a new minimum.
mc = ModelCheckpoint(
    model_name,
    monitor='val_loss',
    mode='min',
    verbose=2,
    save_best_only=True,
)
tsb = TensorBoard(log_dir=logdir)
lrs = LearningRateScheduler(_stepwise_decayed_lr)
# Train the model and report the wall-clock time taken.
start = timer()
# NOTE(review): the learning-rate scheduler `lrs` is not in the callback list,
# so the optimizer's own learning rate is used — confirm this is intended.
history_00 = model.fit(
    train_g,
    epochs=EPOCHS,  # use the shared constant instead of a second literal 100
    steps_per_epoch=len(train_g),
    validation_data=val_g,
    validation_steps=len(val_g),
    callbacks=[cp, mc, tsb],
)
end = timer()
elapsed = end - start
# Round to whole seconds before splitting, so the seconds field can never
# print as 60 (e.g. 119.6 s is reported as 2 minutes 0 seconds).
elapsed_minutes, elapsed_seconds = divmod(round(elapsed), 60)
print('Total Time Elapsed: ', elapsed_minutes, ' minutes ', elapsed_seconds, ' seconds')
----
# Training vs. validation loss per epoch.
# The fit result is stored as `history_00`; the previous name `history` was
# never defined and raised NameError.
fig, ax = plt.subplots(figsize=(20, 8))
sns.lineplot(x=history_00.epoch, y=history_00.history['loss'])
sns.lineplot(x=history_00.epoch, y=history_00.history['val_loss'])
ax.set_title('Learning Curve (Loss)')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
# ax.set_ylim(0, 0.5)
# Legend entries follow the order in which the two lines were drawn.
ax.legend(['train', 'val'], loc='best')
plt.show()
-----------
# Training vs. validation accuracy per epoch.
# The fit result is stored as `history_00`; the previous name `history` was
# never defined and raised NameError.
fig, ax = plt.subplots(figsize=(20, 8))
sns.lineplot(x=history_00.epoch, y=history_00.history['accuracy'])
sns.lineplot(x=history_00.epoch, y=history_00.history['val_accuracy'])
ax.set_title('Learning Curve (Accuracy)')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
# ax.set_ylim(0.80, 1.0)
# Legend entries follow the order in which the two lines were drawn.
ax.legend(['train', 'val'], loc='best')
plt.show()
--------
# Evaluate on the validation generator. `ds_val` and `BATCH` were never
# defined; the generator is `val_g`, and its length is the integer number of
# batches needed to cover the validation set once.
score = model.evaluate(val_g, steps=len(val_g), verbose=0)
print('Val loss:', score[0])
print('Val accuracy:', score[1])
--------
# Evaluate on the test generator. `ds_test` was never defined; the generator
# is `test_g`, and its length is the number of batches covering the test set.
score = model.evaluate(test_g, steps=len(test_g), verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
----------
# Encode the true test labels with the mapping the generator itself used.
# flow_from_dataframe indexes the class names in sorted order ('nok' -> 0,
# 'ok' -> 1), so the hand-written {'ok': 0, 'nok': 1} was inverted relative to
# the model's output and flipped every metric computed from Y_test.
num_label = dict(test_g.class_indices)
Y_test = df_test['class'].map(num_label).astype('int')
---------
# Predict on the test generator. `ds_test` was never defined; the generator is
# `test_g`. Resetting first makes prediction start from the first sample.
test_g.reset()
predictions = model.predict(test_g, steps=len(test_g), verbose=0)
# Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels.
pred_labels = np.where(predictions > 0.5, 1, 0)
------
# Share of test samples whose thresholded prediction equals the true label.
test_accuracy = accuracy_score(Y_test, pred_labels)
print("Test Accuracy: ", test_accuracy)
----------
import sklearn.metrics as metrics

# Confusion matrix of true vs. predicted test labels, drawn as an annotated
# heatmap with integer cell counts.
confusion_matrix = metrics.confusion_matrix(Y_test, pred_labels)
sns.heatmap(data=confusion_matrix, annot=True, fmt="d")
axis_label_style = {"fontsize": 12}
plt.xlabel("Predicted Label", **axis_label_style)
plt.ylabel("True Label", **axis_label_style)
plt.show()
-----------
# Text report of the per-class metrics for label values 0 and 1.
class_report = metrics.classification_report(Y_test, pred_labels, labels=[0, 1])
print(class_report)
-------
# ROC analysis on the raw (un-thresholded) model outputs.
roc_auc = metrics.roc_auc_score(Y_test, predictions)
print('ROC_AUC: ', roc_auc)

# Draw the ROC curve, with the AUC shown in the legend.
fpr, tpr, thresholds = metrics.roc_curve(Y_test, predictions)
curve_label = 'ROC_AUC = %0.3f' % roc_auc
plt.plot(fpr, tpr, label=curve_label)
axis_label_style = {"fontsize": 12}
plt.xlabel("False Positive Rate", **axis_label_style)
plt.ylabel("True Positive Rate", **axis_label_style)
plt.legend(loc="lower right")
plt.show()