### We create a bunch of helpful functions throughout the course.
### Storing them here so they're easily accessible.
import tensorflow as tf
# Create a function to import an image and resize it to be able to be used with our model
def load_and_prep_image(filename, img_shape=256, scale=True):
"""
Reads in an image from filename, turns it into a tensor and reshapes into
(224, 224, 3).
Parameters
----------
filename (str): string filename of target image
img_shape (int): size to resize target image to, default 224
scale (bool): whether to scale pixel values to range(0, 1), default True
"""
# Read in the image
img = tf.io.read_file(filename)
# Decode it into a tensor
img = tf.image.decode_jpeg(img)
# Resize the image
img = tf.image.resize(img, [img_shape, img_shape])
if scale:
# Rescale the image (get all values between 0 and 1)
return img/255.
else:
return img
# Note: The following confusion matrix code is a remix of Scikit-Learn's
# plot_confusion_matrix function - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.plot_confusion_matrix.html
import itertools
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
# Our function needs a different name to sklearn's plot_confusion_matrix
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False):
"""Makes a labelled confusion matrix comparing predictions and ground truth labels.
If classes is passed, confusion matrix will be labelled, if not, integer class values
will be used.
Args:
y_true: Array of truth labels (must be same shape as y_pred).
y_pred: Array of predicted labels (must be same shape as y_true).
classes: Array of class labels (e.g. string form). If `None`, integer labels are used.
figsize: Size of output figure (default=(10, 10)).
text_size: Size of output figure text (default=15).
norm: normalize values or not (default=False).
savefig: save confusion matrix to file (default=False).
Returns:
A labelled confusion matrix plot comparing y_true and y_pred.
Example usage:
make_confusion_matrix(y_true=test_labels, # ground truth test labels
y_pred=y_preds, # predicted labels
classes=class_names, # array of class label names
figsize=(15, 15),
text_size=10)
"""
# Create the confustion matrix
cm = confusion_matrix(y_true, y_pred)
cm_norm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis] # normalize it
n_classes = cm.shape[0] # find the number of classes we're dealing with
# Plot the figure and make it pretty
fig, ax = plt.subplots(figsize=figsize)
cax = ax.matshow(cm, cmap=plt.cm.Blues) # colors will represent how 'correct' a class is, darker == better
fig.colorbar(cax)
# Are there a list of classes?
if classes:
labels = classes
else:
labels = np.arange(cm.shape[0])
# Label the axes
ax.set(title="Confusion Matrix",
xlabel="Predicted label",
ylabel="True label",
xticks=np.arange(n_classes), # create enough axis slots for each class
yticks=np.arange(n_classes),
xticklabels=labels, # axes will labeled with class names (if they exist) or ints
yticklabels=labels)
# Make x-axis labels appear on bottom
ax.xaxis.set_label_position("bottom")
ax.xaxis.tick_bottom()
# Set the threshold for different colors
threshold = (cm.max() + cm.min()) / 2.
# Plot the text on each cell
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
if norm:
plt.text(j, i, f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)",
horizontalalignment="center",
color="white" if cm[i, j] > threshold else "black",
size=text_size)
else:
plt.text(j, i, f"{cm[i, j]}",
horizontalalignment="center",
color="white" if cm[i, j] > threshold else "black",
size=text_size)
# Save the figure to the current working directory
if savefig:
fig.savefig("confusion_matrix.png")
# Make a function to predict on images and plot them (works with multi-class)
def plot_performance(hist):
hist_ = hist.history
epochs = hist.epoch
plt.plot(epochs, hist_['accuracy'], label='Training Accuracy')
plt.plot(epochs, hist_['val_accuracy'], label='Validation Accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, hist_['loss'], label='Training loss')
plt.plot(epochs, hist_['val_loss'], label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
recall = np.array(hist_['recall'])
precision = np.array(hist_['precision'])
val_recall = np.array(hist_['val_recall'])
val_precision = np.array(hist_['val_precision'])
plt.figure()
plt.plot(epochs,
2*((recall * precision)/(recall + precision)),
label='Training f1')
plt.plot(epochs,
2*((val_recall * val_precision)/(val_recall + val_precision)),
label='Validation f1')
plt.title('Training and validation F1-Score')
plt.legend()
plt.show()
import datetime
def create_tensorboard_callback(dir_name, experiment_name):
"""
Creates a TensorBoard callback instand to store log files.
Stores log files with the filepath:
"dir_name/experiment_name/current_datetime/"
Args:
dir_name: target directory to store TensorBoard log files
experiment_name: name of experiment directory (e.g. efficientnet_model_1)
"""
log_dir = dir_name + "/" + experiment_name + "/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
log_dir=log_dir
)
print(f"Saving TensorBoard log files to: {log_dir}")
return tensorboard_callback
# Plot the validation and training data separately
import matplotlib.pyplot as plt
def plot_performance(hist):
hist_ = hist.history
epochs = hist.epoch
plt.plot(epochs, hist_['accuracy'], label='Training Accuracy')
plt.plot(epochs, hist_['val_accuracy'], label='Validation Accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, hist_['loss'], label='Training loss')
plt.plot(epochs, hist_['val_loss'], label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
recall = np.array(hist_['recall'])
precision = np.array(hist_['precision'])
val_recall = np.array(hist_['val_recall'])
val_precision = np.array(hist_['val_precision'])
plt.figure()
plt.plot(epochs,
2*((recall * precision)/(recall + precision)),
label='Training f1')
plt.plot(epochs,
2*((val_recall * val_precision)/(val_recall + val_precision)),
label='Validation f1')
plt.title('Training and validation F1-Score')
plt.legend()
plt.show()
def compare_historys(original_history, new_history, initial_epochs=5):
"""
Compares two TensorFlow model History objects.
Args:
original_history: History object from original model (before new_history)
new_history: History object from continued model training (after original_history)
initial_epochs: Number of epochs in original_history (new_history plot starts from here)
"""
# Get original history measurements
acc = original_history.history["accuracy"]
loss = original_history.history["loss"]
val_acc = original_history.history["val_accuracy"]
val_loss = original_history.history["val_loss"]
# Combine original history with new history
total_acc = acc + new_history.history["accuracy"]
total_loss = loss + new_history.history["loss"]
total_val_acc = val_acc + new_history.history["val_accuracy"]
total_val_loss = val_loss + new_history.history["val_loss"]
# Make plots
plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(total_acc, label='Training Accuracy')
plt.plot(total_val_acc, label='Validation Accuracy')
plt.plot([initial_epochs-1, initial_epochs-1],
plt.ylim(), label='Start Fine Tuning') # reshift plot around epochs
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(2, 1, 2)
plt.plot(total_loss, label='Training Loss')
plt.plot(total_val_loss, label='Validation Loss')
plt.plot([initial_epochs-1, initial_epochs-1],
plt.ylim(), label='Start Fine Tuning') # reshift plot around epochs
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()
# Create function to unzip a zipfile into current working directory
# (since we're going to be downloading and unzipping a few files)
import zipfile
def unzip_data(filename):
"""
Unzips filename into the current working directory.
Args:
filename (str): a filepath to a target zip folder to be unzipped.
"""
zip_ref = zipfile.ZipFile(filename, "r")
zip_ref.extractall()
zip_ref.close()
# Walk through an image classification directory and find out how many files (images)
# are in each subdirectory.
import os
def walk_through_dir(dir_path):
"""
Walks through dir_path returning its contents.
Args:
dir_path (str): target directory
Returns:
A print out of:
number of subdiretories in dir_path
number of images (files) in each subdirectory
name of each subdirectory
"""
for dirpath, dirnames, filenames in os.walk(dir_path):
print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")
# Function to evaluate: accuracy, precision, recall, f1-score
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
def calculate_results(y_true, y_pred):
"""
Calculates model accuracy, precision, recall and f1 score of a binary classification model.
Args:
y_true: true labels in the form of a 1D array
y_pred: predicted labels in the form of a 1D array
Returns a dictionary of accuracy, precision, recall, f1-score.
"""
# Calculate model accuracy
model_accuracy = accuracy_score(y_true, y_pred) * 100
# Calculate model precision, recall and f1 score using "weighted average
model_precision, model_recall, model_f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")
model_results = {"accuracy": model_accuracy,
"precision": model_precision,
"recall": model_recall,
"f1": model_f1}
return model_results