#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #optimization #tensorflow
301-tensorflow-training-openvino: From Training to Deployment with TensorFlow and OpenVINO

# Import TensorFlow and Other Libraries
import os
import sys
from pathlib import Path

import PIL
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from PIL import Image
from openvino.runtime import Core
from openvino.tools.mo import mo_tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

sys.path.append("../utils")
from notebook_utils import download_file

# Download and Explore the Dataset
import pathlib

dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
data_dir = tf.keras.utils.get_file('flower_photos', origin=dataset_url, untar=True)
data_dir = pathlib.Path(data_dir)

image_count = len(list(data_dir.glob('*/*.jpg')))
print(image_count)

roses = list(data_dir.glob('roses/*'))
PIL.Image.open(str(roses[0]))
PIL.Image.open(str(roses[1]))

tulips = list(data_dir.glob('tulips/*'))
PIL.Image.open(str(tulips[0]))
PIL.Image.open(str(tulips[1]))

# Create a Dataset
batch_size = 32
img_height = 180
img_width = 180

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

class_names = train_ds.class_names
print(class_names)

# Visualize the Data
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[labels[i]])
        plt.axis("off")

for image_batch, labels_batch in train_ds:
    print(image_batch.shape)
    print(labels_batch.shape)
    break

# Configure the Dataset for Performance
# AUTOTUNE = tf.data.AUTOTUNE
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Standardize the Data
normalization_layer = layers.experimental.preprocessing.Rescaling(1. / 255)

normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixel values are now in `[0, 1]`.
print(np.min(first_image), np.max(first_image))

# Create the Model
num_classes = 5

model = Sequential([
    layers.experimental.preprocessing.Rescaling(1. / 255, input_shape=(img_height, img_width, 3)),
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes)
])

# Compile the Model
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
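The clip stops at model.compile, while the notebook's title promises training and deployment. Below is a minimal sketch of the remaining steps, reusing the variables above; the epoch count, the model/flower_saved path, and the exact mo flags are illustrative assumptions, not the notebook's verbatim values.

import numpy as np
import tensorflow as tf
from openvino.runtime import Core

# Train the model (epoch count is illustrative).
epochs = 10
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

# Save as a TensorFlow SavedModel; "model/flower_saved" is a hypothetical path.
saved_model_dir = "model/flower_saved"
model.save(saved_model_dir)

# Convert to OpenVINO IR with Model Optimizer (common flags, not necessarily
# the notebook's exact invocation).
!mo --saved_model_dir $saved_model_dir --input_shape "[1,180,180,3]" --model_name flower_ir --output_dir model/flower

# Run the IR with OpenVINO Runtime on one validation image.
ie = Core()
ov_model = ie.read_model(model="model/flower/flower_ir.xml")
compiled_model = ie.compile_model(model=ov_model, device_name="CPU")
output_layer = compiled_model.output(0)

images, labels = next(iter(val_ds))
input_image = images[0:1].numpy()  # shape (1, 180, 180, 3); the Rescaling layer is part of the model
result = compiled_model([input_image])[output_layer]
score = tf.nn.softmax(result[0])
print(f"predicted: {class_names[np.argmax(score)]}, actual: {class_names[int(labels[0])]}")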
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #optimization #tensorflow
301-tensorflow-training-openvino: Post-Training Quantization with TensorFlow Classification Model

# Preparation
from pathlib import Path

import tensorflow as tf

model_xml = Path("model/flower/flower_ir.xml")
dataset_url = (
    "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
)
data_dir = Path(tf.keras.utils.get_file("flower_photos", origin=dataset_url, untar=True))

if not model_xml.exists():
    print("Executing training notebook. This will take a while...")
    %run 301-tensorflow-training-openvino.ipynb

# Imports
import copy
import os
import sys

import cv2
import matplotlib.pyplot as plt
import numpy as np
from addict import Dict
from openvino.tools.pot.api import Metric, DataLoader
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.graph.model_utils import compress_model_weights
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.pipeline.initializer import create_pipeline
from openvino.runtime import Core
from PIL import Image

sys.path.append("../utils")
from notebook_utils import benchmark_model, download_file

# Settings
model_config = Dict(
    {
        "model_name": "flower",
        "model": "model/flower/flower_ir.xml",
        "weights": "model/flower/flower_ir.bin",
    }
)

engine_config = Dict({"device": "CPU", "stat_requests_number": 2, "eval_requests_number": 2})

algorithms = [
    {
        "name": "DefaultQuantization",
        "params": {
            "target_device": "CPU",
            "preset": "performance",
            "stat_subset_size": 1000,
        },
    }
]

# Create DataLoader Class
class ClassificationDataLoader(DataLoader):
    """
    DataLoader for image data that is stored in a directory per category. For example, for
    categories _rose_ and _daisy_, rose images are expected in data_source/rose and daisy
    images in data_source/daisy.
    """

    def __init__(self, data_source):
        """
        :param data_source: path to data directory
        """
        self.data_source = Path(data_source)
        # Collect image paths and class (directory) names from the data directory
        self.dataset = [p for p in self.data_source.glob("**/*") if p.suffix in (".png", ".jpg")]
        self.class_names = sorted([item.name for item in self.data_source.iterdir() if item.is_dir()])

    def __len__(self):
        """
        Returns the number of elements in the dataset
        """
        return len(self.dataset)

    def __getitem__(self, index):
        """
        Get item from self.dataset at the specified index.
        Returns (annotation, image), where annotation is a tuple (index, class_index)
        and image a preprocessed image in network shape
        """
        if index >= len(self):
            raise IndexError
        filepath = self.dataset[index]
        annotation = (index, self.class_names.index(filepath.parent.name))
        image = self._read_image(filepath)
        return annotation, image

    def _read_image(self, filepath):
        """
        Read the image at `filepath` to memory, resize, convert BGR to RGB and to network shape

        :param filepath: path of the image to read
        :return: ndarray representation of the image
        """
        image = cv2.imread(str(filepath))[:, :, (2, 1, 0)]  # BGR -> RGB
        image = cv2.resize(image, (180, 180)).astype(np.float32)
        return image

# Create Accuracy Metric Class
class Accuracy(Metric):
    def __init__(self):
        super().__init__()
        self._name = "accuracy"
        self._matches = []

    @property
    def value(self):
        """Returns accuracy metric value for the last model output."""
        return {self._name: self._matches[-1]}

    @property
    def avg_value(self):
        """
        Returns accuracy metric value for all model outputs. Results per image are stored in
        self._matches, where True means a correct prediction and False a wrong prediction.
        Accuracy is computed as the number of correct predictions divided by the total
        number of predictions.
        """
        num_correct = np.count_nonzero(self._matches)
        return {self._name: num_correct / len(self._matches)}

    def update(self, output, target):
        """
        Updates prediction matches.

        :param output: model output
        :param target: annotations
        """
        predict = np.argmax(output[0], axis=1)
        match = predict == target
        self._matches.append(match)

    def reset(self):
        """
        Resets the Accuracy metric. This is a required method that should initialize all
        attributes to their initial value.
        """
        self._matches = []

    def get_attributes(self):
        """
        Returns a dictionary of metric attributes {metric_name: {attribute_name: value}}.
        Required attributes: 'direction': 'higher-better' or 'higher-worse'
                             'type': metric type
        """
        return {self._name: {"direction": "higher-better", "type": "accuracy"}}

# POT Optimization
# Step 1: Load the model
model = load_model(model_config=model_config)
original_model = copy.deepcopy(model)

# Step 2: Initialize the data loader
data_loader = ClassificationDataLoader(data_source=data_dir)

# Step 3 (Optional. Required for AccuracyAwareQuantization): Initialize the metric
#        Compute metric results on the original model
metric = Accuracy()

# Step 4: Initialize the engine for metric calculation and statistics collection
engine = IEEngine(config=engine_config, data_loader=data_loader, metric=metric)

# Step 5: Create a pipeline of compression algorithms
pipeline = create_pipeline(algo_config=algorithms, engine=engine)

# Step 6: Execute the pipeline
compressed_model = pipeline.run(model=model)

# Step 7 (Optional): Compress model weights to quantized precision
#                    in order to reduce the size of the final .bin file
compress_model_weights(model=compressed_model)

# Step 8: Save the compressed model and get the path to the model
compressed_model_paths = save_model(
    model=compressed_model, save_path=os.path.join(os.path.curdir, "model/optimized")
)
compressed_model_xml = Path(compressed_model_paths[0]["model"])
print(f"The quantized model is stored in {compressed_model_xml}")

# Step 9 (Optional): Evaluate the original and compressed model. Print the results
original_metric_results = pipeline.evaluate(original_model)
if original_metric_results:
    print(f"Accuracy of the original model:  {next(iter(original_metric_results.values())):.5f}")

quantized_metric_results = pipeline.evaluate(compressed_model)
if quantized_metric_results:
    print(f"Accuracy of the quantized model: {next(iter(quantized_metric_results.values())):.5f}")

# Run Inference on Quantized Model
def pre_process_image(imagePath, img_height=180):
    # Model input format: the IR converted from TensorFlow expects NHWC layout
    n, c, h, w = [1, 3, img_height, img_height]
    image = Image.open(imagePath)
    image = image.resize((h, w), resample=Image.BILINEAR)

    # Convert to array and reshape to network input shape (N, H, W, C)
    image = np.array(image)
    input_image = image.reshape((n, h, w, c))

    return input_image

# Load the optimized model and get the names of the input and output layer
ie = Core()
model_pot = ie.read_model(model="model/optimized/flower_ir.xml")
compiled_model_pot = ie.compile_model(model=model_pot, device_name="CPU")
input_layer = compiled_model_pot.input(0)
output_layer = compiled_model_pot.output(0)

# Get the class names: a list of directory names in alphabetical order
class_names = sorted([item.name for item in Path(data_dir).iterdir() if item.is_dir()])
# Run inference on an input image
inp_img_url = (
    "https://upload.wikimedia.org/wikipedia/commons/4/48/A_Close_Up_Photo_of_a_Dandelion.jpg"
)
directory = "output"
inp_file_name = "A_Close_Up_Photo_of_a_Dandelion.jpg"
file_path = Path(directory) / Path(inp_file_name)

# Download the image if it does not exist yet
if not file_path.exists():
    download_file(inp_img_url, inp_file_name, directory=directory)

# Pre-process the image and get it ready for inference
input_image = pre_process_image(imagePath=file_path)
print(f'input image shape: {input_image.shape}')
print(f'input layer shape: {input_layer.shape}')

res = compiled_model_pot([input_image])[output_layer]

score = tf.nn.softmax(res[0])

# Show the results
image = Image.open(file_path)
plt.imshow(image)
print(
    "This image most likely belongs to {} with a {:.2f} percent confidence.".format(
        class_names[np.argmax(score)], 100 * np.max(score)
    )
)

# Compare Inference Speed
# Print the available devices on this system
ie = Core()
print("Device information:")
print(ie.get_property("CPU", "FULL_DEVICE_NAME"))
if "GPU" in ie.available_devices:
    print(ie.get_property("GPU", "FULL_DEVICE_NAME"))

# Original model - CPU
benchmark_model(model_path=model_xml, device="CPU", seconds=15, api='async')

# Quantized model - CPU
benchmark_model(model_path=compressed_model_xml, device="CPU", seconds=15, api='async')

# Original model - MULTI:CPU,GPU
if "GPU" in ie.available_devices:
    benchmark_model(model_path=model_xml, device="MULTI:CPU,GPU", seconds=15, api='async')
else:
    print("A supported integrated GPU is not available on this system.")

# Quantized model - MULTI:CPU,GPU
if "GPU" in ie.available_devices:
    benchmark_model(model_path=compressed_model_xml, device="MULTI:CPU,GPU", seconds=15, api='async')
else:
    print("A supported integrated GPU is not available on this system.")

# Print the available devices on this system
print("Device information:")
print(ie.get_property("CPU", "FULL_DEVICE_NAME"))
if "GPU" in ie.available_devices:
    print(ie.get_property("GPU", "FULL_DEVICE_NAME"))

# Original IR model - CPU
benchmark_output = %sx benchmark_app -m $model_xml -t 15 -api async
# Remove logging info from benchmark_app output and show only the results
benchmark_result = [line for line in benchmark_output
                    if not (line.startswith(r"[") or line.startswith(" ") or line == "")]
print("\n".join(benchmark_result))

# Quantized IR model - CPU
benchmark_output = %sx benchmark_app -m $compressed_model_xml -t 15 -api async
# Remove logging info from benchmark_app output and show only the results
benchmark_result = [line for line in benchmark_output
                    if not (line.startswith(r"[") or line.startswith(" ") or line == "")]
print("\n".join(benchmark_result))

# Original IR model - MULTI:CPU,GPU
ie = Core()
if "GPU" in ie.available_devices:
    benchmark_output = %sx benchmark_app -m $model_xml -d MULTI:CPU,GPU -t 15 -api async
    # Remove logging info from benchmark_app output and show only the results
    benchmark_result = [line for line in benchmark_output
                        if not (line.startswith(r"[") or line.startswith(" ") or line == "")]
    print("\n".join(benchmark_result))
else:
    print("An integrated GPU is not available on this system.")

# Quantized IR model - MULTI:CPU,GPU
ie = Core()
if "GPU" in ie.available_devices:
    benchmark_output = %sx benchmark_app -m $compressed_model_xml -d MULTI:CPU,GPU -t 15 -api async
    # Remove logging info from benchmark_app output and show only the results
    benchmark_result = [line for line in benchmark_output
                        if not (line.startswith(r"[") or line.startswith(" ") or line == "")]
    print("\n".join(benchmark_result))
else:
    print("An integrated GPU is not available on this system.")
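benchmark_app gives the authoritative throughput numbers; for a quick, self-contained comparison you can also time synchronous requests directly from Python. A minimal sketch, assuming model_xml and compressed_model_xml from above; the time_model helper and its request count are illustrative.

import time

import numpy as np
from openvino.runtime import Core

def time_model(xml_path, num_images=200):
    # Compile the IR and time synchronous, batch-1 inference on a fixed random input.
    core = Core()
    compiled = core.compile_model(model=core.read_model(model=str(xml_path)), device_name="CPU")
    out = compiled.output(0)
    dummy = np.random.rand(1, 180, 180, 3).astype(np.float32)
    start = time.perf_counter()
    for _ in range(num_images):
        compiled([dummy])[out]
    elapsed = time.perf_counter() - start
    print(f"{xml_path}: {num_images / elapsed:.1f} FPS (sync, batch 1)")

time_model(model_xml)
time_model(compressed_model_xml)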
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #quantization #nncf #optimization #pytorch
302-pytorch-quantization-aware-training: Optimizing PyTorch models with Neural Network Compression Framework of OpenVINO by 8-bit quantization

# Imports and Settings
# On Windows, add the directory that contains cl.exe to the PATH to enable PyTorch to find the
# required C++ tools. This code assumes that Visual Studio 2019 is installed in the default
# directory. If you have a different C++ compiler, please add the correct path to
# os.environ["PATH"] directly. Note that the C++ Redistributable is not enough to run this
# notebook. Adding the path to os.environ["LIB"] is not always required - it depends on the
# system's configuration.

import sys

if sys.platform == "win32":
    import distutils.command.build_ext
    import os
    from pathlib import Path

    VS_INSTALL_DIR = r"C:/Program Files (x86)/Microsoft Visual Studio"
    cl_paths = sorted(list(Path(VS_INSTALL_DIR).glob("**/Hostx86/x64/cl.exe")))
    if len(cl_paths) == 0:
        raise ValueError(
            "Cannot find Visual Studio. This notebook requires a C++ compiler. If you installed "
            "a C++ compiler, please add the directory that contains cl.exe to `os.environ['PATH']`."
        )
    else:
        # If multiple versions of MSVC are installed, get the most recent version
        cl_path = cl_paths[-1]
        vs_dir = str(cl_path.parent)
        os.environ["PATH"] += f"{os.pathsep}{vs_dir}"
        # Code for finding the library dirs from
        # https://stackoverflow.com/questions/47423246/get-pythons-lib-path
        d = distutils.core.Distribution()
        b = distutils.command.build_ext.build_ext(d)
        b.finalize_options()
        os.environ["LIB"] = os.pathsep.join(b.library_dirs)
        print(f"Added {vs_dir} to PATH")

import sys
import time
import warnings  # to disable warnings on export to ONNX
import zipfile
from pathlib import Path
import logging

import torch
import nncf  # Important - should be imported directly after torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from nncf.common.utils.logger import set_log_level

set_log_level(logging.ERROR)  # Disables all NNCF info and warning messages
from nncf import NNCFConfig
from nncf.torch import create_compressed_model, register_default_init_args
from openvino.runtime import Core
from torch.jit import TracerWarning

sys.path.append("../utils")
from notebook_utils import download_file

torch.manual_seed(0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

MODEL_DIR = Path("model")
OUTPUT_DIR = Path("output")
DATA_DIR = Path("data")
BASE_MODEL_NAME = "resnet18"
image_size = 64

OUTPUT_DIR.mkdir(exist_ok=True)
MODEL_DIR.mkdir(exist_ok=True)
DATA_DIR.mkdir(exist_ok=True)

# Paths where PyTorch, ONNX and OpenVINO IR models will be stored
fp32_pth_path = Path(MODEL_DIR / (BASE_MODEL_NAME + "_fp32")).with_suffix(".pth")
fp32_onnx_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + "_fp32")).with_suffix(".onnx")
fp32_ir_path = fp32_onnx_path.with_suffix(".xml")
int8_onnx_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + "_int8")).with_suffix(".onnx")
int8_ir_path = int8_onnx_path.with_suffix(".xml")

# It is possible to train the FP32 model from scratch, but it might be slow,
# so the pre-trained weights are downloaded by default.
pretrained_on_tiny_imagenet = True
fp32_pth_url = "https://storage.openvinotoolkit.org/repositories/nncf/openvino_notebook_ckpts/302_resnet18_fp32_v1.pth"
download_file(fp32_pth_url, directory=MODEL_DIR, filename=fp32_pth_path.name)

# Download Tiny ImageNet dataset
def download_tiny_imagenet_200(
    data_dir: Path,
    url="http://cs231n.stanford.edu/tiny-imagenet-200.zip",
    tarname="tiny-imagenet-200.zip",
):
    archive_path = data_dir / tarname
    download_file(url, directory=data_dir, filename=tarname)
    zip_ref = zipfile.ZipFile(archive_path, "r")
    zip_ref.extractall(path=data_dir)
    zip_ref.close()

def prepare_tiny_imagenet_200(dataset_dir: Path):
    # format validation set the same way as train set is formatted
    val_data_dir = dataset_dir / 'val'
    val_annotations_file = val_data_dir / 'val_annotations.txt'
    with open(val_annotations_file, 'r') as f:
        val_annotation_data = map(lambda line: line.split('\t')[:2], f.readlines())
    val_images_dir = val_data_dir / 'images'
    for image_filename, image_label in val_annotation_data:
        from_image_filepath = val_images_dir / image_filename
        to_image_dir = val_data_dir / image_label
        if not to_image_dir.exists():
            to_image_dir.mkdir()
        to_image_filepath = to_image_dir / image_filename
        from_image_filepath.rename(to_image_filepath)
    val_annotations_file.unlink()
    val_images_dir.rmdir()

DATASET_DIR = DATA_DIR / "tiny-imagenet-200"
if not DATASET_DIR.exists():
    download_tiny_imagenet_200(DATA_DIR)
    prepare_tiny_imagenet_200(DATASET_DIR)
    print(f"Successfully downloaded and prepared dataset at: {DATASET_DIR}")

# Pre-train Floating-Point Model
# Train Function
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter("Time", ":3.3f")
    losses = AverageMeter("Loss", ":2.3f")
    top1 = AverageMeter("Acc@1", ":2.2f")
    top5 = AverageMeter("Acc@5", ":2.2f")
    progress = ProgressMeter(
        len(train_loader), [batch_time, losses, top1, top5], prefix="Epoch:[{}]".format(epoch)
    )

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        images = images.to(device)
        target = target.to(device)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        print_frequency = 50
        if i % print_frequency == 0:
            progress.display(i)

# Validate Function
def validate(val_loader, model, criterion):
    batch_time = AverageMeter("Time", ":3.3f")
    losses = AverageMeter("Loss", ":2.3f")
    top1 = AverageMeter("Acc@1", ":2.2f")
    top5 = AverageMeter("Acc@5", ":2.2f")
    progress = ProgressMeter(len(val_loader), [batch_time, losses, top1, top5], prefix="Test: ")

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            print_frequency = 10
            if i % print_frequency == 0:
                progress.display(i)

        print(" * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(top1=top1, top5=top5))
    return top1.avg

# Helpers
class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self, name, fmt=":f"):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})"
        return fmtstr.format(**self.__dict__)

class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print("\t".join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = "{:" + str(num_digits) + "d}"
        return "[" + fmt + "/" + fmt.format(num_batches) + "]"

def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

# Get a Pre-trained FP32 Model
num_classes = 200  # 200 is for Tiny ImageNet, default is 1000 for ImageNet
init_lr = 1e-4
batch_size = 128
epochs = 4

model = models.resnet18(pretrained=not pretrained_on_tiny_imagenet)
# update the last FC layer for Tiny ImageNet number of classes
model.fc = nn.Linear(in_features=512, out_features=num_classes, bias=True)
model.to(device)

# Data loading code
train_dir = DATASET_DIR / "train"
val_dir = DATASET_DIR / "val"
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

train_dataset = datasets.ImageFolder(
    train_dir,
    transforms.Compose(
        [
            transforms.Resize(image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]
    ),
)
val_dataset = datasets.ImageFolder(
    val_dir,
    transforms.Compose(
        [
            transforms.Resize(image_size),
            transforms.ToTensor(),
            normalize,
        ]
    ),
)

train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, sampler=None
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True
)

# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=init_lr)

if pretrained_on_tiny_imagenet:
    # ** WARNING: The torch.load functionality uses Python's pickling module, which
    # may be used to perform arbitrary code execution during unpickling. Only load data
    # that you trust. **
    checkpoint = torch.load(str(fp32_pth_path), map_location="cpu")
    model.load_state_dict(checkpoint["state_dict"], strict=True)
    acc1_fp32 = checkpoint["acc1"]
else:
    best_acc1 = 0
    # Training loop
    for epoch in range(0, epochs):
        # run a single training epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if is_best:
            checkpoint = {"state_dict": model.state_dict(), "acc1": acc1}
            torch.save(checkpoint, fp32_pth_path)
    acc1_fp32 = best_acc1

print(f"Accuracy of FP32 model: {acc1_fp32:.3f}")

dummy_input = torch.randn(1, 3, image_size, image_size).to(device)

torch.onnx.export(model, dummy_input, fp32_onnx_path)
print(f"FP32 ONNX model was exported to {fp32_onnx_path}.")

# Create and Initialize Quantization
nncf_config_dict = {
    "input_info": {"sample_size": [1, 3, image_size, image_size]},
    "log_dir": str(OUTPUT_DIR),  # log directory for NNCF-specific logging outputs
    "compression": {
        "algorithm": "quantization",  # specify the algorithm here
    },
}
nncf_config = NNCFConfig.from_dict(nncf_config_dict)
nncf_config = register_default_init_args(nncf_config, train_loader)

compression_ctrl, model = create_compressed_model(model, nncf_config)

acc1 = validate(val_loader, model, criterion)
print(f"Accuracy of initialized INT8 model: {acc1:.3f}")

# Fine-tune the Compressed Model
compression_lr = init_lr / 10
optimizer = torch.optim.Adam(model.parameters(), lr=compression_lr)

# train for one epoch with NNCF
train(train_loader, model, criterion, optimizer, epoch=0)

# evaluate on validation set after Quantization-Aware Training (QAT case)
acc1_int8 = validate(val_loader, model, criterion)

print(f"Accuracy of tuned INT8 model: {acc1_int8:.3f}")
print(f"Accuracy drop of tuned INT8 model over pre-trained FP32 model: {acc1_fp32 - acc1_int8:.3f}")

# Export INT8 Model to ONNX
if not int8_onnx_path.exists():
    warnings.filterwarnings("ignore", category=TracerWarning)
    warnings.filterwarnings("ignore", category=UserWarning)
    # Export INT8 model to ONNX that is supported by the OpenVINO™ toolkit
    compression_ctrl.export_model(int8_onnx_path)
    print(f"INT8 ONNX model exported to {int8_onnx_path}.")

# Convert ONNX models to OpenVINO Intermediate Representation (IR)
if not fp32_ir_path.exists():
    !mo --input_model $fp32_onnx_path --input_shape "[1,3, $image_size, $image_size]" --mean_values "[123.675, 116.28 , 103.53]" --scale_values "[58.395, 57.12 , 57.375]" --data_type FP16 --output_dir $OUTPUT_DIR

if not int8_ir_path.exists():
    !mo --input_model $int8_onnx_path --input_shape "[1,3, $image_size, $image_size]" --mean_values "[123.675, 116.28 , 103.53]" --scale_values "[58.395, 57.12 , 57.375]" --data_type FP16 --output_dir $OUTPUT_DIR

# Benchmark Model Performance by Computing Inference Time
def parse_benchmark_output(benchmark_output):
    parsed_output = [line for line in benchmark_output
                     if not (line.startswith(r"[") or line.startswith(" ") or line == "")]
    print(*parsed_output, sep='\n')

print('Benchmark FP32 model (IR)')
benchmark_output = ! benchmark_app -m $fp32_ir_path -d CPU -api async -t 15
parse_benchmark_output(benchmark_output)

print('Benchmark INT8 model (IR)')
benchmark_output = ! benchmark_app -m $int8_ir_path -d CPU -api async -t 15
parse_benchmark_output(benchmark_output)

# Show CPU Information for reference
ie = Core()
ie.get_property(device_name="CPU", name="FULL_DEVICE_NAME")
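To close the loop, the converted INT8 IR can be smoke-tested from Python against the fine-tuned PyTorch model. A minimal sketch under stated assumptions: int8_ir_path exists after the mo step above, and since mo was given --mean_values/--scale_values, the IR consumes raw 0-255 pixels while the PyTorch model expects normalized input. Agreement of the top-1 index on one random input is only a smoke test, not an accuracy evaluation; quantization and FP16 can shift logits slightly.

import numpy as np
import torch
from openvino.runtime import Core

# Raw image-like input; the IR folds the mean/scale preprocessing into the graph.
raw = np.random.randint(0, 256, (1, 3, image_size, image_size)).astype(np.float32)
mean = np.array([123.675, 116.28, 103.53]).reshape(1, 3, 1, 1)
scale = np.array([58.395, 57.12, 57.375]).reshape(1, 3, 1, 1)

core = Core()
compiled_int8 = core.compile_model(model=core.read_model(model=str(int8_ir_path)), device_name="CPU")
ir_logits = compiled_int8([raw])[compiled_int8.output(0)]

# The PyTorch (NNCF-wrapped) model gets the same input, normalized by hand.
model.eval()
with torch.no_grad():
    torch_input = torch.from_numpy(((raw - mean) / scale).astype(np.float32)).to(device)
    torch_logits = model(torch_input)

print("IR top-1:", int(np.argmax(ir_logits)), "| PyTorch top-1:", int(torch_logits.argmax()))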
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #tensorflow #quantization #nncf #optimization
305-tensorflow-quantization-aware-training: Optimizing TensorFlow models with Neural Network Compression Framework of OpenVINO by 8-bit quantization

# Imports and Settings
from pathlib import Path
import logging

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.python.keras import layers
from tensorflow.python.keras import models

from nncf import NNCFConfig
from nncf.tensorflow.helpers.model_creation import create_compressed_model
from nncf.tensorflow.initialization import register_default_init_args
from nncf.common.utils.logger import set_log_level

set_log_level(logging.ERROR)

MODEL_DIR = Path("model")
OUTPUT_DIR = Path("output")
MODEL_DIR.mkdir(exist_ok=True)
OUTPUT_DIR.mkdir(exist_ok=True)

BASE_MODEL_NAME = "ResNet-18"

fp32_h5_path = Path(MODEL_DIR / (BASE_MODEL_NAME + "_fp32")).with_suffix(".h5")
fp32_sm_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + "_fp32"))
fp32_ir_path = Path(OUTPUT_DIR / "saved_model").with_suffix(".xml")
int8_pb_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + "_int8")).with_suffix(".pb")
int8_pb_name = Path(BASE_MODEL_NAME + "_int8").with_suffix(".pb")
int8_ir_path = int8_pb_path.with_suffix(".xml")

BATCH_SIZE = 128
IMG_SIZE = (64, 64)  # Default Imagenet image size
NUM_CLASSES = 10  # For Imagenette dataset

LR = 1e-5

MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)  # From Imagenet dataset
STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)  # From Imagenet dataset

fp32_pth_url = "https://storage.openvinotoolkit.org/repositories/nncf/openvino_notebook_ckpts/305_resnet18_imagenette_fp32_v1.h5"
_ = tf.keras.utils.get_file(fp32_h5_path.resolve(), fp32_pth_url)
print(f'Absolute path where the model weights are saved:\n {fp32_h5_path.resolve()}')

# Dataset Preprocessing
datasets, datasets_info = tfds.load('imagenette/160px', shuffle_files=True, as_supervised=True,
                                    with_info=True, read_config=tfds.ReadConfig(shuffle_seed=0))
train_dataset, validation_dataset = datasets['train'], datasets['validation']
fig = tfds.show_examples(train_dataset, datasets_info)

def preprocessing(image, label):
    image = tf.image.resize(image, IMG_SIZE)
    image = image - MEAN_RGB
    image = image / STDDEV_RGB
    label = tf.one_hot(label, NUM_CLASSES)
    return image, label

train_dataset = (train_dataset.map(preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                 .batch(BATCH_SIZE)
                 .prefetch(tf.data.experimental.AUTOTUNE))

validation_dataset = (validation_dataset.map(preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE)
                      .batch(BATCH_SIZE)
                      .prefetch(tf.data.experimental.AUTOTUNE))

# Define a Floating-Point Model
def residual_conv_block(filters, stage, block, strides=(1, 1), cut='pre'):
    def layer(input_tensor):
        x = layers.BatchNormalization(epsilon=2e-5)(input_tensor)
        x = layers.Activation('relu')(x)

        # defining shortcut connection
        if cut == 'pre':
            shortcut = input_tensor
        elif cut == 'post':
            shortcut = layers.Conv2D(filters, (1, 1), strides=strides,
                                     kernel_initializer='he_uniform', use_bias=False)(x)

        # continue with convolution layers
        x = layers.ZeroPadding2D(padding=(1, 1))(x)
        x = layers.Conv2D(filters, (3, 3), strides=strides,
                          kernel_initializer='he_uniform', use_bias=False)(x)

        x = layers.BatchNormalization(epsilon=2e-5)(x)
        x = layers.Activation('relu')(x)
        x = layers.ZeroPadding2D(padding=(1, 1))(x)
        x = layers.Conv2D(filters, (3, 3), kernel_initializer='he_uniform', use_bias=False)(x)

        # add residual connection
        x = layers.Add()([x, shortcut])
        return x

    return layer

def ResNet18(input_shape=None):
    """Instantiates the ResNet18 architecture."""
    img_input = layers.Input(shape=input_shape, name='data')

    # ResNet18 bottom
    x = layers.BatchNormalization(epsilon=2e-5, scale=False)(img_input)
    x = layers.ZeroPadding2D(padding=(3, 3))(x)
    x = layers.Conv2D(64, (7, 7), strides=(2, 2), kernel_initializer='he_uniform', use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=2e-5)(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1))(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='valid')(x)

    # ResNet18 body
    repetitions = (2, 2, 2, 2)
    for stage, rep in enumerate(repetitions):
        for block in range(rep):
            filters = 64 * (2 ** stage)
            if block == 0 and stage == 0:
                x = residual_conv_block(filters, stage, block, strides=(1, 1), cut='post')(x)
            elif block == 0:
                x = residual_conv_block(filters, stage, block, strides=(2, 2), cut='post')(x)
            else:
                x = residual_conv_block(filters, stage, block, strides=(1, 1), cut='pre')(x)
    x = layers.BatchNormalization(epsilon=2e-5)(x)
    x = layers.Activation('relu')(x)

    # ResNet18 top
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(NUM_CLASSES)(x)
    x = layers.Activation('softmax')(x)

    # Create model
    model = models.Model(img_input, x)

    return model

IMG_SHAPE = IMG_SIZE + (3,)
model = ResNet18(input_shape=IMG_SHAPE)

# Pre-train Floating-Point Model
# Load the floating-point weights
model.load_weights(fp32_h5_path)

# Compile the floating-point model
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
              metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc@1')])

# Validate the floating-point model
test_loss, acc_fp32 = model.evaluate(validation_dataset,
                                     callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))
print(f"\nAccuracy of FP32 model: {acc_fp32:.3f}")

model.save(fp32_sm_path)
print(f'Absolute path where the model is saved:\n {fp32_sm_path.resolve()}')

# Create and Initialize Quantization
nncf_config_dict = {
    "input_info": {"sample_size": [1, 3] + list(IMG_SIZE)},
    "log_dir": str(OUTPUT_DIR),  # log directory for NNCF-specific logging outputs
    "compression": {
        "algorithm": "quantization",  # specify the algorithm here
    },
}
nncf_config = NNCFConfig.from_dict(nncf_config_dict)

nncf_config = register_default_init_args(nncf_config=nncf_config,
                                         data_loader=train_dataset,
                                         batch_size=BATCH_SIZE)

compression_ctrl, model = create_compressed_model(model, nncf_config)

# Compile the int8 model
model.compile(optimizer=tf.keras.optimizers.Adam(lr=LR),
              loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
              metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc@1')])

# Validate the int8 model
test_loss, test_acc = model.evaluate(validation_dataset,
                                     callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))
print(f"\nAccuracy of INT8 model after initialization: {test_acc:.3f}")

# Fine-tune the Compressed Model
# Train the int8 model
model.fit(train_dataset, epochs=2)

# Validate the int8 model
test_loss, acc_int8 = model.evaluate(validation_dataset,
                                     callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))
print(f"\nAccuracy of INT8 model after fine-tuning: {acc_int8:.3f}")
print(f"\nAccuracy drop of tuned INT8 model over pre-trained FP32 model: {acc_fp32 - acc_int8:.3f}")

compression_ctrl.export_model(int8_pb_path, 'frozen_graph')
print(f'Absolute path where the int8 model is saved:\n {int8_pb_path.resolve()}')

# Export Frozen Graph Models to OpenVINO Intermediate Representation (IR)
!mo --framework=tf --input_shape=[1,64,64,3] --input=data --saved_model_dir=$fp32_sm_path --output_dir=$OUTPUT_DIR

!mo --framework=tf --input_shape=[1,64,64,3] --input=Placeholder --input_model=$int8_pb_path --output_dir=$OUTPUT_DIR

# Benchmark Model Performance by Computing Inference Time
def parse_benchmark_output(benchmark_output):
    parsed_output = [line for line in benchmark_output
                     if not (line.startswith(r"[") or line.startswith(" ") or line == "")]
    print(*parsed_output, sep='\n')

print('Benchmark FP32 model (IR)')
benchmark_output = ! benchmark_app -m $fp32_ir_path -d CPU -api async -t 15
parse_benchmark_output(benchmark_output)

print('\nBenchmark INT8 model (IR)')
benchmark_output = ! benchmark_app -m $int8_ir_path -d CPU -api async -t 15
parse_benchmark_output(benchmark_output)

# Show CPU Information for reference
from openvino.runtime import Core

ie = Core()
ie.get_property(device_name='CPU', name="FULL_DEVICE_NAME")
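As a final check, the converted INT8 IR can be evaluated directly with OpenVINO Runtime. A minimal sketch, assuming int8_ir_path exists after the mo step and reusing validation_dataset from above; the batch count is arbitrary, and the loop runs per image because the IR was converted with batch size 1. Since no mean/scale values were passed to mo here, the IR expects the same normalized input that the preprocessing function produces.

import numpy as np
from openvino.runtime import Core

core = Core()
compiled = core.compile_model(model=core.read_model(model=str(int8_ir_path)), device_name="CPU")
output = compiled.output(0)

correct = total = 0
for images, labels in validation_dataset.take(4):  # a few batches are enough for a smoke test
    for image, label in zip(images.numpy(), labels.numpy()):
        pred = compiled([np.expand_dims(image, 0)])[output]
        correct += int(np.argmax(pred) == np.argmax(label))
        total += 1
print(f"INT8 IR accuracy on {total} validation images: {correct / total:.3f}")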
Sat Jun 18 2022 21:03:36 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/301-tensorflow-training-openvino/301-tensorflow-training-openvino.ipynb
Sat Jun 18 2022 20:58:59 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/301-tensorflow-training-openvino/301-tensorflow-training-openvino-pot.ipynb
Sat Jun 18 2022 20:53:15 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/302-pytorch-quantization-aware-training/302-pytorch-quantization-aware-training.ipynb
Sat Jun 18 2022 20:47:05 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb