#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #object-detection #onnx #onnx-runtime #openvino-onnx-runtime #openvino-execution-provider-for-onnx #tiny-yolov4
Object detection with YOLOv4 in Python using OpenVINO™ Execution Provider# pip3 install openvino
# Install ONNX Runtime for OpenVINO™ Execution Provider
# pip3 install onnxruntime-openvino==1.11.0
# pip3 install -r requirements.txt
# Running the ONNXRuntime OpenVINO™ Execution Provider sample
# python3 yolov4.py --device CPU_FP32 --video classroom.mp4 --model yolov4.onnx
'''
Copyright (C) 2021-2022, Intel Corporation
SPDX-License-Identifier: Apache-2.0
Major Portions of this code are copyright of their respective authors and released under the Apache License Version 2.0:
- onnx, Copyright 2021-2022. For licensing see https://github.com/onnx/models/blob/master/LICENSE
'''
import cv2
import numpy as np
from onnx import numpy_helper
import onnx
import onnxruntime as rt
import os
from PIL import Image
from scipy import special
import colorsys
import random
import argparse
import sys
import time
import platform
if platform.system() == "Windows":
from openvino import utils
utils.add_openvino_libs_to_path()
def image_preprocess(image, target_size, gt_boxes=None):
ih, iw = target_size
h, w, _ = image.shape
scale = min(iw/w, ih/h)
nw, nh = int(scale * w), int(scale * h)
image_resized = cv2.resize(image, (nw, nh))
image_padded = np.full(shape=[ih, iw, 3], fill_value=128.0)
dw, dh = (iw - nw) // 2, (ih-nh) // 2
image_padded[dh:nh+dh, dw:nw+dw, :] = image_resized
image_padded = image_padded / 255.
if gt_boxes is None:
return image_padded
else:
gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
return image_padded, gt_boxes
def postprocess_bbbox(pred_bbox):
'''define anchor boxes'''
for i, pred in enumerate(pred_bbox):
conv_shape = pred.shape
output_size = conv_shape[1]
conv_raw_dxdy = pred[:, :, :, :, 0:2]
conv_raw_dwdh = pred[:, :, :, :, 2:4]
xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size))
xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2)
xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1])
xy_grid = xy_grid.astype(float)
pred_xy = ((special.expit(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i]
pred_wh = (np.exp(conv_raw_dwdh) * ANCHORS[i])
pred[:, :, :, :, 0:4] = np.concatenate([pred_xy, pred_wh], axis=-1)
pred_bbox = [np.reshape(x, (-1, np.shape(x)[-1])) for x in pred_bbox]
pred_bbox = np.concatenate(pred_bbox, axis=0)
return pred_bbox
def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
'''remove boundary boxs with a low detection probability'''
valid_scale=[0, np.inf]
pred_bbox = np.array(pred_bbox)
pred_xywh = pred_bbox[:, 0:4]
pred_conf = pred_bbox[:, 4]
pred_prob = pred_bbox[:, 5:]
# # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax)
pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
# # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
org_h, org_w = org_img_shape
resize_ratio = min(input_size / org_w, input_size / org_h)
dw = (input_size - resize_ratio * org_w) / 2
dh = (input_size - resize_ratio * org_h) / 2
pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio
# # (3) clip some boxes that are out of range
pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
pred_coor[invalid_mask] = 0
# # (4) discard some invalid boxes
bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))
# # (5) discard some boxes with low scores
classes = np.argmax(pred_prob, axis=-1)
scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
score_mask = scores > score_threshold
mask = np.logical_and(scale_mask, score_mask)
coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
def bboxes_iou(boxes1, boxes2):
'''calculate the Intersection Over Union value'''
boxes1 = np.array(boxes1)
boxes2 = np.array(boxes2)
boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
inter_section = np.maximum(right_down - left_up, 0.0)
inter_area = inter_section[..., 0] * inter_section[..., 1]
union_area = boxes1_area + boxes2_area - inter_area
ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
return ious
def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
"""
:param bboxes: (xmin, ymin, xmax, ymax, score, class)
Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
https://github.com/bharatsingh430/soft-nms
"""
classes_in_img = list(set(bboxes[:, 5]))
best_bboxes = []
for cls in classes_in_img:
cls_mask = (bboxes[:, 5] == cls)
cls_bboxes = bboxes[cls_mask]
while len(cls_bboxes) > 0:
max_ind = np.argmax(cls_bboxes[:, 4])
best_bbox = cls_bboxes[max_ind]
best_bboxes.append(best_bbox)
cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
weight = np.ones((len(iou),), dtype=np.float32)
assert method in ['nms', 'soft-nms']
if method == 'nms':
iou_mask = iou > iou_threshold
weight[iou_mask] = 0.0
if method == 'soft-nms':
weight = np.exp(-(1.0 * iou ** 2 / sigma))
cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
score_mask = cls_bboxes[:, 4] > 0.
cls_bboxes = cls_bboxes[score_mask]
return best_bboxes
def read_class_names(class_file_name):
'''loads class name from a file'''
names = {}
with open(class_file_name, 'r') as data:
for ID, name in enumerate(data):
names[ID] = name.strip('\n')
return names
def draw_bbox(image, bboxes, classes=read_class_names("coco.names"), show_label=True):
"""
bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
"""
num_classes = len(classes)
image_h, image_w, _ = image.shape
hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
random.seed(0)
random.shuffle(colors)
random.seed(None)
for i, bbox in enumerate(bboxes):
coor = np.array(bbox[:4], dtype=np.int32)
fontScale = 0.5
score = bbox[4]
class_ind = int(bbox[5])
bbox_color = colors[class_ind]
bbox_thick = int(0.6 * (image_h + image_w) / 600)
c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
if show_label:
bbox_mess = '%s: %.2f' % (classes[class_ind], score)
t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)
cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
return image
def get_anchors(anchors_path, tiny=False):
'''loads the anchors from a file'''
with open(anchors_path) as f:
anchors = f.readline()
anchors = np.array(anchors.split(','), dtype=np.float32)
return anchors.reshape(3, 3, 2)
#Specify the path to anchors file on your machine
ANCHORS = "./yolov4_anchors.txt"
STRIDES = [8, 16, 32]
XYSCALE = [1.2, 1.1, 1.05]
ANCHORS = get_anchors(ANCHORS)
STRIDES = np.array(STRIDES)
def parse_arguments():
parser = argparse.ArgumentParser(description='Object Detection using YOLOv4 in OPENCV using OpenVINO Execution Provider for ONNXRuntime')
parser.add_argument('--device', default='CPU_FP32', help="Device to perform inference on 'cpu (MLAS)' or on devices supported by OpenVINO-EP [CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16].")
parser.add_argument('--image', help='Path to image file.')
parser.add_argument('--video', help='Path to video file.')
parser.add_argument('--model', help='Path to model.')
args = parser.parse_args()
return args
def check_model_extension(fp):
# Split the extension from the path and normalise it to lowercase.
ext = os.path.splitext(fp)[-1].lower()
# Now we can simply use != to check for inequality, no need for wildcards.
if(ext != ".onnx"):
raise Exception(fp, "is an unknown file format. Use the model ending with .onnx format")
if not os.path.exists(fp):
raise Exception("[ ERROR ] Path of the onnx model file is Invalid")
def main():
# Process arguments
args = parse_arguments()
# Validate model file path
check_model_extension(args.model)
# Process inputs
win_name = 'Object detection using ONNXRuntime OpenVINO Execution Provider using YoloV4 model'
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)
output_file = "yolo_out_py.avi"
if (args.image):
# Open the image file
if not os.path.isfile(args.image):
print("Input image file ", args.image, " doesn't exist")
sys.exit(1)
cap = cv2.VideoCapture(args.image)
output_file = args.image[:-4]+'_yolo_out_py.jpg'
elif (args.video):
# Open the video file
if not os.path.isfile(args.video):
print("Input video file ", args.video, " doesn't exist")
sys.exit(1)
cap = cv2.VideoCapture(args.video)
output_file = args.video[:-4]+'_yolo_out_py.avi'
else:
# Webcam input
cap = cv2.VideoCapture(0)
# Get the video writer initialized to save the output video
if (not args.image):
vid_writer = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc('M','J','P','G'), 30, (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))
# Check the device information and create a session
device = args.device
so = rt.SessionOptions()
so.log_severity_level = 3
if(args.device == 'cpu'):
print("Device type selected is 'cpu' which is the default CPU Execution Provider (MLAS)")
#Specify the path to the ONNX model on your machine and register the CPU EP
sess = rt.InferenceSession(args.model, so, providers=['CPUExecutionProvider'])
else:
#Specify the path to the ONNX model on your machine and register the OpenVINO EP
sess = rt.InferenceSession(args.model, so, providers=['OpenVINOExecutionProvider'], provider_options=[{'device_type' : device}])
print("Device type selected is: " + device + " using the OpenVINO Execution Provider")
'''
other 'device_type' options are: (Any hardware target can be assigned if you have the access to it)
'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16'
'''
input_name = sess.get_inputs()[0].name
while cv2.waitKey(1) < 0:
# get frame from the video
has_frame, frame = cap.read()
# Stop the program if reached end of video
if not has_frame:
print("Done processing !!!")
print("Output file is stored as ", output_file)
has_frame=False
cv2.waitKey(3000)
# Release device
cap.release()
break
input_size = 416
original_image = frame
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
original_image_size = original_image.shape[:2]
image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
image_data = image_data[np.newaxis, ...].astype(np.float32)
outputs = sess.get_outputs()
output_names = list(map(lambda output: output.name, outputs))
start = time.time()
detections = sess.run(output_names, {input_name: image_data})
end = time.time()
inference_time = end - start
pred_bbox = postprocess_bbbox(detections)
bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
bboxes = nms(bboxes, 0.213, method='nms')
image = draw_bbox(original_image, bboxes)
cv2.putText(image,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
cv2.putText(image,'FPS: {}'.format(1.0/inference_time),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
# Write the frame with the detection boxes
if (args.image):
cv2.imwrite(output_file, image.astype(np.uint8))
else:
vid_writer.write(image.astype(np.uint8))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
cv2.imshow(win_name, image)
if __name__ == "__main__":
main()
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #object-detection #onnx #onnx-runtime #openvino-onnx-runtime #yolov2 #openvino-execution-provider-for-onnx
Object detection with tinyYOLOv2 in Python using OpenVINO™ Execution Provider# pip3 install openvino
# Install ONNX Runtime for OpenVINO™ Execution Provider
# pip3 install onnxruntime-openvino==1.11.0
# pip3 install -r requirements.txt
# How to run the sample
# python3 tiny_yolov2_obj_detection_sample.py --h
# Running the ONNXRuntime OpenVINO™ Execution Provider sample
# python3 tiny_yolov2_obj_detection_sample.py --video face-demographics-walking-and-pause.mp4 --model tinyyolov2.onnx --device CPU_FP32
'''
Copyright (C) 2021-2022, Intel Corporation
SPDX-License-Identifier: Apache-2.0
'''
import numpy as np
import onnxruntime as rt
import cv2
import time
import os
import argparse
import platform
if platform.system() == "Windows":
from openvino import utils
utils.add_openvino_libs_to_path()
# color look up table for different classes for object detection sample
clut = [(0,0,0),(255,0,0),(255,0,255),(0,0,255),(0,255,0),(0,255,128),
(128,255,0),(128,128,0),(0,128,255),(128,0,128),
(255,0,128),(128,0,255),(255,128,128),(128,255,128),(255,255,0),
(255,128,128),(128,128,255),(255,128,128),(128,255,128),(128,255,128)]
# 20 labels that the tiny-yolov2 model can do the object_detection on
label = ["aeroplane","bicycle","bird","boat","bottle",
"bus","car","cat","chair","cow","diningtable",
"dog","horse","motorbike","person","pottedplant",
"sheep","sofa","train","tvmonitor"]
def parse_arguments():
parser = argparse.ArgumentParser(description='Object Detection using YOLOv2 in OPENCV using OpenVINO Execution Provider for ONNXRuntime')
parser.add_argument('--device', default='CPU_FP32', help="Device to perform inference on 'cpu (MLAS)' or on devices supported by OpenVINO-EP [CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16].")
parser.add_argument('--video', help='Path to video file.')
parser.add_argument('--model', help='Path to model.')
args = parser.parse_args()
return args
def sigmoid(x, derivative=False):
return x*(1-x) if derivative else 1/(1+np.exp(-x))
def softmax(x):
score_mat_exp = np.exp(np.asarray(x))
return score_mat_exp / score_mat_exp.sum(0)
def check_model_extension(fp):
# Split the extension from the path and normalise it to lowercase.
ext = os.path.splitext(fp)[-1].lower()
# Now we can simply use != to check for inequality, no need for wildcards.
if(ext != ".onnx"):
raise Exception(fp, "is an unknown file format. Use the model ending with .onnx format")
if not os.path.exists(fp):
raise Exception("[ ERROR ] Path of the onnx model file is Invalid")
def check_video_file_extension(fp):
# Split the extension from the path and normalise it to lowercase.
ext = os.path.splitext(fp)[-1].lower()
# Now we can simply use != to check for inequality, no need for wildcards.
if(ext == ".mp4" or ext == ".avi" or ext == ".mov"):
pass
else:
raise Exception(fp, "is an unknown file format. Use the video file ending with .mp4 or .avi or .mov formats")
if not os.path.exists(fp):
raise Exception("[ ERROR ] Path of the video file is Invalid")
def image_preprocess(frame):
in_frame = cv2.resize(frame, (416, 416))
preprocessed_image = np.asarray(in_frame)
preprocessed_image = preprocessed_image.astype(np.float32)
preprocessed_image = preprocessed_image.transpose(2,0,1)
#Reshaping the input array to align with the input shape of the model
preprocessed_image = preprocessed_image.reshape(1,3,416,416)
return preprocessed_image
def postprocess_output(out, frame, x_scale, y_scale, i):
out = out[0][0]
num_classes = 20
anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]
existing_labels = {l: [] for l in label}
#Inside this loop we compute the bounding box b for grid cell (cy, cx)
for cy in range(0,13):
for cx in range(0,13):
for b in range(0,5):
# First we read the tx, ty, width(tw), and height(th) for the bounding box from the out array, as well as the confidence score
channel = b*(num_classes+5)
tx = out[channel ][cy][cx]
ty = out[channel+1][cy][cx]
tw = out[channel+2][cy][cx]
th = out[channel+3][cy][cx]
tc = out[channel+4][cy][cx]
x = (float(cx) + sigmoid(tx))*32
y = (float(cy) + sigmoid(ty))*32
w = np.exp(tw) * 32 * anchors[2*b]
h = np.exp(th) * 32 * anchors[2*b+1]
#calculating the confidence score
confidence = sigmoid(tc) # The confidence value for the bounding box is given by tc
classes = np.zeros(num_classes)
for c in range(0,num_classes):
classes[c] = out[channel + 5 +c][cy][cx]
# we take the softmax to turn the array into a probability distribution. And then we pick the class with the largest score as the winner.
classes = softmax(classes)
detected_class = classes.argmax()
# Now we can compute the final score for this bounding box and we only want to keep the ones whose combined score is over a certain threshold
if 0.60 < classes[detected_class]*confidence:
color =clut[detected_class]
x = (x - w/2)*x_scale
y = (y - h/2)*y_scale
w *= x_scale
h *= y_scale
labelX = int((x+x+w)/2)
labelY = int((y+y+h)/2)
addLabel = True
lab_threshold = 100
for point in existing_labels[label[detected_class]]:
if labelX < point[0] + lab_threshold and labelX > point[0] - lab_threshold and \
labelY < point[1] + lab_threshold and labelY > point[1] - lab_threshold:
addLabel = False
#Adding class labels to the output of the frame and also drawing a rectangular bounding box around the object detected.
if addLabel:
cv2.rectangle(frame, (int(x),int(y)),(int(x+w),int(y+h)),color,2)
cv2.rectangle(frame, (int(x),int(y-13)),(int(x)+9*len(label[detected_class]),int(y)),color,-1)
cv2.putText(frame,label[detected_class],(int(x)+2,int(y)-3),cv2.FONT_HERSHEY_COMPLEX,0.4,(255,255,255),1)
existing_labels[label[detected_class]].append((labelX,labelY))
print('{} detected in frame {}'.format(label[detected_class],i))
def show_bbox(device, frame, inference_time):
cv2.putText(frame,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
cv2.putText(frame,'FPS: {}'.format(1.0/inference_time),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
cv2.imshow('frame',frame)
def main():
# Process arguments
args = parse_arguments()
# Validate model file path
check_model_extension(args.model)
so = rt.SessionOptions()
so.log_severity_level = 3
if (args.device == 'cpu'):
print("Device type selected is 'cpu' which is the default CPU Execution Provider (MLAS)")
#Specify the path to the ONNX model on your machine and register the CPU EP
sess = rt.InferenceSession(args.model, so, providers=['CPUExecutionProvider'])
elif (args.device == 'CPU_FP32' or args.device == 'GPU_FP32' or args.device == 'GPU_FP16' or args.device == 'MYRIAD_FP16' or args.device == 'VADM_FP16'):
#Specify the path to the ONNX model on your machine and register the OpenVINO EP
sess = rt.InferenceSession(args.model, so, providers=['OpenVINOExecutionProvider'], provider_options=[{'device_type' : args.device}])
print("Device type selected is: " + args.device + " using the OpenVINO Execution Provider")
'''
other 'device_type' options are: (Any hardware target can be assigned if you have the access to it)
'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16'
'''
else:
raise Exception("Device type selected is not [cpu, CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VADM_FP16]")
# Get the input name of the model
input_name = sess.get_inputs()[0].name
#validate video file input path
check_video_file_extension(args.video)
#Path to video file has to be provided
cap = cv2.VideoCapture(args.video)
# capturing different metrics of the image from the video
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
x_scale = float(width)/416.0 #In the document of tino-yolo-v2, input shape of this network is (1,3,416,416).
y_scale = float(height)/416.0
# writing the inferencing output as a video to the local disk
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video_name = args.device + "_output.avi"
output_video = cv2.VideoWriter(output_video_name,fourcc, float(17.0), (640,360))
# capturing one frame at a time from the video feed and performing the inference
i = 0
while cv2.waitKey(1) < 0:
l_start = time.time()
ret, frame = cap.read()
if not ret:
break
initial_w = cap.get(3)
initial_h = cap.get(4)
# preprocessing the input frame and reshaping it.
#In the document of tino-yolo-v2, input shape of this network is (1,3,416,416). so we resize the model frame w.r.t that size.
preprocessed_image = image_preprocess(frame)
start = time.time()
#Running the session by passing in the input data of the model
out = sess.run(None, {input_name: preprocessed_image})
end = time.time()
inference_time = end - start
#Get the output
postprocess_output(out, frame, x_scale, y_scale, i)
#Show the Output
output_video.write(frame)
show_bbox(args.device, frame, inference_time)
#Press 'q' to quit the process
print('Processed Frame {}'.format(i))
i += 1
l_end = time.time()
print('Loop Time = {}'.format(l_end - l_start))
output_video.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
main()
#python #openvino #openvino-notebook #onnx #paddlepaddle
103-paddle-onnx-to-openvino: Convert a PaddlePaddle Model to ONNX and OpenVINO IR# Imports
import os
import time
import cv2
import matplotlib.pyplot as plt
import numpy as np
import paddlehub as hub
from IPython.display import Markdown, display
from PIL import Image
from openvino.runtime import Core
from paddle.static import InputSpec
from scipy.special import softmax
# Settings
IMAGE_FILENAME = "coco_close.png"
MODEL_NAME = "mobilenet_v3_large_imagenet_ssld"
hub.config.server = "https://paddlepaddle.org.cn/paddlehub"
# Show Inference on PaddlePaddle Model
classifier = hub.Module(name=MODEL_NAME)
# Load image in BGR format, as specified in model documentation
image = cv2.imread(filename=IMAGE_FILENAME)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
result = classifier.classification(images=[image], top_k=3)
for class_name, softmax_probability in result[0].items():
print(f"{class_name}, {softmax_probability:.5f}")
from mobilenet_v3_large_imagenet_ssld.data_feed import process_image
pil_image = Image.open(IMAGE_FILENAME)
processed_image = process_image(pil_image)
print(f"Processed image shape: {processed_image.shape}")
# Processed image is in (C,H,W) format, convert to (H,W,C) to show the image
plt.imshow(np.transpose(processed_image, (1, 2, 0)))
# Preparation
input_shape = list(classifier.cpu_predictor.get_input_tensor_shape().values())
print("input shape:", input_shape)
print("mean:", classifier.get_pretrained_images_mean())
print("std:", classifier.get_pretrained_images_std())
# Convert PaddlePaddle Model to ONNX
target_height, target_width = next(iter(input_shape))[2:]
x_spec = InputSpec([1, 3, target_height, target_width], "float32", "x")
print(
"Exporting PaddlePaddle model to ONNX with target_height "
f"{target_height} and target_width {target_width}"
)
classifier.export_onnx_model(".", input_spec=[x_spec], opset_version=11)
# Convert ONNX model to OpenVINO IR Format
model_xml = f"{MODEL_NAME}.xml"
if not os.path.exists(model_xml):
mo_command = f'mo --input_model {MODEL_NAME}.onnx --input_shape "[1,3,{target_height},{target_width}]"'
display(Markdown(f"Model Optimizer command to convert the ONNX model to IR: `{mo_command}`"))
display(Markdown("_Converting model to IR. This may take a few minutes..._"))
! $mo_command
else:
print(f"{model_xml} already exists.")
# Show Inference on OpenVINO Model
# Load Inference Engine and IR model
ie = Core()
model = ie.read_model(model=f"{MODEL_NAME}.xml", weights=f"{MODEL_NAME}.bin")
compiled_model = ie.compile_model(model=model, device_name="CPU")
# Get model output
output_layer = compiled_model.output(0)
# Read, show, and preprocess input image
# See the "Show Inference on PaddlePaddle Model" section for source of process_image
image = Image.open(IMAGE_FILENAME)
plt.imshow(image)
input_image = process_image(image)[None,]
# Do inference
ie_result = compiled_model([input_image])[output_layer][0]
# Compute softmax probabilities for the inference result and find the top three values
softmax_result = softmax(ie_result)
top_indices = np.argsort(softmax_result)[-3:][::-1]
top_softmax = softmax_result[top_indices]
# Convert the inference results to class names, using the same labels as the PaddlePaddle classifier
for index, softmax_probability in zip(top_indices, top_softmax):
print(f"{classifier.label_list[index]}, {softmax_probability:.5f}")
# Timing and Comparison
num_images = 50
# PaddlePaddle's classification method expects a BGR numpy array
image = cv2.imread(filename=IMAGE_FILENAME)
# The process_image function expects a PIL image
pil_image = Image.open(fp=IMAGE_FILENAME)
# Show CPU information
ie = Core()
print(f"CPU: {ie.get_property(device_name='CPU', name='FULL_DEVICE_NAME')}")
# Show inference speed on PaddlePaddle model
start = time.perf_counter()
for _ in range(num_images):
result = classifier.classification(images=[image], top_k=3)
end = time.perf_counter()
time_ir = end - start
print(
f"PaddlePaddle model on CPU: {time_ir/num_images:.4f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}\n"
)
print("PaddlePaddle result:")
for class_name, softmax_probability in result[0].items():
print(f"{class_name}, {softmax_probability:.5f}")
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));
# Show inference speed on OpenVINO IR model
compiled_model = ie.compile_model(model=model, device_name="CPU")
output_layer = compiled_model.output(0)
start = time.perf_counter()
input_image = process_image(pil_image)[None,]
for _ in range(num_images):
ie_result = compiled_model([input_image])[output_layer][0]
result_index = np.argmax(ie_result)
class_name = classifier.label_list[np.argmax(ie_result)]
softmax_result = softmax(ie_result)
top_indices = np.argsort(softmax_result)[-3:][::-1]
top_softmax = softmax_result[top_indices]
end = time.perf_counter()
time_ir = end - start
print(
f"IR model in Inference Engine (CPU): {time_ir/num_images:.4f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}"
)
print()
print("OpenVINO result:")
for index, softmax_probability in zip(top_indices, top_softmax):
print(f"{classifier.label_list[index]}, {softmax_probability:.5f}")
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));
#python #openvino #openvino-notebook #pytorch #onnx
102-pytorch-onnx-to-openvino: PyTorch to ONNX and OpenVINO IR Tutorial# Imports
import sys
import time
from pathlib import Path
import cv2
import numpy as np
import torch
from IPython.display import Markdown, display
from fastseg import MobileV3Large
from openvino.runtime import Core
sys.path.append("../utils")
from notebook_utils import CityScapesSegmentation, segmentation_map_to_image, viz_result_image
# Settings
IMAGE_WIDTH = 1024 # Suggested values: 2048, 1024 or 512. The minimum width is 512.
# Set IMAGE_HEIGHT manually for custom input sizes. Minimum height is 512
IMAGE_HEIGHT = 1024 if IMAGE_WIDTH == 2048 else 512
DIRECTORY_NAME = "model"
BASE_MODEL_NAME = DIRECTORY_NAME + f"/fastseg{IMAGE_WIDTH}"
# Paths where PyTorch, ONNX and OpenVINO IR models will be stored
model_path = Path(BASE_MODEL_NAME).with_suffix(".pth")
onnx_path = model_path.with_suffix(".onnx")
ir_path = model_path.with_suffix(".xml")
# Download the Fastseg Model
print("Downloading the Fastseg model (if it has not been downloaded before)....")
model = MobileV3Large.from_pretrained().cpu().eval()
print("Loaded PyTorch Fastseg model")
# Save the model
model_path.parent.mkdir(exist_ok=True)
torch.save(model.state_dict(), str(model_path))
print(f"Model saved at {model_path}")
# Convert PyTorch model to ONNX
if not onnx_path.exists():
dummy_input = torch.randn(1, 3, IMAGE_HEIGHT, IMAGE_WIDTH)
# For the Fastseg model, setting do_constant_folding to False is required
# for PyTorch>1.5.1
torch.onnx.export(
model,
dummy_input,
onnx_path,
opset_version=11,
do_constant_folding=False,
)
print(f"ONNX model exported to {onnx_path}.")
else:
print(f"ONNX model {onnx_path} already exists.")
# Convert ONNX Model to OpenVINO IR Format
# Construct the command for Model Optimizer
mo_command = f"""mo
--input_model "{onnx_path}"
--input_shape "[1,3, {IMAGE_HEIGHT}, {IMAGE_WIDTH}]"
--mean_values="[123.675, 116.28 , 103.53]"
--scale_values="[58.395, 57.12 , 57.375]"
--data_type FP16
--output_dir "{model_path.parent}"
"""
mo_command = " ".join(mo_command.split())
print("Model Optimizer command to convert the ONNX model to OpenVINO:")
display(Markdown(f"`{mo_command}`"))
if not ir_path.exists():
print("Exporting ONNX model to IR... This may take a few minutes.")
mo_result = %sx $mo_command
print("\n".join(mo_result))
else:
print(f"IR model {ir_path} already exists.")
# Show results: Load and Preprocess an Input Image
def normalize(image: np.ndarray) -> np.ndarray:
"""
Normalize the image to the given mean and standard deviation
for CityScapes models.
"""
image = image.astype(np.float32)
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
image /= 255.0
image -= mean
image /= std
return image
image_filename = "data/street.jpg"
image = cv2.cvtColor(cv2.imread(image_filename), cv2.COLOR_BGR2RGB)
resized_image = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))
normalized_image = normalize(resized_image)
# Convert the resized images to network input shape
input_image = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0)
normalized_input_image = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), 0)
# ONNX Model in Inference Engine
# Load network to Inference Engine
ie = Core()
model_onnx = ie.read_model(model=onnx_path)
compiled_model_onnx = ie.compile_model(model=model_onnx, device_name="CPU")
output_layer_onnx = compiled_model_onnx.output(0)
# Run inference on the input image
res_onnx = compiled_model_onnx([normalized_input_image])[output_layer_onnx]
# Convert network result to segmentation map and display the result
result_mask_onnx = np.squeeze(np.argmax(res_onnx, axis=1)).astype(np.uint8)
viz_result_image(
image,
segmentation_map_to_image(result_mask_onnx, CityScapesSegmentation.get_colormap()),
resize=True,
)
# IR Model in Inference Engine
# Load the network in Inference Engine
ie = Core()
model_ir = ie.read_model(model=ir_path)
compiled_model_ir = ie.compile_model(model=model_ir, device_name="CPU")
# Get input and output layers
output_layer_ir = compiled_model_ir.output(0)
# Run inference on the input image
res_ir = compiled_model_ir([input_image])[output_layer_ir]
result_mask_ir = np.squeeze(np.argmax(res_ir, axis=1)).astype(np.uint8)
viz_result_image(
image,
segmentation_map_to_image(result=result_mask_ir, colormap=CityScapesSegmentation.get_colormap()),
resize=True,
)
# PyTorch Comparison
with torch.no_grad():
result_torch = model(torch.as_tensor(normalized_input_image).float())
result_mask_torch = torch.argmax(result_torch, dim=1).squeeze(0).numpy().astype(np.uint8)
viz_result_image(
image,
segmentation_map_to_image(result=result_mask_torch, colormap=CityScapesSegmentation.get_colormap()),
resize=True,
)
# Performance Comparison
num_images = 20
start = time.perf_counter()
for _ in range(num_images):
compiled_model_onnx([normalized_input_image])
end = time.perf_counter()
time_onnx = end - start
print(
f"ONNX model in Inference Engine/CPU: {time_onnx/num_images:.3f} "
f"seconds per image, FPS: {num_images/time_onnx:.2f}"
)
start = time.perf_counter()
for _ in range(num_images):
compiled_model_ir([input_image])
end = time.perf_counter()
time_ir = end - start
print(
f"IR model in Inference Engine/CPU: {time_ir/num_images:.3f} "
f"seconds per image, FPS: {num_images/time_ir:.2f}"
)
with torch.no_grad():
start = time.perf_counter()
for _ in range(num_images):
model(torch.as_tensor(input_image).float())
end = time.perf_counter()
time_torch = end - start
print(
f"PyTorch model on CPU: {time_torch/num_images:.3f} seconds per image, "
f"FPS: {num_images/time_torch:.2f}"
)
if "GPU" in ie.available_devices:
compiled_model_onnx_gpu = ie.compile_model(model=model_onnx, device_name="GPU")
start = time.perf_counter()
for _ in range(num_images):
compiled_model_onnx_gpu([input_image])
end = time.perf_counter()
time_onnx_gpu = end - start
print(
f"ONNX model in Inference Engine/GPU: {time_onnx_gpu/num_images:.3f} "
f"seconds per image, FPS: {num_images/time_onnx_gpu:.2f}"
)
compiled_model_ir_gpu = ie.compile_model(model=model_ir, device_name="GPU")
start = time.perf_counter()
for _ in range(num_images):
compiled_model_ir_gpu([input_image])
end = time.perf_counter()
time_ir_gpu = end - start
print(
f"IR model in Inference Engine/GPU: {time_ir_gpu/num_images:.3f} "
f"seconds per image, FPS: {num_images/time_ir_gpu:.2f}"
)
# Show Device Information
devices = ie.available_devices
for device in devices:
device_name = ie.get_property(device_name=device, name="FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
#python #openvino #onnx #openvino-notebook #api #openvino-api
002-openvino-api: OpenVINO API tutorial# Load Inference Engine and Show Info
from openvino.runtime import Core
ie = Core()
devices = ie.available_devices
for device in devices:
device_name = ie.get_property(device_name=device, name="FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
# Loading a Model
from openvino.runtime import Core
ie = Core()
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
compiled_model = ie.compile_model(model=model, device_name="CPU")
from openvino.runtime import Core
ie = Core()
onnx_model_path = "model/segmentation.onnx"
model_onnx = ie.read_model(model=onnx_model_path)
compiled_model_onnx = ie.compile_model(model=model_onnx, device_name="CPU")
from openvino.offline_transformations import serialize
serialize(model=model_onnx, model_path="model/exported_onnx_model.xml", weights_path="model/exported_onnx_model.bin")
# Getting Information about a Model
from openvino.runtime import Core
ie = Core()
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
model.input(0).any_name
input_layer = model.input(0)
print(f"input precision: {input_layer.element_type}")
print(f"input shape: {input_layer.shape}")
from openvino.runtime import Core
ie = Core()
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
model.output(0).any_name
output_layer = model.output(0)
output_layer
print(f"output precision: {output_layer.element_type}")
print(f"output shape: {output_layer.shape}")
from openvino.runtime import Core
ie = Core()
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)
import cv2
image_filename = "data/coco_hollywood.jpg"
image = cv2.imread(image_filename)
image.shape
# N,C,H,W = batch size, number of channels, height, width
N, C, H, W = input_layer.shape
# OpenCV resize expects the destination size as (width, height)
resized_image = cv2.resize(src=image, dsize=(W, H))
resized_image.shape
import numpy as np
input_data = np.expand_dims(np.transpose(resized_image, (2, 0, 1)), 0).astype(np.float32)
input_data.shape
# Do Inference
result = compiled_model([input_data])[output_layer]
request = compiled_model.create_infer_request()
request.infer(inputs={input_layer.any_name: input_data})
result = request.get_output_tensor(output_layer.index).data
from openvino.runtime import Core, PartialShape
ie = Core()
segmentation_model_xml = "model/segmentation.xml"
segmentation_model = ie.read_model(model=segmentation_model_xml)
segmentation_input_layer = segmentation_model.input(0)
segmentation_output_layer = segmentation_model.output(0)
print("~~~~ ORIGINAL MODEL ~~~~")
print(f"input shape: {segmentation_input_layer.shape}")
print(f"output shape: {segmentation_output_layer.shape}")
new_shape = PartialShape([1, 3, 544, 544])
segmentation_model.reshape({segmentation_input_layer.any_name: new_shape})
segmentation_compiled_model = ie.compile_model(model=segmentation_model, device_name="CPU")
# help(segmentation_compiled_model)
print("~~~~ RESHAPED MODEL ~~~~")
print(f"model input shape: {segmentation_input_layer.shape}")
print(
f"compiled_model input shape: "
f"{segmentation_compiled_model.input(index=0).shape}"
)
print(f"compiled_model output shape: {segmentation_output_layer.shape}")
# Change Batch Size
from openvino.runtime import Core, PartialShape
ie = Core()
segmentation_model_xml = "model/segmentation.xml"
segmentation_model = ie.read_model(model=segmentation_model_xml)
segmentation_input_layer = segmentation_model.input(0)
segmentation_output_layer = segmentation_model.output(0)
new_shape = PartialShape([2, 3, 544, 544])
segmentation_model.reshape({segmentation_input_layer.any_name: new_shape})
segmentation_compiled_model = ie.compile_model(model=segmentation_model, device_name="CPU")
print(f"input shape: {segmentation_input_layer.shape}")
print(f"output shape: {segmentation_output_layer.shape}")
import numpy as np
from openvino.runtime import Core, PartialShape
ie = Core()
segmentation_model_xml = "model/segmentation.xml"
segmentation_model = ie.read_model(model=segmentation_model_xml)
segmentation_input_layer = segmentation_model.input(0)
segmentation_output_layer = segmentation_model.output(0)
new_shape = PartialShape([2, 3, 544, 544])
segmentation_model.reshape({segmentation_input_layer.any_name: new_shape})
segmentation_compiled_model = ie.compile_model(model=segmentation_model, device_name="CPU")
input_data = np.random.rand(2, 3, 544, 544)
output = segmentation_compiled_model([input_data])
print(f"input data shape: {input_data.shape}")
print(f"result data data shape: {segmentation_output_layer.shape}")
# Caching a Model
import time
from pathlib import Path
from openvino.runtime import Core, PartialShape
ie = Core()
device_name = "GPU" # Model Caching is not available for CPU
if device_name in ie.available_devices and device_name != "CPU":
cache_path = Path("model/model_cache")
cache_path.mkdir(exist_ok=True)
# Enable caching for Inference Engine. To disable caching set enable_caching = False
enable_caching = True
config_dict = {"CACHE_DIR": str(cache_path)} if enable_caching else {}
classification_model_xml = "model/classification.xml"
model = ie.read_model(model=classification_model_xml)
start_time = time.perf_counter()
compiled_model = ie.compile_model(model=model, device_name=device_name, config=config_dict)
end_time = time.perf_counter()
print(f"Loading the network to the {device_name} device took {end_time-start_time:.2f} seconds.")
else:
print("Model caching is not available on CPU devices.")
if device_name in ie.available_devices and device_name != "CPU":
del compiled_model
start_time = time.perf_counter()
compiled_model = ie.compile_model(model=model, device_name=device_name, config=config_dict)
end_time = time.perf_counter()
print(f"Loading the network to the {device_name} device took {end_time-start_time:.2f} seconds.")
Fri Jun 17 2022 09:51:37 GMT+0000 (Coordinated Universal Time)
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #object-detection #onnx #onnx-runtime #openvino-onnx-runtime #openvino-execution-provider-for-onnx #tiny-yolov4Fri Jun 17 2022 09:49:43 GMT+0000 (Coordinated Universal Time) https://github.com/microsoft/onnxruntime-inference-examples/blob/main/python/OpenVINO_EP/tiny_yolo_v2_object_detection/tiny_yolov2_obj_detection_sample.py
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #object-detection #onnx #onnx-runtime #openvino-onnx-runtime #yolov2 #openvino-execution-provider-for-onnxThu Jun 09 2022 17:20:26 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/103-paddle-onnx-to-openvino/103-paddle-onnx-to-openvino-classification.ipynb
#python #openvino #openvino-notebook #onnx #paddlepaddleThu Jun 09 2022 17:04:05 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/102-pytorch-onnx-to-openvino/102-pytorch-onnx-to-openvino.ipynb
#python #openvino #openvino-notebook #pytorch #onnxThu Jun 09 2022 16:41:35 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/002-openvino-api/002-openvino-api.ipynb
#python #openvino #onnx #openvino-notebook #api #openvino-api

