{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "8d41befc-7e65-4110-9118-35dce6e6ab0c",
"metadata": {},
"outputs": [],
"source": [
"from openvino.runtime import Core\n",
"import openvino.runtime as ov\n",
"import cv2 as cv\n",
"import numpy as np\n",
"from PIL import Image\n",
"from ultralytics.yolo.utils import ops\n",
"import torch\n",
"from ultralytics.yolo.utils.plotting import colors"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cd01038a-fe1a-4b47-ad49-b0641afdaee5",
"metadata": {},
"outputs": [],
"source": [
"def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scale_fill=False, scaleup=False, stride=32):\n",
" \"\"\"\n",
" Resize image and padding for detection. Takes image as input,\n",
" resizes image to fit into new shape with saving original aspect ratio and pads it to meet stride-multiple constraints\n",
"\n",
" Parameters:\n",
" img (np.ndarray): image for preprocessing\n",
" new_shape (Tuple(int, int)): image size after preprocessing in format [height, width]\n",
" color (Tuple(int, int, int)): color for filling padded area\n",
" auto (bool): use dynamic input size, only padding for stride constrins applied\n",
" scale_fill (bool): scale image to fill new_shape\n",
" scaleup (bool): allow scale image if it is lower then desired input size, can affect model accuracy\n",
" stride (int): input padding stride\n",
" Returns:\n",
" img (np.ndarray): image after preprocessing\n",
" ratio (Tuple(float, float)): hight and width scaling ratio\n",
" padding_size (Tuple(int, int)): height and width padding size\n",
"\n",
"\n",
" \"\"\"\n",
" # Resize and pad image while meeting stride-multiple constraints\n",
" shape = img.shape[:2] # current shape [height, width]\n",
" if isinstance(new_shape, int):\n",
" new_shape = (new_shape, new_shape)\n",
"\n",
" # Scale ratio (new / old)\n",
" r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])\n",
" if not scaleup: # only scale down, do not scale up (for better test mAP)\n",
" r = min(r, 1.0)\n",
"\n",
" # Compute padding\n",
" ratio = r, r # width, height ratios\n",
" new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))\n",
" dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding\n",
" if auto: # minimum rectangle\n",
" dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding\n",
" elif scale_fill: # stretch\n",
" dw, dh = 0.0, 0.0\n",
" new_unpad = (new_shape[1], new_shape[0])\n",
" ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios\n",
"\n",
" dw /= 2 # divide padding into 2 sides\n",
" dh /= 2\n",
"\n",
" if shape[::-1] != new_unpad: # resize\n",
" img = cv.resize(img, new_unpad, interpolation=cv.INTER_LINEAR)\n",
" top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))\n",
" left, right = int(round(dw - 0.1)), int(round(dw + 0.1))\n",
" img = cv.copyMakeBorder(img, top, bottom, left, right, cv.BORDER_CONSTANT, value=color) # add border\n",
" return img, ratio, (dw, dh)"
]
},
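{
"cell_type": "markdown",
"id": "letterbox-demo-md",
"metadata": {},
"source": [
"A quick sanity check of `letterbox` (a minimal sketch on a synthetic frame, not part of the detection pipeline): a 480×640 dummy image needs no rescaling with the default settings, so only 80 px of padding should be added to the top and bottom."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "letterbox-demo",
"metadata": {},
"outputs": [],
"source": [
"# Sanity check on a synthetic 480x640 frame (stand-in for a real capture)\n",
"dummy = np.zeros((480, 640, 3), dtype=np.uint8)\n",
"padded, ratio, (dw, dh) = letterbox(dummy)\n",
"print(padded.shape, ratio, (dw, dh))  # expected: (640, 640, 3) (1.0, 1.0) (0.0, 80.0)"
]
},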
{
"cell_type": "code",
"execution_count": 3,
"id": "5d01c15e-7dcc-4cec-87b0-a338e41051e4",
"metadata": {},
"outputs": [],
"source": [
"def preprocess_image(img0: np.ndarray):\n",
" \"\"\"\n",
" Preprocess image according to YOLOv8 input requirements.\n",
" Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW.\n",
"\n",
" Parameters:\n",
" img0 (np.ndarray): image for preprocessing\n",
" Returns:\n",
" img (np.ndarray): image after preprocessing\n",
" \"\"\"\n",
" # resize\n",
" img = letterbox(img0)[0]\n",
"\n",
" # Convert HWC to CHW\n",
" img = img.transpose(2, 0, 1)\n",
" img = np.ascontiguousarray(img)\n",
" return img"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5d99be3c-8c3d-4c4c-b82e-17b6724d2258",
"metadata": {},
"outputs": [],
"source": [
"def image_to_tensor(image:np.ndarray):\n",
" \"\"\"\n",
" Preprocess image according to YOLOv8 input requirements.\n",
" Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW.\n",
"\n",
" Parameters:\n",
" img (np.ndarray): image for preprocessing\n",
" Returns:\n",
" input_tensor (np.ndarray): input tensor in NCHW format with float32 values in [0, 1] range\n",
" \"\"\"\n",
" input_tensor = image.astype(np.float32) # uint8 to fp32\n",
" input_tensor /= 255.0 # 0 - 255 to 0.0 - 1.0\n",
"\n",
" # add batch dimension\n",
" if input_tensor.ndim == 3:\n",
" input_tensor = np.expand_dims(input_tensor, 0)\n",
" return input_tensor"
]
},
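{
"cell_type": "markdown",
"id": "preprocess-chain-md",
"metadata": {},
"source": [
"The two helpers above compose into the full preprocessing chain. The sketch below (on a synthetic 720p frame) checks that the result is an NCHW float32 tensor, which is the layout the compiled model expects."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "preprocess-chain-demo",
"metadata": {},
"outputs": [],
"source": [
"# End-to-end preprocessing check on a synthetic frame\n",
"demo_frame = np.random.randint(0, 256, (720, 1280, 3), dtype=np.uint8)\n",
"demo_tensor = image_to_tensor(preprocess_image(demo_frame))\n",
"print(demo_tensor.shape, demo_tensor.dtype)  # expected: (1, 3, 640, 640) float32"
]
},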
{
"cell_type": "code",
"execution_count": 5,
"id": "c1867e4e-1b99-431a-9d07-0389ae47d6db",
"metadata": {},
"outputs": [],
"source": [
"def postprocess(\n",
" pred_boxes,\n",
" input_hw,\n",
" orig_img,\n",
" min_conf_threshold=0.25,\n",
" nms_iou_threshold=0.7,\n",
" agnosting_nms=False,\n",
" max_detections=300,\n",
"):\n",
" \"\"\"\n",
" YOLOv8 model postprocessing function. Applied non maximum supression algorithm to detections and rescale boxes to original image size\n",
" Parameters:\n",
" pred_boxes (np.ndarray): model output prediction boxes\n",
" input_hw (np.ndarray): preprocessed image\n",
" orig_image (np.ndarray): image before preprocessing\n",
" min_conf_threshold (float, *optional*, 0.25): minimal accepted confidence for object filtering\n",
" nms_iou_threshold (float, *optional*, 0.45): minimal overlap score for removing objects duplicates in NMS\n",
" agnostic_nms (bool, *optiona*, False): apply class agnostinc NMS approach or not\n",
" max_detections (int, *optional*, 300): maximum detections after NMS\n",
" Returns:\n",
" pred (List[Dict[str, np.ndarray]]): list of dictionary with det - detected boxes in format [x1, y1, x2, y2, score, label]\n",
" \"\"\"\n",
" nms_kwargs = {\"agnostic\": agnosting_nms, \"max_det\":max_detections}\n",
" preds = ops.non_max_suppression(\n",
" torch.from_numpy(pred_boxes),\n",
" min_conf_threshold,\n",
" nms_iou_threshold,\n",
" nc=3,\n",
" **nms_kwargs\n",
" )\n",
"\n",
" results = []\n",
" for i, pred in enumerate(preds):\n",
" shape = orig_img[i].shape if isinstance(orig_img, list) else orig_img.shape\n",
" if not len(pred):\n",
" results.append({\"det\": [], \"segment\": []})\n",
" continue\n",
" pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()\n",
" results.append({\"det\": pred})\n",
"\n",
" return results"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1940d393-1e89-46cd-9f12-b965487e1874",
"metadata": {},
"outputs": [],
"source": [
"def draw_results(results, source_image, label_map):\n",
" \"\"\"\n",
" Helper function for drawing bounding boxes on image\n",
" Parameters:\n",
" image_res (np.ndarray): detection predictions in format [x1, y1, x2, y2, score, label_id]\n",
" source_image (np.ndarray): input image for drawing\n",
" label_map; (Dict[int, str]): label_id to class name mapping\n",
" Returns:\n",
" Image with boxes\n",
" \"\"\"\n",
" boxes = results[\"det\"]\n",
" for idx, (*xyxy, conf, lbl) in enumerate(boxes):\n",
" label = f'{label_map[int(lbl)]} {conf:.2f}'\n",
" source_image = plot_one_box(xyxy, source_image, label=label, color=colors(int(lbl)), line_thickness=1)\n",
" return source_image"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c599c49c-52af-4d0e-bad7-20c25fa2c851",
"metadata": {},
"outputs": [],
"source": [
"def plot_one_box(box, img,\n",
" color,\n",
" label, \n",
" line_thickness=5):\n",
" \"\"\"\n",
" Helper function for drawing single bounding box on image\n",
" Parameters:\n",
" x (np.ndarray): bounding box coordinates in format [x1, y1, x2, y2]\n",
" img (no.ndarray): input image\n",
" color (Tuple[int, int, int], *optional*, None): color in BGR format for drawing box, if not specified will be selected randomly\n",
" label (str, *optonal*, None): box label string, if not provided will not be provided as drowing result\n",
" line_thickness (int, *optional*, 5): thickness for box drawing lines\n",
" \"\"\"\n",
" # Plots one bounding box on image img\n",
" tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness\n",
" color = color or [random.randint(0, 255) for _ in range(3)]\n",
" c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))\n",
" cv.rectangle(img, c1, c2, color, thickness=tl, lineType=cv.LINE_AA)\n",
" if label:\n",
" tf = max(tl - 1, 1) # font thickness\n",
" t_size = cv.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]\n",
" c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3\n",
" cv.rectangle(img, c1, c2, color, -1, cv.LINE_AA) # filled\n",
" cv.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv.LINE_AA)\n",
"\n",
" return img"
]
},
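{
"cell_type": "markdown",
"id": "plot-one-box-demo-md",
"metadata": {},
"source": [
"A small usage sketch for `plot_one_box`: it draws one labelled box on a blank canvas so the rendering can be checked without a model or camera. The box coordinates and the label text are arbitrary."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "plot-one-box-demo",
"metadata": {},
"outputs": [],
"source": [
"# Draw an arbitrary labelled box on a blank canvas to verify rendering\n",
"canvas = np.full((640, 640, 3), 255, dtype=np.uint8)\n",
"canvas = plot_one_box([100, 100, 300, 300], canvas, color=colors(0), label='mask 0.90', line_thickness=2)\n",
"cv.imwrite('plot_one_box_demo.jpg', canvas)"
]
},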
{
"cell_type": "code",
"execution_count": 8,
"id": "ed0b7672-e2d1-47e6-8de8-07ffbb793a53",
"metadata": {},
"outputs": [],
"source": [
"def read_label(label_path):\n",
" with open(label_path, 'r') as f:\n",
" labels = f.read().split()\n",
" return labels"
]
},
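{
"cell_type": "markdown",
"id": "labels-format-md",
"metadata": {},
"source": [
"`read_label` splits the file contents on whitespace, so `labels.txt` is expected to hold one class name per token, in the order the model was trained on. Since `postprocess` passes `nc=3`, a three-class file along the following lines is assumed; the exact class names here are hypothetical."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "labels-format-demo",
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical labels.txt layout for a 3-class mask-detection model\n",
"with open(\"labels_sample.txt\", \"w\") as f:\n",
"    f.write(\"mask\\nno-mask\\nincorrect-mask\\n\")\n",
"print(read_label(\"labels_sample.txt\"))  # ['mask', 'no-mask', 'incorrect-mask']"
]
},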
{
"cell_type": "code",
"execution_count": 10,
"id": "9e24f99f-cf0c-4063-aedd-574eea9fe18c",
"metadata": {},
"outputs": [],
"source": [
"label_path = \"labels.txt\"\n",
"image_path = \"test1.jpg\"\n",
"yoloe_model_path = \"yolov8n/best.xml\"\n",
"label_map = read_label(label_path)\n",
"core = ov.Core()\n",
"compiled_model = core.compile_model(yoloe_model_path, \"CPU\")\n",
"cap = cv.VideoCapture(0)\n",
"while cap.isOpened():\n",
" ret, frame = cap.read()\n",
" frame = cv.flip(frame, 180)\n",
" cv.namedWindow(\"MaskDetection\", 0) # 0可调大小注意窗口名必须imshow里面的一窗口名一直\n",
" cv.resizeWindow(\"MaskDetection\", 640, 480) # 设置长和宽\n",
" preprocessed_image = preprocess_image(frame)\n",
" input_tensor = image_to_tensor(preprocessed_image)\n",
" result = compiled_model(input_tensor)\n",
" detections = postprocess(pred_boxes=result[compiled_model.output(0)], input_hw=input_tensor.shape[2:], orig_img=frame)[0]\n",
" image_with_boxes = draw_results(detections, frame, label_map)\n",
" cv.imshow('MaskDetection', image_with_boxes)\n",
" key = cv.waitKey(1)\n",
" if key == 27: #esc退出\n",
" break\n",
"cap.release()\n",
"cv.destroyAllWindows()"
]
},
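{
"cell_type": "markdown",
"id": "single-image-md",
"metadata": {},
"source": [
"For a one-off check without a camera, the same pipeline can run on the `image_path` defined above (the image file itself is assumed to exist next to the notebook)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "single-image-demo",
"metadata": {},
"outputs": [],
"source": [
"# Single-image inference with the already compiled model and label map\n",
"image = cv.imread(image_path)\n",
"input_tensor = image_to_tensor(preprocess_image(image))\n",
"result = compiled_model(input_tensor)\n",
"detections = postprocess(pred_boxes=result[compiled_model.output(0)], input_hw=input_tensor.shape[2:], orig_img=image)[0]\n",
"cv.imwrite(\"test1_result.jpg\", draw_results(detections, image, label_map))"
]
},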
{
"cell_type": "code",
"execution_count": null,
"id": "7e2a8aab-cf82-4bf8-80d3-0cded72f9493",
"metadata": {},
"outputs": [],
"source": [
"cap = cv.VideoCapture(0)\n",
"ret, frame = cap.read()\n",
"curr_frame = preprocess_image(frame)\n",
"curr_fram = image_to_tensor(curr_frame)\n",
"curr_request.set_tensor(compiled_model.input(0), ov.Tensor(curr_frame))\n",
"curr_request.start_async()\n",
"while cap.isOpened():\n",
" ret, next_frame = cap.read()\n",
" next_frame = cv.flip(next_frame, 180)\n",
" cv.namedWindow(\"MaskDetection\", 0) # 0可调大小注意窗口名必须imshow里面的一窗口名一直\n",
" cv.resizeWindow(\"MaskDetection\", 640, 480) # 设置长和宽\n",
" in_frame = preprocess_image(next_frame)\n",
" in_frame = image_to_tensor(in_frame)\n",
" next_request.set_tensor(input_layer, ov.Tensor(in_frame))\n",
" next_request.start_async()\n",
" if curr_request.wait_for(-1) == 1:\n",
" boxes_name = curr_request.get_output_tensor(0).data\n",
" conf_name = curr_request.get_output_tensor(1).data\n",
" boxes, scores, classes = process_result(box_results=boxes_name, conf_results=conf_name)\n",
" frame = draw_box(image=frame, boxes=boxes, scores=scores, classes=classes, labels=labels)\n",
" cv.imshow('MaskDetection', frame)\n",
" frame = next_frame\n",
" curr_request, next_request = next_request, curr_request\n",
" key = cv.waitKey(1)\n",
" if key == 27: #esc退出\n",
" break\n",
"cap.release()\n",
"cv.destroyAllWindows()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}