{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "8d41befc-7e65-4110-9118-35dce6e6ab0c",
"metadata": {},
"outputs": [],
"source": [
"from openvino.runtime import Core\n",
"import openvino.runtime as ov\n",
"import cv2 as cv\n",
"import numpy as np\n",
"from PIL import Image\n",
"from ultralytics.yolo.utils import ops\n",
"import torch\n",
"from ultralytics.yolo.utils.plotting import colors"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cd01038a-fe1a-4b47-ad49-b0641afdaee5",
"metadata": {},
"outputs": [],
"source": [
"def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scale_fill=False, scaleup=False, stride=32):\n",
" \"\"\"\n",
" Resize image and padding for detection. Takes image as input,\n",
" resizes image to fit into new shape with saving original aspect ratio and pads it to meet stride-multiple constraints\n",
"\n",
" Parameters:\n",
" img (np.ndarray): image for preprocessing\n",
" new_shape (Tuple(int, int)): image size after preprocessing in format [height, width]\n",
" color (Tuple(int, int, int)): color for filling padded area\n",
" auto (bool): use dynamic input size, only padding for stride constrins applied\n",
" scale_fill (bool): scale image to fill new_shape\n",
" scaleup (bool): allow scale image if it is lower then desired input size, can affect model accuracy\n",
" stride (int): input padding stride\n",
" Returns:\n",
" img (np.ndarray): image after preprocessing\n",
" ratio (Tuple(float, float)): hight and width scaling ratio\n",
" padding_size (Tuple(int, int)): height and width padding size\n",
"\n",
"\n",
" \"\"\"\n",
" # Resize and pad image while meeting stride-multiple constraints\n",
" shape = img.shape[:2] # current shape [height, width]\n",
" if isinstance(new_shape, int):\n",
" new_shape = (new_shape, new_shape)\n",
"\n",
" # Scale ratio (new / old)\n",
" r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])\n",
" if not scaleup: # only scale down, do not scale up (for better test mAP)\n",
" r = min(r, 1.0)\n",
"\n",
" # Compute padding\n",
" ratio = r, r # width, height ratios\n",
" new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))\n",
" dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding\n",
" if auto: # minimum rectangle\n",
" dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding\n",
" elif scale_fill: # stretch\n",
" dw, dh = 0.0, 0.0\n",
" new_unpad = (new_shape[1], new_shape[0])\n",
" ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios\n",
"\n",
" dw /= 2 # divide padding into 2 sides\n",
" dh /= 2\n",
"\n",
" if shape[::-1] != new_unpad: # resize\n",
" img = cv.resize(img, new_unpad, interpolation=cv.INTER_LINEAR)\n",
" top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))\n",
" left, right = int(round(dw - 0.1)), int(round(dw + 0.1))\n",
" img = cv.copyMakeBorder(img, top, bottom, left, right, cv.BORDER_CONSTANT, value=color) # add border\n",
" return img, ratio, (dw, dh)"
]
},
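{
"cell_type": "markdown",
"id": "letterbox-demo-md",
"metadata": {},
"source": [
"A quick sanity check of `letterbox` (a minimal sketch on a synthetic frame, not part of the detection pipeline): a 480×640 dummy image needs no rescaling with the default settings, so only 80 px of padding should be added to the top and bottom."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "letterbox-demo",
"metadata": {},
"outputs": [],
"source": [
"# Sanity check on a synthetic 480x640 frame (stand-in for a real capture)\n",
"dummy = np.zeros((480, 640, 3), dtype=np.uint8)\n",
"padded, ratio, (dw, dh) = letterbox(dummy)\n",
"print(padded.shape, ratio, (dw, dh))  # expected: (640, 640, 3) (1.0, 1.0) (0.0, 80.0)"
]
},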
{
"cell_type": "code",
"execution_count": 3,
"id": "5d01c15e-7dcc-4cec-87b0-a338e41051e4",
"metadata": {},
"outputs": [],
"source": [
"def preprocess_image(img0: np.ndarray):\n",
" \"\"\"\n",
" Preprocess image according to YOLOv8 input requirements.\n",
" Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW.\n",
"\n",
" Parameters:\n",
" img0 (np.ndarray): image for preprocessing\n",
" Returns:\n",
" img (np.ndarray): image after preprocessing\n",
" \"\"\"\n",
" # resize\n",
" img = letterbox(img0)[0]\n",
"\n",
" # Convert HWC to CHW\n",
" img = img.transpose(2, 0, 1)\n",
" img = np.ascontiguousarray(img)\n",
" return img"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5d99be3c-8c3d-4c4c-b82e-17b6724d2258",
"metadata": {},
"outputs": [],
"source": [
"def image_to_tensor(image:np.ndarray):\n",
" \"\"\"\n",
" Preprocess image according to YOLOv8 input requirements.\n",
" Takes image in np.array format, resizes it to specific size using letterbox resize and changes data layout from HWC to CHW.\n",
"\n",
" Parameters:\n",
" img (np.ndarray): image for preprocessing\n",
" Returns:\n",
" input_tensor (np.ndarray): input tensor in NCHW format with float32 values in [0, 1] range\n",
" \"\"\"\n",
" input_tensor = image.astype(np.float32) # uint8 to fp32\n",
" input_tensor /= 255.0 # 0 - 255 to 0.0 - 1.0\n",
"\n",
" # add batch dimension\n",
" if input_tensor.ndim == 3:\n",
" input_tensor = np.expand_dims(input_tensor, 0)\n",
" return input_tensor"
]
},
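{
"cell_type": "markdown",
"id": "preprocess-chain-md",
"metadata": {},
"source": [
"The two helpers above compose into the full preprocessing chain. The sketch below (on a synthetic 720p frame) checks that the result is an NCHW float32 tensor, which is the layout the compiled model expects."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "preprocess-chain-demo",
"metadata": {},
"outputs": [],
"source": [
"# End-to-end preprocessing check on a synthetic frame\n",
"demo_frame = np.random.randint(0, 256, (720, 1280, 3), dtype=np.uint8)\n",
"demo_tensor = image_to_tensor(preprocess_image(demo_frame))\n",
"print(demo_tensor.shape, demo_tensor.dtype)  # expected: (1, 3, 640, 640) float32"
]
},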
{
"cell_type": "code",
"execution_count": 5,
"id": "c1867e4e-1b99-431a-9d07-0389ae47d6db",
"metadata": {},
"outputs": [],
"source": [
"def postprocess(\n",
" pred_boxes,\n",
" input_hw,\n",
" orig_img,\n",
" min_conf_threshold=0.25,\n",
" nms_iou_threshold=0.7,\n",
" agnosting_nms=False,\n",
" max_detections=300,\n",
"):\n",
" \"\"\"\n",
" YOLOv8 model postprocessing function. Applied non maximum supression algorithm to detections and rescale boxes to original image size\n",
" Parameters:\n",
" pred_boxes (np.ndarray): model output prediction boxes\n",
" input_hw (np.ndarray): preprocessed image\n",
" orig_image (np.ndarray): image before preprocessing\n",
" min_conf_threshold (float, *optional*, 0.25): minimal accepted confidence for object filtering\n",
" nms_iou_threshold (float, *optional*, 0.45): minimal overlap score for removing objects duplicates in NMS\n",
" agnostic_nms (bool, *optiona*, False): apply class agnostinc NMS approach or not\n",
" max_detections (int, *optional*, 300): maximum detections after NMS\n",
" Returns:\n",
" pred (List[Dict[str, np.ndarray]]): list of dictionary with det - detected boxes in format [x1, y1, x2, y2, score, label]\n",
" \"\"\"\n",
" nms_kwargs = {\"agnostic\": agnosting_nms, \"max_det\":max_detections}\n",
" preds = ops.non_max_suppression(\n",
" torch.from_numpy(pred_boxes),\n",
" min_conf_threshold,\n",
" nms_iou_threshold,\n",
" nc=3,\n",
" **nms_kwargs\n",
" )\n",
"\n",
" results = []\n",
" for i, pred in enumerate(preds):\n",
" shape = orig_img[i].shape if isinstance(orig_img, list) else orig_img.shape\n",
" if not len(pred):\n",
" results.append({\"det\": [], \"segment\": []})\n",
" continue\n",
" pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()\n",
" results.append({\"det\": pred})\n",
"\n",
" return results"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1940d393-1e89-46cd-9f12-b965487e1874",
"metadata": {},
"outputs": [],
"source": [
"def draw_results(results, source_image, label_map):\n",
" \"\"\"\n",
" Helper function for drawing bounding boxes on image\n",
" Parameters:\n",
" image_res (np.ndarray): detection predictions in format [x1, y1, x2, y2, score, label_id]\n",
" source_image (np.ndarray): input image for drawing\n",
" label_map; (Dict[int, str]): label_id to class name mapping\n",
" Returns:\n",
" Image with boxes\n",
" \"\"\"\n",
" boxes = results[\"det\"]\n",
" for idx, (*xyxy, conf, lbl) in enumerate(boxes):\n",
" label = f'{label_map[int(lbl)]} {conf:.2f}'\n",
" source_image = plot_one_box(xyxy, source_image, label=label, color=colors(int(lbl)), line_thickness=1)\n",
" return source_image"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c599c49c-52af-4d0e-bad7-20c25fa2c851",
"metadata": {},
"outputs": [],
"source": [
"def plot_one_box(box, img,\n",
" color,\n",
" label, \n",
" line_thickness=5):\n",
" \"\"\"\n",
" Helper function for drawing single bounding box on image\n",
" Parameters:\n",
" x (np.ndarray): bounding box coordinates in format [x1, y1, x2, y2]\n",
" img (no.ndarray): input image\n",
" color (Tuple[int, int, int], *optional*, None): color in BGR format for drawing box, if not specified will be selected randomly\n",
" label (str, *optonal*, None): box label string, if not provided will not be provided as drowing result\n",
" line_thickness (int, *optional*, 5): thickness for box drawing lines\n",
" \"\"\"\n",
" # Plots one bounding box on image img\n",
" tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness\n",
" color = color or [random.randint(0, 255) for _ in range(3)]\n",
" c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))\n",
" cv.rectangle(img, c1, c2, color, thickness=tl, lineType=cv.LINE_AA)\n",
" if label:\n",
" tf = max(tl - 1, 1) # font thickness\n",
" t_size = cv.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]\n",
" c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3\n",
" cv.rectangle(img, c1, c2, color, -1, cv.LINE_AA) # filled\n",
" cv.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv.LINE_AA)\n",
"\n",
" return img"
]
},
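{
"cell_type": "markdown",
"id": "plot-one-box-demo-md",
"metadata": {},
"source": [
"A small usage sketch for `plot_one_box`: it draws one labelled box on a blank canvas so the rendering can be checked without a model or camera. The box coordinates and the label text are arbitrary."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "plot-one-box-demo",
"metadata": {},
"outputs": [],
"source": [
"# Draw an arbitrary labelled box on a blank canvas to verify rendering\n",
"canvas = np.full((640, 640, 3), 255, dtype=np.uint8)\n",
"canvas = plot_one_box([100, 100, 300, 300], canvas, color=colors(0), label='mask 0.90', line_thickness=2)\n",
"cv.imwrite('plot_one_box_demo.jpg', canvas)"
]
},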
{
"cell_type": "code",
"execution_count": 8,
"id": "ed0b7672-e2d1-47e6-8de8-07ffbb793a53",
"metadata": {},
"outputs": [],
"source": [
"def read_label(label_path):\n",
" with open(label_path, 'r') as f:\n",
" labels = f.read().split()\n",
" return labels"
]
},
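{
"cell_type": "markdown",
"id": "labels-format-md",
"metadata": {},
"source": [
"`read_label` splits the file contents on whitespace, so `labels.txt` is expected to hold one class name per token, in the order the model was trained on. Since `postprocess` passes `nc=3`, a three-class file along the following lines is assumed; the exact class names here are hypothetical."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "labels-format-demo",
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical labels.txt layout for a 3-class mask-detection model\n",
"with open(\"labels_sample.txt\", \"w\") as f:\n",
"    f.write(\"mask\\nno-mask\\nincorrect-mask\\n\")\n",
"print(read_label(\"labels_sample.txt\"))  # ['mask', 'no-mask', 'incorrect-mask']"
]
},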
{
"cell_type": "code",
"execution_count": 10,
"id": "9e24f99f-cf0c-4063-aedd-574eea9fe18c",
"metadata": {},
"outputs": [],
"source": [
"label_path = \"labels.txt\"\n",
"image_path = \"test1.jpg\"\n",
"yoloe_model_path = \"yolov8n/best.xml\"\n",
"label_map = read_label(label_path)\n",
"core = ov.Core()\n",
"compiled_model = core.compile_model(yoloe_model_path, \"CPU\")\n",
"cap = cv.VideoCapture(0)\n",
"while cap.isOpened():\n",
" ret, frame = cap.read()\n",
" frame = cv.flip(frame, 180)\n",
" cv.namedWindow(\"MaskDetection\", 0) # 0可调大小注意窗口名必须imshow里面的一窗口名一直\n",
" cv.resizeWindow(\"MaskDetection\", 640, 480) # 设置长和宽\n",
" preprocessed_image = preprocess_image(frame)\n",
" input_tensor = image_to_tensor(preprocessed_image)\n",
" result = compiled_model(input_tensor)\n",
" detections = postprocess(pred_boxes=result[compiled_model.output(0)], input_hw=input_tensor.shape[2:], orig_img=frame)[0]\n",
" image_with_boxes = draw_results(detections, frame, label_map)\n",
" cv.imshow('MaskDetection', image_with_boxes)\n",
" key = cv.waitKey(1)\n",
" if key == 27: #esc退出\n",
" break\n",
"cap.release()\n",
"cv.destroyAllWindows()"
]
},
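{
"cell_type": "markdown",
"id": "single-image-md",
"metadata": {},
"source": [
"For a one-off check without a camera, the same pipeline can run on the `image_path` defined above (the image file itself is assumed to exist next to the notebook)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "single-image-demo",
"metadata": {},
"outputs": [],
"source": [
"# Single-image inference with the already compiled model and label map\n",
"image = cv.imread(image_path)\n",
"input_tensor = image_to_tensor(preprocess_image(image))\n",
"result = compiled_model(input_tensor)\n",
"detections = postprocess(pred_boxes=result[compiled_model.output(0)], input_hw=input_tensor.shape[2:], orig_img=image)[0]\n",
"cv.imwrite(\"test1_result.jpg\", draw_results(detections, image, label_map))"
]
},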
{
"cell_type": "code",
"execution_count": null,
"id": "7e2a8aab-cf82-4bf8-80d3-0cded72f9493",
"metadata": {},
"outputs": [],
"source": [
"cap = cv.VideoCapture(0)\n",
"ret, frame = cap.read()\n",
"curr_frame = preprocess_image(frame)\n",
"curr_fram = image_to_tensor(curr_frame)\n",
"curr_request.set_tensor(compiled_model.input(0), ov.Tensor(curr_frame))\n",
"curr_request.start_async()\n",
"while cap.isOpened():\n",
" ret, next_frame = cap.read()\n",
" next_frame = cv.flip(next_frame, 180)\n",
" cv.namedWindow(\"MaskDetection\", 0) # 0可调大小注意窗口名必须imshow里面的一窗口名一直\n",
" cv.resizeWindow(\"MaskDetection\", 640, 480) # 设置长和宽\n",
" in_frame = preprocess_image(next_frame)\n",
" in_frame = image_to_tensor(in_frame)\n",
" next_request.set_tensor(input_layer, ov.Tensor(in_frame))\n",
" next_request.start_async()\n",
" if curr_request.wait_for(-1) == 1:\n",
" boxes_name = curr_request.get_output_tensor(0).data\n",
" conf_name = curr_request.get_output_tensor(1).data\n",
" boxes, scores, classes = process_result(box_results=boxes_name, conf_results=conf_name)\n",
" frame = draw_box(image=frame, boxes=boxes, scores=scores, classes=classes, labels=labels)\n",
" cv.imshow('MaskDetection', frame)\n",
" frame = next_frame\n",
" curr_request, next_request = next_request, curr_request\n",
" key = cv.waitKey(1)\n",
" if key == 27: #esc退出\n",
" break\n",
"cap.release()\n",
"cv.destroyAllWindows()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}