You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

296 lines
15 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "812a3abf-ca0a-46d6-8c20-b58d4c920f12",
"metadata": {},
"outputs": [],
"source": [
"from openvino.runtime import Core\n",
"import openvino.runtime as ov\n",
"import cv2 as cv\n",
"import numpy as np\n",
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1f2c5225-46d4-4d05-8d24-744a65708eec",
"metadata": {},
"outputs": [],
"source": [
"class Predictor:\n",
"    \"\"\"Wrapper around an OpenVINO compiled model (CPU device).\"\"\"\n",
"\n",
"    def __init__(self, model_path):\n",
"        # Load the model file and compile it for CPU execution.\n",
"        core = Core()\n",
"        loaded_model = core.read_model(model=model_path)\n",
"        self.compiled_model = core.compile_model(model=loaded_model, device_name=\"CPU\")\n",
"\n",
"    def get_inputs_name(self, num):\n",
"        \"\"\"Return the model input port at index ``num``.\"\"\"\n",
"        return self.compiled_model.input(num)\n",
"\n",
"    def get_outputs_name(self, num):\n",
"        \"\"\"Return the model output port at index ``num``.\"\"\"\n",
"        return self.compiled_model.output(num)\n",
"\n",
"    def predict(self, input_data):\n",
"        \"\"\"Run synchronous inference on a single input tensor.\"\"\"\n",
"        return self.compiled_model([input_data])\n",
"\n",
"    def get_request(self):\n",
"        \"\"\"Create a fresh InferRequest for this compiled model.\"\"\"\n",
"        return self.compiled_model.create_infer_request()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "956f7097-a0b4-4d69-97fe-969c7490287f",
"metadata": {},
"outputs": [],
"source": [
"def process_image(input_image, size):\n",
"    \"\"\"Preprocess an image into the layout the detection model expects.\n",
"\n",
"    Args:\n",
"        input_image (uint8): input image matrix (BGR, HxWx3)\n",
"        size (int): model input edge length (e.g. 640)\n",
"\n",
"    Returns:\n",
"        float32: tensor of shape (1, 3, size, size), values in [0, 1]\n",
"    \"\"\"\n",
"    # Pad to a square canvas sized by the longer edge so the resize below\n",
"    # keeps the aspect ratio. (Previously hard-coded to 640, which raised\n",
"    # for source images larger than 640 in either dimension.)\n",
"    max_len = max(input_image.shape)\n",
"    img = np.zeros([max_len, max_len, 3], np.uint8)\n",
"    img[0:input_image.shape[0], 0:input_image.shape[1]] = input_image\n",
"    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # BGR -> RGB\n",
"    # The third positional argument of cv.resize is `dst`, not the\n",
"    # interpolation flag, so the flag must be passed by keyword.\n",
"    img = cv.resize(img, (size, size), interpolation=cv.INTER_NEAREST)\n",
"    img = np.transpose(img, [2, 0, 1])  # HWC -> CHW\n",
"    img = img / 255.0  # normalize to [0, 1]\n",
"    img = np.expand_dims(img, 0)  # add batch dimension\n",
"    return img.astype(np.float32)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "f61fca39-5c27-4e45-94a2-d683c5a2462e",
"metadata": {},
"outputs": [],
"source": [
"def process_result(box_results, conf_results):\n",
"    \"\"\"Turn raw detection-head outputs into final detections.\n",
"\n",
"    Applies a confidence threshold, then non-maximum suppression.\n",
"\n",
"    Args:\n",
"        box_results (float32): predicted boxes, shape (1, 4, N)\n",
"        conf_results (float32): per-class confidences, shape (1, C, N)\n",
"    Returns:\n",
"        float: kept boxes\n",
"        float: their scores\n",
"        int: their class indices\n",
"    \"\"\"\n",
"    # Move the anchor axis first so row i of boxes and confidences refer to\n",
"    # the same candidate. (The original reshaped box_results WITHOUT the\n",
"    # transpose it applied to conf_results, scrambling coordinates across\n",
"    # anchors and misaligning boxes with their scores.)\n",
"    box_results = np.transpose(box_results, [0, 2, 1]).reshape(-1, 4)\n",
"    conf_results = np.transpose(conf_results, [0, 2, 1])\n",
"    conf_results = conf_results.reshape(-1, conf_results.shape[-1])\n",
"    scores = []\n",
"    classes = []\n",
"    boxes = []\n",
"    for i in range(conf_results.shape[0]):\n",
"        conf = conf_results[i, :]  # per-class confidences for this candidate\n",
"        score = np.max(conf)  # best class score (argmax below is the class)\n",
"        # discard low-confidence candidates\n",
"        if score > 0.5:\n",
"            classes.append(np.argmax(conf))\n",
"            scores.append(score)\n",
"            boxes.append(box_results[i, :])\n",
"    scores = np.array(scores)\n",
"    boxes = np.array(boxes)\n",
"\n",
"    result_box = []\n",
"    result_score = []\n",
"    result_class = []\n",
"    # Non-maximum suppression removes overlapping duplicate detections.\n",
"    # NOTE(review): tf.image.non_max_suppression expects [y1, x1, y2, x2]\n",
"    # corner boxes; assumes the exported head emits corner form -- TODO\n",
"    # confirm, YOLO heads often emit (cx, cy, w, h).\n",
"    if len(boxes) != 0:\n",
"        # iou_threshold=0.25, score_threshold=0.35\n",
"        indexs = tf.image.non_max_suppression(boxes, scores, len(scores), 0.25, 0.35)\n",
"        for index in indexs:\n",
"            result_score.append(scores[index])\n",
"            result_box.append(boxes[index, :])\n",
"            result_class.append(classes[index])\n",
"    # Empty arrays are returned when nothing passes the filters.\n",
"    return np.array(result_box), np.array(result_score), np.array(result_class)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "d20c64ea-c5c6-4a5f-9af7-5c34fa1e60d9",
"metadata": {},
"outputs": [],
"source": [
"def draw_box(image, boxes, scores, classes, labels):\n",
"    \"\"\"Annotate an image with detection boxes, labels and scores.\n",
"\n",
"    Args:\n",
"        image (uint8): original image (drawn on in place)\n",
"        boxes (float32): boxes in 640-space, rows of (x1, y1, x2, y2)\n",
"        scores (float32): detection scores\n",
"        classes (int): class indices\n",
"        labels (str): class label names\n",
"\n",
"    Returns:\n",
"        uint8: the annotated image\n",
"    \"\"\"\n",
"    colors = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]  # one BGR color per class\n",
"    scale = max(image.shape) / 640.0  # map 640-space back to image size\n",
"    for box, score, cls in zip(boxes, scores, classes):\n",
"        x1, y1, x2, y2 = (int(coord * scale) for coord in box[:4])\n",
"        label = labels[cls]\n",
"        cv.rectangle(image, (x1, y1), (x2, y2), colors[cls], 2, cv.LINE_8)\n",
"        cv.putText(image,label+\":\"+str(score),(x1,y1-10),cv.FONT_HERSHEY_SIMPLEX, 0.55, colors[cls], 2)\n",
"\n",
"    return image\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "05a749f9-bec4-4dc4-b1e9-db50e7663eef",
"metadata": {},
"outputs": [],
"source": [
"def read_label(label_path):\n",
"    \"\"\"Load whitespace-separated class labels from a text file.\"\"\"\n",
"    with open(label_path, 'r') as label_file:\n",
"        content = label_file.read()\n",
"    return content.split()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "f089212d-b1f6-4e6c-bed3-efdf22c77321",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "27f0b486-1994-4b93-9428-9619310936b9",
"metadata": {},
"outputs": [],
"source": [
"label_path = \"labels.txt\"\n",
"image_path = \"test.jpg\"\n",
"yoloe_model_path = \"models/yolov8n_openvino_model/yolov8n.xml\"\n",
"predictor = Predictor(model_path = yoloe_model_path)\n",
"outputs_name = predictor.get_outputs_name(0)\n",
"image = cv.imread(image_path)\n",
"# FIX: `letterbox` was undefined in this notebook and (per the saved\n",
"# traceback) fed a (640, 640, 3) HWC uint8 frame to a model expecting\n",
"# NCHW [?, 3, ?, ?]. process_image() builds the required\n",
"# (1, 3, 640, 640) float32 tensor.\n",
"pimage = process_image(image, 640)\n",
"results = predictor.predict(input_data=pimage)[outputs_name]\n",
"# split the head output along axis 1: 4 box coords + 3 class scores\n",
"boxes_results = results[:, :4, :]\n",
"conf_results = results[:, 4:7, :]\n",
"\n",
"labels = read_label(label_path=label_path)\n",
"boxes, scores, classes = process_result(box_results=boxes_results, conf_results=conf_results)\n",
"result_frame = draw_box(image=image, boxes=boxes, scores=scores, classes=classes, labels=labels)\n",
"print(classes)\n",
"# OpenCV images are BGR; convert so matplotlib shows true colors\n",
"plt.imshow(cv.cvtColor(result_frame, cv.COLOR_BGR2RGB))\n",
"plt.show()\n",
"# conf_name = predictor.get_outputs_name(1)\n",
"\n",
"# Webcam variant (kept for reference):\n",
"# cap = cv.VideoCapture(0)\n",
"# while cap.isOpened():\n",
"#     ret, frame = cap.read()\n",
"#     frame = cv.flip(frame, 180)\n",
"#     cv.namedWindow(\"MaskDetection\", 0)  # 0 = resizable; name must match imshow's\n",
"#     cv.resizeWindow(\"MaskDetection\", 640, 480)  # set window width and height\n",
"#     input_frame = process_image(frame, 640)\n",
"#     results = predictor.predict(input_data=input_frame)\n",
"#     boxes, scores, classes = process_result(box_results=results[boxes_name], conf_results=results[conf_name])\n",
"#     result_frame = draw_box(image=frame, boxes=boxes, scores=scores, classes=classes, labels=labels)\n",
"#     cv.imshow('MaskDetection', result_frame)\n",
"#     key = cv.waitKey(1)\n",
"#     if key == 27:  # exit on Esc\n",
"#         break\n",
"# cap.release()\n",
"# cv.destroyAllWindows()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4c2832b8-97f5-419b-90a1-30c092456112",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "68507c5f-a4f1-4df4-bfc6-97b1cbbb24a8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f9c5749-fad0-40c4-ab10-93733f325e98",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}