用户
 找回密码
 立即注册
cjd 该用户已被删除
发表于 2019-10-16 14:04:56
138514
问题描述: 在运行NANO自带的tensorrt包中的 tensorrt/samples/python/yolov3_onnx/onnx_to_tensorrt.py 时,运行卡慢,每帧图像处理速度在3s左右,相比于直接在darknet下使用yolov3进行检测时,无明显加速效果,求指点。

代码(onnx_to_tensorrt.py)见附件。
经测试,主要耗时的程序在 boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH)),每张图片该行程序耗时在 2-3s。求助!
本人运行的步骤如下:
1. $ sudo python yolov3_to_onnx.py
2. $ sudo python onnx_to_tensorrt.py
使用道具 举报 回复
发表于 2019-10-16 14:19:59
onnx_to_tensorrt.py代码:
  1. #!/usr/bin/env python2
  2. #
  3. # Copyright 1993-2019 NVIDIA Corporation.  All rights reserved.
  4. #
  5. # NOTICE TO LICENSEE:
  6. #
  7. # This source code and/or documentation ("Licensed Deliverables") are
  8. # subject to NVIDIA intellectual property rights under U.S. and
  9. # international Copyright laws.
  10. #
  11. # These Licensed Deliverables contained herein is PROPRIETARY and
  12. # CONFIDENTIAL to NVIDIA and is being provided under the terms and
  13. # conditions of a form of NVIDIA software license agreement by and
  14. # between NVIDIA and Licensee ("License Agreement") or electronically
  15. # accepted by Licensee.  Notwithstanding any terms or conditions to
  16. # the contrary in the License Agreement, reproduction or disclosure
  17. # of the Licensed Deliverables to any third party without the express
  18. # written consent of NVIDIA is prohibited.
  19. #
  20. # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
  21. # LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
  22. # SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
  23. # PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
  24. # NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
  25. # DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
  26. # NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
  27. # NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
  28. # LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
  29. # SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
  30. # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  31. # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  32. # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  33. # OF THESE LICENSED DELIVERABLES.
  34. #
  35. # U.S. Government End Users.  These Licensed Deliverables are a
  36. # "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
  37. # 1995), consisting of "commercial computer software" and "commercial
  38. # computer software documentation" as such terms are used in 48
  39. # C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
  40. # only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
  41. # 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
  42. # U.S. Government End Users acquire the Licensed Deliverables with
  43. # only those rights set forth herein.
  44. #
  45. # Any use of the Licensed Deliverables in individual and commercial
  46. # software must include, in the user documentation and internal
  47. # comments to the code, the above Disclaimer and U.S. Government End
  48. # Users Notice.
  49. #

  50. from __future__ import print_function
  51. import cv2 as cv
  52. import numpy as np
  53. import tensorrt as trt
  54. import pycuda.driver as cuda
  55. import pycuda.autoinit
  56. from PIL import ImageDraw
  57. import time
  58. from yolov3_to_onnx import download_file
  59. from data_processing import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES

  60. import sys, os
  61. sys.path.insert(1, os.path.join(sys.path[0], ".."))
  62. import common

# Module-level TensorRT logger shared by builder/runtime objects below.
TRT_LOGGER = trt.Logger()
# NOTE(review): opening the camera happens at import time as a side effect;
# presumably device index 0 is the intended camera — confirm on target hardware.
cap = cv.VideoCapture(0)

  65. def load_label_categories(label_file_path):
  66.     categories = [line.rstrip('\n') for line in open(label_file_path)]
  67.     return categories

# Path to the COCO class-name file, resolved relative to this script's location.
LABEL_FILE_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'coco_labels.txt')
# NOTE(review): this rebinds (shadows) the ALL_CATEGORIES name imported from
# data_processing above with the label list loaded from coco_labels.txt.
ALL_CATEGORIES = load_label_categories(LABEL_FILE_PATH)

  70. def get_engine1(vv):
  71.     with trt.Runtime(TRT_LOGGER) as runtime:
  72.         return runtime.deserialize_cuda_engine(vv)

  73. def get_engine(onnx_file_path, engine_file_path=""):
  74.     def build_engine():
  75.         """Takes an ONNX file and creates a TensorRT engine to run inference with"""
  76.         with trt.Builder(TRT_LOGGER) as builder,\
  77.               builder.create_network() as network, \
  78.               trt.OnnxParser(network, TRT_LOGGER) as parser:

  79.             builder.max_workspace_size = 1 << 30 # 1GB
  80.             builder.max_batch_size = 1

  81.             if not os.path.exists(onnx_file_path):
  82.                 print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
  83.                 exit(0)

  84.             print('Loading ONNX file from path {}...'.format(onnx_file_path))
  85.             with open(onnx_file_path, 'rb') as model:
  86.                 print('Beginning ONNX file parsing')
  87.                 parser.parse(model.read())
  88.             print('Completed parsing of ONNX file')

  89.             print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
  90.             engine = builder.build_cuda_engine(network)
  91.             print("Completed creating Engine")

  92.             with open(engine_file_path, "wb") as f:
  93.                 f.write(engine.serialize())
  94.             return engine

  95.     if os.path.exists(engine_file_path):
  96.         print("Reading engine from file {}".format(engine_file_path))
  97.         with open(engine_file_path, "rb") as f, \
  98.              trt.Runtime(TRT_LOGGER) as runtime:
  99.             return runtime.deserialize_cuda_engine(f.read())
  100.     else:
  101.         return build_engine()

  102. def main():
  103.     """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""

  104.     # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
  105.     onnx_file_path = './yolov3.onnx'
  106.     engine_file_path = "./yolov3.trt"
  107.     file = open(engine_file_path,"rb")
  108.     f = file.read()
  109.     file.close()
  110.     engine= get_engine1(f)
  111.     context = engine.create_execution_context()
  112.     # with get_engine(onnx_file_path, engine_file_path) as engine:
  113.     #     print("finished")
  114.     # Download a dog image and save it to the following file path:
  115.     while True:
  116.         ret,frame = cap.read()
  117.         if ret:
  118.             x, y = frame.shape[0:2]
  119.                 # Two-dimensional tuple with the target network's (spatial) input resolution in HW ordered
  120.             input_resolution_yolov3_HW = (608, 608)
  121.             # Create a pre-processor object by specifying the required input resolution for YOLOv3
  122.             preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
  123.             # Load an image from the specified input path, and return it together with  a pre-processed version
  124.             image_raw, image = preprocessor.process(frame)
  125.             # Store the shape of the original input image in WH format, we will need it for later
  126.             shape_orig_WH = image_raw.size

  127.             # Output shapes expected by the post-processor
  128.             output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
  129.             # Do inference with TensorRT
  130.             trt_outputs = [] #get_engine(onnx_file_path, engine_file_path) as engine,
  131.             
  132.             
  133.             inputs, outputs, bindings, stream = common.allocate_buffers(engine)
  134.                 # Do inference
  135.                 #print('Running inference on image {}...'.format(input_image_path))
  136.                 # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
  137.             inputs[0].host = image
  138.             trt_outputs = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
  139.             #b = time.clock()
  140.             # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
  141.             trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)]

  142.             postprocessor_args = {"yolo_masks": [(6, 7, 8), (3, 4, 5), (0, 1, 2)],                    # A list of 3 three-dimensional tuples for the YOLO masks
  143.                             "yolo_anchors": [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),  # A list of 9 two-dimensional tuples for the YOLO anchors
  144.                                             (59, 119), (116, 90), (156, 198), (373, 326)],
  145.                             "obj_threshold": 0.6,                                               # Threshold for object coverage, float value between 0 and 1
  146.                             "nms_threshold": 0.5,                                               # Threshold for non-max suppression algorithm, float value between 0 and 1
  147.                             "yolo_input_resolution": input_resolution_yolov3_HW}

  148.             postprocessor = PostprocessYOLO(**postprocessor_args)

  149.             # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
  150.             boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
  151.             if boxes is None:
  152.                 pass
  153.             else:
  154.                 for box, score, category in zip(boxes, scores, classes):
  155.                     x_coord, y_coord, width, height = box
  156.                     left = max(0, np.floor(x_coord + 0.5).astype(int))
  157.                     top = max(0, np.floor(y_coord + 0.5).astype(int))
  158.                     right = min(image_raw.width, np.floor(x_coord + width + 0.5).astype(int))
  159.                     bottom = min(image_raw.height, np.floor(y_coord + height + 0.5).astype(int))
  160.                     cv.rectangle(frame,(left,top),(right,bottom),(255,0,0),thickness=2)
  161.                     cv.putText(frame,"%s:%.2f"%(ALL_CATEGORIES[category],score),(left, top - 12),cv.FONT_HERSHEY_SIMPLEX,0.7,(0,0,255),2,0)
  162.                     cv.imshow("obj_detected_img",frame)

  163.             c = cv.waitKey(20)
  164.             if c==27:
  165.                 break
  166.         else:
  167.             pass
  168.         
  169. if __name__ == '__main__':
  170.     main()
复制代码
使用道具 举报 回复 支持 反对
发表于 2019-10-16 14:27:38





本帖子中包含更多资源

您需要 登录 才可以下载或查看,没有帐号?立即注册

x
使用道具 举报 回复 支持 反对
发表于 2019-10-16 14:34:02



本帖子中包含更多资源

您需要 登录 才可以下载或查看,没有帐号?立即注册

x
使用道具 举报 回复 支持 反对
发表于 2020-8-20 18:55:29
你好楼主,请问你这个问题解决了吗?我和你相同的问题。
使用道具 举报 回复 支持 反对
发新帖
您需要登录后才可以回帖 登录 | 立即注册