1 Why convert at all
To deploy on an embedded development board, the model usually has to be converted to ONNX format first.
2 What to prepare before converting
- A basic understanding of YOLOv3
- The onnx and onnxruntime packages (a quick import check is shown after this list):
pip install onnxruntime
pip install onnx
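A quick way to confirm the environment is ready (a minimal sketch; both packages expose the standard __version__ attribute):

import onnx
import onnxruntime

# Confirm both packages import cleanly and print their versions
print('onnx:', onnx.__version__)
print('onnxruntime:', onnxruntime.__version__)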
3 Conversion procedure
The main code is as follows; running it produces the converted ONNX model.
import torch
# Import the network definition and the weight loader;
# the code for load_darknet_weights is provided further down.
from models import Darknet, load_darknet_weights

cfg = 'cfg/yolov3.cfg'  # the config file is required to build the network
weights = 'weights/yolov3.weights'  # darknet-format weights; a PyTorch checkpoint would normally be a .pt file
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = Darknet(cfg, (320, 416))  # config file plus input resolution; the default is 416x416
model = model.to(device).eval()  # eval() so BatchNorm/Dropout use inference behavior

# Load the weights
if weights.endswith('.pt'):  # PyTorch format
    model.load_state_dict(torch.load(weights, map_location=device)['model'])
else:  # darknet format
    _ = load_darknet_weights(model, weights)
# print(model)

input_shape = (320, 416)  # export resolution; must match the PyTorch model's input size
# torch.autograd.Variable is deprecated; a plain tensor works,
# and it must live on the same device as the model
dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1]).to(device)
output_path = './weights/yolov3.onnx'  # where to write the ONNX model
output_names = None  # ideally the outputs get meaningful names; None lets the exporter auto-name them
torch.onnx.export(
    model,
    dummy_input,
    output_path,
    verbose=True,
    keep_initializers_as_inputs=True,
    opset_version=11,  # usually 10 or 11
    output_names=output_names,
)
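If you do want proper names in the exported graph, torch.onnx.export accepts input_names and output_names, and dynamic_axes can mark the batch dimension as variable. A minimal sketch; the names 'images' and 'output' are my own choices, not anything the exporter requires:

torch.onnx.export(
    model,
    dummy_input,
    output_path,
    verbose=True,
    keep_initializers_as_inputs=True,
    opset_version=11,
    input_names=['images'],    # hypothetical name for the input tensor
    output_names=['output'],   # hypothetical name for the output tensor
    dynamic_axes={'images': {0: 'batch'}, 'output': {0: 'batch'}},  # optional: variable batch size
)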
The imported load_darknet_weights function is shown below; in most cases its details don't matter and you can skip it:
import os
from pathlib import Path

import numpy as np  # needed below for np.fromfile
import torch        # needed below for torch.from_numpy


def load_darknet_weights(self, weights, cutoff=-1):
    # Parses and loads the weights stored in 'weights'
    # cutoff: load layers between 0 and cutoff (if cutoff = -1, all are loaded)
    file = Path(weights).name

    # Try to download weights if not available locally
    msg = weights + ' missing, download from https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI'
    if not os.path.isfile(weights):
        try:
            url = 'https://pjreddie.com/media/files/' + file
            print('Downloading ' + url)
            os.system('curl -f ' + url + ' -o ' + weights)
        except IOError:
            print(msg)
    assert os.path.exists(weights), msg  # download missing weights from Google Drive

    # Establish cutoffs
    if file == 'darknet53.conv.74':
        cutoff = 75
    elif file == 'yolov3-tiny.conv.15':
        cutoff = 15

    # Read weights file
    with open(weights, 'rb') as f:
        # Read header: https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346
        self.version = np.fromfile(f, dtype=np.int32, count=3)  # (int32) version info: major, minor, revision
        self.seen = np.fromfile(f, dtype=np.int64, count=1)  # (int64) number of images seen during training
        weights = np.fromfile(f, dtype=np.float32)  # the rest are the weights themselves

    ptr = 0
    for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
        if mdef['type'] == 'convolutional':
            conv_layer = module[0]
            if mdef['batch_normalize']:
                # Load BN bias, weights, running mean and running variance
                bn_layer = module[1]
                num_b = bn_layer.bias.numel()  # number of biases
                # Bias
                bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias)
                bn_layer.bias.data.copy_(bn_b)
                ptr += num_b
                # Weight
                bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight)
                bn_layer.weight.data.copy_(bn_w)
                ptr += num_b
                # Running mean
                bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean)
                bn_layer.running_mean.data.copy_(bn_rm)
                ptr += num_b
                # Running variance
                bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var)
                bn_layer.running_var.data.copy_(bn_rv)
                ptr += num_b
            else:
                # Load conv bias
                num_b = conv_layer.bias.numel()
                conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias)
                conv_layer.bias.data.copy_(conv_b)
                ptr += num_b
            # Load conv weights
            num_w = conv_layer.weight.numel()
            conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight)
            conv_layer.weight.data.copy_(conv_w)
            ptr += num_w
    return cutoff
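One sanity check I find useful after loading (my own addition, not part of the original repo): the number of float32 values in the .weights file, minus the five header slots, should match the number of values copied into the model when cutoff is -1. A rough sketch:

import numpy as np

# Header = 3x int32 + 1x int64 = 20 bytes = 5 float32-sized slots
n_file = np.fromfile('weights/yolov3.weights', dtype=np.float32).shape[0] - 5
n_model = sum(p.numel() for p in model.parameters())
n_model += sum(b.numel() for b in model.buffers() if b.dim() > 0)  # BN running mean/var
print('file: %d values, model: %d values' % (n_file, n_model))  # should match for a full load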
4 Validating the ONNX model
4.1 Checking with onnx
This step usually passes without any fuss; if it raises an error, the export itself went wrong.
import onnx
onnx_model = onnx.load("weights/yolov3.onnx")
onnx.checker.check_model(onnx_model)
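Beyond the checker, onnx.helper.printable_graph prints a human-readable dump of the graph, which is handy for eyeballing the input/output names and the operator chain:

# Optional: human-readable summary of the exported graph
print(onnx.helper.printable_graph(onnx_model.graph))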
4.2 Numerical check with np.testing.assert_allclose
The setup is identical to the export script in Section 3: build the Darknet model from the cfg, move it to the device, switch to eval(), and load the weights (those lines are not repeated here). With the model ready, run the same input through both the PyTorch model and the ONNX model and compare:
import onnxruntime
import torch
import numpy as np
# Create a model input; the resolution must match the exported one
x = torch.randn(size=(1, 3, 320, 416), dtype=torch.float32).to(device)
with torch.no_grad():
    torch_out = model(x)  # writing torch_out, _ = model(x) here would make the torch_out = torch_out[0] line below unnecessary
    # print(type(torch_out))  # <class 'tuple'> -- a tuple wrapping a tensor
    # print(type(torch_out[0]))  # <class 'torch.Tensor'>
    torch_out = torch_out[0]  # so pull the tensor out of the tuple

ort_session = onnxruntime.InferenceSession('weights/yolov3.onnx')  # initialize the ONNX Runtime session
ort_inputs = {ort_session.get_inputs()[0].name: x.cpu().numpy()}  # x is the random input above; in real use it would be a preprocessed image (.cpu() so this also works when device is cuda)
ort_outs = ort_session.run(None, ort_inputs)  # run inference
# print(type(ort_outs))  # <class 'list'> wrapping a numpy array
# print(type(ort_outs[0]))  # <class 'numpy.ndarray'>
ort_outs = ort_outs[0]  # pull the array out of the list -- this step matters
# print(torch_out.cpu().numpy().shape)  # (1, 8190, 85); 8190 = (10*13 + 20*26 + 40*52) grid cells x 3 anchors for a 320x416 input; 85 = 4 box coords + 1 objectness + 80 class scores
# print(ort_outs.shape)  # (1, 8190, 85)
# Compare actual vs. expected values; silence means they match, a mismatch raises AssertionError
# Both arguments must be numpy arrays
np.testing.assert_allclose(torch_out.cpu().numpy(), ort_outs, rtol=1e-03, atol=1e-05)
print('torch_out:', torch_out.cpu().numpy())
print('ort_outs:', ort_outs)
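If the assertion fails, it is worth seeing how large the discrepancy actually is before blaming the export; small float drift between PyTorch and ONNX Runtime kernels is normal. A short follow-up sketch:

# Quantify the difference instead of just pass/fail
diff = np.abs(torch_out.cpu().numpy() - ort_outs)
print('max abs diff: %.6f, mean abs diff: %.6f' % (diff.max(), diff.mean()))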
4.3 Eliminating a warning
[W:onnxruntime:, graph.cc:1237 onnxruntime::Graph::Graph] Initializer 893 appears in graph inputs and will not be treated as constant value/weight. This may prevent some of the graph optimizations, like const folding. Move it out of graph inputs if there is no need to override it, by either re-generating the model with latest exporter/converter or with the tool onnxruntime/tools/python/remove_initializer_from_input.py.
Solution:
In the folder containing the exported ONNX model, create remove_initializer_from_input.py with the following content:
import onnx
import argparse


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True, help='input model')
    parser.add_argument('--output', required=True, help='output model')
    args = parser.parse_args()
    return args


def remove_initializer_from_input():
    args = get_args()
    model = onnx.load(args.input)
    if model.ir_version < 4:
        print('Model with ir_version below 4 requires initializers to be included in the graph input')
        return
    # Map input names to their value-infos, then drop every graph input
    # that is actually an initializer (a weight, not a real input)
    inputs = model.graph.input
    name_to_input = {}
    for input in inputs:
        name_to_input[input.name] = input
    for initializer in model.graph.initializer:
        if initializer.name in name_to_input:
            inputs.remove(name_to_input[initializer.name])
    onnx.save(model, args.output)


if __name__ == '__main__':
    remove_initializer_from_input()
Then open a terminal and run:
python remove_initializer_from_input.py --input yolov3.onnx --output yolov3.onnx
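To confirm the rewrite worked, open a session on the cleaned model; the initializer warning should no longer be printed and the graph should expose only the real image input (a quick sketch):

import onnxruntime

sess = onnxruntime.InferenceSession('yolov3.onnx')
print([inp.name for inp in sess.get_inputs()])  # should list only the image input now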
4.4 End-to-end run check
The main code:
import argparse
import time
import random

import onnxruntime
import torch
import cv2

from models import parse_data_cfg
from utils.datasets import LoadWebcam, LoadImages
# from utils.utils import *  # star imports like this are best avoided
from utils.utils import load_classes, non_max_suppression, scale_coords, plot_one_box


# Pedestrian detection with YOLOv3
def detect(onnx_model_path,
           data,
           images='data/samples',  # input folder of images to detect on
           fourcc='mp4v',  # video codec
           img_size=416,
           conf_thres=0.5,  # object confidence threshold
           nms_thres=0.5):  # NMS IoU threshold
    # Initialize
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # Initialize the network as an ONNX Runtime session
    model = onnxruntime.InferenceSession(onnx_model_path)

    # Set dataloader
    if opt.webcam:
        dataloader = LoadWebcam(img_size=img_size, half=False)  # half=False: no half precision
    else:
        dataloader = LoadImages(images, img_size=img_size, half=False)

    # Get classes and colors
    # parse_data_cfg(data)['names'] is the path to the class-name file, e.g. names=data/coco.names
    classes = load_classes(parse_data_cfg(data)['names'])  # class-name list: ['person', 'bicycle', ...]
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]  # one random box color per class

    # Run inference
    t0 = time.time()
    for i, (path, img, im0, vid_cap) in enumerate(dataloader):
        t = time.time()
        # Get detections; img shape: (3, 416, 320)
        img = torch.from_numpy(img).unsqueeze(0).to(device)  # torch.Size([1, 3, 416, 320])
        ort_inputs = {model.get_inputs()[0].name: img.cpu().numpy()}  # .cpu() so this also works when device is cuda
        ort_outs = model.run(None, ort_inputs)
        pred = torch.from_numpy(ort_outs[0])
        det = non_max_suppression(pred.float(), conf_thres, nms_thres)[0]  # e.g. torch.Size([5, 7])

        if det is not None and len(det) > 0:
            # Rescale boxes from the network input size back to the original image
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            print('%gx%g ' % img.shape[2:], end='')  # print input size, e.g. '288x416'
            for c in det[:, -1].unique():  # iterate over the classes present in this image
                n = (det[:, -1] == c).sum()  # count detections of this class
                if classes[int(c)] != 'person':
                    continue
                print('%g %ss' % (n, classes[int(c)]), end=', ')  # e.g. '5 persons'

            # Draw bounding boxes and labels of detections
            # (x1y1x2y2, obj_conf, class_conf, class_pred)
            for *xyxy, conf, cls_conf, cls in det:  # iterate over the final predictions
                # *xyxy: top-left / bottom-right corners in original-image coordinates,
                # e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]
                # Add bbox to the image
                label = '%s %.2f' % (classes[int(cls)], conf)  # e.g. 'person 1.00'
                # Only draw detected persons
                if classes[int(cls)] == 'person':
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])

        print('Done. (%.3fs)' % (time.time() - t))
        if opt.webcam:  # show live webcam feed
            cv2.imshow('onnx_display', im0)

    print('Done. (%.3fs)' % (time.time() - t0))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--onnx_model_path', type=str, default='weights/yolov3.onnx', help='path to the ONNX model')
    parser.add_argument('--data', type=str, default='data/coco.data', help='path to the dataset config file')
    parser.add_argument('--images', type=str, default='data/samples', help='folder of images to run detection on')
    parser.add_argument('--img-size', type=int, default=416, help='input resolution')
    parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold')
    parser.add_argument('--nms-thres', type=float, default=0.4, help='NMS threshold')
    parser.add_argument('--fourcc', type=str, default='mp4v', help='fourcc output video codec (verify ffmpeg support)')
    parser.add_argument('--webcam', action='store_true', help='use the webcam as input (omit the flag to read from --images)')
    opt = parser.parse_args()
    print(opt)
    # Run pedestrian detection
    with torch.no_grad():
        detect(opt.onnx_model_path,
               opt.data,
               images=opt.images,
               img_size=opt.img_size,
               conf_thres=opt.conf_thres,
               nms_thres=opt.nms_thres,
               fourcc=opt.fourcc)
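For reference, assuming the script above is saved as detect_onnx.py (the filename is my own choice), a typical run over a folder of images looks like:

python detect_onnx.py --onnx_model_path weights/yolov3.onnx --data data/coco.data --images data/samples --img-size 416

Add --webcam to detect on the live camera feed instead.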
Sample run output (screenshot omitted).
4.5 File locations
(Screenshot of the project directory layout omitted.)
Thanks to:
https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html