本文记录基于mxnet训练得到的resnet50模型在转为onnx格式部署时发生的两个错误及解决办法
- batchnorm层 spatial参数不识别
- batchnorm层定义 fix_gamma=True 导致的onnx模型参数错误
环境
os: ubuntu 16.04
MXNet: 1.6.0
onnx: 1.6.0
cuda: 10.2
cudnn: 8.0
模型转换
import os
import mxnet as mx
import numpy as np
from mxnet.contrib import onnx as onnx_mxnet
# load mxnet 模型
def LoadModel(model_file, param_file):
    """Load an MXNet model from its symbol JSON file and params file.

    Parameters
    ----------
    model_file : str
        Path to the ``*-symbol.json`` file.
    param_file : str
        Path to the ``*.params`` file, whose keys are ``"arg:<name>"``
        or ``"aux:<name>"``.

    Returns
    -------
    tuple
        ``(symbol, arg_params, aux_params)`` where the param dicts map
        parameter name -> NDArray.
    """
    # Bug fix: the loaders live at the package top level; ``mx.model``
    # has no ``sym``/``nd`` attributes, so the original
    # ``mx.model.sym.load`` / ``mx.model.nd.load`` raised AttributeError.
    symbol = mx.sym.load(model_file)
    param = mx.nd.load(param_file)
    arg_params = {}
    aux_params = {}
    for k, v in param.items():
        # Keys are "<tp>:<name>"; split only on the first ':' so names
        # containing ':' stay intact.
        tp, name = k.split(':', 1)
        if tp == 'arg':
            arg_params[name] = v
        elif tp == 'aux':
            aux_params[name] = v
    return symbol, arg_params, aux_params
# mxnet模型转换为onnx格式保存
def mxnet_to_onnx(mxnet_model_file, mxnet_param_file, input_shape, output_file, layer_name=None):
    """Convert an MXNet model to ONNX format and save it to disk.

    Parameters
    ----------
    mxnet_model_file : str
        Path to the MXNet ``*-symbol.json`` file.
    mxnet_param_file : str
        Path to the MXNet ``*.params`` file.
    input_shape : tuple
        Shape of the ``data`` input, e.g. ``(1, 3, 224, 224)``.
    output_file : str
        Destination path of the ONNX model.
    layer_name : str, optional
        If given, export the sub-network truncated at this layer's
        output instead of the full network.

    Returns
    -------
    str
        Path of the exported ONNX model.
    """
    # Bug fix: LoadModel returns exactly three values; unpacking six
    # (classes, net_shape, data_mean) raised ValueError at runtime.
    sym, arg_params, aux_params = LoadModel(mxnet_model_file, mxnet_param_file)
    if layer_name:
        # Internal outputs are exposed under the "<layer>_output" name.
        layer_name += '_output'
        sym = sym.get_internals()[layer_name]
        # Sanity-check that shapes can be inferred for the truncated graph
        # before exporting (raises early on a bad layer_name/shape combo).
        sym.infer_shape(data=input_shape)
    # export_model expects a single dict of both arg and aux params.
    params = {}
    params.update(arg_params)
    params.update(aux_params)
    return onnx_mxnet.export_model(sym, params, [input_shape], np.float32, output_file)
tensorrt 加载onnx模型文件
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import tensorrt as trt
class HostDeviceMem(object):
    """Pairs a host-side buffer with its matching device-side allocation."""

    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        # Show both halves of the pair for easy debugging.
        return "Host:\n{}\nDevice:\n{}".format(self.host, self.device)

    def __repr__(self):
        return str(self)
# Shared logger passed to every TensorRT builder/parser object below.
TRT_LOGGER = trt.Logger()
class TRTEngine(object):
    """Builds a TensorRT engine from an ONNX file and runs inference on it.

    NOTE(review): relies on ``pycuda.autoinit`` having been imported so a
    CUDA context exists before any allocation here — confirm at call site.
    """

    def __init__(self, onnx_file, batch_size=1, output_shape=None):
        # output_shape is stored for callers that want to reshape the
        # flat host output themselves; it is not used internally.
        self.engine, self.network = self.load_engine(onnx_file, batch_size)
        self.output_shape = output_shape

    def load_engine(self, onnx_file, batch_size=1):
        """Parse ``onnx_file`` and build a CUDA engine.

        Returns
        -------
        tuple
            ``(engine, network)``.

        Raises
        ------
        RuntimeError
            If parsing or engine building fails. (Bug fix: the original
            only printed the parser errors and carried on, so
            ``build_cuda_engine`` returned None and the constructor later
            crashed with an unhelpful AttributeError.)
        """
        # TensorRT's ONNX parser requires an explicit-batch network.
        EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        with trt.Builder(TRT_LOGGER) as builder, \
                builder.create_network(EXPLICIT_BATCH) as network, \
                trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_batch_size = batch_size
            builder.max_workspace_size = 1 << 30  # 1 GiB of build scratch space
            with open(onnx_file, 'rb') as model:
                if not parser.parse(model.read()):
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    raise RuntimeError("Failed to parse ONNX file: " + onnx_file)
            engine = builder.build_cuda_engine(network)
            if engine is None:
                raise RuntimeError("Failed to build TensorRT engine from: " + onnx_file)
            print("Load onnx successful!")  # bug fix: message typo "sucessful"
            print(engine.num_layers)
            return engine, network

    def allocate_buffers(self):
        """Allocate pagelocked host and device buffers for every binding.

        Returns
        -------
        tuple
            ``(inputs, outputs, bindings, stream)``: lists of
            HostDeviceMem for inputs/outputs, the device pointers in
            binding order (as ``execute_async_v2`` expects), and a fresh
            CUDA stream.
        """
        inputs = []
        outputs = []
        bindings = []
        stream = cuda.Stream()
        for binding in self.engine:
            size = trt.volume(self.engine.get_binding_shape(binding)) * self.engine.max_batch_size
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
            # Pagelocked host memory enables asynchronous DMA transfers.
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            bindings.append(int(device_mem))
            if self.engine.binding_is_input(binding):
                inputs.append(HostDeviceMem(host_mem, device_mem))
            else:
                outputs.append(HostDeviceMem(host_mem, device_mem))
        return inputs, outputs, bindings, stream

    def do_inference(self, context, bindings, inputs, outputs, stream):
        """Run one async inference pass and wait for completion.

        Returns the list of host-side output arrays.
        """
        # Transfer input data to the GPU (plain loops instead of the
        # original side-effecting list comprehensions).
        for inp in inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, stream)
        # Run inference on the stream.
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        for out in outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, stream)
        # Block until all queued work on the stream has finished.
        stream.synchronize()
        # Return only the host outputs.
        return [out.host for out in outputs]

    def inference(self, data):
        """Convenience wrapper: run the engine on a single input array.

        NOTE(review): buffers are (re)allocated on every call; hoist
        ``allocate_buffers()`` out if this gets called in a tight loop.
        """
        with self.engine.create_execution_context() as context:
            inputs, outputs, bindings, stream = self.allocate_buffers()
            inputs[0].host = data
            return self.do_inference(context, bindings=bindings,
                                     inputs=inputs, outputs=outputs, stream=stream)
问题记录
1. resnet50模型转换时报错
问题定位:
resnet50中使用的batchnorm层在转换至onnx时报错不支持属性spatial
解决办法:
mxnet源码 mxnet/contrib/onnx/mx2onnx/_op_translations.py 约359行处, 注释掉spatial参数
def convert_batchnorm(node, **kwargs):
    """Map MXNet's BatchNorm operator attributes to onnx's BatchNormalization operator
    and return the created node.
    """
    name, input_nodes, attrs = get_inputs(node, kwargs)
    print("input nodes:",input_nodes)
    print("name: ", name)
    # Collect the ONNX attributes up front; newer ONNX opsets removed the
    # `spatial` attribute entirely, so it is deliberately omitted here.
    # MXNet computes mean and variance per feature for batchnorm, while
    # the old ONNX default was across all spatial features.
    bn_attrs = {
        "epsilon": float(attrs.get("eps", 0.001)),
        "momentum": float(attrs.get("momentum", 0.9)),
        # spatial=0  (intentionally disabled)
    }
    bn_node = onnx.helper.make_node(
        "BatchNormalization",
        input_nodes,
        [name],
        name=name,
        **bn_attrs
    )
    return [bn_node]
2.resnet50 转为onnx后执行推理计算结果不对
问题定位: resnet50中对输入图片的batchnorm层在mxnet中定义时使用了参数fix_gamma=True, 导致转为onnx格式后, batchnorm的scale参数(即gamma参数)变为0, 从而使输入数据全部清零只输出了偏移量
通过netron.app 查看网络结构, 看到第一个batchnorm层(bn_data)的参数bn_data_gamma异常:
image.png
解决办法:
在onnx模型中手动修改bn_data层的gamma参数为1.0
import onnx
import numpy as np
def fix_batchnorm_error(onnx_file, new_onnx_file, gamma_name='bn_data_gamma'):
    """Reset the input BatchNorm layer's gamma (scale) to 1.0 and re-save.

    MXNet's ``fix_gamma=True`` exports the gamma tensor as zeros, which
    makes the ONNX BatchNormalization layer zero out its input; forcing
    gamma back to 1.0 restores the intended identity scaling.

    Parameters
    ----------
    onnx_file : str
        Path of the broken ONNX model.
    new_onnx_file : str
        Path to write the repaired model to.
    gamma_name : str, optional
        Name of the gamma initializer to repair (default matches the
        resnet50 export described above).
    """
    model = onnx.load(onnx_file)
    weights = model.graph.initializer
    # Prefer lookup by name; fall back to initializer 0, which is where
    # this exporter happens to place bn_data_gamma (original behavior).
    gamma = next((w for w in weights if w.name == gamma_name), weights[0])
    # Generalized: set every channel, not just the first three hard-coded
    # indices, so models with a different channel count are handled too.
    for i in range(len(gamma.float_data)):
        gamma.float_data[i] = 1.0
    onnx.save(model, new_onnx_file)