OnnxRuntime部署SwinTransformer

一、概述

实测SwinTransformer真的是涨点神器,刷榜秘籍,用SwinTransformer作为模型主干网络来微调下游任务对比ResNet50保守能够带来2~5个点的提升,当然模型参数量是大了点。测试了下基于OnnxRuntime cpu模式和gpu(非TensorRT)模式下的速度。对于大部分图片识别类任务,这个速度也是可以接受的。

模式 | 硬件 | 输入 | 平均速度
cpu | Intel(R) Xeon(R) W-2102 CPU @ 2.90GHz | 224*224 | 360ms
gpu | Nvidia Tesla T4 | 224*224 | 10ms

二、环境

三、模型转onnx

去掉训练时的分类头,只提取 timm 版本 SwinTransformer 模型的特征。
转出来的模型可以在此处下载
链接:https://pan.baidu.com/s/1oKUrPxPtYUFGVXJ2SiBP3g
提取码:czfj

import timm
import torch.nn as nn
import torch
class ft_net_swin_extract(nn.Module):
    """Feature extractor wrapping timm's Swin-B (swin_base_patch4_window7_224).

    The classification head is replaced with an empty ``nn.Sequential`` so the
    module outputs backbone features only (suitable for ONNX export).

    Args:
        class_num: kept for caller/checkpoint compatibility; unused here since
            the head is removed.
        droprate: unused; kept so existing call sites keep working.
        stride: unused; kept so existing call sites keep working.
        circle: unused; accepted so callers of the full training model
            (e.g. ``ft_net_swin_extract(class_num=751, circle=True)``) still
            construct this extractor without a TypeError.
    """

    def __init__(self, class_num, droprate=0.5, stride=2, circle=False):
        super(ft_net_swin_extract, self).__init__()
        model_ft = timm.create_model('swin_base_patch4_window7_224', pretrained=True)
        # avg pooling to global pooling
        #model_ft.avgpool = nn.AdaptiveAvgPool2d((1,1))
        # Drop the classification head so only features are produced.
        model_ft.head = nn.Sequential() # save memory
        self.model = model_ft

    def forward(self, x):
        # Return the backbone features for input batch x.
        x = self.model.forward_features(x)
        return x
from model import ft_net_swin_extract
import numpy as np
import torch            # bug fix: torch/nn were used below but never imported
import torch.nn as nn
from torchvision import models,transforms,datasets
import cv2
import onnx
import onnxruntime


# ImageNet mean/std normalization.
# NOTE(review): cv2.imread returns BGR and no BGR->RGB conversion happens
# before normalizing with RGB statistics; this matches the C++ inference
# code (whose cvtColor is commented out) — confirm against training.
data_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
image_file = "000_000_gan0002_c3s1_136308_04.jpg"
input = cv2.imread(image_file)
img_h,img_w,_ = input.shape
resize_input = cv2.resize(input,(224,224))
image = data_transforms(resize_input)
image = image.unsqueeze(0)  # add batch dimension -> (1, 3, 224, 224)
print(image.shape)

model = ft_net_swin_extract(class_num=751)
model_path = "/home/nemo/DeepLearning/Person_reID_baseline_pytorch/MarketOut/best.pth"
model.load_state_dict(torch.load(model_path))
# bug fix: the original also did `model.classifier.classifier = nn.Sequential()`,
# but ft_net_swin_extract has no `classifier` attribute (its head is already
# replaced by an empty nn.Sequential in __init__), so that line raised
# AttributeError and is removed.
model.eval()

torch_out = model(image)
# Export the model to ONNX (opset 12) with a fixed 1x3x224x224 input.
torch.onnx.export(model,                     # model being run
                  image,                         # model input (or a tuple for multiple inputs)
                  "swin-transform.onnx",     # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=12,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  )

# Sanity-check the exported graph structure.
onnx_model = onnx.load("swin-transform.onnx")
onnx.checker.check_model(onnx_model)

四、编写onnxruntime 推理代码
代码地址
https://gitee.com/running_jiang/swintransformer-onnxruntime.git
https://github.com/runningJ/swintransformer-onnxruntime.git
欢迎star,拒绝白嫖。

cpu版本

#include <iostream>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <onnxruntime_cxx_api.h>
#include <algorithm>
#include <numeric>
#include <chrono>

using namespace std;
using namespace cv;
using namespace Ort;


// Product of all elements of v (e.g. element count of a tensor from its dims).
// Bug fix: the original passed the int literal 1 as std::accumulate's initial
// value, so the accumulation type was int regardless of T — truncating or
// overflowing for 64-bit dims. Using T{1} accumulates in T itself.
template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), T{1}, std::multiplies<T>());
}

// CPU inference demo: load an ONNX SwinTransformer model, preprocess one
// image, time 100 forward passes, and print the first 10 output values.
// usage: <exe> image_path model_path
int main(int argc, char** argv)
{
    if (argc != 3)
    {
        cerr << "usage " << argv[0] << " image_path model_path" << endl;
        return 1;  // bug fix: was 0 — report failure to the shell
    }
    cv::Mat image = imread(argv[1]);
    if (image.empty())
    {
        cerr << "input image has problem " << argv[1] << endl;
        return 1;  // bug fix: was 0 — report failure to the shell
    }

    string model_path = argv[2];

    Env env;
    // Bug fix: the original default-initialized the options wrapper with
    // nullptr and handed a null OrtSessionOptions to CreateSession; use a
    // valid default-constructed SessionOptions instead.
    SessionOptions options;
    Session session(env, model_path.c_str(), options);

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();

    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

    AllocatorWithDefaultOptions allocator;
    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;
    TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    // NCHW layout assumed below: dims = {N, C, H, W}. NOTE(review): a dynamic
    // axis is reported as -1 and would corrupt the size math below; this model
    // is exported with a fixed 1x3x224x224 input — confirm for other models.
    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: ";
    for (size_t i = 0; i < inputDims.size(); ++i)  // size_t: no signed/unsigned mix
    {
        cout << inputDims[i] << " ";
    }
    cout << endl;
    cout << "-----------------------------------------" << endl;
    const char* outputName = session.GetOutputName(0, allocator);
    cout << "Output Name: " << outputName << std::endl;
    TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: ";
    for (size_t i = 0; i < outputDims.size(); ++i)
    {
        cout << outputDims[i] << " ";
    }
    cout << endl;

    //data preprocess
    cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
    // Bug fix: cv::Size is (width, height) = (dims[3], dims[2]); the original
    // swapped them — harmless for a square 224x224 input, wrong otherwise.
    cv::resize(image, resizedImageBGR,
               cv::Size(inputDims.at(3), inputDims.at(2)));
    // NOTE(review): BGR->RGB conversion is deliberately disabled to match the
    // Python export script, so the ImageNet (RGB) mean/std below are applied
    // to BGR channels — confirm against the training pipeline.
    resizedImageRGB = resizedImageBGR;
    //cv::cvtColor(resizedImageBGR, resizedImageRGB,cv::ColorConversionCodes::COLOR_BGR2RGB);
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
    cv::Mat channels[3];
    cv::split(resizedImage, channels);
    // Per-channel normalization: (x - mean) / std.
    channels[0] = (channels[0] - 0.485) / 0.229;
    channels[1] = (channels[1] - 0.456) / 0.224;
    channels[2] = (channels[2] - 0.406) / 0.225;
    cv::merge(channels, 3, resizedImage);
    // HWC float image -> NCHW blob.
    cv::dnn::blobFromImage(resizedImage, preprocessedImage);

    size_t inputTensorSize = vectorProduct(inputDims);
    std::vector<float> inputTensorValues(inputTensorSize);
    inputTensorValues.assign(preprocessedImage.begin<float>(),
                             preprocessedImage.end<float>());

    size_t outputTensorSize = vectorProduct(outputDims);
    std::vector<float> outputTensorValues(outputTensorSize);

    std::vector<const char*> inputNames{inputName};
    std::vector<const char*> outputNames{outputName};

    std::vector<Value> inputTensors;
    std::vector<Value> outputTensors;

    // The tensors below wrap the pre-allocated host buffers (no copy), so
    // inputTensorValues/outputTensorValues must outlive the Run() calls.
    MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);

    inputTensors.push_back(Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
        inputDims.size()));

    outputTensors.push_back(Value::CreateTensor<float>(
        memoryInfo, outputTensorValues.data(), outputTensorSize,
        outputDims.data(), outputDims.size()));

    // Time 100 consecutive runs (first iterations include warm-up cost).
    for (int i = 0; i < 100; ++i)
    {
        auto s_t = std::chrono::steady_clock::now();
        session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                    inputTensors.data(), 1, outputNames.data(),
                    outputTensors.data(), 1);
        auto e_t = std::chrono::steady_clock::now();
        double dr_s = std::chrono::duration<double, std::milli>(e_t - s_t).count();
        cout << "runing inference cost time " << dr_s << "ms" << endl;
    }

    // Print the first few feature values as a sanity check.
    for (int j = 0; j < 10; ++j)
    {
        cout << outputTensorValues.at(j) << endl;
    }
    return 0;
}

cuda 版本

#include <iostream>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <onnxruntime_cxx_api.h>
#include <algorithm>
#include <numeric>
#include <chrono>

using namespace std;
using namespace cv;
using namespace Ort;

// Product of all elements of v (e.g. element count of a tensor from its dims).
// Bug fix: the original passed the int literal 1 as std::accumulate's initial
// value, so the accumulation type was int regardless of T — truncating or
// overflowing for 64-bit dims. Using T{1} accumulates in T itself.
template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), T{1}, std::multiplies<T>());
}

int main(int argc,char**argv)
{
    if (argc != 3)
    {
        cerr<<"usage "<< argv[0] <<" image_path model_path"<<endl;
        return 0;
    }
    cv::Mat image = imread(argv[1]);
    if(image.empty())
    {
        cerr <<"input image has problem "<< argv[1]<<endl;
        return 0;
    }
    string model_path = argv[2];

    Env env(ORT_LOGGING_LEVEL_WARNING, "Default");
    Session session{nullptr};
    SessionOptions session_options;
    OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0);
    session = Ort::Session(env, model_path.c_str(), session_options);

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();

    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

    AllocatorWithDefaultOptions allocator;
    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;
    TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: ";
    for(int i = 0; i < inputDims.size(); ++i)
    {
        cout<< inputDims[i]<<" ";
    }
    cout <<endl;

    cout <<"-----------------------------------------"<<endl;
    const char* outputName = session.GetOutputName(0, allocator);
    cout << "Output Name: " << outputName << std::endl;
    TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: ";
    for(int i = 0; i < outputDims.size(); ++i)
    {
        cout<< outputDims[i]<<" ";
    }
    cout <<endl;

     //data preprocess
    cv::Mat resizedImageBGR, resizedImageRGB, resizedImage, preprocessedImage;
    cv::resize(image, resizedImageBGR,cv::Size(inputDims.at(2), inputDims.at(3)));
    resizedImageRGB = resizedImageBGR;
    //cv::cvtColor(resizedImageBGR, resizedImageRGB,cv::ColorConversionCodes::COLOR_BGR2RGB);
    resizedImageRGB.convertTo(resizedImage, CV_32F, 1.0 / 255);
    cv::Mat channels[3];
    cv::split(resizedImage, channels);
    channels[0] = (channels[0] - 0.485) / 0.229;
    channels[1] = (channels[1] - 0.456) / 0.224;
    channels[2] = (channels[2] - 0.406) / 0.225;
    cv::merge(channels, 3, resizedImage);
    cv::dnn::blobFromImage(resizedImage, preprocessedImage);

    size_t inputTensorSize = vectorProduct(inputDims);
    std::vector<float> inputTensorValues(inputTensorSize);
    inputTensorValues.assign(preprocessedImage.begin<float>(),
                             preprocessedImage.end<float>());
    size_t outputTensorSize = vectorProduct(outputDims);
    std::vector<float> outputTensorValues(outputTensorSize);

    std::vector<const char*> inputNames{inputName};
    std::vector<const char*> outputNames{outputName};
    
    std::vector<Value> inputTensors;
    std::vector<Value> outputTensors;

    MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(
        OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);

    inputTensors.push_back(Value::CreateTensor<float>(
        memoryInfo, inputTensorValues.data(), inputTensorSize, inputDims.data(),
        inputDims.size()));

    outputTensors.push_back(Value::CreateTensor<float>(
        memoryInfo, outputTensorValues.data(), outputTensorSize,
        outputDims.data(), outputDims.size()));
    for(int i = 0; i < 100; ++i)
    {
        auto s_t=std::chrono::steady_clock::now();
        session.Run(Ort::RunOptions{nullptr}, inputNames.data(),
                inputTensors.data(), 1, outputNames.data(),
                outputTensors.data(), 1);
        auto e_t=std::chrono::steady_clock::now();
        double dr_s=std::chrono::duration<double,std::milli>(e_t-s_t).count();
        cout <<"runing inference cost time "<< dr_s <<"ms"<<endl;
    }
    

     for(int j = 0; j < 10; ++j)
     {
         cout << outputTensorValues.at(j)<<endl;
    }
    return 0;
}
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 214,313评论 6 496
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 91,369评论 3 389
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 159,916评论 0 349
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 57,333评论 1 288
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 66,425评论 6 386
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 50,481评论 1 292
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 39,491评论 3 412
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 38,268评论 0 269
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 44,719评论 1 307
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 37,004评论 2 328
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,179评论 1 342
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 34,832评论 4 337
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 40,510评论 3 322
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,153评论 0 21
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,402评论 1 268
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 47,045评论 2 365
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 44,071评论 2 352

推荐阅读更多精彩内容