前言
目前基于深度学习的目标检测越来越火,其准确度很高。笔者采用Yolo-v3实现目标检测。Yolo-v3基于darknet框架,该框架采用纯C语言编写,不依赖其他第三方库,相对于caffe框架,在易用性上对开发者更友好(笔者编译过数次caffe才成功)。本文基于windows平台将yolo-v3编译为动态链接库dll,测试其检测性能。
New, python接口的YOLO-v3, !!!, 走过不要错过
为了方便测试,本人将测试通过的Visual Studio工程贴出来
Yolov3-windows测试工程
链接:https://pan.baidu.com/s/1i6ZK2ZCGzWbfWT1-_fUTzg
提取码:rj9o
复制这段内容后打开百度网盘手机App,操作更方便哦
开发环境
- windows 10 x64
- Visual Studio 2017
- opencv3.4.0
- darknet 笔者直接fork自AlexeyAB/darknet
动态链接库.dll的编译过程就不再赘述,相信熟悉C++、编译过opencv的小伙伴都很容易上手。本文测试使用的是cpu-only版本,笔者编译过cpu-only, gpu两个版本的yolo-v3 dll,需要dll的请点赞支持哦。
Yolo-v3
-
代码 C++,opencv
需要的文件可以在darknet链接下载得到:
- yolov3.cfg,yolov3的网络结构描述文件
- yolov3.weights,yolov3训练好的权重文件,在coco数据集上训练的
- coco.names, coco数据集的目标类别文件
#include<fstream>
#include<iostream>
#include<string>
#include<opencv2/opencv.hpp>
#include<yolo\include\yolo_v2_class.hpp>
using namespace std;
using namespace cv;
// Input files for the demo, given relative to the working directory.
// Network structure description for YOLOv3.
const std::string CFG_FILE{ "darknet-master\\cfg\\yolov3.cfg" };
// Pre-trained YOLOv3 weights (trained on the COCO dataset).
const std::string WEIGHT_FILE{ "yolov3.weights" };
// Class-name list for the COCO dataset, one name per line.
const std::string COCO_NAMES{ "darknet-master\\cfg\\coco.names" };
class Object
{
public:
Object();
Object::Object(int id, float confidence, Rect rect, String name);
~Object();
public:
int id;
float confidence;
Rect rect;
String name;
private:
};
Object::Object() {
}
Object::Object(int id,float confidence,Rect rect,String name) {
this->id = id;
this->confidence = confidence;
this->rect = rect;
this->name = name;
}
Object::~Object() {
}
int main() {
//--------------------------实例化一个Yolo检测器---------------------------
Detector yolo_detector(CFG_FILE, WEIGHT_FILE);
//读取目标类别文件,80类
vector<String> classNames;
ifstream fileIn(COCO_NAMES, ios::in);
if (!fileIn.is_open()) {
cerr << "failed to load COCO.names!" << endl;
return -1;
}
for (int i = 0; i < 80; i++) {
char temp1[100];
fileIn.getline(temp1, 100);
string temp2(temp1);
classNames.push_back(String(temp2));
}
//---------------------------加载输入图像-----------------------------------
auto image = Detector::load_image("7.jpg");
cout << "图像宽度=" << image.w << endl
<< "图像高度=" << image.h << endl
<< "图像通道=" << image.c << endl;
//-----------------------------目标检测---------------------------------------
TickMeter t;
t.start();
auto res = yolo_detector.detect(image);
t.stop();
cout << "YOLO-v3检测时间=" << t.getTimeSec() << "sec" << endl;
//----------------------------解析检测结果---------------------------------------
vector<Object> detectObjects;
for (auto& i:res) {
int id = i.obj_id;
float confidence = i.prob;
String name = classNames[id];
Rect rect = Rect{ static_cast<int>(i.x),static_cast<int>(i.y),static_cast<int>(i.w),static_cast<int>(i.h) };
detectObjects.push_back(Object{ id,confidence,rect,name });
}
//----------------------------绘制结果---------------------------------------------
Mat im_src = imread("7.jpg");
for (auto& i:detectObjects) {
rectangle(im_src, i.rect, Scalar(0, 255, 255), 2);
putText(im_src, i.name, i.rect.tl(), 1, 1.8, Scalar(255, 0, 0),2);
}
imshow("yolo-v3", im_src);
waitKey(0);
}
-
API介绍
yolo_v2_class.hpp
中定义了Detector,将yolo封装到C++类中,方便使用。下面是Detector类的源码。
// C++ wrapper that packages the YOLO detector as a class.
// Members tagged YOLODLL_API are only declared in this listing; their
// definitions live elsewhere (presumably exported from the compiled DLL --
// confirm against the build). Everything else is implemented inline below.
class Detector {
// Type-erased handle; presumably the detector's internal network state -- confirm.
std::shared_ptr<void> detector_gpu_ptr;
// Bounding-box vectors kept from earlier calls; presumably the history used by tracking_id() -- confirm.
std::deque<std::vector<bbox_t>> prev_bbox_vec_deque;
// Const member, so it is fixed at construction; presumably the gpu_id passed to the constructor -- confirm.
const int cur_gpu_id;
public:
// Defaults to 0.4; presumably the non-maximum-suppression threshold -- confirm.
float nms = .4;
// Not initialized in this listing; its meaning is not visible here.
bool wait_stream;
// Builds a detector from a network config file and a weights file; gpu_id defaults to 0.
YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
YOLODLL_API ~Detector();
// Runs detection on an image file and returns the found boxes. thresh defaults to 0.2.
YOLODLL_API std::vector<bbox_t> detect(std::string image_filename, float thresh = 0.2, bool use_mean = false);
// Runs detection on an already loaded image_t and returns the found boxes. thresh defaults to 0.2.
YOLODLL_API std::vector<bbox_t> detect(image_t img, float thresh = 0.2, bool use_mean = false);
// Loads an image file into an image_t (static, no Detector instance needed).
static YOLODLL_API image_t load_image(std::string image_filename);
// Releases an image_t; mat_to_image() below uses it as the cleanup step for the buffers it creates.
static YOLODLL_API void free_image(image_t m);
// Network input width/height; mat_to_image_resize() resizes frames to exactly this size.
YOLODLL_API int get_net_width() const;
YOLODLL_API int get_net_height() const;
// NOTE(review): defined outside this listing; by its name and parameters it
// assigns tracking ids to the current boxes using earlier frames -- confirm.
YOLODLL_API std::vector<bbox_t> tracking_id(std::vector<bbox_t> cur_bbox_vec, bool const change_history = true,
int const frames_story = 10, int const max_dist = 150);
// Detects on img, then scales every box by init_w / img.w horizontally and
// init_h / img.h vertically, i.e. maps the boxes to an init_w x init_h frame.
// Throws std::runtime_error when img.data is NULL.
std::vector<bbox_t> detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false)
{
if (img.data == NULL)
throw std::runtime_error("Image is empty");
auto detection_boxes = detect(img, thresh, use_mean);
// Per-axis scale factors from the detected image size to the requested size.
float wk = (float)init_w / img.w, hk = (float)init_h / img.h;
for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk;
return detection_boxes;
}
// The cv::Mat helpers below are compiled only when OPENCV is defined.
#ifdef OPENCV
// Detects on a cv::Mat: the frame is resized to the network input size,
// and the resulting boxes are scaled back to the Mat's own cols x rows.
// Throws std::runtime_error when mat.data is NULL.
std::vector<bbox_t> detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false)
{
if(mat.data == NULL)
throw std::runtime_error("Image is empty");
auto image_ptr = mat_to_image_resize(mat);
return detect_resized(*image_ptr, mat.cols, mat.rows, thresh, use_mean);
}
// Resizes mat to get_net_width() x get_net_height() and converts it with
// mat_to_image(). Returns an empty shared_ptr when mat.data is NULL.
std::shared_ptr<image_t> mat_to_image_resize(cv::Mat mat) const
{
if (mat.data == NULL) return std::shared_ptr<image_t>(NULL);
cv::Mat det_mat;
cv::resize(mat, det_mat, cv::Size(get_net_width(), get_net_height()));
return mat_to_image(det_mat);
}
// Converts a cv::Mat into a heap-allocated image_t. The returned shared_ptr
// carries a deleter that calls free_image() on the image_t and then deletes it.
static std::shared_ptr<image_t> mat_to_image(cv::Mat img_src)
{
cv::Mat img;
// Swap the first and third channel (cvtColor expects a 3- or 4-channel input for this code).
cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR);
std::shared_ptr<image_t> image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; });
// IplImage view of img, handed to ipl_to_image() for the float conversion.
std::shared_ptr<IplImage> ipl_small = std::make_shared<IplImage>(img);
*image_ptr = ipl_to_image(ipl_small.get());
return image_ptr;
}
private:
// Copies the 8-bit interleaved pixels of src into a new float image_t:
// output is written channel by channel (all of channel 0, then channel 1, ...),
// row by row within a channel, each value divided by 255.
static image_t ipl_to_image(IplImage* src)
{
unsigned char *data = (unsigned char *)src->imageData;
int h = src->height;
int w = src->width;
int c = src->nChannels;
// Byte offset between consecutive rows of the source buffer.
int step = src->widthStep;
image_t out = make_image_custom(w, h, c);
int count = 0;
for (int k = 0; k < c; ++k) {
for (int i = 0; i < h; ++i) {
int i_step = i*step;
for (int j = 0; j < w; ++j) {
out.data[count++] = data[i_step + j*c + k] / 255.;
}
}
}
return out;
}
// Returns an image_t with the given dimensions and a null data pointer.
static image_t make_empty_image(int w, int h, int c)
{
image_t out;
out.data = 0;
out.h = h;
out.w = w;
out.c = c;
return out;
}
// Returns an image_t with the given dimensions and a zero-filled buffer of
// h*w*c floats obtained from calloc (the result of calloc is not checked).
static image_t make_image_custom(int w, int h, int c)
{
image_t out = make_empty_image(w, h, c);
out.data = (float *)calloc(h*w*c, sizeof(float));
return out;
}
#endif // OPENCV
};
- 主要的方法:
- 构造方法
YOLODLL_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id = 0);
输入:配置文件(.cfg)、权重文件(.weights);gpu_id表示使用哪个GPU
- 加载输入图像
static YOLODLL_API image_t load_image(std::string image_filename);
输入:图像名称
此方法为静态方法,将二维图像转为张量Tensor
-
测试图像 均来自百度图片
- 动物,人
- 车辆,人
测试结果
-
动物,人的检测
-
车辆,行人的检测
从以上测试结果来看,yolo-v3的准确度上性能非凡,较小尺寸的目标也可以检测到。相对于MobileNet-SSD(v1版本)准确度上要好。
Yolo-v3封装为python接口
yolo原始接口采用C语言, 对于不熟悉C/C++的同学不友好, 其次C语言每次都要编译,相对麻烦。 现在深度学习采用python 才是标配, 因此本人利用pybind11封装为python API。
首先需要配置pybind11, 见文章:pybind11使用
工程配置
pybind11封装接口
python_api.cpp
#include<pybind11/pybind11.h>
#include<pybind11/stl.h>
#include<pybind11/numpy.h>
#include<string>
#include<vector>
#include<opencv2/opencv.hpp>
#include<yolo/include/yolo_v2_class.hpp>
using namespace cv;
namespace py = pybind11;
// One detection result exposed to Python: class id, confidence score,
// bounding box and class label.
//
// Scalar members carry default values so a default-constructed Object never
// holds indeterminate data, and no destructor is declared so the compiler
// still generates move operations (cheaper growth of std::vector<Object>).
class Object
{
public:
    // Creates an empty result (id 0, confidence 0, empty rect and name).
    Object() = default;

    // Creates a result from its four components.
    //   id         - class index reported by the detector
    //   confidence - detection score
    //   rect       - bounding box as [xmin, ymin, xmax, ymax]
    //   name       - human-readable class label
    Object(int id, float confidence, std::vector<int> rect, std::string name)
        : id(id), confidence(confidence), rect(rect), name(name) {}

public:
    int id = 0;
    float confidence = 0.0f;
    std::vector<int> rect; //[xmin, ymin, xmax, ymax]
    std::string name;
};
class YoloDetector : public Detector {
public:
std::string weights_file;
std::string cfg_file;
private:
std::vector<std::string> classNames;
image_t cvMat_to_image_t(cv::Mat& image) {
image_t dst;
dst.w = image.cols;
dst.h = image.rows;
dst.c = image.channels();
dst.data = new float[dst.w*dst.h*dst.c * sizeof(float)];
int count = 0;
for (int i = 0; i < image.rows; i++)
{
for (int j = 0; j < image.cols; j++)
{
cv::Vec3b pixel = image.at<Vec3b>(i, j);
dst.data[count] = (float)pixel[0];
dst.data[count+1] = (float)pixel[1];
dst.data[count+2] = (float)pixel[2];
count += 3;
}
}
return dst;
}
public:
YoloDetector(std::string weights_file, std::string cfg_file) :Detector(cfg_file, weights_file) {
this->weights_file = weights_file;
this->cfg_file = cfg_file;
};
~YoloDetector() {};
public:
void setCOCOName(std::vector<std::string> names) {
for (auto i: names)
{
this->classNames.push_back(i);
}
}
//// image: BGR Format
//std::vector<Object> detectImage(cv::Mat& image) {
// Mat rgb_image;
// cvtColor(image, rgb_image, COLOR_BGR2RGB);
// cv::resize(rgb_image, rgb_image, cv::Size(this->get_net_width(), this->get_net_height()));
// image_t image_ = this->cvMat_to_image_t(rgb_image);
//
// auto res = this->detect(image_);
// std::vector<Object> objs;
// for (auto i:res)
// {
// objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
// }
// return objs;
//}
// image: BGR Format
std::vector<Object> detectImage(std::string image_name) {
auto res = this->detect(Detector::load_image(image_name));
std::vector<Object> objs;
for (auto i : res)
{
objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
}
return objs;
}
};
#if 0
// Stand-alone smoke test for YoloDetector (normally compiled out): loads the
// model, detects on one image file and shows the boxes.
// Returns 0 on success, -1 when the class-name file or the image cannot be read.
int main() {
    const std::string CFG_FILE = "D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg";
    const std::string WEIGHT_FILE = "D:\\YOLO-v3\\yolov3.weights";
    const std::string COCO_NAMES = "D:\\YOLO-v3\\darknet-master\\cfg\\coco.names";
    const std::string IMAGE_FILE = "D:\\YOLO-v3\\darknet-test.jpg";

    // Read the class-name file: one label per line, however many lines it has
    // (no fixed count of 80 and no fixed-size line buffer).
    std::vector<std::string> classNames;
    std::ifstream fileIn(COCO_NAMES, std::ios::in);
    if (!fileIn.is_open()) {
        std::cerr << "failed to load COCO.names!" << std::endl;
        return -1;
    }
    for (std::string line; std::getline(fileIn, line); ) {
        // Tolerate CRLF files that were opened without newline translation.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        classNames.push_back(line);
    }

    YoloDetector detector(WEIGHT_FILE, CFG_FILE);
    detector.setCOCOName(classNames);

    cv::Mat image = cv::imread(IMAGE_FILE);
    if (image.empty()) {
        std::cerr << "failed to load image: " << IMAGE_FILE << std::endl;
        return -1;
    }
    auto detectObjects = detector.detectImage(IMAGE_FILE);
    for (const auto& obj : detectObjects) {
        // obj.rect is [xmin, ymin, xmax, ymax]; cv::Rect wants x, y, width, height.
        cv::rectangle(image,
            Rect(obj.rect[0], obj.rect[1], obj.rect[2] - obj.rect[0], obj.rect[3] - obj.rect[1]),
            Scalar(0, 255, 255), 2);
    }
    imshow("yolo", image);
    waitKey(0);
    return 0;
}
#endif
#if 1
// Python module "yolov3": exposes the Object record and the YoloDetector class.
PYBIND11_MODULE(yolov3, m) {
    // Object: constructible from (id, confidence, rect, name); all four
    // fields are readable and writable from Python.
    py::class_<Object> objectBinding(m, "Object");
    objectBinding.def(py::init<int, float, std::vector<int>, std::string>());
    objectBinding.def_readwrite("id", &Object::id);
    objectBinding.def_readwrite("confidence", &Object::confidence);
    objectBinding.def_readwrite("rect", &Object::rect);
    objectBinding.def_readwrite("name", &Object::name);

    // YoloDetector: constructible from two strings, with its two public methods.
    py::class_<YoloDetector> detectorBinding(m, "YoloDetector");
    detectorBinding.def(py::init<std::string, std::string>());
    detectorBinding.def("detectImage", &YoloDetector::detectImage);
    detectorBinding.def("setCOCOName", &YoloDetector::setCOCOName);
}
#endif
生成python可以调用的动态库
在pycharm中调用
new一个工程, 在工程目录下new一个 package
python代码
# Demo: run the pybind11 YOLOv3 wrapper on one image and show the boxes.
# The loop and `with` bodies are indented again (the listing had lost its
# indentation and did not parse), and a failed image load is reported
# instead of crashing later inside the drawing calls.
import demo18.yolov3 as yolov3
import cv2

WEIGHTS_PATH = 'D:\\YOLO-v3\\yolov3.weights'
CFG_PATH = 'D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg'
NAMES_PATH = 'D:\\YOLO-v3\\darknet-master\\cfg\\coco.names'
IMAGE_PATH = 'D:\\YOLO-v3\\darknet-test.jpg'

# The wrapper takes the weights file first, then the network config.
detector = yolov3.YoloDetector(WEIGHTS_PATH, CFG_PATH)
help(detector)

# One class label per line; strip the trailing newline from each.
with open(NAMES_PATH, 'r') as f:
    COCOName = [line.rstrip() for line in f]
detector.setCOCOName(COCOName)

out = detector.detectImage(IMAGE_PATH)

image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread returns None instead of raising when the file is unreadable.
    raise FileNotFoundError('failed to load image: ' + IMAGE_PATH)

for obj in out:
    # obj.rect is [xmin, ymin, xmax, ymax].
    xmin, ymin, xmax, ymax = obj.rect
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 255))
    # FONT_HERSHEY_PLAIN is the named form of the font id 1 used before.
    cv2.putText(image, obj.name, (xmin, ymin), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))

cv2.imshow('yolo', image)
cv2.waitKey(0)
结果
上面的python接口只支持输入文件, 只能测试单张图像。对于视频目标检测行不通,因此,在此基础上继续封装接口。
主要实现了2个方法:
- detectFromFile() 输入图像文件
- detectImage() 输入numpy.ndarray对象, BGR格式
C++代码
#if 1
#include<pybind11/pybind11.h>
#include<pybind11/stl.h>
#include<pybind11/numpy.h>
#include<string>
#include<vector>
#include<opencv2/opencv.hpp>
#include<yolo/include/yolo_v2_class.hpp>
#include"ndarray_converter.h"
using namespace cv;
namespace py = pybind11;
// Binding smoke test: returns a grayscale copy of a BGR image.
cv::Mat testCV(cv::Mat& img_bgr) {
    cv::Mat gray;
    cv::cvtColor(img_bgr, gray, cv::COLOR_BGR2GRAY);
    return gray;
}
// Converts an 8-bit cv::Mat (1 channel, or 3 channels in BGR order) into a
// float image_t with values scaled to [0, 1].
//
// Output layout is planar, channel by channel (for colour input: the R plane,
// then G, then B), matching the layout produced by cv_mat_to_image_t2() and by
// Detector::ipl_to_image(). The buffer is allocated with new[]; the caller
// owns it and must release it with delete[].
//
// Fixes over the previous revision: the function now actually returns the
// image (falling off the end was undefined behaviour), invalid input raises
// an exception that is really thrown, and the dead nullptr check after new[]
// (which throws std::bad_alloc rather than returning null) is gone.
//
// Throws std::runtime_error for an empty Mat, a non-8-bit Mat, or a channel
// count other than 1 or 3.
image_t cv_mat_to_image_t(cv::Mat& image) {
    const int channels = image.channels();
    if (image.empty() || image.depth() != CV_8U || (channels != 1 && channels != 3))
    {
        throw std::runtime_error("cv_mat_to_image_t: expected a non-empty 8-bit image with 1 or 3 channels");
    }

    const int rows = image.rows;
    const int cols = image.cols;
    // Number of values in one channel plane.
    const std::size_t plane = static_cast<std::size_t>(rows) * cols;

    image_t imaget;
    imaget.c = channels;
    imaget.h = rows;
    imaget.w = cols;
    imaget.data = new float[plane * channels];

    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            const std::size_t pixel = static_cast<std::size_t>(i) * cols + j;
            if (channels == 3)
            {
                // The Mat stores B, G, R per pixel; emit the planes as R, G, B.
                const cv::Vec3b& bgr = image.at<cv::Vec3b>(i, j);
                imaget.data[pixel] = bgr[2] / 255.0f;
                imaget.data[plane + pixel] = bgr[1] / 255.0f;
                imaget.data[2 * plane + pixel] = bgr[0] / 255.0f;
            }
            else
            {
                imaget.data[pixel] = static_cast<float>(image.at<uchar>(i, j)) / 255.0f;
            }
        }
    }
    return imaget;
}
// Returns an image_t describing a w x h image with c channels and no pixel
// buffer attached yet (data is null).
image_t make_empty_image(int w, int h, int c)
{
    image_t blank;
    blank.w = w;
    blank.h = h;
    blank.c = c;
    blank.data = nullptr;
    return blank;
}
// Returns a w x h x c image_t with an uninitialized float buffer attached.
// The buffer comes from new[], so it has to be released with delete[].
image_t make_image(int w, int h, int c)
{
    image_t img = make_empty_image(w, h, c);
    const int element_count = w * h * c;
    img.data = new float[element_count];
    return img;
}
// Converts an 8-bit BGR cv::Mat into a planar RGB float image_t with values
// scaled to [0, 1] (all R values first, then G, then B).
//
// The returned buffer is allocated by make_image(); the caller owns it.
//
// Fixes over the previous revision:
//  * the input is validated before conversion, with a message that says what
//    is actually wrong (the old "file not found" check ran after cvtColor and
//    had nothing to do with files);
//  * width, height and channel count are taken from the converted buffer, so
//    a 4-channel input (which cvtColor turns into 3 channels) no longer makes
//    the copy loop read past the end of that buffer;
//  * rows are addressed through Mat::ptr, so the row stride is honoured.
//
// Throws std::runtime_error for an empty or non-8-bit Mat; cv::cvtColor
// raises its own exception for unsupported channel counts.
image_t cv_mat_to_image_t2(cv::Mat& image) {
    if (image.empty())
        throw std::runtime_error("Image is empty");
    if (image.depth() != CV_8U)
        throw std::runtime_error("Image must be 8-bit");

    Mat dst;
    cv::cvtColor(image, dst, COLOR_BGR2RGB);

    const int w = dst.cols;
    const int h = dst.rows;
    const int c = dst.channels();

    image_t im = make_image(w, h, c);
    for (int k = 0; k < c; ++k) {
        for (int j = 0; j < h; ++j) {
            const unsigned char *row = dst.ptr<unsigned char>(j);
            for (int i = 0; i < w; ++i) {
                // Planar destination: channel k, row j, column i.
                const int dst_index = i + w * j + w * h * k;
                // Interleaved source within the row: pixel i, channel k.
                im.data[dst_index] = (float)row[k + c * i] / 255.;
            }
        }
    }
    return im;
}
// One detection result exposed to Python: class id, confidence score,
// bounding box and class label.
//
// Scalar members carry default values so a default-constructed Object never
// holds indeterminate data, and no destructor is declared so the compiler
// still generates move operations (cheaper growth of std::vector<Object>).
class Object
{
public:
    // Creates an empty result (id 0, confidence 0, empty rect and name).
    Object() = default;

    // Creates a result from its four components.
    //   id         - class index reported by the detector
    //   confidence - detection score
    //   rect       - bounding box as [xmin, ymin, xmax, ymax]
    //   name       - human-readable class label
    Object(int id, float confidence, std::vector<int> rect, std::string name)
        : id(id), confidence(confidence), rect(rect), name(name) {}

public:
    int id = 0;
    float confidence = 0.0f;
    std::vector<int> rect; //[xmin, ymin, xmax, ymax]
    std::string name;
};
class YoloDetector : public Detector {
public:
std::string weights_file;
std::string cfg_file;
private:
std::vector<std::string> classNames;
public:
YoloDetector(std::string weights_file, std::string cfg_file) :Detector(cfg_file, weights_file) {
this->weights_file = weights_file;
this->cfg_file = cfg_file;
};
~YoloDetector() {};
public:
void setCOCOName(std::vector<std::string> names) {
for (auto i: names)
{
this->classNames.push_back(i);
}
}
// image: BGR Format
std::vector<Object> detectFromFile(std::string image_name) {
auto res = this->detect(Detector::load_image(image_name));
std::vector<Object> objs;
for (auto i : res)
{
objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
}
return objs;
}
/*
overload
*/
std::vector<Object> detectImage(cv::Mat& image) {
/*float* data = new float[image.rows*image.cols*image.channels()];
if (data==nullptr)
{
std::runtime_error("failed to malloc men!");
}
int cnt = 0;
for (int i = 0; i < image.rows; i++)
{
for (int j = 0; j < image.cols; j++)
{
if (image.channels()==3)
{
float r = image.at<Vec3b>(i, j)[2] / 255.0f;
float g = image.at<Vec3b>(i, j)[1] / 255.0f;
float b = image.at<Vec3b>(i, j)[0] / 255.0f;
data[cnt] = r;
data[cnt+1] = g;
data[cnt+2] = b;
cnt += 3;
}
else
{
data[cnt] = static_cast<float>(image.at<uchar>(i, j)) / 255.0f;
cnt += 1;
}
}
}
image_t imaget;
imaget.c = image.channels();
imaget.h = image.rows;
imaget.w = image.cols;
imaget.data = data;
std::cout << "yolo: image input ok!" << std::endl;
std::cout << "yolo: start to detect" << std::endl;*/
auto imaget = cv_mat_to_image_t2(image);
auto res = this->detect(imaget);
std::cout << "yolo:finish to detect" << std::endl;
std::vector<Object> objs;
for (auto i : res)
{
objs.push_back(Object(i.obj_id, i.prob, { (int)i.x, (int)i.y, (int)(i.x + i.w), (int)(i.y + i.h) }, classNames[i.obj_id]));
}
return objs;
}
};
#if 1
// Stand-alone smoke test for YoloDetector::detectImage(): loads the model,
// detects on one in-memory image and shows the boxes.
// Returns 0 on success, -1 when the class-name file or the image cannot be read.
int main() {
    const std::string CFG_FILE = "D:\\YOLO-v3\\darknet-master\\cfg\\yolov3.cfg";
    const std::string WEIGHT_FILE = "D:\\YOLO-v3\\yolov3.weights";
    const std::string COCO_NAMES = "D:\\YOLO-v3\\darknet-master\\cfg\\coco.names";
    const std::string IMAGE_FILE = "D:\\YOLO-v3\\darknet-test.jpg";

    // Read the class-name file: one label per line, however many lines it has
    // (no fixed count of 80 and no fixed-size line buffer).
    std::vector<std::string> classNames;
    std::ifstream fileIn(COCO_NAMES, std::ios::in);
    if (!fileIn.is_open()) {
        std::cerr << "failed to load COCO.names!" << std::endl;
        return -1;
    }
    for (std::string line; std::getline(fileIn, line); ) {
        // Tolerate CRLF files that were opened without newline translation.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        classNames.push_back(line);
    }

    YoloDetector detector(WEIGHT_FILE, CFG_FILE);
    detector.setCOCOName(classNames);

    cv::Mat image = cv::imread(IMAGE_FILE);
    if (image.empty()) {
        // Check before detection: an empty Mat cannot be converted or drawn on.
        std::cerr << "failed to load image: " << IMAGE_FILE << std::endl;
        return -1;
    }
    auto detectObjects = detector.detectImage(image);
    for (const auto& obj : detectObjects) {
        // obj.rect is [xmin, ymin, xmax, ymax]; cv::Rect wants x, y, width, height.
        cv::rectangle(image,
            Rect(obj.rect[0], obj.rect[1], obj.rect[2] - obj.rect[0], obj.rect[3] - obj.rect[1]),
            Scalar(0, 255, 255), 2);
    }
    imshow("yolo", image);
    waitKey(0);
    return 0;
}
#endif
//.def("detectImage", py::overload_cast<cv::Mat>(&YoloDetector::detectImage))
//.def("detectImage", py::overload_cast<std::string>(&YoloDetector::detectImage))
#if 0
// Python module "yolov3": exposes Object, YoloDetector and a small
// conversion test function.
PYBIND11_MODULE(yolov3, m) {
    // Must run before any function taking or returning an image is called.
    NDArrayConverter::init_numpy();

    // Object: constructible from (id, confidence, rect, name); all four
    // fields are readable and writable from Python.
    py::class_<Object> objectBinding(m, "Object");
    objectBinding.def(py::init<int, float, std::vector<int>, std::string>());
    objectBinding.def_readwrite("id", &Object::id);
    objectBinding.def_readwrite("confidence", &Object::confidence);
    objectBinding.def_readwrite("rect", &Object::rect);
    objectBinding.def_readwrite("name", &Object::name);

    // Free function used to check the image conversion on its own.
    m.def("test_cv", &testCV, py::arg("image_bgr"));

    // YoloDetector: constructible from two strings, with its three public methods.
    py::class_<YoloDetector> detectorBinding(m, "YoloDetector");
    detectorBinding.def(py::init<std::string, std::string>());
    detectorBinding.def("detectFromFile", &YoloDetector::detectFromFile, py::arg("image_file"));
    detectorBinding.def("detectImage", &YoloDetector::detectImage, py::arg("image_bgr"));
    detectorBinding.def("setCOCOName", &YoloDetector::setCOCOName);
}
#endif
#endif // 0
结果
End
本文主要实现了windows平台下yolo-v3的快速测试使用,关于yolo网络结构的设计,yolo模型的训练,下期再详细介绍,感谢甜心的大力支持。