方向梯度直方图(Histogram of Oriented Gradient, HOG)特征是一种在计算机视觉和图像处理中用来进行物体检测的特征描述子。
它通过计算和统计图像局部区域的梯度方向直方图来构成特征。
HOG特征结合SVM分类器在图像识别中应用非常广泛,尤其是在行人检测中,这是传统机器学习里比较成功的算法之一。当然,现在这一领域基本都是深度学习的天下了。
核心思想: 图像中,局部目标的表象和形状能够被梯度或边缘的方向密度分布很好地描述。本质是梯度的统计信息,而梯度主要存在于边缘的地方。
具体流程: 图像(image)->滑动检测窗口(win)->图像块(block)->细胞单元(cell)
1、将图像灰度化;
2、采用Gamma校正对输入图像进行颜色空间的标准化(归一化);目的是调节图像的对比度,降低图像局部的阴影和光照变化所造成的影响,同时可以抑制噪音的干扰,得到一幅图像(image)
3、计算图像每个像素的梯度(包括大小和方向);主要是为了捕获轮廓信息,同时进一步弱化光照的干扰。
4、选择滑动窗口(win)
5、在滑动窗口中选择一块(block)
6、在块中划分cell单元(cell)
7、统计每个cell的梯度直方图(不同梯度的个数),即可形成每个cell的descriptor描述子(描述向量);
8、将滑动窗口(win)内的所有块(block)的HOG特征descriptor串联起来,就可以得到该滑动窗口的HOG特征descriptor了。
9、如此按滑动步长一步步移动滑动窗口,直到图像的结束。
如何计算一个细胞(cell)中的梯度方向直方图呢?
细胞单元中的每一个像素点都为某个基于方向的直方图通道投票。
投票是采取加权投票的方式,即每一票都是带有权值的,这个权值是根据该像素点的梯度幅度计算出来的。可以采用幅值本身或者它的函数来表示这个权值,实际测试表明: 使用幅值本身来表示权值能获得最佳的效果,当然,也可以选择幅值的函数来表示,比如幅值的平方根、幅值的平方、幅值的截断形式等。细胞单元可以是矩形的,也可以是径向(圆形,radial)的。直方图通道平均分布在0°-180°(无向)或0°-360°(有向)范围内。经研究发现,采用无向的梯度和9个直方图通道,能在行人检测试验中取得最佳的效果。在这种情况下,每个直方图通道覆盖的方向范围为180/9=20度。
这是HOG特征descriptor的直观显示:
原图:
HOG直观显示:
将其放大可以看到那些cell中的descriptor
这是直观显示HOG特征的代码
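注意: 这段代码把整幅图像当作一个检测窗口(winSize)。使用默认HOG参数(blockSize=16x16, blockStride=8x8, cellSize=8x8)时,winSize的宽和高都必须是8的整数倍且不小于16,否则hog.compute会因断言失败而报错!!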
#include <opencv2/opencv.hpp>
#include <cstdio>
#include <cstdlib>
#include <Windows.h>
//#include "opencvtest.h"
using namespace std;
using namespace cv;
// Renders a HOG descriptor as an image: for every cell, one line per
// orientation bin is drawn through the cell centre, with a length
// proportional to that bin's gradient strength averaged over all blocks
// that contain the cell.
//
// Layout this function assumes for descriptorValues (NOTE(review): this is
// presumably what cv::HOGDescriptor::compute produces for a single window with
// blockSize == 2 * cellSize, blockStride == cellSize and 9 bins - confirm
// when using non-default HOG parameters):
//   - blocks are stored with x as the outer index and y as the inner index;
//   - each block holds 4 cells in the order (x,y), (x,y+1), (x+1,y), (x+1,y+1);
//   - each cell holds 9 bin values.
//
// Parameters:
//   origImg          - background image; only read, then upscaled by scaleFactor.
//   descriptorValues - HOG feature vector laid out as described above.
//   winSize          - size in pixels of the window the descriptor was computed on.
//   cellSize         - size in pixels of one HOG cell.
//   scaleFactor      - integer zoom applied to the background and to every drawn coordinate.
//   viz_factor       - multiplier applied to the length of the drawn lines.
//
// Returns the upscaled background with the cell grid (black) and the
// orientation lines (white) drawn on it.
//
// Raises cv::Exception (through CV_Assert) when cellSize is not positive,
// winSize is negative, or descriptorValues is too short for the block grid
// implied by winSize / cellSize.
Mat get_hogdescriptor_visual_image(Mat& origImg,
    vector<float>& descriptorValues,
    Size winSize,
    Size cellSize,
    int scaleFactor,
    double viz_factor)
{
    CV_Assert(cellSize.width > 0 && cellSize.height > 0);
    CV_Assert(winSize.width >= 0 && winSize.height >= 0);

    Mat visual_image;
    resize(origImg, visual_image, Size(origImg.cols*scaleFactor, origImg.rows*scaleFactor));

    const int gradientBinSize = 9;
    const int cellsPerBlock = 4;
    // 180 degrees (pi radians) split evenly over the bins.
    const float radRangeForOneBin = static_cast<float>(CV_PI / gradientBinSize);

    const int cells_in_x_dir = winSize.width / cellSize.width;
    const int cells_in_y_dir = winSize.height / cellSize.height;
    const size_t cellCount = static_cast<size_t>(cells_in_x_dir) * static_cast<size_t>(cells_in_y_dir);

    // Flat row-major storage: cell (x, y) lives at index y * cells_in_x_dir + x,
    // and its bins are contiguous in gradientStrengths.
    vector<float> gradientStrengths(cellCount * gradientBinSize, 0.0f);
    vector<int> cellUpdateCounter(cellCount, 0);

    // nr of blocks = nr of cells - 1: a new block starts on every cell
    // (overlapping blocks) except the last one in each direction.
    const int blocks_in_x_dir = cells_in_x_dir - 1;
    const int blocks_in_y_dir = cells_in_y_dir - 1;

    // Refuse to read past the end of the descriptor.
    if (blocks_in_x_dir > 0 && blocks_in_y_dir > 0)
    {
        const size_t requiredValues = static_cast<size_t>(blocks_in_x_dir) * static_cast<size_t>(blocks_in_y_dir)
            * cellsPerBlock * gradientBinSize;
        CV_Assert(descriptorValues.size() >= requiredValues);
    }

    // Accumulate the gradient strengths of every block into its cells.
    size_t descriptorDataIdx = 0;
    for (int blockx = 0; blockx < blocks_in_x_dir; blockx++)
    {
        for (int blocky = 0; blocky < blocks_in_y_dir; blocky++)
        {
            for (int cellNr = 0; cellNr < cellsPerBlock; cellNr++)
            {
                // cellNr 0..3 -> offsets (0,0), (0,+1 row), (+1 col,0), (+1 col,+1 row)
                const int cellx = blockx + (cellNr >= 2 ? 1 : 0);
                const int celly = blocky + (cellNr % 2);
                const size_t cellIdx = static_cast<size_t>(celly) * cells_in_x_dir + cellx;
                float* cellBins = &gradientStrengths[cellIdx * gradientBinSize];

                for (int bin = 0; bin < gradientBinSize; bin++)
                {
                    cellBins[bin] += descriptorValues[descriptorDataIdx];
                    descriptorDataIdx++;
                }

                // Overlapping blocks update the same cell several times;
                // remember how often so the sum can be averaged below.
                cellUpdateCounter[cellIdx]++;
            }
        }
    }

    // Turn the accumulated sums into averages.
    for (size_t cellIdx = 0; cellIdx < cellCount; cellIdx++)
    {
        const int updates = cellUpdateCounter[cellIdx];
        if (updates == 0)
            continue; // cell belongs to no block: keep zeros instead of dividing by zero
        const float NrUpdatesForThisCell = static_cast<float>(updates);
        float* cellBins = &gradientStrengths[cellIdx * gradientBinSize];
        for (int bin = 0; bin < gradientBinSize; bin++)
            cellBins[bin] /= NrUpdatesForThisCell;
    }

    cout << "winSize = " << winSize << endl;
    cout << "cellSize = " << cellSize << endl;
    cout << "blockSize = " << cellSize * 2 << endl;
    cout << "blockNum = " << blocks_in_x_dir << "×" << blocks_in_y_dir << endl;
    cout << "descriptorDataIdx = " << descriptorDataIdx << endl;

    // Unit direction of each bin, taken at the middle of the bin
    // (10, 30, ..., 170 degrees); identical for every cell, so computed once.
    float dirVecX[gradientBinSize];
    float dirVecY[gradientBinSize];
    for (int bin = 0; bin < gradientBinSize; bin++)
    {
        const float currRad = bin * radRangeForOneBin + radRangeForOneBin / 2;
        dirVecX[bin] = cos(currRad);
        dirVecY[bin] = sin(currRad);
    }

    const float maxVecLen = static_cast<float>(cellSize.width / 2);
    const float scale = static_cast<float>(viz_factor); // purely visual, makes the lines easier to see

    // Draw the cells.
    for (int celly = 0; celly < cells_in_y_dir; celly++)
    {
        for (int cellx = 0; cellx < cells_in_x_dir; cellx++)
        {
            const int drawX = cellx * cellSize.width;
            const int drawY = celly * cellSize.height;
            const int mx = drawX + cellSize.width / 2;
            const int my = drawY + cellSize.height / 2;

            // Cell outline.
            rectangle(visual_image,
                Point(drawX*scaleFactor, drawY*scaleFactor),
                Point((drawX + cellSize.width)*scaleFactor,
                    (drawY + cellSize.height)*scaleFactor),
                CV_RGB(0, 0, 0),
                1);

            const size_t cellIdx = static_cast<size_t>(celly) * cells_in_x_dir + cellx;
            const float* cellBins = &gradientStrengths[cellIdx * gradientBinSize];

            // One line per orientation bin, centred on the cell.
            for (int bin = 0; bin < gradientBinSize; bin++)
            {
                const float currentGradStrength = cellBins[bin];
                if (currentGradStrength == 0)
                    continue; // no line to draw

                const float x1 = mx - dirVecX[bin] * currentGradStrength * maxVecLen * scale;
                const float y1 = my - dirVecY[bin] * currentGradStrength * maxVecLen * scale;
                const float x2 = mx + dirVecX[bin] * currentGradStrength * maxVecLen * scale;
                const float y2 = my + dirVecY[bin] * currentGradStrength * maxVecLen * scale;

                line(visual_image,
                    Point(static_cast<int>(x1*scaleFactor), static_cast<int>(y1*scaleFactor)),
                    Point(static_cast<int>(x2*scaleFactor), static_cast<int>(y2*scaleFactor)),
                    CV_RGB(255, 255, 255),
                    1);
            }
        }
    }

    return visual_image;
}
// Loads "1.jpg", computes its HOG descriptor with the default HOGDescriptor
// parameters using the whole (size-normalized) image as a single window,
// renders the descriptor on a black background, shows the source and the
// rendering in windows, and writes the rendering to "hogvisualize.jpg".
// Returns EXIT_FAILURE when the input image cannot be loaded, 0 otherwise.
int main()
{
    HOGDescriptor hog; // default HOG parameters

    Mat src = imread("1.jpg");
    if (src.empty())
    {
        // imread signals a missing or unreadable file with an empty Mat, not an exception.
        fprintf(stderr, "Failed to load image \"1.jpg\"\n");
        return EXIT_FAILURE;
    }

    // hog.compute requires (winSize - blockSize) to be a whole number of
    // blockStride steps in each direction. Snap the image size down onto
    // that grid (and up to at least one block) instead of failing.
    int width = hog.blockSize.width;
    if (src.cols > width)
        width += (src.cols - width) / hog.blockStride.width * hog.blockStride.width;
    int height = hog.blockSize.height;
    if (src.rows > height)
        height += (src.rows - height) / hog.blockStride.height * hog.blockStride.height;

    hog.winSize = Size(width, height);

    vector<float> des; // HOG feature vector
    Mat dst;
    resize(src, dst, Size(width, height)); // normalize the image size to the window size
    imshow("src", src);
    hog.compute(dst, des);

    // Black background, because the HOG lines are drawn in white.
    Mat background = Mat::zeros(Size(width, height), CV_8UC1);
    Mat d = get_hogdescriptor_visual_image(background, des, hog.winSize, hog.cellSize, 3, 2.5);
    imshow("dst", d);
    imwrite("hogvisualize.jpg", d);
    waitKey();
    return 0;
}