Formula
SmoothL1LossLayer computes the bounding-box regression part of the loss for one image, i.e. the term to the right of the plus sign in the Faster R-CNN multi-task loss:

$$L(\{p_i\},\{t_i\}) = \frac{1}{N_{cls}}\sum_i L_{cls}(p_i, p_i^*) + \lambda\,\frac{1}{N_{reg}}\sum_i p_i^*\, L_{reg}(t_i, t_i^*)$$

- i is the index of an anchor in the mini-batch
- pi is the predicted probability that anchor i is an object
- pi* is the ground-truth label: 1 if the anchor contains an object, 0 otherwise
- ti is a vector of predicted bounding-box coordinates
- ti* is the vector of ground-truth box coordinates

Lreg is the smooth L1 function below, evaluated at x = ti - ti*:

$$L_{reg}(t_i, t_i^*) = \text{smooth}_{L1}(t_i - t_i^*),\qquad \text{smooth}_{L1}(x) = \begin{cases} 0.5\,x^2 & \text{if } |x| < 1 \\ |x| - 0.5 & \text{otherwise} \end{cases}$$
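To make the piecewise definition concrete, here is a minimal standalone C++ sketch of the element-wise function and of the derivative used later in Backward_cpu; the names smooth_l1 and smooth_l1_grad are illustrative and not part of the layer:

#include <cmath>
#include <cstdio>

// smooth_L1(x) = 0.5*x^2 if |x| < 1, |x| - 0.5 otherwise
float smooth_l1(float x) {
  float abs_x = std::fabs(x);
  return abs_x < 1.f ? 0.5f * x * x : abs_x - 0.5f;
}

// d/dx smooth_L1(x) = x if |x| < 1, sign(x) otherwise
float smooth_l1_grad(float x) {
  if (std::fabs(x) < 1.f) return x;
  return x > 0.f ? 1.f : -1.f;  // |x| >= 1 here, so x cannot be 0
}

int main() {
  const float xs[] = {-2.f, -0.5f, 0.f, 0.5f, 2.f};
  for (float x : xs) {
    std::printf("x=%+.1f  smooth_l1=%.3f  grad=%+.1f\n", x, smooth_l1(x), smooth_l1_grad(x));
  }
  return 0;
}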
Related content
- the caffe_add, caffe_sub, caffe_mul, caffe_div functions
- the caffe_cpu_asum function
- the caffe_cpu_axpby function
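For reference, their semantics as declared in caffe/util/math_functions.hpp (arguments abbreviated; all operate element-wise on length-N arrays):

// caffe_add(N, a, b, y)  ->  y[i] = a[i] + b[i]
// caffe_sub(N, a, b, y)  ->  y[i] = a[i] - b[i]
// caffe_mul(N, a, b, y)  ->  y[i] = a[i] * b[i]
// caffe_div(N, a, b, y)  ->  y[i] = a[i] / b[i]
// caffe_cpu_asum(N, x)   ->  returns sum_i |x[i]|
// caffe_cpu_axpby(N, alpha, x, beta, y)  ->  y[i] = alpha * x[i] + beta * y[i]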
Code functionality
Forward
smooth L1 is the distance loss introduced in Fast R-CNN and reused for bounding-box regression in Faster R-CNN; the paper notes that it is less sensitive to outliers than an L2 loss. The py-faster-rcnn variant of this layer takes four bottoms (predict, target, inside_weights, outside_weights) and is more general than the formula in the paper; the version analyzed below takes two or three bottoms: predict (ti), target (ti*), and an optional per-element weight (pi*). What Forward_cpu computes is

$$\text{loss} = \frac{1}{N}\sum_i \text{smooth}_{L1}\big(w_i\,(t_i - t_i^*)\big)$$

where the sum runs over every element of the prediction blob, w_i defaults to 1 when no weight bottom is given, and N is the batch size bottom[0]->num().
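A minimal sketch of the same computation on flat arrays (illustrative only; it mirrors Forward_cpu below, with the weight pointer possibly null):

#include <cmath>

// Reference forward pass on plain arrays.
// pred/target/weight have `count` elements; weight may be nullptr; num is the batch size.
float smooth_l1_loss_forward(const float* pred, const float* target,
                             const float* weight, int count, int num) {
  float loss = 0.f;
  for (int i = 0; i < count; ++i) {
    float d = pred[i] - target[i];   // ti - ti*
    if (weight) d *= weight[i];      // multiply by pi*, masking out negative anchors
    float a = std::fabs(d);
    loss += (a < 1.f) ? 0.5f * d * d : a - 0.5f;
  }
  return loss / num;                 // normalized by bottom[0]->num()
}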
Backward
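The backward pass applies the derivative of smooth L1 element-wise to the cached difference d_i = w_i (t_i - t_i*), scales it by the top loss weight, and writes it with opposite signs into the two bottoms:

$$\frac{\partial\,\text{loss}}{\partial t_i} = \frac{1}{N}\,\text{smooth}_{L1}'(d_i),\qquad \frac{\partial\,\text{loss}}{\partial t_i^*} = -\frac{1}{N}\,\text{smooth}_{L1}'(d_i),\qquad \text{smooth}_{L1}'(x) = \begin{cases} x & |x| < 1 \\ \operatorname{sign}(x) & \text{otherwise} \end{cases}$$

The code does not multiply by w_i again via the chain rule, but since pi* is either 0 or 1 (and d_i = 0 whenever pi* = 0) the result is the same.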
Source code analysis
// ------------------------------------------------------------------
// Fast R-CNN
// copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// Modified by Wei Liu
// ------------------------------------------------------------------
#include <vector>
#include "caffe/layers/smooth_L1_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
//one-time initialization of the layer
template <typename Dtype>
void SmoothL1LossLayer<Dtype>::LayerSetUp(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
LossLayer<Dtype>::LayerSetUp(bottom, top);
//bottom[0] holds the predicted coordinates, i.e. ti
//bottom[1] holds the ground-truth coordinates, i.e. ti*
//bottom[2] holds pi*: 1 where the anchor contains an object, 0 otherwise
//so if three bottoms are supplied, bottom[2] stores the per-element weights
has_weights_ = (bottom.size() == 3);
}
template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Reshape(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
LossLayer<Dtype>::Reshape(bottom, top);
CHECK_EQ(bottom[0]->channels(), bottom[1]->channels());
CHECK_EQ(bottom[0]->height(), bottom[1]->height());
CHECK_EQ(bottom[0]->width(), bottom[1]->width());
if (has_weights_) {
CHECK_EQ(bottom[0]->channels(), bottom[2]->channels());
CHECK_EQ(bottom[0]->height(), bottom[2]->height());
CHECK_EQ(bottom[0]->width(), bottom[2]->width());
}
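//diff_ caches the element-wise (weighted) difference; errors_ caches the element-wise smooth L1 value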
diff_.Reshape(bottom[0]->num(), bottom[0]->channels(),
bottom[0]->height(), bottom[0]->width());
errors_.Reshape(bottom[0]->num(), bottom[0]->channels(),
bottom[0]->height(), bottom[0]->width());
}
template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
int count = bottom[0]->count();
caffe_sub(
count,
bottom[0]->cpu_data(), //ti
bottom[1]->cpu_data(), //ti*
diff_.mutable_cpu_data()); //a Blob exposes two access modes: cpu_data() is const (read-only),
//while mutable_cpu_data() allows writes (the same applies to a blob's diff)
//diff_ := ti - ti*
if (has_weights_) { //multiply by the weights: pi* in the loss formula above, 1 for positive anchors
caffe_mul(
count,
bottom[2]->cpu_data(), //pi*
diff_.cpu_data(),
diff_.mutable_cpu_data()); //diff_ := pi* * (ti - ti*)
}
const Dtype* diff_data = diff_.cpu_data();
Dtype* error_data = errors_.mutable_cpu_data();
//apply smooth L1 element-wise
for (int i = 0; i < count; ++i) {
Dtype val = diff_data[i];
Dtype abs_val = fabs(val);
if (abs_val < 1.) {
error_data[i] = 0.5 * val * val;
} else {
error_data[i] = abs_val - 0.5;
}
}
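//total loss = sum of all element-wise smooth L1 terms, normalized by the batch size num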
top[0]->mutable_cpu_data()[0] =
caffe_cpu_asum(count, errors_.cpu_data()) / bottom[0]->num();
}
template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
int count = diff_.count();
Dtype* diff_data = diff_.mutable_cpu_data();
for (int i = 0; i < count; ++i) {
Dtype val = diff_data[i];
// f'(x) = x if |x| < 1
// = sign(x) otherwise
if (fabs(val) < 1.) {
diff_data[i] = val;
} else {
diff_data[i] = (Dtype(0) < val) - (val < Dtype(0)); //branchless sign(val): evaluates to +1, -1, or 0
}
}
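//propagate to both bottoms: the predictions (i == 0) get sign +1, the targets (i == 1) get sign -1,
//each scaled by the top loss weight and normalized by num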
for (int i = 0; i < 2; ++i) {
if (propagate_down[i]) {
const Dtype sign = (i == 0) ? 1 : -1;
const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
caffe_cpu_axpby( //b = alpha * a + beta * b
bottom[i]->count(), // count
alpha, // alpha
diff_.cpu_data(), // a
Dtype(0), // beta
bottom[i]->mutable_cpu_diff()); // b
}
}
}
#ifdef CPU_ONLY
STUB_GPU(SmoothL1LossLayer);
#endif
INSTANTIATE_CLASS(SmoothL1LossLayer);
REGISTER_LAYER_CLASS(SmoothL1Loss);
} // namespace caffe
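For context, a sketch of how this layer might appear in a training prototxt; the blob names are illustrative, and the layer type string matches REGISTER_LAYER_CLASS(SmoothL1Loss):

layer {
  name: "loss_bbox"
  type: "SmoothL1Loss"
  bottom: "bbox_pred"      # ti  -- predicted box coordinates
  bottom: "bbox_targets"   # ti* -- ground-truth regression targets
  bottom: "bbox_weights"   # pi* -- optional per-element weights
  top: "loss_bbox"
  loss_weight: 1
}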