yolo forward
//network.c
/*
 * Train the network on every batch contained in `d`.
 *
 * d.X.rows must be an exact multiple of net->batch (asserted). For each of
 * the d.X.rows / net->batch batches, get_next_batch() is called with
 * net->input and net->truth as destinations (per the original author's note
 * it copies the batch data into them), then train_network_datum() is run.
 *
 * Returns the sum of the per-batch errors divided by the total number of
 * rows processed (n * batch).
 */
float train_network(network *net, data d)
{
    assert(d.X.rows % net->batch == 0);

    const int batch = net->batch;
    const int n = d.X.rows / batch;   /* number of batches in d */
    float sum = 0;

    int step = 0;
    while (step < n) {
        /* Load batch number `step`, which starts at row step*batch. */
        get_next_batch(d, batch, step*batch, net->input, net->truth);
        /* One training pass over that batch. */
        sum += train_network_datum(net);
        ++step;
    }

    const int rows_trained = n*batch;
    return (float)sum/rows_trained;
}
/*
 * Run one training pass on the batch currently loaded in the network.
 *
 * Adds net->batch to the *net->seen counter, sets net->train, runs the
 * forward and backward passes and reads the resulting *net->cost. The
 * weights are updated through update_network() only when the number of
 * batches seen so far (seen / batch) is a multiple of net->subdivisions.
 *
 * Returns the cost read right after the backward pass.
 */
float train_network_datum(network *net)
{
    *net->seen += net->batch;   /* running count of samples trained on */
    net->train = 1;

    forward_network(net);
    backward_network(net);

    /* Capture the cost before a possible weight update. */
    const float loss = *net->cost;

    if (((*net->seen)/net->batch)%net->subdivisions != 0) {
        return loss;
    }

    update_network(net);
    return loss;
}
//network.c
/*
 * Run the forward pass of every layer in order.
 *
 * When compiled with GPU and netp->gpu_index >= 0 the work is delegated to
 * forward_network_gpu(). Otherwise the loop works on a by-value copy of
 * *netp: each layer's output becomes the input of the next layer, and a
 * layer whose `truth` field is set also supplies its output as the truth
 * pointer for the following layers. Those pointer changes stay in the local
 * copy. calc_network_cost() is finally called on the caller's network.
 */
void forward_network(network *netp)
{
#ifdef GPU
    if(netp->gpu_index >= 0){
        forward_network_gpu(netp);
        return;
    }
#endif
    network state = *netp;

    for (int idx = 0; idx < state.n; ++idx) {
        layer cur = state.layers[idx];
        state.index = idx;

        /* Clear this layer's delta buffer, if it has one, before it runs. */
        if (cur.delta) {
            fill_cpu(cur.outputs * cur.batch, 0, cur.delta, 1);
        }

        /* Dispatch to the layer's own forward function (e.g. the yolo layer's). */
        cur.forward(cur, state);

        /* Chain: this layer's output feeds the next layer. */
        state.input = cur.output;
        if (cur.truth) {
            state.truth = cur.output;
        }
    }

    calc_network_cost(netp);
}
//yolo_layer.c
//yolo层训练的时候的loss 计算
/*
 * Forward pass of a YOLO layer; in training mode it also builds the
 * per-entry deltas and the layer cost.
 *
 * Steps visible in this function:
 *   1. Copy net.input into l.output.
 *   2. (Only when not compiled with GPU) apply the LOGISTIC activation to
 *      the first two entries of every box slot and to entry 4 onwards
 *      (1 + l.classes entries).
 *   3. Zero l.delta; return here when net.train is not set.
 *   4. Per image: a pass over every predicted box that sets the entry-4
 *      (objectness) delta, then a pass over every ground-truth box that
 *      sets box, objectness and class deltas for one matching prediction.
 *   5. Store the squared magnitude of l.delta in *l.cost and print the
 *      running statistics.
 *
 * Ground truth is read from net.truth as (4 + 1) floats per box, with
 * image b starting at offset b*l.truths.
 */
void forward_yolo_layer(const layer l, network net)
{
int i,j,b,t,n;
// Start from the raw values produced by the previous layer.
memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
#ifndef GPU
// Built only without GPU: activate the outputs here, one box slot n at a time.
for (b = 0; b < l.batch; ++b){
for(n = 0; n < l.n; ++n){
// Entries 0 and 1 of slot n (2*l.w*l.h values); get_yolo_box reads these as the x/y offsets.
int index = entry_index(l, b, n*l.w*l.h, 0);
activate_array(l.output + index, 2*l.w*l.h, LOGISTIC);
// Entry 4 (used below as the objectness score) followed by the l.classes class entries.
index = entry_index(l, b, n*l.w*l.h, 4);
activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC);
}
}
#endif
memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
// Inference stops here; everything below only runs when training.
if(!net.train) return;
// Accumulators for the statistics printed at the end.
float avg_iou = 0;
float recall = 0;
float recall75 = 0;
float avg_cat = 0;
float avg_obj = 0;
float avg_anyobj = 0;
int count = 0;
int class_count = 0;
*(l.cost) = 0;
/*
 * First pass (per image): compare every predicted box with the ground
 * truth boxes. By default the objectness delta pushes towards 0. If the
 * best IOU exceeds l.ignore_thresh the objectness delta is cleared, so the
 * prediction is not penalised. If the best IOU also exceeds l.truth_thresh
 * the prediction is trained as a positive; the original author notes that
 * the cfg file sets this threshold to 1, which would make that branch
 * effectively unused (cfg not visible here).
 */
for (b = 0; b < l.batch; ++b) {
for (j = 0; j < l.h; ++j) {
for (i = 0; i < l.w; ++i) {
for (n = 0; n < l.n; ++n) {
// n is the box slot, (i, j) the grid cell: n*l.w*l.h + j*l.w + i is the location handed
// to entry_index, and entry 0 is where get_yolo_box starts reading the box coordinates.
int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h);
float best_iou = 0;
int best_t = 0;
// Find the ground-truth box with the highest IOU against this prediction.
for(t = 0; t < l.max_boxes; ++t){
// Truth t of image b: (4 + 1) floats per truth, image b starts at b*l.truths.
// (Author's note: l.truths = 90*(4 + 1); that value is not visible in this function.)
box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1);
// A truth whose x is 0 ends the scan of this image's truth list.
if(!truth.x) break;
float iou = box_iou(pred, truth);
if (iou > best_iou) {
best_iou = iou;
best_t = t;
}
}
/*
 * The steps below run once for every predicted box.
 */
// Index of entry 4 for this box slot and cell; used here as the objectness score.
int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4);
// Feeds the "No Obj" average printed at the end.
avg_anyobj += l.output[obj_index];
// Default target is 0 (no object): delta = 0 - output.
l.delta[obj_index] = 0 - l.output[obj_index];
// Overlap above l.ignore_thresh: clear the objectness delta so this prediction is not penalised.
// (Author's note: ignore_thresh is 0.5 for yolo v3 and 0.7 for yolo v3 tiny; cfg not visible here.)
if (best_iou > l.ignore_thresh) {
l.delta[obj_index] = 0;
}
// Overlap above l.truth_thresh: train as a positive (objectness target 1, plus class and box deltas).
// (Author's note: the cfg sets truth_thresh to 1.)
if (best_iou > l.truth_thresh) {
l.delta[obj_index] = 1 - l.output[obj_index];
// Fifth float of the best-matching truth is its class id, optionally remapped through l.map.
int class = net.truth[best_t*(4 + 1) + b*l.truths + 4];
if (l.map) class = l.map[class];
int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1);
delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0);
box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l.truths, 1);
// The scale 2 - w*h is larger for smaller truth boxes.
delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h);
}
}
}
}
/*
 * Second pass (same image b): for every ground-truth box find the anchor
 * whose shape matches it best. If that anchor is listed in this layer's
 * mask, compute the box, objectness and class deltas for the prediction
 * at the truth's grid cell; otherwise the truth is skipped by this layer.
 */
for(t = 0; t < l.max_boxes; ++t){
box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1);
if(!truth.x) break;
float best_iou = 0;
int best_n = 0;
// Grid cell containing the truth centre (truncated to int).
i = (truth.x * l.w);
j = (truth.y * l.h);
// Compare shapes only: move the truth to the origin, where the anchor boxes are also placed.
box truth_shift = truth;
truth_shift.x = truth_shift.y = 0;
for(n = 0; n < l.total; ++n){
// Anchor n as a box at the origin, with its width/height divided by the network input size.
box pred = {0};
pred.w = l.biases[2*n]/net.w;
pred.h = l.biases[2*n+1]/net.h;
float iou = box_iou(pred, truth_shift);
if (iou > best_iou){
best_iou = iou;
best_n = n;
}
}
// best_n indexes the best-matching anchor among all l.total anchors; look it up in this layer's mask.
// NOTE(review): int_index presumably returns the position of best_n in l.mask, or a negative value if absent - confirm.
int mask_n = int_index(l.mask, best_n, l.n);
// Only continue when the best anchor belongs to this layer.
if(mask_n >= 0){
int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
// Box-coordinate deltas; the returned IOU is used for the statistics below.
float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h);
// Objectness delta.
int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4);
avg_obj += l.output[obj_index];
// An object is present, so the target is 1: delta = 1 - output.
l.delta[obj_index] = 1 - l.output[obj_index];
// Class deltas.
int class = net.truth[t*(4 + 1) + b*l.truths + 4];
if (l.map) class = l.map[class];
int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1);
delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat);
++count;
++class_count;
if(iou > .5) recall += 1;
if(iou > .75) recall75 += 1;
avg_iou += iou;
}
}
}
// Layer cost: squared magnitude of the whole delta buffer.
*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
// NOTE(review): count and class_count stay 0 when no truth matched this layer, so the averages below divide by zero.
printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count);
}
获取box函数:
/*
 * Decode one predicted box from the raw layer output.
 *
 * x       - layer output buffer
 * biases  - anchor sizes, stored as (width, height) pairs
 * n       - anchor index into biases
 * index   - position of the box's first value in x; the four values are
 *           `stride` elements apart
 * i, j    - grid column / row of the cell
 * lw, lh  - grid width / height
 * w, h    - divisors for the anchor sizes (callers pass the network input size)
 *
 * The centre is (cell + offset) divided by the grid size; the size is
 * exp(value) * anchor divided by w / h.
 */
box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride)
{
    const float off_x = x[index];
    const float off_y = x[index + stride];
    const float log_w = x[index + 2*stride];
    const float log_h = x[index + 3*stride];

    const float anchor_w = biases[2*n];
    const float anchor_h = biases[2*n+1];

    box b;
    b.x = (i + off_x) / lw;
    b.y = (j + off_y) / lh;
    b.w = exp(log_w) * anchor_w / w;
    b.h = exp(log_h) * anchor_h / h;
    return b;
}
所以yolo Box的取值为:
//box.c
box float_to_box(float *f, int stride)
{
box b = {0};
b.x = f[0];
b.y = f[1*stride];
b.w = f[2*stride];
b.h = f[3*stride];
return b;
}
//计算iou
/*
 * Intersection over union of two boxes: the intersection area divided by
 * the union area.
 *
 * Returns 0 when the union area is exactly 0 (for example two zero-sized
 * boxes). Without the guard that case evaluates 0/0, and the resulting NaN
 * propagates into sums such as the average-IOU statistic.
 */
float box_iou(box a, box b)
{
    float i = box_intersection(a, b);
    float u = box_union(a, b);
    if (u == 0) return 0;
    return i/u;
}
/*
 * Area of the intersection of boxes a and b.
 *
 * The extent along each axis comes from overlap(); a negative extent on
 * either axis is treated as no intersection and yields 0.
 */
float box_intersection(box a, box b)
{
    const float span_x = overlap(a.x, a.w, b.x, b.w);
    const float span_y = overlap(a.y, a.h, b.y, b.h);

    return (span_x < 0 || span_y < 0) ? 0 : span_x*span_y;
}
/*
 * Area of the union of boxes a and b: the sum of both areas minus the
 * intersection area, which would otherwise be counted twice.
 */
float box_union(box a, box b)
{
    const float shared = box_intersection(a, b);
    return (a.w*a.h + b.w*b.h) - shared;
}
/*
 * Area shared by boxes a and b; 0 when overlap() reports a negative extent
 * along either axis.
 *
 * NOTE(review): this text already contains a definition of
 * box_intersection with the same body; a single translation unit can keep
 * only one of the two.
 */
float box_intersection(box a, box b)
{
    float extent_w = overlap(a.x, a.w, b.x, b.w);
    float extent_h = overlap(a.y, a.h, b.y, b.h);

    if (extent_w < 0) {
        return 0;
    }
    if (extent_h < 0) {
        return 0;
    }
    return extent_w*extent_h;
}
/*
 * Combine the per-axis overlap extents of a and b with the result of
 * derivative(a, b): the x and w components are multiplied by the vertical
 * overlap, the y and h components by the horizontal overlap.
 */
dbox dintersect(box a, box b)
{
    const float across = overlap(a.x, a.w, b.x, b.w);   /* horizontal overlap */
    const float down = overlap(a.y, a.h, b.y, b.h);     /* vertical overlap */
    const dbox d = derivative(a, b);

    dbox di;
    di.dx = d.dx*down;
    di.dw = d.dw*down;
    di.dy = d.dy*across;
    di.dh = d.dh*across;
    return di;
}
box_loss的计算函数:
//yolo_layer.c
// float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h);
/*
 * Write the four box-coordinate deltas for one prediction and return the
 * IOU between that prediction and the ground-truth box.
 *
 * The truth box is first converted into the same parameterisation as the
 * raw outputs: the x/y targets are the truth centre's offset inside cell
 * (i, j) in grid units, and the w/h targets are the log of the truth size
 * (scaled by w / h) relative to anchor n. Each delta is
 * scale * (target - raw output), stored at index + k*stride for k = 0..3.
 */
float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride)
{
    /* Decode the current prediction and measure its overlap with the truth. */
    const box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride);
    const float iou = box_iou(pred, truth);

    /* Targets in raw-output space, in the order x, y, w, h. */
    const float target[4] = {
        truth.x*lw - i,
        truth.y*lh - j,
        log(truth.w*w / biases[2*n]),
        log(truth.h*h / biases[2*n + 1]),
    };

    for (int k = 0; k < 4; ++k) {
        const int at = index + k*stride;
        delta[at] = scale * (target[k] - x[at]);
    }

    return iou;
}
从上面的 delta_yolo_box 可以看到真实框目标值 (tx, ty, tw, th) 的计算方式。
class_LOSS的计算:
//delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat);
/*
 * Write the class deltas for one prediction.
 *
 * output, delta - layer output and delta buffers
 * index         - position of the first class entry; class k lives at
 *                 index + stride*k
 * class         - labelled class id
 * classes       - number of classes
 * avg_cat       - optional accumulator (may be null) that receives the
 *                 output value of the labelled class
 *
 * If the delta of the first class entry is already nonzero, only the
 * labelled class is touched: its delta becomes 1 - output. Otherwise every
 * class gets target - output, with target 1 for the labelled class and 0
 * for all others.
 */
void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat)
{
    if (delta[index] != 0) {
        /* Deltas were already written here: only refresh the labelled class. */
        const int slot = index + stride*class;
        delta[slot] = 1 - output[slot];
        if (avg_cat) {
            *avg_cat += output[slot];
        }
        return;
    }

    int k = 0;
    while (k < classes) {
        const int slot = index + stride*k;
        const int is_label = (k == class);

        delta[slot] = (is_label ? 1 : 0) - output[slot];
        if (is_label && avg_cat) {
            *avg_cat += output[slot];
        }
        ++k;
    }
}