Center Loss for discriminative feature learning in face recognition. Paper: A Discriminative Feature Learning Approach for Deep Face Recognition. Project: [caffe-face]
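For reference, the center loss of the paper (its Eq. 2) penalizes the squared distance between each deep feature and the center of its class; the implementation below additionally averages over the mini-batch in Forward_cpu:

$$\mathcal{L}_C = \frac{1}{2m} \sum_{i=1}^{m} \left\lVert x_i - c_{y_i} \right\rVert_2^2$$

where $x_i$ is the deep feature (fc5 output) of sample $i$, $y_i$ its label, $c_{y_i}$ the corresponding class center, and $m$ the mini-batch size.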
<h2>1. Usage in the prototxt</h2>
layer {
name: "fc5"
type: "InnerProduct"
bottom: "res5_6"
top: "fc5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 512 # feature extraction layer; feature dimension
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
############## center loss ###############
layer {
name: "center_loss"
type: "CenterLoss"
bottom: "fc5"
bottom: "label"
top: "center_loss"
param {
lr_mult: 1
decay_mult: 2
}
center_loss_param {
num_output: 1000 # number of classes
center_filler {
type: "xavier"
}
}
loss_weight: 0.008 # center loss weight (lambda)
}
############## softmax loss ###############
# The softmax loss needs an additional fully connected layer on top of fc5
layer {
name: "fc6"
type: "InnerProduct"
bottom: "fc5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1000 # number of classes
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "softmax_loss"
type: "SoftmaxWithLoss"
bottom: "fc6"
bottom: "label"
top: "softmax_loss"
}
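Caffe's loss_weight mechanism combines the two loss layers into the joint supervision of the paper:

$$\mathcal{L} = \mathcal{L}_S + \lambda\,\mathcal{L}_C$$

where $\mathcal{L}_S$ is the softmax loss (implicit loss_weight of 1) and $\lambda$ is the loss_weight of the CenterLoss layer, 0.008 in the prototxt above.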
<h2>2. Definition in caffe.proto</h2>
message LayerParameter {
optional CenterLossParameter center_loss_param = 149;
}
message CenterLossParameter {
optional uint32 num_output = 1; // The number of outputs for the layer; must equal the number of classes
optional FillerParameter center_filler = 2; // The filler for the centers
// The first axis to be lumped into a single inner product computation;
// all preceding axes are retained in the output.
// May be negative to index from the end (e.g., -1 for the last axis).
optional int32 axis = 3 [default = 1];
}
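Given these parameters, LayerSetUp (below) allocates a single learnable blob holding one center per class:

$$C \in \mathbb{R}^{N \times K}, \qquad N = \texttt{num\_output}, \qquad K = \prod_{a \ge \texttt{axis}} \mathrm{shape}_a(\mathrm{bottom})$$

For the prototxt above this is a 1000 × 512 matrix: 1000 classes, each with a 512-dimensional center matching fc5.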
<h2>3. center_loss_layer.hpp</h2>
#ifndef CAFFE_CENTER_LOSS_LAYER_HPP_
#define CAFFE_CENTER_LOSS_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/loss_layer.hpp"
namespace caffe {
template <typename Dtype>
class CenterLossLayer : public LossLayer<Dtype> {
public:
explicit CenterLossLayer(const LayerParameter& param)
: LossLayer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "CenterLoss"; }
virtual inline int ExactNumBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return -1; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top); // forward pass, CPU implementation
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top); // forward pass, GPU implementation
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); // backward pass, CPU implementation
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); // backward pass, GPU implementation
int M_; // batch size
int K_; // feature dimension
int N_; // number of classes (num_output)
Blob<Dtype> distance_; // cached per-sample distances x_i - c_{y_i}
Blob<Dtype> variation_sum_; // per-class sum of (c_j - x_i), used for the center update
};
} // namespace caffe
#endif // CAFFE_CENTER_LOSS_LAYER_HPP_
<h2>4. center_loss_layer.cpp - CPU implementation</h2>
#include <vector>
#include "caffe/filler.hpp"
#include "caffe/layers/center_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
template <typename Dtype>
void CenterLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const int num_output = this->layer_param_.center_loss_param().num_output();
N_ = num_output;
const int axis = bottom[0]->CanonicalAxisIndex(
this->layer_param_.center_loss_param().axis());
// Dimensions starting from "axis" are "flattened" into a single
// length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
// and axis == 1, N inner products with dimension CHW are performed.
K_ = bottom[0]->count(axis);
// Check if we need to set up the weights
if (this->blobs_.size() > 0) {
LOG(INFO) << "Skipping parameter initialization";
} else {
this->blobs_.resize(1);
// Initialize the weight
vector<int> center_shape(2);
center_shape[0] = N_;
center_shape[1] = K_;
this->blobs_[0].reset(new Blob<Dtype>(center_shape));
// fill the weights
shared_ptr<Filler<Dtype> > center_filler(GetFiller<Dtype>(
this->layer_param_.center_loss_param().center_filler()));
center_filler->Fill(this->blobs_[0].get());
} // parameter initialization
this->param_propagate_down_.resize(this->blobs_.size(), true);
}
template <typename Dtype>
void CenterLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
CHECK_EQ(bottom[1]->channels(), 1);
CHECK_EQ(bottom[1]->height(), 1);
CHECK_EQ(bottom[1]->width(), 1);
M_ = bottom[0]->num();
// The top shape will be the bottom shape with the flattened axes dropped,
// and replaced by a single axis with dimension num_output (N_).
LossLayer<Dtype>::Reshape(bottom, top);
distance_.ReshapeLike(*bottom[0]);
variation_sum_.ReshapeLike(*this->blobs_[0]);
}
template <typename Dtype>
void CenterLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
const Dtype* label = bottom[1]->cpu_data();
const Dtype* center = this->blobs_[0]->cpu_data();
Dtype* distance_data = distance_.mutable_cpu_data();
// the i-th distance_data
for (int i = 0; i < M_; i++) {
const int label_value = static_cast<int>(label[i]);
// D(i,:) = X(i,:) - C(y(i),:)
caffe_sub(K_, bottom_data + i * K_, center + label_value * K_, distance_data + i * K_);
}
Dtype dot = caffe_cpu_dot(M_ * K_, distance_.cpu_data(), distance_.cpu_data());
Dtype loss = dot / M_ / Dtype(2);
top[0]->mutable_cpu_data()[0] = loss;
}
template <typename Dtype>
void CenterLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
// Gradient with respect to centers
if (this->param_propagate_down_[0]) {
const Dtype* label = bottom[1]->cpu_data();
Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff();
Dtype* variation_sum_data = variation_sum_.mutable_cpu_data();
const Dtype* distance_data = distance_.cpu_data();
// sum_{y_i==j}
caffe_set(N_ * K_, (Dtype)0., variation_sum_.mutable_cpu_data());
for (int n = 0; n < N_; n++) {
int count = 0;
for (int m = 0; m < M_; m++) {
const int label_value = static_cast<int>(label[m]);
if (label_value == n) {
count++;
caffe_sub(K_, variation_sum_data + n * K_, distance_data + m * K_, variation_sum_data + n * K_);
}
}
caffe_axpy(K_, (Dtype)1. / (count + (Dtype)1.), variation_sum_data + n * K_, center_diff + n * K_);
}
}
// Gradient with respect to bottom data
if (propagate_down[0]) {
caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff());
caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_, bottom[0]->mutable_cpu_diff());
}
if (propagate_down[1]) {
LOG(FATAL) << this->type()
<< " Layer cannot backpropagate to label inputs.";
}
}
#ifdef CPU_ONLY
STUB_GPU(CenterLossLayer);
#endif
INSTANTIATE_CLASS(CenterLossLayer);
REGISTER_LAYER_CLASS(CenterLoss);
} // namespace caffe
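For reference, Backward_cpu implements the gradients of the paper (its Eq. 3 and the center update of Eq. 4, with an extra $1/m$ from the batch-averaged forward pass). The gradient w.r.t. the features is just the cached distance, and each center moves toward the mean of its assigned samples, damped by the $+1$ in the denominator so that classes absent from the batch get a zero update:

$$\frac{\partial \mathcal{L}_C}{\partial x_i} = \frac{1}{m}\left(x_i - c_{y_i}\right), \qquad \Delta c_j = \frac{\sum_{i=1}^{m} \delta(y_i = j)\,(c_j - x_i)}{1 + \sum_{i=1}^{m} \delta(y_i = j)}$$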
<h2>5. center_loss_layer.cu - GPU implementation</h2>
#include <vector>
#include "caffe/filler.hpp"
#include "caffe/layers/center_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
template <typename Dtype>
__global__ void Compute_distance_data_gpu(int nthreads, const int K, const Dtype* bottom,
const Dtype* label, const Dtype* center, Dtype* distance) {
CUDA_KERNEL_LOOP(index, nthreads) {
int m = index / K;
int k = index % K;
const int label_value = static_cast<int>(label[m]);
// distance(i) = x(i) - c_{y(i)}
distance[index] = bottom[index] - center[label_value * K + k];
}
}
template <typename Dtype>
__global__ void Compute_center_diff_gpu(int nthreads, const int M, const int K,
const Dtype* label, const Dtype* distance, Dtype* variation_sum,
Dtype* center_diff) {
CUDA_KERNEL_LOOP(index, nthreads) {
int count = 0;
for (int m = 0; m < M; m++) {
const int label_value = static_cast<int>(label[m]);
if (label_value == index) {
count++;
for (int k = 0; k < K; k++) {
variation_sum[index * K + k] -= distance[m * K + k];
}
}
}
for (int k = 0; k < K; k++) {
center_diff[index * K + k] = variation_sum[index * K + k] / (count + (Dtype)1.);
}
}
}
template <typename Dtype>
void CenterLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
int nthreads = M_ * K_;
Compute_distance_data_gpu<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
CAFFE_CUDA_NUM_THREADS>>>(nthreads, K_, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
this->blobs_[0]->gpu_data(), distance_.mutable_gpu_data());
Dtype dot;
caffe_gpu_dot(M_ * K_, distance_.gpu_data(), distance_.gpu_data(), &dot);
Dtype loss = dot / M_ / Dtype(2);
top[0]->mutable_cpu_data()[0] = loss;
}
template <typename Dtype>
void CenterLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
int nthreads = N_;
caffe_gpu_set(N_ * K_, (Dtype)0., variation_sum_.mutable_gpu_data());
Compute_center_diff_gpu<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
CAFFE_CUDA_NUM_THREADS>>>(nthreads, M_, K_, bottom[1]->gpu_data(), distance_.gpu_data(),
variation_sum_.mutable_gpu_data(), this->blobs_[0]->mutable_gpu_diff());
if (propagate_down[0]) {
caffe_gpu_scale(M_ * K_, top[0]->cpu_diff()[0] / M_,
distance_.gpu_data(), bottom[0]->mutable_gpu_diff());
}
if (propagate_down[1]) {
LOG(FATAL) << this->type()
<< " Layer cannot backpropagate to label inputs.";
}
}
INSTANTIATE_LAYER_GPU_FUNCS(CenterLossLayer);
} // namespace caffe
<h2>6. Training with Center Loss</h2>
- Data preparation
The data format is identical to that of a softmax-based classification task, i.e. one image path and its label per line:
img1 label1
img2 label2
img3 label3
...
Labels start from 0. Set num_output of the CenterLossLayer to the total number of labels in the dataset; a sketch of such a list file follows below.
- Network training
Training then proceeds exactly like that of an ordinary classification network.
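A minimal sketch of such a list file (the CASIA-style paths are hypothetical); each line holds one image path and its zero-based label, and the file can be consumed by an ImageData layer or converted to LMDB with Caffe's convert_imageset tool:

casia/0000045/001.jpg 0
casia/0000045/002.jpg 0
casia/0000099/001.jpg 1
casia/0000099/003.jpg 1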
<h2>7. Testing</h2>
Extract features (e.g., from fc5) with the trained network model and compute the similarity between feature pairs, as in the sketch below.
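A minimal sketch of the verification step, assuming the fc5 features of two faces have already been extracted with the trained model; cosine similarity is a common choice here, though the section above does not mandate a particular metric:

#include <cmath>
#include <cstddef>
#include <vector>

// Cosine similarity between two feature vectors (e.g., 512-D fc5 outputs).
// Returns a value in [-1, 1]; thresholding it decides same/different identity.
float CosineSimilarity(const std::vector<float>& a, const std::vector<float>& b) {
  float dot = 0.f, norm_a = 0.f, norm_b = 0.f;
  for (std::size_t i = 0; i < a.size(); ++i) {
    dot    += a[i] * b[i];
    norm_a += a[i] * a[i];
    norm_b += b[i] * b[i];
  }
  return dot / (std::sqrt(norm_a) * std::sqrt(norm_b) + 1e-12f);
}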