opengaze/src/gaze_predictor.cpp
2019-01-10 13:26:03 +01:00

160 lines
5.2 KiB
C++

#include "gaze_predictor.hpp"
#include <string>
// caffe
#define USE_OPENCV 1;
#include <caffe/caffe.hpp>
#include <caffe/util/io.hpp>
#include <caffe/blob.hpp>
#include <caffe/layers/pose_data_layer.hpp>
#include <caffe/layers/memory_data_layer.hpp>
using namespace cv;
using namespace std;
using namespace caffe;
namespace opengaze {
// Caffe network handle used by the GazePredictor member functions below.
// It is assigned in initiaMPIIGaze() and deleted in ~GazePredictor().
// NOTE(review): this is a namespace-scope variable, so unless the class
// declares a member of the same name, all GazePredictor instances share it
// -- confirm against gaze_predictor.hpp.
caffe::Net<float>* p_net_ = nullptr;
// Construction does no work of its own; the network is created later by
// initiaMPIIGaze().
GazePredictor::GazePredictor() = default;
// Releases the network created by initiaMPIIGaze().
//
// The pointer is reset after deletion so that a later destructor call (or a
// later use) never sees a dangling address: p_net_ is declared at namespace
// scope in this file, so a second GazePredictor being destroyed would
// otherwise delete the same network twice.
GazePredictor::~GazePredictor() {
    delete p_net_;      // deleting a null pointer is a no-op
    p_net_ = nullptr;
}
void GazePredictor::initiaMPIIGaze(const std::vector<std::string> arguments={}) {
p_net_ = nullptr;
string param_path = arguments[0];
string model_path = arguments[1];
int gpu_id = stoi(arguments[3]);
// Set GPU (or CPU)
/*caffe::Caffe::set_mode(caffe::Caffe::CPU);
cout << "Using CPU model" << endl;*/
caffe::Caffe::set_mode(caffe::Caffe::GPU);
cout << "Using GPU with id " << gpu_id << endl;
Caffe::SetDevice(gpu_id);
cout << "load caffe model parameters from " << param_path << endl;
// create CNN
p_net_ = new Net<float>(param_path, caffe::TEST);
cout << "load caffe model from " << model_path << endl;
// load pre-trained weights (binary proto)
p_net_->CopyTrainedLayersFrom(model_path);
// judge model type base on the paramater file name
size_t i = param_path.rfind("/", param_path.length());
string filename;
if (i != string::npos)
filename = param_path.substr(i+1, param_path.length() - i);
if (!filename.compare(string("lenet_test.prototxt")))
model_type_ = 1;
else if (!filename.compare(string("googlenet.prototxt")))
model_type_ = 2;
else if (!filename.compare(string("alexnet_eye.prototxt")))
model_type_ = 3;
else if (!filename.compare(string("alexnet_face.prototxt")))
model_type_ = 4; // the single face model
else if (!filename.compare(string("alexnet_face_448.prototxt")))
model_type_ = 4; // the single face model
else{
model_type_ = 0;
cout<<"Cannot define the type of model!"<<endl;
exit(EXIT_FAILURE);
}
}
// Gaze estimation from a single input image with the MPIIGaze method.
//
// The image is pushed into the layer named "data", the network is run
// forward, and two values (theta, phi) are read from the output blob that
// belongs to model_type_:
//   1      -> "ip2"
//   2      -> mean of "loss1/classifier", "loss2/classifier", "loss3/classifier"
//   3, 4   -> "fc8", or "gaze_output" when "fc8" is not available
// For any other model_type_ both angles stay at zero.
//
// The returned vector is
//   x = -cos(theta) * sin(phi),  y = -sin(theta),  z = -cos(theta) * cos(phi).
//
// The process is terminated with EXIT_FAILURE when no network has been
// loaded, when the "data" layer is missing, or when a required output blob
// cannot be found.
Point3f GazePredictor::predictGazeMPIIGaze(cv::Mat input_image) {
    // Reports an error and stops, matching the error handling used elsewhere
    // in this file.
    auto fail = [](const std::string& message) {
        std::cout << message << std::endl;
        exit(EXIT_FAILURE);
    };

    if (p_net_ == nullptr) {
        fail("ERROR: the network is not loaded. Call initiaMPIIGaze first");
    }

    // Fetches the CPU data of a named blob, or nullptr when the lookup yields
    // no blob. The pointer must be tested before cpu_data() is called on it;
    // calling through an empty pointer is a null dereference.
    auto blob_data = [&](const std::string& blob_name) -> const float* {
        const caffe::shared_ptr<caffe::Blob<float> > blob = p_net_->blob_by_name(blob_name);
        return blob ? blob->cpu_data() : nullptr;
    };

    // NOTE(review): static_pointer_cast assumes the "data" layer really is a
    // MemoryDataLayer<float> -- confirm against the prototxt files.
    caffe::shared_ptr<caffe::MemoryDataLayer<float> > data_layer =
        boost::static_pointer_cast<caffe::MemoryDataLayer<float> >(p_net_->layer_by_name("data"));
    if (!data_layer) {
        fail("ERROR: cannot find the layer named data in the model");
    }

    // One image and, as before, two label entries of value 1. The labels are
    // not read back anywhere in this function.
    const std::vector<cv::Mat> images(1, input_image);
    const std::vector<int> labels(2, 1);
    data_layer->AddMatVector(images, labels);

    // run network
    float loss = 0.0f;
    p_net_->ForwardPrefilled(&loss);

    float theta = 0.0f;
    float phi = 0.0f;
    if (model_type_ == 1) {
        const float* out = blob_data("ip2");
        if (out == nullptr) {
            fail("ERROR: cannot find the blob ip2 in the model");
        }
        theta = out[0];
        phi = out[1];
    } else if (model_type_ == 2) {  // googlenet
        const float* out1 = blob_data("loss1/classifier");
        const float* out2 = blob_data("loss2/classifier");
        const float* out3 = blob_data("loss3/classifier");
        if (out1 == nullptr || out2 == nullptr || out3 == nullptr) {
            fail("ERROR: cannot find the classifier blobs (loss1/loss2/loss3) in the model");
        }
        // average the three classifier outputs
        theta = (out1[0] + out2[0] + out3[0]) / 3.0f;
        phi = (out1[1] + out2[1] + out3[1]) / 3.0f;
    } else if (model_type_ == 3 || model_type_ == 4) {  // alexnet (eye / single face)
        // the final blob can be called either fc8 or gaze_output
        const float* out = blob_data("fc8");
        if (out == nullptr) {
            out = blob_data("gaze_output");
        }
        if (out == nullptr) {
            fail("ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output");
        }
        theta = out[0];
        phi = out[1];
    }

    // Convert the two angles into a 3D direction vector.
    Point3f gaze_norm_3d;
    gaze_norm_3d.x = (-1.0f) * std::cos(theta) * std::sin(phi);
    gaze_norm_3d.y = (-1.0f) * std::sin(theta);
    gaze_norm_3d.z = (-1.0f) * std::cos(theta) * std::cos(phi);
    return gaze_norm_3d;
}
}