initial commit

This commit is contained in:
Xucong Zhang 2019-01-10 13:26:03 +01:00
commit e505acdb29
41 changed files with 2922 additions and 0 deletions

273
src/face_detector.cpp Normal file
View file

@ -0,0 +1,273 @@
#include <iostream>
#include <tbb/tbb.h>
// if we use OpenFace
#if USE_OPENFACE
#include <LandmarkCoreIncludes.h> // from "OpenFace-master/lib/local/LandmarkDetector/include/"
#endif
#include "face_detector.hpp"
using namespace std;
using namespace cv;
vector<LandmarkDetector::FaceModelParameters> det_parameters_;
vector<LandmarkDetector::CLNF> clnf_models_;
namespace opengaze {
FaceDetector::FaceDetector() {
method_type_ = Method::OpenFace;
}
FaceDetector::~FaceDetector() {}
void FaceDetector::initialize(int number_users=5) {
string root_path = OPENFACE_DIR;
root_path = root_path + "/build/bin";
//string openface_root = OpenFace_ROOT_DIR;
// (currently) hard-coded setting
num_faces_max_ = number_users;
detection_resize_rate_ = 2.0; // resize the input image to detect face, crucial for speed
detection_skip_frames_ = 1;
nonoverlap_threshold_ = 0.5;
certainty_threshold_ = 0.0; // the smaller the better, 1 is the best, -1 is the worst
landmark_indices_[0] = 36; landmark_indices_[1] = 39; landmark_indices_[2] = 42;
landmark_indices_[3] = 45; landmark_indices_[4] = 48; landmark_indices_[5] = 54;
tracking_loss_limit_ = 10;
// initialize the tracking models
LandmarkDetector::FaceModelParameters det_parameter;
det_parameter.reinit_video_every = -1; // This is so that the model would not try re-initialising itself
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR;
det_parameter.model_location = root_path + "/model/main_clm_wild.txt";
det_parameter.haar_face_detector_location = root_path + "/classifiers/haarcascade_frontalface_alt.xml";// this line will be disable due to "curr_face_detector"
det_parameter.mtcnn_face_detector_location = root_path + "/model/mtcnn_detector/MTCNN_detector.txt";
det_parameter.use_face_template = true;
det_parameter.reinit_video_every = 5;
// det_parameter.quiet_mode = true; not avaliable fro OpenFace v2.1
// // For in the wild fitting these parameters are suitable
det_parameter.window_sizes_init = vector<int>(4);
det_parameter.window_sizes_init[0] = 15;
det_parameter.window_sizes_init[1] = 13;
det_parameter.window_sizes_init[2] = 11;
det_parameter.window_sizes_init[3] = 9;
det_parameter.sigma = 1.25;
det_parameter.reg_factor = 35;
det_parameter.weight_factor = 2.5;
det_parameter.num_optimisation_iteration = 10;
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
det_parameters_.push_back(det_parameter);
LandmarkDetector::CLNF clnf_model_ = LandmarkDetector::CLNF(det_parameter.model_location);
if (!clnf_model_.loaded_successfully){
cout << "ERROR: Could not load the landmark detector" << endl;
exit(-1);
}
clnf_model_.face_detector_HAAR.load(det_parameter.haar_face_detector_location);
clnf_model_.haar_face_detector_location = det_parameter.haar_face_detector_location;
clnf_model_.face_detector_MTCNN.Read(det_parameter.mtcnn_face_detector_location);
clnf_model_.mtcnn_face_detector_location = det_parameter.mtcnn_face_detector_location;
// If can't find MTCNN face detector, default to HOG one
if (det_parameter.curr_face_detector == LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR && clnf_model_.face_detector_MTCNN.empty()){
cout << "INFO: defaulting to HOG-SVM face detector" << endl;
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
}
clnf_models_.reserve(num_faces_max_);
clnf_models_.push_back(clnf_model_);
active_models_.push_back(false);
for(int i=1; i<num_faces_max_; ++i)
{
clnf_models_.push_back(clnf_model_);
active_models_.push_back(false);
det_parameters_.push_back(det_parameter);
}
// variables
frame_counter_ = 0;
current_face_id_ = 1;
for(int i=0; i<num_faces_max_; ++i) face_ids_.push_back(0);
}
void FaceDetector::reset() {
// reset all status
frame_counter_ = 0;
current_face_id_ = 1;
for(unsigned int model = 0; model < clnf_models_.size(); ++model)
{
active_models_[model] = false;
face_ids_[model] = 0;
clnf_models_[model].Reset();
}
}
void NonOverlapingDetections(const vector<LandmarkDetector::CLNF>& clnf_models, vector<cv::Rect_<float> >& face_detections){
// Go over the model and eliminate detections that are not informative (there already is a tracker there)
for (size_t model = 0; model < clnf_models.size(); ++model){
// See if the detections intersect
cv::Rect_<float> model_rect = clnf_models[model].GetBoundingBox();
for (int detection=face_detections.size()-1; detection >= 0; --detection)
{
double intersection_area = (model_rect & face_detections[detection]).area();
double union_area = model_rect.area() + face_detections[detection].area() - 2 * intersection_area;
// If the model is already tracking what we're detecting ignore the detection, this is determined by amount of overlap
if (intersection_area / union_area > 0.5)
{
face_detections.erase(face_detections.begin() + detection);
}
}
}
}
double NonOverlapingDetection(const LandmarkDetector::CLNF &ref_model, const LandmarkDetector::CLNF &tgt_model)
{
Rect_<double> ref_rect = ref_model.GetBoundingBox();
Rect_<double> tgt_rect = tgt_model.GetBoundingBox();
double intersection_area = (ref_rect & tgt_rect).area();
double union_area = ref_rect.area() + tgt_rect.area() - 2 * intersection_area;
return intersection_area/union_area;
}
void FaceDetector::track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output) {
if(input_img.channels() < 3){
cout << "The input must be a color image!" <<endl;
exit(EXIT_FAILURE);
}
Mat_<uchar> grayscale_image;
cvtColor(input_img, grayscale_image, CV_BGR2GRAY);
bool all_models_active = true;
for(unsigned int model = 0; model < clnf_models_.size(); ++model)
{
if(!active_models_[model])
{
all_models_active = false;
break;
}
}
// Detect faces
// Get the detections (every Xth frame and when there are free models available for tracking)
std::vector<Rect_<float> > face_detections;
cv::Mat small_grayscale_image_;
if (frame_counter_ % detection_skip_frames_ == 0 && !all_models_active) {
// resized image for faster face detection
if (detection_resize_rate_ != 1) resize(grayscale_image, small_grayscale_image_,
Size(), 1.0/detection_resize_rate_, 1.0/detection_resize_rate_);
else small_grayscale_image_ = grayscale_image;
if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR){
vector<float> confidences;
LandmarkDetector::DetectFacesHOG(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HOG, confidences);
}
else if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HAAR_DETECTOR){
LandmarkDetector::DetectFaces(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HAAR);
}
else{
vector<float> confidences;
LandmarkDetector::DetectFacesMTCNN(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_MTCNN, confidences);
}
// resize the face deteciton back
if (detection_resize_rate_ != 1) {
for(auto& face_detection : face_detections) {
face_detection.x *= detection_resize_rate_;
face_detection.y *= detection_resize_rate_;
face_detection.width *= detection_resize_rate_;
face_detection.height *= detection_resize_rate_;
}
}
// Keep only non overlapping detections (also convert to a concurrent vector
NonOverlapingDetections(clnf_models_, face_detections);
}
vector< tbb::atomic<bool> > face_detections_used(face_detections.size());
// Go through every model and update the tracking
tbb::parallel_for(0, (int)clnf_models_.size(), [&](int model) {
//for (unsigned int model = 0; model < clnf_models_.size(); ++model) {
bool detection_success = false;
// If the current model has failed more than threshold, remove it
if (clnf_models_[model].failures_in_a_row > tracking_loss_limit_) {
active_models_[model] = false;
clnf_models_[model].Reset();
}
// If the model is inactive reactivate it with new detections
if (!active_models_[model]){
for (size_t detection_ind = 0; detection_ind < face_detections.size(); ++detection_ind)
{
// if it was not taken by another tracker take it (if it is false swap it to true and enter detection, this makes it parallel safe)
if (!face_detections_used[detection_ind].compare_and_swap(true, false)) {
// Reinitialise the model
clnf_models_[model].Reset();
// This ensures that a wider window is used for the initial landmark localisation
clnf_models_[model].detection_success = false;
LandmarkDetector::DetectLandmarksInVideo(input_img, face_detections[detection_ind], clnf_models_[model], det_parameters_[model], grayscale_image);
// This activates the model
active_models_[model] = true;
face_ids_[model] = current_face_id_;
current_face_id_++;
// break out of the loop as the tracker has been reinitialised
break;
}
}
}
else
{
// The actual facial landmark detection / tracking
detection_success = LandmarkDetector::DetectLandmarksInVideo(input_img, clnf_models_[model], det_parameters_[model], grayscale_image);
}
//}
});
// Go through every model and check the results
for(size_t model=0; model<clnf_models_.size(); ++model){
// Check if the alignment result is overlapping previous models
bool overlapping = false;
for(size_t model_ref=0; model_ref<model; ++model_ref){
double overlap_ratio = NonOverlapingDetection(clnf_models_[model_ref], clnf_models_[model]);
if(overlap_ratio > nonoverlap_threshold_) overlapping = true;
}
if(overlapping){
active_models_[model] = false;
face_ids_[model] = 0;
clnf_models_[model].Reset();
continue;
}
if(clnf_models_[model].detection_certainty < certainty_threshold_) continue;
Sample temp;
temp.face_data.certainty = clnf_models_[model].detection_certainty;
temp.face_data.face_id = face_ids_[model];
temp.face_data.face_bb.x = (int)clnf_models_[model].GetBoundingBox().x;
temp.face_data.face_bb.y = (int)clnf_models_[model].GetBoundingBox().y;
temp.face_data.face_bb.height = (int)clnf_models_[model].GetBoundingBox().height;
temp.face_data.face_bb.width = (int)clnf_models_[model].GetBoundingBox().width;
for(int p=0; p<6; p++){
int num_p = landmark_indices_[p];
temp.face_data.landmarks[p] = Point2d(
clnf_models_[model].detected_landmarks.at<float>(num_p,0),
clnf_models_[model].detected_landmarks.at<float>(num_p+68,0)
);
}
output.emplace_back(temp);
}
}
}

112
src/gaze_estimator.cpp Normal file
View file

@ -0,0 +1,112 @@
#include <iostream>
#include "gaze_estimator.hpp"
using namespace std;
using namespace cv;
namespace opengaze{
GazeEstimator::GazeEstimator() {
}
GazeEstimator::~GazeEstimator() {}
void GazeEstimator::setRootPath(std::string root_path) {
normalizer_.loadFaceModel(root_path);
}
void GazeEstimator::estimateGaze(cv::Mat input_image, std::vector<opengaze::Sample> &outputs) {
face_detector_.track_faces(input_image, outputs); // detect faces and facial landmarks
for (int i=0; i< outputs.size(); ++i) {
// estimate head pose first, no matter what gaze estimation method, head pose is estimated here
normalizer_.estimateHeadPose(outputs[i].face_data.landmarks, outputs[i]);
if (method_type_ == Method::MPIIGaze){
// if we use face model
if (input_type_ == InputType::face){
Mat face_patch = normalizer_.normalizeFace(input_image, outputs[i]);
//outputs[i].face_patch_data.debug_img = face_patch;
Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(face_patch); // gaze estimates in normalization space
Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].face_patch_data.face_rot); // convert gaze to camera coordinate system
gaze_3d.copyTo(outputs[i].gaze_data.gaze3d);
}
else if (input_type_ == InputType::eye) {
vector<cv::Mat> eye_iamges = normalizer_.normalizeEyes(input_image, outputs[i]); // generate eye images
// for left eye
Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(eye_iamges[0]);
Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].eye_data.leye_rot);
gaze_3d.copyTo(outputs[i].gaze_data.lgaze3d);
// for right eye
Mat flip_right;
flip(eye_iamges[0], flip_right, 1);
gaze_norm = gaze_predictor_.predictGazeMPIIGaze(flip_right); // for left right image input
gaze_norm.x *= -1.0;
gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].face_patch_data.face_rot); // convert gaze to camera coordinate system
gaze_3d.copyTo(outputs[i].gaze_data.rgaze3d);
}
}
else if (method_type_ == Method::OpenFace) {
cout << "Please use gaze estimation method MPIIGaze." << endl;
exit(EXIT_FAILURE);
}
}
}
void GazeEstimator::getImagePatch(cv::Mat input_image, std::vector<opengaze::Sample> &outputs) {
face_detector_.track_faces(input_image, outputs); // detect faces and facial landmarks
for (int i=0; i< outputs.size(); ++i) {
// estimate head pose first, no matter what gaze estimation method, head pose is estimated here
normalizer_.estimateHeadPose(outputs[i].face_data.landmarks, outputs[i]);
if (method_type_ == Method::MPIIGaze){
// if we use face model
if (input_type_ == InputType::face){
outputs[i].face_patch_data.face_patch = normalizer_.normalizeFace(input_image, outputs[i]);
}
else if (input_type_ == InputType::eye) {
vector<cv::Mat> eye_iamges = normalizer_.normalizeEyes(input_image, outputs[i]); // generate eye images
outputs[i].eye_data.leye_img = eye_iamges[0];
outputs[i].eye_data.reye_img = eye_iamges[1];
}
}
else if (method_type_ == Method::OpenFace) {
cout << "Please use method MPIIGaze for image patch extraction." << endl;
exit(EXIT_FAILURE);
}
}
}
void GazeEstimator::setMethod(Method input_method_type, const std::vector<std::string> arguments={}) {
method_type_ = input_method_type;
if (method_type_ == Method::MPIIGaze) {
gaze_predictor_.initiaMPIIGaze(arguments);
if (arguments.size() < 2)
input_type_ = InputType::face;
else {
if (arguments[2] == "face"){
input_type_ = InputType::face;
normalizer_.setParameters(1600, 1000, 224, 224);
}
else if (arguments[2] == "eye") {
input_type_ = InputType::eye;
normalizer_.setParameters(960, 600, 60, 36);
}
}
}
}
void GazeEstimator::setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist) {
camera_matrix_ = move(camera_matrix);
camera_dist_ = move(camera_dist_);
normalizer_.setCameraMatrix(camera_matrix_);
}
void GazeEstimator::initialFaceDetector(int number_users){
face_detector_.initialize(number_users);
face_detector_.setMethodType(FaceDetector::Method::OpenFace);
}
};

160
src/gaze_predictor.cpp Normal file
View file

@ -0,0 +1,160 @@
#include "gaze_predictor.hpp"
#include <string>
// caffe
#define USE_OPENCV 1;
#include <caffe/caffe.hpp>
#include <caffe/util/io.hpp>
#include <caffe/blob.hpp>
#include <caffe/layers/pose_data_layer.hpp>
#include <caffe/layers/memory_data_layer.hpp>
using namespace cv;
using namespace std;
using namespace caffe;
namespace opengaze {
caffe::Net<float> *p_net_;
GazePredictor::GazePredictor() {
}
GazePredictor::~GazePredictor() {
delete p_net_;
}
void GazePredictor::initiaMPIIGaze(const std::vector<std::string> arguments={}) {
p_net_ = nullptr;
string param_path = arguments[0];
string model_path = arguments[1];
int gpu_id = stoi(arguments[3]);
// Set GPU (or CPU)
/*caffe::Caffe::set_mode(caffe::Caffe::CPU);
cout << "Using CPU model" << endl;*/
caffe::Caffe::set_mode(caffe::Caffe::GPU);
cout << "Using GPU with id " << gpu_id << endl;
Caffe::SetDevice(gpu_id);
cout << "load caffe model parameters from " << param_path << endl;
// create CNN
p_net_ = new Net<float>(param_path, caffe::TEST);
cout << "load caffe model from " << model_path << endl;
// load pre-trained weights (binary proto)
p_net_->CopyTrainedLayersFrom(model_path);
// judge model type base on the paramater file name
size_t i = param_path.rfind("/", param_path.length());
string filename;
if (i != string::npos)
filename = param_path.substr(i+1, param_path.length() - i);
if (!filename.compare(string("lenet_test.prototxt")))
model_type_ = 1;
else if (!filename.compare(string("googlenet.prototxt")))
model_type_ = 2;
else if (!filename.compare(string("alexnet_eye.prototxt")))
model_type_ = 3;
else if (!filename.compare(string("alexnet_face.prototxt")))
model_type_ = 4; // the single face model
else if (!filename.compare(string("alexnet_face_448.prototxt")))
model_type_ = 4; // the single face model
else{
model_type_ = 0;
cout<<"Cannot define the type of model!"<<endl;
exit(EXIT_FAILURE);
}
}
// gaze estimation with single face input image and with MPIIGaze method
Point3f GazePredictor::predictGazeMPIIGaze(cv::Mat input_image) {
vector<Mat> img_vec;
img_vec.push_back(input_image);
Vec2f gaze_norm_2d;
Point3f gaze_norm_3d;
std::vector<int> labelVector;
labelVector.clear();
labelVector.push_back(1);
labelVector.push_back(1);
float loss = 0.0;
caffe::shared_ptr<caffe::MemoryDataLayer<float> > data_layer_;
data_layer_ = boost::static_pointer_cast<MemoryDataLayer<float> >(p_net_->layer_by_name("data"));
data_layer_->AddMatVector(img_vec, labelVector);
// run network
p_net_->ForwardPrefilled(&loss);
if (model_type_==1)
{
// get output layer "ip2"
float *temp = (float*)p_net_->blob_by_name("ip2")->cpu_data();
// copy estimated gaze vector
gaze_norm_2d.val[0] = temp[0];
gaze_norm_2d.val[1] = temp[1];
temp = nullptr;
}
else if (model_type_==2)// if it is googlenet
{
float *temp1 = (float*)p_net_->blob_by_name("loss1/classifier")->cpu_data();
float *temp2 = (float*)p_net_->blob_by_name("loss2/classifier")->cpu_data();
float *temp3 = (float*)p_net_->blob_by_name("loss3/classifier")->cpu_data();
// average the output of three output values
gaze_norm_2d.val[0] = (temp1[0]+temp2[0]+temp3[0]) / 3.0f;
gaze_norm_2d.val[1] = (temp1[1]+temp2[1]+temp3[1]) / 3.0f;
temp1 = nullptr;
temp2 = nullptr;
temp3 = nullptr;
}
else if (model_type_==3)// if it is alexnet
{
float *temp;
temp = (float*)p_net_->blob_by_name("fc8")->cpu_data();// blob name can be fc8
if (temp == NULL)
temp = (float*)p_net_->blob_by_name("gaze_output")->cpu_data(); //blob name can be gaze_output
if (temp == NULL) {
cout << "ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output" << endl;
exit(EXIT_FAILURE);
}
// copy estimated gaze vector
gaze_norm_2d.val[0] = temp[0];
gaze_norm_2d.val[1] = temp[1];
temp = NULL;
}
else if (model_type_==4)// if it is alexnet
{
float *temp;
temp = (float*)p_net_->blob_by_name("fc8")->cpu_data();// blob name can be fc8
if (temp == NULL)
temp = (float*)p_net_->blob_by_name("gaze_output")->cpu_data(); //blob name can be gaze_output
if (temp == NULL) {
cout << "ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output" << endl;
exit(EXIT_FAILURE);
}
// copy estimated gaze vector
gaze_norm_2d.val[0] = temp[0];
gaze_norm_2d.val[1] = temp[1];
//// get the feature out
//temp = (float*)p_net_->blob_by_name("fc6_gaze")->cpu_data();
//for (int num_f=0; num_f<4096; ++num_f)
//{
// feature[num_f] = temp[num_f];
//}
temp = NULL;
}
float theta = gaze_norm_2d.val[0];
float phi = gaze_norm_2d.val[1];
gaze_norm_3d.x = (-1.0f)*cos(theta)*sin(phi);
gaze_norm_3d.y = (-1.0f)*sin(theta);
gaze_norm_3d.z = (-1.0f)*cos(theta)*cos(phi);
return gaze_norm_3d;
}
}

185
src/input_handler.cpp Normal file
View file

@ -0,0 +1,185 @@
#include "input_handler.hpp"
#include <iostream>
#include <cstdlib>
using namespace cv;
using namespace std;
namespace opengaze {
#if WIN32
#include <windows.h>
#else
#include <X11/Xlib.h>
#endif
void InputHandler::getScreenResolution(int &width, int &height) {
#if WIN32
width = (int) GetSystemMetrics(SM_CXSCREEN);
height = (int) GetSystemMetrics(SM_CYSCREEN);
#else
Display* disp = XOpenDisplay(NULL);
Screen* scrn = DefaultScreenOfDisplay(disp);
width = scrn->width;
height = scrn->height;
#endif
}
InputHandler::InputHandler(){
input_type_ = InputType::Camera;// defualt input type
camera_id_ = 0;
getScreenResolution(screen_width_, screen_height_);
screen_width_ = screen_width_;
}
InputHandler::~InputHandler(){}
void InputHandler::initialize()
{
if (input_type_ == InputType::Camera){
cap_.open(camera_id_);
if(!cap_.isOpened()) { // open Camera
cout << "Could not open Camera with id " << camera_id_ << endl;
std::exit(EXIT_FAILURE);
}
setFrameSize(1280, 720); // 800*600, 1280*720, 1920*1080,
}
else if (input_type_ == InputType::Video){
cap_.open(input_file_video_name_);
if(!cap_.isOpened()) { // open Camera
cout << "Error: Could not open video file " << input_file_video_name_ << endl;
std::exit(EXIT_FAILURE);
}
}
else if (input_type_ == InputType::Directory) {
if (!boost::filesystem::is_directory(input_path_)){
cout << "Error: The input must be a directory, but it is " << input_path_ << endl;
std::exit(EXIT_FAILURE);
}
current_itr_ = boost::filesystem::directory_iterator(input_path_);
}
else if (input_type_ == InputType::Memory) {}
is_reach_end_ = false;
}
void InputHandler::setFrameSize(int frame_width, int frame_height){
cap_.set(cv::CAP_PROP_FRAME_HEIGHT, frame_height);//720 1080
cap_.set(cv::CAP_PROP_FRAME_WIDTH, frame_width);//1280 1980
double dWidth = cap_.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
double dHeight = cap_.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
cout << "Input frame size is : " << dWidth << " x " << dHeight << endl;
}
Mat InputHandler::getNextSample() {
Mat frame;
if (input_type_ == InputType::Camera) cap_ >> frame;
else if (input_type_ == InputType::Video) {
cap_ >> frame;
if (frame.empty()) // we reach the end of video
is_reach_end_ = true;
}
else if (input_type_ == InputType::Directory) {
boost::filesystem::path file_path = current_itr_->path();
if (file_path.extension() != ".jpg" && file_path.extension() != ".png" && file_path.extension() != ".bmp"){
cout << "Error: The input file is not image file with extension of jpg, png or bmp!" << endl;
cout << "The input file name is: " << file_path.string() << endl;
std::exit(EXIT_FAILURE);
}
cout << "process image " << file_path << endl;
frame = imread(file_path.string());
if (current_itr_ == boost::filesystem::directory_iterator())
is_reach_end_ = true;
}
else if (input_type_ == InputType::Memory) {}
return frame;
}
bool InputHandler::closeInput() {
if (input_type_ == InputType::Camera || input_type_ == InputType::Video){
cap_.release();
is_reach_end_ = true;
}
return true;
}
void InputHandler::setInput(std::string input_path) {
if (input_type_ == InputType::Directory){
input_path_ = move(input_path);
}
else if (input_type_ == InputType::Video){
input_file_video_name_ = move(input_path);
}
}
void InputHandler::readScreenConfiguration(string calib_file) {
FileStorage fs_disp(calib_file, FileStorage::READ);
fs_disp["monitor_W"] >> monitor_W_;
fs_disp["monitor_H"] >> monitor_H_;
fs_disp["monitor_R"] >> monitor_R_;
fs_disp["monitor_T"] >> monitor_T_;
// compute monitor plane
Vec3f corners[4];
corners[0] = Vec3f(0.0, 0.0, 0.0);
corners[1] = Vec3f(monitor_W_, 0.0, 0.0);
corners[2] = Vec3f(0.0, monitor_H_, 0.0);
corners[3] = Vec3f(monitor_W_, monitor_H_, 0.0);
for(int i=0; i<4; i++){
Mat corners_cam = monitor_R_ * Mat(corners[i]) + monitor_T_;
corners_cam.copyTo(monitor_corners_[i]);
}
Vec3f normal = Vec3f(0.0, 0.0, 1.0); // normal direction
monitor_normal_ = monitor_R_ * Mat(normal);
monitor_normal_.convertTo(monitor_normal_, CV_32F);
}
void InputHandler::readCameraConfiguration(string calib_file){
cout << endl << "Reading calibration information from : " << calib_file << endl;
FileStorage fs;
fs.open(calib_file, FileStorage::READ);
fs["camera_matrix"] >> camera_matrix_;
fs["dist_coeffs"] >> camera_distortion_;
fs.release();
}
void InputHandler::projectToDisplay(std::vector<opengaze::Sample> &inputs, bool is_face_model) {
for(auto & sample : inputs) {
if (is_face_model) {
Vec3f face_center(sample.face_patch_data.face_center.at<float>(0), sample.face_patch_data.face_center.at<float>(1), sample.face_patch_data.face_center.at<float>(2));
sample.gaze_data.gaze2d = mapToDisplay(face_center, sample.gaze_data.gaze3d);
}
else {
Vec3f leye_pose(sample.eye_data.leye_pos.at<float>(0),sample.eye_data.leye_pos.at<float>(1),sample.eye_data.leye_pos.at<float>(2));
Vec3f reye_pose(sample.eye_data.reye_pos.at<float>(0),sample.eye_data.reye_pos.at<float>(1),sample.eye_data.reye_pos.at<float>(2));
sample.gaze_data.lgaze2d = mapToDisplay(leye_pose, sample.gaze_data.lgaze3d);
sample.gaze_data.rgaze2d = mapToDisplay(reye_pose, sample.gaze_data.rgaze3d);
float gaze_x = (sample.gaze_data.lgaze2d.x + sample.gaze_data.rgaze2d.x) / 2.0f;
float gaze_y = (sample.gaze_data.lgaze2d.y + sample.gaze_data.rgaze2d.y) / 2.0f;
sample.gaze_data.gaze2d.x = gaze_x;
sample.gaze_data.gaze2d.y = gaze_y;
}
}
}
cv::Point2f InputHandler::mapToDisplay(Vec3f origin, Vec3f gaze_vec) {
Point2f gaze_on_screen;
// compute intersection
float gaze_len = (float)(monitor_normal_.dot(Mat(monitor_corners_[0]-origin))/monitor_normal_.dot(Mat(gaze_vec)));
Vec3f gaze_pos_cam = origin + gaze_len * gaze_vec;
// convert to monitor coodinate system
Mat gaze_pos_ = monitor_R_.inv() * (Mat(gaze_pos_cam) - monitor_T_);
Vec3f gaze_pos_3d;
gaze_pos_.copyTo(gaze_pos_3d);
gaze_on_screen.x = gaze_pos_3d.val[0] / monitor_W_;
gaze_on_screen.y = gaze_pos_3d.val[1] / monitor_H_;
return gaze_on_screen;
}
}

184
src/normalizer.cpp Normal file
View file

@ -0,0 +1,184 @@
#include "normalizer.hpp"
using namespace cv;
using namespace std;
namespace opengaze {
Normalizer::Normalizer() {
// parameters for data normalization
focal_norm_ = 1600;
distance_norm_ = 1000; // 600 500 1000
roiSize_norm_ = cv::Size(224, 224); // 224 448
cam_norm_ = (Mat_<float>(3,3) << focal_norm_, 0, roiSize_norm_.width/2, 0, focal_norm_, roiSize_norm_.height/2.0f, 0, 0, 1.0f);
}
Normalizer::~Normalizer() {}
void Normalizer::setParameters(int focal_length, int distance, int img_w, int img_h){
// parameters for data normalization
focal_norm_ = focal_length;
distance_norm_ = distance; // 600 500 1000
roiSize_norm_ = cv::Size(img_w, img_h); // 224 448
cam_norm_ = (Mat_<float>(3,3) << focal_norm_, 0, roiSize_norm_.width/2, 0, focal_norm_, roiSize_norm_.height/2.0f, 0, 0, 1.0f);
}
// convert vector from normalization space to camera coordinate system
cv::Mat Normalizer::cvtToCamera(cv::Point3f input, const Mat cnv_mat) {
// convert to the original camera coordinate system
Vec3f gaze_v(input.x, input.y, input.z);
// apply de-normalization
Mat gaze_v_cam = cnv_mat.inv() * Mat(gaze_v);
gaze_v_cam = gaze_v_cam / norm(gaze_v_cam);
return gaze_v_cam;
}
cv::Mat Normalizer::normalizeFace(Mat input_image, opengaze::Sample &sample) {
// get the face center in 3D space
Mat HR;
cv::Rodrigues(sample.face_patch_data.head_r, HR);
Mat HT = repeat(sample.face_patch_data.head_t, 1, 6);
Mat Fc;
add(HR*face_model_mat_, HT, Fc);
float distance = (float)norm(sample.face_patch_data.face_center); // original distance
float z_scale = distance_norm_ / distance; // scaling factor
cv::Mat scaleMat;
scaleMat = (Mat_<float>(3,3) << 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, z_scale);// scaling matrix
scaleMat.convertTo(scaleMat, CV_32F);
// get the look_at matrix
Mat hRx = HR.col(0);
Mat forward = sample.face_patch_data.face_center /distance;
Mat down = forward.cross(hRx);
down = down / norm(down);
Mat right = down.cross(forward);
right = right / norm(right);
// rotation matrix
sample.face_patch_data.face_rot = Mat(3, 3, CV_32F);
right.copyTo(sample.face_patch_data.face_rot.col(0));
down.copyTo(sample.face_patch_data.face_rot.col(1));
forward.copyTo(sample.face_patch_data.face_rot.col(2));
sample.face_patch_data.face_rot = sample.face_patch_data.face_rot.t(); // there is no scaling
sample.face_patch_data.face_rot.convertTo(sample.face_patch_data.face_rot, CV_32F);
Mat warpMat = cam_norm_ * (scaleMat * sample.face_patch_data.face_rot) * camera_matrix_.inv();// transformation matrix
// crop image and copy the equalized image
Mat face_patch;
warpPerspective(input_image, face_patch, warpMat, roiSize_norm_);
return face_patch;
}
vector<cv::Mat> Normalizer::normalizeEyes(cv::Mat input_image, Sample &sample){
vector<cv::Mat> eye_images;
Mat img_gray;
cvtColor(input_image, img_gray, CV_BGR2GRAY);
Mat eye_center;
Mat* eye_rot;
for (int i=0; i<2; ++i) {
if (i==0){
eye_center = sample.eye_data.leye_pos;
eye_rot = &sample.eye_data.leye_rot;
}
else {
eye_center = sample.eye_data.reye_pos;
eye_rot = &sample.eye_data.reye_rot;
}
float distance = (float)norm(eye_center);
float z_scale = distance_norm_ / distance;
Mat scaleMat;
scaleMat = (Mat_<float>(3,3) << 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, z_scale);// scaling matrix
scaleMat.convertTo(scaleMat, CV_32F);
// get the look_at matrix
Mat HR;
cv::Rodrigues(sample.face_patch_data.head_r, HR);
Mat hRx = HR.col(0);
Mat forward = eye_center/distance;
Mat down = forward.cross(hRx);
down = down / norm(down);
Mat right = down.cross(forward);
right = right / norm(right);
// rotation matrix
*eye_rot = Mat(3, 3, CV_32F);
right.copyTo(eye_rot->col(0));
down.copyTo(eye_rot->col(1));
forward.copyTo(eye_rot->col(2));
*eye_rot = eye_rot->t(); // there is no scaling
Mat warpMat = cam_norm_ * (scaleMat * *eye_rot) * camera_matrix_.inv();// transformation matrix
// crop image and copy the equalized image
Mat eye_patch, eye_patch_equal;
warpPerspective(img_gray, eye_patch, warpMat, roiSize_norm_);
equalizeHist(eye_patch, eye_patch_equal);
eye_images.push_back(eye_patch_equal);
}
eye_rot = nullptr;
return eye_images;
}
void Normalizer::loadFaceModel(std::string path) {
string face_model_file_path = path + "/content/model/face_model.yml";
//
cout << endl << "Loading 3D face model for head pose estimation from : " << face_model_file_path << endl;
FileStorage fs;
if (!fs.open(face_model_file_path, FileStorage::READ)) {
cout << "Cannot load the 3D face model!" << endl;
exit(EXIT_FAILURE);
}
fs["face_model"] >> face_model_mat_;
for(int p=0; p<6; ++p)
face_model_.emplace_back(Point3d(face_model_mat_.at<float>(0,p),
face_model_mat_.at<float>(1,p),
face_model_mat_.at<float>(2,p)));
fs.release();
}
// estimate head pose via model fitting
void Normalizer::estimateHeadPose(const Point2f *landmarks, opengaze::Sample &sample) {
Mat zero_dist = Mat::zeros(1, 5, CV_64F);
vector<Point2d> landmarks_orig(landmarks,
landmarks + 6);
cv::Mat head_r, head_t;
camera_matrix_.convertTo(camera_matrix_, CV_64F); // input must be double type
solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, false, SOLVEPNP_DLS);
solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, true);
head_r.convertTo(sample.face_patch_data.head_r, CV_32F);
head_t.convertTo(sample.face_patch_data.head_t, CV_32F);
camera_matrix_.convertTo(camera_matrix_, CV_32F);
// get the face center in 3D space
Mat HR;
cv::Rodrigues(sample.face_patch_data.head_r, HR);
Mat HT = repeat(sample.face_patch_data.head_t, 1, 6);
Mat Fc;
add(HR*face_model_mat_, HT, Fc);
Mat face_center = (Fc.col(0) + Fc.col(1) + Fc.col(2) + Fc.col(3) + Fc.col(4) + Fc.col(5)) / 6.0; // face center
face_center.copyTo(sample.face_patch_data.face_center); // copy to output
sample.face_patch_data.face_center.convertTo(sample.face_patch_data.face_center, CV_32F);
Mat le = 0.5*(Fc.col(2) + Fc.col(3)); // left eye
le.copyTo(sample.eye_data.leye_pos);
sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
Mat re = 0.5*(Fc.col(0) + Fc.col(1)); // right eye
re.copyTo(sample.eye_data.reye_pos);
sample.eye_data.reye_pos.convertTo(sample.eye_data.reye_pos, CV_32F);
}
void Normalizer::setCameraMatrix(cv::Mat input) {
camera_matrix_ = input;
camera_matrix_.convertTo(camera_matrix_, CV_32F);
}
}

502
src/opengaze.cpp Normal file
View file

@ -0,0 +1,502 @@
#include "opengaze.hpp"
#include <iostream>
#include <time.h>
using namespace std;
using namespace cv;
namespace opengaze {
double clockToMilliseconds(clock_t ticks){
// units/(units/time) => time (seconds) * 1000 = milliseconds
return (ticks/(double)CLOCKS_PER_SEC)*1000.0;
}
OpenGaze::OpenGaze(int argc, char** argv){
namespace fs = boost::filesystem;
namespace po = boost::program_options;
// default value of parameters
camera_id_ = 0;
input_type_ = InputHandler::InputType::Camera;
is_face_model_ = true;
string gaze_method;
string gpu_id;
string temp;
int number_user;
fs::path calib_camera, calib_screen, cnn_param_path, cnn_model_path;
// parse command line options for input/output paths
po::options_description command_line("Command line options");
command_line.add_options()
("root_dir,r", po::value<string>(), "configuration file")
("input_type,t", po::value<string>(), "input type (camera, video file, directory)")
("gaze_method,g", po::value<string>(), "gaze estimation method, could be MPIIGaze or OpenFace")
("input,i", po::value<string>(), "parameter for input")
("output,o", po::value<string>(), "output directory")
("calib_camera", po::value<string>(), "camera calibration file")
("calib_screen", po::value<string>(), "camera-screen calibration file")
("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
("debug,d", "show debug output")
("face_model,f", "to use face model or not")
("save_video,s", "save output visualization or not")
("number_user,n", "the maximum number of users in the input image")
;
cout << "Parsing command line options..." << endl;
po::variables_map vm_command;
po::store(po::parse_command_line(argc, argv, command_line), vm_command);
po::notify(vm_command);
// parse config file for data paths
po::options_description config_file("Config file options");
config_file.add_options()
("root_dir,r", po::value<string>(), "configuration file")
("input_type, t", po::value<string>(), "input type (camera, video file, directory)")
("input, i", po::value<string>(), "parameter for input")
("output,o", po::value<string>(), "output directory")
("cnn_param_path", po::value<string>(), "Caffe prototxt path")
("cnn_model_path", po::value<string>(), "Caffe model path")
("calib_camera", po::value<string>(), "camera calibration file")
("calib_screen", po::value<string>(), "camera-screen calibration file")
("gaze_method", po::value<string>(), "gaze estimation method, could be cnn or openface")
("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
("face_model", po::value<bool>(), "face model or not")
("save_video", po::value<bool>(), "save output visualization or not")
("number_user", po::value<string>(), "the maximum number of users in the input image")
;
fs::path root_dir, config_path;
if(vm_command.count("root_dir")) root_dir = vm_command["root_dir"].as<string>();
else {
root_dir = OPENGAZE_CON_DIR;
cout << "No root directory is found, default value " << root_dir << " will be use" << endl;
}
config_path = root_dir / "default.cfg";
cout << "Reading config from \"" << config_path.string() << "\""<< endl;
if(!fs::exists(config_path)){
cout << "Config file does not exist" << endl;
exit(EXIT_FAILURE);
}
ifstream settings_file(config_path.string());
po::variables_map vm_config;
po::store(po::parse_config_file(settings_file , config_file), vm_config);
po::notify(vm_config);
if(vm_command.count("gpu_id")) gpu_id = vm_command["gpu_id"].as<string>();
else if (vm_config.count("gpu_id")) gpu_id = vm_config["gpu_id"].as<string>();
else gpu_id = "0";
// CNN paramters
if(vm_command.count("cnn_param_path")) cnn_param_path = vm_command["cnn_param_path"].as<string>();
else if (vm_config.count("cnn_param_path")) cnn_param_path = vm_config["cnn_param_path"].as<string>();
else cnn_param_path = root_dir / "content/caffeModel/alexnet_face.prototxt";
if(vm_command.count("cnn_model_path")) cnn_model_path = vm_command["cnn_model_path"].as<string>();
else if (vm_config.count("cnn_model_path")) cnn_model_path = vm_config["cnn_model_path"].as<string>();
else cnn_model_path = root_dir / "content/caffeModel/alexnet_face.caffemodel";
// check input requirements
if(vm_command.count("gaze_method")) gaze_method = vm_command["gaze_method"].as<string>();
else if (vm_config.count("gaze_method")) gaze_method = vm_config["gaze_method"].as<string>();
else gaze_method = "MPIIGaze";
if(vm_command.count("calib_screen")) calib_screen = vm_command["calib_screen"].as<string>();
else if (vm_config.count("calib_screen")) calib_screen = vm_config["calib_screen"].as<string>();
else calib_screen = root_dir / "content/calib/monitor_laptop.yml";
if(vm_command.count("calib_camera")) calib_camera = vm_command["calib_camera"].as<string>();
else if (vm_config.count("calib_camera")) calib_camera = vm_config["calib_camera"].as<string>();
else calib_camera = root_dir / "content/calib/calibration.yml";
// read calibration file
if(!fs::exists(calib_camera)){
cout << "Camera calibration file does not exist: " << calib_camera <<endl;
exit(EXIT_FAILURE);
}
else input_handler_.readCameraConfiguration(calib_camera.string());
if(!fs::exists(calib_screen)){
cout << "Camera-screen calibration file does not exist: " << calib_screen << endl;
exit(EXIT_FAILURE);
}
else input_handler_.readScreenConfiguration(calib_screen.string());
if(vm_command.count("input_type")) temp = vm_command["input_type"].as<string>();
else if (vm_config.count("input_type")) temp = vm_config["input_type"].as<string>();
else temp = "";
if (temp == "camera") {input_type_ = InputHandler::InputType::Camera;}
else if (temp == "video") {input_type_ = InputHandler::InputType::Video;}
else if (temp == "directory") {input_type_ = InputHandler::InputType::Directory;}
else cout<<"No input type specified, default value (camera) will be use" << endl;
if (vm_command.count("input")) temp = vm_command["input"].as<string>();
else if (vm_config.count("input")) temp = vm_config["input"].as<string>();
else temp = "0";
if (input_type_ == InputHandler::InputType::Camera) camera_id_ = stoi(temp);
else if (input_type_ == InputHandler::InputType::Video || input_type_ == InputHandler::InputType::Directory) input_dir_ = temp;
else cout<<"No input parameter specified, default value will be use" << endl;
if(vm_command.count("face_model")) is_face_model_ = true;
else if(vm_config.count("face_model")) is_face_model_ = vm_config["face_model"].as<bool>();
else is_face_model_ = true;
if(vm_command.count("save_video")) is_save_video_ = true;
else if(vm_config.count("save_video")) is_save_video_ = vm_config["save_video"].as<bool>();
else is_save_video_ = false;
if(vm_command.count("debug")) show_debug_ = true;
else if(vm_config.count("debug")) show_debug_ = vm_config["debug"].as<bool>();
else show_debug_ = false;
if(vm_command.count("output")) output_dir_ = vm_command["output"].as<string>();
else if(vm_config.count("output")) output_dir_ = vm_config["output"].as<string>();
else {
if (input_type_ == InputHandler::InputType::Video) output_dir_ = input_dir_.parent_path();
else if (input_type_ == InputHandler::InputType::Directory) output_dir_ = input_dir_.parent_path();
else if (input_type_ == InputHandler::InputType::Camera)
output_dir_ = root_dir;
}
string face_detector_root_path;
if(vm_command.count("openface_path")) face_detector_root_path = vm_command["openface_path"].as<string>();
else if(vm_config.count("openface_path")) face_detector_root_path = vm_config["openface_path"].as<string>();
else cout<< "No face detector root specified, default detector will be use" << endl;
if(vm_command.count("per_model_save_path")) per_model_save_path_ = vm_command["per_model_save_path"].as<string>();
else if (vm_config.count("per_model_save_path")) per_model_save_path_ = vm_config["per_model_save_path"].as<string>();
else per_model_save_path_ = root_dir.string() + "/content/calib/user0.txt";
if(vm_command.count("number_user")) temp = vm_command["number_user"].as<string>();
else if (vm_config.count("number_user")) temp = vm_config["number_user"].as<string>();
else temp = "5";
number_user = stoi(temp);
// initial class instance
if (input_type_ == InputHandler::InputType::Camera){ // Camera as input
input_handler_.setInputType(InputHandler::InputType::Camera);// set input type
input_handler_.setInput(camera_id_); // set Camera id
}
else if (input_type_ == InputHandler::InputType::Video) {
input_handler_.setInputType(InputHandler::InputType::Video);// set input type
input_handler_.setInput(input_dir_.string()); // set camera file
}
else if (input_type_ == InputHandler::InputType::Directory){
input_handler_.setInputType(InputHandler::InputType::Directory);
}
// initialize other classes
gaze_estimator_.setCameraParameters(input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.setRootPath(root_dir.string());
gaze_estimator_.initialFaceDetector(number_user);
vector<std::string> arguments;
if (gaze_method == "MPIIGaze") {
arguments.push_back(cnn_param_path.string());
arguments.push_back(cnn_model_path.string());
if (is_face_model_)
arguments.emplace_back("face");
else
arguments.emplace_back("eye");
arguments.push_back(gpu_id);
gaze_estimator_.setMethod(GazeEstimator::Method::MPIIGaze, arguments);
}
else if (gaze_method == "OpenFace"){
//gaze_estimator_.setMethod(GazeEstimator::Method::OpenFace, arguments);
cout << "OpenFace gaze estimation is current not support" << endl;
exit(EXIT_FAILURE);
}
else {
cout << "The method setting is not right! Options are MPIIGaze or OpenFace!" << endl;
exit(EXIT_FAILURE);
}
}
OpenGaze::~OpenGaze() {
input_handler_.closeInput();
}
// do gaze estimation with camera as input
void OpenGaze::runGazeVisualization() {
input_handler_.initialize();
namedWindow("Gaze");
int key;
Mat input_image;
vector<Sample> output;
cv::VideoWriter m_writer;
if (is_save_video_){
boost::filesystem::path save_video_file;
save_video_file = output_dir_ / (input_dir_.stem().string() + "_gaze_video.avi");
m_writer.open(save_video_file.string(), CV_FOURCC('M','J','P','G'), 25,
Size(input_handler_.getFrameWidth(),input_handler_.getFrameHeight()), true);
cout << "Saving video to " << save_video_file << endl;
}
// construct saving file
ofstream output_stream;
boost::filesystem::path output_file_name = output_dir_ / (input_dir_.stem().string() + "_gaze_output.txt");
output_stream.open(output_file_name.string());
cout << "Created output file: " << output_file_name.string() << endl;
// for fps calculation
double fps_tracker = -1.0;
double t_start = 0;
double t_end = 0;
unsigned int frame_count = 0;
while(true){// loop all the sample or read frame from Video
frame_count++;
t_start = t_end;
output.clear();
input_image = input_handler_.getNextSample();// get input image
if(input_handler_.isReachEnd()){ // check if all sample are processed
cout<<"Processed all the samples."<<endl;
break;
}
Mat undist_img;
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
// get the fps values
t_end = cv::getTickCount();
fps_tracker = 1.0 / (double(t_end - t_start) / cv::getTickFrequency());
// save output
for(auto & sample : output) {
output_stream << frame_count << ",";
output_stream << sample.face_data.face_id << ",";
output_stream << sample.face_data.certainty << ",";
output_stream << sample.face_patch_data.face_center.at<float>(0) << ",";
output_stream << sample.face_patch_data.face_center.at<float>(1) << ",";
output_stream << sample.face_patch_data.face_center.at<float>(2) << ",";
output_stream << sample.gaze_data.gaze2d.x << ",";
output_stream << sample.gaze_data.gaze2d.y << ",";
output_stream << sample.eye_data.leye_pos.at<float>(0) << ",";
output_stream << sample.eye_data.leye_pos.at<float>(1) << ",";
output_stream << sample.eye_data.leye_pos.at<float>(2) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(0) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(1) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(2) << endl;
}
if (is_save_video_ || show_debug_) {
//////// visualization //////////////////////////////////////////////////
// draw results
for(const auto & sample : output){
//drawLandmarks(sample, undist_img); // draw face landmarks
drawGazeOnFace(sample, undist_img); // draw gaze ray on face image
//drawGazeOnSimScreen(sample, undist_img); // draw screen target
}
if (show_debug_) {
// show fps
char fpsC[255];
std::sprintf(fpsC, "%02f", fps_tracker);
string fpsSt("FPS: ");
fpsSt += fpsC;
cv::putText(undist_img, fpsSt, cv::Point(100, 100), CV_FONT_HERSHEY_SIMPLEX, 1, CV_RGB(255, 0, 0), 2);
// show the image
imshow("Gaze", undist_img);
key = cv::waitKey(1);
if (key==27) exit(EXIT_SUCCESS); // press ESC to exit
}
if (is_save_video_) {
if (is_save_video_)
m_writer << undist_img;
}
}
}
if (is_save_video_)
m_writer.release();
}
void OpenGaze::runDataExtraction() {
assert(input_handler_.getInputType() == InputHandler::InputType::Directory);// Here we just accept the directory folder
input_handler_.initialize();
vector<Sample> output;
Mat input_image;
while(true){// loop all the sample or read frame from Video
output.clear();
input_image = input_handler_.getNextSample();// get input image
if(input_handler_.isReachEnd()){ // check if all sample are processed
cout << "Processed all the samples." << endl;
break;
}
Mat undist_img;
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.getImagePatch(undist_img, output); // extract the face image
// save the output
for (int i=0; i<output.size(); ++i) {
string save_file_name = output_dir_.stem().string() + "/img_" + input_handler_.getFileName() + "_" +to_string(i)+".jpg";
cv::imwrite(save_file_name, output[i].face_patch_data.face_patch);
}
}
}
void OpenGaze::runGazeOnScreen() {
input_handler_.initialize();
int key;
Mat input_image, undist_img, show_img;
vector<Sample> output;
cv::namedWindow("screen", CV_WINDOW_NORMAL);
cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);
while(true){// loop all the sample or read frame from Video
output.clear();
if(input_handler_.isReachEnd()){ // check if all sample are processed
cout<<"Processed all the samples."<<endl;
break;
}
input_image = input_handler_.getNextSample();// get input image
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
// save output
for(auto & sample : output) {
int loc_x = (int)(sample.gaze_data.gaze2d.x * input_handler_.getScreenWidth());
int loc_y = (int)(sample.gaze_data.gaze2d.y * input_handler_.getScreenHeight());
circle(show_img, cv::Point(loc_x, loc_y), 10, CV_RGB(255,255,255), -1);
}
imshow("screen", show_img);
cv::Mat save_img;
cv::resize(show_img, save_img, cv::Size(1280, 720));
key = cv::waitKey(1);
show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);
if (key==27) break; // press ESC to exit
}
cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("screen");
}
void OpenGaze::runPersonalCalibration(int num_calibration_point) {
if (input_handler_.getInputType() != InputHandler::InputType::Camera){ // personal calibration has to be done with camera
cout << "Error: the input type must be camera for personal calibration!" << endl;
exit(EXIT_FAILURE);
}
Mat input_image, undist_img;
input_handler_.initialize();
PersonalCalibrator m_calibrator(input_handler_.getScreenWidth(), input_handler_.getScreenHeight());
m_calibrator.generatePoints(num_calibration_point);
m_calibrator.initialWindow(); // show start windows
vector<cv::Point2f> pred, gt; // prediction and ground-truth
for (int i=0; i<num_calibration_point; ++i){
if (m_calibrator.showNextPoint()) {// wait for clicking
vector<Sample> output;
input_image = input_handler_.getNextSample(); // get the sample when user clicking
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);// convert to 2D projection
m_calibrator.confirmClicking(); // give feedback to user that they successfully did calibration
pred.emplace_back(output[0].gaze_data.gaze2d);
gt.emplace_back(cv::Point2f((m_calibrator.getCurrentPoint().x/(float)input_handler_.getScreenWidth()),
(m_calibrator.getCurrentPoint().y/(float)input_handler_.getScreenHeight())));
}
else
break; // if user press ESC button, we break
}
if (pred.size() > 0){
m_calibrator.generateModel(pred, gt, 1); // get the mapping model
string per_model_save_path_ = output_dir_.stem().string() + "/personal_gaze_model.yml";
m_calibrator.saveModel(per_model_save_path_);
}
}
void OpenGaze::drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image) {
static const int dW = 640;
static const int dH = 360;
Mat debug_disp = Mat::zeros(Size(dW, dH), CV_8UC3);
Point2f g_s;
g_s.x = dW*sample.gaze_data.gaze2d.x;
g_s.y = dH*sample.gaze_data.gaze2d.y;
circle(debug_disp, g_s, 10, CV_RGB(255,0,0), -1);
debug_disp.copyTo(image(Rect(0, 0, dW, dH)));
}
void OpenGaze::drawGazeOnFace(opengaze::Sample sample, cv::Mat &image) {
// draw gaze on the face
if (gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
&& gaze_estimator_.input_type_ == GazeEstimator::InputType::face) {
static const float gaze_length = 300.0;
Mat zero = Mat::zeros(1, 3, CV_32F);
Mat rvec, tvec;
sample.face_patch_data.head_r.convertTo(rvec, CV_32F);
sample.face_patch_data.head_t.convertTo(tvec, CV_32F);
vector<Point3f> cam_points;
Vec3f face_center(sample.face_patch_data.face_center.at<float>(0), sample.face_patch_data.face_center.at<float>(1), sample.face_patch_data.face_center.at<float>(2));
cam_points.emplace_back(face_center);
cam_points.emplace_back(face_center + gaze_length * sample.gaze_data.gaze3d);
vector<Point2f> img_points;
projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5); // gaze ray
circle(image, img_points[0], 5, CV_RGB(255,0,0), -1); // staring point
circle(image, img_points[1], 5, CV_RGB(255,0,0), -1); // end point
}
else if ((gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
&& gaze_estimator_.input_type_ == GazeEstimator::InputType::eye)
|| gaze_estimator_.method_type_ == GazeEstimator::Method::OpenFace) {
int gaze_length = 300;
Mat zero = Mat::zeros(1, 3, CV_32F);
vector<Point3f> cam_points;
sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
Vec3f leye_pose(sample.eye_data.leye_pos.at<float>(0),sample.eye_data.leye_pos.at<float>(1),sample.eye_data.leye_pos.at<float>(2));
cam_points.emplace_back(leye_pose);
cam_points.emplace_back(leye_pose + gaze_length*sample.gaze_data.lgaze3d);
Vec3f reye_pose(sample.eye_data.reye_pos.at<float>(0),sample.eye_data.reye_pos.at<float>(1),sample.eye_data.reye_pos.at<float>(2));
cam_points.emplace_back(reye_pose);
cam_points.emplace_back(reye_pose + gaze_length*sample.gaze_data.rgaze3d);
vector<Point2f> img_points;
projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5);
line(image, img_points[2], img_points[3], CV_RGB(255,0,0), 5);
circle(image, img_points[1], 3, CV_RGB(255,0,0), -1);
circle(image, img_points[3], 3, CV_RGB(255,0,0), -1);
}
}
void OpenGaze::drawLandmarks(opengaze::Sample sample, cv::Mat &image) {
cv::Rect_<int> face_bb = sample.face_data.face_bb;
rectangle(image, cv::Point(face_bb.x, face_bb.y),
cv::Point(face_bb.x+face_bb.width,face_bb.y+face_bb.height), CV_RGB(0,255,0), 5);
for(int p=0; p<6; ++p)
circle(image, sample.face_data.landmarks[p], 5, CV_RGB(0,255,0), -1);
}
}

145
src/personal_calibrator.cpp Normal file
View file

@ -0,0 +1,145 @@
#include "personal_calibrator.hpp"
using namespace cv;
using namespace std;
void CallBackFunc(int event, int x, int y, int flags, void* is_click) {
if (event == EVENT_LBUTTONDOWN){
bool* temp = (bool*)is_click;
*temp = true;
}
}
PersonalCalibrator::PersonalCalibrator (int screen_width, int screen_height) {
cv::namedWindow("calibration", CV_WINDOW_NORMAL);
cv::setWindowProperty("calibration", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
// set the mouse
is_click_ = false;
//set the callback function for any mouse event
setMouseCallback("calibration", CallBackFunc, &is_click_); // wait for clicking
screen_width_ = screen_width;
screen_height_ = screen_height;
center_radius_ = (int)((float)screen_width_ / 200.0f);
}
PersonalCalibrator::~PersonalCalibrator() {
cv::setWindowProperty("calibration", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("calibration");
}
void PersonalCalibrator::generatePoints(int num_points) {
index_point_ = -1;
srand(time(NULL));
Point2i current_point;
for (int num = 0; num < num_points; ++num) {
current_point.x = (rand() % screen_width_); // range is [0, 1]
current_point.y = (rand() % screen_height_); // range is [0, 1]
points_.emplace_back(current_point);
}
}
void PersonalCalibrator::initialWindow() {
// get the focus of the window
namedWindow("GetFocus", CV_WINDOW_NORMAL);
cv::Mat img = cv::Mat::zeros(100, 100, CV_8UC3);
cv::imshow("GetFocus", img);
cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
waitKey(1);
cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("GetFocus");
// show instruction
cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
string show_text = "Please click/touch when looking at the dots";
cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(255,255,255), 2);
imshow("calibration", show_img);
cv::waitKey(3000);
for (int i=255; i > 0; i-=5) { //Gradient disappear, nice!
show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(i,i,i), 2);
imshow("calibration", show_img);
cv::waitKey(1);
}
}
bool PersonalCalibrator::showNextPoint() {
cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
index_point_ ++;
cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(255, 255, 255), -1);
is_click_ = false;
while (true) {
imshow("calibration", show_img);
int key = cv::waitKey(10); // wait for interaction
if (key == 27) // if press the ESC key
return false;
if (is_click_) {
break;
}
}
return true;
}
void PersonalCalibrator::confirmClicking() {
cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(0, 200, 0), -1);
imshow("calibration", show_img);
cv::waitKey(500);
}
// this polyfit function is copied from opencv/modules/contrib/src/polyfit.cpp
// This original code was written by
// Onkar Raut
// Graduate Student,
// University of North Carolina at Charlotte
cv::Mat polyfit(const Mat& src_x, const Mat& src_y, int order)
{
CV_Assert((src_x.rows>0)&&(src_y.rows>0)&&(src_x.cols>0)&&(src_y.cols>0)&&(order>=1));
Mat matrix;
Mat bias = Mat::ones((int)src_x.rows, 1, CV_32FC1);
Mat input_x = Mat::zeros(src_x.rows, order*src_x.cols, CV_32FC1);
Mat copy;
for(int i=1; i<=order;i++){
copy = src_x.clone();
pow(copy,i,copy);
copy.copyTo(input_x(Rect((i-1)*src_x.cols, 0, copy.cols, copy.rows)));
}
Mat new_mat;
cv::hconcat(input_x, bias, new_mat);
cout << "new_mat: " << new_mat << endl;
cv::solve(new_mat, src_y, matrix, DECOMP_NORMAL);
cout << "model_matrix: " << matrix << endl;
Mat calibrated = new_mat * matrix;
cout << "calibrated: " << calibrated << endl;
double dist_original = norm(src_x, src_y, NORM_L2);
cout << "dist_original: " << dist_original << endl;
double dist_calibrated = norm(calibrated, src_y, NORM_L2);
cout << "dist_calibrated: " << dist_calibrated << endl;
return matrix;
}
void PersonalCalibrator::generateModel(vector<Point2f> prediction, vector<Point2f> ground_truth, int order) {
cv::Mat input_x = cv::Mat((int)prediction.size(), 2, CV_32FC1, prediction.data());
cv::Mat input_y = cv::Mat((int)ground_truth.size(), 2, CV_32FC1, ground_truth.data());
cout << "input_x: " << input_x << endl;
cout << "input_y: " << input_y << endl;
cv::Mat model_matrix;
model_matrix_ = polyfit(input_x, input_y, order);
}
void PersonalCalibrator::saveModel(std::string file_path) {
cv::FileStorage storage(file_path, cv::FileStorage::WRITE);
storage << model_matrix_;
storage.release();
}