initial commit

Xucong Zhang 2019-01-10 13:26:03 +01:00
commit e505acdb29
41 changed files with 2922 additions and 0 deletions

32
.gitignore vendored Normal file

@ -0,0 +1,32 @@
### C++ ###
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
### CMake ###
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake

90
CMakeLists.txt Normal file

@ -0,0 +1,90 @@
cmake_minimum_required(VERSION 3.0)
project(OpenGaze VERSION 0.1)
set(CMAKE_BUILD_TYPE Release)
# create a directory for models and configuration files
set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze")
add_definitions(-DOPENGAZE_CON_DIR="${OPENGAZE_DIR}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CXX_STANDARD 11)
# OpenCV
find_package( OpenCV 3.4 REQUIRED COMPONENTS core imgproc calib3d highgui objdetect)
# Boost, for reading configuration file
find_package(Boost 1.5 COMPONENTS system filesystem timer thread program_options REQUIRED)
set(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR} ${Boost_INCLUDE_DIR}/boost)
# Caffe
set(CAFFE_INSTALL_DIR "/home/xucong/library/caffe/build/install")
set(Caffe_INCLUDE_DIRS ${CAFFE_INSTALL_DIR}/include)
set(Caffe_LIBRARY_DIRS ${CAFFE_INSTALL_DIR}/lib)
set(Caffe_LIBS lmdb glog caffe)
# Face and facial landmark detection methods
option(USE_OPENFACE "with OpenFace" ON)
add_definitions(-DUSE_OPENFACE=1)
# OpenFace
set(OPENFACE_ROOT_DIR "/home/xucong/library/OpenFace")
add_definitions(-DOPENFACE_DIR="${OPENFACE_ROOT_DIR}")
set(CLM_INCLUDE_DIRS ${OPENFACE_ROOT_DIR}/lib/local/LandmarkDetector/include)
set(CLM_LIBRARY_DIRS ${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector)
set(CLM_LIBS LandmarkDetector tbb openblas dlib)
set(USE_OPENFACE ON) # we use OpenFace method here
# suppress auto_ptr deprecation warnings
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
add_compile_options("-Wno-deprecated-declarations")
endif()
include_directories(./ ./include /usr/local/cuda/include ${OpenCV_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} ${CLM_INCLUDE_DIRS} ${Caffe_INCLUDE_DIRS})
link_directories(./ ./build/lib /usr/lib /usr/local/cuda/lib64 ${Boost_LIBRARY_DIRS} ${CLM_LIBRARY_DIRS} ${Caffe_LIBRARY_DIRS})
file(GLOB SOURCE "./src/*.cpp")
file(GLOB HEADERS "./include/*.hpp")
# compile opengaze library
add_library(opengaze SHARED ${SOURCE} ${HEADERS})
set_target_properties(opengaze PROPERTIES VERSION ${PROJECT_VERSION})
#if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
# set (CMAKE_INSTALL_PREFIX "/usr/local" CACHE PATH "default install path" FORCE )
#endif()
install (TARGETS opengaze EXPORT OpenGazeTargets LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
install (FILES ${HEADERS} DESTINATION include/opengaze)
# install caffe and OpenFace
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/3rdParty" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (FILES ${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector/libLandmarkDetector.a DESTINATION ${OPENGAZE_DIR}/3rdParty)
install (FILES ${Caffe_LIBRARY_DIRS}/libcaffe.so DESTINATION ${OPENGAZE_DIR}/3rdParty)
install (FILES ${Caffe_LIBRARY_DIRS}/libcaffe.so.1.0.0 DESTINATION ${OPENGAZE_DIR}/3rdParty)
# install configuration files
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/content" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/content/calib" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/content/model" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (FILES ./content/calib/calibration.yml DESTINATION ${OPENGAZE_DIR}/content/calib PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
install (FILES ./content/calib/monitor_laptop.yml DESTINATION ${OPENGAZE_DIR}/content/calib PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
install (FILES ./content/model/face_model.yml DESTINATION ${OPENGAZE_DIR}/content/model PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
install (FILES default.cfg DESTINATION ${OPENGAZE_DIR} PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)

55
README.md Normal file

@ -0,0 +1,55 @@
# OpenGaze: Open Source Toolkit for Camera-Based Gaze Estimation and Interaction
<!--The current demo video includes clips from Friends, which may violate the copyright. Although people think 28 seconds could be a boundary:https://productforums.google.com/forum/#!topic/youtube/rQhkI20Rm8k, there is no golden rule for it: https://www.youtube.com/yt/about/copyright/fair-use/#yt-copyright-protection-->
<!--//[![Demo](https://img.youtube.com/vi/OORxOdu8USQ/0.jpg)](https://youtu.be/OORxOdu8USQ "OpenGaze Friends Demo")-->
Appearance-based gaze estimation methods that only require an off-the-shelf camera have significantly improved and promise a wide range of new applications in gaze-based interaction and attentive user interfaces. However, these methods are not yet widely used in the human-computer interaction (HCI) community.
To democratize their use in HCI, we present OpenGaze, the first software toolkit that is specifically developed for gaze interface designers. OpenGaze is open source and aims to implement state-of-the-art methods for camera-based gaze estimation and interaction.
<img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_mpiinf.png" height="80"/><img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_pui.png" height="80"><img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_osaka-u.png" height="80">
## Functionality
The toolkit is capable of performing the following gaze-related tasks:
* **Gaze Estimation**
Show the estimated gaze on the screen, given the screen-camera relationship.
[![Demo](https://img.youtube.com/vi/R1vb7mV3y_M/0.jpg)](https://youtu.be/R1vb7mV3y_M "Gaze visualization demo")
<p>&nbsp;</p>
* **Gaze Visualization**
Show gaze directions originating from the centers of the detected faces in the input image.
[![Demo](https://img.youtube.com/vi/8yMTvvr0rRU/0.jpg)](https://youtu.be/8yMTvvr0rRU "Gaze visualization demo")
<p>&nbsp;</p>
* **Personal Calibration**
Perform personal calibration and remap the estimated gaze target on the screen.
[![Demo](https://img.youtube.com/vi/ntBv1wcNGAo/0.jpg)](https://youtu.be/ntBv1wcNGAo "Gaze visualization demo")
<p>&nbsp;</p>
## Installation
[Unix Installation](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Unix-Installation)
## Use
[Command line arguments](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Command-line-arguments)
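A minimal sketch of the C++ API, mirroring the bundled examples in `exe/` (input source, calibration files, and model paths are taken from the configuration file or the command-line arguments):

```cpp
#include "opengaze/opengaze.hpp"

int main(int argc, char** argv) {
    opengaze::OpenGaze open_gaze(argc, argv);   // reads the configuration (e.g. default.cfg)
    open_gaze.runGazeVisualization();           // draw the estimated gaze on the detected faces
    // alternatives: runGazeOnScreen(), runPersonalCalibration(9), runDataExtraction()
    return 0;
}
```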
## Citation
If you use any of the resources provided on this page in any of your publications, please cite the following paper:
**Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications?** <br/>
Xucong Zhang, Yusuke Sugano, Andreas Bulling<br/>
Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), 2019<br/>
BibTex, PDF
## License
The license agreement can be found in Copyright.txt.
You also have to respect the Boost, OpenFace, and OpenCV licenses.
Furthermore, you have to respect the licenses of the datasets used for [model training](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Model-training).

3
RELEASE.md Normal file

@ -0,0 +1,3 @@
# Release 0.1.0
Initial release of OpenGaze.


@ -0,0 +1,50 @@
#ifndef CAFFE_DSPP_LAYER_HPP_
#define CAFFE_DSPP_LAYER_HPP_
#include <string>
#include <utility>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layers/data_layer.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/loss_layer.hpp"
#include "caffe/layers/neuron_layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
template <typename Dtype>
class DSPPLayer : public Layer<Dtype> {
public:
explicit DSPPLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "DSPPLayer"; }
virtual inline int ExactNumBottomBlobs() const { return 2; };
virtual inline int MinTopBlobs() const { return 1; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
//virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
// const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
//virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
// const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
int width_;
int height_;
int channel_;
int num_;
};
} // namespace caffe
#endif // CAFFE_DSPP_LAYER_HPP_


@ -0,0 +1,56 @@
#ifndef CAFFE_POSE_DATA_LAYER_HPP_
#define CAFFE_POSE_DATA_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/base_data_layer.hpp"
namespace caffe {
template <typename Dtype>
class PoseDataLayer : public BaseDataLayer<Dtype> {
public:
explicit PoseDataLayer(const LayerParameter& param)
: BaseDataLayer<Dtype>(param), has_new_data_(false) {}
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "PoseData"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 2; }
virtual void AddDatumVector(const vector<Datum>& datum_vector);
virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
const vector<float>& labels);
// Reset should accept const pointers, but can't, because the memory
// will be given to Blob, which is mutable
void Reset(Dtype* data, Dtype* label, int n);
void set_batch_size(int new_size);
int batch_size() { return batch_size_; }
int channels() { return channels_; }
int height() { return height_; }
int width() { return width_; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
int batch_size_, channels_, height_, width_, size_;
Dtype* data_;
Dtype* labels_;
int n_;
size_t pos_;
Blob<Dtype> added_data_;
Blob<Dtype> added_label_;
bool has_new_data_;
};
} // namespace caffe
#endif


@ -0,0 +1,92 @@
#include <cmath>
#include <algorithm>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/layers/dspp_layer.hpp"
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
namespace caffe {
template <typename Dtype>
void DSPPLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
}
template <typename Dtype>
void DSPPLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
num_ = bottom[1]->shape()[0];
channel_ = bottom[1]->shape()[1]; // the input data size
height_ = bottom[1]->shape()[2];
width_ = bottom[1]->shape()[3];
// init output size
vector<int> output_shape;
output_shape.push_back(num_);
output_shape.push_back(channel_);
output_shape.push_back(height_);
output_shape.push_back(width_);
top[0]->Reshape(output_shape);
}
template <typename Dtype>
void DSPPLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
Dtype* top_data = top[0]->mutable_cpu_data();
caffe_set<Dtype>(top[0]->count(), 0, top_data); // initialize to 0
for (int n=0; n<num_; ++n) {
for (int h = 0; h < height_; ++h) { // for the input data size
for (int w = 0; w < width_; ++w) {
for (int c = 0; c < channel_; ++c) {
top_data[top[0]->offset(n, c, h, w)] = bottom[1]->data_at(n, c, h, w) * bottom[0]->data_at(n, 0, h, w);
}
}
}
}
top_data = NULL;
}
template <typename Dtype>
void DSPPLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[0]) {
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* data_diff = bottom[1]->mutable_cpu_diff();
Dtype* heat_map_diff = bottom[0]->mutable_cpu_diff();
caffe_set<Dtype>(bottom[1]->count(), 0, data_diff);
caffe_set<Dtype>(bottom[0]->count(), 0, heat_map_diff);
// Dtype activation_h, activation_w;
for (int n = 0; n < num_; ++n) {
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
for (int c = 0; c < channel_; ++c) {
Dtype buffer = top_diff[top[0]->offset(n, c, h, w)];
data_diff[bottom[1]->offset(n, c, h, w)] = buffer * (bottom[0]->data_at(n, 0, h, w));
buffer *= bottom[1]->data_at(n,c,h,w) / channel_;
heat_map_diff[bottom[0]->offset(n,0,h,w)] += buffer;
}
}
}
}
top_diff = NULL;
data_diff = NULL;
heat_map_diff = NULL;
}
}
INSTANTIATE_CLASS(DSPPLayer);
REGISTER_LAYER_CLASS(DSPP);
} // namespace caffe
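For reference, with H = bottom[0] (the single-channel weight map), X = bottom[1] (the data blob), Y = top[0], and C the number of channels, the forward and backward passes implemented above compute:

```latex
Y_{n,c,h,w} = X_{n,c,h,w}\,H_{n,0,h,w},\qquad
\frac{\partial L}{\partial X_{n,c,h,w}} = \frac{\partial L}{\partial Y_{n,c,h,w}}\,H_{n,0,h,w},\qquad
\frac{\partial L}{\partial H_{n,0,h,w}} = \frac{1}{C}\sum_{c}\frac{\partial L}{\partial Y_{n,c,h,w}}\,X_{n,c,h,w}
```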


@ -0,0 +1,128 @@
#include <opencv2/core/core.hpp>
#include <vector>
#include "caffe/layers/pose_data_layer.hpp"
namespace caffe {
template <typename Dtype>
void PoseDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
batch_size_ = this->layer_param_.memory_data_param().batch_size();
channels_ = this->layer_param_.memory_data_param().channels();
height_ = this->layer_param_.memory_data_param().height();
width_ = this->layer_param_.memory_data_param().width();
size_ = channels_ * height_ * width_;
CHECK_GT(batch_size_ * size_, 0) <<
"batch_size, channels, height, and width must be specified and"
" positive in memory_data_param";
int label_shape_[] = {batch_size_, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
top[0]->Reshape(batch_size_, channels_, height_, width_);
top[1]->Reshape(label_shape);
added_data_.Reshape(batch_size_, channels_, height_, width_);
added_label_.Reshape(label_shape);
data_ = NULL;
labels_ = NULL;
added_data_.cpu_data();
added_label_.cpu_data();
}
template <typename Dtype>
void PoseDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {
CHECK(!has_new_data_) <<
"Can't add data until current data has been consumed.";
size_t num = datum_vector.size();
CHECK_GT(num, 0) << "There is no datum to add.";
CHECK_EQ(num % batch_size_, 0) <<
"The added data must be a multiple of the batch size.";
added_data_.Reshape(num, channels_, height_, width_);
int label_shape_[] = {(int)num, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
added_label_.Reshape(label_shape);
// Apply data transformations (mirror, scale, crop...)
this->data_transformer_->Transform(datum_vector, &added_data_);
// Copy Labels
Dtype* top_label = added_label_.mutable_cpu_data();
for (int item_id = 0; item_id < num; ++item_id) {
top_label[item_id] = datum_vector[item_id].label();
}
// num_images == batch_size_
Dtype* top_data = added_data_.mutable_cpu_data();
Reset(top_data, top_label, num);
has_new_data_ = true;
}
template <typename Dtype>
void PoseDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,
const vector<float>& labels) {
size_t num = mat_vector.size();
CHECK(!has_new_data_) <<
"Can't add mat until current data has been consumed.";
CHECK_GT(num, 0) << "There is no mat to add";
CHECK_EQ(num % batch_size_, 0) <<
"The added data must be a multiple of the batch size.";
added_data_.Reshape(num, channels_, height_, width_);
int label_shape_[] = {(int)num, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
added_label_.Reshape(label_shape);
// Apply data transformations (mirror, scale, crop...)
this->data_transformer_->Transform(mat_vector, &added_data_);
// Copy Labels
Dtype* top_label = added_label_.mutable_cpu_data();
for (int item_id = 0; item_id < num; ++item_id) {
top_label[item_id] = labels[item_id];
}
// num_images == batch_size_
Dtype* top_data = added_data_.mutable_cpu_data();
Reset(top_data, top_label, num);
has_new_data_ = true;
}
template <typename Dtype>
void PoseDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {
CHECK(data);
CHECK(labels);
CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";
// Warn with transformation parameters since a memory array is meant to
// be generic and no transformations are done with Reset().
//if (this->layer_param_.has_transform_param()) {
// LOG(WARNING) << this->type() << " does not transform array data on Reset()";
//}
data_ = data;
labels_ = labels;
n_ = n;
pos_ = 0;
}
template <typename Dtype>
void PoseDataLayer<Dtype>::set_batch_size(int new_size) {
CHECK(!has_new_data_) <<
"Can't change batch_size until current data has been consumed.";
batch_size_ = new_size;
added_data_.Reshape(batch_size_, channels_, height_, width_);
int label_shape_[] = {(int)batch_size_, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
added_label_.Reshape(label_shape);
}
template <typename Dtype>
void PoseDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
CHECK(data_) << "PoseDataLayer needs to be initalized by calling Reset";
top[0]->Reshape(batch_size_, channels_, height_, width_);
int label_shape_[] = {(int)batch_size_, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
added_label_.Reshape(label_shape);
top[0]->set_cpu_data(data_ + pos_ * size_);
top[1]->set_cpu_data(labels_ + pos_);
pos_ = (pos_ + batch_size_) % n_;
if (pos_ == 0)
has_new_data_ = false;
}
INSTANTIATE_CLASS(PoseDataLayer);
REGISTER_LAYER_CLASS(PoseData);
} // namespace caffe

11
content/calib/calibration.yml Executable file

@ -0,0 +1,11 @@
%YAML:1.0
camera_matrix: !!opencv-matrix
rows: 3
cols: 3
dt: f
data: [ 1891.07, 0.0, 640, 0.0, 1891.07, 360, 0.0, 0.0, 1.0]
dist_coeffs: !!opencv-matrix
rows: 1
cols: 5
dt: f
data: [1.68091e-02, -7.14552e-02, -5.65886e-03, -5.23482e-04, -3.39946e-02]
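The calibration files use OpenCV's FileStorage YAML format, so they can be read back directly; a minimal sketch (the key names match the file above, the helper name is illustrative):

```cpp
#include <opencv2/core.hpp>
#include <string>

// Read the intrinsics stored in content/calib/calibration.yml.
void loadCameraCalibration(const std::string& path, cv::Mat& camera_matrix, cv::Mat& dist_coeffs) {
    cv::FileStorage fs(path, cv::FileStorage::READ);
    fs["camera_matrix"] >> camera_matrix; // 3x3 intrinsic matrix
    fs["dist_coeffs"] >> dist_coeffs;     // 1x5 distortion coefficients
}
```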


@ -0,0 +1,13 @@
%YAML:1.0
monitor_W: 516
monitor_H: 323
monitor_R: !!opencv-matrix
rows: 3
cols: 3
dt: f
data: [ -0.99955, -0.02891, -0.0082861, -0.028948, 0.99957, 0.0044949, 0.0081526, 0.0047327, -0.99996]
monitor_T: !!opencv-matrix
rows: 3
cols: 1
dt: f
data: [269.41, 48.561, 5.8344]


@ -0,0 +1,13 @@
%YAML:1.0
monitor_W: 310
monitor_H: 174
monitor_R: !!opencv-matrix
rows: 3
cols: 3
dt: f
data: [ -0.99988, -0.009735, -0.01203, -0.0094674, 0.99971, -0.022108, 0.012242, -0.021992, -0.99968]
monitor_T: !!opencv-matrix
rows: 3
cols: 1
dt: f
data: [149.91, 29.575, -18.884]

6
content/model/face_model.yml Executable file

@ -0,0 +1,6 @@
%YAML:1.0
face_model: !!opencv-matrix
rows: 3
cols: 6
dt: f
data: [ -45.096768, -21.312858, 21.312858, 45.096768, -26.299577, 26.299577, -0.483773,0.483773, 0.483773, -0.483773, 68.595035,68.595035, 2.397030, -2.397030, -2.397030, 2.397030, -0.000000, -0.000000]

26
default.cfg Normal file

@ -0,0 +1,26 @@
## input and output
# input_type = camera # camera, video, or directory
# input = 0 # camera id, video file name, or directory of image files
# output = /BS/zhang-semi/work/opengaze/test/
# input = YOUR_VIDEO OR IMAGE FOLDER
# output = MUST BE A DIRECTORY
## gaze estimation method
# gaze_method = MPIIGaze # OpenFace MPIIGaze
# gpu_id = 0
## gaze estimation method/model selection
# face_model = 1 # 1 for the face model, 0 for eye image model
## CNN model for face image, trained on MPIIGaze + EYEDIAP HD
# cnn_param_path = YOUR_PATH/alexnet_face.prototxt
# cnn_model_path = YOUR_PATH/alexnet_face.caffemodel
## calibration files for the camera and the screen
# calib_camera = YOUR_PATH/calibration.yml
# calib_screen = YOUR_PATH/monitor.yml
## parameters for personal calibration
# per_model_save_path = YOUR_PATH/user1.txt
# num_calibration = 9
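The configuration file is a simple key = value file; Boost.Program_options (already required by the CMake script) can parse it. A hedged sketch with a subset of the options above (the toolkit's actual parser may register more options and defaults):

```cpp
#include <fstream>
#include <iostream>
#include <string>
#include <boost/program_options.hpp>

namespace po = boost::program_options;

// Hedged sketch: parse a key = value configuration file with some of the option
// names used in default.cfg; option names and defaults beyond the file above are assumptions.
int main() {
    po::options_description desc("OpenGaze options");
    desc.add_options()
        ("input_type",   po::value<std::string>()->default_value("camera"))
        ("input",        po::value<std::string>()->default_value("0"))
        ("gaze_method",  po::value<std::string>()->default_value("MPIIGaze"))
        ("gpu_id",       po::value<int>()->default_value(0))
        ("calib_camera", po::value<std::string>())
        ("calib_screen", po::value<std::string>());

    std::ifstream cfg("default.cfg");
    po::variables_map vm;
    po::store(po::parse_config_file(cfg, desc, true /* ignore unknown options */), vm);
    po::notify(vm);
    std::cout << "gaze_method = " << vm["gaze_method"].as<std::string>() << std::endl;
    return 0;
}
```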

9
download_models.sh Executable file

@ -0,0 +1,9 @@
#!/bin/bash
OPENGAZE_DIR=~/OpenGaze
mkdir -p $OPENGAZE_DIR/content/caffeModel
cd $OPENGAZE_DIR/content/caffeModel
wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.prototxt
wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.caffemodel

36
exe/CMakeLists.txt Normal file

@ -0,0 +1,36 @@
cmake_minimum_required(VERSION 3.0)
project(OpenGazeExe VERSION 1.0)
set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze")
add_definitions(-DOPENGAZE_DIR="${CMAKE_SOURCE_DIR}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CXX_STANDARD 11)
find_package( OpenCV 3.1 REQUIRED COMPONENTS calib3d highgui objdetect imgproc core)
# Boost, for reading configuration file
find_package(Boost 1.5 COMPONENTS system filesystem timer thread program_options REQUIRED)
set(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR} ${Boost_INCLUDE_DIR}/boost)
include_directories(/usr/local/include/opengaze /usr/local/cuda/include ${OpenCV_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
link_directories(/usr/lib /usr/local/lib /usr/local/cuda/lib64 ${Boost_LIBRARY_DIRS} ${OPENGAZE_DIR}/3rdParty)
## -lX11 is for getting screen resolution in pixel in the Linux system
set(LIBS opengaze LandmarkDetector ${OpenCV_LIBS} ${Boost_LIBRARIES} tbb openblas dlib lmdb glog caffe X11)
add_executable(GazeVisualization GazeVisualization.cpp)
target_link_libraries(GazeVisualization ${LIBS})
add_executable(Calibration Calibration.cpp)
target_link_libraries(Calibration ${LIBS})
add_executable(GazeEstimation GazeEstimation.cpp)
target_link_libraries(GazeEstimation ${LIBS})
add_executable(DataExtraction DataExtraction.cpp)
target_link_libraries(DataExtraction ${LIBS})

29
exe/Calibration.cpp Normal file

@ -0,0 +1,29 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"
using namespace std;
using namespace cv;
using namespace opengaze;
vector<string> get_arguments(int argc, char **argv) {
vector<string> arguments;
for (int i = 0; i < argc; ++i){
arguments.emplace_back(string(argv[i]));
}
return arguments;
}
int main(int argc, char** argv)
{
vector<string> arguments = get_arguments(argc, argv);
OpenGaze open_gaze(argc, argv);
int num_calibration_point = 20;
open_gaze.runPersonalCalibration(num_calibration_point);
return 1;
}

28
exe/DataExtraction.cpp Normal file

@ -0,0 +1,28 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"
using namespace std;
using namespace cv;
using namespace opengaze;
vector<string> get_arguments(int argc, char **argv) {
vector<string> arguments;
for (int i = 0; i < argc; ++i){
arguments.emplace_back(string(argv[i]));
}
return arguments;
}
int main(int argc, char** argv)
{
vector<string> arguments = get_arguments(argc, argv);
OpenGaze open_gaze(argc, argv);
open_gaze.runDataExtraction();
return 1;
}

28
exe/GazeEstimation.cpp Normal file

@ -0,0 +1,28 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"
using namespace std;
using namespace cv;
using namespace opengaze;
vector<string> get_arguments(int argc, char **argv) {
vector<string> arguments;
for (int i = 0; i < argc; ++i){
arguments.emplace_back(string(argv[i]));
}
return arguments;
}
int main(int argc, char** argv)
{
vector<string> arguments = get_arguments(argc, argv);
OpenGaze open_gaze(argc, argv);
open_gaze.runGazeOnScreen();
return 1;
}

28
exe/GazeVisualization.cpp Normal file

@ -0,0 +1,28 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"
using namespace std;
using namespace cv;
using namespace opengaze;
vector<string> get_arguments(int argc, char **argv) {
vector<string> arguments;
for (int i = 0; i < argc; ++i){
arguments.emplace_back(string(argv[i]));
}
return arguments;
}
int main(int argc, char** argv)
{
vector<string> arguments = get_arguments(argc, argv);
OpenGaze open_gaze(argc, argv);
open_gaze.runGazeVisualization();
return 1;
}

BIN
exe/test.mp4 Normal file

Binary file not shown.

BIN
imgs/logo_mpiinf.png Normal file

Binary file not shown.


BIN
imgs/logo_osaka-u.png Executable file

Binary file not shown.


BIN
imgs/logo_pui.png Normal file

Binary file not shown.


88
include/data.hpp Normal file

@ -0,0 +1,88 @@
#ifndef DATA_HPP
#define DATA_HPP
#include <opencv2/opencv.hpp>
namespace opengaze{
/**
* face and facial landmark detection data
* @param face_id personal id from tracking across frames
* @param certainty detection score, 1 is the best, -1 is the worst
* @param landmarks the six detected facial landmarks: four eye corners and two mouth corners
* @param face_bb detected face bounding box
*/
struct FaceData
{
unsigned long face_id;
double certainty;
cv::Point2f landmarks[6];
cv::Rect_<int> face_bb;
};
/**
* eye image related data
* @param leye_pos/reye_pos 3D eyeball center positions of the left and right eyes in the original camera coordinate system
* @param leye_img/reye_img eye image
* @param leye_rot/reye_rot rotation matrix during the data normalization procedure
*/
struct EyeData
{
// cv::Mat head_r, head_t;
cv::Mat leye_pos, reye_pos; //
// normalized eyes
cv::Mat leye_img, reye_img;
cv::Mat leye_rot, reye_rot;
};
/**
* face patch data related to data normalization
* @param head_r head pose rotation, with the face center as reference
* @param head_t head pose translation (position of the face center)
* @param face_rot rotation matrix during the data normalization procedure
* @param face_center 3D face center in the original camera coordinate system
* @param debug_img used for debugging to show the normalized face image
* @param face_patch normalized face image
*/
struct FacePatchData
{
cv::Mat head_r, head_t;
cv::Mat face_rot;
cv::Mat face_center;
cv::Mat debug_img;
cv::Mat face_patch;
};
/**
* gaze data
* @param lgaze3d/rgaze3d gaze directions of the left and right eyes in the camera coordinate system
* @param gaze3d gaze direction estimated from the face patch in the camera coordinate system
* @param lgaze2d/rgaze2d projected gaze positions on the screen from the left and right eyes
* @param gaze2d projected gaze position from the face patch on the screen
*/
struct GazeData
{
cv::Vec3f lgaze3d, rgaze3d;
cv::Vec3f gaze3d;
cv::Point2f lgaze2d, rgaze2d;
cv::Point2f gaze2d;
};
/**
* The general output data structure
* @param face_data store face and facial landmark detection data
* @param eye_data store data related to eye image input
* @param face_patch_data normalized face patch data
* @param gaze_data gaze data in 2D and 3D spaces
*/
struct Sample
{
FaceData face_data;
EyeData eye_data;
FacePatchData face_patch_data;
GazeData gaze_data;
};
}
#endif //DATA_HPP

72
include/face_detector.hpp Normal file

@ -0,0 +1,72 @@
#ifndef FACE_DETECTOR_HPP
#define FACE_DETECTOR_HPP
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#if USE_DLIB
// if we use dlib
#include <dlib/opencv.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/image_processing.h>
#include <dlib/gui_widgets.h>
#include <dlib/image_io.h>
#endif
#include "data.hpp"
namespace opengaze{
class FaceDetector {
public:
FaceDetector();
~FaceDetector();
/**
* face and facial landmark detection selection
* The current implementation only supports OpenFace. OpenFace uses dlib for face detection.
*/
enum Method{OpenFace, OpenCV, Dlib};
/**
* main function to detect and track face and facial landmarks
* @param input_img input image
* @param output output data structure
*/
void track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output);
void reset();
void setMethodType(Method method_type) {method_type_ = method_type;}
Method getMethodType() {return method_type_;}
void initialize(int number_users);
private:
Method method_type_;
#if USE_DLIB
dlib::frontal_face_detector dlib_detector_;
dlib::shape_predictor dlib_sp_;
#endif
// parameters for OpenFace
std::vector<bool> active_models_;
unsigned long num_faces_max_;
int detection_skip_frames_, tracking_loss_limit_;
float detection_resize_rate_;
float nonoverlap_threshold_;
double certainty_threshold_;
int landmark_indices_[6];
int frame_counter_;
unsigned long current_face_id_;
std::vector<unsigned long> face_ids_;
};
}
#endif //FACE_DETECTOR_HPP


@ -0,0 +1,65 @@
#ifndef GAZE_ESTIMATOR_HPP
#define GAZE_ESTIMATOR_HPP
#include <opencv2/opencv.hpp>
#include "data.hpp"
#include "face_detector.hpp"
#include "normalizer.hpp"
#include "gaze_predictor.hpp"
namespace opengaze{
class GazeEstimator {
public:
GazeEstimator();
~GazeEstimator();
/**
* In the current implementation, we only have the "MPIIGaze" method, which takes the input face/eye image
* and outputs the gaze direction directly. It is an appearance-based method. "OpenFace" can also output
* a gaze vector based on its pupil detection results; however, the "OpenFace" implementation is not
* included in the OpenGaze toolkit yet.
*/
enum Method{MPIIGaze, OpenFace};
/**
* for the "MPIIGaze" method, the input image can be face or eye. The full-face patch model can output
* more accurate gaze prediction than the eye image model, while the eye image base model is much faster.
*/
enum InputType{face, eye};
/**
* the main function to estimate the gaze.
* It performs the face and facial landmarks detection, head pose estimation and then gaze prediction.
* @param input_image input scene image
* @param output data structure for output
*/
void estimateGaze(cv::Mat input_image, std::vector<opengaze::Sample> &output);
void getImagePatch(cv::Mat input_image, std::vector<opengaze::Sample> &outputs);
void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist);
void setRootPath(std::string root_path);
void setMethod(Method, std::vector<std::string> arguments);
void initialFaceDetector(int number_users);
Method method_type_;
InputType input_type_; // the input type
private:
// class instances
FaceDetector face_detector_;
Normalizer normalizer_;
GazePredictor gaze_predictor_;
// camera intrinsic matrix
cv::Mat camera_matrix_;
// camera distortion matrix
cv::Mat camera_dist_;
// the root path is used to load configuration files and models
std::string root_path_;
};
}
#endif //GAZE_ESTIMATOR_HPP
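A hedged sketch of driving GazeEstimator directly (the OpenGaze class wraps this); the file paths and the argument order {prototxt, caffemodel, input type, gpu id} follow gaze_predictor.cpp, but the concrete paths and the helper function are illustrative only:

```cpp
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "gaze_estimator.hpp"

// Estimate gaze for every detected face in one camera frame (sketch, not the toolkit's code).
void estimateOnFrame(const cv::Mat& frame, const cv::Mat& camera_matrix, const cv::Mat& camera_dist) {
    opengaze::GazeEstimator estimator;
    estimator.setRootPath("/home/user/OpenGaze");  // assumption: the OpenGaze content directory
    estimator.setCameraParameters(camera_matrix, camera_dist);
    estimator.initialFaceDetector(1);              // track at most one user
    estimator.setMethod(opengaze::GazeEstimator::Method::MPIIGaze,
                        {"alexnet_face.prototxt", "alexnet_face.caffemodel", "face", "0"});
    std::vector<opengaze::Sample> samples;
    estimator.estimateGaze(frame, samples);        // fills gaze_data for each detected face
    for (const auto& s : samples)
        std::cout << "3D gaze: " << s.gaze_data.gaze3d << std::endl;
}
```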


@ -0,0 +1,29 @@
#ifndef GAZE_PREDICTOR_HPP
#define GAZE_PREDICTOR_HPP
#include <opencv2/opencv.hpp>
#include "data.hpp"
#include "face_detector.hpp"
namespace opengaze{
class GazePredictor {
public:
GazePredictor();
~GazePredictor();
void initiaMPIIGaze(std::vector<std::string> arguments);
cv::Point3f predictGazeMPIIGaze(cv::Mat face_patch);
private:
int model_type_;
bool is_extract_feature;
};
}
#endif //GAZE_PREDICTOR_HPP

125
include/input_handler.hpp Normal file

@ -0,0 +1,125 @@
#ifndef INPUT_HANDLER_HPP
#define INPUT_HANDLER_HPP
#include <opencv2/opencv.hpp>
#include <vector>
#include <iostream>
#include <boost/filesystem.hpp>
#include "data.hpp"
namespace opengaze {
class InputHandler {
public:
enum InputType {Camera, Video, Image, Directory, Memory};
InputHandler();
~InputHandler();
/**
* set the camera intrinsic parameters
* @param camera_matrix camera intrinsic matrix
* @param camera_dist camera distortion matrix
*/
void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist){
camera_matrix_ = std::move(camera_matrix);
camera_distortion_ = std::move(camera_dist);
}
/**
* function to return next sample, could come from any input source
* @return next sample
*/
cv::Mat getNextSample();
/**
* set the input type
* @param type the input type, as defined in the InputType enum
*/
void setInputType(InputType type){input_type_ = type;}
/**
* set the input
* The meaning of the value depends on the input type:
* for type "Camera", the value is the camera id,
* for type "Video", the value is the video file name,
* and for type "Directory", the value is the directory path.
*/
void setInput(int camera_id) {camera_id_ = camera_id;}
void setInput(std::vector<cv::Mat> images) {images_ = std::move(images);}
void setInput(std::string input_path);
/**
* read the parameters related to the screen
* @param calib_file file for the configuration
*/
void readScreenConfiguration(std::string calib_file);
/**
* read the camera intrinsic parameters from the configuration file
* @param calib_file file for the configuration
*/
void readCameraConfiguration(std::string calib_file);
/**
* Once the 3D gaze vector has been obtained, it needs to be projected onto the 2D screen
* (the geometry is sketched after this header). This function also needs a flag indicating whether the
* full-face model is used, since the origin of the gaze vector is the face center for the full-face model
* and the eye center for the eye-based models.
* @param input input data containing the 3D gaze vectors
* @param is_face_model a boolean value indicating whether the gaze vectors come from the face model or the eye model
*/
void projectToDisplay(std::vector<opengaze::Sample> &input, bool is_face_model=true);
int getFrameHeight(){return cap_.get(cv::CAP_PROP_FRAME_HEIGHT);}
int getFrameWidth(){return cap_.get(cv::CAP_PROP_FRAME_WIDTH);}
InputType getInputType() {return input_type_;}
int getScreenWidth() {return screen_width_;}
int getScreenHeight() {return screen_height_;}
std::string getFileName() {return current_file_name_;}
cv::Point2f mapToDisplay(cv::Vec3f obj_center, cv::Vec3f gaze_point);
void initialize();
bool closeInput();
void getScreenResolution(int &width, int &height);
cv::Mat getCameraMatrix() { return camera_matrix_;}
cv::Mat getCameraDistortion() {return camera_distortion_;}
void setFrameSize(int frame_width, int frame_height);
bool isReachEnd() {return is_reach_end_;}
cv::Mat camera_matrix_;
cv::Mat camera_distortion_;
private:
// indicator if we reach the end of sample stream
bool is_reach_end_;
int camera_id_;
int sample_height_, sample_width_;
std::vector<cv::Mat> images_;
std::string input_path_;
std::string input_file_video_name_;
int screen_width_, screen_height_;
// monitor
float monitor_W_, monitor_H_; // monitor width and height in mm
cv::Mat monitor_R_, monitor_T_;
cv::Vec3f monitor_corners_[4];
cv::Mat monitor_normal_;
// input variable
InputType input_type_;
cv::VideoCapture cap_;
std::string current_file_name_;
// variable for directory input
boost::filesystem::directory_iterator current_itr_;
};
}
#endif //INPUT_HANDLER_HPP
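A hedged sketch of the geometry behind projectToDisplay()/mapToDisplay(): intersect the gaze ray with the screen plane given by monitor_R_/monitor_T_ (the screen pose in the camera coordinate system, in millimetres, as stored in the monitor yml files), then convert the on-screen position to pixels. The function name and conventions below are illustrative; the toolkit's implementation may differ in details.

```cpp
#include <opencv2/opencv.hpp>

// origin/direction: gaze origin (face or eye center) and gaze direction in camera coordinates.
cv::Point2f gazeToPixel(const cv::Vec3f& origin, const cv::Vec3f& direction,
                        const cv::Mat& monitor_R, const cv::Mat& monitor_T,
                        float monitor_W_mm, float monitor_H_mm,
                        int screen_width_px, int screen_height_px) {
    cv::Mat R, T;
    monitor_R.convertTo(R, CV_32F);
    monitor_T.convertTo(T, CV_32F);
    cv::Mat o(origin), d(direction);          // 3x1 CV_32F vectors
    cv::Mat normal = R.col(2);                // screen plane normal in camera coordinates
    // Ray-plane intersection: o + t*d lies on the plane through T with normal n.
    cv::Mat diff = T - o;
    float t = static_cast<float>(normal.dot(diff) / normal.dot(d));
    cv::Mat hit = o + t * d;                  // 3D gaze point on the screen plane
    cv::Mat on_screen = R.t() * (hit - T);    // back to screen coordinates (mm)
    float x_px = on_screen.at<float>(0) / monitor_W_mm * screen_width_px;
    float y_px = on_screen.at<float>(1) / monitor_H_mm * screen_height_px;
    return cv::Point2f(x_px, y_px);
}
```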

42
include/normalizer.hpp Normal file

@ -0,0 +1,42 @@
#ifndef NORMALIZER_HPP
#define NORMALIZER_HPP
#include <opencv2/opencv.hpp>
#include "data.hpp"
namespace opengaze{
class Normalizer {
public:
Normalizer();
~Normalizer();
void estimateHeadPose(const cv::Point2f *landmarks, opengaze::Sample &sample);
void setCameraMatrix(cv::Mat input);
void loadFaceModel(std::string path);
void setParameters(int focal_length, int distance, int img_w, int img_h);
cv::Mat normalizeFace(cv::Mat input_image, Sample &sample);
std::vector<cv::Mat> normalizeEyes(cv::Mat input_image, Sample &sample);
cv::Mat cvtToCamera(cv::Point3f input, const cv::Mat cnv_mat);
private:
cv::Mat camera_matrix_;
std::vector<cv::Point3f> face_model_;
cv::Mat face_model_mat_, cam_norm_;
float focal_norm_, distance_norm_;
cv::Size roiSize_norm_;
};
}
#endif //NORMALIZER_HPP
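The normalization referenced in data.hpp and performed by normalizeFace()/normalizeEyes() follows the usual MPIIGaze-style scheme: rotate a virtual camera to look at the face (or eye) center, scale it to a fixed distance, and warp the image with the resulting homography. A hedged sketch under these assumptions (the toolkit calls setParameters(1600, 1000, 224, 224) for the face model and (960, 600, 60, 36) for eye patches; the real Normalizer may differ in details):

```cpp
#include <opencv2/opencv.hpp>

// face_center: 3x1 vector, face (or eye) center in camera coordinates, millimetres.
cv::Mat normalizePatch(const cv::Mat& image, const cv::Mat& camera_matrix,
                       const cv::Mat& face_center, float focal_norm, float distance_norm,
                       cv::Size roi_norm, cv::Mat& rot_norm) {
    cv::Mat K, center;
    camera_matrix.convertTo(K, CV_32F);
    face_center.convertTo(center, CV_32F);
    // Virtual (normalized) camera with the desired focal length and image size.
    cv::Mat cam_norm = (cv::Mat_<float>(3, 3) <<
        focal_norm, 0, roi_norm.width / 2.f,
        0, focal_norm, roi_norm.height / 2.f,
        0, 0, 1);
    // Scale so the face sits at distance_norm from the virtual camera.
    float distance = static_cast<float>(cv::norm(center));
    cv::Mat S = cv::Mat::eye(3, 3, CV_32F);
    S.at<float>(2, 2) = distance_norm / distance;
    // Rotate the virtual camera so its z-axis points at the face center.
    cv::Mat z = center / distance;
    cv::Mat down = (cv::Mat_<float>(3, 1) << 0.f, 1.f, 0.f);
    cv::Mat x = down.cross(z); x = x / cv::norm(x);
    cv::Mat y = z.cross(x);
    rot_norm = cv::Mat(3, 3, CV_32F);
    cv::Mat(x.t()).copyTo(rot_norm.row(0));
    cv::Mat(y.t()).copyTo(rot_norm.row(1));
    cv::Mat(z.t()).copyTo(rot_norm.row(2));
    // Image warp: C_norm * S * R * C_real^-1.
    cv::Mat W = cam_norm * S * rot_norm * K.inv();
    cv::Mat patch;
    cv::warpPerspective(image, patch, W, roi_norm);
    return patch;
}
```

rot_norm is the rotation stored in face_rot (or leye_rot/reye_rot), and cvtToCamera() applies its inverse to bring a gaze direction predicted in the normalized space back to the camera coordinate system.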

80
include/opengaze.hpp Normal file

@ -0,0 +1,80 @@
#ifndef OPEN_GAZE_H
#define OPEN_GAZE_H
#include <string>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <opencv2/opencv.hpp>
#include "input_handler.hpp"
#include "gaze_estimator.hpp"
#include "data.hpp"
#include "personal_calibrator.hpp"
namespace opengaze {
class OpenGaze {
public:
explicit OpenGaze(int argc, char** argv); //read configuration file
~OpenGaze();
// main function to estimate and show the gaze vector drawn on the input face image.
void runGazeVisualization();
/**
* main function to run personal calibration.
* @param num_calibration_point the numbers of points for calibration.
*/
void runPersonalCalibration(int num_calibration_point=5);
// main function to estimate and draw gaze point on the screen.
void runGazeOnScreen();
// main function to extract the face image from input image. The face image can then
// be used to train a custom gaze estimation model
void runDataExtraction();
private:
// visualization
/**
* function to draw the gaze vector on the input face image.
* @param sample the input data includes the gaze vector, head pose etc.
* @param image the input image contains the face. This function will draw the gaze vector on this input image
*/
void drawGazeOnFace(opengaze::Sample sample, cv::Mat &image);
// draw the detected facial landmark
void drawLandmarks(opengaze::Sample sample, cv::Mat &image);
// draw the estimated gaze on the top left corner of the input image
// to show the relative position on the screen. In this case,
// the user can see both the input image and the projected gaze target on the screen.
// This function is mainly used for debugging.
void drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image);
// estimate and draw gaze point on the screen
void drawGazeOnScreen(opengaze::Sample sample, cv::Mat &image);
// debug mode will show the gaze drawn on the face
bool show_debug_;
//class instances
InputHandler input_handler_;
GazeEstimator gaze_estimator_;
// input camera id
int camera_id_;
// temporary variables to store the input path, output path, input type
boost::filesystem::path input_dir_;
InputHandler::InputType input_type_;
boost::filesystem::path output_dir_;
bool is_face_model_;
bool is_save_video_;
// path to save the personal calibration model
std::string per_model_save_path_;
};
}
#endif //OPEN_GAZE_H


@ -0,0 +1,64 @@
#ifndef PERSONAL_CALIBRATOR_HPP
#define PERSONAL_CALIBRATOR_HPP
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
class PersonalCalibrator {
public:
PersonalCalibrator(int screen_width, int screen_height);
~PersonalCalibrator();
/**
* generate the random locations for calibration
* @param num_points number of points to generate
*/
void generatePoints(int num_points);
// get the show window ready, it should be full-screen
void initialWindow();
// show the next calibration point
bool showNextPoint();
// wait for 0.5 second to receive the confirmation (mouse click) from user
void confirmClicking();
/**
* generate a polynomial function for the personal calibration
* @param prediction prediction from gaze estimation method
* @param ground_truth calibration points locations on the screen
* @param order the order of the polynomial function; 1 means linear (a least-squares sketch follows this header)
*/
void generateModel(std::vector<cv::Point2f> prediction, std::vector<cv::Point2f> ground_truth, int order=1);
/**
* save the personal calibration model
* @param file_path path to save the model
*/
void saveModel(std::string file_path);
/**
* load the personal calibration model
* @param file_path path to load the model
*/
void loadModel(std::string file_path);
/**
* return current calibration point location on the screen
* @return location on the screen
*/
cv::Point2f getCurrentPoint() {return points_[index_point_];}
// function to calculate the polynomial function
void calibratePolynomial();
private:
// indicator if the user click the mouse or not
bool is_click_;
// number of points for personal calibration
int num_points_;
// index for the current calibration points
int index_point_;
// vector to store the generated calibration points
std::vector<cv::Point2i> points_;
int screen_width_, screen_height_, center_radius_; // monitor width and height in pixel
// personal model
cv::Mat model_matrix_;
};
#endif //PERSONAL_CALIBRATOR_HPP
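generateModel() fits a polynomial mapping from the estimated on-screen positions to the clicked calibration targets; for order 1 this is a linear least-squares problem. A hedged sketch of such a fit with OpenCV (the function names and the exact parameterisation are illustrative, not necessarily what the toolkit stores in model_matrix_):

```cpp
#include <opencv2/opencv.hpp>
#include <vector>

// Fit x' = a0 + a1*x + a2*y and y' = b0 + b1*x + b2*y by least squares,
// mapping predicted gaze points to the calibration targets.
cv::Mat fitLinearCalibration(const std::vector<cv::Point2f>& prediction,
                             const std::vector<cv::Point2f>& ground_truth) {
    CV_Assert(prediction.size() == ground_truth.size() && prediction.size() >= 3);
    const int n = static_cast<int>(prediction.size());
    cv::Mat A(n, 3, CV_32F), bx(n, 1, CV_32F), by(n, 1, CV_32F);
    for (int i = 0; i < n; ++i) {
        A.at<float>(i, 0) = 1.f;
        A.at<float>(i, 1) = prediction[i].x;
        A.at<float>(i, 2) = prediction[i].y;
        bx.at<float>(i) = ground_truth[i].x;
        by.at<float>(i) = ground_truth[i].y;
    }
    cv::Mat ax, ay, model(2, 3, CV_32F);
    cv::solve(A, bx, ax, cv::DECOMP_SVD);   // least-squares solution for the x coordinate
    cv::solve(A, by, ay, cv::DECOMP_SVD);   // least-squares solution for the y coordinate
    ax.reshape(1, 1).copyTo(model.row(0));  // [a0 a1 a2]
    ay.reshape(1, 1).copyTo(model.row(1));  // [b0 b1 b2]
    return model;
}

// Applying the fitted model to a new prediction:
cv::Point2f applyCalibration(const cv::Mat& model, const cv::Point2f& p) {
    float x = model.at<float>(0,0) + model.at<float>(0,1)*p.x + model.at<float>(0,2)*p.y;
    float y = model.at<float>(1,0) + model.at<float>(1,1)*p.x + model.at<float>(1,2)*p.y;
    return cv::Point2f(x, y);
}
```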

60
install.sh Normal file

@ -0,0 +1,60 @@
#!/bin/bash
set -e
set -o pipefail
if [ $# -ne 0 ]
then
echo "Usage: install.sh"
exit 1
fi
# Essential Dependencies
echo "Installing Essential dependencies..."
sudo apt-get -y update
sudo apt-get -y install build-essential
sudo apt-get -y install cmake
sudo apt-get -y install libopenblas-dev liblapack-dev
sudo apt-get -y install git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev
sudo apt-get -y install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev
echo "Essential dependencies installed."
# OpenCV Dependency
echo "Downloading OpenCV..."
wget https://github.com/opencv/opencv/archive/3.4.0.zip
unzip 3.4.0.zip
cd opencv-3.4.0
mkdir -p build
cd build
echo "Installing OpenCV..."
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB=ON -D WITH_CUDA=OFF -D BUILD_SHARED_LIBS=ON ..
make -j4
sudo make install
cd ../..
rm 3.4.0.zip
sudo rm -r opencv-3.4.0
echo "OpenCV installed."
# dlib dependency
echo "Downloading dlib"
wget http://dlib.net/files/dlib-19.13.tar.bz2;
tar xf dlib-19.13.tar.bz2;
cd dlib-19.13;
mkdir -p build;
cd build;
echo "Installing dlib"
cmake ..;
cmake --build . --config Release;
sudo make install;
sudo ldconfig;
cd ../..;
rm -r dlib-19.13.tar.bz2
echo "dlib installed"
# Boost C++ Dependency
echo "Installing Boost..."
sudo apt-get -y install libboost-all-dev
echo "Boost installed."
# Dependencies for Caffe
sudo apt-get -y install libprotobuf-dev protobuf-compiler libgoogle-glog-dev libgflags-dev libhdf5-dev liblmdb-dev

BIN
pre-complile/opengaze.deb Normal file

Binary file not shown.

3
pre-complile/readme.txt Normal file

@ -0,0 +1,3 @@
This is the pre-compiled package for OpenGaze, including the OpenFace and Caffe libraries.
To use it, you still need to install the other dependencies: the NVIDIA driver, CUDA, and cuDNN.
To install, just run `sudo dpkg -i build_1.0-1_amd64.deb`.

273
src/face_detector.cpp Normal file

@ -0,0 +1,273 @@
#include <iostream>
#include <tbb/tbb.h>
// if we use OpenFace
#if USE_OPENFACE
#include <LandmarkCoreIncludes.h> // from "OpenFace-master/lib/local/LandmarkDetector/include/"
#endif
#include "face_detector.hpp"
using namespace std;
using namespace cv;
vector<LandmarkDetector::FaceModelParameters> det_parameters_;
vector<LandmarkDetector::CLNF> clnf_models_;
namespace opengaze {
FaceDetector::FaceDetector() {
method_type_ = Method::OpenFace;
}
FaceDetector::~FaceDetector() {}
void FaceDetector::initialize(int number_users=5) {
string root_path = OPENFACE_DIR;
root_path = root_path + "/build/bin";
//string openface_root = OpenFace_ROOT_DIR;
// (currently) hard-coded setting
num_faces_max_ = number_users;
detection_resize_rate_ = 2.0; // resize the input image to detect face, crucial for speed
detection_skip_frames_ = 1;
nonoverlap_threshold_ = 0.5;
certainty_threshold_ = 0.0; // higher is better: 1 is the best, -1 is the worst; detections below this are rejected
landmark_indices_[0] = 36; landmark_indices_[1] = 39; landmark_indices_[2] = 42;
landmark_indices_[3] = 45; landmark_indices_[4] = 48; landmark_indices_[5] = 54;
tracking_loss_limit_ = 10;
// initialize the tracking models
LandmarkDetector::FaceModelParameters det_parameter;
det_parameter.reinit_video_every = -1; // This is so that the model would not try re-initialising itself
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR;
det_parameter.model_location = root_path + "/model/main_clm_wild.txt";
det_parameter.haar_face_detector_location = root_path + "/classifiers/haarcascade_frontalface_alt.xml"; // ignored unless "curr_face_detector" selects the Haar detector
det_parameter.mtcnn_face_detector_location = root_path + "/model/mtcnn_detector/MTCNN_detector.txt";
det_parameter.use_face_template = true;
det_parameter.reinit_video_every = 5;
// det_parameter.quiet_mode = true; // not available for OpenFace v2.1
// For in-the-wild fitting these parameters are suitable
det_parameter.window_sizes_init = vector<int>(4);
det_parameter.window_sizes_init[0] = 15;
det_parameter.window_sizes_init[1] = 13;
det_parameter.window_sizes_init[2] = 11;
det_parameter.window_sizes_init[3] = 9;
det_parameter.sigma = 1.25;
det_parameter.reg_factor = 35;
det_parameter.weight_factor = 2.5;
det_parameter.num_optimisation_iteration = 10;
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
det_parameters_.push_back(det_parameter);
LandmarkDetector::CLNF clnf_model_ = LandmarkDetector::CLNF(det_parameter.model_location);
if (!clnf_model_.loaded_successfully){
cout << "ERROR: Could not load the landmark detector" << endl;
exit(-1);
}
clnf_model_.face_detector_HAAR.load(det_parameter.haar_face_detector_location);
clnf_model_.haar_face_detector_location = det_parameter.haar_face_detector_location;
clnf_model_.face_detector_MTCNN.Read(det_parameter.mtcnn_face_detector_location);
clnf_model_.mtcnn_face_detector_location = det_parameter.mtcnn_face_detector_location;
// If can't find MTCNN face detector, default to HOG one
if (det_parameter.curr_face_detector == LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR && clnf_model_.face_detector_MTCNN.empty()){
cout << "INFO: defaulting to HOG-SVM face detector" << endl;
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
}
clnf_models_.reserve(num_faces_max_);
clnf_models_.push_back(clnf_model_);
active_models_.push_back(false);
for(int i=1; i<num_faces_max_; ++i)
{
clnf_models_.push_back(clnf_model_);
active_models_.push_back(false);
det_parameters_.push_back(det_parameter);
}
// variables
frame_counter_ = 0;
current_face_id_ = 1;
for(int i=0; i<num_faces_max_; ++i) face_ids_.push_back(0);
}
void FaceDetector::reset() {
// reset all status
frame_counter_ = 0;
current_face_id_ = 1;
for(unsigned int model = 0; model < clnf_models_.size(); ++model)
{
active_models_[model] = false;
face_ids_[model] = 0;
clnf_models_[model].Reset();
}
}
void NonOverlapingDetections(const vector<LandmarkDetector::CLNF>& clnf_models, vector<cv::Rect_<float> >& face_detections){
// Go over the model and eliminate detections that are not informative (there already is a tracker there)
for (size_t model = 0; model < clnf_models.size(); ++model){
// See if the detections intersect
cv::Rect_<float> model_rect = clnf_models[model].GetBoundingBox();
for (int detection=face_detections.size()-1; detection >= 0; --detection)
{
double intersection_area = (model_rect & face_detections[detection]).area();
double union_area = model_rect.area() + face_detections[detection].area() - 2 * intersection_area;
// If the model is already tracking what we're detecting ignore the detection, this is determined by amount of overlap
if (intersection_area / union_area > 0.5)
{
face_detections.erase(face_detections.begin() + detection);
}
}
}
}
double NonOverlapingDetection(const LandmarkDetector::CLNF &ref_model, const LandmarkDetector::CLNF &tgt_model)
{
Rect_<double> ref_rect = ref_model.GetBoundingBox();
Rect_<double> tgt_rect = tgt_model.GetBoundingBox();
double intersection_area = (ref_rect & tgt_rect).area();
double union_area = ref_rect.area() + tgt_rect.area() - 2 * intersection_area;
return intersection_area/union_area;
}
void FaceDetector::track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output) {
if(input_img.channels() < 3){
cout << "The input must be a color image!" <<endl;
exit(EXIT_FAILURE);
}
Mat_<uchar> grayscale_image;
cvtColor(input_img, grayscale_image, CV_BGR2GRAY);
bool all_models_active = true;
for(unsigned int model = 0; model < clnf_models_.size(); ++model)
{
if(!active_models_[model])
{
all_models_active = false;
break;
}
}
// Detect faces
// Get the detections (every Xth frame and when there are free models available for tracking)
std::vector<Rect_<float> > face_detections;
cv::Mat small_grayscale_image_;
if (frame_counter_ % detection_skip_frames_ == 0 && !all_models_active) {
// resized image for faster face detection
if (detection_resize_rate_ != 1) resize(grayscale_image, small_grayscale_image_,
Size(), 1.0/detection_resize_rate_, 1.0/detection_resize_rate_);
else small_grayscale_image_ = grayscale_image;
if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR){
vector<float> confidences;
LandmarkDetector::DetectFacesHOG(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HOG, confidences);
}
else if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HAAR_DETECTOR){
LandmarkDetector::DetectFaces(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HAAR);
}
else{
vector<float> confidences;
LandmarkDetector::DetectFacesMTCNN(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_MTCNN, confidences);
}
// resize the face detections back to the original image scale
if (detection_resize_rate_ != 1) {
for(auto& face_detection : face_detections) {
face_detection.x *= detection_resize_rate_;
face_detection.y *= detection_resize_rate_;
face_detection.width *= detection_resize_rate_;
face_detection.height *= detection_resize_rate_;
}
}
// Keep only non-overlapping detections (also convert to a concurrent vector)
NonOverlapingDetections(clnf_models_, face_detections);
}
vector< tbb::atomic<bool> > face_detections_used(face_detections.size());
// Go through every model and update the tracking
tbb::parallel_for(0, (int)clnf_models_.size(), [&](int model) {
//for (unsigned int model = 0; model < clnf_models_.size(); ++model) {
bool detection_success = false;
// If the current model has failed more than threshold, remove it
if (clnf_models_[model].failures_in_a_row > tracking_loss_limit_) {
active_models_[model] = false;
clnf_models_[model].Reset();
}
// If the model is inactive reactivate it with new detections
if (!active_models_[model]){
for (size_t detection_ind = 0; detection_ind < face_detections.size(); ++detection_ind)
{
// if it was not taken by another tracker take it (if it is false swap it to true and enter detection, this makes it parallel safe)
if (!face_detections_used[detection_ind].compare_and_swap(true, false)) {
// Reinitialise the model
clnf_models_[model].Reset();
// This ensures that a wider window is used for the initial landmark localisation
clnf_models_[model].detection_success = false;
LandmarkDetector::DetectLandmarksInVideo(input_img, face_detections[detection_ind], clnf_models_[model], det_parameters_[model], grayscale_image);
// This activates the model
active_models_[model] = true;
face_ids_[model] = current_face_id_;
current_face_id_++;
// break out of the loop as the tracker has been reinitialised
break;
}
}
}
else
{
// The actual facial landmark detection / tracking
detection_success = LandmarkDetector::DetectLandmarksInVideo(input_img, clnf_models_[model], det_parameters_[model], grayscale_image);
}
//}
});
// Go through every model and check the results
for(size_t model=0; model<clnf_models_.size(); ++model){
// Check if the alignment result is overlapping previous models
bool overlapping = false;
for(size_t model_ref=0; model_ref<model; ++model_ref){
double overlap_ratio = NonOverlapingDetection(clnf_models_[model_ref], clnf_models_[model]);
if(overlap_ratio > nonoverlap_threshold_) overlapping = true;
}
if(overlapping){
active_models_[model] = false;
face_ids_[model] = 0;
clnf_models_[model].Reset();
continue;
}
if(clnf_models_[model].detection_certainty < certainty_threshold_) continue;
Sample temp;
temp.face_data.certainty = clnf_models_[model].detection_certainty;
temp.face_data.face_id = face_ids_[model];
temp.face_data.face_bb.x = (int)clnf_models_[model].GetBoundingBox().x;
temp.face_data.face_bb.y = (int)clnf_models_[model].GetBoundingBox().y;
temp.face_data.face_bb.height = (int)clnf_models_[model].GetBoundingBox().height;
temp.face_data.face_bb.width = (int)clnf_models_[model].GetBoundingBox().width;
for(int p=0; p<6; p++){
int num_p = landmark_indices_[p];
temp.face_data.landmarks[p] = Point2d(
clnf_models_[model].detected_landmarks.at<float>(num_p,0),
clnf_models_[model].detected_landmarks.at<float>(num_p+68,0)
);
}
output.emplace_back(temp);
}
}
}

112
src/gaze_estimator.cpp Normal file

@ -0,0 +1,112 @@
#include <iostream>
#include "gaze_estimator.hpp"
using namespace std;
using namespace cv;
namespace opengaze{
GazeEstimator::GazeEstimator() {
}
GazeEstimator::~GazeEstimator() {}
void GazeEstimator::setRootPath(std::string root_path) {
normalizer_.loadFaceModel(root_path);
}
void GazeEstimator::estimateGaze(cv::Mat input_image, std::vector<opengaze::Sample> &outputs) {
face_detector_.track_faces(input_image, outputs); // detect faces and facial landmarks
for (int i=0; i< outputs.size(); ++i) {
// estimate head pose first, no matter what gaze estimation method, head pose is estimated here
normalizer_.estimateHeadPose(outputs[i].face_data.landmarks, outputs[i]);
if (method_type_ == Method::MPIIGaze){
// if we use face model
if (input_type_ == InputType::face){
Mat face_patch = normalizer_.normalizeFace(input_image, outputs[i]);
//outputs[i].face_patch_data.debug_img = face_patch;
Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(face_patch); // gaze estimates in normalization space
Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].face_patch_data.face_rot); // convert gaze to camera coordinate system
gaze_3d.copyTo(outputs[i].gaze_data.gaze3d);
}
else if (input_type_ == InputType::eye) {
vector<cv::Mat> eye_images = normalizer_.normalizeEyes(input_image, outputs[i]); // generate normalized eye images
// for left eye
Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(eye_images[0]);
Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].eye_data.leye_rot);
gaze_3d.copyTo(outputs[i].gaze_data.lgaze3d);
// for right eye: flip horizontally, since the model expects left-eye-like input
Mat flip_right;
flip(eye_images[1], flip_right, 1);
gaze_norm = gaze_predictor_.predictGazeMPIIGaze(flip_right);
gaze_norm.x *= -1.0; // flip the estimated gaze back to the right eye's frame
gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].eye_data.reye_rot); // convert gaze to the camera coordinate system
gaze_3d.copyTo(outputs[i].gaze_data.rgaze3d);
}
}
else if (method_type_ == Method::OpenFace) {
cout << "Please use gaze estimation method MPIIGaze." << endl;
exit(EXIT_FAILURE);
}
}
}
void GazeEstimator::getImagePatch(cv::Mat input_image, std::vector<opengaze::Sample> &outputs) {
face_detector_.track_faces(input_image, outputs); // detect faces and facial landmarks
for (int i=0; i< outputs.size(); ++i) {
// estimate head pose first, no matter what gaze estimation method, head pose is estimated here
normalizer_.estimateHeadPose(outputs[i].face_data.landmarks, outputs[i]);
if (method_type_ == Method::MPIIGaze){
// if we use face model
if (input_type_ == InputType::face){
outputs[i].face_patch_data.face_patch = normalizer_.normalizeFace(input_image, outputs[i]);
}
else if (input_type_ == InputType::eye) {
vector<cv::Mat> eye_images = normalizer_.normalizeEyes(input_image, outputs[i]); // generate eye images
outputs[i].eye_data.leye_img = eye_images[0];
outputs[i].eye_data.reye_img = eye_images[1];
}
}
else if (method_type_ == Method::OpenFace) {
cout << "Please use method MPIIGaze for image patch extraction." << endl;
exit(EXIT_FAILURE);
}
}
}
void GazeEstimator::setMethod(Method input_method_type, const std::vector<std::string> arguments={}) {
method_type_ = input_method_type;
if (method_type_ == Method::MPIIGaze) {
gaze_predictor_.initiaMPIIGaze(arguments);
if (arguments.size() < 3) // need at least the model paths and the input type
input_type_ = InputType::face;
else {
if (arguments[2] == "face"){
input_type_ = InputType::face;
normalizer_.setParameters(1600, 1000, 224, 224);
}
else if (arguments[2] == "eye") {
input_type_ = InputType::eye;
normalizer_.setParameters(960, 600, 60, 36);
}
}
}
}
void GazeEstimator::setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist) {
camera_matrix_ = move(camera_matrix);
camera_dist_ = move(camera_dist);
normalizer_.setCameraMatrix(camera_matrix_);
}
void GazeEstimator::initialFaceDetector(int number_users){
face_detector_.initialize(number_users);
face_detector_.setMethodType(FaceDetector::Method::OpenFace);
}
};

160
src/gaze_predictor.cpp Normal file

@ -0,0 +1,160 @@
#include "gaze_predictor.hpp"
#include <string>
// caffe
#define USE_OPENCV 1
#include <caffe/caffe.hpp>
#include <caffe/util/io.hpp>
#include <caffe/blob.hpp>
#include <caffe/layers/pose_data_layer.hpp>
#include <caffe/layers/memory_data_layer.hpp>
using namespace cv;
using namespace std;
using namespace caffe;
namespace opengaze {
caffe::Net<float> *p_net_;
GazePredictor::GazePredictor() {
}
GazePredictor::~GazePredictor() {
delete p_net_;
}
void GazePredictor::initiaMPIIGaze(const std::vector<std::string> arguments={}) {
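// Expected argument layout (as assembled in the OpenGaze constructor): [0] Caffe
// prototxt path, [1] caffemodel path, [2] input type "face"/"eye", [3] GPU id.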
p_net_ = nullptr;
string param_path = arguments[0];
string model_path = arguments[1];
int gpu_id = stoi(arguments[3]);
// Set GPU (or CPU)
/*caffe::Caffe::set_mode(caffe::Caffe::CPU);
cout << "Using CPU model" << endl;*/
caffe::Caffe::set_mode(caffe::Caffe::GPU);
cout << "Using GPU with id " << gpu_id << endl;
Caffe::SetDevice(gpu_id);
cout << "load caffe model parameters from " << param_path << endl;
// create CNN
p_net_ = new Net<float>(param_path, caffe::TEST);
cout << "load caffe model from " << model_path << endl;
// load pre-trained weights (binary proto)
p_net_->CopyTrainedLayersFrom(model_path);
// determine the model type based on the parameter file name
size_t i = param_path.rfind("/", param_path.length());
string filename;
if (i != string::npos)
filename = param_path.substr(i+1, param_path.length() - i);
if (!filename.compare(string("lenet_test.prototxt")))
model_type_ = 1;
else if (!filename.compare(string("googlenet.prototxt")))
model_type_ = 2;
else if (!filename.compare(string("alexnet_eye.prototxt")))
model_type_ = 3;
else if (!filename.compare(string("alexnet_face.prototxt")))
model_type_ = 4; // the single face model
else if (!filename.compare(string("alexnet_face_448.prototxt")))
model_type_ = 4; // the single face model
else{
model_type_ = 0;
cout<<"Cannot define the type of model!"<<endl;
exit(EXIT_FAILURE);
}
}
// gaze estimation with single face input image and with MPIIGaze method
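// The normalized input patch is fed to the network through a MemoryDataLayer named
// "data"; the network outputs a 2D gaze angle (theta, phi) from one of several
// possible output blobs depending on the loaded model, and the angle is converted
// to a 3D unit vector at the end of this function.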
Point3f GazePredictor::predictGazeMPIIGaze(cv::Mat input_image) {
vector<Mat> img_vec;
img_vec.push_back(input_image);
Vec2f gaze_norm_2d;
Point3f gaze_norm_3d;
std::vector<int> labelVector;
labelVector.clear();
labelVector.push_back(1);
labelVector.push_back(1);
float loss = 0.0;
caffe::shared_ptr<caffe::MemoryDataLayer<float> > data_layer_;
data_layer_ = boost::static_pointer_cast<MemoryDataLayer<float> >(p_net_->layer_by_name("data"));
data_layer_->AddMatVector(img_vec, labelVector);
// run network
p_net_->ForwardPrefilled(&loss);
if (model_type_==1)
{
// get output layer "ip2"
float *temp = (float*)p_net_->blob_by_name("ip2")->cpu_data();
// copy estimated gaze vector
gaze_norm_2d.val[0] = temp[0];
gaze_norm_2d.val[1] = temp[1];
temp = nullptr;
}
else if (model_type_==2)// if it is googlenet
{
float *temp1 = (float*)p_net_->blob_by_name("loss1/classifier")->cpu_data();
float *temp2 = (float*)p_net_->blob_by_name("loss2/classifier")->cpu_data();
float *temp3 = (float*)p_net_->blob_by_name("loss3/classifier")->cpu_data();
// average the output of three output values
gaze_norm_2d.val[0] = (temp1[0]+temp2[0]+temp3[0]) / 3.0f;
gaze_norm_2d.val[1] = (temp1[1]+temp2[1]+temp3[1]) / 3.0f;
temp1 = nullptr;
temp2 = nullptr;
temp3 = nullptr;
}
else if (model_type_==3)// if it is alexnet (eye model)
{
float *temp;
temp = (float*)p_net_->blob_by_name("fc8")->cpu_data();// blob name can be fc8
if (temp == NULL)
temp = (float*)p_net_->blob_by_name("gaze_output")->cpu_data(); //blob name can be gaze_output
if (temp == NULL) {
cout << "ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output" << endl;
exit(EXIT_FAILURE);
}
// copy estimated gaze vector
gaze_norm_2d.val[0] = temp[0];
gaze_norm_2d.val[1] = temp[1];
temp = NULL;
}
else if (model_type_==4)// if it is alexnet (face model)
{
float *temp;
temp = (float*)p_net_->blob_by_name("fc8")->cpu_data();// blob name can be fc8
if (temp == NULL)
temp = (float*)p_net_->blob_by_name("gaze_output")->cpu_data(); //blob name can be gaze_output
if (temp == NULL) {
cout << "ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output" << endl;
exit(EXIT_FAILURE);
}
// copy estimated gaze vector
gaze_norm_2d.val[0] = temp[0];
gaze_norm_2d.val[1] = temp[1];
//// get the feature out
//temp = (float*)p_net_->blob_by_name("fc6_gaze")->cpu_data();
//for (int num_f=0; num_f<4096; ++num_f)
//{
// feature[num_f] = temp[num_f];
//}
temp = NULL;
}
float theta = gaze_norm_2d.val[0];
float phi = gaze_norm_2d.val[1];
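// (theta, phi) are the pitch and yaw gaze angles predicted in the normalized
// space; the corresponding unit gaze vector is
// g = (-cos(theta)*sin(phi), -sin(theta), -cos(theta)*cos(phi)),
// which points along the negative z-axis when both angles are zero.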
gaze_norm_3d.x = (-1.0f)*cos(theta)*sin(phi);
gaze_norm_3d.y = (-1.0f)*sin(theta);
gaze_norm_3d.z = (-1.0f)*cos(theta)*cos(phi);
return gaze_norm_3d;
}
}

185
src/input_handler.cpp Normal file
View file

@ -0,0 +1,185 @@
#include "input_handler.hpp"
#include <iostream>
#include <cstdlib>
using namespace cv;
using namespace std;
#if WIN32
#include <windows.h>
#else
#include <X11/Xlib.h>
#endif
namespace opengaze {
void InputHandler::getScreenResolution(int &width, int &height) {
#if WIN32
width = (int) GetSystemMetrics(SM_CXSCREEN);
height = (int) GetSystemMetrics(SM_CYSCREEN);
#else
Display* disp = XOpenDisplay(NULL);
Screen* scrn = DefaultScreenOfDisplay(disp);
width = scrn->width;
height = scrn->height;
#endif
}
InputHandler::InputHandler(){
input_type_ = InputType::Camera;// default input type
camera_id_ = 0;
getScreenResolution(screen_width_, screen_height_);
}
InputHandler::~InputHandler(){}
void InputHandler::initialize()
{
if (input_type_ == InputType::Camera){
cap_.open(camera_id_);
if(!cap_.isOpened()) { // open Camera
cout << "Could not open Camera with id " << camera_id_ << endl;
std::exit(EXIT_FAILURE);
}
setFrameSize(1280, 720); // 800*600, 1280*720, 1920*1080,
}
else if (input_type_ == InputType::Video){
cap_.open(input_file_video_name_);
if(!cap_.isOpened()) { // open Camera
cout << "Error: Could not open video file " << input_file_video_name_ << endl;
std::exit(EXIT_FAILURE);
}
}
else if (input_type_ == InputType::Directory) {
if (!boost::filesystem::is_directory(input_path_)){
cout << "Error: The input must be a directory, but it is " << input_path_ << endl;
std::exit(EXIT_FAILURE);
}
current_itr_ = boost::filesystem::directory_iterator(input_path_);
}
else if (input_type_ == InputType::Memory) {}
is_reach_end_ = false;
}
void InputHandler::setFrameSize(int frame_width, int frame_height){
cap_.set(cv::CAP_PROP_FRAME_HEIGHT, frame_height);//720 1080
cap_.set(cv::CAP_PROP_FRAME_WIDTH, frame_width);//1280 1980
double dWidth = cap_.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
double dHeight = cap_.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
cout << "Input frame size is : " << dWidth << " x " << dHeight << endl;
}
Mat InputHandler::getNextSample() {
Mat frame;
if (input_type_ == InputType::Camera) cap_ >> frame;
else if (input_type_ == InputType::Video) {
cap_ >> frame;
if (frame.empty()) // we reach the end of video
is_reach_end_ = true;
}
else if (input_type_ == InputType::Directory) {
if (current_itr_ == boost::filesystem::directory_iterator()) { // no more files to read
is_reach_end_ = true;
return frame;
}
boost::filesystem::path file_path = current_itr_->path();
if (file_path.extension() != ".jpg" && file_path.extension() != ".png" && file_path.extension() != ".bmp"){
cout << "Error: The input file is not an image file with extension jpg, png or bmp!" << endl;
cout << "The input file name is: " << file_path.string() << endl;
std::exit(EXIT_FAILURE);
}
cout << "process image " << file_path << endl;
frame = imread(file_path.string());
++current_itr_; // advance the iterator so the next call reads the next file
}
else if (input_type_ == InputType::Memory) {}
return frame;
}
bool InputHandler::closeInput() {
if (input_type_ == InputType::Camera || input_type_ == InputType::Video){
cap_.release();
is_reach_end_ = true;
}
return true;
}
void InputHandler::setInput(std::string input_path) {
if (input_type_ == InputType::Directory){
input_path_ = move(input_path);
}
else if (input_type_ == InputType::Video){
input_file_video_name_ = move(input_path);
}
}
void InputHandler::readScreenConfiguration(string calib_file) {
FileStorage fs_disp(calib_file, FileStorage::READ);
fs_disp["monitor_W"] >> monitor_W_;
fs_disp["monitor_H"] >> monitor_H_;
fs_disp["monitor_R"] >> monitor_R_;
fs_disp["monitor_T"] >> monitor_T_;
// compute monitor plane
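// The four screen corners are defined in the monitor coordinate system (origin at a
// screen corner, x along monitor_W_, y along monitor_H_, z out of the screen plane)
// and are transformed into the camera coordinate system via the extrinsics
// monitor_R_ and monitor_T_; the plane normal in camera coordinates is monitor_R_ * (0, 0, 1).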
Vec3f corners[4];
corners[0] = Vec3f(0.0, 0.0, 0.0);
corners[1] = Vec3f(monitor_W_, 0.0, 0.0);
corners[2] = Vec3f(0.0, monitor_H_, 0.0);
corners[3] = Vec3f(monitor_W_, monitor_H_, 0.0);
for(int i=0; i<4; i++){
Mat corners_cam = monitor_R_ * Mat(corners[i]) + monitor_T_;
corners_cam.copyTo(monitor_corners_[i]);
}
Vec3f normal = Vec3f(0.0, 0.0, 1.0); // normal direction
monitor_normal_ = monitor_R_ * Mat(normal);
monitor_normal_.convertTo(monitor_normal_, CV_32F);
}
void InputHandler::readCameraConfiguration(string calib_file){
cout << endl << "Reading calibration information from : " << calib_file << endl;
FileStorage fs;
fs.open(calib_file, FileStorage::READ);
fs["camera_matrix"] >> camera_matrix_;
fs["dist_coeffs"] >> camera_distortion_;
fs.release();
}
void InputHandler::projectToDisplay(std::vector<opengaze::Sample> &inputs, bool is_face_model) {
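// Intersect each 3D gaze ray with the monitor plane: the ray starts at the face
// center (face model) or at each eye position (eye model); for the eye model the
// final 2D gaze point is the average of the left- and right-eye intersections.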
for(auto & sample : inputs) {
if (is_face_model) {
Vec3f face_center(sample.face_patch_data.face_center.at<float>(0), sample.face_patch_data.face_center.at<float>(1), sample.face_patch_data.face_center.at<float>(2));
sample.gaze_data.gaze2d = mapToDisplay(face_center, sample.gaze_data.gaze3d);
}
else {
Vec3f leye_pose(sample.eye_data.leye_pos.at<float>(0),sample.eye_data.leye_pos.at<float>(1),sample.eye_data.leye_pos.at<float>(2));
Vec3f reye_pose(sample.eye_data.reye_pos.at<float>(0),sample.eye_data.reye_pos.at<float>(1),sample.eye_data.reye_pos.at<float>(2));
sample.gaze_data.lgaze2d = mapToDisplay(leye_pose, sample.gaze_data.lgaze3d);
sample.gaze_data.rgaze2d = mapToDisplay(reye_pose, sample.gaze_data.rgaze3d);
float gaze_x = (sample.gaze_data.lgaze2d.x + sample.gaze_data.rgaze2d.x) / 2.0f;
float gaze_y = (sample.gaze_data.lgaze2d.y + sample.gaze_data.rgaze2d.y) / 2.0f;
sample.gaze_data.gaze2d.x = gaze_x;
sample.gaze_data.gaze2d.y = gaze_y;
}
}
}
cv::Point2f InputHandler::mapToDisplay(Vec3f origin, Vec3f gaze_vec) {
Point2f gaze_on_screen;
// compute intersection
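// Ray/plane intersection: with plane normal n, a point on the plane c0 (a monitor
// corner) and the gaze ray o + t*g, the intersection parameter is
// t = n.(c0 - o) / n.g; the hit point is then expressed in the monitor coordinate
// system and divided by the physical screen size to give normalized coordinates.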
float gaze_len = (float)(monitor_normal_.dot(Mat(monitor_corners_[0]-origin))/monitor_normal_.dot(Mat(gaze_vec)));
Vec3f gaze_pos_cam = origin + gaze_len * gaze_vec;
// convert to the monitor coordinate system
Mat gaze_pos_ = monitor_R_.inv() * (Mat(gaze_pos_cam) - monitor_T_);
Vec3f gaze_pos_3d;
gaze_pos_.copyTo(gaze_pos_3d);
gaze_on_screen.x = gaze_pos_3d.val[0] / monitor_W_;
gaze_on_screen.y = gaze_pos_3d.val[1] / monitor_H_;
return gaze_on_screen;
}
}

184
src/normalizer.cpp Normal file
View file

@ -0,0 +1,184 @@
#include "normalizer.hpp"
using namespace cv;
using namespace std;
namespace opengaze {
Normalizer::Normalizer() {
// parameters for data normalization
focal_norm_ = 1600;
distance_norm_ = 1000; // 600 500 1000
roiSize_norm_ = cv::Size(224, 224); // 224 448
cam_norm_ = (Mat_<float>(3,3) << focal_norm_, 0, roiSize_norm_.width/2, 0, focal_norm_, roiSize_norm_.height/2.0f, 0, 0, 1.0f);
}
Normalizer::~Normalizer() {}
void Normalizer::setParameters(int focal_length, int distance, int img_w, int img_h){
// parameters for data normalization
focal_norm_ = focal_length;
distance_norm_ = distance; // 600 500 1000
roiSize_norm_ = cv::Size(img_w, img_h); // 224 448
cam_norm_ = (Mat_<float>(3,3) << focal_norm_, 0, roiSize_norm_.width/2, 0, focal_norm_, roiSize_norm_.height/2.0f, 0, 0, 1.0f);
}
// convert vector from normalization space to camera coordinate system
cv::Mat Normalizer::cvtToCamera(cv::Point3f input, const Mat cnv_mat) {
// convert to the original camera coordinate system
Vec3f gaze_v(input.x, input.y, input.z);
// apply de-normalization
Mat gaze_v_cam = cnv_mat.inv() * Mat(gaze_v);
gaze_v_cam = gaze_v_cam / norm(gaze_v_cam);
return gaze_v_cam;
}
cv::Mat Normalizer::normalizeFace(Mat input_image, opengaze::Sample &sample) {
// head rotation matrix (used below to build the normalizing rotation)
Mat HR;
cv::Rodrigues(sample.face_patch_data.head_r, HR);
float distance = (float)norm(sample.face_patch_data.face_center); // original distance
float z_scale = distance_norm_ / distance; // scaling factor
cv::Mat scaleMat;
scaleMat = (Mat_<float>(3,3) << 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, z_scale);// scaling matrix
scaleMat.convertTo(scaleMat, CV_32F);
// get the look_at matrix
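// Build the normalizing rotation: "forward" points from the camera to the face
// center, "down" is chosen perpendicular to forward using the head's x-axis (hRx),
// and "right" completes the orthonormal basis, so the head appears roughly upright
// (roll-free) in the warped patch.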
Mat hRx = HR.col(0);
Mat forward = sample.face_patch_data.face_center /distance;
Mat down = forward.cross(hRx);
down = down / norm(down);
Mat right = down.cross(forward);
right = right / norm(right);
// rotation matrix
sample.face_patch_data.face_rot = Mat(3, 3, CV_32F);
right.copyTo(sample.face_patch_data.face_rot.col(0));
down.copyTo(sample.face_patch_data.face_rot.col(1));
forward.copyTo(sample.face_patch_data.face_rot.col(2));
sample.face_patch_data.face_rot = sample.face_patch_data.face_rot.t(); // there is no scaling
sample.face_patch_data.face_rot.convertTo(sample.face_patch_data.face_rot, CV_32F);
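// The perspective warp below is W = cam_norm_ * S * R * camera_matrix_^-1: undo the
// real camera projection, rotate and scale the scene so the face sits at
// distance_norm_ in front of the virtual normalization camera, and re-project.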
Mat warpMat = cam_norm_ * (scaleMat * sample.face_patch_data.face_rot) * camera_matrix_.inv();// transformation matrix
// crop image and copy the equalized image
Mat face_patch;
warpPerspective(input_image, face_patch, warpMat, roiSize_norm_);
return face_patch;
}
vector<cv::Mat> Normalizer::normalizeEyes(cv::Mat input_image, Sample &sample){
vector<cv::Mat> eye_images;
Mat img_gray;
cvtColor(input_image, img_gray, CV_BGR2GRAY);
Mat eye_center;
Mat* eye_rot;
for (int i=0; i<2; ++i) {
if (i==0){
eye_center = sample.eye_data.leye_pos;
eye_rot = &sample.eye_data.leye_rot;
}
else {
eye_center = sample.eye_data.reye_pos;
eye_rot = &sample.eye_data.reye_rot;
}
float distance = (float)norm(eye_center);
float z_scale = distance_norm_ / distance;
Mat scaleMat;
scaleMat = (Mat_<float>(3,3) << 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, z_scale);// scaling matrix
scaleMat.convertTo(scaleMat, CV_32F);
// get the look_at matrix
Mat HR;
cv::Rodrigues(sample.face_patch_data.head_r, HR);
Mat hRx = HR.col(0);
Mat forward = eye_center/distance;
Mat down = forward.cross(hRx);
down = down / norm(down);
Mat right = down.cross(forward);
right = right / norm(right);
// rotation matrix
*eye_rot = Mat(3, 3, CV_32F);
right.copyTo(eye_rot->col(0));
down.copyTo(eye_rot->col(1));
forward.copyTo(eye_rot->col(2));
*eye_rot = eye_rot->t(); // there is no scaling
Mat warpMat = cam_norm_ * (scaleMat * *eye_rot) * camera_matrix_.inv();// transformation matrix
// crop image and copy the equalized image
Mat eye_patch, eye_patch_equal;
warpPerspective(img_gray, eye_patch, warpMat, roiSize_norm_);
equalizeHist(eye_patch, eye_patch_equal);
eye_images.push_back(eye_patch_equal);
}
eye_rot = nullptr;
return eye_images;
}
void Normalizer::loadFaceModel(std::string path) {
string face_model_file_path = path + "/content/model/face_model.yml";
//
cout << endl << "Loading 3D face model for head pose estimation from : " << face_model_file_path << endl;
FileStorage fs;
if (!fs.open(face_model_file_path, FileStorage::READ)) {
cout << "Cannot load the 3D face model!" << endl;
exit(EXIT_FAILURE);
}
fs["face_model"] >> face_model_mat_;
for(int p=0; p<6; ++p)
face_model_.emplace_back(Point3d(face_model_mat_.at<float>(0,p),
face_model_mat_.at<float>(1,p),
face_model_mat_.at<float>(2,p)));
fs.release();
}
// estimate head pose via model fitting
void Normalizer::estimateHeadPose(const Point2f *landmarks, opengaze::Sample &sample) {
Mat zero_dist = Mat::zeros(1, 5, CV_64F);
vector<Point2d> landmarks_orig(landmarks,
landmarks + 6);
cv::Mat head_r, head_t;
camera_matrix_.convertTo(camera_matrix_, CV_64F); // input must be double type
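// Fit the 6-point 3D face model to the detected 2D landmarks: a first solvePnP call
// with SOLVEPNP_DLS provides an initial pose, which the second (iterative) call
// refines using that pose as the extrinsic guess.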
solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, false, SOLVEPNP_DLS);
solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, true);
head_r.convertTo(sample.face_patch_data.head_r, CV_32F);
head_t.convertTo(sample.face_patch_data.head_t, CV_32F);
camera_matrix_.convertTo(camera_matrix_, CV_32F);
// get the face center in 3D space
Mat HR;
cv::Rodrigues(sample.face_patch_data.head_r, HR);
Mat HT = repeat(sample.face_patch_data.head_t, 1, 6);
Mat Fc;
add(HR*face_model_mat_, HT, Fc);
Mat face_center = (Fc.col(0) + Fc.col(1) + Fc.col(2) + Fc.col(3) + Fc.col(4) + Fc.col(5)) / 6.0; // face center
face_center.copyTo(sample.face_patch_data.face_center); // copy to output
sample.face_patch_data.face_center.convertTo(sample.face_patch_data.face_center, CV_32F);
Mat le = 0.5*(Fc.col(2) + Fc.col(3)); // left eye
le.copyTo(sample.eye_data.leye_pos);
sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
Mat re = 0.5*(Fc.col(0) + Fc.col(1)); // right eye
re.copyTo(sample.eye_data.reye_pos);
sample.eye_data.reye_pos.convertTo(sample.eye_data.reye_pos, CV_32F);
}
void Normalizer::setCameraMatrix(cv::Mat input) {
camera_matrix_ = input;
camera_matrix_.convertTo(camera_matrix_, CV_32F);
}
}

502
src/opengaze.cpp Normal file
View file

@ -0,0 +1,502 @@
#include "opengaze.hpp"
#include <iostream>
#include <time.h>
using namespace std;
using namespace cv;
namespace opengaze {
double clockToMilliseconds(clock_t ticks){
// units/(units/time) => time (seconds) * 1000 = milliseconds
return (ticks/(double)CLOCKS_PER_SEC)*1000.0;
}
OpenGaze::OpenGaze(int argc, char** argv){
namespace fs = boost::filesystem;
namespace po = boost::program_options;
// default value of parameters
camera_id_ = 0;
input_type_ = InputHandler::InputType::Camera;
is_face_model_ = true;
string gaze_method;
string gpu_id;
string temp;
int number_user;
fs::path calib_camera, calib_screen, cnn_param_path, cnn_model_path;
// parse command line options for input/output paths
po::options_description command_line("Command line options");
command_line.add_options()
("root_dir,r", po::value<string>(), "configuration file")
("input_type,t", po::value<string>(), "input type (camera, video file, directory)")
("gaze_method,g", po::value<string>(), "gaze estimation method, could be MPIIGaze or OpenFace")
("input,i", po::value<string>(), "parameter for input")
("output,o", po::value<string>(), "output directory")
("calib_camera", po::value<string>(), "camera calibration file")
("calib_screen", po::value<string>(), "camera-screen calibration file")
("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
("debug,d", "show debug output")
("face_model,f", "to use face model or not")
("save_video,s", "save output visualization or not")
("number_user,n", "the maximum number of users in the input image")
;
cout << "Parsing command line options..." << endl;
po::variables_map vm_command;
po::store(po::parse_command_line(argc, argv, command_line), vm_command);
po::notify(vm_command);
// parse config file for data paths
po::options_description config_file("Config file options");
config_file.add_options()
("root_dir,r", po::value<string>(), "configuration file")
("input_type, t", po::value<string>(), "input type (camera, video file, directory)")
("input, i", po::value<string>(), "parameter for input")
("output,o", po::value<string>(), "output directory")
("cnn_param_path", po::value<string>(), "Caffe prototxt path")
("cnn_model_path", po::value<string>(), "Caffe model path")
("calib_camera", po::value<string>(), "camera calibration file")
("calib_screen", po::value<string>(), "camera-screen calibration file")
("gaze_method", po::value<string>(), "gaze estimation method, could be cnn or openface")
("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
("face_model", po::value<bool>(), "face model or not")
("save_video", po::value<bool>(), "save output visualization or not")
("number_user", po::value<string>(), "the maximum number of users in the input image")
;
fs::path root_dir, config_path;
if(vm_command.count("root_dir")) root_dir = vm_command["root_dir"].as<string>();
else {
root_dir = OPENGAZE_CON_DIR;
cout << "No root directory is found, default value " << root_dir << " will be use" << endl;
}
config_path = root_dir / "default.cfg";
cout << "Reading config from \"" << config_path.string() << "\""<< endl;
if(!fs::exists(config_path)){
cout << "Config file does not exist" << endl;
exit(EXIT_FAILURE);
}
ifstream settings_file(config_path.string());
po::variables_map vm_config;
po::store(po::parse_config_file(settings_file , config_file), vm_config);
po::notify(vm_config);
if(vm_command.count("gpu_id")) gpu_id = vm_command["gpu_id"].as<string>();
else if (vm_config.count("gpu_id")) gpu_id = vm_config["gpu_id"].as<string>();
else gpu_id = "0";
// CNN paramters
if(vm_command.count("cnn_param_path")) cnn_param_path = vm_command["cnn_param_path"].as<string>();
else if (vm_config.count("cnn_param_path")) cnn_param_path = vm_config["cnn_param_path"].as<string>();
else cnn_param_path = root_dir / "content/caffeModel/alexnet_face.prototxt";
if(vm_command.count("cnn_model_path")) cnn_model_path = vm_command["cnn_model_path"].as<string>();
else if (vm_config.count("cnn_model_path")) cnn_model_path = vm_config["cnn_model_path"].as<string>();
else cnn_model_path = root_dir / "content/caffeModel/alexnet_face.caffemodel";
// check input requirements
if(vm_command.count("gaze_method")) gaze_method = vm_command["gaze_method"].as<string>();
else if (vm_config.count("gaze_method")) gaze_method = vm_config["gaze_method"].as<string>();
else gaze_method = "MPIIGaze";
if(vm_command.count("calib_screen")) calib_screen = vm_command["calib_screen"].as<string>();
else if (vm_config.count("calib_screen")) calib_screen = vm_config["calib_screen"].as<string>();
else calib_screen = root_dir / "content/calib/monitor_laptop.yml";
if(vm_command.count("calib_camera")) calib_camera = vm_command["calib_camera"].as<string>();
else if (vm_config.count("calib_camera")) calib_camera = vm_config["calib_camera"].as<string>();
else calib_camera = root_dir / "content/calib/calibration.yml";
// read calibration file
if(!fs::exists(calib_camera)){
cout << "Camera calibration file does not exist: " << calib_camera <<endl;
exit(EXIT_FAILURE);
}
else input_handler_.readCameraConfiguration(calib_camera.string());
if(!fs::exists(calib_screen)){
cout << "Camera-screen calibration file does not exist: " << calib_screen << endl;
exit(EXIT_FAILURE);
}
else input_handler_.readScreenConfiguration(calib_screen.string());
if(vm_command.count("input_type")) temp = vm_command["input_type"].as<string>();
else if (vm_config.count("input_type")) temp = vm_config["input_type"].as<string>();
else temp = "";
if (temp == "camera") {input_type_ = InputHandler::InputType::Camera;}
else if (temp == "video") {input_type_ = InputHandler::InputType::Video;}
else if (temp == "directory") {input_type_ = InputHandler::InputType::Directory;}
else cout<<"No input type specified, default value (camera) will be use" << endl;
if (vm_command.count("input")) temp = vm_command["input"].as<string>();
else if (vm_config.count("input")) temp = vm_config["input"].as<string>();
else temp = "0";
if (input_type_ == InputHandler::InputType::Camera) camera_id_ = stoi(temp);
else if (input_type_ == InputHandler::InputType::Video || input_type_ == InputHandler::InputType::Directory) input_dir_ = temp;
else cout<<"No input parameter specified, default value will be use" << endl;
if(vm_command.count("face_model")) is_face_model_ = true;
else if(vm_config.count("face_model")) is_face_model_ = vm_config["face_model"].as<bool>();
else is_face_model_ = true;
if(vm_command.count("save_video")) is_save_video_ = true;
else if(vm_config.count("save_video")) is_save_video_ = vm_config["save_video"].as<bool>();
else is_save_video_ = false;
if(vm_command.count("debug")) show_debug_ = true;
else if(vm_config.count("debug")) show_debug_ = vm_config["debug"].as<bool>();
else show_debug_ = false;
if(vm_command.count("output")) output_dir_ = vm_command["output"].as<string>();
else if(vm_config.count("output")) output_dir_ = vm_config["output"].as<string>();
else {
if (input_type_ == InputHandler::InputType::Video) output_dir_ = input_dir_.parent_path();
else if (input_type_ == InputHandler::InputType::Directory) output_dir_ = input_dir_.parent_path();
else if (input_type_ == InputHandler::InputType::Camera)
output_dir_ = root_dir;
}
string face_detector_root_path;
if(vm_command.count("openface_path")) face_detector_root_path = vm_command["openface_path"].as<string>();
else if(vm_config.count("openface_path")) face_detector_root_path = vm_config["openface_path"].as<string>();
else cout<< "No face detector root specified, default detector will be use" << endl;
if(vm_command.count("per_model_save_path")) per_model_save_path_ = vm_command["per_model_save_path"].as<string>();
else if (vm_config.count("per_model_save_path")) per_model_save_path_ = vm_config["per_model_save_path"].as<string>();
else per_model_save_path_ = root_dir.string() + "/content/calib/user0.txt";
if(vm_command.count("number_user")) temp = vm_command["number_user"].as<string>();
else if (vm_config.count("number_user")) temp = vm_config["number_user"].as<string>();
else temp = "5";
number_user = stoi(temp);
// initial class instance
if (input_type_ == InputHandler::InputType::Camera){ // Camera as input
input_handler_.setInputType(InputHandler::InputType::Camera);// set input type
input_handler_.setInput(camera_id_); // set Camera id
}
else if (input_type_ == InputHandler::InputType::Video) {
input_handler_.setInputType(InputHandler::InputType::Video);// set input type
input_handler_.setInput(input_dir_.string()); // set camera file
}
else if (input_type_ == InputHandler::InputType::Directory){
input_handler_.setInputType(InputHandler::InputType::Directory);
}
// initialize other classes
gaze_estimator_.setCameraParameters(input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.setRootPath(root_dir.string());
gaze_estimator_.initialFaceDetector(number_user);
vector<std::string> arguments;
if (gaze_method == "MPIIGaze") {
arguments.push_back(cnn_param_path.string());
arguments.push_back(cnn_model_path.string());
if (is_face_model_)
arguments.emplace_back("face");
else
arguments.emplace_back("eye");
arguments.push_back(gpu_id);
gaze_estimator_.setMethod(GazeEstimator::Method::MPIIGaze, arguments);
}
else if (gaze_method == "OpenFace"){
//gaze_estimator_.setMethod(GazeEstimator::Method::OpenFace, arguments);
cout << "OpenFace gaze estimation is current not support" << endl;
exit(EXIT_FAILURE);
}
else {
cout << "The method setting is not right! Options are MPIIGaze or OpenFace!" << endl;
exit(EXIT_FAILURE);
}
}
OpenGaze::~OpenGaze() {
input_handler_.closeInput();
}
// do gaze estimation with camera as input
void OpenGaze::runGazeVisualization() {
input_handler_.initialize();
namedWindow("Gaze");
int key;
Mat input_image;
vector<Sample> output;
cv::VideoWriter m_writer;
if (is_save_video_){
boost::filesystem::path save_video_file;
save_video_file = output_dir_ / (input_dir_.stem().string() + "_gaze_video.avi");
m_writer.open(save_video_file.string(), CV_FOURCC('M','J','P','G'), 25,
Size(input_handler_.getFrameWidth(),input_handler_.getFrameHeight()), true);
cout << "Saving video to " << save_video_file << endl;
}
// construct saving file
ofstream output_stream;
boost::filesystem::path output_file_name = output_dir_ / (input_dir_.stem().string() + "_gaze_output.txt");
output_stream.open(output_file_name.string());
cout << "Created output file: " << output_file_name.string() << endl;
// for fps calculation
double fps_tracker = -1.0;
double t_start = 0;
double t_end = 0;
unsigned int frame_count = 0;
while(true){// loop all the sample or read frame from Video
frame_count++;
t_start = t_end;
output.clear();
input_image = input_handler_.getNextSample();// get input image
if(input_handler_.isReachEnd()){ // check if all sample are processed
cout<<"Processed all the samples."<<endl;
break;
}
Mat undist_img;
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
// get the fps values
t_end = cv::getTickCount();
fps_tracker = 1.0 / (double(t_end - t_start) / cv::getTickFrequency());
// save output
for(auto & sample : output) {
output_stream << frame_count << ",";
output_stream << sample.face_data.face_id << ",";
output_stream << sample.face_data.certainty << ",";
output_stream << sample.face_patch_data.face_center.at<float>(0) << ",";
output_stream << sample.face_patch_data.face_center.at<float>(1) << ",";
output_stream << sample.face_patch_data.face_center.at<float>(2) << ",";
output_stream << sample.gaze_data.gaze2d.x << ",";
output_stream << sample.gaze_data.gaze2d.y << ",";
output_stream << sample.eye_data.leye_pos.at<float>(0) << ",";
output_stream << sample.eye_data.leye_pos.at<float>(1) << ",";
output_stream << sample.eye_data.leye_pos.at<float>(2) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(0) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(1) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(2) << endl;
}
if (is_save_video_ || show_debug_) {
//////// visualization //////////////////////////////////////////////////
// draw results
for(const auto & sample : output){
//drawLandmarks(sample, undist_img); // draw face landmarks
drawGazeOnFace(sample, undist_img); // draw gaze ray on face image
//drawGazeOnSimScreen(sample, undist_img); // draw screen target
}
if (show_debug_) {
// show fps
char fpsC[255];
std::sprintf(fpsC, "%.2f", fps_tracker);
string fpsSt("FPS: ");
fpsSt += fpsC;
cv::putText(undist_img, fpsSt, cv::Point(100, 100), CV_FONT_HERSHEY_SIMPLEX, 1, CV_RGB(255, 0, 0), 2);
// show the image
imshow("Gaze", undist_img);
key = cv::waitKey(1);
if (key==27) exit(EXIT_SUCCESS); // press ESC to exit
}
if (is_save_video_)
m_writer << undist_img;
}
}
if (is_save_video_)
m_writer.release();
}
void OpenGaze::runDataExtraction() {
assert(input_handler_.getInputType() == InputHandler::InputType::Directory);// Here we just accept the directory folder
input_handler_.initialize();
vector<Sample> output;
Mat input_image;
while(true){// loop all the sample or read frame from Video
output.clear();
input_image = input_handler_.getNextSample();// get input image
if(input_handler_.isReachEnd()){ // check if all sample are processed
cout << "Processed all the samples." << endl;
break;
}
Mat undist_img;
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.getImagePatch(undist_img, output); // extract the face image
// save the output
for (int i=0; i<output.size(); ++i) {
string save_file_name = output_dir_.stem().string() + "/img_" + input_handler_.getFileName() + "_" +to_string(i)+".jpg";
cv::imwrite(save_file_name, output[i].face_patch_data.face_patch);
}
}
}
void OpenGaze::runGazeOnScreen() {
input_handler_.initialize();
int key;
Mat input_image, undist_img, show_img;
vector<Sample> output;
cv::namedWindow("screen", CV_WINDOW_NORMAL);
cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);
while(true){// loop all the sample or read frame from Video
output.clear();
if(input_handler_.isReachEnd()){ // check if all sample are processed
cout<<"Processed all the samples."<<endl;
break;
}
input_image = input_handler_.getNextSample();// get input image
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
// save output
for(auto & sample : output) {
int loc_x = (int)(sample.gaze_data.gaze2d.x * input_handler_.getScreenWidth());
int loc_y = (int)(sample.gaze_data.gaze2d.y * input_handler_.getScreenHeight());
circle(show_img, cv::Point(loc_x, loc_y), 10, CV_RGB(255,255,255), -1);
}
imshow("screen", show_img);
cv::Mat save_img;
cv::resize(show_img, save_img, cv::Size(1280, 720));
key = cv::waitKey(1);
show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);
if (key==27) break; // press ESC to exit
}
cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("screen");
}
void OpenGaze::runPersonalCalibration(int num_calibration_point) {
if (input_handler_.getInputType() != InputHandler::InputType::Camera){ // personal calibration has to be done with camera
cout << "Error: the input type must be camera for personal calibration!" << endl;
exit(EXIT_FAILURE);
}
Mat input_image, undist_img;
input_handler_.initialize();
PersonalCalibrator m_calibrator(input_handler_.getScreenWidth(), input_handler_.getScreenHeight());
m_calibrator.generatePoints(num_calibration_point);
m_calibrator.initialWindow(); // show start windows
vector<cv::Point2f> pred, gt; // prediction and ground-truth
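// Both the predictions and the ground-truth targets are stored as normalized screen
// coordinates in [0, 1] (fractions of screen width/height), so the fitted polynomial
// maps normalized predictions to normalized targets.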
for (int i=0; i<num_calibration_point; ++i){
if (m_calibrator.showNextPoint()) {// wait for clicking
vector<Sample> output;
input_image = input_handler_.getNextSample(); // get the sample when user clicking
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);// convert to 2D projection
m_calibrator.confirmClicking(); // give feedback to user that they successfully did calibration
pred.emplace_back(output[0].gaze_data.gaze2d);
gt.emplace_back(cv::Point2f((m_calibrator.getCurrentPoint().x/(float)input_handler_.getScreenWidth()),
(m_calibrator.getCurrentPoint().y/(float)input_handler_.getScreenHeight())));
}
else
break; // if user press ESC button, we break
}
if (pred.size() > 0){
m_calibrator.generateModel(pred, gt, 1); // get the mapping model
string per_model_save_path_ = output_dir_.stem().string() + "/personal_gaze_model.yml";
m_calibrator.saveModel(per_model_save_path_);
}
}
void OpenGaze::drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image) {
static const int dW = 640;
static const int dH = 360;
Mat debug_disp = Mat::zeros(Size(dW, dH), CV_8UC3);
Point2f g_s;
g_s.x = dW*sample.gaze_data.gaze2d.x;
g_s.y = dH*sample.gaze_data.gaze2d.y;
circle(debug_disp, g_s, 10, CV_RGB(255,0,0), -1);
debug_disp.copyTo(image(Rect(0, 0, dW, dH)));
}
void OpenGaze::drawGazeOnFace(opengaze::Sample sample, cv::Mat &image) {
// draw gaze on the face
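// The 3D points used below are already expressed in the camera coordinate system,
// so projectPoints is called with zero rotation/translation and only applies the
// camera intrinsics (and distortion) to obtain image coordinates.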
if (gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
&& gaze_estimator_.input_type_ == GazeEstimator::InputType::face) {
static const float gaze_length = 300.0;
Mat zero = Mat::zeros(1, 3, CV_32F);
Mat rvec, tvec;
sample.face_patch_data.head_r.convertTo(rvec, CV_32F);
sample.face_patch_data.head_t.convertTo(tvec, CV_32F);
vector<Point3f> cam_points;
Vec3f face_center(sample.face_patch_data.face_center.at<float>(0), sample.face_patch_data.face_center.at<float>(1), sample.face_patch_data.face_center.at<float>(2));
cam_points.emplace_back(face_center);
cam_points.emplace_back(face_center + gaze_length * sample.gaze_data.gaze3d);
vector<Point2f> img_points;
projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5); // gaze ray
circle(image, img_points[0], 5, CV_RGB(255,0,0), -1); // staring point
circle(image, img_points[1], 5, CV_RGB(255,0,0), -1); // end point
}
else if ((gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
&& gaze_estimator_.input_type_ == GazeEstimator::InputType::eye)
|| gaze_estimator_.method_type_ == GazeEstimator::Method::OpenFace) {
int gaze_length = 300;
Mat zero = Mat::zeros(1, 3, CV_32F);
vector<Point3f> cam_points;
sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
Vec3f leye_pose(sample.eye_data.leye_pos.at<float>(0),sample.eye_data.leye_pos.at<float>(1),sample.eye_data.leye_pos.at<float>(2));
cam_points.emplace_back(leye_pose);
cam_points.emplace_back(leye_pose + gaze_length*sample.gaze_data.lgaze3d);
Vec3f reye_pose(sample.eye_data.reye_pos.at<float>(0),sample.eye_data.reye_pos.at<float>(1),sample.eye_data.reye_pos.at<float>(2));
cam_points.emplace_back(reye_pose);
cam_points.emplace_back(reye_pose + gaze_length*sample.gaze_data.rgaze3d);
vector<Point2f> img_points;
projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5);
line(image, img_points[2], img_points[3], CV_RGB(255,0,0), 5);
circle(image, img_points[1], 3, CV_RGB(255,0,0), -1);
circle(image, img_points[3], 3, CV_RGB(255,0,0), -1);
}
}
void OpenGaze::drawLandmarks(opengaze::Sample sample, cv::Mat &image) {
cv::Rect_<int> face_bb = sample.face_data.face_bb;
rectangle(image, cv::Point(face_bb.x, face_bb.y),
cv::Point(face_bb.x+face_bb.width,face_bb.y+face_bb.height), CV_RGB(0,255,0), 5);
for(int p=0; p<6; ++p)
circle(image, sample.face_data.landmarks[p], 5, CV_RGB(0,255,0), -1);
}
}

145
src/personal_calibrator.cpp Normal file
View file

@ -0,0 +1,145 @@
#include "personal_calibrator.hpp"
using namespace cv;
using namespace std;
void CallBackFunc(int event, int x, int y, int flags, void* is_click) {
if (event == EVENT_LBUTTONDOWN){
bool* temp = (bool*)is_click;
*temp = true;
}
}
PersonalCalibrator::PersonalCalibrator (int screen_width, int screen_height) {
cv::namedWindow("calibration", CV_WINDOW_NORMAL);
cv::setWindowProperty("calibration", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
// set the mouse
is_click_ = false;
//set the callback function for any mouse event
setMouseCallback("calibration", CallBackFunc, &is_click_); // wait for clicking
screen_width_ = screen_width;
screen_height_ = screen_height;
center_radius_ = (int)((float)screen_width_ / 200.0f);
}
PersonalCalibrator::~PersonalCalibrator() {
cv::setWindowProperty("calibration", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("calibration");
}
void PersonalCalibrator::generatePoints(int num_points) {
index_point_ = -1;
srand(time(NULL));
Point2i current_point;
for (int num = 0; num < num_points; ++num) {
current_point.x = (rand() % screen_width_); // range is [0, screen_width_)
current_point.y = (rand() % screen_height_); // range is [0, screen_height_)
points_.emplace_back(current_point);
}
}
void PersonalCalibrator::initialWindow() {
// get the focus of the window
namedWindow("GetFocus", CV_WINDOW_NORMAL);
cv::Mat img = cv::Mat::zeros(100, 100, CV_8UC3);
cv::imshow("GetFocus", img);
cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
waitKey(1);
cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("GetFocus");
// show instruction
cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
string show_text = "Please click/touch when looking at the dots";
cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(255,255,255), 2);
imshow("calibration", show_img);
cv::waitKey(3000);
for (int i=255; i > 0; i-=5) { // fade the instruction text out gradually
show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(i,i,i), 2);
imshow("calibration", show_img);
cv::waitKey(1);
}
}
bool PersonalCalibrator::showNextPoint() {
cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
index_point_ ++;
cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(255, 255, 255), -1);
is_click_ = false;
while (true) {
imshow("calibration", show_img);
int key = cv::waitKey(10); // wait for interaction
if (key == 27) // if press the ESC key
return false;
if (is_click_) {
break;
}
}
return true;
}
void PersonalCalibrator::confirmClicking() {
cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(0, 200, 0), -1);
imshow("calibration", show_img);
cv::waitKey(500);
}
// this polyfit function is copied from opencv/modules/contrib/src/polyfit.cpp
// This original code was written by
// Onkar Raut
// Graduate Student,
// University of North Carolina at Charlotte
cv::Mat polyfit(const Mat& src_x, const Mat& src_y, int order)
{
CV_Assert((src_x.rows>0)&&(src_y.rows>0)&&(src_x.cols>0)&&(src_y.cols>0)&&(order>=1));
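// Least-squares polynomial fit: build the design matrix [x, x^2, ..., x^order]
// block-wise, append a bias column, and solve new_mat * matrix = src_y with
// cv::solve(DECOMP_NORMAL) (normal equations). The returned matrix is later used to
// map raw gaze predictions to calibrated screen coordinates.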
Mat matrix;
Mat bias = Mat::ones((int)src_x.rows, 1, CV_32FC1);
Mat input_x = Mat::zeros(src_x.rows, order*src_x.cols, CV_32FC1);
Mat copy;
for(int i=1; i<=order;i++){
copy = src_x.clone();
pow(copy,i,copy);
copy.copyTo(input_x(Rect((i-1)*src_x.cols, 0, copy.cols, copy.rows)));
}
Mat new_mat;
cv::hconcat(input_x, bias, new_mat);
cout << "new_mat: " << new_mat << endl;
cv::solve(new_mat, src_y, matrix, DECOMP_NORMAL);
cout << "model_matrix: " << matrix << endl;
Mat calibrated = new_mat * matrix;
cout << "calibrated: " << calibrated << endl;
double dist_original = norm(src_x, src_y, NORM_L2);
cout << "dist_original: " << dist_original << endl;
double dist_calibrated = norm(calibrated, src_y, NORM_L2);
cout << "dist_calibrated: " << dist_calibrated << endl;
return matrix;
}
void PersonalCalibrator::generateModel(vector<Point2f> prediction, vector<Point2f> ground_truth, int order) {
cv::Mat input_x = cv::Mat((int)prediction.size(), 2, CV_32FC1, prediction.data());
cv::Mat input_y = cv::Mat((int)ground_truth.size(), 2, CV_32FC1, ground_truth.data());
cout << "input_x: " << input_x << endl;
cout << "input_y: " << input_y << endl;
model_matrix_ = polyfit(input_x, input_y, order);
}
void PersonalCalibrator::saveModel(std::string file_path) {
cv::FileStorage storage(file_path, cv::FileStorage::WRITE);
storage << "model_matrix" << model_matrix_;
storage.release();
}