initial commit

This commit is contained in:
Xucong Zhang 2019-01-10 13:26:03 +01:00
commit e505acdb29
41 changed files with 2922 additions and 0 deletions

32
.gitignore vendored Normal file
View file

@ -0,0 +1,32 @@
### C++ ###
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
### CMake ###
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake

90
CMakeLists.txt Normal file
View file

@ -0,0 +1,90 @@
cmake_minimum_required(VERSION 3.0)
project(OpenGaze VERSION 0.1)

# Default to an optimized build, but do not clobber a user-provided choice
# (previously this unconditionally forced Release).
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release)
endif()

# Runtime directory for models and configuration files.
set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze")
# NOTE(review): the macro is spelled OPENGAZE_CON_DIR in the sources, so the
# spelling is preserved here even though it reads like a typo for CONF_DIR.
add_definitions(-DOPENGAZE_CON_DIR="${OPENGAZE_DIR}")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/)

# Request C++11 through the portable CMake abstraction; the redundant
# hand-written "-std=c++11" appended to CMAKE_CXX_FLAGS has been dropped.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# OpenCV
find_package(OpenCV 3.4 REQUIRED COMPONENTS core imgproc calib3d highgui objdetect)

# Boost, for reading the configuration file
find_package(Boost 1.5 COMPONENTS system filesystem timer thread program_options REQUIRED)
set(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR} ${Boost_INCLUDE_DIR}/boost)

# Caffe. NOTE(review): hard-coded, machine-specific install prefix — consider
# promoting this to a cache variable so users can override it on the command line.
set(CAFFE_INSTALL_DIR "/home/xucong/library/caffe/build/install")
set(Caffe_INCLUDE_DIRS ${CAFFE_INSTALL_DIR}/include)
set(Caffe_LIBRARY_DIRS ${CAFFE_INSTALL_DIR}/lib)
set(Caffe_LIBS lmdb glog caffe)

# Face and facial landmark detection method.
option(USE_OPENFACE "with OpenFace" ON)
# NOTE(review): this define is unconditional, so the OpenFace code path is
# always compiled in regardless of the option value (the original also forced
# the variable back ON; that redundant set() has been removed).
add_definitions(-DUSE_OPENFACE=1)

# OpenFace. NOTE(review): hard-coded, machine-specific path as well.
set(OPENFACE_ROOT_DIR "/home/xucong/library/OpenFace")
add_definitions(-DOPENFACE_DIR="${OPENFACE_ROOT_DIR}")
set(CLM_INCLUDE_DIRS ${OPENFACE_ROOT_DIR}/lib/local/LandmarkDetector/include)
set(CLM_LIBRARY_DIRS ${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector)
set(CLM_LIBS LandmarkDetector tbb openblas dlib)

# Suppress auto_ptr deprecation warnings coming from older dependencies.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    add_compile_options("-Wno-deprecated-declarations")
endif()

# Directory-scoped include/link paths (legacy style, kept for compatibility;
# once the minimum CMake version allows, these should become
# target_include_directories / target_link_directories on the opengaze target).
include_directories(./ ./include /usr/local/cuda/include ${OpenCV_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} ${CLM_INCLUDE_DIRS} ${Caffe_INCLUDE_DIRS})
link_directories(./ ./build/lib /usr/lib /usr/local/cuda/lib64 ${Boost_LIBRARY_DIRS} ${CLM_LIBRARY_DIRS} ${Caffe_LIBRARY_DIRS})

# NOTE(review): file(GLOB) does not notice newly added sources until CMake is
# re-run; an explicit source list is the more reliable long-term choice.
file(GLOB SOURCE "./src/*.cpp")
file(GLOB HEADERS "./include/*.hpp")

# Compile the opengaze shared library.
add_library(opengaze SHARED ${SOURCE} ${HEADERS})
set_target_properties(opengaze PROPERTIES VERSION ${PROJECT_VERSION})

install(TARGETS opengaze EXPORT OpenGazeTargets LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
install(FILES ${HEADERS} DESTINATION include/opengaze)

# Install Caffe and OpenFace runtime libraries next to the models.
# NOTE(review): WORLD_WRITE on installed directories/files is a security smell;
# preserved here only to keep the original install layout working.
install(DIRECTORY DESTINATION "${OPENGAZE_DIR}/3rdParty" DIRECTORY_PERMISSIONS
        OWNER_WRITE OWNER_READ OWNER_EXECUTE
        GROUP_WRITE GROUP_READ GROUP_EXECUTE
        WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install(FILES ${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector/libLandmarkDetector.a DESTINATION ${OPENGAZE_DIR}/3rdParty)
install(FILES ${Caffe_LIBRARY_DIRS}/libcaffe.so DESTINATION ${OPENGAZE_DIR}/3rdParty)
install(FILES ${Caffe_LIBRARY_DIRS}/libcaffe.so.1.0.0 DESTINATION ${OPENGAZE_DIR}/3rdParty)

# Install configuration files and the content directory tree.
install(DIRECTORY DESTINATION "${OPENGAZE_DIR}" DIRECTORY_PERMISSIONS
        OWNER_WRITE OWNER_READ OWNER_EXECUTE
        GROUP_WRITE GROUP_READ GROUP_EXECUTE
        WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install(DIRECTORY DESTINATION "${OPENGAZE_DIR}/content" DIRECTORY_PERMISSIONS
        OWNER_WRITE OWNER_READ OWNER_EXECUTE
        GROUP_WRITE GROUP_READ GROUP_EXECUTE
        WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install(DIRECTORY DESTINATION "${OPENGAZE_DIR}/content/calib" DIRECTORY_PERMISSIONS
        OWNER_WRITE OWNER_READ OWNER_EXECUTE
        GROUP_WRITE GROUP_READ GROUP_EXECUTE
        WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install(DIRECTORY DESTINATION "${OPENGAZE_DIR}/content/model" DIRECTORY_PERMISSIONS
        OWNER_WRITE OWNER_READ OWNER_EXECUTE
        GROUP_WRITE GROUP_READ GROUP_EXECUTE
        WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install(FILES ./content/calib/calibration.yml DESTINATION ${OPENGAZE_DIR}/content/calib PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
install(FILES ./content/calib/monitor_laptop.yml DESTINATION ${OPENGAZE_DIR}/content/calib PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
install(FILES ./content/model/face_model.yml DESTINATION ${OPENGAZE_DIR}/content/model PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
install(FILES default.cfg DESTINATION ${OPENGAZE_DIR} PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)

55
README.md Normal file
View file

@ -0,0 +1,55 @@
# OpenGaze: Open Source Toolkit for Camera-Based Gaze Estimation and Interaction
<!--The current demo video includes clips from Friends, which may violate the copyright. Although people think 28 seconds could be a boundary:https://productforums.google.com/forum/#!topic/youtube/rQhkI20Rm8k, there is no golden rule for it: https://www.youtube.com/yt/about/copyright/fair-use/#yt-copyright-protection-->
<!--//[![Demo](https://img.youtube.com/vi/OORxOdu8USQ/0.jpg)](https://youtu.be/OORxOdu8USQ "OpenGaze Friends Demo")-->
Appearance-based gaze estimation methods that only require an off-the-shelf camera have significantly improved and promise a wide range of new applications in gaze-based interaction and attentive user interfaces. However, these methods are not yet widely used in the human-computer interaction (HCI) community.
To democratize their use in HCI, we present OpenGaze, the first software toolkit that is specifically developed for gaze interface designers. OpenGaze is open source and aims to implement state-of-the-art methods for camera-based gaze estimation and interaction.
<img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_mpiinf.png" height="80"/><img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_pui.png" height="80"><img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_osaka-u.png" height="80">
## Functionality
The toolkit is capable of performing the following gaze-related tasks:
* **Gaze Estimation**
Show estimated gaze on the screen given screen-camera relationship.
[![Demo](https://img.youtube.com/vi/R1vb7mV3y_M/0.jpg)](https://youtu.be/R1vb7mV3y_M "Gaze visualization demo")
<p>&nbsp;</p>
* **Gaze Visualization**
Show gaze directions originating from the center of faces in the input image.
[![Demo](https://img.youtube.com/vi/8yMTvvr0rRU/0.jpg)](https://youtu.be/8yMTvvr0rRU "Gaze visualization demo")
<p>&nbsp;</p>
* **Personal Calibration**
Perform personal calibration and remap the gaze target on the screen.
[![Demo](https://img.youtube.com/vi/ntBv1wcNGAo/0.jpg)](https://youtu.be/ntBv1wcNGAo "Gaze visualization demo")
<p>&nbsp;</p>
## Installation
[Unix Installation](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Unix-Installation)
## Use
[Command line arguments](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Command-line-arguments)
## Citation
If you use any of the resources provided on this page in any of your publications, please cite the following paper:
**Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications?** <br/>
Xucong Zhang, Yusuke Sugano, Andreas Bulling<br/>
Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), 2019<br/>
BibTex, PDF
## License
The license agreement can be found in Copyright.txt
You have to respect boost, OpenFace and OpenCV licenses.
Furthermore, you have to respect the licenses of the datasets used for [model training](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Model-training).

3
RELEASE.md Normal file
View file

@ -0,0 +1,3 @@
# Release 0.1.0
Initial release of OpenGaze.

View file

@ -0,0 +1,50 @@
#ifndef CAFFE_DSPP_LAYER_HPP_
#define CAFFE_DSPP_LAYER_HPP_
#include <string>
#include <utility>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layers/data_layer.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/loss_layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
/**
 * DSPPLayer: element-wise modulation of a data blob by a spatial map.
 *
 * Takes exactly two bottom blobs (see ExactNumBottomBlobs):
 *   bottom[0] — a spatial weight map; only channel 0 is read,
 *   bottom[1] — the data blob (N x C x H x W).
 * Produces top[0] with the same shape as bottom[1], where every channel of
 * bottom[1] is multiplied by the map value at the same (n, h, w) position
 * (see Forward_cpu in dspp_layer.cpp).
 */
template <typename Dtype>
class DSPPLayer : public Layer<Dtype> {
 public:
  explicit DSPPLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  // No layer-specific parameters to read; setup is a no-op (see .cpp).
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Shapes top[0] like bottom[1] and caches N/C/H/W in the members below.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "DSPPLayer"; }
  // Exactly two bottoms: the weight map and the data blob.
  virtual inline int ExactNumBottomBlobs() const { return 2; };
  virtual inline int MinTopBlobs() const { return 1; }
 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  //virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
  //    const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  //virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
  //    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  // Cached dimensions of bottom[1], filled in by Reshape().
  int width_;
  int height_;
  int channel_;
  int num_;
};
} // namespace caffe
#endif // CAFFE_DSPP_LAYER_HPP_

View file

@ -0,0 +1,56 @@
#ifndef CAFFE_POSE_DATA_LAYER_HPP_
#define CAFFE_POSE_DATA_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/base_data_layer.hpp"
namespace caffe {
/**
 * PoseDataLayer: memory-backed data layer serving (data, label) batches.
 *
 * Data is pushed in from the application via AddDatumVector / AddMatVector /
 * Reset, then consumed batch-by-batch through Forward_cpu. The label blob is
 * shaped {batch_size, 4} (see DataLayerSetUp in pose_data_layer.cpp).
 * Produces two tops: top[0] = data, top[1] = labels; takes no bottoms.
 */
template <typename Dtype>
class PoseDataLayer : public BaseDataLayer<Dtype> {
 public:
  explicit PoseDataLayer(const LayerParameter& param)
      : BaseDataLayer<Dtype>(param), has_new_data_(false) {}
  // Reads batch size / channels / height / width from memory_data_param.
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "PoseData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }
  // Queue a batch of Datum items; count must be a multiple of batch_size.
  virtual void AddDatumVector(const vector<Datum>& datum_vector);
  // Queue a batch of cv::Mat images with one float label each.
  virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
      const vector<float>& labels);
  // Reset should accept const pointers, but can't, because the memory
  // will be given to Blob, which is mutable
  void Reset(Dtype* data, Dtype* label, int n);
  void set_batch_size(int new_size);
  int batch_size() { return batch_size_; }
  int channels() { return channels_; }
  int height() { return height_; }
  int width() { return width_; }
 protected:
  // Exposes the next batch via top[0]/top[1] without copying (set_cpu_data).
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  int batch_size_, channels_, height_, width_, size_;
  Dtype* data_;    // raw pointer to the queued data (owned by the caller/blob)
  Dtype* labels_;  // raw pointer to the queued labels
  int n_;          // number of queued items
  size_t pos_;     // read cursor, advanced by batch_size_ per forward pass
  Blob<Dtype> added_data_;   // staging blob for transformed input images
  Blob<Dtype> added_label_;  // staging blob for labels
  bool has_new_data_;        // true while queued data is still unconsumed
};
} // namespace caffe
#endif

View file

@ -0,0 +1,92 @@
#include <cmath>
#include <algorithm>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/layers/dspp_layer.hpp"
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
namespace caffe {
// Nothing to configure: the layer reads no parameters at setup time.
template <typename Dtype>
void DSPPLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
}
// Cache bottom[1]'s dimensions and shape the output identically to it.
template <typename Dtype>
void DSPPLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  num_ = bottom[1]->shape()[0];
  channel_ = bottom[1]->shape()[1]; // the input data size
  height_ = bottom[1]->shape()[2];
  width_ = bottom[1]->shape()[3];
  // init output size
  vector<int> output_shape;
  output_shape.push_back(num_);
  output_shape.push_back(channel_);
  output_shape.push_back(height_);
  output_shape.push_back(width_);
  top[0]->Reshape(output_shape);
}
// Forward: top(n,c,h,w) = bottom[1](n,c,h,w) * bottom[0](n,0,h,w), i.e. every
// channel of the data blob is scaled by channel 0 of the spatial map.
template <typename Dtype>
void DSPPLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Dtype* top_data = top[0]->mutable_cpu_data();
  caffe_set<Dtype>(top[0]->count(), 0, top_data); // initilize to be 0
  for (int n=0; n<num_; ++n) {
    for (int h = 0; h < height_; ++h) { // for the input data size
      for (int w = 0; w < width_; ++w) {
        for (int c = 0; c < channel_; ++c) {
          top_data[top[0]->offset(n, c, h, w)] = bottom[1]->data_at(n, c, h, w) * bottom[0]->data_at(n, 0, h, w);
        }
      }
    }
  }
  // NOTE(review): nulling a local pointer is a no-op; the blob still owns the memory.
  top_data = NULL;
}
// Backward: for the data blob, d/d(data) = top_diff * map; for the map,
// gradients are accumulated over channels and divided by channel_
// (i.e. averaged across the data channels — see the buffer update below).
template <typename Dtype>
void DSPPLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* data_diff = bottom[1]->mutable_cpu_diff();
    Dtype* heat_map_diff = bottom[0]->mutable_cpu_diff();
    caffe_set<Dtype>(bottom[1]->count(), 0, data_diff);
    caffe_set<Dtype>(bottom[0]->count(), 0, heat_map_diff);
    // Dtype activation_h, activation_w;
    for (int n = 0; n < num_; ++n) {
      for (int h = 0; h < height_; ++h) {
        for (int w = 0; w < width_; ++w) {
          for (int c = 0; c < channel_; ++c) {
            Dtype buffer = top_diff[top[0]->offset(n, c, h, w)];
            data_diff[bottom[1]->offset(n, c, h, w)] = buffer * (bottom[0]->data_at(n, 0, h, w));
            buffer *= bottom[1]->data_at(n,c,h,w) / channel_;
            heat_map_diff[bottom[0]->offset(n,0,h,w)] += buffer;
          }
        }
      }
    }
    // NOTE(review): these assignments only clear local pointers; no effect on the blobs.
    top_diff = NULL;
    data_diff = NULL;
    heat_map_diff = NULL;
  }
}
INSTANTIATE_CLASS(DSPPLayer);
REGISTER_LAYER_CLASS(DSPP);
} // namespace caffe

View file

@ -0,0 +1,128 @@
#include <opencv2/core/core.hpp>
#include <vector>
#include "caffe/layers/pose_data_layer.hpp"
namespace caffe {
// Read batch geometry from memory_data_param, shape both tops, and pre-allocate
// the staging blobs. Labels are shaped {batch_size, 4}.
template <typename Dtype>
void PoseDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  batch_size_ = this->layer_param_.memory_data_param().batch_size();
  channels_ = this->layer_param_.memory_data_param().channels();
  height_ = this->layer_param_.memory_data_param().height();
  width_ = this->layer_param_.memory_data_param().width();
  size_ = channels_ * height_ * width_;
  CHECK_GT(batch_size_ * size_, 0) <<
      "batch_size, channels, height, and width must be specified and"
      " positive in memory_data_param";
  int label_shape_[] = {batch_size_, 4};
  vector<int> label_shape(label_shape_, label_shape_+2);
  top[0]->Reshape(batch_size_, channels_, height_, width_);
  top[1]->Reshape(label_shape);
  added_data_.Reshape(batch_size_, channels_, height_, width_);
  added_label_.Reshape(label_shape);
  data_ = NULL;
  labels_ = NULL;
  // Force allocation of the staging blobs' CPU memory up front.
  added_data_.cpu_data();
  added_label_.cpu_data();
}
// Queue a vector of Datum items. The count must be a non-zero multiple of the
// batch size; data transformations from the prototxt are applied here.
template <typename Dtype>
void PoseDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {
  CHECK(!has_new_data_) <<
      "Can't add data until current data has been consumed.";
  size_t num = datum_vector.size();
  CHECK_GT(num, 0) << "There is no datum to add.";
  CHECK_EQ(num % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";
  added_data_.Reshape(num, channels_, height_, width_);
  int label_shape_[] = {(int)num, 4};
  vector<int> label_shape(label_shape_, label_shape_+2);
  added_label_.Reshape(label_shape);
  // Apply data transformations (mirror, scale, crop...)
  this->data_transformer_->Transform(datum_vector, &added_data_);
  // Copy Labels
  // NOTE(review): only one label value is written per item even though the
  // label blob row is 4 wide; entries [1..3] of each row are left untouched.
  // Confirm whether a 4-component label copy was intended.
  Dtype* top_label = added_label_.mutable_cpu_data();
  for (int item_id = 0; item_id < num; ++item_id) {
    top_label[item_id] = datum_vector[item_id].label();
  }
  // num_images == batch_size_
  Dtype* top_data = added_data_.mutable_cpu_data();
  Reset(top_data, top_label, num);
  has_new_data_ = true;
}
// Queue a vector of cv::Mat images with one float label each; same contract
// and same single-value-per-row label caveat as AddDatumVector above.
template <typename Dtype>
void PoseDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,
    const vector<float>& labels) {
  size_t num = mat_vector.size();
  CHECK(!has_new_data_) <<
      "Can't add mat until current data has been consumed.";
  CHECK_GT(num, 0) << "There is no mat to add";
  CHECK_EQ(num % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";
  added_data_.Reshape(num, channels_, height_, width_);
  int label_shape_[] = {(int)num, 4};
  vector<int> label_shape(label_shape_, label_shape_+2);
  added_label_.Reshape(label_shape);
  // Apply data transformations (mirror, scale, crop...)
  this->data_transformer_->Transform(mat_vector, &added_data_);
  // Copy Labels
  Dtype* top_label = added_label_.mutable_cpu_data();
  for (int item_id = 0; item_id < num; ++item_id) {
    top_label[item_id] = labels[item_id];
  }
  // num_images == batch_size_
  Dtype* top_data = added_data_.mutable_cpu_data();
  Reset(top_data, top_label, num);
  has_new_data_ = true;
}
// Point the layer at externally owned data/label buffers and rewind the cursor.
template <typename Dtype>
void PoseDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {
  CHECK(data);
  CHECK(labels);
  CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";
  // Warn with transformation parameters since a memory array is meant to
  // be generic and no transformations are done with Reset().
  //if (this->layer_param_.has_transform_param()) {
  //  LOG(WARNING) << this->type() << " does not transform array data on Reset()";
  //}
  data_ = data;
  labels_ = labels;
  n_ = n;
  pos_ = 0;
}
// Change the batch size; only legal while no queued data is pending.
template <typename Dtype>
void PoseDataLayer<Dtype>::set_batch_size(int new_size) {
  CHECK(!has_new_data_) <<
      "Can't change batch_size until current data has been consumed.";
  batch_size_ = new_size;
  added_data_.Reshape(batch_size_, channels_, height_, width_);
  int label_shape_[] = {(int)batch_size_, 4};
  vector<int> label_shape(label_shape_, label_shape_+2);
  added_label_.Reshape(label_shape);
}
// Expose the next batch via zero-copy set_cpu_data and advance the cursor.
template <typename Dtype>
void PoseDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK(data_) << "PoseDataLayer needs to be initalized by calling Reset";
  top[0]->Reshape(batch_size_, channels_, height_, width_);
  int label_shape_[] = {(int)batch_size_, 4};
  vector<int> label_shape(label_shape_, label_shape_+2);
  // NOTE(review): this reshapes the staging blob, not top[1] — it looks like
  // top[1]->Reshape(label_shape) was intended here; confirm.
  added_label_.Reshape(label_shape);
  top[0]->set_cpu_data(data_ + pos_ * size_);
  // NOTE(review): labels advance by one value per consumed item (pos_), while
  // the label rows are declared 4 wide — verify the intended stride.
  top[1]->set_cpu_data(labels_ + pos_);
  pos_ = (pos_ + batch_size_) % n_;
  if (pos_ == 0)
    has_new_data_ = false;
}
INSTANTIATE_CLASS(PoseDataLayer);
REGISTER_LAYER_CLASS(PoseData);
} // namespace caffe

11
content/calib/calibration.yml Executable file
View file

@ -0,0 +1,11 @@
%YAML:1.0
camera_matrix: !!opencv-matrix
rows: 3
cols: 3
dt: f
data: [ 1891.07, 0.0, 640, 0.0, 1891.07, 360, 0.0, 0.0, 1.0]
dist_coeffs: !!opencv-matrix
rows: 1
cols: 5
dt: f
data: [1.68091e-02, -7.14552e-02, -5.65886e-03, -5.23482e-04, -3.39946e-02]

View file

@ -0,0 +1,13 @@
%YAML:1.0
monitor_W: 516
monitor_H: 323
monitor_R: !!opencv-matrix
rows: 3
cols: 3
dt: f
data: [ -0.99955, -0.02891, -0.0082861, -0.028948, 0.99957, 0.0044949, 0.0081526, 0.0047327, -0.99996]
monitor_T: !!opencv-matrix
rows: 3
cols: 1
dt: f
data: [269.41, 48.561, 5.8344]

View file

@ -0,0 +1,13 @@
%YAML:1.0
monitor_W: 310
monitor_H: 174
monitor_R: !!opencv-matrix
rows: 3
cols: 3
dt: f
data: [ -0.99988, -0.009735, -0.01203, -0.0094674, 0.99971, -0.022108, 0.012242, -0.021992, -0.99968]
monitor_T: !!opencv-matrix
rows: 3
cols: 1
dt: f
data: [149.91, 29.575, -18.884]

6
content/model/face_model.yml Executable file
View file

@ -0,0 +1,6 @@
%YAML:1.0
face_model: !!opencv-matrix
rows: 3
cols: 6
dt: f
data: [ -45.096768, -21.312858, 21.312858, 45.096768, -26.299577, 26.299577, -0.483773,0.483773, 0.483773, -0.483773, 68.595035,68.595035, 2.397030, -2.397030, -2.397030, 2.397030, -0.000000, -0.000000]

26
default.cfg Normal file
View file

@ -0,0 +1,26 @@
## input and output
# input_type = camera # camera, video, or directory
# input = 0 # camera id, video file name, or directory of image files
# output = /BS/zhang-semi/work/opengaze/test/
# input = YOUR_VIDEO OR IMAGE FOLDER
# output = MUST BE A DIRECTORY
## gaze estimation method
# gaze_method = MPIIGaze # OpenFace MPIIGaze
# gpu_id = 0
## gaze estimation method/model selection
# face_model = 1 # 1 for the face model, 0 for eye image model
## CNN model for face image, trained on MPIIGaze + EYEDIAP HD
# cnn_param_path = YOUR_PATH/alexnet_face.prototxt
# cnn_model_path = YOUR_PATH/alexnet_face.caffemodel
## calibration files for the camera and the screen
# calib_camera = YOUR_PATH/calibration.yml
# calib_screen = YOUR_PATH/monitor.yml
## parameters for personal calibration
# per_model_save_path = YOUR_PATH/user1.txt
# num_calibration = 9

9
download_models.sh Executable file
View file

@ -0,0 +1,9 @@
#!/bin/sh
# Download the pretrained MPIIGaze Caffe models into ~/OpenGaze/content/caffeModel.
set -e

OPENGAZE_DIR="$HOME/OpenGaze"
mkdir -p "$OPENGAZE_DIR/content/caffeModel"
cd "$OPENGAZE_DIR/content/caffeModel"
wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.prototxt
wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.caffemodel

36
exe/CMakeLists.txt Normal file
View file

@ -0,0 +1,36 @@
cmake_minimum_required(VERSION 3.0)
project(OpenGazeExe VERSION 1.0)

# Runtime data directory (models, configs) under the user's home.
set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze")
# NOTE(review): this defines the OPENGAZE_DIR macro to the *source* directory,
# not to the ${OPENGAZE_DIR} variable above — looks inconsistent with the
# library's CMakeLists; confirm which path the executables actually expect.
add_definitions(-DOPENGAZE_DIR="${CMAKE_SOURCE_DIR}")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/)

# Request C++11 through the portable CMake abstraction; the redundant
# hand-written "-std=c++11" in CMAKE_CXX_FLAGS has been dropped.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(OpenCV 3.1 REQUIRED COMPONENTS calib3d highgui objdetect imgproc core)

# Boost, for reading the configuration file
find_package(Boost 1.5 COMPONENTS system filesystem timer thread program_options REQUIRED)
set(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR} ${Boost_INCLUDE_DIR}/boost)

# Directory-scoped include/link paths (legacy style, kept for compatibility).
include_directories(/usr/local/include/opengaze /usr/local/cuda/include ${OpenCV_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
link_directories(/usr/lib /usr/local/lib /usr/local/cuda/lib64 ${Boost_LIBRARY_DIRS} ${OPENGAZE_DIR}/3rdParty)

## -lX11 is for getting screen resolution in pixel in the Linux system
set(LIBS opengaze LandmarkDetector ${OpenCV_LIBS} ${Boost_LIBRARIES} tbb openblas dlib lmdb glog caffe X11)

# One executable per demo tool; all share the same link line.
foreach(tool GazeVisualization Calibration GazeEstimation DataExtraction)
    add_executable(${tool} ${tool}.cpp)
    target_link_libraries(${tool} ${LIBS})
endforeach()

29
exe/Calibration.cpp Normal file
View file

@ -0,0 +1,29 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"

using namespace std;
using namespace cv;
using namespace opengaze;

// Collect argc/argv into a vector of strings for convenience.
vector<string> get_arguments(int argc, char **argv) {
    vector<string> arguments;
    for (int i = 0; i < argc; ++i){
        arguments.emplace_back(string(argv[i]));
    }
    return arguments;
}

// Entry point: run the interactive personal-calibration procedure with a
// fixed number of on-screen calibration points.
int main(int argc, char** argv)
{
    vector<string> arguments = get_arguments(argc, argv); // currently unused; kept for parity with the other tools
    OpenGaze open_gaze(argc, argv);
    int num_calibration_point = 20;
    open_gaze.runPersonalCalibration(num_calibration_point);
    return 0; // fixed: returning 1 signalled failure to the shell on success
}

28
exe/DataExtraction.cpp Normal file
View file

@ -0,0 +1,28 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"

using namespace std;
using namespace cv;
using namespace opengaze;

// Collect argc/argv into a vector of strings for convenience.
vector<string> get_arguments(int argc, char **argv) {
    vector<string> arguments;
    for (int i = 0; i < argc; ++i){
        arguments.emplace_back(string(argv[i]));
    }
    return arguments;
}

// Entry point: run the batch data-extraction pipeline.
int main(int argc, char** argv)
{
    vector<string> arguments = get_arguments(argc, argv); // currently unused; kept for parity with the other tools
    OpenGaze open_gaze(argc, argv);
    open_gaze.runDataExtraction();
    return 0; // fixed: returning 1 signalled failure to the shell on success
}

28
exe/GazeEstimation.cpp Normal file
View file

@ -0,0 +1,28 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"

using namespace std;
using namespace cv;
using namespace opengaze;

// Collect argc/argv into a vector of strings for convenience.
vector<string> get_arguments(int argc, char **argv) {
    vector<string> arguments;
    for (int i = 0; i < argc; ++i){
        arguments.emplace_back(string(argv[i]));
    }
    return arguments;
}

// Entry point: estimate gaze and show it mapped onto the screen.
int main(int argc, char** argv)
{
    vector<string> arguments = get_arguments(argc, argv); // currently unused; kept for parity with the other tools
    OpenGaze open_gaze(argc, argv);
    open_gaze.runGazeOnScreen();
    return 0; // fixed: returning 1 signalled failure to the shell on success
}

28
exe/GazeVisualization.cpp Normal file
View file

@ -0,0 +1,28 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"

using namespace std;
using namespace cv;
using namespace opengaze;

// Collect argc/argv into a vector of strings for convenience.
vector<string> get_arguments(int argc, char **argv) {
    vector<string> arguments;
    for (int i = 0; i < argc; ++i){
        arguments.emplace_back(string(argv[i]));
    }
    return arguments;
}

// Entry point: visualize estimated gaze directions on the input frames.
int main(int argc, char** argv)
{
    vector<string> arguments = get_arguments(argc, argv); // currently unused; kept for parity with the other tools
    OpenGaze open_gaze(argc, argv);
    open_gaze.runGazeVisualization();
    return 0; // fixed: returning 1 signalled failure to the shell on success
}

BIN
exe/test.mp4 Normal file

Binary file not shown.

BIN
imgs/logo_mpiinf.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

BIN
imgs/logo_osaka-u.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 177 KiB

BIN
imgs/logo_pui.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 246 KiB

88
include/data.hpp Normal file
View file

@ -0,0 +1,88 @@
#ifndef DATA_HPP
#define DATA_HPP
#include <opencv2/opencv.hpp>
namespace opengaze{
/**
 * face and facial landmark detection data
 * @param face_id personal id from tracking across frames
 * @param certainty detection score, 1 is the best, -1 is the worst
 * @param landmarks detected six facial landmarks as four eye corners and two mouth corners
 * @param face_bb detected face bounding box
 */
struct FaceData
{
    unsigned long face_id;
    double certainty;
    cv::Point2f landmarks[6];
    cv::Rect_<int> face_bb;
};
/**
 * eye image related data
 * @param leye_pos/reye_pos 3D eyeball center position for left and right eyes in the original camera coordinate system
 * @param leye_img/reye_img eye image
 * @param leye_rot/reye_rot rotation matrix during the data normalization procedure
 */
struct EyeData
{
    // cv::Mat head_r, head_t;
    cv::Mat leye_pos, reye_pos; // 3D eyeball centers in camera coordinates
    // normalized eyes
    cv::Mat leye_img, reye_img;
    cv::Mat leye_rot, reye_rot;
};
/**
 * face patch data related to data normalization
 * @param head_r head pose as center of the face
 * @param head_t head translation as center of the face
 * @param face_rot rotation matrix during the data normalization procedure
 * @param face_center 3D face center in the original camera coordinate system
 * @param debug_img use for debug to show the normalized face image
 * @param face_patch normalized face image
 */
struct FacePatchData
{
    cv::Mat head_r, head_t;
    cv::Mat face_rot;
    cv::Mat face_center;
    cv::Mat debug_img;
    cv::Mat face_patch;
};
/**
 * gaze data
 * @param lgaze3d/rgaze3d gaze directions of left and right eyes in the camera coordinate system
 * @param gaze3d gaze direction estimated from face patch in the camera coordinate system
 * @param lgaze2d/rgaze2d projected gaze positions on the screen coordinate from left and right eyes
 * @param gaze2d projected gaze positions from face patch on the screen coordinate
 */
struct GazeData
{
    cv::Vec3f lgaze3d, rgaze3d;
    cv::Vec3f gaze3d;
    cv::Point2f lgaze2d, rgaze2d;
    cv::Point2f gaze2d;
};
/**
 * The general output data structure
 * @param face_data store face and facial landmark detection data
 * @param eye_data store data related to eye image input
 * @param face_patch_data normalized face patch data
 * @param gaze_data gaze data in 2D and 3D spaces
 */
struct Sample
{
    FaceData face_data;
    EyeData eye_data;
    FacePatchData face_patch_data;
    GazeData gaze_data;
};
} // namespace opengaze
#endif //DATA_HPP

72
include/face_detector.hpp Normal file
View file

@ -0,0 +1,72 @@
#ifndef FACE_DETECTOR_HPP
#define FACE_DETECTOR_HPP
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#if USE_DLIB
// if we use dlib
#include <dlib/opencv.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/image_processing.h>
#include <dlib/gui_widgets.h>
#include <dlib/image_io.h>
#endif
#include "data.hpp"
namespace opengaze{
/**
 * Detects and tracks faces and facial landmarks across frames, filling the
 * FaceData part of each Sample (see data.hpp).
 */
class FaceDetector {
public:
    FaceDetector();
    ~FaceDetector();
    /**
     * face and facial landmark detection selection
     * The current implementation is only OpenFace. OpenFace uses dlib for face detection
     */
    enum Method{OpenFace, OpenCV, Dlib};
    /**
     * main function to detect and track face and facial landmarks
     * @param input_img input image
     * @param output output data structure
     */
    void track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output);
    // Clear the tracking state so the next frame is treated as a fresh start.
    void reset();
    void setMethodType(Method method_type) {method_type_ = method_type;}
    Method getMethodType() {return method_type_;}
    // Allocate per-user tracking slots for up to number_users faces.
    void initialize(int number_users);
private:
    Method method_type_;
#if USE_DLIB
    dlib::frontal_face_detector dlib_detector_;
    dlib::shape_predictor dlib_sp_;
#endif
    // parameters for OpenFace
    std::vector<bool> active_models_;          // which tracking models are currently in use
    unsigned long num_faces_max_;              // maximum number of simultaneously tracked faces
    int detection_skip_frames_, tracking_loss_limit_;
    float detection_resize_rate_;
    float nonoverlap_threshold_;               // presumably bounding-box overlap cutoff — TODO confirm
    double certainty_threshold_;               // minimum detection certainty to keep a track
    int landmark_indices_[6];                  // indices of the six landmarks copied into FaceData
    int frame_counter_;
    unsigned long current_face_id_;
    std::vector<unsigned long> face_ids_;
};
} // namespace opengaze
#endif //FACE_DETECTOR_HPP

View file

@ -0,0 +1,65 @@
#ifndef GAZE_ESTIMATOR_HPP
#define GAZE_ESTIMATOR_HPP
#include <opencv2/opencv.hpp>
#include "data.hpp"
#include "face_detector.hpp"
#include "normalizer.hpp"
#include "gaze_predictor.hpp"
namespace opengaze{
/**
 * Top-level gaze estimation pipeline: face/landmark detection, data
 * normalization and gaze prediction, combined behind one interface.
 */
class GazeEstimator {
public:
    GazeEstimator();
    ~GazeEstimator();
    /**
     * In the current implementation, we only have the "MPIIGaze" method which uses the input face/eye image
     * and outputs the gaze direction directly. It is an appearance-based method. "OpenFace" can also output
     * the gaze vector according to the pupil detection results. However, the "OpenFace" implementation is not
     * included inside our OpenGaze toolkit yet.
     */
    enum Method{MPIIGaze, OpenFace};
    /**
     * for the "MPIIGaze" method, the input image can be face or eye. The full-face patch model can output
     * more accurate gaze prediction than the eye image model, while the eye image based model is much faster.
     */
    enum InputType{face, eye};
    /**
     * the main function to estimate the gaze.
     * It performs the face and facial landmarks detection, head pose estimation and then gaze prediction.
     * @param input_image input scene image
     * @param output data structure for output
     */
    void estimateGaze(cv::Mat input_image, std::vector<opengaze::Sample> &output);
    // Run detection + normalization only, filling the normalized patches in outputs.
    void getImagePatch(cv::Mat input_image, std::vector<opengaze::Sample> &outputs);
    void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist);
    void setRootPath(std::string root_path);
    void setMethod(Method, std::vector<std::string> arguments);
    void initialFaceDetector(int number_users);
    Method method_type_;
    InputType input_type_; // the input type
private:
    // class instances
    FaceDetector face_detector_;
    Normalizer normalizer_;
    GazePredictor gaze_predictor_;
    // camera intrinsic matrix
    cv::Mat camera_matrix_;
    // camera distortion matrix
    cv::Mat camera_dist_;
    // the root path is used to load configuration files and models
    std::string root_path_;
};
} // namespace opengaze
#endif //GAZE_ESTIMATOR_HPP

View file

@ -0,0 +1,29 @@
#ifndef GAZE_PREDICTOR_HPP
#define GAZE_PREDICTOR_HPP
#include <opencv2/opencv.hpp>
#include "data.hpp"
#include "face_detector.hpp"
namespace opengaze{
/**
 * Wraps the CNN-based MPIIGaze predictor: loads the model and maps a
 * normalized face patch to a 3D gaze direction.
 */
class GazePredictor {
public:
    GazePredictor();
    ~GazePredictor();
    // Load the MPIIGaze CNN model; arguments carry model/config paths.
    // NOTE(review): "initia" looks like a typo for "initial"/"init", but the
    // name cannot be changed in the header alone without touching callers.
    void initiaMPIIGaze(std::vector<std::string> arguments);
    // Predict a 3D gaze direction from a normalized face patch.
    cv::Point3f predictGazeMPIIGaze(cv::Mat face_patch);
private:
    int model_type_;           // selected model variant (face vs. eye) — TODO confirm semantics
    bool is_extract_feature;   // presumably toggles feature-extraction mode — verify against .cpp
};
} // namespace opengaze
#endif //GAZE_PREDICTOR_HPP

125
include/input_handler.hpp Normal file
View file

@ -0,0 +1,125 @@
#ifndef INPUT_HANDLER_HPP
#define INPUT_HANDLER_HPP
#include <opencv2/opencv.hpp>
#include <vector>
#include <iostream>
#include <boost/filesystem.hpp>
#include "data.hpp"
namespace opengaze {
class InputHandler {
public:
enum InputType {Camera, Video, Image, Directory, Memory};
InputHandler();
~InputHandler();
/**
* set the camera intrinsic parameters
* @param camera_matrix camera intrinsic matrix
* @param camera_dist camera distortion matrix
*/
void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist){
camera_matrix_ = std::move(camera_matrix);
camera_distortion_ = std::move(camera_dist);
}
/**
* function to return next sample, could come from any input source
* @return next sample
*/
cv::Mat getNextSample();
/**
* set the input type
* @param type the input typ, could be found in InputType defination
*/
void setInputType(InputType type){input_type_ = type;}
/**
* set the input
* according the input type, here the input value are different.
* For the type "Camera", this input value indicates the camera id
* For the type "video", this input value is the video file name
* For input type "Directory", this input value is the directory path
*/
void setInput(int camera_id) {camera_id_ = camera_id;}
void setInput(std::vector<cv::Mat> images) {images_ = std::move(images);}
void setInput(std::string input_path);
/**
* read the parameters related to the screen
* @param calib_file file for the configuration
*/
void readScreenConfiguration(std::string calib_file);
/**
* read the camera instrinic parameters from the configuration file
* @param calib_file file for the configuration
*/
void readCameraConfiguration(std::string calib_file);
/**
* When the 3D gaze vector is achieved, there is a need to project the gaze on the 2D screen.
* This function also needs the input to indicate if use the full-face model or not,
* since the initial of gaze vector will be center of the face for the full-face models
* and eye center for the eye-based models.
* @param input input data contains the 3D gaze vector
* @param is_face_model a boolen value indicates if the gaze vectors is from face model or eye model
*/
void projectToDisplay(std::vector<opengaze::Sample> &input, bool is_face_model=true);
int getFrameHeight(){return cap_.get(cv::CAP_PROP_FRAME_HEIGHT);}
int getFrameWidth(){return cap_.get(cv::CAP_PROP_FRAME_WIDTH);}
InputType getInputType() {return input_type_;}
int getScreenWidth() {return screen_width_;}
int getScreenHeight() {return screen_height_;}
std::string getFileName() {return current_file_name_;}
cv::Point2f mapToDisplay(cv::Vec3f obj_center, cv::Vec3f gaze_point);
void initialize();
bool closeInput();
void getScreenResolution(int &width, int &height);
cv::Mat getCameraMatrix() { return camera_matrix_;}
cv::Mat getCameraDistortion() {return camera_distortion_;}
void setFrameSize(int frame_width, int frame_height);
bool isReachEnd() {return is_reach_end_;}
cv::Mat camera_matrix_;
cv::Mat camera_distortion_;
private:
// indicator if we reach the end of sample stream
bool is_reach_end_;
int camera_id_;
int sample_height_, sample_width_;
std::vector<cv::Mat> images_;
std::string input_path_;
std::string input_file_video_name_;
int screen_width_, screen_height_;
// monitor
float monitor_W_, monitor_H_; // monitor width and height in mm
cv::Mat monitor_R_, monitor_T_;
cv::Vec3f monitor_corners_[4];
cv::Mat monitor_normal_;
// input variable
InputType input_type_;
cv::VideoCapture cap_;
std::string current_file_name_;
// variable for directory input
boost::filesystem::directory_iterator current_itr_;
};
}
#endif //INPUT_HANDLER_HPP

42
include/normalizer.hpp Normal file
View file

@ -0,0 +1,42 @@
#ifndef NORMALIZER_HPP
#define NORMALIZER_HPP
#include <opencv2/opencv.hpp>
#include "data.hpp"
namespace opengaze{
// Data normalization: estimates the head pose from facial landmarks and
// produces normalized face/eye image patches for gaze estimation.
class Normalizer {
public:
Normalizer();
~Normalizer();
// estimate the head pose from 2D landmarks and store it into the sample
// (presumably uses the loaded 3D face model and the camera matrix —
// confirm in the implementation)
void estimateHeadPose(const cv::Point2f *landmarks, opengaze::Sample &sample);
// set the camera intrinsic matrix
void setCameraMatrix(cv::Mat input);
// load the 3D face model from the given path
void loadFaceModel(std::string path);
// set the virtual normalized-camera parameters: focal length, distance to
// the subject and the output image size
void setParameters(int focal_length, int distance, int img_w, int img_h);
// produce the normalized face patch from the input image; normalization
// data is written back into the sample
cv::Mat normalizeFace(cv::Mat input_image, Sample &sample);
// produce the normalized eye patches (one cv::Mat per eye)
std::vector<cv::Mat> normalizeEyes(cv::Mat input_image, Sample &sample);
// convert a 3D point with the given conversion matrix
// NOTE(review): top-level const on a by-value cv::Mat parameter has no
// effect for callers
cv::Mat cvtToCamera(cv::Point3f input, const cv::Mat cnv_mat);
private:
// camera intrinsic matrix
cv::Mat camera_matrix_;
// 3D face model points
std::vector<cv::Point3f> face_model_;
// face model in matrix form, and the normalized-camera matrix
cv::Mat face_model_mat_, cam_norm_;
// focal length and distance of the virtual normalized camera
float focal_norm_, distance_norm_;
// output size of the normalized image patch
cv::Size roiSize_norm_;
};
}
#endif //NORMALIZER_HPP

80
include/opengaze.hpp Normal file
View file

@ -0,0 +1,80 @@
#ifndef OPEN_GAZE_H
#define OPEN_GAZE_H
#include <string>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <opencv2/opencv.hpp>
#include "input_handler.hpp"
#include "gaze_estimator.hpp"
#include "data.hpp"
#include "personal_calibrator.hpp"
namespace opengaze {
// Top-level API of the toolkit: ties the input handler and the gaze
// estimator together and exposes the main application entry points.
class OpenGaze {
public:
explicit OpenGaze(int argc, char** argv); // reads the configuration file
~OpenGaze();
// main function to estimate and show the gaze vector drawn on the input face image
void runGazeVisualization();
/**
 * main function to run personal calibration.
 * @param num_calibration_point the number of points used for calibration
 */
void runPersonalCalibration(int num_calibration_point=5);
// main function to estimate and draw the gaze point on the screen
void runGazeOnScreen();
// main function to extract the face image from the input image; the face
// image can then be used to train a custom gaze estimation model
void runDataExtraction();
private:
// visualization helpers
/**
 * draw the gaze vector on the input face image.
 * @param sample input data including the gaze vector, head pose etc.
 * @param image input image containing the face; the gaze vector is drawn onto it
 */
void drawGazeOnFace(opengaze::Sample sample, cv::Mat &image);
// draw the detected facial landmarks
void drawLandmarks(opengaze::Sample sample, cv::Mat &image);
// draw the estimated gaze in the top-left corner of the input image to show
// the relative position on the screen; the user then sees both the input
// image and the projected gaze target. Mainly used for debugging.
void drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image);
// estimate and draw the gaze point on the screen
void drawGazeOnScreen(opengaze::Sample sample, cv::Mat &image);
// debug mode draws the gaze on the face image
bool show_debug_;
// class instances
InputHandler input_handler_;
GazeEstimator gaze_estimator_;
// input camera id
int camera_id_;
// temporary variables to store the input path, output path and input type
boost::filesystem::path input_dir_;
InputHandler::InputType input_type_;
boost::filesystem::path output_dir_;
// whether the full-face model (rather than the eye model) is used
bool is_face_model_;
// whether to save the output as a video
bool is_save_video_;
// path to save the personal calibration model
std::string per_model_save_path_;
};
}
#endif //OPEN_GAZE_H

View file

@ -0,0 +1,64 @@
#ifndef PERSONAL_CALIBRATOR_HPP
#define PERSONAL_CALIBRATOR_HPP
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
// On-screen personal calibration: shows points on the screen, collects the
// gaze predictions for them and fits a polynomial mapping from the
// predictions to the true screen locations.
class PersonalCalibrator {
public:
PersonalCalibrator(int screen_width, int screen_height);
~PersonalCalibrator();
/**
 * generate random screen locations for calibration
 * @param num_points number of points to generate
 */
void generatePoints(int num_points);
// prepare the display window; it should be full-screen
void initialWindow();
// show the next calibration point
// (the bool return presumably signals whether another point remains —
// confirm in the implementation)
bool showNextPoint();
// wait 0.5 second to receive the confirmation (mouse click) from the user
void confirmClicking();
/**
 * fit a polynomial function for the personal calibration
 * @param prediction predictions from the gaze estimation method
 * @param ground_truth calibration point locations on the screen
 * @param order order of the polynomial function, 1 means linear
 */
void generateModel(std::vector<cv::Point2f> prediction, std::vector<cv::Point2f> ground_truth, int order=1);
/**
 * save the personal calibration model
 * @param file_path path to save the model
 */
void saveModel(std::string file_path);
/**
 * load the personal calibration model
 * @param file_path path to load the model
 */
void loadModel(std::string file_path);
/**
 * return the current calibration point location on the screen
 * (points_ stores cv::Point2i; it converts implicitly to cv::Point2f here)
 * @return location on the screen
 */
cv::Point2f getCurrentPoint() {return points_[index_point_];}
// compute the polynomial function coefficients
void calibratePolynomial();
private:
// indicator whether the user clicked the mouse
bool is_click_;
// number of points for personal calibration
int num_points_;
// index of the current calibration point
int index_point_;
// generated calibration points
std::vector<cv::Point2i> points_;
int screen_width_, screen_height_, center_radius_; // monitor width and height in pixels
// personal calibration model (fitted polynomial coefficients)
cv::Mat model_matrix_;
};
#endif //PERSONAL_CALIBRATOR_HPP

60
install.sh Normal file
View file

@ -0,0 +1,60 @@
#!/bin/bash
set -e
set -o pipefail
if [ $# -ne 0 ]
then
echo "Usage: install.sh"
exit 1
fi
# Essential Dependencies
echo "Installing Essential dependencies..."
sudo apt-get -y update
sudo apt-get -y install build-essential
sudo apt-get -y install cmake
sudo apt-get -y install libopenblas-dev liblapack-dev
sudo apt-get -y install git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev
sudo apt-get -y install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev
echo "Essential dependencies installed."
# OpenCV Dependency
echo "Downloading OpenCV..."
wget https://github.com/opencv/opencv/archive/3.4.0.zip
unzip 3.4.0.zip
cd opencv-3.4.0
mkdir -p build
cd build
echo "Installing OpenCV..."
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB=ON -D WITH_CUDA=OFF -D BUILD_SHARED_LIBS=ON ..