initial commit

2019-01-10 13:26:03 +01:00 · 2019-01-10 13:26:03 +01:00 · e505acdb29
commit e505acdb29
41 changed files with 2922 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,32 @@
 ### C++ ###
 # Prerequisites
 *.d
 # Compiled Object files
 *.slo
 *.lo
 *.o
 *.obj
 # Compiled Dynamic libraries
 *.so
 *.dylib
 *.dll
 # Compiled Static libraries
 *.lai
 *.la
 *.a
 *.lib
 ### CMake ###
 CMakeLists.txt.user
 CMakeCache.txt
 CMakeFiles
 CMakeScripts
 Testing
 Makefile
 cmake_install.cmake
 install_manifest.txt
 compile_commands.json
 CTestTestfile.cmake
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,90 @@
 cmake_minimum_required(VERSION 3.0)
 project(OpenGaze VERSION 0.1)
 # Default to a Release build, but only when no build type was requested and
 # the generator is single-configuration. Setting CMAKE_BUILD_TYPE
 # unconditionally would shadow a user-supplied -DCMAKE_BUILD_TYPE=Debug.
 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set(CMAKE_BUILD_TYPE Release)
 endif()
 # create a directory for models and configuration files
 set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze")
 # The directory is passed to the sources as the string macro OPENGAZE_CON_DIR.
 add_definitions(-DOPENGAZE_CON_DIR="${OPENGAZE_DIR}")
 # Executables go to <build>/bin, shared libraries to <build>/lib.
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/)
 # C++11 is requested both through the raw flag and through CMAKE_CXX_STANDARD.
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 set(CMAKE_CXX_STANDARD 11)
 # OpenCV
 find_package(OpenCV 3.4 REQUIRED
    COMPONENTS
       core
       imgproc
       calib3d
       highgui
       objdetect)
 # Boost, for reading configuration file
 find_package(Boost 1.5 REQUIRED
    COMPONENTS
       system
       filesystem
       timer
       thread
       program_options)
 # Overrides the variable from FindBoost so that both the Boost include root
 # and its boost/ subdirectory are on the header search path.
 set(Boost_INCLUDE_DIRS
    ${Boost_INCLUDE_DIR}
    ${Boost_INCLUDE_DIR}/boost)
 # Caffe
 # Install prefix of Caffe. Stored as a cache entry so it can be overridden
 # with -DCAFFE_INSTALL_DIR=<path> instead of editing this file; the default
 # is the location that used to be hard-coded here.
 set(CAFFE_INSTALL_DIR "/home/xucong/library/caffe/build/install"
    CACHE PATH "Caffe install prefix (expects include/ and lib/ below it)")
 set(Caffe_INCLUDE_DIRS ${CAFFE_INSTALL_DIR}/include)
 set(Caffe_LIBRARY_DIRS ${CAFFE_INSTALL_DIR}/lib)
 # Plain library names, resolved through the link directories set further down.
 set(Caffe_LIBS lmdb glog caffe)
 # Face and facial landmark detection methods
 option(USE_OPENFACE "with OpenFace" ON)
 # NOTE(review): the macro is defined regardless of the option value, and the
 # variable is forced ON again at the end of this section, so OpenFace is
 # effectively always enabled.
 add_definitions(-DUSE_OPENFACE=1)
 # OpenFace
 # Root of the OpenFace checkout/build. Stored as a cache entry so it can be
 # overridden with -DOPENFACE_ROOT_DIR=<path>; the default is the location
 # that used to be hard-coded here.
 set(OPENFACE_ROOT_DIR "/home/xucong/library/OpenFace"
    CACHE PATH "OpenFace root directory (sources and build tree)")
 # The root directory is also passed to the sources as the string macro OPENFACE_DIR.
 add_definitions(-DOPENFACE_DIR="${OPENFACE_ROOT_DIR}")
 set(CLM_INCLUDE_DIRS ${OPENFACE_ROOT_DIR}/lib/local/LandmarkDetector/include)
 set(CLM_LIBRARY_DIRS ${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector)
 set(CLM_LIBS LandmarkDetector tbb openblas dlib)
 set(USE_OPENFACE ON) # we use OpenFace method here
 # suppress auto_ptr deprecation warnings
 # Applied only when the C++ compiler identifies itself as exactly Clang or GNU.
 if(CMAKE_CXX_COMPILER_ID MATCHES "^(Clang|GNU)$")
    add_compile_options(-Wno-deprecated-declarations)
 endif()
 # Header search paths; directory-scoped, so they apply to every target below.
 include_directories(
    ./
    ./include
    /usr/local/cuda/include
    ${OpenCV_INCLUDE_DIRS}
    ${Boost_INCLUDE_DIRS}
    ${CLM_INCLUDE_DIRS}
    ${Caffe_INCLUDE_DIRS})
 # Library search paths; also directory-scoped.
 link_directories(
    ./
    ./build/lib
    /usr/lib
    /usr/local/cuda/lib64
    ${Boost_LIBRARY_DIRS}
    ${CLM_LIBRARY_DIRS}
    ${Caffe_LIBRARY_DIRS})
 # Sources and public headers are collected by glob at configure time, so
 # newly added files are only picked up after CMake is re-run.
 file(GLOB SOURCE "./src/*.cpp")
 file(GLOB HEADERS "./include/*.hpp")
 # compile opengaze library
 add_library(opengaze SHARED
    ${SOURCE}
    ${HEADERS})
 # The shared library carries the project version.
 set_target_properties(opengaze
    PROPERTIES
       VERSION ${PROJECT_VERSION})
 #if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
 #    set (CMAKE_INSTALL_PREFIX "/usr/local" CACHE PATH "default install path" FORCE )
 #endif()
 # Install the library under <prefix>/lib and its headers under <prefix>/include/opengaze.
 install(TARGETS opengaze
    EXPORT OpenGazeTargets
    LIBRARY DESTINATION lib
    ARCHIVE DESTINATION lib)
 install(FILES ${HEADERS}
    DESTINATION include/opengaze)
 # install caffe and OpenFace
 # The 3rdParty directory is created with read/write/execute permission for
 # owner, group and others.
 install(DIRECTORY
    DESTINATION "${OPENGAZE_DIR}/3rdParty"
    DIRECTORY_PERMISSIONS
       OWNER_READ OWNER_WRITE OWNER_EXECUTE
       GROUP_READ GROUP_WRITE GROUP_EXECUTE
       WORLD_READ WORLD_WRITE WORLD_EXECUTE)
 # Static OpenFace landmark detector library.
 install(FILES
       ${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector/libLandmarkDetector.a
    DESTINATION ${OPENGAZE_DIR}/3rdParty)
 # Caffe shared library: both the unversioned name and the 1.0.0 file.
 install(FILES
       ${Caffe_LIBRARY_DIRS}/libcaffe.so
       ${Caffe_LIBRARY_DIRS}/libcaffe.so.1.0.0
    DESTINATION ${OPENGAZE_DIR}/3rdParty)
 # install configuration files
 # Every directory and file below is installed with read/write/execute
 # permission for owner, group and others.
 set(OPENGAZE_CONTENT_PERMISSIONS
    OWNER_READ OWNER_WRITE OWNER_EXECUTE
    GROUP_READ GROUP_WRITE GROUP_EXECUTE
    WORLD_READ WORLD_WRITE WORLD_EXECUTE)
 # Create the directory tree, parents before children.
 foreach(opengaze_content_dir
       "${OPENGAZE_DIR}"
       "${OPENGAZE_DIR}/content"
       "${OPENGAZE_DIR}/content/calib"
       "${OPENGAZE_DIR}/content/model")
    install(DIRECTORY
       DESTINATION "${opengaze_content_dir}"
       DIRECTORY_PERMISSIONS ${OPENGAZE_CONTENT_PERMISSIONS})
 endforeach()
 # Camera and monitor calibration files.
 install(FILES
       ./content/calib/calibration.yml
       ./content/calib/monitor_laptop.yml
    DESTINATION ${OPENGAZE_DIR}/content/calib
    PERMISSIONS ${OPENGAZE_CONTENT_PERMISSIONS})
 # 3D face model.
 install(FILES
       ./content/model/face_model.yml
    DESTINATION ${OPENGAZE_DIR}/content/model
    PERMISSIONS ${OPENGAZE_CONTENT_PERMISSIONS})
 # Default configuration file.
 install(FILES
       default.cfg
    DESTINATION ${OPENGAZE_DIR}
    PERMISSIONS ${OPENGAZE_CONTENT_PERMISSIONS})
--- a/README.md
+++ b/README.md
@ -0,0 +1,55 @@
 # OpenGaze: Open Source Toolkit for Camera-Based Gaze Estimation and Interaction
 <!--The current demo video includes clips from Friends, which may violate the copyright. Although people think 28 seconds could be a boundary:https://productforums.google.com/forum/#!topic/youtube/rQhkI20Rm8k, there is no golden rule for it: https://www.youtube.com/yt/about/copyright/fair-use/#yt-copyright-protection-->
 <!--//[![Demo](https://img.youtube.com/vi/OORxOdu8USQ/0.jpg)](https://youtu.be/OORxOdu8USQ "OpenGaze Friends Demo")-->
 Appearance-based gaze estimation methods that only require an off-the-shelf camera have significantly improved and promise a wide range of new applications in gaze-based interaction and attentive user interfaces. However, these methods are not yet widely used in the human-computer interaction (HCI) community.
 To democratize their use in HCI, we present OpenGaze, the first software toolkit that is specifically developed for gaze interface designers. OpenGaze is open source and aims to implement state-of-the-art methods for camera-based gaze estimation and interaction.
 <img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_mpiinf.png" height="80"/><img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_pui.png" height="80"><img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_osaka-u.png" height="80">
 ## Functionality
 The toolkit is capable of performing the following gaze-related tasks:
 * **Gaze Estimation**
 Show estimated gaze on the screen given screen-camera relationship.
 [![Demo](https://img.youtube.com/vi/R1vb7mV3y_M/0.jpg)](https://youtu.be/R1vb7mV3y_M "Gaze visualization demo")
 <p>&nbsp;</p>
 * **Gaze Visualization**
 Show the gaze direction originating from the center of each face in the input image.
 [![Demo](https://img.youtube.com/vi/8yMTvvr0rRU/0.jpg)](https://youtu.be/8yMTvvr0rRU "Gaze visualization demo")
 <p>&nbsp;</p>
 * **Personal Calibration**
 Perform personal calibration and remap the gaze target on the screen.
 [![Demo](https://img.youtube.com/vi/ntBv1wcNGAo/0.jpg)](https://youtu.be/ntBv1wcNGAo "Gaze visualization demo")
 <p>&nbsp;</p>
 ## Installation
 [Unix Installation](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Unix-Installation)
 ## Use
 [Command line arguments](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Command-line-arguments)
 ## Citation
 If you use any of the resources provided on this page in any of your publications, please cite the following paper:
 **Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications** <br/>
 Xucong Zhang, Yusuke Sugano, Andreas Bulling<br/>
 Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), 2019<br/>
 BibTex, PDF
 ## License
 The license agreement can be found in Copyright.txt
 You have to respect boost, OpenFace and OpenCV licenses.
 Furthermore, you have to respect the licenses of the datasets used for [model training](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Model-training).
--- a/RELEASE.md
+++ b/RELEASE.md
@ -0,0 +1,3 @@
 # Release 0.1.0
 Initial release of OpenGaze.
--- a/caffe-layers/include/caffe/layers/dspp_layer.hpp
+++ b/caffe-layers/include/caffe/layers/dspp_layer.hpp
@ -0,0 +1,50 @@
 #ifndef CAFFE_DSPP_LAYER_HPP_
 #define CAFFE_DSPP_LAYER_HPP_
 #include <string>
 #include <utility>
 #include <vector>
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
 #include "caffe/layers/data_layer.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/layers/loss_layer.hpp"
 #include "caffe/layers/neuron_layer.hpp"
 #include "caffe/proto/caffe.pb.h"
 namespace caffe {
 /**
  * @brief Weights every channel of a data blob by a single-channel map.
  *
  * The layer takes exactly two bottoms. bottom[1] is the data blob; bottom[0]
  * is the map, of which only channel 0 is read. top[0] takes the first four
  * dimensions of bottom[1] and holds, per element,
  *   bottom[1](n, c, h, w) * bottom[0](n, 0, h, w).
  *
  * Only the CPU forward/backward passes are declared; no GPU overrides are
  * provided by this class.
  *
  * NOTE(review): type() reports "DSPPLayer" while the layer is registered
  * under the name DSPP in the .cpp file; confirm whether the mismatch is
  * intended before changing either string.
  */
 template <typename Dtype>
 class DSPPLayer : public Layer<Dtype> {
  public:
   // The cached dimensions are zero until Reshape() fills them in.
   explicit DSPPLayer(const LayerParameter& param)
       : Layer<Dtype>(param), width_(0), height_(0), channel_(0), num_(0) {}
   virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
   virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);

   virtual inline const char* type() const { return "DSPPLayer"; }
   virtual inline int ExactNumBottomBlobs() const { return 2; }
   virtual inline int MinTopBlobs() const { return 1; }

  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);
   virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

   // Dimensions of bottom[1], cached by Reshape().
   int width_;
   int height_;
   int channel_;
   int num_;
 };
 }  // namespace caffe
 #endif  // CAFFE_DSPP_LAYER_HPP_
--- a/caffe-layers/include/caffe/layers/pose_data_layer.hpp
+++ b/caffe-layers/include/caffe/layers/pose_data_layer.hpp
@ -0,0 +1,56 @@
 #ifndef CAFFE_POSE_DATA_LAYER_HPP_
 #define CAFFE_POSE_DATA_LAYER_HPP_
 #include <vector>
 #include "caffe/blob.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
 #include "caffe/layers/base_data_layer.hpp"
 namespace caffe {
 /**
  * @brief In-memory data layer producing an image blob and a 4-wide label blob.
  *
  * The layer has no bottoms and exactly two tops: top[0] is shaped
  * (batch_size, channels, height, width) and top[1] is shaped (batch_size, 4).
  * Data is supplied either through AddDatumVector()/AddMatVector(), which copy
  * into internal blobs, or through Reset(), which borrows caller-owned arrays.
  * Each forward pass exposes the next batch_size items and wraps around after
  * n items.
  */
 template <typename Dtype>
 class PoseDataLayer : public BaseDataLayer<Dtype> {
  public:
   // All scalar and pointer members start in a defined state, so the accessors
   // and the CHECK(data_) in Forward_cpu never read indeterminate values.
   explicit PoseDataLayer(const LayerParameter& param)
       : BaseDataLayer<Dtype>(param),
         batch_size_(0), channels_(0), height_(0), width_(0), size_(0),
         data_(NULL), labels_(NULL), n_(0), pos_(0),
         has_new_data_(false) {}
   virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);

   virtual inline const char* type() const { return "PoseData"; }
   virtual inline int ExactNumBottomBlobs() const { return 0; }
   virtual inline int ExactNumTopBlobs() const { return 2; }

   // Copy a set of datums / images (plus labels) into the layer. The number of
   // items must be a positive multiple of the batch size, and previously added
   // data must have been consumed.
   virtual void AddDatumVector(const vector<Datum>& datum_vector);
   virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
       const vector<float>& labels);

   // Reset should accept const pointers, but can't, because the memory
   //  will be given to Blob, which is mutable
   void Reset(Dtype* data, Dtype* label, int n);
   void set_batch_size(int new_size);

   int batch_size() { return batch_size_; }
   int channels() { return channels_; }
   int height() { return height_; }
   int width() { return width_; }

  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top);

   // Blob geometry; size_ is channels_ * height_ * width_.
   int batch_size_, channels_, height_, width_, size_;
   // Current data/label arrays (set by Reset) and the number of items in them.
   Dtype* data_;
   Dtype* labels_;
   int n_;
   // Index of the first item of the next batch to serve.
   size_t pos_;
   // Storage backing data added through AddDatumVector()/AddMatVector().
   Blob<Dtype> added_data_;
   Blob<Dtype> added_label_;
   // True while added data has not been fully consumed by Forward_cpu.
   bool has_new_data_;
 };
 }  // namespace caffe
 #endif
--- a/caffe-layers/src/caffe/layers/dspp_layer.cpp
+++ b/caffe-layers/src/caffe/layers/dspp_layer.cpp
@ -0,0 +1,92 @@
 #include <cmath>
 #include <algorithm>
 #include <vector>
 #include "caffe/layer.hpp"
 #include "caffe/layers/dspp_layer.hpp"
 #include <boost/spirit/include/phoenix_core.hpp>
 #include <boost/spirit/include/phoenix_operator.hpp>
 namespace caffe {
    // Nothing to configure: the layer has no parameters, and all shape
    // handling is done in Reshape().
    template <typename Dtype>
    void DSPPLayer<Dtype>::LayerSetUp(
            const vector<Blob<Dtype>*>& /*bottom*/,
            const vector<Blob<Dtype>*>& /*top*/) {}
    // Caches the first four dimensions of bottom[1] (the data input) and
    // shapes top[0] to exactly those four dimensions.
    template <typename Dtype>
    void DSPPLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
        // Four axes are indexed below; fail with a clear message instead of
        // reading past the end of the shape vector.
        CHECK_GE(bottom[1]->num_axes(), 4)
            << "DSPP data input (bottom[1]) must have at least 4 axes";
        const vector<int>& data_shape = bottom[1]->shape();
        num_ = data_shape[0];
        channel_ = data_shape[1]; // the input data size
        height_ = data_shape[2];
        width_ = data_shape[3];
        // init output size
        vector<int> output_shape(4);
        output_shape[0] = num_;
        output_shape[1] = channel_;
        output_shape[2] = height_;
        output_shape[3] = width_;
        top[0]->Reshape(output_shape);
    }
    // Forward pass: top[0](n, c, h, w) = bottom[1](n, c, h, w) * bottom[0](n, 0, h, w).
    template <typename Dtype>
    void DSPPLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
            const vector<Blob<Dtype>*>& top) {
        Blob<Dtype>* const heat_map = bottom[0];
        Blob<Dtype>* const features = bottom[1];
        Blob<Dtype>* const output = top[0];

        Dtype* out_values = output->mutable_cpu_data();
        // Start from an all-zero output before writing the products.
        caffe_set<Dtype>(output->count(), 0, out_values);

        for (int item = 0; item < num_; ++item) {
            for (int row = 0; row < height_; ++row) {
                for (int col = 0; col < width_; ++col) {
                    for (int ch = 0; ch < channel_; ++ch) {
                        const int out_index = output->offset(item, ch, row, col);
                        out_values[out_index] =
                            features->data_at(item, ch, row, col) *
                            heat_map->data_at(item, 0, row, col);
                    }
                }
            }
        }
    }
    // Backward pass for top = data * heat_map.
    //
    // bottom[0] is the heat map and bottom[1] the data blob. Each bottom's
    // gradient is computed only when its own propagate_down flag is set;
    // previously both gradients were gated on propagate_down[0] alone, so the
    // data gradient was skipped whenever the heat map did not need one.
    //
    //   d(data)(n,c,h,w)   = top_diff(n,c,h,w) * heat_map(n,0,h,w)
    //   d(heat_map)(n,0,h,w) += top_diff(n,c,h,w) * data(n,c,h,w) / channel_
    //
    // NOTE(review): the division by channel_ averages the heat-map gradient
    // over channels rather than summing it; kept exactly as before.
    template <typename Dtype>
    void DSPPLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
            const vector<bool>& propagate_down,
            const vector<Blob<Dtype>*>& bottom) {
        const bool need_heat_map_diff = propagate_down[0];
        const bool need_data_diff = propagate_down[1];
        if (!need_heat_map_diff && !need_data_diff) {
            return;
        }

        const Dtype* top_diff = top[0]->cpu_diff();
        Dtype* data_diff = NULL;
        Dtype* heat_map_diff = NULL;
        if (need_data_diff) {
            data_diff = bottom[1]->mutable_cpu_diff();
            caffe_set<Dtype>(bottom[1]->count(), 0, data_diff);
        }
        if (need_heat_map_diff) {
            // Must start from zero because contributions are accumulated over channels.
            heat_map_diff = bottom[0]->mutable_cpu_diff();
            caffe_set<Dtype>(bottom[0]->count(), 0, heat_map_diff);
        }

        for (int n = 0; n < num_; ++n) {
            for (int h = 0; h < height_; ++h) {
                for (int w = 0; w < width_; ++w) {
                    for (int c = 0; c < channel_; ++c) {
                        const Dtype grad = top_diff[top[0]->offset(n, c, h, w)];
                        if (need_data_diff) {
                            data_diff[bottom[1]->offset(n, c, h, w)] =
                                grad * (bottom[0]->data_at(n, 0, h, w));
                        }
                        if (need_heat_map_diff) {
                            heat_map_diff[bottom[0]->offset(n, 0, h, w)] +=
                                grad * (bottom[1]->data_at(n, c, h, w) / channel_);
                        }
                    }
                }
            }
        }
    }
 INSTANTIATE_CLASS(DSPPLayer);
 REGISTER_LAYER_CLASS(DSPP);
 } // namespace caffe
--- a/caffe-layers/src/caffe/layers/pose_data_layer.cpp
+++ b/caffe-layers/src/caffe/layers/pose_data_layer.cpp
@ -0,0 +1,128 @@
 #include <opencv2/core/core.hpp>
 #include <vector>
 #include "caffe/layers/pose_data_layer.hpp"
 namespace caffe {
 // Reads batch size and image geometry from memory_data_param, shapes both
 // tops and the internal staging blobs, and clears the data/label pointers.
 template <typename Dtype>
 void PoseDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
   const auto& mem_param = this->layer_param_.memory_data_param();
   batch_size_ = mem_param.batch_size();
   channels_ = mem_param.channels();
   height_ = mem_param.height();
   width_ = mem_param.width();
   size_ = channels_ * height_ * width_;
   CHECK_GT(batch_size_ * size_, 0) <<
       "batch_size, channels, height, and width must be specified and"
       " positive in memory_data_param";

   // Labels are stored as one row of 4 values per item.
   vector<int> label_shape(2);
   label_shape[0] = batch_size_;
   label_shape[1] = 4;

   top[0]->Reshape(batch_size_, channels_, height_, width_);
   top[1]->Reshape(label_shape);
   added_data_.Reshape(batch_size_, channels_, height_, width_);
   added_label_.Reshape(label_shape);

   // No data is available until Reset() (or one of the Add* methods) is called.
   data_ = NULL;
   labels_ = NULL;

   // Touch the CPU data of both staging blobs once during setup.
   added_data_.cpu_data();
   added_label_.cpu_data();
 }
 // Copies a set of datums into the layer's staging blobs and makes them the
 // current data source. The number of datums must be a positive multiple of
 // the batch size, and previously added data must already be consumed.
 //
 // NOTE(review): the label blob is shaped (num, 4) but only the first `num`
 // entries are written, one per datum -- confirm the intended label layout.
 template <typename Dtype>
 void PoseDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {
   CHECK(!has_new_data_) <<
       "Can't add data until current data has been consumed.";
   const size_t num = datum_vector.size();
   CHECK_GT(num, 0) << "There is no datum to add.";
   CHECK_EQ(num % batch_size_, 0) <<
       "The added data must be a multiple of the batch size.";

   added_data_.Reshape(num, channels_, height_, width_);
   vector<int> label_shape(2);
   label_shape[0] = (int)num;
   label_shape[1] = 4;
   added_label_.Reshape(label_shape);

   // Apply data transformations (mirror, scale, crop...)
   this->data_transformer_->Transform(datum_vector, &added_data_);

   // Copy Labels
   Dtype* label_buffer = added_label_.mutable_cpu_data();
   for (size_t idx = 0; idx < num; ++idx) {
     label_buffer[idx] = datum_vector[idx].label();
   }

   // num_images == batch_size_
   Dtype* data_buffer = added_data_.mutable_cpu_data();
   Reset(data_buffer, label_buffer, num);
   has_new_data_ = true;
 }
 // Copies a set of images and their labels into the layer's staging blobs and
 // makes them the current data source.
 //
 // mat_vector: images to add; the count must be a positive multiple of the
 //             batch size.
 // labels:     label values; at least one entry per image is required.
 //
 // Fails (CHECK) if previously added data has not been consumed, if the image
 // count is invalid, or if fewer labels than images are supplied.
 //
 // NOTE(review): the label blob is shaped (num, 4) but only the first `num`
 // entries are written, one per image -- confirm the intended label layout.
 template <typename Dtype>
 void PoseDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,
    const vector<float>& labels) {
   size_t num = mat_vector.size();
   CHECK(!has_new_data_) <<
       "Can't add mat until current data has been consumed.";
   CHECK_GT(num, 0) << "There is no mat to add";
   CHECK_EQ(num % batch_size_, 0) <<
       "The added data must be a multiple of the batch size.";
   // The copy loop below reads labels[0 .. num-1]; reject short label vectors
   // instead of reading past the end.
   CHECK_GE(labels.size(), num) <<
       "At least one label per mat is required.";
   added_data_.Reshape(num, channels_, height_, width_);
   int label_shape_[] = {(int)num, 4};
   vector<int> label_shape(label_shape_, label_shape_+2);
   added_label_.Reshape(label_shape);
   // Apply data transformations (mirror, scale, crop...)
   this->data_transformer_->Transform(mat_vector, &added_data_);
   // Copy Labels
   Dtype* top_label = added_label_.mutable_cpu_data();
   for (size_t item_id = 0; item_id < num; ++item_id) {
     top_label[item_id] = labels[item_id];
   }
   // num_images == batch_size_
   Dtype* top_data = added_data_.mutable_cpu_data();
   Reset(top_data, top_label, num);
   has_new_data_ = true;
 }
 // Points the layer at caller-provided data and label arrays and rewinds the
 // read position to the first item.
 //
 // data, labels: non-null arrays; they are only borrowed, not copied.
 // n:            number of items; must be positive and a multiple of the
 //               batch size.
 //
 // No data transformation is applied to arrays passed in here.
 template <typename Dtype>
 void PoseDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {
   CHECK(data);
   CHECK(labels);
   // Forward_cpu wraps the read position with "% n_", so a zero item count
   // would divide by zero and a negative one would never wrap correctly.
   CHECK_GT(n, 0) << "n must be positive";
   CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";
   data_ = data;
   labels_ = labels;
   n_ = n;
   pos_ = 0;
 }
 // Changes the batch size and resizes the staging blobs to match. Not allowed
 // while added data is still waiting to be consumed.
 template <typename Dtype>
 void PoseDataLayer<Dtype>::set_batch_size(int new_size) {
   CHECK(!has_new_data_) <<
       "Can't change batch_size until current data has been consumed.";
   batch_size_ = new_size;

   added_data_.Reshape(batch_size_, channels_, height_, width_);

   // One row of 4 label values per item.
   vector<int> label_dims(2);
   label_dims[0] = (int)batch_size_;
   label_dims[1] = 4;
   added_label_.Reshape(label_dims);
 }
 // Exposes the next batch: top[0] and top[1] are pointed directly at the
 // current position inside the data and label arrays (no copy), then the
 // position advances by one batch and wraps around after n_ items.
 //
 // Two fixes relative to the earlier version:
 //  * top[1] (not the internal added_label_ blob) is reshaped, so the label
 //    top follows a batch size changed through set_batch_size(), just like
 //    top[0] does.
 //  * the label pointer advances by 4 values per item, matching the
 //    (batch_size, 4) shape of top[1]; advancing by one value per item made
 //    consecutive batches overlap whenever n_ > batch_size_.
 template <typename Dtype>
 void PoseDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       const vector<Blob<Dtype>*>& top) {
   CHECK(data_) << "PoseDataLayer needs to be initalized by calling Reset";
   // Number of label values stored per item.
   const int kLabelDim = 4;
   top[0]->Reshape(batch_size_, channels_, height_, width_);
   vector<int> label_shape(2);
   label_shape[0] = (int)batch_size_;
   label_shape[1] = kLabelDim;
   top[1]->Reshape(label_shape);
   top[0]->set_cpu_data(data_ + pos_ * size_);
   top[1]->set_cpu_data(labels_ + pos_ * kLabelDim);
   pos_ = (pos_ + batch_size_) % n_;
   // A full pass over the data has been served; new data may be added again.
   if (pos_ == 0)
     has_new_data_ = false;
 }
 INSTANTIATE_CLASS(PoseDataLayer);
 REGISTER_LAYER_CLASS(PoseData);
 }  // namespace caffe
--- a/content/calib/calibration.yml
+++ b/content/calib/calibration.yml
@ -0,0 +1,11 @@
 %YAML:1.0
 camera_matrix: !!opencv-matrix
   rows: 3
   cols: 3
   dt: f
   data: [ 1891.07, 0.0, 640, 0.0, 1891.07, 360, 0.0, 0.0, 1.0]
 dist_coeffs: !!opencv-matrix
   rows: 1
   cols: 5
   dt: f
   data: [1.68091e-02, -7.14552e-02, -5.65886e-03, -5.23482e-04, -3.39946e-02]
--- a/content/calib/monitor_desktop.yml
+++ b/content/calib/monitor_desktop.yml
@ -0,0 +1,13 @@
 %YAML:1.0
 monitor_W: 516
 monitor_H: 323
 monitor_R: !!opencv-matrix
   rows: 3
   cols: 3
   dt: f
   data: [ -0.99955, -0.02891, -0.0082861, -0.028948, 0.99957, 0.0044949, 0.0081526, 0.0047327, -0.99996]
 monitor_T: !!opencv-matrix
   rows: 3
   cols: 1
   dt: f
   data: [269.41, 48.561, 5.8344]
--- a/content/calib/monitor_laptop.yml
+++ b/content/calib/monitor_laptop.yml
@ -0,0 +1,13 @@
 %YAML:1.0
 monitor_W: 310
 monitor_H: 174
 monitor_R: !!opencv-matrix
   rows: 3
   cols: 3
   dt: f
   data: [ -0.99988, -0.009735, -0.01203, -0.0094674, 0.99971, -0.022108, 0.012242, -0.021992, -0.99968]
 monitor_T: !!opencv-matrix
   rows: 3
   cols: 1
   dt: f
   data: [149.91, 29.575, -18.884]
--- a/content/model/face_model.yml
+++ b/content/model/face_model.yml
@ -0,0 +1,6 @@
 %YAML:1.0
 face_model: !!opencv-matrix
    rows: 3
    cols: 6
    dt: f
    data: [ -45.096768, -21.312858, 21.312858, 45.096768, -26.299577, 26.299577, -0.483773,0.483773, 0.483773, -0.483773, 68.595035,68.595035, 2.397030, -2.397030, -2.397030, 2.397030, -0.000000, -0.000000]
--- a/default.cfg
+++ b/default.cfg
@ -0,0 +1,26 @@
 ## input and output
 # input_type = camera # camera, video, or directory
 # input = 0 # camera id, video file name, or directory of image files
 # output = /BS/zhang-semi/work/opengaze/test/
 # input = YOUR_VIDEO OR IMAGE FOLDER
 # output = MUST BE A DIRECTORY
 ## gaze estimation method
 # gaze_method = MPIIGaze # OpenFace  MPIIGaze
 # gpu_id = 0
 ## gaze estimation method/model selection
 # face_model = 1 # 1 for the face model, 0 for eye image model
 ## CNN model for face image, trained on MPIIGaze + EYEDIAP HD
 # cnn_param_path = YOUR_PATH/alexnet_face.prototxt
 # cnn_model_path = YOUR_PATH/alexnet_face.caffemodel
 ## calibration files: camera calibration and screen (monitor) calibration
 # calib_camera = YOUR_PATH/calibration.yml
 # calib_screen = YOUR_PATH/monitor.yml
 ## parameters for personal calibration
 # per_model_save_path = YOUR_PATH/user1.txt
 # num_calibration = 9
--- a/download_models.sh
+++ b/download_models.sh
@ -0,0 +1,9 @@
 # Download the AlexNet face model definition and weights into
 # ~/OpenGaze/content/caffeModel.
 OPENGAZE_DIR=~/OpenGaze
 MODEL_DIR="$OPENGAZE_DIR/content/caffeModel"
 # Quote the path so a home directory containing spaces still works, and stop
 # early if the directory cannot be created or entered; otherwise wget would
 # download into whatever directory the script was started from.
 mkdir -p "$MODEL_DIR" || exit 1
 cd "$MODEL_DIR" || exit 1
 wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.prototxt
 wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.caffemodel
--- a/exe/CMakeLists.txt
+++ b/exe/CMakeLists.txt
@ -0,0 +1,36 @@
 cmake_minimum_required(VERSION 3.0)
 project(OpenGazeExe VERSION 1.0)
 # Install location of the OpenGaze data; used below only to locate the
 # bundled 3rd-party libraries.
 set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze")
 # NOTE(review): the OPENGAZE_DIR macro is defined as the top-level source
 # directory, not as the OPENGAZE_DIR variable above -- confirm this is intended.
 add_definitions(-DOPENGAZE_DIR="${CMAKE_SOURCE_DIR}")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/)
 # C++11 is requested both through the raw flag and through CMAKE_CXX_STANDARD.
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 set(CMAKE_CXX_STANDARD 11)
 find_package(OpenCV 3.1 REQUIRED
    COMPONENTS
       calib3d
       highgui
       objdetect
       imgproc
       core)
 # Boost, for reading configuration file
 find_package(Boost 1.5 REQUIRED
    COMPONENTS
       system
       filesystem
       timer
       thread
       program_options)
 set(Boost_INCLUDE_DIRS
    ${Boost_INCLUDE_DIR}
    ${Boost_INCLUDE_DIR}/boost)
 # The installed OpenGaze headers are expected under /usr/local/include/opengaze.
 include_directories(
    /usr/local/include/opengaze
    /usr/local/cuda/include
    ${OpenCV_INCLUDE_DIRS}
    ${Boost_INCLUDE_DIRS})
 link_directories(
    /usr/lib
    /usr/local/lib
    /usr/local/cuda/lib64
    ${Boost_LIBRARY_DIRS}
    ${OPENGAZE_DIR}/3rdParty)
 ## -lX11 is for getting screen resolution in pixel in the Linux system
 set(LIBS
    opengaze
    LandmarkDetector
    ${OpenCV_LIBS}
    ${Boost_LIBRARIES}
    tbb
    openblas
    dlib
    lmdb
    glog
    caffe
    X11)
 # One executable per tool; each is built from <name>.cpp and links the same libraries.
 foreach(opengaze_tool GazeVisualization Calibration GazeEstimation DataExtraction)
    add_executable(${opengaze_tool} ${opengaze_tool}.cpp)
    target_link_libraries(${opengaze_tool} ${LIBS})
 endforeach()
--- a/exe/Calibration.cpp
+++ b/exe/Calibration.cpp
@ -0,0 +1,29 @@
 #include <iostream>
 #include <vector>
 #include <string>
 #include <opencv2/opencv.hpp>
 #include "opengaze/opengaze.hpp"
 using namespace std;
 using namespace cv;
 using namespace opengaze;
 // Copies the argc entries of argv (program name included) into a vector of strings.
 vector<string> get_arguments(int argc, char **argv) {
    vector<string> collected;
    int index = 0;
    while (index < argc) {
        collected.push_back(argv[index]);
        ++index;
    }
    return collected;
 }
 // Entry point of the Calibration tool: builds an OpenGaze instance from the
 // command line and runs personal calibration, passing 20 as the number of
 // calibration points.
 //
 // Returns 0 once calibration has run; the earlier "return 1" reported a
 // failure exit status to the shell even on success.
 int main(int argc, char** argv)
 {
    OpenGaze open_gaze(argc, argv);
    const int num_calibration_point = 20;
    open_gaze.runPersonalCalibration(num_calibration_point);
    return 0;
 }
--- a/exe/DataExtraction.cpp
+++ b/exe/DataExtraction.cpp
@ -0,0 +1,28 @@
 #include <iostream>
 #include <vector>
 #include <string>
 #include <opencv2/opencv.hpp>
 #include "opengaze/opengaze.hpp"
 using namespace std;
 using namespace cv;
 using namespace opengaze;
 // Copies the argc entries of argv (program name included) into a vector of strings.
 vector<string> get_arguments(int argc, char **argv) {
    vector<string> collected;
    int index = 0;
    while (index < argc) {
        collected.push_back(argv[index]);
        ++index;
    }
    return collected;
 }
 // Entry point of the DataExtraction tool: builds an OpenGaze instance from
 // the command line and runs data extraction.
 //
 // Returns 0 once extraction has run; the earlier "return 1" reported a
 // failure exit status to the shell even on success.
 int main(int argc, char** argv)
 {
    OpenGaze open_gaze(argc, argv);
    open_gaze.runDataExtraction();
    return 0;
 }
--- a/exe/GazeEstimation.cpp
+++ b/exe/GazeEstimation.cpp
@ -0,0 +1,28 @@
 #include <iostream>
 #include <vector>
 #include <string>
 #include <opencv2/opencv.hpp>
 #include "opengaze/opengaze.hpp"
 using namespace std;
 using namespace cv;
 using namespace opengaze;
 // Copies the argc entries of argv (program name included) into a vector of strings.
 vector<string> get_arguments(int argc, char **argv) {
    vector<string> collected;
    int index = 0;
    while (index < argc) {
        collected.push_back(argv[index]);
        ++index;
    }
    return collected;
 }
 // Entry point of the GazeEstimation tool: builds an OpenGaze instance from
 // the command line and runs on-screen gaze estimation.
 //
 // Returns 0 once the run has finished; the earlier "return 1" reported a
 // failure exit status to the shell even on success.
 int main(int argc, char** argv)
 {
    OpenGaze open_gaze(argc, argv);
    open_gaze.runGazeOnScreen();
    return 0;
 }
--- a/exe/GazeVisualization.cpp
+++ b/exe/GazeVisualization.cpp
@ -0,0 +1,28 @@
 #include <iostream>
 #include <vector>
 #include <string>
 #include <opencv2/opencv.hpp>
 #include "opengaze/opengaze.hpp"
 using namespace std;
 using namespace cv;
 using namespace opengaze;
 // Copies the argc entries of argv (program name included) into a vector of strings.
 vector<string> get_arguments(int argc, char **argv) {
    vector<string> collected;
    int index = 0;
    while (index < argc) {
        collected.push_back(argv[index]);
        ++index;
    }
    return collected;
 }
 // Entry point of the GazeVisualization tool: builds an OpenGaze instance from
 // the command line and runs gaze visualization.
 //
 // Returns 0 once the run has finished; the earlier "return 1" reported a
 // failure exit status to the shell even on success.
 int main(int argc, char** argv)
 {
    OpenGaze open_gaze(argc, argv);
    open_gaze.runGazeVisualization();
    return 0;
 }
--- a/exe/test.mp4
+++ b/exe/test.mp4
--- a/imgs/logo_mpiinf.png
+++ b/imgs/logo_mpiinf.png
--- a/imgs/logo_osaka-u.png
+++ b/imgs/logo_osaka-u.png
--- a/imgs/logo_pui.png
+++ b/imgs/logo_pui.png
--- a/include/data.hpp
+++ b/include/data.hpp
@ -0,0 +1,88 @@
 #ifndef DATA_HPP
 #define DATA_HPP
 #include <opencv2/opencv.hpp>
 namespace opengaze{
 /**
 * Face and facial landmark detection data.
 * @param face_id personal id from tracking across frames
 * @param certainty detection score, 1 is the best, -1 is the worst
 * @param landmarks six detected facial landmarks: four eye corners and two mouth corners
 * @param face_bb detected face bounding box
 */
 struct FaceData
 {
    unsigned long face_id;
    double certainty;
    cv::Point2f landmarks[6];
    cv::Rect_<int> face_bb;
 };
 /**
 * Eye image related data.
 * @param leye_pos/reye_pos 3D eyeball center position for left and right eyes in the original camera coordinate system
 * @param leye_img/reye_img normalized eye images for left and right eyes
 * @param leye_rot/reye_rot rotation matrices from the data normalization procedure for left and right eyes
 */
 struct EyeData
 {
    // cv::Mat head_r, head_t; 
    cv::Mat leye_pos, reye_pos; // 3D eyeball centers
    // normalized eyes
    cv::Mat leye_img, reye_img;
    cv::Mat leye_rot, reye_rot;
 };
 /**
 * Face patch data related to data normalization.
 * @param head_r head pose as center of the face
 * @param head_t head translation as center of the face
 * @param face_rot rotation matrix from the data normalization procedure
 * @param face_center 3D face center in the original camera coordinate system
 * @param debug_img used for debugging, to show the normalized face image
 * @param face_patch normalized face image
 */
 struct FacePatchData
 {
    cv::Mat head_r, head_t;
    cv::Mat face_rot;
    cv::Mat face_center;
    cv::Mat debug_img;
    cv::Mat face_patch;
 };
 /**
 * Gaze data.
 * @param lgaze3d/rgaze3d gaze directions of left and right eyes in the camera coordinate system
 * @param gaze3d gaze direction estimated from the face patch in the camera coordinate system
 * @param lgaze2d/rgaze2d projected gaze positions on the screen coordinate from left and right eyes
 * @param gaze2d projected gaze position from the face patch on the screen coordinate
 */
 struct GazeData
 {
    cv::Vec3f lgaze3d, rgaze3d;
    cv::Vec3f gaze3d;
    cv::Point2f lgaze2d, rgaze2d;
    cv::Point2f gaze2d;
 };
 /**
 * The general output data structure; one Sample bundles all per-face results.
 * @param face_data face and facial landmark detection data
 * @param eye_data data related to eye image input
 * @param face_patch_data normalized face patch data
 * @param gaze_data gaze data in 2D and 3D spaces
 */
 struct Sample
 {
    FaceData face_data;
    EyeData eye_data;
    FacePatchData face_patch_data;
    GazeData gaze_data;
 };
 }
 #endif //DATA_HPP
--- a/include/face_detector.hpp
+++ b/include/face_detector.hpp
@ -0,0 +1,72 @@
 #ifndef FACE_DETECTOR_HPP
 #define FACE_DETECTOR_HPP
 #include <iostream>
 #include <vector>
 #include <string>
 #include <opencv2/opencv.hpp>
 #if USE_DLIB
 // if we use dlib
 #include <dlib/opencv.h>
 #include <dlib/image_processing/frontal_face_detector.h>
 #include <dlib/image_processing/render_face_detections.h>
 #include <dlib/image_processing.h>
 #include <dlib/gui_widgets.h>
 #include <dlib/image_io.h>
 #endif
 #include "data.hpp"
 namespace opengaze{
 // Detects and tracks faces and facial landmarks in input images and reports
 // the results as opengaze::Sample entries.
 class FaceDetector {
 public:
    FaceDetector();
    ~FaceDetector();
    /**
     * Face and facial landmark detection method selection.
     * The current implementation only supports OpenFace. OpenFace uses dlib for face detection.
     */
    enum Method{OpenFace, OpenCV, Dlib};
    /**
     * main function to detect and track face and facial landmarks
     * @param input_img input image
     * @param output output data structure
     */
    void track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output);
    // Resets the detector; the exact state that is cleared is defined in the implementation file.
    void reset();
    // Selects / reports the detection method stored in method_type_.
    void setMethodType(Method method_type) {method_type_ = method_type;}
    Method getMethodType() {return method_type_;}
    // Prepares the detector; number_users is presumably the maximum number of
    // faces to track (see num_faces_max_) -- TODO confirm against the implementation.
    void initialize(int number_users);
 private:
    Method method_type_;
    // dlib detector and shape predictor, only compiled in when USE_DLIB is set.
    #if USE_DLIB
    dlib::frontal_face_detector dlib_detector_;
    dlib::shape_predictor dlib_sp_;
    #endif
    // parameters for OpenFace
    std::vector<bool> active_models_;
    unsigned long num_faces_max_;
    int detection_skip_frames_, tracking_loss_limit_;
    float detection_resize_rate_;
    float nonoverlap_threshold_;
    double certainty_threshold_;
    // Six landmark indices; presumably they select the six landmarks stored in
    // FaceData::landmarks -- TODO confirm against the implementation.
    int landmark_indices_[6];
    int frame_counter_;
    unsigned long current_face_id_;
    std::vector<unsigned long> face_ids_;
 };
 }
 #endif //FACE_DETECTOR_HPP
--- a/include/gaze_estimator.hpp
+++ b/include/gaze_estimator.hpp
@ -0,0 +1,65 @@
 #ifndef GAZE_ESTIMATOR_HPP
 #define GAZE_ESTIMATOR_HPP
 #include <opencv2/opencv.hpp>
 #include "data.hpp"
 #include "face_detector.hpp"
 #include "normalizer.hpp"
 #include "gaze_predictor.hpp"
 namespace opengaze{
 /**
  * Top-level gaze estimation pipeline: face/landmark detection, head pose
  * estimation, image normalization and gaze prediction.
  */
 class GazeEstimator {
 public:
    GazeEstimator();
    ~GazeEstimator();
    /**
     * In the current implementation only the "MPIIGaze" method is available. It is an
     * appearance-based method that takes the face/eye image as input and outputs the gaze
     * direction directly. "OpenFace" can also output a gaze vector based on its pupil
     * detection results, but that implementation is not included in the OpenGaze toolkit yet.
     */
    enum Method{MPIIGaze, OpenFace};
    /**
     * For the "MPIIGaze" method the input image can be the face or the eye. The full-face
     * model gives more accurate gaze predictions than the eye-image model, while the
     * eye-image model is much faster.
     */
    enum InputType{face, eye};
    /**
     * The main function to estimate the gaze.
     * It performs face and facial landmark detection, head pose estimation and then gaze prediction.
     * @param input_image input scene image
     * @param output data structure for output; one Sample per reported face
     */
    void estimateGaze(cv::Mat input_image, std::vector<opengaze::Sample> &output);
    /**
     * Run detection, head pose estimation and normalization only, storing the normalized
     * face patch or eye images in each Sample without predicting gaze.
     */
    void getImagePatch(cv::Mat input_image, std::vector<opengaze::Sample> &outputs);
    // Set the camera intrinsic matrix and distortion coefficients.
    void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist);
    // The given path is passed to Normalizer::loadFaceModel.
    void setRootPath(std::string root_path);
    /**
     * Select the gaze estimation method.
     * For MPIIGaze the arguments are read by index: [0] network definition file,
     * [1] trained weights file, [2] "face" or "eye", [3] GPU id.
     * NOTE(review): the definition in gaze_estimator.cpp carries a default value for
     * `arguments` that is not visible to callers including only this header.
     */
    void setMethod(Method, std::vector<std::string> arguments);
    // Initialize the face detector for at most number_users simultaneous faces.
    void initialFaceDetector(int number_users);
    // the selected gaze estimation method
    Method method_type_;
    InputType input_type_; // the input type
 private:
    // class instances
    FaceDetector face_detector_;
    Normalizer normalizer_;
    GazePredictor gaze_predictor_;
    // camera intrinsic matrix
    cv::Mat camera_matrix_;
    // camera distortion matrix
    cv::Mat camera_dist_;
    // the root path is used for loading the configuration file and models
    std::string root_path_;
 };
 }
 #endif //GAZE_ESTIMATOR_HPP
--- a/include/gaze_predictor.hpp
+++ b/include/gaze_predictor.hpp
@ -0,0 +1,29 @@
 #ifndef GAZE_PREDICTOR_HPP
 #define GAZE_PREDICTOR_HPP
 #include <opencv2/opencv.hpp>
 #include "data.hpp"
 #include "face_detector.hpp"
 namespace opengaze{
 /**
  * Wraps the Caffe network that predicts gaze from a normalized face or eye image.
  */
 class GazePredictor {
 public:
    GazePredictor();
    ~GazePredictor();
    /**
     * Load the Caffe network for the MPIIGaze method.
     * @param arguments read by index: [0] network definition (prototxt) path,
     *                  [1] trained weights path, [3] GPU id as a decimal string
     */
    void initiaMPIIGaze(std::vector<std::string> arguments);
    /**
     * Predict gaze from a normalized image patch.
     * @param face_patch normalized face (or eye) image
     * @return gaze estimate in the normalization space
     */
    cv::Point3f predictGazeMPIIGaze(cv::Mat face_patch);
 private:
    // network variant, derived from the definition file name in initiaMPIIGaze
    int model_type_;
    // NOTE(review): not referenced in the visible part of gaze_predictor.cpp — confirm usage
    bool is_extract_feature;
 };
 }
 #endif //GAZE_PREDICTOR_HPP
--- a/include/input_handler.hpp
+++ b/include/input_handler.hpp
@ -0,0 +1,125 @@
 #ifndef INPUT_HANDLER_HPP
 #define INPUT_HANDLER_HPP
 #include <opencv2/opencv.hpp>
 #include <vector>
 #include <iostream>
 #include <boost/filesystem.hpp>
 #include "data.hpp"
 namespace opengaze {
 /**
  * Provides input frames (camera, video, image, directory or in-memory images) and
  * holds the camera and screen calibration used to project gaze onto the display.
  */
 class InputHandler {
 public:
    enum InputType {Camera, Video, Image, Directory, Memory};
    InputHandler();
    ~InputHandler();
    /**
     * set the camera intrinsic parameters
     * @param camera_matrix camera intrinsic matrix
     * @param camera_dist camera distortion matrix
     */
    void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist){
        camera_matrix_ = std::move(camera_matrix);
        camera_distortion_ = std::move(camera_dist);
    }
    /**
     * function to return the next sample, which can come from any input source
     * @return next sample
     */
    cv::Mat getNextSample();
    /**
     * set the input type
     * @param type the input type, see the InputType definition
     */
    void setInputType(InputType type){input_type_ = type;}
    /**
     * set the input
     * The meaning of the input value depends on the input type:
     * For the type "Camera", the input value is the camera id
     * For the type "Video", the input value is the video file name
     * For the type "Directory", the input value is the directory path
     * The overload taking a vector stores the given images in images_.
     */
    void setInput(int camera_id) {camera_id_ = camera_id;}
    void setInput(std::vector<cv::Mat> images) {images_ = std::move(images);}
    void setInput(std::string input_path);
    /**
     * read the parameters related to the screen
     * @param calib_file configuration file
     */
    void readScreenConfiguration(std::string calib_file);
    /**
     * read the camera intrinsic parameters from the configuration file
     * @param calib_file configuration file
     */
    void readCameraConfiguration(std::string calib_file);
    /**
     * Once the 3D gaze vector is available, it has to be projected onto the 2D screen.
     * This function also needs to know whether the full-face model is used,
     * since the origin of the gaze vector is the center of the face for full-face models
     * and the eye center for eye-based models.
     * @param input input data containing the 3D gaze vector
     * @param is_face_model a boolean value indicating whether the gaze vector comes from the face model or the eye model
     */
    void projectToDisplay(std::vector<opengaze::Sample> &input, bool is_face_model=true);
    // frame size as reported by the capture device; the double returned by get() is converted to int
    int getFrameHeight(){return cap_.get(cv::CAP_PROP_FRAME_HEIGHT);}
    int getFrameWidth(){return cap_.get(cv::CAP_PROP_FRAME_WIDTH);}
    InputType getInputType() {return input_type_;}
    int getScreenWidth() {return screen_width_;}
    int getScreenHeight() {return screen_height_;}
    std::string getFileName() {return current_file_name_;}
    cv::Point2f mapToDisplay(cv::Vec3f obj_center, cv::Vec3f gaze_point);
    void initialize();
    bool closeInput();
    void getScreenResolution(int &width, int &height);
    cv::Mat getCameraMatrix() { return camera_matrix_;}
    cv::Mat getCameraDistortion() {return camera_distortion_;}
    void setFrameSize(int frame_width, int frame_height);
    bool isReachEnd() {return is_reach_end_;}
    // camera intrinsic matrix and distortion coefficients (publicly accessible)
    cv::Mat camera_matrix_;
    cv::Mat camera_distortion_;
 private:
    // indicates whether the end of the sample stream has been reached
    bool is_reach_end_;
    int camera_id_;
    int sample_height_, sample_width_;
    std::vector<cv::Mat> images_;
    std::string input_path_;
    std::string input_file_video_name_;
    int screen_width_, screen_height_;
    // monitor
    float monitor_W_, monitor_H_; // monitor width and height in mm
    cv::Mat monitor_R_, monitor_T_;
    cv::Vec3f monitor_corners_[4];
    cv::Mat monitor_normal_;
    // input variable
    InputType input_type_;
    cv::VideoCapture cap_;
    std::string current_file_name_;
    // variable for directory input
    boost::filesystem::directory_iterator current_itr_;
 };
 }
 #endif //INPUT_HANDLER_HPP
--- a/include/normalizer.hpp
+++ b/include/normalizer.hpp
@ -0,0 +1,42 @@
 #ifndef NORMALIZER_HPP
 #define NORMALIZER_HPP
 #include <opencv2/opencv.hpp>
 #include "data.hpp"
 namespace opengaze{
 /**
  * Estimates the head pose from facial landmarks and produces the normalized
  * face / eye images that are fed to the gaze predictor.
  */
 class Normalizer {
 public:
    Normalizer();
    ~Normalizer();
    /**
     * Estimate the head pose for one detected face.
     * @param landmarks facial landmarks of the face (GazeEstimator passes sample.face_data.landmarks)
     * @param sample the sample the landmarks belong to
     */
    void estimateHeadPose(const cv::Point2f *landmarks, opengaze::Sample &sample);
    // Set the camera intrinsic matrix.
    void setCameraMatrix(cv::Mat input);
    // Load the face model; GazeEstimator passes its root path here.
    void loadFaceModel(std::string path);
    /**
     * Set the normalization parameters. GazeEstimator uses (1600, 1000, 224, 224) for the
     * face model and (960, 600, 60, 36) for the eye model.
     * NOTE(review): units of focal_length and distance are not visible here — confirm.
     */
    void setParameters(int focal_length, int distance, int img_w, int img_h);
    // Returns the normalized face patch for the given sample.
    cv::Mat normalizeFace(cv::Mat input_image, Sample &sample);
    // Returns the normalized eye images; callers use [0] as the left eye and [1] as the right eye.
    std::vector<cv::Mat> normalizeEyes(cv::Mat input_image, Sample &sample);
    // Convert a gaze estimate from the normalization space to the camera coordinate
    // system using the given rotation/conversion matrix.
    cv::Mat cvtToCamera(cv::Point3f input, const cv::Mat cnv_mat);
 private:
    cv::Mat camera_matrix_;
    std::vector<cv::Point3f> face_model_;
    cv::Mat face_model_mat_, cam_norm_;
    float focal_norm_, distance_norm_;
    cv::Size roiSize_norm_;
 };
 }
 #endif //NORMALIZER_HPP
--- a/include/opengaze.hpp
+++ b/include/opengaze.hpp
@ -0,0 +1,80 @@
 #ifndef OPEN_GAZE_H
 #define OPEN_GAZE_H
 #include <string>
 #include <vector>
 #include <boost/program_options.hpp>
 #include <boost/filesystem.hpp>
 #include <opencv2/opencv.hpp>
 #include "input_handler.hpp"
 #include "gaze_estimator.hpp"
 #include "data.hpp"
 #include "personal_calibrator.hpp"
 namespace opengaze {
 /**
  * Application-level entry point of the toolkit: reads the configuration and offers
  * the visualization, calibration, on-screen gaze and data extraction modes.
  */
 class OpenGaze {
 public:
    explicit OpenGaze(int argc, char** argv); //read configuration file
    ~OpenGaze();
    // main function to estimate the gaze vector and show it drawn on the input face image.
    void runGazeVisualization(); 
    /**
     * main function to run personal calibration.
     * @param num_calibration_point the number of points used for calibration.
     */
    void runPersonalCalibration(int num_calibration_point=5);
    // main function to estimate and draw the gaze point on the screen.
    void runGazeOnScreen();
    // main function to extract the face image from the input image. The face image can then
    // be used to train a custom gaze estimation model
    void runDataExtraction();
 private:
    // visualization
    /**
     * function to draw the gaze vector on the input face image.
     * @param sample the input data, including the gaze vector, head pose etc.
     * @param image the input image containing the face. The gaze vector is drawn onto this image
     */
    void drawGazeOnFace(opengaze::Sample sample, cv::Mat &image);
    // draw the detected facial landmarks
    void drawLandmarks(opengaze::Sample sample, cv::Mat &image);
    // draw the estimated gaze in the top left corner of the input image
    // to show its relative position on the screen. This way
    // the user can see both the input image and the projected gaze target on the screen.
    // This function is mainly used for debugging.
    void drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image);
    // estimate and draw the gaze point on the screen
    void drawGazeOnScreen(opengaze::Sample sample, cv::Mat &image);
    // debug mode: show the gaze drawn on the face
    bool show_debug_;
    //class instances
    InputHandler input_handler_;
    GazeEstimator gaze_estimator_;
    // input camera id
    int camera_id_;
    // temporary variables to store the input path, output path, input type
    boost::filesystem::path input_dir_;
    InputHandler::InputType input_type_;
    boost::filesystem::path output_dir_;
    bool is_face_model_;
    bool is_save_video_;
    // path to save the personal calibration model
    std::string per_model_save_path_;
 };
 }
 #endif //OPEN_GAZE_H
--- a/include/personal_calibrator.hpp
+++ b/include/personal_calibrator.hpp
@ -0,0 +1,64 @@
 #ifndef PERSONAL_CALIBRATOR_HPP
 #define PERSONAL_CALIBRATOR_HPP
 #include <string>
 #include <vector>
 #include <opencv2/opencv.hpp>
 /**
  * Runs the on-screen personal calibration: shows calibration points, collects the
  * user's confirmations and fits a polynomial correction model.
  */
 class PersonalCalibrator {
 public:
    /**
     * @param screen_width screen width in pixels
     * @param screen_height screen height in pixels
     */
    PersonalCalibrator(int screen_width, int screen_height);
    ~PersonalCalibrator();
    /**
     * generate the random locations for calibration
     * @param num_points number of points to generate
     */
    void generatePoints(int num_points);
    // get the display window ready; it should be full-screen
    void initialWindow();
    // show the next calibration point
    bool showNextPoint();
    // wait for 0.5 second to receive the confirmation (mouse click) from the user
    void confirmClicking();
    /**
     * generate a polynomial function for the personal calibration
     * @param prediction predictions from the gaze estimation method
     * @param ground_truth locations of the calibration points on the screen
     * @param order the order of the polynomial function; 1 means linear
     */
    void generateModel(std::vector<cv::Point2f> prediction, std::vector<cv::Point2f> ground_truth, int order=1);
    /**
     * save the personal calibration model
     * @param file_path path to save the model
     */
    void saveModel(std::string file_path);
    /**
     * load the personal calibration model
     * @param file_path path to load the model
     */
    void loadModel(std::string file_path);
    /**
     * return the location of the current calibration point on the screen.
     * The stored integer point is converted to cv::Point2f; index_point_ is not
     * range-checked here, so it must be a valid index into points_.
     * @return location on the screen
     */
    cv::Point2f getCurrentPoint() {return points_[index_point_];}
    // function to calculate the polynomial function
    void calibratePolynomial();
 private:
    // indicates whether the user has clicked the mouse
    bool is_click_;
    // number of points for personal calibration
    int num_points_;
    // index of the current calibration point
    int index_point_;
    // vector to store the generated calibration points
    std::vector<cv::Point2i> points_;
    int screen_width_, screen_height_, center_radius_; // monitor width and height in pixel
    // personal model
    cv::Mat model_matrix_;
 };
 #endif //PERSONAL_CALIBRATOR_HPP
--- a/install.sh
+++ b/install.sh
@ -0,0 +1,60 @@
 #!/bin/bash
 # Installs the build dependencies of OpenGaze on a Debian/Ubuntu system:
 # apt packages, OpenCV 3.4.0 and dlib 19.13 built from source, Boost and the
 # libraries required by Caffe. Takes no arguments and uses sudo for system changes.
 # The script stops at the first failing command.
 set -e 
 set -o pipefail
 if [ $# -ne 0 ]
  then
    echo "Usage: install.sh"
    exit 1
 fi
 # Essential Dependencies
 echo "Installing Essential dependencies..."
 sudo apt-get -y update
 sudo apt-get -y install build-essential
 sudo apt-get -y install cmake
 # wget and unzip are needed below to fetch and unpack the source archives
 sudo apt-get -y install wget unzip
 sudo apt-get -y install libopenblas-dev liblapack-dev
 sudo apt-get -y install git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev
 sudo apt-get -y install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev
 echo "Essential dependencies installed."
 # OpenCV Dependency
 echo "Downloading OpenCV..."
 wget https://github.com/opencv/opencv/archive/3.4.0.zip
 unzip 3.4.0.zip
 cd opencv-3.4.0
 mkdir -p build
 cd build
 echo "Installing OpenCV..."
 cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB=ON -D WITH_CUDA=OFF -D BUILD_SHARED_LIBS=ON ..
 make -j4
 sudo make install
 cd ../..
 rm 3.4.0.zip
 sudo rm -r opencv-3.4.0
 echo "OpenCV installed."
 # dlib dependency
 echo "Downloading dlib"
 wget http://dlib.net/files/dlib-19.13.tar.bz2
 tar xf dlib-19.13.tar.bz2
 cd dlib-19.13
 mkdir -p build
 cd build
 echo "Installing dlib"
 cmake ..
 cmake --build . --config Release
 sudo make install
 sudo ldconfig
 cd ../..
 # remove both the archive and the extracted source tree, as done for OpenCV above
 rm dlib-19.13.tar.bz2
 sudo rm -r dlib-19.13
 echo "dlib installed"
 # Boost C++ Dependency
 echo "Installing Boost..."
 # -y keeps the script non-interactive like the other apt-get calls
 sudo apt-get -y install libboost-all-dev
 echo "Boost installed."
 # Dependency for caffe
 # Debian/Ubuntu development package names for protobuf, glog, gflags and hdf5
 sudo apt-get -y install libprotobuf-dev protobuf-compiler libgoogle-glog-dev libgflags-dev libhdf5-dev
--- a/pre-complile/opengaze.deb
+++ b/pre-complile/opengaze.deb
--- a/pre-complile/readme.txt
+++ b/pre-complile/readme.txt
@ -0,0 +1,3 @@
 This is a pre-compiled package for OpenGaze; it bundles the OpenFace and Caffe libraries.
 To use it, you still need to install the other dependencies: the NVIDIA driver, CUDA and cuDNN.
 To install, just run `sudo dpkg -i build_1.0-1_amd64.deb`
--- a/src/face_detector.cpp
+++ b/src/face_detector.cpp
@ -0,0 +1,273 @@
 #include <iostream>
 #include <tbb/tbb.h>
 // if we use OpenFace
 #if USE_OPENFACE
 #include <LandmarkCoreIncludes.h> // from "OpenFace-master/lib/local/LandmarkDetector/include/"
 #endif
 #include "face_detector.hpp"
 using namespace std;
 using namespace cv;
 // File-scope OpenFace state shared by all FaceDetector code in this file:
 // one parameter set and one CLNF tracker per tracked face, indexed in parallel.
 // NOTE(review): these use LandmarkDetector types although the OpenFace include
 // above is guarded by USE_OPENFACE — confirm the file builds without it.
 vector<LandmarkDetector::FaceModelParameters> det_parameters_;
 vector<LandmarkDetector::CLNF> clnf_models_;
 namespace opengaze {
 // OpenFace is the detection method selected on construction.
 FaceDetector::FaceDetector() : method_type_(Method::OpenFace) {
 }
 // The destructor has no work of its own.
 FaceDetector::~FaceDetector() {
 }
 // Load the OpenFace landmark model and face detectors and create one tracker
 // (CLNF model + parameter set) per user in the file-scope vectors.
 // Exits the process if the landmark model cannot be loaded.
 // @param number_users maximum number of simultaneously tracked faces; the default
 //        value is declared here on the definition, not in the header.
 // NOTE(review): every call appends to the vectors, so calling this twice would
 // leave more trackers than num_faces_max_ — confirm it is only called once.
 void FaceDetector::initialize(int number_users=5) {
    // the model files are looked up relative to the OpenFace build output directory
    string root_path = OPENFACE_DIR;
    root_path = root_path + "/build/bin";
    //string openface_root = OpenFace_ROOT_DIR;
    // (currently) hard-coded setting
    num_faces_max_ = number_users;
    detection_resize_rate_ = 2.0; // resize the input image to detect face, crucial for speed
    detection_skip_frames_ = 1;
    nonoverlap_threshold_ = 0.5;
    // NOTE(review): the original remark here read "the smaller the better, 1 is the best,
    // -1 is the worst", which contradicts itself — confirm the certainty scale
    certainty_threshold_ = 0.0;
    // the six landmark indices copied into each Sample
    // (presumably eye corners and mouth corners of the 68-point layout — TODO confirm)
    landmark_indices_[0] = 36; landmark_indices_[1] = 39; landmark_indices_[2] = 42;
    landmark_indices_[3] = 45; landmark_indices_[4] = 48; landmark_indices_[5] = 54;
    tracking_loss_limit_ = 10;
    // initialize the tracking models
    LandmarkDetector::FaceModelParameters det_parameter;
    // this value is overwritten with 5 a few lines below
    det_parameter.reinit_video_every = -1; // This is so that the model would not try re-initialising itself
    // this selection is overwritten with HOG_SVM_DETECTOR further below
    det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR;
    det_parameter.model_location = root_path + "/model/main_clm_wild.txt";
    det_parameter.haar_face_detector_location = root_path + "/classifiers/haarcascade_frontalface_alt.xml";// the HAAR detector is not selected by "curr_face_detector"
    det_parameter.mtcnn_face_detector_location = root_path + "/model/mtcnn_detector/MTCNN_detector.txt";
    det_parameter.use_face_template = true;
    det_parameter.reinit_video_every = 5;
    // det_parameter.quiet_mode = true; not available for OpenFace v2.1
    // For in-the-wild fitting these parameters are suitable
    det_parameter.window_sizes_init = vector<int>(4);
    det_parameter.window_sizes_init[0] = 15;
    det_parameter.window_sizes_init[1] = 13;
    det_parameter.window_sizes_init[2] = 11;
    det_parameter.window_sizes_init[3] = 9;
    det_parameter.sigma = 1.25;
    det_parameter.reg_factor = 35;
    det_parameter.weight_factor = 2.5;
    det_parameter.num_optimisation_iteration = 10;
    // final detector choice: HOG-SVM
    det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
    // parameters for tracker 0 are copied here, before the fallback check below
    det_parameters_.push_back(det_parameter);
    LandmarkDetector::CLNF clnf_model_ = LandmarkDetector::CLNF(det_parameter.model_location);
    if (!clnf_model_.loaded_successfully){
        cout << "ERROR: Could not load the landmark detector" << endl;
        exit(-1);
    }
    // load the HAAR and MTCNN detectors as well, regardless of which one is selected
    clnf_model_.face_detector_HAAR.load(det_parameter.haar_face_detector_location);
    clnf_model_.haar_face_detector_location = det_parameter.haar_face_detector_location;
    clnf_model_.face_detector_MTCNN.Read(det_parameter.mtcnn_face_detector_location);
    clnf_model_.mtcnn_face_detector_location = det_parameter.mtcnn_face_detector_location;
    // If can't find MTCNN face detector, default to HOG one
    // (with the HOG-SVM selection made above this condition is never true)
    if (det_parameter.curr_face_detector == LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR && clnf_model_.face_detector_MTCNN.empty()){
        cout << "INFO: defaulting to HOG-SVM face detector" << endl;
        det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
    }
    // tracker 0, followed by copies of the same model for the remaining users
    clnf_models_.reserve(num_faces_max_);
    clnf_models_.push_back(clnf_model_);
    active_models_.push_back(false);
    for(int i=1; i<num_faces_max_; ++i)
    {
        clnf_models_.push_back(clnf_model_);
        active_models_.push_back(false);
        det_parameters_.push_back(det_parameter);
    }
    // variables
    frame_counter_ = 0;
    current_face_id_ = 1;
    // id 0 marks a tracker that has no face assigned
    for(int i=0; i<num_faces_max_; ++i) face_ids_.push_back(0);
 }
 // Return the detector to its initial tracking state: frame counting and face-id
 // numbering start over and every tracker slot becomes idle.
 void FaceDetector::reset() {
    current_face_id_ = 1;
    frame_counter_ = 0;
    const size_t model_count = clnf_models_.size();
    for (size_t slot = 0; slot < model_count; ++slot) {
        // mark the slot as free, forget its face id and clear the tracker itself
        active_models_[slot] = false;
        face_ids_[slot] = 0;
        clnf_models_[slot].Reset();
    }
 }
 void NonOverlapingDetections(const vector<LandmarkDetector::CLNF>& clnf_models, vector<cv::Rect_<float> >& face_detections){
    // Go over the model and eliminate detections that are not informative (there already is a tracker there)
    for (size_t model = 0; model < clnf_models.size(); ++model){
        // See if the detections intersect
        cv::Rect_<float> model_rect = clnf_models[model].GetBoundingBox();
        for (int detection=face_detections.size()-1; detection >= 0; --detection)
        {
            double intersection_area = (model_rect & face_detections[detection]).area();
            double union_area = model_rect.area() + face_detections[detection].area() - 2 * intersection_area;
            // If the model is already tracking what we're detecting ignore the detection, this is determined by amount of overlap
            if (intersection_area / union_area > 0.5)
            {
                face_detections.erase(face_detections.begin() + detection);
            }
        }
    }
 }
 // Overlap measure between the bounding boxes of two tracker models:
 // intersection / (area_ref + area_tgt - 2 * intersection).
 double NonOverlapingDetection(const LandmarkDetector::CLNF &ref_model, const LandmarkDetector::CLNF &tgt_model)
 {
    const Rect_<double> ref_box = ref_model.GetBoundingBox();
    const Rect_<double> tgt_box = tgt_model.GetBoundingBox();
    const double shared_area = (ref_box & tgt_box).area();
    const double denominator = ref_box.area() + tgt_box.area() - 2 * shared_area;
    return shared_area / denominator;
 }
 // Detect new faces and update all trackers on one frame, then append one Sample
 // per reported tracker to `output`.
 // @param input_img colour frame; the process exits if it has fewer than 3 channels
 // @param output    receives certainty, face id, bounding box and six landmarks per face
 void FaceDetector::track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output) {
    if(input_img.channels() < 3){
        cout << "The input must be a color image!" <<endl;
        exit(EXIT_FAILURE);
    }
    Mat_<uchar> grayscale_image;
    cvtColor(input_img, grayscale_image, CV_BGR2GRAY);
    // face detection is only needed while at least one tracker is free
    bool all_models_active = true;
    for(unsigned int model = 0; model < clnf_models_.size(); ++model)
    {
        if(!active_models_[model])
        {
            all_models_active = false;
            break;
        }
    }
    // Detect faces
    // Get the detections (every Xth frame and when there are free models available for tracking)
    // NOTE(review): frame_counter_ is not incremented anywhere in this function — confirm
    // whether the frame skipping is meant to take effect.
    std::vector<Rect_<float> > face_detections;
    cv::Mat small_grayscale_image_;
    if (frame_counter_ % detection_skip_frames_ == 0 && !all_models_active) {
        // resized image for faster face detection
        if (detection_resize_rate_ != 1) resize(grayscale_image, small_grayscale_image_,
               Size(), 1.0/detection_resize_rate_, 1.0/detection_resize_rate_);
        else small_grayscale_image_ = grayscale_image;
        // the detector is chosen from the parameters of tracker 0
        if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR){
            vector<float> confidences;
            LandmarkDetector::DetectFacesHOG(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HOG, confidences);
        }
        else if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HAAR_DETECTOR){
            LandmarkDetector::DetectFaces(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HAAR);
        }
        else{
            vector<float> confidences;
            LandmarkDetector::DetectFacesMTCNN(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_MTCNN, confidences);
        }
        // scale the face detections back to the full-resolution image
        if (detection_resize_rate_ != 1) {
            for(auto& face_detection : face_detections) {
                face_detection.x *= detection_resize_rate_;
                face_detection.y *= detection_resize_rate_;
                face_detection.width *= detection_resize_rate_;
                face_detection.height *= detection_resize_rate_;
            }
        }
        // Keep only detections that do not overlap a face that is already tracked
        NonOverlapingDetections(clnf_models_, face_detections);
    }
    // one flag per detection, claimed atomically by the tracker that takes it
    vector< tbb::atomic<bool> > face_detections_used(face_detections.size());
    // Go through every model and update the tracking
    // NOTE(review): inside this parallel loop active_models_ (a std::vector<bool>),
    // face_ids_ and current_face_id_ are written without synchronization; this looks
    // like a data race when several trackers are (re)initialised at once — confirm.
    tbb::parallel_for(0, (int)clnf_models_.size(), [&](int model) {
    //for (unsigned int model = 0; model < clnf_models_.size(); ++model) {
        // assigned below but never read afterwards
        bool detection_success = false;
        // If the current model has failed more than threshold, remove it
        if (clnf_models_[model].failures_in_a_row > tracking_loss_limit_) {
            active_models_[model] = false;
            clnf_models_[model].Reset();
        }
        // If the model is inactive reactivate it with new detections
        if (!active_models_[model]){
            for (size_t detection_ind = 0; detection_ind < face_detections.size(); ++detection_ind)
            {
                // if it was not taken by another tracker take it (if it is false swap it to true and enter detection, this makes it parallel safe)
                if (!face_detections_used[detection_ind].compare_and_swap(true, false)) {
                    // Reinitialise the model
                    clnf_models_[model].Reset();
                    // This ensures that a wider window is used for the initial landmark localisation
                    clnf_models_[model].detection_success = false;
                    LandmarkDetector::DetectLandmarksInVideo(input_img, face_detections[detection_ind], clnf_models_[model], det_parameters_[model], grayscale_image);
                    // This activates the model
                    active_models_[model] = true;
                    face_ids_[model] = current_face_id_;
                    current_face_id_++;
                    // break out of the loop as the tracker has been reinitialised
                    break;
                }
            }
        }
        else
        {
            // The actual facial landmark detection / tracking
            detection_success = LandmarkDetector::DetectLandmarksInVideo(input_img, clnf_models_[model], det_parameters_[model], grayscale_image);
        }
    //}
    });
    // Go through every model and check the results
    // NOTE(review): active_models_ is not consulted here; idle trackers are presumably
    // filtered out by the certainty check below — confirm.
    for(size_t model=0; model<clnf_models_.size(); ++model){
        // Check if the alignment result is overlapping previous models
        bool overlapping = false;
        for(size_t model_ref=0; model_ref<model; ++model_ref){
            double overlap_ratio = NonOverlapingDetection(clnf_models_[model_ref], clnf_models_[model]);
            if(overlap_ratio > nonoverlap_threshold_) overlapping = true;
        }
        // a tracker that duplicates an earlier one is released and not reported
        if(overlapping){
            active_models_[model] = false;
            face_ids_[model] = 0;
            clnf_models_[model].Reset();
            continue;
        }
        // skip trackers whose certainty is below the threshold
        if(clnf_models_[model].detection_certainty < certainty_threshold_) continue;
        Sample temp;
        temp.face_data.certainty = clnf_models_[model].detection_certainty;
        temp.face_data.face_id = face_ids_[model];
        temp.face_data.face_bb.x = (int)clnf_models_[model].GetBoundingBox().x;
        temp.face_data.face_bb.y = (int)clnf_models_[model].GetBoundingBox().y;
        temp.face_data.face_bb.height = (int)clnf_models_[model].GetBoundingBox().height;
        temp.face_data.face_bb.width = (int)clnf_models_[model].GetBoundingBox().width;
        // copy the six selected landmarks; x is read from row num_p and y from row num_p+68
        // (assumes detected_landmarks stores the 68 x values followed by the 68 y values — TODO confirm)
        for(int p=0; p<6; p++){
            int num_p = landmark_indices_[p];
            temp.face_data.landmarks[p] = Point2d(
                    clnf_models_[model].detected_landmarks.at<float>(num_p,0),
                    clnf_models_[model].detected_landmarks.at<float>(num_p+68,0)
            );
        }
        output.emplace_back(temp);
    }
 }
 }
--- a/src/gaze_estimator.cpp
+++ b/src/gaze_estimator.cpp
@ -0,0 +1,112 @@
 #include <iostream>
 #include "gaze_estimator.hpp"
 using namespace std;
 using namespace cv;
 namespace opengaze{
 // Start with a well-defined configuration (MPIIGaze method, full-face input) so
 // that estimateGaze()/getImagePatch() never read indeterminate enum values when
 // setMethod() has not been called yet. setMethod() overrides both values.
 GazeEstimator::GazeEstimator()
    : method_type_(Method::MPIIGaze), input_type_(InputType::face) {
 }
 GazeEstimator::~GazeEstimator() {}
 void GazeEstimator::setRootPath(std::string root_path) {
    normalizer_.loadFaceModel(root_path);
 }
 // Run the full pipeline on one frame: face/landmark detection, head pose
 // estimation and gaze prediction for every reported face.
 // @param input_image input scene image
 // @param outputs     receives one Sample per face; the gaze vectors are stored in
 //                    gaze_data (gaze3d for the face model, lgaze3d/rgaze3d for the eye model)
 // Exits the process when the OpenFace gaze method is selected (not implemented).
 void  GazeEstimator::estimateGaze(cv::Mat input_image, std::vector<opengaze::Sample> &outputs) {
    face_detector_.track_faces(input_image, outputs); // detect faces and facial landmarks
    for (size_t i = 0; i < outputs.size(); ++i) {
        // estimate head pose first, no matter what gaze estimation method, head pose is estimated here
        normalizer_.estimateHeadPose(outputs[i].face_data.landmarks, outputs[i]);
        if (method_type_ == Method::MPIIGaze){
            // if we use face model
            if (input_type_ == InputType::face){
                Mat face_patch = normalizer_.normalizeFace(input_image, outputs[i]);
                Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(face_patch); // gaze estimates in normalization space
                Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].face_patch_data.face_rot); // convert gaze to camera coordinate system
                gaze_3d.copyTo(outputs[i].gaze_data.gaze3d);
            }
            else if (input_type_ == InputType::eye) {
                vector<cv::Mat> eye_images = normalizer_.normalizeEyes(input_image, outputs[i]); // [0] left eye, [1] right eye
                // both eye images are required below
                if (eye_images.size() < 2) continue;
                // for left eye
                Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(eye_images[0]);
                Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].eye_data.leye_rot);
                gaze_3d.copyTo(outputs[i].gaze_data.lgaze3d);
                // for right eye: mirror the RIGHT eye image so that it looks like a left eye,
                // predict, then mirror the horizontal gaze component back.
                // (the previous code mirrored the left eye image here)
                Mat flip_right;
                flip(eye_images[1], flip_right, 1);
                gaze_norm = gaze_predictor_.predictGazeMPIIGaze(flip_right);
                gaze_norm.x *= -1.0;
                // NOTE(review): this uses the face rotation while the left eye uses
                // eye_data.leye_rot; a right-eye rotation is probably intended — confirm
                // against the EyeData definition.
                gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].face_patch_data.face_rot); // convert gaze to camera coordinate system
                gaze_3d.copyTo(outputs[i].gaze_data.rgaze3d);
            }
        }
        else if (method_type_ == Method::OpenFace) {
            cout << "Please use gaze estimation method MPIIGaze." << endl;
            exit(EXIT_FAILURE);
        }
    }
 }
 // Detect faces in the frame and attach the normalised image patches to each
 // Sample (face patch for the face model, left/right eye images for the eye
 // model) without running gaze prediction.
 // Exits the process when the OpenFace gaze method is selected.
 void GazeEstimator::getImagePatch(cv::Mat input_image, std::vector<opengaze::Sample> &outputs) {
    // face and facial landmark detection fills `outputs`
    face_detector_.track_faces(input_image, outputs);
    for (auto &sample : outputs) {
        // the head pose is needed before any normalisation can be done
        normalizer_.estimateHeadPose(sample.face_data.landmarks, sample);
        switch (method_type_) {
        case Method::MPIIGaze:
            if (input_type_ == InputType::face) {
                // full-face model: store the normalised face patch
                sample.face_patch_data.face_patch = normalizer_.normalizeFace(input_image, sample);
            }
            else if (input_type_ == InputType::eye) {
                // eye model: index 0 is the left eye, index 1 the right eye
                vector<cv::Mat> eye_patches = normalizer_.normalizeEyes(input_image, sample);
                sample.eye_data.leye_img = eye_patches[0];
                sample.eye_data.reye_img = eye_patches[1];
            }
            break;
        case Method::OpenFace:
            cout << "Please use method MPIIGaze for image patch extraction." << endl;
            exit(EXIT_FAILURE);
        default:
            break;
        }
    }
 }
 // Select the gaze estimation method and, for MPIIGaze, load the network and
 // configure the normalizer for the requested input type.
 // @param input_method_type the method to use
 // @param arguments for MPIIGaze, read by index: [0] network definition file,
 //        [1] trained weights file, [2] "face" or "eye", [3] GPU id.
 //        When fewer than three arguments are given the face input type is used.
 void GazeEstimator::setMethod(Method input_method_type, const std::vector<std::string> arguments={}) {
    method_type_ = input_method_type;
    if (method_type_ == Method::MPIIGaze) {
        gaze_predictor_.initiaMPIIGaze(arguments);
        // arguments[2] only exists when there are at least three entries; the
        // previous "< 2" guard read past the end for exactly two arguments
        if (arguments.size() < 3)
            input_type_ = InputType::face;
        else {
            if (arguments[2] == "face"){
                input_type_ = InputType::face;
                normalizer_.setParameters(1600, 1000, 224, 224);
            }
            else if (arguments[2] == "eye") {
                input_type_ = InputType::eye;
                normalizer_.setParameters(960, 600, 60, 36);
            }
            // any other value leaves input_type_ and the normalizer parameters unchanged
        }
    }
 }
 // Store the camera calibration and forward the intrinsic matrix to the normalizer.
 // @param camera_matrix camera intrinsic matrix
 // @param camera_dist   camera distortion coefficients
 void GazeEstimator::setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist) {
    camera_matrix_ = move(camera_matrix);
    // take the parameter; the previous code moved the member onto itself and
    // silently dropped the distortion coefficients passed in
    camera_dist_ = move(camera_dist);
    normalizer_.setCameraMatrix(camera_matrix_);
 }
 void GazeEstimator::initialFaceDetector(int number_users){
    face_detector_.initialize(number_users);
    face_detector_.setMethodType(FaceDetector::Method::OpenFace);
 }
 };
--- a/src/gaze_predictor.cpp
+++ b/src/gaze_predictor.cpp
@ -0,0 +1,160 @@
 #include "gaze_predictor.hpp"
 #include <string>
 // caffe
 #define USE_OPENCV 1;
 #include <caffe/caffe.hpp>
 #include <caffe/util/io.hpp>
 #include <caffe/blob.hpp>
 #include <caffe/layers/pose_data_layer.hpp>
 #include <caffe/layers/memory_data_layer.hpp>
 using namespace cv;
 using namespace std;
 using namespace caffe;
 namespace opengaze {
 // Caffe network used for gaze prediction. It is defined at namespace scope in
 // this file, so every GazePredictor instance refers to the same pointer. It is
 // allocated in GazePredictor::initiaMPIIGaze and deleted in ~GazePredictor.
 caffe::Net<float> *p_net_;
 // Nothing to set up here; the network is created later by initiaMPIIGaze.
 GazePredictor::GazePredictor() = default;
 // Release the Caffe network.
 GazePredictor::~GazePredictor() {
    delete p_net_;
    // p_net_ lives at namespace scope and outlives this object: reset it so a
    // later destructor call does not delete the same network a second time
    p_net_ = nullptr;
 }
 void GazePredictor::initiaMPIIGaze(const std::vector<std::string> arguments={}) {
    p_net_ = nullptr;
    string param_path = arguments[0];
    string model_path = arguments[1];
    int gpu_id = stoi(arguments[3]);
    // Set GPU (or CPU)
    /*caffe::Caffe::set_mode(caffe::Caffe::CPU);
    cout << "Using CPU model" << endl;*/
    caffe::Caffe::set_mode(caffe::Caffe::GPU);
    cout << "Using GPU with id " << gpu_id << endl;
    Caffe::SetDevice(gpu_id);
    cout << "load caffe model parameters from " << param_path << endl;
    // create CNN
    p_net_ = new Net<float>(param_path, caffe::TEST);
    cout << "load caffe model from " << model_path << endl;
    // load pre-trained weights (binary proto)
    p_net_->CopyTrainedLayersFrom(model_path);
    // judge model type base on the paramater file name
    size_t i = param_path.rfind("/", param_path.length());
    string filename;
    if (i != string::npos)
        filename = param_path.substr(i+1, param_path.length() - i);
    if (!filename.compare(string("lenet_test.prototxt")))
        model_type_ = 1;
    else if (!filename.compare(string("googlenet.prototxt")))
        model_type_ = 2;
    else if (!filename.compare(string("alexnet_eye.prototxt")))
        model_type_ = 3;
    else if (!filename.compare(string("alexnet_face.prototxt")))
        model_type_ = 4; // the single face model
    else if (!filename.compare(string("alexnet_face_448.prototxt")))
        model_type_ = 4; // the single face model
    else{
        model_type_ = 0;
        cout<<"Cannot define the type of model!"<<endl;
        exit(EXIT_FAILURE);
    }
 }
 // Gaze estimation with a single input image and the MPIIGaze method.
 //
 // The image is fed to the network through its "data" layer, the network is
 // run, and the first two values of the output blob are read as the angles
 // (theta, phi). Which blob is read depends on model_type_:
 //   1    -> "ip2"
 //   2    -> mean of "loss1/classifier", "loss2/classifier", "loss3/classifier"
 //   3, 4 -> "fc8", or "gaze_output" when the network has no "fc8" blob
 // For any other model type both angles stay zero.
 //
 // Returns the direction (-cos(theta)sin(phi), -sin(theta), -cos(theta)cos(phi)).
 // Exits with EXIT_FAILURE when the network, the data layer or the expected
 // output blob is missing.
 Point3f GazePredictor::predictGazeMPIIGaze(cv::Mat input_image) {
    if (p_net_ == nullptr) {
        cout << "ERROR: the caffe model is not loaded, call initiaMPIIGaze first" << endl;
        exit(EXIT_FAILURE);
    }

    vector<Mat> img_vec;
    img_vec.push_back(input_image);

    // labels passed along with the image; their values are not read back here
    std::vector<int> labelVector(2, 1);

    float loss = 0.0;
    caffe::shared_ptr<caffe::MemoryDataLayer<float> > data_layer_ =
            boost::static_pointer_cast<MemoryDataLayer<float> >(p_net_->layer_by_name("data"));
    if (!data_layer_) {
        cout << "ERROR: cannot find the layer named data in the model." << endl;
        exit(EXIT_FAILURE);
    }
    data_layer_->AddMatVector(img_vec, labelVector);

    // run network
    p_net_->ForwardPrefilled(&loss);

    // Copies the first two values of the named blob into `angles`.
    // Returns false, without touching `angles`, when the net has no such blob.
    auto read_angles = [&](const string &blob_name, Vec2f &angles) -> bool {
        if (!p_net_->has_blob(blob_name))
            return false;
        const float *values = p_net_->blob_by_name(blob_name)->cpu_data();
        angles.val[0] = values[0];
        angles.val[1] = values[1];
        return true;
    };

    Vec2f gaze_norm_2d;
    if (model_type_==1)
    {
        if (!read_angles("ip2", gaze_norm_2d)) {
            cout << "ERROR: cannot find the blob ip2 in the model." << endl;
            exit(EXIT_FAILURE);
        }
    }
    else if (model_type_==2)// if it is googlenet
    {
        Vec2f out1, out2, out3;
        if (!read_angles("loss1/classifier", out1) ||
            !read_angles("loss2/classifier", out2) ||
            !read_angles("loss3/classifier", out3)) {
            cout << "ERROR: cannot find the loss1/2/3 classifier blobs in the model." << endl;
            exit(EXIT_FAILURE);
        }
        // average the three output values
        gaze_norm_2d.val[0] = (out1.val[0]+out2.val[0]+out3.val[0]) / 3.0f;
        gaze_norm_2d.val[1] = (out1.val[1]+out2.val[1]+out3.val[1]) / 3.0f;
    }
    else if (model_type_==3 || model_type_==4)// if it is alexnet (eye or face)
    {
        // the final blob can be named either fc8 or gaze_output
        if (!read_angles("fc8", gaze_norm_2d) && !read_angles("gaze_output", gaze_norm_2d)) {
            cout << "ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output" << endl;
            exit(EXIT_FAILURE);
        }
    }

    // convert the two angles to a 3D direction
    const float theta = gaze_norm_2d.val[0];
    const float phi = gaze_norm_2d.val[1];
    Point3f gaze_norm_3d;
    gaze_norm_3d.x = (-1.0f)*cos(theta)*sin(phi);
    gaze_norm_3d.y = (-1.0f)*sin(theta);
    gaze_norm_3d.z = (-1.0f)*cos(theta)*cos(phi);

    return gaze_norm_3d;
 }
 }
--- a/src/input_handler.cpp
+++ b/src/input_handler.cpp
@ -0,0 +1,185 @@
 #include "input_handler.hpp"
 #include <iostream>
 #include <cstdlib>
 using namespace cv;
 using namespace std;
 namespace opengaze {
 #if WIN32
 #include <windows.h>
 #else
 #include <X11/Xlib.h>
 #endif
 void InputHandler::getScreenResolution(int &width, int &height) {
 #if WIN32
    width  = (int) GetSystemMetrics(SM_CXSCREEN);
    height = (int) GetSystemMetrics(SM_CYSCREEN);
 #else
    Display* disp = XOpenDisplay(NULL);
    Screen*  scrn = DefaultScreenOfDisplay(disp);
    width  = scrn->width;
    height = scrn->height;
 #endif
 }
 // Default setup: camera input with id 0, screen size read from the system.
 InputHandler::InputHandler(){
    // the screen query does not depend on the input settings below
    getScreenResolution(screen_width_, screen_height_);
    camera_id_ = 0;
    input_type_ = InputType::Camera;// default input type
 }
 // No explicit cleanup is performed here.
 InputHandler::~InputHandler() = default;
 // Open the configured input source and reset the end-of-input flag.
 //   Camera:    opens camera_id_ and requests a 1280x720 frame size
 //   Video:     opens input_file_video_name_
 //   Directory: checks that input_path_ is a directory and starts iterating it
 //   Memory:    nothing to open
 // Exits with EXIT_FAILURE when the source cannot be opened.
 void InputHandler::initialize()
 {
    switch (input_type_) {
        case InputType::Camera:
            cap_.open(camera_id_);
            if (!cap_.isOpened()) {
                cout << "Could not open Camera with id " << camera_id_ << endl;
                std::exit(EXIT_FAILURE);
            }
            setFrameSize(1280, 720); // other common sizes: 800*600, 1920*1080
            break;

        case InputType::Video:
            cap_.open(input_file_video_name_);
            if (!cap_.isOpened()) {
                cout << "Error: Could not open video file " << input_file_video_name_ << endl;
                std::exit(EXIT_FAILURE);
            }
            break;

        case InputType::Directory:
            if (!boost::filesystem::is_directory(input_path_)) {
                cout << "Error: The input must be a directory, but it is " << input_path_ << endl;
                std::exit(EXIT_FAILURE);
            }
            current_itr_ = boost::filesystem::directory_iterator(input_path_);
            break;

        default: // InputType::Memory and anything else: nothing to open
            break;
    }

    is_reach_end_ = false;
 }
 // Request a capture frame size and print the size the capture reports back.
 void InputHandler::setFrameSize(int frame_width, int frame_height){
    // request the new size (height first, then width)
    cap_.set(cv::CAP_PROP_FRAME_HEIGHT, frame_height);
    cap_.set(cv::CAP_PROP_FRAME_WIDTH, frame_width);

    // read back what the capture reports
    const double reported_width = cap_.get(cv::CAP_PROP_FRAME_WIDTH);
    const double reported_height = cap_.get(cv::CAP_PROP_FRAME_HEIGHT);
    cout << "Input frame size is : " << reported_width << " x " << reported_height << endl;
 }
 // Fetch the next input image.
 //
 //   Camera:    grabs the next frame.
 //   Video:     grabs the next frame; an empty frame marks the end of the video.
 //   Directory: reads the next file of the directory and advances the
 //              iterator; once every entry has been consumed the end flag is
 //              set and an empty image is returned. Exits with EXIT_FAILURE on
 //              a file whose extension is not .jpg, .png or .bmp.
 //   Memory:    returns an empty image.
 Mat InputHandler::getNextSample() {
    Mat frame;
    if (input_type_ == InputType::Camera) cap_ >> frame;
    else if (input_type_ == InputType::Video) {
        cap_ >> frame;
        if (frame.empty()) // we reach the end of video
            is_reach_end_ = true;
    }
    else if (input_type_ == InputType::Directory) {
        // the end iterator must not be dereferenced: check it first
        if (current_itr_ == boost::filesystem::directory_iterator()) {
            is_reach_end_ = true;
            return frame;
        }
        const boost::filesystem::path file_path = current_itr_->path();
        ++current_itr_; // move on, so the next call sees the next file

        if (file_path.extension() != ".jpg" && file_path.extension() != ".png" && file_path.extension() != ".bmp"){
            cout << "Error: The input file is not image file with extension of jpg, png or bmp!" << endl;
            cout << "The input file name is: " << file_path.string() << endl;
            std::exit(EXIT_FAILURE);
        }
        cout << "process image " << file_path << endl;
        frame = imread(file_path.string());
    }
    else if (input_type_ == InputType::Memory) {}
    return frame;
 }
 // Release the capture for camera and video inputs and mark the input as
 // finished. Always returns true.
 bool InputHandler::closeInput() {
    const bool uses_capture = (input_type_ == InputType::Camera) ||
                              (input_type_ == InputType::Video);
    if (uses_capture) {
        cap_.release();
        is_reach_end_ = true;
    }
    return true;
 }
 // Store the input location for the current input type: the directory path
 // for Directory input, the file name for Video input. Other input types
 // ignore the argument.
 void InputHandler::setInput(std::string input_path) {
    switch (input_type_) {
        case InputType::Directory:
            input_path_ = move(input_path);
            break;
        case InputType::Video:
            input_file_video_name_ = move(input_path);
            break;
        default:
            break;
    }
 }
 // Load the camera-screen calibration and derive the monitor plane.
 //
 // Reads monitor_W, monitor_H, monitor_R and monitor_T from calib_file, maps
 // the four monitor corners through monitor_R_ and monitor_T_, and stores the
 // monitor normal (monitor_R_ applied to (0,0,1)) as CV_32F.
 // Exits with EXIT_FAILURE when the file cannot be opened.
 void InputHandler::readScreenConfiguration(string calib_file) {
    FileStorage fs_disp(calib_file, FileStorage::READ);
    if (!fs_disp.isOpened()) {
        cout << "Cannot open the camera-screen calibration file: " << calib_file << endl;
        std::exit(EXIT_FAILURE);
    }
    fs_disp["monitor_W"] >> monitor_W_;
    fs_disp["monitor_H"] >> monitor_H_;
    fs_disp["monitor_R"] >> monitor_R_;
    fs_disp["monitor_T"] >> monitor_T_;
    fs_disp.release();

    // compute monitor plane: the four corners in monitor coordinates ...
    Vec3f corners[4];
    corners[0] = Vec3f(0.0, 0.0, 0.0);
    corners[1] = Vec3f(monitor_W_, 0.0, 0.0);
    corners[2] = Vec3f(0.0, monitor_H_, 0.0);
    corners[3] = Vec3f(monitor_W_, monitor_H_, 0.0);

    // ... transformed with the calibrated rotation and translation
    for(int i=0; i<4; i++){
        Mat corners_cam = monitor_R_ * Mat(corners[i]) + monitor_T_;
        corners_cam.copyTo(monitor_corners_[i]);
    }

    Vec3f normal = Vec3f(0.0, 0.0, 1.0); // normal direction
    monitor_normal_ = monitor_R_ * Mat(normal);
    monitor_normal_.convertTo(monitor_normal_, CV_32F);
 }
 // Load the camera calibration: reads "camera_matrix" into camera_matrix_ and
 // "dist_coeffs" into camera_distortion_.
 // Exits with EXIT_FAILURE when the file cannot be opened.
 void InputHandler::readCameraConfiguration(string calib_file){
    cout << endl << "Reading calibration information from : " << calib_file << endl;
    FileStorage fs;
    if (!fs.open(calib_file, FileStorage::READ)) {
        cout << "Cannot open the camera calibration file: " << calib_file << endl;
        std::exit(EXIT_FAILURE);
    }
    fs["camera_matrix"] >> camera_matrix_;
    fs["dist_coeffs"] >> camera_distortion_;
    fs.release();
 }
 // Fill in the on-screen gaze position of every sample.
 //
 // With the face model the ray starts at the face center and uses gaze3d.
 // Otherwise each eye is mapped separately (lgaze2d, rgaze2d) and gaze2d is
 // the mean of the two results.
 void InputHandler::projectToDisplay(std::vector<opengaze::Sample> &inputs, bool is_face_model) {
    // first three float elements of a matrix as a Vec3f
    auto to_vec3 = [](const cv::Mat &m) -> Vec3f {
        return Vec3f(m.at<float>(0), m.at<float>(1), m.at<float>(2));
    };

    for (auto &sample : inputs) {
        if (is_face_model) {
            const Vec3f face_center = to_vec3(sample.face_patch_data.face_center);
            sample.gaze_data.gaze2d = mapToDisplay(face_center, sample.gaze_data.gaze3d);
            continue;
        }

        const Vec3f leye_pose = to_vec3(sample.eye_data.leye_pos);
        const Vec3f reye_pose = to_vec3(sample.eye_data.reye_pos);
        sample.gaze_data.lgaze2d = mapToDisplay(leye_pose, sample.gaze_data.lgaze3d);
        sample.gaze_data.rgaze2d = mapToDisplay(reye_pose, sample.gaze_data.rgaze3d);

        // combined estimate: mean of both eyes
        sample.gaze_data.gaze2d.x = (sample.gaze_data.lgaze2d.x + sample.gaze_data.rgaze2d.x) / 2.0f;
        sample.gaze_data.gaze2d.y = (sample.gaze_data.lgaze2d.y + sample.gaze_data.rgaze2d.y) / 2.0f;
    }
 }
 // Intersect a gaze ray with the monitor plane.
 //
 // origin:   start of the ray
 // gaze_vec: direction of the ray
 // Returns the intersection expressed in monitor coordinates, with x divided
 // by monitor_W_ and y divided by monitor_H_.
 cv::Point2f InputHandler::mapToDisplay(Vec3f origin, Vec3f gaze_vec) {
    // ray length at which the plane through monitor_corners_[0] is hit
    const double to_plane = monitor_normal_.dot(Mat(monitor_corners_[0]-origin));
    const double along_ray = monitor_normal_.dot(Mat(gaze_vec));
    const float ray_length = (float)(to_plane/along_ray);
    const Vec3f hit_point = origin + ray_length * gaze_vec;

    // convert to the monitor coordinate system
    Mat hit_in_monitor = monitor_R_.inv() * (Mat(hit_point) - monitor_T_);
    Vec3f hit_3d;
    hit_in_monitor.copyTo(hit_3d);

    Point2f screen_point;
    screen_point.x = hit_3d.val[0] / monitor_W_;
    screen_point.y = hit_3d.val[1] / monitor_H_;
    return screen_point;
 }
 }
--- a/src/normalizer.cpp
+++ b/src/normalizer.cpp
@ -0,0 +1,184 @@
 #include "normalizer.hpp"
 using namespace cv;
 using namespace std;
 namespace opengaze {
 // Default normalization: focal length 1600, distance 1000, 224x224 patch.
 Normalizer::Normalizer() {
    setParameters(1600, 1000, 224, 224);
 }
 // No explicit cleanup is performed here.
 Normalizer::~Normalizer() = default;
 // Set the parameters of the data normalization and rebuild the normalized
 // camera matrix cam_norm_.
 //
 // focal_length: focal length of the normalized camera
 // distance:     distance the patch is scaled to
 // img_w, img_h: size of the normalized image patch in pixels
 void Normalizer::setParameters(int focal_length, int distance, int img_w, int img_h){
    focal_norm_ = focal_length;
    distance_norm_ = distance;
    roiSize_norm_ = cv::Size(img_w, img_h);
    // principal point at the patch center; both halves use floating point so
    // an odd width is not truncated by integer division
    cam_norm_ = (Mat_<float>(3,3) << focal_norm_, 0, roiSize_norm_.width/2.0f,
                                     0, focal_norm_, roiSize_norm_.height/2.0f,
                                     0, 0, 1.0f);
 }
 // Convert a vector from the normalization space to the camera coordinate
 // system: multiplies by the inverse of cnv_mat and rescales the result to
 // unit length.
 cv::Mat Normalizer::cvtToCamera(cv::Point3f input, const Mat cnv_mat) {
    const Vec3f direction_norm(input.x, input.y, input.z);

    // apply de-normalization
    Mat direction_cam = cnv_mat.inv() * Mat(direction_norm);
    const double length = norm(direction_cam);
    direction_cam = direction_cam / length;

    return direction_cam;
 }
 // Warp the input image into the normalized face patch.
 //
 // Reads head_r and face_center from sample.face_patch_data, so
 // estimateHeadPose must have filled them in first. Writes the 3x3 CV_32F
 // rotation used for the warp to sample.face_patch_data.face_rot.
 //
 // Returns the warped patch of size roiSize_norm_.
 cv::Mat Normalizer::normalizeFace(Mat input_image, opengaze::Sample &sample) {
    // head rotation matrix; its first column is used as the head x axis
    Mat HR;
    cv::Rodrigues(sample.face_patch_data.head_r, HR);

    const Mat &face_center = sample.face_patch_data.face_center;
    float distance = (float)norm(face_center); // original distance
    float z_scale = distance_norm_ / distance; // scaling factor
    Mat scaleMat = (Mat_<float>(3,3) << 1.0f, 0.0f, 0.0f,
                                        0.0f, 1.0f, 0.0f,
                                        0.0f, 0.0f, z_scale); // scaling matrix

    // get the look_at matrix
    Mat hRx = HR.col(0);
    Mat forward = face_center / distance;
    Mat down = forward.cross(hRx);
    down = down / norm(down);
    Mat right = down.cross(forward);
    right = right / norm(right);

    // rotation matrix: rows are right, down, forward
    Mat look_at(3, 3, CV_32F);
    right.copyTo(look_at.col(0));
    down.copyTo(look_at.col(1));
    forward.copyTo(look_at.col(2));
    sample.face_patch_data.face_rot = look_at.t(); // there is no scaling

    // transformation matrix
    Mat warpMat = cam_norm_ * (scaleMat * sample.face_patch_data.face_rot) * camera_matrix_.inv();

    // crop image
    Mat face_patch;
    warpPerspective(input_image, face_patch, warpMat, roiSize_norm_);

    return face_patch;
 }
 // Warp the input image into two normalized, histogram-equalized eye patches.
 //
 // Reads head_r, leye_pos and reye_pos from the sample, so estimateHeadPose
 // must have filled them in first. Writes the rotation used for each warp to
 // sample.eye_data.leye_rot / reye_rot.
 //
 // Returns {left eye patch, right eye patch}, each of size roiSize_norm_.
 vector<cv::Mat> Normalizer::normalizeEyes(cv::Mat input_image, Sample &sample){
    vector<cv::Mat> eye_images;

    Mat img_gray;
    cvtColor(input_image, img_gray, CV_BGR2GRAY);

    // the head rotation is the same for both eyes: compute it once
    Mat HR;
    cv::Rodrigues(sample.face_patch_data.head_r, HR);
    const Mat hRx = HR.col(0);

    // index 0 is the left eye, index 1 the right eye
    const Mat eye_centers[2] = { sample.eye_data.leye_pos, sample.eye_data.reye_pos };
    Mat* const eye_rots[2] = { &sample.eye_data.leye_rot, &sample.eye_data.reye_rot };

    for (int i=0; i<2; ++i) {
        const Mat &eye_center = eye_centers[i];

        float distance = (float)norm(eye_center);
        float z_scale = distance_norm_ / distance;
        Mat scaleMat = (Mat_<float>(3,3) << 1.0f, 0.0f, 0.0f,
                                            0.0f, 1.0f, 0.0f,
                                            0.0f, 0.0f, z_scale); // scaling matrix

        // get the look_at matrix
        Mat forward = eye_center/distance;
        Mat down = forward.cross(hRx);
        down = down / norm(down);
        Mat right = down.cross(forward);
        right = right / norm(right);

        // rotation matrix: rows are right, down, forward
        Mat look_at(3, 3, CV_32F);
        right.copyTo(look_at.col(0));
        down.copyTo(look_at.col(1));
        forward.copyTo(look_at.col(2));
        *eye_rots[i] = look_at.t(); // there is no scaling

        // transformation matrix
        Mat warpMat = cam_norm_ * (scaleMat * *eye_rots[i]) * camera_matrix_.inv();

        // crop image and copy the equalized image
        Mat eye_patch, eye_patch_equal;
        warpPerspective(img_gray, eye_patch, warpMat, roiSize_norm_);
        equalizeHist(eye_patch, eye_patch_equal);
        eye_images.push_back(eye_patch_equal);
    }

    return eye_images;
 }
 // Load the 3D face model used for head pose estimation from
 // <path>/content/model/face_model.yml.
 //
 // The "face_model" node is stored in face_model_mat_ and its first six
 // columns (rows 0..2 = x, y, z, read as float) are copied into face_model_.
 // Exits with EXIT_FAILURE when the file cannot be opened or the matrix is
 // smaller than 3x6.
 void Normalizer::loadFaceModel(std::string path) {
    string face_model_file_path = path + "/content/model/face_model.yml";

    cout << endl << "Loading 3D face model for head pose estimation from : " << face_model_file_path << endl;
    FileStorage fs;
    if (!fs.open(face_model_file_path, FileStorage::READ)) {
        cout << "Cannot load the 3D face model!" << endl;
        exit(EXIT_FAILURE);
    }
    fs["face_model"] >> face_model_mat_;
    fs.release();

    // elements (0..2, 0..5) are read below
    if (face_model_mat_.rows < 3 || face_model_mat_.cols < 6) {
        cout << "The 3D face model must have at least 3 rows and 6 columns!" << endl;
        exit(EXIT_FAILURE);
    }

    face_model_.clear(); // loading twice must not append a second set of points
    for(int p=0; p<6; ++p)
        face_model_.emplace_back(Point3d(face_model_mat_.at<float>(0,p),
                                         face_model_mat_.at<float>(1,p),
                                         face_model_mat_.at<float>(2,p)));
 }
 // estimate head pose via model fitting
 void Normalizer::estimateHeadPose(const Point2f *landmarks, opengaze::Sample &sample) {
    Mat zero_dist = Mat::zeros(1, 5, CV_64F);
    vector<Point2d> landmarks_orig(landmarks,
                                   landmarks + 6);
    cv::Mat head_r, head_t;
    camera_matrix_.convertTo(camera_matrix_, CV_64F); // input must be double type
    solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, false, SOLVEPNP_DLS);
    solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, true);
    head_r.convertTo(sample.face_patch_data.head_r, CV_32F);
    head_t.convertTo(sample.face_patch_data.head_t, CV_32F);
    camera_matrix_.convertTo(camera_matrix_, CV_32F);
    // get the face center in 3D space
    Mat HR;
    cv::Rodrigues(sample.face_patch_data.head_r, HR);
    Mat HT = repeat(sample.face_patch_data.head_t, 1, 6);
    Mat Fc;
    add(HR*face_model_mat_, HT, Fc);
    Mat face_center = (Fc.col(0) + Fc.col(1) + Fc.col(2) + Fc.col(3) + Fc.col(4) + Fc.col(5)) / 6.0; // face center
    face_center.copyTo(sample.face_patch_data.face_center); // copy to output
    sample.face_patch_data.face_center.convertTo(sample.face_patch_data.face_center, CV_32F);
    Mat le = 0.5*(Fc.col(2) + Fc.col(3)); // left eye
    le.copyTo(sample.eye_data.leye_pos);
    sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
    Mat re = 0.5*(Fc.col(0) + Fc.col(1)); // right eye
    re.copyTo(sample.eye_data.reye_pos);
    sample.eye_data.reye_pos.convertTo(sample.eye_data.reye_pos, CV_32F);
 }
 // Store the camera intrinsic matrix used by the normalization and convert
 // the stored matrix to CV_32F.
 void Normalizer::setCameraMatrix(cv::Mat input) {
    camera_matrix_ = input;
    // the normalize* methods multiply this matrix with float matrices
    camera_matrix_.convertTo(camera_matrix_, CV_32F);
 }
 }
--- a/src/opengaze.cpp
+++ b/src/opengaze.cpp
@ -0,0 +1,502 @@
 #include "opengaze.hpp"
 #include <iostream>
 #include <time.h>
 using namespace std;
 using namespace cv;
 namespace opengaze {
 // Convert a clock() tick count to milliseconds.
 double clockToMilliseconds(clock_t ticks){
    // ticks / (ticks per second) gives seconds; times 1000 gives milliseconds
    const double seconds = static_cast<double>(ticks) / static_cast<double>(CLOCKS_PER_SEC);
    return seconds * 1000.0;
 }
 // Build the application from the command line and <root_dir>/default.cfg.
 //
 // Every setting is looked up on the command line first, then in the config
 // file, and finally falls back to a built-in default. The constructor
 //   1. parses both option sources,
 //   2. loads the camera and camera-screen calibration files,
 //   3. configures the input handler (camera, video file or directory),
 //   4. initializes the face detector and the gaze estimation method.
 // It exits with EXIT_FAILURE when the config file or a calibration file is
 // missing, or when the gaze method is not usable.
 OpenGaze::OpenGaze(int argc, char** argv){
    namespace fs = boost::filesystem;
    namespace po = boost::program_options;

    // default value of parameters
    camera_id_ = 0;
    input_type_ = InputHandler::InputType::Camera;
    is_face_model_ = true;
    string gaze_method;
    string gpu_id;
    string temp;
    int number_user;
    fs::path calib_camera, calib_screen, cnn_param_path, cnn_model_path;

    // parse command line options for input/output paths
    // (every option queried below is registered, and number_user takes a value)
    po::options_description command_line("Command line options");
    command_line.add_options()
            ("root_dir,r", po::value<string>(), "configuration file")
            ("input_type,t", po::value<string>(), "input type (camera, video file, directory)")
            ("gaze_method,g", po::value<string>(), "gaze estimation method, could be MPIIGaze or OpenFace")
            ("input,i", po::value<string>(), "parameter for input")
            ("output,o", po::value<string>(), "output directory")
            ("cnn_param_path", po::value<string>(), "Caffe prototxt path")
            ("cnn_model_path", po::value<string>(), "Caffe model path")
            ("calib_camera", po::value<string>(), "camera calibration file")
            ("calib_screen", po::value<string>(), "camera-screen calibration file")
            ("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
            ("openface_path", po::value<string>(), "face detector root path")
            ("per_model_save_path", po::value<string>(), "per_model save path, default is <root_dir>/content/calib/user0.txt")
            ("debug,d", "show debug output")
            ("face_model,f", "to use face model or not")
            ("save_video,s", "save output visualization or not")
            ("number_user,n", po::value<string>(), "the maximum number of users in the input image")
            ;

    cout << "Parsing command line options..." << endl;
    po::variables_map vm_command;
    po::store(po::parse_command_line(argc, argv, command_line), vm_command);
    po::notify(vm_command);

    // parse config file for data paths
    po::options_description config_file("Config file options");
    config_file.add_options()
            ("root_dir,r", po::value<string>(), "configuration file")
            ("input_type,t", po::value<string>(), "input type (camera, video file, directory)")
            ("input,i", po::value<string>(), "parameter for input")
            ("output,o", po::value<string>(), "output directory")
            ("cnn_param_path", po::value<string>(), "Caffe prototxt path")
            ("cnn_model_path", po::value<string>(), "Caffe model path")
            ("calib_camera", po::value<string>(), "camera calibration file")
            ("calib_screen", po::value<string>(), "camera-screen calibration file")
            ("gaze_method", po::value<string>(), "gaze estimation method, could be cnn or openface")
            ("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
            ("openface_path", po::value<string>(), "face detector root path")
            ("per_model_save_path", po::value<string>(), "per_model save path, default is <root_dir>/content/calib/user0.txt")
            ("debug", po::value<bool>(), "show debug output")
            ("face_model", po::value<bool>(), "face model or not")
            ("save_video", po::value<bool>(), "save output visualization or not")
            ("number_user", po::value<string>(), "the maximum number of users in the input image")
            ;

    fs::path root_dir, config_path;

    if(vm_command.count("root_dir")) root_dir = vm_command["root_dir"].as<string>();
    else {
        root_dir = OPENGAZE_CON_DIR;
        cout << "No root directory is found, default value " << root_dir << " will be use" << endl;
    }

    config_path = root_dir / "default.cfg";
    cout << "Reading config from \"" << config_path.string() << "\""<< endl;
    if(!fs::exists(config_path)){
        cout << "Config file does not exist" << endl;
        exit(EXIT_FAILURE);
    }
    ifstream settings_file(config_path.string());
    po::variables_map vm_config;
    po::store(po::parse_config_file(settings_file , config_file), vm_config);
    po::notify(vm_config);

    // Looks up a string option: the command line wins over the config file.
    // Returns false, leaving `value` untouched, when neither source has it.
    auto lookup = [&vm_command, &vm_config](const char *key, string &value) -> bool {
        if (vm_command.count(key)) { value = vm_command[key].as<string>(); return true; }
        if (vm_config.count(key)) { value = vm_config[key].as<string>(); return true; }
        return false;
    };
    string value;

    if (!lookup("gpu_id", gpu_id)) gpu_id = "0";

    // CNN parameters
    if (lookup("cnn_param_path", value)) cnn_param_path = value;
    else cnn_param_path = root_dir / "content/caffeModel/alexnet_face.prototxt";

    if (lookup("cnn_model_path", value)) cnn_model_path = value;
    else cnn_model_path = root_dir / "content/caffeModel/alexnet_face.caffemodel";

    // check input requirements
    if (!lookup("gaze_method", gaze_method)) gaze_method = "MPIIGaze";

    if (lookup("calib_screen", value)) calib_screen = value;
    else calib_screen = root_dir / "content/calib/monitor_laptop.yml";

    if (lookup("calib_camera", value)) calib_camera = value;
    else calib_camera = root_dir / "content/calib/calibration.yml";

    // read calibration file
    if(!fs::exists(calib_camera)){
        cout << "Camera calibration file does not exist: " << calib_camera <<endl;
        exit(EXIT_FAILURE);
    }
    else input_handler_.readCameraConfiguration(calib_camera.string());

    if(!fs::exists(calib_screen)){
        cout << "Camera-screen calibration file does not exist: " << calib_screen << endl;
        exit(EXIT_FAILURE);
    }
    else input_handler_.readScreenConfiguration(calib_screen.string());

    // input type: camera (default), video or directory
    if (!lookup("input_type", temp)) temp = "";
    if (temp == "camera") {input_type_ = InputHandler::InputType::Camera;}
    else if (temp == "video") {input_type_ = InputHandler::InputType::Video;}
    else if (temp == "directory") {input_type_ = InputHandler::InputType::Directory;}
    else cout<<"No input type specified, default value (camera) will be use" << endl;

    // input parameter: camera id, video file or directory
    if (!lookup("input", temp)) temp = "0";
    if (input_type_ == InputHandler::InputType::Camera) camera_id_ = stoi(temp);
    else if (input_type_ == InputHandler::InputType::Video || input_type_ == InputHandler::InputType::Directory)  input_dir_ = temp;
    else cout<<"No input parameter specified, default value will be use" << endl;

    // flags: present on the command line means true, otherwise the config
    // file value, otherwise the default
    if(vm_command.count("face_model")) is_face_model_ = true;
    else if(vm_config.count("face_model")) is_face_model_ = vm_config["face_model"].as<bool>();
    else is_face_model_ = true;

    if(vm_command.count("save_video")) is_save_video_ = true;
    else if(vm_config.count("save_video")) is_save_video_ = vm_config["save_video"].as<bool>();
    else is_save_video_ = false;

    if(vm_command.count("debug")) show_debug_ = true;
    else if(vm_config.count("debug")) show_debug_ = vm_config["debug"].as<bool>();
    else show_debug_ = false;

    // output directory: defaults to the parent of the input, or to root_dir
    // for camera input
    if (lookup("output", value)) output_dir_ = value;
    else {
        if (input_type_ == InputHandler::InputType::Video) output_dir_ = input_dir_.parent_path();
        else if (input_type_ == InputHandler::InputType::Directory) output_dir_ = input_dir_.parent_path();
        else if (input_type_ == InputHandler::InputType::Camera)
            output_dir_ = root_dir;
    }

    // NOTE: the face detector root is read but not used further in this constructor
    string face_detector_root_path;
    if (!lookup("openface_path", face_detector_root_path))
        cout<< "No face detector root specified, default detector will be use" << endl;

    if (lookup("per_model_save_path", value)) per_model_save_path_ = value;
    else per_model_save_path_ = root_dir.string() + "/content/calib/user0.txt";

    if (!lookup("number_user", temp)) temp = "5";
    number_user = stoi(temp);

    // initial class instance
    if (input_type_ == InputHandler::InputType::Camera){ // Camera as input
        input_handler_.setInputType(InputHandler::InputType::Camera);// set input type
        input_handler_.setInput(camera_id_); // set Camera id
    }
    else if (input_type_ == InputHandler::InputType::Video) {
        input_handler_.setInputType(InputHandler::InputType::Video);// set input type
        input_handler_.setInput(input_dir_.string()); // set video file
    }
    else if (input_type_ == InputHandler::InputType::Directory){
        input_handler_.setInputType(InputHandler::InputType::Directory);
        // the handler needs the directory path, otherwise initialize() rejects it
        input_handler_.setInput(input_dir_.string());
    }

    // initialize other classes
    gaze_estimator_.setCameraParameters(input_handler_.camera_matrix_, input_handler_.camera_distortion_);
    gaze_estimator_.setRootPath(root_dir.string());
    gaze_estimator_.initialFaceDetector(number_user);

    vector<std::string> arguments;
    if (gaze_method == "MPIIGaze") {
        // order: prototxt path, model path, input type, gpu id
        arguments.push_back(cnn_param_path.string());
        arguments.push_back(cnn_model_path.string());
        if (is_face_model_)
            arguments.emplace_back("face");
        else
            arguments.emplace_back("eye");
        arguments.push_back(gpu_id);
        gaze_estimator_.setMethod(GazeEstimator::Method::MPIIGaze, arguments);
    }
    else if (gaze_method == "OpenFace"){
        cout << "OpenFace gaze estimation is current not support" << endl;
        exit(EXIT_FAILURE);
    }
    else {
        cout << "The method setting is not right! Options are MPIIGaze or OpenFace!" << endl;
        exit(EXIT_FAILURE);
    }
 }
 // Close the input source when the application object goes away.
 OpenGaze::~OpenGaze() {
    (void)input_handler_.closeInput(); // the returned status is not used
 }
 // do gaze estimation with camera as input
 void OpenGaze::runGazeVisualization() {
    input_handler_.initialize();
    namedWindow("Gaze");
    int key;
    Mat input_image;
    vector<Sample> output;
    cv::VideoWriter m_writer;
    if (is_save_video_){
            boost::filesystem::path save_video_file;
        save_video_file = output_dir_ / (input_dir_.stem().string() + "_gaze_video.avi");
        m_writer.open(save_video_file.string(), CV_FOURCC('M','J','P','G'), 25,
                      Size(input_handler_.getFrameWidth(),input_handler_.getFrameHeight()), true);
        cout << "Saving video to " << save_video_file << endl;
    }
    // construct saving file
    ofstream output_stream;
    boost::filesystem::path output_file_name = output_dir_ / (input_dir_.stem().string() + "_gaze_output.txt");
    output_stream.open(output_file_name.string());
    cout << "Created output file: " << output_file_name.string() << endl;
    // for fps calculation
    double fps_tracker = -1.0;
    double t_start = 0;
    double t_end = 0;
    unsigned int frame_count = 0;
    while(true){// loop all the sample or read frame from Video
        frame_count++;
        t_start = t_end;
        output.clear();
        input_image = input_handler_.getNextSample();// get input image
        if(input_handler_.isReachEnd()){ // check if all sample are processed
            cout<<"Processed all the samples."<<endl;
            break;
        }
        Mat undist_img;
        undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
        gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
        input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
        // get the fps values
        t_end = cv::getTickCount();
        fps_tracker = 1.0 / (double(t_end - t_start) / cv::getTickFrequency());
        // save output
        for(auto & sample : output) {
            output_stream << frame_count << ",";
            output_stream << sample.face_data.face_id << ",";
            output_stream << sample.face_data.certainty << ",";
            output_stream << sample.face_patch_data.face_center.at<float>(0) << ",";
            output_stream << sample.face_patch_data.face_center.at<float>(1) << ",";
            output_stream << sample.face_patch_data.face_center.at<float>(2) << ",";
            output_stream << sample.gaze_data.gaze2d.x << ",";
            output_stream << sample.gaze_data.gaze2d.y << ",";
            output_stream << sample.eye_data.leye_pos.at<float>(0) << ",";
            output_stream << sample.eye_data.leye_pos.at<float>(1) << ",";
            output_stream << sample.eye_data.leye_pos.at<float>(2) << ",";
            output_stream << sample.eye_data.reye_pos.at<float>(0) << ",";
            output_stream << sample.eye_data.reye_pos.at<float>(1) << ",";
            output_stream << sample.eye_data.reye_pos.at<float>(2) << endl;
        }
        if (is_save_video_ || show_debug_) {
            //////// visualization //////////////////////////////////////////////////
            // draw results
            for(const auto & sample : output){
                //drawLandmarks(sample, undist_img); // draw face landmarks
                drawGazeOnFace(sample, undist_img); // draw gaze ray on face image
                //drawGazeOnSimScreen(sample, undist_img); // draw screen target
            }
            if (show_debug_) {
                // show fps
                char fpsC[255];
                std::sprintf(fpsC, "%02f", fps_tracker);
                string fpsSt("FPS: ");
                fpsSt += fpsC;
                cv::putText(undist_img, fpsSt, cv::Point(100, 100), CV_FONT_HERSHEY_SIMPLEX, 1, CV_RGB(255, 0, 0), 2);
                // show the image
                imshow("Gaze", undist_img);
                key = cv::waitKey(1);
                if (key==27) exit(EXIT_SUCCESS); // press ESC to exit
            }
            if (is_save_video_) {
                if (is_save_video_)
                    m_writer << undist_img;
            }
        }
    }
    if (is_save_video_)
        m_writer.release();
 }
 void OpenGaze::runDataExtraction() {
    assert(input_handler_.getInputType() == InputHandler::InputType::Directory);// Here we just accept the directory folder
    input_handler_.initialize();
    vector<Sample> output;
    Mat input_image;
    while(true){// loop all the sample or read frame from Video
        output.clear();
        input_image = input_handler_.getNextSample();// get input image
        if(input_handler_.isReachEnd()){ // check if all sample are processed
            cout << "Processed all the samples." << endl;
            break;
        }
        Mat undist_img;
        undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
        gaze_estimator_.getImagePatch(undist_img, output); // extract the face image
        // save the output
        for (int i=0; i<output.size(); ++i) {
            string save_file_name  = output_dir_.stem().string() + "/img_" + input_handler_.getFileName() + "_" +to_string(i)+".jpg";
            cv::imwrite(save_file_name, output[i].face_patch_data.face_patch);
        }
    }
 }
 // Show the estimated on-screen gaze location of every sample as a white dot on a
 // black full-screen window named "screen". Runs until the input is exhausted or
 // ESC is pressed, then restores and closes the window.
 void OpenGaze::runGazeOnScreen() {
    input_handler_.initialize();
    int key;
    Mat input_image, undist_img, show_img;
    vector<Sample> output;
    cv::namedWindow("screen", CV_WINDOW_NORMAL);
    cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
    show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);
    while(true){// loop all the sample or read frame from Video
        output.clear();
        input_image = input_handler_.getNextSample();// get input image
        // check for the end after fetching, like the other run* loops do, so that an
        // empty frame is never handed to undistort
        if(input_handler_.isReachEnd() || input_image.empty()){
            cout<<"Processed all the samples."<<endl;
            break;
        }
        undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
        gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
        input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
        // start from a black canvas and draw one dot per sample; gaze2d is scaled by
        // the screen size to obtain pixel coordinates
        show_img.setTo(cv::Scalar::all(0));
        for(const auto & sample : output) {
            int loc_x = (int)(sample.gaze_data.gaze2d.x * input_handler_.getScreenWidth());
            int loc_y = (int)(sample.gaze_data.gaze2d.y * input_handler_.getScreenHeight());
            circle(show_img, cv::Point(loc_x, loc_y), 10, CV_RGB(255,255,255), -1);
        }
        imshow("screen", show_img);
        key = cv::waitKey(1);
        if (key==27) break; // press ESC to exit
    }
    cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
    cv::destroyWindow("screen");
 }
 void OpenGaze::runPersonalCalibration(int num_calibration_point) {
    if (input_handler_.getInputType() != InputHandler::InputType::Camera){ // personal calibration has to be done with camera
        cout << "Error: the input type must be camera for personal calibration!" << endl;
        exit(EXIT_FAILURE);
    }
    Mat input_image, undist_img;
    input_handler_.initialize();
    PersonalCalibrator m_calibrator(input_handler_.getScreenWidth(), input_handler_.getScreenHeight());
    m_calibrator.generatePoints(num_calibration_point);
    m_calibrator.initialWindow(); // show start windows
    vector<cv::Point2f> pred, gt; // prediction and ground-truth
    for (int i=0; i<num_calibration_point; ++i){
        if (m_calibrator.showNextPoint()) {// wait for clicking
            vector<Sample> output;
            input_image = input_handler_.getNextSample(); // get the sample when user clicking
            undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
            gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
            input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);// convert to 2D projection
            m_calibrator.confirmClicking(); // give feedback to user that they successfully did calibration
            pred.emplace_back(output[0].gaze_data.gaze2d);
            gt.emplace_back(cv::Point2f((m_calibrator.getCurrentPoint().x/(float)input_handler_.getScreenWidth()),
                                      (m_calibrator.getCurrentPoint().y/(float)input_handler_.getScreenHeight())));
        }
        else
            break; // if user press ESC button, we break
    }
    if (pred.size() > 0){
        m_calibrator.generateModel(pred, gt, 1); // get the mapping model
        string per_model_save_path_ = output_dir_.stem().string() + "/personal_gaze_model.yml";
        m_calibrator.saveModel(per_model_save_path_);
    }
 }
 // Paint a 640x360 black "simulated screen" with a red dot at the sample's 2D gaze
 // location (gaze2d scaled by the panel size) into the top-left corner of image.
 // The panel is clipped to the image so frames smaller than the panel do not throw.
 void OpenGaze::drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image) {
    static const int dW = 640;
    static const int dH = 360;
    Mat debug_disp = Mat::zeros(Size(dW, dH), CV_8UC3);
    Point2f g_s;
    g_s.x = dW*sample.gaze_data.gaze2d.x;
    g_s.y = dH*sample.gaze_data.gaze2d.y;
    circle(debug_disp, g_s, 10, CV_RGB(255,0,0), -1);
    // intersect the panel with the image bounds; an out-of-range ROI raises a cv::Exception
    const Rect roi = Rect(0, 0, dW, dH) & Rect(0, 0, image.cols, image.rows);
    if (roi.area() == 0)
        return;
    debug_disp(roi).copyTo(image(roi));
 }
 // Draw the estimated gaze as red rays on image.
 // For MPIIGaze with face input a single ray starts at the face center; for MPIIGaze
 // with eye input and for OpenFace one ray is drawn per eye. Each ray is 300 units
 // long along the 3D gaze direction and is projected with the camera intrinsics of
 // the input handler. Nothing is drawn for other method/input combinations.
 void OpenGaze::drawGazeOnFace(opengaze::Sample sample, cv::Mat &image) {
    // draw gaze on the face
    if (gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
        && gaze_estimator_.input_type_ == GazeEstimator::InputType::face) {
        static const float gaze_length = 300.0;
        Mat zero = Mat::zeros(1, 3, CV_32F); // no extra rotation/translation: points are used as given
        // convert before reading with at<float>, as is done for the left eye below
        Mat center;
        sample.face_patch_data.face_center.convertTo(center, CV_32F);
        vector<Point3f> cam_points;
        Vec3f face_center(center.at<float>(0), center.at<float>(1), center.at<float>(2));
        cam_points.emplace_back(face_center);
        cam_points.emplace_back(face_center + gaze_length * sample.gaze_data.gaze3d);
        vector<Point2f> img_points;
        projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
        line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5); // gaze ray
        circle(image, img_points[0], 5, CV_RGB(255,0,0), -1); // staring point
        circle(image, img_points[1], 5, CV_RGB(255,0,0), -1); // end point
    }
    else if ((gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
              && gaze_estimator_.input_type_ == GazeEstimator::InputType::eye)
             || gaze_estimator_.method_type_ == GazeEstimator::Method::OpenFace) {
        int gaze_length = 300;
        Mat zero = Mat::zeros(1, 3, CV_32F);
        vector<Point3f> cam_points;
        // both eye positions are converted to float before being read with at<float>
        Mat leye, reye;
        sample.eye_data.leye_pos.convertTo(leye, CV_32F);
        sample.eye_data.reye_pos.convertTo(reye, CV_32F);
        Vec3f leye_pose(leye.at<float>(0), leye.at<float>(1), leye.at<float>(2));
        cam_points.emplace_back(leye_pose);
        cam_points.emplace_back(leye_pose + gaze_length*sample.gaze_data.lgaze3d);
        Vec3f reye_pose(reye.at<float>(0), reye.at<float>(1), reye.at<float>(2));
        cam_points.emplace_back(reye_pose);
        cam_points.emplace_back(reye_pose + gaze_length*sample.gaze_data.rgaze3d);
        vector<Point2f> img_points;
        projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
        line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5); // left eye ray
        line(image, img_points[2], img_points[3], CV_RGB(255,0,0), 5); // right eye ray
        circle(image, img_points[1], 3, CV_RGB(255,0,0), -1);
        circle(image, img_points[3], 3, CV_RGB(255,0,0), -1);
    }
 }
 // Overlay the face bounding box and the first six landmarks of the sample on
 // image, all in green.
 void OpenGaze::drawLandmarks(opengaze::Sample sample, cv::Mat &image) {
    // bounding box, drawn from its top-left corner to (x + width, y + height)
    const cv::Rect_<int> box = sample.face_data.face_bb;
    rectangle(image, box.tl(), box.br(), CV_RGB(0,255,0), 5);
    // one filled dot per landmark
    int idx = 0;
    while (idx < 6) {
        circle(image, sample.face_data.landmarks[idx], 5, CV_RGB(0,255,0), -1);
        ++idx;
    }
 }
 }
--- a/src/personal_calibrator.cpp
+++ b/src/personal_calibrator.cpp
@ -0,0 +1,145 @@
 #include "personal_calibrator.hpp"
 using namespace cv;
 using namespace std;
 // Mouse callback: sets the bool that the user-data pointer refers to when the left
 // button goes down. Every other event, the cursor position and the flags are ignored.
 void CallBackFunc(int event, int x, int y, int flags, void* is_click)  {
    if (event != EVENT_LBUTTONDOWN)
        return;
    bool* clicked = static_cast<bool*>(is_click);
    *clicked = true;
 }
 // Store the screen geometry, derive the dot radius (1/200 of the screen width) and
 // open the full-screen "calibration" window with the click callback attached.
 PersonalCalibrator::PersonalCalibrator (int screen_width, int screen_height) {
    // screen geometry and the dot size derived from it
    screen_width_ = screen_width;
    screen_height_ = screen_height;
    center_radius_ = static_cast<int>(static_cast<float>(screen_width_) / 200.0f);
    // no click seen yet
    is_click_ = false;
    // full-screen window that will show the calibration dots
    cv::namedWindow("calibration", CV_WINDOW_NORMAL);
    cv::setWindowProperty("calibration", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
    // the callback raises is_click_ on a left-button press
    setMouseCallback("calibration", CallBackFunc, &is_click_);
 }
 // Take the "calibration" window out of full-screen mode and close it.
 PersonalCalibrator::~PersonalCalibrator() {
    const char* const window_name = "calibration";
    cv::setWindowProperty(window_name, CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
    cv::destroyWindow(window_name);
 }
 // Generate num_points random calibration targets in screen pixel coordinates and
 // rewind the point index so that the next shown point is the first one.
 // Points from an earlier call are discarded; the generator is re-seeded from the
 // current time on every call.
 void PersonalCalibrator::generatePoints(int num_points) {
    index_point_ = -1;
    points_.clear(); // the index restarts, so stale points must not remain in front
    srand(time(NULL));
    // guard the modulo below against a zero or negative screen size
    const int width = screen_width_ > 0 ? screen_width_ : 1;
    const int height = screen_height_ > 0 ? screen_height_ : 1;
    Point2i current_point;
    for (int num = 0; num < num_points; ++num) {
        current_point.x = (rand() % width); // pixel column in [0, width - 1]
        current_point.y = (rand() % height); // pixel row in [0, height - 1]
        points_.emplace_back(current_point);
    }
 }
 void PersonalCalibrator::initialWindow() {
    // get the focus of the window
    namedWindow("GetFocus", CV_WINDOW_NORMAL);
    cv::Mat img = cv::Mat::zeros(100, 100, CV_8UC3);
    cv::imshow("GetFocus", img);
    cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
    waitKey(1);
    cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
    cv::destroyWindow("GetFocus");
    // show instruction
    cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
    string show_text = "Please click/touch when looking at the dots";
    cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(255,255,255), 2);
    imshow("calibration", show_img);
    cv::waitKey(3000);
    for (int i=255; i > 0; i-=5) { //Gradient disappear, nice!
        show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
        cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(i,i,i), 2);
        imshow("calibration", show_img);
        cv::waitKey(1);
    }
 }
 // Advance to the next calibration point, draw it as a white dot on a black screen
 // and block until the user reacts.
 // Returns true once a mouse click was registered, false when ESC was pressed or
 // when no further generated point is available.
 bool PersonalCalibrator::showNextPoint() {
    // never step past the last generated point; indexing points_ out of range is undefined
    if (index_point_ + 1 >= static_cast<int>(points_.size()))
        return false;
    cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
    index_point_ ++;
    cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(255, 255, 255), -1);
    is_click_ = false; // discard clicks that happened before this point was shown
    while (true) {
        imshow("calibration", show_img);
        int key = cv::waitKey(10); // wait for interaction
        if (key == 27) // if press the ESC key
            return false;
        if (is_click_) {
            break;
        }
    }
    return true;
 }
 void PersonalCalibrator::confirmClicking() {
    cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
    cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(0, 200, 0), -1);
    imshow("calibration", show_img);
    cv::waitKey(500);
 }
 // this polyfit function is copied from  opencv/modules/contrib/src/polyfit.cpp
 // This original code was written by
 //  Onkar Raut
 //  Graduate Student,
 //  University of North Carolina at Charlotte
 //
 // Least-squares fit of a polynomial mapping from src_x to src_y.
 // The design matrix is [src_x^1 | src_x^2 | ... | src_x^order | 1] (element-wise
 // powers, one row per observation) and the returned matrix solves
 // design * matrix = src_y through the normal equations.
 // src_x and src_y must be CV_32FC1 with the same number of rows, order must be >= 1.
 // Intermediate matrices and the fit error before/after are printed to stdout.
 cv::Mat polyfit(const Mat& src_x, const Mat& src_y, int order)
 {
    CV_Assert((src_x.rows>0)&&(src_y.rows>0)&&(src_x.cols>0)&&(src_y.cols>0)&&(order>=1));
    // one target row per observation
    CV_Assert(src_x.rows == src_y.rows);
    // the design matrix is CV_32FC1; a block of another type would not be copied into it
    CV_Assert(src_x.type() == CV_32FC1 && src_y.type() == CV_32FC1);
    Mat matrix;
    Mat bias = Mat::ones((int)src_x.rows, 1, CV_32FC1);
    Mat input_x = Mat::zeros(src_x.rows, order*src_x.cols, CV_32FC1);
    Mat powered;
    for(int i=1; i<=order;i++){
        // i-th element-wise power goes into the i-th column block
        cv::pow(src_x, i, powered);
        powered.copyTo(input_x(Rect((i-1)*src_x.cols, 0, powered.cols, powered.rows)));
    }
    Mat new_mat;
    cv::hconcat(input_x, bias, new_mat);
    cout << "new_mat: " << new_mat << endl;
    if (!cv::solve(new_mat, src_y, matrix, DECOMP_NORMAL)) {
        // happens e.g. when there are fewer observations than unknowns
        cerr << "Warning: polyfit could not solve the system (singular); the returned model is unreliable." << endl;
    }
    cout << "model_matrix: " << matrix << endl;
    Mat calibrated = new_mat * matrix;
    cout << "calibrated: " << calibrated << endl;
    double dist_original = norm(src_x, src_y, NORM_L2);
    cout << "dist_original: " << dist_original << endl;
    double dist_calibrated = norm(calibrated, src_y, NORM_L2);
    cout << "dist_calibrated: " << dist_calibrated << endl;
    return matrix;
 }
 // Fit the personal calibration model that maps predicted 2D gaze points to their
 // ground-truth positions with a polynomial of the given order, and keep the result
 // in model_matrix_. Both vectors must be non-empty and of equal length.
 void PersonalCalibrator::generateModel(vector<Point2f> prediction, vector<Point2f> ground_truth, int order) {
    // the matrices below wrap the vectors' storage, so the sizes must be valid up front
    CV_Assert(!prediction.empty() && prediction.size() == ground_truth.size());
    // view each vector of points as an N x 2 float matrix (x in column 0, y in column 1)
    cv::Mat input_x = cv::Mat((int)prediction.size(), 2, CV_32FC1, prediction.data());
    cv::Mat input_y = cv::Mat((int)ground_truth.size(), 2, CV_32FC1, ground_truth.data());
    cout << "input_x: " << input_x << endl;
    cout << "input_y: " << input_y << endl;
    model_matrix_ = polyfit(input_x, input_y, order);
 }
 void PersonalCalibrator::saveModel(std::string file_path) {
    cv::FileStorage storage(file_path, cv::FileStorage::WRITE);
    storage << model_matrix_;
    storage.release();  
 }
		`@ -0,0 +1,3 @@`
							`# Release 0.1.0`

							`Initial release of OpenGaze.`