initial commit

Xucong Zhang 2019-01-10 13:26:03 +01:00
commit e505acdb29
41 changed files with 2922 additions and 0 deletions

32
.gitignore vendored Normal file
@@ -0,0 +1,32 @@
### C++ ###
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
### CMake ###
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake

90
CMakeLists.txt Normal file
@@ -0,0 +1,90 @@
cmake_minimum_required(VERSION 3.0)
project(OpenGaze VERSION 0.1)
set(CMAKE_BUILD_TYPE Release)
# create a directory for models and configuration files
set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze")
add_definitions(-DOPENGAZE_CON_DIR="${OPENGAZE_DIR}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CXX_STANDARD 11)
# OpenCV
find_package( OpenCV 3.4 REQUIRED COMPONENTS core imgproc calib3d highgui objdetect)
# Boost, for reading configuration file
find_package(Boost 1.5 COMPONENTS system filesystem timer thread program_options REQUIRED)
set(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR} ${Boost_INCLUDE_DIR}/boost)
# Caffe
set(CAFFE_INSTALL_DIR "/home/xucong/library/caffe/build/install")
set(Caffe_INCLUDE_DIRS ${CAFFE_INSTALL_DIR}/include)
set(Caffe_LIBRARY_DIRS ${CAFFE_INSTALL_DIR}/lib)
set(Caffe_LIBS lmdb glog caffe)
# Face and facial landmark detection methods
option(USE_OPENFACE "with OpenFace" ON)
add_definitions(-DUSE_OPENFACE=1)
# OpenFace
set(OPENFACE_ROOT_DIR "/home/xucong/library/OpenFace")
add_definitions(-DOPENFACE_DIR="${OPENFACE_ROOT_DIR}")
set(CLM_INCLUDE_DIRS ${OPENFACE_ROOT_DIR}/lib/local/LandmarkDetector/include)
set(CLM_LIBRARY_DIRS ${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector)
set(CLM_LIBS LandmarkDetector tbb openblas dlib)
set(USE_OPENFACE ON) # we use OpenFace method here
# suppress auto_ptr deprecation warnings
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
add_compile_options("-Wno-deprecated-declarations")
endif()
include_directories(./ ./include /usr/local/cuda/include ${OpenCV_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} ${CLM_INCLUDE_DIRS} ${Caffe_INCLUDE_DIRS})
link_directories(./ ./build/lib /usr/lib /usr/local/cuda/lib64 ${Boost_LIBRARY_DIRS} ${CLM_LIBRARY_DIRS} ${Caffe_LIBRARY_DIRS})
file(GLOB SOURCE "./src/*.cpp")
file(GLOB HEADERS "./include/*.hpp")
# compile opengaze library
add_library(opengaze SHARED ${SOURCE} ${HEADERS})
set_target_properties(opengaze PROPERTIES VERSION ${PROJECT_VERSION})
#if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
# set (CMAKE_INSTALL_PREFIX "/usr/local" CACHE PATH "default install path" FORCE )
#endif()
install (TARGETS opengaze EXPORT OpenGazeTargets LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
install (FILES ${HEADERS} DESTINATION include/opengaze)
# install caffe and OpenFace
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/3rdParty" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (FILES ${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector/libLandmarkDetector.a DESTINATION ${OPENGAZE_DIR}/3rdParty)
install (FILES ${Caffe_LIBRARY_DIRS}/libcaffe.so DESTINATION ${OPENGAZE_DIR}/3rdParty)
install (FILES ${Caffe_LIBRARY_DIRS}/libcaffe.so.1.0.0 DESTINATION ${OPENGAZE_DIR}/3rdParty)
# install configuration files
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/content" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/content/calib" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/content/model" DIRECTORY_PERMISSIONS
OWNER_WRITE OWNER_READ OWNER_EXECUTE
GROUP_WRITE GROUP_READ GROUP_EXECUTE
WORLD_WRITE WORLD_READ WORLD_EXECUTE)
install (FILES ./content/calib/calibration.yml DESTINATION ${OPENGAZE_DIR}/content/calib PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
install (FILES ./content/calib/monitor_laptop.yml DESTINATION ${OPENGAZE_DIR}/content/calib PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
install (FILES ./content/model/face_model.yml DESTINATION ${OPENGAZE_DIR}/content/model PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
install (FILES default.cfg DESTINATION ${OPENGAZE_DIR} PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
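To build against this CMakeLists.txt, a standard out-of-source CMake build should work. Note that CAFFE_INSTALL_DIR and OPENFACE_ROOT_DIR above are hard-coded and must first be edited to point to your local Caffe and OpenFace installations; a minimal sketch:
mkdir -p build && cd build
cmake -D CMAKE_BUILD_TYPE=Release ..
make -j4
sudo make install   # installs libopengaze and copies the configuration files into ~/OpenGaze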

55
README.md Normal file
@@ -0,0 +1,55 @@
# OpenGaze: Open Source Toolkit for Camera-Based Gaze Estimation and Interaction
<!--The current demo video includes clips from Friends, which may violate the copyright. Although people think 28 seconds could be a boundary:https://productforums.google.com/forum/#!topic/youtube/rQhkI20Rm8k, there is no golden rule for it: https://www.youtube.com/yt/about/copyright/fair-use/#yt-copyright-protection-->
<!--//[![Demo](https://img.youtube.com/vi/OORxOdu8USQ/0.jpg)](https://youtu.be/OORxOdu8USQ "OpenGaze Friends Demo")-->
Appearance-based gaze estimation methods that only require an off-the-shelf camera have significantly improved and promise a wide range of new applications in gaze-based interaction and attentive user interfaces. However, these methods are not yet widely used in the human-computer interaction (HCI) community.
To democratize their use in HCI, we present OpenGaze, the first software toolkit that is specifically developed for gaze interface designers. OpenGaze is open source and aims to implement state-of-the-art methods for camera-based gaze estimation and interaction.
<img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_mpiinf.png" height="80"/><img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_pui.png" height="80"><img src="https://github.molgen.mpg.de/perceptual/opengaze/blob/master/imgs/logo_osaka-u.png" height="80">
## Functionality
The toolkit is capable of performing the following gaze-related tasks:
* **Gaze Estimation**
Show the estimated gaze point on the screen, given the screen-camera relationship.
[![Demo](https://img.youtube.com/vi/R1vb7mV3y_M/0.jpg)](https://youtu.be/R1vb7mV3y_M "Gaze visualization demo")
<p>&nbsp;</p>
* **Gaze Visualization**
Show gaze directions originating from the center of each face in the input image.
[![Demo](https://img.youtube.com/vi/8yMTvvr0rRU/0.jpg)](https://youtu.be/8yMTvvr0rRU "Gaze visualization demo")
<p>&nbsp;</p>
* **Personal Calibration**
Perform personal calibration and remap the gaze target onto the screen.
[![Demo](https://img.youtube.com/vi/ntBv1wcNGAo/0.jpg)](https://youtu.be/ntBv1wcNGAo "Gaze visualization demo")
<p>&nbsp;</p>
## Installation
[Unix Installation](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Unix-Installation)
## Use
[Command line arguments](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Command-line-arguments)
## Citation
If you use any of the resources provided on this page in any of your publications, please cite the following paper:
**Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications** <br/>
Xucong Zhang, Yusuke Sugano, Andreas Bulling<br/>
Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), 2019<br/>
BibTex, PDF
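A BibTeX entry assembled from the information above; the citation key is a placeholder, so check the official entry before use:

```bibtex
@inproceedings{zhang2019opengaze,
  author    = {Zhang, Xucong and Sugano, Yusuke and Bulling, Andreas},
  title     = {Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications},
  booktitle = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)},
  year      = {2019}
}
```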
## License
The license agreement can be found in Copyright.txt.
You also have to respect the licenses of Boost, OpenFace, and OpenCV.
Furthermore, you have to respect the licenses of the datasets used for [model training](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Model-training).

3
RELEASE.md Normal file
@@ -0,0 +1,3 @@
# Release 0.1.0
Initial release of OpenGaze.

@@ -0,0 +1,50 @@
#ifndef CAFFE_DSPP_LAYER_HPP_
#define CAFFE_DSPP_LAYER_HPP_
#include <string>
#include <utility>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layers/data_layer.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/loss_layer.hpp"
#include "caffe/layers/neuron_layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
template <typename Dtype>
class DSPPLayer : public Layer<Dtype> {
public:
explicit DSPPLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "DSPPLayer"; }
virtual inline int ExactNumBottomBlobs() const { return 2; };
virtual inline int MinTopBlobs() const { return 1; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
//virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
// const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
//virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
// const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
int width_;
int height_;
int channel_;
int num_;
};
} // namespace caffe
#endif // CAFFE_DSPP_LAYER_HPP_

@@ -0,0 +1,56 @@
#ifndef CAFFE_POSE_DATA_LAYER_HPP_
#define CAFFE_POSE_DATA_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/base_data_layer.hpp"
namespace caffe {
template <typename Dtype>
class PoseDataLayer : public BaseDataLayer<Dtype> {
public:
explicit PoseDataLayer(const LayerParameter& param)
: BaseDataLayer<Dtype>(param), has_new_data_(false) {}
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "PoseData"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 2; }
virtual void AddDatumVector(const vector<Datum>& datum_vector);
virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
const vector<float>& labels);
// Reset should accept const pointers, but can't, because the memory
// will be given to Blob, which is mutable
void Reset(Dtype* data, Dtype* label, int n);
void set_batch_size(int new_size);
int batch_size() { return batch_size_; }
int channels() { return channels_; }
int height() { return height_; }
int width() { return width_; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
int batch_size_, channels_, height_, width_, size_;
Dtype* data_;
Dtype* labels_;
int n_;
size_t pos_;
Blob<Dtype> added_data_;
Blob<Dtype> added_label_;
bool has_new_data_;
};
} // namespace caffe
#endif

@@ -0,0 +1,92 @@
#include <cmath>
#include <algorithm>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/layers/dspp_layer.hpp"
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
namespace caffe {
template <typename Dtype>
void DSPPLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
}
template <typename Dtype>
void DSPPLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
num_ = bottom[1]->shape()[0];
channel_ = bottom[1]->shape()[1]; // the input data size
height_ = bottom[1]->shape()[2];
width_ = bottom[1]->shape()[3];
// init output size
vector<int> output_shape;
output_shape.push_back(num_);
output_shape.push_back(channel_);
output_shape.push_back(height_);
output_shape.push_back(width_);
top[0]->Reshape(output_shape);
}
template <typename Dtype>
void DSPPLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
Dtype* top_data = top[0]->mutable_cpu_data();
caffe_set<Dtype>(top[0]->count(), 0, top_data); // initialize to 0
for (int n=0; n<num_; ++n) {
for (int h = 0; h < height_; ++h) { // for the input data size
for (int w = 0; w < width_; ++w) {
for (int c = 0; c < channel_; ++c) {
top_data[top[0]->offset(n, c, h, w)] = bottom[1]->data_at(n, c, h, w) * bottom[0]->data_at(n, 0, h, w);
}
}
}
}
top_data = NULL;
}
template <typename Dtype>
void DSPPLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[0]) {
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* data_diff = bottom[1]->mutable_cpu_diff();
Dtype* heat_map_diff = bottom[0]->mutable_cpu_diff();
caffe_set<Dtype>(bottom[1]->count(), 0, data_diff);
caffe_set<Dtype>(bottom[0]->count(), 0, heat_map_diff);
// Dtype activation_h, activation_w;
for (int n = 0; n < num_; ++n) {
for (int h = 0; h < height_; ++h) {
for (int w = 0; w < width_; ++w) {
for (int c = 0; c < channel_; ++c) {
Dtype buffer = top_diff[top[0]->offset(n, c, h, w)];
data_diff[bottom[1]->offset(n, c, h, w)] = buffer * (bottom[0]->data_at(n, 0, h, w));
buffer *= bottom[1]->data_at(n,c,h,w) / channel_;
heat_map_diff[bottom[0]->offset(n,0,h,w)] += buffer;
}
}
}
}
top_diff = NULL;
data_diff = NULL;
heat_map_diff = NULL;
}
}
INSTANTIATE_CLASS(DSPPLayer);
REGISTER_LAYER_CLASS(DSPP);
} // namespace caffe

@@ -0,0 +1,128 @@
#include <opencv2/core/core.hpp>
#include <vector>
#include "caffe/layers/pose_data_layer.hpp"
namespace caffe {
template <typename Dtype>
void PoseDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
batch_size_ = this->layer_param_.memory_data_param().batch_size();
channels_ = this->layer_param_.memory_data_param().channels();
height_ = this->layer_param_.memory_data_param().height();
width_ = this->layer_param_.memory_data_param().width();
size_ = channels_ * height_ * width_;
CHECK_GT(batch_size_ * size_, 0) <<
"batch_size, channels, height, and width must be specified and"
" positive in memory_data_param";
int label_shape_[] = {batch_size_, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
top[0]->Reshape(batch_size_, channels_, height_, width_);
top[1]->Reshape(label_shape);
added_data_.Reshape(batch_size_, channels_, height_, width_);
added_label_.Reshape(label_shape);
data_ = NULL;
labels_ = NULL;
added_data_.cpu_data();
added_label_.cpu_data();
}
template <typename Dtype>
void PoseDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {
CHECK(!has_new_data_) <<
"Can't add data until current data has been consumed.";
size_t num = datum_vector.size();
CHECK_GT(num, 0) << "There is no datum to add.";
CHECK_EQ(num % batch_size_, 0) <<
"The added data must be a multiple of the batch size.";
added_data_.Reshape(num, channels_, height_, width_);
int label_shape_[] = {(int)num, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
added_label_.Reshape(label_shape);
// Apply data transformations (mirror, scale, crop...)
this->data_transformer_->Transform(datum_vector, &added_data_);
// Copy Labels
Dtype* top_label = added_label_.mutable_cpu_data();
for (int item_id = 0; item_id < num; ++item_id) {
top_label[item_id] = datum_vector[item_id].label();
}
// num_images == batch_size_
Dtype* top_data = added_data_.mutable_cpu_data();
Reset(top_data, top_label, num);
has_new_data_ = true;
}
template <typename Dtype>
void PoseDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,
const vector<float>& labels) {
size_t num = mat_vector.size();
CHECK(!has_new_data_) <<
"Can't add mat until current data has been consumed.";
CHECK_GT(num, 0) << "There is no mat to add";
CHECK_EQ(num % batch_size_, 0) <<
"The added data must be a multiple of the batch size.";
added_data_.Reshape(num, channels_, height_, width_);
int label_shape_[] = {(int)num, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
added_label_.Reshape(label_shape);
// Apply data transformations (mirror, scale, crop...)
this->data_transformer_->Transform(mat_vector, &added_data_);
// Copy Labels
Dtype* top_label = added_label_.mutable_cpu_data();
for (int item_id = 0; item_id < num; ++item_id) {
top_label[item_id] = labels[item_id];
}
// num_images == batch_size_
Dtype* top_data = added_data_.mutable_cpu_data();
Reset(top_data, top_label, num);
has_new_data_ = true;
}
template <typename Dtype>
void PoseDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {
CHECK(data);
CHECK(labels);
CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";
// Warn with transformation parameters since a memory array is meant to
// be generic and no transformations are done with Reset().
//if (this->layer_param_.has_transform_param()) {
// LOG(WARNING) << this->type() << " does not transform array data on Reset()";
//}
data_ = data;
labels_ = labels;
n_ = n;
pos_ = 0;
}
template <typename Dtype>
void PoseDataLayer<Dtype>::set_batch_size(int new_size) {
CHECK(!has_new_data_) <<
"Can't change batch_size until current data has been consumed.";
batch_size_ = new_size;
added_data_.Reshape(batch_size_, channels_, height_, width_);
int label_shape_[] = {(int)batch_size_, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
added_label_.Reshape(label_shape);
}
template <typename Dtype>
void PoseDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
CHECK(data_) << "PoseDataLayer needs to be initalized by calling Reset";
top[0]->Reshape(batch_size_, channels_, height_, width_);
int label_shape_[] = {(int)batch_size_, 4};
vector<int> label_shape(label_shape_, label_shape_+2);
added_label_.Reshape(label_shape);
top[0]->set_cpu_data(data_ + pos_ * size_);
top[1]->set_cpu_data(labels_ + pos_);
pos_ = (pos_ + batch_size_) % n_;
if (pos_ == 0)
has_new_data_ = false;
}
INSTANTIATE_CLASS(PoseDataLayer);
REGISTER_LAYER_CLASS(PoseData);
} // namespace caffe

11
content/calib/calibration.yml Executable file
@@ -0,0 +1,11 @@
%YAML:1.0
camera_matrix: !!opencv-matrix
rows: 3
cols: 3
dt: f
data: [ 1891.07, 0.0, 640, 0.0, 1891.07, 360, 0.0, 0.0, 1.0]
dist_coeffs: !!opencv-matrix
rows: 1
cols: 5
dt: f
data: [1.68091e-02, -7.14552e-02, -5.65886e-03, -5.23482e-04, -3.39946e-02]
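The calibration files are stored in OpenCV's YAML format, so they can be read directly with cv::FileStorage; a minimal sketch (the file path is an assumption):
#include <iostream>
#include <opencv2/opencv.hpp>

int main() {
    cv::FileStorage fs("content/calib/calibration.yml", cv::FileStorage::READ);
    cv::Mat camera_matrix, dist_coeffs;
    fs["camera_matrix"] >> camera_matrix; // 3x3 intrinsic matrix
    fs["dist_coeffs"] >> dist_coeffs;     // 1x5 distortion coefficients
    std::cout << camera_matrix << std::endl << dist_coeffs << std::endl;
    return 0;
}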

@@ -0,0 +1,13 @@
%YAML:1.0
monitor_W: 516
monitor_H: 323
monitor_R: !!opencv-matrix
rows: 3
cols: 3
dt: f
data: [ -0.99955, -0.02891, -0.0082861, -0.028948, 0.99957, 0.0044949, 0.0081526, 0.0047327, -0.99996]
monitor_T: !!opencv-matrix
rows: 3
cols: 1
dt: f
data: [269.41, 48.561, 5.8344]

@@ -0,0 +1,13 @@
%YAML:1.0
monitor_W: 310
monitor_H: 174
monitor_R: !!opencv-matrix
rows: 3
cols: 3
dt: f
data: [ -0.99988, -0.009735, -0.01203, -0.0094674, 0.99971, -0.022108, 0.012242, -0.021992, -0.99968]
monitor_T: !!opencv-matrix
rows: 3
cols: 1
dt: f
data: [149.91, 29.575, -18.884]

6
content/model/face_model.yml Executable file
@@ -0,0 +1,6 @@
%YAML:1.0
face_model: !!opencv-matrix
rows: 3
cols: 6
dt: f
data: [ -45.096768, -21.312858, 21.312858, 45.096768, -26.299577, 26.299577, -0.483773,0.483773, 0.483773, -0.483773, 68.595035,68.595035, 2.397030, -2.397030, -2.397030, 2.397030, -0.000000, -0.000000]

26
default.cfg Normal file
@@ -0,0 +1,26 @@
## input and output
# input_type = camera # camera, video, or directory
# input = 0 # camera id, video file name, or directory of image files
# output = /BS/zhang-semi/work/opengaze/test/
# input = YOUR_VIDEO OR IMAGE FOLDER
# output = MUST BE A DIRECTORY
## gaze estimation method
# gaze_method = MPIIGaze # OpenFace MPIIGaze
# gpu_id = 0
## gaze estimation method/model selection
# face_model = 1 # 1 for the face model, 0 for eye image model
## CNN model for face image, trained on MPIIGaze + EYEDIAP HD
# cnn_param_path = YOUR_PATH/alexnet_face.prototxt
# cnn_model_path = YOUR_PATH/alexnet_face.caffemodel
## camera and screen calibration files
# calib_camera = YOUR_PATH/calibration.yml
# calib_screen = YOUR_PATH/monitor.yml
## parameters for personal calibration
# per_model_save_path = YOUR_PATH/user1.txt
# num_calibration = 9
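For reference, a filled-in configuration might look like the following; the keys are the ones documented above, the paths are placeholders following the install layout (replace USER), and absolute paths are the safer assumption since support for ~ expansion is not specified:
input_type = camera
input = 0
output = /home/USER/opengaze_output/
gaze_method = MPIIGaze
gpu_id = 0
face_model = 1
cnn_param_path = /home/USER/OpenGaze/content/caffeModel/alexnet_face.prototxt
cnn_model_path = /home/USER/OpenGaze/content/caffeModel/alexnet_face.caffemodel
calib_camera = /home/USER/OpenGaze/content/calib/calibration.yml
calib_screen = /home/USER/OpenGaze/content/calib/monitor_laptop.yml
per_model_save_path = /home/USER/OpenGaze/user1.txt
num_calibration = 9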

9
download_models.sh Executable file
@@ -0,0 +1,9 @@
OPENGAZE_DIR=~/OpenGaze
mkdir -p $OPENGAZE_DIR/content/caffeModel
cd $OPENGAZE_DIR/content/caffeModel
wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.prototxt
wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.caffemodel

36
exe/CMakeLists.txt Normal file
@@ -0,0 +1,36 @@
cmake_minimum_required(VERSION 3.0)
project(OpenGazeExe VERSION 1.0)
set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze")
add_definitions(-DOPENGAZE_DIR="${CMAKE_SOURCE_DIR}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_CXX_STANDARD 11)
find_package( OpenCV 3.1 REQUIRED COMPONENTS calib3d highgui objdetect imgproc core)
# Boost, for reading configuration file
find_package(Boost 1.5 COMPONENTS system filesystem timer thread program_options REQUIRED)
set(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR} ${Boost_INCLUDE_DIR}/boost)
include_directories(/usr/local/include/opengaze /usr/local/cuda/include ${OpenCV_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
link_directories(/usr/lib /usr/local/lib /usr/local/cuda/lib64 ${Boost_LIBRARY_DIRS} ${OPENGAZE_DIR}/3rdParty)
## -lX11 is for getting screen resolution in pixel in the Linux system
set(LIBS opengaze LandmarkDetector ${OpenCV_LIBS} ${Boost_LIBRARIES} tbb openblas dlib lmdb glog caffe X11)
add_executable(GazeVisualization GazeVisualization.cpp)
target_link_libraries(GazeVisualization ${LIBS})
add_executable(Calibration Calibration.cpp)
target_link_libraries(Calibration ${LIBS})
add_executable(GazeEstimation GazeEstimation.cpp)
target_link_libraries(GazeEstimation ${LIBS})
add_executable(DataExtraction DataExtraction.cpp)
target_link_libraries(DataExtraction ${LIBS})

29
exe/Calibration.cpp Normal file
@@ -0,0 +1,29 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"
using namespace std;
using namespace cv;
using namespace opengaze;
vector<string> get_arguments(int argc, char **argv) {
vector<string> arguments;
for (int i = 0; i < argc; ++i){
arguments.emplace_back(string(argv[i]));
}
return arguments;
}
int main(int argc, char** argv)
{
vector<string> arguments = get_arguments(argc, argv);
OpenGaze open_gaze(argc, argv);
int num_calibration_point = 20;
open_gaze.runPersonalCalibration(num_calibration_point);
return 1;
}

28
exe/DataExtraction.cpp Normal file
@@ -0,0 +1,28 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"
using namespace std;
using namespace cv;
using namespace opengaze;
vector<string> get_arguments(int argc, char **argv) {
vector<string> arguments;
for (int i = 0; i < argc; ++i){
arguments.emplace_back(string(argv[i]));
}
return arguments;
}
int main(int argc, char** argv)
{
vector<string> arguments = get_arguments(argc, argv);
OpenGaze open_gaze(argc, argv);
open_gaze.runDataExtraction();
return 1;
}

28
exe/GazeEstimation.cpp Normal file
@@ -0,0 +1,28 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"
using namespace std;
using namespace cv;
using namespace opengaze;
vector<string> get_arguments(int argc, char **argv) {
vector<string> arguments;
for (int i = 0; i < argc; ++i){
arguments.emplace_back(string(argv[i]));
}
return arguments;
}
int main(int argc, char** argv)
{
vector<string> arguments = get_arguments(argc, argv);
OpenGaze open_gaze(argc, argv);
open_gaze.runGazeOnScreen();
return 1;
}

28
exe/GazeVisualization.cpp Normal file
@@ -0,0 +1,28 @@
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#include "opengaze/opengaze.hpp"
using namespace std;
using namespace cv;
using namespace opengaze;
vector<string> get_arguments(int argc, char **argv) {
vector<string> arguments;
for (int i = 0; i < argc; ++i){
arguments.emplace_back(string(argv[i]));
}
return arguments;
}
int main(int argc, char** argv)
{
vector<string> arguments = get_arguments(argc, argv);
OpenGaze open_gaze(argc, argv);
open_gaze.runGazeVisualization();
return 1;
}

BIN
exe/test.mp4 Normal file

Binary file not shown.

BIN
imgs/logo_mpiinf.png Normal file

Binary file not shown.


BIN
imgs/logo_osaka-u.png Executable file

Binary file not shown.


BIN
imgs/logo_pui.png Normal file

Binary file not shown.


88
include/data.hpp Normal file
@@ -0,0 +1,88 @@
#ifndef DATA_HPP
#define DATA_HPP
#include <opencv2/opencv.hpp>
namespace opengaze{
/**
* face and facial landmark detection data
* @param face_id personal id from tracking across frames
* @param certainty detection score, 1 is the best, -1 is the worst
* @param landmarks the six detected facial landmarks: four eye corners and two mouth corners
* @param face_bb detected face bounding box
*/
struct FaceData
{
unsigned long face_id;
double certainty;
cv::Point2f landmarks[6];
cv::Rect_<int> face_bb;
};
/**
* eye image related data
* @param leye_pos/reye_pos 3D eyeball center positions of the left and right eyes in the original camera coordinate system
* @param leye_img/reye_img eye image
* @param leye_rot/reye_rot rotation matrix during the data normalization procedure
*/
struct EyeData
{
// cv::Mat head_r, head_t;
cv::Mat leye_pos, reye_pos; //
// normalized eyes
cv::Mat leye_img, reye_img;
cv::Mat leye_rot, reye_rot;
};
/**
* face patch data related to data normalization
* @param head_r head pose as center of the face
* @param head_t head translation as center of the face
* @param face_rot rotation matrix during the data normalization procedure
* @param face_center 3D face center in the original camera coordinate system
* @param debug_img use for debug to show the normalized face image
* @param face_patch normalized face image
*/
struct FacePatchData
{
cv::Mat head_r, head_t;
cv::Mat face_rot;
cv::Mat face_center;
cv::Mat debug_img;
cv::Mat face_patch;
};
/**
* gaze data
* @param lgaze3d/rgaze3d gaze directions of the left and right eyes in the camera coordinate system
* @param gaze3d gaze direction estimated from the face patch in the camera coordinate system
* @param lgaze2d/rgaze2d projected gaze positions on the screen from the left and right eyes
* @param gaze2d projected gaze positions from face patch on the screen coordinate
*/
struct GazeData
{
cv::Vec3f lgaze3d, rgaze3d;
cv::Vec3f gaze3d;
cv::Point2f lgaze2d, rgaze2d;
cv::Point2f gaze2d;
};
/**
* The general output data structure
* @param face_data store face and facial landmark detection data
* @param eye_data store data related to eye image input
* @param face_patch_data normalized face patch data
* @param gaze_data gaze data in 2D and 3D spaces
*/
struct Sample
{
FaceData face_data;
EyeData eye_data;
FacePatchData face_patch_data;
GazeData gaze_data;
};
}
#endif //DATA_HPP
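As a small usage sketch (not part of the original file), a caller might read these structures after gaze estimation like this; GazeEstimator::estimateGaze fills a std::vector<Sample>:
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "data.hpp"

// Print the per-face results stored in a vector of Samples.
void printSamples(const std::vector<opengaze::Sample> &samples) {
    for (const auto &s : samples) {
        std::cout << "face id " << s.face_data.face_id
                  << ", certainty " << s.face_data.certainty << std::endl;
        std::cout << "  3D gaze (camera coordinates): " << s.gaze_data.gaze3d << std::endl;
        std::cout << "  2D gaze on the screen: " << s.gaze_data.gaze2d << std::endl;
    }
}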

72
include/face_detector.hpp Normal file
@@ -0,0 +1,72 @@
#ifndef FACE_DETECTOR_HPP
#define FACE_DETECTOR_HPP
#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>
#if USE_DLIB
// if we use dlib
#include <dlib/opencv.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/image_processing.h>
#include <dlib/gui_widgets.h>
#include <dlib/image_io.h>
#endif
#include "data.hpp"
namespace opengaze{
class FaceDetector {
public:
FaceDetector();
~FaceDetector();
/**
* face and facial landmark detection method selection
* The current implementation only includes OpenFace; OpenFace uses dlib for face detection
*/
enum Method{OpenFace, OpenCV, Dlib};
/**
* main function to detect and track face and facial landmarks
* @param input_img input image
* @param output output data structure
*/
void track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output);
void reset();
void setMethodType(Method method_type) {method_type_ = method_type;}
Method getMethodType() {return method_type_;}
void initialize(int number_users);
private:
Method method_type_;
#if USE_DLIB
dlib::frontal_face_detector dlib_detector_;
dlib::shape_predictor dlib_sp_;
#endif
// parameters for OpenFace
std::vector<bool> active_models_;
unsigned long num_faces_max_;
int detection_skip_frames_, tracking_loss_limit_;
float detection_resize_rate_;
float nonoverlap_threshold_;
double certainty_threshold_;
int landmark_indices_[6];
int frame_counter_;
unsigned long current_face_id_;
std::vector<unsigned long> face_ids_;
};
}
#endif //FACE_DETECTOR_HPP
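A sketch of running the detector on its own; it assumes the OpenFace models are installed under OPENFACE_DIR as configured in CMakeLists.txt, and the image path is a placeholder (OpenGaze normally drives this class through GazeEstimator):
#include <vector>
#include <opencv2/opencv.hpp>
#include "face_detector.hpp"

int main() {
    opengaze::FaceDetector detector;
    detector.initialize(1); // track at most one face
    cv::Mat frame = cv::imread("test.jpg");
    std::vector<opengaze::Sample> samples;
    detector.track_faces(frame, samples);
    for (const auto &s : samples) {
        cv::rectangle(frame, s.face_data.face_bb, cv::Scalar(0, 255, 0), 2);
        for (const auto &p : s.face_data.landmarks)
            cv::circle(frame, cv::Point(p), 2, cv::Scalar(0, 0, 255), -1);
    }
    cv::imwrite("detections.jpg", frame);
    return 0;
}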

@@ -0,0 +1,65 @@
#ifndef GAZE_ESTIMATOR_HPP
#define GAZE_ESTIMATOR_HPP
#include <opencv2/opencv.hpp>
#include "data.hpp"
#include "face_detector.hpp"
#include "normalizer.hpp"
#include "gaze_predictor.hpp"
namespace opengaze{
class GazeEstimator {
public:
GazeEstimator();
~GazeEstimator();
/**
* In the current implementation, only the "MPIIGaze" method is available; it takes the input face/eye image
* and outputs the gaze direction directly. It is an appearance-based method. "OpenFace" can also output
* a gaze vector based on its pupil detection results, but the "OpenFace" implementation is not
* included in the OpenGaze toolkit yet.
*/
enum Method{MPIIGaze, OpenFace};
/**
* for the "MPIIGaze" method, the input image can be face or eye. The full-face patch model can output
* more accurate gaze prediction than the eye image model, while the eye image base model is much faster.
*/
enum InputType{face, eye};
/**
* the main function to estimate gaze.
* It performs face and facial landmark detection, head pose estimation, and then gaze prediction.
* @param input_image input scene image
* @param output data structure for output
*/
void estimateGaze(cv::Mat input_image, std::vector<opengaze::Sample> &output);
void getImagePatch(cv::Mat input_image, std::vector<opengaze::Sample> &outputs);
void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist);
void setRootPath(std::string root_path);
void setMethod(Method, std::vector<std::string> arguments);
void initialFaceDetector(int number_users);
Method method_type_;
InputType input_type_; // the input type
private:
// class instances
FaceDetector face_detector_;
Normalizer normalizer_;
GazePredictor gaze_predictor_;
// camera intrinsic matrix
cv::Mat camera_matrix_;
// camera distortion matrix
cv::Mat camera_dist_;
// the root path is used to load configuration files and models
std::string root_path_;
};
}
#endif //GAZE_ESTIMATOR_HPP
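A sketch of driving GazeEstimator directly; the argument order for setMethod ({prototxt, caffemodel, "face" or "eye", GPU id}) is inferred from src/gaze_estimator.cpp, and all paths are placeholders:
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "gaze_estimator.hpp"

int main() {
    opengaze::GazeEstimator estimator;
    estimator.setRootPath("/home/USER/OpenGaze"); // directory holding content/model/face_model.yml
    estimator.initialFaceDetector(1);             // number of users to track

    // camera intrinsics, e.g. from content/calib/calibration.yml
    cv::Mat camera_matrix, camera_dist;
    cv::FileStorage fs("/home/USER/OpenGaze/content/calib/calibration.yml", cv::FileStorage::READ);
    fs["camera_matrix"] >> camera_matrix;
    fs["dist_coeffs"] >> camera_dist;
    estimator.setCameraParameters(camera_matrix, camera_dist);

    std::vector<std::string> args = {"alexnet_face.prototxt", "alexnet_face.caffemodel", "face", "0"};
    estimator.setMethod(opengaze::GazeEstimator::Method::MPIIGaze, args);

    cv::Mat frame = cv::imread("frame.jpg");
    std::vector<opengaze::Sample> samples;
    estimator.estimateGaze(frame, samples); // fills 3D gaze directions in camera coordinates
    return 0;
}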

@@ -0,0 +1,29 @@
#ifndef GAZE_PREDICTOR_HPP
#define GAZE_PREDICTOR_HPP
#include <opencv2/opencv.hpp>
#include "data.hpp"
#include "face_detector.hpp"
namespace opengaze{
class GazePredictor {
public:
GazePredictor();
~GazePredictor();
void initiaMPIIGaze(std::vector<std::string> arguments);
cv::Point3f predictGazeMPIIGaze(cv::Mat face_patch);
private:
int model_type_;
bool is_extract_feature;
};
}
#endif //GAZE_PREDICTOR_HPP

125
include/input_handler.hpp Normal file
@@ -0,0 +1,125 @@
#ifndef INPUT_HANDLER_HPP
#define INPUT_HANDLER_HPP
#include <opencv2/opencv.hpp>
#include <vector>
#include <iostream>
#include <boost/filesystem.hpp>
#include "data.hpp"
namespace opengaze {
class InputHandler {
public:
enum InputType {Camera, Video, Image, Directory, Memory};
InputHandler();
~InputHandler();
/**
* set the camera intrinsic parameters
* @param camera_matrix camera intrinsic matrix
* @param camera_dist camera distortion coefficients
*/
void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist){
camera_matrix_ = std::move(camera_matrix);
camera_distortion_ = std::move(camera_dist);
}
/**
* function to return the next sample; it can come from any input source
* @return next sample
*/
cv::Mat getNextSample();
/**
* set the input type
* @param type the input type, as defined by the InputType enum
*/
void setInputType(InputType type){input_type_ = type;}
/**
* set the input.
* Depending on the input type, the meaning of the value differs:
* for "Camera" it is the camera id,
* for "Video" it is the video file name,
* and for "Directory" it is the directory path.
*/
void setInput(int camera_id) {camera_id_ = camera_id;}
void setInput(std::vector<cv::Mat> images) {images_ = std::move(images);}
void setInput(std::string input_path);
/**
* read the parameters related to the screen
* @param calib_file file for the configuration
*/
void readScreenConfiguration(std::string calib_file);
/**
* read the camera intrinsic parameters from the configuration file
* @param calib_file file for the configuration
*/
void readCameraConfiguration(std::string calib_file);
/**
* Once the 3D gaze vector is obtained, it needs to be projected onto the 2D screen.
* This function also needs to know whether the full-face model is used,
* since the origin of the gaze vector is the center of the face for the full-face model
* and the eye center for the eye-based models.
* @param input input data containing the 3D gaze vector
* @param is_face_model boolean indicating whether the gaze vector comes from the face model or the eye model
*/
void projectToDisplay(std::vector<opengaze::Sample> &input, bool is_face_model=true);
int getFrameHeight(){return cap_.get(cv::CAP_PROP_FRAME_HEIGHT);}
int getFrameWidth(){return cap_.get(cv::CAP_PROP_FRAME_WIDTH);}
InputType getInputType() {return input_type_;}
int getScreenWidth() {return screen_width_;}
int getScreenHeight() {return screen_height_;}
std::string getFileName() {return current_file_name_;}
cv::Point2f mapToDisplay(cv::Vec3f obj_center, cv::Vec3f gaze_point);
void initialize();
bool closeInput();
void getScreenResolution(int &width, int &height);
cv::Mat getCameraMatrix() { return camera_matrix_;}
cv::Mat getCameraDistortion() {return camera_distortion_;}
void setFrameSize(int frame_width, int frame_height);
bool isReachEnd() {return is_reach_end_;}
cv::Mat camera_matrix_;
cv::Mat camera_distortion_;
private:
// indicator if we reach the end of sample stream
bool is_reach_end_;
int camera_id_;
int sample_height_, sample_width_;
std::vector<cv::Mat> images_;
std::string input_path_;
std::string input_file_video_name_;
int screen_width_, screen_height_;
// monitor
float monitor_W_, monitor_H_; // monitor width and height in mm
cv::Mat monitor_R_, monitor_T_;
cv::Vec3f monitor_corners_[4];
cv::Mat monitor_normal_;
// input variable
InputType input_type_;
cv::VideoCapture cap_;
std::string current_file_name_;
// variable for directory input
boost::filesystem::directory_iterator current_itr_;
};
}
#endif //INPUT_HANDLER_HPP
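A sketch of the typical InputHandler flow for a camera stream; the calibration file paths follow the install layout and are placeholders here:
#include <opencv2/opencv.hpp>
#include "input_handler.hpp"

int main() {
    opengaze::InputHandler handler;
    handler.setInputType(opengaze::InputHandler::InputType::Camera);
    handler.setInput(0); // camera id
    handler.readCameraConfiguration("/home/USER/OpenGaze/content/calib/calibration.yml");
    handler.readScreenConfiguration("/home/USER/OpenGaze/content/calib/monitor_laptop.yml");
    handler.initialize();
    while (!handler.isReachEnd()) {
        cv::Mat frame = handler.getNextSample();
        if (frame.empty()) break;
        // ... run gaze estimation on the frame, then map the 3D gaze to the screen:
        // handler.projectToDisplay(samples, /*is_face_model=*/true);
    }
    handler.closeInput();
    return 0;
}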

42
include/normalizer.hpp Normal file
@@ -0,0 +1,42 @@
#ifndef NORMALIZER_HPP
#define NORMALIZER_HPP
#include <opencv2/opencv.hpp>
#include "data.hpp"
namespace opengaze{
class Normalizer {
public:
Normalizer();
~Normalizer();
void estimateHeadPose(const cv::Point2f *landmarks, opengaze::Sample &sample);
void setCameraMatrix(cv::Mat input);
void loadFaceModel(std::string path);
void setParameters(int focal_length, int distance, int img_w, int img_h);
cv::Mat normalizeFace(cv::Mat input_image, Sample &sample);
std::vector<cv::Mat> normalizeEyes(cv::Mat input_image, Sample &sample);
cv::Mat cvtToCamera(cv::Point3f input, const cv::Mat cnv_mat);
private:
cv::Mat camera_matrix_;
std::vector<cv::Point3f> face_model_;
cv::Mat face_model_mat_, cam_norm_;
float focal_norm_, distance_norm_;
cv::Size roiSize_norm_;
};
}
#endif //NORMALIZER_HPP

80
include/opengaze.hpp Normal file
@@ -0,0 +1,80 @@
#ifndef OPEN_GAZE_H
#define OPEN_GAZE_H
#include <string>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>
#include <opencv2/opencv.hpp>
#include "input_handler.hpp"
#include "gaze_estimator.hpp"
#include "data.hpp"
#include "personal_calibrator.hpp"
namespace opengaze {
class OpenGaze {
public:
explicit OpenGaze(int argc, char** argv); //read configuration file
~OpenGaze();
// main function to estimate and show the gaze vector drawn on the input face image.
void runGazeVisualization();
/**
* main function to run personal calibration.
* @param num_calibration_point the numbers of points for calibration.
*/
void runPersonalCalibration(int num_calibration_point=5);
// main function to estimate and draw gaze point on the screen.
void runGazeOnScreen();
// main function to extract the face image from input image. The face image can then
// be used to train a custom gaze estimation model
void runDataExtraction();
private:
// visualization
/**
* function to draw the gaze vector on the input face image.
* @param sample the input data includes the gaze vector, head pose etc.
* @param image the input image contains the face. This function will draw the gaze vector on this input image
*/
void drawGazeOnFace(opengaze::Sample sample, cv::Mat &image);
// draw the detected facial landmark
void drawLandmarks(opengaze::Sample sample, cv::Mat &image);
// draw the estimated gaze in the top-left corner of the input image
// to show the relative position on the screen. In this way the user can see
// both the input image and the projected gaze target on the screen.
// This function is mainly used for debugging.
void drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image);
// estimate and draw gaze point on the screen
void drawGazeOnScreen(opengaze::Sample sample, cv::Mat &image);
// debug mode shows the gaze drawn on the face
bool show_debug_;
//class instances
InputHandler input_handler_;
GazeEstimator gaze_estimator_;
// input camera id
int camera_id_;
// temporary variables to store the input path, output path, input type
boost::filesystem::path input_dir_;
InputHandler::InputType input_type_;
boost::filesystem::path output_dir_;
bool is_face_model_;
bool is_save_video_;
// path to save the personal calibration model
std::string per_model_save_path_;
};
}
#endif //OPEN_GAZE_H

@@ -0,0 +1,64 @@
#ifndef PERSONAL_CALIBRATOR_HPP
#define PERSONAL_CALIBRATOR_HPP
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
class PersonalCalibrator {
public:
PersonalCalibrator(int screen_width, int screen_height);
~PersonalCalibrator();
/**
* generate the random locations for calibration
* @param num_points number of points to generate
*/
void generatePoints(int num_points);
// get the display window ready; it should be full-screen
void initialWindow();
// show the next calibration point
bool showNextPoint();
// wait 0.5 seconds to receive the confirmation (mouse click) from the user
void confirmClicking();
/**
* generate a polynomial function for the personal calibration
* @param prediction prediction from gaze estimation method
* @param ground_truth calibration points locations on the screen
* @param order the order of the polynomial function; 1 means linear
*/
void generateModel(std::vector<cv::Point2f> prediction, std::vector<cv::Point2f> ground_truth, int order=1);
/**
* save the personal calibration model
* @param file_path path to save the model
*/
void saveModel(std::string file_path);
/**
* load the personal calibration model
* @param file_path path to load the model
*/
void loadModel(std::string file_path);
/**
* return current calibration point location on the screen
* @return location on the screen
*/
cv::Point2f getCurrentPoint() {return points_[index_point_];}
// function to calculate the polynomial function
void calibratePolynomial();
private:
// indicates whether the user has clicked the mouse
bool is_click_;
// number of points for personal calibration
int num_points_;
// index of the current calibration point
int index_point_;
// vector to store the generated calibration points
std::vector<cv::Point2i> points_;
int screen_width_, screen_height_, center_radius_; // monitor width and height in pixel
// personal model
cv::Mat model_matrix_;
};
#endif //PERSONAL_CALIBRATOR_HPP
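A sketch of how these pieces fit together in a calibration run; OpenGaze::runPersonalCalibration implements the real flow, and getPrediction() below is a hypothetical stand-in for the gaze estimate at each target:
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "personal_calibrator.hpp"

int main() {
    PersonalCalibrator calibrator(1920, 1080); // screen resolution in pixels
    calibrator.generatePoints(9);              // nine random calibration targets
    calibrator.initialWindow();                // full-screen display window

    std::vector<cv::Point2f> predictions, ground_truth;
    while (calibrator.showNextPoint()) {
        calibrator.confirmClicking();          // wait for the user's mouse click
        ground_truth.push_back(calibrator.getCurrentPoint());
        // predictions.push_back(getPrediction()); // hypothetical: gaze estimate for this target
    }
    calibrator.generateModel(predictions, ground_truth, 1); // order 1 = linear mapping
    calibrator.saveModel("/home/USER/OpenGaze/user1.txt");
    return 0;
}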

60
install.sh Normal file
@@ -0,0 +1,60 @@
#!/bin/bash
set -e
set -o pipefail
if [ $# -ne 0 ]
then
echo "Usage: install.sh"
exit 1
fi
# Essential Dependencies
echo "Installing Essential dependencies..."
sudo apt-get -y update
sudo apt-get -y install build-essential
sudo apt-get -y install cmake
sudo apt-get -y install libopenblas-dev liblapack-dev
sudo apt-get -y install git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev
sudo apt-get -y install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev
echo "Essential dependencies installed."
# OpenCV Dependency
echo "Downloading OpenCV..."
wget https://github.com/opencv/opencv/archive/3.4.0.zip
unzip 3.4.0.zip
cd opencv-3.4.0
mkdir -p build
cd build
echo "Installing OpenCV..."
cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB=ON -D WITH_CUDA=OFF -D BUILD_SHARED_LIBS=ON ..
make -j4
sudo make install
cd ../..
rm 3.4.0.zip
sudo rm -r opencv-3.4.0
echo "OpenCV installed."
# dlib dependency
echo "Downloading dlib"
wget http://dlib.net/files/dlib-19.13.tar.bz2;
tar xf dlib-19.13.tar.bz2;
cd dlib-19.13;
mkdir -p build;
cd build;
echo "Installing dlib"
cmake ..;
cmake --build . --config Release;
sudo make install;
sudo ldconfig;
cd ../..;
rm -r dlib-19.13.tar.bz2
echo "dlib installed"
# Boost C++ Dependency
echo "Installing Boost..."
sudo apt-get -y install libboost-all-dev
echo "Boost installed."
# Dependencies for Caffe
sudo apt-get -y install libprotobuf-dev protobuf-compiler libgoogle-glog-dev libgflags-dev libhdf5-dev liblmdb-dev

BIN
pre-complile/opengaze.deb Normal file

Binary file not shown.

3
pre-complile/readme.txt Normal file
@@ -0,0 +1,3 @@
This is a pre-compiled package for OpenGaze, including the OpenFace and Caffe libraries.
To use it, you still need to install the other dependencies: the NVIDIA driver, CUDA, and cuDNN.
To install, just run `sudo dpkg -i build_1.0-1_amd64.deb`.

273
src/face_detector.cpp Normal file
@@ -0,0 +1,273 @@
#include <iostream>
#include <tbb/tbb.h>
// if we use OpenFace
#if USE_OPENFACE
#include <LandmarkCoreIncludes.h> // from "OpenFace-master/lib/local/LandmarkDetector/include/"
#endif
#include "face_detector.hpp"
using namespace std;
using namespace cv;
vector<LandmarkDetector::FaceModelParameters> det_parameters_;
vector<LandmarkDetector::CLNF> clnf_models_;
namespace opengaze {
FaceDetector::FaceDetector() {
method_type_ = Method::OpenFace;
}
FaceDetector::~FaceDetector() {}
void FaceDetector::initialize(int number_users=5) {
string root_path = OPENFACE_DIR;
root_path = root_path + "/build/bin";
//string openface_root = OpenFace_ROOT_DIR;
// (currently) hard-coded setting
num_faces_max_ = number_users;
detection_resize_rate_ = 2.0; // resize the input image to detect face, crucial for speed
detection_skip_frames_ = 1;
nonoverlap_threshold_ = 0.5;
certainty_threshold_ = 0.0; // the smaller the better, 1 is the best, -1 is the worst
landmark_indices_[0] = 36; landmark_indices_[1] = 39; landmark_indices_[2] = 42;
landmark_indices_[3] = 45; landmark_indices_[4] = 48; landmark_indices_[5] = 54;
tracking_loss_limit_ = 10;
// initialize the tracking models
LandmarkDetector::FaceModelParameters det_parameter;
det_parameter.reinit_video_every = -1; // This is so that the model would not try re-initialising itself
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR;
det_parameter.model_location = root_path + "/model/main_clm_wild.txt";
det_parameter.haar_face_detector_location = root_path + "/classifiers/haarcascade_frontalface_alt.xml"; // this setting is ignored because of "curr_face_detector"
det_parameter.mtcnn_face_detector_location = root_path + "/model/mtcnn_detector/MTCNN_detector.txt";
det_parameter.use_face_template = true;
det_parameter.reinit_video_every = 5;
// det_parameter.quiet_mode = true; // not available for OpenFace v2.1
// For in-the-wild fitting these parameters are suitable
det_parameter.window_sizes_init = vector<int>(4);
det_parameter.window_sizes_init[0] = 15;
det_parameter.window_sizes_init[1] = 13;
det_parameter.window_sizes_init[2] = 11;
det_parameter.window_sizes_init[3] = 9;
det_parameter.sigma = 1.25;
det_parameter.reg_factor = 35;
det_parameter.weight_factor = 2.5;
det_parameter.num_optimisation_iteration = 10;
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
det_parameters_.push_back(det_parameter);
LandmarkDetector::CLNF clnf_model_ = LandmarkDetector::CLNF(det_parameter.model_location);
if (!clnf_model_.loaded_successfully){
cout << "ERROR: Could not load the landmark detector" << endl;
exit(-1);
}
clnf_model_.face_detector_HAAR.load(det_parameter.haar_face_detector_location);
clnf_model_.haar_face_detector_location = det_parameter.haar_face_detector_location;
clnf_model_.face_detector_MTCNN.Read(det_parameter.mtcnn_face_detector_location);
clnf_model_.mtcnn_face_detector_location = det_parameter.mtcnn_face_detector_location;
// If can't find MTCNN face detector, default to HOG one
if (det_parameter.curr_face_detector == LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR && clnf_model_.face_detector_MTCNN.empty()){
cout << "INFO: defaulting to HOG-SVM face detector" << endl;
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
}
clnf_models_.reserve(num_faces_max_);
clnf_models_.push_back(clnf_model_);
active_models_.push_back(false);
for(int i=1; i<num_faces_max_; ++i)
{
clnf_models_.push_back(clnf_model_);
active_models_.push_back(false);
det_parameters_.push_back(det_parameter);
}
// variables
frame_counter_ = 0;
current_face_id_ = 1;
for(int i=0; i<num_faces_max_; ++i) face_ids_.push_back(0);
}
void FaceDetector::reset() {
// reset all status
frame_counter_ = 0;
current_face_id_ = 1;
for(unsigned int model = 0; model < clnf_models_.size(); ++model)
{
active_models_[model] = false;
face_ids_[model] = 0;
clnf_models_[model].Reset();
}
}
void NonOverlapingDetections(const vector<LandmarkDetector::CLNF>& clnf_models, vector<cv::Rect_<float> >& face_detections){
// Go over the model and eliminate detections that are not informative (there already is a tracker there)
for (size_t model = 0; model < clnf_models.size(); ++model){
// See if the detections intersect
cv::Rect_<float> model_rect = clnf_models[model].GetBoundingBox();
for (int detection=face_detections.size()-1; detection >= 0; --detection)
{
double intersection_area = (model_rect & face_detections[detection]).area();
double union_area = model_rect.area() + face_detections[detection].area() - 2 * intersection_area;
// If the model is already tracking what we're detecting ignore the detection, this is determined by amount of overlap
if (intersection_area / union_area > 0.5)
{
face_detections.erase(face_detections.begin() + detection);
}
}
}
}
double NonOverlapingDetection(const LandmarkDetector::CLNF &ref_model, const LandmarkDetector::CLNF &tgt_model)
{
Rect_<double> ref_rect = ref_model.GetBoundingBox();
Rect_<double> tgt_rect = tgt_model.GetBoundingBox();
double intersection_area = (ref_rect & tgt_rect).area();
double union_area = ref_rect.area() + tgt_rect.area() - 2 * intersection_area;
return intersection_area/union_area;
}
void FaceDetector::track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output) {
if(input_img.channels() < 3){
cout << "The input must be a color image!" <<endl;
exit(EXIT_FAILURE);
}
Mat_<uchar> grayscale_image;
cvtColor(input_img, grayscale_image, CV_BGR2GRAY);
bool all_models_active = true;
for(unsigned int model = 0; model < clnf_models_.size(); ++model)
{
if(!active_models_[model])
{
all_models_active = false;
break;
}
}
// Detect faces
// Get the detections (every Xth frame and when there are free models available for tracking)
std::vector<Rect_<float> > face_detections;
cv::Mat small_grayscale_image_;
if (frame_counter_ % detection_skip_frames_ == 0 && !all_models_active) {
// resized image for faster face detection
if (detection_resize_rate_ != 1) resize(grayscale_image, small_grayscale_image_,
Size(), 1.0/detection_resize_rate_, 1.0/detection_resize_rate_);
else small_grayscale_image_ = grayscale_image;
if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR){
vector<float> confidences;
LandmarkDetector::DetectFacesHOG(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HOG, confidences);
}
else if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HAAR_DETECTOR){
LandmarkDetector::DetectFaces(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HAAR);
}
else{
vector<float> confidences;
LandmarkDetector::DetectFacesMTCNN(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_MTCNN, confidences);
}
// scale the face detections back up
if (detection_resize_rate_ != 1) {
for(auto& face_detection : face_detections) {
face_detection.x *= detection_resize_rate_;
face_detection.y *= detection_resize_rate_;
face_detection.width *= detection_resize_rate_;
face_detection.height *= detection_resize_rate_;
}
}
// Keep only non-overlapping detections
NonOverlapingDetections(clnf_models_, face_detections);
}
vector< tbb::atomic<bool> > face_detections_used(face_detections.size());
// Go through every model and update the tracking
tbb::parallel_for(0, (int)clnf_models_.size(), [&](int model) {
//for (unsigned int model = 0; model < clnf_models_.size(); ++model) {
bool detection_success = false;
// If the current model has failed more than threshold, remove it
if (clnf_models_[model].failures_in_a_row > tracking_loss_limit_) {
active_models_[model] = false;
clnf_models_[model].Reset();
}
// If the model is inactive reactivate it with new detections
if (!active_models_[model]){
for (size_t detection_ind = 0; detection_ind < face_detections.size(); ++detection_ind)
{
// if it was not taken by another tracker take it (if it is false swap it to true and enter detection, this makes it parallel safe)
if (!face_detections_used[detection_ind].compare_and_swap(true, false)) {
// Reinitialise the model
clnf_models_[model].Reset();
// This ensures that a wider window is used for the initial landmark localisation
clnf_models_[model].detection_success = false;
LandmarkDetector::DetectLandmarksInVideo(input_img, face_detections[detection_ind], clnf_models_[model], det_parameters_[model], grayscale_image);
// This activates the model
active_models_[model] = true;
face_ids_[model] = current_face_id_;
current_face_id_++;
// break out of the loop as the tracker has been reinitialised
break;
}
}
}
else
{
// The actual facial landmark detection / tracking
detection_success = LandmarkDetector::DetectLandmarksInVideo(input_img, clnf_models_[model], det_parameters_[model], grayscale_image);
}
//}
});
// Go through every model and check the results
for(size_t model=0; model<clnf_models_.size(); ++model){
// Check if the alignment result is overlapping previous models
bool overlapping = false;
for(size_t model_ref=0; model_ref<model; ++model_ref){
double overlap_ratio = NonOverlapingDetection(clnf_models_[model_ref], clnf_models_[model]);
if(overlap_ratio > nonoverlap_threshold_) overlapping = true;
}
if(overlapping){
active_models_[model] = false;
face_ids_[model] = 0;
clnf_models_[model].Reset();
continue;
}
if(clnf_models_[model].detection_certainty < certainty_threshold_) continue;
Sample temp;
temp.face_data.certainty = clnf_models_[model].detection_certainty;
temp.face_data.face_id = face_ids_[model];
temp.face_data.face_bb.x = (int)clnf_models_[model].GetBoundingBox().x;
temp.face_data.face_bb.y = (int)clnf_models_[model].GetBoundingBox().y;
temp.face_data.face_bb.height = (int)clnf_models_[model].GetBoundingBox().height;
temp.face_data.face_bb.width = (int)clnf_models_[model].GetBoundingBox().width;
for(int p=0; p<6; p++){
int num_p = landmark_indices_[p];
temp.face_data.landmarks[p] = Point2d(
clnf_models_[model].detected_landmarks.at<float>(num_p,0),
clnf_models_[model].detected_landmarks.at<float>(num_p+68,0)
);
}
output.emplace_back(temp);
}
}
}

112
src/gaze_estimator.cpp Normal file
@@ -0,0 +1,112 @@
#include <iostream>
#include "gaze_estimator.hpp"
using namespace std;
using namespace cv;
namespace opengaze{
GazeEstimator::GazeEstimator() {
}
GazeEstimator::~GazeEstimator() {}
void GazeEstimator::setRootPath(std::string root_path) {
normalizer_.loadFaceModel(root_path);
}
void GazeEstimator::estimateGaze(cv::Mat input_image, std::vector<opengaze::Sample> &outputs) {
face_detector_.track_faces(input_image, outputs); // detect faces and facial landmarks
for (int i=0; i< outputs.size(); ++i) {
// estimate the head pose first; it is needed regardless of the gaze estimation method
normalizer_.estimateHeadPose(outputs[i].face_data.landmarks, outputs[i]);
if (method_type_ == Method::MPIIGaze){
// if we use face model
if (input_type_ == InputType::face){
Mat face_patch = normalizer_.normalizeFace(input_image, outputs[i]);
//outputs[i].face_patch_data.debug_img = face_patch;
Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(face_patch); // gaze estimates in normalization space
Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].face_patch_data.face_rot); // convert gaze to camera coordinate system
gaze_3d.copyTo(outputs[i].gaze_data.gaze3d);
}
else if (input_type_ == InputType::eye) {
vector<cv::Mat> eye_images = normalizer_.normalizeEyes(input_image, outputs[i]); // generate eye images
// for the left eye
Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(eye_images[0]);
Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].eye_data.leye_rot);
gaze_3d.copyTo(outputs[i].gaze_data.lgaze3d);
// for the right eye: flip it horizontally so it matches the left-eye input, then flip the estimate back
Mat flip_right;
flip(eye_images[1], flip_right, 1);
gaze_norm = gaze_predictor_.predictGazeMPIIGaze(flip_right);
gaze_norm.x *= -1.0;
gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].eye_data.reye_rot); // convert gaze to the camera coordinate system
gaze_3d.copyTo(outputs[i].gaze_data.rgaze3d);
}
}
else if (method_type_ == Method::OpenFace) {
cout << "Please use gaze estimation method MPIIGaze." << endl;
exit(EXIT_FAILURE);
}
}
}
void GazeEstimator::getImagePatch(cv::Mat input_image, std::vector<opengaze::Sample> &outputs) {
face_detector_.track_faces(input_image, outputs); // detect faces and facial landmarks
for (int i=0; i< outputs.size(); ++i) {
// estimate the head pose first; it is needed regardless of the gaze estimation method
normalizer_.estimateHeadPose(outputs[i].face_data.landmarks, outputs[i]);
if (method_type_ == Method::MPIIGaze){
// if we use face model
if (input_type_ == InputType::face){
outputs[i].face_patch_data.face_patch = normalizer_.normalizeFace(input_image, outputs[i]);
}
else if (input_type_ == InputType::eye) {
vector<cv::Mat> eye_images = normalizer_.normalizeEyes(input_image, outputs[i]); // generate eye images
outputs[i].eye_data.leye_img = eye_images[0];
outputs[i].eye_data.reye_img = eye_images[1];
}
}
else if (method_type_ == Method::OpenFace) {
cout << "Please use method MPIIGaze for image patch extraction." << endl;
exit(EXIT_FAILURE);
}
}
}
void GazeEstimator::setMethod(Method input_method_type, const std::vector<std::string> arguments={}) {
method_type_ = input_method_type;
if (method_type_ == Method::MPIIGaze) {
gaze_predictor_.initiaMPIIGaze(arguments);
if (arguments.size() < 3)
input_type_ = InputType::face;
else {
if (arguments[2] == "face"){
input_type_ = InputType::face;
normalizer_.setParameters(1600, 1000, 224, 224);
}
else if (arguments[2] == "eye") {
input_type_ = InputType::eye;
normalizer_.setParameters(960, 600, 60, 36);
}
}
}
}
void GazeEstimator::setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist) {
camera_matrix_ = move(camera_matrix);
camera_dist_ = move(camera_dist);
normalizer_.setCameraMatrix(camera_matrix_);
}
void GazeEstimator::initialFaceDetector(int number_users){
face_detector_.initialize(number_users);
face_detector_.setMethodType(FaceDetector::Method::OpenFace);
}
};

160
src/gaze_predictor.cpp Normal file
@@ -0,0 +1,160 @@
#include "gaze_predictor.hpp"
#include <string>
// caffe
#define USE_OPENCV 1
#include <caffe/caffe.hpp>
#include <caffe/util/io.hpp>
#include <caffe/blob.hpp>
#include <caffe/layers/pose_data_layer.hpp>
#include <caffe/layers/memory_data_layer.hpp>
using namespace cv;
using namespace std;
using namespace caffe;
namespace opengaze {
caffe::Net<float> *p_net_;
GazePredictor::GazePredictor() {
}
GazePredictor::~GazePredictor() {
delete p_net_;
}
void GazePredictor::initiaMPIIGaze(const std::vector<std::string> arguments={}) {
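// expected arguments: [0] prototxt path, [1] caffemodel path, [2] "face" or "eye", [3] GPU id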
p_net_ = nullptr;
string param_path = arguments[0];
string model_path = arguments[1];
int gpu_id = stoi(arguments[3]);
// Set GPU (or CPU)
/*caffe::Caffe::set_mode(caffe::Caffe::CPU);
cout << "Using CPU model" << endl;*/
caffe::Caffe::set_mode(caffe::Caffe::GPU);
cout << "Using GPU with id " << gpu_id << endl;
Caffe::SetDevice(gpu_id);
cout << "load caffe model parameters from " << param_path << endl;
// create CNN
p_net_ = new Net<float>(param_path, caffe::TEST);
cout << "load caffe model from " << model_path << endl;
// load pre-trained weights (binary proto)
p_net_->CopyTrainedLayersFrom(model_path);
// determine the model type based on the parameter file name
size_t i = param_path.rfind("/", param_path.length());
string filename;
if (i != string::npos)
filename = param_path.substr(i+1, param_path.length() - i);
if (!filename.compare(string("lenet_test.prototxt")))
model_type_ = 1;
else if (!filename.compare(string("googlenet.prototxt")))
model_type_ = 2;
else if (!filename.compare(string("alexnet_eye.prototxt")))
model_type_ = 3;
else if (!filename.compare(string("alexnet_face.prototxt")))
model_type_ = 4; // the single face model
else if (!filename.compare(string("alexnet_face_448.prototxt")))
model_type_ = 4; // the single face model
else {
model_type_ = 0;
cout << "Cannot determine the model type from the prototxt file name!" << endl;
exit(EXIT_FAILURE);
}
}
// gaze estimation with single face input image and with MPIIGaze method
Point3f GazePredictor::predictGazeMPIIGaze(cv::Mat input_image) {
vector<Mat> img_vec;
img_vec.push_back(input_image);
Vec2f gaze_norm_2d;
Point3f gaze_norm_3d;
std::vector<int> labelVector;
labelVector.clear();
labelVector.push_back(1);
labelVector.push_back(1);
float loss = 0.0;
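// The model's input layer is a Caffe MemoryDataLayer named "data"; we hand it the
// normalized image together with dummy labels (unused at test time), then run a forward pass.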
caffe::shared_ptr<caffe::MemoryDataLayer<float> > data_layer_;
data_layer_ = boost::static_pointer_cast<MemoryDataLayer<float> >(p_net_->layer_by_name("data"));
data_layer_->AddMatVector(img_vec, labelVector);
// run network
p_net_->ForwardPrefilled(&loss);
if (model_type_==1)
{
// get output layer "ip2"
float *temp = (float*)p_net_->blob_by_name("ip2")->cpu_data();
// copy estimated gaze vector
gaze_norm_2d.val[0] = temp[0];
gaze_norm_2d.val[1] = temp[1];
temp = nullptr;
}
else if (model_type_==2)// if it is googlenet
{
float *temp1 = (float*)p_net_->blob_by_name("loss1/classifier")->cpu_data();
float *temp2 = (float*)p_net_->blob_by_name("loss2/classifier")->cpu_data();
float *temp3 = (float*)p_net_->blob_by_name("loss3/classifier")->cpu_data();
// average the output of three output values
gaze_norm_2d.val[0] = (temp1[0]+temp2[0]+temp3[0]) / 3.0f;
gaze_norm_2d.val[1] = (temp1[1]+temp2[1]+temp3[1]) / 3.0f;
temp1 = nullptr;
temp2 = nullptr;
temp3 = nullptr;
}
else if (model_type_==3)// if it is alexnet
{
float *temp;
temp = (float*)p_net_->blob_by_name("fc8")->cpu_data();// blob name can be fc8
if (temp == NULL)
temp = (float*)p_net_->blob_by_name("gaze_output")->cpu_data(); //blob name can be gaze_output
if (temp == NULL) {
cout << "ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output" << endl;
exit(EXIT_FAILURE);
}
// copy estimated gaze vector
gaze_norm_2d.val[0] = temp[0];
gaze_norm_2d.val[1] = temp[1];
temp = NULL;
}
else if (model_type_==4)// if it is alexnet
{
float *temp;
temp = (float*)p_net_->blob_by_name("fc8")->cpu_data();// blob name can be fc8
if (temp == NULL)
temp = (float*)p_net_->blob_by_name("gaze_output")->cpu_data(); //blob name can be gaze_output
if (temp == NULL) {
cout << "ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output" << endl;
exit(EXIT_FAILURE);
}
// copy estimated gaze vector
gaze_norm_2d.val[0] = temp[0];
gaze_norm_2d.val[1] = temp[1];
//// get the feature out
//temp = (float*)p_net_->blob_by_name("fc6_gaze")->cpu_data();
//for (int num_f=0; num_f<4096; ++num_f)
//{
// feature[num_f] = temp[num_f];
//}
temp = NULL;
}
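// The network predicts gaze as two angles in the normalized space: theta (pitch) and phi (yaw).
// Convert them to a 3D unit vector in the normalized camera coordinate system:
// g = (-cos(theta)*sin(phi), -sin(theta), -cos(theta)*cos(phi))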
float theta = gaze_norm_2d.val[0];
float phi = gaze_norm_2d.val[1];
gaze_norm_3d.x = (-1.0f)*cos(theta)*sin(phi);
gaze_norm_3d.y = (-1.0f)*sin(theta);
gaze_norm_3d.z = (-1.0f)*cos(theta)*cos(phi);
return gaze_norm_3d;
}
}

185
src/input_handler.cpp Normal file
View File

@ -0,0 +1,185 @@
#include "input_handler.hpp"
#include <iostream>
#include <cstdlib>
using namespace cv;
using namespace std;
namespace opengaze {
#if WIN32
#include <windows.h>
#else
#include <X11/Xlib.h>
#endif
void InputHandler::getScreenResolution(int &width, int &height) {
#if WIN32
width = (int) GetSystemMetrics(SM_CXSCREEN);
height = (int) GetSystemMetrics(SM_CYSCREEN);
#else
Display* disp = XOpenDisplay(NULL);
Screen* scrn = DefaultScreenOfDisplay(disp);
width = scrn->width;
height = scrn->height;
#endif
}
InputHandler::InputHandler(){
input_type_ = InputType::Camera; // default input type
camera_id_ = 0;
getScreenResolution(screen_width_, screen_height_);
}
InputHandler::~InputHandler(){}
void InputHandler::initialize()
{
if (input_type_ == InputType::Camera){
cap_.open(camera_id_);
if(!cap_.isOpened()) { // open Camera
cout << "Could not open Camera with id " << camera_id_ << endl;
std::exit(EXIT_FAILURE);
}
setFrameSize(1280, 720); // 800*600, 1280*720, 1920*1080,
}
else if (input_type_ == InputType::Video){
cap_.open(input_file_video_name_);
if(!cap_.isOpened()) { // open video file
cout << "Error: Could not open video file " << input_file_video_name_ << endl;
std::exit(EXIT_FAILURE);
}
}
else if (input_type_ == InputType::Directory) {
if (!boost::filesystem::is_directory(input_path_)){
cout << "Error: The input must be a directory, but it is " << input_path_ << endl;
std::exit(EXIT_FAILURE);
}
current_itr_ = boost::filesystem::directory_iterator(input_path_);
}
else if (input_type_ == InputType::Memory) {}
is_reach_end_ = false;
}
void InputHandler::setFrameSize(int frame_width, int frame_height){
cap_.set(cv::CAP_PROP_FRAME_HEIGHT, frame_height);//720 1080
cap_.set(cv::CAP_PROP_FRAME_WIDTH, frame_width);//1280 1980
double dWidth = cap_.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
double dHeight = cap_.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
cout << "Input frame size is : " << dWidth << " x " << dHeight << endl;
}
Mat InputHandler::getNextSample() {
Mat frame;
if (input_type_ == InputType::Camera) cap_ >> frame;
else if (input_type_ == InputType::Video) {
cap_ >> frame;
if (frame.empty()) // we reach the end of video
is_reach_end_ = true;
}
else if (input_type_ == InputType::Directory) {
boost::filesystem::path file_path = current_itr_->path();
if (file_path.extension() != ".jpg" && file_path.extension() != ".png" && file_path.extension() != ".bmp"){
cout << "Error: The input file is not image file with extension of jpg, png or bmp!" << endl;
cout << "The input file name is: " << file_path.string() << endl;
std::exit(EXIT_FAILURE);
}
cout << "process image " << file_path << endl;
frame = imread(file_path.string());
++current_itr_; // advance to the next file in the directory
if (current_itr_ == boost::filesystem::directory_iterator())
}
else if (input_type_ == InputType::Memory) {}
return frame;
}
bool InputHandler::closeInput() {
if (input_type_ == InputType::Camera || input_type_ == InputType::Video){
cap_.release();
is_reach_end_ = true;
}
return true;
}
void InputHandler::setInput(std::string input_path) {
if (input_type_ == InputType::Directory){
input_path_ = move(input_path);
}
else if (input_type_ == InputType::Video){
input_file_video_name_ = move(input_path);
}
}
void InputHandler::readScreenConfiguration(string calib_file) {
FileStorage fs_disp(calib_file, FileStorage::READ);
fs_disp["monitor_W"] >> monitor_W_;
fs_disp["monitor_H"] >> monitor_H_;
fs_disp["monitor_R"] >> monitor_R_;
fs_disp["monitor_T"] >> monitor_T_;
// compute monitor plane
Vec3f corners[4];
corners[0] = Vec3f(0.0, 0.0, 0.0);
corners[1] = Vec3f(monitor_W_, 0.0, 0.0);
corners[2] = Vec3f(0.0, monitor_H_, 0.0);
corners[3] = Vec3f(monitor_W_, monitor_H_, 0.0);
for(int i=0; i<4; i++){
Mat corners_cam = monitor_R_ * Mat(corners[i]) + monitor_T_;
corners_cam.copyTo(monitor_corners_[i]);
}
Vec3f normal = Vec3f(0.0, 0.0, 1.0); // normal direction
monitor_normal_ = monitor_R_ * Mat(normal);
monitor_normal_.convertTo(monitor_normal_, CV_32F);
}
void InputHandler::readCameraConfiguration(string calib_file){
cout << endl << "Reading calibration information from : " << calib_file << endl;
FileStorage fs;
fs.open(calib_file, FileStorage::READ);
fs["camera_matrix"] >> camera_matrix_;
fs["dist_coeffs"] >> camera_distortion_;
fs.release();
}
void InputHandler::projectToDisplay(std::vector<opengaze::Sample> &inputs, bool is_face_model) {
for(auto & sample : inputs) {
if (is_face_model) {
Vec3f face_center(sample.face_patch_data.face_center.at<float>(0), sample.face_patch_data.face_center.at<float>(1), sample.face_patch_data.face_center.at<float>(2));
sample.gaze_data.gaze2d = mapToDisplay(face_center, sample.gaze_data.gaze3d);
}
else {
Vec3f leye_pose(sample.eye_data.leye_pos.at<float>(0),sample.eye_data.leye_pos.at<float>(1),sample.eye_data.leye_pos.at<float>(2));
Vec3f reye_pose(sample.eye_data.reye_pos.at<float>(0),sample.eye_data.reye_pos.at<float>(1),sample.eye_data.reye_pos.at<float>(2));
sample.gaze_data.lgaze2d = mapToDisplay(leye_pose, sample.gaze_data.lgaze3d);
sample.gaze_data.rgaze2d = mapToDisplay(reye_pose, sample.gaze_data.rgaze3d);
float gaze_x = (sample.gaze_data.lgaze2d.x + sample.gaze_data.rgaze2d.x) / 2.0f;
float gaze_y = (sample.gaze_data.lgaze2d.y + sample.gaze_data.rgaze2d.y) / 2.0f;
sample.gaze_data.gaze2d.x = gaze_x;
sample.gaze_data.gaze2d.y = gaze_y;
}
}
}
cv::Point2f InputHandler::mapToDisplay(Vec3f origin, Vec3f gaze_vec) {
Point2f gaze_on_screen;
// compute intersection
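// Intersect the gaze ray p(t) = origin + t * gaze_vec with the monitor plane defined by
// point monitor_corners_[0] and normal monitor_normal_:
// t = n . (corner - origin) / (n . gaze_vec), computed as gaze_len below.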
float gaze_len = (float)(monitor_normal_.dot(Mat(monitor_corners_[0]-origin))/monitor_normal_.dot(Mat(gaze_vec)));
Vec3f gaze_pos_cam = origin + gaze_len * gaze_vec;
// convert to the monitor coordinate system
Mat gaze_pos_ = monitor_R_.inv() * (Mat(gaze_pos_cam) - monitor_T_);
Vec3f gaze_pos_3d;
gaze_pos_.copyTo(gaze_pos_3d);
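// express the intersection in screen-relative coordinates (within [0, 1] when the gaze hits the monitor)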
gaze_on_screen.x = gaze_pos_3d.val[0] / monitor_W_;
gaze_on_screen.y = gaze_pos_3d.val[1] / monitor_H_;
return gaze_on_screen;
}
}

184
src/normalizer.cpp Normal file
View File

@ -0,0 +1,184 @@
#include "normalizer.hpp"
using namespace cv;
using namespace std;
namespace opengaze {
Normalizer::Normalizer() {
// parameters for data normalization
focal_norm_ = 1600;
distance_norm_ = 1000; // 600 500 1000
roiSize_norm_ = cv::Size(224, 224); // 224 448
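// intrinsics of the virtual "normalized" camera: focal length focal_norm_ with the principal
// point at the patch center, used when warping the input image to the normalized patch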
cam_norm_ = (Mat_<float>(3,3) << focal_norm_, 0, roiSize_norm_.width/2, 0, focal_norm_, roiSize_norm_.height/2.0f, 0, 0, 1.0f);
}
Normalizer::~Normalizer() {}
void Normalizer::setParameters(int focal_length, int distance, int img_w, int img_h){
// parameters for data normalization
focal_norm_ = focal_length;
distance_norm_ = distance; // 600 500 1000
roiSize_norm_ = cv::Size(img_w, img_h); // 224 448
cam_norm_ = (Mat_<float>(3,3) << focal_norm_, 0, roiSize_norm_.width/2, 0, focal_norm_, roiSize_norm_.height/2.0f, 0, 0, 1.0f);
}
// convert vector from normalization space to camera coordinate system
cv::Mat Normalizer::cvtToCamera(cv::Point3f input, const Mat cnv_mat) {
// convert to the original camera coordinate system
Vec3f gaze_v(input.x, input.y, input.z);
// apply de-normalization
Mat gaze_v_cam = cnv_mat.inv() * Mat(gaze_v);
gaze_v_cam = gaze_v_cam / norm(gaze_v_cam);
return gaze_v_cam;
}
cv::Mat Normalizer::normalizeFace(Mat input_image, opengaze::Sample &sample) {
// get the face center in 3D space
Mat HR;
cv::Rodrigues(sample.face_patch_data.head_r, HR);
Mat HT = repeat(sample.face_patch_data.head_t, 1, 6);
Mat Fc;
add(HR*face_model_mat_, HT, Fc);
float distance = (float)norm(sample.face_patch_data.face_center); // original distance
float z_scale = distance_norm_ / distance; // scaling factor
cv::Mat scaleMat;
scaleMat = (Mat_<float>(3,3) << 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, z_scale);// scaling matrix
scaleMat.convertTo(scaleMat, CV_32F);
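// Build the rotation of the normalized (look-at) camera: its z-axis points from the camera
// to the face center, the x-axis follows the head's x-axis (hRx) as closely as possible,
// and the y-axis completes the right-handed basis.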
// get the look_at matrix
Mat hRx = HR.col(0);
Mat forward = sample.face_patch_data.face_center /distance;
Mat down = forward.cross(hRx);
down = down / norm(down);
Mat right = down.cross(forward);
right = right / norm(right);
// rotation matrix
sample.face_patch_data.face_rot = Mat(3, 3, CV_32F);
right.copyTo(sample.face_patch_data.face_rot.col(0));
down.copyTo(sample.face_patch_data.face_rot.col(1));
forward.copyTo(sample.face_patch_data.face_rot.col(2));
sample.face_patch_data.face_rot = sample.face_patch_data.face_rot.t(); // there is no scaling
sample.face_patch_data.face_rot.convertTo(sample.face_patch_data.face_rot, CV_32F);
Mat warpMat = cam_norm_ * (scaleMat * sample.face_patch_data.face_rot) * camera_matrix_.inv();// transformation matrix
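// Image-to-patch mapping: warpMat = C_norm * (S * R) * C_cam^-1, i.e. back-project a pixel with
// the real camera, rotate and scale it into the normalized camera frame, then re-project.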
// crop image and copy the equalized image
Mat face_patch;
warpPerspective(input_image, face_patch, warpMat, roiSize_norm_);
return face_patch;
}
vector<cv::Mat> Normalizer::normalizeEyes(cv::Mat input_image, Sample &sample){
vector<cv::Mat> eye_images;
Mat img_gray;
cvtColor(input_image, img_gray, CV_BGR2GRAY);
Mat eye_center;
Mat* eye_rot;
for (int i=0; i<2; ++i) {
if (i==0){
eye_center = sample.eye_data.leye_pos;
eye_rot = &sample.eye_data.leye_rot;
}
else {
eye_center = sample.eye_data.reye_pos;
eye_rot = &sample.eye_data.reye_rot;
}
float distance = (float)norm(eye_center);
float z_scale = distance_norm_ / distance;
Mat scaleMat;
scaleMat = (Mat_<float>(3,3) << 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, z_scale);// scaling matrix
scaleMat.convertTo(scaleMat, CV_32F);
// get the look_at matrix
Mat HR;
cv::Rodrigues(sample.face_patch_data.head_r, HR);
Mat hRx = HR.col(0);
Mat forward = eye_center/distance;
Mat down = forward.cross(hRx);
down = down / norm(down);
Mat right = down.cross(forward);
right = right / norm(right);
// rotation matrix
*eye_rot = Mat(3, 3, CV_32F);
right.copyTo(eye_rot->col(0));
down.copyTo(eye_rot->col(1));
forward.copyTo(eye_rot->col(2));
*eye_rot = eye_rot->t(); // there is no scaling
Mat warpMat = cam_norm_ * (scaleMat * *eye_rot) * camera_matrix_.inv();// transformation matrix
// crop image and copy the equalized image
Mat eye_patch, eye_patch_equal;
warpPerspective(img_gray, eye_patch, warpMat, roiSize_norm_);
equalizeHist(eye_patch, eye_patch_equal);
eye_images.push_back(eye_patch_equal);
}
eye_rot = nullptr;
return eye_images;
}
void Normalizer::loadFaceModel(std::string path) {
string face_model_file_path = path + "/content/model/face_model.yml";
//
cout << endl << "Loading 3D face model for head pose estimation from : " << face_model_file_path << endl;
FileStorage fs;
if (!fs.open(face_model_file_path, FileStorage::READ)) {
cout << "Cannot load the 3D face model!" << endl;
exit(EXIT_FAILURE);
}
fs["face_model"] >> face_model_mat_;
for(int p=0; p<6; ++p)
face_model_.emplace_back(Point3d(face_model_mat_.at<float>(0,p),
face_model_mat_.at<float>(1,p),
face_model_mat_.at<float>(2,p)));
fs.release();
}
// estimate head pose via model fitting
void Normalizer::estimateHeadPose(const Point2f *landmarks, opengaze::Sample &sample) {
Mat zero_dist = Mat::zeros(1, 5, CV_64F);
vector<Point2d> landmarks_orig(landmarks,
landmarks + 6);
cv::Mat head_r, head_t;
camera_matrix_.convertTo(camera_matrix_, CV_64F); // input must be double type
solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, false, SOLVEPNP_DLS);
solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, true);
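// two-stage fit: SOLVEPNP_DLS provides an algebraic initial pose, then the default iterative
// solver refines it using that pose as the extrinsic guess (last argument = true)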
head_r.convertTo(sample.face_patch_data.head_r, CV_32F);
head_t.convertTo(sample.face_patch_data.head_t, CV_32F);
camera_matrix_.convertTo(camera_matrix_, CV_32F);
// get the face center in 3D space
Mat HR;
cv::Rodrigues(sample.face_patch_data.head_r, HR);
Mat HT = repeat(sample.face_patch_data.head_t, 1, 6);
Mat Fc;
add(HR*face_model_mat_, HT, Fc);
Mat face_center = (Fc.col(0) + Fc.col(1) + Fc.col(2) + Fc.col(3) + Fc.col(4) + Fc.col(5)) / 6.0; // face center
face_center.copyTo(sample.face_patch_data.face_center); // copy to output
sample.face_patch_data.face_center.convertTo(sample.face_patch_data.face_center, CV_32F);
Mat le = 0.5*(Fc.col(2) + Fc.col(3)); // left eye
le.copyTo(sample.eye_data.leye_pos);
sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
Mat re = 0.5*(Fc.col(0) + Fc.col(1)); // right eye
re.copyTo(sample.eye_data.reye_pos);
sample.eye_data.reye_pos.convertTo(sample.eye_data.reye_pos, CV_32F);
}
void Normalizer::setCameraMatrix(cv::Mat input) {
camera_matrix_ = input;
camera_matrix_.convertTo(camera_matrix_, CV_32F);
}
}

502
src/opengaze.cpp Normal file
View File

@ -0,0 +1,502 @@
#include "opengaze.hpp"
#include <iostream>
#include <time.h>
using namespace std;
using namespace cv;
namespace opengaze {
double clockToMilliseconds(clock_t ticks){
// units/(units/time) => time (seconds) * 1000 = milliseconds
return (ticks/(double)CLOCKS_PER_SEC)*1000.0;
}
OpenGaze::OpenGaze(int argc, char** argv){
namespace fs = boost::filesystem;
namespace po = boost::program_options;
// default value of parameters
camera_id_ = 0;
input_type_ = InputHandler::InputType::Camera;
is_face_model_ = true;
string gaze_method;
string gpu_id;
string temp;
int number_user;
fs::path calib_camera, calib_screen, cnn_param_path, cnn_model_path;
// parse command line options for input/output paths
po::options_description command_line("Command line options");
command_line.add_options()
("root_dir,r", po::value<string>(), "configuration file")
("input_type,t", po::value<string>(), "input type (camera, video file, directory)")
("gaze_method,g", po::value<string>(), "gaze estimation method, could be MPIIGaze or OpenFace")
("input,i", po::value<string>(), "parameter for input")
("output,o", po::value<string>(), "output directory")
("calib_camera", po::value<string>(), "camera calibration file")
("calib_screen", po::value<string>(), "camera-screen calibration file")
("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
("debug,d", "show debug output")
("face_model,f", "to use face model or not")
("save_video,s", "save output visualization or not")
("number_user,n", "the maximum number of users in the input image")
;
cout << "Parsing command line options..." << endl;
po::variables_map vm_command;
po::store(po::parse_command_line(argc, argv, command_line), vm_command);
po::notify(vm_command);
// parse config file for data paths
po::options_description config_file("Config file options");
config_file.add_options()
("root_dir,r", po::value<string>(), "configuration file")
("input_type, t", po::value<string>(), "input type (camera, video file, directory)")
("input, i", po::value<string>(), "parameter for input")
("output,o", po::value<string>(), "output directory")
("cnn_param_path", po::value<string>(), "Caffe prototxt path")
("cnn_model_path", po::value<string>(), "Caffe model path")
("calib_camera", po::value<string>(), "camera calibration file")
("calib_screen", po::value<string>(), "camera-screen calibration file")
("gaze_method", po::value<string>(), "gaze estimation method, could be cnn or openface")
("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
("face_model", po::value<bool>(), "face model or not")
("save_video", po::value<bool>(), "save output visualization or not")
("number_user", po::value<string>(), "the maximum number of users in the input image")
;
fs::path root_dir, config_path;
if(vm_command.count("root_dir")) root_dir = vm_command["root_dir"].as<string>();
else {
root_dir = OPENGAZE_CON_DIR;
cout << "No root directory is found, default value " << root_dir << " will be use" << endl;
}
config_path = root_dir / "default.cfg";
cout << "Reading config from \"" << config_path.string() << "\""<< endl;
if(!fs::exists(config_path)){
cout << "Config file does not exist" << endl;
exit(EXIT_FAILURE);
}
ifstream settings_file(config_path.string());
po::variables_map vm_config;
po::store(po::parse_config_file(settings_file , config_file), vm_config);
po::notify(vm_config);
if(vm_command.count("gpu_id")) gpu_id = vm_command["gpu_id"].as<string>();
else if (vm_config.count("gpu_id")) gpu_id = vm_config["gpu_id"].as<string>();
else gpu_id = "0";
// CNN parameters
if(vm_command.count("cnn_param_path")) cnn_param_path = vm_command["cnn_param_path"].as<string>();
else if (vm_config.count("cnn_param_path")) cnn_param_path = vm_config["cnn_param_path"].as<string>();
else cnn_param_path = root_dir / "content/caffeModel/alexnet_face.prototxt";
if(vm_command.count("cnn_model_path")) cnn_model_path = vm_command["cnn_model_path"].as<string>();
else if (vm_config.count("cnn_model_path")) cnn_model_path = vm_config["cnn_model_path"].as<string>();
else cnn_model_path = root_dir / "content/caffeModel/alexnet_face.caffemodel";
// check input requirements
if(vm_command.count("gaze_method")) gaze_method = vm_command["gaze_method"].as<string>();
else if (vm_config.count("gaze_method")) gaze_method = vm_config["gaze_method"].as<string>();
else gaze_method = "MPIIGaze";
if(vm_command.count("calib_screen")) calib_screen = vm_command["calib_screen"].as<string>();
else if (vm_config.count("calib_screen")) calib_screen = vm_config["calib_screen"].as<string>();
else calib_screen = root_dir / "content/calib/monitor_laptop.yml";
if(vm_command.count("calib_camera")) calib_camera = vm_command["calib_camera"].as<string>();
else if (vm_config.count("calib_camera")) calib_camera = vm_config["calib_camera"].as<string>();
else calib_camera = root_dir / "content/calib/calibration.yml";
// read calibration file
if(!fs::exists(calib_camera)){
cout << "Camera calibration file does not exist: " << calib_camera <<endl;
exit(EXIT_FAILURE);
}
else input_handler_.readCameraConfiguration(calib_camera.string());
if(!fs::exists(calib_screen)){
cout << "Camera-screen calibration file does not exist: " << calib_screen << endl;
exit(EXIT_FAILURE);
}
else input_handler_.readScreenConfiguration(calib_screen.string());
if(vm_command.count("input_type")) temp = vm_command["input_type"].as<string>();
else if (vm_config.count("input_type")) temp = vm_config["input_type"].as<string>();
else temp = "";
if (temp == "camera") {input_type_ = InputHandler::InputType::Camera;}
else if (temp == "video") {input_type_ = InputHandler::InputType::Video;}
else if (temp == "directory") {input_type_ = InputHandler::InputType::Directory;}
else cout<<"No input type specified, default value (camera) will be use" << endl;
if (vm_command.count("input")) temp = vm_command["input"].as<string>();
else if (vm_config.count("input")) temp = vm_config["input"].as<string>();
else temp = "0";
if (input_type_ == InputHandler::InputType::Camera) camera_id_ = stoi(temp);
else if (input_type_ == InputHandler::InputType::Video || input_type_ == InputHandler::InputType::Directory) input_dir_ = temp;
else cout<<"No input parameter specified, default value will be use" << endl;
if(vm_command.count("face_model")) is_face_model_ = true;
else if(vm_config.count("face_model")) is_face_model_ = vm_config["face_model"].as<bool>();
else is_face_model_ = true;
if(vm_command.count("save_video")) is_save_video_ = true;
else if(vm_config.count("save_video")) is_save_video_ = vm_config["save_video"].as<bool>();
else is_save_video_ = false;
if(vm_command.count("debug")) show_debug_ = true;
else if(vm_config.count("debug")) show_debug_ = vm_config["debug"].as<bool>();
else show_debug_ = false;
if(vm_command.count("output")) output_dir_ = vm_command["output"].as<string>();
else if(vm_config.count("output")) output_dir_ = vm_config["output"].as<string>();
else {
if (input_type_ == InputHandler::InputType::Video) output_dir_ = input_dir_.parent_path();
else if (input_type_ == InputHandler::InputType::Directory) output_dir_ = input_dir_.parent_path();
else if (input_type_ == InputHandler::InputType::Camera)
output_dir_ = root_dir;
}
string face_detector_root_path;
if(vm_command.count("openface_path")) face_detector_root_path = vm_command["openface_path"].as<string>();
else if(vm_config.count("openface_path")) face_detector_root_path = vm_config["openface_path"].as<string>();
else cout<< "No face detector root specified, default detector will be use" << endl;
if(vm_command.count("per_model_save_path")) per_model_save_path_ = vm_command["per_model_save_path"].as<string>();
else if (vm_config.count("per_model_save_path")) per_model_save_path_ = vm_config["per_model_save_path"].as<string>();
else per_model_save_path_ = root_dir.string() + "/content/calib/user0.txt";
if(vm_command.count("number_user")) temp = vm_command["number_user"].as<string>();
else if (vm_config.count("number_user")) temp = vm_config["number_user"].as<string>();
else temp = "5";
number_user = stoi(temp);
// initial class instance
if (input_type_ == InputHandler::InputType::Camera){ // Camera as input
input_handler_.setInputType(InputHandler::InputType::Camera);// set input type
input_handler_.setInput(camera_id_); // set Camera id
}
else if (input_type_ == InputHandler::InputType::Video) {
input_handler_.setInputType(InputHandler::InputType::Video);// set input type
input_handler_.setInput(input_dir_.string()); // set camera file
}
else if (input_type_ == InputHandler::InputType::Directory){
input_handler_.setInputType(InputHandler::InputType::Directory);
}
// initialize other classes
gaze_estimator_.setCameraParameters(input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.setRootPath(root_dir.string());
gaze_estimator_.initialFaceDetector(number_user);
vector<std::string> arguments;
if (gaze_method == "MPIIGaze") {
arguments.push_back(cnn_param_path.string());
arguments.push_back(cnn_model_path.string());
if (is_face_model_)
arguments.emplace_back("face");
else
arguments.emplace_back("eye");
arguments.push_back(gpu_id);
gaze_estimator_.setMethod(GazeEstimator::Method::MPIIGaze, arguments);
}
else if (gaze_method == "OpenFace"){
//gaze_estimator_.setMethod(GazeEstimator::Method::OpenFace, arguments);
cout << "OpenFace gaze estimation is current not support" << endl;
exit(EXIT_FAILURE);
}
else {
cout << "The method setting is not right! Options are MPIIGaze or OpenFace!" << endl;
exit(EXIT_FAILURE);
}
}
OpenGaze::~OpenGaze() {
input_handler_.closeInput();
}
// do gaze estimation with camera as input
void OpenGaze::runGazeVisualization() {
input_handler_.initialize();
namedWindow("Gaze");
int key;
Mat input_image;
vector<Sample> output;
cv::VideoWriter m_writer;
if (is_save_video_){
boost::filesystem::path save_video_file;
save_video_file = output_dir_ / (input_dir_.stem().string() + "_gaze_video.avi");
m_writer.open(save_video_file.string(), CV_FOURCC('M','J','P','G'), 25,
Size(input_handler_.getFrameWidth(),input_handler_.getFrameHeight()), true);
cout << "Saving video to " << save_video_file << endl;
}
// construct saving file
ofstream output_stream;
boost::filesystem::path output_file_name = output_dir_ / (input_dir_.stem().string() + "_gaze_output.txt");
output_stream.open(output_file_name.string());
cout << "Created output file: " << output_file_name.string() << endl;
// for fps calculation
double fps_tracker = -1.0;
double t_start = 0;
double t_end = 0;
unsigned int frame_count = 0;
while(true){// loop all the sample or read frame from Video
frame_count++;
t_start = t_end;
output.clear();
input_image = input_handler_.getNextSample();// get input image
if(input_handler_.isReachEnd()){ // check if all sample are processed
cout<<"Processed all the samples."<<endl;
break;
}
Mat undist_img;
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
// get the fps values
t_end = cv::getTickCount();
fps_tracker = 1.0 / (double(t_end - t_start) / cv::getTickFrequency());
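// instantaneous FPS: invert the time between consecutive frames (tick difference over tick frequency)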
// save output
for(auto & sample : output) {
output_stream << frame_count << ",";
output_stream << sample.face_data.face_id << ",";
output_stream << sample.face_data.certainty << ",";
output_stream << sample.face_patch_data.face_center.at<float>(0) << ",";
output_stream << sample.face_patch_data.face_center.at<float>(1) << ",";
output_stream << sample.face_patch_data.face_center.at<float>(2) << ",";
output_stream << sample.gaze_data.gaze2d.x << ",";
output_stream << sample.gaze_data.gaze2d.y << ",";
output_stream << sample.eye_data.leye_pos.at<float>(0) << ",";
output_stream << sample.eye_data.leye_pos.at<float>(1) << ",";
output_stream << sample.eye_data.leye_pos.at<float>(2) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(0) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(1) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(2) << endl;
}
if (is_save_video_ || show_debug_) {
//////// visualization //////////////////////////////////////////////////
// draw results
for(const auto & sample : output){
//drawLandmarks(sample, undist_img); // draw face landmarks
drawGazeOnFace(sample, undist_img); // draw gaze ray on face image
//drawGazeOnSimScreen(sample, undist_img); // draw screen target
}
if (show_debug_) {
// show fps
char fpsC[255];
std::sprintf(fpsC, "%02f", fps_tracker);
string fpsSt("FPS: ");
fpsSt += fpsC;
cv::putText(undist_img, fpsSt, cv::Point(100, 100), CV_FONT_HERSHEY_SIMPLEX, 1, CV_RGB(255, 0, 0), 2);
// show the image
imshow("Gaze", undist_img);
key = cv::waitKey(1);
if (key==27) exit(EXIT_SUCCESS); // press ESC to exit
}
if (is_save_video_) {
m_writer << undist_img;
}
}
}
if (is_save_video_)
m_writer.release();
}
void OpenGaze::runDataExtraction() {
assert(input_handler_.getInputType() == InputHandler::InputType::Directory);// Here we just accept the directory folder
input_handler_.initialize();
vector<Sample> output;
Mat input_image;
while(true){// loop all the sample or read frame from Video
output.clear();
input_image = input_handler_.getNextSample();// get input image
if(input_handler_.isReachEnd()){ // check if all sample are processed
cout << "Processed all the samples." << endl;
break;
}
Mat undist_img;
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.getImagePatch(undist_img, output); // extract the face image
// save the output
for (int i=0; i<output.size(); ++i) {
string save_file_name = output_dir_.stem().string() + "/img_" + input_handler_.getFileName() + "_" +to_string(i)+".jpg";
cv::imwrite(save_file_name, output[i].face_patch_data.face_patch);
}
}
}
void OpenGaze::runGazeOnScreen() {
input_handler_.initialize();
int key;
Mat input_image, undist_img, show_img;
vector<Sample> output;
cv::namedWindow("screen", CV_WINDOW_NORMAL);
cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);
while(true){// loop all the sample or read frame from Video
output.clear();
if(input_handler_.isReachEnd()){ // check if all sample are processed
cout<<"Processed all the samples."<<endl;
break;
}
input_image = input_handler_.getNextSample();// get input image
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
// save output
for(auto & sample : output) {
int loc_x = (int)(sample.gaze_data.gaze2d.x * input_handler_.getScreenWidth());
int loc_y = (int)(sample.gaze_data.gaze2d.y * input_handler_.getScreenHeight());
circle(show_img, cv::Point(loc_x, loc_y), 10, CV_RGB(255,255,255), -1);
}
imshow("screen", show_img);
cv::Mat save_img;
cv::resize(show_img, save_img, cv::Size(1280, 720));
key = cv::waitKey(1);
show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);
if (key==27) break; // press ESC to exit
}
cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("screen");
}
void OpenGaze::runPersonalCalibration(int num_calibration_point) {
if (input_handler_.getInputType() != InputHandler::InputType::Camera){ // personal calibration has to be done with camera
cout << "Error: the input type must be camera for personal calibration!" << endl;
exit(EXIT_FAILURE);
}
Mat input_image, undist_img;
input_handler_.initialize();
PersonalCalibrator m_calibrator(input_handler_.getScreenWidth(), input_handler_.getScreenHeight());
m_calibrator.generatePoints(num_calibration_point);
m_calibrator.initialWindow(); // show start windows
vector<cv::Point2f> pred, gt; // prediction and ground-truth
for (int i=0; i<num_calibration_point; ++i){
if (m_calibrator.showNextPoint()) {// wait for clicking
vector<Sample> output;
input_image = input_handler_.getNextSample(); // get the sample when user clicking
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);// convert to 2D projection
m_calibrator.confirmClicking(); // give feedback to user that they successfully did calibration
pred.emplace_back(output[0].gaze_data.gaze2d);
gt.emplace_back(cv::Point2f((m_calibrator.getCurrentPoint().x/(float)input_handler_.getScreenWidth()),
(m_calibrator.getCurrentPoint().y/(float)input_handler_.getScreenHeight())));
}
else
break; // if user press ESC button, we break
}
if (pred.size() > 0){
m_calibrator.generateModel(pred, gt, 1); // get the mapping model
per_model_save_path_ = output_dir_.stem().string() + "/personal_gaze_model.yml"; // assign the member path rather than shadowing it with a local
m_calibrator.saveModel(per_model_save_path_);
}
}
void OpenGaze::drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image) {
static const int dW = 640;
static const int dH = 360;
Mat debug_disp = Mat::zeros(Size(dW, dH), CV_8UC3);
Point2f g_s;
g_s.x = dW*sample.gaze_data.gaze2d.x;
g_s.y = dH*sample.gaze_data.gaze2d.y;
circle(debug_disp, g_s, 10, CV_RGB(255,0,0), -1);
debug_disp.copyTo(image(Rect(0, 0, dW, dH)));
}
void OpenGaze::drawGazeOnFace(opengaze::Sample sample, cv::Mat &image) {
// draw gaze on the face
if (gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
&& gaze_estimator_.input_type_ == GazeEstimator::InputType::face) {
static const float gaze_length = 300.0;
Mat zero = Mat::zeros(1, 3, CV_32F);
Mat rvec, tvec;
sample.face_patch_data.head_r.convertTo(rvec, CV_32F);
sample.face_patch_data.head_t.convertTo(tvec, CV_32F);
vector<Point3f> cam_points;
Vec3f face_center(sample.face_patch_data.face_center.at<float>(0), sample.face_patch_data.face_center.at<float>(1), sample.face_patch_data.face_center.at<float>(2));
cam_points.emplace_back(face_center);
cam_points.emplace_back(face_center + gaze_length * sample.gaze_data.gaze3d);
vector<Point2f> img_points;
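// project the 3D ray endpoints (already expressed in camera coordinates, hence zero rvec/tvec)
// onto the image plane using the real camera intrinsics and distortion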
projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5); // gaze ray
circle(image, img_points[0], 5, CV_RGB(255,0,0), -1); // staring point
circle(image, img_points[1], 5, CV_RGB(255,0,0), -1); // end point
}
else if ((gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
&& gaze_estimator_.input_type_ == GazeEstimator::InputType::eye)
|| gaze_estimator_.method_type_ == GazeEstimator::Method::OpenFace) {
int gaze_length = 300;
Mat zero = Mat::zeros(1, 3, CV_32F);
vector<Point3f> cam_points;
sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
Vec3f leye_pose(sample.eye_data.leye_pos.at<float>(0),sample.eye_data.leye_pos.at<float>(1),sample.eye_data.leye_pos.at<float>(2));
cam_points.emplace_back(leye_pose);
cam_points.emplace_back(leye_pose + gaze_length*sample.gaze_data.lgaze3d);
Vec3f reye_pose(sample.eye_data.reye_pos.at<float>(0),sample.eye_data.reye_pos.at<float>(1),sample.eye_data.reye_pos.at<float>(2));
cam_points.emplace_back(reye_pose);
cam_points.emplace_back(reye_pose + gaze_length*sample.gaze_data.rgaze3d);
vector<Point2f> img_points;
projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5);
line(image, img_points[2], img_points[3], CV_RGB(255,0,0), 5);
circle(image, img_points[1], 3, CV_RGB(255,0,0), -1);
circle(image, img_points[3], 3, CV_RGB(255,0,0), -1);
}
}
void OpenGaze::drawLandmarks(opengaze::Sample sample, cv::Mat &image) {
cv::Rect_<int> face_bb = sample.face_data.face_bb;
rectangle(image, cv::Point(face_bb.x, face_bb.y),
cv::Point(face_bb.x+face_bb.width,face_bb.y+face_bb.height), CV_RGB(0,255,0), 5);
for(int p=0; p<6; ++p)
circle(image, sample.face_data.landmarks[p], 5, CV_RGB(0,255,0), -1);
}
}

145
src/personal_calibrator.cpp Normal file
View File

@ -0,0 +1,145 @@
#include "personal_calibrator.hpp"
using namespace cv;
using namespace std;
void CallBackFunc(int event, int x, int y, int flags, void* is_click) {
if (event == EVENT_LBUTTONDOWN){
bool* temp = (bool*)is_click;
*temp = true;
}
}
PersonalCalibrator::PersonalCalibrator (int screen_width, int screen_height) {
cv::namedWindow("calibration", CV_WINDOW_NORMAL);
cv::setWindowProperty("calibration", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
// set the mouse
is_click_ = false;
//set the callback function for any mouse event
setMouseCallback("calibration", CallBackFunc, &is_click_); // wait for clicking
screen_width_ = screen_width;
screen_height_ = screen_height;
center_radius_ = (int)((float)screen_width_ / 200.0f);
}
PersonalCalibrator::~PersonalCalibrator() {
cv::setWindowProperty("calibration", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("calibration");
}
void PersonalCalibrator::generatePoints(int num_points) {
index_point_ = -1;
srand(time(NULL));
Point2i current_point;
for (int num = 0; num < num_points; ++num) {
current_point.x = (rand() % screen_width_); // random pixel x in [0, screen_width_)
current_point.y = (rand() % screen_height_); // random pixel y in [0, screen_height_)
points_.emplace_back(current_point);
}
}
void PersonalCalibrator::initialWindow() {
// get the focus of the window
namedWindow("GetFocus", CV_WINDOW_NORMAL);
cv::Mat img = cv::Mat::zeros(100, 100, CV_8UC3);
cv::imshow("GetFocus", img);
cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
waitKey(1);
cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("GetFocus");
// show instruction
cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
string show_text = "Please click/touch when looking at the dots";
cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(255,255,255), 2);
imshow("calibration", show_img);
cv::waitKey(3000);
for (int i=255; i > 0; i-=5) { // fade the instruction text out gradually
show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(i,i,i), 2);
imshow("calibration", show_img);
cv::waitKey(1);
}
}
bool PersonalCalibrator::showNextPoint() {
cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
index_point_ ++;
cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(255, 255, 255), -1);
is_click_ = false;
while (true) {
imshow("calibration", show_img);
int key = cv::waitKey(10); // wait for interaction
if (key == 27) // if press the ESC key
return false;
if (is_click_) {
break;
}
}
return true;
}
void PersonalCalibrator::confirmClicking() {
cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(0, 200, 0), -1);
imshow("calibration", show_img);
cv::waitKey(500);
}
// this polyfit function is copied from opencv/modules/contrib/src/polyfit.cpp
// This original code was written by
// Onkar Raut
// Graduate Student,
// University of North Carolina at Charlotte
cv::Mat polyfit(const Mat& src_x, const Mat& src_y, int order)
{
CV_Assert((src_x.rows>0)&&(src_y.rows>0)&&(src_x.cols>0)&&(src_y.cols>0)&&(order>=1));
Mat matrix;
Mat bias = Mat::ones((int)src_x.rows, 1, CV_32FC1);
Mat input_x = Mat::zeros(src_x.rows, order*src_x.cols, CV_32FC1);
Mat copy;
for(int i=1; i<=order;i++){
copy = src_x.clone();
pow(copy,i,copy);
copy.copyTo(input_x(Rect((i-1)*src_x.cols, 0, copy.cols, copy.rows)));
}
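// Polynomial least-squares fit: the design matrix is [x, x^2, ..., x^order, 1]; appending the
// bias column and solving with DECOMP_NORMAL gives the mapping coefficients via normal equations.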
Mat new_mat;
cv::hconcat(input_x, bias, new_mat);
cout << "new_mat: " << new_mat << endl;
cv::solve(new_mat, src_y, matrix, DECOMP_NORMAL);
cout << "model_matrix: " << matrix << endl;
Mat calibrated = new_mat * matrix;
cout << "calibrated: " << calibrated << endl;
double dist_original = norm(src_x, src_y, NORM_L2);
cout << "dist_original: " << dist_original << endl;
double dist_calibrated = norm(calibrated, src_y, NORM_L2);
cout << "dist_calibrated: " << dist_calibrated << endl;
return matrix;
}
void PersonalCalibrator::generateModel(vector<Point2f> prediction, vector<Point2f> ground_truth, int order) {
cv::Mat input_x = cv::Mat((int)prediction.size(), 2, CV_32FC1, prediction.data());
cv::Mat input_y = cv::Mat((int)ground_truth.size(), 2, CV_32FC1, ground_truth.data());
cout << "input_x: " << input_x << endl;
cout << "input_y: " << input_y << endl;
cv::Mat model_matrix;
model_matrix_ = polyfit(input_x, input_y, order);
}
void PersonalCalibrator::saveModel(std::string file_path) {
cv::FileStorage storage(file_path, cv::FileStorage::WRITE);
storage << "model_matrix" << model_matrix_; // cv::FileStorage requires a node name before the matrix
storage.release();
}