commit e505acdb29dcdb1a5281e589007cc725c790b9e8 Author: Xucong Zhang Date: Thu Jan 10 13:26:03 2019 +0100 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..07c5b37 --- /dev/null +++ b/.gitignore @@ -0,0 +1,32 @@ +### C++ ### +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +### CMake ### +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..34e8c3e --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,90 @@ +cmake_minimum_required(VERSION 3.0) +project(OpenGaze VERSION 0.1) +set(CMAKE_BUILD_TYPE Release) + +# create a directory for models and configuration files +set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze") +add_definitions(-DOPENGAZE_CON_DIR="${OPENGAZE_DIR}") + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib/) +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +set(CMAKE_CXX_STANDARD 11) + +# OpenCV +find_package( OpenCV 3.4 REQUIRED COMPONENTS core imgproc calib3d highgui objdetect) + +# Boost, for reading configuration file +find_package(Boost 1.5 COMPONENTS system filesystem timer thread program_options REQUIRED) +set(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR} ${Boost_INCLUDE_DIR}/boost) + +# Caffe +set(CAFFE_INSTALL_DIR "/home/xucong/library/caffe/build/install") +set(Caffe_INCLUDE_DIRS ${CAFFE_INSTALL_DIR}/include) +set(Caffe_LIBRARY_DIRS ${CAFFE_INSTALL_DIR}/lib) +set(Caffe_LIBS lmdb glog caffe) + +# Face and facial landmark detection methods +option(USE_OPENFACE "with OpenFace" ON) +add_definitions(-DUSE_OPENFACE=1) +# OpenFace +set(OPENFACE_ROOT_DIR "/home/xucong/library/OpenFace") +add_definitions(-DOPENFACE_DIR="${OPENFACE_ROOT_DIR}") +set(CLM_INCLUDE_DIRS ${OPENFACE_ROOT_DIR}/lib/local/LandmarkDetector/include) +set(CLM_LIBRARY_DIRS ${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector) +set(CLM_LIBS LandmarkDetector tbb openblas dlib) +set(USE_OPENFACE ON) # we use OpenFace method here + +# suppress auto_ptr deprecation warnings +if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + add_compile_options("-Wno-deprecated-declarations") +endif() + +include_directories(./ ./include /usr/local/cuda/include ${OpenCV_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS} ${CLM_INCLUDE_DIRS} ${Caffe_INCLUDE_DIRS}) +link_directories(./ ./build/lib /usr/lib /usr/local/cuda/lib64 ${Boost_LIBRARY_DIRS} ${CLM_LIBRARY_DIRS} ${Caffe_LIBRARY_DIRS}) + +file(GLOB SOURCE "./src/*.cpp") +file(GLOB HEADERS "./include/*.hpp") + +# compile opengaze library +add_library(opengaze SHARED ${SOURCE} ${HEADERS}) +set_target_properties(opengaze PROPERTIES VERSION ${PROJECT_VERSION}) + + +#if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) +# set (CMAKE_INSTALL_PREFIX "/usr/local" CACHE PATH "default install path" FORCE ) +#endif() + +install (TARGETS opengaze EXPORT OpenGazeTargets LIBRARY DESTINATION lib ARCHIVE DESTINATION lib) +install (FILES ${HEADERS} DESTINATION include/opengaze) + +# install caffe and OpenFace +install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/3rdParty" DIRECTORY_PERMISSIONS + OWNER_WRITE OWNER_READ OWNER_EXECUTE + GROUP_WRITE GROUP_READ GROUP_EXECUTE + WORLD_WRITE WORLD_READ WORLD_EXECUTE) +install (FILES 
${OPENFACE_ROOT_DIR}/build/lib/local/LandmarkDetector/libLandmarkDetector.a DESTINATION ${OPENGAZE_DIR}/3rdParty)
+install (FILES ${Caffe_LIBRARY_DIRS}/libcaffe.so DESTINATION ${OPENGAZE_DIR}/3rdParty)
+install (FILES ${Caffe_LIBRARY_DIRS}/libcaffe.so.1.0.0 DESTINATION ${OPENGAZE_DIR}/3rdParty)
+
+# install configuration files
+install (DIRECTORY DESTINATION "${OPENGAZE_DIR}" DIRECTORY_PERMISSIONS
+    OWNER_WRITE OWNER_READ OWNER_EXECUTE
+    GROUP_WRITE GROUP_READ GROUP_EXECUTE
+    WORLD_WRITE WORLD_READ WORLD_EXECUTE)
+install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/content" DIRECTORY_PERMISSIONS
+    OWNER_WRITE OWNER_READ OWNER_EXECUTE
+    GROUP_WRITE GROUP_READ GROUP_EXECUTE
+    WORLD_WRITE WORLD_READ WORLD_EXECUTE)
+install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/content/calib" DIRECTORY_PERMISSIONS
+    OWNER_WRITE OWNER_READ OWNER_EXECUTE
+    GROUP_WRITE GROUP_READ GROUP_EXECUTE
+    WORLD_WRITE WORLD_READ WORLD_EXECUTE)
+install (DIRECTORY DESTINATION "${OPENGAZE_DIR}/content/model" DIRECTORY_PERMISSIONS
+    OWNER_WRITE OWNER_READ OWNER_EXECUTE
+    GROUP_WRITE GROUP_READ GROUP_EXECUTE
+    WORLD_WRITE WORLD_READ WORLD_EXECUTE)
+install (FILES ./content/calib/calibration.yml DESTINATION ${OPENGAZE_DIR}/content/calib PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
+install (FILES ./content/calib/monitor_laptop.yml DESTINATION ${OPENGAZE_DIR}/content/calib PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
+install (FILES ./content/model/face_model.yml DESTINATION ${OPENGAZE_DIR}/content/model PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
+install (FILES default.cfg DESTINATION ${OPENGAZE_DIR} PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ GROUP_WRITE WORLD_READ WORLD_WRITE WORLD_EXECUTE)
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..bf7bd20
--- /dev/null
+++ b/README.md
@@ -0,0 +1,55 @@
+# OpenGaze: Open Source Toolkit for Camera-Based Gaze Estimation and Interaction
+
+
+
+Appearance-based gaze estimation methods that only require an off-the-shelf camera have significantly improved and promise a wide range of new applications in gaze-based interaction and attentive user interfaces. However, these methods are not yet widely used in the human-computer interaction (HCI) community.
+
+To democratize their use in HCI, we present OpenGaze, the first software toolkit that is specifically developed for gaze interface designers. OpenGaze is open source and aims to implement state-of-the-art methods for camera-based gaze estimation and interaction.
+
+
+
+## Functionality
+
+The toolkit is capable of performing the following gaze-related tasks:
+
+* **Gaze Estimation**
+Show the estimated gaze on the screen, given the screen-camera relationship.
+
+[![Demo](https://img.youtube.com/vi/R1vb7mV3y_M/0.jpg)](https://youtu.be/R1vb7mV3y_M "Gaze estimation demo")
+<br>
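This mode is driven through the `OpenGaze` class; a minimal sketch, mirroring `exe/GazeEstimation.cpp` from this commit (calibration files and model paths come from `default.cfg` or command-line options):

```cpp
#include "opengaze/opengaze.hpp"

// Read the configuration (camera/screen calibration, CNN model paths)
// and show the estimated gaze point on the screen.
int main(int argc, char** argv) {
    opengaze::OpenGaze open_gaze(argc, argv); // parses the configuration file
    open_gaze.runGazeOnScreen();
    return 0;
}
```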
+
+* **Gaze Visualization**
+Show gaze directions originating from the centers of the detected faces in the input image.
+
+[![Demo](https://img.youtube.com/vi/8yMTvvr0rRU/0.jpg)](https://youtu.be/8yMTvvr0rRU "Gaze visualization demo")
+<br>
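The corresponding entry point is `OpenGaze::runGazeVisualization()`; a minimal sketch following `exe/GazeVisualization.cpp`:

```cpp
#include "opengaze/opengaze.hpp"

// Estimate gaze and draw the gaze vectors on the detected faces in the
// input (camera, video, or image directory, selected via the configuration).
int main(int argc, char** argv) {
    opengaze::OpenGaze open_gaze(argc, argv);
    open_gaze.runGazeVisualization();
    return 0;
}
```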
+
+* **Personal Calibration**
+Perform personal calibration and remap the gaze target on the screen.
+
+[![Demo](https://img.youtube.com/vi/ntBv1wcNGAo/0.jpg)](https://youtu.be/ntBv1wcNGAo "Personal calibration demo")
+<br>
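Calibration is run through `OpenGaze::runPersonalCalibration(int)`; a minimal sketch based on `exe/Calibration.cpp`:

```cpp
#include "opengaze/opengaze.hpp"

// Run on-screen personal calibration; the fitted model is saved to the
// path given by per_model_save_path in the configuration.
int main(int argc, char** argv) {
    opengaze::OpenGaze open_gaze(argc, argv);
    int num_calibration_points = 20; // exe/Calibration.cpp uses 20; the API default is 5
    open_gaze.runPersonalCalibration(num_calibration_points);
    return 0;
}
```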
+
+## Installation
+[Unix Installation](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Unix-Installation)
+
+## Use
+[Command line arguments](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Command-line-arguments)
+
+## Citation
+If you use any of the resources provided on this page in any of your publications, please cite the following paper:
+
+**Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications**
+Xucong Zhang, Yusuke Sugano, Andreas Bulling
+Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), 2019
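For convenience, a BibTeX entry assembled from the reference above (the entry key and field layout are an assumption, not taken from the repository):

```bibtex
@inproceedings{zhang19_chi,
  author    = {Zhang, Xucong and Sugano, Yusuke and Bulling, Andreas},
  title     = {Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications},
  booktitle = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)},
  year      = {2019}
}
```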
+
+BibTeX, PDF
+
+## License
+
+The license agreement can be found in Copyright.txt.
+
+You also have to respect the Boost, OpenFace, and OpenCV licenses.
+
+Furthermore, you have to respect the licenses of the datasets used for [model training](https://github.molgen.mpg.de/perceptual/opengaze/wiki/Model-training).
diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 0000000..40a874d
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,3 @@
+# Release 0.1.0
+
+Initial release of OpenGaze.
\ No newline at end of file
diff --git a/caffe-layers/include/caffe/layers/dspp_layer.hpp b/caffe-layers/include/caffe/layers/dspp_layer.hpp
new file mode 100644
index 0000000..f1e78ba
--- /dev/null
+++ b/caffe-layers/include/caffe/layers/dspp_layer.hpp
@@ -0,0 +1,50 @@
+#ifndef CAFFE_DSPP_LAYER_HPP_
+#define CAFFE_DSPP_LAYER_HPP_
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/layers/data_layer.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/layers/loss_layer.hpp"
+#include "caffe/layers/neuron_layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+namespace caffe {
+
+template <typename Dtype>
+class DSPPLayer : public Layer<Dtype> {
+ public:
+  explicit DSPPLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual inline const char* type() const { return "DSPPLayer"; }
+  virtual inline int ExactNumBottomBlobs() const { return 2; }
+  virtual inline int MinTopBlobs() const { return 1; }
+
+ protected:
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  //virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+  //    const vector<Blob<Dtype>*>& top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  //virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+  //    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+  int width_;
+  int height_;
+  int channel_;
+  int num_;
+
+};
+
+}  // namespace caffe
+
+#endif  // CAFFE_DSPP_LAYER_HPP_
\ No newline at end of file
diff --git a/caffe-layers/include/caffe/layers/pose_data_layer.hpp b/caffe-layers/include/caffe/layers/pose_data_layer.hpp
new file mode 100644
index 0000000..aefd2be
--- /dev/null
+++ b/caffe-layers/include/caffe/layers/pose_data_layer.hpp
@@ -0,0 +1,56 @@
+#ifndef CAFFE_POSE_DATA_LAYER_HPP_
+#define CAFFE_POSE_DATA_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+#include "caffe/layers/base_data_layer.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+class PoseDataLayer : public BaseDataLayer<Dtype> {
+ public:
+  explicit PoseDataLayer(const LayerParameter& param)
+      : BaseDataLayer<Dtype>(param), has_new_data_(false) {}
+  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "PoseData"; }
+  virtual inline int ExactNumBottomBlobs() const { return 0; }
+  virtual inline int ExactNumTopBlobs() const { return 2; }
+
+  virtual void AddDatumVector(const vector<Datum>& datum_vector);
+  virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
+      const vector<int>& labels);
+
+  // Reset should accept const pointers, but can't, because the memory
+  // will be given to Blob, which is mutable
+  void Reset(Dtype* data, Dtype* label, int n);
+  void set_batch_size(int new_size);
+
+  int batch_size() { return batch_size_; }
+  int channels() { return channels_; }
+  int height() { return height_; }
+  int width() { return width_; }
+
+ 
protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + + int batch_size_, channels_, height_, width_, size_; + Dtype* data_; + Dtype* labels_; + int n_; + size_t pos_; + Blob added_data_; + Blob added_label_; + bool has_new_data_; +}; + +} // namespace caffe + +#endif diff --git a/caffe-layers/src/caffe/layers/dspp_layer.cpp b/caffe-layers/src/caffe/layers/dspp_layer.cpp new file mode 100644 index 0000000..a8f3072 --- /dev/null +++ b/caffe-layers/src/caffe/layers/dspp_layer.cpp @@ -0,0 +1,92 @@ +#include +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/layers/dspp_layer.hpp" + +#include +#include + + +namespace caffe { + template + void DSPPLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + } + + template + void DSPPLayer::Reshape(const vector*>& bottom, const vector*>& top) { + + num_ = bottom[1]->shape()[0]; + channel_ = bottom[1]->shape()[1]; // the input data size + height_ = bottom[1]->shape()[2]; + width_ = bottom[1]->shape()[3]; + + // init output size + vector output_shape; + output_shape.push_back(num_); + output_shape.push_back(channel_); + output_shape.push_back(height_); + output_shape.push_back(width_); + top[0]->Reshape(output_shape); + } + + template + void DSPPLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + Dtype* top_data = top[0]->mutable_cpu_data(); + + caffe_set(top[0]->count(), 0, top_data); // initilize to be 0 + + for (int n=0; noffset(n, c, h, w)] = bottom[1]->data_at(n, c, h, w) * bottom[0]->data_at(n, 0, h, w); + } + } + } + } + top_data = NULL; + } + + template + void DSPPLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + if (propagate_down[0]) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* data_diff = bottom[1]->mutable_cpu_diff(); + Dtype* heat_map_diff = bottom[0]->mutable_cpu_diff(); + + caffe_set(bottom[1]->count(), 0, data_diff); + caffe_set(bottom[0]->count(), 0, heat_map_diff); + // Dtype activation_h, activation_w; + + for (int n = 0; n < num_; ++n) { + for (int h = 0; h < height_; ++h) { + for (int w = 0; w < width_; ++w) { + for (int c = 0; c < channel_; ++c) { + + Dtype buffer = top_diff[top[0]->offset(n, c, h, w)]; + data_diff[bottom[1]->offset(n, c, h, w)] = buffer * (bottom[0]->data_at(n, 0, h, w)); + + buffer *= bottom[1]->data_at(n,c,h,w) / channel_; + + heat_map_diff[bottom[0]->offset(n,0,h,w)] += buffer; + } + } + } + } + top_diff = NULL; + data_diff = NULL; + heat_map_diff = NULL; + + } + } + +INSTANTIATE_CLASS(DSPPLayer); +REGISTER_LAYER_CLASS(DSPP); + +} // namespace caffe diff --git a/caffe-layers/src/caffe/layers/pose_data_layer.cpp b/caffe-layers/src/caffe/layers/pose_data_layer.cpp new file mode 100644 index 0000000..a3a154d --- /dev/null +++ b/caffe-layers/src/caffe/layers/pose_data_layer.cpp @@ -0,0 +1,128 @@ +#include + +#include + +#include "caffe/layers/pose_data_layer.hpp" + +namespace caffe { + +template +void PoseDataLayer::DataLayerSetUp(const vector*>& bottom, + const vector*>& top) { + batch_size_ = this->layer_param_.memory_data_param().batch_size(); + channels_ = this->layer_param_.memory_data_param().channels(); + height_ = this->layer_param_.memory_data_param().height(); + width_ = this->layer_param_.memory_data_param().width(); + size_ = channels_ * height_ * width_; + CHECK_GT(batch_size_ * size_, 0) << + "batch_size, channels, height, and width must be specified and" + " positive in memory_data_param"; + int label_shape_[] = {batch_size_, 4}; + vector 
label_shape(label_shape_, label_shape_+2); + top[0]->Reshape(batch_size_, channels_, height_, width_); + top[1]->Reshape(label_shape); + added_data_.Reshape(batch_size_, channels_, height_, width_); + added_label_.Reshape(label_shape); + data_ = NULL; + labels_ = NULL; + added_data_.cpu_data(); + added_label_.cpu_data(); +} + +template +void PoseDataLayer::AddDatumVector(const vector& datum_vector) { + CHECK(!has_new_data_) << + "Can't add data until current data has been consumed."; + size_t num = datum_vector.size(); + CHECK_GT(num, 0) << "There is no datum to add."; + CHECK_EQ(num % batch_size_, 0) << + "The added data must be a multiple of the batch size."; + added_data_.Reshape(num, channels_, height_, width_); + int label_shape_[] = {(int)num, 4}; + vector label_shape(label_shape_, label_shape_+2); + added_label_.Reshape(label_shape); + // Apply data transformations (mirror, scale, crop...) + this->data_transformer_->Transform(datum_vector, &added_data_); + // Copy Labels + Dtype* top_label = added_label_.mutable_cpu_data(); + for (int item_id = 0; item_id < num; ++item_id) { + top_label[item_id] = datum_vector[item_id].label(); + } + // num_images == batch_size_ + Dtype* top_data = added_data_.mutable_cpu_data(); + Reset(top_data, top_label, num); + has_new_data_ = true; +} + +template +void PoseDataLayer::AddMatVector(const vector& mat_vector, + const vector& labels) { + size_t num = mat_vector.size(); + CHECK(!has_new_data_) << + "Can't add mat until current data has been consumed."; + CHECK_GT(num, 0) << "There is no mat to add"; + CHECK_EQ(num % batch_size_, 0) << + "The added data must be a multiple of the batch size."; + added_data_.Reshape(num, channels_, height_, width_); + int label_shape_[] = {(int)num, 4}; + vector label_shape(label_shape_, label_shape_+2); + added_label_.Reshape(label_shape); + // Apply data transformations (mirror, scale, crop...) + this->data_transformer_->Transform(mat_vector, &added_data_); + // Copy Labels + Dtype* top_label = added_label_.mutable_cpu_data(); + for (int item_id = 0; item_id < num; ++item_id) { + top_label[item_id] = labels[item_id]; + } + // num_images == batch_size_ + Dtype* top_data = added_data_.mutable_cpu_data(); + Reset(top_data, top_label, num); + has_new_data_ = true; +} + +template +void PoseDataLayer::Reset(Dtype* data, Dtype* labels, int n) { + CHECK(data); + CHECK(labels); + CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size"; + // Warn with transformation parameters since a memory array is meant to + // be generic and no transformations are done with Reset(). 
+ //if (this->layer_param_.has_transform_param()) { + // LOG(WARNING) << this->type() << " does not transform array data on Reset()"; + //} + data_ = data; + labels_ = labels; + n_ = n; + pos_ = 0; +} + +template +void PoseDataLayer::set_batch_size(int new_size) { + CHECK(!has_new_data_) << + "Can't change batch_size until current data has been consumed."; + batch_size_ = new_size; + added_data_.Reshape(batch_size_, channels_, height_, width_); + int label_shape_[] = {(int)batch_size_, 4}; + vector label_shape(label_shape_, label_shape_+2); + added_label_.Reshape(label_shape); +} + +template +void PoseDataLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + CHECK(data_) << "PoseDataLayer needs to be initalized by calling Reset"; + top[0]->Reshape(batch_size_, channels_, height_, width_); + int label_shape_[] = {(int)batch_size_, 4}; + vector label_shape(label_shape_, label_shape_+2); + added_label_.Reshape(label_shape); + top[0]->set_cpu_data(data_ + pos_ * size_); + top[1]->set_cpu_data(labels_ + pos_); + pos_ = (pos_ + batch_size_) % n_; + if (pos_ == 0) + has_new_data_ = false; +} + +INSTANTIATE_CLASS(PoseDataLayer); +REGISTER_LAYER_CLASS(PoseData); + +} // namespace caffe diff --git a/content/calib/calibration.yml b/content/calib/calibration.yml new file mode 100755 index 0000000..158c752 --- /dev/null +++ b/content/calib/calibration.yml @@ -0,0 +1,11 @@ +%YAML:1.0 +camera_matrix: !!opencv-matrix + rows: 3 + cols: 3 + dt: f + data: [ 1891.07, 0.0, 640, 0.0, 1891.07, 360, 0.0, 0.0, 1.0] +dist_coeffs: !!opencv-matrix + rows: 1 + cols: 5 + dt: f + data: [1.68091e-02, -7.14552e-02, -5.65886e-03, -5.23482e-04, -3.39946e-02] diff --git a/content/calib/monitor_desktop.yml b/content/calib/monitor_desktop.yml new file mode 100755 index 0000000..6879cf3 --- /dev/null +++ b/content/calib/monitor_desktop.yml @@ -0,0 +1,13 @@ +%YAML:1.0 +monitor_W: 516 +monitor_H: 323 +monitor_R: !!opencv-matrix + rows: 3 + cols: 3 + dt: f + data: [ -0.99955, -0.02891, -0.0082861, -0.028948, 0.99957, 0.0044949, 0.0081526, 0.0047327, -0.99996] +monitor_T: !!opencv-matrix + rows: 3 + cols: 1 + dt: f + data: [269.41, 48.561, 5.8344] diff --git a/content/calib/monitor_laptop.yml b/content/calib/monitor_laptop.yml new file mode 100755 index 0000000..12ab1d9 --- /dev/null +++ b/content/calib/monitor_laptop.yml @@ -0,0 +1,13 @@ +%YAML:1.0 +monitor_W: 310 +monitor_H: 174 +monitor_R: !!opencv-matrix + rows: 3 + cols: 3 + dt: f + data: [ -0.99988, -0.009735, -0.01203, -0.0094674, 0.99971, -0.022108, 0.012242, -0.021992, -0.99968] +monitor_T: !!opencv-matrix + rows: 3 + cols: 1 + dt: f + data: [149.91, 29.575, -18.884] diff --git a/content/model/face_model.yml b/content/model/face_model.yml new file mode 100755 index 0000000..7d8f397 --- /dev/null +++ b/content/model/face_model.yml @@ -0,0 +1,6 @@ +%YAML:1.0 +face_model: !!opencv-matrix + rows: 3 + cols: 6 + dt: f + data: [ -45.096768, -21.312858, 21.312858, 45.096768, -26.299577, 26.299577, -0.483773,0.483773, 0.483773, -0.483773, 68.595035,68.595035, 2.397030, -2.397030, -2.397030, 2.397030, -0.000000, -0.000000] diff --git a/default.cfg b/default.cfg new file mode 100644 index 0000000..66c18ab --- /dev/null +++ b/default.cfg @@ -0,0 +1,26 @@ +## input and ouput +# input_type = camera # camera, video, or directory +# input = 0 # caemra id, video file name, or directory of image files +# output = /BS/zhang-semi/work/opengaze/test/ + +# input = YOUR_VIDEO OR IMAGE FOLDER +# output = MUST BE A DIRECTORY + +## gaze estimation method +# gaze_method = MPIIGaze # 
OpenFace MPIIGaze +# gpu_id = 0 + +## gaze estimation method/model selection +# face_model = 1 # 1 for the face model, 0 for eye image model + +## CNN model for face image, trained on MPIIGaze + EYEDIAP HD +# cnn_param_path = YOUR_PATH/alexnet_face.prototxt +# cnn_model_path = YOUR_PATH/alexnet_face.caffemodel + +# calibration file, calibration file +# calib_camera = YOUR_PATH/calibration.yml +# calib_screen = YOUR_PATH/monitor.yml + +## parameters for personal calibration +# per_model_save_path = YOUR_PATH/user1.txt +# num_calibration = 9 \ No newline at end of file diff --git a/download_models.sh b/download_models.sh new file mode 100755 index 0000000..40f7433 --- /dev/null +++ b/download_models.sh @@ -0,0 +1,9 @@ +OPENGAZE_DIR=~/OpenGaze + +mkdir -p $OPENGAZE_DIR/content/caffeModel + +cd $OPENGAZE_DIR/content/caffeModel + +wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.prototxt + +wget https://datasets.d2.mpi-inf.mpg.de/MPIIGaze/alexnet_face.caffemodel \ No newline at end of file diff --git a/exe/CMakeLists.txt b/exe/CMakeLists.txt new file mode 100644 index 0000000..8b1c6e3 --- /dev/null +++ b/exe/CMakeLists.txt @@ -0,0 +1,36 @@ +cmake_minimum_required(VERSION 3.0) +project(OpenGazeExe VERSION 1.0) + +set(OPENGAZE_DIR "$ENV{HOME}/OpenGaze") + +add_definitions(-DOPENGAZE_DIR="${CMAKE_SOURCE_DIR}") + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/) + +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + +set(CMAKE_CXX_STANDARD 11) + +find_package( OpenCV 3.1 REQUIRED COMPONENTS calib3d highgui objdetect imgproc core) + +# Boost, for reading configuration file +find_package(Boost 1.5 COMPONENTS system filesystem timer thread program_options REQUIRED) +set(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR} ${Boost_INCLUDE_DIR}/boost) + +include_directories(/usr/local/include/opengaze /usr/local/cuda/include ${OpenCV_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS}) +link_directories(/usr/lib /usr/local/lib /usr/local/cuda/lib64 ${Boost_LIBRARY_DIRS} ${OPENGAZE_DIR}/3rdParty) + +## -lX11 is for getting screen resolution in pixel in the Linux system +set(LIBS opengaze LandmarkDetector ${OpenCV_LIBS} ${Boost_LIBRARIES} tbb openblas dlib lmdb glog caffe X11) + +add_executable(GazeVisualization GazeVisualization.cpp) +target_link_libraries(GazeVisualization ${LIBS}) + +add_executable(Calibration Calibration.cpp) +target_link_libraries(Calibration ${LIBS}) + +add_executable(GazeEstimation GazeEstimation.cpp) +target_link_libraries(GazeEstimation ${LIBS}) + +add_executable(DataExtraction DataExtraction.cpp) +target_link_libraries(DataExtraction ${LIBS}) \ No newline at end of file diff --git a/exe/Calibration.cpp b/exe/Calibration.cpp new file mode 100644 index 0000000..33ba553 --- /dev/null +++ b/exe/Calibration.cpp @@ -0,0 +1,29 @@ +#include +#include +#include + +#include + +#include "opengaze/opengaze.hpp" + +using namespace std; +using namespace cv; +using namespace opengaze; + +vector get_arguments(int argc, char **argv) { + vector arguments; + for (int i = 0; i < argc; ++i){ + arguments.emplace_back(string(argv[i])); + } + return arguments; +} + +int main(int argc, char** argv) +{ + vector arguments = get_arguments(argc, argv); + OpenGaze open_gaze(argc, argv); + int num_calibration_point = 20; + open_gaze.runPersonalCalibration(num_calibration_point); + + return 1; +} \ No newline at end of file diff --git a/exe/DataExtraction.cpp b/exe/DataExtraction.cpp new file mode 100644 index 0000000..4b5551f --- /dev/null +++ b/exe/DataExtraction.cpp @@ -0,0 +1,28 @@ +#include +#include 
+#include + +#include + +#include "opengaze/opengaze.hpp" + +using namespace std; +using namespace cv; +using namespace opengaze; + +vector get_arguments(int argc, char **argv) { + vector arguments; + for (int i = 0; i < argc; ++i){ + arguments.emplace_back(string(argv[i])); + } + return arguments; +} + +int main(int argc, char** argv) +{ + vector arguments = get_arguments(argc, argv); + OpenGaze open_gaze(argc, argv); + open_gaze.runDataExtraction(); + + return 1; +} \ No newline at end of file diff --git a/exe/GazeEstimation.cpp b/exe/GazeEstimation.cpp new file mode 100644 index 0000000..4b1e737 --- /dev/null +++ b/exe/GazeEstimation.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + +#include + +#include "opengaze/opengaze.hpp" + +using namespace std; +using namespace cv; +using namespace opengaze; + +vector get_arguments(int argc, char **argv) { + vector arguments; + for (int i = 0; i < argc; ++i){ + arguments.emplace_back(string(argv[i])); + } + return arguments; +} + +int main(int argc, char** argv) +{ + vector arguments = get_arguments(argc, argv); + OpenGaze open_gaze(argc, argv); + open_gaze.runGazeOnScreen(); + + return 1; +} \ No newline at end of file diff --git a/exe/GazeVisualization.cpp b/exe/GazeVisualization.cpp new file mode 100644 index 0000000..773dec5 --- /dev/null +++ b/exe/GazeVisualization.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + +#include + +#include "opengaze/opengaze.hpp" + +using namespace std; +using namespace cv; +using namespace opengaze; + +vector get_arguments(int argc, char **argv) { + vector arguments; + for (int i = 0; i < argc; ++i){ + arguments.emplace_back(string(argv[i])); + } + return arguments; +} + +int main(int argc, char** argv) +{ + vector arguments = get_arguments(argc, argv); + OpenGaze open_gaze(argc, argv); + open_gaze.runGazeVisualization(); + + return 1; +} \ No newline at end of file diff --git a/exe/test.mp4 b/exe/test.mp4 new file mode 100644 index 0000000..92232b5 Binary files /dev/null and b/exe/test.mp4 differ diff --git a/imgs/logo_mpiinf.png b/imgs/logo_mpiinf.png new file mode 100644 index 0000000..2e6c37f Binary files /dev/null and b/imgs/logo_mpiinf.png differ diff --git a/imgs/logo_osaka-u.png b/imgs/logo_osaka-u.png new file mode 100755 index 0000000..6ed066a Binary files /dev/null and b/imgs/logo_osaka-u.png differ diff --git a/imgs/logo_pui.png b/imgs/logo_pui.png new file mode 100644 index 0000000..3ec86fa Binary files /dev/null and b/imgs/logo_pui.png differ diff --git a/include/data.hpp b/include/data.hpp new file mode 100644 index 0000000..c5c28d3 --- /dev/null +++ b/include/data.hpp @@ -0,0 +1,88 @@ +#ifndef DATA_HPP +#define DATA_HPP + +#include + +namespace opengaze{ + +/** + * face and facial landmark detection data + * @param face_id personal id from tracking across frames + * @param certainty detection score, 1 is the best, -1 is the worst + * @param landmarks detected six facial landmarks as four eye corners and two mouth corners + * @param face_bb detected face bounding box + */ +struct FaceData +{ + unsigned long face_id; + double certainty; + cv::Point2f landmarks[6]; + cv::Rect_ face_bb; +}; +/** + * eye image related data + * @param leye_pos/reye_pose 3D eyeball center position for left and right eyes in the original camera coordinate system + * @param leye_img/reye_img eye image + * @param leye_rot/reye_rot rotation matrix during the data normalization procedure + */ +struct EyeData +{ + // cv::Mat head_r, head_t; + cv::Mat leye_pos, reye_pos; // + + // normalized eyes + cv::Mat leye_img, 
reye_img; + cv::Mat leye_rot, reye_rot; +}; +/** + * face patch data related to data normalization + * @param head_r head pose as center of the face + * @param head_t head translation as center of the face + * @param face_rot rotation matrix during the data normalization procedure + * @param face_center 3D face center in the original camera coordinate system + * @param debug_img use for debug to show the normalized face image + * @param face_patch normalized face image + */ +struct FacePatchData +{ + cv::Mat head_r, head_t; + cv::Mat face_rot; + cv::Mat face_center; + cv::Mat debug_img; + cv::Mat face_patch; +}; +/** + * gaze data + * @param lgaze3d/lgaze3d gaze directions of left and right eyes in the camera coordinate system + * @param gaze3d gaze direction estimated from face patch in the in the camera coordinate system + * @param lgaze2d/rgaze2d projected gaze positions on the screen coordinate from left and right eyes + * @param gaze2d projected gaze positions from face patch on the screen coordinate + */ +struct GazeData +{ + cv::Vec3f lgaze3d, rgaze3d; + cv::Vec3f gaze3d; + cv::Point2f lgaze2d, rgaze2d; + cv::Point2f gaze2d; +}; +/** + * The general output data structure + * @param face_data store face and facial landmark detection data + * @param eye_data store data related to eye image input + * @param face_patch_data normalized face path data + * @param gaze_data gaze data in 2D and 3D spaces + */ +struct Sample +{ + FaceData face_data; + EyeData eye_data; + FacePatchData face_patch_data; + GazeData gaze_data; +}; + +} + + + + +#endif //DATA_HPP diff --git a/include/face_detector.hpp b/include/face_detector.hpp new file mode 100644 index 0000000..3842a3a --- /dev/null +++ b/include/face_detector.hpp @@ -0,0 +1,72 @@ +#ifndef FACE_DETECTOR_HPP +#define FACE_DETECTOR_HPP + +#include +#include +#include +#include + +#if USE_DLIB +// if we use dlib +#include +#include +#include +#include +#include +#include +#endif + + +#include "data.hpp" + +namespace opengaze{ + +class FaceDetector { +public: + FaceDetector(); + ~FaceDetector(); + + /** + * face and facial landmark detection selection + * The current implementation is only OpenFace. 
OpenFace use dlib for face detection + */ + enum Method{OpenFace, OpenCV, Dlib}; + + /** + * main function to detect and track face and facial landmarks + * @param input_img input image + * @param output output data structure + */ + void track_faces(cv::Mat input_img, std::vector &output); + + void reset(); + void setMethodType(Method method_type) {method_type_ = method_type;} + Method getMethodType() {return method_type_;} + void initialize(int number_users); + +private: + Method method_type_; + + #if USE_DLIB + dlib::frontal_face_detector dlib_detector_; + dlib::shape_predictor dlib_sp_; + #endif + + // parameters for OpenFace + std::vector active_models_; + unsigned long num_faces_max_; + int detection_skip_frames_, tracking_loss_limit_; + float detection_resize_rate_; + float nonoverlap_threshold_; + double certainty_threshold_; + int landmark_indices_[6]; + int frame_counter_; + unsigned long current_face_id_; + std::vector face_ids_; +}; +} + + + + +#endif //FACE_DETECTOR_HPP diff --git a/include/gaze_estimator.hpp b/include/gaze_estimator.hpp new file mode 100644 index 0000000..581f56c --- /dev/null +++ b/include/gaze_estimator.hpp @@ -0,0 +1,65 @@ +#ifndef GAZE_ESTIMATOR_HPP +#define GAZE_ESTIMATOR_HPP + +#include + +#include "data.hpp" +#include "face_detector.hpp" +#include "normalizer.hpp" +#include "gaze_predictor.hpp" + +namespace opengaze{ + +class GazeEstimator { +public: + GazeEstimator(); + ~GazeEstimator(); + + /** + * On the current implementation, we only has the "MPIIGaze" method which uses the input face/eye image + * and output gaze direction directly. It is an appearance-based method. The "OpenFace" can also output + * the gaze vector according to the pupil detection results. However, "OpenFace" implementation is not + * included inside our OpenGaze toolkit yet. + */ + enum Method{MPIIGaze, OpenFace}; + /** + * for the "MPIIGaze" method, the input image can be face or eye. The full-face patch model can output + * more accurate gaze prediction than the eye image model, while the eye image base model is much faster. + */ + enum InputType{face, eye}; + + /** + * the main function to estimate the gaze. + * It performs the face and facial landmarks detection, head pose estimation and then gaze prediction. 
+ * @param input_image input scene image + * @param output data structure for output + */ + void estimateGaze(cv::Mat input_image, std::vector &output); + void getImagePatch(cv::Mat input_image, std::vector &outputs); + void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist); + void setRootPath(std::string root_path); + void setMethod(Method, std::vector arguments); + void initialFaceDetector(int number_users); + + Method method_type_; + InputType input_type_; // the input type + +private: + // class instances + FaceDetector face_detector_; + Normalizer normalizer_; + GazePredictor gaze_predictor_; + // camera intrinsic matrix + cv::Mat camera_matrix_; + // camera distortion matrix + cv::Mat camera_dist_; + // the root pat is used for load configuration file and models + std::string root_path_; +}; + +} + + + + +#endif //GAZE_ESTIMATOR_HPP diff --git a/include/gaze_predictor.hpp b/include/gaze_predictor.hpp new file mode 100644 index 0000000..d23a53b --- /dev/null +++ b/include/gaze_predictor.hpp @@ -0,0 +1,29 @@ +#ifndef GAZE_PREDICTOR_HPP +#define GAZE_PREDICTOR_HPP + +#include +#include "data.hpp" +#include "face_detector.hpp" + + +namespace opengaze{ + +class GazePredictor { + +public: + GazePredictor(); + ~GazePredictor(); + + void initiaMPIIGaze(std::vector arguments); + cv::Point3f predictGazeMPIIGaze(cv::Mat face_patch); + +private: + int model_type_; + bool is_extract_feature; +}; + +} + + + +#endif //GAZE_PREDICTOR_HPP diff --git a/include/input_handler.hpp b/include/input_handler.hpp new file mode 100644 index 0000000..9b77742 --- /dev/null +++ b/include/input_handler.hpp @@ -0,0 +1,125 @@ +#ifndef INPUT_HANDLER_HPP +#define INPUT_HANDLER_HPP + +#include +#include +#include +#include +#include "data.hpp" + +namespace opengaze { + +class InputHandler { +public: + enum InputType {Camera, Video, Image, Directory, Memory}; + + InputHandler(); + ~InputHandler(); + + /** + * get the camera intrisic parameters + * @param camera_matrix camera instric matrix + * @param camera_dist caemra distortion matrix + */ + void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist){ + camera_matrix_ = std::move(camera_matrix); + camera_distortion_ = std::move(camera_dist); + } + + /** + * function to return next sample, could come from any input source + * @return next sample + */ + cv::Mat getNextSample(); + + /** + * set the input type + * @param type the input typ, could be found in InputType defination + */ + void setInputType(InputType type){input_type_ = type;} + + /** + * set the input + * according the input type, here the input value are different. + * For the type "Camera", this input value indicates the camera id + * For the type "video", this input value is the video file name + * For input type "Directory", this input value is the directory path + */ + void setInput(int camera_id) {camera_id_ = camera_id;} + void setInput(std::vector images) {images_ = std::move(images);} + void setInput(std::string input_path); + + /** + * read the parameters related to the screen + * @param calib_file file for the configuration + */ + void readScreenConfiguration(std::string calib_file); + /** + * read the camera instrinic parameters from the configuration file + * @param calib_file file for the configuration + */ + void readCameraConfiguration(std::string calib_file); + + /** + * When the 3D gaze vector is achieved, there is a need to project the gaze on the 2D screen. 
+ * This function also needs the input to indicate if use the full-face model or not, + * since the initial of gaze vector will be center of the face for the full-face models + * and eye center for the eye-based models. + * @param input input data contains the 3D gaze vector + * @param is_face_model a boolen value indicates if the gaze vectors is from face model or eye model + */ + void projectToDisplay(std::vector &input, bool is_face_model=true); + + int getFrameHeight(){return cap_.get(cv::CAP_PROP_FRAME_HEIGHT);} + int getFrameWidth(){return cap_.get(cv::CAP_PROP_FRAME_WIDTH);} + InputType getInputType() {return input_type_;} + int getScreenWidth() {return screen_width_;} + int getScreenHeight() {return screen_height_;} + std::string getFileName() {return current_file_name_;} + + cv::Point2f mapToDisplay(cv::Vec3f obj_center, cv::Vec3f gaze_point); + + void initialize(); + bool closeInput(); + void getScreenResolution(int &width, int &height); + + cv::Mat getCameraMatrix() { return camera_matrix_;} + cv::Mat getCameraDistortion() {return camera_distortion_;} + void setFrameSize(int frame_width, int frame_height); + + bool isReachEnd() {return is_reach_end_;} + + cv::Mat camera_matrix_; + cv::Mat camera_distortion_; + +private: + + // indicator if we reach the end of sample stream + bool is_reach_end_; + + int camera_id_; + int sample_height_, sample_width_; + std::vector images_; + std::string input_path_; + std::string input_file_video_name_; + int screen_width_, screen_height_; + + // monitor + float monitor_W_, monitor_H_; // monitor width and height in mm + cv::Mat monitor_R_, monitor_T_; + cv::Vec3f monitor_corners_[4]; + cv::Mat monitor_normal_; + + // input variable + InputType input_type_; + cv::VideoCapture cap_; + std::string current_file_name_; + + // variable for directory input + boost::filesystem::directory_iterator current_itr_; + +}; + +} + +#endif //INPUT_HANDLER_HPP diff --git a/include/normalizer.hpp b/include/normalizer.hpp new file mode 100644 index 0000000..a72e2d9 --- /dev/null +++ b/include/normalizer.hpp @@ -0,0 +1,42 @@ +#ifndef NORMALIZER_HPP +#define NORMALIZER_HPP + +#include +#include "data.hpp" + +namespace opengaze{ +class Normalizer { + +public: + Normalizer(); + ~Normalizer(); + + void estimateHeadPose(const cv::Point2f *landmarks, opengaze::Sample &sample); + + void setCameraMatrix(cv::Mat input); + + void loadFaceModel(std::string path); + + void setParameters(int focal_length, int distance, int img_w, int img_h); + + cv::Mat normalizeFace(cv::Mat input_image, Sample &sample); + + std::vector normalizeEyes(cv::Mat input_image, Sample &sample); + + cv::Mat cvtToCamera(cv::Point3f input, const cv::Mat cnv_mat); + +private: + cv::Mat camera_matrix_; + std::vector face_model_; + cv::Mat face_model_mat_, cam_norm_; + float focal_norm_, distance_norm_; + cv::Size roiSize_norm_; +}; + + +} + + + + +#endif //NORMALIZER_HPP diff --git a/include/opengaze.hpp b/include/opengaze.hpp new file mode 100644 index 0000000..c175be6 --- /dev/null +++ b/include/opengaze.hpp @@ -0,0 +1,80 @@ +#ifndef OPEN_GAZE_H +#define OPEN_GAZE_H + +#include +#include +#include +#include + +#include + +#include "input_handler.hpp" +#include "gaze_estimator.hpp" +#include "data.hpp" +#include "personal_calibrator.hpp" + +namespace opengaze { + +class OpenGaze { +public: + explicit OpenGaze(int argc, char** argv); //read configuration file + ~OpenGaze(); + + // main function to estimate and show the gaze vector drawn on the input face image. 
+ void runGazeVisualization(); + + /** + * main function to run personal calibration. + * @param num_calibration_point the numbers of points for calibration. + */ + void runPersonalCalibration(int num_calibration_point=5); + + // main function to estimate and draw gaze point on the screen. + void runGazeOnScreen(); + + // main function to extract the face image from input image. The face image can then + // be used to train a custom gaze estimation model + void runDataExtraction(); + +private: + // visualization + /** + * function to draw the gaze vector on the input face image. + * @param sample the input data includes the gaze vector, head pose etc. + * @param image the input image contains the face. This function will draw the gaze vector on this input image + */ + void drawGazeOnFace(opengaze::Sample sample, cv::Mat &image); + // draw the detected facial landmark + void drawLandmarks(opengaze::Sample sample, cv::Mat &image); + // draw the estimated gaze on the top left corner of the input image + // to show the relative position on the screen. In this case, + //the user can see both the input image and the projected gaze target on the screen. + //This function is mainly used for debugging. + void drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image); + // estimate and draw gaze point on the screen + void drawGazeOnScreen(opengaze::Sample sample, cv::Mat &image); + + // show debug mode will show the gaze draw on the face + bool show_debug_; + + //class instances + InputHandler input_handler_; + GazeEstimator gaze_estimator_; + + // input camera id + int camera_id_; + // temporary variables to store the input path, output path, input type + boost::filesystem::path input_dir_; + InputHandler::InputType input_type_; + boost::filesystem::path output_dir_; + + bool is_face_model_; + bool is_save_video_; + + // path to save the personal calibration model + std::string per_model_save_path_; +}; + +} + +#endif //OPEN_GAZE_H diff --git a/include/personal_calibrator.hpp b/include/personal_calibrator.hpp new file mode 100644 index 0000000..ddfbfdb --- /dev/null +++ b/include/personal_calibrator.hpp @@ -0,0 +1,64 @@ +#ifndef PERSONAL_CALIBRATOR_HPP +#define PERSONAL_CALIBRATOR_HPP + +#include +#include +#include + +class PersonalCalibrator { + +public: + PersonalCalibrator(int screen_width, int screen_height); + ~PersonalCalibrator(); + /** + * generate the random locations for calibration + * @param num_points number of points to generate + */ + void generatePoints(int num_points); + // get the show window ready, it should be full-screen + void initialWindow(); + // show the next calibration point + bool showNextPoint(); + // wait for 0.5 second to receive the confirmation (mouse click) from user + void confirmClicking(); + /** + * generate a polynomial function for the personal calibration + * @param prediction prediction from gaze estimation method + * @param ground_truth calibration points locations on the screen + * @param order the order of polynomial function, 1 means the linear + */ + void generateModel(std::vector prediction, std::vector ground_truth, int order=1); + /** + * save the personal calibration model + * @param file_path path to save the model + */ + void saveModel(std::string file_path); + /** + * load the personal calibration model + * @param file_path path to load the model + */ + void loadModel(std::string file_path); + /** + * return current calibration point location on the screen + * @return location on the screen + */ + cv::Point2f getCurrentPoint() {return 
points_[index_point_];}
+    // function to calculate the polynomial function
+    void calibratePolynomial();
+
+private:
+    // indicator whether the user clicked the mouse or not
+    bool is_click_;
+    // number of points for personal calibration
+    int num_points_;
+    // index of the current calibration point
+    int index_point_;
+    // vector to store the generated calibration points
+    std::vector<cv::Point2f> points_;
+    int screen_width_, screen_height_, center_radius_; // monitor width and height in pixel
+    // personal model
+    cv::Mat model_matrix_;
+};
+
+
+#endif //PERSONAL_CALIBRATOR_HPP
diff --git a/install.sh b/install.sh
new file mode 100644
index 0000000..8fd9279
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -e
+set -o pipefail
+
+if [ $# -ne 0 ]
+  then
+    echo "Usage: install.sh"
+    exit 1
+fi
+
+# Essential Dependencies
+echo "Installing essential dependencies..."
+sudo apt-get -y update
+sudo apt-get -y install build-essential
+sudo apt-get -y install cmake
+sudo apt-get -y install libopenblas-dev liblapack-dev
+sudo apt-get -y install git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev
+sudo apt-get -y install python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libdc1394-22-dev
+echo "Essential dependencies installed."
+
+# OpenCV Dependency
+echo "Downloading OpenCV..."
+wget https://github.com/opencv/opencv/archive/3.4.0.zip
+unzip 3.4.0.zip
+cd opencv-3.4.0
+mkdir -p build
+cd build
+echo "Installing OpenCV..."
+cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_TBB=ON -D WITH_CUDA=OFF -D BUILD_SHARED_LIBS=ON ..
+make -j4
+sudo make install
+cd ../..
+rm 3.4.0.zip
+sudo rm -r opencv-3.4.0
+echo "OpenCV installed."
+
+# dlib dependency
+echo "Downloading dlib"
+wget http://dlib.net/files/dlib-19.13.tar.bz2;
+tar xf dlib-19.13.tar.bz2;
+cd dlib-19.13;
+mkdir -p build;
+cd build;
+echo "Installing dlib"
+cmake ..;
+cmake --build . --config Release;
+sudo make install;
+sudo ldconfig;
+cd ../..;
+rm -r dlib-19.13.tar.bz2
+echo "dlib installed"
+
+# Boost C++ Dependency
+echo "Installing Boost..."
+sudo apt-get -y install libboost-all-dev
+echo "Boost installed."
+
+# Dependencies for Caffe (the dev packages, not the bare upstream project names)
+sudo apt-get -y install libprotobuf-dev protobuf-compiler libgoogle-glog-dev libgflags-dev libhdf5-serial-dev
\ No newline at end of file
diff --git a/pre-complile/opengaze.deb b/pre-complile/opengaze.deb
new file mode 100644
index 0000000..3e8f6c3
Binary files /dev/null and b/pre-complile/opengaze.deb differ
diff --git a/pre-complile/readme.txt b/pre-complile/readme.txt
new file mode 100644
index 0000000..4c9b397
--- /dev/null
+++ b/pre-complile/readme.txt
@@ -0,0 +1,3 @@
+This is a pre-compiled package for OpenGaze, with the OpenFace and Caffe libraries included.
+To use it, you still need to install the other dependencies: the NVIDIA driver, CUDA, and cuDNN.
+To install, just run `sudo dpkg -i build_1.0-1_amd64.deb` \ No newline at end of file diff --git a/src/face_detector.cpp b/src/face_detector.cpp new file mode 100644 index 0000000..81a21e3 --- /dev/null +++ b/src/face_detector.cpp @@ -0,0 +1,273 @@ +#include +#include + +// if we use OpenFace +#if USE_OPENFACE +#include // from "OpenFace-master/lib/local/LandmarkDetector/include/" +#endif + +#include "face_detector.hpp" + +using namespace std; +using namespace cv; + +vector det_parameters_; +vector clnf_models_; + +namespace opengaze { + +FaceDetector::FaceDetector() { + method_type_ = Method::OpenFace; +} + +FaceDetector::~FaceDetector() {} + +void FaceDetector::initialize(int number_users=5) { + string root_path = OPENFACE_DIR; + root_path = root_path + "/build/bin"; + //string openface_root = OpenFace_ROOT_DIR; + // (currently) hard-coded setting + num_faces_max_ = number_users; + detection_resize_rate_ = 2.0; // resize the input image to detect face, crucial for speed + detection_skip_frames_ = 1; + nonoverlap_threshold_ = 0.5; + certainty_threshold_ = 0.0; // the smaller the better, 1 is the best, -1 is the worst + landmark_indices_[0] = 36; landmark_indices_[1] = 39; landmark_indices_[2] = 42; + landmark_indices_[3] = 45; landmark_indices_[4] = 48; landmark_indices_[5] = 54; + tracking_loss_limit_ = 10; + // initialize the tracking models + LandmarkDetector::FaceModelParameters det_parameter; + det_parameter.reinit_video_every = -1; // This is so that the model would not try re-initialising itself + det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR; + + det_parameter.model_location = root_path + "/model/main_clm_wild.txt"; + det_parameter.haar_face_detector_location = root_path + "/classifiers/haarcascade_frontalface_alt.xml";// this line will be disable due to "curr_face_detector" + det_parameter.mtcnn_face_detector_location = root_path + "/model/mtcnn_detector/MTCNN_detector.txt"; + + det_parameter.use_face_template = true; + det_parameter.reinit_video_every = 5; + // det_parameter.quiet_mode = true; not avaliable fro OpenFace v2.1 + // // For in the wild fitting these parameters are suitable + det_parameter.window_sizes_init = vector(4); + det_parameter.window_sizes_init[0] = 15; + det_parameter.window_sizes_init[1] = 13; + det_parameter.window_sizes_init[2] = 11; + det_parameter.window_sizes_init[3] = 9; + det_parameter.sigma = 1.25; + det_parameter.reg_factor = 35; + det_parameter.weight_factor = 2.5; + det_parameter.num_optimisation_iteration = 10; + det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR; + det_parameters_.push_back(det_parameter); + + LandmarkDetector::CLNF clnf_model_ = LandmarkDetector::CLNF(det_parameter.model_location); + if (!clnf_model_.loaded_successfully){ + cout << "ERROR: Could not load the landmark detector" << endl; + exit(-1); + } + clnf_model_.face_detector_HAAR.load(det_parameter.haar_face_detector_location); + clnf_model_.haar_face_detector_location = det_parameter.haar_face_detector_location; + clnf_model_.face_detector_MTCNN.Read(det_parameter.mtcnn_face_detector_location); + clnf_model_.mtcnn_face_detector_location = det_parameter.mtcnn_face_detector_location; + + // If can't find MTCNN face detector, default to HOG one + if (det_parameter.curr_face_detector == LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR && clnf_model_.face_detector_MTCNN.empty()){ + cout << "INFO: defaulting to HOG-SVM face detector" << endl; + det_parameter.curr_face_detector = 
LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR; + } + + clnf_models_.reserve(num_faces_max_); + clnf_models_.push_back(clnf_model_); + active_models_.push_back(false); + + for(int i=1; i& clnf_models, vector >& face_detections){ + // Go over the model and eliminate detections that are not informative (there already is a tracker there) + for (size_t model = 0; model < clnf_models.size(); ++model){ + + // See if the detections intersect + cv::Rect_ model_rect = clnf_models[model].GetBoundingBox(); + + for (int detection=face_detections.size()-1; detection >= 0; --detection) + { + double intersection_area = (model_rect & face_detections[detection]).area(); + double union_area = model_rect.area() + face_detections[detection].area() - 2 * intersection_area; + + // If the model is already tracking what we're detecting ignore the detection, this is determined by amount of overlap + if (intersection_area / union_area > 0.5) + { + face_detections.erase(face_detections.begin() + detection); + } + } + } +} + +double NonOverlapingDetection(const LandmarkDetector::CLNF &ref_model, const LandmarkDetector::CLNF &tgt_model) +{ + Rect_ ref_rect = ref_model.GetBoundingBox(); + Rect_ tgt_rect = tgt_model.GetBoundingBox(); + + double intersection_area = (ref_rect & tgt_rect).area(); + double union_area = ref_rect.area() + tgt_rect.area() - 2 * intersection_area; + + return intersection_area/union_area; +} + +void FaceDetector::track_faces(cv::Mat input_img, std::vector &output) { + if(input_img.channels() < 3){ + cout << "The input must be a color image!" < grayscale_image; + cvtColor(input_img, grayscale_image, CV_BGR2GRAY); + + bool all_models_active = true; + for(unsigned int model = 0; model < clnf_models_.size(); ++model) + { + if(!active_models_[model]) + { + all_models_active = false; + break; + } + } + + // Detect faces + // Get the detections (every Xth frame and when there are free models available for tracking) + std::vector > face_detections; + cv::Mat small_grayscale_image_; + if (frame_counter_ % detection_skip_frames_ == 0 && !all_models_active) { + // resized image for faster face detection + if (detection_resize_rate_ != 1) resize(grayscale_image, small_grayscale_image_, + Size(), 1.0/detection_resize_rate_, 1.0/detection_resize_rate_); + else small_grayscale_image_ = grayscale_image; + + if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR){ + vector confidences; + LandmarkDetector::DetectFacesHOG(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HOG, confidences); + } + else if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HAAR_DETECTOR){ + LandmarkDetector::DetectFaces(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HAAR); + } + else{ + vector confidences; + LandmarkDetector::DetectFacesMTCNN(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_MTCNN, confidences); + } + + // resize the face deteciton back + if (detection_resize_rate_ != 1) { + for(auto& face_detection : face_detections) { + face_detection.x *= detection_resize_rate_; + face_detection.y *= detection_resize_rate_; + face_detection.width *= detection_resize_rate_; + face_detection.height *= detection_resize_rate_; + } + } + + // Keep only non overlapping detections (also convert to a concurrent vector + NonOverlapingDetections(clnf_models_, face_detections); + } + + vector< tbb::atomic > face_detections_used(face_detections.size()); + // Go through every model and update 
the tracking + tbb::parallel_for(0, (int)clnf_models_.size(), [&](int model) { + //for (unsigned int model = 0; model < clnf_models_.size(); ++model) { + bool detection_success = false; + // If the current model has failed more than threshold, remove it + if (clnf_models_[model].failures_in_a_row > tracking_loss_limit_) { + active_models_[model] = false; + clnf_models_[model].Reset(); + } + // If the model is inactive reactivate it with new detections + if (!active_models_[model]){ + for (size_t detection_ind = 0; detection_ind < face_detections.size(); ++detection_ind) + { + // if it was not taken by another tracker take it (if it is false swap it to true and enter detection, this makes it parallel safe) + if (!face_detections_used[detection_ind].compare_and_swap(true, false)) { + // Reinitialise the model + clnf_models_[model].Reset(); + // This ensures that a wider window is used for the initial landmark localisation + clnf_models_[model].detection_success = false; + LandmarkDetector::DetectLandmarksInVideo(input_img, face_detections[detection_ind], clnf_models_[model], det_parameters_[model], grayscale_image); + // This activates the model + active_models_[model] = true; + face_ids_[model] = current_face_id_; + current_face_id_++; + // break out of the loop as the tracker has been reinitialised + break; + } + + } + } + else + { + // The actual facial landmark detection / tracking + detection_success = LandmarkDetector::DetectLandmarksInVideo(input_img, clnf_models_[model], det_parameters_[model], grayscale_image); + } + //} + }); + + // Go through every model and check the results + for(size_t model=0; model nonoverlap_threshold_) overlapping = true; + } + if(overlapping){ + active_models_[model] = false; + face_ids_[model] = 0; + clnf_models_[model].Reset(); + continue; + } + + if(clnf_models_[model].detection_certainty < certainty_threshold_) continue; + + Sample temp; + temp.face_data.certainty = clnf_models_[model].detection_certainty; + temp.face_data.face_id = face_ids_[model]; + temp.face_data.face_bb.x = (int)clnf_models_[model].GetBoundingBox().x; + temp.face_data.face_bb.y = (int)clnf_models_[model].GetBoundingBox().y; + temp.face_data.face_bb.height = (int)clnf_models_[model].GetBoundingBox().height; + temp.face_data.face_bb.width = (int)clnf_models_[model].GetBoundingBox().width; + for(int p=0; p<6; p++){ + int num_p = landmark_indices_[p]; + temp.face_data.landmarks[p] = Point2d( + clnf_models_[model].detected_landmarks.at(num_p,0), + clnf_models_[model].detected_landmarks.at(num_p+68,0) + ); + } + output.emplace_back(temp); + } +} + + + +} + diff --git a/src/gaze_estimator.cpp b/src/gaze_estimator.cpp new file mode 100644 index 0000000..02186e0 --- /dev/null +++ b/src/gaze_estimator.cpp @@ -0,0 +1,112 @@ +#include + +#include "gaze_estimator.hpp" + +using namespace std; +using namespace cv; + +namespace opengaze{ + +GazeEstimator::GazeEstimator() { +} +GazeEstimator::~GazeEstimator() {} + +void GazeEstimator::setRootPath(std::string root_path) { + normalizer_.loadFaceModel(root_path); +} + +void GazeEstimator::estimateGaze(cv::Mat input_image, std::vector &outputs) { + face_detector_.track_faces(input_image, outputs); // detect faces and facial landmarks + for (int i=0; i< outputs.size(); ++i) { + // estimate head pose first, no matter what gaze estimation method, head pose is estimated here + normalizer_.estimateHeadPose(outputs[i].face_data.landmarks, outputs[i]); + if (method_type_ == Method::MPIIGaze){ + + // if we use face model + if (input_type_ == InputType::face){ 
+ Mat face_patch = normalizer_.normalizeFace(input_image, outputs[i]); + //outputs[i].face_patch_data.debug_img = face_patch; + Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(face_patch); // gaze estimates in normalization space + Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].face_patch_data.face_rot); // convert gaze to camera coordinate system + gaze_3d.copyTo(outputs[i].gaze_data.gaze3d); + } + else if (input_type_ == InputType::eye) { + vector eye_iamges = normalizer_.normalizeEyes(input_image, outputs[i]); // generate eye images + // for left eye + Point3f gaze_norm = gaze_predictor_.predictGazeMPIIGaze(eye_iamges[0]); + Mat gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].eye_data.leye_rot); + gaze_3d.copyTo(outputs[i].gaze_data.lgaze3d); + // for right eye + Mat flip_right; + flip(eye_iamges[0], flip_right, 1); + gaze_norm = gaze_predictor_.predictGazeMPIIGaze(flip_right); // for left right image input + gaze_norm.x *= -1.0; + gaze_3d = normalizer_.cvtToCamera(gaze_norm, outputs[i].face_patch_data.face_rot); // convert gaze to camera coordinate system + gaze_3d.copyTo(outputs[i].gaze_data.rgaze3d); + } + } + else if (method_type_ == Method::OpenFace) { + cout << "Please use gaze estimation method MPIIGaze." << endl; + exit(EXIT_FAILURE); + } + } +} + +void GazeEstimator::getImagePatch(cv::Mat input_image, std::vector &outputs) { + face_detector_.track_faces(input_image, outputs); // detect faces and facial landmarks + for (int i=0; i< outputs.size(); ++i) { + // estimate head pose first, no matter what gaze estimation method, head pose is estimated here + normalizer_.estimateHeadPose(outputs[i].face_data.landmarks, outputs[i]); + if (method_type_ == Method::MPIIGaze){ + + // if we use face model + if (input_type_ == InputType::face){ + outputs[i].face_patch_data.face_patch = normalizer_.normalizeFace(input_image, outputs[i]); + } + else if (input_type_ == InputType::eye) { + vector eye_iamges = normalizer_.normalizeEyes(input_image, outputs[i]); // generate eye images + outputs[i].eye_data.leye_img = eye_iamges[0]; + outputs[i].eye_data.reye_img = eye_iamges[1]; + } + } + else if (method_type_ == Method::OpenFace) { + cout << "Please use method MPIIGaze for image patch extraction." 
+
+void GazeEstimator::setMethod(Method input_method_type, const std::vector<std::string> arguments={}) {
+    method_type_ = input_method_type;
+
+    if (method_type_ == Method::MPIIGaze) {
+        gaze_predictor_.initiaMPIIGaze(arguments);
+        if (arguments.size() < 3) // arguments[2] is read below; the original check of "< 2" could read past the end
+            input_type_ = InputType::face;
+        else {
+            if (arguments[2] == "face"){
+                input_type_ = InputType::face;
+                normalizer_.setParameters(1600, 1000, 224, 224);
+            }
+            else if (arguments[2] == "eye") {
+                input_type_ = InputType::eye;
+                normalizer_.setParameters(960, 600, 60, 36);
+            }
+        }
+    }
+}
+
+void GazeEstimator::setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist) {
+    camera_matrix_ = move(camera_matrix);
+    camera_dist_ = move(camera_dist); // was "move(camera_dist_)", a self-move that left the member empty
+    normalizer_.setCameraMatrix(camera_matrix_);
+}
+
+void GazeEstimator::initialFaceDetector(int number_users){
+    face_detector_.initialize(number_users);
+    face_detector_.setMethodType(FaceDetector::Method::OpenFace);
+}
+
+};
\ No newline at end of file
diff --git a/src/gaze_predictor.cpp b/src/gaze_predictor.cpp
new file mode 100644
index 0000000..579bac5
--- /dev/null
+++ b/src/gaze_predictor.cpp
@@ -0,0 +1,160 @@
+#include "gaze_predictor.hpp"
+
+#include <iostream>
+
+// caffe (the bracketed header names were lost in extraction; these two cover the symbols used below)
+#define USE_OPENCV 1
+#include <caffe/caffe.hpp>
+#include <caffe/layers/memory_data_layer.hpp>
+
+using namespace cv;
+using namespace std;
+using namespace caffe;
+
+namespace opengaze {
+caffe::Net<float> *p_net_;
+
+GazePredictor::GazePredictor() {
+
+}
+GazePredictor::~GazePredictor() {
+    delete p_net_;
+}
+
+void GazePredictor::initiaMPIIGaze(const std::vector<std::string> arguments={}) {
+    p_net_ = nullptr;
+    string param_path = arguments[0];
+    string model_path = arguments[1];
+    int gpu_id = stoi(arguments[3]);
+
+    // Set GPU (or CPU)
+    /*caffe::Caffe::set_mode(caffe::Caffe::CPU);
+    cout << "Using CPU model" << endl;*/
+    caffe::Caffe::set_mode(caffe::Caffe::GPU);
+    cout << "Using GPU with id " << gpu_id << endl;
+    Caffe::SetDevice(gpu_id);
+
+    cout << "load caffe model parameters from " << param_path << endl;
+    // create the CNN
+    p_net_ = new Net<float>(param_path, caffe::TEST);
+
+    cout << "load caffe model from " << model_path << endl;
+    // load pre-trained weights (binary proto)
+    p_net_->CopyTrainedLayersFrom(model_path);
+
+    // judge the model type based on the parameter file name
+    size_t i = param_path.rfind("/", param_path.length());
+    string filename;
+    if (i != string::npos)
+        filename = param_path.substr(i+1, param_path.length() - i);
+    if (!filename.compare(string("lenet_test.prototxt")))
+        model_type_ = 1;
+    else if (!filename.compare(string("googlenet.prototxt")))
+        model_type_ = 2;
+    else if (!filename.compare(string("alexnet_eye.prototxt")))
+        model_type_ = 3;
+    else if (!filename.compare(string("alexnet_face.prototxt")))
+        model_type_ = 4; // the single-face model
+    else if (!filename.compare(string("alexnet_face_448.prototxt")))
+        model_type_ = 4; // the single-face model
+    else {
+        model_type_ = 0;
+        cout << "Cannot determine the type of model!" << endl;
+    }
+}
+
+// signature reconstructed from the call sites in gaze_estimator.cpp
+Point3f GazePredictor::predictGazeMPIIGaze(Mat input_image) {
+    vector<Mat> img_vec;
+
+    img_vec.push_back(input_image);
+    Vec2f gaze_norm_2d;
+    Point3f gaze_norm_3d;
+
+    std::vector<int> labelVector;
+    labelVector.clear();
+    labelVector.push_back(1);
+    labelVector.push_back(1);
+    float loss = 0.0;
+    caffe::shared_ptr<MemoryDataLayer<float>> data_layer_;
+    data_layer_ = boost::static_pointer_cast<MemoryDataLayer<float>>(p_net_->layer_by_name("data"));
+    data_layer_->AddMatVector(img_vec, labelVector);
+
+    // run the network
+    p_net_->ForwardPrefilled(&loss);
+
+    if (model_type_ == 1)
+    {
+        // get output layer "ip2"
+        float *temp = (float*)p_net_->blob_by_name("ip2")->cpu_data();
+        // copy estimated gaze
vector + gaze_norm_2d.val[0] = temp[0]; + gaze_norm_2d.val[1] = temp[1]; + temp = nullptr; + } + else if (model_type_==2)// if it is googlenet + { + float *temp1 = (float*)p_net_->blob_by_name("loss1/classifier")->cpu_data(); + float *temp2 = (float*)p_net_->blob_by_name("loss2/classifier")->cpu_data(); + float *temp3 = (float*)p_net_->blob_by_name("loss3/classifier")->cpu_data(); + // average the output of three output values + gaze_norm_2d.val[0] = (temp1[0]+temp2[0]+temp3[0]) / 3.0f; + gaze_norm_2d.val[1] = (temp1[1]+temp2[1]+temp3[1]) / 3.0f; + temp1 = nullptr; + temp2 = nullptr; + temp3 = nullptr; + } + else if (model_type_==3)// if it is alexnet + { + float *temp; + temp = (float*)p_net_->blob_by_name("fc8")->cpu_data();// blob name can be fc8 + if (temp == NULL) + temp = (float*)p_net_->blob_by_name("gaze_output")->cpu_data(); //blob name can be gaze_output + if (temp == NULL) { + cout << "ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output" << endl; + exit(EXIT_FAILURE); + } + // copy estimated gaze vector + gaze_norm_2d.val[0] = temp[0]; + gaze_norm_2d.val[1] = temp[1]; + temp = NULL; + } + else if (model_type_==4)// if it is alexnet + { + float *temp; + temp = (float*)p_net_->blob_by_name("fc8")->cpu_data();// blob name can be fc8 + if (temp == NULL) + temp = (float*)p_net_->blob_by_name("gaze_output")->cpu_data(); //blob name can be gaze_output + if (temp == NULL) { + cout << "ERROR: cannot find the blob name in the model. The final blob name muse be fc8 or gaze_output" << endl; + exit(EXIT_FAILURE); + } + + // copy estimated gaze vector + gaze_norm_2d.val[0] = temp[0]; + gaze_norm_2d.val[1] = temp[1]; + + //// get the feature out + //temp = (float*)p_net_->blob_by_name("fc6_gaze")->cpu_data(); + //for (int num_f=0; num_f<4096; ++num_f) + //{ + // feature[num_f] = temp[num_f]; + //} + temp = NULL; + } + + float theta = gaze_norm_2d.val[0]; + float phi = gaze_norm_2d.val[1]; + gaze_norm_3d.x = (-1.0f)*cos(theta)*sin(phi); + gaze_norm_3d.y = (-1.0f)*sin(theta); + gaze_norm_3d.z = (-1.0f)*cos(theta)*cos(phi); + + return gaze_norm_3d; +} + +} \ No newline at end of file diff --git a/src/input_handler.cpp b/src/input_handler.cpp new file mode 100644 index 0000000..e98eb57 --- /dev/null +++ b/src/input_handler.cpp @@ -0,0 +1,185 @@ +#include "input_handler.hpp" +#include +#include +using namespace cv; +using namespace std; + +namespace opengaze { + +#if WIN32 +#include +#else +#include +#endif + +void InputHandler::getScreenResolution(int &width, int &height) { +#if WIN32 + width = (int) GetSystemMetrics(SM_CXSCREEN); + height = (int) GetSystemMetrics(SM_CYSCREEN); +#else + Display* disp = XOpenDisplay(NULL); + Screen* scrn = DefaultScreenOfDisplay(disp); + width = scrn->width; + height = scrn->height; +#endif +} + +InputHandler::InputHandler(){ + input_type_ = InputType::Camera;// defualt input type + camera_id_ = 0; + getScreenResolution(screen_width_, screen_height_); + screen_width_ = screen_width_; +} +InputHandler::~InputHandler(){} + + +void InputHandler::initialize() +{ + if (input_type_ == InputType::Camera){ + cap_.open(camera_id_); + if(!cap_.isOpened()) { // open Camera + cout << "Could not open Camera with id " << camera_id_ << endl; + std::exit(EXIT_FAILURE); + } + setFrameSize(1280, 720); // 800*600, 1280*720, 1920*1080, + } + else if (input_type_ == InputType::Video){ + cap_.open(input_file_video_name_); + if(!cap_.isOpened()) { // open Camera + cout << "Error: Could not open video file " << input_file_video_name_ << endl; + 
std::exit(EXIT_FAILURE);
+        }
+    }
+    else if (input_type_ == InputType::Directory) {
+        if (!boost::filesystem::is_directory(input_path_)){
+            cout << "Error: The input must be a directory, but it is " << input_path_ << endl;
+            std::exit(EXIT_FAILURE);
+        }
+        current_itr_ = boost::filesystem::directory_iterator(input_path_);
+    }
+    else if (input_type_ == InputType::Memory) {}
+
+    is_reach_end_ = false;
+}
+
+void InputHandler::setFrameSize(int frame_width, int frame_height){
+    cap_.set(cv::CAP_PROP_FRAME_HEIGHT, frame_height); // 720, 1080
+    cap_.set(cv::CAP_PROP_FRAME_WIDTH, frame_width);   // 1280, 1920
+    double dWidth = cap_.get(cv::CAP_PROP_FRAME_WIDTH);   // the width the capture actually delivers
+    double dHeight = cap_.get(cv::CAP_PROP_FRAME_HEIGHT); // the height the capture actually delivers
+    cout << "Input frame size is : " << dWidth << " x " << dHeight << endl;
+}
+
+Mat InputHandler::getNextSample() {
+    Mat frame;
+    if (input_type_ == InputType::Camera) cap_ >> frame;
+    else if (input_type_ == InputType::Video) {
+        cap_ >> frame;
+        if (frame.empty()) // we reached the end of the video
+            is_reach_end_ = true;
+    }
+    else if (input_type_ == InputType::Directory) {
+        boost::filesystem::path file_path = current_itr_->path();
+        if (file_path.extension() != ".jpg" && file_path.extension() != ".png" && file_path.extension() != ".bmp"){
+            cout << "Error: The input file is not an image file with extension jpg, png or bmp!" << endl;
+            cout << "The input file name is: " << file_path.string() << endl;
+            std::exit(EXIT_FAILURE);
+        }
+        cout << "process image " << file_path << endl;
+        frame = imread(file_path.string());
+        ++current_itr_; // advance the iterator (absent in the original, which would re-read the same image forever)
+        if (current_itr_ == boost::filesystem::directory_iterator())
+            is_reach_end_ = true;
+    }
+    else if (input_type_ == InputType::Memory) {}
+
+    return frame;
+}
+
+bool InputHandler::closeInput() {
+    if (input_type_ == InputType::Camera || input_type_ == InputType::Video){
+        cap_.release();
+        is_reach_end_ = true;
+    }
+    return true;
+}
+
+void InputHandler::setInput(std::string input_path) {
+    if (input_type_ == InputType::Directory){
+        input_path_ = move(input_path);
+    }
+    else if (input_type_ == InputType::Video){
+        input_file_video_name_ = move(input_path);
+    }
+}
+
+void InputHandler::readScreenConfiguration(string calib_file) {
+    FileStorage fs_disp(calib_file, FileStorage::READ);
+    fs_disp["monitor_W"] >> monitor_W_;
+    fs_disp["monitor_H"] >> monitor_H_;
+    fs_disp["monitor_R"] >> monitor_R_;
+    fs_disp["monitor_T"] >> monitor_T_;
+    // compute the monitor plane: the four screen corners, mapped into the camera
+    // coordinate system with the screen-camera rotation and translation
+    Vec3f corners[4];
+    corners[0] = Vec3f(0.0, 0.0, 0.0);
+    corners[1] = Vec3f(monitor_W_, 0.0, 0.0);
+    corners[2] = Vec3f(0.0, monitor_H_, 0.0);
+    corners[3] = Vec3f(monitor_W_, monitor_H_, 0.0);
+
+    for (int i = 0; i < 4; i++){
+        Mat corners_cam = monitor_R_ * Mat(corners[i]) + monitor_T_;
+        corners_cam.copyTo(monitor_corners_[i]);
+    }
+
+    Vec3f normal = Vec3f(0.0, 0.0, 1.0); // normal direction of the screen plane
+    monitor_normal_ = monitor_R_ * Mat(normal);
+    monitor_normal_.convertTo(monitor_normal_, CV_32F);
+}
+
+void InputHandler::readCameraConfiguration(string calib_file){
+    cout << endl << "Reading calibration information from : " << calib_file << endl;
+    FileStorage fs;
+    fs.open(calib_file, FileStorage::READ);
+    fs["camera_matrix"] >> camera_matrix_;
+    fs["dist_coeffs"] >> camera_distortion_;
+    fs.release();
+}
+
+void InputHandler::projectToDisplay(std::vector<Sample> &inputs, bool is_face_model) {
+    for (auto & sample : inputs) {
+        if (is_face_model) {
+            Vec3f face_center(sample.face_patch_data.face_center.at(0), sample.face_patch_data.face_center.at(1),
sample.face_patch_data.face_center.at(2)); + sample.gaze_data.gaze2d = mapToDisplay(face_center, sample.gaze_data.gaze3d); + } + else { + Vec3f leye_pose(sample.eye_data.leye_pos.at(0),sample.eye_data.leye_pos.at(1),sample.eye_data.leye_pos.at(2)); + Vec3f reye_pose(sample.eye_data.reye_pos.at(0),sample.eye_data.reye_pos.at(1),sample.eye_data.reye_pos.at(2)); + sample.gaze_data.lgaze2d = mapToDisplay(leye_pose, sample.gaze_data.lgaze3d); + sample.gaze_data.rgaze2d = mapToDisplay(reye_pose, sample.gaze_data.rgaze3d); + float gaze_x = (sample.gaze_data.lgaze2d.x + sample.gaze_data.rgaze2d.x) / 2.0f; + float gaze_y = (sample.gaze_data.lgaze2d.y + sample.gaze_data.rgaze2d.y) / 2.0f; + sample.gaze_data.gaze2d.x = gaze_x; + sample.gaze_data.gaze2d.y = gaze_y; + } + } +} + +cv::Point2f InputHandler::mapToDisplay(Vec3f origin, Vec3f gaze_vec) { + Point2f gaze_on_screen; + // compute intersection + float gaze_len = (float)(monitor_normal_.dot(Mat(monitor_corners_[0]-origin))/monitor_normal_.dot(Mat(gaze_vec))); + Vec3f gaze_pos_cam = origin + gaze_len * gaze_vec; + + // convert to monitor coodinate system + Mat gaze_pos_ = monitor_R_.inv() * (Mat(gaze_pos_cam) - monitor_T_); + Vec3f gaze_pos_3d; + gaze_pos_.copyTo(gaze_pos_3d); + + gaze_on_screen.x = gaze_pos_3d.val[0] / monitor_W_; + gaze_on_screen.y = gaze_pos_3d.val[1] / monitor_H_; + + return gaze_on_screen; + +} + +} \ No newline at end of file diff --git a/src/normalizer.cpp b/src/normalizer.cpp new file mode 100644 index 0000000..1e9ce45 --- /dev/null +++ b/src/normalizer.cpp @@ -0,0 +1,184 @@ +#include "normalizer.hpp" + +using namespace cv; +using namespace std; + +namespace opengaze { + +Normalizer::Normalizer() { + // parameters for data normalization + focal_norm_ = 1600; + distance_norm_ = 1000; // 600 500 1000 + roiSize_norm_ = cv::Size(224, 224); // 224 448 + cam_norm_ = (Mat_(3,3) << focal_norm_, 0, roiSize_norm_.width/2, 0, focal_norm_, roiSize_norm_.height/2.0f, 0, 0, 1.0f); +} + +Normalizer::~Normalizer() {} + +void Normalizer::setParameters(int focal_length, int distance, int img_w, int img_h){ + // parameters for data normalization + focal_norm_ = focal_length; + distance_norm_ = distance; // 600 500 1000 + roiSize_norm_ = cv::Size(img_w, img_h); // 224 448 + cam_norm_ = (Mat_(3,3) << focal_norm_, 0, roiSize_norm_.width/2, 0, focal_norm_, roiSize_norm_.height/2.0f, 0, 0, 1.0f); +} + +// convert vector from normalization space to camera coordinate system +cv::Mat Normalizer::cvtToCamera(cv::Point3f input, const Mat cnv_mat) { + // convert to the original camera coordinate system + Vec3f gaze_v(input.x, input.y, input.z); + // apply de-normalization + Mat gaze_v_cam = cnv_mat.inv() * Mat(gaze_v); + gaze_v_cam = gaze_v_cam / norm(gaze_v_cam); + + return gaze_v_cam; +} + +cv::Mat Normalizer::normalizeFace(Mat input_image, opengaze::Sample &sample) { + // get the face center in 3D space + Mat HR; + cv::Rodrigues(sample.face_patch_data.head_r, HR); + Mat HT = repeat(sample.face_patch_data.head_t, 1, 6); + Mat Fc; + add(HR*face_model_mat_, HT, Fc); + + float distance = (float)norm(sample.face_patch_data.face_center); // original distance + float z_scale = distance_norm_ / distance; // scaling factor + cv::Mat scaleMat; + scaleMat = (Mat_(3,3) << 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, z_scale);// scaling matrix + scaleMat.convertTo(scaleMat, CV_32F); + + // get the look_at matrix + Mat hRx = HR.col(0); + Mat forward = sample.face_patch_data.face_center /distance; + Mat down = forward.cross(hRx); + down = down / norm(down); + 
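+    // forward/down/right are built into a right-handed orthonormal basis; stacked as rows they
+    // form the rotation R that maps camera coordinates into the normalized view. Together with
+    // the scaling matrix S below, the final image warp is W = C_norm * (S * R) * C_real^-1
+    // (see the warpMat line), where C_norm and C_real are the virtual and real camera matrices.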
Mat right = down.cross(forward); + right = right / norm(right); + + // rotation matrix + sample.face_patch_data.face_rot = Mat(3, 3, CV_32F); + right.copyTo(sample.face_patch_data.face_rot.col(0)); + down.copyTo(sample.face_patch_data.face_rot.col(1)); + forward.copyTo(sample.face_patch_data.face_rot.col(2)); + sample.face_patch_data.face_rot = sample.face_patch_data.face_rot.t(); // there is no scaling + sample.face_patch_data.face_rot.convertTo(sample.face_patch_data.face_rot, CV_32F); + + Mat warpMat = cam_norm_ * (scaleMat * sample.face_patch_data.face_rot) * camera_matrix_.inv();// transformation matrix + // crop image and copy the equalized image + Mat face_patch; + warpPerspective(input_image, face_patch, warpMat, roiSize_norm_); + + return face_patch; +} + +vector Normalizer::normalizeEyes(cv::Mat input_image, Sample &sample){ + vector eye_images; + + Mat img_gray; + cvtColor(input_image, img_gray, CV_BGR2GRAY); + + Mat eye_center; + Mat* eye_rot; + for (int i=0; i<2; ++i) { + if (i==0){ + eye_center = sample.eye_data.leye_pos; + eye_rot = &sample.eye_data.leye_rot; + } + else { + eye_center = sample.eye_data.reye_pos; + eye_rot = &sample.eye_data.reye_rot; + } + + + float distance = (float)norm(eye_center); + float z_scale = distance_norm_ / distance; + + Mat scaleMat; + scaleMat = (Mat_(3,3) << 1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, z_scale);// scaling matrix + scaleMat.convertTo(scaleMat, CV_32F); + + // get the look_at matrix + Mat HR; + cv::Rodrigues(sample.face_patch_data.head_r, HR); + Mat hRx = HR.col(0); + Mat forward = eye_center/distance; + Mat down = forward.cross(hRx); + down = down / norm(down); + Mat right = down.cross(forward); + right = right / norm(right); + + // rotation matrix + *eye_rot = Mat(3, 3, CV_32F); + right.copyTo(eye_rot->col(0)); + down.copyTo(eye_rot->col(1)); + forward.copyTo(eye_rot->col(2)); + *eye_rot = eye_rot->t(); // there is no scaling + + Mat warpMat = cam_norm_ * (scaleMat * *eye_rot) * camera_matrix_.inv();// transformation matrix + // crop image and copy the equalized image + Mat eye_patch, eye_patch_equal; + warpPerspective(img_gray, eye_patch, warpMat, roiSize_norm_); + equalizeHist(eye_patch, eye_patch_equal); + eye_images.push_back(eye_patch_equal); + + } + eye_rot = nullptr; + return eye_images; +} + +void Normalizer::loadFaceModel(std::string path) { + string face_model_file_path = path + "/content/model/face_model.yml"; + // + cout << endl << "Loading 3D face model for head pose estimation from : " << face_model_file_path << endl; + FileStorage fs; + if (!fs.open(face_model_file_path, FileStorage::READ)) { + cout << "Cannot load the 3D face model!" 
<< endl; + exit(EXIT_FAILURE); + } + fs["face_model"] >> face_model_mat_; + for(int p=0; p<6; ++p) + face_model_.emplace_back(Point3d(face_model_mat_.at(0,p), + face_model_mat_.at(1,p), + face_model_mat_.at(2,p))); + fs.release(); +} + +// estimate head pose via model fitting +void Normalizer::estimateHeadPose(const Point2f *landmarks, opengaze::Sample &sample) { + Mat zero_dist = Mat::zeros(1, 5, CV_64F); + vector landmarks_orig(landmarks, + landmarks + 6); + cv::Mat head_r, head_t; + camera_matrix_.convertTo(camera_matrix_, CV_64F); // input must be double type + solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, false, SOLVEPNP_DLS); + solvePnP(face_model_, landmarks_orig, camera_matrix_, zero_dist, head_r, head_t, true); + head_r.convertTo(sample.face_patch_data.head_r, CV_32F); + head_t.convertTo(sample.face_patch_data.head_t, CV_32F); + camera_matrix_.convertTo(camera_matrix_, CV_32F); + + // get the face center in 3D space + Mat HR; + cv::Rodrigues(sample.face_patch_data.head_r, HR); + Mat HT = repeat(sample.face_patch_data.head_t, 1, 6); + Mat Fc; + add(HR*face_model_mat_, HT, Fc); + Mat face_center = (Fc.col(0) + Fc.col(1) + Fc.col(2) + Fc.col(3) + Fc.col(4) + Fc.col(5)) / 6.0; // face center + face_center.copyTo(sample.face_patch_data.face_center); // copy to output + sample.face_patch_data.face_center.convertTo(sample.face_patch_data.face_center, CV_32F); + + Mat le = 0.5*(Fc.col(2) + Fc.col(3)); // left eye + le.copyTo(sample.eye_data.leye_pos); + sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F); + Mat re = 0.5*(Fc.col(0) + Fc.col(1)); // right eye + re.copyTo(sample.eye_data.reye_pos); + sample.eye_data.reye_pos.convertTo(sample.eye_data.reye_pos, CV_32F); + +} + +void Normalizer::setCameraMatrix(cv::Mat input) { + camera_matrix_ = input; + camera_matrix_.convertTo(camera_matrix_, CV_32F); +} + +} \ No newline at end of file diff --git a/src/opengaze.cpp b/src/opengaze.cpp new file mode 100644 index 0000000..a80b211 --- /dev/null +++ b/src/opengaze.cpp @@ -0,0 +1,502 @@ +#include "opengaze.hpp" + +#include +#include + +using namespace std; +using namespace cv; + +namespace opengaze { + +double clockToMilliseconds(clock_t ticks){ + // units/(units/time) => time (seconds) * 1000 = milliseconds + return (ticks/(double)CLOCKS_PER_SEC)*1000.0; +} + +OpenGaze::OpenGaze(int argc, char** argv){ + namespace fs = boost::filesystem; + namespace po = boost::program_options; + + // default value of parameters + camera_id_ = 0; + input_type_ = InputHandler::InputType::Camera; + is_face_model_ = true; + string gaze_method; + string gpu_id; + string temp; + int number_user; + fs::path calib_camera, calib_screen, cnn_param_path, cnn_model_path; + + // parse command line options for input/output paths + po::options_description command_line("Command line options"); + command_line.add_options() + ("root_dir,r", po::value(), "configuration file") + ("input_type,t", po::value(), "input type (camera, video file, directory)") + ("gaze_method,g", po::value(), "gaze estimation method, could be MPIIGaze or OpenFace") + ("input,i", po::value(), "parameter for input") + ("output,o", po::value(), "output directory") + ("calib_camera", po::value(), "camera calibration file") + ("calib_screen", po::value(), "camera-screen calibration file") + ("gpu_id,p", po::value(), "gpu id number, default is 0") + ("debug,d", "show debug output") + ("face_model,f", "to use face model or not") + ("save_video,s", "save output visualization or not") + ("number_user,n", "the 
maximum number of users in the input image") + ; + + cout << "Parsing command line options..." << endl; + po::variables_map vm_command; + po::store(po::parse_command_line(argc, argv, command_line), vm_command); + po::notify(vm_command); + + // parse config file for data paths + po::options_description config_file("Config file options"); + config_file.add_options() + ("root_dir,r", po::value(), "configuration file") + ("input_type, t", po::value(), "input type (camera, video file, directory)") + ("input, i", po::value(), "parameter for input") + ("output,o", po::value(), "output directory") + ("cnn_param_path", po::value(), "Caffe prototxt path") + ("cnn_model_path", po::value(), "Caffe model path") + ("calib_camera", po::value(), "camera calibration file") + ("calib_screen", po::value(), "camera-screen calibration file") + ("gaze_method", po::value(), "gaze estimation method, could be cnn or openface") + ("gpu_id,p", po::value(), "gpu id number, default is 0") + ("face_model", po::value(), "face model or not") + ("save_video", po::value(), "save output visualization or not") + ("number_user", po::value(), "the maximum number of users in the input image") + ; + + fs::path root_dir, config_path; + + if(vm_command.count("root_dir")) root_dir = vm_command["root_dir"].as(); + else { + root_dir = OPENGAZE_CON_DIR; + cout << "No root directory is found, default value " << root_dir << " will be use" << endl; + } + + config_path = root_dir / "default.cfg"; + cout << "Reading config from \"" << config_path.string() << "\""<< endl; + if(!fs::exists(config_path)){ + cout << "Config file does not exist" << endl; + exit(EXIT_FAILURE); + } + ifstream settings_file(config_path.string()); + po::variables_map vm_config; + po::store(po::parse_config_file(settings_file , config_file), vm_config); + po::notify(vm_config); + + if(vm_command.count("gpu_id")) gpu_id = vm_command["gpu_id"].as(); + else if (vm_config.count("gpu_id")) gpu_id = vm_config["gpu_id"].as(); + else gpu_id = "0"; + + // CNN paramters + if(vm_command.count("cnn_param_path")) cnn_param_path = vm_command["cnn_param_path"].as(); + else if (vm_config.count("cnn_param_path")) cnn_param_path = vm_config["cnn_param_path"].as(); + else cnn_param_path = root_dir / "content/caffeModel/alexnet_face.prototxt"; + + if(vm_command.count("cnn_model_path")) cnn_model_path = vm_command["cnn_model_path"].as(); + else if (vm_config.count("cnn_model_path")) cnn_model_path = vm_config["cnn_model_path"].as(); + else cnn_model_path = root_dir / "content/caffeModel/alexnet_face.caffemodel"; + + // check input requirements + if(vm_command.count("gaze_method")) gaze_method = vm_command["gaze_method"].as(); + else if (vm_config.count("gaze_method")) gaze_method = vm_config["gaze_method"].as(); + else gaze_method = "MPIIGaze"; + + if(vm_command.count("calib_screen")) calib_screen = vm_command["calib_screen"].as(); + else if (vm_config.count("calib_screen")) calib_screen = vm_config["calib_screen"].as(); + else calib_screen = root_dir / "content/calib/monitor_laptop.yml"; + + if(vm_command.count("calib_camera")) calib_camera = vm_command["calib_camera"].as(); + else if (vm_config.count("calib_camera")) calib_camera = vm_config["calib_camera"].as(); + else calib_camera = root_dir / "content/calib/calibration.yml"; + + // read calibration file + if(!fs::exists(calib_camera)){ + cout << "Camera calibration file does not exist: " << calib_camera <(); + else if (vm_config.count("input_type")) temp = vm_config["input_type"].as(); + else temp = ""; + if (temp == "camera") 
{input_type_ = InputHandler::InputType::Camera;} + else if (temp == "video") {input_type_ = InputHandler::InputType::Video;} + else if (temp == "directory") {input_type_ = InputHandler::InputType::Directory;} + else cout<<"No input type specified, default value (camera) will be use" << endl; + + if (vm_command.count("input")) temp = vm_command["input"].as(); + else if (vm_config.count("input")) temp = vm_config["input"].as(); + else temp = "0"; + + if (input_type_ == InputHandler::InputType::Camera) camera_id_ = stoi(temp); + else if (input_type_ == InputHandler::InputType::Video || input_type_ == InputHandler::InputType::Directory) input_dir_ = temp; + else cout<<"No input parameter specified, default value will be use" << endl; + + if(vm_command.count("face_model")) is_face_model_ = true; + else if(vm_config.count("face_model")) is_face_model_ = vm_config["face_model"].as(); + else is_face_model_ = true; + + if(vm_command.count("save_video")) is_save_video_ = true; + else if(vm_config.count("save_video")) is_save_video_ = vm_config["save_video"].as(); + else is_save_video_ = false; + + if(vm_command.count("debug")) show_debug_ = true; + else if(vm_config.count("debug")) show_debug_ = vm_config["debug"].as(); + else show_debug_ = false; + + if(vm_command.count("output")) output_dir_ = vm_command["output"].as(); + else if(vm_config.count("output")) output_dir_ = vm_config["output"].as(); + else { + if (input_type_ == InputHandler::InputType::Video) output_dir_ = input_dir_.parent_path(); + else if (input_type_ == InputHandler::InputType::Directory) output_dir_ = input_dir_.parent_path(); + else if (input_type_ == InputHandler::InputType::Camera) + output_dir_ = root_dir; + } + + string face_detector_root_path; + if(vm_command.count("openface_path")) face_detector_root_path = vm_command["openface_path"].as(); + else if(vm_config.count("openface_path")) face_detector_root_path = vm_config["openface_path"].as(); + else cout<< "No face detector root specified, default detector will be use" << endl; + + if(vm_command.count("per_model_save_path")) per_model_save_path_ = vm_command["per_model_save_path"].as(); + else if (vm_config.count("per_model_save_path")) per_model_save_path_ = vm_config["per_model_save_path"].as(); + else per_model_save_path_ = root_dir.string() + "/content/calib/user0.txt"; + + if(vm_command.count("number_user")) temp = vm_command["number_user"].as(); + else if (vm_config.count("number_user")) temp = vm_config["number_user"].as(); + else temp = "5"; + number_user = stoi(temp); + + // initial class instance + if (input_type_ == InputHandler::InputType::Camera){ // Camera as input + input_handler_.setInputType(InputHandler::InputType::Camera);// set input type + input_handler_.setInput(camera_id_); // set Camera id + } + else if (input_type_ == InputHandler::InputType::Video) { + input_handler_.setInputType(InputHandler::InputType::Video);// set input type + input_handler_.setInput(input_dir_.string()); // set camera file + } + else if (input_type_ == InputHandler::InputType::Directory){ + input_handler_.setInputType(InputHandler::InputType::Directory); + } + // initialize other classes + gaze_estimator_.setCameraParameters(input_handler_.camera_matrix_, input_handler_.camera_distortion_); + gaze_estimator_.setRootPath(root_dir.string()); + gaze_estimator_.initialFaceDetector(number_user); + + vector arguments; + if (gaze_method == "MPIIGaze") { + arguments.push_back(cnn_param_path.string()); + arguments.push_back(cnn_model_path.string()); + if (is_face_model_) + 
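+            // arguments are consumed positionally by GazeEstimator::setMethod and
+            // GazePredictor::initiaMPIIGaze: [0] prototxt, [1] caffemodel, [2] "face"/"eye", [3] GPU id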
arguments.emplace_back("face"); + else + arguments.emplace_back("eye"); + arguments.push_back(gpu_id); + gaze_estimator_.setMethod(GazeEstimator::Method::MPIIGaze, arguments); + } + else if (gaze_method == "OpenFace"){ + //gaze_estimator_.setMethod(GazeEstimator::Method::OpenFace, arguments); + cout << "OpenFace gaze estimation is current not support" << endl; + exit(EXIT_FAILURE); + } + else { + cout << "The method setting is not right! Options are MPIIGaze or OpenFace!" << endl; + exit(EXIT_FAILURE); + } +} + +OpenGaze::~OpenGaze() { + input_handler_.closeInput(); +} + +// do gaze estimation with camera as input +void OpenGaze::runGazeVisualization() { + input_handler_.initialize(); + + namedWindow("Gaze"); + int key; + Mat input_image; + vector output; + + cv::VideoWriter m_writer; + if (is_save_video_){ + boost::filesystem::path save_video_file; + save_video_file = output_dir_ / (input_dir_.stem().string() + "_gaze_video.avi"); + m_writer.open(save_video_file.string(), CV_FOURCC('M','J','P','G'), 25, + Size(input_handler_.getFrameWidth(),input_handler_.getFrameHeight()), true); + cout << "Saving video to " << save_video_file << endl; + } + + // construct saving file + ofstream output_stream; + boost::filesystem::path output_file_name = output_dir_ / (input_dir_.stem().string() + "_gaze_output.txt"); + output_stream.open(output_file_name.string()); + cout << "Created output file: " << output_file_name.string() << endl; + + // for fps calculation + double fps_tracker = -1.0; + double t_start = 0; + double t_end = 0; + + unsigned int frame_count = 0; + + while(true){// loop all the sample or read frame from Video + frame_count++; + t_start = t_end; + output.clear(); + + input_image = input_handler_.getNextSample();// get input image + if(input_handler_.isReachEnd()){ // check if all sample are processed + cout<<"Processed all the samples."<(0) << ","; + output_stream << sample.face_patch_data.face_center.at(1) << ","; + output_stream << sample.face_patch_data.face_center.at(2) << ","; + output_stream << sample.gaze_data.gaze2d.x << ","; + output_stream << sample.gaze_data.gaze2d.y << ","; + output_stream << sample.eye_data.leye_pos.at(0) << ","; + output_stream << sample.eye_data.leye_pos.at(1) << ","; + output_stream << sample.eye_data.leye_pos.at(2) << ","; + output_stream << sample.eye_data.reye_pos.at(0) << ","; + output_stream << sample.eye_data.reye_pos.at(1) << ","; + output_stream << sample.eye_data.reye_pos.at(2) << endl; + } + + if (is_save_video_ || show_debug_) { + //////// visualization ////////////////////////////////////////////////// + // draw results + for(const auto & sample : output){ + //drawLandmarks(sample, undist_img); // draw face landmarks + drawGazeOnFace(sample, undist_img); // draw gaze ray on face image + //drawGazeOnSimScreen(sample, undist_img); // draw screen target + } + + if (show_debug_) { + // show fps + char fpsC[255]; + std::sprintf(fpsC, "%02f", fps_tracker); + string fpsSt("FPS: "); + fpsSt += fpsC; + cv::putText(undist_img, fpsSt, cv::Point(100, 100), CV_FONT_HERSHEY_SIMPLEX, 1, CV_RGB(255, 0, 0), 2); + // show the image + imshow("Gaze", undist_img); + key = cv::waitKey(1); + if (key==27) exit(EXIT_SUCCESS); // press ESC to exit + } + + if (is_save_video_) { + if (is_save_video_) + m_writer << undist_img; + } + } + + } + if (is_save_video_) + m_writer.release(); +} + +void OpenGaze::runDataExtraction() { + assert(input_handler_.getInputType() == InputHandler::InputType::Directory);// Here we just accept the directory folder + 
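+    // unlike runGazeVisualization, this mode only exports the normalized face/eye patches via
+    // getImagePatch (no CNN inference), e.g. to build a training dataset offline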
input_handler_.initialize(); + + vector output; + Mat input_image; + + while(true){// loop all the sample or read frame from Video + output.clear(); + + input_image = input_handler_.getNextSample();// get input image + if(input_handler_.isReachEnd()){ // check if all sample are processed + cout << "Processed all the samples." << endl; + break; + } + + Mat undist_img; + undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_); + gaze_estimator_.getImagePatch(undist_img, output); // extract the face image + + // save the output + for (int i=0; i output; + + cv::namedWindow("screen", CV_WINDOW_NORMAL); + cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); + + show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3); + + while(true){// loop all the sample or read frame from Video + output.clear(); + + if(input_handler_.isReachEnd()){ // check if all sample are processed + cout<<"Processed all the samples."< pred, gt; // prediction and ground-truth + for (int i=0; i output; + input_image = input_handler_.getNextSample(); // get the sample when user clicking + undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_); + gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation + input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);// convert to 2D projection + m_calibrator.confirmClicking(); // give feedback to user that they successfully did calibration + pred.emplace_back(output[0].gaze_data.gaze2d); + gt.emplace_back(cv::Point2f((m_calibrator.getCurrentPoint().x/(float)input_handler_.getScreenWidth()), + (m_calibrator.getCurrentPoint().y/(float)input_handler_.getScreenHeight()))); + } + else + break; // if user press ESC button, we break + + } + if (pred.size() > 0){ + m_calibrator.generateModel(pred, gt, 1); // get the mapping model + string per_model_save_path_ = output_dir_.stem().string() + "/personal_gaze_model.yml"; + m_calibrator.saveModel(per_model_save_path_); + } +} + +void OpenGaze::drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image) { + static const int dW = 640; + static const int dH = 360; + Mat debug_disp = Mat::zeros(Size(dW, dH), CV_8UC3); + + Point2f g_s; + g_s.x = dW*sample.gaze_data.gaze2d.x; + g_s.y = dH*sample.gaze_data.gaze2d.y; + + circle(debug_disp, g_s, 10, CV_RGB(255,0,0), -1); + + debug_disp.copyTo(image(Rect(0, 0, dW, dH))); +} + +void OpenGaze::drawGazeOnFace(opengaze::Sample sample, cv::Mat &image) { + // draw gaze on the face + if (gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze + && gaze_estimator_.input_type_ == GazeEstimator::InputType::face) { + static const float gaze_length = 300.0; + Mat zero = Mat::zeros(1, 3, CV_32F); + Mat rvec, tvec; + sample.face_patch_data.head_r.convertTo(rvec, CV_32F); + sample.face_patch_data.head_t.convertTo(tvec, CV_32F); + + vector cam_points; + Vec3f face_center(sample.face_patch_data.face_center.at(0), sample.face_patch_data.face_center.at(1), sample.face_patch_data.face_center.at(2)); + cam_points.emplace_back(face_center); + cam_points.emplace_back(face_center + gaze_length * sample.gaze_data.gaze3d); + + vector img_points; + projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points); + + line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5); // gaze ray + circle(image, img_points[0], 5, CV_RGB(255,0,0), -1); // 
starting point
+        circle(image, img_points[1], 5, CV_RGB(255,0,0), -1); // end point
+    }
+    else if ((gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
+        && gaze_estimator_.input_type_ == GazeEstimator::InputType::eye)
+        || gaze_estimator_.method_type_ == GazeEstimator::Method::OpenFace) {
+        int gaze_length = 300;
+        Mat zero = Mat::zeros(1, 3, CV_32F);
+        vector<Point3f> cam_points;
+        sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
+        Vec3f leye_pose(sample.eye_data.leye_pos.at<float>(0), sample.eye_data.leye_pos.at<float>(1), sample.eye_data.leye_pos.at<float>(2));
+        cam_points.emplace_back(leye_pose);
+        cam_points.emplace_back(leye_pose + gaze_length*sample.gaze_data.lgaze3d);
+        Vec3f reye_pose(sample.eye_data.reye_pos.at<float>(0), sample.eye_data.reye_pos.at<float>(1), sample.eye_data.reye_pos.at<float>(2));
+        cam_points.emplace_back(reye_pose);
+        cam_points.emplace_back(reye_pose + gaze_length*sample.gaze_data.rgaze3d);
+
+        vector<Point2f> img_points;
+        projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
+
+        line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5);
+        line(image, img_points[2], img_points[3], CV_RGB(255,0,0), 5);
+        circle(image, img_points[1], 3, CV_RGB(255,0,0), -1);
+        circle(image, img_points[3], 3, CV_RGB(255,0,0), -1);
+    }
+}
+
+void OpenGaze::drawLandmarks(opengaze::Sample sample, cv::Mat &image) {
+    auto face_bb = sample.face_data.face_bb;
+    rectangle(image, cv::Point(face_bb.x, face_bb.y),
+              cv::Point(face_bb.x+face_bb.width, face_bb.y+face_bb.height), CV_RGB(0,255,0), 5);
+    for (int p = 0; p < 6; ++p)
+        circle(image, sample.face_data.landmarks[p], 5, CV_RGB(0,255,0), -1);
+}
+
+}
\ No newline at end of file
diff --git a/src/personal_calibrator.cpp b/src/personal_calibrator.cpp
new file mode 100644
index 0000000..dd5ab0d
--- /dev/null
+++ b/src/personal_calibrator.cpp
@@ -0,0 +1,145 @@
+
+#include "personal_calibrator.hpp"
+
+using namespace cv;
+using namespace std;
+
+void CallBackFunc(int event, int x, int y, int flags, void* is_click) {
+    if (event == EVENT_LBUTTONDOWN){
+        bool* temp = (bool*)is_click;
+        *temp = true;
+    }
+}
+
+PersonalCalibrator::PersonalCalibrator(int screen_width, int screen_height) {
+    cv::namedWindow("calibration", CV_WINDOW_NORMAL);
+    cv::setWindowProperty("calibration", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
+    // set up the mouse
+    is_click_ = false;
+    // set the callback function for any mouse event
+    setMouseCallback("calibration", CallBackFunc, &is_click_); // wait for clicking
+
+    screen_width_ = screen_width;
+    screen_height_ = screen_height;
+
+    center_radius_ = (int)((float)screen_width_ / 200.0f);
+}
+
+PersonalCalibrator::~PersonalCalibrator() {
+    cv::setWindowProperty("calibration", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
+    cv::destroyWindow("calibration");
+}
+
+void PersonalCalibrator::generatePoints(int num_points) {
+    index_point_ = -1;
+    srand(time(NULL));
+    Point2i current_point;
+
+    for (int num = 0; num < num_points; ++num) {
+        current_point.x = (rand() % screen_width_);  // range is [0, screen_width_)
+        current_point.y = (rand() % screen_height_); // range is [0, screen_height_)
+        points_.emplace_back(current_point);
+    }
+}
+
+void PersonalCalibrator::initialWindow() {
+    // get the focus of the window
+    namedWindow("GetFocus", CV_WINDOW_NORMAL);
+    cv::Mat img = cv::Mat::zeros(100, 100, CV_8UC3);
+    cv::imshow("GetFocus", img);
+    cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
+    waitKey(1);
+    cv::setWindowProperty("GetFocus", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
+    cv::destroyWindow("GetFocus");
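+    // (the throwaway fullscreen "GetFocus" window above is a workaround to grab window-manager
+    // focus, so that the "calibration" window reliably receives the subsequent mouse clicks)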
+
+    // show the instruction
+    cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
+    string show_text = "Please click/touch when looking at the dots";
+    cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(255,255,255), 2);
+    imshow("calibration", show_img);
+    cv::waitKey(3000);
+    for (int i = 255; i > 0; i -= 5) { // fade the instruction out gradually
+        show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
+        cv::putText(show_img, show_text, cv::Point(400,600), FONT_HERSHEY_COMPLEX_SMALL, 2, cv::Scalar(i,i,i), 2);
+        imshow("calibration", show_img);
+        cv::waitKey(1);
+    }
+}
+
+bool PersonalCalibrator::showNextPoint() {
+    cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
+    index_point_++;
+    cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(255, 255, 255), -1);
+    is_click_ = false;
+
+    while (true) {
+        imshow("calibration", show_img);
+        int key = cv::waitKey(10); // wait for interaction
+        if (key == 27) // the ESC key aborts the calibration
+            return false;
+        if (is_click_) {
+            break;
+        }
+    }
+    return true;
+}
+
+void PersonalCalibrator::confirmClicking() {
+    cv::Mat show_img = cv::Mat::zeros(screen_height_, screen_width_, CV_8UC3);
+    cv::circle(show_img, cv::Point(points_[index_point_].x, points_[index_point_].y), center_radius_, cv::Scalar(0, 200, 0), -1);
+    imshow("calibration", show_img);
+    cv::waitKey(500);
+}
+
+// this polyfit function is copied from opencv/modules/contrib/src/polyfit.cpp
+// The original code was written by
+// Onkar Raut
+// Graduate Student,
+// University of North Carolina at Charlotte
+cv::Mat polyfit(const Mat& src_x, const Mat& src_y, int order)
+{
+    CV_Assert((src_x.rows>0)&&(src_y.rows>0)&&(src_x.cols>0)&&(src_y.cols>0)&&(order>=1));
+    Mat matrix;
+    Mat bias = Mat::ones((int)src_x.rows, 1, CV_32FC1);
+
+    Mat input_x = Mat::zeros(src_x.rows, order*src_x.cols, CV_32FC1);
+
+    Mat copy;
+    for (int i = 1; i <= order; i++){
+        copy = src_x.clone();
+        pow(copy, i, copy);
+        copy.copyTo(input_x(Rect((i-1)*src_x.cols, 0, copy.cols, copy.rows)));
+    }
+
+    Mat new_mat;
+    cv::hconcat(input_x, bias, new_mat);
+    cout << "new_mat: " << new_mat << endl;
+    cv::solve(new_mat, src_y, matrix, DECOMP_NORMAL); // least-squares solution of new_mat * matrix = src_y
+
+    cout << "model_matrix: " << matrix << endl;
+    Mat calibrated = new_mat * matrix;
+    cout << "calibrated: " << calibrated << endl;
+    double dist_original = norm(src_x, src_y, NORM_L2);
+    cout << "dist_original: " << dist_original << endl;
+    double dist_calibrated = norm(calibrated, src_y, NORM_L2);
+    cout << "dist_calibrated: " << dist_calibrated << endl;
+
+    return matrix;
+}
+
+void PersonalCalibrator::generateModel(vector<Point2f> prediction, vector<Point2f> ground_truth, int order) {
+    cv::Mat input_x = cv::Mat((int)prediction.size(), 2, CV_32FC1, prediction.data());
+    cv::Mat input_y = cv::Mat((int)ground_truth.size(), 2, CV_32FC1, ground_truth.data());
+    cout << "input_x: " << input_x << endl;
+    cout << "input_y: " << input_y << endl;
+    model_matrix_ = polyfit(input_x, input_y, order);
+}
+
+void PersonalCalibrator::saveModel(std::string file_path) {
+    cv::FileStorage storage(file_path, cv::FileStorage::WRITE);
+    storage << "model_matrix" << model_matrix_; // FileStorage entries need a node name; writing a bare Mat throws
+    storage.release();
+}
\ No newline at end of file