initial commit
commit e505acdb29
41 changed files with 2922 additions and 0 deletions
88
include/data.hpp
Normal file
@@ -0,0 +1,88 @@
#ifndef DATA_HPP
#define DATA_HPP

#include <opencv2/opencv.hpp>

namespace opengaze{

/**
 * face and facial landmark detection data
 * @param face_id personal id from tracking across frames
 * @param certainty detection score, 1 is the best, -1 is the worst
 * @param landmarks detected six facial landmarks: four eye corners and two mouth corners
 * @param face_bb detected face bounding box
 */
struct FaceData
{
    unsigned long face_id;
    double certainty;
    cv::Point2f landmarks[6];
    cv::Rect_<int> face_bb;
};
/**
 * eye image related data
 * @param leye_pos/reye_pos 3D eyeball center positions of the left and right eyes in the original camera coordinate system
 * @param leye_img/reye_img eye images
 * @param leye_rot/reye_rot rotation matrices used during the data normalization procedure
 */
struct EyeData
{
    // cv::Mat head_r, head_t;
    cv::Mat leye_pos, reye_pos;

    // normalized eyes
    cv::Mat leye_img, reye_img;
    cv::Mat leye_rot, reye_rot;
};
/**
 * face patch data related to data normalization
 * @param head_r head rotation, referenced to the center of the face
 * @param head_t head translation, referenced to the center of the face
 * @param face_rot rotation matrix used during the data normalization procedure
 * @param face_center 3D face center in the original camera coordinate system
 * @param debug_img debug image used to show the normalized face image
 * @param face_patch normalized face image
 */
struct FacePatchData
{
    cv::Mat head_r, head_t;
    cv::Mat face_rot;
    cv::Mat face_center;
    cv::Mat debug_img;
    cv::Mat face_patch;
};
/**
 * gaze data
 * @param lgaze3d/rgaze3d gaze directions of the left and right eyes in the camera coordinate system
 * @param gaze3d gaze direction estimated from the face patch in the camera coordinate system
 * @param lgaze2d/rgaze2d projected gaze positions of the left and right eyes in the screen coordinate system
 * @param gaze2d projected gaze position from the face patch in the screen coordinate system
 */
struct GazeData
{
    cv::Vec3f lgaze3d, rgaze3d;
    cv::Vec3f gaze3d;
    cv::Point2f lgaze2d, rgaze2d;
    cv::Point2f gaze2d;
};
/**
 * the general output data structure
 * @param face_data face and facial landmark detection data
 * @param eye_data data related to the eye image input
 * @param face_patch_data normalized face patch data
 * @param gaze_data gaze data in 2D and 3D space
 */
struct Sample
{
    FaceData face_data;
    EyeData eye_data;
    FacePatchData face_patch_data;
    GazeData gaze_data;
};

}

#endif //DATA_HPP
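These structs are plain aggregates, so a caller can fill and read them directly. A minimal sketch; the field values below are made-up placeholders for illustration, not output of the toolkit:

#include <iostream>
#include <opencv2/opencv.hpp>
#include "data.hpp"

int main() {
    opengaze::Sample sample;

    // hypothetical detection result, for illustration only
    sample.face_data.face_id = 0;
    sample.face_data.certainty = 0.9;                         // 1 is best, -1 is worst
    sample.face_data.face_bb = cv::Rect_<int>(100, 80, 200, 200);

    // 2D gaze point on the screen, normally filled in by the estimator
    sample.gaze_data.gaze2d = cv::Point2f(640.0f, 360.0f);

    std::cout << "face id " << sample.face_data.face_id
              << " looks at " << sample.gaze_data.gaze2d << std::endl;
    return 0;
}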
72
include/face_detector.hpp
Normal file
@@ -0,0 +1,72 @@
#ifndef FACE_DETECTOR_HPP
#define FACE_DETECTOR_HPP

#include <iostream>
#include <vector>
#include <string>
#include <opencv2/opencv.hpp>

#if USE_DLIB
// if we use dlib
#include <dlib/opencv.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/image_processing.h>
#include <dlib/gui_widgets.h>
#include <dlib/image_io.h>
#endif

#include "data.hpp"

namespace opengaze{

class FaceDetector {
public:
    FaceDetector();
    ~FaceDetector();

    /**
     * face and facial landmark detection method selection
     * The current implementation supports only OpenFace, which uses dlib for face detection.
     */
    enum Method{OpenFace, OpenCV, Dlib};

    /**
     * main function to detect and track faces and facial landmarks
     * @param input_img input image
     * @param output output data structure
     */
    void track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output);

    void reset();
    void setMethodType(Method method_type) {method_type_ = method_type;}
    Method getMethodType() {return method_type_;}
    void initialize(int number_users);

private:
    Method method_type_;

#if USE_DLIB
    dlib::frontal_face_detector dlib_detector_;
    dlib::shape_predictor dlib_sp_;
#endif

    // parameters for OpenFace
    std::vector<bool> active_models_;
    unsigned long num_faces_max_;
    int detection_skip_frames_, tracking_loss_limit_;
    float detection_resize_rate_;
    float nonoverlap_threshold_;
    double certainty_threshold_;
    int landmark_indices_[6];
    int frame_counter_;
    unsigned long current_face_id_;
    std::vector<unsigned long> face_ids_;
};
}

#endif //FACE_DETECTOR_HPP
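A minimal usage sketch for this interface, assuming the OpenGaze library is built and linked and a webcam is available; tracking a single user and drawing the detected bounding box are illustrative choices:

#include <opencv2/opencv.hpp>
#include "face_detector.hpp"

int main() {
    opengaze::FaceDetector detector;
    detector.setMethodType(opengaze::FaceDetector::Method::OpenFace);
    detector.initialize(1);                      // track at most one user (illustrative)

    cv::VideoCapture cap(0);
    cv::Mat frame;
    while (cap.read(frame)) {
        std::vector<opengaze::Sample> samples;
        detector.track_faces(frame, samples);    // fills face_data for each detected face
        for (const auto &s : samples)
            cv::rectangle(frame, s.face_data.face_bb, cv::Scalar(0, 255, 0), 2);
        cv::imshow("faces", frame);
        if (cv::waitKey(1) == 27) break;         // ESC to quit
    }
    return 0;
}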
65
include/gaze_estimator.hpp
Normal file
@@ -0,0 +1,65 @@
#ifndef GAZE_ESTIMATOR_HPP
#define GAZE_ESTIMATOR_HPP

#include <opencv2/opencv.hpp>

#include "data.hpp"
#include "face_detector.hpp"
#include "normalizer.hpp"
#include "gaze_predictor.hpp"

namespace opengaze{

class GazeEstimator {
public:
    GazeEstimator();
    ~GazeEstimator();

    /**
     * The current implementation only has the "MPIIGaze" method, which takes the input face/eye image
     * and outputs the gaze direction directly. It is an appearance-based method. "OpenFace" can also output
     * a gaze vector based on its pupil detection results; however, the "OpenFace" implementation is not
     * included in our OpenGaze toolkit yet.
     */
    enum Method{MPIIGaze, OpenFace};
    /**
     * For the "MPIIGaze" method, the input image can be a face or an eye. The full-face patch model outputs
     * more accurate gaze predictions than the eye image model, while the eye-image-based model is much faster.
     */
    enum InputType{face, eye};

    /**
     * the main function to estimate gaze.
     * It performs face and facial landmark detection, head pose estimation and then gaze prediction.
     * @param input_image input scene image
     * @param output data structure for output
     */
    void estimateGaze(cv::Mat input_image, std::vector<opengaze::Sample> &output);
    void getImagePatch(cv::Mat input_image, std::vector<opengaze::Sample> &outputs);
    void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist);
    void setRootPath(std::string root_path);
    void setMethod(Method, std::vector<std::string> arguments);
    void initialFaceDetector(int number_users);

    Method method_type_;
    InputType input_type_; // the input type

private:
    // class instances
    FaceDetector face_detector_;
    Normalizer normalizer_;
    GazePredictor gaze_predictor_;
    // camera intrinsic matrix
    cv::Mat camera_matrix_;
    // camera distortion coefficients
    cv::Mat camera_dist_;
    // the root path is used to load the configuration file and models
    std::string root_path_;
};

}

#endif //GAZE_ESTIMATOR_HPP
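An end-to-end sketch of driving GazeEstimator, assuming the library is linked; the intrinsic matrix values, the root path "./content", the method arguments and the test image name are placeholders, not values prescribed by the toolkit:

#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "gaze_estimator.hpp"

int main() {
    opengaze::GazeEstimator estimator;

    // placeholder intrinsics; in practice load them from a calibration file
    cv::Mat camera_matrix = (cv::Mat_<double>(3, 3) << 1000, 0, 640,
                                                       0, 1000, 360,
                                                       0,    0,   1);
    cv::Mat camera_dist = cv::Mat::zeros(1, 5, CV_64F);
    estimator.setCameraParameters(camera_matrix, camera_dist);

    estimator.setRootPath("./content");                          // hypothetical model/config root
    estimator.initialFaceDetector(1);
    estimator.setMethod(opengaze::GazeEstimator::Method::MPIIGaze,
                        {"--model", "full_face"});               // illustrative arguments only

    cv::Mat frame = cv::imread("scene.jpg");                     // assumed test image
    std::vector<opengaze::Sample> samples;
    estimator.estimateGaze(frame, samples);
    for (const auto &s : samples)
        std::cout << "3D gaze direction: " << s.gaze_data.gaze3d << std::endl;
    return 0;
}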
29
include/gaze_predictor.hpp
Normal file
@@ -0,0 +1,29 @@
#ifndef GAZE_PREDICTOR_HPP
#define GAZE_PREDICTOR_HPP

#include <opencv2/opencv.hpp>
#include "data.hpp"
#include "face_detector.hpp"

namespace opengaze{

class GazePredictor {

public:
    GazePredictor();
    ~GazePredictor();

    void initiaMPIIGaze(std::vector<std::string> arguments);
    cv::Point3f predictGazeMPIIGaze(cv::Mat face_patch);

private:
    int model_type_;
    bool is_extract_feature;
};

}

#endif //GAZE_PREDICTOR_HPP
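GazePredictor operates on an already-normalized face patch. A minimal sketch, assuming the library is linked; the initialization arguments and the image file name are placeholders, and a real patch would come from Normalizer::normalizeFace:

#include <iostream>
#include <opencv2/opencv.hpp>
#include "gaze_predictor.hpp"

int main() {
    opengaze::GazePredictor predictor;
    predictor.initiaMPIIGaze({"--model", "full_face"});       // illustrative arguments only

    // a normalized face patch; here loaded from an assumed sample image
    cv::Mat face_patch = cv::imread("face_patch.jpg");
    cv::Point3f gaze = predictor.predictGazeMPIIGaze(face_patch);
    std::cout << "predicted gaze direction: " << gaze << std::endl;
    return 0;
}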
125
include/input_handler.hpp
Normal file
@@ -0,0 +1,125 @@
#ifndef INPUT_HANDLER_HPP
#define INPUT_HANDLER_HPP

#include <opencv2/opencv.hpp>
#include <vector>
#include <iostream>
#include <boost/filesystem.hpp>
#include "data.hpp"

namespace opengaze {

class InputHandler {
public:
    enum InputType {Camera, Video, Image, Directory, Memory};

    InputHandler();
    ~InputHandler();

    /**
     * set the camera intrinsic parameters
     * @param camera_matrix camera intrinsic matrix
     * @param camera_dist camera distortion coefficients
     */
    void setCameraParameters(cv::Mat camera_matrix, cv::Mat camera_dist){
        camera_matrix_ = std::move(camera_matrix);
        camera_distortion_ = std::move(camera_dist);
    }

    /**
     * return the next sample; it can come from any input source
     * @return next sample
     */
    cv::Mat getNextSample();

    /**
     * set the input type
     * @param type the input type, see the InputType definition
     */
    void setInputType(InputType type){input_type_ = type;}

    /**
     * set the input
     * Depending on the input type, the meaning of the input value differs:
     * for the type "Camera", the input value is the camera id;
     * for the type "Video", the input value is the video file name;
     * for the type "Directory", the input value is the directory path.
     */
    void setInput(int camera_id) {camera_id_ = camera_id;}
    void setInput(std::vector<cv::Mat> images) {images_ = std::move(images);}
    void setInput(std::string input_path);

    /**
     * read the parameters related to the screen
     * @param calib_file configuration file
     */
    void readScreenConfiguration(std::string calib_file);
    /**
     * read the camera intrinsic parameters from the configuration file
     * @param calib_file configuration file
     */
    void readCameraConfiguration(std::string calib_file);

    /**
     * Once the 3D gaze vector is obtained, it needs to be projected onto the 2D screen.
     * This function also needs an input to indicate whether the full-face model is used,
     * since the origin of the gaze vector is the center of the face for the full-face model
     * and the eye center for the eye-based models.
     * @param input input data containing the 3D gaze vector
     * @param is_face_model a boolean value indicating whether the gaze vectors come from the face model or the eye model
     */
    void projectToDisplay(std::vector<opengaze::Sample> &input, bool is_face_model=true);

    int getFrameHeight(){return cap_.get(cv::CAP_PROP_FRAME_HEIGHT);}
    int getFrameWidth(){return cap_.get(cv::CAP_PROP_FRAME_WIDTH);}
    InputType getInputType() {return input_type_;}
    int getScreenWidth() {return screen_width_;}
    int getScreenHeight() {return screen_height_;}
    std::string getFileName() {return current_file_name_;}

    cv::Point2f mapToDisplay(cv::Vec3f obj_center, cv::Vec3f gaze_point);

    void initialize();
    bool closeInput();
    void getScreenResolution(int &width, int &height);

    cv::Mat getCameraMatrix() { return camera_matrix_;}
    cv::Mat getCameraDistortion() {return camera_distortion_;}
    void setFrameSize(int frame_width, int frame_height);

    bool isReachEnd() {return is_reach_end_;}

    cv::Mat camera_matrix_;
    cv::Mat camera_distortion_;

private:

    // indicator of whether we have reached the end of the sample stream
    bool is_reach_end_;

    int camera_id_;
    int sample_height_, sample_width_;
    std::vector<cv::Mat> images_;
    std::string input_path_;
    std::string input_file_video_name_;
    int screen_width_, screen_height_;

    // monitor
    float monitor_W_, monitor_H_; // monitor width and height in mm
    cv::Mat monitor_R_, monitor_T_;
    cv::Vec3f monitor_corners_[4];
    cv::Mat monitor_normal_;

    // input variables
    InputType input_type_;
    cv::VideoCapture cap_;
    std::string current_file_name_;

    // variable for directory input
    boost::filesystem::directory_iterator current_itr_;

};

}

#endif //INPUT_HANDLER_HPP
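A minimal sketch of driving InputHandler from a webcam, assuming the library is linked; camera id 0 and the calibration file name "calib.yml" are placeholder choices:

#include <opencv2/opencv.hpp>
#include "input_handler.hpp"

int main() {
    opengaze::InputHandler input;
    input.setInputType(opengaze::InputHandler::InputType::Camera);
    input.setInput(0);                              // camera id (illustrative)
    input.readCameraConfiguration("calib.yml");     // hypothetical calibration file
    input.readScreenConfiguration("calib.yml");
    input.initialize();

    while (!input.isReachEnd()) {
        cv::Mat frame = input.getNextSample();
        if (frame.empty()) break;
        cv::imshow("input", frame);
        if (cv::waitKey(1) == 27) break;            // ESC to quit
    }
    input.closeInput();
    return 0;
}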
42
include/normalizer.hpp
Normal file
@@ -0,0 +1,42 @@
#ifndef NORMALIZER_HPP
#define NORMALIZER_HPP

#include <opencv2/opencv.hpp>
#include "data.hpp"

namespace opengaze{
class Normalizer {

public:
    Normalizer();
    ~Normalizer();

    void estimateHeadPose(const cv::Point2f *landmarks, opengaze::Sample &sample);

    void setCameraMatrix(cv::Mat input);

    void loadFaceModel(std::string path);

    void setParameters(int focal_length, int distance, int img_w, int img_h);

    cv::Mat normalizeFace(cv::Mat input_image, Sample &sample);

    std::vector<cv::Mat> normalizeEyes(cv::Mat input_image, Sample &sample);

    cv::Mat cvtToCamera(cv::Point3f input, const cv::Mat cnv_mat);

private:
    cv::Mat camera_matrix_;
    std::vector<cv::Point3f> face_model_;
    cv::Mat face_model_mat_, cam_norm_;
    float focal_norm_, distance_norm_;
    cv::Size roiSize_norm_;
};

}

#endif //NORMALIZER_HPP
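A sketch of the normalization flow implied by this interface, assuming the library is linked; the intrinsics, the face-model path, the normalization parameters, the landmark positions and the test image are all placeholders, since in practice they come from the calibration file and FaceDetector::track_faces:

#include <opencv2/opencv.hpp>
#include "normalizer.hpp"

int main() {
    opengaze::Normalizer normalizer;

    // placeholder intrinsics; in practice taken from InputHandler::getCameraMatrix()
    cv::Mat camera_matrix = (cv::Mat_<double>(3, 3) << 1000, 0, 640,
                                                       0, 1000, 360,
                                                       0,    0,   1);
    normalizer.setCameraMatrix(camera_matrix);
    normalizer.loadFaceModel("./content/face_model.yml");     // hypothetical path
    normalizer.setParameters(1600, 600, 224, 224);            // illustrative normalization parameters

    // placeholder landmarks; normally filled in by the face detector
    opengaze::Sample sample;
    for (int i = 0; i < 6; ++i)
        sample.face_data.landmarks[i] = cv::Point2f(300.0f + 40.0f * i, 250.0f);

    cv::Mat frame = cv::imread("scene.jpg");                  // assumed test image
    normalizer.estimateHeadPose(sample.face_data.landmarks, sample);
    cv::Mat face_patch = normalizer.normalizeFace(frame, sample);
    return 0;
}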
80
include/opengaze.hpp
Normal file
@@ -0,0 +1,80 @@
#ifndef OPEN_GAZE_H
#define OPEN_GAZE_H

#include <string>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>

#include <opencv2/opencv.hpp>

#include "input_handler.hpp"
#include "gaze_estimator.hpp"
#include "data.hpp"
#include "personal_calibrator.hpp"

namespace opengaze {

class OpenGaze {
public:
    explicit OpenGaze(int argc, char** argv); // read configuration file
    ~OpenGaze();

    // main function to estimate and show the gaze vector drawn on the input face image.
    void runGazeVisualization();

    /**
     * main function to run personal calibration.
     * @param num_calibration_point the number of points used for calibration.
     */
    void runPersonalCalibration(int num_calibration_point=5);

    // main function to estimate and draw the gaze point on the screen.
    void runGazeOnScreen();

    // main function to extract the face image from the input image. The face image can then
    // be used to train a custom gaze estimation model
    void runDataExtraction();

private:
    // visualization
    /**
     * draw the gaze vector on the input face image.
     * @param sample the input data, including the gaze vector, head pose etc.
     * @param image the input image containing the face; the gaze vector is drawn onto this image
     */
    void drawGazeOnFace(opengaze::Sample sample, cv::Mat &image);
    // draw the detected facial landmarks
    void drawLandmarks(opengaze::Sample sample, cv::Mat &image);
    // draw the estimated gaze in the top left corner of the input image
    // to show the relative position on the screen. In this case,
    // the user can see both the input image and the projected gaze target on the screen.
    // This function is mainly used for debugging.
    void drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image);
    // estimate and draw the gaze point on the screen
    void drawGazeOnScreen(opengaze::Sample sample, cv::Mat &image);

    // debug mode shows the gaze drawn on the face
    bool show_debug_;

    // class instances
    InputHandler input_handler_;
    GazeEstimator gaze_estimator_;

    // input camera id
    int camera_id_;
    // temporary variables to store the input path, output path and input type
    boost::filesystem::path input_dir_;
    InputHandler::InputType input_type_;
    boost::filesystem::path output_dir_;

    bool is_face_model_;
    bool is_save_video_;

    // path to save the personal calibration model
    std::string per_model_save_path_;
};

}

#endif //OPEN_GAZE_H
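OpenGaze is the top-level facade: its constructor parses the command-line/configuration options, and each run* method covers one use case. A minimal driver sketch, assuming the library is linked; picking runGazeVisualization and 9 calibration points are arbitrary example choices:

#include "opengaze.hpp"

int main(int argc, char** argv) {
    // the constructor reads the configuration from the command line / config file
    opengaze::OpenGaze open_gaze(argc, argv);

    // pick one of the provided modes; here we visualize the gaze on the face image
    open_gaze.runGazeVisualization();
    // alternatives: open_gaze.runGazeOnScreen();
    //               open_gaze.runPersonalCalibration(9);
    //               open_gaze.runDataExtraction();
    return 0;
}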
64
include/personal_calibrator.hpp
Normal file
@@ -0,0 +1,64 @@
#ifndef PERSONAL_CALIBRATOR_HPP
#define PERSONAL_CALIBRATOR_HPP

#include <string>
#include <vector>
#include <opencv2/opencv.hpp>

class PersonalCalibrator {

public:
    PersonalCalibrator(int screen_width, int screen_height);
    ~PersonalCalibrator();
    /**
     * generate random locations for calibration
     * @param num_points number of points to generate
     */
    void generatePoints(int num_points);
    // prepare the display window; it should be full-screen
    void initialWindow();
    // show the next calibration point
    bool showNextPoint();
    // wait 0.5 seconds to receive the confirmation (mouse click) from the user
    void confirmClicking();
    /**
     * fit a polynomial function for the personal calibration
     * @param prediction predictions from the gaze estimation method
     * @param ground_truth calibration point locations on the screen
     * @param order the order of the polynomial function, 1 means linear
     */
    void generateModel(std::vector<cv::Point2f> prediction, std::vector<cv::Point2f> ground_truth, int order=1);
    /**
     * save the personal calibration model
     * @param file_path path to save the model
     */
    void saveModel(std::string file_path);
    /**
     * load the personal calibration model
     * @param file_path path to load the model from
     */
    void loadModel(std::string file_path);
    /**
     * return the current calibration point location on the screen
     * @return location on the screen
     */
    cv::Point2f getCurrentPoint() {return points_[index_point_];}
    // compute the polynomial function
    void calibratePolynomial();

private:
    // indicator of whether the user clicked the mouse
    bool is_click_;
    // number of points for personal calibration
    int num_points_;
    // index of the current calibration point
    int index_point_;
    // vector storing the generated calibration points
    std::vector<cv::Point2i> points_;
    int screen_width_, screen_height_, center_radius_; // monitor width and height in pixels
    // personal model
    cv::Mat model_matrix_;
};

#endif //PERSONAL_CALIBRATOR_HPP
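A sketch of a calibration loop built on this interface, assuming the library is linked and that showNextPoint() returns false once all points have been shown; the screen resolution, the number of points, the output path and the per-point gaze predictions (which would normally come from GazeEstimator) are placeholders:

#include <vector>
#include <opencv2/opencv.hpp>
#include "personal_calibrator.hpp"

int main() {
    const int screen_w = 1920, screen_h = 1080;      // illustrative screen resolution
    PersonalCalibrator calibrator(screen_w, screen_h);

    calibrator.generatePoints(9);                    // illustrative number of calibration points
    calibrator.initialWindow();                      // full-screen calibration window

    std::vector<cv::Point2f> predictions, ground_truth;
    while (calibrator.showNextPoint()) {
        calibrator.confirmClicking();                // wait for the user's mouse click
        ground_truth.push_back(calibrator.getCurrentPoint());
        // placeholder: the on-screen gaze estimate for this point would come from GazeEstimator
        predictions.push_back(calibrator.getCurrentPoint());
    }

    calibrator.generateModel(predictions, ground_truth, 1);  // order 1 = linear mapping
    calibrator.saveModel("person_calibration.yml");          // hypothetical output path
    return 0;
}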