// opengaze/src/opengaze.cpp

#include "opengaze.hpp"
#include <iostream>
#include <fstream>
#include <cstdio>
#include <ctime>
using namespace std;
using namespace cv;
namespace opengaze {
double clockToMilliseconds(clock_t ticks){
// units/(units/time) => time (seconds) * 1000 = milliseconds
return (ticks/(double)CLOCKS_PER_SEC)*1000.0;
}
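// e.g. clockToMilliseconds(clock() - start_ticks) converts an elapsed
// clock_t interval into milliseconds of CPU time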
OpenGaze::OpenGaze(int argc, char** argv){
namespace fs = boost::filesystem;
namespace po = boost::program_options;
// default value of parameters
camera_id_ = 0;
input_type_ = InputHandler::InputType::Camera;
is_face_model_ = true;
string gaze_method;
string gpu_id;
string temp;
int number_user;
fs::path calib_camera, calib_screen, cnn_param_path, cnn_model_path;
// parse command line options for input/output paths
po::options_description command_line("Command line options");
command_line.add_options()
("root_dir,r", po::value<string>(), "configuration file")
("input_type,t", po::value<string>(), "input type (camera, video file, directory)")
("gaze_method,g", po::value<string>(), "gaze estimation method, could be MPIIGaze or OpenFace")
("input,i", po::value<string>(), "parameter for input")
("output,o", po::value<string>(), "output directory")
("calib_camera", po::value<string>(), "camera calibration file")
("calib_screen", po::value<string>(), "camera-screen calibration file")
("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
("debug,d", "show debug output")
("face_model,f", "to use face model or not")
("save_video,s", "save output visualization or not")
("number_user,n", "the maximum number of users in the input image")
;
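// A hypothetical invocation using the options above (the binary name and
// paths are placeholders, not taken from the repository):
//   ./opengaze -r /path/to/opengaze -t camera -i 0 -g MPIIGaze -d
// This reads default.cfg from the root directory, opens camera 0, runs the
// MPIIGaze estimator, and shows the debug visualization window.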
cout << "Parsing command line options..." << endl;
po::variables_map vm_command;
po::store(po::parse_command_line(argc, argv, command_line), vm_command);
po::notify(vm_command);
// parse config file for data paths
po::options_description config_file("Config file options");
config_file.add_options()
("root_dir,r", po::value<string>(), "configuration file")
("input_type, t", po::value<string>(), "input type (camera, video file, directory)")
("input, i", po::value<string>(), "parameter for input")
("output,o", po::value<string>(), "output directory")
("cnn_param_path", po::value<string>(), "Caffe prototxt path")
("cnn_model_path", po::value<string>(), "Caffe model path")
("calib_camera", po::value<string>(), "camera calibration file")
("calib_screen", po::value<string>(), "camera-screen calibration file")
("gaze_method", po::value<string>(), "gaze estimation method, could be cnn or openface")
("gpu_id,p", po::value<string>(), "gpu id number, default is 0")
("face_model", po::value<bool>(), "face model or not")
("save_video", po::value<bool>(), "save output visualization or not")
("number_user", po::value<string>(), "the maximum number of users in the input image")
;
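// A hypothetical default.cfg built from the keys above; boost::program_options
// config files are plain key=value lines (all values below are placeholders):
//   input_type=camera
//   input=0
//   gaze_method=MPIIGaze
//   face_model=true
//   save_video=false
//   gpu_id=0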
fs::path root_dir, config_path;
if(vm_command.count("root_dir")) root_dir = vm_command["root_dir"].as<string>();
else {
root_dir = OPENGAZE_CON_DIR;
cout << "No root directory is found, default value " << root_dir << " will be use" << endl;
}
config_path = root_dir / "default.cfg";
cout << "Reading config from \"" << config_path.string() << "\""<< endl;
if(!fs::exists(config_path)){
cout << "Config file does not exist" << endl;
exit(EXIT_FAILURE);
}
ifstream settings_file(config_path.string());
po::variables_map vm_config;
po::store(po::parse_config_file(settings_file , config_file), vm_config);
po::notify(vm_config);
if(vm_command.count("gpu_id")) gpu_id = vm_command["gpu_id"].as<string>();
else if (vm_config.count("gpu_id")) gpu_id = vm_config["gpu_id"].as<string>();
else gpu_id = "0";
// CNN parameters
if(vm_command.count("cnn_param_path")) cnn_param_path = vm_command["cnn_param_path"].as<string>();
else if (vm_config.count("cnn_param_path")) cnn_param_path = vm_config["cnn_param_path"].as<string>();
else cnn_param_path = root_dir / "content/caffeModel/alexnet_face.prototxt";
if(vm_command.count("cnn_model_path")) cnn_model_path = vm_command["cnn_model_path"].as<string>();
else if (vm_config.count("cnn_model_path")) cnn_model_path = vm_config["cnn_model_path"].as<string>();
else cnn_model_path = root_dir / "content/caffeModel/alexnet_face.caffemodel";
// check input requirements
if(vm_command.count("gaze_method")) gaze_method = vm_command["gaze_method"].as<string>();
else if (vm_config.count("gaze_method")) gaze_method = vm_config["gaze_method"].as<string>();
else gaze_method = "MPIIGaze";
if(vm_command.count("calib_screen")) calib_screen = vm_command["calib_screen"].as<string>();
else if (vm_config.count("calib_screen")) calib_screen = vm_config["calib_screen"].as<string>();
else calib_screen = root_dir / "content/calib/monitor_laptop.yml";
if(vm_command.count("calib_camera")) calib_camera = vm_command["calib_camera"].as<string>();
else if (vm_config.count("calib_camera")) calib_camera = vm_config["calib_camera"].as<string>();
else calib_camera = root_dir / "content/calib/calibration.yml";
// read calibration file
if(!fs::exists(calib_camera)){
cout << "Camera calibration file does not exist: " << calib_camera <<endl;
exit(EXIT_FAILURE);
}
else input_handler_.readCameraConfiguration(calib_camera.string());
if(!fs::exists(calib_screen)){
cout << "Camera-screen calibration file does not exist: " << calib_screen << endl;
exit(EXIT_FAILURE);
}
else input_handler_.readScreenConfiguration(calib_screen.string());
if(vm_command.count("input_type")) temp = vm_command["input_type"].as<string>();
else if (vm_config.count("input_type")) temp = vm_config["input_type"].as<string>();
else temp = "";
if (temp == "camera") {input_type_ = InputHandler::InputType::Camera;}
else if (temp == "video") {input_type_ = InputHandler::InputType::Video;}
else if (temp == "directory") {input_type_ = InputHandler::InputType::Directory;}
else cout<<"No input type specified, default value (camera) will be use" << endl;
if (vm_command.count("input")) temp = vm_command["input"].as<string>();
else if (vm_config.count("input")) temp = vm_config["input"].as<string>();
else temp = "0";
if (input_type_ == InputHandler::InputType::Camera) camera_id_ = stoi(temp);
else if (input_type_ == InputHandler::InputType::Video || input_type_ == InputHandler::InputType::Directory) input_dir_ = temp;
else cout<<"No input parameter specified, default value will be use" << endl;
if(vm_command.count("face_model")) is_face_model_ = true;
else if(vm_config.count("face_model")) is_face_model_ = vm_config["face_model"].as<bool>();
else is_face_model_ = true;
if(vm_command.count("save_video")) is_save_video_ = true;
else if(vm_config.count("save_video")) is_save_video_ = vm_config["save_video"].as<bool>();
else is_save_video_ = false;
if(vm_command.count("debug")) show_debug_ = true;
else if(vm_config.count("debug")) show_debug_ = vm_config["debug"].as<bool>();
else show_debug_ = false;
if(vm_command.count("output")) output_dir_ = vm_command["output"].as<string>();
else if(vm_config.count("output")) output_dir_ = vm_config["output"].as<string>();
else {
if (input_type_ == InputHandler::InputType::Video) output_dir_ = input_dir_.parent_path();
else if (input_type_ == InputHandler::InputType::Directory) output_dir_ = input_dir_.parent_path();
else if (input_type_ == InputHandler::InputType::Camera)
output_dir_ = root_dir;
}
string face_detector_root_path;
if(vm_command.count("openface_path")) face_detector_root_path = vm_command["openface_path"].as<string>();
else if(vm_config.count("openface_path")) face_detector_root_path = vm_config["openface_path"].as<string>();
else cout<< "No face detector root specified, default detector will be use" << endl;
if(vm_command.count("per_model_save_path")) per_model_save_path_ = vm_command["per_model_save_path"].as<string>();
else if (vm_config.count("per_model_save_path")) per_model_save_path_ = vm_config["per_model_save_path"].as<string>();
else per_model_save_path_ = root_dir.string() + "/content/calib/user0.txt";
if(vm_command.count("number_user")) temp = vm_command["number_user"].as<string>();
else if (vm_config.count("number_user")) temp = vm_config["number_user"].as<string>();
else temp = "5";
number_user = stoi(temp);
// initialize the input handler
if (input_type_ == InputHandler::InputType::Camera){ // Camera as input
input_handler_.setInputType(InputHandler::InputType::Camera);// set input type
input_handler_.setInput(camera_id_); // set Camera id
}
else if (input_type_ == InputHandler::InputType::Video) {
input_handler_.setInputType(InputHandler::InputType::Video);// set input type
input_handler_.setInput(input_dir_.string()); // set camera file
}
else if (input_type_ == InputHandler::InputType::Directory){
input_handler_.setInputType(InputHandler::InputType::Directory);
}
// initialize other classes
gaze_estimator_.setCameraParameters(input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.setRootPath(root_dir.string());
gaze_estimator_.initialFaceDetector(number_user);
vector<std::string> arguments;
if (gaze_method == "MPIIGaze") {
arguments.push_back(cnn_param_path.string());
arguments.push_back(cnn_model_path.string());
if (is_face_model_)
arguments.emplace_back("face");
else
arguments.emplace_back("eye");
arguments.push_back(gpu_id);
gaze_estimator_.setMethod(GazeEstimator::Method::MPIIGaze, arguments);
}
else if (gaze_method == "OpenFace"){
//gaze_estimator_.setMethod(GazeEstimator::Method::OpenFace, arguments);
cout << "OpenFace gaze estimation is current not support" << endl;
exit(EXIT_FAILURE);
}
else {
cout << "The method setting is not right! Options are MPIIGaze or OpenFace!" << endl;
exit(EXIT_FAILURE);
}
}
OpenGaze::~OpenGaze() {
input_handler_.closeInput();
}
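// A minimal driver sketch (a hypothetical main.cpp; the actual entry point
// ships separately in the repository):
//   #include "opengaze.hpp"
//   int main(int argc, char** argv) {
//       opengaze::OpenGaze app(argc, argv);
//       app.runGazeVisualization();
//       return 0;
//   }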
// run gaze estimation on the input stream and visualize/save the results
void OpenGaze::runGazeVisualization() {
input_handler_.initialize();
namedWindow("Gaze");
int key;
Mat input_image;
vector<Sample> output;
cv::VideoWriter m_writer;
if (is_save_video_){
boost::filesystem::path save_video_file;
save_video_file = output_dir_ / (input_dir_.stem().string() + "_gaze_video.avi");
m_writer.open(save_video_file.string(), CV_FOURCC('M','J','P','G'), 25,
Size(input_handler_.getFrameWidth(),input_handler_.getFrameHeight()), true);
cout << "Saving video to " << save_video_file << endl;
}
// create the output file
ofstream output_stream;
boost::filesystem::path output_file_name = output_dir_ / (input_dir_.stem().string() + "_gaze_output.txt");
output_stream.open(output_file_name.string());
cout << "Created output file: " << output_file_name.string() << endl;
// for fps calculation
double fps_tracker = -1.0;
double t_start = 0;
double t_end = 0;
unsigned int frame_count = 0;
while(true){ // loop over all samples or video frames
frame_count++;
t_start = t_end;
output.clear();
input_image = input_handler_.getNextSample();// get input image
if(input_handler_.isReachEnd()){ // stop when all samples have been processed
cout<<"Processed all the samples."<<endl;
break;
}
Mat undist_img;
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
// get the fps values
t_end = cv::getTickCount();
fps_tracker = 1.0 / (double(t_end - t_start) / cv::getTickFrequency());
// save output
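// One CSV line per detected face and frame:
// frame, face_id, certainty, face center x/y/z (camera coordinates),
// 2D gaze point x/y (normalized screen coordinates, see projectToDisplay),
// left eye x/y/z, right eye x/y/z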
for(auto & sample : output) {
output_stream << frame_count << ",";
output_stream << sample.face_data.face_id << ",";
output_stream << sample.face_data.certainty << ",";
output_stream << sample.face_patch_data.face_center.at<float>(0) << ",";
output_stream << sample.face_patch_data.face_center.at<float>(1) << ",";
output_stream << sample.face_patch_data.face_center.at<float>(2) << ",";
output_stream << sample.gaze_data.gaze2d.x << ",";
output_stream << sample.gaze_data.gaze2d.y << ",";
output_stream << sample.eye_data.leye_pos.at<float>(0) << ",";
output_stream << sample.eye_data.leye_pos.at<float>(1) << ",";
output_stream << sample.eye_data.leye_pos.at<float>(2) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(0) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(1) << ",";
output_stream << sample.eye_data.reye_pos.at<float>(2) << endl;
}
if (is_save_video_ || show_debug_) {
//////// visualization //////////////////////////////////////////////////
// draw results
for(const auto & sample : output){
//drawLandmarks(sample, undist_img); // draw face landmarks
drawGazeOnFace(sample, undist_img); // draw gaze ray on face image
//drawGazeOnSimScreen(sample, undist_img); // draw screen target
}
if (show_debug_) {
// show fps
char fpsC[255];
std::snprintf(fpsC, sizeof(fpsC), "%.1f", fps_tracker);
string fpsSt("FPS: ");
fpsSt += fpsC;
cv::putText(undist_img, fpsSt, cv::Point(100, 100), CV_FONT_HERSHEY_SIMPLEX, 1, CV_RGB(255, 0, 0), 2);
// show the image
imshow("Gaze", undist_img);
key = cv::waitKey(1);
if (key==27) exit(EXIT_SUCCESS); // press ESC to exit
}
if (is_save_video_)
m_writer << undist_img;
}
}
if (is_save_video_)
m_writer.release();
}
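// extract face patches from a directory of images and save them as
// img_<name>_<index>.jpg files for offline processing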
void OpenGaze::runDataExtraction() {
assert(input_handler_.getInputType() == InputHandler::InputType::Directory); // only directory input is supported here
input_handler_.initialize();
vector<Sample> output;
Mat input_image;
while(true){ // loop over all samples
output.clear();
input_image = input_handler_.getNextSample();// get input image
if(input_handler_.isReachEnd()){ // stop when all samples have been processed
cout << "Processed all the samples." << endl;
break;
}
Mat undist_img;
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.getImagePatch(undist_img, output); // extract the face image
// save the output
for (size_t i=0; i<output.size(); ++i) {
string save_file_name = (output_dir_ / ("img_" + input_handler_.getFileName() + "_" + to_string(i) + ".jpg")).string();
cv::imwrite(save_file_name, output[i].face_patch_data.face_patch);
}
}
}
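// visualize the estimated gaze point live on the actual screen in a
// fullscreen window; press ESC to exit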
void OpenGaze::runGazeOnScreen() {
input_handler_.initialize();
int key;
Mat input_image, undist_img, show_img;
vector<Sample> output;
cv::namedWindow("screen", CV_WINDOW_NORMAL);
cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);
while(true){ // loop over all samples or video frames
output.clear();
if(input_handler_.isReachEnd()){ // stop when all samples have been processed
cout<<"Processed all the samples."<<endl;
break;
}
input_image = input_handler_.getNextSample();// get input image
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);
// save output
for(auto & sample : output) {
int loc_x = (int)(sample.gaze_data.gaze2d.x * input_handler_.getScreenWidth());
int loc_y = (int)(sample.gaze_data.gaze2d.y * input_handler_.getScreenHeight());
circle(show_img, cv::Point(loc_x, loc_y), 10, CV_RGB(255,255,255), -1);
}
imshow("screen", show_img);
key = cv::waitKey(1);
show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);
if (key==27) break; // press ESC to exit
}
cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_NORMAL);
cv::destroyWindow("screen");
}
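// collect one gaze sample per calibration point (confirmed by a mouse click),
// fit a mapping from estimated to true on-screen positions, and save it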
void OpenGaze::runPersonalCalibration(int num_calibration_point) {
if (input_handler_.getInputType() != InputHandler::InputType::Camera){ // personal calibration has to be done with camera
cout << "Error: the input type must be camera for personal calibration!" << endl;
exit(EXIT_FAILURE);
}
Mat input_image, undist_img;
input_handler_.initialize();
PersonalCalibrator m_calibrator(input_handler_.getScreenWidth(), input_handler_.getScreenHeight());
m_calibrator.generatePoints(num_calibration_point);
m_calibrator.initialWindow(); // show the start window
vector<cv::Point2f> pred, gt; // prediction and ground-truth
for (int i=0; i<num_calibration_point; ++i){
if (m_calibrator.showNextPoint()) {// wait for clicking
vector<Sample> output;
input_image = input_handler_.getNextSample(); // grab the frame at the moment of the click
undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
input_handler_.projectToDisplay(output, gaze_estimator_.input_type_==GazeEstimator::InputType::face);// convert to 2D projection
m_calibrator.confirmClicking(); // give the user feedback that the point was captured
if (output.empty()) continue; // skip this point if no face was detected
pred.emplace_back(output[0].gaze_data.gaze2d);
gt.emplace_back(cv::Point2f((m_calibrator.getCurrentPoint().x/(float)input_handler_.getScreenWidth()),
(m_calibrator.getCurrentPoint().y/(float)input_handler_.getScreenHeight())));
}
else
break; // if user press ESC button, we break
}
if (!pred.empty()){
m_calibrator.generateModel(pred, gt, 1); // fit the mapping model
string model_save_path = (output_dir_ / "personal_gaze_model.yml").string();
m_calibrator.saveModel(model_save_path);
}
}
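// draw the 2D gaze point on a simulated 640x360 screen rendered into the
// top-left corner of the debug image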
void OpenGaze::drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image) {
static const int dW = 640;
static const int dH = 360;
Mat debug_disp = Mat::zeros(Size(dW, dH), CV_8UC3);
Point2f g_s;
g_s.x = dW*sample.gaze_data.gaze2d.x;
g_s.y = dH*sample.gaze_data.gaze2d.y;
circle(debug_disp, g_s, 10, CV_RGB(255,0,0), -1);
debug_disp.copyTo(image(Rect(0, 0, dW, dH)));
}
void OpenGaze::drawGazeOnFace(opengaze::Sample sample, cv::Mat &image) {
// draw gaze on the face
if (gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
&& gaze_estimator_.input_type_ == GazeEstimator::InputType::face) {
static const float gaze_length = 300.0;
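// project the 3D face center and a point gaze_length (presumably mm) along
// the gaze direction into the image; the extrinsics passed to projectPoints
// are zero because both points are already expressed in camera coordinates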
Mat zero = Mat::zeros(1, 3, CV_32F);
vector<Point3f> cam_points;
Vec3f face_center(sample.face_patch_data.face_center.at<float>(0), sample.face_patch_data.face_center.at<float>(1), sample.face_patch_data.face_center.at<float>(2));
cam_points.emplace_back(face_center);
cam_points.emplace_back(face_center + gaze_length * sample.gaze_data.gaze3d);
vector<Point2f> img_points;
projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5); // gaze ray
circle(image, img_points[0], 5, CV_RGB(255,0,0), -1); // starting point
circle(image, img_points[1], 5, CV_RGB(255,0,0), -1); // end point
}
else if ((gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
&& gaze_estimator_.input_type_ == GazeEstimator::InputType::eye)
|| gaze_estimator_.method_type_ == GazeEstimator::Method::OpenFace) {
int gaze_length = 300;
Mat zero = Mat::zeros(1, 3, CV_32F);
vector<Point3f> cam_points;
sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
sample.eye_data.reye_pos.convertTo(sample.eye_data.reye_pos, CV_32F);
Vec3f leye_pose(sample.eye_data.leye_pos.at<float>(0),sample.eye_data.leye_pos.at<float>(1),sample.eye_data.leye_pos.at<float>(2));
cam_points.emplace_back(leye_pose);
cam_points.emplace_back(leye_pose + gaze_length*sample.gaze_data.lgaze3d);
Vec3f reye_pose(sample.eye_data.reye_pos.at<float>(0),sample.eye_data.reye_pos.at<float>(1),sample.eye_data.reye_pos.at<float>(2));
cam_points.emplace_back(reye_pose);
cam_points.emplace_back(reye_pose + gaze_length*sample.gaze_data.rgaze3d);
vector<Point2f> img_points;
projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);
line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5);
line(image, img_points[2], img_points[3], CV_RGB(255,0,0), 5);
circle(image, img_points[1], 3, CV_RGB(255,0,0), -1);
circle(image, img_points[3], 3, CV_RGB(255,0,0), -1);
}
}
void OpenGaze::drawLandmarks(opengaze::Sample sample, cv::Mat &image) {
cv::Rect_<int> face_bb = sample.face_data.face_bb;
rectangle(image, cv::Point(face_bb.x, face_bb.y),
cv::Point(face_bb.x+face_bb.width,face_bb.y+face_bb.height), CV_RGB(0,255,0), 5);
for(int p=0; p<6; ++p)
circle(image, sample.face_data.landmarks[p], 5, CV_RGB(0,255,0), -1);
}
}