#include "opengaze.hpp" #include #include using namespace std; using namespace cv; namespace opengaze { double clockToMilliseconds(clock_t ticks){ // units/(units/time) => time (seconds) * 1000 = milliseconds return (ticks/(double)CLOCKS_PER_SEC)*1000.0; } OpenGaze::OpenGaze(int argc, char** argv){ namespace fs = boost::filesystem; namespace po = boost::program_options; // default value of parameters camera_id_ = 0; input_type_ = InputHandler::InputType::Camera; is_face_model_ = true; string gaze_method; string gpu_id; string temp; int number_user; fs::path calib_camera, calib_screen, cnn_param_path, cnn_model_path; // parse command line options for input/output paths po::options_description command_line("Command line options"); command_line.add_options() ("root_dir,r", po::value(), "configuration file") ("input_type,t", po::value(), "input type (camera, video file, directory)") ("gaze_method,g", po::value(), "gaze estimation method, could be MPIIGaze or OpenFace") ("input,i", po::value(), "parameter for input") ("output,o", po::value(), "output directory") ("calib_camera", po::value(), "camera calibration file") ("calib_screen", po::value(), "camera-screen calibration file") ("gpu_id,p", po::value(), "gpu id number, default is 0") ("debug,d", "show debug output") ("face_model,f", "to use face model or not") ("save_video,s", "save output visualization or not") ("number_user,n", "the maximum number of users in the input image") ; cout << "Parsing command line options..." << endl; po::variables_map vm_command; po::store(po::parse_command_line(argc, argv, command_line), vm_command); po::notify(vm_command); // parse config file for data paths po::options_description config_file("Config file options"); config_file.add_options() ("root_dir,r", po::value(), "configuration file") ("input_type, t", po::value(), "input type (camera, video file, directory)") ("input, i", po::value(), "parameter for input") ("output,o", po::value(), "output directory") ("cnn_param_path", po::value(), "Caffe prototxt path") ("cnn_model_path", po::value(), "Caffe model path") ("calib_camera", po::value(), "camera calibration file") ("calib_screen", po::value(), "camera-screen calibration file") ("gaze_method", po::value(), "gaze estimation method, could be cnn or openface") ("gpu_id,p", po::value(), "gpu id number, default is 0") ("face_model", po::value(), "face model or not") ("save_video", po::value(), "save output visualization or not") ("number_user", po::value(), "the maximum number of users in the input image") ; fs::path root_dir, config_path; if(vm_command.count("root_dir")) root_dir = vm_command["root_dir"].as(); else { root_dir = OPENGAZE_CON_DIR; cout << "No root directory is found, default value " << root_dir << " will be use" << endl; } config_path = root_dir / "default.cfg"; cout << "Reading config from \"" << config_path.string() << "\""<< endl; if(!fs::exists(config_path)){ cout << "Config file does not exist" << endl; exit(EXIT_FAILURE); } ifstream settings_file(config_path.string()); po::variables_map vm_config; po::store(po::parse_config_file(settings_file , config_file), vm_config); po::notify(vm_config); if(vm_command.count("gpu_id")) gpu_id = vm_command["gpu_id"].as(); else if (vm_config.count("gpu_id")) gpu_id = vm_config["gpu_id"].as(); else gpu_id = "0"; // CNN paramters if(vm_command.count("cnn_param_path")) cnn_param_path = vm_command["cnn_param_path"].as(); else if (vm_config.count("cnn_param_path")) cnn_param_path = vm_config["cnn_param_path"].as(); else cnn_param_path = root_dir / 
"content/caffeModel/alexnet_face.prototxt"; if(vm_command.count("cnn_model_path")) cnn_model_path = vm_command["cnn_model_path"].as(); else if (vm_config.count("cnn_model_path")) cnn_model_path = vm_config["cnn_model_path"].as(); else cnn_model_path = root_dir / "content/caffeModel/alexnet_face.caffemodel"; // check input requirements if(vm_command.count("gaze_method")) gaze_method = vm_command["gaze_method"].as(); else if (vm_config.count("gaze_method")) gaze_method = vm_config["gaze_method"].as(); else gaze_method = "MPIIGaze"; if(vm_command.count("calib_screen")) calib_screen = vm_command["calib_screen"].as(); else if (vm_config.count("calib_screen")) calib_screen = vm_config["calib_screen"].as(); else calib_screen = root_dir / "content/calib/monitor_laptop.yml"; if(vm_command.count("calib_camera")) calib_camera = vm_command["calib_camera"].as(); else if (vm_config.count("calib_camera")) calib_camera = vm_config["calib_camera"].as(); else calib_camera = root_dir / "content/calib/calibration.yml"; // read calibration file if(!fs::exists(calib_camera)){ cout << "Camera calibration file does not exist: " << calib_camera <(); else if (vm_config.count("input_type")) temp = vm_config["input_type"].as(); else temp = ""; if (temp == "camera") {input_type_ = InputHandler::InputType::Camera;} else if (temp == "video") {input_type_ = InputHandler::InputType::Video;} else if (temp == "directory") {input_type_ = InputHandler::InputType::Directory;} else cout<<"No input type specified, default value (camera) will be use" << endl; if (vm_command.count("input")) temp = vm_command["input"].as(); else if (vm_config.count("input")) temp = vm_config["input"].as(); else temp = "0"; if (input_type_ == InputHandler::InputType::Camera) camera_id_ = stoi(temp); else if (input_type_ == InputHandler::InputType::Video || input_type_ == InputHandler::InputType::Directory) input_dir_ = temp; else cout<<"No input parameter specified, default value will be use" << endl; if(vm_command.count("face_model")) is_face_model_ = true; else if(vm_config.count("face_model")) is_face_model_ = vm_config["face_model"].as(); else is_face_model_ = true; if(vm_command.count("save_video")) is_save_video_ = true; else if(vm_config.count("save_video")) is_save_video_ = vm_config["save_video"].as(); else is_save_video_ = false; if(vm_command.count("debug")) show_debug_ = true; else if(vm_config.count("debug")) show_debug_ = vm_config["debug"].as(); else show_debug_ = false; if(vm_command.count("output")) output_dir_ = vm_command["output"].as(); else if(vm_config.count("output")) output_dir_ = vm_config["output"].as(); else { if (input_type_ == InputHandler::InputType::Video) output_dir_ = input_dir_.parent_path(); else if (input_type_ == InputHandler::InputType::Directory) output_dir_ = input_dir_.parent_path(); else if (input_type_ == InputHandler::InputType::Camera) output_dir_ = root_dir; } string face_detector_root_path; if(vm_command.count("openface_path")) face_detector_root_path = vm_command["openface_path"].as(); else if(vm_config.count("openface_path")) face_detector_root_path = vm_config["openface_path"].as(); else cout<< "No face detector root specified, default detector will be use" << endl; if(vm_command.count("per_model_save_path")) per_model_save_path_ = vm_command["per_model_save_path"].as(); else if (vm_config.count("per_model_save_path")) per_model_save_path_ = vm_config["per_model_save_path"].as(); else per_model_save_path_ = root_dir.string() + "/content/calib/user0.txt"; if(vm_command.count("number_user")) temp = 
vm_command["number_user"].as(); else if (vm_config.count("number_user")) temp = vm_config["number_user"].as(); else temp = "5"; number_user = stoi(temp); // initial class instance if (input_type_ == InputHandler::InputType::Camera){ // Camera as input input_handler_.setInputType(InputHandler::InputType::Camera);// set input type input_handler_.setInput(camera_id_); // set Camera id } else if (input_type_ == InputHandler::InputType::Video) { input_handler_.setInputType(InputHandler::InputType::Video);// set input type input_handler_.setInput(input_dir_.string()); // set camera file } else if (input_type_ == InputHandler::InputType::Directory){ input_handler_.setInputType(InputHandler::InputType::Directory); } // initialize other classes gaze_estimator_.setCameraParameters(input_handler_.camera_matrix_, input_handler_.camera_distortion_); gaze_estimator_.setRootPath(root_dir.string()); gaze_estimator_.initialFaceDetector(number_user); vector arguments; if (gaze_method == "MPIIGaze") { arguments.push_back(cnn_param_path.string()); arguments.push_back(cnn_model_path.string()); if (is_face_model_) arguments.emplace_back("face"); else arguments.emplace_back("eye"); arguments.push_back(gpu_id); gaze_estimator_.setMethod(GazeEstimator::Method::MPIIGaze, arguments); } else if (gaze_method == "OpenFace"){ //gaze_estimator_.setMethod(GazeEstimator::Method::OpenFace, arguments); cout << "OpenFace gaze estimation is current not support" << endl; exit(EXIT_FAILURE); } else { cout << "The method setting is not right! Options are MPIIGaze or OpenFace!" << endl; exit(EXIT_FAILURE); } } OpenGaze::~OpenGaze() { input_handler_.closeInput(); } // do gaze estimation with camera as input void OpenGaze::runGazeVisualization() { input_handler_.initialize(); namedWindow("Gaze"); int key; Mat input_image; vector output; cv::VideoWriter m_writer; if (is_save_video_){ boost::filesystem::path save_video_file; save_video_file = output_dir_ / (input_dir_.stem().string() + "_gaze_video.avi"); m_writer.open(save_video_file.string(), CV_FOURCC('M','J','P','G'), 25, Size(input_handler_.getFrameWidth(),input_handler_.getFrameHeight()), true); cout << "Saving video to " << save_video_file << endl; } // construct saving file ofstream output_stream; boost::filesystem::path output_file_name = output_dir_ / (input_dir_.stem().string() + "_gaze_output.txt"); output_stream.open(output_file_name.string()); cout << "Created output file: " << output_file_name.string() << endl; // for fps calculation double fps_tracker = -1.0; double t_start = 0; double t_end = 0; unsigned int frame_count = 0; while(true){// loop all the sample or read frame from Video frame_count++; t_start = t_end; output.clear(); input_image = input_handler_.getNextSample();// get input image if(input_handler_.isReachEnd()){ // check if all sample are processed cout<<"Processed all the samples."<(0) << ","; output_stream << sample.face_patch_data.face_center.at(1) << ","; output_stream << sample.face_patch_data.face_center.at(2) << ","; output_stream << sample.gaze_data.gaze2d.x << ","; output_stream << sample.gaze_data.gaze2d.y << ","; output_stream << sample.eye_data.leye_pos.at(0) << ","; output_stream << sample.eye_data.leye_pos.at(1) << ","; output_stream << sample.eye_data.leye_pos.at(2) << ","; output_stream << sample.eye_data.reye_pos.at(0) << ","; output_stream << sample.eye_data.reye_pos.at(1) << ","; output_stream << sample.eye_data.reye_pos.at(2) << endl; } if (is_save_video_ || show_debug_) { //////// visualization 
void OpenGaze::runDataExtraction() {
    // here we only accept directory input
    assert(input_handler_.getInputType() == InputHandler::InputType::Directory);
    input_handler_.initialize();

    vector<Sample> output;
    Mat input_image;
    while (true) { // loop over all samples
        output.clear();
        input_image = input_handler_.getNextSample(); // get input image
        if (input_handler_.isReachEnd()) { // check if all samples are processed
            cout << "Processed all the samples." << endl;
            break;
        }
        Mat undist_img;
        undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
        gaze_estimator_.getImagePatch(undist_img, output); // extract the face image

        // save the output
        for (size_t i = 0; i < output.size(); ++i) {
            // (the save logic for each extracted patch was lost in the source dump)
        }
    }
}

// NOTE: parts of this function were lost in the source dump and it has been
// lightly restructured; the parameter name, the PersonalCalibrator
// construction, and the point-presentation call are assumptions, flagged below.
void OpenGaze::runPersonalCalibration(int number_points) {
    Mat input_image, undist_img;
    Mat show_img; // canvas for the calibration screen, drawn on by the calibrator
    vector<Sample> output;
    cv::namedWindow("screen", CV_WINDOW_NORMAL);
    cv::setWindowProperty("screen", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
    show_img = cv::Mat::zeros(input_handler_.getScreenHeight(), input_handler_.getScreenWidth(), CV_8UC3);

    // assumed constructor for the personal calibrator
    PersonalCalibrator m_calibrator(input_handler_.getScreenWidth(), input_handler_.getScreenHeight());

    vector<Point2f> pred, gt; // prediction and ground-truth
    for (int i = 0; i < number_points; ++i) {
        output.clear();
        if (input_handler_.isReachEnd()) { // check if all samples are processed
            cout << "Processed all the samples." << endl;
            break;
        }
        // assumed call: present the next calibration point and wait for a click
        if (m_calibrator.showNextPoint()) {
            input_image = input_handler_.getNextSample(); // get the sample when the user clicks
            undistort(input_image, undist_img, input_handler_.camera_matrix_, input_handler_.camera_distortion_);
            gaze_estimator_.estimateGaze(undist_img, output); // do gaze estimation
            input_handler_.projectToDisplay(output, gaze_estimator_.input_type_ == GazeEstimator::InputType::face); // convert to 2D projection
            m_calibrator.confirmClicking(); // give feedback that the calibration sample was recorded
            pred.emplace_back(output[0].gaze_data.gaze2d);
            gt.emplace_back(cv::Point2f(m_calibrator.getCurrentPoint().x / (float)input_handler_.getScreenWidth(),
                                        m_calibrator.getCurrentPoint().y / (float)input_handler_.getScreenHeight()));
        }
        else break; // if the user presses the ESC key, we break
    }

    if (pred.size() > 0) {
        m_calibrator.generateModel(pred, gt, 1); // fit the mapping model (order 1)
        per_model_save_path_ = output_dir_.stem().string() + "/personal_gaze_model.yml";
        m_calibrator.saveModel(per_model_save_path_);
    }
}

void OpenGaze::drawGazeOnSimScreen(opengaze::Sample sample, cv::Mat &image) {
    static const int dW = 640;
    static const int dH = 360;
    Mat debug_disp = Mat::zeros(Size(dW, dH), CV_8UC3);

    Point2f g_s;
    g_s.x = dW * sample.gaze_data.gaze2d.x;
    g_s.y = dH * sample.gaze_data.gaze2d.y;
    circle(debug_disp, g_s, 10, CV_RGB(255,0,0), -1);
    debug_disp.copyTo(image(Rect(0, 0, dW, dH)));
}
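/*
 * Both the calibration ground truth above and the simulated screen view use
 * normalized screen coordinates: gaze2d lies in [0,1] x [0,1], so a pixel
 * position is recovered as
 *
 *   px = gaze2d.x * screen_width;
 *   py = gaze2d.y * screen_height;
 *
 * which is why drawGazeOnSimScreen scales by its 640x360 debug canvas.
 */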
void OpenGaze::drawGazeOnFace(opengaze::Sample sample, cv::Mat &image) {
    // draw gaze on the face
    if (gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
        && gaze_estimator_.input_type_ == GazeEstimator::InputType::face) {
        static const float gaze_length = 300.0f;
        Mat zero = Mat::zeros(1, 3, CV_32F);
        Mat rvec, tvec;
        sample.face_patch_data.head_r.convertTo(rvec, CV_32F);
        sample.face_patch_data.head_t.convertTo(tvec, CV_32F);

        vector<Point3f> cam_points;
        Vec3f face_center(sample.face_patch_data.face_center.at<float>(0),
                          sample.face_patch_data.face_center.at<float>(1),
                          sample.face_patch_data.face_center.at<float>(2));
        cam_points.emplace_back(face_center);
        cam_points.emplace_back(face_center + gaze_length * sample.gaze_data.gaze3d);

        vector<Point2f> img_points;
        projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);

        line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5); // gaze ray
        circle(image, img_points[0], 5, CV_RGB(255,0,0), -1); // starting point
        circle(image, img_points[1], 5, CV_RGB(255,0,0), -1); // end point
    }
    else if ((gaze_estimator_.method_type_ == GazeEstimator::Method::MPIIGaze
              && gaze_estimator_.input_type_ == GazeEstimator::InputType::eye)
             || gaze_estimator_.method_type_ == GazeEstimator::Method::OpenFace) {
        int gaze_length = 300;
        Mat zero = Mat::zeros(1, 3, CV_32F);
        vector<Point3f> cam_points;
        sample.eye_data.leye_pos.convertTo(sample.eye_data.leye_pos, CV_32F);
        Vec3f leye_pose(sample.eye_data.leye_pos.at<float>(0),
                        sample.eye_data.leye_pos.at<float>(1),
                        sample.eye_data.leye_pos.at<float>(2));
        cam_points.emplace_back(leye_pose);
        cam_points.emplace_back(leye_pose + gaze_length * sample.gaze_data.lgaze3d);
        Vec3f reye_pose(sample.eye_data.reye_pos.at<float>(0),
                        sample.eye_data.reye_pos.at<float>(1),
                        sample.eye_data.reye_pos.at<float>(2));
        cam_points.emplace_back(reye_pose);
        cam_points.emplace_back(reye_pose + gaze_length * sample.gaze_data.rgaze3d);

        vector<Point2f> img_points;
        projectPoints(cam_points, zero, zero, input_handler_.camera_matrix_, input_handler_.camera_distortion_, img_points);

        line(image, img_points[0], img_points[1], CV_RGB(255,0,0), 5); // left-eye gaze ray
        line(image, img_points[2], img_points[3], CV_RGB(255,0,0), 5); // right-eye gaze ray
        circle(image, img_points[1], 3, CV_RGB(255,0,0), -1);
        circle(image, img_points[3], 3, CV_RGB(255,0,0), -1);
    }
}

void OpenGaze::drawLandmarks(opengaze::Sample sample, cv::Mat &image) {
    cv::Rect_<int> face_bb = sample.face_data.face_bb;
    rectangle(image, cv::Point(face_bb.x, face_bb.y),
              cv::Point(face_bb.x + face_bb.width, face_bb.y + face_bb.height), CV_RGB(0,255,0), 5);
    for (int p = 0; p < 6; ++p)
        circle(image, sample.face_data.landmarks[p], 5, CV_RGB(0,255,0), -1);
}

} // namespace opengaze
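/*
 * Minimal usage sketch, assuming a main() in a separate driver file; error
 * handling is omitted, and which run* entry point to call depends on the
 * desired mode:
 *
 *   #include "opengaze.hpp"
 *
 *   int main(int argc, char** argv) {
 *       opengaze::OpenGaze open_gaze(argc, argv); // parses options and default.cfg
 *       open_gaze.runGazeVisualization();         // estimate and visualize gaze
 *       return 0;
 *   }
 */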