opengaze/src/face_detector.cpp

274 lines
12 KiB
C++

#include <iostream>
#include <tbb/tbb.h>
// if we use OpenFace
#if USE_OPENFACE
#include <LandmarkCoreIncludes.h> // from "OpenFace-master/lib/local/LandmarkDetector/include/"
#endif
#include "face_detector.hpp"
using namespace std;
using namespace cv;
vector<LandmarkDetector::FaceModelParameters> det_parameters_;
vector<LandmarkDetector::CLNF> clnf_models_;
namespace opengaze {
FaceDetector::FaceDetector() {
method_type_ = Method::OpenFace;
}
FaceDetector::~FaceDetector() {}
void FaceDetector::initialize(int number_users=5) {
string root_path = OPENFACE_DIR;
root_path = root_path + "/build/bin";
//string openface_root = OpenFace_ROOT_DIR;
// (currently) hard-coded setting
num_faces_max_ = number_users;
detection_resize_rate_ = 2.0; // resize the input image to detect face, crucial for speed
detection_skip_frames_ = 1;
nonoverlap_threshold_ = 0.5;
certainty_threshold_ = 0.0; // the smaller the better, 1 is the best, -1 is the worst
landmark_indices_[0] = 36; landmark_indices_[1] = 39; landmark_indices_[2] = 42;
landmark_indices_[3] = 45; landmark_indices_[4] = 48; landmark_indices_[5] = 54;
tracking_loss_limit_ = 10;
// initialize the tracking models
LandmarkDetector::FaceModelParameters det_parameter;
det_parameter.reinit_video_every = -1; // This is so that the model would not try re-initialising itself
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR;
det_parameter.model_location = root_path + "/model/main_clm_wild.txt";
det_parameter.haar_face_detector_location = root_path + "/classifiers/haarcascade_frontalface_alt.xml";// this line will be disable due to "curr_face_detector"
det_parameter.mtcnn_face_detector_location = root_path + "/model/mtcnn_detector/MTCNN_detector.txt";
det_parameter.use_face_template = true;
det_parameter.reinit_video_every = 5;
// det_parameter.quiet_mode = true; not avaliable fro OpenFace v2.1
// // For in the wild fitting these parameters are suitable
det_parameter.window_sizes_init = vector<int>(4);
det_parameter.window_sizes_init[0] = 15;
det_parameter.window_sizes_init[1] = 13;
det_parameter.window_sizes_init[2] = 11;
det_parameter.window_sizes_init[3] = 9;
det_parameter.sigma = 1.25;
det_parameter.reg_factor = 35;
det_parameter.weight_factor = 2.5;
det_parameter.num_optimisation_iteration = 10;
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
det_parameters_.push_back(det_parameter);
LandmarkDetector::CLNF clnf_model_ = LandmarkDetector::CLNF(det_parameter.model_location);
if (!clnf_model_.loaded_successfully){
cout << "ERROR: Could not load the landmark detector" << endl;
exit(-1);
}
clnf_model_.face_detector_HAAR.load(det_parameter.haar_face_detector_location);
clnf_model_.haar_face_detector_location = det_parameter.haar_face_detector_location;
clnf_model_.face_detector_MTCNN.Read(det_parameter.mtcnn_face_detector_location);
clnf_model_.mtcnn_face_detector_location = det_parameter.mtcnn_face_detector_location;
// If can't find MTCNN face detector, default to HOG one
if (det_parameter.curr_face_detector == LandmarkDetector::FaceModelParameters::MTCNN_DETECTOR && clnf_model_.face_detector_MTCNN.empty()){
cout << "INFO: defaulting to HOG-SVM face detector" << endl;
det_parameter.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR;
}
clnf_models_.reserve(num_faces_max_);
clnf_models_.push_back(clnf_model_);
active_models_.push_back(false);
for(int i=1; i<num_faces_max_; ++i)
{
clnf_models_.push_back(clnf_model_);
active_models_.push_back(false);
det_parameters_.push_back(det_parameter);
}
// variables
frame_counter_ = 0;
current_face_id_ = 1;
for(int i=0; i<num_faces_max_; ++i) face_ids_.push_back(0);
}
void FaceDetector::reset() {
// reset all status
frame_counter_ = 0;
current_face_id_ = 1;
for(unsigned int model = 0; model < clnf_models_.size(); ++model)
{
active_models_[model] = false;
face_ids_[model] = 0;
clnf_models_[model].Reset();
}
}
void NonOverlapingDetections(const vector<LandmarkDetector::CLNF>& clnf_models, vector<cv::Rect_<float> >& face_detections){
// Go over the model and eliminate detections that are not informative (there already is a tracker there)
for (size_t model = 0; model < clnf_models.size(); ++model){
// See if the detections intersect
cv::Rect_<float> model_rect = clnf_models[model].GetBoundingBox();
for (int detection=face_detections.size()-1; detection >= 0; --detection)
{
double intersection_area = (model_rect & face_detections[detection]).area();
double union_area = model_rect.area() + face_detections[detection].area() - 2 * intersection_area;
// If the model is already tracking what we're detecting ignore the detection, this is determined by amount of overlap
if (intersection_area / union_area > 0.5)
{
face_detections.erase(face_detections.begin() + detection);
}
}
}
}
double NonOverlapingDetection(const LandmarkDetector::CLNF &ref_model, const LandmarkDetector::CLNF &tgt_model)
{
Rect_<double> ref_rect = ref_model.GetBoundingBox();
Rect_<double> tgt_rect = tgt_model.GetBoundingBox();
double intersection_area = (ref_rect & tgt_rect).area();
double union_area = ref_rect.area() + tgt_rect.area() - 2 * intersection_area;
return intersection_area/union_area;
}
void FaceDetector::track_faces(cv::Mat input_img, std::vector<opengaze::Sample> &output) {
if(input_img.channels() < 3){
cout << "The input must be a color image!" <<endl;
exit(EXIT_FAILURE);
}
Mat_<uchar> grayscale_image;
cvtColor(input_img, grayscale_image, CV_BGR2GRAY);
bool all_models_active = true;
for(unsigned int model = 0; model < clnf_models_.size(); ++model)
{
if(!active_models_[model])
{
all_models_active = false;
break;
}
}
// Detect faces
// Get the detections (every Xth frame and when there are free models available for tracking)
std::vector<Rect_<float> > face_detections;
cv::Mat small_grayscale_image_;
if (frame_counter_ % detection_skip_frames_ == 0 && !all_models_active) {
// resized image for faster face detection
if (detection_resize_rate_ != 1) resize(grayscale_image, small_grayscale_image_,
Size(), 1.0/detection_resize_rate_, 1.0/detection_resize_rate_);
else small_grayscale_image_ = grayscale_image;
if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR){
vector<float> confidences;
LandmarkDetector::DetectFacesHOG(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HOG, confidences);
}
else if (det_parameters_[0].curr_face_detector == LandmarkDetector::FaceModelParameters::HAAR_DETECTOR){
LandmarkDetector::DetectFaces(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_HAAR);
}
else{
vector<float> confidences;
LandmarkDetector::DetectFacesMTCNN(face_detections, small_grayscale_image_, clnf_models_[0].face_detector_MTCNN, confidences);
}
// resize the face deteciton back
if (detection_resize_rate_ != 1) {
for(auto& face_detection : face_detections) {
face_detection.x *= detection_resize_rate_;
face_detection.y *= detection_resize_rate_;
face_detection.width *= detection_resize_rate_;
face_detection.height *= detection_resize_rate_;
}
}
// Keep only non overlapping detections (also convert to a concurrent vector
NonOverlapingDetections(clnf_models_, face_detections);
}
vector< tbb::atomic<bool> > face_detections_used(face_detections.size());
// Go through every model and update the tracking
tbb::parallel_for(0, (int)clnf_models_.size(), [&](int model) {
//for (unsigned int model = 0; model < clnf_models_.size(); ++model) {
bool detection_success = false;
// If the current model has failed more than threshold, remove it
if (clnf_models_[model].failures_in_a_row > tracking_loss_limit_) {
active_models_[model] = false;
clnf_models_[model].Reset();
}
// If the model is inactive reactivate it with new detections
if (!active_models_[model]){
for (size_t detection_ind = 0; detection_ind < face_detections.size(); ++detection_ind)
{
// if it was not taken by another tracker take it (if it is false swap it to true and enter detection, this makes it parallel safe)
if (!face_detections_used[detection_ind].compare_and_swap(true, false)) {
// Reinitialise the model
clnf_models_[model].Reset();
// This ensures that a wider window is used for the initial landmark localisation
clnf_models_[model].detection_success = false;
LandmarkDetector::DetectLandmarksInVideo(input_img, face_detections[detection_ind], clnf_models_[model], det_parameters_[model], grayscale_image);
// This activates the model
active_models_[model] = true;
face_ids_[model] = current_face_id_;
current_face_id_++;
// break out of the loop as the tracker has been reinitialised
break;
}
}
}
else
{
// The actual facial landmark detection / tracking
detection_success = LandmarkDetector::DetectLandmarksInVideo(input_img, clnf_models_[model], det_parameters_[model], grayscale_image);
}
//}
});
// Go through every model and check the results
for(size_t model=0; model<clnf_models_.size(); ++model){
// Check if the alignment result is overlapping previous models
bool overlapping = false;
for(size_t model_ref=0; model_ref<model; ++model_ref){
double overlap_ratio = NonOverlapingDetection(clnf_models_[model_ref], clnf_models_[model]);
if(overlap_ratio > nonoverlap_threshold_) overlapping = true;
}
if(overlapping){
active_models_[model] = false;
face_ids_[model] = 0;
clnf_models_[model].Reset();
continue;
}
if(clnf_models_[model].detection_certainty < certainty_threshold_) continue;
Sample temp;
temp.face_data.certainty = clnf_models_[model].detection_certainty;
temp.face_data.face_id = face_ids_[model];
temp.face_data.face_bb.x = (int)clnf_models_[model].GetBoundingBox().x;
temp.face_data.face_bb.y = (int)clnf_models_[model].GetBoundingBox().y;
temp.face_data.face_bb.height = (int)clnf_models_[model].GetBoundingBox().height;
temp.face_data.face_bb.width = (int)clnf_models_[model].GetBoundingBox().width;
for(int p=0; p<6; p++){
int num_p = landmark_indices_[p];
temp.face_data.landmarks[p] = Point2d(
clnf_models_[model].detected_landmarks.at<float>(num_p,0),
clnf_models_[model].detected_landmarks.at<float>(num_p+68,0)
);
}
output.emplace_back(temp);
}
}
}