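"""RT-Gene based gaze and head pose estimation for all people in a video.

For every frame, faces and facial landmarks are detected, head pose and gaze are
estimated per subject, detections are matched to stable person IDs across frames,
and the results are saved as a pandas DataFrame pickle (one column set per ID).
"""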
import tensorflow as tf

import sys
import os
import argparse

import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from tqdm.notebook import tqdm

from rt_gene.gaze_tools import get_phi_theta_from_euler, limit_yaw
from rt_gene.extract_landmarks_method_base import LandmarkMethodBase
from rt_gene.estimate_gaze_base import GazeEstimatorBase
from rt_gene.estimate_gaze_tensorflow import GazeEstimator
from rt_gene.gaze_tools_standalone import euler_from_matrix

import itertools
import pandas as pd

#os.environ["CUDA_VISIBLE_DEVICES"]="1"


def getCenter(box):
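    """Return the center point (x, y) of an [x_min, y_min, x_max, y_max] box."""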
    return np.array([box[2] + box[0], box[3] + box[1]]) / 2


def load_camera_calibration(calibration_file):
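    """Load (distortion_coefficients, camera_matrix) from a .pkl or .yaml calibration file."""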
    fileType = calibration_file.split(".")[-1]
    if fileType == "pkl":
        import pickle
        # Use a context manager so the calibration file is closed after loading
        with open(calibration_file, 'rb') as infile:
            data = pickle.load(infile)
        return data["distortion_coef"], data["camera_matrix"]
    elif fileType == "yaml":
        import yaml
        with open(calibration_file, 'r') as f:
            cal = yaml.safe_load(f)

        dist_coefficients = np.array(cal['distortion_coefficients']['data'], dtype='float32').reshape(1, 5)
        camera_matrix = np.array(cal['camera_matrix']['data'], dtype='float32').reshape(3, 3)

        return dist_coefficients, camera_matrix


def extract_eye_image_patches(subjects, landmark_estimator):
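    """Crop the left/right eye patches for each subject and store them on the subject objects."""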
    for subject in subjects:
        le_c, re_c, _, _ = subject.get_eye_image_from_landmarks(subject, landmark_estimator.eye_image_size)
        subject.left_eye_color = le_c
        subject.right_eye_color = re_c


def estimate_gaze(base_name, color_img, landmark_estimator, gaze_estimator, dist_coefficients, camera_matrix, args):
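    """Estimate head pose and gaze for every face found in color_img.

    Returns a list with one result dict per valid subject (empty if no usable face
    was found); base_name is expected to end in "_<frame number>.<ext>".
    """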
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        # Return an empty list so per-frame results can be chained downstream.
        return []

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects, landmark_estimator)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []
    roll_pitch_yaw_list = []

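    # For every subject: estimate the head pose from the 2D facial landmarks via PnP,
    # then collect the eye patches and head-pose angles as inputs for the gaze network.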
    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            # tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                          [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)
        roll_pitch_yaw_list.append(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        if args['vis_headpose']:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args['save_headpose']:
            cv2.imwrite(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_headpose.jpg'), head_pose_image)

        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return []

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    file_base = os.path.splitext(base_name)[0]
    file = "_".join(file_base.split("_")[:-1])
    frame = int(file_base.split("_")[-1])
    ret = []
    for subject_id, gaze, headpose, roll_pitch_yaw in zip(valid_subject_list, gaze_est.tolist(), input_head_list, roll_pitch_yaw_list):
        subject = subjects[subject_id]
        # print(roll_pitch_yaw)

        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args['vis_gaze']:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args['save_gaze']:
            cv2.imwrite(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_gaze.jpg'), s_gaze_img)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)

        if args['save_estimate']:
            with open(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
                f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                        ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')

        # Phi: pos - look down, neg - look up
        # Theta: pos - rotate left, neg - rotate right
        d = {"File": file, "Frame": frame, "SubjectId": subject_id,
             "HeadBox": subject.box, "Landmarks": subject.landmarks,
             "GazeTheta": gaze[0], "GazePhi": gaze[1],
             "HeadPoseTheta": headpose[0], "HeadPosePhi": headpose[1],
             "HeadPoseRoll": roll_pitch_yaw[0], "HeadPosePitch": roll_pitch_yaw[1], "HeadPoseYaw": roll_pitch_yaw[2]}
        ret.append(d)

    return ret


def visualize(df, FRAMES):
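    """Draw the frame-0 head bounding boxes and IDs on the first frame image found in FRAMES."""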
    path_list = [f for f in os.listdir(FRAMES) if '.jpg' in f]
    path_list.sort()

    image = cv2.imread(os.path.join(FRAMES, path_list[0]))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    fig, ax = plt.subplots(1, figsize=(18, 10))

    for i in range(len(df.SubjectId.unique())):
        bbox = df.loc[(df.Frame == 0) & (df.SubjectId == i)]['HeadBox'].values
        print(bbox)
        if not np.any(pd.isna(bbox)) and len(bbox) > 0:
            bbox = np.array(bbox[0])
            rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1],
                                     linewidth=1, edgecolor='c', facecolor='none')
            plt.text(bbox[0], bbox[1], 'ID%i' % i, color='c', fontsize=20)
            ax.add_patch(rect)

    ax.imshow(image)
    plt.show()


def visualize_sorting(df_sorted):
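    """Plot the horizontal head-center position of each person ID over time as a sanity check of the ID assignment."""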
    subs = sorted(df_sorted[~df_sorted.PId.isna()].PId.unique())
    for sid in subs:
        x = df_sorted[df_sorted.PId == sid].HeadCenter.apply(lambda x: x[0])
        y = df_sorted[df_sorted.PId == sid].HeadCenter.apply(lambda x: x[1])
        frames = df_sorted[df_sorted.PId == sid].Frame.to_list()
        plt.scatter(frames, x, alpha=.2, label="Sub %i" % sid)
    plt.legend()
    plt.show()


def process(file, maxPeople, cameraRes=[5760, 2880]):
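    """Run gaze estimation on every frame of the video `file` and save per-ID results.

    maxPeople is the number of people expected to be visible. cameraRes defaults to
    5760x2880, presumably the resolution of the (equirectangular 360°) source video,
    and is used when matching detections across the left/right image border.
    """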

    VIDEO = file
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    ROOT = "/".join(VIDEO.split("/")[:-1]) + "/"
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)

    if not os.path.exists(VIDEO):
        print('WARNING: Could not find video file')
        return

    script_path = "./"

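    # These settings appear to correspond to the command-line flags of RT-Gene's
    # standalone gaze estimation script (the "store_false" notes look like argparse
    # leftovers); they are collected here in a plain dict instead.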
    args = {}
    args["calib_file"] = "./calib_insta.pkl"
    args["vis_headpose"] = False  # store_false
    args["save_headpose"] = False  # store_false
    args["vis_gaze"] = False  # store_false
    args["save_gaze"] = False  # store_false
    args["save_estimate"] = False  # store_false
    args["device_id_facedetection"] = "cuda:0"  # store_false

    args["im_path"] = os.path.join(script_path, './samples_gaze/')
    args["output_path"] = os.path.join(script_path, './samples_gaze/')
    args["models"] = [os.path.join(script_path, '../model_nets/Model_allsubjects1.h5')]
    args['gaze_backend'] = 'tensorflow'

    tqdm.write('Loading networks')
    landmark_estimator = LandmarkMethodBase(device_id_facedetection=args["device_id_facedetection"],
                                            checkpoint_path_face=os.path.join(script_path, "../model_nets/SFD/s3fd_facedetector.pth"),
                                            checkpoint_path_landmark=os.path.join(script_path, "../model_nets/phase1_wpdc_vdc.pth.tar"),
                                            model_points_file=os.path.join(script_path, "../model_nets/face_model_68.txt"))

    # gaze_estimator = GazeEstimator("/gpu:0", args['models'])

    if args['gaze_backend'] == "tensorflow":
        from rt_gene.estimate_gaze_tensorflow import GazeEstimator
        gaze_estimator = GazeEstimator("/gpu:0", args['models'])
    elif args['gaze_backend'] == "pytorch":
        from rt_gene.estimate_gaze_pytorch import GazeEstimator
        gaze_estimator = GazeEstimator("cuda:0", args['models'])
    else:
        raise ValueError("Incorrect gaze_backend, choices are: tensorflow or pytorch")

    if not os.path.isdir(args["output_path"]):
        os.makedirs(args["output_path"])

    video = cv2.VideoCapture(VIDEO)
    print('Video frame count: ', video.get(cv2.CAP_PROP_FRAME_COUNT))

    if args["calib_file"] is not None and os.path.exists(args["calib_file"]):
        _dist_coefficients, _camera_matrix = load_camera_calibration(args["calib_file"])
    else:
        im_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
        im_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
        print('WARNING!!! You should provide the camera calibration file, otherwise you might get bad results.\n'
              'Using a crude approximation!')
        _dist_coefficients, _camera_matrix = np.zeros((1, 5)), np.array(
            [[im_height, 0.0, im_width / 2.0], [0.0, im_height, im_height / 2.0], [0.0, 0.0, 1.0]])

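    # Process the video frame by frame; estimate_gaze returns a list of per-subject dicts per frame.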
    lstRet = []
    for i in tqdm(list(range(int(video.get(cv2.CAP_PROP_FRAME_COUNT))))):

        image_file_name = "%s_%i.XXX" % (os.path.splitext(VIDEO)[0], i)
        ret, image = video.read()
        if not ret:
            # Stop if the video ends earlier than the reported frame count
            break
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        lstRet.append(estimate_gaze(image_file_name, image, landmark_estimator, gaze_estimator, _dist_coefficients, _camera_matrix, args))

    lst = list(itertools.chain.from_iterable(lstRet))
    df = pd.DataFrame(lst)
    df["HeadCenter"] = df.HeadBox.apply(lambda x: getCenter(x))
    df["Phi"] = df.GazePhi + df.HeadPosePhi  # gaze yaw
    df["Theta"] = df.GazeTheta + df.HeadPoseTheta  # gaze pitch
    df['Yaw'] = df.GazePhi + df.HeadPoseYaw
    df['Pitch'] = df.GazeTheta + df.HeadPosePitch

    # path = '%s%s_raw.pkl' % (TMP_DIR, VIDEOOUT)
    # df.to_pickle(path)
    # print('Saved raw detections to: ', path)

    visualize(df, FRAMES)

    # Sort ID detections
    ###############################################################################################################

    # Find first frame where all are detected
    for frame in sorted(df.Frame.unique()):
        frame_df = df.loc[df.Frame == frame]
        if len(frame_df['SubjectId'].unique()) == maxPeople:
            first_frame = frame
            print('First frame where all are detected: ', first_frame)
            break

    # Replace frame 0 with the detections from the first fully-detected frame,
    # so that the ID assignment below can be seeded with exactly maxPeople entries.
    empty_rows = pd.DataFrame()
    empty_rows['Frame'] = np.zeros(maxPeople).astype(int)

    for col in df.columns:
        if not col == 'Frame':
            empty_rows[col] = df.loc[df.Frame == first_frame, [col]].values

    df = df.loc[df.Frame != 0]
    df = df.append(empty_rows).sort_values(by=['Frame'])
    df.head()

    df_sorted = df.copy()
    df_sorted["PId"] = None
    df_sorted.loc[df_sorted.Frame == 0, "PId"] = list(range(maxPeople))
    df_sorted = df_sorted.sort_values("Frame")
    df_sorted.index = list(range(len(df_sorted)))

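    # Propagate person IDs frame by frame: each detection in the current frame takes the
    # PId of the closest head center in the most recent frame where all maxPeople IDs are
    # assigned. Distances are also measured with a horizontal offset of cameraRes[0] pixels,
    # so matches across the adjacent left/right borders of a 360° frame are handled.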
    for frameId in tqdm(sorted(df_sorted.Frame.unique())[1:]):
        pidAssignement = []
        # Search backwards for the most recent frame in which every PId is assigned
        for frameIdBefore in range(frameId - 1, -1, -1):
            allFramesBefore = df_sorted[(df_sorted.Frame == frameIdBefore) & (~df_sorted.PId.isna())]
            if np.array_equal(sorted(allFramesBefore.PId.to_list()), np.arange(maxPeople)):

                dfFramesCurrent = df_sorted[df_sorted.Frame == frameId]
                for indexCurrentFrame, frameCurrent in dfFramesCurrent.iterrows():

                    lst = []
                    for indexBeforeFrame, frameBefore in allFramesBefore.iterrows():
                        if frameBefore.HeadCenter[0] > frameCurrent.HeadCenter[0]:
                            p1 = np.array(frameCurrent.HeadCenter)
                            p2 = np.array(frameBefore.HeadCenter)
                        else:
                            p1 = np.array(frameBefore.HeadCenter)
                            p2 = np.array(frameCurrent.HeadCenter)

                        v1 = p1 - p2
                        dist1 = np.linalg.norm(v1)

                        # Distance with the leftmost point shifted by one image width (wrap-around)
                        p1[0] = p1[0] + cameraRes[0]
                        v2 = p1 - p2
                        dist2 = np.linalg.norm(v2)

                        dist = min([dist1, dist2])

                        lst.append([dist, frameCurrent.name, indexBeforeFrame, frameBefore])

                    # Assign the PId of the closest previous detection
                    lst.sort(key=lambda x: x[0])
                    pidAssignement.append([indexCurrentFrame, lst[0][-1].PId])

                break
        for index, pid in pidAssignement:
            df_sorted.loc[df_sorted.index == index, "PId"] = pid

    visualize_sorting(df_sorted)

    del df_sorted["SubjectId"]

    # Rearrange DataFrame: each ID has specific columns
    ###############################################################################################################
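    # Pivot to one row per frame with one column set per ID (e.g. ID0_GazePhi, ID1_GazePhi, ...).
    # Note that the HeadCenter values end up in the ID<i>_Head columns via the renaming below.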
    df_sorted = df_sorted[~df_sorted.PId.isna()].drop_duplicates(subset=['Frame', 'PId'])
    FACE_COUNT = len(df_sorted[~df_sorted.PId.isna()].PId.unique())

    df2 = df_sorted.pivot(index='Frame', columns="PId",
                          values=["Landmarks", "GazeTheta", "GazePhi", "HeadCenter", "HeadPoseTheta", "HeadPosePhi",
                                  "HeadPoseYaw", "HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"])
    lst = []
    for label in ["Landmarks", "GazeTheta", "GazePhi", "Head", "HeadPoseTheta", "HeadPosePhi", "HeadPoseYaw",
                  "HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"]:
        for head_id in range(FACE_COUNT):
            lst.append("ID%i_%s" % (head_id, label))

    df2.columns = lst
    df2 = df2.reset_index()

    path = "%s%s_RTGene.pkl" % (TMP_DIR, VIDEOOUT)
    df2.to_pickle(path)
    print("Saved RT-Gene detections to %s" % path)
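

# Minimal usage sketch (hypothetical path; maxPeople is the expected number of people in the video):
# process("./videos/session1.mp4", maxPeople=4)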