Added processing

2021-10-17 21:32:30 +02:00 · 2021-10-17 21:32:30 +02:00 · 9e0ac5daa8
commit 9e0ac5daa8
parent 74df5cb3f0
9 changed files with 2287 additions and 4 deletions
--- a/README.md
+++ b/README.md
@ -11,11 +11,12 @@ pip install requirements.txt
 ```
 ## Get Started 
 To test the GUI, you can download our example use-case videos from Google Drive: <br>
-As well as the respective processed ``.dat`` files which include all the analyses. 
-Run [main.py](main.py) and import the video file you would like to analyze. 
+As well as the respective processed ``.dat`` files which include all the analyses. <br>
+You can then run [main.py](main.py) and import the video file you would like to analyze. 
+
 ## Processing
-
-
+If you would like to analyze your own 360° video, you can find the processing pipeline in [processing/](processing).
+Please note that the processing pipeline requires a GPU.

 ## Citation
 Please cite this paper if you use ConAn or parts of this publication in your research: 
--- a/exampledata/combine.py
+++ b/exampledata/combine.py
@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+
+import pickle as pkl
+
+
+def main():
+    data = dict()
+    with open('G2_VID4_BodyMovement.pkl', 'rb') as handle:
+        data["BodyMovement"] = pkl.load(handle)
+    with open('G2_VID4_HeadPose.pkl', 'rb') as handle:
+        data["HeadPose"] = pkl.load(handle)
+    with open('G2_VID4_JAActivityUnits_V2.pkl', 'rb') as handle:
+        data['ActivityUnits'] = pkl.load(handle)
+    with open('G2_VID4_RTGene.pkl', 'rb') as handle:
+        data['RTGene'] = pkl.load(handle)
+    with open("G2_VID4_speakDiar.pkl", 'rb') as handle:
+        data["Speaker"] = pkl.load(handle)
+    data["originalVideoResolution"] = (5760, 2880)
+    with open('G2_VID4.dat', 'wb') as handle:
+        pkl.dump(data, handle, protocol=pkl.HIGHEST_PROTOCOL)
+
+
+if __name__ == '__main__':
+    main()
--- a/processing/ConAn_RunProcessing.ipynb
+++ b/processing/ConAn_RunProcessing.ipynb
--- a/processing/README.md
+++ b/processing/README.md
@ -0,0 +1,23 @@
+# Processing Pipeline 
+## Conda Environment Setup
+```
+conda env create -f conan_windows.yml
+conda activate conan_windows_env
+```
+
+### OpenPose
+### RT-Gene
+- Run [processing/install_RTGene.py](/processing/install_RTGene.py).
+- [OPTIONAL] Provide a camera calibration file `calib.pkl`.
+- Provide the maximum number of people in the video.
+### JAA-Net
+### AVA-Active Speaker
+### Apriltag
+
+Guide for installing FFmpeg on Windows: [https://www.wikihow.com/Install-FFmpeg-on-Windows](https://www.wikihow.com/Install-FFmpeg-on-Windows)
+### Training 
+```
+conda install -c anaconda cupy
+conda install -c anaconda chainer
+conda install -c anaconda ipykernel
+```
--- a/processing/conan_windows.yml
+++ b/processing/conan_windows.yml
@ -0,0 +1,192 @@
+name: conan_windows_env
+channels:
+  - pytorch
+  - anaconda
+  - defaults
+dependencies:
+  - _tflow_select=2.1.0=gpu
+  - absl-py=0.10.0=py36_0
+  - aiohttp=3.6.3=py36he774522_0
+  - argon2-cffi=20.1.0=py36he774522_1
+  - astor=0.8.1=py36_0
+  - async-timeout=3.0.1=py36_0
+  - async_generator=1.10=py36h28b3542_0
+  - attrs=20.2.0=py_0
+  - backcall=0.2.0=py_0
+  - blas=1.0=mkl
+  - bleach=3.2.1=py_0
+  - blinker=1.4=py36_0
+  - brotlipy=0.7.0=py36he774522_1000
+  - ca-certificates=2021.9.30=haa95532_1
+  - cachetools=4.1.1=py_0
+  - certifi=2021.5.30=py36haa95532_0
+  - cffi=1.14.3=py36h7a1dbc1_0
+  - chardet=3.0.4=py36_1003
+  - click=7.1.2=py_0
+  - colorama=0.4.4=py_0
+  - cryptography=3.1.1=py36h7a1dbc1_0
+  - cudatoolkit=10.1.243=h74a9793_0
+  - cudnn=7.6.5=cuda10.1_0
+  - cycler=0.10.0=py36haa95532_0
+  - dataclasses=0.8=pyh4f3eec9_6
+  - decorator=4.4.2=py_0
+  - defusedxml=0.6.0=py_0
+  - entrypoints=0.3=py36_0
+  - freetype=2.10.4=hd328e21_0
+  - gast=0.2.2=py36_0
+  - google-auth=1.22.1=py_0
+  - google-auth-oauthlib=0.4.1=py_2
+  - google-pasta=0.2.0=py_0
+  - grpcio=1.31.0=py36he7da953_0
+  - h5py=2.10.0=py36h5e291fa_0
+  - hdf5=1.10.4=h7ebc959_0
+  - icc_rt=2019.0.0=h0cc432a_1
+  - icu=58.2=vc14hc45fdbb_0
+  - idna=2.10=py_0
+  - idna_ssl=1.1.0=py36_0
+  - importlib-metadata=2.0.0=py_1
+  - importlib_metadata=2.0.0=1
+  - intel-openmp=2020.2=254
+  - ipykernel=5.3.4=py36h5ca1d4c_0
+  - ipython=7.16.1=py36h5ca1d4c_0
+  - ipython_genutils=0.2.0=py36h3c5d0ee_0
+  - ipywidgets=7.5.1=py_1
+  - jedi=0.18.0=py36haa95532_1
+  - jinja2=2.11.2=py_0
+  - jpeg=9b=hb83a4c4_2
+  - jsonschema=3.2.0=py_2
+  - jupyter=1.0.0=py36_7
+  - jupyter_client=6.1.7=py_0
+  - jupyter_console=6.2.0=py_0
+  - jupyter_core=4.6.3=py36_0
+  - jupyterlab_pygments=0.1.2=py_0
+  - keras-applications=1.0.8=py_1
+  - keras-preprocessing=1.1.0=py_1
+  - kiwisolver=1.3.1=py36hd77b12b_0
+  - libpng=1.6.37=h2a8f88b_0
+  - libprotobuf=3.13.0.1=h200bbdf_0
+  - libsodium=1.0.18=h62dcd97_0
+  - libtiff=4.2.0=hd0e1b90_0
+  - libuv=1.40.0=he774522_0
+  - lz4-c=1.9.3=h2bbff1b_1
+  - m2w64-gcc-libgfortran=5.3.0=6
+  - m2w64-gcc-libs=5.3.0=7
+  - m2w64-gcc-libs-core=5.3.0=7
+  - m2w64-gmp=6.1.0=2
+  - m2w64-libwinpthread-git=5.0.0.4634.697f757=2
+  - markdown=3.3.2=py36_0
+  - markupsafe=1.1.1=py36he774522_0
+  - matplotlib=3.3.4=py36haa95532_0
+  - matplotlib-base=3.3.4=py36h49ac443_0
+  - mistune=0.8.4=py36he774522_0
+  - mkl=2019.4=245
+  - mkl-service=2.3.0=py36hb782905_0
+  - mkl_fft=1.2.0=py36h45dec08_0
+  - mkl_random=1.0.4=py36h343c172_0
+  - msys2-conda-epoch=20160418=1
+  - multidict=4.7.6=py36he774522_1
+  - nbclient=0.5.1=py_0
+  - nbconvert=6.0.7=py36_0
+  - nbformat=5.0.8=py_0
+  - nest-asyncio=1.4.1=py_0
+  - ninja=1.10.2=h6d14046_1
+  - notebook=6.1.4=py36_0
+  - numpy=1.19.1=py36h5510c5b_0
+  - numpy-base=1.19.1=py36ha3acd2a_0
+  - oauthlib=3.1.0=py_0
+  - olefile=0.46=py36_0
+  - openssl=1.1.1l=h2bbff1b_0
+  - opt_einsum=3.1.0=py_0
+  - packaging=20.4=py_0
+  - pandas=1.1.3=py36ha925a31_0
+  - pandoc=2.11=h9490d1a_0
+  - pandocfilters=1.4.2=py36_1
+  - parso=0.8.0=py_0
+  - pickleshare=0.7.5=py36_0
+  - pillow=8.0.0=py36hca74424_0
+  - pip=21.0.1=py36haa95532_0
+  - prometheus_client=0.8.0=py_0
+  - prompt-toolkit=3.0.8=py_0
+  - prompt_toolkit=3.0.8=0
+  - protobuf=3.13.0.1=py36ha925a31_1
+  - pyasn1=0.4.8=py_0
+  - pyasn1-modules=0.2.8=py_0
+  - pycparser=2.20=py_2
+  - pygments=2.7.1=py_0
+  - pyjwt=1.7.1=py36_0
+  - pyopengl=3.1.1a1=py36_0
+  - pyopenssl=19.1.0=py_1
+  - pyparsing=2.4.7=py_0
+  - pyqt=5.9.2=py36ha878b3d_0
+  - pyreadline=2.1=py36_1
+  - pyrsistent=0.17.3=py36he774522_0
+  - pysocks=1.7.1=py36_0
+  - python=3.6.13=h3758d61_0
+  - python-dateutil=2.8.1=py_0
+  - pytorch=1.8.1=py3.6_cuda10.1_cudnn7_0
+  - pytz=2020.1=py_0
+  - pywin32=227=py36he774522_1
+  - pywinpty=0.5.7=py36_0
+  - pyzmq=19.0.2=py36ha925a31_1
+  - qt=5.9.7=vc14h73c81de_0
+  - qtconsole=4.7.7=py_0
+  - qtpy=1.9.0=py_0
+  - requests=2.24.0=py_0
+  - requests-oauthlib=1.3.0=py_0
+  - rsa=4.6=py_0
+  - scipy=1.5.2=py36h9439919_0
+  - send2trash=1.5.0=py36_0
+  - setuptools=58.0.4=py36haa95532_0
+  - sip=4.19.24=py36ha925a31_0
+  - six=1.15.0=py_0
+  - sqlite=3.36.0=h2bbff1b_0
+  - tensorboard=2.2.1=pyh532a8cf_0
+  - tensorboard-plugin-wit=1.6.0=py_0
+  - tensorflow=2.1.0=gpu_py36h3346743_0
+  - tensorflow-base=2.1.0=gpu_py36h55f5790_0
+  - tensorflow-estimator=2.6.0=pyh7b7c402_0
+  - tensorflow-gpu=2.1.0=h0d30ee6_0
+  - termcolor=1.1.0=py36_1
+  - terminado=0.9.1=py36_0
+  - testpath=0.4.4=py_0
+  - tk=8.6.11=h2bbff1b_0
+  - torchvision=0.9.1=py36_cu101
+  - tornado=6.0.4=py36he774522_1
+  - traitlets=4.3.3=py36_0
+  - typing_extensions=3.7.4.3=py_0
+  - urllib3=1.25.11=py_0
+  - vc=14.2=h21ff451_1
+  - vs2015_runtime=14.27.29016=h5e58377_2
+  - wcwidth=0.2.5=py_0
+  - webencodings=0.5.1=py36_1
+  - werkzeug=0.14.1=py36_0
+  - wheel=0.37.0=pyhd3eb1b0_1
+  - widgetsnbextension=3.5.1=py36_0
+  - win_inet_pton=1.1.0=py36_0
+  - wincertstore=0.2=py36h7fe50ca_0
+  - winpty=0.4.3=4
+  - wrapt=1.12.1=py36he774522_1
+  - xz=5.2.5=h62dcd97_0
+  - yarl=1.6.2=py36he774522_0
+  - zeromq=4.3.2=ha925a31_3
+  - zipp=3.3.1=py_0
+  - zlib=1.2.11=vc14h1cdd9ab_1
+  - zstd=1.4.9=h19a0ad4_0
+  - pip:
+    - bidict==0.21.3
+    - dlib==19.22.1
+    - imageio==2.9.0
+    - imageio-ffmpeg==0.4.5
+    - joblib==1.1.0
+    - lru-dict==1.1.7
+    - moviepy==1.0.3
+    - opencv-python==4.5.3.56
+    - overrides==6.1.0
+    - proglog==0.1.9
+    - pupil-apriltags==1.0.4
+    - pupil-pthreads-win==2
+    - scikit-learn==0.24.2
+    - threadpoolctl==3.0.0
+    - tqdm==4.62.3
+    - typing-utils==0.1.0
+
--- a/processing/install_RTGene.py
+++ b/processing/install_RTGene.py
@ -0,0 +1,19 @@
+import os
+import subprocess
+
+def main():
+
+    download_cmds = ['git clone https://github.com/Tobias-Fischer/rt_gene.git',
+                    'mv ./rt_gene ./rt_gene_GIT',
+                    'mv ./rt_gene_GIT/rt_gene/src/rt_gene/ ./',
+                    'mv ./rt_gene_GIT/rt_gene/model_nets ./../model_nets']
+
+    for cmd in download_cmds:
+        subprocess.call(cmd, shell=True)
+    from rt_gene.download_tools import download_gaze_tensorflow_models, download_external_landmark_models
+    download_gaze_tensorflow_models()
+    download_external_landmark_models()
+
+
+if __name__ == '__main__':
+    main()
--- a/processing/process_AprilTag.py
+++ b/processing/process_AprilTag.py
@ -0,0 +1,94 @@
+
+import os 
+from sys import platform
+if platform == "linux" or platform == "linux2":
+    # linux
+    import apriltag
+elif platform == "darwin":
+    # OS X
+    import apriltag
+elif platform == "win32":
+    # Windows
+    import pupil_apriltags as apriltag
+
+import cv2
+import matplotlib.pyplot as plt 
+import pandas as pd
+
+visualize = True
+
+def process(file):
+
+    VIDEO = file
+    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
+    ROOT = "/".join(VIDEO.split("/")[:-1]) + "/"
+    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
+    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)
+
+    if not os.path.exists(FRAMES):
+        print('WARNING: Could not find frame directory')
+        return
+
+    img_paths = [f for f in os.listdir(FRAMES) if 'jpg' in f]
+    print('Number of frames: ', len(img_paths))
+
+    if platform == "linux" or platform == "linux2" or platform == "darwin":
+        # Circumvent error: too many borders in contour_detect (max of 32767!)
+        options = apriltag.DetectorOptions(refine_edges=False, quad_contours=False)
+        detector = apriltag.Detector(options)
+    elif platform == "win32":
+        print('WARNING: apriltag2 not supported on windows, running with pupil_apriltags...')
+        detector = apriltag.Detector(refine_edges=False)
+
+    detections = {}
+
+    if visualize:
+        fig = plt.Figure(figsize=(15, 10))
+        path = os.path.join(FRAMES, img_paths[0])
+
+        img = cv2.imread(path)
+        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+        result = detector.detect(img)
+
+        for i in range(len(result)):
+            tf = result[i].tag_family
+            tag_id = result[i].tag_id
+            cx, cy = result[i].center
+            # print('Found tag: ', tag_id)
+            img = cv2.circle(img, (int(cx), int(cy)), 50, (255, 255, 0), thickness=10)
+
+        plt.imshow(image)
+        plt.axis('off')
+        #plt.savefig('./AprilTag_Detection_%s.jpg' % VIDEOOUT)
+        plt.show()
+
+    tags = dict()
+    for frame, p in enumerate(img_paths):
+
+        path = os.path.join(FRAMES, p)
+
+        img = cv2.imread(path)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+        result = detector.detect(img)
+
+        for i in range(len(result)):
+            tf = result[i].tag_family
+            cx, cy = result[i].center
+            tag_id = result[i].tag_id
+            tags[tag_id] = [cx, cy]
+
+        print('Frame %i found %i tags' % (frame, len(result)))
+        detections[frame] = tags
+
+    df = pd.DataFrame.from_dict(detections, orient='index')
+
+    path = './AprilTag_%s.pkl' % VIDEOOUT
+    df.to_pickle(path)
+    print('Saved AprilTag detections to %s' % path)
+
+
+if __name__ == '__main__':
+    process('./Data/ShowCase_3.mp4')
--- a/processing/process_OpenPose.py
+++ b/processing/process_OpenPose.py
@ -0,0 +1,412 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# # OpenPose pose detection 
+# 2D real-time multi-person keypoint detection:
+# **18**-keypoint body/foot keypoint estimation. Running time invariant to number of detected people
+# see https://github.com/CMU-Perceptual-Computing-Lab/openpose
+# 
+# ## Pipeline
+# - Run 18-keypoint model on video frames 
+# - Parse keypoints and PAFs to generate personwise keypoints
+# - Save results to OpenPose.pkl
+
+import os
+import numpy as np
+import cv2
+import sys
+from sys import platform
+import time
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from tqdm import tqdm, tqdm_pandas
+
+tqdm.pandas()
+
+from multiprocessing import cpu_count
+from multiprocessing import Pool
+import itertools
+import os
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+os.environ["OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES"] = "1"
+
+POSE_PAIRS = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
+              [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 17], [5, 16]]
+keypointsMapping = ['Nose', 'Neck', 'R-Sho', 'R-Elb', 'R-Wr', 'L-Sho', 'L-Elb', 'L-Wr', 'R-Hip',
+                    'R-Knee', 'R-Ank', 'L-Hip', 'L-Knee', 'L-Ank', 'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear']
+mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
+          [19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
+          [47, 48], [49, 50], [53, 54], [51, 52], [55, 56],
+          [37, 38], [45, 46]]
+
+colors = np.array([(0, 100, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255),
+                   (0, 255, 0), (255, 200, 100), (255, 0, 255), (0, 255, 0), (255, 200, 100), (255, 0, 255),
+                   (0, 0, 255), (255, 0, 0), (200, 200, 0), (255, 0, 0), (200, 200, 0), (0, 0, 0)])
+
+my_color = []
+for c in colors:
+    my_color.append(tuple(c))
+
+
+# ## Auxiliary Functions
+# see https://www.learnopencv.com/deep-learning-based-human-pose-estimation-using-opencv-cpp-python/
+def getKeypoints(probMap, threshold=0.8):
+    mapSmooth = cv2.GaussianBlur(probMap, (3, 3), 0, 0)
+    mapMask = np.uint8(mapSmooth > threshold)
+    keypoints = []
+    # find the blobs
+    contours, _ = cv2.findContours(mapMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    # for each blob find the maxima
+    # not enough values to unpack (expected 3, got 2)
+    # version issue: https://github.com/facebookresearch/maskrcnn-benchmark/issues/339
+    for cnt in contours:
+        blobMask = np.zeros(mapMask.shape)
+        blobMask = cv2.fillConvexPoly(blobMask, cnt, 1)
+        maskedProbMap = mapSmooth * blobMask
+        _, maxVal, _, maxLoc = cv2.minMaxLoc(maskedProbMap)
+        keypoints.append(maxLoc + (probMap[maxLoc[1], maxLoc[0]],))
+    return keypoints
+
+
+# Find valid connections between the different joints of a all persons present
+def getValidPairs(output, detected_keypoints, frameWidth, frameHeight):
+    valid_pairs = []
+    invalid_pairs = []
+    n_interp_samples = 10
+    paf_score_th = 0.1
+    conf_th = 0.7
+    # loop for every POSE_PAIR
+    for k in range(len(mapIdx)):
+        # A->B constitute a limb
+        pafA = output[mapIdx[k][0], :, :]
+        pafB = output[mapIdx[k][1], :, :]
+        pafA = cv2.resize(pafA, (frameWidth, frameHeight))
+        pafB = cv2.resize(pafB, (frameWidth, frameHeight))
+        # Find the keypoints for the first and second limb
+        candA = detected_keypoints[POSE_PAIRS[k][0]]
+        candB = detected_keypoints[POSE_PAIRS[k][1]]
+        nA = len(candA)
+        nB = len(candB)
+        # If keypoints for the joint-pair is detected
+        # check every joint in candA with every joint in candB
+        # Calculate the distance vector between the two joints
+        # Find the PAF values at a set of interpolated points between the joints
+        # Use the above formula to compute a score to mark the connection valid
+        if (nA != 0 and nB != 0):
+            valid_pair = np.zeros((0, 3))
+            for i in range(nA):
+                max_j = -1
+                maxScore = -1
+                found = 0
+                for j in range(nB):
+                    # Find d_ij
+                    d_ij = np.subtract(candB[j][:2], candA[i][:2])
+                    norm = np.linalg.norm(d_ij)
+                    if norm:
+                        d_ij = d_ij / norm
+                    else:
+                        continue
+                    # Find p(u)
+                    interp_coord = list(zip(np.linspace(candA[i][0], candB[j][0], num=n_interp_samples),
+                                            np.linspace(candA[i][1], candB[j][1], num=n_interp_samples)))
+                    # Find L(p(u))
+                    paf_interp = []
+                    for k in range(len(interp_coord)):
+                        paf_interp.append([pafA[int(round(interp_coord[k][1])), int(round(interp_coord[k][0]))],
+                                           pafB[int(round(interp_coord[k][1])), int(round(interp_coord[k][0]))]])
+                    # Find E
+                    paf_scores = np.dot(paf_interp, d_ij)
+                    avg_paf_score = sum(paf_scores) / len(paf_scores)
+                    # Check if the connection is valid
+                    # If the fraction of interpolated vectors aligned with PAF is higher then threshold -> Valid Pair
+                    if (len(np.where(paf_scores > paf_score_th)[0]) / n_interp_samples) > conf_th:
+                        if avg_paf_score > maxScore:
+                            max_j = j
+                            maxScore = avg_paf_score
+                            found = 1
+                # Append the connection to the list
+                if found:
+                    valid_pair = np.append(valid_pair, [[candA[i][3], candB[max_j][3], maxScore]], axis=0)
+            # Append the detected connections to the global list
+            valid_pairs.append(valid_pair)
+        else:  # If no keypoints are detected
+            # ATTENTION: Commented this out by Sven
+            # print("No Connection : k = {}".format(k))
+            invalid_pairs.append(k)
+            valid_pairs.append([])
+    return valid_pairs, invalid_pairs
+
+
+# This function creates a list of keypoints belonging to each person
+# For each detected valid pair, it assigns the joint(s) to a person
+def getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list):
+    # the last number in each row is the overall score
+    personwiseKeypoints = -1 * np.ones((0, 19))
+    for k in range(len(mapIdx)):
+        if k not in invalid_pairs:
+            partAs = valid_pairs[k][:, 0]
+            partBs = valid_pairs[k][:, 1]
+            indexA, indexB = np.array(POSE_PAIRS[k])
+            for i in range(len(valid_pairs[k])):
+                found = 0
+                person_idx = -1
+                for j in range(len(personwiseKeypoints)):
+                    if personwiseKeypoints[j][indexA] == partAs[i]:
+                        person_idx = j
+                        found = 1
+                        break
+                if found:
+                    personwiseKeypoints[person_idx][indexB] = partBs[i]
+                    personwiseKeypoints[person_idx][-1] += keypoints_list[partBs[i].astype(int), 2] + valid_pairs[k][i][
+                        2]
+                # if find no partA in the subset, create a new subset
+                elif not found and k < 17:
+                    row = -1 * np.ones(19)
+                    row[indexA] = partAs[i]
+                    row[indexB] = partBs[i]
+                    # add the keypoint_scores for the two keypoints and the paf_score
+                    row[-1] = sum(keypoints_list[valid_pairs[k][i, :2].astype(int), 2]) + valid_pairs[k][i][2]
+                    personwiseKeypoints = np.vstack([personwiseKeypoints, row])
+    return personwiseKeypoints
+
+
+def f(probMap):
+    threshold = 0.5
+    return getKeypoints(probMap, threshold)
+
+
+def getPose(output):
+    detected_keypoints = []
+    keypoints_list = np.zeros((0, 3))
+    keypoint_id = 0
+
+    threshold = 0.5
+    keypointsList = []
+
+    for part in range(18):
+        probMap = output[part, :, :]
+        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
+        keypointsList.append(getKeypoints(probMap, threshold))
+
+    for keypoints in keypointsList:  # nPoints = 18
+        keypoints_with_id = []
+        for i in range(len(keypoints)):
+            keypoints_with_id.append(keypoints[i] + (keypoint_id,))
+            keypoints_list = np.vstack([keypoints_list, keypoints[i]])
+            keypoint_id += 1
+        detected_keypoints.append(keypoints_with_id)
+
+    valid_pairs, invalid_pairs = getValidPairs(output, detected_keypoints, frameWidth, frameHeight)
+    personwiseKeypoints = getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list)
+
+    return detected_keypoints, keypoints_list, personwiseKeypoints
+
+
+"""Forward array of 20 images"""
+
+
+def getPoseFromDNN(net, images, frameWidth, frameHeight):
+    inHeight = 368
+    inWidth = int((inHeight / frameHeight) * frameWidth)
+    inpBlob = cv2.dnn.blobFromImages(np.array(images), 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False,
+                                     crop=False)
+    # Set the prepared object as the input blob of the network
+    net.setInput(inpBlob)
+
+    output = net.forward()
+    return output
+
+
+def visualize(image, df, frame):
+    number_ids = len([col for col in df.columns if 'ID' in col])
+    data = df.loc[df['Frame'] == frame]
+    plt.imshow(image)
+    plt.axis('off')
+    for id_no in range(number_ids):
+        keypoints = df['ID%i_Keypoints' % id_no].iloc[frame]
+        for i in range(len(POSE_PAIRS)):
+            index = POSE_PAIRS[i]
+            A, B = keypoints[index]
+            # for idx in index:
+            # print(keypointsMapping[idx])
+            if A is not None and B is not None:
+                plt.plot((A[0], B[0]), (A[1], B[1]), c=colors[i])
+
+    plt.show()
+
+def process(file):
+    global get_keypoints
+    
+    POSE_PROTO_FILE = r"openpose/pose_deploy_linevec.prototxt"
+    POSE_WEIGHTS_FILE = r"openpose/pose_iter_440000.caffemodel"
+    
+    if not os.path.exists(POSE_PROTO_FILE):
+        print('WARNING: Could not find pose file %s' % POSE_PROTO_FILE)
+        return
+    if not os.path.exists(POSE_WEIGHTS_FILE):
+        print('WARNING: Could not find model weights file %s' % POSE_WEIGHTS_FILE)
+        return
+
+    VIDEO = file
+    ROOT = "/".join(VIDEO.split("/")[:-1]) + "/"
+    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
+    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
+    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)
+    
+    if not os.path.exists(FRAMES):
+        print('WARNING: Could not find frame directory')
+        return
+    
+    # Load Model #
+    net = cv2.dnn.readNetFromCaffe(POSE_PROTO_FILE, POSE_WEIGHTS_FILE)
+    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
+    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
+
+    path_list = [f for f in os.listdir(FRAMES) if '.jpg' in f]
+    path_list.sort()
+
+    image = cv2.imread(os.path.join(FRAMES, path_list[0]))
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    frameWidth = image.shape[1]
+    frameHeight = image.shape[0]
+
+    lst = []
+    images = []
+    for frame, path in enumerate(tqdm(path_list)):
+        image = cv2.imread(os.path.join(FRAMES, path))
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        images.append(image)
+
+        if len(images) == 20:
+            output = getPoseFromDNN(net, images, frameWidth, frameHeight)
+            lst.extend(output)
+            images = []
+
+    if len(images) != 0:
+        output = getPoseFromDNN(net, images, frameWidth, frameHeight)
+        lst.extend(output)
+        images = []
+
+    print(len(lst))
+    df = pd.DataFrame(range(len(lst)))
+    df.columns = ["Frame"]
+    df["Pose"] = lst
+    
+    path = "%s%s_OpenPose_raw.pkl" % (TMP_DIR, VIDEOOUT)
+    df.to_pickle(path)
+
+    def get_keypoints(frames):
+        ret = []
+        for f in frames:
+            output = df[df.Frame == f].Pose.iloc[0]
+            ret.append(getPose(output))
+        return ret
+    
+    if platform == "linux" or platform == "linux2" or platform == "darwin":
+        cores = max(4, cpu_count() - 4)
+        print('Run sorting on {} cores'.format(cores))
+        data_split = np.array_split(df.Frame, cores)
+        pool = Pool(cores)
+        data = pool.map(get_keypoints, data_split)
+        pool.close()
+        pool.join()
+    elif platform == "win32":
+        print('WARNING: Can\'t run multiprocessing on Windows, this might take a while...')
+        data = get_keypoints(df.Frame)
+    else: 
+        print('WARNING: platform not supported')
+    
+    x = np.vstack(data)
+    df["DetectedKeypoints"] = x[:, 0]
+    df["KeypointsList"] = x[:, 1]
+    df["PersonwiseKeypoints"] = x[:, 2]
+    df.head()
+
+    path = "%s%s_OpenPose.pkl" % (TMP_DIR, VIDEOOUT)
+    df.to_pickle(path)
+    print("Saved OpenPose detections to %s" % path)
+
+    del df["Pose"]
+    del df['Pic']
+    del df['DetectedKeypoints']
+
+    number_ids = len(df.PersonwiseKeypoints.values.flatten()[0])
+    print('Number of detected IDs: ', number_ids)
+
+    """Map personwise keypoints to list of keypoints for each ID"""
+    def keypoints_fun(x):
+        # Discard frames where not all ids detected
+        if len(x.PersonwiseKeypoints) < number_ids:
+            # print('None')
+            return None
+            # index is -1 for no detection >> keypoint = None
+        lst = list(x.KeypointsList)
+        lst.append(None)
+        lst = np.array(lst)
+
+        keypoints = lst[x.PersonwiseKeypoints[id_no].astype(int)[:18]]
+        return keypoints
+
+    for id_no in range(number_ids):
+        counter = 0
+        print('ID%i' % id_no)
+        col = 'ID%i_Keypoints' % id_no
+
+        df[col] = df.apply(keypoints_fun, axis=1)
+
+    """Sort IDs to be consistent throughout video"""
+    lst = []
+    columns = [col for col in df.columns if 'ID' in col]
+    data = df[columns]
+
+    lst.append(data.iloc[0].values)
+
+    for i in range(1, len(df.Frame)):
+        row = data.iloc[i]
+        lst2 = []
+        for ids in range(number_ids):
+            keypoints = row['ID%i_Keypoints' % ids]
+
+            if keypoints is not None and keypoints[1] is not None:
+                for j in range(number_ids):
+                    backtrack = 1
+                    while lst[i - backtrack][j] == None:
+                        backtrack = backtrack + 1
+                    keypoints2 = lst[i - backtrack][j]
+
+                    lst2.append([ids, j, np.linalg.norm(np.array(keypoints[1]) - np.array(keypoints2[1])), keypoints])
+            else:
+                lst2.append([ids, None, None, None])
+        dfX = pd.DataFrame(lst2)
+        dfX.columns = ["Id", "GtId", "Distance", "Keypoints"]
+        dfX = dfX.sort_values("Distance")
+        dfX = dfX.drop_duplicates("GtId").drop_duplicates("Id")
+        lstRow = []
+        for j in range(number_ids):
+            if (len(dfX[dfX.GtId == j]) > 0):
+                lstRow.append(dfX[dfX.GtId == j].iloc[0].Keypoints)
+            else:
+                lstRow.append(None)
+        lstRow.append(i)
+        lst.append(lstRow)
+
+    df_new = pd.DataFrame(lst)
+    columns = []
+    for i in range(number_ids):
+        columns.append('ID%i_Keypoints' % i)
+    columns.append("Frame")
+    df_new.columns = columns
+
+    # First frame number is NaN from sorting
+    df_new.Frame = df_new.Frame.fillna(0)
+    df_new = df_new.astype({'Frame': 'int32'})
+
+    path = "%s%s_BodyMovement.pkl" % (TMP_DIR, VIDEOOUT)
+    df_new.to_pickle(path)
+    print('Saved Body Movement to %s' % path)
+    
+    visualize(image, df_new, 0)
+
+
--- a/processing/process_RTGene.py
+++ b/processing/process_RTGene.py
@ -0,0 +1,363 @@
+import tensorflow as tf
+
+import sys
+import os
+import argparse
+
+import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from tqdm.notebook import tqdm
+
+from rt_gene.gaze_tools import get_phi_theta_from_euler, limit_yaw
+from rt_gene.extract_landmarks_method_base import LandmarkMethodBase
+from rt_gene.estimate_gaze_base import GazeEstimatorBase
+from rt_gene.estimate_gaze_tensorflow import GazeEstimator
+from rt_gene.gaze_tools_standalone import euler_from_matrix
+
+import itertools
+import pandas as pd
+
+#os.environ["CUDA_VISIBLE_DEVICES"]="1"
+
+def getCenter(box):
+    """Return the midpoint of a box as a 2-element numpy array.
+
+    The box is indexed as ``box[0..3]``; elsewhere in this file it is drawn
+    with ``(box[0], box[1])`` as one corner and ``(box[2], box[3])`` as the
+    opposite one, so the result is the (x, y) centre of that rectangle.
+    """
+    x_sum = box[2] + box[0]
+    y_sum = box[3] + box[1]
+    corner_sums = np.array([x_sum, y_sum])
+    return corner_sums / 2
+
+def load_camera_calibration(calibration_file):
+    """Load lens distortion coefficients and the camera matrix from a file.
+
+    The format is chosen by file extension (case-insensitive):
+
+    * ``.pkl``  - pickled dict with the keys ``distortion_coef`` and
+      ``camera_matrix``; the values are returned unchanged.
+    * ``.yaml`` / ``.yml`` - mapping with ``distortion_coefficients.data``
+      (reshaped to 1x5) and ``camera_matrix.data`` (reshaped to 3x3), both
+      converted to float32 arrays.
+
+    Args:
+        calibration_file: Path to the calibration file.
+
+    Returns:
+        Tuple ``(dist_coefficients, camera_matrix)``.
+
+    Raises:
+        ValueError: If the file extension is not one of the supported ones.
+    """
+    fileType = os.path.splitext(calibration_file)[1].lstrip(".").lower()
+    if fileType == "pkl":
+        import pickle
+        # NOTE: pickle can execute arbitrary code while loading; only use
+        # calibration files from a trusted source.
+        with open(calibration_file, 'rb') as infile:
+            data = pickle.load(infile)
+        return data["distortion_coef"], data["camera_matrix"]
+    if fileType in ("yaml", "yml"):
+        import yaml
+        with open(calibration_file, 'r') as f:
+            cal = yaml.safe_load(f)
+
+        dist_coefficients = np.array(cal['distortion_coefficients']['data'], dtype='float32').reshape(1, 5)
+        camera_matrix = np.array(cal['camera_matrix']['data'], dtype='float32').reshape(3, 3)
+
+        return dist_coefficients, camera_matrix
+
+    # Fail loudly instead of returning None, which would only surface later
+    # as an unpacking error in the caller.
+    raise ValueError("Unsupported calibration file type '%s' (expected .pkl, .yaml or .yml): %s"
+                     % (fileType, calibration_file))
+
+
+def extract_eye_image_patches(subjects, landmark_estimator):
+    """Attach left/right eye colour patches to every subject in place.
+
+    For each subject, ``get_eye_image_from_landmarks`` is called with the
+    subject itself and the estimator's ``eye_image_size``; the first two of
+    the four returned values are stored as ``left_eye_color`` and
+    ``right_eye_color``. Nothing is returned.
+    """
+    for person in subjects:
+        eye_size = landmark_estimator.eye_image_size
+        left_patch, right_patch, _, _ = person.get_eye_image_from_landmarks(person, eye_size)
+        person.left_eye_color = left_patch
+        person.right_eye_color = right_patch
+
+
+def estimate_gaze(base_name, color_img, landmark_estimator, gaze_estimator, dist_coefficients, camera_matrix, args):
+    """Detect faces in one image and estimate head pose and gaze per subject.
+
+    Args:
+        base_name: File-like name of the image. Its stem (extension removed)
+            is split on "_": the last segment is parsed as the integer frame
+            number, everything before it becomes the ``File`` value. The stem
+            is also used to name the optional output files.
+        color_img: Image handed to the landmark estimator's face detector.
+        landmark_estimator: Object providing ``get_face_bb``,
+            ``get_subjects_from_faceboxes``, ``model_points``,
+            ``eye_image_size`` and ``visualize_headpose_result``.
+        gaze_estimator: Object providing ``input_from_image``,
+            ``estimate_gaze_twoeyes`` and ``visualize_eye_result``.
+        dist_coefficients: Distortion coefficients passed to ``cv2.solvePnP``.
+        camera_matrix: Camera matrix passed to ``cv2.solvePnP``.
+        args: Dict with the flags ``vis_headpose``, ``save_headpose``,
+            ``vis_gaze``, ``save_gaze``, ``save_estimate`` and, for the save
+            flags, the directory ``output_path``.
+
+    Returns:
+        A list with one dict per valid subject (keys ``File``, ``Frame``,
+        ``SubjectId``, ``HeadBox``, ``Landmarks``, ``GazeTheta``, ``GazePhi``,
+        ``HeadPoseTheta``, ``HeadPosePhi``, ``HeadPoseRoll``,
+        ``HeadPosePitch``, ``HeadPoseYaw``), or ``None`` when no face box is
+        found or no subject yields both eye patches and a head pose.
+    """
+    faceboxes = landmark_estimator.get_face_bb(color_img)
+    if len(faceboxes) == 0:
+        tqdm.write('Could not find faces in the image')
+        # Implicit None: callers must handle a missing result for this image
+        return
+
+    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
+    extract_eye_image_patches(subjects, landmark_estimator)
+
+    # Parallel lists; entry k of each list belongs to subject valid_subject_list[k]
+    input_r_list = []
+    input_l_list = []
+    input_head_list = []
+    valid_subject_list = []
+    roll_pitch_yaw_list = []
+
+    for idx, subject in enumerate(subjects):
+        # Subjects without both eye patches are skipped silently
+        if subject.left_eye_color is None or subject.right_eye_color is None:
+            #tqdm.write('Failed to extract eye image patches')
+            continue
+
+        # Solve for the head rotation from the 3D model points and the 2D landmarks
+        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
+                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
+                                                   cameraMatrix=camera_matrix,
+                                                   distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)
+
+        if not success:
+            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
+            continue
+
+        # Rotation vector -> 3x3 matrix, then a fixed axis permutation, embedded in a 4x4 homogeneous matrix
+        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
+        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
+        _m = np.zeros((4, 4))
+        _m[:3, :3] = _rotation_matrix
+        _m[3, 3] = 1
+        # Go from camera space to ROS space
+        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
+                          [-1.0, 0.0, 0.0, 0.0],
+                          [0.0, -1.0, 0.0, 0.0],
+                          [0.0, 0.0, 0.0, 1.0]]
+        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
+        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)
+        roll_pitch_yaw_list.append(roll_pitch_yaw)
+
+        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)
+
+        face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
+        head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))
+
+        if args['vis_headpose']:
+            plt.axis("off")
+            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
+            plt.show()
+
+        # NOTE(review): the file name depends only on base_name, so with several
+        # subjects in one image each write targets the same path
+        if args['save_headpose']:
+            cv2.imwrite(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_headpose.jpg'), head_pose_image)
+
+        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
+        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
+        # Head pose is stored in (theta, phi) order
+        input_head_list.append([theta_head, phi_head])
+        valid_subject_list.append(idx)
+
+    if len(valid_subject_list) == 0:
+        # Implicit None, same as the "no faces" case above
+        return
+
+    # One batched inference call for all valid subjects
+    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
+                                                    inference_input_right_list=input_r_list,
+                                                    inference_headpose_list=input_head_list)
+    
+    # base_name stem is "<file>_<frame>": the last "_" segment is the frame number
+    file_base = os.path.splitext(base_name)[0]
+    file = "_".join(file_base.split("_")[:-1])
+    frame = int(file_base.split("_")[-1])
+    ret = []
+    for subject_id, gaze, headpose, roll_pitch_yaw in zip(valid_subject_list, gaze_est.tolist(), input_head_list, roll_pitch_yaw_list):
+        subject = subjects[subject_id]
+        #print(roll_pitch_yaw)
+        # Build visualizations
+        r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
+        l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
+        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
+
+        if args['vis_gaze']:
+            plt.axis("off")
+            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
+            plt.show()
+
+        # NOTE(review): as with the head pose image, the path is shared by all subjects of this image
+        if args['save_gaze']:
+            cv2.imwrite(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_gaze.jpg'), s_gaze_img)
+            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
+            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)
+
+        # The text file is opened with 'w+', so it holds only the last subject written for this image
+        if args['save_estimate']:
+            with open(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
+                f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
+                        ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
+        # Phi: pos - look down,  neg -   look up
+        # Theta: pos - rotate left,  neg - rotate right
+        # Index 0 of gaze/headpose is stored as theta, index 1 as phi
+        d = {"File":file, "Frame": frame, "SubjectId":subject_id, "HeadBox":subject.box, "Landmarks": subject.landmarks, "GazeTheta":gaze[0], "GazePhi":gaze[1], "HeadPoseTheta":headpose[0], "HeadPosePhi":headpose[1], "HeadPoseRoll":roll_pitch_yaw[0], "HeadPosePitch":roll_pitch_yaw[1], "HeadPoseYaw":roll_pitch_yaw[2]}
+        ret.append(d)
+        
+    return ret
+
+
+def visualize(df, FRAMES):
+    """Show the first extracted frame with the frame-0 head boxes drawn on it.
+
+    Args:
+        df: DataFrame with the columns ``Frame``, ``SubjectId`` and
+            ``HeadBox``; a head box is indexed as (x0, y0, x1, y1).
+        FRAMES: Directory that contains the extracted ``.jpg`` frames. The
+            alphabetically first one is used as background image.
+
+    If the directory holds no ``.jpg`` file or the image cannot be read, a
+    warning is printed and nothing is plotted.
+    """
+    path_list = sorted(f for f in os.listdir(FRAMES) if '.jpg' in f)
+    if not path_list:
+        print('WARNING: Could not find any .jpg frames in %s' % FRAMES)
+        return
+
+    image_path = os.path.join(FRAMES, path_list[0])
+    image = cv2.imread(image_path)
+    if image is None:
+        # cv2.imread reports an unreadable file by returning None
+        print('WARNING: Could not read frame image %s' % image_path)
+        return
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+    fig, ax = plt.subplots(1, figsize=(18,10))
+
+    frame_zero = df.loc[df.Frame == 0]
+    # Iterate over the ids that actually occur; they are not guaranteed to be
+    # the contiguous range 0..n-1 because invalid subjects are skipped
+    for subject_id in sorted(df.SubjectId.unique()):
+        bbox = frame_zero.loc[frame_zero.SubjectId == subject_id]['HeadBox'].values
+        if len(bbox) > 0 and not np.any(pd.isna(bbox)):
+            bbox = np.array(bbox[0])
+            rect = patches.Rectangle((bbox[0],bbox[1]),bbox[2]-bbox[0],bbox[3]-bbox[1],linewidth=1,edgecolor='c',facecolor='none')
+            plt.text(bbox[0], bbox[1], 'ID%i' % subject_id, color='c' ,fontsize=20)
+            ax.add_patch(rect)
+
+    ax.imshow(image)
+    plt.show()
+
+
+def visualize_sorting(df_sorted):
+    """Scatter-plot the head-centre x coordinate of every person over the frames.
+
+    Args:
+        df_sorted: DataFrame with the columns ``PId``, ``HeadCenter`` and
+            ``Frame``. Rows without an assigned ``PId`` are ignored. One
+            series labelled "Sub <PId>" is drawn per person.
+    """
+    # Filter once; rows with a missing PId can never match a person id anyway
+    assigned = df_sorted[~df_sorted.PId.isna()]
+    for sid in sorted(assigned.PId.unique()):
+        person = assigned[assigned.PId == sid]
+        x = person.HeadCenter.apply(lambda center: center[0])
+        frames = person.Frame.to_list()
+        plt.scatter(frames, x, alpha=.2, label = "Sub %i" % sid)
+    plt.legend()
+    plt.show()
+
+
+def process(file, maxPeople, cameraRes = (5760, 2880)):
+    """Run gaze and head-pose estimation on a video and save per-person results.
+
+    Steps:
+      1. Load the landmark and gaze networks and the camera calibration.
+      2. Run ``estimate_gaze`` on every readable frame of the video.
+      3. Assign a consistent person id (``PId``) to each detection by matching
+         head centres against the most recent fully identified frame.
+      4. Pivot to one row per frame with ``ID<n>_<label>`` columns and pickle
+         the result to ``<TMP_DIR><video name>_RTGene.pkl``, where TMP_DIR is
+         the ``temp/`` folder next to the directory that holds the video.
+
+    Args:
+        file: Path of the video file, using "/" as separator.
+        maxPeople: Number of people expected in the video. A frame in which
+            exactly this many subjects are detected is required as reference.
+        cameraRes: (width, height) of the video; only the width is used, for
+            the wrap-around distance across the horizontal image seam.
+
+    Returns:
+        None. A warning is printed and the function returns early when the
+        video is missing, no detections are found, or no frame contains all
+        ``maxPeople`` subjects.
+
+    Raises:
+        ValueError: If the configured gaze backend is unknown.
+    """
+
+    VIDEO = file
+    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
+    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
+    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)
+
+    if not os.path.exists(VIDEO):
+        print('WARNING: Could not find video file')
+        return
+
+    script_path = "./"
+
+    args = {}
+    args["calib_file"] = "./calib_insta.pkl"
+    args["vis_headpose"] = False  # store_false
+    args["save_headpose"] = False  # store_false
+    args["vis_gaze"] = False  # store_false
+    args["save_gaze"] = False  # store_false
+    args["save_estimate"] = False  # store_false
+    args["device_id_facedetection"] = "cuda:0"  # store_false
+
+    args["im_path"] = os.path.join(script_path, './samples_gaze/')
+    args["output_path"] = os.path.join(script_path, './samples_gaze/')
+    args["models"] = [os.path.join(script_path, '../model_nets/Model_allsubjects1.h5')]
+    args['gaze_backend'] = 'tensorflow'
+    tqdm.write('Loading networks')
+    landmark_estimator = LandmarkMethodBase(device_id_facedetection=args["device_id_facedetection"],
+                                            checkpoint_path_face=os.path.join(script_path,
+                                                                              "../model_nets/SFD/s3fd_facedetector.pth"),
+                                            checkpoint_path_landmark=os.path.join(script_path,
+                                                                                  "../model_nets/phase1_wpdc_vdc.pth.tar"),
+                                            model_points_file=os.path.join(script_path,
+                                                                           "../model_nets/face_model_68.txt"))
+
+    if args['gaze_backend'] == "tensorflow":
+        from rt_gene.estimate_gaze_tensorflow import GazeEstimator
+        gaze_estimator = GazeEstimator("/gpu:0", args['models'])
+    elif args['gaze_backend'] == "pytorch":
+        from rt_gene.estimate_gaze_pytorch import GazeEstimator
+
+        gaze_estimator = GazeEstimator("cuda:0", args['models'])
+    else:
+        raise ValueError("Incorrect gaze_base backend, choices are: tensorflow or pytorch")
+
+    if not os.path.isdir(args["output_path"]):
+        os.makedirs(args["output_path"])
+
+    video = cv2.VideoCapture(VIDEO)
+    print('Video frame count: ', video.get(cv2.CAP_PROP_FRAME_COUNT))
+
+    if args["calib_file"] is not None and os.path.exists(args["calib_file"]):
+        _dist_coefficients, _camera_matrix = load_camera_calibration(args["calib_file"])
+    else:
+        im_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
+        im_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
+        print('WARNING!!! You should provide the camera calibration file, otherwise you might get bad results. \n\
+               Using a crude approximation!')
+        _dist_coefficients, _camera_matrix = np.zeros((1, 5)), np.array(
+                [[im_height, 0.0, im_width / 2.0], [0.0, im_height, im_height / 2.0], [0.0, 0.0, 1.0]])
+
+    lstRet = []
+    for i in tqdm(range(int(video.get(cv2.CAP_PROP_FRAME_COUNT)))):
+
+        image_file_name = "%s_%i.XXX" % (os.path.splitext(VIDEO)[0], i)
+        ret, image = video.read()
+        if not ret or image is None:
+            # The reported frame count is not guaranteed to match the number
+            # of decodable frames, so a failed read must not crash the run
+            tqdm.write('WARNING: Could not read frame %i' % i)
+            continue
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+        detections = estimate_gaze(image_file_name, image, landmark_estimator, gaze_estimator, _dist_coefficients, _camera_matrix, args)
+        # estimate_gaze returns None for frames without usable faces; keeping
+        # those would break the flattening below
+        if detections:
+            lstRet.append(detections)
+
+    video.release()
+
+    if not lstRet:
+        print('WARNING: No gaze detections found in video')
+        return
+
+    lst = list(itertools.chain.from_iterable(lstRet))
+    df = pd.DataFrame(lst)
+    df["HeadCenter"] = df.HeadBox.apply(lambda x: getCenter(x))
+    df["Phi"] = df.GazePhi + df.HeadPosePhi  # gaze yaw
+    df["Theta"] = df.GazeTheta + df.HeadPoseTheta  # gaze pitch
+    df['Yaw'] = df.GazePhi + df.HeadPoseYaw
+    df['Pitch'] = df.GazeTheta + df.HeadPosePitch
+
+    # path = '%s%s_raw.pkl' % (TMP_DIR, VIDEOOUT)
+    # df.to_pickle(path)
+    # print('Saved raw detections to: ', path)
+
+    visualize(df, FRAMES)
+
+    # Sort ID detections
+    ###############################################################################################################
+
+    # Find first frame where all are detected
+    first_frame = None
+    for frame in sorted(df.Frame.unique()):
+        frame_df = df.loc[df.Frame == frame]
+        if len(frame_df['SubjectId'].unique()) == maxPeople:
+            first_frame = frame
+            print('First frame where all are detected: ', first_frame)
+            break
+
+    if first_frame is None:
+        # Without a complete reference frame the ids cannot be initialised
+        print('WARNING: Could not find a frame where all %i people are detected' % maxPeople)
+        return
+
+    # Replace frame 0 by a copy of the first complete frame so that tracking
+    # starts from a frame in which every person is present
+    empty_rows = pd.DataFrame()
+    empty_rows['Frame'] = np.zeros(maxPeople).astype(int)
+
+    for col in df.columns:
+        if not col == 'Frame':
+            empty_rows[col] = df.loc[df.Frame == first_frame, [col]].values
+
+    df = df.loc[df.Frame != 0]
+    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
+    df = pd.concat([df, empty_rows]).sort_values(by=['Frame'])
+
+    df_sorted = df.copy()
+    df_sorted["PId"] = None
+    df_sorted.loc[df_sorted.Frame == 0, "PId"] = list(range(maxPeople))
+    df_sorted = df_sorted.sort_values("Frame")
+    df_sorted.index = list(range(len(df_sorted)))
+
+    for frameId in tqdm(sorted(df_sorted.Frame.unique())[1:]):
+        pidAssignement = []
+        # Walk backwards to the most recent frame in which every person id is assigned
+        for frameIdBefore in range(frameId - 1, -1, -1):
+            allFramesBefore = df_sorted[(df_sorted.Frame == frameIdBefore) & (~df_sorted.PId.isna())]
+            if (np.array_equal(sorted(allFramesBefore.PId.to_list()), np.arange(maxPeople))):
+
+                dfFramesCurrent = df_sorted[df_sorted.Frame == frameId]
+                for indexCurrentFrame, frameCurrent in dfFramesCurrent.iterrows():
+
+                    lst = []
+                    for indexBeforeFrame, frameBefore in allFramesBefore.iterrows():
+                        # p1 is the point with the smaller x coordinate
+                        if (frameBefore.HeadCenter[0] > frameCurrent.HeadCenter[0]):
+                            p1 = np.array(frameCurrent.HeadCenter)
+                            p2 = np.array(frameBefore.HeadCenter)
+                        else:
+                            p1 = np.array(frameBefore.HeadCenter)
+                            p2 = np.array(frameCurrent.HeadCenter)
+
+                        v1 = p1 - p2
+                        dist1 = np.linalg.norm(v1)
+
+                        # Second candidate: shift the left point by the image
+                        # width to measure the distance across the image seam
+                        p1[0] = p1[0] + cameraRes[0]
+                        v2 = p1 - p2
+                        dist2 = np.linalg.norm(v2)
+
+                        dist = min([dist1, dist2])
+
+                        lst.append([dist, frameCurrent.name, indexBeforeFrame, frameBefore])
+
+                    # Nearest previous head wins; two detections may receive the
+                    # same PId, duplicates are dropped before the pivot below
+                    lst.sort(key=lambda x: x[0])
+                    pidAssignement.append([indexCurrentFrame, lst[0][-1].PId])
+
+                break
+        for index, pid in pidAssignement:
+            df_sorted.loc[df_sorted.index == index, "PId"] = pid
+
+    visualize_sorting(df_sorted)
+
+    del df_sorted["SubjectId"]
+
+    # Rearrange DataFrame: each ID has specific columns
+    ###############################################################################################################
+    df_sorted = df_sorted[~df_sorted.PId.isna()].drop_duplicates(subset=['Frame', 'PId'])
+    FACE_COUNT = len(df_sorted[~df_sorted.PId.isna()].PId.unique())
+
+    df2 = df_sorted.pivot(index='Frame', columns="PId",
+                          values=["Landmarks", "GazeTheta", "GazePhi", "HeadCenter", "HeadPoseTheta", "HeadPosePhi",
+                                  "HeadPoseYaw", "HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"])
+    # Flat column names in the same order as the pivoted values above
+    # ("HeadCenter" is exported under the label "Head")
+    lst = []
+    for label in ["Landmarks", "GazeTheta", "GazePhi", "Head", "HeadPoseTheta", "HeadPosePhi", "HeadPoseYaw",
+                  "HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"]:
+        for head_id in range(FACE_COUNT):
+            lst.append("ID%i_%s" % (head_id, label))
+
+    df2.columns = lst
+    df2 = df2.reset_index()
+
+    path = "%s%s_RTGene.pkl" % (TMP_DIR, VIDEOOUT)
+    df2.to_pickle(path)
+    print("Saved RT-Gene detections to %s" % path)
+