Added processing

parent 74df5cb3f0
commit 9e0ac5daa8

9 changed files with 2287 additions and 4 deletions
@@ -11,11 +11,12 @@ pip install requirements.txt
 ```
 ## Get Started
 To test the GUI you can download our example use case videos from googledrive: <br>
-As well as the respective processed ``.dat`` files which include all the analyses.
+As well as the respective processed ``.dat`` files which include all the analyses. <br>
-Run [main.py](main.py) and import the video file you would like to analyze.
+You can then run [main.py](main.py) and import the video file you would like to analyze.
 
 ## Processing
+If you would like to analyze your own 360° video you can find the processing pipeline at [processing/](processing).
+Please note the processing pipeline requires a GPU.
 
 ## Citation
 Please cite this paper if you use ConAn or parts of this publication in your research:
exampledata/combine.py (new file, 24 additions)
@@ -0,0 +1,24 @@
#!/usr/bin/env python3

import pickle as pkl


def main():
    data = dict()
    with open('G2_VID4_BodyMovement.pkl', 'rb') as handle:
        data["BodyMovement"] = pkl.load(handle)
    with open('G2_VID4_HeadPose.pkl', 'rb') as handle:
        data["HeadPose"] = pkl.load(handle)
    with open('G2_VID4_JAActivityUnits_V2.pkl', 'rb') as handle:
        data['ActivityUnits'] = pkl.load(handle)
    with open('G2_VID4_RTGene.pkl', 'rb') as handle:
        data['RTGene'] = pkl.load(handle)
    with open("G2_VID4_speakDiar.pkl", 'rb') as handle:
        data["Speaker"] = pkl.load(handle)
    data["originalVideoResolution"] = (5760, 2880)
    with open('G2_VID4.dat', 'wb') as handle:
        pkl.dump(data, handle, protocol=pkl.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    main()
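Note: the combined ``.dat`` file written above is an ordinary pickle, so it can be inspected directly. A minimal sketch of loading it back, assuming the G2_VID4.dat produced by combine.py is in the working directory:

```
import pickle as pkl

# G2_VID4.dat is the file written by combine.py above.
with open('G2_VID4.dat', 'rb') as handle:
    data = pkl.load(handle)

# Keys written by combine.py: BodyMovement, HeadPose, ActivityUnits,
# RTGene, Speaker, originalVideoResolution.
print(data.keys())
print(data["originalVideoResolution"])  # (5760, 2880)
```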
processing/ConAn_RunProcessing.ipynb (new file, 1155 additions)
File diff suppressed because one or more lines are too long
processing/README.md (new file, 23 additions)
@@ -0,0 +1,23 @@
# Processing Pipeline
## Conda Environment Setup
```
conda env create -f conan_windows.yml
conda activate conan_windows_env
```

### OpenPose
### RT-Gene
- Run [processing/install_RTGene.py](/processing/install_RTGene.py)
- [OPTIONAL] Provide camera calibration file calib.pkl
- Provide maximum number of people in the video
### JAA-Net
### AVA-Active Speaker
### Apriltag

[https://www.wikihow.com/Install-FFmpeg-on-Windows](https://www.wikihow.com/Install-FFmpeg-on-Windows)
### Training
```
conda install -c anaconda cupy
conda install -c anaconda chainer
conda install -c anaconda ipykernel
```
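Note: the optional calib.pkl mentioned under RT-Gene is read by load_camera_calibration in processing/process_RTGene.py (later in this diff), which expects a pickled dict with distortion_coef and camera_matrix entries. A minimal sketch of writing such a file; the numbers are placeholders mirroring the crude fallback the script uses for a 5760x2880 video, not a real calibration:

```
import pickle
import numpy as np

# Placeholder calibration; replace with values measured for your own camera.
calib = {
    "distortion_coef": np.zeros((1, 5), dtype='float32'),
    "camera_matrix": np.array([[2880.0, 0.0, 2880.0],
                               [0.0, 2880.0, 1440.0],
                               [0.0, 0.0, 1.0]], dtype='float32'),
}

with open('calib.pkl', 'wb') as handle:
    pickle.dump(calib, handle)
```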
processing/conan_windows.yml (new file, 192 additions)
@@ -0,0 +1,192 @@
name: conan_windows_env
channels:
  - pytorch
  - anaconda
  - defaults
dependencies:
  - _tflow_select=2.1.0=gpu
  - absl-py=0.10.0=py36_0
  - aiohttp=3.6.3=py36he774522_0
  - argon2-cffi=20.1.0=py36he774522_1
  - astor=0.8.1=py36_0
  - async-timeout=3.0.1=py36_0
  - async_generator=1.10=py36h28b3542_0
  - attrs=20.2.0=py_0
  - backcall=0.2.0=py_0
  - blas=1.0=mkl
  - bleach=3.2.1=py_0
  - blinker=1.4=py36_0
  - brotlipy=0.7.0=py36he774522_1000
  - ca-certificates=2021.9.30=haa95532_1
  - cachetools=4.1.1=py_0
  - certifi=2021.5.30=py36haa95532_0
  - cffi=1.14.3=py36h7a1dbc1_0
  - chardet=3.0.4=py36_1003
  - click=7.1.2=py_0
  - colorama=0.4.4=py_0
  - cryptography=3.1.1=py36h7a1dbc1_0
  - cudatoolkit=10.1.243=h74a9793_0
  - cudnn=7.6.5=cuda10.1_0
  - cycler=0.10.0=py36haa95532_0
  - dataclasses=0.8=pyh4f3eec9_6
  - decorator=4.4.2=py_0
  - defusedxml=0.6.0=py_0
  - entrypoints=0.3=py36_0
  - freetype=2.10.4=hd328e21_0
  - gast=0.2.2=py36_0
  - google-auth=1.22.1=py_0
  - google-auth-oauthlib=0.4.1=py_2
  - google-pasta=0.2.0=py_0
  - grpcio=1.31.0=py36he7da953_0
  - h5py=2.10.0=py36h5e291fa_0
  - hdf5=1.10.4=h7ebc959_0
  - icc_rt=2019.0.0=h0cc432a_1
  - icu=58.2=vc14hc45fdbb_0
  - idna=2.10=py_0
  - idna_ssl=1.1.0=py36_0
  - importlib-metadata=2.0.0=py_1
  - importlib_metadata=2.0.0=1
  - intel-openmp=2020.2=254
  - ipykernel=5.3.4=py36h5ca1d4c_0
  - ipython=7.16.1=py36h5ca1d4c_0
  - ipython_genutils=0.2.0=py36h3c5d0ee_0
  - ipywidgets=7.5.1=py_1
  - jedi=0.18.0=py36haa95532_1
  - jinja2=2.11.2=py_0
  - jpeg=9b=hb83a4c4_2
  - jsonschema=3.2.0=py_2
  - jupyter=1.0.0=py36_7
  - jupyter_client=6.1.7=py_0
  - jupyter_console=6.2.0=py_0
  - jupyter_core=4.6.3=py36_0
  - jupyterlab_pygments=0.1.2=py_0
  - keras-applications=1.0.8=py_1
  - keras-preprocessing=1.1.0=py_1
  - kiwisolver=1.3.1=py36hd77b12b_0
  - libpng=1.6.37=h2a8f88b_0
  - libprotobuf=3.13.0.1=h200bbdf_0
  - libsodium=1.0.18=h62dcd97_0
  - libtiff=4.2.0=hd0e1b90_0
  - libuv=1.40.0=he774522_0
  - lz4-c=1.9.3=h2bbff1b_1
  - m2w64-gcc-libgfortran=5.3.0=6
  - m2w64-gcc-libs=5.3.0=7
  - m2w64-gcc-libs-core=5.3.0=7
  - m2w64-gmp=6.1.0=2
  - m2w64-libwinpthread-git=5.0.0.4634.697f757=2
  - markdown=3.3.2=py36_0
  - markupsafe=1.1.1=py36he774522_0
  - matplotlib=3.3.4=py36haa95532_0
  - matplotlib-base=3.3.4=py36h49ac443_0
  - mistune=0.8.4=py36he774522_0
  - mkl=2019.4=245
  - mkl-service=2.3.0=py36hb782905_0
  - mkl_fft=1.2.0=py36h45dec08_0
  - mkl_random=1.0.4=py36h343c172_0
  - msys2-conda-epoch=20160418=1
  - multidict=4.7.6=py36he774522_1
  - nbclient=0.5.1=py_0
  - nbconvert=6.0.7=py36_0
  - nbformat=5.0.8=py_0
  - nest-asyncio=1.4.1=py_0
  - ninja=1.10.2=h6d14046_1
  - notebook=6.1.4=py36_0
  - numpy=1.19.1=py36h5510c5b_0
  - numpy-base=1.19.1=py36ha3acd2a_0
  - oauthlib=3.1.0=py_0
  - olefile=0.46=py36_0
  - openssl=1.1.1l=h2bbff1b_0
  - opt_einsum=3.1.0=py_0
  - packaging=20.4=py_0
  - pandas=1.1.3=py36ha925a31_0
  - pandoc=2.11=h9490d1a_0
  - pandocfilters=1.4.2=py36_1
  - parso=0.8.0=py_0
  - pickleshare=0.7.5=py36_0
  - pillow=8.0.0=py36hca74424_0
  - pip=21.0.1=py36haa95532_0
  - prometheus_client=0.8.0=py_0
  - prompt-toolkit=3.0.8=py_0
  - prompt_toolkit=3.0.8=0
  - protobuf=3.13.0.1=py36ha925a31_1
  - pyasn1=0.4.8=py_0
  - pyasn1-modules=0.2.8=py_0
  - pycparser=2.20=py_2
  - pygments=2.7.1=py_0
  - pyjwt=1.7.1=py36_0
  - pyopengl=3.1.1a1=py36_0
  - pyopenssl=19.1.0=py_1
  - pyparsing=2.4.7=py_0
  - pyqt=5.9.2=py36ha878b3d_0
  - pyreadline=2.1=py36_1
  - pyrsistent=0.17.3=py36he774522_0
  - pysocks=1.7.1=py36_0
  - python=3.6.13=h3758d61_0
  - python-dateutil=2.8.1=py_0
  - pytorch=1.8.1=py3.6_cuda10.1_cudnn7_0
  - pytz=2020.1=py_0
  - pywin32=227=py36he774522_1
  - pywinpty=0.5.7=py36_0
  - pyzmq=19.0.2=py36ha925a31_1
  - qt=5.9.7=vc14h73c81de_0
  - qtconsole=4.7.7=py_0
  - qtpy=1.9.0=py_0
  - requests=2.24.0=py_0
  - requests-oauthlib=1.3.0=py_0
  - rsa=4.6=py_0
  - scipy=1.5.2=py36h9439919_0
  - send2trash=1.5.0=py36_0
  - setuptools=58.0.4=py36haa95532_0
  - sip=4.19.24=py36ha925a31_0
  - six=1.15.0=py_0
  - sqlite=3.36.0=h2bbff1b_0
  - tensorboard=2.2.1=pyh532a8cf_0
  - tensorboard-plugin-wit=1.6.0=py_0
  - tensorflow=2.1.0=gpu_py36h3346743_0
  - tensorflow-base=2.1.0=gpu_py36h55f5790_0
  - tensorflow-estimator=2.6.0=pyh7b7c402_0
  - tensorflow-gpu=2.1.0=h0d30ee6_0
  - termcolor=1.1.0=py36_1
  - terminado=0.9.1=py36_0
  - testpath=0.4.4=py_0
  - tk=8.6.11=h2bbff1b_0
  - torchvision=0.9.1=py36_cu101
  - tornado=6.0.4=py36he774522_1
  - traitlets=4.3.3=py36_0
  - typing_extensions=3.7.4.3=py_0
  - urllib3=1.25.11=py_0
  - vc=14.2=h21ff451_1
  - vs2015_runtime=14.27.29016=h5e58377_2
  - wcwidth=0.2.5=py_0
  - webencodings=0.5.1=py36_1
  - werkzeug=0.14.1=py36_0
  - wheel=0.37.0=pyhd3eb1b0_1
  - widgetsnbextension=3.5.1=py36_0
  - win_inet_pton=1.1.0=py36_0
  - wincertstore=0.2=py36h7fe50ca_0
  - winpty=0.4.3=4
  - wrapt=1.12.1=py36he774522_1
  - xz=5.2.5=h62dcd97_0
  - yarl=1.6.2=py36he774522_0
  - zeromq=4.3.2=ha925a31_3
  - zipp=3.3.1=py_0
  - zlib=1.2.11=vc14h1cdd9ab_1
  - zstd=1.4.9=h19a0ad4_0
  - pip:
    - bidict==0.21.3
    - dlib==19.22.1
    - imageio==2.9.0
    - imageio-ffmpeg==0.4.5
    - joblib==1.1.0
    - lru-dict==1.1.7
    - moviepy==1.0.3
    - opencv-python==4.5.3.56
    - overrides==6.1.0
    - proglog==0.1.9
    - pupil-apriltags==1.0.4
    - pupil-pthreads-win==2
    - scikit-learn==0.24.2
    - threadpoolctl==3.0.0
    - tqdm==4.62.3
    - typing-utils==0.1.0
processing/install_RTGene.py (new file, 19 additions)
@@ -0,0 +1,19 @@
import os
import subprocess

def main():

    download_cmds = ['git clone https://github.com/Tobias-Fischer/rt_gene.git',
                     'mv ./rt_gene ./rt_gene_GIT',
                     'mv ./rt_gene_GIT/rt_gene/src/rt_gene/ ./',
                     'mv ./rt_gene_GIT/rt_gene/model_nets ./../model_nets']

    for cmd in download_cmds:
        subprocess.call(cmd, shell=True)
    from rt_gene.download_tools import download_gaze_tensorflow_models, download_external_landmark_models
    download_gaze_tensorflow_models()
    download_external_landmark_models()


if __name__ == '__main__':
    main()
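Note: run from inside processing/, the mv commands above leave roughly the layout sketched below, which is what the relative ../model_nets paths in process_RTGene.py assume. This is inferred from the commands, not output of the script; the download_* calls fetch the pretrained gaze and landmark models.

```
processing/
    install_RTGene.py
    rt_gene/        (moved from rt_gene_GIT/rt_gene/src/rt_gene/)
    rt_gene_GIT/    (the full clone)
model_nets/         (moved from rt_gene_GIT/rt_gene/model_nets)
```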
processing/process_AprilTag.py (new file, 94 additions)
@@ -0,0 +1,94 @@

import os
from sys import platform
if platform == "linux" or platform == "linux2":
    # linux
    import apriltag
elif platform == "darwin":
    # OS X
    import apriltag
elif platform == "win32":
    # Windows
    import pupil_apriltags as apriltag

import cv2
import matplotlib.pyplot as plt
import pandas as pd

visualize = True

def process(file):

    VIDEO = file
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    ROOT = "/".join(VIDEO.split("/")[:-1]) + "/"
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)

    if not os.path.exists(FRAMES):
        print('WARNING: Could not find frame directory')
        return

    img_paths = [f for f in os.listdir(FRAMES) if 'jpg' in f]
    print('Number of frames: ', len(img_paths))

    if platform == "linux" or platform == "linux2" or platform == "darwin":
        # Circumvent error: too many borders in contour_detect (max of 32767!)
        options = apriltag.DetectorOptions(refine_edges=False, quad_contours=False)
        detector = apriltag.Detector(options)
    elif platform == "win32":
        print('WARNING: apriltag2 not supported on windows, running with pupil_apriltags...')
        detector = apriltag.Detector(refine_edges=False)

    detections = {}

    if visualize:
        fig = plt.Figure(figsize=(15, 10))
        path = os.path.join(FRAMES, img_paths[0])

        img = cv2.imread(path)
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        result = detector.detect(img)

        for i in range(len(result)):
            tf = result[i].tag_family
            tag_id = result[i].tag_id
            cx, cy = result[i].center
            # print('Found tag: ', tag_id)
            img = cv2.circle(img, (int(cx), int(cy)), 50, (255, 255, 0), thickness=10)

        plt.imshow(image)
        plt.axis('off')
        #plt.savefig('./AprilTag_Detection_%s.jpg' % VIDEOOUT)
        plt.show()

    tags = dict()
    for frame, p in enumerate(img_paths):

        path = os.path.join(FRAMES, p)

        img = cv2.imread(path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        result = detector.detect(img)

        for i in range(len(result)):
            tf = result[i].tag_family
            cx, cy = result[i].center
            tag_id = result[i].tag_id
            tags[tag_id] = [cx, cy]

        print('Frame %i found %i tags' % (frame, len(result)))
        detections[frame] = tags

    df = pd.DataFrame.from_dict(detections, orient='index')

    path = './AprilTag_%s.pkl' % VIDEOOUT
    df.to_pickle(path)
    print('Saved AprilTag detections to %s' % path)


if __name__ == '__main__':
    process('./Data/ShowCase_3.mp4')
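Note: process() expects the video's frames to already be extracted as .jpg files under <temp>/<VIDEOOUT>_frames; that extraction step is not part of this file. The detections are written with DataFrame.to_pickle, so a minimal sketch of reading them back looks like this (the file name follows the './AprilTag_%s.pkl' pattern and matches the example call above):

```
import pandas as pd

# One row per frame (orient='index'); one column per detected tag id,
# each cell holding the [cx, cy] image coordinates of that tag's centre.
df = pd.read_pickle('./AprilTag_ShowCase_3.pkl')
print(df.head())
```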
processing/process_OpenPose.py (new file, 412 additions)
@@ -0,0 +1,412 @@
#!/usr/bin/env python
# coding: utf-8

# # OpenPose pose detection
# 2D real-time multi-person keypoint detection:
# **18**-keypoint body/foot keypoint estimation. Running time invariant to number of detected people
# see https://github.com/CMU-Perceptual-Computing-Lab/openpose
#
# ## Pipeline
# - Run 18-keypoint model on video frames
# - Parse keypoints and PAFs to generate personwise keypoints
# - Save results to OpenPose.pkl

import os
import numpy as np
import cv2
import sys
from sys import platform
import time
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm, tqdm_pandas

tqdm.pandas()

from multiprocessing import cpu_count
from multiprocessing import Pool
import itertools
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES"] = "1"

POSE_PAIRS = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
              [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 17], [5, 16]]
keypointsMapping = ['Nose', 'Neck', 'R-Sho', 'R-Elb', 'R-Wr', 'L-Sho', 'L-Elb', 'L-Wr', 'R-Hip',
                    'R-Knee', 'R-Ank', 'L-Hip', 'L-Knee', 'L-Ank', 'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear']
mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
          [19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
          [47, 48], [49, 50], [53, 54], [51, 52], [55, 56],
          [37, 38], [45, 46]]

colors = np.array([(0, 100, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255),
                   (0, 255, 0), (255, 200, 100), (255, 0, 255), (0, 255, 0), (255, 200, 100), (255, 0, 255),
                   (0, 0, 255), (255, 0, 0), (200, 200, 0), (255, 0, 0), (200, 200, 0), (0, 0, 0)])

my_color = []
for c in colors:
    my_color.append(tuple(c))


# ## Auxiliary Functions
# see https://www.learnopencv.com/deep-learning-based-human-pose-estimation-using-opencv-cpp-python/
def getKeypoints(probMap, threshold=0.8):
    mapSmooth = cv2.GaussianBlur(probMap, (3, 3), 0, 0)
    mapMask = np.uint8(mapSmooth > threshold)
    keypoints = []
    # find the blobs
    contours, _ = cv2.findContours(mapMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # for each blob find the maxima
    # not enough values to unpack (expected 3, got 2)
    # version issue: https://github.com/facebookresearch/maskrcnn-benchmark/issues/339
    for cnt in contours:
        blobMask = np.zeros(mapMask.shape)
        blobMask = cv2.fillConvexPoly(blobMask, cnt, 1)
        maskedProbMap = mapSmooth * blobMask
        _, maxVal, _, maxLoc = cv2.minMaxLoc(maskedProbMap)
        keypoints.append(maxLoc + (probMap[maxLoc[1], maxLoc[0]],))
    return keypoints


# Find valid connections between the different joints of a all persons present
def getValidPairs(output, detected_keypoints, frameWidth, frameHeight):
    valid_pairs = []
    invalid_pairs = []
    n_interp_samples = 10
    paf_score_th = 0.1
    conf_th = 0.7
    # loop for every POSE_PAIR
    for k in range(len(mapIdx)):
        # A->B constitute a limb
        pafA = output[mapIdx[k][0], :, :]
        pafB = output[mapIdx[k][1], :, :]
        pafA = cv2.resize(pafA, (frameWidth, frameHeight))
        pafB = cv2.resize(pafB, (frameWidth, frameHeight))
        # Find the keypoints for the first and second limb
        candA = detected_keypoints[POSE_PAIRS[k][0]]
        candB = detected_keypoints[POSE_PAIRS[k][1]]
        nA = len(candA)
        nB = len(candB)
        # If keypoints for the joint-pair is detected
        # check every joint in candA with every joint in candB
        # Calculate the distance vector between the two joints
        # Find the PAF values at a set of interpolated points between the joints
        # Use the above formula to compute a score to mark the connection valid
        if (nA != 0 and nB != 0):
            valid_pair = np.zeros((0, 3))
            for i in range(nA):
                max_j = -1
                maxScore = -1
                found = 0
                for j in range(nB):
                    # Find d_ij
                    d_ij = np.subtract(candB[j][:2], candA[i][:2])
                    norm = np.linalg.norm(d_ij)
                    if norm:
                        d_ij = d_ij / norm
                    else:
                        continue
                    # Find p(u)
                    interp_coord = list(zip(np.linspace(candA[i][0], candB[j][0], num=n_interp_samples),
                                            np.linspace(candA[i][1], candB[j][1], num=n_interp_samples)))
                    # Find L(p(u))
                    paf_interp = []
                    for k in range(len(interp_coord)):
                        paf_interp.append([pafA[int(round(interp_coord[k][1])), int(round(interp_coord[k][0]))],
                                           pafB[int(round(interp_coord[k][1])), int(round(interp_coord[k][0]))]])
                    # Find E
                    paf_scores = np.dot(paf_interp, d_ij)
                    avg_paf_score = sum(paf_scores) / len(paf_scores)
                    # Check if the connection is valid
                    # If the fraction of interpolated vectors aligned with PAF is higher then threshold -> Valid Pair
                    if (len(np.where(paf_scores > paf_score_th)[0]) / n_interp_samples) > conf_th:
                        if avg_paf_score > maxScore:
                            max_j = j
                            maxScore = avg_paf_score
                            found = 1
                # Append the connection to the list
                if found:
                    valid_pair = np.append(valid_pair, [[candA[i][3], candB[max_j][3], maxScore]], axis=0)
            # Append the detected connections to the global list
            valid_pairs.append(valid_pair)
        else:  # If no keypoints are detected
            # ATTENTION: Commented this out by Sven
            # print("No Connection : k = {}".format(k))
            invalid_pairs.append(k)
            valid_pairs.append([])
    return valid_pairs, invalid_pairs


# This function creates a list of keypoints belonging to each person
# For each detected valid pair, it assigns the joint(s) to a person
def getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list):
    # the last number in each row is the overall score
    personwiseKeypoints = -1 * np.ones((0, 19))
    for k in range(len(mapIdx)):
        if k not in invalid_pairs:
            partAs = valid_pairs[k][:, 0]
            partBs = valid_pairs[k][:, 1]
            indexA, indexB = np.array(POSE_PAIRS[k])
            for i in range(len(valid_pairs[k])):
                found = 0
                person_idx = -1
                for j in range(len(personwiseKeypoints)):
                    if personwiseKeypoints[j][indexA] == partAs[i]:
                        person_idx = j
                        found = 1
                        break
                if found:
                    personwiseKeypoints[person_idx][indexB] = partBs[i]
                    personwiseKeypoints[person_idx][-1] += keypoints_list[partBs[i].astype(int), 2] + valid_pairs[k][i][2]
                # if find no partA in the subset, create a new subset
                elif not found and k < 17:
                    row = -1 * np.ones(19)
                    row[indexA] = partAs[i]
                    row[indexB] = partBs[i]
                    # add the keypoint_scores for the two keypoints and the paf_score
                    row[-1] = sum(keypoints_list[valid_pairs[k][i, :2].astype(int), 2]) + valid_pairs[k][i][2]
                    personwiseKeypoints = np.vstack([personwiseKeypoints, row])
    return personwiseKeypoints


def f(probMap):
    threshold = 0.5
    return getKeypoints(probMap, threshold)


def getPose(output):
    detected_keypoints = []
    keypoints_list = np.zeros((0, 3))
    keypoint_id = 0

    threshold = 0.5
    keypointsList = []

    for part in range(18):
        probMap = output[part, :, :]
        probMap = cv2.resize(probMap, (frameWidth, frameHeight))
        keypointsList.append(getKeypoints(probMap, threshold))

    for keypoints in keypointsList:  # nPoints = 18
        keypoints_with_id = []
        for i in range(len(keypoints)):
            keypoints_with_id.append(keypoints[i] + (keypoint_id,))
            keypoints_list = np.vstack([keypoints_list, keypoints[i]])
            keypoint_id += 1
        detected_keypoints.append(keypoints_with_id)

    valid_pairs, invalid_pairs = getValidPairs(output, detected_keypoints, frameWidth, frameHeight)
    personwiseKeypoints = getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list)

    return detected_keypoints, keypoints_list, personwiseKeypoints


"""Forward array of 20 images"""


def getPoseFromDNN(net, images, frameWidth, frameHeight):
    inHeight = 368
    inWidth = int((inHeight / frameHeight) * frameWidth)
    inpBlob = cv2.dnn.blobFromImages(np.array(images), 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False,
                                     crop=False)
    # Set the prepared object as the input blob of the network
    net.setInput(inpBlob)

    output = net.forward()
    return output


def visualize(image, df, frame):
    number_ids = len([col for col in df.columns if 'ID' in col])
    data = df.loc[df['Frame'] == frame]
    plt.imshow(image)
    plt.axis('off')
    for id_no in range(number_ids):
        keypoints = df['ID%i_Keypoints' % id_no].iloc[frame]
        for i in range(len(POSE_PAIRS)):
            index = POSE_PAIRS[i]
            A, B = keypoints[index]
            # for idx in index:
            #     print(keypointsMapping[idx])
            if A is not None and B is not None:
                plt.plot((A[0], B[0]), (A[1], B[1]), c=colors[i])

    plt.show()

def process(file):
    global get_keypoints

    POSE_PROTO_FILE = r"openpose/pose_deploy_linevec.prototxt"
    POSE_WEIGHTS_FILE = r"openpose/pose_iter_440000.caffemodel"

    if not os.path.exists(POSE_PROTO_FILE):
        print('WARNING: Could not find pose file %s' % POSE_PROTO_FILE)
        return
    if not os.path.exists(POSE_WEIGHTS_FILE):
        print('WARNING: Could not find model weights file %s' % POSE_WEIGHTS_FILE)
        return

    VIDEO = file
    ROOT = "/".join(VIDEO.split("/")[:-1]) + "/"
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)

    if not os.path.exists(FRAMES):
        print('WARNING: Could not find frame directory')
        return

    # Load Model #
    net = cv2.dnn.readNetFromCaffe(POSE_PROTO_FILE, POSE_WEIGHTS_FILE)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    path_list = [f for f in os.listdir(FRAMES) if '.jpg' in f]
    path_list.sort()

    image = cv2.imread(os.path.join(FRAMES, path_list[0]))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    frameWidth = image.shape[1]
    frameHeight = image.shape[0]

    lst = []
    images = []
    for frame, path in enumerate(tqdm(path_list)):
        image = cv2.imread(os.path.join(FRAMES, path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        images.append(image)

        if len(images) == 20:
            output = getPoseFromDNN(net, images, frameWidth, frameHeight)
            lst.extend(output)
            images = []

    if len(images) != 0:
        output = getPoseFromDNN(net, images, frameWidth, frameHeight)
        lst.extend(output)
        images = []

    print(len(lst))
    df = pd.DataFrame(range(len(lst)))
    df.columns = ["Frame"]
    df["Pose"] = lst

    path = "%s%s_OpenPose_raw.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)

    def get_keypoints(frames):
        ret = []
        for f in frames:
            output = df[df.Frame == f].Pose.iloc[0]
            ret.append(getPose(output))
        return ret

    if platform == "linux" or platform == "linux2" or platform == "darwin":
        cores = max(4, cpu_count() - 4)
        print('Run sorting on {} cores'.format(cores))
        data_split = np.array_split(df.Frame, cores)
        pool = Pool(cores)
        data = pool.map(get_keypoints, data_split)
        pool.close()
        pool.join()
    elif platform == "win32":
        print('WARNING: Can\'t run multiprocessing on Windows, this might take a while...')
        data = get_keypoints(df.Frame)
    else:
        print('WARNING: platform not supported')

    x = np.vstack(data)
    df["DetectedKeypoints"] = x[:, 0]
    df["KeypointsList"] = x[:, 1]
    df["PersonwiseKeypoints"] = x[:, 2]
    df.head()

    path = "%s%s_OpenPose.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)
    print("Saved OpenPose detections to %s" % path)

    del df["Pose"]
    del df['Pic']
    del df['DetectedKeypoints']

    number_ids = len(df.PersonwiseKeypoints.values.flatten()[0])
    print('Number of detected IDs: ', number_ids)

    """Map personwise keypoints to list of keypoints for each ID"""
    def keypoints_fun(x):
        # Discard frames where not all ids detected
        if len(x.PersonwiseKeypoints) < number_ids:
            # print('None')
            return None
        # index is -1 for no detection >> keypoint = None
        lst = list(x.KeypointsList)
        lst.append(None)
        lst = np.array(lst)

        keypoints = lst[x.PersonwiseKeypoints[id_no].astype(int)[:18]]
        return keypoints

    for id_no in range(number_ids):
        counter = 0
        print('ID%i' % id_no)
        col = 'ID%i_Keypoints' % id_no

        df[col] = df.apply(keypoints_fun, axis=1)

    """Sort IDs to be consistent throughout video"""
    lst = []
    columns = [col for col in df.columns if 'ID' in col]
    data = df[columns]

    lst.append(data.iloc[0].values)

    for i in range(1, len(df.Frame)):
        row = data.iloc[i]
        lst2 = []
        for ids in range(number_ids):
            keypoints = row['ID%i_Keypoints' % ids]

            if keypoints is not None and keypoints[1] is not None:
                for j in range(number_ids):
                    backtrack = 1
                    while lst[i - backtrack][j] == None:
                        backtrack = backtrack + 1
                    keypoints2 = lst[i - backtrack][j]

                    lst2.append([ids, j, np.linalg.norm(np.array(keypoints[1]) - np.array(keypoints2[1])), keypoints])
            else:
                lst2.append([ids, None, None, None])
        dfX = pd.DataFrame(lst2)
        dfX.columns = ["Id", "GtId", "Distance", "Keypoints"]
        dfX = dfX.sort_values("Distance")
        dfX = dfX.drop_duplicates("GtId").drop_duplicates("Id")
        lstRow = []
        for j in range(number_ids):
            if (len(dfX[dfX.GtId == j]) > 0):
                lstRow.append(dfX[dfX.GtId == j].iloc[0].Keypoints)
            else:
                lstRow.append(None)
        lstRow.append(i)
        lst.append(lstRow)

    df_new = pd.DataFrame(lst)
    columns = []
    for i in range(number_ids):
        columns.append('ID%i_Keypoints' % i)
    columns.append("Frame")
    df_new.columns = columns

    # First frame number is NaN from sorting
    df_new.Frame = df_new.Frame.fillna(0)
    df_new = df_new.astype({'Frame': 'int32'})

    path = "%s%s_BodyMovement.pkl" % (TMP_DIR, VIDEOOUT)
    df_new.to_pickle(path)
    print('Saved Body Movement to %s' % path)

    visualize(image, df_new, 0)
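Note: the final _BodyMovement.pkl written above is a pandas pickle with one ID%i_Keypoints column per tracked person plus a Frame column. A minimal sketch of reading it back; the path below assumes process() was called on ./Data/ShowCase_3.mp4 as in process_AprilTag.py, so it is illustrative only:

```
import pandas as pd

df = pd.read_pickle('./temp/ShowCase_3_BodyMovement.pkl')

# One row per frame; ID0_Keypoints holds the 18 OpenPose keypoints of person 0
# for that frame (entries may be None where a joint or the person was not detected).
row = df.iloc[0]
print(row['Frame'], row['ID0_Keypoints'])
```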
processing/process_RTGene.py (new file, 363 additions)
@@ -0,0 +1,363 @@
import tensorflow as tf

import sys
import os
import argparse

import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from tqdm.notebook import tqdm

from rt_gene.gaze_tools import get_phi_theta_from_euler, limit_yaw
from rt_gene.extract_landmarks_method_base import LandmarkMethodBase
from rt_gene.estimate_gaze_base import GazeEstimatorBase
from rt_gene.estimate_gaze_tensorflow import GazeEstimator
from rt_gene.gaze_tools_standalone import euler_from_matrix

import itertools
import pandas as pd

#os.environ["CUDA_VISIBLE_DEVICES"]="1"

def getCenter(box):
    return np.array([box[2]+box[0], box[3]+box[1]])/2

def load_camera_calibration(calibration_file):

    fileType = calibration_file.split(".")[-1]
    if fileType == "pkl":
        import pickle
        infile = open(calibration_file,'rb')
        data = pickle.load(infile)
        return data["distortion_coef"], data["camera_matrix"]
    elif fileType == "yaml":
        import yaml
        with open(calibration_file, 'r') as f:
            cal = yaml.safe_load(f)

        dist_coefficients = np.array(cal['distortion_coefficients']['data'], dtype='float32').reshape(1, 5)
        camera_matrix = np.array(cal['camera_matrix']['data'], dtype='float32').reshape(3, 3)

        return dist_coefficients, camera_matrix


def extract_eye_image_patches(subjects, landmark_estimator):
    for subject in subjects:
        le_c, re_c, _, _ = subject.get_eye_image_from_landmarks(subject, landmark_estimator.eye_image_size)
        subject.left_eye_color = le_c
        subject.right_eye_color = re_c


def estimate_gaze(base_name, color_img, landmark_estimator, gaze_estimator, dist_coefficients, camera_matrix, args):
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects, landmark_estimator)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []
    roll_pitch_yaw_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            #tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                          [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)
        roll_pitch_yaw_list.append(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        if args['vis_headpose']:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args['save_headpose']:
            cv2.imwrite(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_headpose.jpg'), head_pose_image)

        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    file_base = os.path.splitext(base_name)[0]
    file = "_".join(file_base.split("_")[:-1])
    frame = int(file_base.split("_")[-1])
    ret = []
    for subject_id, gaze, headpose, roll_pitch_yaw in zip(valid_subject_list, gaze_est.tolist(), input_head_list, roll_pitch_yaw_list):
        subject = subjects[subject_id]
        #print(roll_pitch_yaw)
        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args['vis_gaze']:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args['save_gaze']:
            cv2.imwrite(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_gaze.jpg'), s_gaze_img)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)

        if args['save_estimate']:
            with open(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
                f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                        ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
        # Phi: pos - look down, neg - look up
        # Theta: pos - rotate left, neg - rotate right
        d = {"File":file, "Frame": frame, "SubjectId":subject_id, "HeadBox":subject.box, "Landmarks": subject.landmarks, "GazeTheta":gaze[0], "GazePhi":gaze[1], "HeadPoseTheta":headpose[0], "HeadPosePhi":headpose[1], "HeadPoseRoll":roll_pitch_yaw[0], "HeadPosePitch":roll_pitch_yaw[1], "HeadPoseYaw":roll_pitch_yaw[2]}
        ret.append(d)

    return ret


def visualize(df, FRAMES):
    path_list = [f for f in os.listdir(FRAMES) if '.jpg' in f]
    path_list.sort()

    image = cv2.imread(os.path.join(FRAMES, path_list[0]))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    fig, ax = plt.subplots(1, figsize=(18,10))

    for i in range(len(df.SubjectId.unique())):
        bbox = df.loc[(df.Frame == 0) & (df.SubjectId == i)]['HeadBox'].values
        print(bbox)
        if not np.any(pd.isna(bbox)) and len(bbox) > 0:
            bbox = np.array(bbox[0])
            rect = patches.Rectangle((bbox[0],bbox[1]),bbox[2]-bbox[0],bbox[3]-bbox[1],linewidth=1,edgecolor='c',facecolor='none')
            plt.text(bbox[0], bbox[1], 'ID%i' % i, color='c' ,fontsize=20)
            ax.add_patch(rect)

    ax.imshow(image)
    plt.show()


def visualize_sorting(df_sorted):
    subs = sorted(df_sorted[~df_sorted.PId.isna()].PId.unique())
    for sid in subs:
        x = df_sorted[df_sorted.PId==sid].HeadCenter.apply(lambda x: x[0])
        y = df_sorted[df_sorted.PId==sid].HeadCenter.apply(lambda x: x[1])
        frames = df_sorted[df_sorted.PId==sid].Frame.to_list()
        plt.scatter(frames, x, alpha=.2, label = "Sub %i" % sid)
    plt.legend()
    plt.show()


def process(file, maxPeople, cameraRes = [5760, 2880]):

    VIDEO = file
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    ROOT = "/".join(VIDEO.split("/")[:-1]) + "/"
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)

    if not os.path.exists(VIDEO):
        print('WARNING: Could not find video file')
        return

    script_path = "./"

    args = {}
    args["calib_file"] = "./calib_insta.pkl"
    args["vis_headpose"] = False  # store_false
    args["save_headpose"] = False  # store_false
    args["vis_gaze"] = False  # store_false
    args["save_gaze"] = False  # store_false
    args["save_estimate"] = False  # store_false
    args["device_id_facedetection"] = "cuda:0"  # store_false

    args["im_path"] = os.path.join(script_path, './samples_gaze/')
    args["output_path"] = os.path.join(script_path, './samples_gaze/')
    args["models"] = [os.path.join(script_path, '../model_nets/Model_allsubjects1.h5')]
    args['gaze_backend'] = 'tensorflow'
    tqdm.write('Loading networks')
    landmark_estimator = LandmarkMethodBase(device_id_facedetection=args["device_id_facedetection"],
                                            checkpoint_path_face=os.path.join(script_path,
                                                                              "../model_nets/SFD/s3fd_facedetector.pth"),
                                            checkpoint_path_landmark=os.path.join(script_path,
                                                                                  "../model_nets/phase1_wpdc_vdc.pth.tar"),
                                            model_points_file=os.path.join(script_path,
                                                                           "../model_nets/face_model_68.txt"))

    #gaze_estimator = GazeEstimator("/gpu:0", args['models'])

    if args['gaze_backend'] == "tensorflow":
        from rt_gene.estimate_gaze_tensorflow import GazeEstimator
        gaze_estimator = GazeEstimator("/gpu:0", args['models'])
    elif args['gaze_backend'] == "pytorch":
        from rt_gene.estimate_gaze_pytorch import GazeEstimator

        gaze_estimator = GazeEstimator("cuda:0", args['models'])
    else:
        raise ValueError("Incorrect gaze_base backend, choices are: tensorflow or pytorch")

    if not os.path.isdir(args["output_path"]):
        os.makedirs(args["output_path"])

    video = cv2.VideoCapture(VIDEO)
    print('Video frame count: ', video.get(cv2.CAP_PROP_FRAME_COUNT))

    if args["calib_file"] is not None and os.path.exists(args["calib_file"]):
        _dist_coefficients, _camera_matrix = load_camera_calibration(args["calib_file"])
    else:
        im_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
        im_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
        print('WARNING!!! You should provide the camera calibration file, otherwise you might get bad results. \n\
              Using a crude approximation!')
        _dist_coefficients, _camera_matrix = np.zeros((1, 5)), np.array(
            [[im_height, 0.0, im_width / 2.0], [0.0, im_height, im_height / 2.0], [0.0, 0.0, 1.0]])

    lstRet = []
    for i in tqdm(list(range(int(video.get(cv2.CAP_PROP_FRAME_COUNT))))):

        image_file_name = "%s_%i.XXX" % (os.path.splitext(VIDEO)[0], i)
        ret, image = video.read()
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        lstRet.append(estimate_gaze(image_file_name, image, landmark_estimator, gaze_estimator, _dist_coefficients, _camera_matrix, args))

    lst = list(itertools.chain.from_iterable(lstRet))
    df = pd.DataFrame(lst)
    df["HeadCenter"] = df.HeadBox.apply(lambda x: getCenter(x))
    df["Phi"] = df.GazePhi + df.HeadPosePhi  # gaze yaw
    df["Theta"] = df.GazeTheta + df.HeadPoseTheta  # gaze pitch
    df['Yaw'] = df.GazePhi + df.HeadPoseYaw
    df['Pitch'] = df.GazeTheta + df.HeadPosePitch

    # path = '%s%s_raw.pkl' % (TMP_DIR, VIDEOOUT)
    # df.to_pickle(path)
    # print('Saved raw detections to: ', path)

    visualize(df, FRAMES)

    # Sort ID detections
    ###############################################################################################################

    # Find first frame where all are detected
    for frame in sorted(df.Frame.unique()):
        frame_df = df.loc[df.Frame == frame]
        if len(frame_df['SubjectId'].unique()) == maxPeople:
            first_frame = frame
            print('First frame where all are detected: ', first_frame)
            break

    empty_rows = pd.DataFrame()
    empty_rows['Frame'] = np.zeros(maxPeople).astype(int)

    for col in df.columns:
        if not col == 'Frame':
            empty_rows[col] = df.loc[df.Frame == first_frame, [col]].values

    df = df.loc[df.Frame != 0]
    df = df.append(empty_rows).sort_values(by=['Frame'])
    df.head()

    df_sorted = df.copy()
    df_sorted["PId"] = None
    df_sorted.loc[df_sorted.Frame == 0, "PId"] = list(range(maxPeople))
    df_sorted = df_sorted.sort_values("Frame")
    df_sorted.index = list(range(len(df_sorted)))

    for frameId in tqdm(sorted(df_sorted.Frame.unique())[1:]):
        pidAssignement = []
        for frameIdBefore in range(frameId - 1, -1, -1):
            allFramesBefore = df_sorted[(df_sorted.Frame == frameIdBefore) & (~df_sorted.PId.isna())]
            if (np.array_equal(sorted(allFramesBefore.PId.to_list()), np.arange(maxPeople))):

                dfFramesCurrent = df_sorted[df_sorted.Frame == frameId]
                for indexCurrentFrame, frameCurrent in dfFramesCurrent.iterrows():

                    lst = []
                    for indexBeforeFrame, frameBefore in allFramesBefore.iterrows():
                        if (frameBefore.HeadCenter[0] > frameCurrent.HeadCenter[0]):
                            p1 = np.array(frameCurrent.HeadCenter)
                            p2 = np.array(frameBefore.HeadCenter)
                        else:
                            p1 = np.array(frameBefore.HeadCenter)
                            p2 = np.array(frameCurrent.HeadCenter)

                        v1 = p1 - p2
                        dist1 = np.linalg.norm(v1)

                        p1[0] = p1[0] + cameraRes[0]
                        v2 = p1 - p2
                        dist2 = np.linalg.norm(v2)

                        dist = min([dist1, dist2])

                        lst.append([dist, frameCurrent.name, indexBeforeFrame, frameBefore])

                    lst.sort(key=lambda x: x[0])
                    pidAssignement.append([indexCurrentFrame, lst[0][-1].PId])

                break
        for index, pid in pidAssignement:
            df_sorted.loc[df_sorted.index == index, "PId"] = pid

    visualize_sorting(df_sorted)

    del df_sorted["SubjectId"]

    # Rearrange DataFrame: each ID has specific columns
    ###############################################################################################################
    df_sorted = df_sorted[~df_sorted.PId.isna()].drop_duplicates(subset=['Frame', 'PId'])
    FACE_COUNT = len(df_sorted[~df_sorted.PId.isna()].PId.unique())

    df2 = df_sorted.pivot(index='Frame', columns="PId",
                          values=["Landmarks", "GazeTheta", "GazePhi", "HeadCenter", "HeadPoseTheta", "HeadPosePhi",
                                  "HeadPoseYaw", "HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"])
    lst = []
    for label in ["Landmarks", "GazeTheta", "GazePhi", "Head", "HeadPoseTheta", "HeadPosePhi", "HeadPoseYaw",
                  "HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"]:
        for head_id in range(FACE_COUNT):
            lst.append("ID%i_%s" % (head_id, label))

    df2.columns = lst
    df2 = df2.reset_index()

    path = "%s%s_RTGene.pkl" % (TMP_DIR, VIDEOOUT)
    df2.to_pickle(path)
    print("Saved RT-Gene detections to %s" % path)
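Note: the _RTGene.pkl written at the end holds one column per person and quantity (ID0_Phi, ID0_Theta, ID0_HeadPoseYaw, and so on) plus a Frame column, where Phi and Theta are the combined gaze yaw (GazePhi + HeadPosePhi) and pitch (GazeTheta + HeadPoseTheta). A minimal sketch of reading it back; the path assumes the same ./Data/ShowCase_3.mp4 example video and is illustrative only:

```
import pandas as pd

df = pd.read_pickle('./temp/ShowCase_3_RTGene.pkl')

# Combined gaze yaw/pitch of person 0 over the video.
print(df[['Frame', 'ID0_Phi', 'ID0_Theta']].head())
```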