Added processing
This commit is contained in:
parent
74df5cb3f0
commit
9e0ac5daa8
|
@ -11,11 +11,12 @@ pip install requirements.txt
|
|||
```
|
||||
## Get Started
|
||||
To test the GUI, you can download our example use-case videos from Google Drive: <br>
|
||||
As well as the respective processed ``.dat`` files which include all the analyses.
|
||||
Run [main.py](main.py) and import the video file you would like to analyze.
|
||||
As well as the respective processed ``.dat`` files which include all the analyses. <br>
|
||||
You can then run [main.py](main.py) and import the video file you would like to analyze.
|
||||
|
||||
## Processing
|
||||
|
||||
|
||||
If you would like to analyze your own 360° video you can find the processing pipeline at [processing/](processing).
|
||||
Please note the processing pipeline requires a GPU.
|
||||
|
||||
## Citation
|
||||
Please cite this paper if you use ConAn or parts of this publication in your research:
|
||||
|
|
24
exampledata/combine.py
Normal file
24
exampledata/combine.py
Normal file
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pickle as pkl
|
||||
|
||||
|
||||
def main(prefix='G2_VID4', directory='.', resolution=(5760, 2880)):
    """Bundle the per-analysis pickles of one recording into a single ``.dat`` file.

    Reads ``<prefix>_BodyMovement.pkl``, ``<prefix>_HeadPose.pkl``,
    ``<prefix>_JAActivityUnits_V2.pkl``, ``<prefix>_RTGene.pkl`` and
    ``<prefix>_speakDiar.pkl`` from *directory*, stores them in one dict
    together with the original video resolution, and pickles that dict to
    ``<prefix>.dat`` in the same directory.

    Args:
        prefix: Common file-name prefix of the recording. The default
            reproduces the previously hard-coded example recording.
        directory: Folder that holds the input pickles and receives the
            output file. Defaults to the current working directory.
        resolution: Value stored under ``"originalVideoResolution"``.

    Raises:
        FileNotFoundError: If one of the input pickles is missing.
    """
    import os  # local import: this script only imports pickle at module level

    # (key in the combined dict, file-name suffix of the source pickle);
    # the order fixes the key order of the resulting dict.
    sources = (
        ("BodyMovement", "BodyMovement"),
        ("HeadPose", "HeadPose"),
        ("ActivityUnits", "JAActivityUnits_V2"),
        ("RTGene", "RTGene"),
        ("Speaker", "speakDiar"),
    )

    data = {}
    for key, suffix in sources:
        source_path = os.path.join(directory, '%s_%s.pkl' % (prefix, suffix))
        with open(source_path, 'rb') as handle:
            # NOTE: unpickling can execute arbitrary code; only load trusted files.
            data[key] = pkl.load(handle)

    data["originalVideoResolution"] = tuple(resolution)

    with open(os.path.join(directory, '%s.dat' % prefix), 'wb') as handle:
        pkl.dump(data, handle, protocol=pkl.HIGHEST_PROTOCOL)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
1155
processing/ConAn_RunProcessing.ipynb
Normal file
1155
processing/ConAn_RunProcessing.ipynb
Normal file
File diff suppressed because one or more lines are too long
23
processing/README.md
Normal file
23
processing/README.md
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Processing Pipeline
|
||||
## Conda Environment Setup
|
||||
```
|
||||
conda env create -f conan_windows.yml
|
||||
conda activate conan_windows_env
|
||||
```
|
||||
|
||||
### OpenPose
|
||||
### RT-Gene
|
||||
- Run [processing/install_RTGene.py](/processing/install_RTGene.py)
|
||||
- [OPTIONAL] Provide camera calibration file calib.pkl
|
||||
- Provide maximum number of people in the video
|
||||
### JAA-Net
|
||||
### AVA-Active Speaker
|
||||
### Apriltag
|
||||
|
||||
[https://www.wikihow.com/Install-FFmpeg-on-Windows](https://www.wikihow.com/Install-FFmpeg-on-Windows)
|
||||
### Training
|
||||
```
|
||||
conda install -c anaconda cupy
|
||||
conda install -c anaconda chainer
|
||||
conda install -c anaconda ipykernel
|
||||
```
|
192
processing/conan_windows.yml
Normal file
192
processing/conan_windows.yml
Normal file
|
@ -0,0 +1,192 @@
|
|||
name: conan_windows_env
|
||||
channels:
|
||||
- pytorch
|
||||
- anaconda
|
||||
- defaults
|
||||
dependencies:
|
||||
- _tflow_select=2.1.0=gpu
|
||||
- absl-py=0.10.0=py36_0
|
||||
- aiohttp=3.6.3=py36he774522_0
|
||||
- argon2-cffi=20.1.0=py36he774522_1
|
||||
- astor=0.8.1=py36_0
|
||||
- async-timeout=3.0.1=py36_0
|
||||
- async_generator=1.10=py36h28b3542_0
|
||||
- attrs=20.2.0=py_0
|
||||
- backcall=0.2.0=py_0
|
||||
- blas=1.0=mkl
|
||||
- bleach=3.2.1=py_0
|
||||
- blinker=1.4=py36_0
|
||||
- brotlipy=0.7.0=py36he774522_1000
|
||||
- ca-certificates=2021.9.30=haa95532_1
|
||||
- cachetools=4.1.1=py_0
|
||||
- certifi=2021.5.30=py36haa95532_0
|
||||
- cffi=1.14.3=py36h7a1dbc1_0
|
||||
- chardet=3.0.4=py36_1003
|
||||
- click=7.1.2=py_0
|
||||
- colorama=0.4.4=py_0
|
||||
- cryptography=3.1.1=py36h7a1dbc1_0
|
||||
- cudatoolkit=10.1.243=h74a9793_0
|
||||
- cudnn=7.6.5=cuda10.1_0
|
||||
- cycler=0.10.0=py36haa95532_0
|
||||
- dataclasses=0.8=pyh4f3eec9_6
|
||||
- decorator=4.4.2=py_0
|
||||
- defusedxml=0.6.0=py_0
|
||||
- entrypoints=0.3=py36_0
|
||||
- freetype=2.10.4=hd328e21_0
|
||||
- gast=0.2.2=py36_0
|
||||
- google-auth=1.22.1=py_0
|
||||
- google-auth-oauthlib=0.4.1=py_2
|
||||
- google-pasta=0.2.0=py_0
|
||||
- grpcio=1.31.0=py36he7da953_0
|
||||
- h5py=2.10.0=py36h5e291fa_0
|
||||
- hdf5=1.10.4=h7ebc959_0
|
||||
- icc_rt=2019.0.0=h0cc432a_1
|
||||
- icu=58.2=vc14hc45fdbb_0
|
||||
- idna=2.10=py_0
|
||||
- idna_ssl=1.1.0=py36_0
|
||||
- importlib-metadata=2.0.0=py_1
|
||||
- importlib_metadata=2.0.0=1
|
||||
- intel-openmp=2020.2=254
|
||||
- ipykernel=5.3.4=py36h5ca1d4c_0
|
||||
- ipython=7.16.1=py36h5ca1d4c_0
|
||||
- ipython_genutils=0.2.0=py36h3c5d0ee_0
|
||||
- ipywidgets=7.5.1=py_1
|
||||
- jedi=0.18.0=py36haa95532_1
|
||||
- jinja2=2.11.2=py_0
|
||||
- jpeg=9b=hb83a4c4_2
|
||||
- jsonschema=3.2.0=py_2
|
||||
- jupyter=1.0.0=py36_7
|
||||
- jupyter_client=6.1.7=py_0
|
||||
- jupyter_console=6.2.0=py_0
|
||||
- jupyter_core=4.6.3=py36_0
|
||||
- jupyterlab_pygments=0.1.2=py_0
|
||||
- keras-applications=1.0.8=py_1
|
||||
- keras-preprocessing=1.1.0=py_1
|
||||
- kiwisolver=1.3.1=py36hd77b12b_0
|
||||
- libpng=1.6.37=h2a8f88b_0
|
||||
- libprotobuf=3.13.0.1=h200bbdf_0
|
||||
- libsodium=1.0.18=h62dcd97_0
|
||||
- libtiff=4.2.0=hd0e1b90_0
|
||||
- libuv=1.40.0=he774522_0
|
||||
- lz4-c=1.9.3=h2bbff1b_1
|
||||
- m2w64-gcc-libgfortran=5.3.0=6
|
||||
- m2w64-gcc-libs=5.3.0=7
|
||||
- m2w64-gcc-libs-core=5.3.0=7
|
||||
- m2w64-gmp=6.1.0=2
|
||||
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
|
||||
- markdown=3.3.2=py36_0
|
||||
- markupsafe=1.1.1=py36he774522_0
|
||||
- matplotlib=3.3.4=py36haa95532_0
|
||||
- matplotlib-base=3.3.4=py36h49ac443_0
|
||||
- mistune=0.8.4=py36he774522_0
|
||||
- mkl=2019.4=245
|
||||
- mkl-service=2.3.0=py36hb782905_0
|
||||
- mkl_fft=1.2.0=py36h45dec08_0
|
||||
- mkl_random=1.0.4=py36h343c172_0
|
||||
- msys2-conda-epoch=20160418=1
|
||||
- multidict=4.7.6=py36he774522_1
|
||||
- nbclient=0.5.1=py_0
|
||||
- nbconvert=6.0.7=py36_0
|
||||
- nbformat=5.0.8=py_0
|
||||
- nest-asyncio=1.4.1=py_0
|
||||
- ninja=1.10.2=h6d14046_1
|
||||
- notebook=6.1.4=py36_0
|
||||
- numpy=1.19.1=py36h5510c5b_0
|
||||
- numpy-base=1.19.1=py36ha3acd2a_0
|
||||
- oauthlib=3.1.0=py_0
|
||||
- olefile=0.46=py36_0
|
||||
- openssl=1.1.1l=h2bbff1b_0
|
||||
- opt_einsum=3.1.0=py_0
|
||||
- packaging=20.4=py_0
|
||||
- pandas=1.1.3=py36ha925a31_0
|
||||
- pandoc=2.11=h9490d1a_0
|
||||
- pandocfilters=1.4.2=py36_1
|
||||
- parso=0.8.0=py_0
|
||||
- pickleshare=0.7.5=py36_0
|
||||
- pillow=8.0.0=py36hca74424_0
|
||||
- pip=21.0.1=py36haa95532_0
|
||||
- prometheus_client=0.8.0=py_0
|
||||
- prompt-toolkit=3.0.8=py_0
|
||||
- prompt_toolkit=3.0.8=0
|
||||
- protobuf=3.13.0.1=py36ha925a31_1
|
||||
- pyasn1=0.4.8=py_0
|
||||
- pyasn1-modules=0.2.8=py_0
|
||||
- pycparser=2.20=py_2
|
||||
- pygments=2.7.1=py_0
|
||||
- pyjwt=1.7.1=py36_0
|
||||
- pyopengl=3.1.1a1=py36_0
|
||||
- pyopenssl=19.1.0=py_1
|
||||
- pyparsing=2.4.7=py_0
|
||||
- pyqt=5.9.2=py36ha878b3d_0
|
||||
- pyreadline=2.1=py36_1
|
||||
- pyrsistent=0.17.3=py36he774522_0
|
||||
- pysocks=1.7.1=py36_0
|
||||
- python=3.6.13=h3758d61_0
|
||||
- python-dateutil=2.8.1=py_0
|
||||
- pytorch=1.8.1=py3.6_cuda10.1_cudnn7_0
|
||||
- pytz=2020.1=py_0
|
||||
- pywin32=227=py36he774522_1
|
||||
- pywinpty=0.5.7=py36_0
|
||||
- pyzmq=19.0.2=py36ha925a31_1
|
||||
- qt=5.9.7=vc14h73c81de_0
|
||||
- qtconsole=4.7.7=py_0
|
||||
- qtpy=1.9.0=py_0
|
||||
- requests=2.24.0=py_0
|
||||
- requests-oauthlib=1.3.0=py_0
|
||||
- rsa=4.6=py_0
|
||||
- scipy=1.5.2=py36h9439919_0
|
||||
- send2trash=1.5.0=py36_0
|
||||
- setuptools=58.0.4=py36haa95532_0
|
||||
- sip=4.19.24=py36ha925a31_0
|
||||
- six=1.15.0=py_0
|
||||
- sqlite=3.36.0=h2bbff1b_0
|
||||
- tensorboard=2.2.1=pyh532a8cf_0
|
||||
- tensorboard-plugin-wit=1.6.0=py_0
|
||||
- tensorflow=2.1.0=gpu_py36h3346743_0
|
||||
- tensorflow-base=2.1.0=gpu_py36h55f5790_0
|
||||
- tensorflow-estimator=2.6.0=pyh7b7c402_0
|
||||
- tensorflow-gpu=2.1.0=h0d30ee6_0
|
||||
- termcolor=1.1.0=py36_1
|
||||
- terminado=0.9.1=py36_0
|
||||
- testpath=0.4.4=py_0
|
||||
- tk=8.6.11=h2bbff1b_0
|
||||
- torchvision=0.9.1=py36_cu101
|
||||
- tornado=6.0.4=py36he774522_1
|
||||
- traitlets=4.3.3=py36_0
|
||||
- typing_extensions=3.7.4.3=py_0
|
||||
- urllib3=1.25.11=py_0
|
||||
- vc=14.2=h21ff451_1
|
||||
- vs2015_runtime=14.27.29016=h5e58377_2
|
||||
- wcwidth=0.2.5=py_0
|
||||
- webencodings=0.5.1=py36_1
|
||||
- werkzeug=0.14.1=py36_0
|
||||
- wheel=0.37.0=pyhd3eb1b0_1
|
||||
- widgetsnbextension=3.5.1=py36_0
|
||||
- win_inet_pton=1.1.0=py36_0
|
||||
- wincertstore=0.2=py36h7fe50ca_0
|
||||
- winpty=0.4.3=4
|
||||
- wrapt=1.12.1=py36he774522_1
|
||||
- xz=5.2.5=h62dcd97_0
|
||||
- yarl=1.6.2=py36he774522_0
|
||||
- zeromq=4.3.2=ha925a31_3
|
||||
- zipp=3.3.1=py_0
|
||||
- zlib=1.2.11=vc14h1cdd9ab_1
|
||||
- zstd=1.4.9=h19a0ad4_0
|
||||
- pip:
|
||||
- bidict==0.21.3
|
||||
- dlib==19.22.1
|
||||
- imageio==2.9.0
|
||||
- imageio-ffmpeg==0.4.5
|
||||
- joblib==1.1.0
|
||||
- lru-dict==1.1.7
|
||||
- moviepy==1.0.3
|
||||
- opencv-python==4.5.3.56
|
||||
- overrides==6.1.0
|
||||
- proglog==0.1.9
|
||||
- pupil-apriltags==1.0.4
|
||||
- pupil-pthreads-win==2
|
||||
- scikit-learn==0.24.2
|
||||
- threadpoolctl==3.0.0
|
||||
- tqdm==4.62.3
|
||||
- typing-utils==0.1.0
|
||||
|
19
processing/install_RTGene.py
Normal file
19
processing/install_RTGene.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
import os
|
||||
import subprocess
|
||||
|
||||
def main():
    """Clone RT-GENE, move its package and model folders into place, fetch the models.

    Steps (all relative to the current working directory):
      1. ``git clone`` the rt_gene repository and rename the checkout to
         ``rt_gene_GIT``.
      2. Move ``rt_gene_GIT/rt_gene/src/rt_gene`` to ``./rt_gene`` so the
         package can be imported from here.
      3. Move ``rt_gene_GIT/rt_gene/model_nets`` to ``../model_nets``.
      4. Import the download helpers from the moved package and run them.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` fails.
        OSError: If one of the moves fails.
    """
    import shutil  # local import: only needed by this installer entry point

    # Fail fast: every later step depends on the clone having succeeded.
    subprocess.run(['git', 'clone', 'https://github.com/Tobias-Fischer/rt_gene.git'], check=True)

    # shutil.move replaces the former `mv` shell calls, which are not available
    # in a plain Windows shell (the provided conda environment targets Windows).
    shutil.move('rt_gene', 'rt_gene_GIT')
    shutil.move(os.path.join('rt_gene_GIT', 'rt_gene', 'src', 'rt_gene'),
                os.path.join('.', 'rt_gene'))
    shutil.move(os.path.join('rt_gene_GIT', 'rt_gene', 'model_nets'),
                os.path.join('..', 'model_nets'))

    # Must be imported only now: the package did not exist here before the move.
    from rt_gene.download_tools import download_gaze_tensorflow_models, download_external_landmark_models
    download_gaze_tensorflow_models()
    download_external_landmark_models()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
94
processing/process_AprilTag.py
Normal file
94
processing/process_AprilTag.py
Normal file
|
@ -0,0 +1,94 @@
|
|||
|
||||
import os
|
||||
from sys import platform
|
||||
if platform == "linux" or platform == "linux2":
|
||||
# linux
|
||||
import apriltag
|
||||
elif platform == "darwin":
|
||||
# OS X
|
||||
import apriltag
|
||||
elif platform == "win32":
|
||||
# Windows
|
||||
import pupil_apriltags as apriltag
|
||||
|
||||
import cv2
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
|
||||
visualize = True
|
||||
|
||||
def process(file):
    """Detect AprilTags in the extracted frames of a video and pickle the result.

    The frame directory is derived from *file*: for ``<base>/<dir>/<name>.<ext>``
    the frames are expected in ``<base>/temp/<name>_frames``. The detections
    are written to ``./AprilTag_<name>.pkl`` as a DataFrame with one row per
    frame index and one column per tag id; each cell holds ``[cx, cy]``.

    Args:
        file: Path of the video, using ``/`` as separator.

    Returns:
        None. Prints a warning and returns early when the frame directory is
        missing or the platform is not supported.
    """
    VIDEO = file
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)

    if not os.path.exists(FRAMES):
        print('WARNING: Could not find frame directory')
        return

    # os.listdir() returns entries in arbitrary order; sort so that the
    # enumerate() index below follows the file-name order.
    # NOTE(review): assumes frame file names sort in frame order - confirm.
    img_paths = sorted(f for f in os.listdir(FRAMES) if 'jpg' in f)
    print('Number of frames: ', len(img_paths))

    if platform in ("linux", "linux2", "darwin"):
        # Circumvent error: too many borders in contour_detect (max of 32767!)
        options = apriltag.DetectorOptions(refine_edges=False, quad_contours=False)
        detector = apriltag.Detector(options)
    elif platform == "win32":
        print('WARNING: apriltag2 not supported on windows, running with pupil_apriltags...')
        detector = apriltag.Detector(refine_edges=False)
    else:
        # No apriltag backend was imported for this platform, so there is
        # nothing to build a detector from.
        print('WARNING: platform not supported')
        return

    if visualize and img_paths:
        plt.figure(figsize=(15, 10))
        first = cv2.imread(os.path.join(FRAMES, img_paths[0]))
        image = cv2.cvtColor(first, cv2.COLOR_BGR2RGB)
        gray = cv2.cvtColor(first, cv2.COLOR_BGR2GRAY)

        for detection in detector.detect(gray):
            cx, cy = detection.center
            # Draw on the RGB image that is actually displayed below.
            image = cv2.circle(image, (int(cx), int(cy)), 50, (255, 255, 0), thickness=10)

        plt.imshow(image)
        plt.axis('off')
        plt.show()

    detections = {}
    # Last seen centre per tag id. The dict is created once before the loop,
    # so positions carry over to frames where a tag is not detected.
    tags = dict()
    for frame, p in enumerate(img_paths):
        img = cv2.imread(os.path.join(FRAMES, p))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        result = detector.detect(img)
        for detection in result:
            cx, cy = detection.center
            tags[detection.tag_id] = [cx, cy]

        print('Frame %i found %i tags' % (frame, len(result)))
        # Store a snapshot: storing `tags` itself would make every frame
        # reference the same dict and therefore show only the final state.
        detections[frame] = dict(tags)

    df = pd.DataFrame.from_dict(detections, orient='index')

    path = './AprilTag_%s.pkl' % VIDEOOUT
    df.to_pickle(path)
    print('Saved AprilTag detections to %s' % path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
process('./Data/ShowCase_3.mp4')
|
412
processing/process_OpenPose.py
Normal file
412
processing/process_OpenPose.py
Normal file
|
@ -0,0 +1,412 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# # OpenPose pose detection
|
||||
# 2D real-time multi-person keypoint detection:
|
||||
# **18**-keypoint body/foot keypoint estimation. Running time invariant to number of detected people
|
||||
# see https://github.com/CMU-Perceptual-Computing-Lab/openpose
|
||||
#
|
||||
# ## Pipeline
|
||||
# - Run 18-keypoint model on video frames
|
||||
# - Parse keypoints and PAFs to generate personwise keypoints
|
||||
# - Save results to OpenPose.pkl
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import cv2
|
||||
import sys
|
||||
from sys import platform
|
||||
import time
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from tqdm import tqdm, tqdm_pandas
|
||||
|
||||
tqdm.pandas()
|
||||
|
||||
from multiprocessing import cpu_count
|
||||
from multiprocessing import Pool
|
||||
import itertools
|
||||
import os
|
||||
|
||||
# Runtime configuration read by CUDA / OpenCV through environment variables.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "1",
    "OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES": "1",
})

# Keypoint index pairs that form a limb; entry k belongs to mapIdx[k].
POSE_PAIRS = [
    [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7],
    [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
    [1, 0], [0, 14], [14, 16], [0, 15], [15, 17],
    [2, 17], [5, 16],
]

# Name of each of the 18 keypoints, by keypoint index.
keypointsMapping = [
    'Nose', 'Neck',
    'R-Sho', 'R-Elb', 'R-Wr',
    'L-Sho', 'L-Elb', 'L-Wr',
    'R-Hip', 'R-Knee', 'R-Ank',
    'L-Hip', 'L-Knee', 'L-Ank',
    'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear',
]

# Network output channels holding the two PAF components of each limb.
mapIdx = [
    [31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
    [19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
    [47, 48], [49, 50], [53, 54], [51, 52], [55, 56],
    [37, 38], [45, 46],
]

# Drawing colors as 0-255 triples, one row per entry.
colors = np.array([
    (0, 100, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255),
    (0, 255, 0), (255, 200, 100), (255, 0, 255), (0, 255, 0), (255, 200, 100), (255, 0, 255),
    (0, 0, 255), (255, 0, 0), (200, 200, 0), (255, 0, 0), (200, 200, 0), (0, 0, 0),
])

# The same colors as a list of plain tuples.
my_color = list(map(tuple, colors))
|
||||
|
||||
|
||||
# ## Auxiliary Functions
|
||||
# see https://www.learnopencv.com/deep-learning-based-human-pose-estimation-using-opencv-cpp-python/
|
||||
def getKeypoints(probMap, threshold=0.8):
    """Return one keypoint per connected blob of a confidence map.

    The map is smoothed, thresholded, and split into contours; for every
    contour the location of the maximum of the smoothed map inside the
    (convex-filled) contour is taken as the keypoint.

    Args:
        probMap: 2-D confidence map for one body part.
        threshold: Smoothed values above this count as part of a blob.

    Returns:
        List of ``(x, y, prob)`` tuples, where ``prob`` is read from the
        unsmoothed ``probMap`` at the keypoint location.
    """
    mapSmooth = cv2.GaussianBlur(probMap, (3, 3), 0, 0)
    mapMask = np.uint8(mapSmooth > threshold)

    # cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x. The contours are second from last
    # in both layouts, so index from the end instead of unpacking a fixed
    # number of values ("not enough values to unpack" otherwise).
    contours = cv2.findContours(mapMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    keypoints = []
    for cnt in contours:
        # Mask out everything but the current blob, then take its maximum.
        blobMask = cv2.fillConvexPoly(np.zeros(mapMask.shape), cnt, 1)
        maskedProbMap = mapSmooth * blobMask
        _, _, _, maxLoc = cv2.minMaxLoc(maskedProbMap)
        keypoints.append(maxLoc + (probMap[maxLoc[1], maxLoc[0]],))
    return keypoints
|
||||
|
||||
|
||||
# Find valid connections between the different joints of all persons present
|
||||
def getValidPairs(output, detected_keypoints, frameWidth, frameHeight):
    """Match keypoint candidates into limbs using the part affinity fields.

    For every limb (entry of ``POSE_PAIRS`` / ``mapIdx``) each candidate of
    the first joint is paired with the best-scoring candidate of the second
    joint. A pairing is accepted only if more than ``conf_th`` of the points
    sampled along the connecting line have a PAF projection above
    ``paf_score_th``; among accepted pairings the highest mean projection wins.

    Args:
        output: Network output for one frame, indexed ``[channel, row, col]``.
        detected_keypoints: Per body part, a list of ``(x, y, prob, id)``.
        frameWidth: Width the PAF channels are resized to.
        frameHeight: Height the PAF channels are resized to.

    Returns:
        ``(valid_pairs, invalid_pairs)``. ``valid_pairs[k]`` is an ``(n, 3)``
        array of ``[id_a, id_b, score]`` rows, or ``[]`` when one of the two
        joints has no candidates; ``invalid_pairs`` lists those limb indices.
    """
    n_interp_samples = 10
    paf_score_th = 0.1
    conf_th = 0.7

    valid_pairs = []
    invalid_pairs = []

    for limb_idx in range(len(mapIdx)):
        # The two PAF channels of this limb, scaled to frame size.
        pafA = output[mapIdx[limb_idx][0], :, :]
        pafB = output[mapIdx[limb_idx][1], :, :]
        pafA = cv2.resize(pafA, (frameWidth, frameHeight))
        pafB = cv2.resize(pafB, (frameWidth, frameHeight))

        candA = detected_keypoints[POSE_PAIRS[limb_idx][0]]
        candB = detected_keypoints[POSE_PAIRS[limb_idx][1]]

        if len(candA) == 0 or len(candB) == 0:
            # One end of the limb was not detected at all.
            invalid_pairs.append(limb_idx)
            valid_pairs.append([])
            continue

        connections = np.zeros((0, 3))
        for start in candA:
            best_j = -1
            best_score = -1
            for j, end in enumerate(candB):
                # Unit vector from start to end; coinciding points are skipped.
                direction = np.subtract(end[:2], start[:2])
                length = np.linalg.norm(direction)
                if not length:
                    continue
                direction = direction / length

                # Sample the PAF at evenly spaced points along the segment.
                xs = np.linspace(start[0], end[0], num=n_interp_samples)
                ys = np.linspace(start[1], end[1], num=n_interp_samples)
                paf_interp = [
                    [pafA[int(round(y)), int(round(x))],
                     pafB[int(round(y)), int(round(x))]]
                    for x, y in zip(xs, ys)
                ]

                # Projection of the field onto the limb direction.
                paf_scores = np.dot(paf_interp, direction)
                avg_paf_score = sum(paf_scores) / len(paf_scores)

                # Fraction of samples aligned with the field must exceed conf_th.
                aligned = len(np.where(paf_scores > paf_score_th)[0]) / n_interp_samples
                if aligned > conf_th and avg_paf_score > best_score:
                    best_j = j
                    best_score = avg_paf_score

            if best_j >= 0:
                connections = np.append(
                    connections, [[start[3], candB[best_j][3], best_score]], axis=0)

        valid_pairs.append(connections)

    return valid_pairs, invalid_pairs
|
||||
|
||||
|
||||
# This function creates a list of keypoints belonging to each person
|
||||
# For each detected valid pair, it assigns the joint(s) to a person
|
||||
def getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list):
    """Group limb connections into one keypoint-index row per person.

    Args:
        valid_pairs: Per limb, an ``(n, 3)`` array of ``[id_a, id_b, score]``.
        invalid_pairs: Limb indices that have no connections and are skipped.
        keypoints_list: Array whose row ``id`` holds a keypoint; column 2 is
            added to the person score.

    Returns:
        Array with one row of 19 values per person: positions 0-17 hold the
        keypoint id of each body part (``-1`` if missing) and the last
        position holds the accumulated score.
    """
    persons = -1 * np.ones((0, 19))

    for limb_idx in range(len(mapIdx)):
        if limb_idx in invalid_pairs:
            continue

        pairs = valid_pairs[limb_idx]
        partAs = pairs[:, 0]
        partBs = pairs[:, 1]
        indexA, indexB = np.array(POSE_PAIRS[limb_idx])

        for i in range(len(pairs)):
            # First person that already owns the limb's starting keypoint.
            owner = next(
                (j for j in range(len(persons)) if persons[j][indexA] == partAs[i]),
                None,
            )

            if owner is not None:
                persons[owner][indexB] = partBs[i]
                persons[owner][-1] += keypoints_list[partBs[i].astype(int), 2] + pairs[i][2]
            elif limb_idx < 17:
                # Nobody owns the start keypoint yet: open a new person row.
                # The last two limbs never create a person.
                row = -1 * np.ones(19)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                # Scores of both keypoints plus the connection score.
                row[-1] = sum(keypoints_list[pairs[i, :2].astype(int), 2]) + pairs[i][2]
                persons = np.vstack([persons, row])

    return persons
|
||||
|
||||
|
||||
def f(probMap):
    """Run ``getKeypoints`` on *probMap* with a fixed threshold of 0.5."""
    return getKeypoints(probMap, 0.5)
|
||||
|
||||
|
||||
def getPose(output, frameWidth=None, frameHeight=None):
    """Turn the network output of one frame into person-wise keypoints.

    Args:
        output: Network output for one frame, indexed ``[channel, row, col]``;
            channels 0-17 are read as the body-part confidence maps.
        frameWidth: Width the maps are resized to. When omitted, the module
            global of the same name is used.
        frameHeight: Height the maps are resized to. When omitted, the module
            global of the same name is used.

    Returns:
        Tuple ``(detected_keypoints, keypoints_list, personwiseKeypoints)``:
        per-part lists of ``(x, y, prob, id)``, an ``(n, 3)`` array of all
        ``(x, y, prob)`` rows ordered by id, and the result of
        ``getPersonwiseKeypoints``.

    Raises:
        ValueError: If the frame size is neither passed nor set globally.
    """
    # The frame size used to be read from undefined names, which raised a
    # NameError. Accept it explicitly and fall back to module globals so
    # existing one-argument calls keep working once those are set.
    if frameWidth is None:
        frameWidth = globals().get('frameWidth')
    if frameHeight is None:
        frameHeight = globals().get('frameHeight')
    if frameWidth is None or frameHeight is None:
        raise ValueError('getPose needs frameWidth and frameHeight '
                         '(pass them or set the module-level variables)')

    threshold = 0.5
    keypointsList = []
    for part in range(18):
        probMap = cv2.resize(output[part, :, :], (frameWidth, frameHeight))
        keypointsList.append(getKeypoints(probMap, threshold))

    detected_keypoints = []
    flat_keypoints = []
    keypoint_id = 0
    for keypoints in keypointsList:  # nPoints = 18
        keypoints_with_id = []
        for keypoint in keypoints:
            # Ids are assigned consecutively across all body parts and are
            # the row index into keypoints_list.
            keypoints_with_id.append(keypoint + (keypoint_id,))
            flat_keypoints.append(keypoint)
            keypoint_id += 1
        detected_keypoints.append(keypoints_with_id)

    # Built in one step instead of one np.vstack per keypoint.
    keypoints_list = np.array(flat_keypoints, dtype=float).reshape(-1, 3)

    valid_pairs, invalid_pairs = getValidPairs(output, detected_keypoints, frameWidth, frameHeight)
    personwiseKeypoints = getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list)

    return detected_keypoints, keypoints_list, personwiseKeypoints
|
||||
|
||||
|
||||
"""Forward array of 20 images"""
|
||||
|
||||
|
||||
def getPoseFromDNN(net, images, frameWidth, frameHeight):
    """Run the pose network on a batch of images and return its raw output.

    Args:
        net: Network object providing ``setInput`` and ``forward``.
        images: Sequence of equally sized images forming one batch.
        frameWidth: Width of the input frames (sets the blob aspect ratio).
        frameHeight: Height of the input frames.

    Returns:
        Whatever ``net.forward()`` returns for the batch.
    """
    # The network input height is fixed; the width follows the frame's
    # aspect ratio.
    blob_height = 368
    blob_width = int((blob_height / frameHeight) * frameWidth)

    batch = np.array(images)
    blob = cv2.dnn.blobFromImages(batch, 1.0 / 255, (blob_width, blob_height), (0, 0, 0),
                                  swapRB=False, crop=False)

    net.setInput(blob)
    return net.forward()
|
||||
|
||||
|
||||
def visualize(image, df, frame):
    """Show *image* with the skeleton of every ID drawn for one frame.

    Args:
        image: Image passed to ``plt.imshow``.
        df: DataFrame with one ``ID<n>_Keypoints`` column per person.
        frame: Positional row index (``.iloc``) of the frame to draw.
    """
    number_ids = len([col for col in df.columns if 'ID' in col])
    plt.imshow(image)
    plt.axis('off')

    for id_no in range(number_ids):
        keypoints = df['ID%i_Keypoints' % id_no].iloc[frame]
        # IDs without a detection in this frame have no array to index.
        if not isinstance(keypoints, np.ndarray):
            continue

        for i, index in enumerate(POSE_PAIRS):
            A, B = keypoints[index]
            if A is None or B is None:
                continue
            # `colors` has fewer rows than POSE_PAIRS has limbs, so wrap the
            # index; matplotlib needs RGB in 0-1, `colors` stores 0-255.
            line_color = tuple(colors[i % len(colors)] / 255.0)
            plt.plot((A[0], B[0]), (A[1], B[1]), c=line_color)

    plt.show()
|
||||
|
||||
def _as_object_array(items):
    """Return a 1-D ``dtype=object`` array holding *items* element by element."""
    # Filled one by one so numpy never tries to merge the (ragged) elements
    # into a multi-dimensional array.
    arr = np.empty(len(items), dtype=object)
    for idx, item in enumerate(items):
        arr[idx] = item
    return arr


def _last_known_keypoints(history, person_idx):
    """Return the most recent usable keypoints of one ID, or ``None``."""
    # "Usable" means the entry exists and its keypoint 1 is set, because that
    # keypoint is what the ID matching compares.
    for past_row in reversed(history):
        past = past_row[person_idx]
        if past is not None and past[1] is not None:
            return past
    return None


def process(file):
    """Run OpenPose on the extracted frames of a video and save body keypoints.

    For ``<base>/<dir>/<name>.<ext>`` the frames are read from
    ``<base>/temp/<name>_frames`` and three pickles are written to
    ``<base>/temp/``: ``<name>_OpenPose_raw.pkl`` (raw network output),
    ``<name>_OpenPose.pkl`` (plus parsed keypoints) and
    ``<name>_BodyMovement.pkl`` (keypoints per ID, with IDs kept consistent
    across frames). Finally the first frame is shown with its skeletons.

    Args:
        file: Path of the video, using ``/`` as separator.

    Returns:
        None. Prints a warning and returns early when the model files or the
        frames are missing, or when the platform is not supported.
    """
    # get_keypoints must be a module-level name so multiprocessing can refer
    # to it; frameWidth/frameHeight are published as module globals because
    # getPose() looks them up at module scope.
    # NOTE(review): the Pool below relies on workers seeing these globals
    # (fork start method) - confirm on macOS.
    global get_keypoints, frameWidth, frameHeight

    POSE_PROTO_FILE = r"openpose/pose_deploy_linevec.prototxt"
    POSE_WEIGHTS_FILE = r"openpose/pose_iter_440000.caffemodel"

    if not os.path.exists(POSE_PROTO_FILE):
        print('WARNING: Could not find pose file %s' % POSE_PROTO_FILE)
        return
    if not os.path.exists(POSE_WEIGHTS_FILE):
        print('WARNING: Could not find model weights file %s' % POSE_WEIGHTS_FILE)
        return

    VIDEO = file
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)

    if not os.path.exists(FRAMES):
        print('WARNING: Could not find frame directory')
        return

    # Load Model #
    net = cv2.dnn.readNetFromCaffe(POSE_PROTO_FILE, POSE_WEIGHTS_FILE)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    path_list = sorted(f for f in os.listdir(FRAMES) if '.jpg' in f)
    if not path_list:
        print('WARNING: No .jpg frames found in %s' % FRAMES)
        return

    # Kept separately: it defines the frame size and is the image shown at
    # the end together with the frame-0 keypoints.
    first_image = cv2.imread(os.path.join(FRAMES, path_list[0]))
    first_image = cv2.cvtColor(first_image, cv2.COLOR_BGR2RGB)
    frameWidth = first_image.shape[1]
    frameHeight = first_image.shape[0]

    # Forward the frames through the network in batches of 20.
    batch_size = 20
    raw_outputs = []
    images = []
    for path in tqdm(path_list):
        image = cv2.imread(os.path.join(FRAMES, path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        images.append(image)

        if len(images) == batch_size:
            raw_outputs.extend(getPoseFromDNN(net, images, frameWidth, frameHeight))
            images = []

    if images:
        raw_outputs.extend(getPoseFromDNN(net, images, frameWidth, frameHeight))
        images = []

    print(len(raw_outputs))
    df = pd.DataFrame(range(len(raw_outputs)))
    df.columns = ["Frame"]
    df["Pose"] = raw_outputs

    path = "%s%s_OpenPose_raw.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)

    def get_keypoints(frames):
        ret = []
        for f in frames:
            output = df[df.Frame == f].Pose.iloc[0]
            ret.append(getPose(output))
        return ret

    if platform in ("linux", "linux2", "darwin"):
        cores = max(4, cpu_count() - 4)
        print('Run sorting on {} cores'.format(cores))
        data_split = np.array_split(df.Frame, cores)
        pool = Pool(cores)
        try:
            chunks = pool.map(get_keypoints, data_split)
        finally:
            pool.close()
            pool.join()
        # One result list per chunk -> one flat list in frame order.
        results = list(itertools.chain.from_iterable(chunks))
    elif platform == "win32":
        print('WARNING: Can\'t run multiprocessing on Windows, this might take a while...')
        results = get_keypoints(df.Frame)
    else:
        # Without results there is nothing to parse or save below.
        print('WARNING: platform not supported')
        return

    # Each result is (detected_keypoints, keypoints_list, personwiseKeypoints).
    # Store them as object columns without stacking the ragged per-frame
    # results into one numpy array.
    df["DetectedKeypoints"] = _as_object_array([r[0] for r in results])
    df["KeypointsList"] = _as_object_array([r[1] for r in results])
    df["PersonwiseKeypoints"] = _as_object_array([r[2] for r in results])

    path = "%s%s_OpenPose.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)
    print("Saved OpenPose detections to %s" % path)

    # 'Pic' is never created in this function; errors='ignore' keeps the
    # clean-up from failing on columns that do not exist.
    df.drop(columns=["Pose", "Pic", "DetectedKeypoints"], inplace=True, errors='ignore')

    # The number of persons found in the first frame defines the number of IDs.
    number_ids = len(df.PersonwiseKeypoints.iloc[0])
    print('Number of detected IDs: ', number_ids)

    def keypoints_fun(x, id_no):
        """Map the person-wise keypoint indices of one ID to keypoints."""
        # Discard frames where not all ids detected
        if len(x.PersonwiseKeypoints) < number_ids:
            return None
        # A trailing None makes index -1 (no detection) resolve to None.
        lookup = _as_object_array(list(x.KeypointsList) + [None])
        return lookup[x.PersonwiseKeypoints[id_no].astype(int)[:18]]

    for id_no in range(number_ids):
        print('ID%i' % id_no)
        df['ID%i_Keypoints' % id_no] = df.apply(keypoints_fun, axis=1, args=(id_no,))

    # Sort IDs to be consistent throughout video
    id_columns = [col for col in df.columns if 'ID' in col]
    data = df[id_columns]

    # Every row is [keypoints of ID 0, ..., keypoints of ID n-1, frame].
    history = [list(data.iloc[0].values) + [0]]

    for i in range(1, len(df.Frame)):
        row = data.iloc[i]
        candidates = []
        for ids in range(number_ids):
            keypoints = row['ID%i_Keypoints' % ids]

            if keypoints is not None and keypoints[1] is not None:
                for j in range(number_ids):
                    previous = _last_known_keypoints(history, j)
                    if previous is None:
                        # ID j has no usable earlier position: rank it last.
                        distance = np.inf
                    else:
                        distance = np.linalg.norm(np.array(keypoints[1]) - np.array(previous[1]))
                    candidates.append([ids, j, distance, keypoints])
            else:
                candidates.append([ids, None, None, None])

        # Greedy assignment: closest pairs first, each ID used at most once.
        dfX = pd.DataFrame(candidates)
        dfX.columns = ["Id", "GtId", "Distance", "Keypoints"]
        dfX = dfX.sort_values("Distance")
        dfX = dfX.drop_duplicates("GtId").drop_duplicates("Id")

        sorted_row = []
        for j in range(number_ids):
            match = dfX[dfX.GtId == j]
            sorted_row.append(match.iloc[0].Keypoints if len(match) > 0 else None)
        sorted_row.append(i)
        history.append(sorted_row)

    df_new = pd.DataFrame(history)
    df_new.columns = ['ID%i_Keypoints' % i for i in range(number_ids)] + ["Frame"]
    df_new.Frame = df_new.Frame.fillna(0)
    df_new = df_new.astype({'Frame': 'int32'})

    path = "%s%s_BodyMovement.pkl" % (TMP_DIR, VIDEOOUT)
    df_new.to_pickle(path)
    print('Saved Body Movement to %s' % path)

    visualize(first_image, df_new, 0)
|
||||
|
||||
|
363
processing/process_RTGene.py
Normal file
363
processing/process_RTGene.py
Normal file
|
@ -0,0 +1,363 @@
|
|||
import tensorflow as tf
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.patches as patches
|
||||
from tqdm.notebook import tqdm
|
||||
|
||||
from rt_gene.gaze_tools import get_phi_theta_from_euler, limit_yaw
|
||||
from rt_gene.extract_landmarks_method_base import LandmarkMethodBase
|
||||
from rt_gene.estimate_gaze_base import GazeEstimatorBase
|
||||
from rt_gene.estimate_gaze_tensorflow import GazeEstimator
|
||||
from rt_gene.gaze_tools_standalone import euler_from_matrix
|
||||
|
||||
import itertools
|
||||
import pandas as pd
|
||||
|
||||
#os.environ["CUDA_VISIBLE_DEVICES"]="1"
|
||||
|
||||
def getCenter(box):
    """Return the midpoint of *box* as a length-2 array.

    The result is ``[(box[0] + box[2]) / 2, (box[1] + box[3]) / 2]``.
    """
    first_sum = box[2] + box[0]
    second_sum = box[3] + box[1]
    return np.array([first_sum, second_sum]) / 2
|
||||
|
||||
def load_camera_calibration(calibration_file):
    """Load distortion coefficients and camera matrix from a calibration file.

    Supported formats, selected by file extension (case-insensitive):

    * ``.pkl``: pickled mapping with the keys ``"distortion_coef"`` and
      ``"camera_matrix"``; the values are returned unchanged.
    * ``.yaml`` / ``.yml``: mapping with ``distortion_coefficients.data`` and
      ``camera_matrix.data``; returned as float32 arrays reshaped to
      ``(1, 5)`` and ``(3, 3)``.

    Args:
        calibration_file: Path of the calibration file.

    Returns:
        Tuple ``(dist_coefficients, camera_matrix)``.

    Raises:
        ValueError: If the file extension is not supported. Previously this
            case silently returned ``None``.
    """
    file_type = os.path.splitext(calibration_file)[1].lstrip(".").lower()

    if file_type == "pkl":
        import pickle
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        # The context manager closes the handle, which was leaked before.
        with open(calibration_file, 'rb') as infile:
            data = pickle.load(infile)
        return data["distortion_coef"], data["camera_matrix"]

    if file_type in ("yaml", "yml"):
        import yaml
        with open(calibration_file, 'r') as f:
            cal = yaml.safe_load(f)

        dist_coefficients = np.array(cal['distortion_coefficients']['data'], dtype='float32').reshape(1, 5)
        camera_matrix = np.array(cal['camera_matrix']['data'], dtype='float32').reshape(3, 3)
        return dist_coefficients, camera_matrix

    raise ValueError('Unsupported calibration file type %r (expected .pkl or .yaml): %s'
                     % (file_type, calibration_file))
|
||||
|
||||
|
||||
def extract_eye_image_patches(subjects, landmark_estimator):
    """Attach the left and right eye patches to every subject, in place.

    For each subject, ``get_eye_image_from_landmarks`` is called with the
    subject itself and the estimator's ``eye_image_size``. The first two of
    its four return values are stored as ``left_eye_color`` and
    ``right_eye_color``; the other two are ignored.
    """
    for person in subjects:
        patches = person.get_eye_image_from_landmarks(person, landmark_estimator.eye_image_size)
        left_patch, right_patch, _unused_a, _unused_b = patches
        person.left_eye_color = left_patch
        person.right_eye_color = right_patch
|
||||
|
||||
|
||||
def estimate_gaze(base_name, color_img, landmark_estimator, gaze_estimator, dist_coefficients, camera_matrix, args):
    """Estimate head pose and gaze for every face found in a single image.

    Args:
        base_name: Name of the frame in the form ``<name>_<frame>.<ext>``.
            The trailing integer becomes the ``Frame`` value and the part
            before it the ``File`` value of each returned record.
        color_img: Image handed to the face detector and landmark estimator.
        landmark_estimator: Object providing ``get_face_bb``,
            ``get_subjects_from_faceboxes``, ``model_points``,
            ``eye_image_size`` and ``visualize_headpose_result``.
        gaze_estimator: Object providing ``input_from_image``,
            ``estimate_gaze_twoeyes`` and ``visualize_eye_result``.
        dist_coefficients: Distortion coefficients passed to ``cv2.solvePnP``.
        camera_matrix: Camera matrix passed to ``cv2.solvePnP``.
        args: Dict holding the flags ``vis_headpose``, ``save_headpose``,
            ``vis_gaze``, ``save_gaze``, ``save_estimate`` and the directory
            ``output_path`` used by the ``save_*`` options.

    Returns:
        A list with one dict per subject for which eye patches and a head
        pose could be extracted (keys: File, Frame, SubjectId, HeadBox,
        Landmarks, GazeTheta, GazePhi, HeadPoseTheta, HeadPosePhi,
        HeadPoseRoll, HeadPosePitch, HeadPoseYaw). The list is empty when no
        face or no usable subject was found, so the result can always be
        chained or extended by the caller.
    """
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return []

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects, landmark_estimator)

    name_stem = os.path.splitext(base_name)[0]
    # The visualisation images are only consumed by the vis_*/save_* options,
    # so they are not rendered unless one of them is enabled.
    want_headpose_image = args['vis_headpose'] or args['save_headpose']
    want_gaze_image = args['vis_gaze'] or args['save_gaze']

    # Go from camera space to ROS space
    _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                      [-1.0, 0.0, 0.0, 0.0],
                      [0.0, -1.0, 0.0, 0.0],
                      [0.0, 0.0, 0.0, 1.0]]

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []
    roll_pitch_yaw_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            # Eye patches could not be extracted for this subject.
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1

        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)
        roll_pitch_yaw_list.append(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        if want_headpose_image:
            face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
            head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

            if args['vis_headpose']:
                plt.axis("off")
                plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
                plt.show()

            if args['save_headpose']:
                # NOTE: the file name depends on base_name only, so with several
                # subjects in one image each write replaces the previous one.
                cv2.imwrite(os.path.join(args['output_path'], name_stem + '_headpose.jpg'), head_pose_image)

        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return []

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)

    name_parts = name_stem.split("_")
    file_label = "_".join(name_parts[:-1])
    frame = int(name_parts[-1])

    ret = []
    for subject_id, gaze, headpose, roll_pitch_yaw in zip(valid_subject_list, gaze_est.tolist(), input_head_list, roll_pitch_yaw_list):
        subject = subjects[subject_id]

        if want_gaze_image:
            # Build visualizations
            r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
            l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
            s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

            if args['vis_gaze']:
                plt.axis("off")
                plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
                plt.show()

            if args['save_gaze']:
                # NOTE: same file name for every subject of this image (see above).
                cv2.imwrite(os.path.join(args['output_path'], name_stem + '_gaze.jpg'), s_gaze_img)

        if args['save_estimate']:
            # NOTE: opened with 'w+' under a per-image name, so only the last
            # subject's estimate remains in the file.
            with open(os.path.join(args['output_path'], name_stem + '_output.txt'), 'w+') as f:
                f.write(name_stem + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                        ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')

        # Phi: pos - look down, neg - look up
        # Theta: pos - rotate left, neg - rotate right
        ret.append({
            "File": file_label,
            "Frame": frame,
            "SubjectId": subject_id,
            "HeadBox": subject.box,
            "Landmarks": subject.landmarks,
            "GazeTheta": gaze[0],
            "GazePhi": gaze[1],
            "HeadPoseTheta": headpose[0],
            "HeadPosePhi": headpose[1],
            "HeadPoseRoll": roll_pitch_yaw[0],
            "HeadPosePitch": roll_pitch_yaw[1],
            "HeadPoseYaw": roll_pitch_yaw[2],
        })

    return ret
|
||||
|
||||
|
||||
def visualize(df, FRAMES):
    """Show the first extracted frame with the head boxes detected in frame 0.

    Args:
        df: Detection table with at least the columns ``Frame``,
            ``SubjectId`` and ``HeadBox``.
        FRAMES: Directory containing the extracted ``.jpg`` frames; the first
            file in sorted order is used as the background image.

    The function only draws. If no background image can be loaded it prints a
    warning and returns instead of raising, so an optional preview cannot
    abort the surrounding processing.
    """
    if not os.path.isdir(FRAMES):
        print('WARNING: Could not find frame directory %s, skipping visualization' % FRAMES)
        return

    frame_files = sorted(f for f in os.listdir(FRAMES) if '.jpg' in f)
    if not frame_files:
        print('WARNING: No .jpg frames found in %s, skipping visualization' % FRAMES)
        return

    image = cv2.imread(os.path.join(FRAMES, frame_files[0]))
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        print('WARNING: Could not read frame %s, skipping visualization' % frame_files[0])
        return
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    _, ax = plt.subplots(1, figsize=(18, 10))

    # Iterate over the ids that actually occur; they are not guaranteed to be
    # the contiguous range 0..n-1.
    for subject_id in sorted(df.SubjectId.unique()):
        bbox = df.loc[(df.Frame == 0) & (df.SubjectId == subject_id)]['HeadBox'].values
        print(bbox)
        if len(bbox) > 0 and not np.any(pd.isna(bbox)):
            bbox = np.array(bbox[0])
            rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1],
                                     linewidth=1, edgecolor='c', facecolor='none')
            plt.text(bbox[0], bbox[1], 'ID%i' % subject_id, color='c', fontsize=20)
            ax.add_patch(rect)

    ax.imshow(image)
    plt.show()
|
||||
|
||||
|
||||
def visualize_sorting(df_sorted):
    """Scatter-plot the horizontal head position of every person over time.

    Args:
        df_sorted: Table with the columns ``PId``, ``HeadCenter`` and
            ``Frame``. Rows without a ``PId`` are ignored.

    One series is drawn per person id: the x axis is the frame number, the
    y axis is element 0 of ``HeadCenter``.
    """
    assigned = df_sorted[~df_sorted.PId.isna()]
    for sid in sorted(assigned.PId.unique()):
        # Filter once per person instead of once per plotted column.
        person_rows = df_sorted[df_sorted.PId == sid]
        x = person_rows.HeadCenter.apply(lambda center: center[0])
        plt.scatter(person_rows.Frame.to_list(), x, alpha=.2, label="Sub %i" % sid)
    plt.legend()
    plt.show()
|
||||
|
||||
|
||||
def _wraparound_distance(center_a, center_b, frame_width):
    """Return the smaller of the direct and the seam-crossing distance.

    The point with the smaller x coordinate is additionally shifted right by
    ``frame_width``, so two heads close to opposite vertical image borders
    are still measured as near each other.
    """
    if center_a[0] > center_b[0]:
        low, high = np.array(center_b), np.array(center_a)
    else:
        low, high = np.array(center_a), np.array(center_b)

    direct = np.linalg.norm(low - high)
    low[0] = low[0] + frame_width
    wrapped = np.linalg.norm(low - high)
    return min(direct, wrapped)


def _assign_person_ids(df_sorted, maxPeople, frame_width):
    """Propagate the ``PId`` column from frame 0 to all later frames in place.

    For every frame, the most recent earlier frame in which exactly the ids
    0..maxPeople-1 are assigned serves as reference; each detection of the
    current frame receives the id of the nearest reference head center.
    Expects ``df_sorted`` to have a unique index.
    """
    expected_ids = np.arange(maxPeople)
    for frame_id in tqdm(sorted(df_sorted.Frame.unique())[1:]):
        assignments = []
        current_rows = df_sorted[df_sorted.Frame == frame_id]

        for earlier_id in range(frame_id - 1, -1, -1):
            reference = df_sorted[(df_sorted.Frame == earlier_id) & (~df_sorted.PId.isna())]
            if not np.array_equal(sorted(reference.PId.to_list()), expected_ids):
                continue

            for row_index, current in current_rows.iterrows():
                best_dist, best_pid = None, None
                for _, previous in reference.iterrows():
                    dist = _wraparound_distance(previous.HeadCenter, current.HeadCenter, frame_width)
                    # Strict comparison keeps the first of several equally
                    # near candidates.
                    if best_dist is None or dist < best_dist:
                        best_dist, best_pid = dist, previous.PId
                assignments.append((row_index, best_pid))
            break

        # Written back only after the whole frame was matched, so ids assigned
        # within a frame never influence each other.
        for row_index, pid in assignments:
            df_sorted.at[row_index, "PId"] = pid

    return df_sorted


def process(file, maxPeople, cameraRes=(5760, 2880)):
    """Run face, head-pose and gaze estimation on a video and save the result.

    Every frame of ``file`` is passed to ``estimate_gaze``; the detections
    are then linked over time into ``maxPeople`` person ids and pivoted into
    one row per frame with the columns ``ID<k>_<label>``. The table is
    pickled to ``<grandparent dir of file>/temp/<video name>_RTGene.pkl``.

    Args:
        file: Path to the video, using ``/`` as separator. The temp directory
            is derived from the directory two levels above the file.
        maxPeople: Number of people expected in the video. Tracking is seeded
            from the first frame in which exactly this many subjects are
            detected.
        cameraRes: Video resolution as ``(width, height)``. Only the width is
            used, for the wrap-around distance between head centers.

    Returns:
        None. Problems that prevent processing (missing or unreadable video,
        no detections, no frame showing all people) are reported with a
        printed warning.

    Raises:
        ValueError: If the configured gaze backend is unknown.
    """
    VIDEO = file
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)

    if not os.path.exists(VIDEO):
        print('WARNING: Could not find video file')
        return

    # Fail early rather than after the full video has been processed if the
    # output directory cannot be created.
    os.makedirs(TMP_DIR, exist_ok=True)

    script_path = "./"

    args = {}
    args["calib_file"] = "./calib_insta.pkl"
    args["vis_headpose"] = False  # store_false
    args["save_headpose"] = False  # store_false
    args["vis_gaze"] = False  # store_false
    args["save_gaze"] = False  # store_false
    args["save_estimate"] = False  # store_false
    args["device_id_facedetection"] = "cuda:0"  # store_false

    args["im_path"] = os.path.join(script_path, './samples_gaze/')
    args["output_path"] = os.path.join(script_path, './samples_gaze/')
    args["models"] = [os.path.join(script_path, '../model_nets/Model_allsubjects1.h5')]
    args['gaze_backend'] = 'tensorflow'

    tqdm.write('Loading networks')
    landmark_estimator = LandmarkMethodBase(device_id_facedetection=args["device_id_facedetection"],
                                            checkpoint_path_face=os.path.join(script_path,
                                                                              "../model_nets/SFD/s3fd_facedetector.pth"),
                                            checkpoint_path_landmark=os.path.join(script_path,
                                                                                  "../model_nets/phase1_wpdc_vdc.pth.tar"),
                                            model_points_file=os.path.join(script_path,
                                                                           "../model_nets/face_model_68.txt"))

    if args['gaze_backend'] == "tensorflow":
        from rt_gene.estimate_gaze_tensorflow import GazeEstimator
        gaze_estimator = GazeEstimator("/gpu:0", args['models'])
    elif args['gaze_backend'] == "pytorch":
        from rt_gene.estimate_gaze_pytorch import GazeEstimator
        gaze_estimator = GazeEstimator("cuda:0", args['models'])
    else:
        raise ValueError("Incorrect gaze_base backend, choices are: tensorflow or pytorch")

    if not os.path.isdir(args["output_path"]):
        os.makedirs(args["output_path"])

    video = cv2.VideoCapture(VIDEO)
    detections = []
    try:
        if not video.isOpened():
            print('WARNING: Could not open video file')
            return

        print('Video frame count: ', video.get(cv2.CAP_PROP_FRAME_COUNT))

        if args["calib_file"] is not None and os.path.exists(args["calib_file"]):
            _dist_coefficients, _camera_matrix = load_camera_calibration(args["calib_file"])
        else:
            im_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
            im_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
            print('WARNING!!! You should provide the camera calibration file, otherwise you might get bad results. \n\
            Using a crude approximation!')
            _dist_coefficients, _camera_matrix = np.zeros((1, 5)), np.array(
                [[im_height, 0.0, im_width / 2.0], [0.0, im_height, im_height / 2.0], [0.0, 0.0, 1.0]])

        video_stem = os.path.splitext(VIDEO)[0]
        for i in tqdm(range(int(video.get(cv2.CAP_PROP_FRAME_COUNT)))):
            grabbed, image = video.read()
            if not grabbed:
                # The reported frame count may exceed the number of frames
                # that can actually be decoded.
                tqdm.write('Could not read frame %i, stopping' % i)
                break
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            image_file_name = "%s_%i.XXX" % (video_stem, i)
            frame_result = estimate_gaze(image_file_name, image, landmark_estimator, gaze_estimator,
                                         _dist_coefficients, _camera_matrix, args)
            # Frames without any usable detection contribute nothing.
            if frame_result:
                detections.extend(frame_result)
    finally:
        video.release()

    if not detections:
        print('WARNING: No faces were detected in the video')
        return

    df = pd.DataFrame(detections)
    df["HeadCenter"] = df.HeadBox.apply(getCenter)
    df["Phi"] = df.GazePhi + df.HeadPosePhi  # gaze yaw
    df["Theta"] = df.GazeTheta + df.HeadPoseTheta  # gaze pitch
    df['Yaw'] = df.GazePhi + df.HeadPoseYaw
    df['Pitch'] = df.GazeTheta + df.HeadPosePitch

    visualize(df, FRAMES)

    # Sort ID detections
    ###############################################################################################################

    # Find first frame where all are detected
    first_frame = None
    for frame in sorted(df.Frame.unique()):
        frame_df = df.loc[df.Frame == frame]
        if len(frame_df['SubjectId'].unique()) == maxPeople:
            first_frame = frame
            print('First frame where all are detected: ', first_frame)
            break

    if first_frame is None:
        print('WARNING: Could not find a frame in which all %i people are detected' % maxPeople)
        return

    # Seed tracking: the detections of that frame replace whatever was found
    # in frame 0, so that frame 0 carries exactly one row per person.
    seed_rows = df.loc[df.Frame == first_frame].copy()
    seed_rows["Frame"] = 0
    df = pd.concat([df.loc[df.Frame != 0], seed_rows], ignore_index=True)

    # A stable sort keeps the row order inside each frame reproducible.
    df_sorted = df.sort_values("Frame", kind="mergesort").reset_index(drop=True)
    df_sorted["PId"] = None
    df_sorted.loc[df_sorted.Frame == 0, "PId"] = list(range(maxPeople))

    df_sorted = _assign_person_ids(df_sorted, maxPeople, cameraRes[0])

    visualize_sorting(df_sorted)

    del df_sorted["SubjectId"]

    # Rearrange DataFrame: each ID has specific columns
    ###############################################################################################################
    df_sorted = df_sorted[~df_sorted.PId.isna()].drop_duplicates(subset=['Frame', 'PId'])
    FACE_COUNT = len(df_sorted[~df_sorted.PId.isna()].PId.unique())

    df2 = df_sorted.pivot(index='Frame', columns="PId",
                          values=["Landmarks", "GazeTheta", "GazePhi", "HeadCenter", "HeadPoseTheta", "HeadPosePhi",
                                  "HeadPoseYaw", "HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"])

    # Same order as the pivoted values above; "HeadCenter" is exported under
    # the shorter label "Head".
    lst = []
    for label in ["Landmarks", "GazeTheta", "GazePhi", "Head", "HeadPoseTheta", "HeadPosePhi", "HeadPoseYaw",
                  "HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"]:
        for head_id in range(FACE_COUNT):
            lst.append("ID%i_%s" % (head_id, label))

    df2.columns = lst
    df2 = df2.reset_index()

    path = "%s%s_RTGene.pkl" % (TMP_DIR, VIDEOOUT)
    df2.to_pickle(path)
    print("Saved RT-Gene detections to %s" % path)
|
||||
|
Loading…
Reference in a new issue