Added processing

apenzko 2021-10-17 21:32:30 +02:00
parent 74df5cb3f0
commit 9e0ac5daa8
9 changed files with 2287 additions and 4 deletions

README.md

@@ -11,11 +11,12 @@ pip install requirements.txt
```
## Get Started
To test the GUI you can download our example use case videos from Google Drive: <br>
You can also download the respective processed ``.dat`` files, which include all the analyses. <br>
You can then run [main.py](main.py) and import the video file you would like to analyze.
## Processing
If you would like to analyze your own 360° video, you can find the processing pipeline at [processing/](processing).
Please note that the processing pipeline requires a GPU.
## Citation
Please cite this paper if you use ConAn or parts of this publication in your research:

exampledata/combine.py (new file, 24 lines)

@@ -0,0 +1,24 @@
#!/usr/bin/env python3
import pickle as pkl


def main():
    # Merge the individual analysis pickles into a single .dat file for the GUI.
    data = dict()
    with open('G2_VID4_BodyMovement.pkl', 'rb') as handle:
        data["BodyMovement"] = pkl.load(handle)
    with open('G2_VID4_HeadPose.pkl', 'rb') as handle:
        data["HeadPose"] = pkl.load(handle)
    with open('G2_VID4_JAActivityUnits_V2.pkl', 'rb') as handle:
        data['ActivityUnits'] = pkl.load(handle)
    with open('G2_VID4_RTGene.pkl', 'rb') as handle:
        data['RTGene'] = pkl.load(handle)
    with open("G2_VID4_speakDiar.pkl", 'rb') as handle:
        data["Speaker"] = pkl.load(handle)
    data["originalVideoResolution"] = (5760, 2880)
    with open('G2_VID4.dat', 'wb') as handle:
        pkl.dump(data, handle, protocol=pkl.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    main()
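Since ``G2_VID4.dat`` above is just a pickled dictionary, it can be inspected directly; a minimal sketch (not part of the commit), reusing the file name from the example:

```python
import pickle as pkl

# Load the combined analysis file produced by combine.py and list its contents.
with open('G2_VID4.dat', 'rb') as handle:
    data = pkl.load(handle)

print(sorted(data.keys()))               # ActivityUnits, BodyMovement, HeadPose, RTGene, Speaker, ...
print(data["originalVideoResolution"])   # (5760, 2880)
```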

File diff suppressed because one or more lines are too long

processing/README.md (new file, 23 lines)

@@ -0,0 +1,23 @@
# Processing Pipeline
## Conda Environment Setup
```
conda env create -f conan_windows.yml
conda activate conan_windows_env
```
### OpenPose
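- Download the OpenPose Caffe model files ``pose_deploy_linevec.prototxt`` and ``pose_iter_440000.caffemodel`` into an ``openpose/`` directory; these are the ``POSE_PROTO_FILE``/``POSE_WEIGHTS_FILE`` paths referenced by the OpenPose processing script below.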
### RT-Gene
- Run [processing/install_RTGene.py](/processing/install_RTGene.py)
- [OPTIONAL] Provide a camera calibration file ``calib.pkl`` (see the sketch below)
- Provide the maximum number of people in the video
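If you supply your own calibration, ``calib.pkl`` is expected to be a pickled dictionary with the keys read by ``load_camera_calibration`` in the RT-Gene processing script below; a minimal sketch with placeholder values (not a real calibration):

```python
import pickle
import numpy as np

# Placeholder intrinsics; replace with the values from your own camera calibration.
calibration = {
    "camera_matrix": np.array([[2880.0, 0.0, 2880.0],
                               [0.0, 2880.0, 1440.0],
                               [0.0, 0.0, 1.0]], dtype='float32'),
    "distortion_coef": np.zeros((1, 5), dtype='float32'),
}

with open('calib.pkl', 'wb') as handle:
    pickle.dump(calibration, handle)
```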
### JAA-Net
### AVA-Active Speaker
### Apriltag
- Install FFmpeg on Windows: [https://www.wikihow.com/Install-FFmpeg-on-Windows](https://www.wikihow.com/Install-FFmpeg-on-Windows)
### Training
```
conda install -c anaconda cupy
conda install -c anaconda chainer
conda install -c anaconda ipykernel
```
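After installing these packages, a quick sanity check (not part of the repository) can confirm that CuPy and Chainer see the GPU:

```python
import cupy
import chainer

# Both should report a working CUDA setup if the GPU environment is configured correctly.
print('CUDA devices visible to CuPy:', cupy.cuda.runtime.getDeviceCount())
print('Chainer CUDA available:', chainer.backends.cuda.available)
```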

conan_windows.yml (new file, 192 lines)

@@ -0,0 +1,192 @@
name: conan_windows_env
channels:
- pytorch
- anaconda
- defaults
dependencies:
- _tflow_select=2.1.0=gpu
- absl-py=0.10.0=py36_0
- aiohttp=3.6.3=py36he774522_0
- argon2-cffi=20.1.0=py36he774522_1
- astor=0.8.1=py36_0
- async-timeout=3.0.1=py36_0
- async_generator=1.10=py36h28b3542_0
- attrs=20.2.0=py_0
- backcall=0.2.0=py_0
- blas=1.0=mkl
- bleach=3.2.1=py_0
- blinker=1.4=py36_0
- brotlipy=0.7.0=py36he774522_1000
- ca-certificates=2021.9.30=haa95532_1
- cachetools=4.1.1=py_0
- certifi=2021.5.30=py36haa95532_0
- cffi=1.14.3=py36h7a1dbc1_0
- chardet=3.0.4=py36_1003
- click=7.1.2=py_0
- colorama=0.4.4=py_0
- cryptography=3.1.1=py36h7a1dbc1_0
- cudatoolkit=10.1.243=h74a9793_0
- cudnn=7.6.5=cuda10.1_0
- cycler=0.10.0=py36haa95532_0
- dataclasses=0.8=pyh4f3eec9_6
- decorator=4.4.2=py_0
- defusedxml=0.6.0=py_0
- entrypoints=0.3=py36_0
- freetype=2.10.4=hd328e21_0
- gast=0.2.2=py36_0
- google-auth=1.22.1=py_0
- google-auth-oauthlib=0.4.1=py_2
- google-pasta=0.2.0=py_0
- grpcio=1.31.0=py36he7da953_0
- h5py=2.10.0=py36h5e291fa_0
- hdf5=1.10.4=h7ebc959_0
- icc_rt=2019.0.0=h0cc432a_1
- icu=58.2=vc14hc45fdbb_0
- idna=2.10=py_0
- idna_ssl=1.1.0=py36_0
- importlib-metadata=2.0.0=py_1
- importlib_metadata=2.0.0=1
- intel-openmp=2020.2=254
- ipykernel=5.3.4=py36h5ca1d4c_0
- ipython=7.16.1=py36h5ca1d4c_0
- ipython_genutils=0.2.0=py36h3c5d0ee_0
- ipywidgets=7.5.1=py_1
- jedi=0.18.0=py36haa95532_1
- jinja2=2.11.2=py_0
- jpeg=9b=hb83a4c4_2
- jsonschema=3.2.0=py_2
- jupyter=1.0.0=py36_7
- jupyter_client=6.1.7=py_0
- jupyter_console=6.2.0=py_0
- jupyter_core=4.6.3=py36_0
- jupyterlab_pygments=0.1.2=py_0
- keras-applications=1.0.8=py_1
- keras-preprocessing=1.1.0=py_1
- kiwisolver=1.3.1=py36hd77b12b_0
- libpng=1.6.37=h2a8f88b_0
- libprotobuf=3.13.0.1=h200bbdf_0
- libsodium=1.0.18=h62dcd97_0
- libtiff=4.2.0=hd0e1b90_0
- libuv=1.40.0=he774522_0
- lz4-c=1.9.3=h2bbff1b_1
- m2w64-gcc-libgfortran=5.3.0=6
- m2w64-gcc-libs=5.3.0=7
- m2w64-gcc-libs-core=5.3.0=7
- m2w64-gmp=6.1.0=2
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
- markdown=3.3.2=py36_0
- markupsafe=1.1.1=py36he774522_0
- matplotlib=3.3.4=py36haa95532_0
- matplotlib-base=3.3.4=py36h49ac443_0
- mistune=0.8.4=py36he774522_0
- mkl=2019.4=245
- mkl-service=2.3.0=py36hb782905_0
- mkl_fft=1.2.0=py36h45dec08_0
- mkl_random=1.0.4=py36h343c172_0
- msys2-conda-epoch=20160418=1
- multidict=4.7.6=py36he774522_1
- nbclient=0.5.1=py_0
- nbconvert=6.0.7=py36_0
- nbformat=5.0.8=py_0
- nest-asyncio=1.4.1=py_0
- ninja=1.10.2=h6d14046_1
- notebook=6.1.4=py36_0
- numpy=1.19.1=py36h5510c5b_0
- numpy-base=1.19.1=py36ha3acd2a_0
- oauthlib=3.1.0=py_0
- olefile=0.46=py36_0
- openssl=1.1.1l=h2bbff1b_0
- opt_einsum=3.1.0=py_0
- packaging=20.4=py_0
- pandas=1.1.3=py36ha925a31_0
- pandoc=2.11=h9490d1a_0
- pandocfilters=1.4.2=py36_1
- parso=0.8.0=py_0
- pickleshare=0.7.5=py36_0
- pillow=8.0.0=py36hca74424_0
- pip=21.0.1=py36haa95532_0
- prometheus_client=0.8.0=py_0
- prompt-toolkit=3.0.8=py_0
- prompt_toolkit=3.0.8=0
- protobuf=3.13.0.1=py36ha925a31_1
- pyasn1=0.4.8=py_0
- pyasn1-modules=0.2.8=py_0
- pycparser=2.20=py_2
- pygments=2.7.1=py_0
- pyjwt=1.7.1=py36_0
- pyopengl=3.1.1a1=py36_0
- pyopenssl=19.1.0=py_1
- pyparsing=2.4.7=py_0
- pyqt=5.9.2=py36ha878b3d_0
- pyreadline=2.1=py36_1
- pyrsistent=0.17.3=py36he774522_0
- pysocks=1.7.1=py36_0
- python=3.6.13=h3758d61_0
- python-dateutil=2.8.1=py_0
- pytorch=1.8.1=py3.6_cuda10.1_cudnn7_0
- pytz=2020.1=py_0
- pywin32=227=py36he774522_1
- pywinpty=0.5.7=py36_0
- pyzmq=19.0.2=py36ha925a31_1
- qt=5.9.7=vc14h73c81de_0
- qtconsole=4.7.7=py_0
- qtpy=1.9.0=py_0
- requests=2.24.0=py_0
- requests-oauthlib=1.3.0=py_0
- rsa=4.6=py_0
- scipy=1.5.2=py36h9439919_0
- send2trash=1.5.0=py36_0
- setuptools=58.0.4=py36haa95532_0
- sip=4.19.24=py36ha925a31_0
- six=1.15.0=py_0
- sqlite=3.36.0=h2bbff1b_0
- tensorboard=2.2.1=pyh532a8cf_0
- tensorboard-plugin-wit=1.6.0=py_0
- tensorflow=2.1.0=gpu_py36h3346743_0
- tensorflow-base=2.1.0=gpu_py36h55f5790_0
- tensorflow-estimator=2.6.0=pyh7b7c402_0
- tensorflow-gpu=2.1.0=h0d30ee6_0
- termcolor=1.1.0=py36_1
- terminado=0.9.1=py36_0
- testpath=0.4.4=py_0
- tk=8.6.11=h2bbff1b_0
- torchvision=0.9.1=py36_cu101
- tornado=6.0.4=py36he774522_1
- traitlets=4.3.3=py36_0
- typing_extensions=3.7.4.3=py_0
- urllib3=1.25.11=py_0
- vc=14.2=h21ff451_1
- vs2015_runtime=14.27.29016=h5e58377_2
- wcwidth=0.2.5=py_0
- webencodings=0.5.1=py36_1
- werkzeug=0.14.1=py36_0
- wheel=0.37.0=pyhd3eb1b0_1
- widgetsnbextension=3.5.1=py36_0
- win_inet_pton=1.1.0=py36_0
- wincertstore=0.2=py36h7fe50ca_0
- winpty=0.4.3=4
- wrapt=1.12.1=py36he774522_1
- xz=5.2.5=h62dcd97_0
- yarl=1.6.2=py36he774522_0
- zeromq=4.3.2=ha925a31_3
- zipp=3.3.1=py_0
- zlib=1.2.11=vc14h1cdd9ab_1
- zstd=1.4.9=h19a0ad4_0
- pip:
- bidict==0.21.3
- dlib==19.22.1
- imageio==2.9.0
- imageio-ffmpeg==0.4.5
- joblib==1.1.0
- lru-dict==1.1.7
- moviepy==1.0.3
- opencv-python==4.5.3.56
- overrides==6.1.0
- proglog==0.1.9
- pupil-apriltags==1.0.4
- pupil-pthreads-win==2
- scikit-learn==0.24.2
- threadpoolctl==3.0.0
- tqdm==4.62.3
- typing-utils==0.1.0

processing/install_RTGene.py (new file, 19 lines)

@@ -0,0 +1,19 @@
import os
import subprocess


def main():
    # Clone RT-Gene and move its Python package and pretrained models into place.
    download_cmds = ['git clone https://github.com/Tobias-Fischer/rt_gene.git',
                     'mv ./rt_gene ./rt_gene_GIT',
                     'mv ./rt_gene_GIT/rt_gene/src/rt_gene/ ./',
                     'mv ./rt_gene_GIT/rt_gene/model_nets ./../model_nets']
    for cmd in download_cmds:
        subprocess.call(cmd, shell=True)
    # Download the pretrained gaze and landmark models via RT-Gene's own helpers.
    from rt_gene.download_tools import download_gaze_tensorflow_models, download_external_landmark_models
    download_gaze_tensorflow_models()
    download_external_landmark_models()


if __name__ == '__main__':
    main()
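Note that the last ``mv`` command places ``model_nets`` one directory above the current one, so the script is meant to be run from inside the [processing/](/processing) directory; the other processing scripts then load the models via ``../model_nets/``.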


@@ -0,0 +1,94 @@
import os
from sys import platform
if platform == "linux" or platform == "linux2":
# linux
import apriltag
elif platform == "darwin":
# OS X
import apriltag
elif platform == "win32":
# Windows
import pupil_apriltags as apriltag
import cv2
import matplotlib.pyplot as plt
import pandas as pd
visualize = True
def process(file):
VIDEO = file
VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
ROOT = "/".join(VIDEO.split("/")[:-1]) + "/"
TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)
if not os.path.exists(FRAMES):
print('WARNING: Could not find frame directory')
return
img_paths = [f for f in os.listdir(FRAMES) if 'jpg' in f]
print('Number of frames: ', len(img_paths))
if platform == "linux" or platform == "linux2" or platform == "darwin":
# Circumvent error: too many borders in contour_detect (max of 32767!)
options = apriltag.DetectorOptions(refine_edges=False, quad_contours=False)
detector = apriltag.Detector(options)
elif platform == "win32":
print('WARNING: apriltag2 not supported on windows, running with pupil_apriltags...')
detector = apriltag.Detector(refine_edges=False)
detections = {}
if visualize:
fig = plt.Figure(figsize=(15, 10))
path = os.path.join(FRAMES, img_paths[0])
img = cv2.imread(path)
image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
result = detector.detect(img)
for i in range(len(result)):
tf = result[i].tag_family
tag_id = result[i].tag_id
cx, cy = result[i].center
# print('Found tag: ', tag_id)
img = cv2.circle(img, (int(cx), int(cy)), 50, (255, 255, 0), thickness=10)
plt.imshow(image)
plt.axis('off')
#plt.savefig('./AprilTag_Detection_%s.jpg' % VIDEOOUT)
plt.show()
for frame, p in enumerate(img_paths):
tags = dict()  # collect the tag centers detected in this frame
path = os.path.join(FRAMES, p)
img = cv2.imread(path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
result = detector.detect(img)
for i in range(len(result)):
tf = result[i].tag_family
cx, cy = result[i].center
tag_id = result[i].tag_id
tags[tag_id] = [cx, cy]
print('Frame %i found %i tags' % (frame, len(result)))
detections[frame] = tags
df = pd.DataFrame.from_dict(detections, orient='index')
path = './AprilTag_%s.pkl' % VIDEOOUT
df.to_pickle(path)
print('Saved AprilTag detections to %s' % path)
if __name__ == '__main__':
process('./Data/ShowCase_3.mp4')
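The resulting pickle holds one row per frame and one column per detected tag id, with the ``[cx, cy]`` tag centers as values; a minimal sketch for inspecting it (the file name assumes the ``ShowCase_3`` example above):

```python
import pandas as pd

# Rows are frame indices, columns are AprilTag ids, cells are [cx, cy] tag centers.
detections = pd.read_pickle('./AprilTag_ShowCase_3.pkl')
print(detections.shape)
print(detections.head())
```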


@@ -0,0 +1,412 @@
#!/usr/bin/env python
# coding: utf-8
# # OpenPose pose detection
# 2D real-time multi-person keypoint detection:
# **18**-keypoint body/foot keypoint estimation. Running time invariant to number of detected people
# see https://github.com/CMU-Perceptual-Computing-Lab/openpose
#
# ## Pipeline
# - Run 18-keypoint model on video frames
# - Parse keypoints and PAFs to generate personwise keypoints
# - Save results to OpenPose.pkl
import os
import numpy as np
import cv2
import sys
from sys import platform
import time
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm, tqdm_pandas
tqdm.pandas()
from multiprocessing import cpu_count
from multiprocessing import Pool
import itertools
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES"] = "1"
POSE_PAIRS = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
[1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 17], [5, 16]]
keypointsMapping = ['Nose', 'Neck', 'R-Sho', 'R-Elb', 'R-Wr', 'L-Sho', 'L-Elb', 'L-Wr', 'R-Hip',
'R-Knee', 'R-Ank', 'L-Hip', 'L-Knee', 'L-Ank', 'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear']
mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
[19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
[47, 48], [49, 50], [53, 54], [51, 52], [55, 56],
[37, 38], [45, 46]]
colors = np.array([(0, 100, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255),
(0, 255, 0), (255, 200, 100), (255, 0, 255), (0, 255, 0), (255, 200, 100), (255, 0, 255),
(0, 0, 255), (255, 0, 0), (200, 200, 0), (255, 0, 0), (200, 200, 0), (0, 0, 0)])
my_color = []
for c in colors:
my_color.append(tuple(c))
# ## Auxiliary Functions
# see https://www.learnopencv.com/deep-learning-based-human-pose-estimation-using-opencv-cpp-python/
def getKeypoints(probMap, threshold=0.8):
mapSmooth = cv2.GaussianBlur(probMap, (3, 3), 0, 0)
mapMask = np.uint8(mapSmooth > threshold)
keypoints = []
# find the blobs
contours, _ = cv2.findContours(mapMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# for each blob find the maxima
# not enough values to unpack (expected 3, got 2)
# version issue: https://github.com/facebookresearch/maskrcnn-benchmark/issues/339
for cnt in contours:
blobMask = np.zeros(mapMask.shape)
blobMask = cv2.fillConvexPoly(blobMask, cnt, 1)
maskedProbMap = mapSmooth * blobMask
_, maxVal, _, maxLoc = cv2.minMaxLoc(maskedProbMap)
keypoints.append(maxLoc + (probMap[maxLoc[1], maxLoc[0]],))
return keypoints
# Find valid connections between the different joints of all persons present
def getValidPairs(output, detected_keypoints, frameWidth, frameHeight):
valid_pairs = []
invalid_pairs = []
n_interp_samples = 10
paf_score_th = 0.1
conf_th = 0.7
# loop for every POSE_PAIR
for k in range(len(mapIdx)):
# A->B constitute a limb
pafA = output[mapIdx[k][0], :, :]
pafB = output[mapIdx[k][1], :, :]
pafA = cv2.resize(pafA, (frameWidth, frameHeight))
pafB = cv2.resize(pafB, (frameWidth, frameHeight))
# Find the keypoints for the first and second limb
candA = detected_keypoints[POSE_PAIRS[k][0]]
candB = detected_keypoints[POSE_PAIRS[k][1]]
nA = len(candA)
nB = len(candB)
# If keypoints for the joint-pair is detected
# check every joint in candA with every joint in candB
# Calculate the distance vector between the two joints
# Find the PAF values at a set of interpolated points between the joints
# Use the above formula to compute a score to mark the connection valid
if (nA != 0 and nB != 0):
valid_pair = np.zeros((0, 3))
for i in range(nA):
max_j = -1
maxScore = -1
found = 0
for j in range(nB):
# Find d_ij
d_ij = np.subtract(candB[j][:2], candA[i][:2])
norm = np.linalg.norm(d_ij)
if norm:
d_ij = d_ij / norm
else:
continue
# Find p(u)
interp_coord = list(zip(np.linspace(candA[i][0], candB[j][0], num=n_interp_samples),
np.linspace(candA[i][1], candB[j][1], num=n_interp_samples)))
# Find L(p(u))
paf_interp = []
for k in range(len(interp_coord)):
paf_interp.append([pafA[int(round(interp_coord[k][1])), int(round(interp_coord[k][0]))],
pafB[int(round(interp_coord[k][1])), int(round(interp_coord[k][0]))]])
# Find E
paf_scores = np.dot(paf_interp, d_ij)
avg_paf_score = sum(paf_scores) / len(paf_scores)
# Check if the connection is valid
# If the fraction of interpolated vectors aligned with PAF is higher than the threshold -> Valid Pair
if (len(np.where(paf_scores > paf_score_th)[0]) / n_interp_samples) > conf_th:
if avg_paf_score > maxScore:
max_j = j
maxScore = avg_paf_score
found = 1
# Append the connection to the list
if found:
valid_pair = np.append(valid_pair, [[candA[i][3], candB[max_j][3], maxScore]], axis=0)
# Append the detected connections to the global list
valid_pairs.append(valid_pair)
else: # If no keypoints are detected
# ATTENTION: Commented this out by Sven
# print("No Connection : k = {}".format(k))
invalid_pairs.append(k)
valid_pairs.append([])
return valid_pairs, invalid_pairs
# This function creates a list of keypoints belonging to each person
# For each detected valid pair, it assigns the joint(s) to a person
def getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list):
# the last number in each row is the overall score
personwiseKeypoints = -1 * np.ones((0, 19))
for k in range(len(mapIdx)):
if k not in invalid_pairs:
partAs = valid_pairs[k][:, 0]
partBs = valid_pairs[k][:, 1]
indexA, indexB = np.array(POSE_PAIRS[k])
for i in range(len(valid_pairs[k])):
found = 0
person_idx = -1
for j in range(len(personwiseKeypoints)):
if personwiseKeypoints[j][indexA] == partAs[i]:
person_idx = j
found = 1
break
if found:
personwiseKeypoints[person_idx][indexB] = partBs[i]
personwiseKeypoints[person_idx][-1] += keypoints_list[partBs[i].astype(int), 2] + valid_pairs[k][i][
2]
# if no partA is found in the subset, create a new subset
elif not found and k < 17:
row = -1 * np.ones(19)
row[indexA] = partAs[i]
row[indexB] = partBs[i]
# add the keypoint_scores for the two keypoints and the paf_score
row[-1] = sum(keypoints_list[valid_pairs[k][i, :2].astype(int), 2]) + valid_pairs[k][i][2]
personwiseKeypoints = np.vstack([personwiseKeypoints, row])
return personwiseKeypoints
def f(probMap):
threshold = 0.5
return getKeypoints(probMap, threshold)
def getPose(output):
detected_keypoints = []
keypoints_list = np.zeros((0, 3))
keypoint_id = 0
threshold = 0.5
keypointsList = []
for part in range(18):
probMap = output[part, :, :]
probMap = cv2.resize(probMap, (frameWidth, frameHeight))
keypointsList.append(getKeypoints(probMap, threshold))
for keypoints in keypointsList: # nPoints = 18
keypoints_with_id = []
for i in range(len(keypoints)):
keypoints_with_id.append(keypoints[i] + (keypoint_id,))
keypoints_list = np.vstack([keypoints_list, keypoints[i]])
keypoint_id += 1
detected_keypoints.append(keypoints_with_id)
valid_pairs, invalid_pairs = getValidPairs(output, detected_keypoints, frameWidth, frameHeight)
personwiseKeypoints = getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list)
return detected_keypoints, keypoints_list, personwiseKeypoints
"""Forward array of 20 images"""
def getPoseFromDNN(net, images, frameWidth, frameHeight):
inHeight = 368
inWidth = int((inHeight / frameHeight) * frameWidth)
inpBlob = cv2.dnn.blobFromImages(np.array(images), 1.0 / 255, (inWidth, inHeight), (0, 0, 0), swapRB=False,
crop=False)
# Set the prepared object as the input blob of the network
net.setInput(inpBlob)
output = net.forward()
return output
def visualize(image, df, frame):
number_ids = len([col for col in df.columns if 'ID' in col])
data = df.loc[df['Frame'] == frame]
plt.imshow(image)
plt.axis('off')
for id_no in range(number_ids):
keypoints = df['ID%i_Keypoints' % id_no].iloc[frame]
for i in range(len(POSE_PAIRS)):
index = POSE_PAIRS[i]
A, B = keypoints[index]
# for idx in index:
# print(keypointsMapping[idx])
if A is not None and B is not None:
plt.plot((A[0], B[0]), (A[1], B[1]), c=colors[i])
plt.show()
def process(file):
global get_keypoints
POSE_PROTO_FILE = r"openpose/pose_deploy_linevec.prototxt"
POSE_WEIGHTS_FILE = r"openpose/pose_iter_440000.caffemodel"
if not os.path.exists(POSE_PROTO_FILE):
print('WARNING: Could not find pose file %s' % POSE_PROTO_FILE)
return
if not os.path.exists(POSE_WEIGHTS_FILE):
print('WARNING: Could not find model weights file %s' % POSE_WEIGHTS_FILE)
return
VIDEO = file
ROOT = "/".join(VIDEO.split("/")[:-1]) + "/"
VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)
if not os.path.exists(FRAMES):
print('WARNING: Could not find frame directory')
return
# Load Model #
net = cv2.dnn.readNetFromCaffe(POSE_PROTO_FILE, POSE_WEIGHTS_FILE)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
path_list = [f for f in os.listdir(FRAMES) if '.jpg' in f]
path_list.sort()
image = cv2.imread(os.path.join(FRAMES, path_list[0]))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
frameWidth = image.shape[1]
frameHeight = image.shape[0]
lst = []
images = []
for frame, path in enumerate(tqdm(path_list)):
image = cv2.imread(os.path.join(FRAMES, path))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
images.append(image)
if len(images) == 20:
output = getPoseFromDNN(net, images, frameWidth, frameHeight)
lst.extend(output)
images = []
if len(images) != 0:
output = getPoseFromDNN(net, images, frameWidth, frameHeight)
lst.extend(output)
images = []
print(len(lst))
df = pd.DataFrame(range(len(lst)))
df.columns = ["Frame"]
df["Pose"] = lst
path = "%s%s_OpenPose_raw.pkl" % (TMP_DIR, VIDEOOUT)
df.to_pickle(path)
def get_keypoints(frames):
ret = []
for f in frames:
output = df[df.Frame == f].Pose.iloc[0]
ret.append(getPose(output))
return ret
if platform == "linux" or platform == "linux2" or platform == "darwin":
cores = max(4, cpu_count() - 4)
print('Run sorting on {} cores'.format(cores))
data_split = np.array_split(df.Frame, cores)
pool = Pool(cores)
data = pool.map(get_keypoints, data_split)
pool.close()
pool.join()
elif platform == "win32":
print('WARNING: Can\'t run multiprocessing on Windows, this might take a while...')
data = get_keypoints(df.Frame)
else:
print('WARNING: platform not supported')
x = np.vstack(data)
df["DetectedKeypoints"] = x[:, 0]
df["KeypointsList"] = x[:, 1]
df["PersonwiseKeypoints"] = x[:, 2]
df.head()
path = "%s%s_OpenPose.pkl" % (TMP_DIR, VIDEOOUT)
df.to_pickle(path)
print("Saved OpenPose detections to %s" % path)
del df["Pose"]
del df['DetectedKeypoints']
number_ids = len(df.PersonwiseKeypoints.values.flatten()[0])
print('Number of detected IDs: ', number_ids)
"""Map personwise keypoints to list of keypoints for each ID"""
def keypoints_fun(x):
# Discard frames where not all ids detected
if len(x.PersonwiseKeypoints) < number_ids:
# print('None')
return None
# index is -1 for no detection >> keypoint = None
lst = list(x.KeypointsList)
lst.append(None)
lst = np.array(lst)
keypoints = lst[x.PersonwiseKeypoints[id_no].astype(int)[:18]]
return keypoints
for id_no in range(number_ids):
counter = 0
print('ID%i' % id_no)
col = 'ID%i_Keypoints' % id_no
df[col] = df.apply(keypoints_fun, axis=1)
"""Sort IDs to be consistent throughout video"""
lst = []
columns = [col for col in df.columns if 'ID' in col]
data = df[columns]
lst.append(data.iloc[0].values)
for i in range(1, len(df.Frame)):
row = data.iloc[i]
lst2 = []
for ids in range(number_ids):
keypoints = row['ID%i_Keypoints' % ids]
if keypoints is not None and keypoints[1] is not None:
for j in range(number_ids):
backtrack = 1
while lst[i - backtrack][j] == None:
backtrack = backtrack + 1
keypoints2 = lst[i - backtrack][j]
lst2.append([ids, j, np.linalg.norm(np.array(keypoints[1]) - np.array(keypoints2[1])), keypoints])
else:
lst2.append([ids, None, None, None])
dfX = pd.DataFrame(lst2)
dfX.columns = ["Id", "GtId", "Distance", "Keypoints"]
dfX = dfX.sort_values("Distance")
dfX = dfX.drop_duplicates("GtId").drop_duplicates("Id")
lstRow = []
for j in range(number_ids):
if (len(dfX[dfX.GtId == j]) > 0):
lstRow.append(dfX[dfX.GtId == j].iloc[0].Keypoints)
else:
lstRow.append(None)
lstRow.append(i)
lst.append(lstRow)
df_new = pd.DataFrame(lst)
columns = []
for i in range(number_ids):
columns.append('ID%i_Keypoints' % i)
columns.append("Frame")
df_new.columns = columns
# First frame number is NaN from sorting
df_new.Frame = df_new.Frame.fillna(0)
df_new = df_new.astype({'Frame': 'int32'})
path = "%s%s_BodyMovement.pkl" % (TMP_DIR, VIDEOOUT)
df_new.to_pickle(path)
print('Saved Body Movement to %s' % path)
visualize(image, df_new, 0)
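For downstream use, the saved ``*_BodyMovement.pkl`` is a DataFrame with a ``Frame`` column and one ``ID<i>_Keypoints`` column per tracked person (18 keypoints each, with ``None`` where a joint or person was not detected); a minimal sketch for loading it, with a placeholder path following the ``TMP_DIR`` convention above:

```python
import pandas as pd

# Load the per-person keypoints written by the OpenPose processing step above.
body = pd.read_pickle('./temp/ShowCase_3_BodyMovement.pkl')
id_columns = [col for col in body.columns if 'ID' in col]
print('Tracked persons:', len(id_columns))
print(body[['Frame'] + id_columns].head())
```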


@@ -0,0 +1,363 @@
import tensorflow as tf
import sys
import os
import argparse
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from tqdm.notebook import tqdm
from rt_gene.gaze_tools import get_phi_theta_from_euler, limit_yaw
from rt_gene.extract_landmarks_method_base import LandmarkMethodBase
from rt_gene.estimate_gaze_base import GazeEstimatorBase
from rt_gene.estimate_gaze_tensorflow import GazeEstimator
from rt_gene.gaze_tools_standalone import euler_from_matrix
import itertools
import pandas as pd
#os.environ["CUDA_VISIBLE_DEVICES"]="1"
def getCenter(box):
return np.array([box[2]+box[0], box[3]+box[1]])/2
def load_camera_calibration(calibration_file):
fileType = calibration_file.split(".")[-1]
if fileType == "pkl":
import pickle
infile = open(calibration_file,'rb')
data = pickle.load(infile)
return data["distortion_coef"], data["camera_matrix"]
elif fileType == "yaml":
import yaml
with open(calibration_file, 'r') as f:
cal = yaml.safe_load(f)
dist_coefficients = np.array(cal['distortion_coefficients']['data'], dtype='float32').reshape(1, 5)
camera_matrix = np.array(cal['camera_matrix']['data'], dtype='float32').reshape(3, 3)
return dist_coefficients, camera_matrix
def extract_eye_image_patches(subjects, landmark_estimator):
for subject in subjects:
le_c, re_c, _, _ = subject.get_eye_image_from_landmarks(subject, landmark_estimator.eye_image_size)
subject.left_eye_color = le_c
subject.right_eye_color = re_c
def estimate_gaze(base_name, color_img, landmark_estimator, gaze_estimator, dist_coefficients, camera_matrix, args):
faceboxes = landmark_estimator.get_face_bb(color_img)
if len(faceboxes) == 0:
tqdm.write('Could not find faces in the image')
return
subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
extract_eye_image_patches(subjects, landmark_estimator)
input_r_list = []
input_l_list = []
input_head_list = []
valid_subject_list = []
roll_pitch_yaw_list = []
for idx, subject in enumerate(subjects):
if subject.left_eye_color is None or subject.right_eye_color is None:
#tqdm.write('Failed to extract eye image patches')
continue
success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
subject.landmarks.reshape(len(subject.landmarks), 1, 2),
cameraMatrix=camera_matrix,
distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)
if not success:
tqdm.write('Not able to extract head pose for subject {}'.format(idx))
continue
_rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
_rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
_m = np.zeros((4, 4))
_m[:3, :3] = _rotation_matrix
_m[3, 3] = 1
# Go from camera space to ROS space
_camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
[-1.0, 0.0, 0.0, 0.0],
[0.0, -1.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 1.0]]
roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
roll_pitch_yaw = limit_yaw(roll_pitch_yaw)
roll_pitch_yaw_list.append(roll_pitch_yaw)
phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)
face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))
if args['vis_headpose']:
plt.axis("off")
plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
plt.show()
if args['save_headpose']:
cv2.imwrite(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_headpose.jpg'), head_pose_image)
input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
input_head_list.append([theta_head, phi_head])
valid_subject_list.append(idx)
if len(valid_subject_list) == 0:
return
gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
inference_input_right_list=input_r_list,
inference_headpose_list=input_head_list)
file_base = os.path.splitext(base_name)[0]
file = "_".join(file_base.split("_")[:-1])
frame = int(file_base.split("_")[-1])
ret = []
for subject_id, gaze, headpose, roll_pitch_yaw in zip(valid_subject_list, gaze_est.tolist(), input_head_list, roll_pitch_yaw_list):
subject = subjects[subject_id]
#print(roll_pitch_yaw)
# Build visualizations
r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)
if args['vis_gaze']:
plt.axis("off")
plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
plt.show()
if args['save_gaze']:
cv2.imwrite(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_gaze.jpg'), s_gaze_img)
# cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
# cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)
if args['save_estimate']:
with open(os.path.join(args['output_path'], os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
# Phi: pos - look down, neg - look up
# Theta: pos - rotate left, neg - rotate right
d = {"File":file, "Frame": frame, "SubjectId":subject_id, "HeadBox":subject.box, "Landmarks": subject.landmarks, "GazeTheta":gaze[0], "GazePhi":gaze[1], "HeadPoseTheta":headpose[0], "HeadPosePhi":headpose[1], "HeadPoseRoll":roll_pitch_yaw[0], "HeadPosePitch":roll_pitch_yaw[1], "HeadPoseYaw":roll_pitch_yaw[2]}
ret.append(d)
return ret
def visualize(df, FRAMES):
path_list = [f for f in os.listdir(FRAMES) if '.jpg' in f]
path_list.sort()
image = cv2.imread(os.path.join(FRAMES, path_list[0]))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
fig, ax = plt.subplots(1, figsize=(18,10))
for i in range(len(df.SubjectId.unique())):
bbox = df.loc[(df.Frame == 0) & (df.SubjectId == i)]['HeadBox'].values
print(bbox)
if not np.any(pd.isna(bbox)) and len(bbox) > 0:
bbox = np.array(bbox[0])
rect = patches.Rectangle((bbox[0],bbox[1]),bbox[2]-bbox[0],bbox[3]-bbox[1],linewidth=1,edgecolor='c',facecolor='none')
plt.text(bbox[0], bbox[1], 'ID%i' % i, color='c' ,fontsize=20)
ax.add_patch(rect)
ax.imshow(image)
plt.show()
def visualize_sorting(df_sorted):
subs = sorted(df_sorted[~df_sorted.PId.isna()].PId.unique())
for sid in subs:
x = df_sorted[df_sorted.PId==sid].HeadCenter.apply(lambda x: x[0])
y = df_sorted[df_sorted.PId==sid].HeadCenter.apply(lambda x: x[1])
frames = df_sorted[df_sorted.PId==sid].Frame.to_list()
plt.scatter(frames, x, alpha=.2, label = "Sub %i" % sid)
plt.legend()
plt.show()
def process(file, maxPeople, cameraRes = [5760, 2880]):
VIDEO = file
VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
ROOT = "/".join(VIDEO.split("/")[:-1]) + "/"
TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)
if not os.path.exists(VIDEO):
print('WARNING: Could not find video file')
return
script_path = "./"
args = {}
args["calib_file"] = "./calib_insta.pkl"
args["vis_headpose"] = False # store_false
args["save_headpose"] = False # store_false
args["vis_gaze"] = False # store_false
args["save_gaze"] = False # store_false
args["save_estimate"] = False # store_false
args["device_id_facedetection"] = "cuda:0" # store_false
args["im_path"] = os.path.join(script_path, './samples_gaze/')
args["output_path"] = os.path.join(script_path, './samples_gaze/')
args["models"] = [os.path.join(script_path, '../model_nets/Model_allsubjects1.h5')]
args['gaze_backend'] = 'tensorflow'
tqdm.write('Loading networks')
landmark_estimator = LandmarkMethodBase(device_id_facedetection=args["device_id_facedetection"],
checkpoint_path_face=os.path.join(script_path,
"../model_nets/SFD/s3fd_facedetector.pth"),
checkpoint_path_landmark=os.path.join(script_path,
"../model_nets/phase1_wpdc_vdc.pth.tar"),
model_points_file=os.path.join(script_path,
"../model_nets/face_model_68.txt"))
#gaze_estimator = GazeEstimator("/gpu:0", args['models'])
if args['gaze_backend'] == "tensorflow":
from rt_gene.estimate_gaze_tensorflow import GazeEstimator
gaze_estimator = GazeEstimator("/gpu:0", args['models'])
elif args['gaze_backend'] == "pytorch":
from rt_gene.estimate_gaze_pytorch import GazeEstimator
gaze_estimator = GazeEstimator("cuda:0", args['models'])
else:
raise ValueError("Incorrect gaze_backend, choices are: tensorflow or pytorch")
if not os.path.isdir(args["output_path"]):
os.makedirs(args["output_path"])
video = cv2.VideoCapture(VIDEO)
print('Video frame count: ', video.get(cv2.CAP_PROP_FRAME_COUNT))
if args["calib_file"] is not None and os.path.exists(args["calib_file"]):
_dist_coefficients, _camera_matrix = load_camera_calibration(args["calib_file"])
else:
im_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
im_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
print('WARNING!!! You should provide the camera calibration file, otherwise you might get bad results. \n\
Using a crude approximation!')
_dist_coefficients, _camera_matrix = np.zeros((1, 5)), np.array(
[[im_height, 0.0, im_width / 2.0], [0.0, im_height, im_height / 2.0], [0.0, 0.0, 1.0]])
lstRet = []
for i in tqdm(list(range(int(video.get(cv2.CAP_PROP_FRAME_COUNT))))):
image_file_name = "%s_%i.XXX" % (os.path.splitext(VIDEO)[0], i)
ret, image = video.read()
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
lstRet.append(estimate_gaze(image_file_name, image, landmark_estimator, gaze_estimator, _dist_coefficients, _camera_matrix, args))
lst = list(itertools.chain.from_iterable(lstRet))
df = pd.DataFrame(lst)
df["HeadCenter"] = df.HeadBox.apply(lambda x: getCenter(x))
df["Phi"] = df.GazePhi + df.HeadPosePhi # gaze yaw
df["Theta"] = df.GazeTheta + df.HeadPoseTheta # gaze pitch
df['Yaw'] = df.GazePhi + df.HeadPoseYaw
df['Pitch'] = df.GazeTheta + df.HeadPosePitch
# path = '%s%s_raw.pkl' % (TMP_DIR, VIDEOOUT)
# df.to_pickle(path)
# print('Saved raw detections to: ', path)
visualize(df, FRAMES)
# Sort ID detections
###############################################################################################################
# Find first frame where all are detected
for frame in sorted(df.Frame.unique()):
frame_df = df.loc[df.Frame == frame]
if len(frame_df['SubjectId'].unique()) == maxPeople:
first_frame = frame
print('First frame where all are detected: ', first_frame)
break
empty_rows = pd.DataFrame()
empty_rows['Frame'] = np.zeros(maxPeople).astype(int)
for col in df.columns:
if not col == 'Frame':
empty_rows[col] = df.loc[df.Frame == first_frame, [col]].values
df = df.loc[df.Frame != 0]
df = df.append(empty_rows).sort_values(by=['Frame'])
df.head()
df_sorted = df.copy()
df_sorted["PId"] = None
df_sorted.loc[df_sorted.Frame == 0, "PId"] = list(range(maxPeople))
df_sorted = df_sorted.sort_values("Frame")
df_sorted.index = list(range(len(df_sorted)))
for frameId in tqdm(sorted(df_sorted.Frame.unique())[1:]):
pidAssignement = []
for frameIdBefore in range(frameId - 1, -1, -1):
allFramesBefore = df_sorted[(df_sorted.Frame == frameIdBefore) & (~df_sorted.PId.isna())]
if (np.array_equal(sorted(allFramesBefore.PId.to_list()), np.arange(maxPeople))):
dfFramesCurrent = df_sorted[df_sorted.Frame == frameId]
for indexCurrentFrame, frameCurrent in dfFramesCurrent.iterrows():
lst = []
for indexBeforeFrame, frameBefore in allFramesBefore.iterrows():
if (frameBefore.HeadCenter[0] > frameCurrent.HeadCenter[0]):
p1 = np.array(frameCurrent.HeadCenter)
p2 = np.array(frameBefore.HeadCenter)
else:
p1 = np.array(frameBefore.HeadCenter)
p2 = np.array(frameCurrent.HeadCenter)
v1 = p1 - p2
dist1 = np.linalg.norm(v1)
p1[0] = p1[0] + cameraRes[0]
v2 = p1 - p2
dist2 = np.linalg.norm(v2)
dist = min([dist1, dist2])
lst.append([dist, frameCurrent.name, indexBeforeFrame, frameBefore])
lst.sort(key=lambda x: x[0])
pidAssignement.append([indexCurrentFrame, lst[0][-1].PId])
break
for index, pid in pidAssignement:
df_sorted.loc[df_sorted.index == index, "PId"] = pid
visualize_sorting(df_sorted)
del df_sorted["SubjectId"]
# Rearrange DataFrame: each ID has specific columns
###############################################################################################################
df_sorted = df_sorted[~df_sorted.PId.isna()].drop_duplicates(subset=['Frame', 'PId'])
FACE_COUNT = len(df_sorted[~df_sorted.PId.isna()].PId.unique())
df2 = df_sorted.pivot(index='Frame', columns="PId",
values=["Landmarks", "GazeTheta", "GazePhi", "HeadCenter", "HeadPoseTheta", "HeadPosePhi",
"HeadPoseYaw", "HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"])
lst = []
for label in ["Landmarks", "GazeTheta", "GazePhi", "Head", "HeadPoseTheta", "HeadPosePhi", "HeadPoseYaw",
"HeadPosePitch", "HeadPoseRoll", "Phi", "Theta"]:
for head_id in range(FACE_COUNT):
lst.append("ID%i_%s" % (head_id, label))
df2.columns = lst
df2 = df2.reset_index()
path = "%s%s_RTGene.pkl" % (TMP_DIR, VIDEOOUT)
df2.to_pickle(path)
print("Saved RT-Gene detections to %s" % path)