413 lines
15 KiB
Python
413 lines
15 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
# # OpenPose pose detection
|
|
# 2D real-time multi-person keypoint detection:
|
|
# **18**-keypoint body/foot keypoint estimation. Running time invariant to number of detected people
|
|
# see https://github.com/CMU-Perceptual-Computing-Lab/openpose
|
|
#
|
|
# ## Pipeline
|
|
# - Run 18-keypoint model on video frames
|
|
# - Parse keypoints and PAFs to generate personwise keypoints
|
|
# - Save results to OpenPose.pkl
|
|
|
|
import os
|
|
import numpy as np
|
|
import cv2
|
|
import sys
|
|
from sys import platform
|
|
import time
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
|
|
from tqdm import tqdm, tqdm_pandas
|
|
|
|
tqdm.pandas()
|
|
|
|
from multiprocessing import cpu_count
|
|
from multiprocessing import Pool
|
|
import itertools
|
|
import os
|
|
|
|
# Expose only CUDA device "1" to this process and allow OpenCV's DNN module
# to use any OpenCL device.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES"] = "1"

# Limbs of the skeleton as [keypoint index A, keypoint index B]; the indices
# refer to positions in ``keypointsMapping``.
POSE_PAIRS = [
    [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7],
    [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
    [1, 0], [0, 14], [14, 16], [0, 15], [15, 17],
    [2, 17], [5, 16],
]

# Human-readable name of each of the 18 keypoints, by keypoint index.
keypointsMapping = [
    'Nose', 'Neck',
    'R-Sho', 'R-Elb', 'R-Wr',
    'L-Sho', 'L-Elb', 'L-Wr',
    'R-Hip', 'R-Knee', 'R-Ank',
    'L-Hip', 'L-Knee', 'L-Ank',
    'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear',
]

# For every entry of POSE_PAIRS, the two network output channels that hold
# the part-affinity field of that limb (same order as POSE_PAIRS).
mapIdx = [
    [31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
    [19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
    [47, 48], [49, 50], [53, 54], [51, 52], [55, 56],
    [37, 38], [45, 46],
]

# Drawing colours (0-255 per channel), one row per colour.
colors = np.array([
    (0, 100, 255), (0, 100, 255), (0, 255, 255),
    (0, 100, 255), (0, 255, 255), (0, 100, 255),
    (0, 255, 0), (255, 200, 100), (255, 0, 255),
    (0, 255, 0), (255, 200, 100), (255, 0, 255),
    (0, 0, 255), (255, 0, 0), (200, 200, 0),
    (255, 0, 0), (200, 200, 0), (0, 0, 0),
])

# The same colours as a plain list of tuples.
my_color = list(map(tuple, colors))
|
|
|
|
|
|
# ## Auxiliary Functions
|
|
# see https://www.learnopencv.com/deep-learning-based-human-pose-estimation-using-opencv-cpp-python/
|
|
def getKeypoints(probMap, threshold=0.8):
    """Locate the confidence peaks of a single keypoint probability map.

    The map is smoothed, thresholded, and split into connected blobs; the
    maximum of the smoothed map inside each blob becomes one keypoint.

    Args:
        probMap: 2-D array of per-pixel confidences for one body part.
        threshold: Minimum smoothed confidence for a pixel to belong to a blob.

    Returns:
        A list of ``(x, y, confidence)`` tuples, one per blob, where
        ``confidence`` is read from the unsmoothed ``probMap``.
    """
    mapSmooth = cv2.GaussianBlur(probMap, (3, 3), 0, 0)
    mapMask = np.uint8(mapSmooth > threshold)

    # OpenCV 3.x returns (image, contours, hierarchy) while other versions
    # return (contours, hierarchy); the contours are always second to last,
    # so index from the end instead of unpacking a fixed number of values.
    contours = cv2.findContours(mapMask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    keypoints = []
    for cnt in contours:
        # Restrict the smoothed map to this blob, then take its maximum.
        blobMask = np.zeros(mapMask.shape)
        blobMask = cv2.fillConvexPoly(blobMask, cnt, 1)
        maskedProbMap = mapSmooth * blobMask
        _, _, _, maxLoc = cv2.minMaxLoc(maskedProbMap)
        # maxLoc is (x, y); the array is indexed [row, column] = [y, x].
        keypoints.append(maxLoc + (probMap[maxLoc[1], maxLoc[0]],))
    return keypoints
|
|
|
|
|
|
# Find valid connections between the different joints of all persons present
|
|
def getValidPairs(output, detected_keypoints, frameWidth, frameHeight,
                  n_interp_samples=10, paf_score_th=0.1, conf_th=0.7):
    """Find, for every limb in POSE_PAIRS, which detected joints are connected.

    For each candidate joint A of a limb, every candidate joint B is scored
    by sampling the limb's part-affinity field (PAF) along the segment A->B
    and projecting it onto the unit vector from A to B. The best-scoring B
    that passes the thresholds is recorded as A's connection.

    Args:
        output: Network output for one frame, indexed ``[channel, row, col]``.
        detected_keypoints: One list per keypoint type; each entry is a
            tuple ``(x, y, confidence, keypoint_id)``.
        frameWidth: Width the PAF channels are resized to.
        frameHeight: Height the PAF channels are resized to.
        n_interp_samples: Number of points sampled along each segment.
        paf_score_th: Minimum projected PAF value for a sample to count as
            aligned with the limb.
        conf_th: Fraction of samples that must be aligned for a connection
            to be accepted.

    Returns:
        ``(valid_pairs, invalid_pairs)``. ``valid_pairs`` has one entry per
        limb: an ``(n, 3)`` float array with rows
        ``[keypoint_id_A, keypoint_id_B, score]``, or an empty list when one
        of the two joint types has no candidates. ``invalid_pairs`` lists the
        indices of those limbs without candidates.
    """
    valid_pairs = []
    invalid_pairs = []

    for limb_idx, (paf_channels, (jointA, jointB)) in enumerate(zip(mapIdx, POSE_PAIRS)):
        candA = detected_keypoints[jointA]
        candB = detected_keypoints[jointB]

        # Nothing to connect when either end of the limb was not detected.
        if not candA or not candB:
            invalid_pairs.append(limb_idx)
            valid_pairs.append([])
            continue

        # The two PAF channels hold the components of the limb direction
        # field that is projected onto (dx, dy) below.
        pafA = cv2.resize(output[paf_channels[0], :, :], (frameWidth, frameHeight))
        pafB = cv2.resize(output[paf_channels[1], :, :], (frameWidth, frameHeight))

        connections = []
        for a in candA:
            best_b = None
            best_score = -1
            for b in candB:
                # Unit vector pointing from A to B.
                d_ij = np.subtract(b[:2], a[:2])
                norm = np.linalg.norm(d_ij)
                if not norm:
                    # Coincident points have no direction to compare with.
                    continue
                d_ij = d_ij / norm

                # Pixel coordinates of the samples between A and B.
                xs = np.rint(np.linspace(a[0], b[0], num=n_interp_samples)).astype(int)
                ys = np.rint(np.linspace(a[1], b[1], num=n_interp_samples)).astype(int)

                # PAF vectors at the samples, projected onto the limb direction.
                paf_interp = np.stack([pafA[ys, xs], pafB[ys, xs]], axis=1)
                paf_scores = np.dot(paf_interp, d_ij)
                avg_paf_score = sum(paf_scores) / len(paf_scores)

                # Accept only if enough samples are aligned with the PAF,
                # and keep the candidate with the highest average score.
                aligned = np.count_nonzero(paf_scores > paf_score_th) / n_interp_samples
                if aligned > conf_th and avg_paf_score > best_score:
                    best_b = b
                    best_score = avg_paf_score

            if best_b is not None:
                connections.append([a[3], best_b[3], best_score])

        # reshape keeps the (0, 3) shape when no connection was accepted.
        valid_pairs.append(np.array(connections, dtype=float).reshape(-1, 3))

    return valid_pairs, invalid_pairs
|
|
|
|
|
|
# This function creates a list of keypoints belonging to each person
|
|
# For each detected valid pair, it assigns the joint(s) to a person
|
|
def getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list):
    """Group limb connections into one row of keypoint ids per person.

    Args:
        valid_pairs: Per-limb arrays with rows ``[id_A, id_B, score]``.
        invalid_pairs: Indices of limbs that must be skipped.
        keypoints_list: Array indexed by keypoint id; column 2 is read as
            the keypoint's confidence.

    Returns:
        An ``(n_persons, 19)`` float array. Columns 0-17 hold the keypoint id
        assigned to each joint (``-1`` when unassigned); the last column is
        the accumulated score of the person.
    """
    personwiseKeypoints = np.full((0, 19), -1.0)

    for k in range(len(mapIdx)):
        if k in invalid_pairs:
            continue

        joint_a, joint_b = POSE_PAIRS[k]
        for connection in valid_pairs[k]:
            part_a, part_b, paf_score = connection

            # Persons that already own part A of this connection.
            owners = np.flatnonzero(personwiseKeypoints[:, joint_a] == part_a)

            if owners.size:
                # Extend the first matching person with part B.
                person = personwiseKeypoints[owners[0]]
                person[joint_b] = part_b
                person[-1] += keypoints_list[part_b.astype(int), 2] + paf_score
            elif k < 17:
                # Part A belongs to nobody yet: start a new person. Limbs
                # with index 17 and above never start a new person.
                new_person = np.full(19, -1.0)
                new_person[joint_a] = part_a
                new_person[joint_b] = part_b
                # Score = confidence of both keypoints + connection score.
                new_person[-1] = sum(keypoints_list[connection[:2].astype(int), 2]) + paf_score
                personwiseKeypoints = np.vstack([personwiseKeypoints, new_person])

    return personwiseKeypoints
|
|
|
|
|
|
def f(probMap):
    """Run ``getKeypoints`` on ``probMap`` with a fixed threshold of 0.5."""
    return getKeypoints(probMap, 0.5)
|
|
|
|
|
|
def getPose(output, frameWidth=None, frameHeight=None):
    """Turn the network output of one frame into per-person keypoints.

    Args:
        output: Network output for one frame, indexed ``[channel, row, col]``;
            channels 0-17 are read as the keypoint probability maps.
        frameWidth: Width the maps are resized to. When omitted, the
            module-level name ``frameWidth`` is used.
        frameHeight: Height the maps are resized to. When omitted, the
            module-level name ``frameHeight`` is used.

    Returns:
        ``(detected_keypoints, keypoints_list, personwiseKeypoints)``:
        a list with one list of ``(x, y, confidence, keypoint_id)`` tuples
        per keypoint type, an ``(n, 3)`` array of ``[x, y, confidence]`` rows
        indexed by keypoint id, and the result of ``getPersonwiseKeypoints``.

    Raises:
        NameError: If the frame size is neither passed in nor available as
            module-level ``frameWidth`` / ``frameHeight``.
    """
    # Fall back to module globals so existing single-argument calls behave
    # as before, but fail with a clear message when they are not defined.
    if frameWidth is None:
        frameWidth = globals().get("frameWidth")
    if frameHeight is None:
        frameHeight = globals().get("frameHeight")
    if frameWidth is None or frameHeight is None:
        raise NameError(
            "getPose needs frameWidth and frameHeight, either as arguments "
            "or as module-level variables"
        )

    threshold = 0.5
    detected_keypoints = []
    keypoint_rows = []
    keypoint_id = 0

    for part in range(18):
        probMap = cv2.resize(output[part, :, :], (frameWidth, frameHeight))

        keypoints_with_id = []
        for keypoint in getKeypoints(probMap, threshold):
            # Ids are assigned consecutively across all keypoint types, so a
            # keypoint's id is also its row index in keypoints_list.
            keypoints_with_id.append(keypoint + (keypoint_id,))
            keypoint_rows.append(keypoint)
            keypoint_id += 1
        detected_keypoints.append(keypoints_with_id)

    # reshape keeps the (0, 3) shape when nothing was detected.
    keypoints_list = np.array(keypoint_rows, dtype=float).reshape(-1, 3)

    valid_pairs, invalid_pairs = getValidPairs(output, detected_keypoints, frameWidth, frameHeight)
    personwiseKeypoints = getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list)

    return detected_keypoints, keypoints_list, personwiseKeypoints
|
|
|
|
|
|
"""Forward array of 20 images"""
|
|
|
|
|
|
def getPoseFromDNN(net, images, frameWidth, frameHeight):
    """Run one forward pass of ``net`` over a batch of images.

    Args:
        net: Network object providing ``setInput`` and ``forward``.
        images: Sequence of images; they are stacked with ``np.array``.
        frameWidth: Width of the original frames.
        frameHeight: Height of the original frames.

    Returns:
        Whatever ``net.forward()`` returns for the batch.
    """
    # Fixed input height; the width follows the frame's aspect ratio.
    inHeight = 368
    inWidth = int((inHeight / frameHeight) * frameWidth)

    batch = np.array(images)
    blob = cv2.dnn.blobFromImages(
        batch,
        1.0 / 255,
        (inWidth, inHeight),
        (0, 0, 0),
        swapRB=False,
        crop=False,
    )

    net.setInput(blob)
    return net.forward()
|
|
|
|
|
|
def visualize(image, df, frame):
    """Show ``image`` with the skeletons stored for ``frame`` drawn on top.

    Args:
        image: Image passed to ``plt.imshow``.
        df: DataFrame with a ``Frame`` column and one ``ID<n>_Keypoints``
            column per person. Each cell is either ``None`` or a sequence
            indexable by keypoint index whose entries are ``None`` or
            ``(x, y, ...)``.
        frame: Value of the ``Frame`` column to draw.
    """
    number_ids = len([col for col in df.columns if 'ID' in col])
    data = df.loc[df['Frame'] == frame]

    plt.imshow(image)
    plt.axis('off')

    # Without a row for this frame only the image itself is shown.
    if len(data) > 0:
        row = data.iloc[0]
        for id_no in range(number_ids):
            keypoints = row['ID%i_Keypoints' % id_no]
            # Cells without a detection hold None (or NaN); skip them.
            if not isinstance(keypoints, (list, tuple, np.ndarray)):
                continue

            for i, (index_a, index_b) in enumerate(POSE_PAIRS):
                A = keypoints[index_a]
                B = keypoints[index_b]
                if A is None or B is None:
                    continue
                # There are fewer colours than limbs, so wrap around, and
                # scale 0-255 channels to the 0-1 range matplotlib expects.
                color = tuple(colors[i % len(colors)] / 255.0)
                plt.plot((A[0], B[0]), (A[1], B[1]), c=color)

    plt.show()
|
|
|
|
def _pack_objects(items):
    """Store arbitrary objects in a 1-D object array, one object per cell."""
    items = list(items)
    packed = np.empty(len(items), dtype=object)
    # Element-wise assignment stops NumPy from trying to broadcast or stack
    # the (possibly ragged) contained arrays.
    for position, item in enumerate(items):
        packed[position] = item
    return packed


def _read_rgb(path):
    """Read the image at ``path`` and return it converted from BGR to RGB."""
    image = cv2.imread(path)
    if image is None:
        raise IOError('Could not read frame %s' % path)
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def _neck_of(keypoints):
    """Return entry 1 (the 'Neck' keypoint) of ``keypoints``, or None."""
    if isinstance(keypoints, np.ndarray) and keypoints[1] is not None:
        return keypoints[1]
    return None


def process(file):
    """Run the OpenPose pipeline on the extracted frames of one video.

    For a video ``<base>/<dir>/<name>.<ext>`` the frames are read from
    ``<base>/temp/<name>_frames`` (files whose name contains ``.jpg``).
    Three pickles are written to ``<base>/temp/``:

    * ``<name>_OpenPose_raw.pkl``  - raw network output per frame,
    * ``<name>_OpenPose.pkl``      - raw output plus parsed keypoints,
    * ``<name>_BodyMovement.pkl``  - per-person keypoints with ids kept
      consistent across frames.

    Finally the first frame is displayed with its skeletons.

    The function prints a warning and returns early when the model files or
    the frame directory are missing, when there are no frames, when the
    platform is unsupported, or when no person is found in the first frame.

    Args:
        file: Path of the video, using ``/`` as separator.

    Raises:
        IOError: If a frame image cannot be read.
    """
    # get_keypoints must be a module-level name so multiprocessing can send
    # it to the workers; frameWidth/frameHeight are published as module
    # globals because getPose is called with the network output only.
    global get_keypoints, frameWidth, frameHeight

    POSE_PROTO_FILE = r"openpose/pose_deploy_linevec.prototxt"
    POSE_WEIGHTS_FILE = r"openpose/pose_iter_440000.caffemodel"

    if not os.path.exists(POSE_PROTO_FILE):
        print('WARNING: Could not find pose file %s' % POSE_PROTO_FILE)
        return
    if not os.path.exists(POSE_WEIGHTS_FILE):
        print('WARNING: Could not find model weights file %s' % POSE_WEIGHTS_FILE)
        return

    VIDEO = file
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)

    if not os.path.exists(FRAMES):
        print('WARNING: Could not find frame directory')
        return

    path_list = sorted(f for f in os.listdir(FRAMES) if '.jpg' in f)
    if not path_list:
        print('WARNING: Could not find any frames in %s' % FRAMES)
        return

    # Load Model #
    net = cv2.dnn.readNetFromCaffe(POSE_PROTO_FILE, POSE_WEIGHTS_FILE)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    # The first frame defines the frame size and is kept for the final plot.
    first_image = _read_rgb(os.path.join(FRAMES, path_list[0]))
    frameHeight, frameWidth = first_image.shape[:2]

    # Forward the frames through the network in batches.
    batch_size = 20
    lst = []
    images = []
    for path in tqdm(path_list):
        images.append(_read_rgb(os.path.join(FRAMES, path)))
        if len(images) == batch_size:
            lst.extend(getPoseFromDNN(net, images, frameWidth, frameHeight))
            images = []
    if images:
        lst.extend(getPoseFromDNN(net, images, frameWidth, frameHeight))
        images = []

    print(len(lst))
    df = pd.DataFrame({"Frame": range(len(lst))})
    df["Pose"] = _pack_objects(lst)

    path = "%s%s_OpenPose_raw.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)

    def get_keypoints(frames):
        ret = []
        for f in frames:
            output = df[df.Frame == f].Pose.iloc[0]
            ret.append(getPose(output))
        return ret

    # Parse the raw output into keypoints, in parallel where possible.
    if platform == "linux" or platform == "linux2" or platform == "darwin":
        cores = max(4, cpu_count() - 4)
        print('Run sorting on {} cores'.format(cores))
        data_split = np.array_split(df.Frame.values, cores)
        pool = Pool(cores)
        try:
            data = pool.map(get_keypoints, data_split)
        finally:
            pool.close()
            pool.join()
        # One list of results per chunk -> one flat list in frame order.
        results = list(itertools.chain.from_iterable(data))
    elif platform == "win32":
        print('WARNING: Can\'t run multiprocessing on Windows, this might take a while...')
        results = get_keypoints(df.Frame)
    else:
        print('WARNING: platform not supported')
        return

    df["DetectedKeypoints"] = _pack_objects(r[0] for r in results)
    df["KeypointsList"] = _pack_objects(r[1] for r in results)
    df["PersonwiseKeypoints"] = _pack_objects(r[2] for r in results)

    path = "%s%s_OpenPose.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)
    print("Saved OpenPose detections to %s" % path)

    # Drop the bulky columns; 'Pic' may not exist, so missing ones are ignored.
    df.drop(columns=["Pose", "Pic", "DetectedKeypoints"], inplace=True, errors="ignore")

    # The number of persons in the first frame fixes the number of ids.
    number_ids = len(df.PersonwiseKeypoints.iloc[0])
    print('Number of detected IDs: ', number_ids)
    if number_ids == 0:
        print('WARNING: No person detected in the first frame')
        return

    # Map personwise keypoints to list of keypoints for each ID
    def keypoints_fun(keypoints_list, personwise, id_no):
        # Discard frames where not all ids detected
        if len(personwise) < number_ids:
            return None
        # Index -1 marks "no detection"; with None appended as the last
        # element, -1 selects None.
        lookup = _pack_objects(list(keypoints_list) + [None])
        return lookup[personwise[id_no].astype(int)[:18]]

    id_columns = []
    for id_no in range(number_ids):
        print('ID%i' % id_no)
        col = 'ID%i_Keypoints' % id_no
        id_columns.append(col)
        df[col] = _pack_objects(
            keypoints_fun(keypoints_list, personwise, id_no)
            for keypoints_list, personwise in zip(df.KeypointsList, df.PersonwiseKeypoints)
        )

    # Sort IDs to be consistent throughout video
    first_row = [df[col].iloc[0] for col in id_columns]
    lst = [first_row + [0]]
    # Most recent neck position seen for every id slot (None if never seen).
    last_neck = [_neck_of(keypoints) for keypoints in first_row]

    for i in range(1, len(df.Frame)):
        candidates = []
        for ids, col in enumerate(id_columns):
            keypoints = df[col].iloc[i]
            neck = _neck_of(keypoints)
            if neck is None:
                candidates.append([ids, None, None, None])
                continue
            for j in range(number_ids):
                # Distance of this detection to the last known neck of slot
                # j; slots never seen so far are matched with lowest priority.
                if last_neck[j] is None:
                    distance = np.inf
                else:
                    distance = np.linalg.norm(np.array(neck) - np.array(last_neck[j]))
                candidates.append([ids, j, distance, keypoints])

        dfX = pd.DataFrame(candidates, columns=["Id", "GtId", "Distance", "Keypoints"])
        dfX = dfX.sort_values("Distance")
        dfX = dfX.drop_duplicates("GtId").drop_duplicates("Id")

        lstRow = []
        for j in range(number_ids):
            matches = dfX[dfX.GtId == j]
            if len(matches) > 0:
                assigned = matches.iloc[0].Keypoints
                last_neck[j] = _neck_of(assigned)
                lstRow.append(assigned)
            else:
                lstRow.append(None)
        lstRow.append(i)
        lst.append(lstRow)

    df_new = pd.DataFrame(lst, columns=id_columns + ["Frame"])
    df_new = df_new.astype({'Frame': 'int32'})

    path = "%s%s_BodyMovement.pkl" % (TMP_DIR, VIDEOOUT)
    df_new.to_pickle(path)
    print('Saved Body Movement to %s' % path)

    # Frame 0 keypoints belong on the first frame's image.
    visualize(first_image, df_new, 0)
|
|
|
|
|