# Source: conan/processing/process_OpenPose.py (413 lines, 15 KiB, Python)

#!/usr/bin/env python
# coding: utf-8
# # OpenPose pose detection
# 2D real-time multi-person keypoint detection:
# **18**-keypoint body/foot keypoint estimation. Running time invariant to number of detected people
# see https://github.com/CMU-Perceptual-Computing-Lab/openpose
#
# ## Pipeline
# - Run 18-keypoint model on video frames
# - Parse keypoints and PAFs to generate personwise keypoints
# - Save results to OpenPose.pkl
import os
import numpy as np
import cv2
import sys
from sys import platform
import time
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm, tqdm_pandas
tqdm.pandas()
from multiprocessing import cpu_count
from multiprocessing import Pool
import itertools
import os
# Process-wide configuration, applied at import time.
# CUDA_VISIBLE_DEVICES limits which CUDA device(s) this process can see.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# NOTE(review): presumably lets OpenCV's DNN module use any OpenCL device - confirm.
os.environ["OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES"] = "1"

# Limbs of the skeleton as pairs of keypoint indices (see keypointsMapping).
POSE_PAIRS = [
    [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7],
    [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
    [1, 0], [0, 14], [14, 16], [0, 15], [15, 17],
    [2, 17], [5, 16],
]

# Human-readable name of each of the 18 keypoints, ordered by keypoint index.
keypointsMapping = [
    'Nose', 'Neck',
    'R-Sho', 'R-Elb', 'R-Wr',
    'L-Sho', 'L-Elb', 'L-Wr',
    'R-Hip', 'R-Knee', 'R-Ank',
    'L-Hip', 'L-Knee', 'L-Ank',
    'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear',
]

# For every entry of POSE_PAIRS, the two network output channels holding the
# part affinity field (PAF) of that limb.
mapIdx = [
    [31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
    [19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
    [47, 48], [49, 50], [53, 54], [51, 52], [55, 56],
    [37, 38], [45, 46],
]

# Drawing colours as 0-255 integer triples.
# NOTE: this table has 18 rows while POSE_PAIRS has 19 entries.
colors = np.array([
    (0, 100, 255), (0, 100, 255), (0, 255, 255),
    (0, 100, 255), (0, 255, 255), (0, 100, 255),
    (0, 255, 0), (255, 200, 100), (255, 0, 255),
    (0, 255, 0), (255, 200, 100), (255, 0, 255),
    (0, 0, 255), (255, 0, 0), (200, 200, 0),
    (255, 0, 0), (200, 200, 0), (0, 0, 0),
])

# The same colours as a list of plain tuples.
my_color = [tuple(row) for row in colors]
# ## Auxiliary Functions
# see https://www.learnopencv.com/deep-learning-based-human-pose-estimation-using-opencv-cpp-python/
def getKeypoints(probMap, threshold=0.8):
    """Locate the peaks of a keypoint confidence map.

    The map is smoothed with a 3x3 Gaussian and thresholded; every connected
    blob of the resulting mask contributes one keypoint at the position where
    the smoothed map is maximal inside that blob.

    Args:
        probMap: 2-D confidence map of a single keypoint type.
        threshold: smoothed confidence a pixel must exceed to be part of a blob.

    Returns:
        A list of ``(x, y, confidence)`` tuples, one per blob. The confidence
        is read from the unsmoothed ``probMap`` at the peak position.
    """
    smoothed = cv2.GaussianBlur(probMap, (3, 3), 0, 0)
    binary = np.uint8(smoothed > threshold)

    # Two return values (contours, hierarchy) are expected here, which is the
    # OpenCV 4 signature; OpenCV 3 returned three values.
    blobs, _hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    def peak_of(blob):
        # Restrict the smoothed map to this blob, then take its maximum.
        region = cv2.fillConvexPoly(np.zeros(binary.shape), blob, 1)
        peak_xy = cv2.minMaxLoc(smoothed * region)[3]
        return peak_xy + (probMap[peak_xy[1], peak_xy[0]],)

    return [peak_of(blob) for blob in blobs]
# Find valid connections between the different joints of all persons present
def getValidPairs(output, detected_keypoints, frameWidth, frameHeight):
    """Connect candidate keypoints into limbs using the part affinity fields.

    For every limb in ``mapIdx`` / ``POSE_PAIRS`` each candidate of the first
    joint is paired with the candidate of the second joint whose connecting
    line agrees best with the limb's PAF.

    Args:
        output: network output of one frame, indexed ``[channel, row, col]``.
        detected_keypoints: per keypoint type, a list of
            ``(x, y, confidence, keypoint_id)`` tuples.
        frameWidth: width the PAF channels are resized to.
        frameHeight: height the PAF channels are resized to.

    Returns:
        ``(valid_pairs, invalid_pairs)``. ``valid_pairs`` has one entry per
        limb: an ``(m, 3)`` array of ``[id_a, id_b, score]`` rows, or ``[]``
        when one of the two joints has no candidates. ``invalid_pairs`` lists
        the limb indices for which that was the case.
    """
    n_interp_samples = 10   # points sampled along each candidate limb
    paf_score_th = 0.1      # minimum PAF alignment for a sample to count
    conf_th = 0.7           # fraction of samples that must be aligned

    valid_pairs = []
    invalid_pairs = []

    for limb, (channel_a, channel_b) in enumerate(mapIdx):
        # The two channels are dotted with the (dx, dy) direction below.
        pafA = cv2.resize(output[channel_a, :, :], (frameWidth, frameHeight))
        pafB = cv2.resize(output[channel_b, :, :], (frameWidth, frameHeight))

        candA = detected_keypoints[POSE_PAIRS[limb][0]]
        candB = detected_keypoints[POSE_PAIRS[limb][1]]

        if len(candA) == 0 or len(candB) == 0:
            # At least one joint of this limb was not detected at all.
            invalid_pairs.append(limb)
            valid_pairs.append([])
            continue

        connections = np.zeros((0, 3))
        for start in candA:
            best_j = -1
            best_score = -1
            matched = False
            for j, end in enumerate(candB):
                # Unit vector pointing from the start joint to the end joint.
                direction = np.subtract(end[:2], start[:2])
                length = np.linalg.norm(direction)
                if not length:
                    continue
                direction = direction / length

                # Pixel coordinates of the samples along the segment.
                xs = [int(round(x)) for x in np.linspace(start[0], end[0], num=n_interp_samples)]
                ys = [int(round(y)) for y in np.linspace(start[1], end[1], num=n_interp_samples)]

                # PAF vector at every sample, projected onto the limb direction.
                paf_samples = np.column_stack((pafA[ys, xs], pafB[ys, xs]))
                paf_scores = np.dot(paf_samples, direction)
                avg_paf_score = sum(paf_scores) / len(paf_scores)

                # Accept the connection when enough samples are aligned with
                # the PAF, and keep the best-scoring end joint.
                aligned = np.count_nonzero(paf_scores > paf_score_th) / n_interp_samples
                if aligned > conf_th and avg_paf_score > best_score:
                    best_j = j
                    best_score = avg_paf_score
                    matched = True

            if matched:
                connections = np.append(
                    connections, [[start[3], candB[best_j][3], best_score]], axis=0)

        valid_pairs.append(connections)

    return valid_pairs, invalid_pairs
# This function creates a list of keypoints belonging to each person
# For each detected valid pair, it assigns the joint(s) to a person
def getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list):
    """Group limb connections into one keypoint-id row per person.

    Args:
        valid_pairs: per limb, an ``(m, 3)`` array of ``[id_a, id_b, score]``.
        invalid_pairs: limb indices to skip.
        keypoints_list: ``(n, 3)`` array whose row ``i`` describes keypoint id
            ``i``; column 2 is read as that keypoint's confidence.

    Returns:
        An array with one row of 19 values per person: the keypoint id of each
        of the 18 keypoint types (``-1`` when missing), followed by the
        accumulated score of that person.
    """
    persons = -1 * np.ones((0, 19))

    for limb in range(len(mapIdx)):
        if limb in invalid_pairs:
            continue

        joint_a, joint_b = np.array(POSE_PAIRS[limb])
        for connection in valid_pairs[limb]:
            id_a, id_b, paf_score = connection

            # First person that already owns the start joint of this limb.
            owner = next(
                (p for p in range(len(persons)) if persons[p][joint_a] == id_a),
                None,
            )

            if owner is not None:
                persons[owner][joint_b] = id_b
                persons[owner][-1] += keypoints_list[id_b.astype(int), 2] + paf_score
            elif limb < 17:
                # Nobody owns the start joint yet: open a new person. The last
                # two limbs never start a person.
                newcomer = -1 * np.ones(19)
                newcomer[joint_a] = id_a
                newcomer[joint_b] = id_b
                # Confidence of both keypoints plus the PAF score of the limb.
                newcomer[-1] = sum(keypoints_list[connection[:2].astype(int), 2]) + paf_score
                persons = np.vstack([persons, newcomer])

    return persons
def f(probMap):
    """Run ``getKeypoints`` on ``probMap`` with a fixed threshold of 0.5."""
    return getKeypoints(probMap, 0.5)
def getPose(output, frameWidth=None, frameHeight=None):
    """Turn the raw network output of one frame into person-wise keypoints.

    Args:
        output: network output of one frame, indexed ``[channel, row, col]``.
            Channels 0-17 are read as keypoint confidence maps; the PAF
            channels are consumed by ``getValidPairs``.
        frameWidth: width the maps are resized to before peak detection.
        frameHeight: height the maps are resized to before peak detection.
            When either value is omitted, a module-level variable of the same
            name is used instead, which is what the one-argument call relied on.

    Returns:
        ``(detected_keypoints, keypoints_list, personwiseKeypoints)``:
        per keypoint type a list of ``(x, y, confidence, keypoint_id)``
        tuples, an ``(n, 3)`` array with one ``(x, y, confidence)`` row per
        keypoint id, and the result of ``getPersonwiseKeypoints``.

    Raises:
        NameError: if the frame size is neither passed in nor available as
            module-level ``frameWidth`` / ``frameHeight``.
    """
    # The frame size used to be read from bare names that nothing visible in
    # this file defines at module level. Prefer explicit arguments, keep the
    # global lookup as a fallback, and fail with an explanation otherwise.
    if frameWidth is None:
        frameWidth = globals().get("frameWidth")
    if frameHeight is None:
        frameHeight = globals().get("frameHeight")
    if frameWidth is None or frameHeight is None:
        raise NameError(
            "frameWidth/frameHeight are not defined: pass them to getPose() "
            "or set them as module-level variables")

    threshold = 0.5

    # Keypoint candidates for each of the 18 keypoint types.
    candidates_per_part = [
        getKeypoints(cv2.resize(output[part, :, :], (frameWidth, frameHeight)), threshold)
        for part in range(18)
    ]

    # Give every candidate a running id; the id doubles as its row index in
    # keypoints_list.
    detected_keypoints = []
    rows = []
    for candidates in candidates_per_part:
        with_id = []
        for candidate in candidates:
            with_id.append(candidate + (len(rows),))
            rows.append(candidate)
        detected_keypoints.append(with_id)

    # Built once at the end; reshape keeps the (0, 3) shape when nothing was found.
    keypoints_list = np.array(rows, dtype=float).reshape(-1, 3)

    valid_pairs, invalid_pairs = getValidPairs(output, detected_keypoints, frameWidth, frameHeight)
    personwiseKeypoints = getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list)
    return detected_keypoints, keypoints_list, personwiseKeypoints
def getPoseFromDNN(net, images, frameWidth, frameHeight):
    """Forward a batch of images through the network.

    Args:
        net: network object providing ``setInput`` and ``forward``.
        images: sequence of equally sized images (the caller sends up to 20).
        frameWidth: width of the images, used to keep the aspect ratio.
        frameHeight: height of the images, used to keep the aspect ratio.

    Returns:
        Whatever ``net.forward()`` returns for the batch.
    """
    # The network input is 368 pixels high; the width follows the frame's
    # aspect ratio.
    inHeight = 368
    inWidth = int((inHeight / frameHeight) * frameWidth)

    batch_blob = cv2.dnn.blobFromImages(
        np.array(images),
        1.0 / 255,              # scale pixel values to 0..1
        (inWidth, inHeight),
        (0, 0, 0),              # no mean subtraction
        swapRB=False,
        crop=False,
    )
    net.setInput(batch_blob)
    return net.forward()
def visualize(image, df, frame):
    """Show ``image`` with the skeletons stored for ``frame`` drawn on top.

    Args:
        image: image to display with ``plt.imshow``.
        df: DataFrame with a ``Frame`` column and one ``ID<n>_Keypoints``
            column per person. Each cell holds a sequence of 18 keypoints
            (``None`` for a missing keypoint) or ``None`` for a missing person.
        frame: value of the ``Frame`` column to draw. If no row has that frame
            number only the image is shown.
    """
    number_ids = len([col for col in df.columns if 'ID' in col])

    plt.imshow(image)
    plt.axis('off')

    # Select the row by its frame number rather than by position, so the
    # lookup stays correct if the frame has been filtered or re-ordered.
    rows = df.loc[df['Frame'] == frame]
    if len(rows) > 0:
        row = rows.iloc[0]
        for id_no in range(number_ids):
            keypoints = row['ID%i_Keypoints' % id_no]
            if keypoints is None:
                # This person was not available in this frame.
                continue
            for i, (index_a, index_b) in enumerate(POSE_PAIRS):
                A = keypoints[index_a]
                B = keypoints[index_b]
                if A is None or B is None:
                    continue
                # POSE_PAIRS has more entries than the colour table, so wrap
                # around. Matplotlib wants RGB components in 0..1 while the
                # table stores 0..255 integers.
                rgb = tuple(colors[i % len(colors)] / 255.0)
                plt.plot((A[0], B[0]), (A[1], B[1]), c=rgb)

    plt.show()
def _object_column(values):
    """Pack arbitrary Python objects into a 1-D object array, one per slot."""
    # Filled element by element so NumPy never tries to merge equally shaped
    # (or ragged) entries into one multi-dimensional array.
    column = np.empty(len(values), dtype=object)
    for position, value in enumerate(values):
        column[position] = value
    return column


def _split_persons(keypoints_list, personwise, number_ids):
    """Resolve the keypoint ids of the first ``number_ids`` persons of a frame.

    Returns one entry per person: an object array of 18 keypoints (``None``
    where the keypoint id is ``-1``). Frames with fewer than ``number_ids``
    persons are discarded, i.e. every entry is ``None``.
    """
    if len(personwise) < number_ids:
        return [None] * number_ids
    # The trailing None is what the id -1 ("not detected") indexes into.
    lookup = _object_column(list(keypoints_list) + [None])
    return [lookup[personwise[id_no].astype(int)[:18]] for id_no in range(number_ids)]


def _match_ids(persons, references):
    """Assign the persons of one frame to the identities tracked so far.

    ``references`` holds, per identity, the last known keypoint 1 ('Neck') or
    ``None``. Persons are compared to identities by the distance between those
    keypoints. Returns one entry per identity: the matched person or ``None``.
    """
    candidates = []
    for person_id, person in enumerate(persons):
        if person is None or person[1] is None:
            continue
        for slot, reference in enumerate(references):
            if reference is None:
                continue
            distance = np.linalg.norm(np.array(person[1]) - np.array(reference))
            candidates.append((distance, person_id, slot))
    candidates.sort(key=lambda candidate: candidate[0])

    # Pass 1: each identity keeps its closest person.
    closest_per_slot = {}
    for _, person_id, slot in candidates:
        closest_per_slot.setdefault(slot, person_id)

    # Pass 2: a person wanted by several identities goes to the closest one.
    # NOTE(review): the losing identity stays empty for this frame even when
    # another person is still unassigned; this mirrors the previous behaviour.
    assigned = [None] * len(references)
    taken = set()
    for slot, person_id in closest_per_slot.items():
        if person_id not in taken:
            taken.add(person_id)
            assigned[slot] = persons[person_id]
    return assigned


def process(file):
    """Run OpenPose on the extracted frames of a video and save the results.

    Steps:
        1. Forward all ``*.jpg`` frames of ``<video dir>/../temp/<name>_frames``
           through the network in batches of 20; save the raw output to
           ``<name>_OpenPose_raw.pkl``.
        2. Parse every frame with ``getPose`` (in parallel where possible)
           and save the result to ``<name>_OpenPose.pkl``.
        3. Split the keypoints per person, keep the person identities
           consistent across frames and save ``<name>_BodyMovement.pkl``.
        4. Show the first frame with its skeletons.

    Args:
        file: path of the video, using ``/`` as separator.

    Returns:
        None. A warning is printed and the function returns early when the
        model files, the frame directory or the frames are missing, or when
        the platform is not supported.
    """
    # get_keypoints must be a module-level name so the worker pool can pickle
    # it by reference. frameWidth/frameHeight are published for getPose(),
    # which reads the frame size from module-level variables when it is not
    # passed in.
    global get_keypoints, frameWidth, frameHeight
    from multiprocessing import get_context

    POSE_PROTO_FILE = r"openpose/pose_deploy_linevec.prototxt"
    POSE_WEIGHTS_FILE = r"openpose/pose_iter_440000.caffemodel"
    BATCH_SIZE = 20

    if not os.path.exists(POSE_PROTO_FILE):
        print('WARNING: Could not find pose file %s' % POSE_PROTO_FILE)
        return
    if not os.path.exists(POSE_WEIGHTS_FILE):
        print('WARNING: Could not find model weights file %s' % POSE_WEIGHTS_FILE)
        return

    VIDEO = file
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)
    if not os.path.exists(FRAMES):
        print('WARNING: Could not find frame directory')
        return

    frame_names = sorted(name for name in os.listdir(FRAMES) if '.jpg' in name)
    if not frame_names:
        print('WARNING: Could not find any frames in %s' % FRAMES)
        return

    # Load Model #
    net = cv2.dnn.readNetFromCaffe(POSE_PROTO_FILE, POSE_WEIGHTS_FILE)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    # --- 1. network forward pass, in batches -------------------------------
    first_image = None
    poses = []
    batch = []
    for name in tqdm(frame_names):
        image = cv2.imread(os.path.join(FRAMES, name))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if first_image is None:
            # The first frame defines the frame size and is kept for the
            # final visualisation of frame 0.
            first_image = image
            frameHeight, frameWidth = image.shape[:2]
        batch.append(image)
        if len(batch) == BATCH_SIZE:
            poses.extend(getPoseFromDNN(net, batch, frameWidth, frameHeight))
            batch = []
    if batch:
        poses.extend(getPoseFromDNN(net, batch, frameWidth, frameHeight))
        batch = []

    print(len(poses))
    df = pd.DataFrame({"Frame": range(len(poses))})
    df["Pose"] = _object_column(poses)
    path = "%s%s_OpenPose_raw.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)

    # --- 2. parse the network output per frame -----------------------------
    def get_keypoints(frames):
        # Frame numbers equal list positions because Frame is range(len(poses)).
        return [getPose(poses[frame_no]) for frame_no in frames]

    frame_numbers = df.Frame.to_numpy()
    if platform == "linux" or platform == "linux2" or platform == "darwin":
        cores = max(4, cpu_count() - 4)
        print('Run sorting on {} cores'.format(cores))
        # The workers reach get_keypoints and the network output through the
        # memory inherited from this process, which only works with the
        # "fork" start method, so request it explicitly.
        with get_context("fork").Pool(cores) as pool:
            chunks = pool.map(get_keypoints, np.array_split(frame_numbers, cores))
        results = list(itertools.chain.from_iterable(chunks))
    elif platform == "win32":
        print('WARNING: Can\'t run multiprocessing on Windows, this might take a while...')
        results = get_keypoints(frame_numbers)
    else:
        print('WARNING: platform not supported')
        return

    # One (detected_keypoints, keypoints_list, personwise) triple per frame.
    # The entries are ragged, so they are stored as object columns.
    df["DetectedKeypoints"] = _object_column([result[0] for result in results])
    df["KeypointsList"] = _object_column([result[1] for result in results])
    df["PersonwiseKeypoints"] = _object_column([result[2] for result in results])
    path = "%s%s_OpenPose.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)
    print("Saved OpenPose detections to %s" % path)

    # 'Pic' is not created anywhere in this function; ignore missing columns.
    df = df.drop(columns=["Pose", "Pic", "DetectedKeypoints"], errors="ignore")

    # --- 3. per-person keypoints with consistent identities ----------------
    # The number of persons in the first frame defines the number of IDs.
    number_ids = len(df.PersonwiseKeypoints.iloc[0])
    print('Number of detected IDs: ', number_ids)
    for id_no in range(number_ids):
        print('ID%i' % id_no)

    # Map personwise keypoints to a list of keypoints for each ID.
    per_frame = [
        _split_persons(keypoints_list, personwise, number_ids)
        for keypoints_list, personwise in zip(df.KeypointsList, df.PersonwiseKeypoints)
    ]

    # Sort IDs to be consistent throughout the video. The first frame defines
    # the identities; each later frame is matched against the most recent
    # known neck position of every identity.
    references = [None if person is None else person[1] for person in per_frame[0]]
    tracked = [list(per_frame[0])]
    for persons in per_frame[1:]:
        assigned = _match_ids(persons, references)
        for slot, person in enumerate(assigned):
            if person is not None:
                references[slot] = person[1]
        tracked.append(assigned)

    df_new = pd.DataFrame(index=range(len(tracked)))
    for slot in range(number_ids):
        df_new['ID%i_Keypoints' % slot] = _object_column([row[slot] for row in tracked])
    df_new["Frame"] = np.arange(len(tracked), dtype="int32")

    path = "%s%s_BodyMovement.pkl" % (TMP_DIR, VIDEOOUT)
    df_new.to_pickle(path)
    print('Saved Body Movement to %s' % path)

    # --- 4. show the first frame with its own keypoints --------------------
    visualize(first_image, df_new, 0)