# conan/processing/process_OpenPose.py

#!/usr/bin/env python
# coding: utf-8

# # OpenPose pose detection
# 2D real-time multi-person keypoint detection:
# **18**-keypoint body/foot keypoint estimation. Running time invariant to number of detected people
# see https://github.com/CMU-Perceptual-Computing-Lab/openpose
#
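# ## Pipeline
# - Run 18-keypoint model on video frames (raw network output saved to <video>_OpenPose_raw.pkl)
# - Parse keypoints and PAFs to generate personwise keypoints (saved to <video>_OpenPose.pkl)
# - Sort person IDs consistently across frames (saved to <video>_BodyMovement.pkl)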

import os
import numpy as np
import cv2
import sys
from sys import platform
import time
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm, tqdm_pandas

tqdm.pandas()

from multiprocessing import cpu_count
from multiprocessing import Pool
import itertools
import os

# Environment switches for CUDA / OpenCV; they are set at import time, i.e.
# before any network is created in this module.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "1",
    "OPENCV_DNN_OPENCL_ALLOW_ALL_DEVICES": "1",
})

# Limbs as pairs of keypoint indices (indices refer to keypointsMapping).
POSE_PAIRS = [
    [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7],
    [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
    [1, 0], [0, 14], [14, 16], [0, 15], [15, 17],
    [2, 17], [5, 16],
]

# Human-readable name of each of the 18 keypoints, in index order.
keypointsMapping = [
    'Nose', 'Neck', 'R-Sho', 'R-Elb', 'R-Wr', 'L-Sho', 'L-Elb', 'L-Wr', 'R-Hip',
    'R-Knee', 'R-Ank', 'L-Hip', 'L-Knee', 'L-Ank', 'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear',
]

# For the limb at the same position in POSE_PAIRS: the two channels of the
# network output that getValidPairs reads as that limb's affinity field.
mapIdx = [
    [31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
    [19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
    [47, 48], [49, 50], [53, 54], [51, 52], [55, 56],
    [37, 38], [45, 46],
]

# Drawing colours, one row of three 0-255 components per entry (18 rows).
colors = np.array([
    (0, 100, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255), (0, 255, 255), (0, 100, 255),
    (0, 255, 0), (255, 200, 100), (255, 0, 255), (0, 255, 0), (255, 200, 100), (255, 0, 255),
    (0, 0, 255), (255, 0, 0), (200, 200, 0), (255, 0, 0), (200, 200, 0), (0, 0, 0),
])

# Same colours as a plain list of tuples.
my_color = [tuple(row) for row in colors]


# ## Auxiliary Functions
# see https://www.learnopencv.com/deep-learning-based-human-pose-estimation-using-opencv-cpp-python/
def getKeypoints(probMap, threshold=0.8):
    """Locate the peaks of a single confidence map.

    The map is smoothed with a 3x3 Gaussian, thresholded, and split into
    contours; every contour contributes the position of its strongest
    smoothed response.

    Args:
        probMap: 2-D map, indexed as ``probMap[row, col]``.
        threshold: smoothed values must exceed this to belong to a blob.

    Returns:
        A list with one ``(x, y, score)`` tuple per blob, where ``score`` is
        the un-smoothed value ``probMap[y, x]``.
    """
    smoothed = cv2.GaussianBlur(probMap, (3, 3), 0, 0)
    binary = np.uint8(smoothed > threshold)

    # Two-value unpacking: older OpenCV versions returned three values here
    # (see https://github.com/facebookresearch/maskrcnn-benchmark/issues/339).
    blobs, _hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    peaks = []
    for blob in blobs:
        # Zero out everything outside this blob, then take the maximum.
        region = cv2.fillConvexPoly(np.zeros(binary.shape), blob, 1)
        _, _, _, (x, y) = cv2.minMaxLoc(smoothed * region)
        peaks.append((x, y, probMap[y, x]))
    return peaks


# Find valid connections between the joints of all persons present
def getValidPairs(output, detected_keypoints, frameWidth, frameHeight):
    """Score candidate limbs and keep the best partner for every joint A.

    For each limb (position ``k`` in ``mapIdx`` / ``POSE_PAIRS``) the two
    affinity-field channels are resized to the frame size. Every candidate of
    the first joint is compared with every candidate of the second joint: the
    field is sampled at 10 points on the segment between them and projected
    onto the unit vector from A to B.

    Args:
        output: network output, indexed as ``output[channel, row, col]``.
        detected_keypoints: per keypoint index, a list of candidates whose
            entries are read as ``(x, y, score, id)``.
        frameWidth: width the affinity fields are resized to.
        frameHeight: height the affinity fields are resized to.

    Returns:
        ``(valid_pairs, invalid_pairs)``. ``valid_pairs[k]`` is an ``(n, 3)``
        array with rows ``[id_A, id_B, average_score]``, or ``[]`` when one of
        the two joints has no candidates; ``invalid_pairs`` lists those ``k``.
    """
    n_interp_samples = 10
    paf_score_th = 0.1
    conf_th = 0.7

    valid_pairs = []
    invalid_pairs = []
    for limb, channels in enumerate(mapIdx):
        # The two channels together hold the 2-D field for this limb.
        pafA = cv2.resize(output[channels[0], :, :], (frameWidth, frameHeight))
        pafB = cv2.resize(output[channels[1], :, :], (frameWidth, frameHeight))

        candA = detected_keypoints[POSE_PAIRS[limb][0]]
        candB = detected_keypoints[POSE_PAIRS[limb][1]]

        # Nothing to connect if either end of the limb was not detected.
        if len(candA) == 0 or len(candB) == 0:
            invalid_pairs.append(limb)
            valid_pairs.append([])
            continue

        connections = np.zeros((0, 3))
        for start in candA:
            best_end = None
            best_score = -1
            for end in candB:
                # Unit vector from A to B; coincident points cannot form a limb.
                direction = np.subtract(end[:2], start[:2])
                length = np.linalg.norm(direction)
                if not length:
                    continue
                direction = direction / length

                # Sample the field along the segment A -> B.
                xs = np.linspace(start[0], end[0], num=n_interp_samples)
                ys = np.linspace(start[1], end[1], num=n_interp_samples)
                samples = [
                    [pafA[int(round(y)), int(round(x))], pafB[int(round(y)), int(round(x))]]
                    for x, y in zip(xs, ys)
                ]

                paf_scores = np.dot(samples, direction)
                avg_paf_score = sum(paf_scores) / len(paf_scores)

                # Accept only if enough samples are aligned with the field,
                # then keep the highest average score seen so far.
                aligned_fraction = len(np.where(paf_scores > paf_score_th)[0]) / n_interp_samples
                if aligned_fraction > conf_th and avg_paf_score > best_score:
                    best_end = end
                    best_score = avg_paf_score

            if best_end is not None:
                connections = np.append(connections, [[start[3], best_end[3], best_score]], axis=0)

        valid_pairs.append(connections)

    return valid_pairs, invalid_pairs


# Group the connected keypoints into one row per person
def getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list):
    """Assemble limb connections into one keypoint row per person.

    Args:
        valid_pairs: per limb, an ``(n, 3)`` array of ``[id_A, id_B, score]``.
        invalid_pairs: limb indices to skip.
        keypoints_list: array whose row ``id`` holds the keypoint with that id;
            column 2 is read as its score.

    Returns:
        Array of shape ``(persons, 19)``. Columns 0-17 hold the keypoint id for
        each keypoint index (-1 when unassigned); the last column accumulates
        keypoint scores and connection scores.
    """
    people = -1 * np.ones((0, 19))
    for limb in range(len(mapIdx)):
        if limb in invalid_pairs:
            continue

        pairs = valid_pairs[limb]
        idsA = pairs[:, 0]
        idsB = pairs[:, 1]
        jointA, jointB = POSE_PAIRS[limb]

        for pair, idA, idB in zip(pairs, idsA, idsB):
            # First person that already owns joint A of this connection.
            owners = np.flatnonzero(people[:, jointA] == idA)
            if owners.size:
                owner = owners[0]
                people[owner][jointB] = idB
                people[owner][-1] += keypoints_list[idB.astype(int), 2] + pair[2]
            elif limb < 17:
                # Joint A belongs to nobody yet: start a new person. The last
                # two limbs never create a person.
                newcomer = -1 * np.ones(19)
                newcomer[jointA] = idA
                newcomer[jointB] = idB
                # Scores of both keypoints plus the connection score.
                newcomer[-1] = sum(keypoints_list[pair[:2].astype(int), 2]) + pair[2]
                people = np.vstack([people, newcomer])
    return people


def f(probMap):
    """Run getKeypoints on one confidence map with a fixed threshold of 0.5."""
    return getKeypoints(probMap, 0.5)


def getPose(output, frameWidth=None, frameHeight=None):
    """Turn the raw network output of one frame into per-person keypoints.

    Channels 0-17 of ``output`` are resized to the frame size and searched for
    peaks (threshold 0.5). Every peak receives a running id, the peaks are
    connected into limbs by ``getValidPairs`` and grouped into persons by
    ``getPersonwiseKeypoints``.

    Args:
        output: network output for one frame, indexed as
            ``output[channel, row, col]``.
        frameWidth: width the maps are resized to. When omitted, the
            module-level global ``frameWidth`` is used.
        frameHeight: height the maps are resized to. When omitted, the
            module-level global ``frameHeight`` is used.

    Returns:
        ``(detected_keypoints, keypoints_list, personwiseKeypoints)`` where
        ``detected_keypoints`` holds, per keypoint index, a list of
        ``(x, y, score, id)`` tuples, ``keypoints_list`` is an ``(n, 3)`` array
        of ``(x, y, score)`` rows ordered by id, and ``personwiseKeypoints`` is
        the result of ``getPersonwiseKeypoints``.

    Raises:
        NameError: if the frame size is neither passed in nor available as
            module-level globals.
    """
    # The original implementation read the frame size from globals that this
    # module never defines on its own; keep that lookup as a fallback so that
    # existing one-argument calls continue to work wherever the globals exist.
    if frameWidth is None:
        frameWidth = globals().get("frameWidth")
    if frameHeight is None:
        frameHeight = globals().get("frameHeight")
    if frameWidth is None or frameHeight is None:
        raise NameError(
            "getPose needs frameWidth and frameHeight: pass them as arguments "
            "or define them as module-level globals"
        )

    threshold = 0.5
    detected_keypoints = []
    flat_keypoints = []
    keypoint_id = 0

    for part in range(18):
        probMap = cv2.resize(output[part, :, :], (frameWidth, frameHeight))
        keypoints_with_id = []
        for keypoint in getKeypoints(probMap, threshold):
            keypoints_with_id.append(keypoint + (keypoint_id,))
            flat_keypoints.append(keypoint)
            keypoint_id += 1
        detected_keypoints.append(keypoints_with_id)

    # Build the (n, 3) table once instead of re-stacking for every keypoint;
    # the reshape keeps the (0, 3) shape when nothing was detected.
    keypoints_list = np.array(flat_keypoints, dtype=np.float64).reshape(-1, 3)

    valid_pairs, invalid_pairs = getValidPairs(output, detected_keypoints, frameWidth, frameHeight)
    personwiseKeypoints = getPersonwiseKeypoints(valid_pairs, invalid_pairs, keypoints_list)

    return detected_keypoints, keypoints_list, personwiseKeypoints


"""Forward array of 20 images"""


def getPoseFromDNN(net, images, frameWidth, frameHeight):
    """Forward a batch of images through the network and return its output.

    Args:
        net: network object providing ``setInput`` and ``forward``.
        images: sequence of images forming one batch.
        frameWidth: width of the original frames.
        frameHeight: height of the original frames.

    Returns:
        Whatever ``net.forward()`` returns for the batch.
    """
    # Fixed input height; the width follows from the frame's aspect ratio.
    inHeight = 368
    inWidth = int((inHeight / frameHeight) * frameWidth)

    batch = np.array(images)
    # Scale pixel values by 1/255, no mean subtraction, no channel swap, no crop.
    inpBlob = cv2.dnn.blobFromImages(batch, 1.0 / 255, (inWidth, inHeight), (0, 0, 0),
                                     swapRB=False, crop=False)
    net.setInput(inpBlob)
    return net.forward()


def visualize(image, df, frame):
    """Draw the skeleton of every ID for one frame on top of an image.

    Args:
        image: image handed to ``plt.imshow``.
        df: DataFrame with a ``Frame`` column and one ``ID<n>_Keypoints``
            column per ID. Each cell is either None or a sequence with one
            entry per keypoint index; an entry is None when that keypoint is
            missing, otherwise its first two items are read as x and y.
        frame: value of the ``Frame`` column selecting the row to draw.
    """
    number_ids = len([col for col in df.columns if 'ID' in col])
    # Select by frame number rather than by row position, so the right row is
    # drawn even when the DataFrame does not start at frame 0.
    data = df.loc[df['Frame'] == frame]

    plt.imshow(image)
    plt.axis('off')

    if len(data) == 0:
        print('WARNING: Frame %s not found, drawing image only' % frame)
        plt.show()
        return
    row = data.iloc[0]

    # matplotlib expects colour components in the 0-1 range, while `colors`
    # stores 0-255 values.
    limb_colors = colors / 255.0

    for id_no in range(number_ids):
        keypoints = row['ID%i_Keypoints' % id_no]
        # Skip IDs without a detection in this frame (None, or NaN if the
        # column was converted by pandas).
        if keypoints is None or isinstance(keypoints, float):
            continue
        for i, (idx_a, idx_b) in enumerate(POSE_PAIRS):
            A, B = keypoints[idx_a], keypoints[idx_b]
            if A is not None and B is not None:
                # There are more limbs than colours, so wrap around.
                plt.plot((A[0], B[0]), (A[1], B[1]), c=limb_colors[i % len(limb_colors)])

    plt.show()

def process(file):
    """Run pose detection on the extracted frames of a video and save the results.

    Steps:
        1. Forward all ``.jpg`` frames through the network in batches of 20 and
           pickle the raw output (``<video>_OpenPose_raw.pkl``).
        2. Convert every frame's output into keypoints with ``getPose`` and
           pickle them (``<video>_OpenPose.pkl``).
        3. Build one keypoint column per person, keep the person order
           consistent across frames by matching neck positions, and pickle the
           result (``<video>_BodyMovement.pkl``).
        4. Show the skeletons of frame 0.

    Args:
        file: path of the video, using ``/`` separators. Frames are read from
            ``<grandparent directory>/temp/<video name>_frames`` and all
            pickles are written to ``<grandparent directory>/temp/``.

    Returns:
        None. The function prints a warning and returns early when the model
        files, the frame directory or the frames are missing, when the
        platform is unsupported, or when the first frame contains no person.
    """
    # get_keypoints is made a module-level name, presumably so that
    # multiprocessing can pickle it by name. NOTE(review): it still closes
    # over the local `df`, which looks like it only works with the "fork"
    # start method - confirm.
    # frameWidth / frameHeight are published as module globals because getPose
    # reads the frame size from there.
    global get_keypoints, frameWidth, frameHeight

    POSE_PROTO_FILE = r"openpose/pose_deploy_linevec.prototxt"
    POSE_WEIGHTS_FILE = r"openpose/pose_iter_440000.caffemodel"

    if not os.path.exists(POSE_PROTO_FILE):
        print('WARNING: Could not find pose file %s' % POSE_PROTO_FILE)
        return
    if not os.path.exists(POSE_WEIGHTS_FILE):
        print('WARNING: Could not find model weights file %s' % POSE_WEIGHTS_FILE)
        return

    VIDEO = file
    VIDEOOUT = VIDEO.split("/")[-1].split(".")[0]
    TMP_DIR = "/".join(VIDEO.split("/")[:-2]) + "/temp/"
    FRAMES = "%s%s_frames" % (TMP_DIR, VIDEOOUT)

    if not os.path.exists(FRAMES):
        print('WARNING: Could not find frame directory')
        return

    path_list = sorted(f for f in os.listdir(FRAMES) if '.jpg' in f)
    if not path_list:
        print('WARNING: Could not find any .jpg frames in %s' % FRAMES)
        return

    # Load Model #
    net = cv2.dnn.readNetFromCaffe(POSE_PROTO_FILE, POSE_WEIGHTS_FILE)
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    # Forward the frames in batches of 20; the frame size is taken from the
    # first frame and reused for all others.
    batch_size = 20
    first_image = None
    lst = []
    images = []
    for path in tqdm(path_list):
        image = cv2.imread(os.path.join(FRAMES, path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if first_image is None:
            first_image = image
            frameWidth = image.shape[1]
            frameHeight = image.shape[0]
        images.append(image)

        if len(images) == batch_size:
            lst.extend(getPoseFromDNN(net, images, frameWidth, frameHeight))
            images = []

    # Remaining frames that did not fill a whole batch.
    if images:
        lst.extend(getPoseFromDNN(net, images, frameWidth, frameHeight))
        images = []

    print(len(lst))
    df = pd.DataFrame(range(len(lst)))
    df.columns = ["Frame"]
    df["Pose"] = lst

    path = "%s%s_OpenPose_raw.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)

    def get_keypoints(frames):
        ret = []
        for f in frames:
            output = df[df.Frame == f].Pose.iloc[0]
            ret.append(getPose(output))
        return ret

    if platform == "linux" or platform == "linux2" or platform == "darwin":
        cores = max(4, cpu_count() - 4)
        print('Run sorting on {} cores'.format(cores))
        data_split = np.array_split(df.Frame, cores)
        pool = Pool(cores)
        try:
            chunks = pool.map(get_keypoints, data_split)
        finally:
            pool.close()
            pool.join()
        # One list per worker chunk -> one flat list with an entry per frame.
        data = [result for chunk in chunks for result in chunk]
    elif platform == "win32":
        print('WARNING: Can\'t run multiprocessing on Windows, this might take a while...')
        data = get_keypoints(df.Frame)
    else:
        # Without this return the code below would fail on the undefined data.
        print('WARNING: platform not supported')
        return

    # The three results per frame are ragged, so they are stored as explicit
    # object columns instead of being stacked into one array.
    df["DetectedKeypoints"] = _object_column([result[0] for result in data])
    df["KeypointsList"] = _object_column([result[1] for result in data])
    df["PersonwiseKeypoints"] = _object_column([result[2] for result in data])

    path = "%s%s_OpenPose.pkl" % (TMP_DIR, VIDEOOUT)
    df.to_pickle(path)
    print("Saved OpenPose detections to %s" % path)

    # Drop the bulky columns that are no longer needed; 'Pic' is only removed
    # if it exists (it is never created in this function).
    for obsolete in ("Pose", "Pic", "DetectedKeypoints"):
        if obsolete in df.columns:
            del df[obsolete]

    # The number of persons in the first frame defines the number of IDs.
    number_ids = len(df.PersonwiseKeypoints.iloc[0])
    print('Number of detected IDs: ', number_ids)
    if number_ids == 0:
        print('WARNING: No person detected in first frame, nothing to sort')
        return

    def keypoints_for_id(row, id_no):
        """Map one person's keypoint ids of one frame to the keypoints themselves."""
        # Discard frames where not all ids detected
        if len(row.PersonwiseKeypoints) < number_ids:
            return None
        # Index -1 means "no detection"; the trailing None makes it map to None.
        lookup = _object_column(list(row.KeypointsList) + [None])
        return lookup[row.PersonwiseKeypoints[id_no].astype(int)[:18]]

    id_columns = ['ID%i_Keypoints' % id_no for id_no in range(number_ids)]
    for id_no, col in enumerate(id_columns):
        print('ID%i' % id_no)
        df[col] = _object_column([keypoints_for_id(row, id_no) for row in df.itertuples()])

    # Sort IDs to be consistent throughout the video: every detection of a
    # frame is matched to the slot whose last known neck position is closest.
    data = df[id_columns]
    first_row = list(data.iloc[0].values) + [0]
    lst = [first_row]

    # Last known neck keypoint (keypoint index 1) of every slot.
    last_neck = [None] * number_ids
    for j in range(number_ids):
        if first_row[j] is not None and first_row[j][1] is not None:
            last_neck[j] = first_row[j][1]

    for i in range(1, len(df.Frame)):
        row = data.iloc[i]
        candidates = []
        for ids in range(number_ids):
            keypoints = row['ID%i_Keypoints' % ids]

            if keypoints is not None and keypoints[1] is not None:
                for j in range(number_ids):
                    if last_neck[j] is None:
                        # Slot never seen with a neck: worst possible match,
                        # but still available if nothing better claims it.
                        distance = np.inf
                    else:
                        # NOTE(review): the difference is taken over the whole
                        # keypoint entry, not only x and y - confirm intent.
                        distance = np.linalg.norm(np.array(keypoints[1]) - np.array(last_neck[j]))
                    candidates.append([ids, j, distance, keypoints])
            else:
                candidates.append([ids, None, None, None])

        dfX = pd.DataFrame(candidates, columns=["Id", "GtId", "Distance", "Keypoints"])
        dfX = dfX.sort_values("Distance")
        # Greedy assignment: closest pair first, every slot and every
        # detection used at most once.
        dfX = dfX.drop_duplicates("GtId").drop_duplicates("Id")

        lstRow = []
        for j in range(number_ids):
            match = dfX[dfX.GtId == j]
            lstRow.append(match.iloc[0].Keypoints if len(match) > 0 else None)
            if lstRow[j] is not None and lstRow[j][1] is not None:
                last_neck[j] = lstRow[j][1]
        lstRow.append(i)
        lst.append(lstRow)

    df_new = pd.DataFrame(lst, columns=id_columns + ["Frame"])
    df_new = df_new.astype({'Frame': 'int32'})

    path = "%s%s_BodyMovement.pkl" % (TMP_DIR, VIDEOOUT)
    df_new.to_pickle(path)
    print('Saved Body Movement to %s' % path)

    # Show frame 0 on top of the first image (not the last one that was read).
    visualize(first_image, df_new, 0)


def _object_column(values):
    """Return a 1-D object array holding each item of *values* unchanged."""
    column = np.empty(len(values), dtype=object)
    # Element-wise assignment keeps nested lists / arrays as single cells.
    for idx, value in enumerate(values):
        column[idx] = value
    return column