{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocessing for LSTM: Blobdetection and Cutting" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "from scipy.odr import *\n", "from scipy.stats import *\n", "import numpy as np\n", "import pandas as pd\n", "import os\n", "import time\n", "import matplotlib.pyplot as plt\n", "import ast\n", "from multiprocessing import Pool, cpu_count\n", "\n", "import scipy\n", "\n", "from IPython import display\n", "from matplotlib.patches import Rectangle\n", "\n", "from sklearn.metrics import mean_squared_error\n", "import json\n", "\n", "import scipy.stats as st\n", "from sklearn.metrics import r2_score\n", "\n", "\n", "from matplotlib import cm\n", "from mpl_toolkits.mplot3d import axes3d\n", "import matplotlib.pyplot as plt\n", "\n", "import copy\n", "\n", "from sklearn.model_selection import LeaveOneOut, LeavePOut\n", "\n", "from multiprocessing import Pool\n", "import cv2" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df_filtered = pd.read_pickle(\"DataStudyCollection/df_lstm.pkl\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIDTimestampCurrent_TaskTask_amountTaskIDVersionIDRepetitionIDActual_DataIs_PauseImageIsMaxMaxRepetition
29198011,54515E+1233680020TrueFalse[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...True0
29198111,54515E+1233680020TrueFalse[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...True0
29198211,54515E+1233680020TrueFalse[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...True0
29198311,54515E+1233680020TrueFalse[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...True0
29198411,54515E+1233680020TrueFalse[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...True0
\n", "
" ], "text/plain": [ " userID Timestamp Current_Task Task_amount TaskID VersionID \\\n", "291980 1 1,54515E+12 33 680 0 2 \n", "291981 1 1,54515E+12 33 680 0 2 \n", "291982 1 1,54515E+12 33 680 0 2 \n", "291983 1 1,54515E+12 33 680 0 2 \n", "291984 1 1,54515E+12 33 680 0 2 \n", "\n", " RepetitionID Actual_Data Is_Pause \\\n", "291980 0 True False \n", "291981 0 True False \n", "291982 0 True False \n", "291983 0 True False \n", "291984 0 True False \n", "\n", " Image IsMax \\\n", "291980 [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ... True \n", "291981 [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ... True \n", "291982 [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ... True \n", "291983 [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ... True \n", "291984 [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ... True \n", "\n", " MaxRepetition \n", "291980 0 \n", "291981 0 \n", "291982 0 \n", "291983 0 \n", "291984 0 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_filtered.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df_filtered.Image = df_filtered.Image.apply(lambda x: x.reshape(27, 15))\n", "df_filtered.Image = df_filtered.Image.apply(lambda x: x.clip(min=0, max=255))\n", "df_filtered.Image = df_filtered.Image.apply(lambda x: x.astype(np.uint8))\n", "df_filtered[\"ImageSum\"] = df_filtered.Image.apply(lambda x: np.sum(x))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#LSTMs new Blob detection (only detect, if there are blobs)\n", "def detect_blobs(image):\n", " #image = image.reshape(27, 15)\n", " large = np.ones((29,17), dtype=np.uint8)\n", " large[1:28,1:16] = image\n", " temp, thresh = cv2.threshold(cv2.bitwise_not(large), 200, 255, cv2.THRESH_BINARY)\n", " contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n", " contours = [a for a in contours if cv2.contourArea(a) > 8 and cv2.contourArea(a) < 255]\n", " lstBlob = []\n", " lstMin = []\n", " lstMax = []\n", " count = 0\n", " return len(contours) > 0" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 3.42 s, sys: 1.14 s, total: 4.57 s\n", "Wall time: 4.94 s\n" ] } ], "source": [ "%%time\n", "pool = Pool(cpu_count() - 1)\n", "temp_blobs = pool.map(detect_blobs, df_filtered.Image)\n", "pool.close()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df_filtered[\"ContainsBlobs\"] = temp_blobs" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "#Label if knuckle or finger\n", "def f(row):\n", " if row['TaskID'] < 17:\n", " #val = \"Knuckle\"\n", " val = 0\n", " elif row['TaskID'] >= 17:\n", " #val = \"Finger\"\n", " val = 1\n", " return val\n", "df_filtered['InputMethod'] = df_filtered.apply(f, axis=1)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "df_filtered.index = range(len(df_filtered))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "2\n", "3\n", "4\n", "5\n", "6\n", "7\n", "8\n", "9\n", "10\n", "11\n", "12\n", "13\n", "14\n", "15\n", "16\n", "17\n", "18\n", "CPU times: user 4min 7s, sys: 424 ms, total: 4min 8s\n", "Wall time: 4min 8s\n" ] } ], "source": [ "%%time\n", "# trim image sequences down to only between first and last detected blob\n", "UserIDs = []\n", "TaskIDs = []\n", "VersionIDs = []\n", "Blobs = []\n", "for userID in df_filtered.userID.unique():\n", " print(userID)\n", " for TaskID in df_filtered[df_filtered.userID == userID].TaskID.unique():\n", " for VersionID in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID)].VersionID.unique():\n", " first_blob = -1\n", " last_blob = -1\n", " for index, row in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID)].iterrows():\n", " if row.ContainsBlobs:\n", " last_blob = index\n", " if first_blob == -1:\n", " first_blob = index\n", " if first_blob >= 0 and last_blob >= 0:\n", " UserIDs.append(userID)\n", " TaskIDs.append(TaskID)\n", " VersionIDs.append(VersionID)\n", " Blobs.append(df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID) & (df_filtered.index >= first_blob) & (df_filtered.index <= last_blob)].Image.tolist())" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "UserIDs = np.array(UserIDs, dtype=np.int64)\n", "TaskIDs = np.array(TaskIDs, dtype=np.int64)\n", "VersionIDs = np.array(VersionIDs, dtype=np.int64)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIDTaskIDVersionIDBlobs
0103[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
1105[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
2106[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 239,...
3107[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
4108[[[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0...
\n", "
" ], "text/plain": [ " userID TaskID VersionID \\\n", "0 1 0 3 \n", "1 1 0 5 \n", "2 1 0 6 \n", "3 1 0 7 \n", "4 1 0 8 \n", "\n", " Blobs \n", "0 [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... \n", "1 [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... \n", "2 [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 239,... \n", "3 [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... \n", "4 [[[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0... " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all = pd.DataFrame()\n", "df_lstm_all[\"userID\"] = UserIDs\n", "df_lstm_all[\"TaskID\"] = TaskIDs\n", "df_lstm_all[\"VersionID\"] = VersionIDs\n", "df_lstm_all[\"Blobs\"] = Blobs\n", "df_lstm_all.Blobs = df_lstm_all.Blobs.map(np.array)\n", "df_lstm_all.head()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIDTaskIDVersionIDBlobsBlobCountGestureOnly
0103[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...380
1105[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...570
2106[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 239,...410
3107[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...200
4108[[[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0...410
\n", "
" ], "text/plain": [ " userID TaskID VersionID \\\n", "0 1 0 3 \n", "1 1 0 5 \n", "2 1 0 6 \n", "3 1 0 7 \n", "4 1 0 8 \n", "\n", " Blobs BlobCount GestureOnly \n", "0 [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 38 0 \n", "1 [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 57 0 \n", "2 [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 239,... 41 0 \n", "3 [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 20 0 \n", "4 [[[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0... 41 0 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all.head()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "df_lstm_all[\"Length\"] = df_lstm_all.Blobs.apply(lambda x: x.shape[0])" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEP1JREFUeJzt3XuMXOV5x/HvE5ubSBpDiFaWbXVNsRQ5oiVoRYgSRVtQwEBVU4lEjlBxIkuWWqImElVrGqnkhgSVCE2iXOQGK06EApQkMgpU1AVGUf8AAuFiDCJsEkd45WAlBidLFNqlT/+Yd+l0s+ud2Z3dubzfj7TyOe9558zz7Fn7t+fMmXFkJpKk+ryp1wVIknrDAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVanWvCziRs846K0dHRxf9+FdffZXTTz+9ewX1yLD0AfbSj4alD7CXGY8//vgvM/PtC83r6wAYHR3lscceW/TjG40G4+Pj3SuoR4alD7CXfjQsfYC9zIiIn7czz0tAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUqb5+J/BKGd11b1vzDt10xTJXIkkrxzMASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkirVdgBExKqIeCIivl/WN0bEIxExERF3RsTJZfyUsj5Rto+27OP6Mv58RFza7WYkSe3r5Azg48BzLes3A7dm5jnAy8COMr4DeLmM31rmERGbgW3AO4EtwFciYtXSypckLVZbARAR64ErgK+X9QAuAu4uU/YCV5blrWWdsv3iMn8rcEdmvpaZPwMmgAu60YQkqXOr25z3z8DfAW8p628DXsnM6bJ+GFhXltcBLwJk5nREHC/z1wEPt+yz9TFviIidwE6AkZERGo1Gu738nqmpqbYef9250wvOAZZUy1K028cgsJf+Myx9gL10asEAiIg/A45m5uMRMb6s1QCZuRvYDTA2Npbj44t/ykajQTuP/8iue9va36GrF1/LUrTbxyCwl/4zLH2AvXSqnTOA9wJ/HhGXA6cCfwB8AVgTEavLWcB6YLLMnwQ2AIcjYjXwVuBXLeMzWh8jSVphC74GkJnXZ+b6zByl+SLug5l5NfAQcFWZth3YV5bvKeuU7Q9mZpbxbeUuoY3AJuDRrnUiSepIu68BzOXvgTsi4nPAE8BtZfw24FsRMQEcoxkaZObBiLgLeBaYBq7NzNeX8PySpCXoKAAyswE0yvJPmeMunsz8HfDBeR5/I3Bjp0VKkrrPdwJLUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqtbrXBSynA5PH+ciue3tdhiT1Jc8AJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkSi0YABFxakQ8GhFPRcTBiPh0Gd8YEY9ExERE3BkRJ5fxU8r6RNk+2rKv68v48xFx6XI1JUlaWDtnAK8BF2XmnwDnAVsi4kLgZuDWzDwHeBnYUebvAF4u47eWeUTEZmAb8E5gC/CViFjVzWYkSe1bMACyaaqsnlS+ErgIuLuM7wWuLMtbyzpl+8UREWX8jsx8LTN/BkwAF3SlC0lSx9r6OOjym/rjwDnAl4GfAK9k5nSZchhYV5bXAS8CZOZ0RBwH3lbGH27ZbetjWp9rJ7ATYGRkhEaj0VlHLUZOg+vOnV54YpuWUstSTE1N9ey5u81e+s+w9AH20qm2AiAzXwfOi4g1wPeAdyxXQZm5G9gNMDY2luPj44ve15du38ctB7r3Xx4cunrxtSxFo9FgKd+HfmIv/WdY+gB76VRHdwFl5ivAQ8B7gDURMfOv63pgsixPAhsAyva3Ar9qHZ/jMZKkFdbOXUBvL7/5ExGnAR8AnqMZBFeVaduBfWX5nrJO2f5gZmYZ31buEtoIbAIe7VYjkqTOtHN9ZC2wt7wO8Cbgrsz8fkQ8C9wREZ8DngBuK/NvA74VERPAMZp3/pCZByPiLuBZYBq4tlxakiT1wIIBkJlPA++aY/ynzHEXT2b+DvjgPPu6Ebix8zIlSd3mO4ElqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKLRgAEbEhIh6KiGcj4mBEfLyMnxkR+yPihfLnGWU8IuKLETEREU9HxPkt+9pe5r8QEduXry1J0kLaOQOYBq7LzM3AhcC1EbEZ2AU8kJmbgAfKOsBlwKbytRP4KjQDA7gBeDdwAXDDTGhIklbeggGQmUcy80dl+TfAc8A6YCuwt0zbC1xZlrcC38ymh4E1EbEWuBTYn5nHMvNlYD+wpavdSJLa1tFrABExCrwLeAQYycwjZdMvgJGyvA54seVhh8vYfOOSpB5Y3e7EiHgz8B3gE5n564h4Y1tmZkRkNwqKiJ00Lx0xMjJCo9FY9L5GToPrzp3uRlkAS6plKaampnr23N1mL/1nWPoAe+lUWwEQESfR/Mf/9sz8bhl+KSLWZuaRconnaBmfBDa0PHx9GZsExmeNN2Y/V2buBnYDjI2N5fj4+OwpbfvS7fu45UDbGbegQ1cvvpalaDQaLOX70E/spf8MSx9gL51q5y6gAG4DnsvMz7dsugeYuZNnO7CvZfyacjfQhcDxcqnofuCSiDijvPh7SRmTJPVAO78evxf4S+BARDxZxv4BuAm4KyJ2AD8HPlS23QdcDkwAvwU+CpCZxyLis8APy7zPZOaxrnQhSerYggGQmf8JxDybL55jfgLXzrOvPcCeTgqUJC0P3wksSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpbr3/yVWYHTXvW3NO3TTFctciSQtnWcAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmq1IIBEBF7IuJoRDzTMnZmROyPiBfKn2eU8YiIL0bEREQ8HRHntzxme5n/QkRsX552JEntaucM4BvAlllju4AHMnMT8EBZB7gM2FS+dgJfhWZgADcA7wYuAG6YCQ1JUm8sGACZ+QPg2KzhrcDesrwXuLJl/JvZ9DCwJiLWApcC+zPzWGa+DOzn90NFkrSCFvsawEhmHinLvwBGyvI64MWWeYfL2HzjkqQeWb3UHWRmRkR2oxiAiNhJ8/IRIyMjNBqNRe9r5DS47tzpLlXWvqXUPJepqamu77NX7KX/DEsfYC+dWmwAvBQRazPzSLnEc7SMTwIbWuatL2OTwPis8cZcO87M3cBugLGxsRwfH59rWlu+dPs+bjmw5Izr2KGrx7u6v0ajwVK+D/3EXvrPsPQB9tKpxV4CugeYuZNnO7CvZfyacjfQhcDxcqnofuCSiDijvPh7SRmTJPXIgr8eR8S3af72flZEHKZ5N89NwF0RsQP4OfChMv0+4HJgAvgt8FGAzDwWEZ8FfljmfSYzZ7+wLElaQQsGQGZ+eJ5NF88xN4Fr59nPHmBPR9VJkpaN7wSWpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKrXg/wmszo3uureteYduumKZK5Gk+XkGIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUbwTroXbfMPaNLacvcyWSauQZgCRVygCQpEoZAJJUKQNAkirli8AD4MDkcT7SxgvGfrqopE54BiBJlTIAJKlSK34JKCK2AF8AVgFfz8ybVrqGYdXu+wrAy0WSVvgMICJWAV8GLgM2Ax+OiM0rWYMkqWmlzwAuACYy86cAEXEHsBV4doXrqJ7/baWklQ6AdcCLLeuHgXevcA3qQCeXldrhx1pI/aPvbgONiJ3AzrI6FRHPL2F3ZwG/XHpVvfU3Q9IHwJ/ePDy9MDzHZVj6AHuZ8YftTFrpAJgENrSsry9jb8jM3cDubjxZRDyWmWPd2FcvDUsfYC/9aFj6AHvp1ErfBvpDYFNEbIyIk4FtwD0rXIMkiRU+A8jM6Yj4GHA/zdtA92TmwZWsQZLUtOKvAWTmfcB9K/R0XbmU1AeGpQ+wl340LH2AvXQkMnO5n0OS1If8KAhJqtRQBkBEbImI5yNiIiJ29bqeTkXEoYg4EBFPRsRjZezMiNgfES+UP8/odZ1ziYg9EXE0Ip5pGZuz9mj6YjlOT0fE+b2r/P+bp49PRcRkOS5PRsTlLduuL308HxGX9qbquUXEhoh4KCKejYiDEfHxMj5Qx+UEfQzccYmIUyPi0Yh4qvTy6TK+MSIeKTXfWW6WISJOKesTZftoVwrJzKH6ovni8k+As4GTgaeAzb2uq8MeDgFnzRr7J2BXWd4F3NzrOuep/f3A+cAzC9UOXA78GxDAhcAjva5/gT4+BfztHHM3l5+zU4CN5edvVa97aKlvLXB+WX4L8ONS80AdlxP0MXDHpXxv31yWTwIeKd/ru4BtZfxrwF+V5b8GvlaWtwF3dqOOYTwDeOPjJjLzv4CZj5sYdFuBvWV5L3BlD2uZV2b+ADg2a3i+2rcC38ymh4E1EbF2ZSo9sXn6mM9W4I7MfC0zfwZM0Pw57AuZeSQzf1SWfwM8R/Nd+QN1XE7Qx3z69riU7+1UWT2pfCVwEXB3GZ99TGaO1d3AxRERS61jGANgro+bONEPST9K4N8j4vHyzmiAkcw8UpZ/AYz0prRFma/2QTxWHyuXRfa0XIYbmD7KpYN30fyNc2CPy6w+YACPS0SsiogngaPAfppnKK9k5nSZ0lrvG72U7ceBty21hmEMgGHwvsw8n+anpl4bEe9v3ZjN88CBvH1rkGsHvgr8EXAecAS4pbfldCYi3gx8B/hEZv66ddsgHZc5+hjI45KZr2fmeTQ/EeEC4B0rXcMwBsCCHzfR7zJzsvx5FPgezR+Ol2ZOw8ufR3tXYcfmq32gjlVmvlT+0v4P8C/83+WEvu8jIk6i+Y/m7Zn53TI8cMdlrj4G+bgAZOYrwEPAe2hebpt5f1ZrvW/0Ura/FfjVUp97GANgoD9uIiJOj4i3zCwDlwDP0Oxhe5m2HdjXmwoXZb7a7wGuKXedXAgcb7kk0XdmXQf/C5rHBZp9bCt3amwENgGPrnR98ynXim8DnsvMz7dsGqjjMl8fg3hcIuLtEbGmLJ8GfIDmaxoPAVeVabOPycyxugp4sJy1LU2vXw1fji+adzH8mOY1tU/2up4Oaz+b5p0LTwEHZ+qneb3vAeAF4D+AM3td6zz1f5vmafh/07yGuWO+2mneCfHlcpwOAGO9rn+BPr5V6ny6/IVc2zL/k6WP54HLel3/rF7eR/PyztPAk+Xr8kE7LifoY+COC/DHwBOl5meAfyzjZ9MMqQngX4FTyvipZX2ibD+7G3X4TmBJqtQwXgKSJLXBAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVL/C2+FhSKKT6n/AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df_lstm_all.Length.hist(range=(0,300), bins=30)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.02870949403069926" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_lstm_all[df_lstm_all.Length > 50]) / len(df_lstm_all)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 10554.0\n", "mean 15.9\n", "std 13.6\n", "min 1.0\n", "25% 8.0\n", "50% 13.0\n", "75% 19.0\n", "max 301.0\n", "Name: Length, dtype: float64" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all.Length.describe().round(1)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "lengths = []\n", "for index, row in df_lstm_all.iterrows():\n", " lengths.append(row.Blobs.shape[0])\n", "df_lstm_all[\"BlobCount\"] = lengths\n", "# add a column for pure gesture recognition without finger/knuckle\n", "df_lstm_all[\"GestureOnly\"] = df_lstm_all.TaskID % 17" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 10554.000000\n", "mean 15.906576\n", "std 13.605214\n", "min 1.000000\n", "25% 8.000000\n", "50% 13.000000\n", "75% 19.000000\n", "max 301.000000\n", "Name: BlobCount, dtype: float64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all.BlobCount.describe()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanstd
GestureOnly
011.4214298.940925
113.61868313.864708
28.8525966.315931
38.6729135.580500
49.8287676.793559
59.2112216.861675
614.6224968.338379
713.68452413.263753
820.39712912.916920
914.46859910.042060
1014.9214408.909217
1113.6955787.661549
1217.07085311.755087
1315.71221910.545010
1416.4683549.826818
1519.84083611.239255
1642.93162421.024635
\n", "
" ], "text/plain": [ " mean std\n", "GestureOnly \n", "0 11.421429 8.940925\n", "1 13.618683 13.864708\n", "2 8.852596 6.315931\n", "3 8.672913 5.580500\n", "4 9.828767 6.793559\n", "5 9.211221 6.861675\n", "6 14.622496 8.338379\n", "7 13.684524 13.263753\n", "8 20.397129 12.916920\n", "9 14.468599 10.042060\n", "10 14.921440 8.909217\n", "11 13.695578 7.661549\n", "12 17.070853 11.755087\n", "13 15.712219 10.545010\n", "14 16.468354 9.826818\n", "15 19.840836 11.239255\n", "16 42.931624 21.024635" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all.groupby(df_lstm_all.GestureOnly)[\"BlobCount\"].agg([\"mean\", \"std\"])" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "before: 10554\n", "after: 9193\n", "ratio: 12.895584612469206\n" ] } ], "source": [ "# filter on gesture lengths\n", "print(\"before: %s\" % len(df_lstm_all))\n", "df_lstm = df_lstm_all[(df_lstm_all.BlobCount <= 100) & (df_lstm_all.BlobCount >= 5)]\n", "print(\"after: %s\" % len(df_lstm))\n", "print(\"ratio: %s\" % ((len(df_lstm_all) - len(df_lstm)) / len(df_lstm_all) * 100))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 9193.000000\n", "mean 17.678995\n", "std 12.059369\n", "min 5.000000\n", "25% 10.000000\n", "50% 15.000000\n", "75% 20.000000\n", "max 97.000000\n", "Name: BlobCount, dtype: float64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm.BlobCount.describe()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "lengths = []\n", "for index, row in df_lstm.iterrows():\n", " lengths.append(row.Blobs.shape[0])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "df_lstm[\"BlobCount\"] = lengths" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 9193.000000\n", "mean 17.678995\n", "std 12.059369\n", "min 5.000000\n", "25% 10.000000\n", "50% 15.000000\n", "75% 20.000000\n", "max 97.000000\n", "Name: BlobCount, dtype: float64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm.BlobCount.describe()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "def lerp(a, b, c=0.5):\n", " return c * b + (1.0 - c) * a\n", "\n", "#Svens new Blob detection\n", "def detect_blobs_return_old(image, task):\n", " #image = e.Image\n", " large = np.ones((29,17), dtype=np.uint8)\n", " large[1:28,1:16] = np.copy(image)\n", " temp, thresh = cv2.threshold(cv2.bitwise_not(large), 205, 255, cv2.THRESH_BINARY)\n", " contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n", " contours = [a for a in contours if cv2.contourArea(a) > 8 and cv2.contourArea(a) < 255]\n", " lstBlob = []\n", " lstCenter = []\n", " lstMin = []\n", " lstMax = []\n", " count = 0\n", " contours.sort(key=lambda a: cv2.contourArea(a))\n", " if len(contours) > 0:\n", " # if two finger or knuckle\n", " cont_count = 2 if task in [1, 6, 7, 18, 23, 24] and len(contours) > 1 else 1\n", " for i in range(1, cont_count + 1):\n", " max_contour = contours[-1 * i]\n", " xmax, ymax = np.max(max_contour.reshape(len(max_contour),2), axis=0)\n", " xmin, ymin = np.min(max_contour.reshape(len(max_contour),2), axis=0)\n", " M = cv2.moments(max_contour)\n", " cX = int(M[\"m10\"] / M[\"m00\"]) - 1\n", " cY = int(M[\"m01\"] / M[\"m00\"]) - 1\n", " #croped_im = np.zeros((27,15))\n", " blob = large[max(ymin - 1, 0):min(ymax + 1, large.shape[0]),max(xmin - 1, 0):min(xmax + 1, large.shape[1])]\n", " #croped_im[0:blob.shape[0],0:blob.shape[1]] = blob\n", " #return (1, [croped_im])\n", " lstBlob.append(blob)\n", " lstCenter.append((cY, cX))\n", " lstMin.append(xmax-xmin)\n", " lstMax.append(ymax-ymin)\n", " count = count + 1\n", " return (count, lstBlob, lstCenter)\n", " else:\n", " return (0, [np.zeros((29, 19))], 0, 0)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# descides whether or not a normalization is neccessary\n", "# and cuts or adds zeros\n", "def normalize_blobs(blobs, new_len=50):\n", " new_count = new_len - blobs.shape[0]\n", " if new_count == 0:\n", " return blobs\n", " elif new_count > 0:\n", " temp = np.array([np.zeros((27, 15))] * new_count)\n", " return np.append(blobs, temp, axis=0)\n", " else:\n", " return blobs[0:new_len]" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 3.24 s, sys: 556 ms, total: 3.8 s\n", "Wall time: 3.8 s\n" ] } ], "source": [ "%%time\n", "# normalizes all image sequences\n", "df_lstm_norm = df_lstm.copy(deep=True)\n", "new_blobs = []\n", "for index, row in df_lstm.iterrows():\n", " new_blobs.append(normalize_blobs(row.Blobs, 50))\n", "\n", "df_lstm_norm.Blobs = new_blobs\n", "\n", "lengths = []\n", "for index, row in df_lstm_norm.iterrows():\n", " lengths.append(row.Blobs.shape[0])\n", "df_lstm_norm[\"BlobCount\"] = lengths" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 9193.0\n", "mean 50.0\n", "std 0.0\n", "min 50.0\n", "25% 50.0\n", "50% 50.0\n", "75% 50.0\n", "max 50.0\n", "Name: BlobCount, dtype: float64" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_norm.BlobCount.describe()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "df_lstm_norm.to_pickle(\"DataStudyCollection/df_lstm_norm50.pkl\")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIDTaskIDVersionIDBlobsBlobCountGestureOnly
0103[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...500
1105[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...500
2106[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...500
3107[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...500
4108[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0...500
\n", "
" ], "text/plain": [ " userID TaskID VersionID \\\n", "0 1 0 3 \n", "1 1 0 5 \n", "2 1 0 6 \n", "3 1 0 7 \n", "4 1 0 8 \n", "\n", " Blobs BlobCount GestureOnly \n", "0 [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 50 0 \n", "1 [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 50 0 \n", "2 [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 50 0 \n", "3 [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0... 50 0 \n", "4 [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0... 50 0 " ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_norm.head()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "finished\n" ] } ], "source": [ "print(\"finished\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" } }, "nbformat": 4, "nbformat_minor": 2 }