{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Preprocessing for LSTM: Blobdetection and Cutting" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "from scipy.odr import *\n", "from scipy.stats import *\n", "import numpy as np\n", "import pandas as pd\n", "import os\n", "import time\n", "import matplotlib.pyplot as plt\n", "import ast\n", "from multiprocessing import Pool, cpu_count\n", "\n", "import scipy\n", "\n", "from IPython import display\n", "from matplotlib.patches import Rectangle\n", "\n", "from sklearn.metrics import mean_squared_error\n", "import json\n", "\n", "import scipy.stats as st\n", "from sklearn.metrics import r2_score\n", "\n", "\n", "from matplotlib import cm\n", "from mpl_toolkits.mplot3d import axes3d\n", "import matplotlib.pyplot as plt\n", "\n", "import copy\n", "\n", "from sklearn.model_selection import LeaveOneOut, LeavePOut\n", "\n", "from multiprocessing import Pool\n", "import cv2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "df_filtered = pd.read_pickle(\"DataStudyEvaluation/df_lstm.pkl\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIDTimestampCurrent_TaskTask_amountTaskIDVersionIDRepetitionIDActual_DataIs_PauseImageIsMaxMaxRepetition
1782931155352174726216510021TrueFalse[0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 3, 1, 1, 1, 2, ...True1
1782941155352174730216510021TrueFalse[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...True1
1782951155352174734216510021TrueFalse[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...True1
1782961155352174738816510021TrueFalse[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...True1
1782971155352174742216510021TrueFalse[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...True1
\n", "
" ], "text/plain": [ " userID Timestamp Current_Task Task_amount TaskID VersionID \\\n", "178293 1 1553521747262 16 510 0 2 \n", "178294 1 1553521747302 16 510 0 2 \n", "178295 1 1553521747342 16 510 0 2 \n", "178296 1 1553521747388 16 510 0 2 \n", "178297 1 1553521747422 16 510 0 2 \n", "\n", " RepetitionID Actual_Data Is_Pause \\\n", "178293 1 True False \n", "178294 1 True False \n", "178295 1 True False \n", "178296 1 True False \n", "178297 1 True False \n", "\n", " Image IsMax \\\n", "178293 [0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 3, 1, 1, 1, 2, ... True \n", "178294 [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... True \n", "178295 [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... True \n", "178296 [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... True \n", "178297 [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... True \n", "\n", " MaxRepetition \n", "178293 1 \n", "178294 1 \n", "178295 1 \n", "178296 1 \n", "178297 1 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_filtered.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "12" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_filtered.userID.unique())" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df_filtered.Image = df_filtered.Image.apply(lambda x: x.reshape(27, 15))\n", "df_filtered.Image = df_filtered.Image.apply(lambda x: x.clip(min=0, max=255))\n", "df_filtered.Image = df_filtered.Image.apply(lambda x: x.astype(np.uint8))\n", "df_filtered[\"ImageSum\"] = df_filtered.Image.apply(lambda x: np.sum(x))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#LSTMs new Blob detection (only detect, if there are blobs)\n", "def detect_blobs(image):\n", " #image = image.reshape(27, 15)\n", " large = np.ones((29,17), dtype=np.uint8)\n", " large[1:28,1:16] = image\n", " temp, thresh = cv2.threshold(cv2.bitwise_not(large), 200, 255, cv2.THRESH_BINARY)\n", " contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n", " contours = [a for a in contours if cv2.contourArea(a) > 8 and cv2.contourArea(a) < 255]\n", " lstBlob = []\n", " lstMin = []\n", " lstMax = []\n", " count = 0\n", " return len(contours) > 0" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 1.93 s, sys: 581 ms, total: 2.51 s\n", "Wall time: 2.71 s\n" ] } ], "source": [ "%%time\n", "pool = Pool(cpu_count() - 1)\n", "temp_blobs = pool.map(detect_blobs, df_filtered.Image)\n", "pool.close()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df_filtered[\"ContainsBlobs\"] = temp_blobs" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "#Label if knuckle or finger\n", "def f(row):\n", " if row['TaskID'] < 17:\n", " #val = \"Knuckle\"\n", " val = 0\n", " elif row['TaskID'] >= 17:\n", " #val = \"Finger\"\n", " val = 1\n", " return val\n", "df_filtered['InputMethod'] = df_filtered.apply(f, axis=1)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Problem: some timestamps are strings (XXXXE+XXXX) which is not accurate enough, switching to index instead\n", "\"\"\"def cast_to_int(x):\n", " if type(x) == int:\n", " return x\n", " x = str(x).replace(\",\", \".\")\n", " return int(float(x))\n", "\n", "df_filtered.Timestamp = df_filtered.Timestamp.map(cast_to_int)\"\"\"\n", "df_filtered.index = range(len(df_filtered))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "2\n", "3\n", "4\n", "5\n", "6\n", "7\n", "8\n", "9\n", "10\n", "11\n", "12\n", "CPU times: user 1min 32s, sys: 60.2 ms, total: 1min 32s\n", "Wall time: 1min 32s\n" ] } ], "source": [ "%%time\n", "# trim image sequences down to only between first and last detected blob\n", "UserIDs = []\n", "TaskIDs = []\n", "VersionIDs = []\n", "Blobs = []\n", "for userID in df_filtered.userID.unique():\n", " print(userID)\n", " for TaskID in df_filtered[df_filtered.userID == userID].TaskID.unique():\n", " for VersionID in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID)].VersionID.unique():\n", " first_blob = -1\n", " last_blob = -1\n", " for index, row in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID)].iterrows():\n", " if row.ContainsBlobs:\n", " last_blob = index\n", " if first_blob == -1:\n", " first_blob = index\n", " if first_blob >= 0 and last_blob >= 0:\n", " UserIDs.append(userID)\n", " TaskIDs.append(TaskID)\n", " VersionIDs.append(VersionID)\n", " Blobs.append(df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID) & (df_filtered.index >= first_blob) & (df_filtered.index <= last_blob)].Image.tolist())" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "UserIDs = np.array(UserIDs, dtype=np.int64)\n", "TaskIDs = np.array(TaskIDs, dtype=np.int64)\n", "VersionIDs = np.array(VersionIDs, dtype=np.int64)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIDTaskIDVersionIDBlobs
0102[[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0...
1103[[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,...
2104[[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2...
3105[[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0...
4106[[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0...
\n", "
" ], "text/plain": [ " userID TaskID VersionID \\\n", "0 1 0 2 \n", "1 1 0 3 \n", "2 1 0 4 \n", "3 1 0 5 \n", "4 1 0 6 \n", "\n", " Blobs \n", "0 [[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0... \n", "1 [[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,... \n", "2 [[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2... \n", "3 [[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0... \n", "4 [[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0... " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all = pd.DataFrame()\n", "df_lstm_all[\"userID\"] = UserIDs\n", "df_lstm_all[\"TaskID\"] = TaskIDs\n", "df_lstm_all[\"VersionID\"] = VersionIDs\n", "df_lstm_all[\"Blobs\"] = Blobs\n", "df_lstm_all.Blobs = df_lstm_all.Blobs.map(np.array)\n", "df_lstm_all.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIDTaskIDVersionIDBlobs
0102[[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0...
1103[[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,...
2104[[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2...
3105[[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0...
4106[[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0...
\n", "
" ], "text/plain": [ " userID TaskID VersionID \\\n", "0 1 0 2 \n", "1 1 0 3 \n", "2 1 0 4 \n", "3 1 0 5 \n", "4 1 0 6 \n", "\n", " Blobs \n", "0 [[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0... \n", "1 [[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,... \n", "2 [[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2... \n", "3 [[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0... \n", "4 [[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0... " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all.head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "df_lstm_all[\"Length\"] = df_lstm_all.Blobs.apply(lambda x: x.shape[0])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEQRJREFUeJzt3W2MXGd5h/HrrhNCFaPYqenKcqw6qSxVLmlTs0pSFaF1oyZO8sFBQihRRByaylWbqCBRCVPUhkKRTFWompaGmsbCaSkm5UWxEtPUdbOK+BCITY3tJA3eglOyMrbAwbCAaEPvfphnw3S7k52d2Z235/pJoznznGfO3LfPev8+Z86MIzORJNXnp/pdgCSpPwwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUu6HcBr2TNmjW5YcOGjp///e9/n4svvnjpCuqTUekD7GVQjUovo9IHdNfLkSNHvpWZr11o3kAHwIYNGzh8+HDHz5+cnGRiYmLpCuqTUekD7GVQjUovo9IHdNdLRDzfzjxPAUlSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUG+pPAvbJh56NtzTu16+ZlrkSSescjAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVasEAiIj1EfF4RDwTEU9HxNvL+KURcTAiTpb71WU8IuK+iJiKiGMRsblpW9vL/JMRsX352pIkLaSdI4CXgHdm5ibgWuDuiNgE7AQOZeZG4FB5DHAjsLHcdgD3QyMwgHuBa4CrgXtnQ0OS1HsLBkBmns7ML5fl7wHPAuuAbcDeMm0vcEtZ3gY8mA1PAqsiYi1wA3AwM89l5ovAQWDrknYjSWpbZGb7kyM2AE8ArwP+MzNXlfEAXszMVRHxCLArM79Q1h0C3gVMAK/OzD8p438I/DAz/2zOa+ygceTA2NjY6/ft29dxczMzM6xcuXLBecenz7e1vSvXXdJxLd1ot49hYC+DaVR6GZU+oLtetmzZciQzxxea1/Z/CBMRK4HPAO/IzO82fuc3ZGZGRPtJ8goyczewG2B8fDwnJiY63tbk5CTtPP/Odv9DmNs7r6Ub7fYxDOxlMI1KL6PSB/Sml7auAoqIC2n88v9EZn62DJ8pp3Yo92fL+DSwvunpl5WxVuOSpD5o5yqgAB4Ans3MDzet2g/MXsmzHXi4afyOcjXQtcD5zDwNPAZcHxGry5u/15cxSVIftHMK6NeAtwLHI+JoGfsDYBfwUETcBTwPvKWsOwDcBEwBPwDeBpCZ5yLi/cBTZd77MvPcknQhSVq0BQOgvJkbLVZfN8/8BO5usa09wJ7FFChJWh5+EliSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVWrBAIiIPRFxNiJONI29NyKmI+Joud3UtO7dETEVEc9FxA1N41vL2FRE7Fz6ViRJi9HOEcDHga3zjP95Zl5VbgcAImITcCvwi+U5fx0RKyJiBfAR4EZgE3BbmStJ6pMLFpqQmU9ExIY2t7cN2JeZPwK+HhFTwNVl3VRmfg0gIvaVuc8sumJJ0pLo5j2AeyLiWDlFtLqMrQO+0TTnhTLWalyS1CeRmQtPahwBPJKZryuPx4BvAQm8H1ibmb8ZEX8FPJmZf1/mPQB8vmxma2b+Vhl/K3BNZt4zz2vtAHYAjI2NvX7fvn0dNzczM8PKlSsXnHd8+nxb27ty3SUd19KNdvsYBvYymEall1HpA7rrZcuWLUcyc3yheQueAppPZp6ZXY6IjwGPlIfTwPqmqZeVMV5hfO62dwO7AcbHx3NiYqKTEgGYnJykneffufPRtrZ36vbOa+lGu30MA3sZTKPSy6j0Ab3ppaNTQBGxtunhm4DZK4T2A7dGxEURcTmwEfgS8BSwMSIuj4hX0XijeH/nZUuSurXgEUBEfBKYANZExAvAvcBERFxF4xTQKeC3ATLz6Yh4iMabuy8Bd2fmj8t27gEeA1YAezLz6SXvRpLUtnauArptnuEHXmH+B4APzDN+ADiwqOoGzIZ2TxXtunmZK5Gk7vlJYEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiq1YABExJ6IOBsRJ5rGLo2IgxFxstyvLuMREfdFxFREHIuIzU3P2V7mn4yI7cvTjiSpXe0cAXwc2DpnbCdwKDM3AofKY4AbgY3ltgO4HxqBAdwLXANcDdw7GxqSpP5YMAAy8wng3JzhbcDesrwXuKVp/MFseBJYFRFrgRuAg5l5LjNfBA7y/0NFktRDnb4HMJaZp8vyN4GxsrwO+EbTvBfKWKtxSVKfXNDtBjIzIyKXohiAiNhB4/QRY2NjTE5OdrytmZmZtp7/zitf6vg15tNNzfNpt49hYC+DaVR6GZU+oDe9dBoAZyJibWaeLqd4zpbxaWB907zLytg0MDFnfHK+DWfmbmA3wPj4eE5MTMw3rS2Tk5O08/w7dz7a8WvM59TtC7/mYrTbxzCwl8E0Kr2MSh/Qm146PQW0H5i9kmc78HDT+B3laqBrgfPlVNFjwPURsbq8+Xt9GZMk9cmCRwAR8Uka/3pfExEv0LiaZxfwUETcBTwPvKVMPwDcBEwBPwDeBpCZ5yLi/cBTZd77MnPuG8uSpB5aMAAy87YWq66bZ24Cd7fYzh5gz6KqkyQtGz8JLEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJler620AH2fHp80v+RW+SNCo8ApCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqlRXARARpyLieEQcjYjDZezSiDgYESfL/eoyHhFxX0RMRcSxiNi8FA1IkjqzFEcAWzLzqswcL493AocycyNwqDwGuBHYWG47gPuX4LUlSR1ajlNA24C9ZXkvcEvT+IPZ8CSwKiLWLsPrS5LaEJnZ+ZMjvg68CCTwN5m5OyK+k5mryvoAXszMVRHxCLArM79Q1h0C3pWZh+dscweNIwTGxsZev2/fvo7rO3vuPGd+2PHTO3blukuWdHszMzOsXLlySbfZL/YymEall1HpA7rrZcuWLUeazsq0dEFHW/+JN2TmdET8LHAwIv69eWVmZkQsKmEyczewG2B8fDwnJiY6Lu4vP/EwHzrebYuLd+r2iSXd3uTkJN38OQwSexlMo9LLqPQBvemlq9+OmTld7s9GxOeAq4EzEbE2M0+XUzxny/RpYH3T0y8rYyNnw85H25p3atfNy1yJJLXW8XsAEXFxRLxmdhm4HjgB7Ae2l2nbgYfL8n7gjnI10LXA+cw83XHlkqSudHMEMAZ8rnGanwuAf8jMf4qIp4CHIuIu4HngLWX+AeAmYAr4AfC2Ll5bktSljgMgM78G/PI8498GrptnPIG7O309SdLS8pPAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZW6oN8F1GzDzkfbmvfxrRcvcyWSauQRgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUl4EOgePT57mzzUtGT+26eZmrkTQqPAKQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlfIy0BHT7jeMermoJI8AJKlSBoAkVarnp4AiYivwF8AK4G8zc1eva5CniiT1+AggIlYAHwFuBDYBt0XEpl7WIElq6PURwNXAVGZ+DSAi9gHbgGd6XIfa1O6RQrv87y2lwdHrAFgHfKPp8QvANT2uQUPAU1TS8hu4y0AjYgewozyciYjnutjcGuBb3VfVX783In0AbPng0vYSH1yqLXVkZPYLo9PLqPQB3fXyc+1M6nUATAPrmx5fVsZelpm7gd1L8WIRcTgzx5diW/00Kn2AvQyqUellVPqA3vTS68tAnwI2RsTlEfEq4FZgf49rkCTR4yOAzHwpIu4BHqNxGeiezHy6lzVIkhp6/h5AZh4ADvTo5ZbkVNIAGJU+wF4G1aj0Mip9QA96icxc7teQJA0gvwpCkio1kgEQEVsj4rmImIqInf2uZ7Ei4lREHI+IoxFxuIxdGhEHI+JkuV/d7zrnExF7IuJsRJxoGpu39mi4r+ynYxGxuX+V/18t+nhvREyX/XI0Im5qWvfu0sdzEXFDf6qeX0Ssj4jHI+KZiHg6It5exodxv7TqZaj2TUS8OiK+FBFfKX38cRm/PCK+WOr9VLlYhoi4qDyeKus3LEkhmTlSNxpvLv8HcAXwKuArwKZ+17XIHk4Ba+aM/SmwsyzvBD7Y7zpb1P5GYDNwYqHagZuAzwMBXAt8sd/1L9DHe4Hfn2fupvJzdhFwefn5W9HvHprqWwtsLsuvAb5aah7G/dKql6HaN+XPdmVZvhD4Yvmzfgi4tYx/FPidsvy7wEfL8q3Ap5aijlE8Anj56yYy87+A2a+bGHbbgL1leS9wSx9raSkznwDOzRluVfs24MFseBJYFRFre1PpK2vRRyvbgH2Z+aPM/DowRePncCBk5unM/HJZ/h7wLI1P5Q/jfmnVSysDuW/Kn+1MeXhhuSXw68Cny/jcfTK7rz4NXBcR0W0doxgA833dxCv9gAyiBP45Io6UT0YDjGXm6bL8TWCsP6V1pFXtw7iv7imnRfY0nYYbmj7KqYNfofEvzqHeL3N6gSHbNxGxIiKOAmeBgzSOTr6TmS+VKc21vtxHWX8e+JluaxjFABgFb8jMzTS+NfXuiHhj88psHAcO5eVbw1w7cD/w88BVwGngQ/0tZ3EiYiXwGeAdmfnd5nXDtl/m6WXo9k1m/jgzr6LxjQhXA7/Q6xpGMQAW/LqJQZeZ0+X+LPA5Gj8cZ2YPw8v92f5VuGitah+qfZWZZ8pf2v8BPsZPTiUMfB8RcSGNX5ifyMzPluGh3C/z9TLM+yYzvwM8DvwqjdNts5/Paq715T7K+kuAb3f72qMYAEP9dRMRcXFEvGZ2GbgeOEGjh+1l2nbg4f5U2JFWte8H7ihXnVwLnG86JTFw5pwHfxON/QKNPm4tV2pcDmwEvtTr+lop54ofAJ7NzA83rRq6/dKql2HbNxHx2ohYVZZ/GvgNGu9nPA68uUybu09m99WbgX8tR23d6fe74ctxo3EVw1dpnFN7T7/rWWTtV9C4auErwNOz9dM433cIOAn8C3Bpv2ttUf8naRyC/zeNc5h3taqdxpUQHyn76Tgw3u/6F+jj70qdx8pfyLVN899T+ngOuLHf9c/p5Q00Tu8cA46W201Dul9a9TJU+wb4JeDfSr0ngD8q41fQCKgp4B+Bi8r4q8vjqbL+iqWow08CS1KlRvEUkCSpDQaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmV+l9aEnWcfWzNswAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "df_lstm_all.Length.hist(range=(0,300), bins=30)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.05110421609782807" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_lstm_all[df_lstm_all.Length > 50]) / len(df_lstm_all)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 5479.0\n", "mean 21.2\n", "std 15.5\n", "min 1.0\n", "25% 13.0\n", "50% 18.0\n", "75% 26.0\n", "max 251.0\n", "Name: Length, dtype: float64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all.Length.describe().round(1)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "lengths = []\n", "for index, row in df_lstm_all.iterrows():\n", " lengths.append(row.Blobs.shape[0])\n", "df_lstm_all[\"BlobCount\"] = lengths\n", "# add a column for pure gesture recognition without finger/knuckle\n", "df_lstm_all[\"GestureOnly\"] = df_lstm_all.TaskID % 17" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 5479.000000\n", "mean 21.239460\n", "std 15.541015\n", "min 1.000000\n", "25% 13.000000\n", "50% 18.000000\n", "75% 26.000000\n", "max 251.000000\n", "Name: BlobCount, dtype: float64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all.BlobCount.describe()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meanstd
GestureOnly
019.00000015.626834
118.38738712.717864
213.41800611.615571
313.19218210.096861
413.4393449.632580
513.1574079.709874
617.1988138.017407
718.6278417.706187
830.09646314.204850
919.1343758.786072
1020.28930810.179677
1119.31152620.655269
1220.68323010.314326
1320.3573679.820602
1421.58153811.342465
1527.73765413.548982
1651.78370819.654648
\n", "
" ], "text/plain": [ " mean std\n", "GestureOnly \n", "0 19.000000 15.626834\n", "1 18.387387 12.717864\n", "2 13.418006 11.615571\n", "3 13.192182 10.096861\n", "4 13.439344 9.632580\n", "5 13.157407 9.709874\n", "6 17.198813 8.017407\n", "7 18.627841 7.706187\n", "8 30.096463 14.204850\n", "9 19.134375 8.786072\n", "10 20.289308 10.179677\n", "11 19.311526 20.655269\n", "12 20.683230 10.314326\n", "13 20.357367 9.820602\n", "14 21.581538 11.342465\n", "15 27.737654 13.548982\n", "16 51.783708 19.654648" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_all.groupby(df_lstm_all.GestureOnly)[\"BlobCount\"].agg([\"mean\", \"std\"])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "before: 5479\n", "after: 5043\n", "ratio: 7.957656506661799\n" ] } ], "source": [ "# filter on gesture lengths\n", "print(\"before: %s\" % len(df_lstm_all))\n", "df_lstm = df_lstm_all[(df_lstm_all.BlobCount <= 100) & (df_lstm_all.BlobCount >= 5)]\n", "print(\"after: %s\" % len(df_lstm))\n", "print(\"ratio: %s\" % ((len(df_lstm_all) - len(df_lstm)) / len(df_lstm_all) * 100))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 5043.000000\n", "mean 22.512195\n", "std 13.430134\n", "min 5.000000\n", "25% 14.000000\n", "50% 19.000000\n", "75% 27.000000\n", "max 99.000000\n", "Name: BlobCount, dtype: float64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm.BlobCount.describe()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "lengths = []\n", "for index, row in df_lstm.iterrows():\n", " lengths.append(row.Blobs.shape[0])" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "df_lstm[\"BlobCount\"] = lengths" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 5043.000000\n", "mean 22.512195\n", "std 13.430134\n", "min 5.000000\n", "25% 14.000000\n", "50% 19.000000\n", "75% 27.000000\n", "max 99.000000\n", "Name: BlobCount, dtype: float64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm.BlobCount.describe()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "def lerp(a, b, c=0.5):\n", " return c * b + (1.0 - c) * a\n", "\n", "#Svens new Blob detection\n", "def detect_blobs_return_old(image, task):\n", " #image = e.Image\n", " large = np.ones((29,17), dtype=np.uint8)\n", " large[1:28,1:16] = np.copy(image)\n", " temp, thresh = cv2.threshold(cv2.bitwise_not(large), 205, 255, cv2.THRESH_BINARY)\n", " contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n", " contours = [a for a in contours if cv2.contourArea(a) > 8 and cv2.contourArea(a) < 255]\n", " lstBlob = []\n", " lstCenter = []\n", " lstMin = []\n", " lstMax = []\n", " count = 0\n", " contours.sort(key=lambda a: cv2.contourArea(a))\n", " if len(contours) > 0:\n", " # if two finger or knuckle\n", " cont_count = 2 if task in [1, 6, 7, 18, 23, 24] and len(contours) > 1 else 1\n", " for i in range(1, cont_count + 1):\n", " max_contour = contours[-1 * i]\n", " xmax, ymax = np.max(max_contour.reshape(len(max_contour),2), axis=0)\n", " xmin, ymin = np.min(max_contour.reshape(len(max_contour),2), axis=0)\n", " M = cv2.moments(max_contour)\n", " cX = int(M[\"m10\"] / M[\"m00\"]) - 1\n", " cY = int(M[\"m01\"] / M[\"m00\"]) - 1\n", " #croped_im = np.zeros((27,15))\n", " blob = large[max(ymin - 1, 0):min(ymax + 1, large.shape[0]),max(xmin - 1, 0):min(xmax + 1, large.shape[1])]\n", " #croped_im[0:blob.shape[0],0:blob.shape[1]] = blob\n", " #return (1, [croped_im])\n", " lstBlob.append(blob)\n", " lstCenter.append((cY, cX))\n", " lstMin.append(xmax-xmin)\n", " lstMax.append(ymax-ymin)\n", " count = count + 1\n", " return (count, lstBlob, lstCenter)\n", " else:\n", " return (0, [np.zeros((29, 19))], 0, 0)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# descides whether or not a normalization is neccessary\n", "# and cuts or adds zeros\n", "def normalize_blobs(blobs, new_len=50):\n", " new_count = new_len - blobs.shape[0]\n", " if new_count == 0:\n", " return blobs\n", " elif new_count > 0:\n", " temp = np.array([np.zeros((27, 15))] * new_count)\n", " return np.append(blobs, temp, axis=0)\n", " else:\n", " return blobs[0:new_len]" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 1.48 s, sys: 236 ms, total: 1.71 s\n", "Wall time: 1.71 s\n" ] } ], "source": [ "%%time\n", "# normalizes all image sequences\n", "df_lstm_norm = df_lstm.copy(deep=True)\n", "new_blobs = []\n", "for index, row in df_lstm.iterrows():\n", " new_blobs.append(normalize_blobs(row.Blobs, 50))\n", "\n", "df_lstm_norm.Blobs = new_blobs\n", "\n", "lengths = []\n", "for index, row in df_lstm_norm.iterrows():\n", " lengths.append(row.Blobs.shape[0])\n", "df_lstm_norm[\"BlobCount\"] = lengths" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 5043.0\n", "mean 50.0\n", "std 0.0\n", "min 50.0\n", "25% 50.0\n", "50% 50.0\n", "75% 50.0\n", "max 50.0\n", "Name: BlobCount, dtype: float64" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_norm.BlobCount.describe()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "df_lstm_norm.to_pickle(\"DataStudyEvaluation/df_lstm_norm50.pkl\")" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIDTaskIDVersionIDBlobsLengthBlobCountGestureOnly
0102[[[0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0...25500
1103[[[0.0, 2.0, 191.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1...12500
2104[[[0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0...14500
3105[[[0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 3.0...11500
4106[[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0...16500
\n", "
" ], "text/plain": [ " userID TaskID VersionID \\\n", "0 1 0 2 \n", "1 1 0 3 \n", "2 1 0 4 \n", "3 1 0 5 \n", "4 1 0 6 \n", "\n", " Blobs Length BlobCount \\\n", "0 [[[0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0... 25 50 \n", "1 [[[0.0, 2.0, 191.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1... 12 50 \n", "2 [[[0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0... 14 50 \n", "3 [[[0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 3.0... 11 50 \n", "4 [[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0... 16 50 \n", "\n", " GestureOnly \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_norm.head()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_lstm_norm.userID.unique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" } }, "nbformat": 4, "nbformat_minor": 2 }