knuckletouch/python/Step_37_LSTM_Preprocessing....

1304 lines
43 KiB
Plaintext
Raw Permalink Normal View History

2019-08-07 23:57:12 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Preprocessing for LSTM: Blob Detection and Cutting"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"from scipy.odr import *\n",
"from scipy.stats import *\n",
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"import ast\n",
"from multiprocessing import Pool, cpu_count\n",
"\n",
"import scipy\n",
"\n",
"from IPython import display\n",
"from matplotlib.patches import Rectangle\n",
"\n",
"from sklearn.metrics import mean_squared_error\n",
"import json\n",
"\n",
"import scipy.stats as st\n",
"from sklearn.metrics import r2_score\n",
"\n",
"\n",
"from matplotlib import cm\n",
"from mpl_toolkits.mplot3d import axes3d\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import copy\n",
"\n",
"from sklearn.model_selection import LeaveOneOut, LeavePOut\n",
"\n",
"from multiprocessing import Pool\n",
"import cv2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df_filtered = pd.read_pickle(\"DataStudyEvaluation/df_lstm.pkl\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>userID</th>\n",
" <th>Timestamp</th>\n",
" <th>Current_Task</th>\n",
" <th>Task_amount</th>\n",
" <th>TaskID</th>\n",
" <th>VersionID</th>\n",
" <th>RepetitionID</th>\n",
" <th>Actual_Data</th>\n",
" <th>Is_Pause</th>\n",
" <th>Image</th>\n",
" <th>IsMax</th>\n",
" <th>MaxRepetition</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>178293</th>\n",
" <td>1</td>\n",
" <td>1553521747262</td>\n",
" <td>16</td>\n",
" <td>510</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>[0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 3, 1, 1, 1, 2, ...</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178294</th>\n",
" <td>1</td>\n",
" <td>1553521747302</td>\n",
" <td>16</td>\n",
" <td>510</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178295</th>\n",
" <td>1</td>\n",
" <td>1553521747342</td>\n",
" <td>16</td>\n",
" <td>510</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178296</th>\n",
" <td>1</td>\n",
" <td>1553521747388</td>\n",
" <td>16</td>\n",
" <td>510</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178297</th>\n",
" <td>1</td>\n",
" <td>1553521747422</td>\n",
" <td>16</td>\n",
" <td>510</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>True</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" userID Timestamp Current_Task Task_amount TaskID VersionID \\\n",
"178293 1 1553521747262 16 510 0 2 \n",
"178294 1 1553521747302 16 510 0 2 \n",
"178295 1 1553521747342 16 510 0 2 \n",
"178296 1 1553521747388 16 510 0 2 \n",
"178297 1 1553521747422 16 510 0 2 \n",
"\n",
" RepetitionID Actual_Data Is_Pause \\\n",
"178293 1 True False \n",
"178294 1 True False \n",
"178295 1 True False \n",
"178296 1 True False \n",
"178297 1 True False \n",
"\n",
" Image IsMax \\\n",
"178293 [0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 3, 1, 1, 1, 2, ... True \n",
"178294 [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... True \n",
"178295 [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... True \n",
"178296 [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... True \n",
"178297 [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... True \n",
"\n",
" MaxRepetition \n",
"178293 1 \n",
"178294 1 \n",
"178295 1 \n",
"178296 1 \n",
"178297 1 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_filtered.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"12"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df_filtered.userID.unique())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def _to_uint8_frame(flat_image):\n",
"    \"\"\"Reshape one flat frame to 27x15, clamp it to [0, 255] and cast to uint8.\"\"\"\n",
"    frame = flat_image.reshape(27, 15)\n",
"    return frame.clip(min=0, max=255).astype(np.uint8)\n",
"\n",
"\n",
"# reshape -> clip -> uint8 for every stored frame\n",
"df_filtered.Image = df_filtered.Image.apply(_to_uint8_frame)\n",
"# sum over all pixels of each frame\n",
"df_filtered[\"ImageSum\"] = df_filtered.Image.apply(lambda frame: np.sum(frame))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#LSTMs new Blob detection (only detect, if there are blobs)\n",
"def detect_blobs(image):\n",
"    \"\"\"Return True if the frame contains at least one blob-sized contour.\n",
"\n",
"    The frame is written into the interior of a 29x17 uint8 canvas whose\n",
"    one-pixel border is filled with ones. The canvas is inverted, thresholded\n",
"    at 200 and passed to cv2.findContours; only contours whose area lies\n",
"    strictly between 8 and 255 count as blobs.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    image : array-like, shape (27, 15)\n",
"        Frame to inspect. It is assigned into a uint8 buffer, so values are\n",
"        presumably already limited to 0..255 by the caller (TODO confirm).\n",
"\n",
"    Returns\n",
"    -------\n",
"    bool\n",
"        True when at least one contour passes the area filter.\n",
"    \"\"\"\n",
"    large = np.ones((29,17), dtype=np.uint8)\n",
"    large[1:28,1:16] = image\n",
"    _, thresh = cv2.threshold(cv2.bitwise_not(large), 200, 255, cv2.THRESH_BINARY)\n",
"    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but\n",
"    # (image, contours, hierarchy) in 3.x; [-2] selects the contours in both.\n",
"    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]\n",
"    # one contourArea call per contour; stops at the first blob-sized one\n",
"    return any(8 < cv2.contourArea(contour) < 255 for contour in contours)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.93 s, sys: 581 ms, total: 2.51 s\n",
"Wall time: 2.71 s\n"
]
}
],
"source": [
"%%time\n",
"# Leave one core free, but never request fewer than one worker:\n",
"# Pool(0) raises ValueError on a single-core machine.\n",
"n_workers = max(1, cpu_count() - 1)\n",
"# The context manager shuts the workers down even when map() raises.\n",
"with Pool(n_workers) as pool:\n",
"    temp_blobs = pool.map(detect_blobs, df_filtered.Image)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df_filtered[\"ContainsBlobs\"] = temp_blobs"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#Label if knuckle or finger\n",
"def f(row):\n",
"    \"\"\"Map a row's TaskID to the input method label.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    row : mapping\n",
"        Anything indexable with 'TaskID' (a DataFrame row, a dict, ...).\n",
"\n",
"    Returns\n",
"    -------\n",
"    int\n",
"        0 (knuckle) for TaskID < 17, 1 (finger) for TaskID >= 17.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If TaskID satisfies neither comparison (e.g. NaN).\n",
"    \"\"\"\n",
"    task_id = row['TaskID']\n",
"    if task_id < 17:\n",
"        return 0\n",
"    if task_id >= 17:\n",
"        return 1\n",
"    # Neither branch matched: fail with a clear message rather than an unbound local.\n",
"    raise ValueError(\"TaskID %r is neither < 17 nor >= 17\" % (task_id,))\n",
"\n",
"# f only reads TaskID, so pass it just that column instead of building every full row.\n",
"df_filtered['InputMethod'] = df_filtered[['TaskID']].apply(f, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Problem: some timestamps are stored as strings in scientific notation (XXXXE+XXXX),\n",
"# which is not accurate enough, so the rows get a fresh positional index instead.\n",
"#\n",
"# Disabled earlier attempt that cast the timestamps to int, kept for reference:\n",
"#     def cast_to_int(x):\n",
"#         if type(x) == int:\n",
"#             return x\n",
"#         x = str(x).replace(\",\", \".\")\n",
"#         return int(float(x))\n",
"#\n",
"#     df_filtered.Timestamp = df_filtered.Timestamp.map(cast_to_int)\n",
"df_filtered.index = pd.RangeIndex(len(df_filtered))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"2\n",
"3\n",
"4\n",
"5\n",
"6\n",
"7\n",
"8\n",
"9\n",
"10\n",
"11\n",
"12\n",
"CPU times: user 1min 32s, sys: 60.2 ms, total: 1min 32s\n",
"Wall time: 1min 32s\n"
]
}
],
"source": [
"%%time\n",
"# trim image sequences down to only between first and last detected blob (inclusive);\n",
"# one entry is collected per (userID, TaskID, VersionID) that has at least one blob frame\n",
"UserIDs = []\n",
"TaskIDs = []\n",
"VersionIDs = []\n",
"Blobs = []\n",
"# Nested groupby(sort=False) visits users, tasks and versions in order of first\n",
"# appearance and splits the frame once per level, instead of re-masking the\n",
"# whole DataFrame and iterating row by row for every single trial.\n",
"for userID, user_frames in df_filtered.groupby(\"userID\", sort=False):\n",
"    print(userID)\n",
"    for TaskID, task_frames in user_frames.groupby(\"TaskID\", sort=False):\n",
"        for VersionID, trial_frames in task_frames.groupby(\"VersionID\", sort=False):\n",
"            # index labels of the frames in which a blob was detected\n",
"            blob_index = trial_frames.index[trial_frames.ContainsBlobs.values.astype(bool)]\n",
"            if len(blob_index) == 0:\n",
"                # no blob anywhere in this trial: nothing to keep\n",
"                continue\n",
"            first_blob, last_blob = blob_index[0], blob_index[-1]\n",
"            in_window = (trial_frames.index >= first_blob) & (trial_frames.index <= last_blob)\n",
"            UserIDs.append(userID)\n",
"            TaskIDs.append(TaskID)\n",
"            VersionIDs.append(VersionID)\n",
"            Blobs.append(trial_frames.Image[in_window].tolist())"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# convert the collected ID lists to int64 arrays\n",
"UserIDs, TaskIDs, VersionIDs = (\n",
"    np.array(ids, dtype=np.int64) for ids in (UserIDs, TaskIDs, VersionIDs)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>userID</th>\n",
" <th>TaskID</th>\n",
" <th>VersionID</th>\n",
" <th>Blobs</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>[[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>[[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>[[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>[[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>[[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" userID TaskID VersionID \\\n",
"0 1 0 2 \n",
"1 1 0 3 \n",
"2 1 0 4 \n",
"3 1 0 5 \n",
"4 1 0 6 \n",
"\n",
" Blobs \n",
"0 [[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0... \n",
"1 [[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,... \n",
"2 [[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2... \n",
"3 [[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0... \n",
"4 [[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0... "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One row per collected trial; the ID arrays and the Blobs list are filled in parallel.\n",
"df_lstm_all = pd.DataFrame()\n",
"df_lstm_all[\"userID\"] = UserIDs\n",
"df_lstm_all[\"TaskID\"] = TaskIDs\n",
"df_lstm_all[\"VersionID\"] = VersionIDs\n",
"df_lstm_all[\"Blobs\"] = Blobs\n",
"# turn each trial's list of frames into a single ndarray\n",
"df_lstm_all.Blobs = df_lstm_all.Blobs.map(np.array)\n",
"df_lstm_all.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>userID</th>\n",
" <th>TaskID</th>\n",
" <th>VersionID</th>\n",
" <th>Blobs</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>[[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>[[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>[[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>[[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>[[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" userID TaskID VersionID \\\n",
"0 1 0 2 \n",
"1 1 0 3 \n",
"2 1 0 4 \n",
"3 1 0 5 \n",
"4 1 0 6 \n",
"\n",
" Blobs \n",
"0 [[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0... \n",
"1 [[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,... \n",
"2 [[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2... \n",
"3 [[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0... \n",
"4 [[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0... "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_lstm_all.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"df_lstm_all[\"Length\"] = df_lstm_all.Blobs.apply(lambda x: x.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f563b2b4278>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEQRJREFUeJzt3W2MXGd5h/HrrhNCFaPYqenKcqw6qSxVLmlTs0pSFaF1oyZO8sFBQihRRByaylWbqCBRCVPUhkKRTFWompaGmsbCaSkm5UWxEtPUdbOK+BCITY3tJA3eglOyMrbAwbCAaEPvfphnw3S7k52d2Z235/pJoznznGfO3LfPev8+Z86MIzORJNXnp/pdgCSpPwwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUu6HcBr2TNmjW5YcOGjp///e9/n4svvnjpCuqTUekD7GVQjUovo9IHdNfLkSNHvpWZr11o3kAHwIYNGzh8+HDHz5+cnGRiYmLpCuqTUekD7GVQjUovo9IHdNdLRDzfzjxPAUlSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUG+pPAvbJh56NtzTu16+ZlrkSSescjAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVasEAiIj1EfF4RDwTEU9HxNvL+KURcTAiTpb71WU8IuK+iJiKiGMRsblpW9vL/JMRsX352pIkLaSdI4CXgHdm5ibgWuDuiNgE7AQOZeZG4FB5DHAjsLHcdgD3QyMwgHuBa4CrgXtnQ0OS1HsLBkBmns7ML5fl7wHPAuuAbcDeMm0vcEtZ3gY8mA1PAqsiYi1wA3AwM89l5ovAQWDrknYjSWpbZGb7kyM2AE8ArwP+MzNXlfEAXszMVRHxCLArM79Q1h0C3gVMAK/OzD8p438I/DAz/2zOa+ygceTA2NjY6/ft29dxczMzM6xcuXLBecenz7e1vSvXXdJxLd1ot49hYC+DaVR6GZU+oLtetmzZciQzxxea1/Z/CBMRK4HPAO/IzO82fuc3ZGZGRPtJ8goyczewG2B8fDwnJiY63tbk5CTtPP/Odv9DmNs7r6Ub7fYxDOxlMI1KL6PSB/Sml7auAoqIC2n88v9EZn62DJ8pp3Yo92fL+DSwvunpl5WxVuOSpD5o5yqgAB4Ans3MDzet2g/MXsmzHXi4afyOcjXQtcD5zDwNPAZcHxGry5u/15cxSVIftHMK6NeAtwLHI+JoGfsDYBfwUETcBTwPvKWsOwDcBEwBPwDeBpCZ5yLi/cBTZd77MvPcknQhSVq0BQOgvJkbLVZfN8/8BO5usa09wJ7FFChJWh5+EliSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVWrBAIiIPRFxNiJONI29NyKmI+Joud3UtO7dETEVEc9FxA1N41vL2FRE7Fz6ViRJi9HOEcDHga3zjP95Zl5VbgcAImITcCvwi+U5fx0RKyJiBfAR4EZgE3BbmStJ6pMLFpqQmU9ExIY2t7cN2JeZPwK+HhFTwNVl3VRmfg0gIvaVuc8sumJJ0pLo5j2AeyLiWDlFtLqMrQO+0TTnhTLWalyS1CeRmQtPahwBPJKZryuPx4BvAQm8H1ibmb8ZEX8FPJmZf1/mPQB8vmxma2b+Vhl/K3BNZt4zz2vtAHYAjI2NvX7fvn0dNzczM8PKlSsXnHd8+nxb27ty3SUd19
KNdvsYBvYymEall1HpA7rrZcuWLUcyc3yheQueAppPZp6ZXY6IjwGPlIfTwPqmqZeVMV5hfO62dwO7AcbHx3NiYqKTEgGYnJykneffufPRtrZ36vbOa+lGu30MA3sZTKPSy6j0Ab3ppaNTQBGxtunhm4DZK4T2A7dGxEURcTmwEfgS8BSwMSIuj4hX0XijeH/nZUuSurXgEUBEfBKYANZExAvAvcBERFxF4xTQKeC3ATLz6Yh4iMabuy8Bd2fmj8t27gEeA1YAezLz6SXvRpLUtnauArptnuEHXmH+B4APzDN+ADiwqOoGzIZ2TxXtunmZK5Gk7vlJYEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiq1YABExJ6IOBsRJ5rGLo2IgxFxstyvLuMREfdFxFREHIuIzU3P2V7mn4yI7cvTjiSpXe0cAXwc2DpnbCdwKDM3AofKY4AbgY3ltgO4HxqBAdwLXANcDdw7GxqSpP5YMAAy8wng3JzhbcDesrwXuKVp/MFseBJYFRFrgRuAg5l5LjNfBA7y/0NFktRDnb4HMJaZp8vyN4GxsrwO+EbTvBfKWKtxSVKfXNDtBjIzIyKXohiAiNhB4/QRY2NjTE5OdrytmZmZtp7/zitf6vg15tNNzfNpt49hYC+DaVR6GZU+oDe9dBoAZyJibWaeLqd4zpbxaWB907zLytg0MDFnfHK+DWfmbmA3wPj4eE5MTMw3rS2Tk5O08/w7dz7a8WvM59TtC7/mYrTbxzCwl8E0Kr2MSh/Qm146PQW0H5i9kmc78HDT+B3laqBrgfPlVNFjwPURsbq8+Xt9GZMk9cmCRwAR8Uka/3pfExEv0LiaZxfwUETcBTwPvKVMPwDcBEwBPwDeBpCZ5yLi/cBTZd77MnPuG8uSpB5aMAAy87YWq66bZ24Cd7fYzh5gz6KqkyQtGz8JLEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJler620AH2fHp80v+RW+SNCo8ApCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqlRXARARpyLieEQcjYjDZezSiDgYESfL/eoyHhFxX0RMRcSxiNi8FA1IkjqzFEcAWzLzqswcL493AocycyNwqDwGuBHYWG47gPuX4LUlSR1ajlNA24C9ZXkvcEvT+IPZ8CSwKiLWLsPrS5LaEJnZ+ZMjvg68CCTwN5m5OyK+k5mryvoAXszMVRHxCLArM79Q1h0C3pWZh+dscweNIwTGxsZev2/fvo7rO3vuPGd+2PHTO3blukuWdHszMzOsXLlySbfZL/YymEall1HpA7rrZcuWLUeazsq0dEFHW/+JN2TmdET8LHAwIv69eWVmZkQsKmEyczewG2B8fDwnJiY6Lu4vP/EwHzrebYuLd+r2iSXd3uTkJN38OQwSexlMo9LLqPQBvemlq9+OmTld7s9GxOeAq4EzEbE2M0+XUzxny/RpYH3T0y8rYyNnw85H25p3atfNy1yJJLXW8XsAEXFxRLxmdhm4HjgB7Ae2l2nbgYfL8n7gjnI10LXA+cw83XHlkqSudHMEMAZ8rnGanwuAf8jMf4qIp4CHIuIu4HngLWX+AeAmYAr4AfC2Ll5bktSljgMgM78G/PI8498GrptnPIG7O3
09SdLS8pPAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTI
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"df_lstm_all.Length.hist(range=(0,300), bins=30)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.05110421609782807"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df_lstm_all[df_lstm_all.Length > 50]) / len(df_lstm_all)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 5479.0\n",
"mean 21.2\n",
"std 15.5\n",
"min 1.0\n",
"25% 13.0\n",
"50% 18.0\n",
"75% 26.0\n",
"max 251.0\n",
"Name: Length, dtype: float64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_lstm_all.Length.describe().round(1)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# number of frames (first array dimension) of every trial\n",
"lengths = [frames.shape[0] for frames in df_lstm_all.Blobs]\n",
"df_lstm_all[\"BlobCount\"] = lengths\n",
"# add a column for pure gesture recognition without finger/knuckle\n",
"df_lstm_all[\"GestureOnly\"] = df_lstm_all.TaskID % 17"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 5479.000000\n",
"mean 21.239460\n",
"std 15.541015\n",
"min 1.000000\n",
"25% 13.000000\n",
"50% 18.000000\n",
"75% 26.000000\n",
"max 251.000000\n",
"Name: BlobCount, dtype: float64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_lstm_all.BlobCount.describe()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" </tr>\n",
" <tr>\n",
" <th>GestureOnly</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19.000000</td>\n",
" <td>15.626834</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>18.387387</td>\n",
" <td>12.717864</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>13.418006</td>\n",
" <td>11.615571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13.192182</td>\n",
" <td>10.096861</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>13.439344</td>\n",
" <td>9.632580</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>13.157407</td>\n",
" <td>9.709874</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>17.198813</td>\n",
" <td>8.017407</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>18.627841</td>\n",
" <td>7.706187</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>30.096463</td>\n",
" <td>14.204850</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>19.134375</td>\n",
" <td>8.786072</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>20.289308</td>\n",
" <td>10.179677</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>19.311526</td>\n",
" <td>20.655269</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>20.683230</td>\n",
" <td>10.314326</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>20.357367</td>\n",
" <td>9.820602</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>21.581538</td>\n",
" <td>11.342465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>27.737654</td>\n",
" <td>13.548982</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>51.783708</td>\n",
" <td>19.654648</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean std\n",
"GestureOnly \n",
"0 19.000000 15.626834\n",
"1 18.387387 12.717864\n",
"2 13.418006 11.615571\n",
"3 13.192182 10.096861\n",
"4 13.439344 9.632580\n",
"5 13.157407 9.709874\n",
"6 17.198813 8.017407\n",
"7 18.627841 7.706187\n",
"8 30.096463 14.204850\n",
"9 19.134375 8.786072\n",
"10 20.289308 10.179677\n",
"11 19.311526 20.655269\n",
"12 20.683230 10.314326\n",
"13 20.357367 9.820602\n",
"14 21.581538 11.342465\n",
"15 27.737654 13.548982\n",
"16 51.783708 19.654648"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_lstm_all.groupby(df_lstm_all.GestureOnly)[\"BlobCount\"].agg([\"mean\", \"std\"])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"before: 5479\n",
"after: 5043\n",
"ratio: 7.957656506661799\n"
]
}
],
"source": [
"# filter on gesture lengths\n",
"print(\"before: %s\" % len(df_lstm_all))\n",
"df_lstm = df_lstm_all[(df_lstm_all.BlobCount <= 100) & (df_lstm_all.BlobCount >= 5)]\n",
"print(\"after: %s\" % len(df_lstm))\n",
"print(\"ratio: %s\" % ((len(df_lstm_all) - len(df_lstm)) / len(df_lstm_all) * 100))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 5043.000000\n",
"mean 22.512195\n",
"std 13.430134\n",
"min 5.000000\n",
"25% 14.000000\n",
"50% 19.000000\n",
"75% 27.000000\n",
"max 99.000000\n",
"Name: BlobCount, dtype: float64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_lstm.BlobCount.describe()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# number of frames of every trial that survived the length filter\n",
"lengths = [frames.shape[0] for frames in df_lstm.Blobs]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"# assign() returns a new frame, so nothing is written into a slice of df_lstm_all\n",
"# (the in-place column assignment raised SettingWithCopyWarning).\n",
"df_lstm = df_lstm.assign(BlobCount=lengths)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 5043.000000\n",
"mean 22.512195\n",
"std 13.430134\n",
"min 5.000000\n",
"25% 14.000000\n",
"50% 19.000000\n",
"75% 27.000000\n",
"max 99.000000\n",
"Name: BlobCount, dtype: float64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_lstm.BlobCount.describe()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"def lerp(a, b, c=0.5):\n",
"    \"\"\"Linearly interpolate between a (c=0) and b (c=1); c defaults to the midpoint.\"\"\"\n",
"    weight_a = 1.0 - c\n",
"    return c * b + weight_a * a\n",
"\n",
"#Svens new Blob detection\n",
"def detect_blobs_return_old(image, task):\n",
"    \"\"\"Crop the largest blob(s) out of one frame.\n",
"\n",
"    The frame is copied into the interior of a 29x17 uint8 canvas whose\n",
"    one-pixel border is filled with ones. The canvas is inverted, thresholded\n",
"    at 205 and passed to cv2.findContours; contours whose area lies strictly\n",
"    between 8 and 255 are the blob candidates.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    image : array-like, shape (27, 15)\n",
"        Frame to search.\n",
"    task : int\n",
"        Task id. For the two finger / two knuckle tasks (1, 6, 7, 18, 23, 24)\n",
"        the two largest contours are used when more than one exists;\n",
"        otherwise only the largest one.\n",
"\n",
"    Returns\n",
"    -------\n",
"    tuple\n",
"        (count, lstBlob, lstCenter) when at least one blob was found:\n",
"        the number of blobs, the cropped canvas regions (largest contour\n",
"        first; these are views into the canvas) and their (cY, cX) centroids,\n",
"        each reduced by 1 (presumably to undo the border offset - TODO confirm).\n",
"        (0, [np.zeros((29, 19))], 0, 0) when no blob was found.\n",
"        NOTE(review): the no-blob tuple has four elements and a 29x19\n",
"        placeholder although the canvas is 29x17; left unchanged because the\n",
"        callers are not visible here.\n",
"    \"\"\"\n",
"    large = np.ones((29,17), dtype=np.uint8)\n",
"    large[1:28,1:16] = np.copy(image)\n",
"    _, thresh = cv2.threshold(cv2.bitwise_not(large), 205, 255, cv2.THRESH_BINARY)\n",
"    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but\n",
"    # (image, contours, hierarchy) in 3.x; [-2] selects the contours in both.\n",
"    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]\n",
"    # keep blob-sized contours, ordered by ascending area (stable sort)\n",
"    contours = sorted(\n",
"        (c for c in contours if 8 < cv2.contourArea(c) < 255),\n",
"        key=cv2.contourArea,\n",
"    )\n",
"    if not contours:\n",
"        return (0, [np.zeros((29, 19))], 0, 0)\n",
"\n",
"    # if two finger or knuckle\n",
"    cont_count = 2 if task in [1, 6, 7, 18, 23, 24] and len(contours) > 1 else 1\n",
"    lstBlob = []\n",
"    lstCenter = []\n",
"    # walk from the largest contour downwards\n",
"    for max_contour in contours[::-1][:cont_count]:\n",
"        points = max_contour.reshape(len(max_contour), 2)\n",
"        xmax, ymax = np.max(points, axis=0)\n",
"        xmin, ymin = np.min(points, axis=0)\n",
"        M = cv2.moments(max_contour)\n",
"        cX = int(M[\"m10\"] / M[\"m00\"]) - 1\n",
"        cY = int(M[\"m01\"] / M[\"m00\"]) - 1\n",
"        # bounding box of the contour, clamped to the canvas\n",
"        blob = large[max(ymin - 1, 0):min(ymax + 1, large.shape[0]),\n",
"                     max(xmin - 1, 0):min(xmax + 1, large.shape[1])]\n",
"        lstBlob.append(blob)\n",
"        lstCenter.append((cY, cX))\n",
"    return (len(lstBlob), lstBlob, lstCenter)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# decides whether or not a normalization is necessary\n",
"# and cuts or adds zeros\n",
"def normalize_blobs(blobs, new_len=50):\n",
"    \"\"\"Force a frame sequence to exactly new_len frames.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    blobs : np.ndarray, shape (n_frames, ...)\n",
"        Frame sequence; the first axis is the frame axis.\n",
"    new_len : int, default 50\n",
"        Number of frames in the result.\n",
"\n",
"    Returns\n",
"    -------\n",
"    np.ndarray\n",
"        blobs itself when it already has new_len frames, its first new_len\n",
"        frames when it is longer, or blobs followed by all-zero frames when\n",
"        it is shorter.\n",
"\n",
"    Notes\n",
"    -----\n",
"    The padding takes its frame shape and dtype from blobs, so padded\n",
"    sequences keep the input dtype (a fixed float64 27x15 padding would upcast\n",
"    only the padded sequences) and frame sizes other than 27x15 work too.\n",
"    \"\"\"\n",
"    missing = new_len - blobs.shape[0]\n",
"    if missing == 0:\n",
"        return blobs\n",
"    if missing < 0:\n",
"        return blobs[0:new_len]\n",
"    padding = np.zeros((missing,) + blobs.shape[1:], dtype=blobs.dtype)\n",
"    return np.concatenate([blobs, padding], axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.48 s, sys: 236 ms, total: 1.71 s\n",
"Wall time: 1.71 s\n"
]
}
],
"source": [
"%%time\n",
"# normalizes all image sequences\n",
"df_lstm_norm = df_lstm.copy(deep=True)\n",
"new_blobs = [normalize_blobs(frames, 50) for frames in df_lstm.Blobs]\n",
"df_lstm_norm.Blobs = new_blobs\n",
"\n",
"# recompute the frame count from the normalized sequences\n",
"lengths = [frames.shape[0] for frames in df_lstm_norm.Blobs]\n",
"df_lstm_norm[\"BlobCount\"] = lengths"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 5043.0\n",
"mean 50.0\n",
"std 0.0\n",
"min 50.0\n",
"25% 50.0\n",
"50% 50.0\n",
"75% 50.0\n",
"max 50.0\n",
"Name: BlobCount, dtype: float64"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_lstm_norm.BlobCount.describe()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"df_lstm_norm.to_pickle(\"DataStudyEvaluation/df_lstm_norm50.pkl\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>userID</th>\n",
" <th>TaskID</th>\n",
" <th>VersionID</th>\n",
" <th>Blobs</th>\n",
" <th>Length</th>\n",
" <th>BlobCount</th>\n",
" <th>GestureOnly</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>[[[0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0...</td>\n",
" <td>25</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>[[[0.0, 2.0, 191.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1...</td>\n",
" <td>12</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>[[[0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0...</td>\n",
" <td>14</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>[[[0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 3.0...</td>\n",
" <td>11</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>[[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0...</td>\n",
" <td>16</td>\n",
" <td>50</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" userID TaskID VersionID \\\n",
"0 1 0 2 \n",
"1 1 0 3 \n",
"2 1 0 4 \n",
"3 1 0 5 \n",
"4 1 0 6 \n",
"\n",
" Blobs Length BlobCount \\\n",
"0 [[[0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0... 25 50 \n",
"1 [[[0.0, 2.0, 191.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1... 12 50 \n",
"2 [[[0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0... 14 50 \n",
"3 [[[0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 3.0... 11 50 \n",
"4 [[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0... 16 50 \n",
"\n",
" GestureOnly \n",
"0 0 \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_lstm_norm.head()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_lstm_norm.userID.unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}