{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preprocessing for LSTM: Blobdetection and Cutting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "from scipy.odr import *\n",
    "from scipy.stats import *\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import time\n",
    "import matplotlib.pyplot as plt\n",
    "import ast\n",
    "from multiprocessing import Pool, cpu_count\n",
    "\n",
    "import scipy\n",
    "\n",
    "from IPython import display\n",
    "from matplotlib.patches import Rectangle\n",
    "\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import json\n",
    "\n",
    "import scipy.stats as st\n",
    "from sklearn.metrics import r2_score\n",
    "\n",
    "\n",
    "from matplotlib import cm\n",
    "from mpl_toolkits.mplot3d import axes3d\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import copy\n",
    "\n",
    "from sklearn.model_selection import LeaveOneOut, LeavePOut\n",
    "\n",
    "from multiprocessing import Pool\n",
    "import cv2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered = pd.read_pickle(\"DataStudyEvaluation/df_lstm.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Current_Task</th>\n",
       "      <th>Task_amount</th>\n",
       "      <th>TaskID</th>\n",
       "      <th>VersionID</th>\n",
       "      <th>RepetitionID</th>\n",
       "      <th>Actual_Data</th>\n",
       "      <th>Is_Pause</th>\n",
       "      <th>Image</th>\n",
       "      <th>IsMax</th>\n",
       "      <th>MaxRepetition</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>178293</th>\n",
       "      <td>1</td>\n",
       "      <td>1553521747262</td>\n",
       "      <td>16</td>\n",
       "      <td>510</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 3, 1, 1, 1, 2, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178294</th>\n",
       "      <td>1</td>\n",
       "      <td>1553521747302</td>\n",
       "      <td>16</td>\n",
       "      <td>510</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178295</th>\n",
       "      <td>1</td>\n",
       "      <td>1553521747342</td>\n",
       "      <td>16</td>\n",
       "      <td>510</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178296</th>\n",
       "      <td>1</td>\n",
       "      <td>1553521747388</td>\n",
       "      <td>16</td>\n",
       "      <td>510</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178297</th>\n",
       "      <td>1</td>\n",
       "      <td>1553521747422</td>\n",
       "      <td>16</td>\n",
       "      <td>510</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        userID      Timestamp  Current_Task  Task_amount  TaskID  VersionID  \\\n",
       "178293       1  1553521747262            16          510       0          2   \n",
       "178294       1  1553521747302            16          510       0          2   \n",
       "178295       1  1553521747342            16          510       0          2   \n",
       "178296       1  1553521747388            16          510       0          2   \n",
       "178297       1  1553521747422            16          510       0          2   \n",
       "\n",
       "        RepetitionID  Actual_Data  Is_Pause  \\\n",
       "178293             1         True     False   \n",
       "178294             1         True     False   \n",
       "178295             1         True     False   \n",
       "178296             1         True     False   \n",
       "178297             1         True     False   \n",
       "\n",
       "                                                    Image  IsMax  \\\n",
       "178293  [0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 3, 1, 1, 1, 2, ...   True   \n",
       "178294  [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...   True   \n",
       "178295  [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...   True   \n",
       "178296  [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...   True   \n",
       "178297  [0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...   True   \n",
       "\n",
       "        MaxRepetition  \n",
       "178293              1  \n",
       "178294              1  \n",
       "178295              1  \n",
       "178296              1  \n",
       "178297              1  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_filtered.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_filtered.userID.unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered.Image = df_filtered.Image.apply(lambda x: x.reshape(27, 15))\n",
    "df_filtered.Image = df_filtered.Image.apply(lambda x: x.clip(min=0, max=255))\n",
    "df_filtered.Image = df_filtered.Image.apply(lambda x: x.astype(np.uint8))\n",
    "df_filtered[\"ImageSum\"] = df_filtered.Image.apply(lambda x: np.sum(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#LSTMs new Blob detection (only detect, if there are blobs)\n",
    "def detect_blobs(image):\n",
    "    #image = image.reshape(27, 15)\n",
    "    large = np.ones((29,17), dtype=np.uint8)\n",
    "    large[1:28,1:16] = image\n",
    "    temp, thresh = cv2.threshold(cv2.bitwise_not(large), 200, 255, cv2.THRESH_BINARY)\n",
    "    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n",
    "    contours = [a for a in contours if cv2.contourArea(a) > 8 and cv2.contourArea(a) < 255]\n",
    "    lstBlob  = []\n",
    "    lstMin = []\n",
    "    lstMax = []\n",
    "    count = 0\n",
    "    return len(contours) > 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.93 s, sys: 581 ms, total: 2.51 s\n",
      "Wall time: 2.71 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "pool = Pool(cpu_count() - 1)\n",
    "temp_blobs = pool.map(detect_blobs, df_filtered.Image)\n",
    "pool.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered[\"ContainsBlobs\"] = temp_blobs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Label if knuckle or finger\n",
    "def f(row):\n",
    "    if row['TaskID'] < 17:\n",
    "        #val = \"Knuckle\"\n",
    "        val = 0\n",
    "    elif row['TaskID'] >= 17:\n",
    "        #val = \"Finger\"\n",
    "        val = 1\n",
    "    return val\n",
    "df_filtered['InputMethod'] = df_filtered.apply(f, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Problem: some timestamps are strings (XXXXE+XXXX) which is not accurate enough, switching to index instead\n",
    "\"\"\"def cast_to_int(x):\n",
    "    if type(x) == int:\n",
    "        return x\n",
    "    x = str(x).replace(\",\", \".\")\n",
    "    return int(float(x))\n",
    "\n",
    "df_filtered.Timestamp = df_filtered.Timestamp.map(cast_to_int)\"\"\"\n",
    "df_filtered.index = range(len(df_filtered))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "10\n",
      "11\n",
      "12\n",
      "CPU times: user 1min 32s, sys: 60.2 ms, total: 1min 32s\n",
      "Wall time: 1min 32s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# trim image sequences down to only between first and last detected blob\n",
    "UserIDs = []\n",
    "TaskIDs = []\n",
    "VersionIDs = []\n",
    "Blobs = []\n",
    "for userID in df_filtered.userID.unique():\n",
    "    print(userID)\n",
    "    for TaskID in df_filtered[df_filtered.userID == userID].TaskID.unique():\n",
    "        for VersionID in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID)].VersionID.unique():\n",
    "            first_blob = -1\n",
    "            last_blob = -1\n",
    "            for index, row in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID)].iterrows():\n",
    "                if row.ContainsBlobs:\n",
    "                    last_blob = index\n",
    "                    if first_blob == -1:\n",
    "                        first_blob = index\n",
    "            if first_blob >= 0 and last_blob >= 0:\n",
    "                UserIDs.append(userID)\n",
    "                TaskIDs.append(TaskID)\n",
    "                VersionIDs.append(VersionID)\n",
    "                Blobs.append(df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID) & (df_filtered.index >= first_blob) & (df_filtered.index <= last_blob)].Image.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "UserIDs = np.array(UserIDs, dtype=np.int64)\n",
    "TaskIDs = np.array(TaskIDs, dtype=np.int64)\n",
    "VersionIDs = np.array(VersionIDs, dtype=np.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>TaskID</th>\n",
       "      <th>VersionID</th>\n",
       "      <th>Blobs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>[[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>[[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>[[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>[[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>[[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userID  TaskID  VersionID  \\\n",
       "0       1       0          2   \n",
       "1       1       0          3   \n",
       "2       1       0          4   \n",
       "3       1       0          5   \n",
       "4       1       0          6   \n",
       "\n",
       "                                               Blobs  \n",
       "0  [[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0...  \n",
       "1  [[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,...  \n",
       "2  [[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2...  \n",
       "3  [[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0...  \n",
       "4  [[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0...  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all = pd.DataFrame()\n",
    "df_lstm_all[\"userID\"] = UserIDs\n",
    "df_lstm_all[\"TaskID\"] = TaskIDs\n",
    "df_lstm_all[\"VersionID\"] = VersionIDs\n",
    "df_lstm_all[\"Blobs\"] = Blobs\n",
    "df_lstm_all.Blobs = df_lstm_all.Blobs.map(np.array)\n",
    "df_lstm_all.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>TaskID</th>\n",
       "      <th>VersionID</th>\n",
       "      <th>Blobs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>[[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>[[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>[[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>[[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>[[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userID  TaskID  VersionID  \\\n",
       "0       1       0          2   \n",
       "1       1       0          3   \n",
       "2       1       0          4   \n",
       "3       1       0          5   \n",
       "4       1       0          6   \n",
       "\n",
       "                                               Blobs  \n",
       "0  [[[0, 1, 1, 2, 1, 1, 2, 0, 1, 2, 1, 0, 0, 0, 0...  \n",
       "1  [[[0, 2, 191, 0, 0, 1, 2, 0, 1, 1, 2, 0, 1, 0,...  \n",
       "2  [[[0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 0, 1, 0, 2...  \n",
       "3  [[[0, 1, 0, 2, 2, 0, 1, 0, 3, 1, 1, 0, 0, 0, 0...  \n",
       "4  [[[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0...  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_lstm_all[\"Length\"] = df_lstm_all.Blobs.apply(lambda x: x.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f563b2b4278>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEQRJREFUeJzt3W2MXGd5h/HrrhNCFaPYqenKcqw6qSxVLmlTs0pSFaF1oyZO8sFBQihRRByaylWbqCBRCVPUhkKRTFWompaGmsbCaSkm5UWxEtPUdbOK+BCITY3tJA3eglOyMrbAwbCAaEPvfphnw3S7k52d2Z235/pJoznznGfO3LfPev8+Z86MIzORJNXnp/pdgCSpPwwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUu6HcBr2TNmjW5YcOGjp///e9/n4svvnjpCuqTUekD7GVQjUovo9IHdNfLkSNHvpWZr11o3kAHwIYNGzh8+HDHz5+cnGRiYmLpCuqTUekD7GVQjUovo9IHdNdLRDzfzjxPAUlSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUG+pPAvbJh56NtzTu16+ZlrkSSescjAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVasEAiIj1EfF4RDwTEU9HxNvL+KURcTAiTpb71WU8IuK+iJiKiGMRsblpW9vL/JMRsX352pIkLaSdI4CXgHdm5ibgWuDuiNgE7AQOZeZG4FB5DHAjsLHcdgD3QyMwgHuBa4CrgXtnQ0OS1HsLBkBmns7ML5fl7wHPAuuAbcDeMm0vcEtZ3gY8mA1PAqsiYi1wA3AwM89l5ovAQWDrknYjSWpbZGb7kyM2AE8ArwP+MzNXlfEAXszMVRHxCLArM79Q1h0C3gVMAK/OzD8p438I/DAz/2zOa+ygceTA2NjY6/ft29dxczMzM6xcuXLBecenz7e1vSvXXdJxLd1ot49hYC+DaVR6GZU+oLtetmzZciQzxxea1/Z/CBMRK4HPAO/IzO82fuc3ZGZGRPtJ8goyczewG2B8fDwnJiY63tbk5CTtPP/Odv9DmNs7r6Ub7fYxDOxlMI1KL6PSB/Sml7auAoqIC2n88v9EZn62DJ8pp3Yo92fL+DSwvunpl5WxVuOSpD5o5yqgAB4Ans3MDzet2g/MXsmzHXi4afyOcjXQtcD5zDwNPAZcHxGry5u/15cxSVIftHMK6NeAtwLHI+JoGfsDYBfwUETcBTwPvKWsOwDcBEwBPwDeBpCZ5yLi/cBTZd77MvPcknQhSVq0BQOgvJkbLVZfN8/8BO5usa09wJ7FFChJWh5+EliSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVWrBAIiIPRFxNiJONI29NyKmI+Joud3UtO7dETEVEc9FxA1N41vL2FRE7Fz6ViRJi9HOEcDHga3zjP95Zl5VbgcAImITcCvwi+U5fx0RKyJiBfAR4EZgE3BbmStJ6pMLFpqQmU9ExIY2t7cN2JeZPwK+HhFTwNVl3VRmfg0gIvaVuc8sumJJ0pLo5j2AeyLiWDlFtLqMrQO+0TTnhTLWalyS1CeRmQtPahwBPJKZryuPx4BvAQm8H1ibmb8ZEX8FPJmZf1/mPQB8vmxma2b+Vhl/K3BNZt4zz2vtAHYAjI2NvX7fvn0dNzczM8PKlSsXnHd8+nxb27ty3SUd19KNdvsYBvYymEall1HpA7rrZcuWLUcyc3yheQueAppPZp6ZXY6IjwGPlIfTwPqmqZeVMV5hfO62dwO7AcbHx3NiYqKTEgGYnJykneffufPRtrZ36vbOa+lGu30MA3sZTKPSy6j0Ab3ppaNTQBGxtunhm4DZK4T2A7dGxEURcTmwEfgS8BSwMSIuj4hX0XijeH/nZUuSurXgEUBEfBKYANZExAvAvcBERFxF4xTQKeC3ATLz6Yh4iMabuy8Bd2fmj8t27gEeA1YAezLz6SXvRpLUtnauArptnuEHXmH+B4APzDN+ADiwqOoGzIZ2TxXtunmZK5Gk7vlJYEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiq1YABExJ6IOBsRJ5rGLo2IgxFxstyvLuMREfdFxFREHIuIzU3P2V7mn4yI7cvTjiSpXe0cAXwc2DpnbCdwKDM3AofKY4AbgY3ltgO4HxqBAdwLXANcDdw7GxqSpP5YMAAy8wng3JzhbcDesrwXuKVp/MFseBJYFRFrgRuAg5l5LjNfBA7y/0NFktRDnb4HMJaZp8vyN4GxsrwO+EbTvBfKWKtxSVKfXNDtBjIzIyKXohiAiNhB4/QRY2NjTE5OdrytmZmZtp7/zitf6vg15tNNzfNpt49hYC+DaVR6GZU+oDe9dBoAZyJibWaeLqd4zpbxaWB907zLytg0MDFnfHK+DWfmbmA3wPj4eE5MTMw3rS2Tk5O08/w7dz7a8WvM59TtC7/mYrTbxzCwl8E0Kr2MSh/Qm146PQW0H5i9kmc78HDT+B3laqBrgfPlVNFjwPURsbq8+Xt9GZMk9cmCRwAR8Uka/3pfExEv0LiaZxfwUETcBTwPvKVMPwDcBEwBPwDeBpCZ5yLi/cBTZd77MnPuG8uSpB5aMAAy87YWq66bZ24Cd7fYzh5gz6KqkyQtGz8JLEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJler620AH2fHp80v+RW+SNCo8ApCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqlRXARARpyLieEQcjYjDZezSiDgYESfL/eoyHhFxX0RMRcSxiNi8FA1IkjqzFEcAWzLzqswcL493AocycyNwqDwGuBHYWG47gPuX4LUlSR1ajlNA24C9ZXkvcEvT+IPZ8CSwKiLWLsPrS5LaEJnZ+ZMjvg68CCTwN5m5OyK+k5mryvoAXszMVRHxCLArM79Q1h0C3pWZh+dscweNIwTGxsZev2/fvo7rO3vuPGd+2PHTO3blukuWdHszMzOsXLlySbfZL/YymEall1HpA7rrZcuWLUeazsq0dEFHW/+JN2TmdET8LHAwIv69eWVmZkQsKmEyczewG2B8fDwnJiY6Lu4vP/EwHzrebYuLd+r2iSXd3uTkJN38OQwSexlMo9LLqPQBvemlq9+OmTld7s9GxOeAq4EzEbE2M0+XUzxny/RpYH3T0y8rYyNnw85H25p3atfNy1yJJLXW8XsAEXFxRLxmdhm4HjgB7Ae2l2nbgYfL8n7gjnI10LXA+cw83XHlkqSudHMEMAZ8rnGanwuAf8jMf4qIp4CHIuIu4HngLWX+AeAmYAr4AfC2Ll5bktSljgMgM78G/PI8498GrptnPIG7O309SdLS8pPAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZW6oN8F1GzDzkfbmvfxrRcvcyWSauQRgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUl4EOgePT57mzzUtGT+26eZmrkTQqPAKQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlfIy0BHT7jeMermoJI8AJKlSBoAkVarnp4AiYivwF8AK4G8zc1eva5CniiT1+AggIlYAHwFuBDYBt0XEpl7WIElq6PURwNXAVGZ+DSAi9gHbgGd6XIfa1O6RQrv87y2lwdHrAFgHfKPp8QvANT2uQUPAU1TS8hu4y0AjYgewozyciYjnutjcGuBb3VfVX783In0AbPng0vYSH1yqLXVkZPYLo9PLqPQB3fXyc+1M6nUATAPrmx5fVsZelpm7gd1L8WIRcTgzx5diW/00Kn2AvQyqUellVPqA3vTS68tAnwI2RsTlEfEq4FZgf49rkCTR4yOAzHwpIu4BHqNxGeiezHy6lzVIkhp6/h5AZh4ADvTo5ZbkVNIAGJU+wF4G1aj0Mip9QA96icxc7teQJA0gvwpCkio1kgEQEVsj4rmImIqInf2uZ7Ei4lREHI+IoxFxuIxdGhEHI+JkuV/d7zrnExF7IuJsRJxoGpu39mi4r+ynYxGxuX+V/18t+nhvREyX/XI0Im5qWvfu0sdzEXFDf6qeX0Ssj4jHI+KZiHg6It5exodxv7TqZaj2TUS8OiK+FBFfKX38cRm/PCK+WOr9VLlYhoi4qDyeKus3LEkhmTlSNxpvLv8HcAXwKuArwKZ+17XIHk4Ba+aM/SmwsyzvBD7Y7zpb1P5GYDNwYqHagZuAzwMBXAt8sd/1L9DHe4Hfn2fupvJzdhFwefn5W9HvHprqWwtsLsuvAb5aah7G/dKql6HaN+XPdmVZvhD4Yvmzfgi4tYx/FPidsvy7wEfL8q3Ap5aijlE8Anj56yYy87+A2a+bGHbbgL1leS9wSx9raSkznwDOzRluVfs24MFseBJYFRFre1PpK2vRRyvbgH2Z+aPM/DowRePncCBk5unM/HJZ/h7wLI1P5Q/jfmnVSysDuW/Kn+1MeXhhuSXw68Cny/jcfTK7rz4NXBcR0W0doxgA833dxCv9gAyiBP45Io6UT0YDjGXm6bL8TWCsP6V1pFXtw7iv7imnRfY0nYYbmj7KqYNfofEvzqHeL3N6gSHbNxGxIiKOAmeBgzSOTr6TmS+VKc21vtxHWX8e+JluaxjFABgFb8jMzTS+NfXuiHhj88psHAcO5eVbw1w7cD/w88BVwGngQ/0tZ3EiYiXwGeAdmfnd5nXDtl/m6WXo9k1m/jgzr6LxjQhXA7/Q6xpGMQAW/LqJQZeZ0+X+LPA5Gj8cZ2YPw8v92f5VuGitah+qfZWZZ8pf2v8BPsZPTiUMfB8RcSGNX5ifyMzPluGh3C/z9TLM+yYzvwM8DvwqjdNts5/Paq715T7K+kuAb3f72qMYAEP9dRMRcXFEvGZ2GbgeOEGjh+1l2nbg4f5U2JFWte8H7ihXnVwLnG86JTFw5pwHfxON/QKNPm4tV2pcDmwEvtTr+lop54ofAJ7NzA83rRq6/dKql2HbNxHx2ohYVZZ/GvgNGu9nPA68uUybu09m99WbgX8tR23d6fe74ctxo3EVw1dpnFN7T7/rWWTtV9C4auErwNOz9dM433cIOAn8C3Bpv2ttUf8naRyC/zeNc5h3taqdxpUQHyn76Tgw3u/6F+jj70qdx8pfyLVN899T+ngOuLHf9c/p5Q00Tu8cA46W201Dul9a9TJU+wb4JeDfSr0ngD8q41fQCKgp4B+Bi8r4q8vjqbL+iqWow08CS1KlRvEUkCSpDQaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmV+l9aEnWcfWzNswAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_lstm_all.Length.hist(range=(0,300), bins=30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.05110421609782807"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_lstm_all[df_lstm_all.Length > 50]) / len(df_lstm_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    5479.0\n",
       "mean       21.2\n",
       "std        15.5\n",
       "min         1.0\n",
       "25%        13.0\n",
       "50%        18.0\n",
       "75%        26.0\n",
       "max       251.0\n",
       "Name: Length, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all.Length.describe().round(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "lengths = []\n",
    "for index, row in df_lstm_all.iterrows():\n",
    "    lengths.append(row.Blobs.shape[0])\n",
    "df_lstm_all[\"BlobCount\"] = lengths\n",
    "# add a column for pure gesture recognition without finger/knuckle\n",
    "df_lstm_all[\"GestureOnly\"] = df_lstm_all.TaskID % 17"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    5479.000000\n",
       "mean       21.239460\n",
       "std        15.541015\n",
       "min         1.000000\n",
       "25%        13.000000\n",
       "50%        18.000000\n",
       "75%        26.000000\n",
       "max       251.000000\n",
       "Name: BlobCount, dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all.BlobCount.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GestureOnly</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19.000000</td>\n",
       "      <td>15.626834</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>18.387387</td>\n",
       "      <td>12.717864</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>13.418006</td>\n",
       "      <td>11.615571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>13.192182</td>\n",
       "      <td>10.096861</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13.439344</td>\n",
       "      <td>9.632580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>13.157407</td>\n",
       "      <td>9.709874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>17.198813</td>\n",
       "      <td>8.017407</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>18.627841</td>\n",
       "      <td>7.706187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>30.096463</td>\n",
       "      <td>14.204850</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>19.134375</td>\n",
       "      <td>8.786072</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>20.289308</td>\n",
       "      <td>10.179677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>19.311526</td>\n",
       "      <td>20.655269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>20.683230</td>\n",
       "      <td>10.314326</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>20.357367</td>\n",
       "      <td>9.820602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>21.581538</td>\n",
       "      <td>11.342465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>27.737654</td>\n",
       "      <td>13.548982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>51.783708</td>\n",
       "      <td>19.654648</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  mean        std\n",
       "GestureOnly                      \n",
       "0            19.000000  15.626834\n",
       "1            18.387387  12.717864\n",
       "2            13.418006  11.615571\n",
       "3            13.192182  10.096861\n",
       "4            13.439344   9.632580\n",
       "5            13.157407   9.709874\n",
       "6            17.198813   8.017407\n",
       "7            18.627841   7.706187\n",
       "8            30.096463  14.204850\n",
       "9            19.134375   8.786072\n",
       "10           20.289308  10.179677\n",
       "11           19.311526  20.655269\n",
       "12           20.683230  10.314326\n",
       "13           20.357367   9.820602\n",
       "14           21.581538  11.342465\n",
       "15           27.737654  13.548982\n",
       "16           51.783708  19.654648"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all.groupby(df_lstm_all.GestureOnly)[\"BlobCount\"].agg([\"mean\", \"std\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "before: 5479\n",
      "after: 5043\n",
      "ratio: 7.957656506661799\n"
     ]
    }
   ],
   "source": [
    "# filter on gesture lengths\n",
    "print(\"before: %s\" % len(df_lstm_all))\n",
    "df_lstm = df_lstm_all[(df_lstm_all.BlobCount <= 100) & (df_lstm_all.BlobCount >= 5)]\n",
    "print(\"after: %s\" % len(df_lstm))\n",
    "print(\"ratio: %s\" % ((len(df_lstm_all) - len(df_lstm)) / len(df_lstm_all) * 100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    5043.000000\n",
       "mean       22.512195\n",
       "std        13.430134\n",
       "min         5.000000\n",
       "25%        14.000000\n",
       "50%        19.000000\n",
       "75%        27.000000\n",
       "max        99.000000\n",
       "Name: BlobCount, dtype: float64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm.BlobCount.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "lengths = []\n",
    "for index, row in df_lstm.iterrows():\n",
    "    lengths.append(row.Blobs.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "df_lstm[\"BlobCount\"] = lengths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    5043.000000\n",
       "mean       22.512195\n",
       "std        13.430134\n",
       "min         5.000000\n",
       "25%        14.000000\n",
       "50%        19.000000\n",
       "75%        27.000000\n",
       "max        99.000000\n",
       "Name: BlobCount, dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm.BlobCount.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def lerp(a, b, c=0.5):\n",
    "    return c * b + (1.0 - c) * a\n",
    "\n",
    "#Svens new Blob detection\n",
    "def detect_blobs_return_old(image, task):\n",
    "    #image = e.Image\n",
    "    large = np.ones((29,17), dtype=np.uint8)\n",
    "    large[1:28,1:16] = np.copy(image)\n",
    "    temp, thresh = cv2.threshold(cv2.bitwise_not(large), 205, 255, cv2.THRESH_BINARY)\n",
    "    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n",
    "    contours = [a for a in contours if cv2.contourArea(a) > 8 and cv2.contourArea(a) < 255]\n",
    "    lstBlob  = []\n",
    "    lstCenter = []\n",
    "    lstMin = []\n",
    "    lstMax = []\n",
    "    count = 0\n",
    "    contours.sort(key=lambda a: cv2.contourArea(a))\n",
    "    if len(contours) > 0:\n",
    "        # if two finger or knuckle\n",
    "        cont_count = 2 if task in [1, 6, 7, 18, 23, 24] and len(contours) > 1 else 1\n",
    "        for i in range(1, cont_count + 1):\n",
    "            max_contour = contours[-1 * i]\n",
    "            xmax, ymax = np.max(max_contour.reshape(len(max_contour),2), axis=0)\n",
    "            xmin, ymin = np.min(max_contour.reshape(len(max_contour),2), axis=0)\n",
    "            M = cv2.moments(max_contour)\n",
    "            cX = int(M[\"m10\"] / M[\"m00\"]) - 1\n",
    "            cY = int(M[\"m01\"] / M[\"m00\"]) - 1\n",
    "            #croped_im = np.zeros((27,15))\n",
    "            blob = large[max(ymin - 1, 0):min(ymax + 1, large.shape[0]),max(xmin - 1, 0):min(xmax + 1, large.shape[1])]\n",
    "            #croped_im[0:blob.shape[0],0:blob.shape[1]] = blob\n",
    "            #return (1, [croped_im])\n",
    "            lstBlob.append(blob)\n",
    "            lstCenter.append((cY, cX))\n",
    "            lstMin.append(xmax-xmin)\n",
    "            lstMax.append(ymax-ymin)\n",
    "            count = count + 1\n",
    "        return (count, lstBlob, lstCenter)\n",
    "    else:\n",
    "        return (0, [np.zeros((29, 19))], 0, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# descides whether or not a normalization is neccessary\n",
    "# and cuts or adds zeros\n",
    "def normalize_blobs(blobs, new_len=50):\n",
    "    new_count = new_len - blobs.shape[0]\n",
    "    if new_count == 0:\n",
    "        return blobs\n",
    "    elif new_count > 0:\n",
    "        temp = np.array([np.zeros((27, 15))] * new_count)\n",
    "        return np.append(blobs, temp, axis=0)\n",
    "    else:\n",
    "        return blobs[0:new_len]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.48 s, sys: 236 ms, total: 1.71 s\n",
      "Wall time: 1.71 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# normalizes all image sequences\n",
    "df_lstm_norm = df_lstm.copy(deep=True)\n",
    "new_blobs = []\n",
    "for index, row in df_lstm.iterrows():\n",
    "    new_blobs.append(normalize_blobs(row.Blobs, 50))\n",
    "\n",
    "df_lstm_norm.Blobs = new_blobs\n",
    "\n",
    "lengths = []\n",
    "for index, row in df_lstm_norm.iterrows():\n",
    "    lengths.append(row.Blobs.shape[0])\n",
    "df_lstm_norm[\"BlobCount\"] = lengths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    5043.0\n",
       "mean       50.0\n",
       "std         0.0\n",
       "min        50.0\n",
       "25%        50.0\n",
       "50%        50.0\n",
       "75%        50.0\n",
       "max        50.0\n",
       "Name: BlobCount, dtype: float64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_norm.BlobCount.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_lstm_norm.to_pickle(\"DataStudyEvaluation/df_lstm_norm50.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>TaskID</th>\n",
       "      <th>VersionID</th>\n",
       "      <th>Blobs</th>\n",
       "      <th>Length</th>\n",
       "      <th>BlobCount</th>\n",
       "      <th>GestureOnly</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>[[[0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0...</td>\n",
       "      <td>25</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>[[[0.0, 2.0, 191.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1...</td>\n",
       "      <td>12</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>[[[0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0...</td>\n",
       "      <td>14</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>[[[0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 3.0...</td>\n",
       "      <td>11</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>[[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0...</td>\n",
       "      <td>16</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userID  TaskID  VersionID  \\\n",
       "0       1       0          2   \n",
       "1       1       0          3   \n",
       "2       1       0          4   \n",
       "3       1       0          5   \n",
       "4       1       0          6   \n",
       "\n",
       "                                               Blobs  Length  BlobCount  \\\n",
       "0  [[[0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 0.0, 1.0...      25         50   \n",
       "1  [[[0.0, 2.0, 191.0, 0.0, 0.0, 1.0, 2.0, 0.0, 1...      12         50   \n",
       "2  [[[0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 2.0, 1.0, 0.0...      14         50   \n",
       "3  [[[0.0, 1.0, 0.0, 2.0, 2.0, 0.0, 1.0, 0.0, 3.0...      11         50   \n",
       "4  [[[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0...      16         50   \n",
       "\n",
       "   GestureOnly  \n",
       "0            0  \n",
       "1            0  \n",
       "2            0  \n",
       "3            0  \n",
       "4            0  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_norm.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_norm.userID.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}