knuckletouch/python/Step_10_LSTM_Preprocessing....

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preprocessing for LSTM: Blobdetection and Cutting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "from scipy.odr import *\n",
    "from scipy.stats import *\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import time\n",
    "import matplotlib.pyplot as plt\n",
    "import ast\n",
    "from multiprocessing import Pool, cpu_count\n",
    "\n",
    "import scipy\n",
    "\n",
    "from IPython import display\n",
    "from matplotlib.patches import Rectangle\n",
    "\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import json\n",
    "\n",
    "import scipy.stats as st\n",
    "from sklearn.metrics import r2_score\n",
    "\n",
    "\n",
    "from matplotlib import cm\n",
    "from mpl_toolkits.mplot3d import axes3d\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import copy\n",
    "\n",
    "from sklearn.model_selection import LeaveOneOut, LeavePOut\n",
    "\n",
    "from multiprocessing import Pool\n",
    "import cv2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered = pd.read_pickle(\"DataStudyCollection/df_lstm.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>Timestamp</th>\n",
       "      <th>Current_Task</th>\n",
       "      <th>Task_amount</th>\n",
       "      <th>TaskID</th>\n",
       "      <th>VersionID</th>\n",
       "      <th>RepetitionID</th>\n",
       "      <th>Actual_Data</th>\n",
       "      <th>Is_Pause</th>\n",
       "      <th>Image</th>\n",
       "      <th>IsMax</th>\n",
       "      <th>MaxRepetition</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>291980</th>\n",
       "      <td>1</td>\n",
       "      <td>1,54515E+12</td>\n",
       "      <td>33</td>\n",
       "      <td>680</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>291981</th>\n",
       "      <td>1</td>\n",
       "      <td>1,54515E+12</td>\n",
       "      <td>33</td>\n",
       "      <td>680</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>291982</th>\n",
       "      <td>1</td>\n",
       "      <td>1,54515E+12</td>\n",
       "      <td>33</td>\n",
       "      <td>680</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>291983</th>\n",
       "      <td>1</td>\n",
       "      <td>1,54515E+12</td>\n",
       "      <td>33</td>\n",
       "      <td>680</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>291984</th>\n",
       "      <td>1</td>\n",
       "      <td>1,54515E+12</td>\n",
       "      <td>33</td>\n",
       "      <td>680</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...</td>\n",
       "      <td>True</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        userID    Timestamp  Current_Task  Task_amount  TaskID  VersionID  \\\n",
       "291980       1  1,54515E+12            33          680       0          2   \n",
       "291981       1  1,54515E+12            33          680       0          2   \n",
       "291982       1  1,54515E+12            33          680       0          2   \n",
       "291983       1  1,54515E+12            33          680       0          2   \n",
       "291984       1  1,54515E+12            33          680       0          2   \n",
       "\n",
       "        RepetitionID  Actual_Data  Is_Pause  \\\n",
       "291980             0         True     False   \n",
       "291981             0         True     False   \n",
       "291982             0         True     False   \n",
       "291983             0         True     False   \n",
       "291984             0         True     False   \n",
       "\n",
       "                                                    Image  IsMax  \\\n",
       "291980  [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...   True   \n",
       "291981  [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...   True   \n",
       "291982  [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...   True   \n",
       "291983  [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...   True   \n",
       "291984  [0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0, ...   True   \n",
       "\n",
       "        MaxRepetition  \n",
       "291980              0  \n",
       "291981              0  \n",
       "291982              0  \n",
       "291983              0  \n",
       "291984              0  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_filtered.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered.Image = df_filtered.Image.apply(lambda x: x.reshape(27, 15))\n",
    "df_filtered.Image = df_filtered.Image.apply(lambda x: x.clip(min=0, max=255))\n",
    "df_filtered.Image = df_filtered.Image.apply(lambda x: x.astype(np.uint8))\n",
    "df_filtered[\"ImageSum\"] = df_filtered.Image.apply(lambda x: np.sum(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#LSTMs new Blob detection (only detect, if there are blobs)\n",
    "def detect_blobs(image):\n",
    "    #image = image.reshape(27, 15)\n",
    "    large = np.ones((29,17), dtype=np.uint8)\n",
    "    large[1:28,1:16] = image\n",
    "    temp, thresh = cv2.threshold(cv2.bitwise_not(large), 200, 255, cv2.THRESH_BINARY)\n",
    "    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n",
    "    contours = [a for a in contours if cv2.contourArea(a) > 8 and cv2.contourArea(a) < 255]\n",
    "    lstBlob  = []\n",
    "    lstMin = []\n",
    "    lstMax = []\n",
    "    count = 0\n",
    "    return len(contours) > 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.42 s, sys: 1.14 s, total: 4.57 s\n",
      "Wall time: 4.94 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "pool = Pool(cpu_count() - 1)\n",
    "temp_blobs = pool.map(detect_blobs, df_filtered.Image)\n",
    "pool.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered[\"ContainsBlobs\"] = temp_blobs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Label if knuckle or finger\n",
    "def f(row):\n",
    "    if row['TaskID'] < 17:\n",
    "        #val = \"Knuckle\"\n",
    "        val = 0\n",
    "    elif row['TaskID'] >= 17:\n",
    "        #val = \"Finger\"\n",
    "        val = 1\n",
    "    return val\n",
    "df_filtered['InputMethod'] = df_filtered.apply(f, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered.index = range(len(df_filtered))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "10\n",
      "11\n",
      "12\n",
      "13\n",
      "14\n",
      "15\n",
      "16\n",
      "17\n",
      "18\n",
      "CPU times: user 4min 7s, sys: 424 ms, total: 4min 8s\n",
      "Wall time: 4min 8s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# trim image sequences down to only between first and last detected blob\n",
    "UserIDs = []\n",
    "TaskIDs = []\n",
    "VersionIDs = []\n",
    "Blobs = []\n",
    "for userID in df_filtered.userID.unique():\n",
    "    print(userID)\n",
    "    for TaskID in df_filtered[df_filtered.userID == userID].TaskID.unique():\n",
    "        for VersionID in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID)].VersionID.unique():\n",
    "            first_blob = -1\n",
    "            last_blob = -1\n",
    "            for index, row in df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID)].iterrows():\n",
    "                if row.ContainsBlobs:\n",
    "                    last_blob = index\n",
    "                    if first_blob == -1:\n",
    "                        first_blob = index\n",
    "            if first_blob >= 0 and last_blob >= 0:\n",
    "                UserIDs.append(userID)\n",
    "                TaskIDs.append(TaskID)\n",
    "                VersionIDs.append(VersionID)\n",
    "                Blobs.append(df_filtered[(df_filtered.userID == userID) & (df_filtered.TaskID == TaskID) & (df_filtered.VersionID == VersionID) & (df_filtered.index >= first_blob) & (df_filtered.index <= last_blob)].Image.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "UserIDs = np.array(UserIDs, dtype=np.int64)\n",
    "TaskIDs = np.array(TaskIDs, dtype=np.int64)\n",
    "VersionIDs = np.array(VersionIDs, dtype=np.int64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>TaskID</th>\n",
       "      <th>VersionID</th>\n",
       "      <th>Blobs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 239,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userID  TaskID  VersionID  \\\n",
       "0       1       0          3   \n",
       "1       1       0          5   \n",
       "2       1       0          6   \n",
       "3       1       0          7   \n",
       "4       1       0          8   \n",
       "\n",
       "                                               Blobs  \n",
       "0  [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...  \n",
       "1  [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...  \n",
       "2  [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 239,...  \n",
       "3  [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...  \n",
       "4  [[[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0...  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all = pd.DataFrame()\n",
    "df_lstm_all[\"userID\"] = UserIDs\n",
    "df_lstm_all[\"TaskID\"] = TaskIDs\n",
    "df_lstm_all[\"VersionID\"] = VersionIDs\n",
    "df_lstm_all[\"Blobs\"] = Blobs\n",
    "df_lstm_all.Blobs = df_lstm_all.Blobs.map(np.array)\n",
    "df_lstm_all.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>TaskID</th>\n",
       "      <th>VersionID</th>\n",
       "      <th>Blobs</th>\n",
       "      <th>BlobCount</th>\n",
       "      <th>GestureOnly</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...</td>\n",
       "      <td>38</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...</td>\n",
       "      <td>57</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 239,...</td>\n",
       "      <td>41</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0...</td>\n",
       "      <td>41</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userID  TaskID  VersionID  \\\n",
       "0       1       0          3   \n",
       "1       1       0          5   \n",
       "2       1       0          6   \n",
       "3       1       0          7   \n",
       "4       1       0          8   \n",
       "\n",
       "                                               Blobs  BlobCount  GestureOnly  \n",
       "0  [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...         38            0  \n",
       "1  [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...         57            0  \n",
       "2  [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 239,...         41            0  \n",
       "3  [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...         20            0  \n",
       "4  [[[0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0...         41            0  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_lstm_all[\"Length\"] = df_lstm_all.Blobs.apply(lambda x: x.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f1a61641278>"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEP1JREFUeJzt3XuMXOV5x/HvE5ubSBpDiFaWbXVNsRQ5oiVoRYgSRVtQwEBVU4lEjlBxIkuWWqImElVrGqnkhgSVCE2iXOQGK06EApQkMgpU1AVGUf8AAuFiDCJsEkd45WAlBidLFNqlT/+Yd+l0s+ud2Z3dubzfj7TyOe9558zz7Fn7t+fMmXFkJpKk+ryp1wVIknrDAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVanWvCziRs846K0dHRxf9+FdffZXTTz+9ewX1yLD0AfbSj4alD7CXGY8//vgvM/PtC83r6wAYHR3lscceW/TjG40G4+Pj3SuoR4alD7CXfjQsfYC9zIiIn7czz0tAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUqb5+J/BKGd11b1vzDt10xTJXIkkrxzMASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkirVdgBExKqIeCIivl/WN0bEIxExERF3RsTJZfyUsj5Rto+27OP6Mv58RFza7WYkSe3r5Azg48BzLes3A7dm5jnAy8COMr4DeLmM31rmERGbgW3AO4EtwFciYtXSypckLVZbARAR64ErgK+X9QAuAu4uU/YCV5blrWWdsv3iMn8rcEdmvpaZPwMmgAu60YQkqXOr25z3z8DfAW8p628DXsnM6bJ+GFhXltcBLwJk5nREHC/z1wEPt+yz9TFviIidwE6AkZERGo1Gu738nqmpqbYef9250wvOAZZUy1K028cgsJf+Myx9gL10asEAiIg/A45m5uMRMb6s1QCZuRvYDTA2Npbj44t/ykajQTuP/8iue9va36GrF1/LUrTbxyCwl/4zLH2AvXSqnTOA9wJ/HhGXA6cCfwB8AVgTEavLWcB6YLLMnwQ2AIcjYjXwVuBXLeMzWh8jSVphC74GkJnXZ+b6zByl+SLug5l5NfAQcFWZth3YV5bvKeuU7Q9mZpbxbeUuoY3AJuDRrnUiSepIu68BzOXvgTsi4nPAE8BtZfw24FsRMQEcoxkaZObBiLgLeBaYBq7NzNeX8PySpCXoKAAyswE0yvJPmeMunsz8HfDBeR5/I3Bjp0VKkrrPdwJLUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqtbrXBSynA5PH+ciue3tdhiT1Jc8AJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkSi0YABFxakQ8GhFPRcTBiPh0Gd8YEY9ExERE3BkRJ5fxU8r6RNk+2rKv68v48xFx6XI1JUlaWDtnAK8BF2XmnwDnAVsi4kLgZuDWzDwHeBnYUebvAF4u47eWeUTEZmAb8E5gC/CViFjVzWYkSe1bMACyaaqsnlS+ErgIuLuM7wWuLMtbyzpl+8UREWX8jsx8LTN/BkwAF3SlC0lSx9r6OOjym/rjwDnAl4GfAK9k5nSZchhYV5bXAS8CZOZ0RBwH3lbGH27ZbetjWp9rJ7ATYGRkhEaj0VlHLUZOg+vOnV54YpuWUstSTE1N9ey5u81e+s+w9AH20qm2AiAzXwfOi4g1wPeAdyxXQZm5G9gNMDY2luPj44ve15du38ctB7r3Xx4cunrxtSxFo9FgKd+HfmIv/WdY+gB76VRHdwFl5ivAQ8B7gDURMfOv63pgsixPAhsAyva3Ar9qHZ/jMZKkFdbOXUBvL7/5ExGnAR8AnqMZBFeVaduBfWX5nrJO2f5gZmYZ31buEtoIbAIe7VYjkqTOtHN9ZC2wt7wO8Cbgrsz8fkQ8C9wREZ8DngBuK/NvA74VERPAMZp3/pCZByPiLuBZYBq4tlxakiT1wIIBkJlPA++aY/ynzHEXT2b+DvjgPPu6Ebix8zIlSd3mO4ElqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKLRgAEbEhIh6KiGcj4mBEfLyMnxkR+yPihfLnGWU8IuKLETEREU9HxPkt+9pe5r8QEduXry1J0kLaOQOYBq7LzM3AhcC1EbEZ2AU8kJmbgAfKOsBlwKbytRP4KjQDA7gBeDdwAXDDTGhIklbeggGQmUcy80dl+TfAc8A6YCuwt0zbC1xZlrcC38ymh4E1EbEWuBTYn5nHMvNlYD+wpavdSJLa1tFrABExCrwLeAQYycwjZdMvgJGyvA54seVhh8vYfOOSpB5Y3e7EiHgz8B3gE5n564h4Y1tmZkRkNwqKiJ00Lx0xMjJCo9FY9L5GToPrzp3uRlkAS6plKaampnr23N1mL/1nWPoAe+lUWwEQESfR/Mf/9sz8bhl+KSLWZuaRconnaBmfBDa0PHx9GZsExmeNN2Y/V2buBnYDjI2N5fj4+OwpbfvS7fu45UDbGbegQ1cvvpalaDQaLOX70E/spf8MSx9gL51q5y6gAG4DnsvMz7dsugeYuZNnO7CvZfyacjfQhcDxcqnofuCSiDijvPh7SRmTJPVAO78evxf4S+BARDxZxv4BuAm4KyJ2AD8HPlS23QdcDkwAvwU+CpCZxyLis8APy7zPZOaxrnQhSerYggGQmf8JxDybL55jfgLXzrOvPcCeTgqUJC0P3wksSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpbr3/yVWYHTXvW3NO3TTFctciSQtnWcAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmq1IIBEBF7IuJoRDzTMnZmROyPiBfKn2eU8YiIL0bEREQ8HRHntzxme5n/QkRsX552JEntaucM4BvAlllju4AHMnMT8EBZB7gM2FS+dgJfhWZgADcA7wYuAG6YCQ1JUm8sGACZ+QPg2KzhrcDesrwXuLJl/JvZ9DCwJiLWApcC+zPzWGa+DOzn90NFkrSCFvsawEhmHinLvwBGyvI64MWWeYfL2HzjkqQeWb3UHWRmRkR2oxiAiNhJ8/IRIyMjNBqNRe9r5DS47tzpLlXWvqXUPJepqamu77NX7KX/DEsfYC+dWmwAvBQRazPzSLnEc7SMTwIbWuatL2OTwPis8cZcO87M3cBugLGxsRwfH59rWlu+dPs+bjmw5Izr2KGrx7u6v0ajwVK+D/3EXvrPsPQB9tKpxV4CugeYuZNnO7CvZfyacjfQhcDxcqnofuCSiDijvPh7SRmTJPXIgr8eR8S3af72flZEHKZ5N89NwF0RsQP4OfChMv0+4HJgAvgt8FGAzDwWEZ8FfljmfSYzZ7+wLElaQQsGQGZ+eJ5NF88xN4Fr59nPHmBPR9VJkpaN7wSWpEoZAJJUKQNAkiplAEhSpQwASaqUASBJlTIAJKlSBoAkVcoAkKRKGQCSVCkDQJIqZQBIUqUMAEmqlAEgSZUyACSpUgaAJFXKAJCkShkAklQpA0CSKrXg/wmszo3uureteYduumKZK5Gk+XkGIEmVMgAkqVIGgCRVygCQpEoZAJJUKQNAkiplAEhSpQwASaqUbwTroXbfMPaNLacvcyWSauQZgCRVygCQpEoZAJJUKQNAkirli8AD4MDkcT7SxgvGfrqopE54BiBJlTIAJKlSK34JKCK2AF8AVgFfz8ybVrqGYdXu+wrAy0WSVvgMICJWAV8GLgM2Ax+OiM0rWYMkqWmlzwAuACYy86cAEXEHsBV4doXrqJ7/baWklQ6AdcCLLeuHgXevcA3qQCeXldrhx1pI/aPvbgONiJ3AzrI6FRHPL2F3ZwG/XHpVvfU3Q9IHwJ/ePDy9MDzHZVj6AHuZ8YftTFrpAJgENrSsry9jb8jM3cDubjxZRDyWmWPd2FcvDUsfYC/9aFj6AHvp1ErfBvpDYFNEbIyIk4FtwD0rXIMkiRU+A8jM6Yj4GHA/zdtA92TmwZWsQZLUtOKvAWTmfcB9K/R0XbmU1AeGpQ+wl340LH2AvXQkMnO5n0OS1If8KAhJqtRQBkBEbImI5yNiIiJ29bqeTkXEoYg4EBFPRsRjZezMiNgfES+UP8/odZ1ziYg9EXE0Ip5pGZuz9mj6YjlOT0fE+b2r/P+bp49PRcRkOS5PRsTlLduuL308HxGX9qbquUXEhoh4KCKejYiDEfHxMj5Qx+UEfQzccYmIUyPi0Yh4qvTy6TK+MSIeKTXfWW6WISJOKesTZftoVwrJzKH6ovni8k+As4GTgaeAzb2uq8MeDgFnzRr7J2BXWd4F3NzrOuep/f3A+cAzC9UOXA78GxDAhcAjva5/gT4+BfztHHM3l5+zU4CN5edvVa97aKlvLXB+WX4L8ONS80AdlxP0MXDHpXxv31yWTwIeKd/ru4BtZfxrwF+V5b8GvlaWtwF3dqOOYTwDeOPjJjLzv4CZj5sYdFuBvWV5L3BlD2uZV2b+ADg2a3i+2rcC38ymh4E1EbF2ZSo9sXn6mM9W4I7MfC0zfwZM0Pw57AuZeSQzf1SWfwM8R/Nd+QN1XE7Qx3z69riU7+1UWT2pfCVwEXB3GZ99TGaO1d3AxRERS61jGANgro+bONEPST9K4N8j4vHyzmiAkcw8UpZ/AYz0prRFma/2QTxWHyuXRfa0XIYbmD7KpYN30fyNc2CPy6w+YACPS0SsiogngaPAfppnKK9k5nSZ0lrvG72U7ceBty21hmEMgGHwvsw8n+anpl4bEe9v3ZjN88CBvH1rkGsHvgr8EXAecAS4pbfldCYi3gx8B/hEZv66ddsgHZc5+hjI45KZr2fmeTQ/EeEC4B0rXcMwBsCCHzfR7zJzsvx5FPgezR+Ol2ZOw8ufR3tXYcfmq32gjlVmvlT+0v4P8C/83+WEvu8jIk6i+Y/m7Zn53TI8cMdlrj4G+bgAZOYrwEPAe2hebpt5f1ZrvW/0Ura/FfjVUp97GANgoD9uIiJOj4i3zCwDlwDP0Oxhe5m2HdjXmwoXZb7a7wGuKXedXAgcb7kk0XdmXQf/C5rHBZp9bCt3amwENgGPrnR98ynXim8DnsvMz7dsGqjjMl8fg3hcIuLtEbGmLJ8GfIDmaxoPAVeVabOPycyxugp4sJy1LU2vXw1fji+adzH8mOY1tU/2up4Oaz+b5p0LTwEHZ+qneb3vAeAF4D+AM3td6zz1f5vmafh/07yGuWO+2mneCfHlcpwOAGO9rn+BPr5V6ny6/IVc2zL/k6WP54HLel3/rF7eR/PyztPAk+Xr8kE7LifoY+COC/DHwBOl5meAfyzjZ9MMqQngX4FTyvipZX2ibD+7G3X4TmBJqtQwXgKSJLXBAJCkShkAklQpA0CSKmUASFKlDABJqpQBIEmVMgAkqVL/C2+FhSKKT6n/AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_lstm_all.Length.hist(range=(0,300), bins=30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.02870949403069926"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_lstm_all[df_lstm_all.Length > 50]) / len(df_lstm_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    10554.0\n",
       "mean        15.9\n",
       "std         13.6\n",
       "min          1.0\n",
       "25%          8.0\n",
       "50%         13.0\n",
       "75%         19.0\n",
       "max        301.0\n",
       "Name: Length, dtype: float64"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all.Length.describe().round(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "lengths = []\n",
    "for index, row in df_lstm_all.iterrows():\n",
    "    lengths.append(row.Blobs.shape[0])\n",
    "df_lstm_all[\"BlobCount\"] = lengths\n",
    "# add a column for pure gesture recognition without finger/knuckle\n",
    "df_lstm_all[\"GestureOnly\"] = df_lstm_all.TaskID % 17"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    10554.000000\n",
       "mean        15.906576\n",
       "std         13.605214\n",
       "min          1.000000\n",
       "25%          8.000000\n",
       "50%         13.000000\n",
       "75%         19.000000\n",
       "max        301.000000\n",
       "Name: BlobCount, dtype: float64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all.BlobCount.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GestureOnly</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>11.421429</td>\n",
       "      <td>8.940925</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>13.618683</td>\n",
       "      <td>13.864708</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8.852596</td>\n",
       "      <td>6.315931</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8.672913</td>\n",
       "      <td>5.580500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>9.828767</td>\n",
       "      <td>6.793559</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>9.211221</td>\n",
       "      <td>6.861675</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>14.622496</td>\n",
       "      <td>8.338379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>13.684524</td>\n",
       "      <td>13.263753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>20.397129</td>\n",
       "      <td>12.916920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>14.468599</td>\n",
       "      <td>10.042060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>14.921440</td>\n",
       "      <td>8.909217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>13.695578</td>\n",
       "      <td>7.661549</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>17.070853</td>\n",
       "      <td>11.755087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>15.712219</td>\n",
       "      <td>10.545010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>16.468354</td>\n",
       "      <td>9.826818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>19.840836</td>\n",
       "      <td>11.239255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>42.931624</td>\n",
       "      <td>21.024635</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  mean        std\n",
       "GestureOnly                      \n",
       "0            11.421429   8.940925\n",
       "1            13.618683  13.864708\n",
       "2             8.852596   6.315931\n",
       "3             8.672913   5.580500\n",
       "4             9.828767   6.793559\n",
       "5             9.211221   6.861675\n",
       "6            14.622496   8.338379\n",
       "7            13.684524  13.263753\n",
       "8            20.397129  12.916920\n",
       "9            14.468599  10.042060\n",
       "10           14.921440   8.909217\n",
       "11           13.695578   7.661549\n",
       "12           17.070853  11.755087\n",
       "13           15.712219  10.545010\n",
       "14           16.468354   9.826818\n",
       "15           19.840836  11.239255\n",
       "16           42.931624  21.024635"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_all.groupby(df_lstm_all.GestureOnly)[\"BlobCount\"].agg([\"mean\", \"std\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "before: 10554\n",
      "after: 9193\n",
      "ratio: 12.895584612469206\n"
     ]
    }
   ],
   "source": [
    "# filter on gesture lengths\n",
    "print(\"before: %s\" % len(df_lstm_all))\n",
    "df_lstm = df_lstm_all[(df_lstm_all.BlobCount <= 100) & (df_lstm_all.BlobCount >= 5)]\n",
    "print(\"after: %s\" % len(df_lstm))\n",
    "print(\"ratio: %s\" % ((len(df_lstm_all) - len(df_lstm)) / len(df_lstm_all) * 100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    9193.000000\n",
       "mean       17.678995\n",
       "std        12.059369\n",
       "min         5.000000\n",
       "25%        10.000000\n",
       "50%        15.000000\n",
       "75%        20.000000\n",
       "max        97.000000\n",
       "Name: BlobCount, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm.BlobCount.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "lengths = []\n",
    "for index, row in df_lstm.iterrows():\n",
    "    lengths.append(row.Blobs.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    }
   ],
   "source": [
    "df_lstm[\"BlobCount\"] = lengths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    ""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    9193.000000\n",
       "mean       17.678995\n",
       "std        12.059369\n",
       "min         5.000000\n",
       "25%        10.000000\n",
       "50%        15.000000\n",
       "75%        20.000000\n",
       "max        97.000000\n",
       "Name: BlobCount, dtype: float64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm.BlobCount.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def lerp(a, b, c=0.5):\n",
    "    return c * b + (1.0 - c) * a\n",
    "\n",
    "#Svens new Blob detection\n",
    "def detect_blobs_return_old(image, task):\n",
    "    #image = e.Image\n",
    "    large = np.ones((29,17), dtype=np.uint8)\n",
    "    large[1:28,1:16] = np.copy(image)\n",
    "    temp, thresh = cv2.threshold(cv2.bitwise_not(large), 205, 255, cv2.THRESH_BINARY)\n",
    "    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)\n",
    "    contours = [a for a in contours if cv2.contourArea(a) > 8 and cv2.contourArea(a) < 255]\n",
    "    lstBlob  = []\n",
    "    lstCenter = []\n",
    "    lstMin = []\n",
    "    lstMax = []\n",
    "    count = 0\n",
    "    contours.sort(key=lambda a: cv2.contourArea(a))\n",
    "    if len(contours) > 0:\n",
    "        # if two finger or knuckle\n",
    "        cont_count = 2 if task in [1, 6, 7, 18, 23, 24] and len(contours) > 1 else 1\n",
    "        for i in range(1, cont_count + 1):\n",
    "            max_contour = contours[-1 * i]\n",
    "            xmax, ymax = np.max(max_contour.reshape(len(max_contour),2), axis=0)\n",
    "            xmin, ymin = np.min(max_contour.reshape(len(max_contour),2), axis=0)\n",
    "            M = cv2.moments(max_contour)\n",
    "            cX = int(M[\"m10\"] / M[\"m00\"]) - 1\n",
    "            cY = int(M[\"m01\"] / M[\"m00\"]) - 1\n",
    "            #croped_im = np.zeros((27,15))\n",
    "            blob = large[max(ymin - 1, 0):min(ymax + 1, large.shape[0]),max(xmin - 1, 0):min(xmax + 1, large.shape[1])]\n",
    "            #croped_im[0:blob.shape[0],0:blob.shape[1]] = blob\n",
    "            #return (1, [croped_im])\n",
    "            lstBlob.append(blob)\n",
    "            lstCenter.append((cY, cX))\n",
    "            lstMin.append(xmax-xmin)\n",
    "            lstMax.append(ymax-ymin)\n",
    "            count = count + 1\n",
    "        return (count, lstBlob, lstCenter)\n",
    "    else:\n",
    "        return (0, [np.zeros((29, 19))], 0, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# descides whether or not a normalization is neccessary\n",
    "# and cuts or adds zeros\n",
    "def normalize_blobs(blobs, new_len=50):\n",
    "    new_count = new_len - blobs.shape[0]\n",
    "    if new_count == 0:\n",
    "        return blobs\n",
    "    elif new_count > 0:\n",
    "        temp = np.array([np.zeros((27, 15))] * new_count)\n",
    "        return np.append(blobs, temp, axis=0)\n",
    "    else:\n",
    "        return blobs[0:new_len]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.24 s, sys: 556 ms, total: 3.8 s\n",
      "Wall time: 3.8 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# normalizes all image sequences\n",
    "df_lstm_norm = df_lstm.copy(deep=True)\n",
    "new_blobs = []\n",
    "for index, row in df_lstm.iterrows():\n",
    "    new_blobs.append(normalize_blobs(row.Blobs, 50))\n",
    "\n",
    "df_lstm_norm.Blobs = new_blobs\n",
    "\n",
    "lengths = []\n",
    "for index, row in df_lstm_norm.iterrows():\n",
    "    lengths.append(row.Blobs.shape[0])\n",
    "df_lstm_norm[\"BlobCount\"] = lengths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    9193.0\n",
       "mean       50.0\n",
       "std         0.0\n",
       "min        50.0\n",
       "25%        50.0\n",
       "50%        50.0\n",
       "75%        50.0\n",
       "max        50.0\n",
       "Name: BlobCount, dtype: float64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_norm.BlobCount.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_lstm_norm.to_pickle(\"DataStudyCollection/df_lstm_norm50.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userID</th>\n",
       "      <th>TaskID</th>\n",
       "      <th>VersionID</th>\n",
       "      <th>Blobs</th>\n",
       "      <th>BlobCount</th>\n",
       "      <th>GestureOnly</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>[[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0...</td>\n",
       "      <td>50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userID  TaskID  VersionID  \\\n",
       "0       1       0          3   \n",
       "1       1       0          5   \n",
       "2       1       0          6   \n",
       "3       1       0          7   \n",
       "4       1       0          8   \n",
       "\n",
       "                                               Blobs  BlobCount  GestureOnly  \n",
       "0  [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...         50            0  \n",
       "1  [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...         50            0  \n",
       "2  [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...         50            0  \n",
       "3  [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...         50            0  \n",
       "4  [[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0...         50            0  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_lstm_norm.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "finished\n"
     ]
    }
   ],
   "source": [
    "print(\"finished\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}