{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Filtering the data for the LSTM: removes all the rows, where we used the revert button, when the participant performed a wrong gesture\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "from scipy.odr import *\n",
    "from scipy.stats import *\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import time\n",
    "import matplotlib.pyplot as plt\n",
    "import ast\n",
    "from multiprocessing import Pool, cpu_count\n",
    "\n",
    "import scipy\n",
    "\n",
    "from IPython import display\n",
    "from matplotlib.patches import Rectangle\n",
    "\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import json\n",
    "\n",
    "import scipy.stats as st\n",
    "from sklearn.metrics import r2_score\n",
    "\n",
    "\n",
    "from matplotlib import cm\n",
    "from mpl_toolkits.mplot3d import axes3d\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import copy\n",
    "\n",
    "from sklearn.model_selection import LeaveOneOut, LeavePOut\n",
    "\n",
    "from multiprocessing import Pool\n",
    "import cv2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfAll = pd.read_pickle(\"DataStudyCollection/AllData.pkl\")\n",
    "dfAll.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_actual = dfAll[(dfAll.Actual_Data == True) & (dfAll.Is_Pause == False)]\n",
    "df_actual.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"all: %s, actual data: %s\" % (len(dfAll), len(df_actual)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# filter out all gestures, where the revert button was pressed during the study and the gestrue was repeated\n",
    "def is_max(df):\n",
    "    df_temp = df.copy(deep=True)\n",
    "    max_version = df_temp.RepetitionID.max()\n",
    "    df_temp[\"IsMax\"] = np.where(df_temp.RepetitionID == max_version, True, False)\n",
    "    df_temp[\"MaxRepetition\"] = [max_version] * len(df_temp)\n",
    "    return df_temp\n",
    "\n",
    "df_filtered = df_actual.copy(deep=True)\n",
    "df_grp = df_filtered.groupby([df_filtered.userID, df_filtered.TaskID, df_filtered.VersionID])\n",
    "pool = Pool(cpu_count() - 1)\n",
    "result_lst = pool.map(is_max, [grp for name, grp in df_grp])\n",
    "df_filtered = pd.concat(result_lst)\n",
    "df_filtered = df_filtered[df_filtered.IsMax == True]\n",
    "pool.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_filtered.to_pickle(\"DataStudyCollection/df_lstm.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"actual: %s, filtered data: %s\" % (len(df_actual), len(df_filtered)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}