knuckletouch/python/Step_09_LSTM_ReadData.ipynb

153 lines
3.7 KiB
Plaintext
Raw Permalink Normal View History

2019-08-07 23:57:12 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filtering the data for the LSTM: removes all the rows, where we used the revert button, when the participant performed a wrong gesture\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"from scipy.odr import *\n",
"from scipy.stats import *\n",
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"import ast\n",
"from multiprocessing import Pool, cpu_count\n",
"\n",
"import scipy\n",
"\n",
"from IPython import display\n",
"from matplotlib.patches import Rectangle\n",
"\n",
"from sklearn.metrics import mean_squared_error\n",
"import json\n",
"\n",
"import scipy.stats as st\n",
"from sklearn.metrics import r2_score\n",
"\n",
"\n",
"from matplotlib import cm\n",
"from mpl_toolkits.mplot3d import axes3d\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import copy\n",
"\n",
"from sklearn.model_selection import LeaveOneOut, LeavePOut\n",
"\n",
"from multiprocessing import Pool\n",
"import cv2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfAll = pd.read_pickle(\"DataStudyCollection/AllData.pkl\")\n",
"dfAll.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_actual = dfAll[(dfAll.Actual_Data == True) & (dfAll.Is_Pause == False)]\n",
"df_actual.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"all: %s, actual data: %s\" % (len(dfAll), len(df_actual)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# filter out all gestures, where the revert button was pressed during the study and the gestrue was repeated\n",
"def is_max(df):\n",
" df_temp = df.copy(deep=True)\n",
" max_version = df_temp.RepetitionID.max()\n",
" df_temp[\"IsMax\"] = np.where(df_temp.RepetitionID == max_version, True, False)\n",
" df_temp[\"MaxRepetition\"] = [max_version] * len(df_temp)\n",
" return df_temp\n",
"\n",
"df_filtered = df_actual.copy(deep=True)\n",
"df_grp = df_filtered.groupby([df_filtered.userID, df_filtered.TaskID, df_filtered.VersionID])\n",
"pool = Pool(cpu_count() - 1)\n",
"result_lst = pool.map(is_max, [grp for name, grp in df_grp])\n",
"df_filtered = pd.concat(result_lst)\n",
"df_filtered = df_filtered[df_filtered.IsMax == True]\n",
"pool.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_filtered.to_pickle(\"DataStudyCollection/df_lstm.pkl\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"actual: %s, filtered data: %s\" % (len(df_actual), len(df_filtered)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}