knuckletouch/python/Step_09_LSTM_ReadData.ipynb

3.7 KiB

Filtering the data for the LSTM: removes all rows of gestures for which we used the revert button because the participant performed a wrong gesture.

In [ ]:
%matplotlib inline

from scipy.odr import *
from scipy.stats import *
import numpy as np
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
import ast
from multiprocessing import Pool, cpu_count

import scipy

from IPython import display
from matplotlib.patches import Rectangle

from sklearn.metrics import mean_squared_error
import json

import scipy.stats as st
from sklearn.metrics import r2_score


from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt

import copy

from sklearn.model_selection import LeaveOneOut, LeavePOut

from multiprocessing import Pool
import cv2
In [ ]:
# Load the pickled study data (presumably the complete recording, judging by the file name).
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
dfAll = pd.read_pickle("DataStudyCollection/AllData.pkl")
# Preview the first rows as the cell output.
dfAll.head()
In [ ]:
# Select the rows whose Actual_Data flag is True and whose Is_Pause flag is False.
is_actual = dfAll.Actual_Data == True
not_paused = dfAll.Is_Pause == False
df_actual = dfAll[is_actual & not_paused]
# Preview the first selected rows as the cell output.
df_actual.head()
In [ ]:
# Report how many rows were loaded and how many survived the Actual_Data / Is_Pause selection.
print("all: %s, actual data: %s" % (len(dfAll), len(df_actual)))
In [ ]:
%%time
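# Filter out all gestures where the revert button was pressed during the study and the gesture was repeated:
# within each (userID, TaskID, VersionID) group only the rows with the highest RepetitionID are kept.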
def is_max(df):
    """Flag the rows of ``df`` that carry the highest ``RepetitionID``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``RepetitionID`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A deep copy of ``df`` with two extra columns: ``IsMax`` (True where
        ``RepetitionID`` equals the maximum found in ``df``) and
        ``MaxRepetition`` (that maximum, repeated on every row).
    """
    flagged = df.copy(deep=True)
    highest = flagged.RepetitionID.max()
    # Plain ndarray of booleans, assigned positionally like the rest of the columns.
    flagged["IsMax"] = (flagged.RepetitionID == highest).to_numpy()
    flagged["MaxRepetition"] = [highest for _ in range(len(flagged))]
    return flagged

# Work on a deep copy so the steps below never modify df_actual.
df_filtered = df_actual.copy(deep=True)
# One group per (userID, TaskID, VersionID) combination; is_max flags the rows
# with the highest RepetitionID inside each group.
df_grp = df_filtered.groupby([df_filtered.userID, df_filtered.TaskID, df_filtered.VersionID])
# Leave one core free, but always start at least one worker: on a single-core
# machine cpu_count() - 1 is 0 and Pool(0) raises ValueError.
n_workers = max(1, cpu_count() - 1)
# The context manager shuts the workers down even when pool.map raises, so a
# failed run does not leave worker processes behind.
with Pool(n_workers) as pool:
    result_lst = pool.map(is_max, [grp for name, grp in df_grp])
df_filtered = pd.concat(result_lst)
# Keep only the rows flagged as belonging to the highest repetition of their group.
df_filtered = df_filtered[df_filtered.IsMax == True]
In [ ]:
# Persist the filtered frame as a pickle file (the file name suggests it feeds the later LSTM steps).
df_filtered.to_pickle("DataStudyCollection/df_lstm.pkl")
In [ ]:
# Report how many rows remain after dropping everything but the highest repetition per group.
print("actual: %s, filtered data: %s" % (len(df_actual), len(df_filtered)))
In [ ]: