3.7 KiB
3.7 KiB
Filtering the data for the LSTM: removes all rows of gestures for which the revert button was used because the participant performed a wrong gesture
In [ ]:
%matplotlib inline from scipy.odr import * from scipy.stats import * import numpy as np import pandas as pd import os import time import matplotlib.pyplot as plt import ast from multiprocessing import Pool, cpu_count import scipy from IPython import display from matplotlib.patches import Rectangle from sklearn.metrics import mean_squared_error import json import scipy.stats as st from sklearn.metrics import r2_score from matplotlib import cm from mpl_toolkits.mplot3d import axes3d import matplotlib.pyplot as plt import copy from sklearn.model_selection import LeaveOneOut, LeavePOut from multiprocessing import Pool import cv2
In [ ]:
# Load the pickled study data set into a DataFrame.
# NOTE(review): read_pickle executes arbitrary code from the file; only load
# pickles that come from a trusted source.
dfAll = pd.read_pickle("DataStudyCollection/AllData.pkl")

# Last expression of the cell: shows the first rows as the cell output.
dfAll.head()
In [ ]:
# Keep only the rows whose Actual_Data flag is True and whose Is_Pause flag
# is False.
df_actual = dfAll[
    dfAll.Actual_Data.eq(True) & dfAll.Is_Pause.eq(False)
]

# Last expression of the cell: shows the first rows as the cell output.
df_actual.head()
In [ ]:
# Report how many rows remain after selecting the actual (non-pause) data.
row_counts = (len(dfAll), len(df_actual))
print("all: %s, actual data: %s" % row_counts)
In [ ]:
%%time # filter out all gestures, where the revert button was pressed during the study and the gestrue was repeated def is_max(df): df_temp = df.copy(deep=True) max_version = df_temp.RepetitionID.max() df_temp["IsMax"] = np.where(df_temp.RepetitionID == max_version, True, False) df_temp["MaxRepetition"] = [max_version] * len(df_temp) return df_temp df_filtered = df_actual.copy(deep=True) df_grp = df_filtered.groupby([df_filtered.userID, df_filtered.TaskID, df_filtered.VersionID]) pool = Pool(cpu_count() - 1) result_lst = pool.map(is_max, [grp for name, grp in df_grp]) df_filtered = pd.concat(result_lst) df_filtered = df_filtered[df_filtered.IsMax == True] pool.close()
In [ ]:
# Persist the filtered rows as a pickle file.
df_filtered.to_pickle("DataStudyCollection/df_lstm.pkl")
In [ ]:
# Report how many rows survived the repetition filter.
row_counts = (len(df_actual), len(df_filtered))
print("actual: %s, filtered data: %s" % row_counts)
In [ ]: