43 KiB
43 KiB
In [9]:
%matplotlib inline from scipy.odr import * from scipy.stats import * import numpy as np import pandas as pd import os import time import matplotlib.pyplot as plt import ast from multiprocessing import Pool, cpu_count import scipy from IPython import display from matplotlib.patches import Rectangle from sklearn.metrics import mean_squared_error import json import scipy.stats as st from sklearn.metrics import r2_score from matplotlib import cm from mpl_toolkits.mplot3d import axes3d import matplotlib.pyplot as plt import copy from sklearn.model_selection import LeaveOneOut, LeavePOut from multiprocessing import Pool import cv2
In [10]:
# Load every recorded sample, then keep only rows flagged as actual data
# that were not recorded during a pause.
dfAll = pd.read_pickle("DataStudyCollection/AllData.pkl")
is_actual_data = dfAll.Actual_Data == True
is_outside_pause = dfAll.Is_Pause == False
df = dfAll[is_actual_data & is_outside_pause]
df.head()
Out[10]:
userID | Timestamp | Current_Task | Task_amount | TaskID | VersionID | RepetitionID | Actual_Data | Is_Pause | Image | |
---|---|---|---|---|---|---|---|---|---|---|
7919 | 17 | 1547138928692 | 1 | 680 | 6 | 2 | 0 | True | False | [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... |
7920 | 17 | 1547138928735 | 1 | 680 | 6 | 2 | 0 | True | False | [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... |
7921 | 17 | 1547138928773 | 1 | 680 | 6 | 2 | 0 | True | False | [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... |
7922 | 17 | 1547138928813 | 1 | 680 | 6 | 2 | 0 | True | False | [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... |
7923 | 17 | 1547138928861 | 1 | 680 | 6 | 2 | 0 | True | False | [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... |
In [11]:
%%time def is_max(df): df_temp = df.copy(deep=True) max_version = df_temp.RepetitionID.max() df_temp["IsMax"] = np.where(df_temp.RepetitionID == max_version, True, False) df_temp["MaxRepetition"] = [max_version] * len(df_temp) return df_temp df_grp = df.groupby([df.userID, df.TaskID, df.VersionID]) pool = Pool(cpu_count() - 1) result_lst = pool.map(is_max, [grp for name, grp in df_grp]) df = pd.concat(result_lst) pool.close()
CPU times: user 39 s, sys: 5.78 s, total: 44.8 s Wall time: 43.3 s
In [12]:
def _to_uint8_frame(raw):
    """Reshape one image to 27x15, clamp it to [0, 255] and cast it to uint8."""
    return raw.reshape(27, 15).clip(min=0, max=255).astype(np.uint8)

df["Image"] = df["Image"].apply(_to_uint8_frame)
# Total intensity of every normalised frame.
df["ImageSum"] = df["Image"].apply(lambda frame: np.sum(frame))
In [13]:
# Persist the filtered, normalised frame so later cells can reload it.
filtered_pickle_path = "DataStudyCollection/dfFiltered.pkl"
df.to_pickle(filtered_pickle_path)
In [14]:
# Compare the number of rows loaded with the number kept after filtering.
n_recorded, n_used = len(dfAll), len(df)
print("recorded actual: %s, used data: %s" % (n_recorded, n_used))
recorded actual: 1010014, used data: 851455
In [15]:
# Reload the filtered frame written by the earlier to_pickle cell.
# NOTE(review): pickle files execute code on load; only read files you produced yourself.
filtered_source_path = "DataStudyCollection/dfFiltered.pkl"
df = pd.read_pickle(filtered_source_path)
In [16]:
# Preview the first five rows of the reloaded frame.
df.head(n=5)
Out[16]:
userID | Timestamp | Current_Task | Task_amount | TaskID | VersionID | RepetitionID | Actual_Data | Is_Pause | Image | IsMax | MaxRepetition | ImageSum | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
291980 | 1 | 1,54515E+12 | 33 | 680 | 0 | 2 | 0 | True | False | [[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0]... | True | 0 | 307 |
291981 | 1 | 1,54515E+12 | 33 | 680 | 0 | 2 | 0 | True | False | [[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0]... | True | 0 | 222 |
291982 | 1 | 1,54515E+12 | 33 | 680 | 0 | 2 | 0 | True | False | [[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0]... | True | 0 | 521 |
291983 | 1 | 1,54515E+12 | 33 | 680 | 0 | 2 | 0 | True | False | [[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0]... | True | 0 | 318 |
291984 | 1 | 1,54515E+12 | 33 | 680 | 0 | 2 | 0 | True | False | [[0, 2, 0, 0, 0, 0, 1, 2, 2, 3, 2, 1, 1, 1, 0]... | True | 0 | 373 |
In [26]:
#Label if knuckle or finger
# TaskIDs below this value are labelled knuckle input, all others finger input.
FIRST_FINGER_TASK_ID = 17

def f(row):
    """Return "Knuckle" or "Finger" for a single row based on its TaskID.

    Kept as one definition (the original cell defined ``f`` twice, the second
    shadowing the first). The columns below are computed vectorised and no
    longer call this function.

    Raises:
        ValueError: if the TaskID compares neither below nor at/above the
            threshold (e.g. NaN).
    """
    if row['TaskID'] < FIRST_FINGER_TASK_ID:
        return "Knuckle"
    if row['TaskID'] >= FIRST_FINGER_TASK_ID:
        return "Finger"
    raise ValueError("TaskID %r cannot be classified as knuckle or finger" % (row['TaskID'],))

# Fail loudly on missing TaskIDs instead of silently mislabelling them.
if df["TaskID"].isna().any():
    raise ValueError("TaskID contains missing values; cannot derive the input method")

# One vectorised comparison replaces two row-wise apply passes.
is_finger = (df["TaskID"] >= FIRST_FINGER_TASK_ID).values
df['InputMethod'] = is_finger.astype(np.int64)  # 0 = knuckle, 1 = finger
df['Input'] = np.where(is_finger, "Finger", "Knuckle")
In [17]:
#Svens new Blob detection
def detect_blobs(image, task):
    """Extract the largest one or two contour-bounded blobs from a frame.

    The frame is written into the centre of a 29x17 uint8 array of ones,
    inverted and thresholded (inverted value > 200 becomes 255, everything
    else 0). Contours whose area is strictly between 8 and 255 are kept and
    ranked by area.

    Args:
        image: array assignable to a 27x15 uint8 slice.
        task: task id; for ids 1, 6, 7, 18, 23 and 24 up to two blobs are
            returned, for every other id only the largest one.

    Returns:
        ``(count, blobs, widths, heights)`` with one list entry per blob,
        largest contour first. ``widths`` holds ``xmax - xmin`` and
        ``heights`` holds ``ymax - ymin`` of each contour's extent.
        When no contour qualifies the result is
        ``(0, [np.zeros((29, 19))], 0, 0)``.
    """
    large = np.ones((29,17), dtype=np.uint8)
    large[1:28,1:16] = np.copy(image)
    _, thresh = cv2.threshold(cv2.bitwise_not(large), 200, 255, cv2.THRESH_BINARY)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); the contours are second-to-last in both cases.
    found = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Compute each area once, then filter and rank on the cached value.
    sized = [(cv2.contourArea(contour), contour) for contour in found]
    sized = [(area, contour) for area, contour in sized if 8 < area < 255]
    sized.sort(key=lambda pair: pair[0])

    if not sized:
        # NOTE(review): the placeholder is 29x19 while `large` is 29x17, and the
        # last two items are scalars rather than lists; kept unchanged because
        # callers may rely on the exact return value.
        return (0, [np.zeros((29, 19))], 0, 0)

    # if two finger or knuckle
    cont_count = 2 if task in [1, 6, 7, 18, 23, 24] and len(sized) > 1 else 1

    lstBlob = []
    lstWidth = []
    lstHeight = []
    for rank in range(1, cont_count + 1):
        contour = sized[-rank][1]
        points = contour.reshape(len(contour), 2)
        xmax, ymax = np.max(points, axis=0)
        xmin, ymin = np.min(points, axis=0)
        # NOTE(review): the slice starts one row/column before the contour's
        # minimum, but its exclusive end stops right after the maximum, so the
        # margin is one-sided. Confirm this is intended.
        blob = large[max(ymin - 1, 0):min(ymax + 1, large.shape[0]),
                     max(xmin - 1, 0):min(xmax + 1, large.shape[1])]
        lstBlob.append(blob)
        lstWidth.append(xmax - xmin)
        lstHeight.append(ymax - ymin)
    return (len(lstBlob), lstBlob, lstWidth, lstHeight)
In [18]:
%%time pool = Pool(os.cpu_count()-2) temp_blobs = pool.starmap(detect_blobs, zip(df.Image, df.TaskID)) pool.close()
CPU times: user 11.9 s, sys: 7.51 s, total: 19.4 s Wall time: 18.6 s
In [19]:
# Spread the four positions of every detection result tuple over four columns.
for position, column in enumerate(("BlobCount", "BlobImages", "BlobW", "BlobH")):
    df[column] = [result[position] for result in temp_blobs]
In [20]:
# How many frames produced zero, one or two blobs.
df["BlobCount"].value_counts()
Out[20]:
0 710145 1 128117 2 13193 Name: BlobCount, dtype: int64
In [21]:
def _rows_with_blob(frame, blob_count, position):
    """Select rows with `blob_count` blobs and unwrap the blob at `position`.

    Returns a deep copy in which BlobImages, BlobW and BlobH hold the single
    entry at index `position` instead of the per-frame list.
    """
    picked = frame[frame.BlobCount == blob_count].copy(deep=True)
    for column in ("BlobImages", "BlobW", "BlobH"):
        picked[column] = picked[column].apply(lambda per_frame: per_frame[position])
    return picked

dfX = _rows_with_blob(df, 1, 0)  # frames with exactly one blob
dfY = _rows_with_blob(df, 2, 0)  # first blob of two-blob frames
dfZ = _rows_with_blob(df, 2, 1)  # second blob of two-blob frames

# DataFrame.append was removed in pandas 2.0; pd.concat gives the same stacking.
# Frames without any blob are dropped here.
df = pd.concat([dfX, dfY, dfZ])
In [22]:
# Number of blob samples before augmentation.
blob_rows = len(df)
print("Sample Size not Argumented:", blob_rows)
Sample Size not Argumented: 154503
In [23]:
# Area of each blob's extent: width times height.
df["BlobArea"] = df["BlobW"].mul(df["BlobH"])
In [24]:
# Summary statistics of the blob area, rounded to one decimal.
df["BlobArea"].describe().round(decimals=1)
Out[24]:
count 154503.0 mean 15.8 std 5.1 min 12.0 25% 12.0 50% 16.0 75% 16.0 max 110.0 Name: BlobArea, dtype: float64
In [27]:
# Blob area statistics per input label, rounded to one decimal.
df.groupby("Input")["BlobArea"].describe().round(decimals=1)
Out[27]:
count | mean | std | min | 25% | 50% | 75% | max | |
---|---|---|---|---|---|---|---|---|
Input | ||||||||
Finger | 110839.0 | 16.6 | 5.3 | 12.0 | 12.0 | 16.0 | 16.0 | 110.0 |
Knuckle | 43664.0 | 13.7 | 3.7 | 12.0 | 12.0 | 12.0 | 16.0 | 72.0 |
In [ ]:
def _pixel_total(blob):
    """Sum of all values in one blob image."""
    return np.sum(blob)

df["BlobSum"] = df["BlobImages"].apply(_pixel_total)
In [ ]:
# Summary statistics of the per-blob intensity sum.
df["BlobSum"].describe()
In [27]:
# Histogram of the per-blob intensity sum.
df["BlobSum"].hist()
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fef38ed5908>
In [28]:
#Small / Blobs where the pixels are only a "little" hit
# Select blobs whose total intensity is at most 255 and count them.
is_faint = df["BlobSum"] <= 255
dfX = df[is_faint]
len(dfX)
Out[28]:
0
In [29]:
#Augmenting by flipping in both axis (datax4)
df["Version"] = "Normal"

# Each pass doubles the frame: first an up/down flip of every blob, then a
# left/right flip of everything accumulated so far (4x the rows in total).
# NOTE(review): the second pass labels every row it adds "FlippedLR",
# including rows that were already flipped up/down in the first pass.
for flip, label in ((np.flipud, "FlippedUD"), (np.fliplr, "FlippedLR")):
    dfFlipped = df.copy(deep=True)
    dfFlipped["BlobImages"] = dfFlipped["BlobImages"].apply(lambda blob, flip=flip: flip(blob))
    dfFlipped["Version"] = label
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the same way.
    df = pd.concat([df, dfFlipped])
In [30]:
# Number of blob samples after augmentation.
augmented_rows = len(df)
print("Sample Size argumented:", augmented_rows)
Sample Size argumented: 618012
In [31]:
def pasteToEmpty (blob):
    """Copy `blob` into the top-left corner of a zero-filled 27x15 float array.

    Args:
        blob: 2-D array no larger than 27x15.

    Returns:
        A new 27x15 array of zeros with `blob` written at rows
        ``0..blob.shape[0]-1`` and columns ``0..blob.shape[1]-1``.
    """
    height, width = blob.shape[0], blob.shape[1]
    canvas = np.zeros((27, 15))
    canvas[:height, :width] = blob
    return canvas
In [32]:
# Embed every cropped blob into a fixed-size frame via pasteToEmpty.
df["Blobs"] = df["BlobImages"].apply(pasteToEmpty)
In [34]:
# Persist the full augmented frame, all columns included.
statistics_pickle_path = "DataStudyCollection/df_statistics.pkl"
df.to_pickle(statistics_pickle_path)
In [35]:
# Persist only the selected columns of the augmented frame.
export_columns = ["userID", "TaskID", "Version", "Blobs", "InputMethod"]
df[export_columns].to_pickle("DataStudyCollection/df_blobs_area.pkl")
Display blobs
In [36]:
# Show one blob frame with every pixel value written on top of it.
data_point = 100
data = df.Blobs.iloc[data_point]
print(df.iloc[data_point])

# Create the figure and axes together. The previous plt.clf() call ran before
# any figure existed and therefore produced an extra, empty figure.
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(data, cmap='gray', vmin=0, vmax=255)

# Loop over data dimensions and create text annotations.
# NOTE(review): fontsize=1 is kept from the original; confirm the labels are legible.
for i in range(data.shape[0]):
    for j in range(data.shape[1]):
        ax.text(j, i, int(data[i, j]), ha="center", va="center", color="cyan", fontsize=1)
plt.show()
userID 1 Timestamp 1,54515E+12 Current_Task 121 Task_amount 680 TaskID 0 VersionID 7 RepetitionID 0 Actual_Data True Is_Pause False Image [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]... IsMax True MaxRepetition 0 ImageSum 1495 BlobCount 1 BlobImages [[2, 2, 11, 11, 2], [2, 9, 40, 42, 9], [4, 13,... BlobW 3 BlobH 4 BlobArea 12 BlobSum 1071 Version Normal Blobs [[2.0, 2.0, 11.0, 11.0, 2.0, 0.0, 0.0, 0.0, 0.... InputMethod 0 Input Knuckle Name: 299548, dtype: object
<Figure size 432x288 with 0 Axes>