mayersn 0fb6efadd5 Added notebooks and models

2019-08-07 17:57:12 -04:00

40 KiB

Raw Permalink Blame History

In [1]:

%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from multiprocessing import Pool, cpu_count

import copy

import cv2

In [2]:

dfAll = pd.read_pickle("DataStudyEvaluation/AllData.pkl")
df = dfAll[(dfAll.Actual_Data == True) & (dfAll.Is_Pause == False)]
df.head()

Out[2]:

	userID	Timestamp	Current_Task	Task_amount	TaskID	VersionID	Actual_Data	Is_Pause	Image
56454	12	1553865148939	1	510	17	2	True	False	[0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, ...
56455	12	1553865148981	1	510	17	2	True	False	[0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, ...
56456	12	1553865149021	1	510	17	2	True	False	[0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, ...
56457	12	1553865149060	1	510	17	2	True	False	[0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, ...
56458	12	1553865149099	1	510	17	2	True	False	[0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, ...

In [3]:

df.userID.unique()

Out[3]:

array([12,  5,  1, 10,  6,  3,  7,  8,  9, 11])

In [4]:

%%time
def is_max(df):
    df_temp = df.copy(deep=True)
    max_version = df_temp.RepetitionID.max()
    df_temp["IsMax"] = np.where(df_temp.RepetitionID == max_version, True, False)
    df_temp["MaxRepetition"] = [max_version] * len(df_temp)
    return df_temp

df_grp = df.groupby([df.userID, df.TaskID, df.VersionID])
pool = Pool(cpu_count() - 1)
result_lst = pool.map(is_max, [grp for name, grp in df_grp])
df = pd.concat(result_lst)
pool.close()

CPU times: user 12.2 s, sys: 2.34 s, total: 14.6 s
Wall time: 14.2 s

In [5]:

df.Image = df.Image.apply(lambda x: x.reshape(27, 15))
df.Image = df.Image.apply(lambda x: x.clip(min=0, max=255))
df.Image = df.Image.apply(lambda x: x.astype(np.uint8))
df["ImageSum"] = df.Image.apply(lambda x: np.sum(x))

In [6]:

df.to_pickle("DataStudyEvaluation/dfFiltered.pkl")

In [7]:

print("recorded actual: %s, used data: %s" % (len(dfAll), len(df)))

recorded actual: 608084, used data: 413500

In [8]:

df = pd.read_pickle("DataStudyEvaluation/dfFiltered.pkl")

In [9]:

df.head()

Out[9]:

	userID	Timestamp	Current_Task	Task_amount	VersionID	Actual_Data	Is_Pause	Image	IsMax	MaxRepetition	ImageSum
178160	1	1553521741802	16	510	2	True	False	[[0, 1, 1, 1, 0, 1, 0, 4, 1, 0, 2, 1, 1, 0, 1]...	False	1	286
178161	1	1553521741842	16	510	2	True	False	[[0, 1, 1, 1, 0, 1, 0, 4, 1, 0, 2, 1, 1, 0, 1]...	False	1	319
178162	1	1553521741882	16	510	2	True	False	[[0, 1, 1, 1, 0, 1, 0, 4, 1, 0, 2, 1, 1, 0, 1]...	False	1	72
178163	1	1553521741922	16	510	2	True	False	[[0, 1, 1, 1, 0, 1, 0, 4, 1, 0, 2, 1, 1, 0, 1]...	False	1	288
178164	1	1553521741990	16	510	2	True	False	[[0, 1, 1, 1, 0, 1, 0, 4, 1, 0, 2, 1, 1, 0, 1]...	False	1	308

In [10]:

#Label if knuckle or finger
def f(row):
    if row['TaskID'] < 17:
        #val = "Knuckle"
        val = 0
    elif row['TaskID'] >= 17:
        #val = "Finger"
        val = 1
    return val
df['InputMethod'] = df.apply(f, axis=1)

def f(row):
    if row['TaskID'] < 17:
        val = "Knuckle"
    elif row['TaskID'] >= 17:
        val = "Finger"
    return val
df['Input'] = df.apply(f, axis=1)

In [11]:

#Svens new Blob detection
def detect_blobs(image, task):
    #image = e.Image
    large = np.ones((29,17), dtype=np.uint8)
    large[1:28,1:16] = np.copy(image)
    temp, thresh = cv2.threshold(cv2.bitwise_not(large), 200, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours = [a for a in contours if cv2.contourArea(a) > 8 and cv2.contourArea(a) < 255]
    lstBlob  = []
    lstMin = []
    lstMax = []
    count = 0
    contours.sort(key=lambda a: cv2.contourArea(a))
    if len(contours) > 0:
        # if two finger or knuckle
        cont_count = 2 if task in [1, 6, 7, 18, 23, 24] and len(contours) > 1 else 1
        for i in range(1, cont_count + 1):
            max_contour = contours[-1 * i]
            xmax, ymax = np.max(max_contour.reshape(len(max_contour),2), axis=0)
            xmin, ymin = np.min(max_contour.reshape(len(max_contour),2), axis=0)
            #croped_im = np.zeros((27,15))
            blob = large[max(ymin - 1, 0):min(ymax + 1, large.shape[0]),max(xmin - 1, 0):min(xmax + 1, large.shape[1])]
            #croped_im[0:blob.shape[0],0:blob.shape[1]] = blob
            #return (1, [croped_im])
            lstBlob.append(blob)
            lstMin.append(xmax-xmin)
            lstMax.append(ymax-ymin)
            count = count + 1
        return (count, lstBlob, lstMin, lstMax)
    else:
        return (0, [np.zeros((29, 19))], 0, 0)

In [12]:

%%time
pool = Pool(os.cpu_count()-1)
temp_blobs = pool.starmap(detect_blobs, zip(df.Image, df.TaskID))
pool.close()

CPU times: user 5.65 s, sys: 4.52 s, total: 10.2 s
Wall time: 9.76 s

In [13]:

df["BlobCount"] = [a[0] for a in temp_blobs]
df["BlobImages"] = [a[1] for a in temp_blobs]
df["BlobW"] = [a[2] for a in temp_blobs]
df["BlobH"] = [a[3] for a in temp_blobs]

In [14]:

df.BlobCount.value_counts()

Out[14]:

0    334475
1     73449
2      5576
Name: BlobCount, dtype: int64

In [15]:

dfX = df[(df.BlobCount == 1)].copy(deep=True)
dfX.BlobImages = dfX.BlobImages.apply(lambda x : x[0])
dfX.BlobW = dfX.BlobW.apply(lambda x : x[0])
dfX.BlobH = dfX.BlobH.apply(lambda x : x[0])

dfY = df[(df.BlobCount == 2)].copy(deep=True)
dfY.BlobImages = dfY.BlobImages.apply(lambda x : x[0])
dfY.BlobW = dfY.BlobW.apply(lambda x : x[0])
dfY.BlobH = dfY.BlobH.apply(lambda x : x[0])

dfZ = df[(df.BlobCount == 2)].copy(deep=True)
dfZ.BlobImages = dfZ.BlobImages.apply(lambda x : x[1])
dfZ.BlobW = dfZ.BlobW.apply(lambda x : x[1])
dfZ.BlobH = dfZ.BlobH.apply(lambda x : x[1])

df = dfX.append([dfY, dfZ])

In [16]:

print("Sample Size not Argumented:", len(df))

Sample Size not Argumented: 84601

In [17]:

df["BlobArea"] = df["BlobW"] * df["BlobH"]

In [18]:

df.BlobArea.describe().round(1)

Out[18]:

count    84601.0
mean        16.5
std          5.9
min         12.0
25%         12.0
50%         16.0
75%         16.0
max         72.0
Name: BlobArea, dtype: float64

In [19]:

df.groupby("Input").BlobArea.describe().round(1)

Out[19]:

	count	mean	std	min	25%	50%	75%	max
Input
Finger	59879.0	17.3	5.8	12.0	12.0	16.0	20.0	56.0
Knuckle	24722.0	14.8	5.7	12.0	12.0	12.0	16.0	72.0

In [20]:

df["BlobSum"] = df.BlobImages.apply(lambda x: np.sum(x))

In [21]:

df.BlobSum.describe()

Out[21]:

count    84601.000000
mean      1238.209170
std        485.150602
min        467.000000
25%        930.000000
50%       1094.000000
75%       1383.000000
max       4275.000000
Name: BlobSum, dtype: float64

In [22]:

df.BlobSum.hist()

Out[22]:

<matplotlib.axes._subplots.AxesSubplot at 0x7ff3f0375b38>

In [23]:

#Small / Blobs where the pixels are only a "little" hit
dfX = df[df.BlobSum <= 255]
len(dfX)

Out[23]:

In [24]:

print("Sample Size argumented:", len(df))

Sample Size argumented: 84601

In [25]:

def pasteToEmpty (blob):
    croped_im = np.zeros((27,15))
    croped_im[0:blob.shape[0],0:blob.shape[1]] = blob
    return croped_im

In [26]:

df["Blobs"] = df.BlobImages.apply(lambda x: pasteToEmpty(x))

In [27]:

df.to_pickle("DataStudyEvaluation/df_statistics.pkl")

In [28]:

df[["userID", "TaskID", "Blobs", "InputMethod"]].to_pickle("DataStudyEvaluation/df_blobs_area.pkl")

display blobs¶

In [29]:

plt.clf()
plt.figure(figsize=(6, 6))
ax = plt.gca()
data_point = 100
data = df.Blobs.iloc[data_point]
print(df.iloc[data_point])
plt.imshow(data, cmap='gray', vmin=0, vmax=255)
# Loop over data dimensions and create text annotations.
for i in range(0, data.shape[0]):
    for j in range(0, data.shape[1]):
        text = ax.text(j, i, int(data[i, j]),
                       ha="center", va="center", color="cyan", fontsize=1)
plt.show()

userID                                                           1
Timestamp                                            1553522289862
Current_Task                                                   155
Task_amount                                                    510
TaskID                                                           0
VersionID                                                       11
RepetitionID                                                     0
Actual_Data                                                   True
Is_Pause                                                     False
Image            [[1, 1, 1, 0, 0, 0, 0, 2, 1, 2, 2, 0, 3, 0, 0]...
IsMax                                                         True
MaxRepetition                                                    0
ImageSum                                                       933
InputMethod                                                      0
Input                                                      Knuckle
BlobCount                                                        1
BlobImages       [[2, 2, 4, 5, 2], [1, 5, 11, 13, 5], [4, 9, 71...
BlobW                                                            3
BlobH                                                            4
BlobArea                                                        12
BlobSum                                                        710
Blobs            [[2.0, 2.0, 4.0, 5.0, 2.0, 0.0, 0.0, 0.0, 0.0,...
Name: 191534, dtype: object

<Figure size 432x288 with 0 Axes>

40 KiB Raw Permalink Blame History

display blobs¶

40 KiB

Raw Permalink Blame History