891 lines
32 KiB
Text
891 lines
32 KiB
Text
![]() |
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"## USE for Multi GPU Systems\n",
|
|||
|
"#import os\n",
|
|||
|
"#os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"\n",
|
|||
|
"\n",
|
|||
|
"%matplotlib inline\n",
|
|||
|
"\n",
|
|||
|
"from scipy.odr import *\n",
|
|||
|
"from scipy.stats import *\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"import os\n",
|
|||
|
"import time\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import ast\n",
|
|||
|
"from multiprocessing import Pool\n",
|
|||
|
"\n",
|
|||
|
"import scipy\n",
|
|||
|
"\n",
|
|||
|
"from IPython import display\n",
|
|||
|
"from matplotlib.patches import Rectangle\n",
|
|||
|
"\n",
|
|||
|
"from sklearn.metrics import mean_squared_error\n",
|
|||
|
"import json\n",
|
|||
|
"\n",
|
|||
|
"import scipy.stats as st\n",
|
|||
|
"from sklearn.metrics import r2_score\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"from matplotlib import cm\n",
|
|||
|
"from mpl_toolkits.mplot3d import axes3d\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"from matplotlib.patches import Ellipse\n",
|
|||
|
"\n",
|
|||
|
"import copy\n",
|
|||
|
"\n",
|
|||
|
"from sklearn.model_selection import LeaveOneOut, LeavePOut\n",
|
|||
|
"\n",
|
|||
|
"from multiprocessing import Pool\n",
|
|||
|
"import cv2\n",
|
|||
|
"\n",
|
|||
|
"import sklearn\n",
|
|||
|
"import random\n",
|
|||
|
"from sklearn import neighbors\n",
|
|||
|
"from sklearn import svm\n",
|
|||
|
"from sklearn import tree\n",
|
|||
|
"from sklearn import ensemble\n",
|
|||
|
"from sklearn.model_selection import GridSearchCV\n",
|
|||
|
"from sklearn.metrics import classification_report\n",
|
|||
|
"\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"import math\n",
|
|||
|
"\n",
|
|||
|
"# Importing matplotlib to plot images.\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"%matplotlib inline\n",
|
|||
|
"\n",
|
|||
|
"# Importing SK-learn to calculate precision and recall\n",
|
|||
|
"import sklearn\n",
|
|||
|
"from sklearn import metrics\n",
|
|||
|
"from sklearn.model_selection import train_test_split, cross_val_score, LeaveOneGroupOut\n",
|
|||
|
"from sklearn.utils import shuffle\n",
|
|||
|
"from sklearn.model_selection import GridSearchCV\n",
|
|||
|
"from sklearn.metrics.pairwise import euclidean_distances\n",
|
|||
|
"from sklearn.metrics import confusion_matrix\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"import pickle as pkl\n",
|
|||
|
"import h5py\n",
|
|||
|
"\n",
|
|||
|
"from pathlib import Path\n",
|
|||
|
"import os.path\n",
|
|||
|
"import sys\n",
|
|||
|
"import datetime\n",
|
|||
|
"import time\n",
|
|||
|
"\n",
|
|||
|
"import skimage\n",
|
|||
|
"\n",
|
|||
|
"target_names = [\"Knuckle\", \"Finger\"]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from skimage import measure\n",
|
|||
|
"from skimage.measure import find_contours, approximate_polygon, \\\n",
|
|||
|
" subdivide_polygon, EllipseModel, LineModelND"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def getEllipseParams(img):\n",
|
|||
|
" points = np.argwhere(img > 40)\n",
|
|||
|
" \n",
|
|||
|
" contours = skimage.measure.find_contours(img, 40)\n",
|
|||
|
" points_to_approx = []\n",
|
|||
|
" highest_val = 0\n",
|
|||
|
" for n, contour in enumerate(contours):\n",
|
|||
|
" if (len(contour) > highest_val):\n",
|
|||
|
" points_to_approx = contour\n",
|
|||
|
" highest_val = len(contour) \n",
|
|||
|
" \n",
|
|||
|
" try:\n",
|
|||
|
" contour = np.fliplr(points_to_approx)\n",
|
|||
|
" except Exception as inst:\n",
|
|||
|
" return [-1, -1, -1, -1, -1]\n",
|
|||
|
" \n",
|
|||
|
"\n",
|
|||
|
" ellipse = skimage.measure.fit.EllipseModel()\n",
|
|||
|
" ellipse.estimate(contour)\n",
|
|||
|
" try:\n",
|
|||
|
" xc, yc, a, b, theta = ellipse.params \n",
|
|||
|
" except Exception as int:\n",
|
|||
|
" return [-1, -1, -1, -1, -1]\n",
|
|||
|
" \n",
|
|||
|
" return [xc, yc, a, b, theta]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[ 1 2 9 6 4 14 17 16 12 3 10 18 5] [13 8 11 15 7]\n",
|
|||
|
"13 : 5\n",
|
|||
|
"0.7222222222222222 : 0.2777777777777778\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# the data, split between train and test sets\n",
|
|||
|
"df = pd.read_pickle(\"DataStudyCollection/df_statistics.pkl\")\n",
|
|||
|
"\n",
|
|||
|
"lst = df.userID.unique()\n",
|
|||
|
"np.random.seed(42)\n",
|
|||
|
"np.random.shuffle(lst)\n",
|
|||
|
"test_ids = lst[-5:]\n",
|
|||
|
"train_ids = lst[:-5]\n",
|
|||
|
"\n",
|
|||
|
"df[\"Set\"] = \"Test\"\n",
|
|||
|
"df.loc[df.userID.isin(train_ids), \"Set\"] = \"Train\"\n",
|
|||
|
"print(train_ids, test_ids)\n",
|
|||
|
"print(len(train_ids), \":\", len(test_ids))\n",
|
|||
|
"print(len(train_ids) / len(lst), \":\", len(test_ids)/ len(lst))\n",
|
|||
|
"\n",
|
|||
|
"#df_train = df[df.userID.isin(train_ids)]\n",
|
|||
|
"#df_test = df[df.userID.isin(test_ids) & (df.Version == \"Normal\")]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.patches.Ellipse at 0x7ff60430b668>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAJ4AAAD8CAYAAACGuR0qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADetJREFUeJzt3XuQVPWZxvHvO8AwMAy3gCNBRbCQDV4yuoSoYArvqNkga0K8ZXHXCmrFXbWyu2VlqzS7+YdaNWx243pJQImV6LoaSjZFokhijHeZaBBULuE+AgPKZYIyzDDv/tFnKsNlunu6e/rt6Xk+VVPdfW79UvVwus85/TuvuTsixVYRXYD0TgqehFDwJISCJyEUPAmh4EkIBU9CKHgSQsGTEH2L+WaV1t+rqO58AbO061ufDP9PDrWlnZ3xGo2u4uStid273H1kpuXyCp6ZTQd+APQBfuzuc9MtX0U1X6y4uPPtVVamfb+KmkFp5/uf9qefnyFY3tycdr5k9oI/vSmb5XL+qDWzPsADwOXAROBaM5uY6/akd8nnO95kYJ27r3f3g8CTwIzClCXlLp/gjQa2dHi9NZl2GDObY2bLzWx5C/ook5RuP6p190fcfZK7T+pH/+5+O+kh8gleA3Bih9cnJNNEMsoneG8B481srJlVAtcAiwtTlpS7nE+nuHurmd0GPEfqdMoCd1+VdiWztKdMKgYOTL96htMp1jf9P6d1R2Pa+VI8eZ3Hc/clwJIC1SK9iC6ZSQgFT0IoeBKiqD8SKKR+lX353NljOHnCKCoqjLY2Z/PKzbxfv4HmT1uiy5MMelzwaoYO5Ou3XsSV151L1cCjj5BbDrbyxtKVPPEfv2T9Kp1WLFU9KnjnXnIa3773GqprBgDwx/caWP2HzTR/2kJlVT8mnHEC404bzdQr65h6ZR2/fuZNHvjOU3zSdCC4cjlSaQVv5PBOZ13x9S/yrXtm0Keigpc2bOTe3/2O9xp3HrbMwHpj5OBqbpz2l8w67/NcePVkTvnSeG6b/ywbd+7msw/uTfv2bQcU0GLpEQcXZ08Zzz/860z6VFQw7+VX+Ntnfn5U6Nrt3Lefexe/xNX3Pc77DY2MGTmM+bd+lVHDaopctaRT8sEbMqyaf5w7C4B5r7zKD19/I6v1Nu/aw+wf/g9vrtvCcUMG8dA3Z1IzLM2vn6WoSj541956IcNH1rDirfX8d5aha/fpwVbuePT/WLttF+NqP8Mt/35DN1UpXVXSwauuqWL6174AwIPfW0xbDmMimg40c/ujiznQ0srF10+lbpp+JF0KSjp4508/g6oBlbz96lo2rNme83a2fLSXh5e+DqC9Xoko6eCdOXkcAK++kP5HL9l47MV6Ptq+h7Gnn8iZ5/9F3tuT/JR08M6YNBaAFW9tyHtbrYfaWDL/1wB85ZZL8t6e5Keo5/EMsDRjZ33An69EVPbvy8hRQzl4sJVNDXvwAZWsveHBtNuvbz6Ydv4Pvnoj3/iXv2by5XX0H11LS3PrYfPbtupKR7GU7B6vZnDq6kTT3k8yjofN1kc79vLHVQ30r6rkc2efXJBtSm5KNniDaqoA2N9U2JFpq95aD8CEujEF3a50TckG75NPUh+bAwcVdmTahxtTVzxGjBpa0O1K15Rs8PZ8nLodxdACX23Ys7Mptd0R6cdvSPcq2eC1HGxl7+799O3Xh5HHDynYdv+071MABiXfISVGyQYPYM17HwJwWt1JBdtmdRK4/fqpVKiSDt47b6YOBL4wZXzBtlkzNDWEsmnPJwXbpnRdUc/jOelvFVbRcPi419eefIVv3nkZUy+cyEN3/oSz/+3WtNs/OCT9/fVOOrCasafWArBt/XZcv78LU9J7vA83NFL/m1VUDazk0uvOK8g2zzg3tfdc+dragmxPclPSwQNYPP9FAGb9/WUMqa7Ka1ujxx3HSaeO4tP9B1j7h80FqE5yVfLBe+P5d1nxyhqGjqjhjplfymtbV825EIDfLlpOa8uhQpQnOSr54AH85z/9lIPNLcw873S+ck5uv6ebcPwILrt+Cm1tbSx6eFmBK5Su6hHB27puB4/c/TQAd19/KeefPrZL6w/o15f7rrmCyv79+OXjL7Np9bbuKFO6oEcED+AXj/6W+c+9Sd8+Fcy7eQbXXXBWVusNGVjFj2+6mlNqP8Om1dt45O7/7eZKJRulNbwxg/969mUM+LvLJvPPX7uAS846lYeXvMbrHxx9oGAGl585gTsvm8LoYUP4cPc+vnfjQ7rLQImwYnboHmzDPV27gYpBGe5/V9kPgClfPovb77+BwcNTy2/ftIt3X1vD9i0f4w4jPzuMuvMnMGrMCADWvbuFe/7mIXatTn8kq3G1+XvBn65390mZlsu3z8VGoAk4BLRm84aF8Mov3ubtF9/nr26axsxbLub4MSM4PglZR7s+3M3j9y/hhafeoC1D8xUprrz2eEnwJrn7rmyWL9Qe77B1Koxxp5/AaeeMZ8iIwZgZ+3bv54P6Dax5ZxNtbX/+97Xt0Z0EultR9niloK3NWbdiC+tWbMH6HR1MKU35HtU68LyZ1ZvZnEIUJL1Dvnu8qe7eYGbHAUvN7AN3f6njAkkg5wBUkf7m2tJ75LXHc/eG5LERWESqzdSRy6jBihwlnyZ61WZW0/4cuBRYWajCpLzl81FbCyxKxsn2BX7m7r/KuFaao+i2pqY8ypGeJJ8GK+uBzxewFulFesy1WikvCp6EUPAkhIInIRQ8CaHgSQgFT0IoeBJCwZMQCp6EUPAkhIInIRQ8CaHgSQgFT0IoeBJCwZMQCp6EUPAkhIInIRQ8CaHgSQgFT0IoeBJCwZMQCp6EUPAkhIInIRQ8CaHgSQgFT0JkDJ6ZLTCzRjNb2WHacDNbamZrk8dh3VumlJts9niPAdOPmHYXsMzdxwPLktciWcsYvOQu7h8fMXkGsDB5vhC4qsB1SZnL9Tterbu3997cTup+yCJZy/vgwlM9qTq9o7aZzTGz5Wa2vIXmfN9OykSuwdthZqMAksfGzhZUnws5llyDtxiYnTyfDTxbmHKkt8jmdMoTwGvABDPbamY3AXOBS8xsLXBx8lokaxn7XLj7tZ3MuqjAtUgvoisXEkLBkxAKnoRQ8CSEgichFDwJoeBJCAVPQih4EkLBkxAKnoRQ8CSEgichFDwJoeBJCAVPQih4EkLBkxAKnoRQ8CSEgichFDwJoeBJCAVPQih4EkLBkxAKnoRQ8CSEgichFDwJoeBJCAVPQuTaYOW7ZtZgZu8kf1d0b5lSbnJtsAIwz93rkr8lhS1Lyl2uDVZE8pLPd7zbzGxF8lHcaS8z9bmQY8k1eA8CpwB1wDbg/s4WVJ8LOZacgufuO9z9kLu3AT8CJhe2LCl3OQWvvatPYiawsrNlRY4lY5+LpMHKNGCEmW0F7gGmmVkdqR5mG4Gbu7FGKUO5NliZ3w21SC+iKxcSQsGTEAqehFDwJISCJyEUPAmh4EkIBU9CKHgSQsGTEAqehFDwJISCJyEUPAmh4EkIBU9CKHgSQsGTEAqehFDwJISCJyEUPAmh4EkIBU9CKHgSQsGTEAqehFDwJISCJyEUPAmh4EmIbPpcnGhmvzGz98xslZndnkwfbmZLzWxt8tjpDbhFjpTNHq8V+La7TwTOAb5lZhOBu4Bl7j4eWJa8FslKNn0utrn775PnTcD7wGhgBrAwWWwhcFV3FSnlJ+OtaDsys5OBs4A3gFp335bM2g7UdrLOHGAOQBUDc61TykzWBxdmNgh4BrjD3fd1nOfuTupG3EdRnws5lqyCZ2b9SIXup+7+82Tyjva2A8ljY/eUKOUom6NaI3WX9/fd/fsdZi0GZifPZwPPFr48KVfZfMebAnwDeNfM3kmmfQeYCzxlZjcBm4BZ3VOilKNs+ly8DFgnsy8qbDnSW+jKhYRQ8CSEgichFDwJoeBJCAVPQih4EkLBkxAKnoRQ8CSEgichFDwJoeBJCAVPQih4EkLBkxAKnoRQ8CSEgichFDwJoeBJCAVPQih4EkLBkxAKnoRQ8CSEgichFDwJoeBJCAVPQih4EiK
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 432x288 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {
|
|||
|
"needs_background": "light"
|
|||
|
},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig, ax = plt.subplots(1)\n",
|
|||
|
"img = df.iloc[0].Blobs\n",
|
|||
|
"xc, yc, a, b, theta = getEllipseParams(img)\n",
|
|||
|
"ax.imshow(img)\n",
|
|||
|
"e = Ellipse(xy=[xc,yc], width=a*2, height=b*2, angle=math.degrees(theta), fill=False, lw=2, edgecolor='w')\n",
|
|||
|
"ax.add_artist(e)\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"lst = df.Blobs.apply(lambda x: getEllipseParams(x))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"lst2 = np.vstack(lst.values)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"(618012, 5)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"lst2.shape"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"df[\"XC\"] = lst2[:,0]\n",
|
|||
|
"df[\"YC\"] = lst2[:,1]\n",
|
|||
|
"df[\"EllipseW\"] = lst2[:,2]\n",
|
|||
|
"df[\"EllipseH\"] = lst2[:,3]\n",
|
|||
|
"df[\"EllipseTheta\"] = lst2[:,4]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"df[\"Area\"] = df[\"EllipseW\"] * df[\"EllipseH\"] * np.pi\n",
|
|||
|
"df[\"AvgCapa\"] = df.Blobs.apply(lambda x: np.mean(x))\n",
|
|||
|
"df[\"SumCapa\"] = df.Blobs.apply(lambda x: np.sum(x))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"[8, 11, 6, 7, 16, 15, 14, 10, 9, 2, 3, 13, 17, 5, 12, 1, 4]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"lst = list(range(1, df.userID.max()))\n",
|
|||
|
"SEED = 42#448\n",
|
|||
|
"random.seed(SEED)\n",
|
|||
|
"random.shuffle(lst)\n",
|
|||
|
"lst"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"dfY = df[df.Set == \"Train\"].copy(deep=True)\n",
|
|||
|
"dfT = df[(df.Set == \"Test\") & (df.Version == \"Normal\")].copy(deep=True)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"minmax = min(len(dfY[dfY.Input == \"Finger\"]), len(dfY[dfY.Input == \"Knuckle\"]))\n",
|
|||
|
"dfX = dfY[dfY.Input == \"Finger\"].sample(minmax)\n",
|
|||
|
"dfZ = dfY[dfY.Input == \"Knuckle\"].sample(minmax)\n",
|
|||
|
"dfY = pd.concat([dfX,dfZ])\n",
|
|||
|
"\n",
|
|||
|
"minmax = min(len(dfT[dfT.Input == \"Finger\"]), len(dfT[dfT.Input == \"Knuckle\"]))\n",
|
|||
|
"dfX = dfT[dfT.Input == \"Finger\"].sample(minmax)\n",
|
|||
|
"dfZ = dfT[dfT.Input == \"Knuckle\"].sample(minmax)\n",
|
|||
|
"dfT = pd.concat([dfX,dfZ])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>userID</th>\n",
|
|||
|
" <th>Timestamp</th>\n",
|
|||
|
" <th>Current_Task</th>\n",
|
|||
|
" <th>Task_amount</th>\n",
|
|||
|
" <th>TaskID</th>\n",
|
|||
|
" <th>VersionID</th>\n",
|
|||
|
" <th>RepetitionID</th>\n",
|
|||
|
" <th>Actual_Data</th>\n",
|
|||
|
" <th>Is_Pause</th>\n",
|
|||
|
" <th>Image</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>InputMethod</th>\n",
|
|||
|
" <th>Set</th>\n",
|
|||
|
" <th>XC</th>\n",
|
|||
|
" <th>YC</th>\n",
|
|||
|
" <th>EllipseW</th>\n",
|
|||
|
" <th>EllipseH</th>\n",
|
|||
|
" <th>EllipseTheta</th>\n",
|
|||
|
" <th>Area</th>\n",
|
|||
|
" <th>AvgCapa</th>\n",
|
|||
|
" <th>SumCapa</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>Input</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>Finger</th>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>Knuckle</th>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" <td>9421</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>2 rows × 31 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" userID Timestamp Current_Task Task_amount TaskID VersionID \\\n",
|
|||
|
"Input \n",
|
|||
|
"Finger 9421 9421 9421 9421 9421 9421 \n",
|
|||
|
"Knuckle 9421 9421 9421 9421 9421 9421 \n",
|
|||
|
"\n",
|
|||
|
" RepetitionID Actual_Data Is_Pause Image ... InputMethod Set \\\n",
|
|||
|
"Input ... \n",
|
|||
|
"Finger 9421 9421 9421 9421 ... 9421 9421 \n",
|
|||
|
"Knuckle 9421 9421 9421 9421 ... 9421 9421 \n",
|
|||
|
"\n",
|
|||
|
" XC YC EllipseW EllipseH EllipseTheta Area AvgCapa SumCapa \n",
|
|||
|
"Input \n",
|
|||
|
"Finger 9421 9421 9421 9421 9421 9421 9421 9421 \n",
|
|||
|
"Knuckle 9421 9421 9421 9421 9421 9421 9421 9421 \n",
|
|||
|
"\n",
|
|||
|
"[2 rows x 31 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"dfT.groupby(\"Input\").count()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# FEATURE SET: sum of capacitance, avg of capacitance, ellipse area, ellipse width, height and theta."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"features = [\"SumCapa\", \"AvgCapa\", \"Area\", \"EllipseW\", \"EllipseH\", \"EllipseTheta\"]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# ZeroR"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"dfT[\"InputMethodPred\"] = 1"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[[ 0 9421]\n",
|
|||
|
" [ 0 9421]]\n",
|
|||
|
"Accuray: 0.50\n",
|
|||
|
"Recall: 0.50\n",
|
|||
|
"Precision: 0.50\n",
|
|||
|
"F1-Score: 0.33\n",
|
|||
|
" precision recall f1-score support\n",
|
|||
|
"\n",
|
|||
|
" Knuckle 0.00 0.00 0.00 9421\n",
|
|||
|
" Finger 0.50 1.00 0.67 9421\n",
|
|||
|
"\n",
|
|||
|
" micro avg 0.50 0.50 0.50 18842\n",
|
|||
|
" macro avg 0.25 0.50 0.33 18842\n",
|
|||
|
"weighted avg 0.25 0.50 0.33 18842\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/usr/local/lib/python3.6/dist-packages/sklearn/metrics/classification.py:1143: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.\n",
|
|||
|
" 'precision', 'predicted', average, warn_for)\n",
|
|||
|
"/usr/local/lib/python3.6/dist-packages/sklearn/metrics/classification.py:1143: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.\n",
|
|||
|
" 'precision', 'predicted', average, warn_for)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(confusion_matrix(dfT.InputMethod.values, dfT.InputMethodPred.values, labels=[0, 1]))\n",
|
|||
|
"print(\"Accuray: %.2f\" % accuracy_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(\"Recall: %.2f\" % metrics.recall_score(dfT.InputMethod.values, dfT.InputMethodPred.values, average=\"macro\"))\n",
|
|||
|
"print(\"Precision: %.2f\" % metrics.average_precision_score(dfT.InputMethod.values, dfT.InputMethodPred.values, average=\"macro\"))\n",
|
|||
|
"print(\"F1-Score: %.2f\" % metrics.f1_score(dfT.InputMethod.values, dfT.InputMethodPred.values, average=\"macro\"))\n",
|
|||
|
"print(classification_report(dfT.InputMethod.values, dfT.InputMethodPred.values, target_names=target_names))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# DecisionTreeClassifier"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Fitting 5 folds for each of 240 candidates, totalling 1200 fits\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[Parallel(n_jobs=30)]: Using backend LokyBackend with 30 concurrent workers.\n",
|
|||
|
"[Parallel(n_jobs=30)]: Done 140 tasks | elapsed: 10.4s\n",
|
|||
|
"[Parallel(n_jobs=30)]: Done 390 tasks | elapsed: 31.4s\n",
|
|||
|
"[Parallel(n_jobs=30)]: Done 740 tasks | elapsed: 1.3min\n",
|
|||
|
"[Parallel(n_jobs=30)]: Done 1200 out of 1200 | elapsed: 2.4min finished\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"{'max_depth': 22, 'min_samples_split': 2} 0.8120637794585754\n",
|
|||
|
"[[7409 2012]\n",
|
|||
|
" [3096 6325]]\n",
|
|||
|
"Accuray: 0.73\n",
|
|||
|
"Recall: 0.73\n",
|
|||
|
"Precision: 0.67\n",
|
|||
|
"F1-Score: 0.73\n",
|
|||
|
" precision recall f1-score support\n",
|
|||
|
"\n",
|
|||
|
" Knuckle 0.71 0.79 0.74 9421\n",
|
|||
|
" Finger 0.76 0.67 0.71 9421\n",
|
|||
|
"\n",
|
|||
|
" micro avg 0.73 0.73 0.73 18842\n",
|
|||
|
" macro avg 0.73 0.73 0.73 18842\n",
|
|||
|
"weighted avg 0.73 0.73 0.73 18842\n",
|
|||
|
"\n",
|
|||
|
"CPU times: user 7.26 s, sys: 3.38 s, total: 10.6 s\n",
|
|||
|
"Wall time: 2min 29s\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"%%time\n",
|
|||
|
"param_grid = {'max_depth': range(2,32,1),\n",
|
|||
|
" 'min_samples_split':range(2,10,1)}\n",
|
|||
|
"#TODO: Create Baseline for different ML stuff\n",
|
|||
|
"clf = GridSearchCV(tree.DecisionTreeClassifier(), \n",
|
|||
|
" param_grid,\n",
|
|||
|
" cv=5 , n_jobs=os.cpu_count()-2, verbose=1)\n",
|
|||
|
"clf.fit(dfY[features].values, dfY.InputMethod.values)\n",
|
|||
|
"print(clf.best_params_, clf.best_score_)\n",
|
|||
|
"dfT[\"InputMethodPred\"] = clf.predict(dfT[features].values) \n",
|
|||
|
"\n",
|
|||
|
"print(confusion_matrix(dfT.InputMethod.values, dfT.InputMethodPred.values, labels=[0, 1]))\n",
|
|||
|
"print(\"Accuray: %.3f\" % accuracy_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(\"Recall: %.3f\" % metrics.recall_score(dfT.InputMethod.values, dfT.InputMethodPred.values, average=\"macro\"))\n",
|
|||
|
"print(\"Precision: %.3f\" % metrics.average_precision_score(dfT.InputMethod.values, dfT.InputMethodPred.values, average=\"macro\"))\n",
|
|||
|
"print(\"F1-Score: %.3f\" % metrics.f1_score(dfT.InputMethod.values, dfT.InputMethodPred.values, average=\"macro\"))\n",
|
|||
|
"print(classification_report(dfT.InputMethod.values, dfT.InputMethodPred.values, target_names=target_names))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# RandomForestClassifier"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Fitting 5 folds for each of 180 candidates, totalling 900 fits\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[Parallel(n_jobs=94)]: Using backend LokyBackend with 94 concurrent workers.\n",
|
|||
|
"[Parallel(n_jobs=94)]: Done 12 tasks | elapsed: 1.2min\n",
|
|||
|
"[Parallel(n_jobs=94)]: Done 262 tasks | elapsed: 4.0min\n",
|
|||
|
"[Parallel(n_jobs=94)]: Done 612 tasks | elapsed: 9.2min\n",
|
|||
|
"[Parallel(n_jobs=94)]: Done 900 out of 900 | elapsed: 12.8min finished\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"{'max_depth': 60, 'n_estimators': 63} 0.8669582104371696\n",
|
|||
|
"[[8175 1246]\n",
|
|||
|
" [2765 6656]]\n",
|
|||
|
"Accuray: 0.79\n",
|
|||
|
"Recall: 0.71\n",
|
|||
|
"Precision: 0.74\n",
|
|||
|
"F1-Score: 0.77\n",
|
|||
|
" precision recall f1-score support\n",
|
|||
|
"\n",
|
|||
|
" Knuckle 0.75 0.87 0.80 9421\n",
|
|||
|
" Finger 0.84 0.71 0.77 9421\n",
|
|||
|
"\n",
|
|||
|
" micro avg 0.79 0.79 0.79 18842\n",
|
|||
|
" macro avg 0.79 0.79 0.79 18842\n",
|
|||
|
"weighted avg 0.79 0.79 0.79 18842\n",
|
|||
|
"\n",
|
|||
|
"CPU times: user 42.1 s, sys: 834 ms, total: 42.9 s\n",
|
|||
|
"Wall time: 13min 28s\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"%%time\n",
|
|||
|
"param_grid = {'n_estimators': range(55,64,1),\n",
|
|||
|
" 'max_depth': range(50,70,1)}\n",
|
|||
|
"#TODO: Create Baseline for different ML stuff\n",
|
|||
|
"clf = GridSearchCV(ensemble.RandomForestClassifier(), \n",
|
|||
|
" param_grid,\n",
|
|||
|
" cv=5 , n_jobs=os.cpu_count()-2, verbose=1)\n",
|
|||
|
"clf.fit(dfY[features].values, dfY.InputMethod.values)\n",
|
|||
|
"print(clf.best_params_, clf.best_score_)\n",
|
|||
|
"dfT[\"InputMethodPred\"] = clf.predict(dfT[features].values) \n",
|
|||
|
"\n",
|
|||
|
"print(confusion_matrix(dfT.InputMethod.values, dfT.InputMethodPred.values, labels=[0, 1]))\n",
|
|||
|
"print(\"Accuray: %.2f\" % accuracy_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(\"Recall: %.2f\" % metrics.recall_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(\"Precision: %.2f\" % metrics.average_precision_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(\"F1-Score: %.2f\" % metrics.f1_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(classification_report(dfT.InputMethod.values, dfT.InputMethodPred.values, target_names=target_names))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# kNN"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Fitting 5 folds for each of 62 candidates, totalling 310 fits\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[Parallel(n_jobs=94)]: Using backend LokyBackend with 94 concurrent workers.\n",
|
|||
|
"[Parallel(n_jobs=94)]: Done 12 tasks | elapsed: 17.7s\n",
|
|||
|
"[Parallel(n_jobs=94)]: Done 310 out of 310 | elapsed: 1.5min finished\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"{'n_neighbors': 2} 0.800546827088748\n",
|
|||
|
"[[8187 1234]\n",
|
|||
|
" [4318 5103]]\n",
|
|||
|
"Accuray: 0.71\n",
|
|||
|
"Recall: 0.54\n",
|
|||
|
"Precision: 0.67\n",
|
|||
|
"F1-Score: 0.65\n",
|
|||
|
" precision recall f1-score support\n",
|
|||
|
"\n",
|
|||
|
" Knuckle 0.65 0.87 0.75 9421\n",
|
|||
|
" Finger 0.81 0.54 0.65 9421\n",
|
|||
|
"\n",
|
|||
|
" micro avg 0.71 0.71 0.71 18842\n",
|
|||
|
" macro avg 0.73 0.71 0.70 18842\n",
|
|||
|
"weighted avg 0.73 0.71 0.70 18842\n",
|
|||
|
"\n",
|
|||
|
"CPU times: user 1.74 s, sys: 300 ms, total: 2.04 s\n",
|
|||
|
"Wall time: 1min 30s\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"%%time\n",
|
|||
|
"param_grid = {'n_neighbors': range(2,64,1),\n",
|
|||
|
" #weights': ['uniform', 'distance']\n",
|
|||
|
" }\n",
|
|||
|
"#TODO: Create Baseline for different ML stuff\n",
|
|||
|
"clf = GridSearchCV(neighbors.KNeighborsClassifier(),\n",
|
|||
|
" param_grid,\n",
|
|||
|
" cv=5 , n_jobs=os.cpu_count()-2, verbose=1)\n",
|
|||
|
"clf.fit(dfY[features].values, dfY.InputMethod.values)\n",
|
|||
|
"print(clf.best_params_, clf.best_score_)\n",
|
|||
|
"dfT[\"InputMethodPred\"] = clf.predict(dfT[features].values) \n",
|
|||
|
"\n",
|
|||
|
"print(confusion_matrix(dfT.InputMethod.values, dfT.InputMethodPred.values, labels=[0, 1]))\n",
|
|||
|
"print(\"Accuray: %.2f\" % accuracy_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(\"Recall: %.2f\" % metrics.recall_score(dfT.InputMethod.values, dfT.InputMethodPred.values, average=\"macro\"))\n",
|
|||
|
"print(\"Precision: %.2f\" % metrics.average_precision_score(dfT.InputMethod.values, dfT.InputMethodPred.values, average=\"macro\"))\n",
|
|||
|
"print(\"F1-Score: %.2f\" % metrics.f1_score(dfT.InputMethod.values, dfT.InputMethodPred.values, average=\"macro\"))\n",
|
|||
|
"print(classification_report(dfT.InputMethod.values, dfT.InputMethodPred.values, target_names=target_names))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# SVM"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Fitting 5 folds for each of 9 candidates, totalling 45 fits\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[Parallel(n_jobs=94)]: Using backend LokyBackend with 94 concurrent workers.\n",
|
|||
|
"[Parallel(n_jobs=94)]: Done 42 out of 45 | elapsed: 1056.5min remaining: 75.5min\n",
|
|||
|
"[Parallel(n_jobs=94)]: Done 45 out of 45 | elapsed: 1080.5min finished\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"{'C': 10.0, 'gamma': 10.0} 0.8256943024851795\n",
|
|||
|
"CPU times: user 2h 42min 9s, sys: 23.6 s, total: 2h 42min 33s\n",
|
|||
|
"Wall time: 20h 43min 1s\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"%%time\n",
|
|||
|
"C_range = np.logspace(1, 3,3)\n",
|
|||
|
"gamma_range = np.logspace(-1, 1, 3)\n",
|
|||
|
"param_grid = dict(gamma=gamma_range, C=C_range)\n",
|
|||
|
"clf = GridSearchCV(sklearn.svm.SVC(), \n",
|
|||
|
" param_grid,\n",
|
|||
|
" cv=5 , n_jobs=os.cpu_count()-2, verbose=1)\n",
|
|||
|
"clf.fit(dfY[features].values, dfY.InputMethod.values)\n",
|
|||
|
"print(clf.best_params_, clf.best_score_)\n",
|
|||
|
"\n",
|
|||
|
"dfT[\"InputMethodPred\"] = clf.predict(dfT[features].values)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"{'C': 10.0, 'gamma': 10.0} 0.8256943024851795\n",
|
|||
|
"[[7106 2315]\n",
|
|||
|
" [2944 6477]]\n",
|
|||
|
"Accuray: 0.72\n",
|
|||
|
"Recall: 0.69\n",
|
|||
|
"Precision: 0.66\n",
|
|||
|
"F1-Score: 0.71\n",
|
|||
|
" precision recall f1-score support\n",
|
|||
|
"\n",
|
|||
|
" Knuckle 0.71 0.75 0.73 9421\n",
|
|||
|
" Finger 0.74 0.69 0.71 9421\n",
|
|||
|
"\n",
|
|||
|
" micro avg 0.72 0.72 0.72 18842\n",
|
|||
|
" macro avg 0.72 0.72 0.72 18842\n",
|
|||
|
"weighted avg 0.72 0.72 0.72 18842\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(clf.best_params_, clf.best_score_)\n",
|
|||
|
"print(confusion_matrix(dfT.InputMethod.values, dfT.InputMethodPred.values, labels=[0, 1]))\n",
|
|||
|
"print(\"Accuray: %.2f\" % accuracy_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(\"Recall: %.2f\" % metrics.recall_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(\"Precision: %.2f\" % metrics.average_precision_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(\"F1-Score: %.2f\" % metrics.f1_score(dfT.InputMethod.values, dfT.InputMethodPred.values))\n",
|
|||
|
"print(classification_report(dfT.InputMethod.values, dfT.InputMethodPred.values, target_names=target_names))"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.6.7"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|