InferringIntention/keyboard_and_mouse/dataset/.ipynb_checkpoints/03-NextActionPrediction-checkpoint.ipynb
2024-03-24 23:42:27 +01:00

687 lines
36 KiB
Text
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3aed8aec",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2021-09-27 15:31:30.518074: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import datetime\n",
"import time,pdb\n",
"import json\n",
"import random\n",
"import statistics\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from sklearn import svm\n",
"from sklearn.model_selection import GridSearchCV \n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import accuracy_score\n",
"from tensorflow.keras.layers import *\n",
"from sklearn.model_selection import train_test_split\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.optimizers import *\n",
"from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, Callback\n",
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.metrics import accuracy_score\n",
"import tqdm\n",
"from multiprocessing import Pool\n",
"import os\n",
"from tensorflow.compat.v1.keras.layers import Bidirectional, CuDNNLSTM"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "817f7108",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"available PIDs [ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. 16.]\n",
"available TaskIDs [0. 1. 2. 3. 4. 5. 6.]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Timestamp</th>\n",
" <th>Event</th>\n",
" <th>TaskID</th>\n",
" <th>Part</th>\n",
" <th>PID</th>\n",
" <th>TextRule</th>\n",
" <th>Rule</th>\n",
" <th>Type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.575388e+12</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>{'Title': ['1', 'Indent', 'and', 'Italic'], 'S...</td>\n",
" <td>3.0</td>\n",
" <td>Cmd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.575388e+12</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>{'Title': ['1', 'Indent', 'and', 'Italic'], 'S...</td>\n",
" <td>3.0</td>\n",
" <td>Toolbar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.575388e+12</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>{'Title': ['1', 'Indent', 'and', 'Italic'], 'S...</td>\n",
" <td>3.0</td>\n",
" <td>Cmd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.575388e+12</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>{'Title': ['1', 'Indent', 'and', 'Italic'], 'S...</td>\n",
" <td>3.0</td>\n",
" <td>Cmd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.575388e+12</td>\n",
" <td>4</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>{'Title': ['1', 'Indent', 'and', 'Italic'], 'S...</td>\n",
" <td>3.0</td>\n",
" <td>Cmd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8376</th>\n",
" <td>1.603898e+12</td>\n",
" <td>7</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>{'Title': ['Size', 'Big'], 'Subtitle': ['Bold'...</td>\n",
" <td>5.0</td>\n",
" <td>Toolbar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8377</th>\n",
" <td>1.603898e+12</td>\n",
" <td>2</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>{'Title': ['Size', 'Big'], 'Subtitle': ['Bold'...</td>\n",
" <td>5.0</td>\n",
" <td>Cmd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8378</th>\n",
" <td>1.603898e+12</td>\n",
" <td>2</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>{'Title': ['Size', 'Big'], 'Subtitle': ['Bold'...</td>\n",
" <td>5.0</td>\n",
" <td>Cmd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8379</th>\n",
" <td>1.603898e+12</td>\n",
" <td>6</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>{'Title': ['Size', 'Big'], 'Subtitle': ['Bold'...</td>\n",
" <td>5.0</td>\n",
" <td>Toolbar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8380</th>\n",
" <td>1.603898e+12</td>\n",
" <td>6</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>{'Title': ['Size', 'Big'], 'Subtitle': ['Bold'...</td>\n",
" <td>5.0</td>\n",
" <td>Toolbar</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8381 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" Timestamp Event TaskID Part PID \\\n",
"0 1.575388e+12 4 0.0 1.0 1.0 \n",
"1 1.575388e+12 1 0.0 1.0 1.0 \n",
"2 1.575388e+12 1 0.0 1.0 1.0 \n",
"3 1.575388e+12 4 0.0 1.0 1.0 \n",
"4 1.575388e+12 4 0.0 1.0 1.0 \n",
"... ... ... ... ... ... \n",
"8376 1.603898e+12 7 6.0 5.0 16.0 \n",
"8377 1.603898e+12 2 6.0 5.0 16.0 \n",
"8378 1.603898e+12 2 6.0 5.0 16.0 \n",
"8379 1.603898e+12 6 6.0 5.0 16.0 \n",
"8380 1.603898e+12 6 6.0 5.0 16.0 \n",
"\n",
" TextRule Rule Type \n",
"0 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"1 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Toolbar \n",
"2 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"3 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"4 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"... ... ... ... \n",
"8376 {'Title': ['Size', 'Big'], 'Subtitle': ['Bold'... 5.0 Toolbar \n",
"8377 {'Title': ['Size', 'Big'], 'Subtitle': ['Bold'... 5.0 Cmd \n",
"8378 {'Title': ['Size', 'Big'], 'Subtitle': ['Bold'... 5.0 Cmd \n",
"8379 {'Title': ['Size', 'Big'], 'Subtitle': ['Bold'... 5.0 Toolbar \n",
"8380 {'Title': ['Size', 'Big'], 'Subtitle': ['Bold'... 5.0 Toolbar \n",
"\n",
"[8381 rows x 8 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Folder that holds the study recordings, relative to this notebook.\n",
"study_data_path = \"../IntentData/\"\n",
"# os.path.join avoids the doubled separator that string concatenation produced\n",
"# (\"../IntentData/\" + \"/Preprocessing_data/...\").\n",
"# NOTE(review): unpickling can execute arbitrary code; only load trusted files.\n",
"data = pd.read_pickle(os.path.join(study_data_path, \"Preprocessing_data\", \"clean_data.pkl\"))\n",
"#val_data = pd.read_pickle(study_data_path + \"/Preprocessing_data/clean_data_condition2.pkl\")\n",
"\n",
"print(\"available PIDs\", data.PID.unique())\n",
"\n",
"print(\"available TaskIDs\", data.TaskID.unique())\n",
"\n",
"# The former bare `data.Event.unique()` statement was dropped: only the last\n",
"# expression of a cell is displayed, so its result was silently discarded.\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "ab778228",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 560.000000\n",
"mean 14.966071\n",
"std 2.195440\n",
"min 8.000000\n",
"25% 14.000000\n",
"50% 15.000000\n",
"75% 16.000000\n",
"max 28.000000\n",
"Name: Event, dtype: float64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count the recorded events of every (PID, Part, TaskID) trial, then summarise the counts.\n",
"events_per_trial = data.groupby([\"PID\", \"Part\", \"TaskID\"])[\"Event\"].count()\n",
"events_per_trial.describe()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "32550f71",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Task identifiers 0..6.\n",
"Task_IDs = list(range(7))\n",
"\n",
"# One group per (PID, Part, TaskID) trial; Part is included in the key so that\n",
"# each part keeps a single ruleset.\n",
"trial_keys = [\"PID\", \"Part\", \"TaskID\"]\n",
"g = data.groupby(trial_keys)\n",
"df_all = list()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f6fecc2f",
"metadata": {},
"outputs": [],
"source": [
"def createTrainTestalaSven(test_IDs, task_IDs, window_size, stride, shapes=False, val_IDs=None):\n",
"    \"\"\"Build class-balanced sliding-window next-event samples, split by participant.\n",
"\n",
"    Every trial of the module-level groupby ``g`` whose TaskID values are all in\n",
"    ``task_IDs`` is cut into windows of ``window_size`` consecutive events. The\n",
"    first ``window_size - 1`` events of a window are the features, the last event\n",
"    is the label. Inside each split every label is then oversampled (random draw\n",
"    with replacement) up to the count of the most frequent label of that split.\n",
"\n",
"    Args:\n",
"        test_IDs: list of PIDs whose trials go to the test split.\n",
"        task_IDs: list of TaskIDs to include.\n",
"        window_size: number of events per window, label included.\n",
"        stride: positive step between two consecutive window starts.\n",
"        shapes: unused here; kept so the signature matches createTrainTest.\n",
"        val_IDs: optional list of PIDs for the validation split. None (the\n",
"            default) means that no participant is used for validation.\n",
"\n",
"    Returns:\n",
"        (x_train, y_train, x_test, y_test), followed by (x_val, y_val) only when\n",
"        validation samples exist. The x arrays get a trailing axis of length 1.\n",
"\n",
"    Raises:\n",
"        ValueError: if test_IDs or task_IDs is not a list, or stride is not positive.\n",
"    \"\"\"\n",
"    if not isinstance(test_IDs, list):\n",
"        raise ValueError(\"Test_IDs are not a list\")\n",
"    if not isinstance(task_IDs, list):\n",
"        raise ValueError(\"Task_IDs are not a list\")\n",
"    if stride <= 0:\n",
"        # A non-positive stride would never advance the window and loop forever.\n",
"        raise ValueError(\"stride must be a positive integer\")\n",
"    if val_IDs is None:\n",
"        # Series.isin only accepts list-likes, so the None default used to raise a TypeError.\n",
"        val_IDs = []\n",
"\n",
"    # Fill data arrays\n",
"    all_elem = []\n",
"    for current in g.groups.keys():\n",
"        c = g.get_group(current)\n",
"        if not c.TaskID.isin(task_IDs).all():\n",
"            continue\n",
"\n",
"        # The split depends only on the trial, so decide it once instead of per window.\n",
"        if c.PID.isin(test_IDs).all():\n",
"            split = \"Test\"\n",
"        elif c.PID.isin(val_IDs).all():\n",
"            split = \"Val\"\n",
"        else:\n",
"            split = \"Train\"\n",
"\n",
"        new_data = c.Event.values\n",
"        stepper = 0\n",
"        # NOTE(review): because of the `- 1` the window ending on the trial's final\n",
"        # event is never produced; confirm that this is intended.\n",
"        while stepper <= (len(new_data) - window_size - 1):\n",
"            tmp = new_data[stepper:stepper + window_size]\n",
"            all_elem.append([split, tmp[:-1], tmp[-1]])\n",
"            stepper += stride\n",
"\n",
"    df_tmp = pd.DataFrame(all_elem, columns=[\"Split\", \"X\", \"Y\"])\n",
"\n",
"    # Oversample every label of a split up to the size of its largest label.\n",
"    turbo = []\n",
"    for s in df_tmp.Split.unique():\n",
"        dfX = df_tmp[df_tmp.Split == s]\n",
"        max_amount = dfX.groupby([\"Y\"]).count().max().X\n",
"        for y in dfX.Y.unique():\n",
"            df_turbotmp = dfX[dfX.Y == y]\n",
"            turbo.append(df_turbotmp)\n",
"            # Draws zero rows for the label that already has max_amount samples.\n",
"            turbo.append(df_turbotmp.sample(max_amount - len(df_turbotmp), replace=True))\n",
"    df_tmp = pd.concat(turbo)\n",
"\n",
"    def _split_arrays(name):\n",
"        # Feature windows and labels of one split as object arrays.\n",
"        part = df_tmp[df_tmp.Split == name]\n",
"        return part.X.values, part.Y.values\n",
"\n",
"    x_train, y_train = _split_arrays(\"Train\")\n",
"    x_test, y_test = _split_arrays(\"Test\")\n",
"    x_val, y_val = _split_arrays(\"Val\")\n",
"\n",
"    # np.stack turns the per-sample windows into one 2-D array; expand_dims appends the last axis.\n",
"    x_train = np.expand_dims(np.stack(x_train), axis=2)\n",
"    y_train = np.array(y_train)\n",
"    x_test = np.expand_dims(np.stack(x_test), axis=2)\n",
"    y_test = np.array(y_test)\n",
"    if len(x_val) > 0:\n",
"        x_val = np.expand_dims(np.stack(x_val), axis=2)\n",
"        y_val = np.array(y_val)\n",
"        return (x_train, y_train, x_test, y_test, x_val, y_val)\n",
"    return (x_train, y_train, x_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "b8f92bc1",
"metadata": {},
"outputs": [],
"source": [
"def createTrainTest(test_IDs, task_IDs, window_size, stride, shapes=False, val_IDs=None):\n",
"    \"\"\"Build sliding-window next-event samples, split by participant.\n",
"\n",
"    Every trial of the module-level groupby ``g`` whose TaskID values are all in\n",
"    ``task_IDs`` is cut into windows of ``window_size`` consecutive events. The\n",
"    first ``window_size - 1`` events of a window are the features, the last event\n",
"    is the label.\n",
"\n",
"    Args:\n",
"        test_IDs: list of PIDs whose trials go to the test split.\n",
"        task_IDs: list of TaskIDs to include.\n",
"        window_size: number of events per window, label included.\n",
"        stride: positive step between two consecutive window starts.\n",
"        shapes: when True, print the array shapes and the labels found in the\n",
"            test and train splits.\n",
"        val_IDs: optional list of PIDs for the validation split. None (the\n",
"            default) means that no participant is used for validation.\n",
"\n",
"    Returns:\n",
"        (x_train, y_train, x_test, y_test), followed by (x_val, y_val) only when\n",
"        validation samples exist.\n",
"\n",
"    Raises:\n",
"        ValueError: if test_IDs or task_IDs is not a list, or stride is not positive.\n",
"    \"\"\"\n",
"    if not isinstance(test_IDs, list):\n",
"        raise ValueError(\"Test_IDs are not a list\")\n",
"    if not isinstance(task_IDs, list):\n",
"        raise ValueError(\"Task_IDs are not a list\")\n",
"    if stride <= 0:\n",
"        # A non-positive stride would never advance the window and loop forever.\n",
"        raise ValueError(\"stride must be a positive integer\")\n",
"    if val_IDs is None:\n",
"        # Series.isin only accepts list-likes, so the None default used to raise a TypeError.\n",
"        val_IDs = []\n",
"\n",
"    # Fill data arrays\n",
"    x_train, y_train = [], []\n",
"    x_test, y_test = [], []\n",
"    x_val, y_val = [], []\n",
"\n",
"    for current in g.groups.keys():\n",
"        c = g.get_group(current)\n",
"        if not c.TaskID.isin(task_IDs).all():\n",
"            continue\n",
"\n",
"        # The split depends only on the trial, so pick the target lists once per trial.\n",
"        if c.PID.isin(test_IDs).all():\n",
"            xs, ys = x_test, y_test\n",
"        elif c.PID.isin(val_IDs).all():\n",
"            xs, ys = x_val, y_val\n",
"        else:\n",
"            xs, ys = x_train, y_train\n",
"\n",
"        new_data = c.Event.values\n",
"        stepper = 0\n",
"        # NOTE(review): because of the `- 1` the window ending on the trial's final\n",
"        # event is never produced; confirm that this is intended.\n",
"        # The two leftover pdb.set_trace() calls were removed: they stopped every\n",
"        # run in the debugger, once per window.\n",
"        while stepper <= (len(new_data) - window_size - 1):\n",
"            tmp = new_data[stepper:stepper + window_size]\n",
"            x = tmp[:-1]\n",
"            y = tmp[-1]\n",
"            stepper += stride\n",
"            # NOTE(review): samples labelled 6 are stored twice (oversampling of\n",
"            # that label); confirm that this duplication is intended.\n",
"            copies = 2 if y == 6 else 1\n",
"            for _ in range(copies):\n",
"                ys.append(y)\n",
"                xs.append(x)\n",
"\n",
"    x_train = np.array(x_train)\n",
"    y_train = np.array(y_train)\n",
"    x_test = np.array(x_test)\n",
"    y_test = np.array(y_test)\n",
"    x_val = np.array(x_val)\n",
"    y_val = np.array(y_val)\n",
"    if (shapes):\n",
"        for arr in (x_train, y_train, x_test, y_test, x_val, y_val):\n",
"            print(arr.shape)\n",
"        print(np.unique(y_test))\n",
"        print(np.unique(y_train))\n",
"    if len(x_val) > 0:\n",
"        return (x_train, y_train, x_test, y_test, x_val, y_val)\n",
"    return (x_train, y_train, x_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e56fbc58",
"metadata": {},
"outputs": [],
"source": [
"# Record the event count of every trial; despite its name, `maxlen` ends up\n",
"# holding the shortest trial length (capped at the initial 1000).\n",
"lens = []\n",
"maxlen = 1000\n",
"for current in g.groups:\n",
"    c = g.get_group(current)\n",
"    n_events = len(c.Event.values)\n",
"    lens.append(n_events)\n",
"    if n_events < maxlen:\n",
"        maxlen = n_events"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "c02cbdae",
"metadata": {},
"outputs": [],
"source": [
"# Candidate hyper-parameter values for the random forest grid search.\n",
"n_estimators = np.arange(5, 100, 5)      # number of trees in the forest\n",
"max_features = ['sqrt']                  # features considered at every split\n",
"max_depth = np.arange(5, 100, 5)         # maximum number of levels in a tree\n",
"min_samples_split = np.arange(2, 10, 1)  # minimum samples required to split a node\n",
"min_samples_leaf = np.arange(2, 5, 1)    # minimum samples required at a leaf node\n",
"bootstrap = [True, False]                # whether each tree is trained on a bootstrap sample\n",
"\n",
"# Full grid: every combination of the candidate values above is evaluated.\n",
"param_grid = dict(\n",
"    n_estimators=n_estimators,\n",
"    max_features=max_features,\n",
"    max_depth=max_depth,\n",
"    min_samples_split=min_samples_split,\n",
"    min_samples_leaf=min_samples_leaf,\n",
"    bootstrap=bootstrap,\n",
")\n",
"\n",
"grid = GridSearchCV(\n",
"    RandomForestClassifier(),\n",
"    param_grid,\n",
"    refit=True,\n",
"    verbose=0,\n",
"    return_train_score=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "c2bcfe7f",
"metadata": {},
"outputs": [],
"source": [
"def doTrainSlideWindowNoPad(currentPid, window_sizes=range(8, 15)):\n",
"    \"\"\"Grid-search the random forest for one held-out participant.\n",
"\n",
"    For every window size the samples are built with createTrainTest (stride 1,\n",
"    ``currentPid`` as the only test participant, no validation participants) and\n",
"    the module-level ``grid`` is fitted on the training samples. Only the\n",
"    cross-validation scores of the grid search are recorded; the test samples\n",
"    are not evaluated here.\n",
"\n",
"    Args:\n",
"        currentPid: PID of the participant that is kept out of the training data.\n",
"        window_sizes: iterable of window sizes to try. The default, 8..14,\n",
"            matches the previously hard-coded range.\n",
"\n",
"    Returns:\n",
"        One DataFrame with a row per parameter combination and window size,\n",
"        holding the parameters, mean/std train and test CV scores, Window_Size\n",
"        and PID.\n",
"    \"\"\"\n",
"    print(f\"doTrain: {currentPid}\")\n",
"    dfs = []\n",
"    for window_size in window_sizes:\n",
"        # An empty validation list replaces the former dummy PID [200]. Indexing the\n",
"        # result instead of unpacking four names keeps this working even if the\n",
"        # helper also returns validation arrays.\n",
"        splits = createTrainTest([currentPid], Task_IDs, window_size, 1, False, [])\n",
"        x_train, y_train = splits[0], splits[1]\n",
"        print(\"doTrain: created TrainTestsplit\")\n",
"\n",
"        grid.fit(x_train, y_train)\n",
"        print(\"fitted\")\n",
"\n",
"        cv = grid.cv_results_\n",
"        df_params = pd.DataFrame(cv[\"params\"])\n",
"        df_params[\"Mean_test\"] = cv[\"mean_test_score\"]\n",
"        df_params[\"Mean_train\"] = cv[\"mean_train_score\"]\n",
"        df_params[\"STD_test\"] = cv[\"std_test_score\"]\n",
"        df_params[\"STD_train\"] = cv[\"std_train_score\"]\n",
"        df_params['Window_Size'] = window_size\n",
"        df_params['PID'] = currentPid\n",
"        dfs.append(df_params)\n",
"\n",
"    return pd.concat(dfs)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "9e3d86f1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"doTrain: 1\n",
"> \u001b[0;32m/tmp/ipykernel_90176/2602038955.py\u001b[0m(23)\u001b[0;36mcreateTrainTest\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m 21 \u001b[0;31m \u001b[0mtmp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstepper\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstepper\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mwindow_size\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m 22 \u001b[0;31m \u001b[0mpdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m---> 23 \u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m 24 \u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m 25 \u001b[0;31m \u001b[0mstepper\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mstride\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\n",
"ipdb> tmp\n",
"array([4, 1, 1, 4, 4, 7, 7, 7])\n",
"ipdb> new_data\n",
"array([4, 1, 1, 4, 4, 7, 7, 7, 7, 7, 7, 4, 1, 4, 4, 4])\n",
"ipdb> current\n",
"(1.0, 1.0, 0.0)\n",
"ipdb> print(c)\n",
" Timestamp Event TaskID Part PID \\\n",
"0 1.575388e+12 4 0.0 1.0 1.0 \n",
"1 1.575388e+12 1 0.0 1.0 1.0 \n",
"2 1.575388e+12 1 0.0 1.0 1.0 \n",
"3 1.575388e+12 4 0.0 1.0 1.0 \n",
"4 1.575388e+12 4 0.0 1.0 1.0 \n",
"5 1.575388e+12 7 0.0 1.0 1.0 \n",
"6 1.575388e+12 7 0.0 1.0 1.0 \n",
"7 1.575388e+12 7 0.0 1.0 1.0 \n",
"8 1.575388e+12 7 0.0 1.0 1.0 \n",
"9 1.575388e+12 7 0.0 1.0 1.0 \n",
"10 1.575388e+12 7 0.0 1.0 1.0 \n",
"11 1.575388e+12 4 0.0 1.0 1.0 \n",
"12 1.575388e+12 1 0.0 1.0 1.0 \n",
"13 1.575388e+12 4 0.0 1.0 1.0 \n",
"14 1.575388e+12 4 0.0 1.0 1.0 \n",
"15 1.575388e+12 4 0.0 1.0 1.0 \n",
"\n",
" TextRule Rule Type \n",
"0 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"1 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Toolbar \n",
"2 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"3 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"4 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"5 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Toolbar \n",
"6 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Toolbar \n",
"7 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Toolbar \n",
"8 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Toolbar \n",
"9 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Toolbar \n",
"10 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Toolbar \n",
"11 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"12 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Toolbar \n",
"13 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"14 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"15 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S... 3.0 Cmd \n",
"ipdb> print(c.TextRule)\n",
"0 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"1 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"2 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"3 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"4 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"5 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"6 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"7 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"8 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"9 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"10 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"11 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"12 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"13 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"14 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"15 {'Title': ['1', 'Indent', 'and', 'Italic'], 'S...\n",
"Name: TextRule, dtype: object\n",
"ipdb> print(c.Event)\n",
"0 4\n",
"1 1\n",
"2 1\n",
"3 4\n",
"4 4\n",
"5 7\n",
"6 7\n",
"7 7\n",
"8 7\n",
"9 7\n",
"10 7\n",
"11 4\n",
"12 1\n",
"13 4\n",
"14 4\n",
"15 4\n",
"Name: Event, dtype: int64\n",
"ipdb> val\n",
"*** NameError: name 'val' is not defined\n",
"ipdb> val_IDs\n",
"[200]\n",
"--KeyboardInterrupt--\n",
"\n",
"KeyboardInterrupt: Interrupted by user\n",
"> \u001b[0;32m/tmp/ipykernel_90176/2602038955.py\u001b[0m(22)\u001b[0;36mcreateTrainTest\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m 20 \u001b[0;31m \u001b[0;32mwhile\u001b[0m \u001b[0mstepper\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0mwindow_size\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m 21 \u001b[0;31m \u001b[0mtmp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstepper\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstepper\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mwindow_size\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m---> 22 \u001b[0;31m \u001b[0mpdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m 23 \u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m 24 \u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\n",
"--KeyboardInterrupt--\n",
"\n",
"KeyboardInterrupt: Interrupted by user\n",
"> \u001b[0;32m/tmp/ipykernel_90176/2602038955.py\u001b[0m(23)\u001b[0;36mcreateTrainTest\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m 21 \u001b[0;31m \u001b[0mtmp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstepper\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstepper\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mwindow_size\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m 22 \u001b[0;31m \u001b[0mpdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m---> 23 \u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m 24 \u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\u001b[0;32m 25 \u001b[0;31m \u001b[0mstepper\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mstride\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0m\n",
"ipdb> q\n"
]
},
{
"ename": "BdbQuit",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mBdbQuit\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_90176/1128965594.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdoTrainSlideWindowNoPad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/tmp/ipykernel_90176/2629087375.py\u001b[0m in \u001b[0;36mdoTrainSlideWindowNoPad\u001b[0;34m(currentPid)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mdfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mwindow_size\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m8\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m15\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;34m(\u001b[0m\u001b[0mx_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreateTrainTest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcurrentPid\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTask_IDs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwindow_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m200\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"doTrain: created TrainTestsplit\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tmp/ipykernel_90176/2602038955.py\u001b[0m in \u001b[0;36mcreateTrainTest\u001b[0;34m(test_IDs, task_IDs, window_size, stride, shapes, val_IDs)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mtmp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstepper\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstepper\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mwindow_size\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mstepper\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mstride\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tmp/ipykernel_90176/2602038955.py\u001b[0m in \u001b[0;36mcreateTrainTest\u001b[0;34m(test_IDs, task_IDs, window_size, stride, shapes, val_IDs)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mtmp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstepper\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mstepper\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mwindow_size\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mpdb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_trace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 23\u001b[0;31m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 24\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtmp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mstepper\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mstride\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/intentPrediction/lib/python3.9/bdb.py\u001b[0m in \u001b[0;36mtrace_dispatch\u001b[0;34m(self, frame, event, arg)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;31m# None\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mevent\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'line'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_line\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mevent\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'call'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/intentPrediction/lib/python3.9/bdb.py\u001b[0m in \u001b[0;36mdispatch_line\u001b[0;34m(self, frame)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstop_here\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbreak_here\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muser_line\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 113\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquitting\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mBdbQuit\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 114\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrace_dispatch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mBdbQuit\u001b[0m: "
]
}
],
"source": [
"# Run the grid search with participant 1 held out; the returned table is the cell output.\n",
"doTrainSlideWindowNoPad(currentPid=1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}