first commit

commit 83b04e2133

109 changed files with 12081 additions and 0 deletions
132  watch_and_help/stan/plot_user_length.py  Normal file
@@ -0,0 +1,132 @@
import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt
import argparse
import pathlib


def main(args):
    if args.task_type == 'new_test_task':
        user = 9
        N = 1
    if args.task_type == 'test_task':
        user = 92
        N = 1
    rate = 100

    widths = [-0.1, 0, 0.1]
    user_table = [6, 13, 15, 19, 20, 23, 27, 30, 33, 44, 46, 49, 50, 51, 52, 53, 54, 56, 65, 71, 84]

    # read data
    model_data_list = []
    user_list = []
    if not args.plot_user_list:
        for i in range(user):
            path = "result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/N" + str(N) + "/" + args.model_type + "_N" + str(N) + "_result_" + str(rate) + "_user" + str(i) + ".csv"
            data = genfromtxt(path, delimiter=',', skip_header=1)
            data = data[[1, 2, 3, 5, 6, 7, 9, 10, 11], :][:, [2, 4, 6, 7]]
            model_data_list.append(data)
            if args.task_type == 'test_task':
                user_list.append(np.transpose(data[:, [0]]))
    else:
        for i in range(user):
            for t in user_table:
                if t == i + 1:
                    path = "result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/N" + str(N) + "/" + args.model_type + "_N" + str(N) + "_result_" + str(rate) + "_user" + str(i) + ".csv"
                    data = genfromtxt(path, delimiter=',', skip_header=1)
                    data = data[[1, 2, 3, 5, 6, 7, 9, 10, 11], :][:, [2, 4, 6, 7]]
                    model_data_list.append(data)
                    user_list.append(np.transpose(data[:, [0]]))

    color = ['royalblue', 'lightgreen', 'tomato']
    legend = ['put fridge', 'put\n dishwasher', 'read book']
    fig, axs = plt.subplots(3, sharex=True, sharey=True)
    fig.set_figheight(10)  # all sample rate: 10; 3 row: 8
    fig.set_figwidth(20)

    for ax in range(3):
        y_total = []
        y_low_total = []
        y_high_total = []
        for j in range(3):
            y = []
            y_low = []
            y_high = []
            for i in range(len(model_data_list)):
                y.append(model_data_list[i][j + ax * 3][0])
                y_low.append(model_data_list[i][j + ax * 3][2])
                y_high.append(model_data_list[i][j + ax * 3][3])
            y_total.append(y)
            y_low_total.append(y_low)
            y_high_total.append(y_high)
            print()
            print("user mean of mean prob: ", np.mean(y))
            print("user mean of sd prob: ", np.std(y))

        for i in range(3):
            if args.plot_type == 'line':
                axs[ax].plot(range(user), y_total[i], color=color[i], label=legend[i])
                axs[ax].fill_between(range(user), y_low_total[i], y_high_total[i], color=color[i], alpha=0.3)
            if args.plot_type == 'bar':
                if args.task_type == 'new_test_task':
                    widths = [-0.25, 0, 0.25]
                yerror = [np.array(y_total[i]) - np.array(y_low_total[i]), np.array(y_high_total[i]) - np.array(y_total[i])]
                axs[0].text(-0.19, 0.9, 'True Intention:', horizontalalignment='center', verticalalignment='center', transform=axs[0].transAxes, fontsize=36)
                axs[ax].bar(np.arange(user) + widths[i], y_total[i], width=0.2, yerr=yerror, color=color[i], label=legend[i])
                axs[ax].tick_params(axis='x', which='both', pad=15, length=0)
                plt.xticks(range(user), range(1, user + 1))
                axs[ax].set_ylabel('prob', fontsize=36)  # was 22
                axs[ax].text(-0.19, 0.5, legend[ax], horizontalalignment='center', verticalalignment='center', transform=axs[ax].transAxes, fontsize=36, color=color[ax])
                plt.xlabel('user', fontsize=40)  # was 22
                for k, x in enumerate(np.arange(user) + widths[i]):
                    y = y_total[i][k] + yerror[1][k]
                    axs[ax].annotate(f'{y_total[i][k]:.2f}', (x, y), textcoords='offset points', xytext=(-15, 3), fontsize=14)

            if args.task_type == 'test_task':
                if not args.plot_user_list:
                    axs[0].text(-0.19, 0.9, 'True Intention:', horizontalalignment='center', verticalalignment='center', transform=axs[0].transAxes, fontsize=36)
                    axs[ax].errorbar(np.arange(user) + widths[i], y_total[i], yerr=[np.array(y_total[i]) - np.array(y_low_total[i]), np.array(y_high_total[i]) - np.array(y_total[i])], markerfacecolor=color[i], ecolor=color[i], markeredgecolor=color[i], label=legend[i], fmt='.k')
                    axs[ax].tick_params(axis='x', which='both', pad=15, length=0)
                    plt.xticks(range(user)[::5], range(1, user + 1)[::5])
                    axs[ax].set_ylabel('prob', fontsize=36)
                    axs[ax].text(-0.19, 0.5, legend[ax], horizontalalignment='center', verticalalignment='center', transform=axs[ax].transAxes, fontsize=36, color=color[ax])
                    plt.xlabel('user', fontsize=40)
                else:
                    axs[0].text(-0.19, 0.9, 'True Intention:', horizontalalignment='center', verticalalignment='center', transform=axs[0].transAxes, fontsize=36)
                    axs[ax].errorbar(np.arange(len(model_data_list)) + widths[i], y_total[i], yerr=[np.array(y_total[i]) - np.array(y_low_total[i]), np.array(y_high_total[i]) - np.array(y_total[i])], markerfacecolor=color[i], ecolor=color[i], markeredgecolor=color[i], label=legend[i], fmt='.k')
                    axs[ax].tick_params(axis='x', which='both', pad=15, length=0)
                    plt.xticks(range(len(model_data_list)), user_table)
                    axs[ax].set_ylabel('prob', fontsize=36)
                    # axs[ax].set_yticks(range(0.0, 1.0, 0.25))
                    axs[ax].text(-0.19, 0.5, legend[ax], horizontalalignment='center', verticalalignment='center', transform=axs[ax].transAxes, fontsize=36, color=color[ax])
                    plt.xlabel('user', fontsize=40)

        axs[ax].tick_params(axis='both', which='major', labelsize=30)

    handles, labels = axs[0].get_legend_handles_labels()

    plt.ylim([0, 1.08])
    plt.tight_layout()
    pathlib.Path("result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/figure/").mkdir(parents=True, exist_ok=True)

    if args.task_type == 'test_task':
        if not args.plot_user_list:
            plt.savefig("result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/figure/" + "N" + str(N) + "_" + args.model_type + "_rate_" + str(rate) + "_" + args.plot_type + "_test_set_1.png", bbox_inches='tight')
        else:
            plt.savefig("result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/figure/" + "N" + str(N) + "_" + args.model_type + "_rate_" + str(rate) + "_" + args.plot_type + "_test_set_1_user_analysis.png", bbox_inches='tight')
    if args.task_type == 'new_test_task':
        plt.savefig("result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/figure/" + "N" + str(N) + "_" + args.model_type + "_rate_" + str(rate) + "_" + args.plot_type + "_test_set_2.png", bbox_inches='tight')
    plt.show()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--loss_type', type=str, default='ce')
    parser.add_argument('--model_type', type=str, default="lstmlast")
    parser.add_argument('--plot_type', type=str, default='bar')  # bar or line
    parser.add_argument('--task_type', type=str, default='test_task')
    parser.add_argument('--plot_user_list', action='store_true')  # plot user_table or not

    args = parser.parse_args()

    main(args)
5  watch_and_help/stan/plot_user_length.sh  Normal file
@@ -0,0 +1,5 @@
python3 plot_user_length.py \
    --loss_type ce \
    --model_type lstmlast \
    --plot_type bar \
    --task_type test_task
88  watch_and_help/stan/plot_user_length_10_steps.py  Normal file
@@ -0,0 +1,88 @@
import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt
import argparse
import pathlib


def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--loss_type', type=str, default='ce')
    parser.add_argument('--model_type', type=str, default="lstmlast")
    parser.add_argument('--task_type', type=str, default='test_task')

    args = parser.parse_args()

    if args.task_type == 'new_test_task':
        user = 9
        N = 1
    if args.task_type == 'test_task':
        user = 92
        N = 1

    # rate = range(0, 101, 10)
    rate_user_data_list = []
    for r in range(0, 101, 10):
        # read data
        print(r)
        model_data_list = []
        for i in range(user):
            path = "result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/N" + str(N) + "/" + args.model_type + "_N" + str(N) + "_result_" + str(r) + "_user" + str(i) + ".csv"
            data = genfromtxt(path, delimiter=',', skip_header=1)
            data = data[[1, 2, 3, 5, 6, 7, 9, 10, 11], :][:, [2, 4, 6, 7]]
            model_data_list.append(data)
            # print(type(data))
        model_data_list_total = np.stack(model_data_list)
        mean_user_data = np.mean(model_data_list_total, axis=0)
        rate_user_data_list.append(mean_user_data)

    color = ['royalblue', 'lightgreen', 'tomato']
    legend = ['put fridge', 'put\n dishwasher', 'read book']
    fig, axs = plt.subplots(3, sharex=True, sharey=True)
    fig.set_figheight(10)  # all sample rate: 10; 3 row: 8
    fig.set_figwidth(20)
    axs[0].text(-0.145, 0.9, 'True Intention:', horizontalalignment='center', verticalalignment='center', transform=axs[0].transAxes, fontsize=25)  # all: -0.3,0.5 3rows: -0.5,0.5

    for ax in range(3):
        y_total = []
        y_low_total = []
        y_high_total = []
        for j in range(3):
            y = []
            y_low = []
            y_high = []
            for i in range(len(rate_user_data_list)):
                y.append(rate_user_data_list[i][j + ax * 3][0])
                y_low.append(rate_user_data_list[i][j + ax * 3][2])
                y_high.append(rate_user_data_list[i][j + ax * 3][3])
            y_total.append(y)
            y_low_total.append(y_low)
            y_high_total.append(y_high)
            print()
            print("user mean of mean prob: ", np.mean(y))
            print("user mean of sd prob: ", np.std(y))

        for i in range(3):
            axs[ax].plot(range(0, 101, 10), y_total[i], color=color[i], label=legend[i])
            axs[ax].fill_between(range(0, 101, 10), y_low_total[i], y_high_total[i], color=color[i], alpha=0.3)
            axs[ax].set_xticks(range(0, 101, 10))
            axs[ax].set_ylabel('probability', fontsize=22)

        axs[ax].text(-0.145, 0.5, legend[ax], horizontalalignment='center', verticalalignment='center', transform=axs[ax].transAxes, fontsize=25, color=color[ax])
        axs[ax].tick_params(axis='both', which='major', labelsize=18)

    plt.xlabel('Percentage of observed actions in one action sequence', fontsize=22)
    handles, labels = axs[0].get_legend_handles_labels()

    plt.xlim([0, 101])
    plt.ylim([0, 1])
    pathlib.Path("result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/figure/").mkdir(parents=True, exist_ok=True)
    if args.task_type == 'test_task':
        plt.savefig("result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/figure/N" + str(N) + "_" + args.model_type + "_rate_full_test_set_1.png", bbox_inches='tight')
    if args.task_type == 'new_test_task':
        plt.savefig("result/" + args.task_type + "/user" + str(user) + "/" + args.loss_type + "/figure/N" + str(N) + "_" + args.model_type + "_rate_full_test_set_2.png", bbox_inches='tight')

    plt.show()


if __name__ == '__main__':
    main()
4  watch_and_help/stan/plot_user_length_10_steps.sh  Normal file
@@ -0,0 +1,4 @@
python3 plot_user_length_10_steps.py \
    --loss_type ce \
    --model_type lstmlast \
    --task_type test_task
64  watch_and_help/stan/sampler_user.py  Normal file
@@ -0,0 +1,64 @@
import numpy as np
from numpy import genfromtxt
import csv
import pandas
import argparse


def sample_prediction(path, rate):
    data = pandas.read_csv(path).values
    task_list = [0, 1, 2]
    start = 0
    stop = 0
    num_unique = np.unique(data[:, 1])
    # print('unique number', num_unique)

    # group rows by (intention, action_id)
    samples = []
    for j in task_list:
        for i in num_unique:
            inx = np.where((data[:, 1] == i) & (data[:, -2] == j))
            samples.append(data[inx])

    # keep roughly the first rate% of each sequence by dropping the last (100 - rate)%
    for i in range(len(samples)):
        n = int(len(samples[i]) * (100 - rate) / 100)
        if n > 0:  # avoid emptying short sequences when the truncation rounds to zero
            samples[i] = samples[i][:-n]

    return np.vstack(samples)


def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--LOSS', type=str, default='ce')
    parser.add_argument('--MODEL_TYPE', type=str, default="lstmlast_cross_entropy_bs_32_iter_2000_train_task_prob")
    parser.add_argument('--EPOCHS', type=int, default=50)
    parser.add_argument('--TASK', type=str, default='test_task')
    args = parser.parse_args()

    task = ['put_fridge', 'put_dishwasher', 'read_book']
    sets = [args.TASK]
    rate = [10, 20, 30, 40, 50, 60, 70, 80, 90]

    for i in task:
        for j in rate:
            for k in sets:
                if k == 'test_task':
                    user_num = 92
                if k == 'new_test_task':
                    user_num = 9

                for l in range(user_num):
                    pred_path = "prediction/" + k + "/" + "user" + str(user_num) + "/ce/" + i + "/" + "loss_weight_" + args.MODEL_TYPE + "_prediction_" + i + "_user" + str(l) + ".csv"
                    save_path = "prediction/" + k + "/" + "user" + str(user_num) + "/ce/" + i + "/" + "loss_weight_" + args.MODEL_TYPE + "_prediction_" + i + "_user" + str(l) + "_rate_" + str(j) + ".csv"
                    data = sample_prediction(pred_path, j)

                    head = []
                    for r in range(79):
                        head.append('act' + str(r + 1))
                    head.append('task_name')
                    head.append('gt')
                    head.insert(0, 'action_id')
                    pandas.DataFrame(data[:, 1:]).to_csv(save_path, header=head)


if __name__ == '__main__':
    main()
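Note: sample_prediction keeps roughly the first rate% of each (action_id, intention) group by dropping the last (100 - rate)% of its rows. The sketch below shows that truncation on a toy array; the column layout and values are invented for illustration and are not the real prediction CSV schema.

import numpy as np

# toy "prediction" rows: col 0 = row index, col 1 = action_id,
# second-to-last col = assumed intention, last col = ground-truth action
toy = np.array([
    [0, 7, 0, 11],
    [1, 7, 0, 12],
    [2, 7, 0, 13],
    [3, 7, 0, 14],
    [4, 7, 0, 15],
])

rate = 60                               # observe the first 60% of the sequence
n = int(len(toy) * (100 - rate) / 100)  # rows to drop from the end (here: 2)
observed = toy[:-n] if n > 0 else toy   # guard against emptying very short sequences
print(observed[:, -1])                  # -> [11 12 13]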
5  watch_and_help/stan/sampler_user.sh  Normal file
@@ -0,0 +1,5 @@
python3 sampler_user.py \
    --TASK test_task \
    --LOSS ce \
    --MODEL_TYPE lstmlast \
    --EPOCHS 50
76  watch_and_help/stan/save_act_series.R  Normal file
@@ -0,0 +1,76 @@
library(tidyverse)
library(cmdstanr)
library(dplyr)

strategies <- c("put_fridge", "put_dishwasher", "read_book")
model_type <- "lstmlast_cross_entropy_bs_32_iter_2000_train_task_prob"
rate <- "_0"
task_type <- "new_test_task" # new_test_task test_task
loss_type <- "ce"
set.seed(9746234)
if (task_type == "test_task") {
  user_num <- 92
  user <- c(0:(user_num - 1))
  N <- 1
}
if (task_type == "new_test_task") {
  user_num <- 9
  user <- c(0:(user_num - 1))
  N <- 1
}
total_user_act1 <- vector("list", user_num)
total_user_act2 <- vector("list", user_num)

sel <- vector("list", length(strategies))
act_series <- vector("list", user_num)
for (u in seq_along(user)) {
  print('user')
  print(u)
  dat <- vector("list", length(strategies))
  for (i in seq_along(strategies)) {
    if (rate == "_0") {
      dat[[i]] <- read.csv(paste0("prediction/", task_type, "/user", user_num, "/", loss_type, "/", strategies[[i]], "/loss_weight_", model_type, "_prediction_", strategies[[i]], "_user", user[[u]], "_rate_", "90", ".csv"))
    } else if (rate == "_100") {
      dat[[i]] <- read.csv(paste0("prediction/", task_type, "/user", user_num, "/", loss_type, "/", strategies[[i]], "/loss_weight_", model_type, "_prediction_", strategies[[i]], "_user", user[[u]], ".csv"))
    } else {
      dat[[i]] <- read.csv(paste0("prediction/", task_type, "/user", user_num, "/", loss_type, "/", strategies[[i]], "/loss_weight_", model_type, "_prediction_", strategies[[i]], "_user", user[[u]], "_rate", rate, ".csv"))
    }
    dat[[i]]$assumed_strategy <- strategies[[i]]
    dat[[i]]$index <- dat[[i]]$action_id # sample based on intention
    dat[[i]]$id <- dat[[i]][, 1] # sample based on intention
  }

  N <- 1
  # select all action series and infer every one
  sel[[1]] <- dat[[1]] %>%
    group_by(task_name) %>%
    filter(task_name == 1)
  sel[[1]] <- data.frame(sel[[1]])
  unique_act_id_t1 <- unique(sel[[1]]$action_id)
  write.csv(unique_act_id_t1, paste0("result/", task_type, "/user", user_num, "/", loss_type, "/act", "/", "action_series_", "user_", u, "_put_dishwasher", ".csv"))
  total_user_act1[[u]] <- unique_act_id_t1

  sel[[1]] <- dat[[1]] %>%
    group_by(task_name) %>%
    filter(task_name == 2)
  sel[[1]] <- data.frame(sel[[1]])
  unique_act_id_t1 <- unique(sel[[1]]$action_id)
  write.csv(unique_act_id_t1, paste0("result/", task_type, "/user", user_num, "/", loss_type, "/act", "/", "action_series_", "user_", u, "_read_book", ".csv"))
  total_user_act2[[u]] <- unique_act_id_t1
}

write.csv(total_user_act1, paste0("result/", task_type, "/user", user_num, "/", loss_type, "/act", "/", "action_series_", "_put_dishwasher_total", ".csv"))
write.csv(total_user_act2, paste0("result/", task_type, "/user", user_num, "/", loss_type, "/act", "/", "action_series_", "read_book_total", ".csv"))
87  watch_and_help/stan/split_user.py  Normal file
@@ -0,0 +1,87 @@
import numpy as np
import pathlib
import argparse

np.random.seed(seed=100)


def sample_user(data, num_users, split_inx):
    np.random.seed(seed=100)
    num_unique3 = np.unique(data[:, 1])
    num_unique2 = num_unique3[0:split_inx[1]]
    num_unique = num_unique3[0:split_inx[0]]

    # draw a subset of action ids for every synthetic user, per intention range
    user_list1 = [np.random.choice(num_unique, int(len(num_unique) / num_users), replace=False) for i in range(num_users)]
    user_list2 = [np.random.choice(num_unique2, int(len(num_unique2) / num_users), replace=False) for i in range(num_users)]
    user_list3 = [np.random.choice(num_unique3, int(len(num_unique3) / num_users), replace=False) for i in range(num_users)]

    user_data = []

    for i in range(num_users):  # len(user_list)
        user_idx1 = [int(item) for item in user_list1[i]]
        user_idx2 = [int(item) for item in user_list2[i]]
        user_idx3 = [int(item) for item in user_list3[i]]

        data_list = []
        for j in range(len(user_idx1)):
            inx = np.where((data[:, 1] == user_idx1[j]) & (data[:, -2] == 0))
            data_list.append(data[inx])

        for j in range(len(user_idx2)):
            inx = np.where((data[:, 1] == user_idx2[j]) & (data[:, -2] == 1))
            data_list.append(data[inx])

        for j in range(len(user_idx3)):
            inx = np.where((data[:, 1] == user_idx3[j]) & (data[:, -2] == 2))
            data_list.append(data[inx])

        user_data.append(np.vstack(data_list))

    return user_data


def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--LOSS', type=str, default='ce')
    parser.add_argument('--MODEL_TYPE', type=str, default="lstmlast_cross_entropy_bs_32_iter_2000_train_task_prob")
    parser.add_argument('--EPOCHS', type=int, default=50)
    parser.add_argument('--TASK', type=str, default='test_task')
    args = parser.parse_args()

    pref = ['put_fridge', 'put_dishwasher', 'read_book']

    if args.TASK == 'new_test_task':
        NUM_USER = 9  # 9 for 1 user 1 action
        SPLIT_INX = [NUM_USER, 45]
    if args.TASK == 'test_task':
        NUM_USER = 92
        SPLIT_INX = [NUM_USER, 229]

    head = []
    for j in range(79):
        head.append('act' + str(j + 1))
    head.append('task_name')
    head.append('gt')
    head.insert(0, 'action_id')
    head.insert(0, '')

    for i in pref:
        path = "prediction/" + args.TASK + "/" + args.MODEL_TYPE + "/model_" + i + "_strategy_put_fridge" + ".csv"
        data = np.genfromtxt(path, skip_header=1, delimiter=',')
        data_task_name = np.genfromtxt(path, skip_header=1, delimiter=',', usecols=-2, dtype=None)
        data_task_name[data_task_name == b'put_fridge'] = 0
        data_task_name[data_task_name == b'put_dishwasher'] = 1
        data_task_name[data_task_name == b'read_book'] = 2
        data[:, -2] = data_task_name.astype(float)
        print("data length: ", len(data))
        users_data = sample_user(data, NUM_USER, SPLIT_INX)

        length = 0
        pathlib.Path("prediction/" + args.TASK + "/user" + str(NUM_USER) + "/" + args.LOSS + "/" + i).mkdir(parents=True, exist_ok=True)
        for j in range(len(users_data)):
            save_path = "prediction/" + args.TASK + "/user" + str(NUM_USER) + "/" + args.LOSS + "/" + i + "/loss_weight_" + args.MODEL_TYPE + "_prediction_" + i + "_user" + str(j) + ".csv"
            length = length + len(users_data[j])
            np.savetxt(save_path, users_data[j], delimiter=',', header=','.join(head))
        print("user data length: ", length)


if __name__ == '__main__':
    main()
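Note: sample_user assigns unique action ids to NUM_USER synthetic users with np.random.choice(..., replace=False), separately for each intention range. A small sketch of that id-splitting idea follows; the number of ids and users are made up for illustration. Because each user's draw is independent, the same action id can land with more than one user; a strictly disjoint split would instead permute the ids once and slice the permutation.

import numpy as np

np.random.seed(100)
num_users = 3
action_ids = np.arange(12)  # stand-in for the unique ids in data[:, 1]

# each synthetic user draws len(action_ids) // num_users ids without replacement;
# the draws are independent across users, so overlaps between users are possible
per_user = [np.random.choice(action_ids, len(action_ids) // num_users, replace=False)
            for _ in range(num_users)]
for u, ids in enumerate(per_user):
    print("user", u, "gets action ids", sorted(int(i) for i in ids))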
5  watch_and_help/stan/split_user.sh  Normal file
@@ -0,0 +1,5 @@
python3 split_user.py \
    --TASK test_task \
    --LOSS ce \
    --MODEL_TYPE lstmlast \
    --EPOCHS 50
BIN  watch_and_help/stan/strategy_inference_model  Executable file
Binary file not shown.
26  watch_and_help/stan/strategy_inference_model.stan  Normal file
@@ -0,0 +1,26 @@
data {
  int<lower=1> I; // number of question options (22)
  int<lower=0> N; // number of questions being asked by the user
  int<lower=1> K; // number of strategies
  // observed "true" questions of the user
  int q[N];
  // array of predicted probabilities of questions given strategies
  // coming from the forward neural network
  matrix[I, K] P_q_S[N];
}
parameters {
  // probability vector of the strategies being applied by the user
  // to be inferred by the model here
  simplex[K] P_S;
}
model {
  for (n in 1:N) {
    // marginal probability vector of the questions being asked
    vector[I] theta = P_q_S[n] * P_S;
    // categorical likelihood
    target += categorical_lpmf(q[n] | theta);
  }
  // priors
  target += dirichlet_lpdf(P_S | rep_vector(1.0, K));
}
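Note: for each observed question q[n], the model marginalizes the per-strategy prediction matrix over the unknown strategy mixture, theta = P_q_S[n] * P_S, and scores q[n] with a categorical likelihood; P_S itself gets a flat Dirichlet prior and is what Stan infers. The numpy sketch below only illustrates that likelihood computation for one question, with toy numbers (I = 4 question options, K = 3 strategies); it is not part of the pipeline.

import numpy as np

# columns are P(question | strategy k), one column per strategy; each column sums to 1
P_q_S_n = np.array([
    [0.70, 0.10, 0.25],
    [0.10, 0.60, 0.25],
    [0.10, 0.20, 0.25],
    [0.10, 0.10, 0.25],
])
P_S = np.array([0.5, 0.3, 0.2])  # a candidate strategy mixture (simplex)

theta = P_q_S_n @ P_S            # marginal P(question) under the mixture
print(theta, theta.sum())        # [0.43 0.28 0.16 0.13], sums to 1

q_n = 0                          # observed question (0-based here; Stan is 1-based)
log_lik = np.log(theta[q_n])     # the categorical_lpmf term added to target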
190  watch_and_help/stan/strategy_inference_test.R  Normal file
@@ -0,0 +1,190 @@
library(tidyverse)
library(cmdstanr)
library(dplyr)

# index order of the strategies assumed throughout
strategies <- c("put_fridge", "put_dishwasher", "read_book")
model_type <- "lstmlast"
rates <- c("_0", "_10", "_20", "_30", "_40", "_50", "_60", "_70", "_80", "_90", "_100")
task_type <- "test_task" # new_test_task test_task
loss_type <- "ce"
set.seed(9746234)
if (task_type == "test_task") {
  user_num <- 92
  user <- c(38:(user_num - 1))
  N <- 1
}
if (task_type == "new_test_task") {
  user_num <- 9
  user <- c(0:(user_num - 1))
  N <- 1
}

# read data from csv
sel <- vector("list", length(strategies))
act_series <- vector("list", user_num)
for (u in seq_along(user)) {
  for (rate in rates) {
    dat <- vector("list", length(strategies))
    for (i in seq_along(strategies)) {
      if (rate == "_0") {
        dat[[i]] <- read.csv(paste0("prediction/", task_type, "/user", user_num, "/", loss_type, "/", strategies[[i]], "/loss_weight_", model_type, "_prediction_", strategies[[i]], "_user", user[[u]], "_rate_", "10", ".csv")) # _60
      } else if (rate == "_100") {
        dat[[i]] <- read.csv(paste0("prediction/", task_type, "/user", user_num, "/", loss_type, "/", strategies[[i]], "/loss_weight_", model_type, "_prediction_", strategies[[i]], "_user", user[[u]], ".csv")) # _60
      } else {
        dat[[i]] <- read.csv(paste0("prediction/", task_type, "/user", user_num, "/", loss_type, "/", strategies[[i]], "/loss_weight_", model_type, "_prediction_", strategies[[i]], "_user", user[[u]], "_rate", rate, ".csv")) # _60
      }
      # strategy assumed for prediction
      dat[[i]]$assumed_strategy <- strategies[[i]]
      dat[[i]]$index <- dat[[i]]$action_id # sample based on intention
      dat[[i]]$id <- dat[[i]][, 1] # sample based on intention
    }

    # reset N after inference
    if (task_type == "test_task") {
      N <- 1
    }
    if (task_type == "new_test_task") {
      N <- 1
    }

    # select one action series from one intention
    if (rate == "_0") {
      sel[[1]] <- dat[[1]] %>%
        group_by(task_name) %>%
        sample_n(N)

      sel[[1]] <- data.frame(sel[[1]])
      act_series[[u]] <- sel[[1]]$action_id
      # print(typeof(sel[[1]]))
      # print(typeof(dat[[1]]))
      # print(sel[[1]]$action_id[2])
    }

    print(c('unique action id', sel[[1]]$action_id))

    # filter data from the selected action series, N series per intention
    dat[[1]] <- subset(dat[[1]], dat[[1]]$action_id == sel[[1]]$action_id[1] | dat[[1]]$action_id == sel[[1]]$action_id[2] | dat[[1]]$action_id == sel[[1]]$action_id[3])
    dat[[2]] <- subset(dat[[2]], dat[[2]]$action_id == sel[[1]]$action_id[1] | dat[[2]]$action_id == sel[[1]]$action_id[2] | dat[[2]]$action_id == sel[[1]]$action_id[3])
    dat[[3]] <- subset(dat[[3]], dat[[3]]$action_id == sel[[1]]$action_id[1] | dat[[3]]$action_id == sel[[1]]$action_id[2] | dat[[3]]$action_id == sel[[1]]$action_id[3])
    row.names(dat) <- NULL
    print(c('task name 1', dat[[1]]$task_name))
    print(c('task name 2', dat[[2]]$task_name))
    print(c('task name 3', dat[[3]]$task_name))
    print(c('action id 1', dat[[1]]$action_id))
    print(c('action id 2', dat[[2]]$action_id))
    print(c('action id 3', dat[[3]]$action_id))

    # create save path
    dir.create(file.path(paste0("result/", task_type, "/user", user_num, "/", loss_type, "/N", N)), showWarnings = FALSE, recursive = TRUE)
    dir.create(file.path("temp"), showWarnings = FALSE)
    save_path <- paste0("result/", task_type, "/user", user_num, "/", loss_type, "/N", N, "/", model_type, "_N", N, "_", "result", rate, "_user", user[[u]], ".csv")

    if (task_type == "test_task") {
      dat <- do.call(rbind, dat) %>%
        mutate(index = as.numeric(as.factor(id))) %>%
        rename(true_strategy = task_name) %>%
        mutate(
          true_strategy = factor(
            # true_strategy, levels = 0:3,
            true_strategy, levels = 0:2,
            labels = strategies
          ),
          q_type = case_when(
            gt %in% c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19, 20, 22, 23, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 42, 43, 44, 58, 59, 64, 65, 68, 69, 70, 71, 72, 73, 74) ~ "put_fridge",
            gt %in% c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 25, 29, 30, 31, 32, 33, 34, 37, 38, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57) ~ "put_dishwasher",
            gt %in% c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45) ~ "read_book",
          )
        )
    }

    if (task_type == "new_test_task") {
      dat <- do.call(rbind, dat) %>%
        mutate(index = as.numeric(as.factor(id))) %>%
        rename(true_strategy = task_name) %>%
        mutate(
          true_strategy = factor(
            true_strategy, levels = 0:2,
            labels = strategies
          ),
          q_type = case_when(
            # new_test_set
            gt %in% c(1, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 19, 20, 22, 23, 25, 29, 30, 31, 32, 33, 34, 35, 40, 42, 43, 44, 46, 47, 52, 53, 55, 56, 58, 59, 60, 64, 65, 68, 69, 70, 71, 72, 73, 74, 75, 77, 78) ~ "put_fridge",
            gt %in% c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74) ~ "put_dishwasher",
            gt %in% c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 75, 76, 77, 78) ~ "read_book",
          )
        )
    }
    # print(nrow(dat))
    # print(dat)

    dat_obs <- dat %>% filter(assumed_strategy == strategies[[i]])
    N <- nrow(dat_obs)
    print(c("N: ", N))
    q <- dat_obs$gt
    true_strategy <- dat_obs$true_strategy

    K <- length(unique(dat$assumed_strategy))
    I <- 79

    P_q_S <- array(dim = c(N, I, K))
    for (n in 1:N) {
      P_q_S[n, , ] <- dat %>%
        filter(index == n) %>%
        select(matches("^act[[:digit:]]+$")) %>%
        as.matrix() %>%
        t()
      for (k in 1:K) {
        # normalize probabilities
        P_q_S[n, , k] <- P_q_S[n, , k] / sum(P_q_S[n, , k])
      }
    }

    mod <- cmdstan_model(paste0(getwd(), "/strategy_inference_model.stan"))

    if (rate == "_0") {
      sub <- integer(0)
    } else {
      sub <- which(true_strategy == "put_fridge")
    }
    sdata <- list(N = length(sub), K = K, I = I, q = q[sub], P_q_S = P_q_S[sub, , ])
    fit_put_fridge <- mod$sample(data = sdata, refresh = 0, output_dir = paste0(getwd(), "/temp"))
    print(fit_put_fridge$summary(NULL, c("mean", "sd")))

    if (rate == "_0") {
      sub <- integer(0)
    } else {
      sub <- which(true_strategy == "put_dishwasher")
    }
    sdata <- list(N = length(sub), K = K, I = I, q = q[sub], P_q_S = P_q_S[sub, , ])
    fit_put_dishwasher <- mod$sample(data = sdata, refresh = 0, output_dir = paste0(getwd(), "/temp"))
    print(fit_put_dishwasher$summary(NULL, c("mean", "sd")))

    # read_book strategy (should favor index 3)
    if (rate == "_0") {
      sub <- integer(0)
    } else {
      sub <- which(true_strategy == "read_book")
    }
    sdata <- list(N = length(sub), K = K, I = I, q = q[sub], P_q_S = P_q_S[sub, , ])
    fit_read_book <- mod$sample(data = sdata, refresh = 0, output_dir = paste0(getwd(), "/temp"))
    print(fit_read_book$summary(NULL, c("mean", "sd")))

    # save csv
    df <- rbind(fit_put_fridge$summary(), fit_put_dishwasher$summary(), fit_read_book$summary())
    write.csv(df, file = save_path, quote = FALSE)
  }
}