import argparse
import csv

import numpy as np
from numpy import genfromtxt
import pandas


# NOTE: the misspelled name is kept on purpose so existing callers keep working.
def sample_predciton(path, rate):
    """Truncate every (task, key) group of a prediction CSV to its leading rows.

    The CSV at ``path`` is loaded with pandas and split into groups of rows
    that share the same value in column 1 and the same task id (0..6) in the
    second-to-last column.  From each group the trailing
    ``int(len(group) * (100 - rate) / 100)`` rows are removed, so roughly the
    first ``rate`` percent of the group is kept.  At least one row is always
    removed from a group, which means a group with a single row becomes empty.

    Column 1 is presumably ``action_id`` and the second-to-last column
    ``task_name`` (that is the header ``main`` writes) -- confirm against the
    files that are actually fed in.

    Args:
        path: Path of the CSV file to read.
        rate: Percentage (0-100) of each group to keep, counted from the
            start of the group.

    Returns:
        A 2-D array with the kept rows of all groups stacked together,
        ordered by task id first and by the sorted unique values of column 1
        second.  All columns of the input file are preserved.  If the file
        holds no data rows, the empty array is returned as is.
    """
    data = pandas.read_csv(path).values
    if len(data) == 0:
        # Without rows there are no groups, and np.vstack refuses an empty
        # list; hand back the (0, n_columns) array instead of crashing.
        return data

    task_list = [0, 1, 2, 3, 4, 5, 6]
    group_keys = np.unique(data[:, 1])

    samples = []
    for task_id in task_list:
        for key in group_keys:
            mask = (data[:, 1] == key) & (data[:, -2] == task_id)
            group = data[mask]

            n_drop = int(len(group) * (100 - rate) / 100)
            if n_drop == 0:
                # ``group[:-0]`` would select nothing at all, so one row is
                # dropped instead.
                # NOTE(review): this drops a row even when the rate would
                # keep the whole group -- confirm that this is intended.
                n_drop = 1
            kept = group[:-n_drop]

            if len(kept) == 0:
                # Also fires for (task, key) combinations that have no rows.
                print('len of after sampling', len(kept))
            samples.append(kept)

    return np.vstack(samples)


def main():
    """Write rate-truncated copies of every per-user prediction CSV.

    For each task (0..6), each rate (10..90 in steps of 10) and each user
    (0..4) the file ``user<l>_pred.csv`` inside the run directory selected by
    the command-line arguments is truncated with ``sample_predciton`` and
    saved next to it as ``user<l>_rate_<rate>_pred.csv``.
    """
    # parsing parameters
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--batch_size', type=int, default=8, help='batch size')
    parser.add_argument('--lr', type=float, default=1e-4, help='learning rate')
    parser.add_argument('--hidden_size', type=int, default=128, help='hidden_size')
    parser.add_argument('--model_type', type=str, default='lstmlast', help='model type')
    args = parser.parse_args()

    task = np.arange(7)
    user_num = 5
    rate = [10, 20, 30, 40, 50, 60, 70, 80, 90]

    # Column names of the written CSV; identical for every file, so build
    # them once instead of once per output file.
    head = ['action_id']
    head.extend('act' + str(r + 1) for r in range(7))
    head.extend(['task_name', 'gt'])

    for i in task:
        # Directory shared by the input and output files of this task.
        run_dir = "prediction/task{}/{}_bs_{}_lr_{}_hidden_size_{}".format(
            i, args.model_type, args.batch_size, args.lr, args.hidden_size)
        for j in rate:
            for l in range(user_num):
                pred_path = "{}/user{}_pred.csv".format(run_dir, l)
                save_path = "{}/user{}_rate_{}_pred.csv".format(run_dir, l, j)

                data = sample_predciton(pred_path, j)
                # Column 0 of the loaded file is skipped; to_csv writes a
                # fresh index column in its place.
                pandas.DataFrame(data[:, 1:]).to_csv(save_path, header=head)


if __name__ == '__main__':
    # split the prediction by length, from 10% to 90%
    main()