from utils import adt_dataset, seed_torch
from model import attended_hand_recognition
from utils.opt import options
from utils import log
from torch.utils.data import DataLoader
import torch
import numpy as np
import time
import datetime
import torch.optim as optim
import os
os.nice(5)  # lower scheduling priority so training plays nicely with other jobs
import math


def main(opt):
    """Train the attended-hand recognition model and periodically validate.

    Saves a CSV log row and a model checkpoint on every validation epoch.
    `opt` is the parsed options namespace; `opt.joint_number` is derived here
    from the body/hand joint counts and read by the model and `run_model`.
    """
    # set the random seed to ensure reproducibility
    seed_torch.seed_torch(seed=0)
    torch.set_num_threads(1)
    data_dir = opt.data_dir
    seq_len = opt.seq_len
    opt.joint_number = opt.body_joint_number + opt.hand_joint_number * 2
    learning_rate = opt.learning_rate

    print('>>> create model')
    net = attended_hand_recognition.attended_hand_recognition(opt=opt).to(opt.cuda_idx)
    # only optimize parameters that require gradients
    optimizer = optim.AdamW(filter(lambda x: x.requires_grad, net.parameters()),
                            lr=learning_rate, weight_decay=opt.weight_decay)
    print(">>> total params: {:.2f}M".format(sum(p.numel() for p in net.parameters()) / 1000000.0))

    print('>>> loading datasets')
    train_actions = 'all'
    test_actions = opt.actions
    train_dataset = adt_dataset.adt_dataset(data_dir, seq_len, train_actions, 1,
                                            opt.object_num, opt.hand_joint_number, opt.sample_rate)
    train_data_size = train_dataset.dataset.shape
    print("Training data size: {}".format(train_data_size))
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True,
                              num_workers=0, pin_memory=True)
    valid_dataset = adt_dataset.adt_dataset(data_dir, seq_len, test_actions, 0,
                                            opt.object_num, opt.hand_joint_number, opt.sample_rate)
    valid_data_size = valid_dataset.dataset.shape
    print("Validation data size: {}".format(valid_data_size))
    valid_loader = DataLoader(valid_dataset, batch_size=opt.test_batch_size, shuffle=False,
                              num_workers=0, pin_memory=True)

    # training
    local_time = time.asctime(time.localtime(time.time()))
    print('\nTraining starts at ' + local_time)
    start_time = datetime.datetime.now()
    start_epoch = 1
    acc_best = 0
    best_epoch = 0
    exp_lr = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=opt.gamma, last_epoch=-1)
    for epo in range(start_epoch, opt.epoch + 1):
        learning_rate = exp_lr.optimizer.param_groups[0]["lr"]
        train_start_time = datetime.datetime.now()
        result_train = run_model(net, optimizer, is_train=1, data_loader=train_loader, opt=opt)
        train_end_time = datetime.datetime.now()
        # total_seconds() keeps sub-second precision and the days component
        # (.seconds alone truncates to whole seconds and wraps at 24 h)
        train_time = (train_end_time - train_start_time).total_seconds() * 1000
        train_batch_num = math.ceil(train_data_size[0] / opt.batch_size)
        train_time_per_batch = math.ceil(train_time / train_batch_num)
        #print('\nTraining time per batch: {} ms'.format(train_time_per_batch))
        exp_lr.step()
        # snapshot RNG state so validation does not perturb training randomness
        rng_state = torch.get_rng_state()
        if epo % opt.validation_epoch == 0:
            print('>>> training epoch: {:d}, lr: {:.12f}'.format(epo, learning_rate))
            print('Training data size: {}'.format(train_data_size))
            print('Average baseline acc: {:.2f}%'.format(result_train['baseline_acc_average'] * 100))
            print('Average training acc: {:.2f}%'.format(result_train['prediction_acc_average'] * 100))

            test_start_time = datetime.datetime.now()
            result_valid = run_model(net, is_train=0, data_loader=valid_loader, opt=opt)
            test_end_time = datetime.datetime.now()
            test_time = (test_end_time - test_start_time).total_seconds() * 1000
            test_batch_num = math.ceil(valid_data_size[0] / opt.test_batch_size)
            test_time_per_batch = math.ceil(test_time / test_batch_num)
            #print('\nTest time per batch: {} ms'.format(test_time_per_batch))
            print('Validation data size: {}'.format(valid_data_size))
            print('Average baseline acc: {:.2f}%'.format(result_valid['baseline_acc_average'] * 100))
            print('Average validation acc: {:.2f}%'.format(result_valid['prediction_acc_average'] * 100))
            if result_valid['prediction_acc_average'] > acc_best:
                acc_best = result_valid['prediction_acc_average']
                best_epoch = epo
            print('Best validation error: {:.2f}%, best epoch: {}'.format(acc_best * 100, best_epoch))

            end_time = datetime.datetime.now()
            total_training_time = (end_time - start_time).total_seconds() / 60
            print('\nTotal training time: {:.1f} min'.format(total_training_time))
            local_time = time.asctime(time.localtime(time.time()))
            print('\nTraining ends at ' + local_time)

            # assemble one CSV row: epoch, lr, then train and validation metrics
            result_log = np.array([epo, learning_rate])
            head = np.array(['epoch', 'lr'])
            for k in result_train.keys():
                result_log = np.append(result_log, [result_train[k]])
                head = np.append(head, [k])
            for k in result_valid.keys():
                result_log = np.append(result_log, [result_valid[k]])
                head = np.append(head, ['valid_' + k])
            csv_name = 'attended_hand_recognition_results'
            model_name = 'attended_hand_recognition_model.pt'
            log.save_csv_log(opt, head, result_log, is_create=(epo == 1), file_name=csv_name)
            log.save_ckpt({'epoch': epo,
                           'lr': learning_rate,
                           'acc': result_valid['prediction_acc_average'],
                           'state_dict': net.state_dict(),
                           'optimizer': optimizer.state_dict()},
                          opt=opt,
                          file_name=model_name)
        # restore RNG state captured before validation
        torch.set_rng_state(rng_state)


def eval(opt):
    """Evaluate a saved checkpoint on the train and test splits.

    NOTE: the function name shadows the builtin `eval`; kept for
    backward compatibility with external callers.
    """
    data_dir = opt.data_dir
    seq_len = opt.seq_len
    opt.joint_number = opt.body_joint_number + opt.hand_joint_number * 2

    print('>>> create model')
    net = attended_hand_recognition.attended_hand_recognition(opt=opt).to(opt.cuda_idx)
    print(">>> total params: {:.2f}M".format(sum(p.numel() for p in net.parameters()) / 1000000.0))

    #load model
    model_name = 'attended_hand_recognition_model.pt'
    model_path = os.path.join(opt.ckpt, model_name)
    print(">>> loading ckpt from '{}'".format(model_path))
    # map_location makes a GPU-saved checkpoint loadable on any target device
    ckpt = torch.load(model_path, map_location=opt.cuda_idx)
    net.load_state_dict(ckpt['state_dict'])
    print(">>> ckpt loaded (epoch: {} | acc: {})".format(ckpt['epoch'], ckpt['acc']))

    print('>>> loading datasets')
    train_actions = 'all'
    test_actions = opt.actions
    train_dataset = adt_dataset.adt_dataset(data_dir, seq_len, train_actions, 1,
                                            opt.object_num, opt.hand_joint_number, opt.sample_rate)
    train_data_size = train_dataset.dataset.shape
    print("Train data size: {}".format(train_data_size))
    train_loader = DataLoader(train_dataset, batch_size=opt.test_batch_size, shuffle=False,
                              num_workers=0, pin_memory=True)
    test_dataset = adt_dataset.adt_dataset(data_dir, seq_len, test_actions, 0,
                                           opt.object_num, opt.hand_joint_number, opt.sample_rate)
    test_data_size = test_dataset.dataset.shape
    print("Test data size: {}".format(test_data_size))
    test_loader = DataLoader(test_dataset, batch_size=opt.test_batch_size, shuffle=False,
                             num_workers=0, pin_memory=True)

    # test
    local_time = time.asctime(time.localtime(time.time()))
    print('\nTest starts at ' + local_time)
    start_time = datetime.datetime.now()
    if opt.save_predictions:
        result_train, predictions_train = run_model(net, is_train=0, data_loader=train_loader, opt=opt)
        result_test, predictions_test = run_model(net, is_train=0, data_loader=test_loader, opt=opt)
    else:
        result_train = run_model(net, is_train=0, data_loader=train_loader, opt=opt)
        result_test = run_model(net, is_train=0, data_loader=test_loader, opt=opt)
    print('Average train baseline acc: {:.2f}%'.format(result_train['baseline_acc_average'] * 100))
    print('Average train method acc: {:.2f}%'.format(result_train['prediction_acc_average'] * 100))
    print('Average test baseline acc: {:.2f}%'.format(result_test['baseline_acc_average'] * 100))
    print('Average test method acc: {:.2f}%'.format(result_test['prediction_acc_average'] * 100))
    end_time = datetime.datetime.now()
    total_test_time = (end_time - start_time).total_seconds() / 60
    print('\nTotal test time: {:.1f} min'.format(total_test_time))
    local_time = time.asctime(time.localtime(time.time()))
    print('\nTest ends at ' + local_time)

    if opt.save_predictions:
        # last three channels of the saved array are [p(left), p(right), gt]
        prediction = predictions_train[:, :, -3:-1].reshape(-1, 2)
        attended_hand_gt = predictions_train[:, :, -1:].reshape(-1)
        y_prd = np.argmax(prediction, axis=1)
        acc = np.sum(y_prd == attended_hand_gt) / prediction.shape[0]
        print('Average train acc: {:.2f}%'.format(acc * 100))
        predictions_train_path = os.path.join(opt.ckpt, "attended_hand_recognition_train.npy")
        np.save(predictions_train_path, predictions_train)

        prediction = predictions_test[:, :, -3:-1].reshape(-1, 2)
        attended_hand_gt = predictions_test[:, :, -1:].reshape(-1)
        y_prd = np.argmax(prediction, axis=1)
        acc = np.sum(y_prd == attended_hand_gt) / prediction.shape[0]
        print('Average test acc: {:.2f}%'.format(acc * 100))
        predictions_test_path = os.path.join(opt.ckpt, "attended_hand_recognition_test.npy")
        np.save(predictions_test_path, predictions_test)


def run_model(net, optimizer=None, is_train=1, data_loader=None, opt=None):
    """Run one pass of `net` over `data_loader`.

    When `is_train == 1`, performs optimization steps with `optimizer`;
    otherwise runs a gradient-free evaluation pass.

    Returns a dict with 'baseline_acc_average' and 'prediction_acc_average';
    additionally returns the stacked per-frame predictions array when
    `opt.is_eval and opt.save_predictions`.
    """
    if is_train == 1:
        net.train()
    else:
        net.eval()
    if opt.is_eval and opt.save_predictions:
        predictions = []
    prediction_acc_average = 0
    baseline_acc_average = 0
    criterion = torch.nn.CrossEntropyLoss()
    n = 0  # total number of frames seen (assumed > 0 — empty loader would divide by zero)
    input_n = opt.seq_len
    # disable autograd during evaluation to save memory and compute
    with torch.set_grad_enabled(is_train == 1):
        for i, (data) in enumerate(data_loader):
            batch_size, seq_n, dim = data.shape
            joint_number = opt.joint_number
            object_num = opt.object_num
            # when only one sample in this batch
            if batch_size == 1 and is_train == 1:
                continue
            n += batch_size * seq_n
            data = data.float().to(opt.cuda_idx)
            # feature layout per frame (assumed from the slicing below — confirm
            # against adt_dataset): eye gaze (3), joints (joint_number*3),
            # head direction (3), object bbox corners (8*object_num*2 points * 3),
            # then gt / baseline attended-hand labels
            eye_gaze = data.clone()[:, :, :3]
            joints = data.clone()[:, :, 3:(joint_number + 1) * 3]
            head_directions = data.clone()[:, :, (joint_number + 1) * 3:(joint_number + 2) * 3]
            attended_hand_gt = data.clone()[:, :, (joint_number + 2 + 8 * object_num * 2) * 3:
                                            (joint_number + 2 + 8 * object_num * 2) * 3 + 1].type(torch.LongTensor).to(opt.cuda_idx)
            attended_hand_baseline = data.clone()[:, :, (joint_number + 2 + 8 * object_num * 2) * 3 + 1:
                                                  (joint_number + 2 + 8 * object_num * 2) * 3 + 2].type(torch.LongTensor).to(opt.cuda_idx)
            # renamed from `input` to avoid shadowing the builtin
            model_input = torch.cat((joints, head_directions), dim=2)
            if object_num > 0:
                object_positions = data.clone()[:, :, (joint_number + 2) * 3:
                                                (joint_number + 2 + 8 * object_num * 2) * 3]
                model_input = torch.cat((model_input, object_positions), dim=2)
            prediction = net(model_input, input_n=input_n)
            if opt.is_eval and opt.save_predictions:
                # eye_gaze + joints + head_directions + object_positions + predictions + attended_hand_gt
                # NOTE(review): softmax here means the loss below sees probabilities,
                # not logits — harmless since the loss is unused in eval and argmax
                # is unchanged, but the loss value itself is not meaningful here.
                prediction = torch.nn.functional.softmax(prediction, dim=2)
                prediction_cpu = torch.cat((eye_gaze, model_input), dim=2)
                prediction_cpu = torch.cat((prediction_cpu, prediction), dim=2)
                prediction_cpu = torch.cat((prediction_cpu, attended_hand_gt), dim=2)
                prediction_cpu = prediction_cpu.cpu().data.numpy()
                if len(predictions) == 0:
                    predictions = prediction_cpu
                else:
                    predictions = np.concatenate((predictions, prediction_cpu), axis=0)
            attended_hand_gt = attended_hand_gt.reshape(batch_size * input_n)
            attended_hand_baseline = attended_hand_baseline.reshape(batch_size * input_n)
            prediction = prediction.reshape(-1, 2)
            loss = criterion(prediction, attended_hand_gt)
            if is_train == 1:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            # calculate prediction accuracy (weighted running sums, normalized below)
            _, y_prd = torch.max(prediction.data, 1)
            acc = torch.sum(y_prd == attended_hand_gt) / (batch_size * input_n)
            prediction_acc_average += acc.cpu().data.numpy() * batch_size * input_n
            acc = torch.sum(attended_hand_gt == attended_hand_baseline) / (batch_size * input_n)
            baseline_acc_average += acc.cpu().data.numpy() * batch_size * input_n
    result = {}
    result["baseline_acc_average"] = baseline_acc_average / n
    result["prediction_acc_average"] = prediction_acc_average / n
    if opt.is_eval and opt.save_predictions:
        return result, predictions
    else:
        return result


if __name__ == '__main__':
    option = options().parse()
    if not option.is_eval:
        main(option)
    else:
        eval(option)