first commit
This commit is contained in:
parent
99ce0acafb
commit
8f6b6a34e7
73 changed files with 11656 additions and 0 deletions
368
attended_hand_recognition_hot3d.py
Normal file
368
attended_hand_recognition_hot3d.py
Normal file
|
@ -0,0 +1,368 @@
|
|||
from utils import hot3d_aria_dataset, seed_torch
|
||||
from model import attended_hand_recognition
|
||||
from utils.opt import options
|
||||
from utils import log
|
||||
from torch.utils.data import DataLoader
|
||||
import torch
|
||||
import numpy as np
|
||||
import time
|
||||
import datetime
|
||||
import torch.optim as optim
|
||||
import os
|
||||
os.nice(5)
|
||||
import math
|
||||
|
||||
|
||||
def main(opt):
    """Train the attended-hand recognition network and validate it periodically.

    The train/validation split is chosen from ``opt.actions``:

    * ``'all'``: cross-user evaluation; ``opt.test_user_id`` (1, 2 or 3)
      selects which group of three subjects is held out.
    * ``'room'`` / ``'kitchen'`` / ``'office'``: cross-scene evaluation; the
      named scene is held out and the two others are used for training.

    Every ``opt.validation_epoch`` epochs the model is validated, a row is
    appended to the CSV log and a checkpoint is written through ``log``.

    Args:
        opt: parsed options object. Besides the fields read here it is passed
            on to the model constructor, ``run_model`` and the ``log`` helpers.

    Side effects:
        Sets ``opt.joint_number``, appends the split-specific sub-directory to
        ``opt.ckpt`` and creates that directory if needed.

    Raises:
        ValueError: if ``opt.actions`` is not recognised, or if it is ``'all'``
            and ``opt.test_user_id`` is not 1, 2 or 3.
    """
    # set the random seed to ensure reproducibility
    seed_torch.seed_torch(seed=0)
    torch.set_num_threads(1)

    data_dir = opt.data_dir
    seq_len = opt.seq_len
    opt.joint_number = opt.body_joint_number + opt.hand_joint_number*2
    learning_rate = opt.learning_rate
    print('>>> create model')
    net = attended_hand_recognition.attended_hand_recognition(opt=opt).to(opt.cuda_idx)
    optimizer = optim.AdamW(filter(lambda x: x.requires_grad, net.parameters()), lr=learning_rate, weight_decay=opt.weight_decay)
    print(">>> total params: {:.2f}M".format(sum(p.numel() for p in net.parameters()) / 1000000.0))
    print('>>> loading datasets')

    actions = opt.actions
    test_user_id = opt.test_user_id
    all_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
    # cross-user folds: test_user_id -> (checkpoint sub-directory, held-out subjects)
    user_folds = {
        1: ('/user1/', ['P0001', 'P0002', 'P0003']),
        2: ('/user2/', ['P0009', 'P0010', 'P0011']),
        3: ('/user3/', ['P0012', 'P0014', 'P0015']),
    }
    # cross-scene folds: held-out scene -> checkpoint sub-directory
    scene_folds = {'room': '/scene1/', 'kitchen': '/scene2/', 'office': '/scene3/'}

    if actions == 'all':
        if test_user_id not in user_folds:
            # previously this fell through and crashed later with a NameError
            raise ValueError("Unrecognised test_user_id: {}".format(test_user_id))
        ckpt_suffix, test_subjects = user_folds[test_user_id]
        train_subjects = [subject for subject in all_subjects if subject not in test_subjects]
        train_actions = 'all'
        test_actions = 'all'
    elif actions in scene_folds:
        ckpt_suffix = scene_folds[actions]
        # dict order (room, kitchen, office) reproduces the original training-scene order
        train_actions = [scene for scene in scene_folds if scene != actions]
        test_actions = [actions]
        train_subjects = list(all_subjects)
        test_subjects = list(all_subjects)
    else:
        raise ValueError("Unrecognised actions: {}".format(actions))
    opt.ckpt = opt.ckpt + ckpt_suffix

    os.makedirs(opt.ckpt, exist_ok=True)

    train_dataset = hot3d_aria_dataset.hot3d_aria_dataset(data_dir, train_subjects, seq_len, train_actions, opt.object_num, opt.sample_rate)
    train_data_size = train_dataset.dataset.shape
    print("Training data size: {}".format(train_data_size))
    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=0, pin_memory=True)
    valid_dataset = hot3d_aria_dataset.hot3d_aria_dataset(data_dir, test_subjects, seq_len, test_actions, opt.object_num, opt.sample_rate)
    valid_data_size = valid_dataset.dataset.shape
    print("Validation data size: {}".format(valid_data_size))
    valid_loader = DataLoader(valid_dataset, batch_size=opt.test_batch_size, shuffle=False, num_workers=0, pin_memory=True)

    # training
    local_time = time.asctime(time.localtime(time.time()))
    print('\nTraining starts at ' + local_time)
    start_time = datetime.datetime.now()
    start_epoch = 1

    acc_best = 0
    best_epoch = 0
    # The first logged row must create the CSV. Testing `epo == 1` alone never
    # fires when opt.validation_epoch > 1, so track it explicitly.
    # NOTE(review): assumes is_create means "start a fresh file" - confirm in utils.log.
    log_created = False
    exp_lr = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=opt.gamma, last_epoch=-1)
    for epo in range(start_epoch, opt.epoch + 1):
        # learning rate used for this epoch (read before the scheduler steps)
        learning_rate = exp_lr.optimizer.param_groups[0]["lr"]

        result_train = run_model(net, optimizer, is_train=1, data_loader=train_loader, opt=opt)

        exp_lr.step()
        # Snapshot the CPU RNG state so that running validation does not
        # change the random stream seen by the following training epochs.
        rng_state = torch.get_rng_state()
        if epo % opt.validation_epoch == 0:
            if actions == 'all':
                print("\ntest user id: {}\n".format(test_user_id))
            else:
                print("\ntest scene/action: {}\n".format(actions))
            print('>>> training epoch: {:d}, lr: {:.12f}'.format(epo, learning_rate))
            print('Training data size: {}'.format(train_data_size))
            print('Average baseline acc: {:.2f}%'.format(result_train['baseline_acc_average']*100))
            print('Average training acc: {:.2f}%'.format(result_train['prediction_acc_average']*100))

            result_valid = run_model(net, is_train=0, data_loader=valid_loader, opt=opt)
            print('Validation data size: {}'.format(valid_data_size))

            print('Average baseline acc: {:.2f}%'.format(result_valid['baseline_acc_average']*100))
            print('Average validation acc: {:.2f}%'.format(result_valid['prediction_acc_average']*100))

            if result_valid['prediction_acc_average'] > acc_best:
                acc_best = result_valid['prediction_acc_average']
                best_epoch = epo

            # the tracked quantity is an accuracy (higher is better), not an error
            print('Best validation acc: {:.2f}%, best epoch: {}'.format(acc_best*100, best_epoch))
            end_time = datetime.datetime.now()
            # total_seconds() keeps whole days; `.seconds` wraps around after 24 h
            total_training_time = (end_time - start_time).total_seconds()/60
            print('\nTotal training time: {:.1f} min'.format(total_training_time))
            local_time = time.asctime(time.localtime(time.time()))
            print('\nTraining ends at ' + local_time)

            # one CSV row: epoch, lr, training metrics, then validation metrics
            head = np.array(['epoch', 'lr']
                            + list(result_train.keys())
                            + ['valid_' + k for k in result_valid.keys()])
            result_log = np.array([epo, learning_rate]
                                  + [result_train[k] for k in result_train.keys()]
                                  + [result_valid[k] for k in result_valid.keys()])

            csv_name = 'attended_hand_recognition_results'
            model_name = 'attended_hand_recognition_model.pt'
            log.save_csv_log(opt, head, result_log, is_create=(epo == 1 or not log_created), file_name=csv_name)
            log_created = True
            # NOTE(review): the checkpoint is written after every validation,
            # not only when the accuracy improves - confirm this is intended.
            log.save_ckpt({'epoch': epo,
                           'lr': learning_rate,
                           'acc': result_valid['prediction_acc_average'],
                           'state_dict': net.state_dict(),
                           'optimizer': optimizer.state_dict()},
                          opt=opt,
                          file_name=model_name)

        torch.set_rng_state(rng_state)
|
||||
|
||||
|
||||
def eval(opt):
    """Evaluate a saved attended-hand recognition checkpoint.

    The same train/test split as in training is rebuilt from ``opt.actions``
    and ``opt.test_user_id``, the checkpoint
    ``attended_hand_recognition_model.pt`` is loaded from the split-specific
    sub-directory of ``opt.ckpt``, and baseline / model accuracies are printed
    for both splits. When ``opt.save_predictions`` is set, the per-sample
    outputs returned by ``run_model`` are also saved as ``.npy`` files next to
    the checkpoint.

    Args:
        opt: parsed options object; also passed to the model constructor and
            to ``run_model``.

    Side effects:
        Sets ``opt.joint_number`` and appends the split-specific
        sub-directory to ``opt.ckpt``.

    Raises:
        ValueError: if ``opt.actions`` is not recognised, or if it is ``'all'``
            and ``opt.test_user_id`` is not 1, 2 or 3.
    """
    data_dir = opt.data_dir
    seq_len = opt.seq_len
    opt.joint_number = opt.body_joint_number + opt.hand_joint_number*2

    print('>>> create model')
    net = attended_hand_recognition.attended_hand_recognition(opt=opt).to(opt.cuda_idx)
    print(">>> total params: {:.2f}M".format(sum(p.numel() for p in net.parameters()) / 1000000.0))

    # select the split (and with it the checkpoint directory)
    actions = opt.actions
    test_user_id = opt.test_user_id
    all_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
    # cross-user folds: test_user_id -> (checkpoint sub-directory, held-out subjects)
    user_folds = {
        1: ('/user1/', ['P0001', 'P0002', 'P0003']),
        2: ('/user2/', ['P0009', 'P0010', 'P0011']),
        3: ('/user3/', ['P0012', 'P0014', 'P0015']),
    }
    # cross-scene folds: held-out scene -> checkpoint sub-directory
    scene_folds = {'room': '/scene1/', 'kitchen': '/scene2/', 'office': '/scene3/'}

    if actions == 'all':
        if test_user_id not in user_folds:
            # previously this fell through and crashed later with a NameError
            raise ValueError("Unrecognised test_user_id: {}".format(test_user_id))
        ckpt_suffix, test_subjects = user_folds[test_user_id]
        train_subjects = [subject for subject in all_subjects if subject not in test_subjects]
        train_actions = 'all'
        test_actions = 'all'
    elif actions in scene_folds:
        ckpt_suffix = scene_folds[actions]
        # dict order (room, kitchen, office) reproduces the original training-scene order
        train_actions = [scene for scene in scene_folds if scene != actions]
        test_actions = [actions]
        train_subjects = list(all_subjects)
        test_subjects = list(all_subjects)
    else:
        raise ValueError("Unrecognised actions: {}".format(actions))
    opt.ckpt = opt.ckpt + ckpt_suffix

    # load model
    model_name = 'attended_hand_recognition_model.pt'
    model_path = os.path.join(opt.ckpt, model_name)
    print(">>> loading ckpt from '{}'".format(model_path))
    # Load onto the CPU first so a checkpoint saved on another device still
    # opens; load_state_dict then copies the weights into `net`, which already
    # lives on opt.cuda_idx.
    ckpt = torch.load(model_path, map_location='cpu')
    net.load_state_dict(ckpt['state_dict'])
    print(">>> ckpt loaded (epoch: {} | acc: {})".format(ckpt['epoch'], ckpt['acc']))

    print('>>> loading datasets')
    train_dataset = hot3d_aria_dataset.hot3d_aria_dataset(data_dir, train_subjects, seq_len, train_actions, opt.object_num, opt.sample_rate)
    train_data_size = train_dataset.dataset.shape
    print("Train data size: {}".format(train_data_size))
    train_loader = DataLoader(train_dataset, batch_size=opt.test_batch_size, shuffle=False, num_workers=0, pin_memory=True)
    test_dataset = hot3d_aria_dataset.hot3d_aria_dataset(data_dir, test_subjects, seq_len, test_actions, opt.object_num, opt.sample_rate)
    test_data_size = test_dataset.dataset.shape
    print("Test data size: {}".format(test_data_size))
    test_loader = DataLoader(test_dataset, batch_size=opt.test_batch_size, shuffle=False, num_workers=0, pin_memory=True)

    # test
    local_time = time.asctime(time.localtime(time.time()))
    print('\nTest starts at ' + local_time)
    start_time = datetime.datetime.now()
    if actions == 'all':
        print("\ntest user id: {}\n".format(test_user_id))
    else:
        print("\ntest scene/action: {}\n".format(actions))

    # run_model returns (result, predictions) only when both opt.is_eval and
    # opt.save_predictions are set; otherwise it returns just the result dict.
    if opt.save_predictions:
        result_train, predictions_train = run_model(net, is_train=0, data_loader=train_loader, opt=opt)
        result_test, predictions_test = run_model(net, is_train=0, data_loader=test_loader, opt=opt)
    else:
        result_train = run_model(net, is_train=0, data_loader=train_loader, opt=opt)
        result_test = run_model(net, is_train=0, data_loader=test_loader, opt=opt)

    print('Average train baseline acc: {:.2f}%'.format(result_train['baseline_acc_average']*100))
    print('Average train method acc: {:.2f}%'.format(result_train['prediction_acc_average']*100))
    print('Average test baseline acc: {:.2f}%'.format(result_test['baseline_acc_average']*100))
    print('Average test method acc: {:.2f}%'.format(result_test['prediction_acc_average']*100))

    end_time = datetime.datetime.now()
    # total_seconds() keeps whole days; `.seconds` wraps around after 24 h
    total_test_time = (end_time - start_time).total_seconds()/60
    print('\nTotal test time: {:.1f} min'.format(total_test_time))
    local_time = time.asctime(time.localtime(time.time()))
    print('\nTest ends at ' + local_time)

    if opt.save_predictions:
        for split_name, split_predictions in (('train', predictions_train), ('test', predictions_test)):
            # last three channels: two class probabilities, then the ground-truth label
            prediction = split_predictions[:, :, -3:-1].reshape(-1, 2)
            attended_hand_gt = split_predictions[:, :, -1:].reshape(-1)
            y_prd = np.argmax(prediction, axis=1)
            acc = np.sum(y_prd == attended_hand_gt)/prediction.shape[0]
            print('Average {} acc: {:.2f}%'.format(split_name, acc*100))
            predictions_path = os.path.join(opt.ckpt, "attended_hand_recognition_{}.npy".format(split_name))
            np.save(predictions_path, split_predictions)
|
||||
|
||||
|
||||
def run_model(net, optimizer=None, is_train=1, data_loader=None, opt=None):
    """Run one pass of ``net`` over ``data_loader`` and report accuracies.

    Each batch is sliced along its last dimension, where J is
    ``opt.joint_number`` and K is ``opt.object_num``::

        [0, 3)                      eye gaze
        [3, (J+1)*3)                joints
        [(J+1)*3, (J+2)*3)          head direction
        [(J+2)*3, (J+2+16*K)*3)     object positions (fed to the net only if K > 0)
        (J+2+16*K)*3                attended-hand ground-truth label
        (J+2+16*K)*3 + 1            attended-hand baseline label

    Args:
        net: model called as ``net(input, input_n=opt.seq_len)``; its output is
            reshaped to ``(-1, 2)`` class scores.
        optimizer: optimizer stepped once per batch; required when
            ``is_train == 1``.
        is_train: 1 to train (``net.train()``, gradients enabled), anything
            else to evaluate (``net.eval()``, gradients disabled).
        data_loader: iterable yielding 3-D batches ``(batch, seq, dim)``.
        opt: options object; reads ``seq_len``, ``joint_number``,
            ``object_num``, ``cuda_idx``, ``is_eval`` and ``save_predictions``.

    Returns:
        A dict with ``"baseline_acc_average"`` and ``"prediction_acc_average"``
        (fraction of correct labels, as floats). When both ``opt.is_eval`` and
        ``opt.save_predictions`` are truthy, a tuple ``(result, predictions)``
        where ``predictions`` is a NumPy array concatenating, per sample, eye
        gaze, net input, softmax class probabilities and the ground-truth label.

    Raises:
        ValueError: if no batch was processed (empty loader, or every training
            batch held a single sample and was skipped).
    """
    if is_train == 1:
        net.train()
    else:
        net.eval()

    save_predictions = opt.is_eval and opt.save_predictions
    # per-batch arrays, concatenated once at the end (avoids quadratic copying)
    prediction_batches = []

    prediction_correct = 0
    baseline_correct = 0
    criterion = torch.nn.CrossEntropyLoss()

    n = 0
    input_n = opt.seq_len
    joint_number = opt.joint_number
    object_num = opt.object_num
    # column boundaries of the packed feature vector (loop-invariant)
    joints_end = (joint_number+1)*3
    head_end = (joint_number+2)*3
    objects_end = (joint_number+2+8*object_num*2)*3

    # gradients are only needed while training; disabling them during
    # evaluation avoids building autograd graphs
    with torch.set_grad_enabled(is_train == 1):
        for data in data_loader:
            batch_size, seq_n, dim = data.shape
            # when only one sample in this batch
            if batch_size == 1 and is_train == 1:
                continue
            n += batch_size*seq_n
            data = data.float().to(opt.cuda_idx)

            eye_gaze = data[:, :, :3]
            joints = data[:, :, 3:joints_end]
            head_directions = data[:, :, joints_end:head_end]
            # .long() converts on the tensor's own device
            attended_hand_gt = data[:, :, objects_end:objects_end+1].long()
            attended_hand_baseline = data[:, :, objects_end+1:objects_end+2].long()

            net_input = torch.cat((joints, head_directions), dim=2)
            if object_num > 0:
                object_positions = data[:, :, head_end:objects_end]
                net_input = torch.cat((net_input, object_positions), dim=2)
            prediction = net(net_input, input_n=input_n)

            if save_predictions:
                # eye_gaze + joints + head_directions + object_positions + predictions + attended_hand_gt
                prediction = torch.nn.functional.softmax(prediction, dim=2)
                batch_record = torch.cat((eye_gaze, net_input), dim=2)
                batch_record = torch.cat((batch_record, prediction), dim=2)
                batch_record = torch.cat((batch_record, attended_hand_gt), dim=2)
                prediction_batches.append(batch_record.detach().cpu().numpy())

            attended_hand_gt = attended_hand_gt.reshape(batch_size*input_n)
            attended_hand_baseline = attended_hand_baseline.reshape(batch_size*input_n)
            prediction = prediction.reshape(-1, 2)

            if is_train == 1:
                # the loss is only needed to drive the optimizer
                loss = criterion(prediction, attended_hand_gt)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # count correct labels as integers and divide once at the end
            _, y_prd = torch.max(prediction.detach(), 1)
            prediction_correct += int(torch.sum(y_prd == attended_hand_gt).item())
            baseline_correct += int(torch.sum(attended_hand_gt == attended_hand_baseline).item())

    if n == 0:
        raise ValueError("run_model processed no samples: the data loader is empty "
                         "or every training batch contained a single sample")

    result = {}
    result["baseline_acc_average"] = baseline_correct / n
    result["prediction_acc_average"] = prediction_correct / n

    if save_predictions:
        return result, np.concatenate(prediction_batches, axis=0)
    else:
        return result
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: parse the options, then train with main() unless
    # evaluation of a saved checkpoint was requested via option.is_eval.
    option = options().parse()
    entry_point = main if option.is_eval == False else eval
    entry_point(option)
|
Loading…
Add table
Add a link
Reference in a new issue