first commit

2025-04-30 14:15:00 +02:00 · 2025-04-30 14:15:00 +02:00 · 8f6b6a34e7
commit 8f6b6a34e7
parent 99ce0acafb
73 changed files with 11656 additions and 0 deletions
--- a/attended_hand_recognition_hot3d.py
+++ b/attended_hand_recognition_hot3d.py
@@ -0,0 +1,368 @@
+from utils import hot3d_aria_dataset, seed_torch
+from model import attended_hand_recognition
+from utils.opt import options
+from utils import log
+from torch.utils.data import DataLoader
+import torch
+import numpy as np
+import time
+import datetime
+import torch.optim as optim
+import os
+os.nice(5)
+import math
+
+
+def main(opt):
+    # set the random seed to ensure reproducibility
+    seed_torch.seed_torch(seed=0)
+    torch.set_num_threads(1)
+
+    data_dir = opt.data_dir
+    seq_len = opt.seq_len
+    opt.joint_number = opt.body_joint_number + opt.hand_joint_number*2    
+    learning_rate = opt.learning_rate
+    print('>>> create model')
+    net = attended_hand_recognition.attended_hand_recognition(opt=opt).to(opt.cuda_idx)
+    optimizer = optim.AdamW(filter(lambda x: x.requires_grad, net.parameters()), lr=learning_rate, weight_decay=opt.weight_decay)
+    print(">>> total params: {:.2f}M".format(sum(p.numel() for p in net.parameters()) / 1000000.0))
+    print('>>> loading datasets')
+    
+    actions = opt.actions
+    test_user_id = opt.test_user_id
+    if actions == 'all':            
+        if test_user_id == 1:
+            train_actions = 'all'
+            test_actions = 'all'
+            train_subjects = ['P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+            test_subjects = ['P0001', 'P0002', 'P0003']
+            opt.ckpt = opt.ckpt + '/user1/'
+        if test_user_id == 2:    
+            train_actions = 'all'
+            test_actions = 'all'        
+            train_subjects = ['P0001', 'P0002', 'P0003', 'P0012', 'P0014', 'P0015']
+            test_subjects = ['P0009', 'P0010', 'P0011']
+            opt.ckpt = opt.ckpt + '/user2/'
+        if test_user_id == 3:
+            train_actions = 'all'
+            test_actions = 'all'        
+            train_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011']
+            test_subjects = ['P0012', 'P0014', 'P0015']
+            opt.ckpt = opt.ckpt + '/user3/'
+    elif actions == 'room':
+        train_actions = ['kitchen', 'office']
+        test_actions = ['room']        
+        train_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        test_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        opt.ckpt = opt.ckpt + '/scene1/'
+    elif actions == 'kitchen':
+        train_actions = ['room', 'office']
+        test_actions = ['kitchen']        
+        train_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        test_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        opt.ckpt = opt.ckpt + '/scene2/'
+    elif actions == 'office':
+        train_actions = ['room', 'kitchen']
+        test_actions = ['office']        
+        train_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        test_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        opt.ckpt = opt.ckpt + '/scene3/'
+    else:
+        raise( ValueError, "Unrecognised actions: %d" % actions)
+        
+    if not os.path.isdir(opt.ckpt):
+        os.makedirs(opt.ckpt)
+        
+    train_dataset = hot3d_aria_dataset.hot3d_aria_dataset(data_dir, train_subjects, seq_len, train_actions, opt.object_num, opt.sample_rate)
+    train_data_size = train_dataset.dataset.shape
+    print("Training data size: {}".format(train_data_size))
+    train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=0, pin_memory=True)
+    valid_dataset = hot3d_aria_dataset.hot3d_aria_dataset(data_dir, test_subjects, seq_len, test_actions, opt.object_num, opt.sample_rate)
+    valid_data_size = valid_dataset.dataset.shape
+    print("Validation data size: {}".format(valid_data_size))                
+    valid_loader = DataLoader(valid_dataset, batch_size=opt.test_batch_size, shuffle=False, num_workers=0, pin_memory=True)
+    
+    # training
+    local_time = time.asctime(time.localtime(time.time()))
+    print('\nTraining starts at ' + local_time)
+    start_time = datetime.datetime.now()
+    start_epoch = 1
+
+    acc_best = 0
+    best_epoch = 0
+    exp_lr = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=opt.gamma, last_epoch=-1)
+    for epo in range(start_epoch, opt.epoch + 1):
+        is_best = False            
+        learning_rate = exp_lr.optimizer.param_groups[0]["lr"]
+            
+        train_start_time = datetime.datetime.now()
+        result_train = run_model(net, optimizer, is_train=1, data_loader=train_loader, opt=opt)
+        train_end_time = datetime.datetime.now()
+        train_time = (train_end_time - train_start_time).seconds*1000
+        train_batch_num = math.ceil(train_data_size[0]/opt.batch_size)
+        train_time_per_batch = math.ceil(train_time/train_batch_num)
+        #print('\nTraining time per batch: {} ms'.format(train_time_per_batch))
+        
+        exp_lr.step()
+        rng_state = torch.get_rng_state()
+        if epo % opt.validation_epoch == 0:
+            if actions == 'all':
+                print("\ntest user id: {}\n".format(test_user_id))
+            elif actions == 'room':
+                print("\ntest scene/action: room\n")
+            elif actions == 'kitchen':
+                print("\ntest scene/action: kitchen\n")
+            elif actions == 'office':
+                print("\ntest scene/action: office\n")               
+            print('>>> training epoch: {:d}, lr: {:.12f}'.format(epo, learning_rate))
+            print('Training data size: {}'.format(train_data_size))          
+            print('Average baseline acc: {:.2f}%'.format(result_train['baseline_acc_average']*100))
+            print('Average training acc: {:.2f}%'.format(result_train['prediction_acc_average']*100))
+            
+            test_start_time = datetime.datetime.now()
+            result_valid = run_model(net, is_train=0, data_loader=valid_loader, opt=opt)                        
+            test_end_time = datetime.datetime.now()
+            test_time = (test_end_time - test_start_time).seconds*1000
+            test_batch_num = math.ceil(valid_data_size[0]/opt.test_batch_size)
+            test_time_per_batch = math.ceil(test_time/test_batch_num)
+            #print('\nTest time per batch: {} ms'.format(test_time_per_batch))
+            print('Validation data size: {}'.format(valid_data_size))
+            
+            print('Average baseline acc: {:.2f}%'.format(result_valid['baseline_acc_average']*100))
+            print('Average validation acc: {:.2f}%'.format(result_valid['prediction_acc_average']*100))
+            
+            if result_valid['prediction_acc_average'] > acc_best:
+                acc_best = result_valid['prediction_acc_average']
+                is_best = True
+                best_epoch = epo
+                
+            print('Best validation error: {:.2f}%, best epoch: {}'.format(acc_best*100, best_epoch))                                                
+            end_time = datetime.datetime.now()
+            total_training_time = (end_time - start_time).seconds/60
+            print('\nTotal training time: {:.1f} min'.format(total_training_time))
+            local_time = time.asctime(time.localtime(time.time()))
+            print('\nTraining ends at ' + local_time)
+            
+            result_log = np.array([epo, learning_rate])
+            head = np.array(['epoch', 'lr'])
+            for k in result_train.keys():
+                result_log = np.append(result_log, [result_train[k]])
+                head = np.append(head, [k])
+            for k in result_valid.keys():
+                result_log = np.append(result_log, [result_valid[k]])
+                head = np.append(head, ['valid_' + k])
+            
+            csv_name = 'attended_hand_recognition_results'
+            model_name = 'attended_hand_recognition_model.pt'
+            log.save_csv_log(opt, head, result_log, is_create=(epo == 1), file_name=csv_name)
+            log.save_ckpt({'epoch': epo,
+                           'lr': learning_rate,
+                           'acc': result_valid['prediction_acc_average'],
+                           'state_dict': net.state_dict(),
+                           'optimizer': optimizer.state_dict()},
+                            opt=opt,
+                            file_name = model_name)
+                            
+        torch.set_rng_state(rng_state)
+
+        
+def eval(opt):
+    data_dir = opt.data_dir
+    seq_len = opt.seq_len
+    opt.joint_number = opt.body_joint_number + opt.hand_joint_number*2
+    
+    print('>>> create model')
+    net = attended_hand_recognition.attended_hand_recognition(opt=opt).to(opt.cuda_idx)    
+    print(">>> total params: {:.2f}M".format(sum(p.numel() for p in net.parameters()) / 1000000.0))    
+    #load model    
+    actions = opt.actions
+    test_user_id = opt.test_user_id
+    if actions == 'all':            
+        if test_user_id == 1:
+            train_actions = 'all'
+            test_actions = 'all'
+            train_subjects = ['P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+            test_subjects = ['P0001', 'P0002', 'P0003']
+            opt.ckpt = opt.ckpt + '/user1/'
+        if test_user_id == 2:    
+            train_actions = 'all'
+            test_actions = 'all'        
+            train_subjects = ['P0001', 'P0002', 'P0003', 'P0012', 'P0014', 'P0015']
+            test_subjects = ['P0009', 'P0010', 'P0011']
+            opt.ckpt = opt.ckpt + '/user2/'
+        if test_user_id == 3:
+            train_actions = 'all'
+            test_actions = 'all'        
+            train_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011']
+            test_subjects = ['P0012', 'P0014', 'P0015']
+            opt.ckpt = opt.ckpt + '/user3/'
+    elif actions == 'room':
+        train_actions = ['kitchen', 'office']
+        test_actions = ['room']        
+        train_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        test_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        opt.ckpt = opt.ckpt + '/scene1/'
+    elif actions == 'kitchen':
+        train_actions = ['room', 'office']
+        test_actions = ['kitchen']        
+        train_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        test_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        opt.ckpt = opt.ckpt + '/scene2/'
+    elif actions == 'office':
+        train_actions = ['room', 'kitchen']
+        test_actions = ['office']        
+        train_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        test_subjects = ['P0001', 'P0002', 'P0003', 'P0009', 'P0010', 'P0011', 'P0012', 'P0014', 'P0015']
+        opt.ckpt = opt.ckpt + '/scene3/'
+    else:
+        raise( ValueError, "Unrecognised actions: %d" % actions)
+            
+    model_name = 'attended_hand_recognition_model.pt'
+    model_path = os.path.join(opt.ckpt, model_name)    
+    print(">>> loading ckpt from '{}'".format(model_path))
+    ckpt = torch.load(model_path)
+    net.load_state_dict(ckpt['state_dict'])
+    print(">>> ckpt loaded (epoch: {} | acc: {})".format(ckpt['epoch'], ckpt['acc']))
+    
+    print('>>> loading datasets')                  
+    train_dataset = hot3d_aria_dataset.hot3d_aria_dataset(data_dir, train_subjects, seq_len, train_actions, opt.object_num, opt.sample_rate)
+    train_data_size = train_dataset.dataset.shape
+    print("Train data size: {}".format(train_data_size))                
+    train_loader = DataLoader(train_dataset, batch_size=opt.test_batch_size, shuffle=False, num_workers=0, pin_memory=True)            
+    test_dataset = hot3d_aria_dataset.hot3d_aria_dataset(data_dir, test_subjects, seq_len, test_actions, opt.object_num, opt.sample_rate)
+    test_data_size = test_dataset.dataset.shape
+    print("Test data size: {}".format(test_data_size))                
+    test_loader = DataLoader(test_dataset, batch_size=opt.test_batch_size, shuffle=False, num_workers=0, pin_memory=True)
+
+    # test
+    local_time = time.asctime(time.localtime(time.time()))
+    print('\nTest starts at ' + local_time)
+    start_time = datetime.datetime.now()
+    if actions == 'all':
+        print("\ntest user id: {}\n".format(test_user_id))
+    elif actions == 'room':
+        print("\ntest scene/action: room\n")
+    elif actions == 'kitchen':
+        print("\ntest scene/action: kitchen\n")
+    elif actions == 'office':
+        print("\ntest scene/action: office\n")    
+    if opt.save_predictions:
+        result_train, predictions_train = run_model(net, is_train=0, data_loader=train_loader, opt=opt)
+        result_test, predictions_test = run_model(net, is_train=0, data_loader=test_loader, opt=opt)
+    else:
+        result_train = run_model(net, is_train=0, data_loader=train_loader, opt=opt)
+        result_test = run_model(net, is_train=0, data_loader=test_loader, opt=opt)
+
+    print('Average train baseline acc: {:.2f}%'.format(result_train['baseline_acc_average']*100))
+    print('Average train method acc: {:.2f}%'.format(result_train['prediction_acc_average']*100))    
+    print('Average test baseline acc: {:.2f}%'.format(result_test['baseline_acc_average']*100))
+    print('Average test method acc: {:.2f}%'.format(result_test['prediction_acc_average']*100))
+    
+    end_time = datetime.datetime.now()
+    total_test_time = (end_time - start_time).seconds/60
+    print('\nTotal test time: {:.1f} min'.format(total_test_time))
+    local_time = time.asctime(time.localtime(time.time()))
+    print('\nTest ends at ' + local_time)
+    
+    if opt.save_predictions:
+        prediction = predictions_train[:, :, -3:-1].reshape(-1, 2)
+        attended_hand_gt = predictions_train[:, :, -1:].reshape(-1)
+        y_prd = np.argmax(prediction, axis=1)
+        acc = np.sum(y_prd == attended_hand_gt)/prediction.shape[0]
+        print('Average train acc: {:.2f}%'.format(acc*100))        
+        predictions_train_path = os.path.join(opt.ckpt, "attended_hand_recognition_train.npy")
+        np.save(predictions_train_path, predictions_train)        
+        
+        prediction = predictions_test[:, :, -3:-1].reshape(-1, 2)
+        attended_hand_gt = predictions_test[:, :, -1:].reshape(-1)
+        y_prd = np.argmax(prediction, axis=1)
+        acc = np.sum(y_prd == attended_hand_gt)/prediction.shape[0]
+        print('Average test acc: {:.2f}%'.format(acc*100))        
+        predictions_test_path = os.path.join(opt.ckpt, "attended_hand_recognition_test.npy")
+        np.save(predictions_test_path, predictions_test)        
+
+        
+def run_model(net, optimizer=None, is_train=1, data_loader=None, opt=None):
+    if is_train == 1:
+        net.train()
+    else:
+        net.eval()
+            
+    if opt.is_eval and opt.save_predictions:
+        predictions = []
+    
+    prediction_acc_average = 0
+    baseline_acc_average = 0
+    criterion = torch.nn.CrossEntropyLoss()
+    
+    n = 0    
+    input_n = opt.seq_len
+    
+    for i, (data) in enumerate(data_loader):
+        batch_size, seq_n, dim = data.shape
+        joint_number = opt.joint_number
+        object_num = opt.object_num
+        # when only one sample in this batch
+        if batch_size == 1 and is_train == 1:
+            continue        
+        n += batch_size*seq_n
+        data = data.float().to(opt.cuda_idx)
+                
+        eye_gaze = data.clone()[:, :, :3]
+        joints = data.clone()[:, :, 3:(joint_number+1)*3]
+        head_directions = data.clone()[:, :, (joint_number+1)*3:(joint_number+2)*3]
+        attended_hand_gt = data.clone()[:, :, (joint_number+2+8*object_num*2)*3:(joint_number+2+8*object_num*2)*3+1].type(torch.LongTensor).to(opt.cuda_idx)
+        attended_hand_baseline = data.clone()[:, :, (joint_number+2+8*object_num*2)*3+1:(joint_number+2+8*object_num*2)*3+2].type(torch.LongTensor).to(opt.cuda_idx)
+                        
+        input = torch.cat((joints, head_directions), dim=2)
+        if object_num > 0:
+            object_positions = data.clone()[:, :, (joint_number+2)*3:(joint_number+2+8*object_num*2)*3]
+            input = torch.cat((input, object_positions), dim=2)                        
+        prediction = net(input, input_n=input_n)            
+        
+        if opt.is_eval and opt.save_predictions:
+            # eye_gaze + joints + head_directions + object_positions + predictions + attended_hand_gt
+            prediction = torch.nn.functional.softmax(prediction, dim=2)
+            prediction_cpu = torch.cat((eye_gaze, input), dim=2)            
+            prediction_cpu = torch.cat((prediction_cpu, prediction), dim=2)
+            prediction_cpu = torch.cat((prediction_cpu, attended_hand_gt), dim=2)
+            prediction_cpu = prediction_cpu.cpu().data.numpy()
+            if len(predictions) == 0:
+                predictions = prediction_cpu                
+            else:
+                predictions = np.concatenate((predictions, prediction_cpu), axis=0)
+                
+        attended_hand_gt = attended_hand_gt.reshape(batch_size*input_n)
+        attended_hand_baseline = attended_hand_baseline.reshape(batch_size*input_n)
+        prediction = prediction.reshape(-1, 2)        
+        loss = criterion(prediction, attended_hand_gt)
+                
+        if is_train == 1:            
+            optimizer.zero_grad()
+            loss.backward()                        
+            optimizer.step()
+
+        # calculate prediction accuracy
+        _, y_prd = torch.max(prediction.data, 1)
+        acc = torch.sum(y_prd == attended_hand_gt)/(batch_size*input_n)
+        prediction_acc_average += acc.cpu().data.numpy() * batch_size*input_n
+        
+        acc = torch.sum(attended_hand_gt == attended_hand_baseline)/(batch_size*input_n)
+        baseline_acc_average += acc.cpu().data.numpy() * batch_size*input_n
+                    
+    result = {}
+    result["baseline_acc_average"] = baseline_acc_average / n
+    result["prediction_acc_average"] = prediction_acc_average / n
+        
+    if opt.is_eval and opt.save_predictions:        
+        return result, predictions
+    else:
+        return result
+
+        
+if __name__ == '__main__':    
+    option = options().parse()
+    if option.is_eval == False:
+        main(option)
+    else:
+        eval(option)