HOIGaze/hot3d_processing/hot3d_aria_preprocessing.ipynb
2025-04-30 14:15:00 +02:00

40 KiB

In [ ]:
import os
os.nice(5)
import rerun as rr
import numpy as np
from math import tan
import time
from utils import remake_dir
import pandas as pd
from dataset_api import Hot3dDataProvider
from data_loaders.loader_object_library import load_object_library
from data_loaders.mano_layer import MANOHandModel
from data_loaders.loader_masks import combine_mask_data, load_mask_data, MaskData
from data_loaders.loader_hand_poses import Handedness, HandPose
from data_loaders.loader_object_library import ObjectLibrary
from data_loaders.headsets import Headset
from projectaria_tools.core.stream_id import StreamId
from projectaria_tools.core.sensor_data import TimeDomain, TimeQueryOptions
from projectaria_tools.core.sophus import SE3


# Input/output locations. Update these to valid paths for your system.
dataset_path = '/datasets/public/zhiming_datasets/hot3d/aria/'
dataset_processed_path = '/scratch/hu/pose_forecast/hot3d_hoigaze/'
object_library_path = '/datasets/public/zhiming_datasets/hot3d/assets/'
mano_hand_model_path = '/datasets/public/zhiming_datasets/hot3d/mano_v1_2/models/'
# NOTE(review): remake_dir presumably wipes and recreates the output folder - confirm in utils.
remake_dir(dataset_processed_path)
# table with one row per recording; the 'sequence_name' and 'scene' columns are read later on
dataset_info = pd.read_csv('hot3d_aria_scene.csv')
valid_frame_length = 60 # 60 frames -> 2 seconds, dropout the recordings that are too short

# init the object library
if not os.path.exists(object_library_path):
    print("invalid library path.")
    print("please do update the path to VALID values for your system.")
    # a bare `raise` has no active exception here and would only surface as an unrelated RuntimeError
    raise FileNotFoundError("object library path does not exist: " + object_library_path)
object_library = load_object_library(object_library_folderpath=object_library_path)

# build a lookup table: str(object_uid) -> [x_min, x_max, y_min, y_max, z_min, z_max]
object_info = pd.read_csv('hot3d_objects.csv')
bbx_columns = ['bbx_x_min', 'bbx_x_max', 'bbx_y_min', 'bbx_y_max', 'bbx_z_min', 'bbx_z_max']
object_bbx = {}
for row, uid in enumerate(object_info['object_uid']):
    # one value per extent column, looked up by the row label of this object
    object_bbx[str(uid)] = [object_info[column][row] for column in bbx_columns]
    
# Create the MANO hand model when a model folder is configured.
# If it stays None, the UmeTrack HANDs model will be used instead.
mano_hand_model = (
    MANOHandModel(mano_hand_model_path)
    if mano_hand_model_path is not None
    else None
)
    
def _unit(vector):
    """Return ``vector`` divided by its Euclidean norm."""
    return vector / np.linalg.norm(vector)


def _quat_xyzw(rotation):
    """Return the quaternion of ``rotation`` rolled from w,x,y,z to x,y,z,w order."""
    return np.roll(rotation.to_quat()[0], -1)


def _valid_segments(masks):
    """Return inclusive (start, end) index pairs, one per maximal run of true mask values."""
    segments = []
    run_start = None
    for index, is_valid in enumerate(masks):
        if is_valid:
            if run_start is None:
                run_start = index
        elif run_start is not None:
            segments.append((run_start, index - 1))
            run_start = None
    if run_start is not None:
        # the last run extends to the end of the sequence
        segments.append((run_start, len(masks) - 1))
    return segments


def _hand_features(hand_data_provider, hand, handedness):
    """Return (translation, rotation_xyzw, joint_angles, joints, initial_joints) of one hand.

    ``joints`` are the flattened landmarks of the tracked pose; ``initial_joints`` are the
    flattened landmarks of the same wrist pose with all 15 joint angles set to zero.
    """
    translation = hand.wrist_pose.translation()[0]
    rotation = _quat_xyzw(hand.wrist_pose.rotation())
    joints = hand_data_provider.get_hand_landmarks(hand).numpy().reshape(-1)
    hand_initial = HandPose(handedness, hand.wrist_pose, np.zeros(15))
    initial_joints = hand_data_provider.get_hand_landmarks(hand_initial).numpy().reshape(-1)
    return translation, rotation, hand.joint_angles, joints, initial_joints


def _angle_to_gaze(gaze_direction, hand_joints, head_translation):
    """Return the angle between the gaze direction and the head-to-hand-centroid direction."""
    hand_direction = _unit(np.mean(hand_joints.reshape((20, 3)), axis=0) - head_translation)
    # NOTE(review): arccos returns nan if rounding pushes the dot product outside [-1, 1];
    # a nan makes the `<` comparisons below False, i.e. the right hand (1) is selected.
    return np.arccos(np.sum(gaze_direction * hand_direction))


def _rank_objects_by_hand_distance(object_poses, hand_joints):
    """Return [(object_uid, distance), ...] sorted by mean joint-to-object distance, then uid."""
    joints = hand_joints.reshape((20, 3))
    distances = {
        uid: np.mean(np.linalg.norm(joints - pose.translation()[0], axis=1))
        for uid, pose in object_poses.items()
    }
    return sorted(distances.items(), key=lambda kv: (kv[1], kv[0]))


def _bbx_corners_in_world(T_world_object, bbx):
    """Return the 8 bounding-box corners transformed by ``T_world_object``, flattened to 24 values.

    ``bbx`` is [x_min, x_max, y_min, y_max, z_min, z_max]; the corner order is the one
    expected by the saved ``*_object_bbx*.npy`` files.
    """
    x_min, x_max, y_min, y_max, z_min, z_max = bbx
    corners = (
        (x_min, y_min, z_min),
        (x_max, y_min, z_min),
        (x_max, y_min, z_max),
        (x_min, y_min, z_max),
        (x_min, y_max, z_max),
        (x_max, y_max, z_max),
        (x_max, y_max, z_min),
        (x_min, y_max, z_min),
    )
    flattened = np.empty(24, dtype=np.float64)
    for k, corner in enumerate(corners):
        vertex = np.array(corner, dtype=np.float64)
        flattened[3 * k:3 * k + 3] = (T_world_object @ vertex).reshape(3, )
    return flattened


# masks used to segment a sequence into valid and invalid parts
mask_list = [
    "masks/mask_object_pose_available.csv",
    "masks/mask_hand_pose_available.csv",
    "masks/mask_headset_pose_available.csv",
    # intentionally disabled:
    #"masks/mask_object_visibility.csv",
    #"masks/mask_hand_visible.csv",
    #"masks/mask_good_exposure.csv",
    "masks/mask_qa_pass.csv"]

# every pose lookup uses the sample closest to the frame timestamp in the time-code domain
pose_query = dict(time_query_options=TimeQueryOptions.CLOSEST, time_domain=TimeDomain.TIME_CODE)

for seq_number, (seq, scene) in enumerate(zip(dataset_info['sequence_name'], dataset_info['scene']), start=1):
    print("\nprocessing {}th seq: {}, scene: {}...".format(seq_number, seq, scene))
    seq_path = dataset_path + seq + '/'
    if not os.path.exists(seq_path):
        print("invalid input sequence path.")
        print("please do update the path to VALID values for your system.")
        # a bare `raise` has no active exception here and would only surface as an unrelated RuntimeError
        raise FileNotFoundError("sequence path does not exist: " + seq_path)
    save_path = dataset_processed_path + seq + '_' + scene + '_'

    # load the referred masks that exist for this sequence
    mask_paths = [os.path.join(seq_path, name) for name in mask_list]
    mask_data_list = [load_mask_data(path) for path in mask_paths if os.path.exists(path)]
    # combine the masks (you can choose logical "and"/"or")
    combined_masks = combine_mask_data(mask_data_list, "and")
    masks = list(combined_masks.data['214-1'].values())
    print("valid frames: {}/{}".format(sum(masks), len(masks)))

    # initialize hot3d data provider
    hot3d_data_provider = Hot3dDataProvider(
        sequence_folder=seq_path,
        object_library=object_library,
        mano_hand_model=mano_hand_model)
    # alias over the HAND pose data provider (MANO if available, UmeTrack otherwise)
    hand_data_provider = hot3d_data_provider.mano_hand_data_provider
    if hand_data_provider is None:
        hand_data_provider = hot3d_data_provider.umetrack_hand_data_provider
    # alias over the Object pose data provider
    object_pose_data_provider = hot3d_data_provider.object_pose_data_provider
    # alias over the HEADSET/Device pose data provider
    device_pose_provider = hot3d_data_provider.device_pose_data_provider
    # alias over the Device data provider
    device_data_provider = hot3d_data_provider.device_data_provider
    transform_device_cpf = device_data_provider.get_device_calibration().get_transform_device_cpf()
    # retrieve a list of timestamps for the sequence (in nanoseconds)
    # NOTE(review): mask positions are used as indices into this list - assumes both are aligned
    timestamps = device_data_provider.get_sequence_timestamps()

    print('\nprocessing starts at ' + time.asctime(time.localtime(time.time())))
    for start_frame, end_frame in _valid_segments(masks):
        frame_length = end_frame - start_frame + 1
        if frame_length < valid_frame_length:
            # drop the segments that are too short
            continue
        print("start frame: {}, end frame: {}, length: {}".format(start_frame, end_frame, frame_length))

        timestamps_data = np.zeros((frame_length, 1))
        head_data = np.zeros((frame_length, 10)) # head_direction (3) + head_translation (3) + head_rotation (4, quat_xyzw)
        gaze_data = np.zeros((frame_length, 6)) # gaze_direction (3) + gaze_center_in_world (3)
        hand_data = np.zeros((frame_length, 44)) # left_hand (22) + right_hand (22), hand = wrist_pose (7, translation (3) + rotation (4)) + joint_angles (15)
        hand_joint_data = np.zeros((frame_length, 122)) # left_hand (20*3) + right_hand (20*3) + hand closest to gaze (0 left, 1 right) + hand closest to an object (0 left, 1 right)
        hand_joint_initial_data = np.zeros((frame_length, 122)) # same layout as hand_joint_data, computed with zero joint angles
        object_data = np.zeros((frame_length, 48)) # object_data (8) * 6 objects (at most 6 objects), object_data = object_uid (1) + object_pose (7, translation (3) + rotation (4))
        object_bbx_data = np.zeros((frame_length, 144)) # bounding box information: 6 objects (at most 6 objects) * 8 vertexes * 3
        object_bbx_left_hand_data = np.zeros((frame_length, 144)) # bounding boxes of the objects ranked using distances to the left hand
        object_bbx_right_hand_data = np.zeros((frame_length, 144)) # bounding boxes of the objects ranked using distances to the right hand
        object_bbx_left_hand_initial_data = np.zeros((frame_length, 144)) # as above, ranked using the zero-joint-angle left hand
        object_bbx_right_hand_initial_data = np.zeros((frame_length, 144)) # as above, ranked using the zero-joint-angle right hand

        # extract the valid frames
        for row, frame in enumerate(range(start_frame, end_frame + 1)):
            timestamp_ns = timestamps[frame]
            timestamps_data[row] = timestamp_ns

            # extract head data
            headset_pose3d = device_pose_provider.get_pose_at_timestamp(
                timestamp_ns=timestamp_ns, **pose_query).pose3d
            # use cpf pose as head pose, see https://facebookresearch.github.io/projectaria_tools/docs/data_formats/coordinate_convention/3d_coordinate_frame_convention
            T_world_cpf = headset_pose3d.T_world_device @ transform_device_cpf
            head_translation = T_world_cpf.translation()[0]
            # the point one unit along the cpf z axis gives the head direction
            head_center_in_cpf = np.array([0, 0, 1.0], dtype = np.float64)
            head_center_in_world = (T_world_cpf @ head_center_in_cpf).reshape(3, )
            head_data[row, 0:3] = _unit(head_center_in_world - head_translation)
            head_data[row, 3:6] = head_translation
            head_data[row, 6:10] = _quat_xyzw(T_world_cpf.rotation())

            # extract eye gaze data
            aria_eye_gaze_data = device_data_provider.get_eye_gaze(timestamp_ns)
            depth = aria_eye_gaze_data.depth
            if depth == 0:
                depth = 1
            gaze_center_in_cpf = np.array(
                [tan(aria_eye_gaze_data.yaw), tan(aria_eye_gaze_data.pitch), 1.0], dtype = np.float64)*depth
            gaze_center_in_world = (T_world_cpf @ gaze_center_in_cpf).reshape(3, )
            gaze_direction = _unit(gaze_center_in_world - head_translation)
            # in rare cases, yaw, pitch is nan
            if np.isnan(np.sum(gaze_direction)):
                # use previous frame as an alternative
                # NOTE(review): on the first frame of a segment row-1 is -1, which reads the
                # still-zero last row instead of a real previous frame.
                gaze_direction = gaze_data[row - 1, 0:3]
                gaze_center_in_world = gaze_data[row - 1, 3:6]
            gaze_data[row, 0:3] = gaze_direction
            gaze_data[row, 3:6] = gaze_center_in_world

            # extract hand data
            hand_pose_collection = hand_data_provider.get_pose_at_timestamp(
                timestamp_ns=timestamp_ns, **pose_query).pose3d_collection
            (left_hand_translation, left_hand_rotation, left_hand_joint_angles,
             left_hand_joints, left_hand_initial_joints) = _hand_features(
                hand_data_provider, hand_pose_collection.poses[Handedness.Left], Handedness.Left)
            (right_hand_translation, right_hand_rotation, right_hand_joint_angles,
             right_hand_joints, right_hand_initial_joints) = _hand_features(
                hand_data_provider, hand_pose_collection.poses[Handedness.Right], Handedness.Right)

            # column 120: the hand whose centroid direction is angularly closest to the gaze
            left_angle = _angle_to_gaze(gaze_direction, left_hand_joints, head_translation)
            right_angle = _angle_to_gaze(gaze_direction, right_hand_joints, head_translation)
            hand_joint_data[row, 120] = 0 if left_angle < right_angle else 1
            left_initial_angle = _angle_to_gaze(gaze_direction, left_hand_initial_joints, head_translation)
            right_initial_angle = _angle_to_gaze(gaze_direction, right_hand_initial_joints, head_translation)
            hand_joint_initial_data[row, 120] = 0 if left_initial_angle < right_initial_angle else 1

            hand_data[row, 0:3] = left_hand_translation
            hand_data[row, 3:7] = left_hand_rotation
            hand_data[row, 7:22] = left_hand_joint_angles
            hand_data[row, 22:25] = right_hand_translation
            hand_data[row, 25:29] = right_hand_rotation
            hand_data[row, 29:44] = right_hand_joint_angles
            hand_joint_data[row, 0:60] = left_hand_joints
            hand_joint_data[row, 60:120] = right_hand_joints
            hand_joint_initial_data[row, 0:60] = left_hand_initial_joints
            hand_joint_initial_data[row, 60:120] = right_hand_initial_joints

            # extract object data
            objects_pose3d = object_pose_data_provider.get_pose_at_timestamp(
                timestamp_ns=timestamp_ns, **pose_query).pose3d_collection.poses
            object_poses = {uid: pose3d.T_world_object for uid, pose3d in objects_pose3d.items()}

            # rank the objects by their distance to each hand (tracked and zero-joint-angle pose)
            ranked_left = _rank_objects_by_hand_distance(object_poses, left_hand_joints)
            ranked_right = _rank_objects_by_hand_distance(object_poses, right_hand_joints)
            ranked_left_initial = _rank_objects_by_hand_distance(object_poses, left_hand_initial_joints)
            ranked_right_initial = _rank_objects_by_hand_distance(object_poses, right_hand_initial_joints)

            # last column: the hand that has the smaller distance to its closest object
            hand_joint_data[row, -1] = 0 if ranked_left[0][1] < ranked_right[0][1] else 1
            hand_joint_initial_data[row, -1] = 0 if ranked_left_initial[0][1] < ranked_right_initial[0][1] else 1

            # world-space bounding box corners, computed once per object and reused by every ranking
            corners_in_world = {
                uid: _bbx_corners_in_world(pose, object_bbx[uid]) for uid, pose in object_poses.items()
            }

            # objects in provider order
            for slot, (object_uid, object_pose3d) in enumerate(object_poses.items()):
                object_data[row, slot*8:slot*8+1] = object_uid
                object_data[row, slot*8+1:slot*8+4] = object_pose3d.translation()[0]
                object_data[row, slot*8+4:slot*8+8] = _quat_xyzw(object_pose3d.rotation())
                object_bbx_data[row, slot*24:slot*24+24] = corners_in_world[object_uid]

            # objects in distance order, one output array per ranking
            for ranking, target in (
                    (ranked_left, object_bbx_left_hand_data),
                    (ranked_right, object_bbx_right_hand_data),
                    (ranked_left_initial, object_bbx_left_hand_initial_data),
                    (ranked_right_initial, object_bbx_right_hand_initial_data)):
                for slot, (object_uid, _) in enumerate(ranking):
                    target[row, slot*24:slot*24+24] = corners_in_world[object_uid]

        # save the data, one file per array: <seq>_<scene>_<start>_<end>_<name>.npy
        segment_prefix = save_path + str(start_frame) + "_" + str(end_frame)
        outputs = {
            '_timestamps.npy': timestamps_data,
            '_head.npy': head_data,
            '_gaze.npy': gaze_data,
            '_hand.npy': hand_data,
            '_handjoints.npy': hand_joint_data,
            '_inithandjoints.npy': hand_joint_initial_data,
            '_objects.npy': object_data,
            '_object_bbx.npy': object_bbx_data,
            '_object_bbxleft.npy': object_bbx_left_hand_data,
            '_object_bbxright.npy': object_bbx_right_hand_data,
            '_object_initbbxleft.npy': object_bbx_left_hand_initial_data,
            '_object_initbbxright.npy': object_bbx_right_hand_initial_data,
        }
        for suffix, array in outputs.items():
            np.save(segment_prefix + suffix, array)

    print('\nprocessing ends at ' + time.asctime(time.localtime(time.time())))
In [ ]: