first commit
This commit is contained in:
parent 99ce0acafb
commit 8f6b6a34e7

73 changed files with 11656 additions and 0 deletions
adt_processing/README.md (Normal file, 29 lines)

@@ -0,0 +1,29 @@
## Code to process the ADT dataset

Note: processing the ADT dataset is considerably more involved than the other datasets because it relies on the Project Aria Tools. It is easier to get started with the other datasets first.

## Usage

Step 1: Follow https://facebookresearch.github.io/projectaria_tools/docs/open_datasets/aria_digital_twin_dataset/dataset_download to prepare the environment and download the dataset. Note that in our paper we used version 1.1.0 of the dataset with Project Aria Tools 1.1.0 and Python 3.8, and built the Project Aria Tools with `python setup.py build_py`.

Step 2: Set `dataset_path` and `dataset_processed_path` in `adt_preprocessing.py`, copy `adt_preprocessing.py`, `adt.csv`, and the `utils` folder into the Project Aria Tools codebase, and run the script to process the dataset.

Step 3 (optional but highly recommended): Set `data_path` in `dataset_visualisation.py` and run it to visualise the processed data and get familiar with the dataset.
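
After Step 2, each processed sequence is saved as a set of `.npy` files prefixed with the sequence name under `train/` or `test/` in `dataset_processed_path`. As a quick sanity check, a minimal sketch like the following (using the default paths above; the sequence prefix is one of the test sequences from `adt.csv`) loads and inspects one processed sequence:

```python
import numpy as np

# adjust to your dataset_processed_path; any '<sequence name>_' prefix under train/ or test/ works
data_path = '/scratch/hu/pose_forecast/adt_hoigaze/test/Apartment_release_meal_skeleton_seq132_'

gaze = np.load(data_path + 'gaze.npy')               # gaze_direction (3) + gaze_2d (2) + frame_id (1)
head = np.load(data_path + 'head.npy')               # head_direction (3) + head_translation (3)
hand = np.load(data_path + 'hand.npy')               # left_hand_translation (3) + right_hand_translation (3)
hand_joints = np.load(data_path + 'handjoints.npy')  # 15*3 joints per hand + 2 attended-hand labels
print(gaze.shape, head.shape, hand.shape, hand_joints.shape)
```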
## Citations

```bibtex
@inproceedings{hu25hoigaze,
  title={HOIGaze: Gaze Estimation During Hand-Object Interactions in Extended Reality Exploiting Eye-Hand-Head Coordination},
  author={Hu, Zhiming and Haeufle, Daniel and Schmitt, Syn and Bulling, Andreas},
  booktitle={Proceedings of the 2025 ACM Special Interest Group on Computer Graphics and Interactive Techniques},
  year={2025}}

@inproceedings{pan2023aria,
  title={Aria digital twin: A new benchmark dataset for egocentric 3d machine perception},
  author={Pan, Xiaqing and Charron, Nicholas and Yang, Yongqian and Peters, Scott and Whelan, Thomas and Kong, Chen and Parkhi, Omkar and Newcombe, Richard and Ren, Yuheng Carl},
  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages={20133--20143},
  year={2023}}
```
adt_processing/adt.csv (Normal file, 35 lines)

@@ -0,0 +1,35 @@
sequence_name,training,action
Apartment_release_work_skeleton_seq132,0,work
Apartment_release_work_skeleton_seq138,0,work
Apartment_release_meal_skeleton_seq132,0,meal
Apartment_release_decoration_skeleton_seq133,0,decoration
Apartment_release_decoration_skeleton_seq139,0,decoration
Apartment_release_decoration_skeleton_seq134,0,decoration
Apartment_release_work_skeleton_seq107,0,work
Apartment_release_meal_skeleton_seq135,0,meal
Apartment_release_work_skeleton_seq135,0,work
Apartment_release_meal_skeleton_seq131,0,meal
Apartment_release_work_skeleton_seq131,1,work
Apartment_release_work_skeleton_seq109,1,work
Apartment_release_work_skeleton_seq110,1,work
Apartment_release_decoration_skeleton_seq140,1,decoration
Apartment_release_decoration_skeleton_seq137,1,decoration
Apartment_release_work_skeleton_seq136,1,work
Apartment_release_meal_skeleton_seq136,1,meal
Apartment_release_work_skeleton_seq106,1,work
Apartment_release_meal_skeleton_seq134,1,meal
Apartment_release_work_skeleton_seq134,1,work
Apartment_release_decoration_skeleton_seq135,1,decoration
Apartment_release_decoration_skeleton_seq138,1,decoration
Apartment_release_decoration_skeleton_seq132,1,decoration
Apartment_release_work_skeleton_seq139,1,work
Apartment_release_work_skeleton_seq133,1,work
Apartment_release_meal_skeleton_seq139,1,meal
Apartment_release_meal_skeleton_seq133,1,meal
Apartment_release_work_skeleton_seq140,1,work
Apartment_release_work_skeleton_seq137,1,work
Apartment_release_meal_skeleton_seq140,1,meal
Apartment_release_meal_skeleton_seq137,1,meal
Apartment_release_decoration_skeleton_seq136,1,decoration
Apartment_release_decoration_skeleton_seq131,1,decoration
Apartment_release_work_skeleton_seq108,1,work
adt_processing/adt_preprocessing.py (Normal file, 272 lines)

@@ -0,0 +1,272 @@
import numpy as np
import os
os.nice(5)
import sys
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import plotly.graph_objects as go
import math
from math import tan
import random
from scipy.linalg import pinv
import projectaria_tools.core.mps as mps
import shutil
import json
from PIL import Image
from utils import remake_dir
import pandas as pd
import pylab as p
from IPython.display import display
import time

from projectaria_tools import utils
from projectaria_tools.core.stream_id import StreamId
from projectaria_tools.core import calibration
from projectaria_tools.projects.adt import (
    AriaDigitalTwinDataProvider,
    AriaDigitalTwinSkeletonProvider,
    AriaDigitalTwinDataPathsProvider,
    bbox3d_to_line_coordinates,
    bbox2d_to_image_coordinates,
    utils as adt_utils,
    Aria3dPose
)

dataset_path = '/datasets/public/zhiming_datasets/adt/'
dataset_processed_path = '/scratch/hu/pose_forecast/adt_hoigaze/'

remake_dir(dataset_processed_path)
remake_dir(dataset_processed_path + "train/")
remake_dir(dataset_processed_path + "test/")
dataset_info = pd.read_csv('adt.csv')
object_num = 5 # number of extracted dynamic objects that are closest to the left or right hands
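
# adt.csv lists the sequences to process: 'training' is 1 for training
# sequences and 0 for test sequences, and 'action' is the activity label
# (work, meal, or decoration)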
for i, seq in enumerate(dataset_info['sequence_name']):
    action = dataset_info['action'][i]
    print("\nprocessing {}th seq: {}, action: {}...".format(i+1, seq, action))
    seq_path = dataset_path + seq + '/'
    if dataset_info['training'][i] == 1:
        save_path = dataset_processed_path + 'train/' + seq + '_'
    if dataset_info['training'][i] == 0:
        save_path = dataset_processed_path + 'test/' + seq + '_'

    paths_provider = AriaDigitalTwinDataPathsProvider(seq_path)
    all_device_serials = paths_provider.get_device_serial_numbers()
    selected_device_number = 0
    data_paths = paths_provider.get_datapaths_by_device_num(selected_device_number)
    print("loading ground truth data...")
    gt_provider = AriaDigitalTwinDataProvider(data_paths)
    print("loading ground truth data done")

    stream_id = StreamId("214-1")
    img_timestamps_ns = gt_provider.get_aria_device_capture_timestamps_ns(stream_id)
    frame_num = len(img_timestamps_ns)
    print("There are {} frames".format(frame_num))

    # get all available skeletons in a sequence
    skeleton_ids = gt_provider.get_skeleton_ids()
    skeleton_info = gt_provider.get_instance_info_by_id(skeleton_ids[0])
    print("skeleton ", skeleton_info.name, " wears ", skeleton_info.associated_device_serial)

    useful_frames = []
    gaze_data = np.zeros((frame_num, 6)) # gaze_direction (3) + gaze_2d (2) + frame_id (1)
    head_data = np.zeros((frame_num, 6)) # head_direction (3) + head_translation (3)
    hand_data = np.zeros((frame_num, 6)) # left_hand_translation (3) + right_hand_translation (3)
    hand_joint_data = np.zeros((frame_num, 92)) # left_hand (15*3) + right_hand (15*3) + attended_hand_gt + attended_hand_baseline (closest_hand)
    object_all_data = []
    object_bbx_all_data = []
    object_center_all_data = []

    local_time = time.asctime(time.localtime(time.time()))
    print('\nProcessing starts at ' + local_time)
    for j in range(frame_num):
        timestamps_ns = img_timestamps_ns[j]

        skeleton_with_dt = gt_provider.get_skeleton_by_timestamp_ns(timestamps_ns, skeleton_ids[0])
        assert skeleton_with_dt.is_valid(), "skeleton is not valid"

        skeleton = skeleton_with_dt.data()
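        # skeleton joint indices in the ADT body skeleton: 4 is the head, 8 and
        # 27 are the left and right hand roots, and 9-23 / 28-42 are the 15
        # joints of the left / right hand (interpretation assumed from the usage below)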
        head_translation_id = [4]
        hand_translation_id = [8, 27]
        hand_joints_id = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
        hand_translation = np.array(skeleton.joints)[hand_translation_id, :].reshape(2*3)
        head_translation = np.array(skeleton.joints)[head_translation_id, :].reshape(1*3)
        hand_joints = np.array(skeleton.joints)[hand_joints_id, :].reshape(30*3)
        hand_data[j] = hand_translation
        hand_joint_data[j, :90] = hand_joints
        left_hand_joints = hand_joints[:45].reshape(15, 3)
        left_hand_center = np.mean(left_hand_joints, axis=0)
        right_hand_joints = hand_joints[45:].reshape(15, 3)
        right_hand_center = np.mean(right_hand_joints, axis=0)

        # get the Aria pose
        aria3dpose_with_dt = gt_provider.get_aria_3d_pose_by_timestamp_ns(timestamps_ns)
        if not aria3dpose_with_dt.is_valid():
            print("aria 3d pose is not available")
        aria3dpose = aria3dpose_with_dt.data()
        transform_scene_device = aria3dpose.transform_scene_device.matrix()

        # get projection function
        cam_calibration = gt_provider.get_aria_camera_calibration(stream_id)
        assert cam_calibration is not None, "no camera calibration"

        eye_gaze_with_dt = gt_provider.get_eyegaze_by_timestamp_ns(timestamps_ns)
        assert eye_gaze_with_dt.is_valid(), "Eye gaze not available"

        # project the gaze center in the CPF frame into the camera sensor plane, with multiplication performed in homogeneous coordinates
        eye_gaze = eye_gaze_with_dt.data()
        gaze_center_in_cpf = np.array([tan(eye_gaze.yaw), tan(eye_gaze.pitch), 1.0], dtype=np.float64) * eye_gaze.depth
        head_center_in_cpf = np.array([0.0, 0.0, 1.0], dtype=np.float64)
        transform_cpf_sensor = gt_provider.raw_data_provider_ptr().get_device_calibration().get_transform_cpf_sensor(cam_calibration.get_label())
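        # homogeneous convention: transform with w=1 for points (translation
        # applies) and w=0 for directions (rotation only)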
        gaze_center_in_camera = transform_cpf_sensor.inverse().matrix() @ np.hstack((gaze_center_in_cpf, 1)).T
        gaze_center_in_camera = gaze_center_in_camera[:3] / gaze_center_in_camera[3:]
        gaze_center_in_pixels = cam_calibration.project(gaze_center_in_camera)
        head_center_in_camera = transform_cpf_sensor.inverse().matrix() @ np.hstack((head_center_in_cpf, 0)).T
        head_center_in_camera = head_center_in_camera[:3]

        extrinsic_matrix = cam_calibration.get_transform_device_camera().matrix()
        gaze_center_in_device = (extrinsic_matrix @ np.hstack((gaze_center_in_camera, 1)))[0:3]
        gaze_center_in_scene = (transform_scene_device @ np.hstack((gaze_center_in_device, 1)))[0:3]
        head_center_in_device = (extrinsic_matrix @ np.hstack((head_center_in_camera, 0)))[0:3]
        head_center_in_scene = (transform_scene_device @ np.hstack((head_center_in_device, 0)))[0:3]

        gaze_direction = gaze_center_in_scene - head_translation
        if np.linalg.norm(gaze_direction) == 0: # invalid data that will be filtered
            gaze_direction = np.array([0.0, 0.0, 1.0], dtype=np.float64)
        else:
            gaze_direction = [x / np.linalg.norm(gaze_direction) for x in gaze_direction]
        head_direction = head_center_in_scene
        head_direction = [x / np.linalg.norm(head_direction) for x in head_direction]
        head_data[j, 0:3] = head_direction
        head_data[j, 3:6] = head_translation
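
        # attended-hand ground truth: arccos of the dot product of two unit
        # vectors is the angle between them, so the attended hand is the one
        # whose direction from the head is angularly closest to the gaze
        # direction (0 = left hand, 1 = right hand)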
        left_hand_direction = left_hand_center - head_translation
        left_hand_direction = np.array([x / np.linalg.norm(left_hand_direction) for x in left_hand_direction])
        left_hand_distance_to_gaze = np.arccos(np.sum(gaze_direction*left_hand_direction))
        right_hand_direction = right_hand_center - head_translation
        right_hand_direction = np.array([x / np.linalg.norm(right_hand_direction) for x in right_hand_direction])
        right_hand_distance_to_gaze = np.arccos(np.sum(gaze_direction*right_hand_direction))
        if left_hand_distance_to_gaze < right_hand_distance_to_gaze:
            hand_joint_data[j, 90:91] = 0
        else:
            hand_joint_data[j, 90:91] = 1

        if gaze_center_in_pixels is not None:
            x_pixel = gaze_center_in_pixels[1]
            y_pixel = gaze_center_in_pixels[0]
            gaze_center_in_pixels[0] = x_pixel
            gaze_center_in_pixels[1] = y_pixel

            useful_frames.append(j)
            gaze_2d = np.divide(gaze_center_in_pixels, cam_calibration.get_image_size())

            gaze_data[j, 0:3] = gaze_direction
            gaze_data[j, 3:5] = gaze_2d
            gaze_data[j, 5:6] = j

        # get the objects
        bbox3d_with_dt = gt_provider.get_object_3d_boundingboxes_by_timestamp_ns(timestamps_ns)
        assert bbox3d_with_dt.is_valid(), "3D bounding box is not available"
        bbox3d_all = bbox3d_with_dt.data()

        object_all = []
        object_bbx_all = []
        object_center_all = []

        for obj_id in bbox3d_all:
            bbox3d = bbox3d_all[obj_id]
            aabb = bbox3d.aabb
            aabb_coords = bbox3d_to_line_coordinates(aabb)
            obb = np.zeros(shape=(len(aabb_coords), 3))
            for k in range(0, len(aabb_coords)):
                aabb_pt = aabb_coords[k]
                aabb_pt_homo = np.append(aabb_pt, [1])
                obb_pt = (bbox3d.transform_scene_object.matrix() @ aabb_pt_homo)[0:3]
                obb[k] = obb_pt
            motion_type = gt_provider.get_instance_info_by_id(obj_id).motion_type
            if str(motion_type) == 'MotionType.DYNAMIC':
                object_all.append(obb)
                bbx_idx = [0, 1, 2, 3, 5, 6, 7, 8]
                obb_bbx = obb[bbx_idx, :]
                object_bbx_all.append(obb_bbx)
                obb_center = np.mean(obb_bbx, axis=0)
                object_center_all.append(obb_center)

        object_all_data.append(object_all)
        object_bbx_all_data.append(object_bbx_all)
        object_center_all_data.append(object_center_all)

    gaze_data = gaze_data[useful_frames, :] # useful_frames are actually continuous
    head_data = head_data[useful_frames, :]
    hand_data = hand_data[useful_frames, :]
    hand_joint_data = hand_joint_data[useful_frames, :]

    object_all_data = np.array(object_all_data)
    object_all_data = object_all_data[useful_frames, :, :, :]
    #print("Objects shape: {}".format(object_all_data.shape))
    object_bbx_all_data = np.array(object_bbx_all_data)
    object_bbx_all_data = object_bbx_all_data[useful_frames, :, :, :]
    object_center_all_data = np.array(object_center_all_data)
    object_center_all_data = object_center_all_data[useful_frames, :, :]

    # extract the closest objects to the left and right hands
    useful_frames_num = len(useful_frames)
    print("There are {} useful frames".format(useful_frames_num))
    object_num_all = object_all_data.shape[1]
    object_left_hand_data = np.zeros((useful_frames_num, object_num, 16, 3))
    object_bbx_left_hand_data = np.zeros((useful_frames_num, object_num, 8, 3))
    object_distance_to_left_hand = np.zeros((useful_frames_num, object_num_all))
    object_right_hand_data = np.zeros((useful_frames_num, object_num, 16, 3))
    object_bbx_right_hand_data = np.zeros((useful_frames_num, object_num, 8, 3))
    object_distance_to_right_hand = np.zeros((useful_frames_num, object_num_all))
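
    # hand-object distance: mean Euclidean distance from the 15 hand joints
    # to the centre of each dynamic object's bounding box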
    for j in range(useful_frames_num):
        left_hand_joints = hand_joint_data[j, :45].reshape(15, 3)
        right_hand_joints = hand_joint_data[j, 45:90].reshape(15, 3)
        for k in range(object_num_all):
            object_pos = object_center_all_data[j, k, :]
            object_distance_to_left_hand[j, k] = np.mean(np.linalg.norm(left_hand_joints-object_pos, axis=1))
            object_distance_to_right_hand[j, k] = np.mean(np.linalg.norm(right_hand_joints-object_pos, axis=1))
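
    # attended-hand baseline: label the hand that is closer to its nearest
    # dynamic object (0 = left, 1 = right), and keep the object_num nearest
    # objects for each hand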
    for j in range(useful_frames_num):
        distance_to_left_hand = object_distance_to_left_hand[j, :]
        distance_to_left_hand_min = np.min(distance_to_left_hand)
        distance_to_right_hand = object_distance_to_right_hand[j, :]
        distance_to_right_hand_min = np.min(distance_to_right_hand)
        if distance_to_left_hand_min < distance_to_right_hand_min:
            hand_joint_data[j, 91:92] = 0
        else:
            hand_joint_data[j, 91:92] = 1

        left_hand_index = np.argsort(distance_to_left_hand)
        right_hand_index = np.argsort(distance_to_right_hand)
        for k in range(object_num):
            object_left_hand_data[j, k] = object_all_data[j, left_hand_index[k]]
            object_bbx_left_hand_data[j, k] = object_bbx_all_data[j, left_hand_index[k]]
            object_right_hand_data[j, k] = object_all_data[j, right_hand_index[k]]
            object_bbx_right_hand_data[j, k] = object_bbx_all_data[j, right_hand_index[k]]

    gaze_path = save_path + 'gaze.npy'
    head_path = save_path + 'head.npy'
    hand_path = save_path + 'hand.npy'
    hand_joint_path = save_path + 'handjoints.npy'
    object_left_hand_path = save_path + 'object_left.npy'
    object_bbx_left_hand_path = save_path + 'object_bbxleft.npy'
    object_right_hand_path = save_path + 'object_right.npy'
    object_bbx_right_hand_path = save_path + 'object_bbxright.npy'

    np.save(gaze_path, gaze_data)
    np.save(head_path, head_data)
    np.save(hand_path, hand_data)
    np.save(hand_joint_path, hand_joint_data)
    np.save(object_left_hand_path, object_left_hand_data)
    np.save(object_bbx_left_hand_path, object_bbx_left_hand_data)
    np.save(object_right_hand_path, object_right_hand_data)
    np.save(object_bbx_right_hand_path, object_bbx_right_hand_data)

    local_time = time.asctime(time.localtime(time.time()))
    print('\nProcessing ends at ' + local_time)
adt_processing/dataset_visualisation.py (Normal file, 162 lines)

@@ -0,0 +1,162 @@
# visualise data in the ADT dataset
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


# play human pose using a skeleton
class Player_Skeleton:
    def __init__(self, fps=30.0, object_num=10):
        self._fps = fps
        self.object_num = object_num
        # names of all the joints: head + left_hand + right_hand + left_hand_joints + right_hand_joints + gaze_direction + head_direction
        self._joint_names = ['Head', 'LHand', 'RHand', 'LThumb1', 'LThumb2', 'LThumb3', 'LIndex1', 'LIndex2', 'LIndex3', 'LMiddle1', 'LMiddle2', 'LMiddle3', 'LRing1', 'LRing2', 'LRing3', 'LPinky1', 'LPinky2', 'LPinky3', 'RThumb1', 'RThumb2', 'RThumb3', 'RIndex1', 'RIndex2', 'RIndex3', 'RMiddle1', 'RMiddle2', 'RMiddle3', 'RRing1', 'RRing2', 'RRing3', 'RPinky1', 'RPinky2', 'RPinky3', 'Gaze_direction', 'Head_direction']

        self._joint_ids = {name: idx for idx, name in enumerate(self._joint_names)}

        # parent of every joint
        self._joint_parent_names = {
            # root
            'Head': 'Head',
            'LHand': 'LHand',
            'RHand': 'RHand',
            'LThumb1': 'LHand',
            'LThumb2': 'LThumb1',
            'LThumb3': 'LThumb2',
            'LIndex1': 'LHand',
            'LIndex2': 'LIndex1',
            'LIndex3': 'LIndex2',
            'LMiddle1': 'LHand',
            'LMiddle2': 'LMiddle1',
            'LMiddle3': 'LMiddle2',
            'LRing1': 'LHand',
            'LRing2': 'LRing1',
            'LRing3': 'LRing2',
            'LPinky1': 'LHand',
            'LPinky2': 'LPinky1',
            'LPinky3': 'LPinky2',
            'RThumb1': 'RHand',
            'RThumb2': 'RThumb1',
            'RThumb3': 'RThumb2',
            'RIndex1': 'RHand',
            'RIndex2': 'RIndex1',
            'RIndex3': 'RIndex2',
            'RMiddle1': 'RHand',
            'RMiddle2': 'RMiddle1',
            'RMiddle3': 'RMiddle2',
            'RRing1': 'RHand',
            'RRing2': 'RRing1',
            'RRing3': 'RRing2',
            'RPinky1': 'RHand',
            'RPinky2': 'RPinky1',
            'RPinky3': 'RPinky2',
            'Gaze_direction': 'Head',
            'Head_direction': 'Head',
        }

        # id of each joint's parent
        self._joint_parent_ids = [self._joint_ids[self._joint_parent_names[child_name]] for child_name in self._joint_names]
        self._joint_links = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]
        # link colours: 0 for the head, 1 for the left hand, 2 for the right hand, 3 for the gaze direction, 4 for the head direction
        self._link_colors = [1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4]

        self._fig = plt.figure()
        # note: plt.gca(projection='3d') was removed in Matplotlib 3.6; use add_subplot instead
        self._ax = self._fig.add_subplot(projection='3d')

        self._plots = []
        for i in range(len(self._joint_links)):
            if self._link_colors[i] == 0:
                color = "#3498db"
            if self._link_colors[i] == 1:
                color = "#3498db"
            if self._link_colors[i] == 2:
                color = "#3498db"
            if self._link_colors[i] == 3:
                color = "#6aa84f"
            if self._link_colors[i] == 4:
                color = "#a64d79"
            self._plots.append(self._ax.plot([0, 0], [0, 0], [0, 0], lw=2.0, c=color))

        for i in range(self.object_num):
            self._plots.append(self._ax.plot([0, 0], [0, 0], [0, 0], lw=1.0, c='#ff0000'))

        self._ax.set_xlabel("x")
        self._ax.set_ylabel("y")
        self._ax.set_zlabel("z")

    # play the sequence of human poses in xyz representation
    def play_xyz(self, pose_xyz, gaze, head, objects):
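        # append the gaze and head direction rays (scaled by 0.5) as two
        # pseudo-joints rooted at the head so they can be drawn as links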
        gaze_direction = pose_xyz[:, :3] + gaze[:, :3]*0.5
        head_direction = pose_xyz[:, :3] + head[:, :3]*0.5
        pose_xyz = np.concatenate((pose_xyz, gaze_direction), axis=1)
        pose_xyz = np.concatenate((pose_xyz, head_direction), axis=1)

        for i in range(pose_xyz.shape[0]):
            joint_number = len(self._joint_names)
            pose_xyz_tmp = pose_xyz[i].reshape(joint_number, 3)
            objects_xyz = objects[i, :, :, :]
            for j in range(len(self._joint_links)):
                idx = self._joint_links[j]
                start_point = pose_xyz_tmp[idx]
                end_point = pose_xyz_tmp[self._joint_parent_ids[idx]]
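                # the y and z components are swapped for display so that the
                # scene's y axis is drawn as the plot's vertical axis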
                x = np.array([start_point[0], end_point[0]])
                y = np.array([start_point[2], end_point[2]])
                z = np.array([start_point[1], end_point[1]])
                self._plots[j][0].set_xdata(x)
                self._plots[j][0].set_ydata(y)
                self._plots[j][0].set_3d_properties(z)

            for j in range(len(self._joint_links), len(self._joint_links) + objects_xyz.shape[0]):
                object_xyz = objects_xyz[j - len(self._joint_links), :, :]
                self._plots[j][0].set_xdata(object_xyz[:, 0])
                self._plots[j][0].set_ydata(object_xyz[:, 2])
                self._plots[j][0].set_3d_properties(object_xyz[:, 1])

            r = 1.0
            x_root, y_root, z_root = pose_xyz_tmp[0, 0], pose_xyz_tmp[0, 2], pose_xyz_tmp[0, 1]
            self._ax.set_xlim3d([-r + x_root, r + x_root])
            self._ax.set_ylim3d([-r + y_root, r + y_root])
            self._ax.set_zlim3d([-r + z_root, r + z_root])
            #self._ax.view_init(elev=30, azim=-110)

            self._ax.grid(False)
            #self._ax.axis('off')

            self._ax.set_aspect('auto')
            plt.show(block=False)
            self._fig.canvas.draw()
            past_time = f"{i / self._fps:.1f}"
            plt.title(f"Time: {past_time} s", fontsize=15)
            plt.pause(0.000000001)


if __name__ == "__main__":
    data_path = '/scratch/hu/pose_forecast/adt_hoigaze/test/Apartment_release_meal_skeleton_seq132_'
    gaze_path = data_path + 'gaze.npy'
    head_path = data_path + 'head.npy'
    hand_path = data_path + 'hand.npy'
    hand_joint_path = data_path + 'handjoints.npy'
    object_left_hand_path = data_path + 'object_left.npy'
    object_right_hand_path = data_path + 'object_right.npy'

    gaze = np.load(gaze_path) # gaze_direction (3) + gaze_2d (2) + frame_id (1)
    print("Gaze shape: {}".format(gaze.shape))
    gaze_direction = gaze[:, :3]
    head = np.load(head_path) # head_direction (3) + head_translation (3)
    print("Head shape: {}".format(head.shape))
    head_direction = head[:, :3]
    head_translation = head[:, 3:]
    hand_translation = np.load(hand_path) # left_hand_translation (3) + right_hand_translation (3)
    print("Hand shape: {}".format(hand_translation.shape))
    hand_joint = np.load(hand_joint_path) # left_hand (15*3) + right_hand (15*3) + attended_hand_gt + attended_hand_baseline (closest_hand)
    print("Hand joint shape: {}".format(hand_joint.shape))
    hand_joint = hand_joint[:, :90]
    pose = np.concatenate((head_translation, hand_translation), axis=1)
    pose = np.concatenate((pose, hand_joint), axis=1)
    object_left = np.load(object_left_hand_path)[:, :, :, :]
    object_right = np.load(object_right_hand_path)[:, :, :, :]
    object_all = np.concatenate((object_left, object_right), axis=1)
    print("Object shape: {}".format(object_all.shape))

    player = Player_Skeleton(object_num=object_all.shape[1])
    player.play_xyz(pose, gaze_direction, head_direction, object_all)
adt_processing/utils/__init__.py (Normal file, 4 lines)

@@ -0,0 +1,4 @@
__all__ = ['file_systems']

from .file_systems import remake_dir, make_dir
adt_processing/utils/file_systems.py (Normal file, 50 lines)

@@ -0,0 +1,50 @@
import os
import shutil
import time


# remove a directory
def remove_dir(dirName):
    if os.path.exists(dirName):
        shutil.rmtree(dirName)
    else:
        print("Invalid directory path!")


# remake a directory
def remake_dir(dirName):
    if os.path.exists(dirName):
        shutil.rmtree(dirName)
        os.makedirs(dirName)
    else:
        os.makedirs(dirName)


# calculate the number of lines in a file
def file_lines(fileName):
    if os.path.exists(fileName):
        with open(fileName, 'r') as fr:
            return len(fr.readlines())
    else:
        print("Invalid file path!")
        return 0


# make a directory if it does not exist
def make_dir(dirName):
    if os.path.exists(dirName):
        print("Directory " + dirName + " already exists.")
    else:
        os.makedirs(dirName)
if __name__ == "__main__":
    dirName = "test"
    remake_dir(dirName)
    time.sleep(3)
    make_dir(dirName)
    remove_dir(dirName)
    time.sleep(3)
    make_dir(dirName)
    #print(file_lines('233.txt'))