# IRENE/utils/dataset.py
# (last modified 2024-02-01 15:40:47 +01:00)
import dgl
import torch
import torch.utils.data
import os
import pickle as pkl
import json
import numpy as np
from tqdm import tqdm
import sys
sys.path.append('/projects/bortoletto/irene/')
from utils.grid_object import *
from utils.relations import *
# ========================== Helper functions ==========================
def index_data(json_list, path_list):
    """
    Build a per-episode index of (json file, frame number, action) tuples.

    Args:
        json_list: paths to the per-video json annotation files.
        path_list: paths to the matching video files; only used to keep the
            one-to-one pairing with ``json_list`` (the index stores the json
            path, not the video path).

    Returns:
        data_tuples: one inner list per episode of every video, each entry
        being ``(json file, global frame index, [dx, dy])`` where
        ``[dx, dy]`` is the agent displacement to the next frame, halved.
    """
    print(f'processing files {len(json_list)}')
    data_tuples = []
    for json_path, _ in tqdm(zip(json_list, path_list)):
        with open(json_path, 'r') as f:
            state = json.load(f)
        ep_lens = [len(ep) for ep in state]
        past_len = 0
        for e, ep_len in enumerate(ep_lens):
            data_tuples.append([])
            # Skip the first 30 frames and the trailing 83 frames of every
            # episode (frame + 1 is read below, so the last usable frame is
            # ep_len - 84).
            for frame in range(30, ep_len - 83):
                # Action = agent displacement between consecutive frames,
                # halved — presumably a normalization; TODO confirm units.
                f0x, f0y = state[e][frame]['agent'][0]
                f1x, f1y = state[e][frame + 1]['agent'][0]
                action = [(f1x - f0x) / 2., (f1y - f0y) / 2.]
                data_tuples[-1].append((json_path, past_len + frame, action))
            # Every episode must be long enough to contribute samples.
            assert len(data_tuples[-1]) > 0
            past_len += ep_len
    return data_tuples
# ========================== Dataset class ==========================
class TransitionDataset(torch.utils.data.Dataset):
    """
    Training dataset for the behavior cloning model.

    Every item is one episode of ``num_trials`` trials; each trial is
    returned as a batched DGL heterograph of object nodes together with the
    (frame-pooled) agent actions at the sampled frames.

    Args:
        path: root directory of the dataset.
        types: list of video types (sub-directories) to include.
        mode: 'train' (first 80% of files), 'val' (last 20%), anything
            else loads all files.
        num_test: number of test state-action pairs.
        num_trials: number of trials in an episode.
        action_range: number of frames to skip; actions are averaged over
            this many frames (displacement of the agent).
        process_data: if truthy, (re)build the frame/action index from the
            raw json files and cache it; otherwise load the cached index.
        max_len: max number of context state-action pairs per trial.

    __getitem__ returns:
        states: list of batched DGLHeteroGraphs, one per trial.
        actions: list of (max_len, 2) zero-padded action tensors.
        lens: true (unpadded) number of samples per trial.
        n_nodes: node count of every sampled frame graph.
    """

    # Edge type names, in the same order as `rel_deter_func` and the
    # adjacency matrices returned by `_get_spatial_rel`.
    EDGE_TYPES = [
        'is_top_adj', 'is_left_adj', 'is_top_right_adj', 'is_top_left_adj',
        'is_down_adj', 'is_right_adj', 'is_down_left_adj', 'is_down_right_adj',
        'is_left', 'is_right', 'is_front', 'is_back', 'is_aligned', 'is_close'
    ]

    def __init__(
            self,
            path,
            types=None,
            mode="train",
            num_test=1,
            num_trials=9,
            action_range=10,
            process_data=0,
            max_len=30
    ):
        self.path = path
        self.types = types
        self.mode = mode
        self.num_trials = num_trials
        self.num_test = num_test
        self.action_range = action_range
        self.max_len = max_len
        # Ordered pairs of distinct trials: 9P2 = 9 * 8 (comment in the
        # original said "9p2 - 9").
        self.ep_combs = self.num_trials * (self.num_trials - 2)
        self.eps = [[x, y] for x in range(self.num_trials)
                    for y in range(self.num_trials) if x != y]
        types_str = '_'.join(self.types)
        # Relation predicates, aligned index-for-index with EDGE_TYPES.
        self.rel_deter_func = [
            is_top_adj, is_left_adj, is_top_right_adj, is_top_left_adj,
            is_down_adj, is_right_adj, is_down_left_adj, is_down_right_adj,
            is_left, is_right, is_front, is_back, is_aligned, is_close
        ]
        self.path_list = []
        self.json_list = []
        # Collect video (.mp4) and annotation (.json) paths per type.
        for t in types:
            print(f'reading files of type {t} in {mode}')
            type_dir = os.path.join(self.path, t)
            paths = sorted(os.path.join(type_dir, x)
                           for x in os.listdir(type_dir) if x.endswith('.mp4'))
            jsons = sorted(os.path.join(type_dir, x)
                           for x in os.listdir(type_dir)
                           if x.endswith('.json') and 'index' not in x)
            if mode == 'train':
                self.path_list += paths[:int(0.8 * len(jsons))]
                self.json_list += jsons[:int(0.8 * len(jsons))]
            elif mode == 'val':
                self.path_list += paths[int(0.8 * len(jsons)):]
                self.json_list += jsons[int(0.8 * len(jsons)):]
            else:
                self.path_list += paths
                self.json_list += jsons
        self.data_tuples = []
        index_file = os.path.join(self.path, f'jindex_bib_{mode}_{types_str}.json')
        if process_data:
            # Index the videos once (frame index + action tuples) to speed up
            # retrieval, then cache the index on disk.
            self.data_tuples = index_data(self.json_list, self.path_list)
            with open(index_file, 'w') as fp:
                json.dump({'data_tuples': self.data_tuples}, fp)
        else:
            # Read the pre-built index.
            with open(index_file, 'r') as fp:
                self.data_tuples = json.load(fp)['data_tuples']
        # NOTE(review): assumes exactly 9 trials per video — confirm.
        self.tot_trials = len(self.path_list) * 9

    def _get_frame_graph(self, jsonfile, frame_idx):
        """Build the DGL heterograph of one frame of ``jsonfile``."""
        with open(jsonfile, 'rb') as f:
            frame_data = json.load(f)
        # Episodes are stored as nested lists; flatten to index by frame.
        flat_list = [x for xs in frame_data for x in xs]
        grid_objs = parse_objects(flat_list[frame_idx])
        # One NxN adjacency matrix per relation, in EDGE_TYPES order.
        adj = self._get_spatial_rel(grid_objs)
        graph_data = {}
        for rel_idx, rel_name in enumerate(self.EDGE_TYPES):
            src, dst = np.nonzero(adj[rel_idx])
            graph_data[('obj', rel_name, 'obj')] = (torch.tensor(src), torch.tensor(dst))
        # NOTE: a stray `breakpoint()` after this call was removed — it would
        # drop any training run into the debugger.
        g = dgl.heterograph(graph_data, num_nodes_dict={'obj': len(grid_objs)})
        return self._add_node_features(grid_objs, g)

    def _add_node_features(self, objs, graph):
        """Attach per-node type / position / color / shape features."""
        for obj_idx, obj in enumerate(objs):
            graph.nodes[obj_idx].data['type'] = torch.tensor(obj.type)
            graph.nodes[obj_idx].data['pos'] = torch.tensor([[obj.x, obj.y]], dtype=torch.float32)
            # attributes must be exactly (color, shape), both fully specified
            assert len(obj.attributes) == 2 and None not in obj.attributes[0] and None not in obj.attributes[1]
            graph.nodes[obj_idx].data['color'] = torch.tensor([obj.attributes[0]])
            graph.nodes[obj_idx].data['shape'] = torch.tensor([obj.attributes[1]])
        return graph

    def _get_spatial_rel(self, objs):
        """Return one NxN binary adjacency matrix per relation predicate."""
        n = len(objs)
        spatial_tensors = [np.zeros([n, n]) for _ in range(len(self.rel_deter_func))]
        direction_vec = np.array((0, -1))  # reference "up" direction (loop-invariant)
        for obj_idx1, obj1 in enumerate(objs):
            for obj_idx2, obj2 in enumerate(objs):
                for rel_idx, func in enumerate(self.rel_deter_func):
                    if func(obj1, obj2, direction_vec):
                        spatial_tensors[rel_idx][obj_idx1, obj_idx2] = 1.0
        return spatial_tensors

    def get_trial(self, trials, step=10):
        """
        Sample every ``step``-th frame of each trial (capped at ``max_len``)
        and return graphs, pooled actions, true lengths and node counts.
        """
        states, actions, lens, n_nodes = [], [], [], []
        trial_len = []
        for t in trials:
            samples = [(t, n) for n in range(0, len(self.data_tuples[t]), step)]
            trial_len.append(samples[:self.max_len])
        for samples in trial_len:
            states.append([])
            actions.append([])
            lens.append(len(samples))
            for t, n in samples:
                video = self.data_tuples[t][n][0]
                states[-1].append(self._get_frame_graph(video, self.data_tuples[t][n][1]))
                n_nodes.append(states[-1][-1].number_of_nodes())
                # Actions are averaged over the next `action_range` frames.
                # (Python slicing clamps at the end, so the original
                # end-of-trial special case was redundant.)
                actions_xy = [d[2] for d in self.data_tuples[t][n:n + self.action_range]]
                actions[-1].append(np.mean(np.array(actions_xy), axis=0))
            states[-1] = dgl.batch(states[-1])
            actions[-1] = torch.tensor(np.array(actions[-1]))
            # Zero-pad the action sequence to max_len rows.
            padded = torch.zeros(self.max_len, actions[-1].size(1))
            padded[:actions[-1].size(0), :] = actions[-1]
            actions[-1] = padded
        return states, actions, lens, n_nodes

    def __getitem__(self, idx):
        # Episode idx covers trials idx*num_trials .. idx*num_trials + num_trials - 1.
        ep_trials = [idx * self.num_trials + t for t in range(self.num_trials)]
        return self.get_trial(ep_trials, step=self.action_range)

    def __len__(self):
        return self.tot_trials // self.num_trials
class TestTransitionDatasetSequence(torch.utils.data.Dataset):
    """
    Evaluation dataset for the behavior cloning model.

    Pairs every episode's expected (plausible, ``*e`` files) trials with the
    matching unexpected (implausible, ``*u`` files) trials so the two can be
    compared.

    Args:
        path: root directory of the evaluation dataset.
        task_type: video type (sub-directory) to load.
        mode: split tag used in the cached index filename (default 'test').
        num_test: number of test state-action pairs.
        num_trials: number of trials in an episode.
        action_range: number of frames to skip; actions are averaged over
            this many frames (displacement of the agent).
        process_data: if truthy, (re)build the frame/action index from the
            raw json files and cache it; otherwise load the cached index.
        max_len: max number of context state-action pairs per trial.

    __getitem__ returns, in order:
        dem_expected_states, dem_expected_actions, dem_expected_lens,
        dem_expected_nodes, dem_unexpected_states, dem_unexpected_actions,
        dem_unexpected_lens, dem_unexpected_nodes, query_expected_frames,
        target_expected_actions, query_unexpected_frames,
        target_unexpected_actions
    """

    # Edge type names, in the same order as `rel_deter_func` and the
    # adjacency matrices returned by `_get_spatial_rel`.
    EDGE_TYPES = [
        'is_top_adj', 'is_left_adj', 'is_top_right_adj', 'is_top_left_adj',
        'is_down_adj', 'is_right_adj', 'is_down_left_adj', 'is_down_right_adj',
        'is_left', 'is_right', 'is_front', 'is_back', 'is_aligned', 'is_close'
    ]

    def __init__(
            self,
            path,
            task_type=None,
            mode="test",
            num_test=1,
            num_trials=9,
            action_range=10,
            process_data=0,
            max_len=30
    ):
        self.path = path
        self.task_type = task_type
        self.mode = mode
        self.num_trials = num_trials
        self.num_test = num_test
        self.action_range = action_range
        self.max_len = max_len
        # Ordered pairs of distinct trials: 9P2 = 9 * 8.
        self.ep_combs = self.num_trials * (self.num_trials - 2)
        self.eps = [[x, y] for x in range(self.num_trials)
                    for y in range(self.num_trials) if x != y]
        print(f'reading files of type {task_type} in {mode}')
        task_dir = os.path.join(self.path, task_type)

        def _sorted_listing(suffix, exclude_index):
            # Sorted paths of the files ending in `suffix`; annotation
            # listings additionally skip cached index files.
            return sorted(
                os.path.join(task_dir, x) for x in os.listdir(task_dir)
                if x.endswith(suffix) and (not exclude_index or 'index' not in x)
            )

        self.path_list_exp = _sorted_listing('e.mp4', False)
        self.json_list_exp = _sorted_listing('e.json', True)
        self.path_list_un = _sorted_listing('u.mp4', False)
        self.json_list_un = _sorted_listing('u.json', True)
        self.data_expected = []
        self.data_unexpected = []
        # Cached index filenames. The original wrote a hard-coded 'test' tag
        # but read back f'..._{mode}_...', which diverged whenever
        # mode != 'test'; both sides now use `mode` consistently.
        exp_index = os.path.join(self.path, f'jindex_bib_{mode}_{task_type}e.json')
        un_index = os.path.join(self.path, f'jindex_bib_{mode}_{task_type}u.json')
        if process_data:
            # Index the data once (frame index + action tuples) to speed up
            # retrieval, then cache the index on disk.
            self.data_expected = index_data(self.json_list_exp, self.path_list_exp)
            with open(exp_index, 'w') as fp:
                json.dump({'data_tuples': self.data_expected}, fp)
            self.data_unexpected = index_data(self.json_list_un, self.path_list_un)
            with open(un_index, 'w') as fp:
                json.dump({'data_tuples': self.data_unexpected}, fp)
        else:
            with open(exp_index, 'r') as fp:
                self.data_expected = json.load(fp)['data_tuples']
            with open(un_index, 'r') as fp:
                self.data_unexpected = json.load(fp)['data_tuples']
        # Relation predicates, aligned index-for-index with EDGE_TYPES.
        self.rel_deter_func = [
            is_top_adj, is_left_adj, is_top_right_adj, is_top_left_adj,
            is_down_adj, is_right_adj, is_down_left_adj, is_down_right_adj,
            is_left, is_right, is_front, is_back, is_aligned, is_close
        ]
        print('Done.')

    def _get_frame_graph(self, jsonfile, frame_idx):
        """Build the DGL heterograph of one frame of ``jsonfile``."""
        with open(jsonfile, 'rb') as f:
            frame_data = json.load(f)
        # Episodes are stored as nested lists; flatten to index by frame.
        flat_list = [x for xs in frame_data for x in xs]
        grid_objs = parse_objects(flat_list[frame_idx])
        # One NxN adjacency matrix per relation, in EDGE_TYPES order.
        adj = self._get_spatial_rel(grid_objs)
        graph_data = {}
        for rel_idx, rel_name in enumerate(self.EDGE_TYPES):
            src, dst = np.nonzero(adj[rel_idx])
            graph_data[('obj', rel_name, 'obj')] = (torch.tensor(src), torch.tensor(dst))
        g = dgl.heterograph(graph_data, num_nodes_dict={'obj': len(grid_objs)})
        return self._add_node_features(grid_objs, g)

    def _add_node_features(self, objs, graph):
        """Attach per-node type / position / color / shape features."""
        for obj_idx, obj in enumerate(objs):
            graph.nodes[obj_idx].data['type'] = torch.tensor(obj.type)
            graph.nodes[obj_idx].data['pos'] = torch.tensor([[obj.x, obj.y]], dtype=torch.float32)
            # attributes must be exactly (color, shape), both fully specified
            assert len(obj.attributes) == 2 and None not in obj.attributes[0] and None not in obj.attributes[1]
            graph.nodes[obj_idx].data['color'] = torch.tensor([obj.attributes[0]])
            graph.nodes[obj_idx].data['shape'] = torch.tensor([obj.attributes[1]])
        return graph

    def _get_spatial_rel(self, objs):
        """Return one NxN binary adjacency matrix per relation predicate."""
        n = len(objs)
        spatial_tensors = [np.zeros([n, n]) for _ in range(len(self.rel_deter_func))]
        # Reference "up" direction passed to every predicate (loop-invariant).
        direction_vec = np.array((0, -1))
        for obj_idx1, obj1 in enumerate(objs):
            for obj_idx2, obj2 in enumerate(objs):
                for rel_idx, func in enumerate(self.rel_deter_func):
                    if func(obj1, obj2, direction_vec):
                        spatial_tensors[rel_idx][obj_idx1, obj_idx2] = 1.0
        return spatial_tensors

    def get_trial(self, trials, data, step=10):
        """
        Sample every ``step``-th frame of each trial of ``data`` (capped at
        ``max_len``) and return graphs, pooled actions, lengths, node counts.
        """
        states, actions, lens, n_nodes = [], [], [], []
        trial_len = []
        for t in trials:
            samples = [(t, n) for n in range(0, len(data[t]), step)]
            trial_len.append(samples[:self.max_len])
        for samples in trial_len:
            states.append([])
            actions.append([])
            lens.append(len(samples))
            for t, n in samples:
                video = data[t][n][0]
                states[-1].append(self._get_frame_graph(video, data[t][n][1]))
                n_nodes.append(states[-1][-1].number_of_nodes())
                # Actions are averaged over the next `action_range` frames
                # (slicing clamps at the end of the trial).
                actions_xy = [d[2] for d in data[t][n:n + self.action_range]]
                actions[-1].append(np.mean(np.array(actions_xy), axis=0))
            states[-1] = dgl.batch(states[-1])
            actions[-1] = torch.tensor(np.array(actions[-1]))
            # Zero-pad the action sequence to max_len rows.
            padded = torch.zeros(self.max_len, actions[-1].size(1))
            padded[:actions[-1].size(0), :] = actions[-1]
            actions[-1] = padded
        return states, actions, lens, n_nodes

    def get_test(self, trial, data, step=10):
        """Return the batched frame graphs and pooled actions of one trial."""
        states = []
        actions = []
        for t, n in ((trial, n) for n in range(0, len(data[trial]), step)):
            video = data[t][n][0]
            states.append(self._get_frame_graph(video, data[t][n][1]))
            # Actions are averaged over the next `action_range` frames
            # (slicing clamps at the end of the trial).
            actions_xy = [d[2] for d in data[t][n:n + self.action_range]]
            actions.append(np.mean(np.array(actions_xy), axis=0))
        states = dgl.batch(states)
        actions = torch.tensor(np.array(actions))
        return states, actions

    def __getitem__(self, idx):
        # First num_trials - 1 trials are demonstrations, the last is the query.
        ep_trials = [idx * self.num_trials + t for t in range(self.num_trials)]
        dem_expected_states, dem_expected_actions, dem_expected_lens, dem_expected_nodes = self.get_trial(
            ep_trials[:-1], self.data_expected, step=self.action_range
        )
        dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens, dem_unexpected_nodes = self.get_trial(
            ep_trials[:-1], self.data_unexpected, step=self.action_range
        )
        query_expected_frames, target_expected_actions = self.get_test(
            ep_trials[-1], self.data_expected, step=self.action_range
        )
        query_unexpected_frames, target_unexpected_actions = self.get_test(
            ep_trials[-1], self.data_unexpected, step=self.action_range
        )
        return dem_expected_states, dem_expected_actions, dem_expected_lens, dem_expected_nodes, \
            dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens, dem_unexpected_nodes, \
            query_expected_frames, target_expected_actions, \
            query_unexpected_frames, target_unexpected_actions

    def __len__(self):
        return len(self.path_list_exp)
if __name__ == '__main__':
    # Sanity check over the evaluation data: every demonstration and query
    # graph must contain at least one agent node. The agent is assumed to be
    # encoded by the one-hot type vector below (first of 9 object classes) —
    # TODO confirm against utils.grid_object.parse_objects.
    agent_type = torch.tensor([1., 0., 0., 0., 0., 0., 0., 0., 0.])

    def has_agent(graph):
        # Row-wise match: True iff some node's full 'type' vector equals
        # agent_type. (The original used `tensor in graph.ndata['type']`,
        # which is torch's element-wise __contains__: true whenever ANY
        # single entry matches — a nearly vacuous test for one-hot data.)
        return bool((graph.ndata['type'] == agent_type).all(dim=-1).any())

    types = ['preference', 'multi_agent', 'inaccessible_goal',
             'efficiency_irrational', 'efficiency_time', 'efficiency_path',
             'instrumental_no_barrier', 'instrumental_blocking_barrier',
             'instrumental_inconsequential_barrier']
    for t in types:
        ttd = TestTransitionDatasetSequence(path='/datasets/external/bib_evaluation_1_1/',
                                            task_type=t, process_data=0, mode='test')
        for i in range(len(ttd)):
            print(i, end='\r')
            (dem_expected_states, dem_expected_actions, dem_expected_lens, dem_expected_nodes,
             dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens, dem_unexpected_nodes,
             query_expected_frames, target_expected_actions,
             query_unexpected_frames, target_unexpected_actions) = ttd[i]
            # 8 demonstration trials per episode.
            for j in range(8):
                if not has_agent(dem_expected_states[j]):
                    print(i)
                if not has_agent(dem_unexpected_states[j]):
                    print(i)
            # Query graphs only need checking once (the original re-checked
            # them on every iteration of the j loop).
            if not has_agent(query_expected_frames):
                print(i)
            if not has_agent(query_unexpected_frames):
                print(i)