up

2024-02-01 15:40:47 +01:00 · 2024-02-01 15:40:47 +01:00 · de0bea7508
commit de0bea7508
parent a333481e05
18 changed files with 3150 additions and 2 deletions
--- a/README.md
+++ b/README.md
@ -1,3 +1,75 @@
-# IRENE
+<div align="center">
+<h1> Neural Reasoning about Agents' Goals, Preferences, and Actions </h1>

-Official code of "Neural Reasoning About Agents' Goals, Preferences, and Actions"
+**[Matteo Bortoletto][1], &nbsp; [Lei Shi][2], &nbsp; [Andreas Bulling][3]** <br> <br>
+**AAAI'24, Vancouver, CA** <br>
+**[[Paper][4]]**
+
+</div>
+
+# Citation
+If you find our code useful or use it in your own projects, please cite our paper:
+
+```bibtex
+@inproceedings{bortoletto2024neural,
+  author = {Bortoletto, Matteo and Shi, Lei and Bulling, Andreas},
+  title = {{Neural Reasoning about Agents' Goals, Preferences, and Actions}},
+  booktitle = {Proc. 38th AAAI Conference on Artificial Intelligence (AAAI)},
+  year = {2024},
+}
+```
+
+# Setup
+
+This code is based on the [original implementation][5] of the BIB benchmark. 
+
+## Using `virtualenv`
+```
+python -m virtualenv /path/to/env
+source /path/to/env/bin/activate
+pip install -r requirements.txt
+```
+
+## Using `conda`
+```
+conda create --name <env_name> python=3.8.10 pip=20.0.2 cudatoolkit=10.2.89
+conda activate <env_name>
+pip install -r requirements_conda.txt
+pip install dgl-cu102 dglgo -f https://data.dgl.ai/wheels/repo.html
+```
+
+
+# Running the code 
+
+## Activate the environment
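+Run `source bibdgl/bin/activate` (replace `bibdgl` with the path of the environment you created during setup, or use `conda activate <env_name>` for a conda environment).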
+
+## Index data
+This will create the json files with all the indexed frames for each episode in each video.
+```
+python utils/index_data.py
+```
+You need to manually set `mode` in the dataset class (in main). 
+
+## Generate graphs 
+This will generate the graphs from the videos:
+```
+python utils/build_graphs.py --mode MODE --cpus NUM_CPUS
+```
+`MODE` can be `train`, `val` or `test`. NOTE: check `utils/build_graphs.py` to make sure you're loading the correct dataset to generate the graphs you want. 
+
+## Training
+Use the `run_train.sh` script.
+
+## Testing 
+Use the `run_test.sh` script.
+
+# Hardware setup
+All models are trained on an NVIDIA Tesla V100-SXM2-32GB GPU. 
+
+
+[1]: https://mattbortoletto.github.io/
+[2]: https://perceptualui.org/people/shi/
+[3]: https://perceptualui.org/people/bulling/
+[4]: https://perceptualui.org/publications/bortoletto24_aaai.pdf
+[5]: https://github.com/kanishkg/bib-baselines
--- a/run_test.sh
+++ b/run_test.sh
@ -0,0 +1,9 @@
+# Evaluate a trained `graphbcrnn` checkpoint with test_tom.py on the
+# `efficiency_irrational` task, on GPU 1, scoring episodes with the `max`
+# surprise type. The checkpoint and data paths below are machine-specific.
+# NOTE(review): the echo looks like a personal run label — confirm or remove.
+echo 314 e31
+
+CUDA_VISIBLE_DEVICES=1 python test_tom.py \
+--model_type graphbcrnn \
+--types efficiency_irrational \
+--ckpt /projects/bortoletto/icml2023_matteo/wandb/run-20221224_135525-8i1r2aqy/files/bib/8i1r2aqy/checkpoints/epoch\=31-step\=22399.ckpt \
+--data_path /datasets/external/bib_evaluation_1_1/graphs/all_tasks \
+--process_data 0 \
+--surprise_type max
--- a/run_train.sh
+++ b/run_train.sh
@ -0,0 +1,18 @@
+# Train a `graphbcrnn` model with train_tom.py on GPU 0 using three task
+# types (single_object, preference, instrumental_action), the RSAGEv4 GNN
+# encoder and a fixed seed. The data path below is machine-specific.
+CUDA_VISIBLE_DEVICES=0 python train_tom.py \
+--model_type graphbcrnn \
+--types single_object preference instrumental_action \
+--data_path /datasets/external/bib_train/graphs/all_tasks/ \
+--seed 7 \
+--batch_size 32 \
+--max_epochs 35 \
+--gpus 1 \
+--auto_select_gpus True \
+--num_workers 2 \
+--stochastic_weight_avg True \
+--lr 5e-4 \
+--check_val_every_n_epoch 1 \
+--track_grad_norm 2 \
+--gradient_clip_val 10 \
+--gnn_type RSAGEv4 \
+--state_dim 96 \
+--aggregation sum
--- a/test_tom.py
+++ b/test_tom.py
@ -0,0 +1,122 @@
+from argparse import ArgumentParser
+import numpy as np
+from tqdm import tqdm
+
+import torch
+from torch.utils.data import DataLoader
+import torch.nn.functional as F
+import dgl 
+
+from tom.dataset import TestToMnetDGLDataset, collate_function_seq_test
+from tom.model import GraphBC_T, GraphBCRNN
+
+
+def get_z_scores(total, total_expected, total_unexpected):
+    """Print the z-scores of the expected and unexpected score means.
+
+    Each group mean is standardised with the mean and standard deviation
+    of ``total``. Nothing is returned; the two values are printed.
+
+    Args:
+        total: all scores, used as the reference distribution.
+        total_expected: scores of the expected episodes.
+        total_unexpected: scores of the unexpected episodes.
+    """
+    reference_mean = np.mean(total)
+    reference_std = np.std(total)
+    groups = (("expected", total_expected), ("unexpected", total_unexpected))
+    for label, scores in groups:
+        print(f"Z-Score {label}: ", (np.mean(scores) - reference_mean) / reference_std)
+
+
+def _episode_surprise(model, device, dem_states, dem_actions, dem_lens,
+                      query_frames, target_actions):
+    """Return the per-frame surprise of one test episode.
+
+    The batched ``query_frames`` graph is split into single frames; for each
+    frame the model is called with the demonstrations, that frame and the
+    matching slice ``target_actions[:, i, :]``. The surprise of a frame is
+    the MSE between the two tensors the model returns.
+
+    Returns:
+        A list with one numpy scalar per query frame.
+    """
+    surprise = []
+    for i, frame in enumerate(dgl.unbatch(query_frames)):
+        test_actions, test_actions_pred = model(
+            [dem_states, dem_actions, dem_lens, frame.to(device), target_actions[:, i, :]]
+        )
+        loss = F.mse_loss(test_actions, test_actions_pred)
+        surprise.append(loss.cpu().detach().numpy())
+    return surprise
+
+
+parser = ArgumentParser()
+
+parser.add_argument('--model_type', type=str, default='graphbcrnn')
+parser.add_argument('--ckpt', type=str, default=None, help='path to checkpoint')
+parser.add_argument('--data_path', type=str, default=None, help='path to the data')
+parser.add_argument('--process_data', type=int, default=0)
+# `choices` rejects typos that would otherwise silently leave the accuracy at 0.
+parser.add_argument('--surprise_type', type=str, default='max', choices=['mean', 'max'],
+                    help='surprise type: mean, max. This is used for comparing the plausibility scores of the two test episodes')
+parser.add_argument('--types', nargs='+', type=str,
+                    default=[
+                        'preference', 'multi_agent', 'inaccessible_goal',
+                        'efficiency_irrational', 'efficiency_time','efficiency_path',
+                        'instrumental_no_barrier', 'instrumental_blocking_barrier', 'instrumental_inconsequential_barrier'
+                    ],
+                    help='types of tasks used for training / testing')
+parser.add_argument('--filename', type=str, default='')
+
+args = parser.parse_args()
+
+filename = args.filename
+
+if args.model_type == 'graphbct':
+    model = GraphBC_T.load_from_checkpoint(args.ckpt)
+elif args.model_type == 'graphbcrnn':
+    model = GraphBCRNN.load_from_checkpoint(args.ckpt)
+else:
+    raise ValueError('Unknown model type.')
+
+device = 'cuda'
+model.to(device)
+model.eval()
+with torch.no_grad():
+    for t in args.types:
+        if args.model_type == 'graphbcrnn':
+            test_dataset = TestToMnetDGLDataset(
+                path=args.data_path,
+                task_type=t,
+                mode='test'
+            )
+        else:
+            # Only the graphbcrnn data pipeline exists in this script; fail
+            # explicitly instead of hitting an undefined `test_dataset`.
+            raise NotImplementedError(
+                'Testing is only implemented for model_type graphbcrnn.'
+            )
+        test_dataloader = DataLoader(
+            test_dataset,
+            batch_size=1,
+            num_workers=1,
+            pin_memory=True,
+            collate_fn=collate_function_seq_test,
+            shuffle=False
+        )
+        count = 0
+        total, total_expected, total_unexpected = [], [], []
+        pbar = tqdm(test_dataloader)
+        for j, batch in enumerate(pbar):
+            dem_expected_states, dem_expected_actions, dem_expected_lens, \
+            dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens, \
+            query_expected_frames, target_expected_actions, \
+            query_unexpected_frames, target_unexpected_actions = batch
+            dem_expected_states = dem_expected_states.to(device)
+            dem_expected_actions = dem_expected_actions.to(device)
+            dem_unexpected_states = dem_unexpected_states.to(device)
+            dem_unexpected_actions = dem_unexpected_actions.to(device)
+            target_expected_actions = target_expected_actions.to(device)
+            target_unexpected_actions = target_unexpected_actions.to(device)
+
+            # calculate the plausibility scores for the expected episode
+            surprise_expected = _episode_surprise(
+                model, device,
+                dem_expected_states, dem_expected_actions, dem_expected_lens,
+                query_expected_frames, target_expected_actions
+            )
+            mean_expected_surprise = np.mean(surprise_expected)
+            max_expected_surprise = np.max(surprise_expected)
+
+            # calculate the plausibility scores for the unexpected episode
+            surprise_unexpected = _episode_surprise(
+                model, device,
+                dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens,
+                query_unexpected_frames, target_unexpected_actions
+            )
+            mean_unexpected_surprise = np.mean(surprise_unexpected)
+            max_unexpected_surprise = np.max(surprise_unexpected)
+
+            # Score 1 when the expected episode is less surprising, 0.5 on a
+            # tie, 0 otherwise. The comparisons are parenthesised because
+            # `a < b + 0.5 * (a == b)` parses as `a < (b + ...)`, which would
+            # score a tie as 1.
+            correct_mean = float(mean_expected_surprise < mean_unexpected_surprise) \
+                + 0.5 * float(mean_expected_surprise == mean_unexpected_surprise)
+            correct_max = float(max_expected_surprise < max_unexpected_surprise) \
+                + 0.5 * float(max_expected_surprise == max_unexpected_surprise)
+            if args.surprise_type == 'max':
+                count += correct_max
+            elif args.surprise_type == 'mean':
+                count += correct_mean
+            pbar.set_postfix({'accuracy': count/(j+1.), 'type': t})
+
+            # NOTE(review): z-scores always use the max surprise, even when
+            # --surprise_type is `mean` — confirm this is intended.
+            total_expected.append(max_expected_surprise)
+            total_unexpected.append(max_unexpected_surprise)
+            total.append(max_expected_surprise)
+            total.append(max_unexpected_surprise)
+        get_z_scores(total, total_expected, total_unexpected)
--- a/tom/__init__.py
+++ b/tom/__init__.py
--- a/tom/dataset.py
+++ b/tom/dataset.py
@ -0,0 +1,310 @@
+import pickle as pkl 
+import os 
+import torch
+import torch.utils.data
+import torch.nn.functional as F 
+import dgl 
+import random 
+from dgl.data import DGLDataset
+
+
+def collate_function_seq(batch):
+    """Collate training samples into a single batch.
+
+    Every sample is indexed at positions 0-4: demonstration graph,
+    demonstration actions, demonstration lengths, query graph and target
+    action. Graphs are merged with ``dgl.batch``, tensors with
+    ``torch.stack``; the lengths are returned as a plain list.
+
+    Returns:
+        ``[dem_frames, dem_actions, dem_lens, query_frames, target_actions]``
+    """
+    columns = [[sample[k] for sample in batch] for k in range(5)]
+    return [
+        dgl.batch(columns[0]),
+        torch.stack(columns[1]),
+        columns[2],
+        dgl.batch(columns[3]),
+        torch.stack(columns[4]),
+    ]
+
+def collate_function_seq_test(batch):
+    """Collate test samples (expected / unexpected episode pairs).
+
+    The demonstration graphs and actions are taken from the FIRST sample
+    only (a list of graphs is batched, a list of action tensors is stacked
+    and given a leading dimension of size 1). Lengths, query frames and
+    target actions are gathered over every sample in ``batch``.
+
+    Returns:
+        A 10-item list: expected demos (states, actions, lens), unexpected
+        demos (states, actions, lens), expected query frames and targets,
+        unexpected query frames and targets.
+    """
+    first = batch[0]
+
+    def column(k):
+        # Position k of every sample in the batch.
+        return [sample[k] for sample in batch]
+
+    dem_expected_states = dgl.batch(first[0])
+    dem_expected_actions = torch.stack(first[1]).unsqueeze(dim=0)
+    dem_expected_lens = column(2)
+    dem_unexpected_states = dgl.batch(first[3])
+    dem_unexpected_actions = torch.stack(first[4]).unsqueeze(dim=0)
+    dem_unexpected_lens = column(5)
+    query_expected_frames = dgl.batch(column(6))
+    target_expected_actions = torch.stack(column(7))
+    query_unexpected_frames = dgl.batch(column(8))
+    target_unexpected_actions = torch.stack(column(9))
+    return [
+        dem_expected_states, dem_expected_actions, dem_expected_lens,
+        dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens,
+        query_expected_frames, target_expected_actions,
+        query_unexpected_frames, target_unexpected_actions,
+    ]
+
+def collate_function_mental(batch):
+    """Collate samples that also carry the past frames of the test trial.
+
+    Every sample is indexed at positions 0-7. Positions 0, 3 and 6 hold
+    graphs (merged with ``dgl.batch``), positions 1, 4 and 7 hold tensors
+    (merged with ``torch.stack``), positions 2 and 5 hold lengths that are
+    returned as plain lists.
+
+    Returns:
+        ``[dem_frames, dem_actions, dem_lens, past_test_frames,
+        past_test_actions, past_test_len, query_frames, target_actions]``
+    """
+    columns = [[sample[k] for sample in batch] for k in range(8)]
+    return [
+        dgl.batch(columns[0]),
+        torch.stack(columns[1]),
+        columns[2],
+        dgl.batch(columns[3]),
+        torch.stack(columns[4]),
+        columns[5],
+        dgl.batch(columns[6]),
+        torch.stack(columns[7]),
+    ]
+
+
+class ToMnetDGLDataset(DGLDataset):
+    """
+    Training dataset class.
+
+    Samples are read from ``<idx>.pkl`` files inside a folder derived from
+    ``path``, ``mode`` and ``types``:
+
+    * ``train`` with 4 types: ``<path>train_dgl_hetero_nobound_4feats/``
+    * ``train`` with 1-3 types: the same folder name followed by ``_`` and
+      the upper-cased first letter of each type (e.g. ``_SPI``)
+    * ``val`` (exactly one type):
+      ``<path>val_dgl_hetero_nobound_4feats/<type>/``
+    """
+    def __init__(self, path, types=None, mode="train"):
+        """
+        Args:
+            path: root folder of the graphs (expected to end with ``/``).
+            types: list of task-type names; 1 to 4 for ``train``, exactly
+                one for ``val``.
+            mode: ``'train'`` or ``'val'``.
+
+        Raises:
+            ValueError: unsupported ``mode`` or number of types.
+        """
+        self.path = path
+        self.types = types
+        self.mode = mode
+        print('Mode:', self.mode)
+
+        if self.mode == 'train':
+            n_types = len(self.types)
+            if n_types == 4:
+                self.path = self.path + self.mode + '_dgl_hetero_nobound_4feats/'
+            elif 1 <= n_types <= 3:
+                initials = ''.join(t[0].upper() for t in self.types)
+                self.path = self.path + self.mode + '_dgl_hetero_nobound_4feats_' + initials + '/'
+                if n_types > 1:
+                    print(initials)
+            else:
+                raise ValueError(
+                    'Number of types must be between 1 and 4, got %d.' % n_types
+                )
+        elif self.mode == 'val':
+            assert len(self.types) == 1, 'Validation expects exactly one task type.'
+            self.path = self.path + self.mode + '_dgl_hetero_nobound_4feats/' + self.types[0] + '/'
+        else:
+            raise ValueError("Unsupported mode %r (expected 'train' or 'val')." % (self.mode,))
+
+    def get_test(self, states, actions):
+        """Pick one random frame of a trial and its action.
+
+        ``states`` is a batched graph: it is unbatched into frames, a frame
+        index is drawn uniformly, and that frame plus ``actions[index]`` are
+        returned.
+        """
+        frame_graphs = dgl.unbatch(states)
+        query_idx = random.randint(0, len(frame_graphs) - 1)
+        return frame_graphs[query_idx], actions[query_idx]
+
+    def __getitem__(self, idx):
+        """Load sample ``idx`` and split it into demonstrations and a query.
+
+        The trials stored in the pickle are shuffled; the last one after
+        shuffling supplies the query frame, the others are the
+        demonstrations.
+
+        Returns:
+            ``(dem_s, dem_a, dem_lens, test_s, test_a)``
+        """
+        with open(self.path + str(idx) + '.pkl', 'rb') as f:
+            states, actions, lens, _ = pkl.load(f)
+        trials = list(zip(states, actions, lens))
+        random.shuffle(trials)
+        states, actions, lens = (list(column) for column in zip(*trials))
+        test_s, test_a = self.get_test(states[-1], actions[-1])
+        dem_s = dgl.batch(states[:-1])
+        dem_a = torch.stack(actions[:-1])
+        dem_lens = lens[:-1]
+        return dem_s, dem_a, dem_lens, test_s, test_a
+
+    def __len__(self):
+        """Number of entries in the dataset folder."""
+        return len(os.listdir(self.path))
+
+
+class TestToMnetDGLDataset(DGLDataset):
+    """
+    Testing dataset class.
+
+    Samples are read from
+    ``<path>_dgl_hetero_nobound_4feats/<task_type>/<idx>.pkl``.
+    """
+    def __init__(self, path, task_type=None, mode="test"):
+        """Store the arguments and derive the sample folder; only ``mode='test'`` is accepted."""
+        self.path = path
+        self.type = task_type
+        self.mode = mode
+        print('Mode:', self.mode)
+
+        if self.mode != 'test':
+            raise ValueError
+        self.path = self.path + '_dgl_hetero_nobound_4feats/' + self.type + '/'
+
+    def __getitem__(self, idx):
+        """Load one expected / unexpected episode pair.
+
+        The pickle holds 12 entries; the 4th and 8th are discarded (the
+        original code notes them as ``n_nodes``). Each demonstration list
+        must contain 8 entries, and each batched query graph must hold as
+        many frames as its target-action tensor has rows.
+        """
+        with open(self.path + str(idx) + '.pkl', 'rb') as f:
+            record = pkl.load(f)
+        (exp_states, exp_actions, exp_lens, _,
+         unexp_states, unexp_actions, unexp_lens, _,
+         exp_queries, exp_targets,
+         unexp_queries, unexp_targets) = record
+
+        demo_parts = (exp_states, exp_actions, exp_lens,
+                      unexp_states, unexp_actions, unexp_lens)
+        for part in demo_parts:
+            assert len(part) == 8
+        for queries, targets in ((exp_queries, exp_targets),
+                                 (unexp_queries, unexp_targets)):
+            assert len(dgl.unbatch(queries)) == targets.size()[0]
+
+        return exp_states, exp_actions, exp_lens, \
+               unexp_states, unexp_actions, unexp_lens, \
+               exp_queries, exp_targets, \
+               unexp_queries, unexp_targets
+
+    def __len__(self):
+        """Number of entries in the dataset folder."""
+        return len(os.listdir(self.path))
+
+
+class ToMnetDGLDatasetUndersample(DGLDataset):
+    """
+    Training dataset class for the behavior cloning mlp model.
+
+    Identical to the plain training dataset except that the first
+    ``INDEX_OFFSET`` sample files are skipped: item ``i`` is read from
+    ``<i + INDEX_OFFSET>.pkl`` and the reported length is reduced
+    accordingly.
+    """
+    # Number of leading sample files that are skipped.
+    INDEX_OFFSET = 3175
+
+    def __init__(self, path, types=None, mode="train"):
+        """
+        Args:
+            path: root folder of the graphs (expected to end with ``/``).
+            types: list of task-type names; 1 to 4 for ``train``, exactly
+                one for ``val``.
+            mode: ``'train'`` or ``'val'``.
+
+        Raises:
+            ValueError: unsupported ``mode`` or number of types.
+        """
+        self.path = path
+        self.types = types
+        self.mode = mode
+        print('Mode:', self.mode)
+
+        if self.mode == 'train':
+            n_types = len(self.types)
+            if n_types == 4:
+                self.path = self.path + self.mode + '_dgl_hetero_nobound_4feats/'
+            elif 1 <= n_types <= 3:
+                initials = ''.join(t[0].upper() for t in self.types)
+                self.path = self.path + self.mode + '_dgl_hetero_nobound_4feats_' + initials + '/'
+                if n_types > 1:
+                    print(initials)
+            else:
+                raise ValueError(
+                    'Number of types must be between 1 and 4, got %d.' % n_types
+                )
+        elif self.mode == 'val':
+            assert len(self.types) == 1, 'Validation expects exactly one task type.'
+            self.path = self.path + self.mode + '_dgl_hetero_nobound_4feats/' + self.types[0] + '/'
+        else:
+            raise ValueError("Unsupported mode %r (expected 'train' or 'val')." % (self.mode,))
+        print('Undersampled dataset!')
+
+    def get_test(self, states, actions):
+        """Pick one random frame of a trial and its action.
+
+        ``states`` is a batched graph: it is unbatched into frames, a frame
+        index is drawn uniformly, and that frame plus ``actions[index]`` are
+        returned.
+        """
+        frame_graphs = dgl.unbatch(states)
+        query_idx = random.randint(0, len(frame_graphs) - 1)
+        return frame_graphs[query_idx], actions[query_idx]
+
+    def __getitem__(self, idx):
+        """Load sample ``idx + INDEX_OFFSET`` and split it into demos and a query.
+
+        The trials stored in the pickle are shuffled; the last one after
+        shuffling supplies the query frame, the others are the
+        demonstrations.
+
+        Returns:
+            ``(dem_s, dem_a, dem_lens, test_s, test_a)``
+        """
+        idx = idx + self.INDEX_OFFSET
+        with open(self.path + str(idx) + '.pkl', 'rb') as f:
+            states, actions, lens, _ = pkl.load(f)
+        trials = list(zip(states, actions, lens))
+        random.shuffle(trials)
+        states, actions, lens = (list(column) for column in zip(*trials))
+        test_s, test_a = self.get_test(states[-1], actions[-1])
+        dem_s = dgl.batch(states[:-1])
+        dem_a = torch.stack(actions[:-1])
+        dem_lens = lens[:-1]
+        return dem_s, dem_a, dem_lens, test_s, test_a
+
+    def __len__(self):
+        """Number of folder entries minus the skipped ``INDEX_OFFSET`` files."""
+        return len(os.listdir(self.path)) - self.INDEX_OFFSET
+
+
+class ToMnetDGLDatasetMental(DGLDataset):
+    """
+    Training dataset class.
+
+    In addition to the demonstrations and the query frame, every sample
+    carries the frames and actions of the test trial that precede the query
+    frame. The past actions are zero-padded to ``MAX_TRIAL_LEN`` rows.
+    """
+    # Row count the past-action tensor is padded to.
+    MAX_TRIAL_LEN = 41
+
+    def __init__(self, path, types=None, mode="train"):
+        """
+        Args:
+            path: root folder of the graphs (expected to end with ``/``).
+            types: list of task-type names; 1 to 4 for ``train``, exactly
+                one for ``val``.
+            mode: ``'train'`` or ``'val'``.
+
+        Raises:
+            ValueError: unsupported ``mode`` or number of types.
+        """
+        self.path = path
+        self.types = types
+        self.mode = mode
+        print('Mode:', self.mode)
+
+        if self.mode == 'train':
+            n_types = len(self.types)
+            if n_types == 4:
+                self.path = self.path + self.mode + '_dgl_hetero_nobound_4feats/'
+            elif 1 <= n_types <= 3:
+                initials = ''.join(t[0].upper() for t in self.types)
+                self.path = self.path + self.mode + '_dgl_hetero_nobound_4feats_' + initials + '/'
+                if n_types > 1:
+                    print(initials)
+            else:
+                raise ValueError(
+                    'Number of types must be between 1 and 4, got %d.' % n_types
+                )
+        elif self.mode == 'val':
+            assert len(self.types) == 1, 'Validation expects exactly one task type.'
+            self.path = self.path + self.mode + '_dgl_hetero_nobound_4feats/' + self.types[0] + '/'
+        else:
+            raise ValueError("Unsupported mode %r (expected 'train' or 'val')." % (self.mode,))
+
+    def get_test(self, states, actions):
+        """
+        Draw a random query frame and collect what precedes it.
+
+        return: past_test_graphs, past_test_actions, past_test_len,
+                test_graph, test_action
+
+        ``past_test_len`` is the drawn frame index. When index 0 is drawn
+        there are no past frames: the test frame and its action are returned
+        as the "past" and the length is reported as 1.
+        """
+        frame_graphs = dgl.unbatch(states)
+        query_idx = random.randint(0, len(frame_graphs) - 1)
+        test_graph = frame_graphs[query_idx]
+        test_action = actions[query_idx]
+
+        if query_idx == 0:
+            # NOTE: since there are no past frames, return the test frame and
+            # action and query_idx=1; not sure what would be a better
+            # alternative.
+            padded_action = F.pad(
+                test_action.unsqueeze(0), (0, 0, 0, self.MAX_TRIAL_LEN - 1), 'constant', 0
+            )
+            return test_graph, padded_action, 1, test_graph, test_action
+
+        if query_idx == 1:
+            # A single past frame is returned as is, not re-batched.
+            past_test_graphs = frame_graphs[0]
+        else:
+            past_test_graphs = dgl.batch(frame_graphs[:query_idx])
+        past_test_actions = F.pad(
+            actions[:query_idx], (0, 0, 0, self.MAX_TRIAL_LEN - query_idx), 'constant', 0
+        )
+        return past_test_graphs, past_test_actions, query_idx, test_graph, test_action
+
+    def __getitem__(self, idx):
+        """Load sample ``idx`` and split it into demos, past frames and query.
+
+        The trials stored in the pickle are shuffled; the last one after
+        shuffling is the test trial, the others are the demonstrations.
+
+        Returns:
+            ``(dem_s, dem_a, dem_lens, past_test_s, past_test_a,
+            past_test_len, test_s, test_a)``
+        """
+        with open(self.path + str(idx) + '.pkl', 'rb') as f:
+            states, actions, lens, _ = pkl.load(f)
+        trials = list(zip(states, actions, lens))
+        random.shuffle(trials)
+        states, actions, lens = (list(column) for column in zip(*trials))
+        past_test_s, past_test_a, past_test_len, test_s, test_a = self.get_test(states[-1], actions[-1])
+        dem_s = dgl.batch(states[:-1])
+        dem_a = torch.stack(actions[:-1])
+        dem_lens = lens[:-1]
+        return dem_s, dem_a, dem_lens, past_test_s, past_test_a, past_test_len, test_s, test_a
+
+    def __len__(self):
+        """Number of entries in the dataset folder."""
+        return len(os.listdir(self.path))
+
+# --------------------------------------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    # Manual debugging entry point: load one training sample of the "mental"
+    # dataset and drop into the debugger to inspect it.
+
+    # Task types of the evaluation set, kept at hand for the debug session.
+    types = [
+        'preference', 'multi_agent', 'inaccessible_goal',
+        'efficiency_irrational', 'efficiency_time','efficiency_path',
+        'instrumental_no_barrier', 'instrumental_blocking_barrier', 'instrumental_inconsequential_barrier'
+    ]
+
+    mental_dataset = ToMnetDGLDatasetMental(
+        path='/datasets/external/bib_train/graphs/all_tasks/',
+        types=['instrumental_action'],
+        mode='train'
+    )
+    # `past_test_len` instead of `len`, so the builtin is not shadowed.
+    dem_frames, dem_actions, dem_lens, past_test_frames, past_test_actions, \
+        past_test_len, test_frame, test_action = mental_dataset[99]
+    breakpoint()
--- a/tom/gnn.py
+++ b/tom/gnn.py
@ -0,0 +1,877 @@
+import dgl.nn.pytorch as dglnn
+import torch.nn as nn
+import torch.nn.functional as F 
+import torch  
+import dgl 
+import sys
+import copy 
+
+from wandb import agent 
+sys.path.append('/projects/bortoletto/irene/')
+from tom.norm import Norm 
+
+
+class RSAGEv4(nn.Module):
+    """Multi-layer heterogeneous GraphSAGE encoder (LSTM aggregator).
+
+    Node features ``type`` (9), ``color`` (3) and ``shape`` (18) are embedded
+    linearly and fused; the result is fused with the embedded ``pos`` (2).
+    The fused features go through ``n_layers`` ``HeteroGraphConv`` layers
+    (one ``SAGEConv`` per relation, summed) and the ``'obj'`` node outputs
+    are averaged per graph.
+    """
+    def __init__(
+        self,
+        hidden_channels,
+        out_channels,
+        rel_names,
+        dropout,
+        n_layers,
+        activation=nn.ELU(),
+    ):
+        super().__init__()
+        embed_dim = int(hidden_channels)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine_attr = nn.Sequential(
+            nn.ReLU(),
+            nn.Linear(hidden_channels*3, hidden_channels)
+        )
+        self.combine_pos = nn.Sequential(
+            nn.ReLU(),
+            nn.Linear(hidden_channels*2, hidden_channels)
+        )
+        convs = []
+        for depth in range(n_layers):
+            # The last layer maps to out_channels and has no activation.
+            is_last = depth == n_layers - 1
+            per_relation = {
+                rel: dglnn.SAGEConv(
+                    in_feats=hidden_channels,
+                    out_feats=out_channels if is_last else hidden_channels,
+                    aggregator_type='lstm',
+                    feat_drop=dropout,
+                    activation=None if is_last else activation
+                )
+                for rel in rel_names
+            }
+            convs.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+        self.layers = nn.ModuleList(convs)
+
+    def forward(self, g):
+        """Encode graph ``g`` and return the per-graph mean of its node states."""
+        node = g.ndata
+        type_emb = self.embedding_type(node['type'].float())
+        # Positions are divided by 170 and colors by 255 before embedding.
+        pos_emb = self.embedding_pos(node['pos']/170.)
+        color_emb = self.embedding_color(node['color']/255.)
+        shape_emb = self.embedding_shape(node['shape'].float())
+        attr_emb = self.combine_attr(torch.cat([type_emb, color_emb, shape_emb], dim=1))
+        h = {'obj': self.combine_pos(torch.cat((pos_emb, attr_emb), dim=1))}
+        for conv in self.layers:
+            h = conv(g, h)
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            pooled = dgl.mean_nodes(g, 'h')
+        return pooled
+
+# -------------------------------------------------------------------------------------------
+
+class RAGNNv4(nn.Module):
+    """Multi-layer heterogeneous AGNN encoder.
+
+    Uses the same feature embedding and fusion as the SAGE variant, followed
+    by ``n_layers`` ``HeteroGraphConv`` layers holding one default
+    ``AGNNConv`` per relation (summed). ``out_channels``, ``dropout`` and
+    ``activation`` are accepted but not used by this class.
+    """
+    def __init__(
+        self,
+        hidden_channels,
+        out_channels,
+        rel_names,
+        dropout,
+        n_layers,
+        activation=nn.ELU(),
+    ):
+        super().__init__()
+        embed_dim = int(hidden_channels)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine_attr = nn.Sequential(
+            nn.ReLU(),
+            nn.Linear(hidden_channels*3, hidden_channels)
+        )
+        self.combine_pos = nn.Sequential(
+            nn.ReLU(),
+            nn.Linear(hidden_channels*2, hidden_channels)
+        )
+        convs = []
+        for _ in range(n_layers):
+            per_relation = {rel: dglnn.AGNNConv() for rel in rel_names}
+            convs.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+        self.layers = nn.ModuleList(convs)
+
+    def forward(self, g):
+        """Encode graph ``g`` and return the per-graph mean of its node states."""
+        node = g.ndata
+        type_emb = self.embedding_type(node['type'].float())
+        # Positions are divided by 170 and colors by 255 before embedding.
+        pos_emb = self.embedding_pos(node['pos']/170.)
+        color_emb = self.embedding_color(node['color']/255.)
+        shape_emb = self.embedding_shape(node['shape'].float())
+        attr_emb = self.combine_attr(torch.cat([type_emb, color_emb, shape_emb], dim=1))
+        h = {'obj': self.combine_pos(torch.cat((pos_emb, attr_emb), dim=1))}
+        for conv in self.layers:
+            h = conv(g, h)
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            pooled = dgl.mean_nodes(g, 'h')
+        return pooled
+    
+# -------------------------------------------------------------------------------------------
+
+class RGATv2(nn.Module):
+    """Multi-layer heterogeneous GATv2 encoder with embedding lookups.
+
+    ``type`` and ``shape`` are turned into indices with ``argmax`` and looked
+    up in embedding tables; ``pos`` and ``color`` are embedded linearly. Each
+    of the four parts has width ``int(hidden_channels*num_heads/4)``. Hidden
+    layers use ``num_heads`` heads whose outputs are flattened; the last
+    layer uses one head, which is averaged away.
+    """
+    def __init__(
+        self,
+        hidden_channels,
+        out_channels,
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers,
+        activation=nn.ELU(),
+        residual=False
+    ):
+        super().__init__()
+        part_dim = int(hidden_channels*num_heads/4)
+        self.embedding_type = nn.Embedding(9, part_dim)
+        self.embedding_pos = nn.Linear(2, part_dim)
+        self.embedding_color = nn.Linear(3, part_dim)
+        self.embedding_shape = nn.Embedding(18, part_dim)
+        self.combine = nn.Sequential(
+            nn.ReLU(),
+            nn.Linear(hidden_channels*num_heads, hidden_channels*num_heads)
+        )
+        convs = []
+        for depth in range(n_layers):
+            # The last layer: out_channels, a single head, no activation.
+            is_last = depth == n_layers - 1
+            per_relation = {
+                rel: dglnn.GATv2Conv(
+                    in_feats=hidden_channels*num_heads,
+                    out_feats=out_channels if is_last else hidden_channels,
+                    num_heads=1 if is_last else num_heads,
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    activation=None if is_last else activation
+                )
+                for rel in rel_names
+            }
+            convs.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+        self.layers = nn.ModuleList(convs)
+
+    def forward(self, g):
+        """Encode graph ``g`` and return the per-graph mean of its node states."""
+        node = g.ndata
+        parts = [
+            self.embedding_type(torch.argmax(node['type'], dim=1)),
+            self.embedding_pos(node['pos']/170.),
+            self.embedding_color(node['color']/255.),
+            self.embedding_shape(torch.argmax(node['shape'], dim=1)),
+        ]
+        h = {'obj': self.combine(torch.cat(parts, dim=1))}
+        last = len(self.layers) - 1
+        for depth, conv in enumerate(self.layers):
+            h = conv(g, h)
+            if depth == last:
+                # Average over the head dimension.
+                h = {key: value.mean(1) for key, value in h.items()}
+            else:
+                # Concatenate the heads.
+                h = {key: value.flatten(1) for key, value in h.items()}
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            pooled = dgl.mean_nodes(g, 'h')
+        return pooled
+
+# -------------------------------------------------------------------------------------------
+
+class RGATv3(nn.Module):
+    """Multi-layer heterogeneous GATv2 encoder with linear embeddings.
+
+    All four node features (``type``, ``pos``, ``color``, ``shape``) are
+    embedded linearly to ``hidden_channels*num_heads`` each, concatenated and
+    fused back to ``hidden_channels*num_heads``. Hidden layers use
+    ``num_heads`` heads whose outputs are flattened; the last layer uses one
+    head, which is averaged away.
+    """
+    def __init__(
+        self,
+        hidden_channels,
+        out_channels,
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers,
+        activation=nn.ELU(),
+        residual=False
+    ):
+        super().__init__()
+        width = int(hidden_channels*num_heads)
+        self.embedding_type = nn.Linear(9, width)
+        self.embedding_pos = nn.Linear(2, width)
+        self.embedding_color = nn.Linear(3, width)
+        self.embedding_shape = nn.Linear(18, width)
+        self.combine = nn.Sequential(
+            nn.ReLU(),
+            nn.Linear(hidden_channels*num_heads*4, hidden_channels*num_heads)
+        )
+        convs = []
+        for depth in range(n_layers):
+            # The last layer: out_channels, a single head, no activation.
+            is_last = depth == n_layers - 1
+            per_relation = {
+                rel: dglnn.GATv2Conv(
+                    in_feats=hidden_channels*num_heads,
+                    out_feats=out_channels if is_last else hidden_channels,
+                    num_heads=1 if is_last else num_heads,  # TODO: change to num_heads always
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    activation=None if is_last else activation
+                )
+                for rel in rel_names
+            }
+            convs.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+        self.layers = nn.ModuleList(convs)
+
+    def forward(self, g):
+        """Encode graph ``g`` and return the per-graph mean of its node states."""
+        node = g.ndata
+        parts = [
+            self.embedding_type(node['type'].float()),
+            # NOTE: this should be 180 because I remove the boundary walls!
+            self.embedding_pos(node['pos']/170.),
+            self.embedding_color(node['color']/255.),
+            self.embedding_shape(node['shape'].float()),
+        ]
+        h = {'obj': self.combine(torch.cat(parts, dim=1))}
+        last = len(self.layers) - 1
+        for depth, conv in enumerate(self.layers):
+            h = conv(g, h)
+            if depth == last:
+                # Average over the head dimension.
+                h = {key: value.mean(1) for key, value in h.items()}
+            else:
+                # Concatenate the heads.
+                h = {key: value.flatten(1) for key, value in h.items()}
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            pooled = dgl.mean_nodes(g, 'h')
+        return pooled
+
+# -------------------------------------------------------------------------------------------
+
+class RGCNv2(nn.Module):
+    """Relational GCN encoder built from ``RelGraphConv`` layers.
+
+    The node features ``type`` (9 inputs), ``pos`` (2), ``color`` (3) and
+    ``shape`` (18) are embedded separately, concatenated and fused by
+    ``combine``. ``n_layers - 1`` hidden ``RelGraphConv`` layers are followed
+    by one layer mapping to ``out_channels``; every layer, including the
+    last, uses ``activation``.
+
+    Args:
+        hidden_channels: feature size of the embeddings and hidden layers.
+        out_channels: feature size produced by the last layer.
+        rel_names: relation names; only their count is used (``num_rels``).
+        dropout: dropout passed to every ``RelGraphConv``.
+        n_layers: total number of graph convolution layers.
+        activation: activation passed to every ``RelGraphConv``.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        rel_names,
+        dropout,
+        n_layers, 
+        activation=nn.ReLU()
+    ):
+        super().__init__()
+        embed_dim = int(hidden_channels)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine = nn.Sequential(
+            nn.ReLU(),
+            nn.Linear(hidden_channels * 4, hidden_channels),
+        )
+
+        num_rels = len(rel_names)
+
+        def make_conv(out_feat):
+            # All layers share every setting except the output width.
+            return dglnn.RelGraphConv(
+                in_feat=hidden_channels,
+                out_feat=out_feat,
+                num_rels=num_rels,
+                regularizer=None,
+                num_bases=None,
+                bias=True,
+                activation=activation,
+                self_loop=True,
+                dropout=dropout,
+                layer_norm=False,
+            )
+
+        out_widths = [hidden_channels] * (n_layers - 1) + [out_channels]
+        self.layers = nn.ModuleList([make_conv(w) for w in out_widths])
+
+    def forward(self, g):
+        """Encode ``g`` and return ``dgl.mean_nodes`` of the final node states."""
+        # NOTE(review): DGL documents this conversion as the function
+        # dgl.to_homogeneous(g, ndata=[...]); confirm that this method call
+        # exists and keeps the 'type'/'pos'/'color'/'shape' fields read below.
+        homo = g.to_homogeneous()
+        node = homo.ndata
+        embedded = [
+            self.embedding_type(node['type'].float()),
+            self.embedding_pos(node['pos']/170.),
+            self.embedding_color(node['color']/255.),
+            self.embedding_shape(node['shape'].float()),
+        ]
+        h = self.combine(torch.cat(embedded, dim=1))
+        for conv in self.layers:
+            # NOTE(review): RelGraphConv expects per-edge integer type ids;
+            # `etypes` here is the graph's list of relation names - verify.
+            h = conv(homo, h, homo.etypes)
+        with homo.local_scope():
+            homo.ndata['h'] = h
+            return dgl.mean_nodes(homo, 'h')
+
+# -------------------------------------------------------------------------------------------
+
+class RGATv3Agent(nn.Module):
+    """Relational GATv2 encoder returning agent state plus graph context.
+
+    Same embedding and convolution stack as a plain relational GATv2
+    encoder: four per-feature linear embeddings fused by ``combine``, then
+    ``n_layers`` ``HeteroGraphConv`` layers with one ``GATv2Conv`` per
+    relation. The output is the final state of the nodes selected by
+    ``type[:, 0] == 1`` added to the ``dgl.mean_nodes`` readout.
+
+    Args:
+        hidden_channels: per-head feature size of every layer but the last.
+        out_channels: feature size produced by the last layer.
+        num_heads: attention heads of every layer but the last, which uses
+            a single head.
+        rel_names: relation names; each one gets its own ``GATv2Conv``.
+        dropout: used as both ``feat_drop`` and ``attn_drop``.
+        n_layers: number of graph convolution layers.
+        activation: activation of every layer but the last.
+        residual: forwarded to ``GATv2Conv``.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers, 
+        activation=nn.ELU(), 
+        residual=False
+    ):
+        super().__init__()
+        width = hidden_channels * num_heads
+        embed_dim = int(width)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine = nn.Sequential(nn.ReLU(), nn.Linear(width * 4, width))
+
+        self.layers = nn.ModuleList()
+        for idx in range(n_layers):
+            is_hidden = idx < n_layers - 1
+            per_relation = {}
+            for rel in rel_names:
+                per_relation[rel] = dglnn.GATv2Conv(
+                    in_feats=width,
+                    out_feats=hidden_channels if is_hidden else out_channels,
+                    # TODO: change to num_heads always
+                    num_heads=num_heads if is_hidden else 1,
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    activation=activation if is_hidden else None,
+                )
+            self.layers.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+
+    def forward(self, g):
+        """Return the agent node states summed with the mean-node readout."""
+        node = g.ndata
+        # nodes whose first 'type' channel equals 1 are treated as agents
+        agent_mask = node['type'][:, 0] == 1
+        embedded = [
+            self.embedding_type(node['type'].float()),
+            # NOTE(review): the sibling encoders scale 'pos' by 170, this
+            # one by 200 - confirm the difference is intended.
+            self.embedding_pos(node['pos']/200.),
+            self.embedding_color(node['color']/255.),
+            self.embedding_shape(node['shape'].float()),
+        ]
+        h = {'obj': self.combine(torch.cat(embedded, dim=1))}
+        final = len(self.layers) - 1
+        for idx, conv in enumerate(self.layers):
+            h = conv(g, h)
+            if idx == final:
+                # last layer: average over dim 1 (presumably the head axis)
+                h = {name: feat.mean(1) for name, feat in h.items()}
+            else:
+                # hidden layers: fold dim 1 onwards into one feature axis
+                h = {name: feat.flatten(1) for name, feat in h.items()}
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            agent_state = g.ndata['h'][agent_mask, :]
+            context = dgl.mean_nodes(g, 'h')
+        # presumably one agent node per graph, so both terms share a shape
+        return agent_state + context
+
+# -------------------------------------------------------------------------------------------
+
+class RGATv4(nn.Module):
+    """Relational GATv2 encoder that fuses attributes before position.
+
+    ``type``, ``color`` and ``shape`` embeddings are merged by
+    ``combine_attr``; the result is then merged with the ``pos`` embedding
+    by ``combine_pos``. ``n_layers`` ``HeteroGraphConv`` layers (one
+    ``GATv2Conv`` per relation, ``share_weights=False``) follow, and the
+    final ``'obj'`` states are read out with ``dgl.mean_nodes``.
+
+    Args:
+        hidden_channels: per-head feature size of every layer but the last.
+        out_channels: feature size produced by the last layer.
+        num_heads: attention heads of every layer but the last, which uses
+            a single head.
+        rel_names: relation names; each one gets its own ``GATv2Conv``.
+        dropout: used as both ``feat_drop`` and ``attn_drop``.
+        n_layers: number of graph convolution layers.
+        activation: activation of every layer but the last.
+        residual: forwarded to ``GATv2Conv``.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers, 
+        activation=nn.ELU(), 
+        residual=False
+    ):
+        super().__init__()
+        width = hidden_channels * num_heads
+        embed_dim = int(width)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine_attr = nn.Sequential(nn.ReLU(), nn.Linear(width * 3, width))
+        self.combine_pos = nn.Sequential(nn.ReLU(), nn.Linear(width * 2, width))
+
+        self.layers = nn.ModuleList()
+        for idx in range(n_layers):
+            is_hidden = idx < n_layers - 1
+            per_relation = {}
+            for rel in rel_names:
+                per_relation[rel] = dglnn.GATv2Conv(
+                    in_feats=width,
+                    out_feats=hidden_channels if is_hidden else out_channels,
+                    # TODO: change to num_heads always
+                    num_heads=num_heads if is_hidden else 1,
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    share_weights=False,
+                    activation=activation if is_hidden else None,
+                )
+            self.layers.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+
+    def forward(self, g):
+        """Encode ``g`` and return ``dgl.mean_nodes`` of the final ``'obj'`` states."""
+        node = g.ndata
+        type_emb = self.embedding_type(node['type'].float())
+        pos_emb = self.embedding_pos(node['pos']/170.)
+        color_emb = self.embedding_color(node['color']/255.)
+        shape_emb = self.embedding_shape(node['shape'].float())
+        # attributes are fused first, position is mixed in afterwards
+        attr_emb = self.combine_attr(torch.cat([type_emb, color_emb, shape_emb], dim=1))
+        h = {'obj': self.combine_pos(torch.cat((pos_emb, attr_emb), dim=1))}
+        final = len(self.layers) - 1
+        for idx, conv in enumerate(self.layers):
+            h = conv(g, h)
+            if idx == final:
+                # last layer: average over dim 1 (presumably the head axis)
+                h = {name: feat.mean(1) for name, feat in h.items()}
+            else:
+                # hidden layers: fold dim 1 onwards into one feature axis
+                h = {name: feat.flatten(1) for name, feat in h.items()}
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            return dgl.mean_nodes(g, 'h')
+
+# -------------------------------------------------------------------------------------------
+
+class RGATv4Norm(nn.Module):
+    """Relational GATv2 encoder (attributes fused before position) with norms.
+
+    ``type``, ``color`` and ``shape`` embeddings are merged by
+    ``combine_attr`` and then combined with the ``pos`` embedding by
+    ``combine_pos``. Each of the ``n_layers`` ``HeteroGraphConv`` layers is
+    followed by its own ``Norm`` module (``norm_type='gn'``); the final
+    ``'obj'`` states are read out with ``dgl.mean_nodes``.
+
+    Args:
+        hidden_channels: per-head feature size of every layer but the last.
+        out_channels: feature size produced by the last layer.
+        num_heads: attention heads of every layer but the last, which uses
+            a single head.
+        rel_names: relation names; each one gets its own ``GATv2Conv``.
+        dropout: used as both ``feat_drop`` and ``attn_drop``.
+        n_layers: number of graph convolution (and norm) layers.
+        activation: activation of every layer but the last.
+        residual: forwarded to ``GATv2Conv``.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers, 
+        activation=nn.ELU(), 
+        residual=False
+    ):
+        super().__init__()
+        width = hidden_channels * num_heads
+        embed_dim = int(width)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine_attr = nn.Sequential(nn.ReLU(), nn.Linear(width * 3, width))
+        self.combine_pos = nn.Sequential(nn.ReLU(), nn.Linear(width * 2, width))
+
+        self.layers = nn.ModuleList()
+        for idx in range(n_layers):
+            is_hidden = idx < n_layers - 1
+            per_relation = {}
+            for rel in rel_names:
+                per_relation[rel] = dglnn.GATv2Conv(
+                    in_feats=width,
+                    out_feats=hidden_channels if is_hidden else out_channels,
+                    # TODO: change to num_heads always
+                    num_heads=num_heads if is_hidden else 1,
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    activation=activation if is_hidden else None,
+                )
+            self.layers.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+
+        # one norm per layer, sized to that layer's post-reduction width
+        self.norms = nn.ModuleList()
+        for idx in range(n_layers):
+            norm_dim = width if idx < n_layers - 1 else out_channels
+            self.norms.append(Norm(norm_type='gn', hidden_dim=norm_dim))
+
+    def forward(self, g):
+        """Encode ``g`` and return ``dgl.mean_nodes`` of the final ``'obj'`` states."""
+        node = g.ndata
+        type_emb = self.embedding_type(node['type'].float())
+        pos_emb = self.embedding_pos(node['pos']/170.)
+        color_emb = self.embedding_color(node['color']/255.)
+        shape_emb = self.embedding_shape(node['shape'].float())
+        attr_emb = self.combine_attr(torch.cat([type_emb, color_emb, shape_emb], dim=1))
+        h = {'obj': self.combine_pos(torch.cat((pos_emb, attr_emb), dim=1))}
+        final = len(self.layers) - 1
+        for idx, conv in enumerate(self.layers):
+            h = conv(g, h)
+            if idx == final:
+                # last layer: average over dim 1 (presumably the head axis)
+                h = {name: feat.mean(1) for name, feat in h.items()}
+            else:
+                # hidden layers: fold dim 1 onwards into one feature axis
+                h = {name: feat.flatten(1) for name, feat in h.items()}
+            norm = self.norms[idx]
+            h = {name: norm(g, feat) for name, feat in h.items()}
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            return dgl.mean_nodes(g, 'h')
+
+# -------------------------------------------------------------------------------------------
+
+class RGATv3Norm(nn.Module):
+    """Relational GATv2 encoder with a ``Norm`` module after every layer.
+
+    The ``type``, ``pos``, ``color`` and ``shape`` embeddings are
+    concatenated and fused by ``combine``. Each of the ``n_layers``
+    ``HeteroGraphConv`` layers (one ``GATv2Conv`` per relation) is followed
+    by its own ``Norm`` module (``norm_type='gn'``); the final ``'obj'``
+    states are read out with ``dgl.mean_nodes``.
+
+    Args:
+        hidden_channels: per-head feature size of every layer but the last.
+        out_channels: feature size produced by the last layer.
+        num_heads: attention heads of every layer but the last, which uses
+            a single head.
+        rel_names: relation names; each one gets its own ``GATv2Conv``.
+        dropout: used as both ``feat_drop`` and ``attn_drop``.
+        n_layers: number of graph convolution (and norm) layers.
+        activation: activation of every layer but the last.
+        residual: forwarded to ``GATv2Conv``.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers, 
+        activation=nn.ELU(), 
+        residual=False
+    ):
+        super().__init__()
+        width = hidden_channels * num_heads
+        embed_dim = int(width)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine = nn.Sequential(nn.ReLU(), nn.Linear(width * 4, width))
+
+        self.layers = nn.ModuleList()
+        for idx in range(n_layers):
+            is_hidden = idx < n_layers - 1
+            per_relation = {}
+            for rel in rel_names:
+                per_relation[rel] = dglnn.GATv2Conv(
+                    in_feats=width,
+                    out_feats=hidden_channels if is_hidden else out_channels,
+                    # TODO: change to num_heads always
+                    num_heads=num_heads if is_hidden else 1,
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    activation=activation if is_hidden else None,
+                )
+            self.layers.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+
+        # one norm per layer, sized to that layer's post-reduction width
+        self.norms = nn.ModuleList()
+        for idx in range(n_layers):
+            norm_dim = width if idx < n_layers - 1 else out_channels
+            self.norms.append(Norm(norm_type='gn', hidden_dim=norm_dim))
+
+    def forward(self, g):
+        """Encode ``g`` and return ``dgl.mean_nodes`` of the final ``'obj'`` states."""
+        node = g.ndata
+        embedded = [
+            self.embedding_type(node['type'].float()),
+            self.embedding_pos(node['pos']/170.),
+            self.embedding_color(node['color']/255.),
+            self.embedding_shape(node['shape'].float()),
+        ]
+        h = {'obj': self.combine(torch.cat(embedded, dim=1))}
+        final = len(self.layers) - 1
+        for idx, conv in enumerate(self.layers):
+            h = conv(g, h)
+            if idx == final:
+                # last layer: average over dim 1 (presumably the head axis)
+                h = {name: feat.mean(1) for name, feat in h.items()}
+            else:
+                # hidden layers: fold dim 1 onwards into one feature axis
+                h = {name: feat.flatten(1) for name, feat in h.items()}
+            norm = self.norms[idx]
+            h = {name: norm(g, feat) for name, feat in h.items()}
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            return dgl.mean_nodes(g, 'h')
+
+# -------------------------------------------------------------------------------------------
+
+class RGATv4Agent(nn.Module):
+    """Relational GATv2 encoder combining agent state with non-agent context.
+
+    ``type``, ``color`` and ``shape`` embeddings are merged by
+    ``combine_attr`` and then combined with the ``pos`` embedding by
+    ``combine_pos``. After ``n_layers`` ``HeteroGraphConv`` layers (one
+    ``GATv2Conv`` per relation) the states of the nodes selected by
+    ``type[:, 0] == 1`` are concatenated with the ``dgl.mean_nodes`` readout
+    of the remaining nodes and projected by ``combine_agent_context``.
+
+    Args:
+        hidden_channels: per-head feature size of every layer but the last.
+        out_channels: feature size of the last layer and of the output.
+        num_heads: attention heads of every layer but the last, which uses
+            a single head.
+        rel_names: relation names; each one gets its own ``GATv2Conv``.
+        dropout: used as both ``feat_drop`` and ``attn_drop``.
+        n_layers: number of graph convolution layers.
+        activation: activation of every layer but the last.
+        residual: forwarded to ``GATv2Conv``.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers, 
+        activation=nn.ELU(), 
+        residual=False
+    ):
+        super().__init__()
+        width = hidden_channels * num_heads
+        embed_dim = int(width)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine_attr = nn.Sequential(nn.ReLU(), nn.Linear(width * 3, width))
+        self.combine_pos = nn.Sequential(nn.ReLU(), nn.Linear(width * 2, width))
+
+        self.layers = nn.ModuleList()
+        for idx in range(n_layers):
+            is_hidden = idx < n_layers - 1
+            per_relation = {}
+            for rel in rel_names:
+                per_relation[rel] = dglnn.GATv2Conv(
+                    in_feats=width,
+                    out_feats=hidden_channels if is_hidden else out_channels,
+                    # TODO: change to num_heads always
+                    num_heads=num_heads if is_hidden else 1,
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    activation=activation if is_hidden else None,
+                )
+            self.layers.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+        self.combine_agent_context = nn.Linear(out_channels*2, out_channels)
+
+    def forward(self, g):
+        """Return the projection of [agent state, mean of non-agent states]."""
+        node = g.ndata
+        # nodes whose first 'type' channel equals 1 are treated as agents
+        agent_mask = node['type'][:, 0] == 1
+        type_emb = self.embedding_type(node['type'].float())
+        pos_emb = self.embedding_pos(node['pos']/170.)
+        color_emb = self.embedding_color(node['color']/255.)
+        shape_emb = self.embedding_shape(node['shape'].float())
+        attr_emb = self.combine_attr(torch.cat([type_emb, color_emb, shape_emb], dim=1))
+        h = {'obj': self.combine_pos(torch.cat((pos_emb, attr_emb), dim=1))}
+        final = len(self.layers) - 1
+        for idx, conv in enumerate(self.layers):
+            h = conv(g, h)
+            if idx == final:
+                # last layer: average over dim 1 (presumably the head axis)
+                h = {name: feat.mean(1) for name, feat in h.items()}
+            else:
+                # hidden layers: fold dim 1 onwards into one feature axis
+                h = {name: feat.flatten(1) for name, feat in h.items()}
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            h_a = g.ndata['h'][agent_mask, :]
+            # One vectorised lookup instead of iterating the mask tensor
+            # element by element; still hands remove_nodes a list of ints.
+            agent_ids = torch.nonzero(agent_mask).flatten().tolist()
+            # Work on a copy so the caller's graph keeps its agent nodes.
+            g_no_agent = copy.deepcopy(g)
+            g_no_agent.remove_nodes(agent_ids)
+            h_g = dgl.mean_nodes(g_no_agent, 'h')
+            # presumably one agent node per graph, so h_a and h_g line up
+            out = self.combine_agent_context(torch.cat((h_a, h_g), dim=1))
+            return out
+
+# -------------------------------------------------------------------------------------------
+
+class RGCNv4(nn.Module):
+    """Relational graph encoder using one ``GraphConv`` per relation.
+
+    ``type``, ``color`` and ``shape`` embeddings are merged by
+    ``combine_attr`` and then combined with the ``pos`` embedding by
+    ``combine_pos``. ``n_layers`` ``HeteroGraphConv`` layers follow, and the
+    final ``'obj'`` states are read out with ``dgl.mean_nodes``.
+
+    Args:
+        hidden_channels: feature size of the embeddings and hidden layers.
+        out_channels: feature size produced by the last layer.
+        rel_names: relation names; each one gets its own ``GraphConv``.
+        n_layers: number of graph convolution layers.
+        activation: activation of every layer but the last.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        rel_names,
+        n_layers, 
+        activation=nn.ELU(), 
+    ):
+        super().__init__()
+        embed_dim = int(hidden_channels)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine_attr = nn.Sequential(
+            nn.ReLU(),
+            nn.Linear(hidden_channels * 3, hidden_channels),
+        )
+        self.combine_pos = nn.Sequential(
+            nn.ReLU(),
+            nn.Linear(hidden_channels * 2, hidden_channels),
+        )
+
+        self.layers = nn.ModuleList()
+        for idx in range(n_layers):
+            is_hidden = idx < n_layers - 1
+            per_relation = {}
+            for rel in rel_names:
+                per_relation[rel] = dglnn.GraphConv(
+                    in_feats=hidden_channels,
+                    out_feats=hidden_channels if is_hidden else out_channels,
+                    activation=activation if is_hidden else None,
+                )
+            self.layers.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+
+    def forward(self, g):
+        """Encode ``g`` and return ``dgl.mean_nodes`` of the final ``'obj'`` states."""
+        node = g.ndata
+        type_emb = self.embedding_type(node['type'].float())
+        pos_emb = self.embedding_pos(node['pos']/170.)
+        color_emb = self.embedding_color(node['color']/255.)
+        shape_emb = self.embedding_shape(node['shape'].float())
+        attr_emb = self.combine_attr(torch.cat([type_emb, color_emb, shape_emb], dim=1))
+        h = {'obj': self.combine_pos(torch.cat((pos_emb, attr_emb), dim=1))}
+        for conv in self.layers:
+            h = conv(g, h)
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            return dgl.mean_nodes(g, 'h')
+
+# -------------------------------------------------------------------------------------------
+
+class RGATv5(nn.Module):
+    """Relational GATv2 encoder with feature-level attention and a skip link.
+
+    The four feature embeddings (``type``, ``pos``, ``color``, ``shape``)
+    are concatenated; ``attention`` yields one softmax weight per embedding,
+    which rescales that embedding's slice before ``combine`` fuses them.
+    Every ``GATv2Conv`` layer, including the last, uses ``num_heads`` heads
+    and its output is flattened from dim 1 on. The fused input is added to
+    the output of the last layer before the ``dgl.mean_nodes`` readout.
+
+    Args:
+        hidden_channels: per-head feature size of every layer but the last.
+        out_channels: per-head feature size of the last layer.
+        num_heads: attention heads of every layer.
+        rel_names: relation names; each one gets its own ``GATv2Conv``.
+        dropout: used as both ``feat_drop`` and ``attn_drop``.
+        n_layers: number of graph convolution layers.
+        activation: activation of every layer but the last.
+        residual: forwarded to ``GATv2Conv``.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers, 
+        activation=nn.ELU(), 
+        residual=False
+    ):
+        super().__init__()
+        self.hidden_channels = hidden_channels
+        self.num_heads = num_heads
+        width = hidden_channels * num_heads
+        self.embedding_type = nn.Linear(9, width)
+        self.embedding_pos = nn.Linear(2, width)
+        self.embedding_color = nn.Linear(3, width)
+        self.embedding_shape = nn.Linear(18, width)
+        self.combine = nn.Sequential(nn.ReLU(), nn.Linear(width * 4, width))
+        # one attention logit per embedded feature group
+        self.attention = nn.Linear(width * 4, 4)
+
+        self.layers = nn.ModuleList()
+        for idx in range(n_layers):
+            is_hidden = idx < n_layers - 1
+            per_relation = {}
+            for rel in rel_names:
+                per_relation[rel] = dglnn.GATv2Conv(
+                    in_feats=width,
+                    out_feats=hidden_channels if is_hidden else out_channels,
+                    num_heads=num_heads,
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    activation=activation if is_hidden else None,
+                )
+            self.layers.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+
+    def forward(self, g):
+        """Encode ``g`` and return ``dgl.mean_nodes`` of the final ``'obj'`` states."""
+        node = g.ndata
+        stacked = torch.cat(
+            [
+                self.embedding_type(node['type'].float()),
+                self.embedding_pos(node['pos']/170.),
+                self.embedding_color(node['color']/255.),
+                self.embedding_shape(node['shape'].float()),
+            ],
+            dim=1,
+        )
+        weights = F.softmax(self.attention(stacked), dim=1)
+        # stretch each of the 4 weights across its embedding's columns
+        block = self.hidden_channels * self.num_heads
+        gated = stacked * weights.repeat_interleave(block, dim=1)
+        h_in = self.combine(gated)
+        h = {'obj': h_in}
+        for conv in self.layers:
+            h = {name: feat.flatten(1) for name, feat in conv(g, h).items()}
+        # NOTE(review): this skip connection presumably needs
+        # out_channels == hidden_channels so both operands share a width.
+        h = {name: feat + h_in for name, feat in h.items()}
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            return dgl.mean_nodes(g, 'h')
+
+# -------------------------------------------------------------------------------------------
+
+class RGATv6(nn.Module):
+    """RGATv6 = RGATv4 + Global Attention Pooling.
+
+    ``type``, ``color`` and ``shape`` embeddings are merged by
+    ``combine_attr`` and then combined with the ``pos`` embedding by
+    ``combine_pos``. After ``n_layers`` ``HeteroGraphConv`` layers (one
+    ``GATv2Conv`` per relation) the final ``'obj'`` states are pooled by a
+    ``GlobalAttentionPooling`` module gated by a linear layer.
+
+    Args:
+        hidden_channels: per-head feature size of every layer but the last.
+        out_channels: feature size produced by the last layer.
+        num_heads: attention heads of every layer but the last, which uses
+            a single head.
+        rel_names: relation names; each one gets its own ``GATv2Conv``.
+        dropout: used as both ``feat_drop`` and ``attn_drop``.
+        n_layers: number of graph convolution layers.
+        activation: activation of every layer but the last.
+        residual: forwarded to ``GATv2Conv``.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers, 
+        activation=nn.ELU(), 
+        residual=False
+    ):
+        super().__init__()
+        width = hidden_channels * num_heads
+        embed_dim = int(width)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine_attr = nn.Sequential(nn.ReLU(), nn.Linear(width * 3, width))
+        self.combine_pos = nn.Sequential(nn.ReLU(), nn.Linear(width * 2, width))
+
+        self.layers = nn.ModuleList()
+        for idx in range(n_layers):
+            is_hidden = idx < n_layers - 1
+            per_relation = {}
+            for rel in rel_names:
+                per_relation[rel] = dglnn.GATv2Conv(
+                    in_feats=width,
+                    out_feats=hidden_channels if is_hidden else out_channels,
+                    # TODO: change to num_heads always
+                    num_heads=num_heads if is_hidden else 1,
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    activation=activation if is_hidden else None,
+                )
+            self.layers.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+
+        # scalar gate per node for the attention pooling readout
+        self.gap = dglnn.GlobalAttentionPooling(nn.Linear(out_channels, 1))
+
+    def forward(self, g):
+        """Encode ``g`` and return the attention-pooled final ``'obj'`` states."""
+        node = g.ndata
+        type_emb = self.embedding_type(node['type'].float())
+        pos_emb = self.embedding_pos(node['pos']/170.)
+        color_emb = self.embedding_color(node['color']/255.)
+        shape_emb = self.embedding_shape(node['shape'].float())
+        attr_emb = self.combine_attr(torch.cat([type_emb, color_emb, shape_emb], dim=1))
+        h = {'obj': self.combine_pos(torch.cat((pos_emb, attr_emb), dim=1))}
+        final = len(self.layers) - 1
+        for idx, conv in enumerate(self.layers):
+            h = conv(g, h)
+            if idx == final:
+                # last layer: average over dim 1 (presumably the head axis)
+                h = {name: feat.mean(1) for name, feat in h.items()}
+            else:
+                # hidden layers: fold dim 1 onwards into one feature axis
+                h = {name: feat.flatten(1) for name, feat in h.items()}
+        return self.gap(g, h['obj'])
+
+# -------------------------------------------------------------------------------------------
+
+class RGATv6Agent(nn.Module):
+    """RGATv6 variant combining agent state with attention-pooled context.
+
+    ``type``, ``color`` and ``shape`` embeddings are merged by
+    ``combine_attr`` and then combined with the ``pos`` embedding by
+    ``combine_pos``. After ``n_layers`` ``HeteroGraphConv`` layers (one
+    ``GATv2Conv`` per relation) the states of the nodes selected by
+    ``type[:, 0] == 1`` are concatenated with a ``GlobalAttentionPooling``
+    readout of the remaining nodes and projected by
+    ``combine_agent_context``.
+
+    Args:
+        hidden_channels: per-head feature size of every layer but the last.
+        out_channels: feature size of the last layer and of the output.
+        num_heads: attention heads of every layer but the last, which uses
+            a single head.
+        rel_names: relation names; each one gets its own ``GATv2Conv``.
+        dropout: used as both ``feat_drop`` and ``attn_drop``.
+        n_layers: number of graph convolution layers.
+        activation: activation of every layer but the last.
+        residual: forwarded to ``GATv2Conv``.
+    """
+
+    def __init__(
+        self, 
+        hidden_channels,
+        out_channels, 
+        num_heads,
+        rel_names,
+        dropout,
+        n_layers, 
+        activation=nn.ELU(), 
+        residual=False
+    ):
+        super().__init__()
+        width = hidden_channels * num_heads
+        embed_dim = int(width)
+        self.embedding_type = nn.Linear(9, embed_dim)
+        self.embedding_pos = nn.Linear(2, embed_dim)
+        self.embedding_color = nn.Linear(3, embed_dim)
+        self.embedding_shape = nn.Linear(18, embed_dim)
+        self.combine_attr = nn.Sequential(nn.ReLU(), nn.Linear(width * 3, width))
+        self.combine_pos = nn.Sequential(nn.ReLU(), nn.Linear(width * 2, width))
+
+        self.layers = nn.ModuleList()
+        for idx in range(n_layers):
+            is_hidden = idx < n_layers - 1
+            per_relation = {}
+            for rel in rel_names:
+                per_relation[rel] = dglnn.GATv2Conv(
+                    in_feats=width,
+                    out_feats=hidden_channels if is_hidden else out_channels,
+                    # TODO: change to num_heads always
+                    num_heads=num_heads if is_hidden else 1,
+                    feat_drop=dropout,
+                    attn_drop=dropout,
+                    residual=residual,
+                    activation=activation if is_hidden else None,
+                )
+            self.layers.append(dglnn.HeteroGraphConv(per_relation, aggregate='sum'))
+
+        # scalar gate per node for the attention pooling readout
+        gate_nn = nn.Linear(out_channels, 1)
+        self.gap = dglnn.GlobalAttentionPooling(gate_nn)
+        self.combine_agent_context = nn.Linear(out_channels*2, out_channels)
+
+    def forward(self, g):
+        """Return the projection of [agent state, pooled non-agent states]."""
+        node = g.ndata
+        # nodes whose first 'type' channel equals 1 are treated as agents
+        agent_mask = node['type'][:, 0] == 1
+        type_emb = self.embedding_type(node['type'].float())
+        pos_emb = self.embedding_pos(node['pos']/170.)
+        color_emb = self.embedding_color(node['color']/255.)
+        shape_emb = self.embedding_shape(node['shape'].float())
+        attr_emb = self.combine_attr(torch.cat([type_emb, color_emb, shape_emb], dim=1))
+        h = {'obj': self.combine_pos(torch.cat((pos_emb, attr_emb), dim=1))}
+        final = len(self.layers) - 1
+        for idx, conv in enumerate(self.layers):
+            h = conv(g, h)
+            if idx == final:
+                # last layer: average over dim 1 (presumably the head axis)
+                h = {name: feat.mean(1) for name, feat in h.items()}
+            else:
+                # hidden layers: fold dim 1 onwards into one feature axis
+                h = {name: feat.flatten(1) for name, feat in h.items()}
+        with g.local_scope():
+            g.ndata['h'] = h['obj']
+            h_a = g.ndata['h'][agent_mask, :]
+            h_g = g.ndata['h'][~agent_mask, :]
+            # One vectorised lookup instead of iterating the mask tensor
+            # element by element; still hands remove_nodes a list of ints.
+            agent_ids = torch.nonzero(agent_mask).flatten().tolist()
+            # Work on a copy so the caller's graph keeps its agent nodes.
+            g_no_agent = copy.deepcopy(g)
+            g_no_agent.remove_nodes(agent_ids)
+            # pool the non-agent states over the agent-free graph
+            h_g = self.gap(g_no_agent, h_g)
+            # presumably one agent node per graph, so h_a and h_g line up
+            out = self.combine_agent_context(torch.cat((h_a, h_g), dim=1))
+        return out
+
+# -------------------------------------------------------------------------------------------
+
--- a/tom/model.py
+++ b/tom/model.py
@ -0,0 +1,513 @@
+from argparse import ArgumentParser
+import torch
+import torch.nn as nn 
+import torch.nn.functional as F
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks import ModelCheckpoint
+from torch.utils.data import DataLoader
+
+from tom.dataset import *
+from tom.transformer import TransformerEncoder
+from tom.gnn import RGATv2, RGATv3, RGATv3Agent, RGATv4, RGATv4Norm, RSAGEv4, RAGNNv4
+
+
+class MlpModel(nn.Module):
+    """Multilayer perceptron with an optional linear output layer.
+
+    Args:
+        input_size (int): number of input features.
+        hidden_sizes (int | sequence of int | None): sizes of the hidden
+            layers; an int is treated as a single hidden layer, ``None`` or an
+            empty sequence means no hidden layer (linear model).
+        output_size (int | None): size of the final linear layer. If ``None``
+            no output layer is added, so the last hidden layer (including its
+            nonlinearity) is the output.
+        nonlinearity: torch nonlinearity Module class (not Functional),
+            instantiated once per hidden layer.
+        dropout (float | None): if given, a ``nn.Dropout`` with this rate is
+            appended after every hidden nonlinearity.
+    """
+
+    def __init__(
+            self,
+            input_size,
+            hidden_sizes,  # Can be empty list or None for none.
+            output_size=None,  # if None, last layer has nonlinearity applied.
+            nonlinearity=nn.ReLU,  # Module, not Functional.
+            dropout=None  # Dropout value
+    ):
+        super().__init__()
+        if hidden_sizes is None:
+            hidden_sizes = []
+        elif isinstance(hidden_sizes, int):
+            hidden_sizes = [hidden_sizes]
+        else:
+            # accept tuples / other sequences as well as lists
+            hidden_sizes = list(hidden_sizes)
+        # layer_sizes[i] -> layer_sizes[i + 1] describes the i-th hidden layer.
+        layer_sizes = [input_size] + hidden_sizes
+        sequence = []
+        for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
+            sequence.extend([nn.Linear(n_in, n_out), nonlinearity()])
+            if dropout is not None:
+                sequence.append(nn.Dropout(dropout))
+
+        if output_size is not None:
+            sequence.append(nn.Linear(layer_sizes[-1], output_size))
+        self.model = nn.Sequential(*sequence)
+        # Without hidden and output layers the model is the identity, so the
+        # output size falls back to the input size instead of raising IndexError.
+        self._output_size = layer_sizes[-1] if output_size is None else output_size
+
+    def forward(self, input):
+        """Compute the model on the input, assuming input shape [B,input_size]."""
+        return self.model(input)
+
+    @property
+    def output_size(self):
+        """Returns the output size of the model."""
+        return self._output_size
+
+# ---------------------------------------------------------------------------------------------------------------------------------
+
+class GraphBCRNN(pl.LightningModule):
+    """
+    Implementation of the baseline model for the BC-RNN algorithm.
+    A relational GNN + bidirectional LSTM encode the familiarization trials
+    into a context vector; an MLP policy predicts the action for the query
+    frame from the context and the query-frame embedding.
+    """
+    @staticmethod
+    def add_model_specific_args(parent_parser):
+        """Return a parser extending ``parent_parser`` with the model's options."""
+        parser = ArgumentParser(parents=[parent_parser], add_help=False)
+        parser.add_argument('--lr', type=float, default=1e-4)
+        parser.add_argument('--action_dim', type=int, default=2)
+        parser.add_argument('--context_dim', type=int, default=32) # lstm hidden size
+        parser.add_argument('--beta', type=float, default=0.01)
+        parser.add_argument('--dropout', type=float, default=0.2)
+        parser.add_argument('--process_data', type=int, default=0)
+        parser.add_argument('--max_len', type=int, default=30)
+        # arguments for gnn 
+        parser.add_argument('--gnn_type', type=str, default='RGATv4')
+        parser.add_argument('--state_dim', type=int, default=128) # gnn out_feats
+        # `type=list` would split a command-line string into single characters;
+        # parse a whitespace separated list of ints instead.
+        parser.add_argument('--feats_dims', type=int, nargs='+', default=[9, 2, 3, 18])
+        parser.add_argument('--aggregation', type=str, default='sum')
+        # arguments for mpl
+        #parser.add_argument('--mpl_hid_feats', type=list, default=[256, 64, 16])
+        return parser
+
+    def __init__(self, hparams):
+        """Build the GNN encoder, the LSTM context encoder and the MLP policy.
+
+        Args:
+            hparams: namespace holding the options declared in
+                ``add_model_specific_args`` plus the data options read by the
+                dataloaders (``data_path``, ``types``, ``batch_size``,
+                ``num_workers``).
+
+        Raises:
+            ValueError: if ``gnn_type`` or ``aggregation`` is not supported.
+        """
+        super().__init__()
+        
+        self.hparams = hparams
+        self.lr = self.hparams.lr
+        self.state_dim = self.hparams.state_dim
+        self.action_dim = self.hparams.action_dim
+        self.context_dim = self.hparams.context_dim 
+        self.beta = self.hparams.beta
+        self.dropout = self.hparams.dropout
+        self.max_len = self.hparams.max_len
+        self.feats_dims = self.hparams.feats_dims # type, position, color, shape 
+        self.rel_names = [
+            'is_aligned', 'is_back', 'is_close', 'is_down_adj', 'is_down_left_adj',
+            'is_down_right_adj', 'is_front', 'is_left', 'is_left_adj', 'is_right',
+            'is_right_adj', 'is_top_adj', 'is_top_left_adj', 'is_top_right_adj'
+        ]
+        self.gnn_aggregation = self.hparams.aggregation
+        
+        # Keyword arguments shared by every supported encoder.
+        gnn_kwargs = dict(
+            hidden_channels=self.state_dim,
+            out_channels=self.state_dim,
+            rel_names=self.rel_names,
+            dropout=0.0,
+            n_layers=2,
+            activation=nn.ELU()
+        )
+        # Attention based encoders additionally take `num_heads` and `residual`.
+        attention_encoders = {
+            'RGATv2': RGATv2,
+            'RGATv3': RGATv3,
+            'RGATv4': RGATv4,
+            'RGATv3Agent': RGATv3Agent
+        }
+        gnn_type = self.hparams.gnn_type
+        if gnn_type in attention_encoders:
+            self.gnn_encoder = attention_encoders[gnn_type](num_heads=4, residual=False, **gnn_kwargs)
+        elif gnn_type == 'RSAGEv4':
+            self.gnn_encoder = RSAGEv4(**gnn_kwargs)
+        else:
+            # Fail at construction time instead of with an AttributeError in forward().
+            raise ValueError(
+                'Unsupported gnn_type {!r}; choose one of {}.'.format(
+                    gnn_type, sorted(list(attention_encoders) + ['RSAGEv4'])))
+
+        if self.gnn_aggregation == 'cat_axis_1':
+            self.lstm_input_size = self.state_dim * len(self.feats_dims) + self.action_dim
+            self.mlp_input_size = self.state_dim * len(self.feats_dims) + self.context_dim * 2
+        elif self.gnn_aggregation == 'sum':
+            self.lstm_input_size = self.state_dim + self.action_dim
+            self.mlp_input_size = self.state_dim + self.context_dim * 2
+        else:
+            raise ValueError('Only sum and cat_axis_1 aggregations are available.')
+
+        self.context_enc = nn.LSTM(self.lstm_input_size, self.context_dim, 2, 
+                                         batch_first=True, bidirectional=True)
+
+        self.policy = MlpModel(input_size=self.mlp_input_size, hidden_sizes=[256, 128, 256],
+                               output_size=self.action_dim, dropout=self.dropout)
+        
+        self.past_samples = []
+
+    def forward(self, batch):
+        """Predict the query action from the familiarization trials.
+
+        Args:
+            batch: tuple ``(dem_frames, dem_actions, dem_lens, query_frame,
+                target_action)``. ``dem_actions`` is 4-D
+                ``[batch, trials, steps, action_dim]`` and ``dem_lens[i][j]`` is
+                the number of valid frames of trial ``j`` in sample ``i``.
+                (assumes the encoded frames are ordered sample by sample, trial
+                by trial, frame by frame -- TODO confirm against the collate fn.)
+
+        Returns:
+            ``(target_action, test_actions_pred)``, both float tensors.
+        """
+        dem_frames, dem_actions, dem_lens, query_frame, target_action = batch
+        dem_actions = dem_actions.float()
+        target_action = target_action.float()
+        dem_states = self.gnn_encoder(dem_frames) # [number of frames, state features]
+        b, l, s, _ = dem_actions.size()
+        # Flatten the per-sample trial lengths and pad all b*l trials in a single
+        # pass so the running frame offset carries over from one sample to the
+        # next. Restarting the offset for every sample would make every sample
+        # read the frames of the first one.
+        flat_lens = [int(t) for dl in dem_lens for t in dl]
+        dem_states_new = self._sequence_to_padding(dem_states, flat_lens, self.max_len)
+        dem_states_new = dem_states_new.to(self.device) # [batch*trials, max_len, state features]
+        # concatenate states and actions to get expert trajectory
+        dem_actions = dem_actions.view(b * l, s, -1) # [batch*trials, steps, action_dim]
+        dem_traj = torch.cat([dem_states_new, dem_actions], dim=2)
+        # embed expert trajectory to get a context embedding batch x samples x dim
+        seq_lens = torch.tensor(flat_lens, dtype=torch.int64) # lengths must live on the CPU
+        x_lstm = nn.utils.rnn.pack_padded_sequence(dem_traj, seq_lens, batch_first=True, enforce_sorted=False)
+        # Use the final hidden states of the last layer (forward and backward
+        # direction). Indexing the re-padded output at the last time step would
+        # return zero padding for every trial shorter than the longest one.
+        _, (h_n, _) = self.context_enc(x_lstm)
+        x_out = torch.cat([h_n[-2], h_n[-1]], dim=1) # [batch*trials, 2 * context_dim]
+        x_out = x_out.view(b, l, -1)
+        context = torch.mean(x_out, dim=1) # [batch, 2 * context_dim]
+        # concat context embedding to the state embedding of test trajectory
+        test_states = self.gnn_encoder(query_frame)
+        test_context_states = torch.cat([context, test_states], dim=1)
+        # for each state in the test states calculate action
+        test_actions_pred = torch.tanh(self.policy(test_context_states)) # [batch, action_dim]
+        return target_action, test_actions_pred
+
+    def _sequence_to_padding(self, x, lengths, max_length): 
+        """Split ``x`` into consecutive chunks and zero-pad each to ``max_length``.
+
+        Args:
+            x: tensor whose first dimension concatenates all sequences.
+            lengths: iterable with the length of every sequence, in order.
+            max_length: padded length of the second output dimension.
+
+        Returns:
+            Tensor of shape ``[len(lengths), max_length, *x.shape[1:]]`` with the
+            dtype and device of ``x``.
+        """
+        # declare the shape, it can work for x of any shape.
+        ret_tensor = x.new_zeros((len(lengths), max_length) + tuple(x.shape[1:]))
+        cum_len = 0  
+        for i, l in enumerate(lengths): 
+            l = int(l)
+            ret_tensor[i, :l] = x[cum_len: cum_len+l] 
+            cum_len += l 
+        return ret_tensor 
+
+    def training_step(self, batch, batch_idx):
+        """Return the MSE between predicted and target action and log it."""
+        test_actions, test_actions_pred = self.forward(batch)
+        loss = F.mse_loss(test_actions, test_actions_pred)
+        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
+        return loss
+
+    def validation_step(self, batch, batch_idx, dataloader_idx=0):
+        """Log the validation MSE (one validation dataloader per task type)."""
+        test_actions, test_actions_pred = self.forward(batch)
+        loss = F.mse_loss(test_actions, test_actions_pred)
+        self.log('val_loss', loss, on_epoch=True, logger=True)
+
+    def configure_optimizers(self):
+        """Use Adam with the configured learning rate."""
+        optim = torch.optim.Adam(self.parameters(), lr=self.lr)
+        return optim 
+        #optim = torch.optim.AdamW(self.parameters(), lr=self.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01)
+        #scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optim, gamma=0.8)
+        #return [optim], [scheduler]
+
+    def train_dataloader(self): 
+        """Build the shuffled training loader over all configured task types."""
+        train_dataset = ToMnetDGLDataset(path=self.hparams.data_path, 
+                                         types=self.hparams.types, 
+                                         mode='train')
+        train_loader = DataLoader(dataset=train_dataset, 
+                                  batch_size=self.hparams.batch_size,
+                                  collate_fn=collate_function_seq, 
+                                  num_workers=self.hparams.num_workers,
+                                  #pin_memory=True, 
+                                  shuffle=True)
+        return train_loader
+
+    def val_dataloader(self): 
+        """Build one validation loader per task type."""
+        val_datasets = []
+        val_loaders = []
+        for t in self.hparams.types:
+            val_datasets.append(ToMnetDGLDataset(path=self.hparams.data_path, 
+                                                 types=[t], 
+                                                 mode='val'))
+            val_loaders.append(DataLoader(dataset=val_datasets[-1], 
+                                          batch_size=self.hparams.batch_size,
+                                          collate_fn=collate_function_seq, 
+                                          num_workers=self.hparams.num_workers,
+                                          #pin_memory=True, 
+                                          shuffle=False))
+        return val_loaders
+
+    def configure_callbacks(self):
+        """Keep a checkpoint of every epoch (``save_top_k=-1``)."""
+        checkpoint = ModelCheckpoint(
+            dirpath=None, # automatically set 
+            #filename=self.params['bc_model']+'-'+self.params['gnn_type']+'-'+self.gnn_params['feats_aggr']+'-{epoch:02d}',
+            save_top_k=-1,
+            period=1
+        )
+        return [checkpoint]
+
+# ---------------------------------------------------------------------------------------------------------------------------------
+
+class GraphBC_T(pl.LightningModule):
+    """
+    BC model with GraphTrans encoder, Transformer context encoder and MLP.
+    A relational GNN embeds every frame, a Transformer encoder summarises each
+    familiarization trial through a CLS token, and an MLP policy predicts the
+    action for the query frame.
+    """
+    @staticmethod
+    def add_model_specific_args(parent_parser):
+        """Return a parser extending ``parent_parser`` with the model's options."""
+        parser = ArgumentParser(parents=[parent_parser], add_help=False)
+        parser.add_argument('--lr', type=float, default=1e-4)
+        parser.add_argument('--action_dim', type=int, default=2)
+        parser.add_argument('--context_dim', type=int, default=32) # lstm hidden size
+        parser.add_argument('--beta', type=float, default=0.01)
+        parser.add_argument('--dropout', type=float, default=0.2)
+        parser.add_argument('--process_data', type=int, default=0)
+        parser.add_argument('--max_len', type=int, default=30)
+        # arguments for gnn 
+        parser.add_argument('--state_dim', type=int, default=128) # gnn out_feats
+        # `type=list` would split a command-line string into single characters;
+        # parse a whitespace separated list of ints instead.
+        parser.add_argument('--feats_dims', type=int, nargs='+', default=[9, 2, 3, 18])
+        parser.add_argument('--aggregation', type=str, default='cat_axis_1')
+        parser.add_argument('--gnn_type', type=str, default='RGATv3')
+        # arguments for mpl
+        #parser.add_argument('--mpl_hid_feats', type=list, default=[256, 64, 16])
+        # arguments for transformer
+        parser.add_argument('--d_model', type=int, default=128)
+        parser.add_argument('--nhead', type=int, default=4) 
+        parser.add_argument('--dim_feedforward', type=int, default=512) 
+        parser.add_argument('--transformer_dropout', type=float, default=0.3)
+        parser.add_argument('--transformer_activation', type=str, default='gelu') 
+        parser.add_argument('--num_encoder_layers', type=int, default=6) 
+        parser.add_argument('--transformer_norm_input', type=int, default=0) 
+        return parser
+    
+    def __init__(self, hparams):
+        """Build the GNN encoder, the Transformer context encoder and the policy.
+
+        Args:
+            hparams: namespace holding the options declared in
+                ``add_model_specific_args`` plus the data options read by the
+                dataloaders (``data_path``, ``types``, ``batch_size``,
+                ``num_workers``).
+
+        Raises:
+            ValueError: if ``gnn_type`` or ``aggregation`` is not supported.
+        """
+        super().__init__()
+
+        self.hparams = hparams
+        self.lr = self.hparams.lr
+        self.state_dim = self.hparams.state_dim
+        self.action_dim = self.hparams.action_dim
+        self.context_dim = self.hparams.context_dim 
+        self.beta = self.hparams.beta
+        self.dropout = self.hparams.dropout
+        self.max_len = self.hparams.max_len
+        self.feats_dims = self.hparams.feats_dims # type, position, color, shape 
+        
+        self.rel_names = [
+            'is_aligned', 'is_back', 'is_close', 'is_down_adj', 'is_down_left_adj',
+            'is_down_right_adj', 'is_front', 'is_left', 'is_left_adj', 'is_right',
+            'is_right_adj', 'is_top_adj', 'is_top_left_adj', 'is_top_right_adj'
+        ]
+        self.gnn_aggregation = self.hparams.aggregation
+        # Keyword arguments shared by every supported encoder.
+        gnn_kwargs = dict(
+            hidden_channels=self.state_dim,
+            out_channels=self.state_dim,
+            rel_names=self.rel_names,
+            dropout=0.0,
+            n_layers=2,
+            activation=nn.ELU()
+        )
+        # name -> (encoder class, extra keyword arguments)
+        encoders = {
+            'RGATv3': (RGATv3, dict(num_heads=4, residual=False)),
+            'RGATv4': (RGATv4, dict(num_heads=4, residual=False)),
+            'RSAGEv4': (RSAGEv4, dict()),
+            'RAGNNv4': (RAGNNv4, dict()),
+            'RGATv4Norm': (RGATv4Norm, dict(num_heads=4, residual=True))
+        }
+        gnn_type = self.hparams.gnn_type
+        if gnn_type not in encoders:
+            # Fail at construction time instead of with an AttributeError in forward().
+            raise ValueError(
+                'Unsupported gnn_type {!r}; choose one of {}.'.format(gnn_type, sorted(encoders)))
+        encoder_cls, extra_kwargs = encoders[gnn_type]
+        self.gnn_encoder = encoder_cls(**gnn_kwargs, **extra_kwargs)
+
+        self.d_model = self.hparams.d_model
+        self.nhead = self.hparams.nhead
+        self.dim_feedforward = self.hparams.dim_feedforward
+        self.transformer_dropout = self.hparams.transformer_dropout
+        self.transformer_activation = self.hparams.transformer_activation
+        self.num_encoder_layers = self.hparams.num_encoder_layers
+        self.transformer_norm_input = self.hparams.transformer_norm_input
+        self.context_enc = TransformerEncoder(
+            self.d_model, 
+            self.nhead, 
+            self.dim_feedforward, 
+            self.transformer_dropout,
+            self.transformer_activation, 
+            self.num_encoder_layers, 
+            self.max_len,
+            self.transformer_norm_input
+        )
+
+        if self.gnn_aggregation == 'cat_axis_1':
+            self.gnn2transformer = nn.Linear(self.state_dim * len(self.feats_dims) + self.action_dim, self.d_model) 
+            self.mlp_input_size =  len(self.feats_dims) * self.state_dim +  self.d_model
+        elif self.gnn_aggregation == 'sum':
+            self.gnn2transformer = nn.Linear(self.state_dim + self.action_dim, self.d_model)  
+            self.mlp_input_size = self.state_dim + self.d_model
+        else:
+            raise ValueError('Only sum and cat1 aggregations are available.')
+
+        self.policy = MlpModel(input_size=self.mlp_input_size, hidden_sizes=[256, 128, 256],
+                               output_size=self.action_dim, dropout=self.dropout)
+
+        # Positional embedding for the max_len frames plus the CLS position.
+        self.embedding = nn.Embedding(self.max_len + 1, self.d_model) # + 1 cause of cls token  
+        self.emb_layer_norm = nn.LayerNorm(self.d_model)
+        self.emb_dropout =  nn.Dropout(p=self.transformer_dropout)
+        # Learnable CLS token. It is registered once here so that it is trained
+        # and saved with the model; creating it inside forward() would feed fresh
+        # random noise on every call (also at evaluation time).
+        self.cls_embedding = nn.Parameter(torch.randn(1, 1, self.d_model))
+
+    def on_load_checkpoint(self, checkpoint):
+        """Keep checkpoints written before ``cls_embedding`` existed loadable.
+
+        Relies on Lightning calling this hook before ``load_state_dict``; a
+        missing CLS entry is filled with the freshly initialised parameter.
+        """
+        state_dict = checkpoint.get('state_dict')
+        if state_dict is not None:
+            state_dict.setdefault('cls_embedding', self.cls_embedding.detach().clone())
+
+    @staticmethod
+    def _length_mask(lengths, size, device):
+        """Return a bool tensor ``[len(lengths), size]`` that is True where ``t < lengths[i]``."""
+        steps = torch.arange(size, device=device).unsqueeze(0)
+        return steps < lengths.to(device).unsqueeze(1)
+
+    def forward(self, batch):
+        """Predict the query action from the familiarization trials.
+
+        Args:
+            batch: tuple ``(dem_frames, dem_actions, dem_lens, query_frame,
+                target_action)``. ``dem_actions`` is 4-D
+                ``[batch, trials, steps, action_dim]`` and ``dem_lens[i][j]`` is
+                the number of valid frames of trial ``j`` in sample ``i``.
+                (assumes the encoded frames are ordered sample by sample, trial
+                by trial, frame by frame -- TODO confirm against the collate fn.)
+
+        Returns:
+            ``(target_action, test_actions_pred)``, both float tensors.
+        """
+        dem_frames, dem_actions, dem_lens, query_frame, target_action = batch
+        dem_actions = dem_actions.float()
+        target_action = target_action.float()
+        dem_states = self.gnn_encoder(dem_frames) 
+        b, l, s, _ = dem_actions.size()
+        dem_lens = torch.tensor([t for dl in dem_lens for t in dl]).to(torch.int64).view(-1)
+        # Select the valid actions trial by trial so that row k of the actions
+        # matches row k of the frame embeddings. The `.data` of a PackedSequence
+        # is ordered time step by time step over length-sorted sequences and
+        # would pair states with actions of other trials / time steps.
+        action_mask = self._length_mask(dem_lens, s, dem_actions.device)
+        dem_actions_valid = dem_actions.view(b * l, s, -1)[action_mask]
+        dem_traj = torch.cat([dem_states, dem_actions_valid], dim=1) 
+        h_node = self.gnn2transformer(dem_traj)
+        # Scatter the per-frame embeddings back into zero-padded trajectories.
+        frame_mask = self._length_mask(dem_lens, self.max_len, h_node.device)
+        padded_trajectory = h_node.new_zeros(b * l, self.max_len, h_node.size(1))
+        padded_trajectory[frame_mask] = h_node
+        # Padding mask from the known lengths (True = padded position); the
+        # appended CLS position is never masked.
+        mask = ~frame_mask
+        mask = torch.cat([mask, mask.new_zeros(mask.size(0), 1)], dim=1)
+        transformer_input = padded_trajectory.transpose(0, 1) # [max_len, batch*trials, d_model]
+        # add cls:
+        cls_embedding = self.cls_embedding.expand(1, b * l, -1)
+        transformer_input = torch.cat([transformer_input, cls_embedding], dim=0)  # [max_len + 1, batch*trials, d_model]
+        # Embed    
+        indices = torch.arange(self.max_len + 1, dtype=torch.long, device=h_node.device) # + 1 cause of cls
+        positional_embeddings = self.embedding(indices).unsqueeze(1) # [max_len + 1, 1, d_model]
+        #generate transformer input
+        pe_input = positional_embeddings + transformer_input
+        # Layernorm and dropout
+        transformer_in = self.emb_dropout(self.emb_layer_norm(pe_input))
+        # transformer encoding and output parsing      
+        out, _ = self.context_enc(transformer_in, mask)
+        cls = out[-1] # encoding at the CLS position
+        cls = cls.view(b, l, -1)
+        context = torch.mean(cls, dim=1)      
+        # CLASSIFICATION
+        test_states = self.gnn_encoder(query_frame)
+        test_context_states = torch.cat([context, test_states], dim=1)
+        # for each state in the test states calculate action
+        x = self.policy(test_context_states)
+        test_actions_pred = torch.tanh(x) # [batch, action_dim]
+        return target_action, test_actions_pred
+
+    def make_mask(self, feature):     
+        """Return a bool mask that is True where the last dimension of ``feature`` is all zero."""
+        return (torch.sum(
+            torch.abs(feature),
+            dim=-1     
+        ) == 0)#.unsqueeze(1).unsqueeze(2) 
+
+    def training_step(self, batch, batch_idx):
+        """Return the MSE between predicted and target action and log it."""
+        test_actions, test_actions_pred = self.forward(batch)
+        loss = F.mse_loss(test_actions, test_actions_pred)
+        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
+        return loss
+
+    def validation_step(self, batch, batch_idx, dataloader_idx=0):
+        """Log the validation MSE (one validation dataloader per task type)."""
+        test_actions, test_actions_pred = self.forward(batch)
+        loss = F.mse_loss(test_actions, test_actions_pred)
+        self.log('val_loss', loss, on_epoch=True, logger=True)
+    
+    def configure_optimizers(self):
+        """Use Adam with the configured learning rate."""
+        optim = torch.optim.Adam(self.parameters(), lr=self.lr)
+        return optim
+        #optim = torch.optim.AdamW(self.parameters(), lr=self.lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01)
+        #scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optim, gamma=0.96)
+        #return [optim], [scheduler]
+
+    def train_dataloader(self): 
+        """Build the shuffled training loader over all configured task types."""
+        train_dataset = ToMnetDGLDataset(path=self.hparams.data_path, 
+                                         types=self.hparams.types, 
+                                         mode='train')
+        train_loader = DataLoader(dataset=train_dataset, 
+                                  batch_size=self.hparams.batch_size,
+                                  collate_fn=collate_function_seq, 
+                                  num_workers=self.hparams.num_workers,
+                                  #pin_memory=True, 
+                                  shuffle=True)
+        return train_loader
+
+    def val_dataloader(self): 
+        """Build one validation loader per task type."""
+        val_datasets = []
+        val_loaders = []
+        for t in self.hparams.types:
+            val_datasets.append(ToMnetDGLDataset(path=self.hparams.data_path, 
+                                                 types=[t], 
+                                                 mode='val'))
+            val_loaders.append(DataLoader(dataset=val_datasets[-1], 
+                                          batch_size=self.hparams.batch_size,
+                                          collate_fn=collate_function_seq, 
+                                          num_workers=self.hparams.num_workers,
+                                          #pin_memory=True, 
+                                          shuffle=False))
+        return val_loaders
+
+    def configure_callbacks(self):
+        """Keep a checkpoint of every epoch (``save_top_k=-1``)."""
+        checkpoint = ModelCheckpoint(
+            dirpath=None, # automatically set 
+            #filename=self.params['bc_model']+'-'+self.params['gnn_type']+'-'+self.gnn_params['feats_aggr']+'-{epoch:02d}',
+            save_top_k=-1,
+            period=1
+        )
+        return [checkpoint]
--- a/tom/norm.py
+++ b/tom/norm.py
@ -0,0 +1,46 @@
+import torch
+import torch.nn as nn
+
+
+class Norm(nn.Module):
+    """Feature normalisation selected by ``norm_type``.
+
+    * ``'bn'``: ``nn.BatchNorm1d`` over ``hidden_dim`` features.
+    * ``'gn'``: per-graph normalisation with a learnable mean scale, weight and bias.
+    * anything else: identity.
+    """
+
+    def __init__(self, norm_type, hidden_dim=64, print_info=None):
+        super(Norm, self).__init__()
+        
+        # assert norm_type in ['bn', 'ln', 'gn', None]
+        self.print_info = print_info
+        self.norm = None
+        if norm_type == 'gn':
+            # The string itself marks the per-graph branch in forward().
+            self.norm = norm_type
+            self.weight = nn.Parameter(torch.ones(hidden_dim))
+            self.bias = nn.Parameter(torch.zeros(hidden_dim))
+
+            self.mean_scale = nn.Parameter(torch.ones(hidden_dim))
+        elif norm_type == 'bn':
+            self.norm = nn.BatchNorm1d(hidden_dim)
+
+    def forward(self, graph, tensor, print_=False):
+        """Normalise ``tensor``; ``graph`` is only consulted for the ``'gn'`` type.
+
+        For ``'gn'`` the rows of ``tensor`` are grouped by the sizes returned by
+        ``graph.batch_num_nodes('obj')`` and each group is standardised with its
+        own mean and standard deviation.
+        """
+        if self.norm is None:
+            return tensor
+        if not isinstance(self.norm, str):
+            return self.norm(tensor)
+        
+        device = tensor.device
+        nodes_per_graph = graph.batch_num_nodes('obj')
+        num_graphs = len(nodes_per_graph)
+        #batch_list = torch.tensor(batch_list).long().to(tensor.device)
+        nodes_per_graph = nodes_per_graph.long().to(device)
+        # Shape that broadcasts a per-graph value over all trailing dimensions.
+        per_graph_shape = (-1,) + (1,) * (tensor.dim() - 1)
+        graph_ids = torch.arange(num_graphs).to(device).repeat_interleave(nodes_per_graph)
+        scatter_index = graph_ids.view(per_graph_shape).expand_as(tensor)
+        counts = nodes_per_graph.view(per_graph_shape)
+
+        # per-graph mean, repeated back to one row per node
+        feature_sum = torch.zeros(num_graphs, *tensor.shape[1:]).to(device)
+        feature_sum = feature_sum.scatter_add_(0, scatter_index, tensor)
+        mean = (feature_sum / counts).repeat_interleave(nodes_per_graph, dim=0)
+
+        centered = tensor - mean * self.mean_scale
+
+        # per-graph standard deviation of the centred features
+        squared_sum = torch.zeros(num_graphs, *tensor.shape[1:]).to(device)
+        squared_sum = squared_sum.scatter_add_(0, scatter_index, centered.pow(2))
+        std = (squared_sum / counts + 1e-6).sqrt()
+        std = std.repeat_interleave(nodes_per_graph, dim=0)
+        return self.weight * centered / std + self.bias
--- a/tom/transformer.py
+++ b/tom/transformer.py
@ -0,0 +1,89 @@
+import torch 
+import torch.nn as nn
+import math
+
+
+class PositionalEncoding(nn.Module):
+    """Add fixed sinusoidal positional encodings to a sequence, then apply dropout.
+
+    Args:
+        d_model: embedding dimension (even or odd).
+        dropout: dropout probability applied after adding the encoding.
+        max_len: number of positions for which encodings are precomputed.
+    """
+
+    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
+        super().__init__()
+
+        self.dropout = nn.Dropout(p=dropout)
+
+        position = torch.arange(max_len).unsqueeze(1)
+        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
+        pe = torch.zeros(max_len, 1, d_model)
+        pe[:, 0, 0::2] = torch.sin(position * div_term)
+        # For an odd d_model there is one cosine slot less than sine slots, so
+        # trim the frequencies; for an even d_model the slice is a no-op.
+        pe[:, 0, 1::2] = torch.cos(position * div_term[:d_model // 2])
+        # buffer: moves with the module and is saved, but is not trained
+        self.register_buffer('pe', pe)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: Tensor, shape [seq_len, batch_size, embedding_dim]
+
+        Returns:
+            Tensor of the same shape as ``x``.
+        """
+        x = x + self.pe[:x.size(0)]
+        return self.dropout(x)
+
+
+class TransformerEncoder(nn.Module):
+    """Stack of ``nn.TransformerEncoderLayer`` with a final LayerNorm.
+
+    An additional LayerNorm is applied to the input when
+    ``transformer_norm_input`` is truthy.
+    """
+
+    def __init__(
+            self, 
+            d_model, 
+            nhead, 
+            dim_feedforward, 
+            transformer_dropout, 
+            transformer_activation, 
+            num_encoder_layers, 
+            max_input_len, 
+            transformer_norm_input
+        ):
+        super().__init__()
+        self.d_model = d_model
+        self.num_layer = num_encoder_layers
+        self.max_input_len = max_input_len
+
+        # Template layer; nn.TransformerEncoder stacks num_encoder_layers of it.
+        layer_template = nn.TransformerEncoderLayer(
+            d_model=d_model, 
+            nhead=nhead, 
+            dim_feedforward=dim_feedforward, 
+            dropout=transformer_dropout, 
+            activation=transformer_activation
+        )
+        final_norm = nn.LayerNorm(d_model)
+        self.transformer = nn.TransformerEncoder(layer_template, num_encoder_layers, final_norm)
+
+        # Optional normalisation of the raw input sequence.
+        self.norm_input = nn.LayerNorm(d_model) if transformer_norm_input else None
+
+    def forward(self, padded_h_node, src_padding_mask):
+        """
+        Encode a padded sequence.
+
+        padded_h_node: (S, B, h_d) padded input sequence
+        src_padding_mask: (B, S) key padding mask forwarded to the transformer
+
+        Returns the encoded sequence (S, B, h_d) together with the unchanged mask.
+        """
+        encoder_input = padded_h_node
+        if self.norm_input is not None:
+            encoder_input = self.norm_input(encoder_input)
+
+        encoded = self.transformer(encoder_input, src_key_padding_mask=src_padding_mask)  # (S, B, h_d)
+
+        return encoded, src_padding_mask
+
+
+
+if __name__ == '__main__':
+    # Smoke test: build a tiny encoder and show whether input normalisation is enabled.
+    demo_config = dict(
+        d_model=12,
+        nhead=4,
+        dim_feedforward=32,
+        transformer_dropout=0.0,
+        transformer_activation='gelu',
+        num_encoder_layers=4,
+        max_input_len=34,
+        transformer_norm_input=0,
+    )
+    model = TransformerEncoder(**demo_config)
+    print(model.norm_input)
--- a/train_tom.py
+++ b/train_tom.py
@ -0,0 +1,75 @@
+import random
+from argparse import ArgumentParser
+import numpy as np
+import torch
+from pytorch_lightning import Trainer
+from pytorch_lightning.loggers import WandbLogger
+from tom.model import GraphBC_T, GraphBCRNN
+
+torch.multiprocessing.set_sharing_strategy('file_system')
+
+# Maps every accepted --model_type value to the model class implementing it.
+MODEL_CLASSES = {
+    'graphbct': GraphBC_T,
+    'graphbcrnn': GraphBCRNN,
+}
+
+parser = ArgumentParser()
+
+# program level args
+parser.add_argument('--seed', type=int, default=4)
+# data specific args
+parser.add_argument('--data_path', type=str, default='/datasets/external/bib_train/graphs/all_tasks/')
+parser.add_argument('--types', nargs='+', type=str, 
+                    default=['preference', 'multi_agent', 'single_object', 'instrumental_action'],
+                    help='types of tasks used for training / validation')
+parser.add_argument('--train', type=int, default=1)
+parser.add_argument('--num_workers', type=int, default=4)
+parser.add_argument('--batch_size', type=int, default=16)
+parser.add_argument('--model_type', type=str, default='graphbcrnn')
+
+# model specific args
+# Peek at --model_type first so that the options (and their defaults) of the
+# selected model are registered, instead of always using those of GraphBC_T.
+# A help-less throw-away parser is used so that `-h` still prints the full help.
+model_type_parser = ArgumentParser(add_help=False)
+model_type_parser.add_argument('--model_type', type=str, default='graphbcrnn')
+model_type = model_type_parser.parse_known_args()[0].model_type
+if model_type not in MODEL_CLASSES:
+    raise NotImplementedError(
+        'Unknown model_type {!r}; choose one of {}.'.format(model_type, sorted(MODEL_CLASSES)))
+model_cls = MODEL_CLASSES[model_type]
+parser_model = ArgumentParser()
+parser_model = model_cls.add_model_specific_args(parser_model)
+
+# add all the available trainer options to argparse
+parser = Trainer.add_argparse_args(parser)
+
+# combine parsers
+parser_all = ArgumentParser(conflict_handler='resolve',
+                            parents=[parser, parser_model])
+
+# parse args
+args = parser_all.parse_args()
+args.types = sorted(args.types)
+print(args)
+
+random.seed(args.seed)
+np.random.seed(args.seed)
+torch.manual_seed(args.seed)
+
+# init model
+model = model_cls(args)
+
+# NOTE: anomaly detection is a debugging aid and slows down training.
+torch.autograd.set_detect_anomaly(True)
+
+logger = WandbLogger(project='bib')
+trainer = Trainer(
+    gradient_clip_val=args.gradient_clip_val,
+    gpus=args.gpus,
+    auto_select_gpus=args.auto_select_gpus,
+    track_grad_norm=args.track_grad_norm,
+    check_val_every_n_epoch=args.check_val_every_n_epoch,
+    max_epochs=args.max_epochs,
+    accelerator=args.accelerator,
+    resume_from_checkpoint=args.resume_from_checkpoint,
+    stochastic_weight_avg=args.stochastic_weight_avg,
+    num_sanity_val_steps=args.num_sanity_val_steps,
+    logger=logger
+)
+
+if args.train:
+    trainer.fit(model)
+else:
+    # only training is implemented by this script
+    raise NotImplementedError
--- a/utils/init.py
+++ b/utils/init.py
--- a/utils/build_graphs.py
+++ b/utils/build_graphs.py
@ -0,0 +1,115 @@
+import sys
+sys.path.append('/projects/bortoletto/icml2023_matteo/utils')
+from dataset import TransitionDataset, TestTransitionDatasetSequence
+import multiprocessing as mp 
+import argparse
+import pickle as pkl 
+import os 
+
+# Instantiate the parser
+parser = argparse.ArgumentParser()
+# When --cpus is omitted, args.cpus is None and is passed as-is to mp.Pool.
+parser.add_argument('--cpus', type=int,
+                    help='Number of processes')
+parser.add_argument('--mode', type=str,
+                    help='Train (train), validation (val) or test (test)')
+args = parser.parse_args()
+
+# Module-level settings read by generate_files() and by the main section.
+NUM_PROCESSES = args.cpus
+MODE = args.mode
+
+def generate_files(idx):
+    """Build the sample of index ``idx`` and cache it as ``<PATH><idx>.pkl``.
+
+    Reads the module globals ``MODE``, ``PATH`` and ``dataset``; the last two
+    are assigned in the main section before the worker pool is started.
+    The values returned by the dataset are pickled as one list (4 entries for
+    train/val samples, 12 entries for test samples). Indices whose output
+    file already exists are skipped.
+
+    Raises:
+        ValueError: if MODE is not 'train', 'val' or 'test'.
+    """
+    out_file = PATH + str(idx) + '.pkl'
+    print('Generating idx', idx)
+    if os.path.exists(out_file):
+        print('Index', idx, 'skipped.')
+        return
+    if MODE not in ('train', 'val', 'test'):
+        raise ValueError('MODE can be only train, val or test.')
+    sample = list(dataset[idx])
+    # Write to a temporary file and rename it afterwards: an interrupted run
+    # must not leave a truncated .pkl behind, because existing files are
+    # skipped on the next run and would never be regenerated.
+    tmp_file = out_file + '.tmp'
+    with open(tmp_file, 'wb') as f:
+        pkl.dump(sample, f)
+    os.replace(tmp_file, out_file)
+    print(out_file + ' saved.')
+
+if __name__ == "__main__":
+    # generate_files() runs in the pool workers and reads PATH and dataset as
+    # module globals, so both are (re)assigned before each pool is created.
+    # NOTE(review): this presumably relies on the 'fork' start method so that
+    # the workers inherit these globals; confirm on non-Linux platforms.
+    if MODE == 'train':
+        print('TRAIN MODE')
+        PATH = '/datasets/external/bib_train/graphs/all_tasks/train_dgl_hetero_nobound_4feats/'
+        if not os.path.exists(PATH):
+            os.makedirs(PATH)
+            print(PATH, 'directory created.')
+        dataset = TransitionDataset(
+            path='/datasets/external/bib_train/',
+            types=['instrumental_action', 'multi_agent', 'preference', 'single_object'],
+            mode="train",
+            max_len=30,
+            num_test=1,
+            num_trials=9,
+            action_range=10,
+            process_data=0
+        )
+        # The context manager tears the workers down even if map() raises.
+        with mp.Pool(processes=NUM_PROCESSES) as pool:
+            print('Starting graph generation with', NUM_PROCESSES, 'processes...')
+            pool.map(generate_files, range(len(dataset)))
+    elif MODE == 'val':
+        print('VALIDATION MODE')
+        types = ['multi_agent', 'instrumental_action', 'preference', 'single_object']
+        # one output directory (and one dataset) per task type
+        for task_type in types:
+            PATH = '/datasets/external/bib_train/graphs/all_tasks/val_dgl_hetero_nobound_4feats/'+task_type+'/'
+            if not os.path.exists(PATH):
+                os.makedirs(PATH)
+                print(PATH, 'directory created.')
+            dataset = TransitionDataset(
+                path='/datasets/external/bib_train/',
+                types=[task_type],
+                mode="val",
+                max_len=30,
+                num_test=1,
+                num_trials=9,
+                action_range=10,
+                process_data=0
+            )
+            with mp.Pool(processes=NUM_PROCESSES) as pool:
+                print('Starting', task_type, 'graph generation with', NUM_PROCESSES, 'processes...')
+                pool.map(generate_files, range(len(dataset)))
+    elif MODE == 'test':
+        print('TEST MODE')
+        types = [
+            'preference', 'multi_agent', 'inaccessible_goal',
+            'efficiency_irrational', 'efficiency_time','efficiency_path',
+            'instrumental_no_barrier', 'instrumental_blocking_barrier', 'instrumental_inconsequential_barrier'
+        ]
+        # one output directory (and one dataset) per evaluation task
+        for task_type in types:
+            PATH = '/datasets/external/bib_evaluation_1_1/graphs/all_tasks_dgl_hetero_nobound_4feats/'+task_type+'/'
+            if not os.path.exists(PATH):
+                os.makedirs(PATH)
+                print(PATH, 'directory created.')
+            dataset = TestTransitionDatasetSequence(
+                path='/datasets/external/bib_evaluation_1_1/',
+                task_type=task_type,
+                mode="test",
+                num_test=1,
+                num_trials=9,
+                action_range=10,
+                process_data=0,
+                max_len=30
+            )
+            with mp.Pool(processes=NUM_PROCESSES) as pool:
+                print('Starting', task_type, 'graph generation with', NUM_PROCESSES, 'processes...')
+                pool.map(generate_files, range(len(dataset)))
+    else:
+        raise ValueError('MODE can be only train, val or test.')
--- a/utils/dataset.py
+++ b/utils/dataset.py
@ -0,0 +1,487 @@
+import dgl
+import torch
+import torch.utils.data
+import os
+import pickle as pkl 
+import json
+import numpy as np
+from tqdm import tqdm 
+
+import sys 
+sys.path.append('/projects/bortoletto/irene/')
+from utils.grid_object import * 
+from utils.relations import *
+
+# ========================== Helper functions ==========================
+
+def index_data(json_list, path_list):
+    """Index the usable frames of every episode stored in the given json files.
+
+    Args:
+        json_list: paths of the json files; each file holds a list of
+            episodes and each episode is a list of frames
+        path_list: paths paired with ``json_list``; only consumed by the zip
+    Returns:
+        A list with one entry per episode (over all files, in order). Each
+        entry lists ``(json file, frame number, action)`` tuples, where the
+        frame number counts frames from the start of the file and the action
+        is half of the change of ``frame['agent'][0]`` to the next frame.
+    """
+    print(f'processing files {len(json_list)}')
+    data_tuples = []
+    for json_path, _video_path in tqdm(zip(json_list, path_list)):
+        with open(json_path, 'r') as fh:
+            episodes = json.load(fh)
+        # number of frames of this file that belong to earlier episodes
+        frame_offset = 0
+        for episode in episodes:
+            n_frames = len(episode)
+            samples = []
+            data_tuples.append(samples)
+            # skip first 30 frames and last 83 frames
+            for frame in range(30, n_frames - 83):
+                # find action taken;
+                x0, y0 = episode[frame]['agent'][0]
+                x1, y1 = episode[frame + 1]['agent'][0]
+                action = [(x1 - x0) / 2., (y1 - y0) / 2.]
+                samples.append((json_path, frame_offset + frame, action))
+            # every episode must contribute at least one frame
+            assert len(samples) > 0
+            frame_offset += n_frames
+    return data_tuples
+
+# ========================== Dataset class ==========================
+
+class TransitionDataset(torch.utils.data.Dataset):
+    """
+    Training/validation dataset that serves episodes as sequences of frame
+    graphs (DGL heterographs) paired with the agent's actions.
+    Args:
+        path: path to the dataset
+        types: list of video types to include (required)
+        mode: 'train' keeps the first 80% of the sorted files of every type,
+            'val' the remaining 20%; any other value keeps all files
+        num_test: number of test state-action pairs (stored only)
+        num_trials: number of trials in an episode
+        action_range: number of frames to skip; actions (displacements of the
+            agent) are averaged over this number of frames
+        process_data: whether to index the json files and write the index
+            file, or to read an already existing index file
+        max_len: max number of state-action pairs kept per trial
+    __getitem__:
+        returns: (states, actions, lens, n_nodes)
+        states: list with one batched DGL heterograph per trial
+        actions: list with one zero-padded (max_len, 2) tensor per trial
+        lens: list with the number of sampled frames of each trial
+        n_nodes: flat list with the node count of every frame graph
+    """
+    # Edge type names, in the same order as self.rel_deter_func.
+    _REL_NAMES = (
+        'is_top_adj', 'is_left_adj', 'is_top_right_adj', 'is_top_left_adj',
+        'is_down_adj', 'is_right_adj', 'is_down_left_adj', 'is_down_right_adj',
+        'is_left', 'is_right', 'is_front', 'is_back', 'is_aligned', 'is_close'
+    )
+
+    def __init__(
+            self,
+            path,
+            types=None,
+            mode="train",
+            num_test=1,
+            num_trials=9,
+            action_range=10,
+            process_data=0,
+            max_len=30
+        ):
+        if types is None:
+            raise ValueError('types must be a list of video types.')
+        self.path = path
+        self.types = types
+        self.mode = mode
+        self.num_trials = num_trials
+        self.num_test = num_test
+        self.action_range = action_range
+        self.max_len = max_len
+        self.ep_combs = self.num_trials * (self.num_trials - 2)  # 9p2 - 9
+        self.eps = [[x, y] for x in range(self.num_trials) for y in range(self.num_trials) if x != y]
+        types_str = '_'.join(self.types)
+        # predicates deciding whether a spatial relation holds between two objects
+        self.rel_deter_func = [
+            is_top_adj, is_left_adj, is_top_right_adj, is_top_left_adj,
+            is_down_adj, is_right_adj, is_down_left_adj, is_down_right_adj,
+            is_left, is_right, is_front, is_back, is_aligned, is_close
+        ]
+        self.path_list = []
+        self.json_list = []
+        # get video paths and json file paths
+        for t in types:
+            print(f'reading files of type {t} in {mode}')
+            type_dir = os.path.join(self.path, t)
+            files = os.listdir(type_dir)
+            paths = sorted(os.path.join(type_dir, x) for x in files if x.endswith('.mp4'))
+            jsons = sorted(os.path.join(type_dir, x) for x in files
+                           if x.endswith('.json') and 'index' not in x)
+            # 80/20 train/val split; the split position is taken from the json list
+            split = int(0.8 * len(jsons))
+            if mode == 'train':
+                self.path_list += paths[:split]
+                self.json_list += jsons[:split]
+            elif mode == 'val':
+                self.path_list += paths[split:]
+                self.json_list += jsons[split:]
+            else:
+                self.path_list += paths
+                self.json_list += jsons
+        self.data_tuples = []
+        index_file = os.path.join(self.path, f'jindex_bib_{mode}_{types_str}.json')
+        if process_data:
+            # index the videos in the dataset directory. This is done to speed up the retrieval of videos.
+            # frame index, action tuples are stored
+            self.data_tuples = index_data(self.json_list, self.path_list)
+            # tuples of frame index and action (displacement of agent)
+            index_dict = {'data_tuples': self.data_tuples}
+            with open(index_file, 'w') as fp:
+                json.dump(index_dict, fp)
+        else:
+            # read pre-indexed data
+            with open(index_file, 'r') as fp:
+                index_dict = json.load(fp)
+            self.data_tuples = index_dict['data_tuples']
+        self.tot_trials = len(self.path_list) * self.num_trials
+
+    def _get_frame_graph(self, jsonfile, frame_idx):
+        """Build the heterograph of frame ``frame_idx`` of ``jsonfile``.
+
+        ``frame_idx`` counts frames over all episodes of the file.
+        """
+        # load json
+        # NOTE(review): the file is re-read for every frame; caching the
+        # parsed file would avoid repeated loads within a trial.
+        with open(jsonfile, 'rb') as f:
+            frame_data = json.load(f)
+        flat_list = [x for xs in frame_data for x in xs]
+        # extract entities
+        grid_objs = parse_objects(flat_list[frame_idx])
+        # --- build the graph
+        adj = self._get_spatial_rel(grid_objs)
+        # define edges: one edge type per relation, taken from the non-zero
+        # entries of the matching adjacency matrix
+        edges = {}
+        for rel_name, rel_adj in zip(self._REL_NAMES, adj):
+            src, dst = np.nonzero(rel_adj)
+            edges[('obj', rel_name, 'obj')] = (torch.tensor(src), torch.tensor(dst))
+        g = dgl.heterograph(edges, num_nodes_dict={'obj': len(grid_objs)})
+        g = self._add_node_features(grid_objs, g)
+        return g
+
+    def _add_node_features(self, objs, graph):
+        """Attach the 'type', 'pos', 'color' and 'shape' features of every object to its node."""
+        for obj_idx, obj in enumerate(objs):
+            graph.nodes[obj_idx].data['type'] = torch.tensor(obj.type)
+            graph.nodes[obj_idx].data['pos'] = torch.tensor([[obj.x, obj.y]], dtype=torch.float32)
+            assert len(obj.attributes) == 2 and None not in obj.attributes[0] and None not in obj.attributes[1]
+            graph.nodes[obj_idx].data['color'] = torch.tensor([obj.attributes[0]])
+            graph.nodes[obj_idx].data['shape'] = torch.tensor([obj.attributes[1]])
+        return graph
+
+    def _get_spatial_rel(self, objs):
+        """Return one (n_objs, n_objs) 0/1 matrix per relation in self.rel_deter_func."""
+        spatial_tensors = [np.zeros([len(objs), len(objs)]) for _ in range(len(self.rel_deter_func))]
+        for obj_idx1, obj1 in enumerate(objs):
+            for obj_idx2, obj2 in enumerate(objs):
+                direction_vec = np.array((0, -1)) # Up
+                for rel_idx, func in enumerate(self.rel_deter_func):
+                    if func(obj1, obj2, direction_vec):
+                        spatial_tensors[rel_idx][obj_idx1, obj_idx2] = 1.0
+        return spatial_tensors
+
+    def get_trial(self, trials, step=10):
+        """Build graphs and actions for the given trial indices.
+
+        Every ``step``-th indexed frame of a trial is sampled, up to
+        ``max_len`` frames per trial.
+        """
+        states = []
+        actions = []
+        lens = []
+        n_nodes = []
+        for t in trials:
+            trial_data = self.data_tuples[t]
+            frame_ids = list(range(0, len(trial_data), step))[:self.max_len]
+            trial_states = []
+            trial_actions = []
+            lens.append(len(frame_ids))
+            for n in frame_ids:
+                jsonfile, frame_idx = trial_data[n][0], trial_data[n][1]
+                graph = self._get_frame_graph(jsonfile, frame_idx)
+                trial_states.append(graph)
+                n_nodes.append(graph.number_of_nodes())
+                # actions are pooled over frames; the slice simply stops at
+                # the end of the trial when fewer frames are left
+                actions_xy = np.array([d[2] for d in trial_data[n:n + self.action_range]])
+                trial_actions.append(np.mean(actions_xy, axis=0))
+            states.append(dgl.batch(trial_states))
+            trial_actions = torch.tensor(np.array(trial_actions))
+            # zero-pad the actions of the trial to max_len rows
+            trial_actions_padded = torch.zeros(self.max_len, trial_actions.size(1))
+            trial_actions_padded[:trial_actions.size(0), :] = trial_actions
+            actions.append(trial_actions_padded)
+        return states, actions, lens, n_nodes
+
+    def __getitem__(self, idx):
+        ep_trials = [idx * self.num_trials + t for t in range(self.num_trials)] # [idx, ..., idx+8]
+        states, actions, lens, n_nodes = self.get_trial(ep_trials, step=self.action_range)
+        return states, actions, lens, n_nodes
+
+    def __len__(self):
+        return self.tot_trials // self.num_trials
+
+
+class TestTransitionDatasetSequence(torch.utils.data.Dataset):
+    """
+    Test dataset class for the behavior cloning rnn model. This dataset is used to test the model on the eval data.
+    This class is used to compare plausible and implausible episodes.
+    Args:
+        path: path to the dataset
+        task_type: name of the evaluation task (sub-directory of ``path``)
+        mode: test; also part of the name of the index files
+        num_test: number of test state-action pairs (stored only)
+        num_trials: number of trials in an episode
+        action_range: number of frames to skip; actions (displacements of the
+            agent) are averaged over this number of frames
+        process_data: whether to index the json files and write the index
+            files, or to read already existing index files
+        max_len: max number of state-action pairs kept per demonstration trial
+    __getitem__:
+        returns: (dem_expected_states, dem_expected_actions, dem_expected_lens, dem_expected_nodes,
+        dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens, dem_unexpected_nodes,
+        query_expected_frames, target_expected_actions,
+        query_unexpected_frames, target_unexpected_actions)
+        dem_*_states: list with one batched DGL heterograph per demonstration trial
+        dem_*_actions: list with one zero-padded (max_len, 2) tensor per demonstration trial
+        dem_*_lens: list with the number of sampled frames of each demonstration trial
+        dem_*_nodes: flat list with the node count of every demonstration frame graph
+        query_*_frames: batched DGL heterograph of the last trial of the episode
+        target_*_actions: (number of sampled frames, 2)
+    """
+    # Edge type names, in the same order as self.rel_deter_func.
+    _REL_NAMES = (
+        'is_top_adj', 'is_left_adj', 'is_top_right_adj', 'is_top_left_adj',
+        'is_down_adj', 'is_right_adj', 'is_down_left_adj', 'is_down_right_adj',
+        'is_left', 'is_right', 'is_front', 'is_back', 'is_aligned', 'is_close'
+    )
+
+    def __init__(
+            self,
+            path,
+            task_type=None,
+            mode="test",
+            num_test=1,
+            num_trials=9,
+            action_range=10,
+            process_data=0,
+            max_len=30
+        ):
+        self.path = path
+        self.task_type = task_type
+        self.mode = mode
+        self.num_trials = num_trials
+        self.num_test = num_test
+        self.action_range = action_range
+        self.max_len = max_len
+        self.ep_combs = self.num_trials * (self.num_trials - 2)  # 9p2 - 9
+        self.eps = [[x, y] for x in range(self.num_trials) for y in range(self.num_trials) if x != y]
+        self.path_list_exp = []
+        self.json_list_exp = []
+        self.path_list_un = []
+        self.json_list_un = []
+
+        print(f'reading files of type {task_type} in {mode}')
+
+        # Files ending in 'e' hold the expected outcome, files ending in 'u' the unexpected one.
+        task_dir = os.path.join(self.path, task_type)
+        files = os.listdir(task_dir)
+        paths_expected = sorted(os.path.join(task_dir, x) for x in files if x.endswith('e.mp4'))
+        jsons_expected = sorted(os.path.join(task_dir, x) for x in files
+                                if x.endswith('e.json') and 'index' not in x)
+        paths_unexpected = sorted(os.path.join(task_dir, x) for x in files if x.endswith('u.mp4'))
+        jsons_unexpected = sorted(os.path.join(task_dir, x) for x in files
+                                  if x.endswith('u.json') and 'index' not in x)
+        self.path_list_exp += paths_expected
+        self.json_list_exp += jsons_expected
+        self.path_list_un += paths_unexpected
+        self.json_list_un += jsons_unexpected
+        self.data_expected = []
+        self.data_unexpected = []
+
+        # The same file names are used for writing and for reading, so an
+        # index created with process_data=1 is found again with process_data=0.
+        index_expected = os.path.join(self.path, f'jindex_bib_{mode}_{task_type}e.json')
+        index_unexpected = os.path.join(self.path, f'jindex_bib_{mode}_{task_type}u.json')
+        if process_data:
+            # index data. This is done to speed up video retrieval.
+            # frame index, action tuples are stored
+            self.data_expected = index_data(self.json_list_exp, self.path_list_exp)
+            index_dict = {'data_tuples': self.data_expected}
+            with open(index_expected, 'w') as fp:
+                json.dump(index_dict, fp)
+
+            self.data_unexpected = index_data(self.json_list_un, self.path_list_un)
+            index_dict = {'data_tuples': self.data_unexpected}
+            with open(index_unexpected, 'w') as fp:
+                json.dump(index_dict, fp)
+        else:
+            with open(index_expected, 'r') as fp:
+                index_dict = json.load(fp)
+            self.data_expected = index_dict['data_tuples']
+            with open(index_unexpected, 'r') as fp:
+                index_dict = json.load(fp)
+            self.data_unexpected = index_dict['data_tuples']
+
+        # predicates deciding whether a spatial relation holds between two objects
+        self.rel_deter_func = [
+            is_top_adj, is_left_adj, is_top_right_adj, is_top_left_adj,
+            is_down_adj, is_right_adj, is_down_left_adj, is_down_right_adj,
+            is_left, is_right, is_front, is_back, is_aligned, is_close
+        ]
+
+        print('Done.')
+
+    def _get_frame_graph(self, jsonfile, frame_idx):
+        """Build the heterograph of frame ``frame_idx`` of ``jsonfile``.
+
+        ``frame_idx`` counts frames over all episodes of the file.
+        """
+        # load json
+        with open(jsonfile, 'rb') as f:
+            frame_data = json.load(f)
+        flat_list = [x for xs in frame_data for x in xs]
+        # extract entities
+        grid_objs = parse_objects(flat_list[frame_idx])
+        # --- build the graph
+        adj = self._get_spatial_rel(grid_objs)
+        # define edges: one edge type per relation, taken from the non-zero
+        # entries of the matching adjacency matrix
+        edges = {}
+        for rel_name, rel_adj in zip(self._REL_NAMES, adj):
+            src, dst = np.nonzero(rel_adj)
+            edges[('obj', rel_name, 'obj')] = (torch.tensor(src), torch.tensor(dst))
+        g = dgl.heterograph(edges, num_nodes_dict={'obj': len(grid_objs)})
+        g = self._add_node_features(grid_objs, g)
+        return g
+
+    def _add_node_features(self, objs, graph):
+        """Attach the 'type', 'pos', 'color' and 'shape' features of every object to its node."""
+        for obj_idx, obj in enumerate(objs):
+            graph.nodes[obj_idx].data['type'] = torch.tensor(obj.type)
+            graph.nodes[obj_idx].data['pos'] = torch.tensor([[obj.x, obj.y]], dtype=torch.float32)
+            assert len(obj.attributes) == 2 and None not in obj.attributes[0] and None not in obj.attributes[1]
+            graph.nodes[obj_idx].data['color'] = torch.tensor([obj.attributes[0]])
+            graph.nodes[obj_idx].data['shape'] = torch.tensor([obj.attributes[1]])
+        return graph
+
+    def _get_spatial_rel(self, objs):
+        """Return one (n_objs, n_objs) 0/1 matrix per relation in self.rel_deter_func."""
+        spatial_tensors = [np.zeros([len(objs), len(objs)]) for _ in range(len(self.rel_deter_func))]
+        for obj_idx1, obj1 in enumerate(objs):
+            for obj_idx2, obj2 in enumerate(objs):
+                # NOTE(review): the reference direction is fixed to "up"; the reason is not documented.
+                direction_vec = np.array((0, -1))
+                for rel_idx, func in enumerate(self.rel_deter_func):
+                    if func(obj1, obj2, direction_vec):
+                        spatial_tensors[rel_idx][obj_idx1, obj_idx2] = 1.0
+        return spatial_tensors
+
+    def get_trial(self, trials, data, step=10):
+        """Build graphs and actions for the given trial indices of ``data``.
+
+        Every ``step``-th indexed frame of a trial is sampled, up to
+        ``max_len`` frames per trial.
+        """
+        states = []
+        actions = []
+        lens = []
+        n_nodes = []
+        for t in trials:
+            trial_data = data[t]
+            frame_ids = list(range(0, len(trial_data), step))[:self.max_len]
+            trial_states = []
+            trial_actions = []
+            lens.append(len(frame_ids))
+            for n in frame_ids:
+                jsonfile, frame_idx = trial_data[n][0], trial_data[n][1]
+                graph = self._get_frame_graph(jsonfile, frame_idx)
+                trial_states.append(graph)
+                n_nodes.append(graph.number_of_nodes())
+                # actions are pooled over frames; the slice simply stops at
+                # the end of the trial when fewer frames are left
+                actions_xy = np.array([d[2] for d in trial_data[n:n + self.action_range]])
+                trial_actions.append(np.mean(actions_xy, axis=0))
+            states.append(dgl.batch(trial_states))
+            trial_actions = torch.tensor(np.array(trial_actions))
+            # zero-pad the actions of the trial to max_len rows
+            trial_actions_padded = torch.zeros(self.max_len, trial_actions.size(1))
+            trial_actions_padded[:trial_actions.size(0), :] = trial_actions
+            actions.append(trial_actions_padded)
+        return states, actions, lens, n_nodes
+
+    def get_test(self, trial, data, step=10):
+        """Build the batched graph and the actions of one query trial.
+
+        Unlike get_trial(), the sampled frames are neither truncated to
+        ``max_len`` nor are the actions padded.
+        """
+        states = []
+        actions = []
+        trial_data = data[trial]
+        for n in range(0, len(trial_data), step):
+            jsonfile, frame_idx = trial_data[n][0], trial_data[n][1]
+            states.append(self._get_frame_graph(jsonfile, frame_idx))
+            # the slice simply stops at the end of the trial when fewer frames are left
+            actions_xy = np.array([d[2] for d in trial_data[n:n + self.action_range]])
+            actions.append(np.mean(actions_xy, axis=0))
+        states = dgl.batch(states)
+        actions = torch.tensor(np.array(actions))
+        return states, actions
+
+    def __getitem__(self, idx):
+        ep_trials = [idx * self.num_trials + t for t in range(self.num_trials)]
+        # all trials but the last one are demonstrations, the last one is the query
+        dem_expected_states, dem_expected_actions, dem_expected_lens, dem_expected_nodes = self.get_trial(
+            ep_trials[:-1], self.data_expected, step=self.action_range
+        )
+        dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens, dem_unexpected_nodes = self.get_trial(
+            ep_trials[:-1], self.data_unexpected, step=self.action_range
+        )
+        query_expected_frames, target_expected_actions = self.get_test(
+            ep_trials[-1], self.data_expected, step=self.action_range
+        )
+        query_unexpected_frames, target_unexpected_actions = self.get_test(
+            ep_trials[-1], self.data_unexpected, step=self.action_range
+        )
+        return dem_expected_states, dem_expected_actions, dem_expected_lens, dem_expected_nodes, \
+               dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens, dem_unexpected_nodes, \
+               query_expected_frames, target_expected_actions, \
+               query_unexpected_frames, target_unexpected_actions
+
+    def __len__(self):
+        return len(self.path_list_exp)
+
+
+if __name__ == '__main__':
+    # Sanity check of the evaluation data: the index of an episode is printed
+    # for each of its demonstration or query graphs that has no agent node.
+    types = ['preference', 'multi_agent', 'inaccessible_goal',
+             'efficiency_irrational', 'efficiency_time','efficiency_path',
+             'instrumental_no_barrier', 'instrumental_blocking_barrier', 'instrumental_inconsequential_barrier']
+    # node type looked for (the original check calls it the agent type)
+    agent_type = torch.tensor([1., 0., 0., 0., 0., 0., 0., 0., 0.])
+
+    def has_agent(graph):
+        # `tensor in tensor` is an element-wise any(): it is true as soon as
+        # a single entry matches, so whole rows have to be compared instead.
+        # assumes ndata['type'] has one 9-dim row per node - TODO confirm
+        node_types = graph.ndata['type']
+        return bool((node_types == agent_type).all(dim=-1).any())
+
+    for t in types:
+        ttd = TestTransitionDatasetSequence(path='/datasets/external/bib_evaluation_1_1/', task_type=t, process_data=0, mode='test')
+        for i in range(len(ttd)):
+            print(i, end='\r')
+            dem_expected_states, dem_expected_actions, dem_expected_lens, dem_expected_nodes, \
+            dem_unexpected_states, dem_unexpected_actions, dem_unexpected_lens, dem_unexpected_nodes, \
+            query_expected_frames, target_expected_actions, \
+            query_unexpected_frames, target_unexpected_actions = ttd[i]
+            # one graph per demonstration trial in each list
+            for expected_graph, unexpected_graph in zip(dem_expected_states, dem_unexpected_states):
+                if not has_agent(expected_graph):
+                    print(i)
+                if not has_agent(unexpected_graph):
+                    print(i)
+            if not has_agent(query_expected_frames):
+                print(i)
+            if not has_agent(query_unexpected_frames):
+                print(i)
--- a/utils/grid_object.py
+++ b/utils/grid_object.py
@ -0,0 +1,174 @@
+import json
+import pdb
+import numpy as np 
+from sklearn.preprocessing import OneHotEncoder
+import itertools
+
+
+# Integer id of every shape name; find_shape() derives the name from a shape file name.
+# The four orientation variants of the key ('triangle*') and of the lock
+# ('triangle_slot*') share one id each, so the 24 names map onto 18 distinct
+# ids (0-17) -- this is why parse_objects() fits its shape encoder on len(SHAPES)-6.
+SHAPES = {
+    # walls
+    'square': 0, 
+    # objects
+    'heart': 1, 'clove_tree': 2, 'moon': 3, 'wine': 4, 'double_dia': 5, 
+    'flag': 6, 'capsule': 7, 'vase': 8, 'curved_triangle': 9, 'spoon': 10, 
+    'medal': 11, # inaccessible goal
+    # home
+    'home': 12,
+    # agent
+    'pentagon': 13, 'clove': 14, 'kite': 15,
+    # key 
+    'triangle0': 16, 'triangle180': 16, 'triangle90': 16, 'triangle270': 16,
+    # lock 
+    'triangle_slot0': 17, 'triangle_slot180': 17, 'triangle_slot90': 17, 'triangle_slot270': 17
+}
+
+# Integer id of every entity category. The names are the keys that parse_objects()
+# reads from a frame dict and resolves through type2index().
+ENTITIES = {
+    'agent': 0 , 'walls': 1, 'fuse_walls': 2, 'key': 3,
+    'blocking': 4, 'home': 5, 'objects': 6, 'pin': 7, 'lock': 8
+}
+
+# =============================== GridObject class ===============================
+
+class GridObject():
+    """Entity located at a position ``(x, y)``.
+
+    Args:
+        x: first coordinate of the entity.
+        y: second coordinate of the entity.
+        object_type: entity type (parse_objects() passes a one-hot encoded row).
+        attributes: list read by ``name`` as ``[color, shape]``. Defaults to a
+            new empty list for every instance.
+    """
+    def __init__(self, x, y, object_type, attributes=None):
+        self.x = x
+        self.y = y
+        self.type = object_type
+        # A literal ``[]`` default would be one list shared by all instances.
+        self.attributes = [] if attributes is None else attributes
+
+    @property
+    def pos(self):
+        """Position as a numpy array ``[x, y]``."""
+        return np.array([self.x, self.y])
+
+    @property
+    def name(self):
+        """Dict description of the object.
+
+        ``type``, ``x`` and ``y`` are stringified; ``color`` and ``shape`` are
+        ``attributes[0]`` and ``attributes[1]`` unchanged. Raises ``IndexError``
+        when fewer than two attributes are stored.
+        """
+        return {'type': str(self.type),
+                'x': str(self.x),
+                'y': str(self.y),
+                'color': self.attributes[0],
+                'shape': self.attributes[1]}
+
+# =============================== Helper functions ===============================
+
+def type2index(key):
+    """Return the integer id that ``ENTITIES`` assigns to the category name ``key``.
+
+    ``None`` is returned when ``key`` is not one of the known category names.
+    """
+    matching_ids = (idx for name, idx in ENTITIES.items() if name == key)
+    return next(matching_ids, None)
+        
+def find_shape(shape_string, print_shape=False):
+    """Return the integer shape id encoded in a shape file name.
+
+    The shape name is the last ``/``-separated component of ``shape_string``
+    up to its first ``.``; it is looked up in ``SHAPES``.
+
+    Args:
+        shape_string: file name or path, with or without directories.
+        print_shape: when True, print the extracted shape name.
+
+    Returns:
+        The id from ``SHAPES``, or ``None`` if the name is not listed there.
+    """
+    # str.split() returns the whole string when the separator is absent, so this
+    # single expression covers both plain file names and full paths; no
+    # exception-driven fallback is needed.
+    shape = shape_string.split('/')[-1].split('.')[0]
+    if print_shape:
+        print(shape)
+    return SHAPES.get(shape)
+
+def _fit_onehot_encoder(num_categories):
+    """Return a dense OneHotEncoder fitted on the integer ids 0 .. num_categories-1."""
+    try:
+        encoder = OneHotEncoder(sparse_output=False)
+    except TypeError:
+        # scikit-learn < 1.2 only accepts the older ``sparse`` keyword
+        encoder = OneHotEncoder(sparse=False)
+    encoder.fit([[i] for i in range(num_categories)])
+    return encoder
+
+
+def _wall_center(wall):
+    """Return (x, y) of a wall entry: ``wall[0]`` shifted by half of ``wall[1]`` on each axis."""
+    return wall[0][0] + wall[1][0]/2, wall[0][1] + wall[1][1]/2
+
+
+def _entity_fields(entity):
+    """Return (x, y, color, shape id) of an entry laid out as [[x, y], extension, shape, ..., color]."""
+    x, y = entity[0][0] + entity[1], entity[0][1] + entity[1]
+    color = entity[-1]
+    shape = find_shape(entity[2], print_shape=False)
+    assert shape in SHAPES.values(), 'Shape not found'
+    return x, y, color, shape
+
+
+def parse_objects(frame):
+    """
+    Convert the description of one frame into a list of GridObject instances.
+
+    x and y are computed differently for walls and for all other entities:
+    for walls   x, y = obj[0][0] + obj[1][0]/2, obj[0][1] + obj[1][1]/2
+    for objects x, y = obj[0][0] + obj[1], obj[0][1] + obj[1]
+
+    Side effect: frame['walls'] and frame['fuse_walls'] are sorted and
+    de-duplicated in place, and walls whose first or second coordinate is 0 or
+    180 (boundary walls) are dropped from frame['walls'].
+
+    NOTE: coordinates are kept in raw units. The adjacency relations in
+    utils/relations.py compare against fixed offsets of 20, so rescaling the
+    coordinates here requires changing relations.py as well.
+
+    :param frame: dict mapping entity names (the keys of ENTITIES, plus an
+        ignored 'size' entry) to their raw description; must contain 'agent',
+        'walls' and 'fuse_walls'
+    :return: list of GridObject, each with a one-hot type of shape (1, len(ENTITIES))
+        and attributes [color, one-hot shape]
+    """
+    # SHAPES has 24 names but only 18 distinct ids: the 4 key and the 4 lock
+    # orientations share one id each, hence the -6.
+    shape_onehot_encoder = _fit_onehot_encoder(len(SHAPES) - 6)
+    type_onehot_encoder = _fit_onehot_encoder(len(ENTITIES))
+    # remove duplicate walls (groupby only merges neighbours, hence the sort)
+    frame['walls'].sort()
+    frame['walls'] = [wall for wall, _ in itertools.groupby(frame['walls'])]
+    # remove boundary walls
+    frame['walls'] = [w for w in frame['walls']
+                      if w[0][0] != 0 and w[0][0] != 180 and w[0][1] != 0 and w[0][1] != 180]
+    # remove duplicate fuse_walls
+    frame['fuse_walls'].sort()
+    frame['fuse_walls'] = [wall for wall, _ in itertools.groupby(frame['fuse_walls'])]
+    assert 'agent' in frame
+    grid_objs = []
+    for key, obj in frame.items():
+        if key == 'size' or obj == []:
+            continue
+        obj_type = type_onehot_encoder.transform([[type2index(key)]])
+        # every branch yields (x, y, color, shape id) tuples for this key
+        if key == 'walls':
+            # black squares (shape id 0)
+            entries = [(*_wall_center(wall), [0, 0, 0], 0) for wall in obj]
+        elif key == 'fuse_walls':
+            # resample green barriers: keep those whose wall[0] coordinates are multiples of 20
+            kept = [wall for wall in obj if wall[0][0] % 20 == 0 and wall[0][1] % 20 == 0]
+            entries = [(*_wall_center(wall), [80, 146, 56], 0) for wall in kept]
+        elif key == 'objects':
+            entries = [_entity_fields(ob) for ob in obj]
+        elif key in ('key', 'lock'):
+            # stored as a one-element list
+            entries = [_entity_fields(obj[0])]
+        else:
+            try:
+                entries = [_entity_fields(obj)]
+            except (IndexError, TypeError, AttributeError, AssertionError):
+                # [[[x, y], extension, shape, color]] in some cases in instrumental_no_barrier (bib_evaluation_1_1)
+                entries = [_entity_fields(obj[0])]
+                assert len(entries[0][2]) == 3
+        for x, y, color, shape in entries:
+            shape = shape_onehot_encoder.transform([[shape]])[0]
+            grid_objs.append(GridObject(x=x, y=y, object_type=obj_type, attributes=[color, shape]))
+    return grid_objs
--- a/utils/index_data.py
+++ b/utils/index_data.py
@ -0,0 +1,124 @@
+import json
+import os
+import torch
+import torch.utils.data
+from tqdm import tqdm
+
+
+def index_data(json_list, path_list, skip_start=30, skip_end=83):
+    """Index every usable frame of every episode together with the action taken there.
+
+    The action at a frame is half of the displacement of ``['agent'][0]``
+    between that frame and the next one.
+
+    Args:
+        json_list: paths of the json state files; each file holds a list of
+            episodes, each episode a list of frame dicts.
+        path_list: video paths paired with ``json_list``. Only the pairing
+            matters: json files without a partner in this list are not processed.
+        skip_start: number of frames skipped at the beginning of an episode.
+        skip_end: number of frames skipped at the end of an episode (values
+            below 1 are treated as 1 because the following frame is needed).
+
+    Returns:
+        One list per episode, over all files in order, of
+        ``(json file, frame number, [dx, dy])`` tuples. The frame number counts
+        from the first frame of the first episode of the file.
+
+    Raises:
+        AssertionError: if an episode is too short to contribute any frame.
+    """
+    print(f'processing files {len(json_list)}')
+    data_tuples = []
+    num_files = min(len(json_list), len(path_list))
+    for json_path, _ in tqdm(zip(json_list, path_list), total=num_files):
+        with open(json_path, 'r') as fp:
+            state = json.load(fp)
+        past_len = 0
+        for episode in state:
+            ep_len = len(episode)
+            ep_tuples = []
+            for frame in range(skip_start, ep_len - max(skip_end, 1)):
+                # find action taken
+                f0x, f0y = episode[frame]['agent'][0]
+                f1x, f1y = episode[frame + 1]['agent'][0]
+                action = [(f1x - f0x) / 2., (f1y - f0y) / 2.]
+                ep_tuples.append((json_path, past_len + frame, action))
+            assert len(ep_tuples) > 0
+            data_tuples.append(ep_tuples)
+            past_len += ep_len
+    return data_tuples
+
+class TransitionDataset(torch.utils.data.Dataset):
+    """
+    Collects the video and json files of the requested types and builds (or
+    loads) the per-frame action index for them.
+
+    Args:
+        path: path to the dataset; one sub-directory per video type
+        types: list of video types to include (required in practice: the
+            default ``None`` cannot be joined or iterated)
+        size: size of the frames (stored only)
+        mode: 'train' keeps the first 80% of the sorted files of each type,
+            'val' the remaining ones, any other value keeps all of them
+        num_context: number of context state-action pairs (stored only)
+        num_test: number of test state-action pairs (stored only)
+        num_trials: number of trials in an episode
+        action_range: number of frames to skip; actions are combined over this
+            number of frames (displacement of the agent) (stored only)
+        process_data: if truthy, index the json files and write the index to
+            ``jindex_bib_{mode}_{types}.json`` inside ``path``; otherwise read
+            that file
+    __getitem__:
+        stub: prints 'Empty' and returns None
+    __len__:
+        ``tot_trials // num_trials`` where ``tot_trials`` is 9 per selected video
+    """
+    def __init__(self, path, types=None, size=None, mode="train", num_context=30, num_test=1, num_trials=9,
+                 action_range=10, process_data=0):
+
+        self.path = path
+        self.types = types
+        self.size = size
+        self.mode = mode
+        self.num_trials = num_trials
+        self.num_context = num_context
+        self.num_test = num_test
+        self.action_range = action_range
+        self.ep_combs = self.num_trials * (self.num_trials - 2)  # 9p2 - 9
+        # all ordered pairs of distinct trial indices
+        self.eps = []
+        for first in range(self.num_trials):
+            for second in range(self.num_trials):
+                if first != second:
+                    self.eps.append([first, second])
+        types_str = '_'.join(self.types)
+
+        self.path_list = []
+        self.json_list = []
+        # get video paths and json file paths
+        for t in types:
+            print(f'reading files of type {t} in {mode}')
+            type_dir = os.path.join(self.path, t)
+            entries = os.listdir(type_dir)
+            paths = sorted(os.path.join(type_dir, x) for x in entries if x.endswith('.mp4'))
+            jsons = sorted(os.path.join(type_dir, x) for x in entries
+                           if x.endswith('.json') and 'index' not in x)
+
+            # the split position is derived from the json files for both lists
+            cut = int(0.8 * len(jsons))
+            if mode == 'train':
+                chosen = slice(None, cut)
+            elif mode == 'val':
+                chosen = slice(cut, None)
+            else:
+                chosen = slice(None)
+            self.path_list += paths[chosen]
+            self.json_list += jsons[chosen]
+
+        index_file = os.path.join(self.path, f'jindex_bib_{mode}_{types_str}.json')
+        self.data_tuples = []
+        if process_data:
+            # index the videos in the dataset directory. This is done to speed up the retrieval of videos.
+            # tuples of frame index and action (displacement of agent) are stored
+            self.data_tuples = index_data(self.json_list, self.path_list)
+            with open(index_file, 'w') as fp:
+                json.dump({'data_tuples': self.data_tuples}, fp)
+        else:
+            # read pre-indexed data
+            with open(index_file, 'r') as fp:
+                self.data_tuples = json.load(fp)['data_tuples']
+
+        self.tot_trials = len(self.path_list) * 9
+
+    def __getitem__(self, idx):
+        # placeholder: this class is only used to build the index
+        print('Empty')
+        return None
+
+    def __len__(self):
+        return self.tot_trials // self.num_trials
+    
+    
+if __name__ == "__main__":
+    # Build the frame/action index of the training split and store it next to the data.
+    # Other types: ['instrumental_action', 'multi_agent', 'preference', 'single_object']
+    video_types = ['multi_agent', 'instrumental_action']
+    dataset = TransitionDataset(
+        path='/datasets/external/bib_train/',
+        types=video_types,
+        size=(84, 84),
+        mode="train",
+        num_context=30,
+        num_test=1,
+        num_trials=9,
+        action_range=10,
+        process_data=1,
+    )
+    print(len(dataset))
--- a/utils/relations.py
+++ b/utils/relations.py
@ -0,0 +1,116 @@
+import numpy as np
+
+
+# =============================== relationships to build the graph ===============================
+
+def rotate_vec2d(vec, degrees):
+    """
+    Rotate a 2D vector anti-clockwise with the standard rotation matrix.
+    :param vec: 2D vector to rotate
+    :param degrees: rotation angle in degrees (negative values rotate the other way)
+    :return: the rotated vector
+    """
+    angle = np.radians(degrees)
+    cos_a = np.cos(angle)
+    sin_a = np.sin(angle)
+    rotation = np.array([[cos_a, -sin_a],
+                         [sin_a, cos_a]])
+    return rotation @ vec
+
+# ---------- Remote Directional Relations --------------------------------------------------------
+
+def is_front(obj1, obj2, direction_vec)->bool:
+    """True if the offset from obj1 to obj2 has a projection above 0.1 on ``direction_vec``."""
+    offset = obj2.pos - obj1.pos
+    projection = offset @ direction_vec
+    return projection > 0.1
+
+def is_back(obj1, obj2, direction_vec)->bool:
+    """True if the offset from obj1 to obj2 has a projection below -0.1 on ``direction_vec``."""
+    offset = obj2.pos - obj1.pos
+    projection = offset @ direction_vec
+    return projection < -0.1
+
+def is_left(obj1, obj2, direction_vec)->bool:
+    """True if the offset from obj1 to obj2 projects above 0.1 on ``direction_vec`` rotated by -90 degrees.
+
+    NOTE(review): which side this is depends on the axis convention of the
+    positions (presumably image coordinates with y pointing down) -- confirm.
+    """
+    side_vec = rotate_vec2d(direction_vec, -90)
+    offset = obj2.pos - obj1.pos
+    projection = offset @ side_vec
+    return projection > 0.1
+
+def is_right(obj1, obj2, direction_vec)->bool:
+    """True if the offset from obj1 to obj2 projects above 0.1 on ``direction_vec`` rotated by +90 degrees.
+
+    NOTE(review): which side this is depends on the axis convention of the
+    positions (presumably image coordinates with y pointing down) -- confirm.
+    """
+    side_vec = rotate_vec2d(direction_vec, 90)
+    offset = obj2.pos - obj1.pos
+    projection = offset @ side_vec
+    return projection > 0.1
+
+# ---------- Alignment and Adjacency Relations ---------------------------------------------------
+
+def is_close(obj1, obj2, direction_vec=None)->bool:
+    """True if the Manhattan distance between the two objects is exactly 20.
+
+    Adjacency relation: unlike the local directional relations it carries no
+    directional information. ``direction_vec`` is ignored.
+    """
+    per_axis = np.abs(obj1.pos - obj2.pos)
+    manhattan = np.sum(per_axis)
+    return manhattan == 20
+
+def is_aligned(obj1, obj2, direction_vec=None)->bool:
+    """True if the two objects share at least one coordinate, i.e. lie on the same
+    horizontal or vertical line. ``direction_vec`` is ignored."""
+    offset = obj2.pos - obj1.pos
+    same_coordinate = offset == 0
+    return np.any(same_coordinate)
+
+# ---------- Local Directional Relations ---------------------------------------------------------
+
+def is_top_adj(obj1, obj2, direction_vec=None)->bool:
+    """True if obj1 has the same x as obj2 and a y exactly 20 larger. ``direction_vec`` is ignored."""
+    if obj1.x != obj2.x:
+        return False
+    return obj1.y == obj2.y + 20
+
+def is_left_adj(obj1, obj2, direction_vec=None)->bool:
+    """True if obj1 has the same y as obj2 and an x exactly 20 smaller. ``direction_vec`` is ignored."""
+    if obj1.y != obj2.y:
+        return False
+    return obj1.x == obj2.x - 20
+
+def is_top_left_adj(obj1, obj2, direction_vec=None)->bool:
+    """True if obj1 is diagonal to obj2 with y exactly 20 larger and x exactly 20 smaller.
+    ``direction_vec`` is ignored."""
+    if obj1.y != obj2.y + 20:
+        return False
+    return obj1.x == obj2.x - 20
+
+def is_top_right_adj(obj1, obj2, direction_vec=None)->bool:
+    """True if obj1 is diagonal to obj2 with y exactly 20 larger and x exactly 20 larger.
+    ``direction_vec`` is ignored."""
+    if obj1.y != obj2.y + 20:
+        return False
+    return obj1.x == obj2.x + 20
+
+def is_down_adj(obj1, obj2, direction_vec=None)->bool:
+    """Mirror of ``is_top_adj``: True if obj2 has the same x as obj1 and a y exactly 20 larger.
+    ``direction_vec`` is ignored."""
+    if obj2.x != obj1.x:
+        return False
+    return obj2.y == obj1.y + 20
+
+def is_right_adj(obj1, obj2, direction_vec=None)->bool:
+    """Mirror of ``is_left_adj``: True if obj2 has the same y as obj1 and an x exactly 20 smaller.
+    ``direction_vec`` is ignored."""
+    if obj2.y != obj1.y:
+        return False
+    return obj2.x == obj1.x - 20
+
+def is_down_right_adj(obj1, obj2, direction_vec=None)->bool:
+    """Mirror of ``is_top_left_adj``: True if obj2 has a y exactly 20 larger and an x exactly
+    20 smaller than obj1. ``direction_vec`` is ignored."""
+    if obj2.y != obj1.y + 20:
+        return False
+    return obj2.x == obj1.x - 20
+
+def is_down_left_adj(obj1, obj2, direction_vec=None)->bool:
+    """Mirror of ``is_top_right_adj``: True if obj2 has a y exactly 20 larger and an x exactly
+    20 larger than obj1. ``direction_vec`` is ignored."""
+    if obj2.y != obj1.y + 20:
+        return False
+    return obj2.x == obj1.x + 20
+
+# ---------- More Remote Directional Relations (not used) ----------------------------------------
+
+def top_left(obj1, obj2, direction_vec)->bool:
+    """True if the x offset of obj1 from obj2 does not exceed its y offset (dx <= dy).
+    ``direction_vec`` is ignored."""
+    dx = obj1.x - obj2.x
+    dy = obj1.y - obj2.y
+    return dx <= dy
+
+def top_right(obj1, obj2, direction_vec)->bool:
+    """True if the negated x offset of obj1 from obj2 does not exceed its y offset (-dx <= dy).
+    ``direction_vec`` is ignored."""
+    dx = obj1.x - obj2.x
+    dy = obj1.y - obj2.y
+    return -dx <= dy
+
+def down_left(obj1, obj2, direction_vec)->bool:
+    """Mirror of ``top_right``: the same test with the two objects swapped.
+    ``direction_vec`` is ignored."""
+    dx = obj2.x - obj1.x
+    dy = obj2.y - obj1.y
+    return -dx <= dy
+
+def down_right(obj1, obj2, direction_vec)->bool:
+    """Mirror of ``top_left``: the same test with the two objects swapped.
+    ``direction_vec`` is ignored."""
+    dx = obj2.x - obj1.x
+    dy = obj2.y - obj1.y
+    return dx <= dy
+
+def fan_top(obj1, obj2, direction_vec)->bool:
+    """True if the offset of obj1 from obj2 satisfies both dx <= dy and -dx <= dy,
+    i.e. lies in the fan around the positive y direction. ``direction_vec`` is ignored."""
+    dx = obj1.x - obj2.x
+    dy = obj1.y - obj2.y
+    in_top_left_half = dx <= dy
+    in_top_right_half = -dx <= dy
+    return in_top_left_half and in_top_right_half
+
+def fan_down(obj1, obj2, direction_vec)->bool:
+    """Mirror of ``fan_top``: the same test with the two objects swapped.
+    ``direction_vec`` is ignored."""
+    dx = obj2.x - obj1.x
+    dy = obj2.y - obj1.y
+    in_top_left_half = dx <= dy
+    in_top_right_half = -dx <= dy
+    return in_top_left_half and in_top_right_half
+
+def fan_right(obj1, obj2, direction_vec)->bool:
+    """True if the offset of obj1 from obj2 satisfies both dx >= dy and -dx <= dy,
+    i.e. lies in the fan around the positive x direction. ``direction_vec`` is ignored."""
+    dx = obj1.x - obj2.x
+    dy = obj1.y - obj2.y
+    below_diagonal = dx >= dy
+    above_anti_diagonal = -dx <= dy
+    return below_diagonal and above_anti_diagonal
+
+def fan_left(obj1, obj2, direction_vec)->bool:
+    """Mirror of ``fan_right``: the same test with the two objects swapped.
+    ``direction_vec`` is ignored."""
+    dx = obj2.x - obj1.x
+    dy = obj2.y - obj1.y
+    below_diagonal = dx >= dy
+    above_anti_diagonal = -dx <= dy
+    return below_diagonal and above_anti_diagonal
+
+# ---------- Ad-hoc Relations --------------------------------------------------------------------
+
+def needs(obj1, obj2, direction_vec=None)->bool:
+    """True if obj1's type has its maximum at index 0 and obj2's at index 3
+    ('agent' and 'key' in ENTITIES of utils/grid_object.py). ``direction_vec`` is ignored."""
+    agent_id, key_id = 0, 3
+    if np.argmax(obj1.type) != agent_id:
+        return False
+    return np.argmax(obj2.type) == key_id
+
+def opens(obj1, obj2, direction_vec=None)->bool:
+    """True if obj1's type has its maximum at index 3 and obj2's at index 8
+    ('key' and 'lock' in ENTITIES of utils/grid_object.py). ``direction_vec`` is ignored."""
+    key_id, lock_id = 3, 8
+    if np.argmax(obj1.type) != key_id:
+        return False
+    return np.argmax(obj2.type) == lock_id
+
+def collects(obj1, obj2, direction_vec=None)->bool:
+    """True if obj1's type has its maximum at index 0 and obj2's at index 6
+    ('agent' and 'objects' in ENTITIES of utils/grid_object.py). ``direction_vec`` is ignored."""
+    agent_id, objects_id = 0, 6
+    if np.argmax(obj1.type) != agent_id:
+        return False
+    return np.argmax(obj2.type) == objects_id
--- a/utils/run_build_graphs.sh
+++ b/utils/run_build_graphs.sh
@ -0,0 +1 @@
+# Build the graphs for the train split and then for the val split, each with --cpus 30.
+# Because of `&&`, the val run only starts if the train run exits successfully.
+python build_graphs.py --mode train --cpus 30 && python build_graphs.py --mode val --cpus 30
				`@ -0,0 +1 @@`
				`python build_graphs.py --mode train --cpus 30 && python build_graphs.py --mode val --cpus 30`