import os import json import numpy as np import torch def mkdirs(paths): if isinstance(paths, list): for path in paths: if not os.path.exists(path): os.makedirs(path) else: if not os.path.exists(paths): os.makedirs(paths) def invert_dict(d): return {v: k for k, v in d.items()} def load_vocab(path): with open(path, 'r') as f: vocab = json.load(f) vocab['question_idx_to_token'] = invert_dict(vocab['question_token_to_idx']) vocab['program_idx_to_token'] = invert_dict(vocab['program_token_to_idx']) vocab['answer_idx_to_token'] = invert_dict(vocab['answer_token_to_idx']) # Sanity check: make sure , , and are consistent assert vocab['question_token_to_idx'][''] == 0 assert vocab['question_token_to_idx'][''] == 1 assert vocab['question_token_to_idx'][''] == 2 assert vocab['program_token_to_idx'][''] == 0 assert vocab['program_token_to_idx'][''] == 1 assert vocab['program_token_to_idx'][''] == 2 return vocab def load_scenes(scenes_json): with open(scenes_json) as f: scenes_dict = json.load(f)['scenes'] scenes = [] for s in scenes_dict: table = [] for i, o in enumerate(s['objects']): item = {} item['id'] = '%d-%d' % (s['image_index'], i) if '3d_coords' in o: item['position'] = [np.dot(o['3d_coords'], s['directions']['right']), np.dot(o['3d_coords'], s['directions']['front']), o['3d_coords'][2]] else: item['position'] = o['position'] item['color'] = o['color'] item['material'] = o['material'] item['shape'] = o['shape'] item['size'] = o['size'] table.append(item) scenes.append(table) return scenes def load_embedding(path): return torch.Tensor(np.load(path))