neuro-symbolic-visual-dialog/utils.py

81 lines
2.7 KiB
Python

import json
import numpy as np
def merge_captions_question_programs(path_cap, path_ques, caption_first=True):
    """Merge caption program lines with windows of question program lines.

    Both files are read line by line (trailing newlines are kept). The
    question lines are walked in consecutive windows; every window is
    prefixed with one caption line and collected as one merged entry.

    Args:
        path_cap: Path of the text file containing the caption programs.
        path_ques: Path of the text file containing the question programs.
        caption_first: Selects the index arithmetic. When true, one question
            line is skipped before each window, a window spans up to 12
            lines and the caption index is ``i % 11``. Otherwise no line is
            skipped, a window spans up to 11 lines and the caption index is
            ``i % 10``.

    Returns:
        A list with one entry per window. Each entry is a list whose first
        element is the selected caption line, followed by the question lines
        of that window. An empty question file yields an empty list.

    Raises:
        OSError: If either file cannot be opened.
        IndexError: If the caption file has fewer lines than the computed
            caption index requires.
    """
    # Bind the file objects: the paths themselves are plain strings and have
    # no ``readlines`` method.
    with open(path_cap, "r") as cap_file:
        c_progs = cap_file.readlines()
    with open(path_ques, "r") as ques_file:
        q_progs = ques_file.readlines()

    all_merged_progs = []
    i = 0
    while i < len(q_progs):
        # NOTE(review): the modulo-based caption index and the 12/11 window
        # sizes are kept exactly as originally written; confirm them against
        # the actual layout of the program files.
        cap_idx = i % 11 if caption_first else i % 10
        start_idx_p = i + 1 if caption_first else i
        end_idx_p = start_idx_p + 12 if caption_first else start_idx_p + 11
        # Slice (not tuple-index) the question lines, and wrap the caption
        # line in a list so both operands of ``+`` are lists.
        all_merged_progs.append(
            [c_progs[cap_idx]] + q_progs[start_idx_p:end_idx_p]
        )
        i = end_idx_p
    return all_merged_progs
def load_clevr_scenes(scenes_json):
    """Read a scenes JSON file and return one object table per scene.

    The file may hold either a list of scenes or a dict whose ``"scenes"``
    entry is that list.

    Args:
        scenes_json: Path of the JSON file to read.

    Returns:
        A list with one element per scene; each element is a list of dicts
        with the keys ``id``, ``position``, ``color``, ``material``,
        ``shape`` and ``size``, one dict per object of the scene.
    """
    with open(scenes_json) as handle:
        raw_scenes = json.load(handle)
    if isinstance(raw_scenes, dict):
        raw_scenes = raw_scenes["scenes"]

    def describe(scene, index, obj):
        # The id combines the scene's image index with the object's position
        # in the scene's object list.
        entry = {'id': '%d-%d' % (scene['image_index'], index)}
        if '3d_coords' in obj:
            coords = obj['3d_coords']
            axes = scene['directions']
            # Project the 3D coordinates onto the scene's "right" and "front"
            # direction vectors; the third coordinate is taken as-is.
            entry['position'] = [
                np.dot(coords, axes['right']),
                np.dot(obj['3d_coords'], axes['front']),
                obj['3d_coords'][2],
            ]
        else:
            entry['position'] = obj['position']
        for attribute in ('color', 'material', 'shape', 'size'):
            entry[attribute] = obj[attribute]
        return entry

    return [
        [describe(scene, index, obj)
         for index, obj in enumerate(scene['objects'])]
        for scene in raw_scenes
    ]
def load_minecraft_scenes(scenes_json):
    """Read a scenes JSON file and return one object table per scene.

    The file may hold either a list of scenes or a dict whose ``"scenes"``
    entry is that list.

    Args:
        scenes_json: Path of the JSON file to read.

    Returns:
        A list with one element per scene; each element is a list of dicts
        with the keys ``id``, ``position``, ``nature``, ``class`` and
        ``direction``, one dict per object of the scene. ``direction`` is
        ``"facing_"`` followed by ``forward``, ``backward``, ``right`` or
        ``left``; any other raw direction leaves the bare ``"facing_"``.
    """
    # Raw direction value -> suffix appended to "facing_".
    facing_suffix = {
        "front": "forward",
        "back": "backward",
        "right": "right",
        "left": "left",
    }

    with open(scenes_json) as handle:
        raw_scenes = json.load(handle)
    if isinstance(raw_scenes, dict):
        raw_scenes = raw_scenes["scenes"]

    result = []
    for scene in raw_scenes:
        rows = []
        for index, obj in enumerate(scene['objects']):
            entry = {'id': '%d-%d' % (scene['image_index'], index)}
            if '3d_coords' not in obj:
                entry['position'] = obj['position']
            else:
                # Project the 3D coordinates onto the scene's "right" and
                # "front" direction vectors; the third coordinate is kept.
                coords = obj['3d_coords']
                entry['position'] = [
                    np.dot(coords, scene['directions']['right']),
                    np.dot(obj['3d_coords'], scene['directions']['front']),
                    obj['3d_coords'][2],
                ]
            entry['nature'] = obj['nature']
            entry['class'] = obj['class']
            # Unknown directions fall back to an empty suffix.
            entry['direction'] = "facing_" + facing_suffix.get(obj['direction'], "")
            rows.append(entry)
        result.append(rows)
    return result