"""
|
|
author: Adnen Abdessaied
|
|
maintainer: "Adnen Abdessaied"
|
|
website: adnenabdessaied.de
|
|
version: 1.0.1
|
|
"""
|
|
|
|
import os
import sys
import json, torch, pickle, copy, time
import numpy as np
import torch.nn as nn
import torch.utils.data as Data
from tensorboardX import SummaryWriter
from copy import deepcopy
from clevrDialog_dataset import ClevrDialogQuestionDataset
from tqdm import tqdm

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from executor.symbolic_executor import SymbolicExecutorClevr, SymbolicExecutorMinecraft
from models import SeqToSeqQ, QuestEncoder_1, QuestEncoder_2, Decoder, CaptionEncoder, SeqToSeqC
from optim import get_optim, adjust_lr
from options_caption_parser import Options as OptionsC
from options_question_parser import Options as OptionsQ


class Execution:
def __init__(self, optsQ, optsC):
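        """Set up datasets, networks, the symbolic executor, and logging directories.

        Args:
            optsQ: parsed options for the question program parser.
            optsC: parsed options for the caption program parser.
        """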
        self.opts = deepcopy(optsQ)
        if self.opts.useCuda > 0 and torch.cuda.is_available():
            self.device = torch.device("cuda:0")
            print("[INFO] Using GPU {} ...".format(torch.cuda.get_device_name(0)))
        else:
            print("[INFO] Using CPU ...")
            self.device = torch.device("cpu")

        self.loss_fn = torch.nn.NLLLoss().to(self.device)

        print("[INFO] Loading dataset ...")

        self.datasetTr = ClevrDialogQuestionDataset(
            self.opts.dataPathTr, self.opts.vocabPath, "train", "All tr data")

        self.datasetVal = ClevrDialogQuestionDataset(
            self.opts.dataPathVal, self.opts.vocabPath, "val", "All val data", train=False)

        self.datasetTest = ClevrDialogQuestionDataset(
            self.opts.dataPathTest, self.opts.vocabPath, "test", "All test data", train=False)

        self.QuestionNet = constructQuestionNet(
            self.opts,
            self.datasetTr.lenVocabText,
            self.datasetTr.lenVocabProg,
            self.datasetTr.maxLenProg,
        )

        if os.path.isfile(self.opts.captionNetPath):
            self.CaptionNet = constructCaptionNet(
                optsC,
                self.datasetTr.lenVocabText,
                self.datasetTr.lenVocabProg,
                self.datasetTr.maxLenProg
            )
            print('Loading CaptionNet from {}'.format(self.opts.captionNetPath))
            state_dict = torch.load(self.opts.captionNetPath)['state_dict']
            self.CaptionNet.load_state_dict(state_dict)
            self.CaptionNet.to(self.device)
            total_params_cap = sum(p.numel() for p in self.CaptionNet.parameters() if p.requires_grad)
            print("The caption encoder has {} trainable parameters".format(total_params_cap))

        self.QuestionNet.to(self.device)
        # if os.path.isfile(self.opts.load_checkpoint_path):
        #     print('Loading QuestionNet from {}'.format(optsQ.load_checkpoint_path))
        #     state_dict = torch.load(self.opts.load_checkpoint_path)['state_dict']
        #     self.QuestionNet.load_state_dict(state_dict)
        total_params_quest = sum(p.numel() for p in self.QuestionNet.parameters() if p.requires_grad)
        print("The question encoder has {} trainable parameters".format(total_params_quest))

        if "minecraft" in self.opts.scenesPath:
            self.symbolicExecutor = SymbolicExecutorMinecraft(self.opts.scenesPath)
        else:
            self.symbolicExecutor = SymbolicExecutorClevr(self.opts.scenesPath)

        tb_path = os.path.join(self.opts.run_dir, "tb_logdir")
        if not os.path.isdir(tb_path):
            os.makedirs(tb_path)

        self.ckpt_path = os.path.join(self.opts.run_dir, "ckpt_dir")
        if not os.path.isdir(self.ckpt_path):
            os.makedirs(self.ckpt_path)
        if not os.path.isdir(self.opts.text_log_dir):
            os.makedirs(self.opts.text_log_dir)

        self.writer = SummaryWriter(tb_path)
        self.iter_val = 0

        if os.path.isfile(self.opts.dependenciesPath):
            with open(self.opts.dependenciesPath, "rb") as f:
                self.dependencies = pickle.load(f)

def train(self):
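        """Train the question parser with teacher forcing on the NLL loss.

        Runs validation every `validate_every` iterations and checkpoints the
        model whenever the validation program accuracy improves.
        """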
        self.QuestionNet.train()

        # Define the multi-gpu training if needed
        if len(self.opts.gpu_ids) > 1:
            self.QuestionNet = nn.DataParallel(self.QuestionNet, device_ids=self.opts.gpu_ids)

        # Load checkpoint if resume training
        if os.path.isfile(self.opts.load_checkpoint_path):
            print("[INFO] Resume training from ckpt {} ...".format(
                self.opts.load_checkpoint_path
            ))

            # Load the network parameters
            ckpt = torch.load(self.opts.load_checkpoint_path)
            print("[INFO] Checkpoint successfully loaded ...")
            self.QuestionNet.load_state_dict(ckpt['state_dict'])

            # Load the optimizer parameters
            optim = get_optim(self.opts, self.QuestionNet, len(self.datasetTr))  # , ckpt['optim'], lr_base=ckpt['lr_base'])
            # optim._step = int(data_size / self.__C.BATCH_SIZE * self.__C.CKPT_EPOCH)
            optim.optimizer.load_state_dict(ckpt['optimizer'])
            _iter = 0  # ckpt['last_iter']
            epoch = 0  # ckpt['last_epoch']

        else:
            optim = get_optim(self.opts, self.QuestionNet, len(self.datasetTr))
            _iter = 0
            epoch = 0

        trainTime = 0
        bestValAcc = float("-inf")
        bestCkp = 0
        # Training loop
        while _iter < self.opts.num_iters:

            # Learning Rate Decay
            if _iter in self.opts.lr_decay_marks:
                adjust_lr(optim, self.opts.lr_decay_factor)

            # Define multi-thread dataloader
            dataloader = Data.DataLoader(
                self.datasetTr,
                batch_size=self.opts.batch_size,
                shuffle=self.opts.shuffle_data,
                num_workers=self.opts.num_workers,
            )

            # Iteration
            time_start = 0
            time_end = 0
            for batch_iter, (quest, hist, prog, questionRound, _) in enumerate(dataloader):
                time_start = time.time()
                if _iter >= self.opts.num_iters:
                    break
                quest = quest.to(self.device)
                if self.opts.last_n_rounds < 10:
                    last_n_rounds_batch = []
                    for i, r in enumerate(questionRound.tolist()):
                        startIdx = max(r - self.opts.last_n_rounds, 0)
                        endIdx = max(r, self.opts.last_n_rounds)
                        if hist.dim() == 3:
                            assert endIdx - startIdx == self.opts.last_n_rounds
                            histBatch = hist[i, :, :]
                            last_n_rounds_batch.append(histBatch[startIdx:endIdx, :])
                        elif hist.dim() == 2:
                            startIdx *= 20
                            endIdx *= 20
                            histBatch = hist[i, :]
                            temp = histBatch[startIdx:endIdx].cpu()
                            if r > self.opts.last_n_rounds:
                                last_n_rounds_batch.append(torch.cat([torch.tensor([1]), temp, torch.tensor([2])], 0))
                            else:
                                last_n_rounds_batch.append(torch.cat([temp, torch.tensor([2, 0])], 0))
                    hist = torch.stack(last_n_rounds_batch, dim=0)
                hist = hist.to(self.device)
                prog = prog.to(self.device)
                progTarget = prog.clone()
                optim.zero_grad()

                predSoftmax, _ = self.QuestionNet(quest, hist, prog[:, :-1])
                loss = self.loss_fn(
                    # predSoftmax[:, :-1, :].contiguous().view(-1, predSoftmax.size(2)),
                    predSoftmax.contiguous().view(-1, predSoftmax.size(2)),
                    progTarget[:, 1:].contiguous().view(-1))
                loss.backward()

                if _iter % self.opts.validate_every == 0 and _iter > 0:
                    valAcc = self.val()
                    if valAcc > bestValAcc:
                        bestValAcc = valAcc
                        bestCkp = _iter
                        print("\n[INFO] Checkpointing model @ iter {} with val accuracy {}\n".format(_iter, valAcc))
                        state = {
                            'state_dict': self.QuestionNet.state_dict(),
                            'optimizer': optim.optimizer.state_dict(),
                            'lr_base': optim.lr_base,
                            'optim': optim.lr_base,
                            'last_iter': _iter,
                            'last_epoch': epoch,
                        }
                        # checkpointing
                        torch.save(
                            state,
                            os.path.join(self.ckpt_path, 'ckpt_iter' + str(_iter) + '.pkl')
                        )

                # logging
                self.writer.add_scalar(
                    'train/loss',
                    loss.cpu().data.numpy(),
                    global_step=_iter)

                self.writer.add_scalar(
                    'train/lr',
                    optim._rate,
                    global_step=_iter)
                if _iter % self.opts.display_every == 0:
                    time_end = time.time()
                    trainTime += time_end - time_start

                    print("\r[CLEVR-Dialog - %s (%d | %d)][epoch %2d][iter %4d/%4d][runtime %4f] loss: %.4f, lr: %.2e" % (
                        self.datasetTr.name,
                        batch_iter,
                        len(dataloader),
                        epoch,
                        _iter,
                        self.opts.num_iters,
                        trainTime,
                        loss.cpu().data.numpy(),
                        optim._rate,
                    ), end=' ')

                optim.step()
                _iter += 1

            epoch += 1
        print("[INFO] Avg. epoch time: {} s".format(trainTime / epoch))
        print("[INFO] Best model achieved val acc. {} @ iter {}".format(bestValAcc, bestCkp))

def val(self):
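        """Compute program accuracy on the validation split.

        Program accuracy is the fraction of predicted question programs that
        exactly match the ground-truth programs, returned as a percentage.
        """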
        self.QuestionNet.eval()

        total_correct = 0
        total = 0

        if len(self.opts.gpu_ids) > 1:
            self.QuestionNet = nn.DataParallel(self.QuestionNet, device_ids=self.opts.gpu_ids)
        self.QuestionNet = self.QuestionNet.eval()
        dataloader = Data.DataLoader(
            self.datasetVal,
            batch_size=self.opts.batch_size,
            shuffle=True,
            num_workers=self.opts.num_workers,
            pin_memory=False
        )
        _iterCur = 0
        _totalCur = len(dataloader)

        for step, (question, questionPrg, questionImgIdx, questionRounds, history, historiesProg, answer) in enumerate(dataloader):
            print("\rEvaluation: [step %4d/%4d]" % (
                step,
                int(len(dataloader)),
            ), end=' ')

            question = question.to(self.device)

            if history.dim() == 3:
                caption = history.detach()
                caption = caption[:, 0, :]
                caption = caption[:, :16].to(self.device)
            elif history.dim() == 2:
                caption = history.detach()
                caption = caption[:, :16].to(self.device)
            if self.opts.last_n_rounds is not None:
                last_n_rounds_batch = []
                for i, r in enumerate(questionRounds.tolist()):
                    startIdx = max(r - self.opts.last_n_rounds, 0)
                    endIdx = max(r, self.opts.last_n_rounds)
                    if history.dim() == 3:
                        assert endIdx - startIdx == self.opts.last_n_rounds
                        histBatch = history[i, :, :]
                        last_n_rounds_batch.append(histBatch[startIdx:endIdx, :])
                    elif history.dim() == 2:
                        startIdx *= 20
                        endIdx *= 20
                        histBatch = history[i, :]
                        temp = histBatch[startIdx:endIdx]
                        if r > self.opts.last_n_rounds:
                            last_n_rounds_batch.append(torch.cat([torch.tensor([1]), temp, torch.tensor([2])], 0))
                        else:
                            last_n_rounds_batch.append(torch.cat([temp, torch.tensor([2, 0])], 0))
                history = torch.stack(last_n_rounds_batch, dim=0)
            history = history.to(self.device)
            questionPrg = questionPrg.to(self.device)

            questProgsToksPred = self.QuestionNet.sample(question, history)
            questProgsPred = decodeProg(questProgsToksPred, self.datasetVal.vocab["idx_prog_to_token"])
            targetProgs = decodeProg(questionPrg, self.datasetVal.vocab["idx_prog_to_token"], target=True)

            correct = [1 if pred == gt else 0 for (pred, gt) in zip(questProgsPred, targetProgs)]

            correct = sum(correct)
            total_correct += correct
            total += len(targetProgs)
        self.QuestionNet.train()

        return 100.0 * (total_correct / total)

# Evaluation
def eval_with_gt(self):
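        """Evaluate on the test split using ground-truth dialog histories.

        Question and caption programs are predicted, executed with the symbolic
        executor, and the resulting answers are compared with the ground truth.
        Accuracy, first-failure round, and per-round / per-question-type
        breakdowns are written to the text log and pickled output files.
        """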
        # Define the multi-gpu training if needed
        all_pred_answers = []
        all_gt_answers = []
        all_question_types = []
        all_penalties = []
        all_pred_programs = []
        all_gt_programs = []

        first_failure_round = 0
        total_correct = 0
        total_acc_pen = 0
        total = 0
        total_quest_prog_correct = 0

        if len(self.opts.gpu_ids) > 1:
            self.QuestionNet = nn.DataParallel(self.QuestionNet, device_ids=self.opts.gpu_ids)
        self.QuestionNet = self.QuestionNet.eval()
        self.CaptionNet = self.CaptionNet.eval()
        if self.opts.batch_size != self.opts.dialogLen:
            print("[INFO] Changed batch size from {} to {}".format(self.opts.batch_size, self.opts.dialogLen))
            self.opts.batch_size = self.opts.dialogLen
        dataloader = Data.DataLoader(
            self.datasetTest,
            batch_size=self.opts.batch_size,
            shuffle=False,
            num_workers=self.opts.num_workers,
            pin_memory=False
        )
        _iterCur = 0
        _totalCur = len(dataloader)

        for step, (question, questionPrg, questionImgIdx, questionRounds, history, historiesProg, answer) in enumerate(dataloader):
            # print("\rEvaluation: [step %4d/%4d]" % (
            #     step + 1,
            #     int(data_size / self.opts.batch_size),
            # ), end=' ')
            # if step >= 5000:
            #     break
            batchSize = question.size(0)
            question = question.to(self.device)
            # dependecy = self.dependencies[step*batchSize:(step+1)*batchSize]

            if history.dim() == 3:
                caption = history.detach()
                caption = caption[:, 0, :]
                caption = caption[:, :16].to(self.device)
            elif history.dim() == 2:
                caption = history.detach()
                caption = caption[:, :16].to(self.device)
            if self.opts.last_n_rounds < 10:
                last_n_rounds_batch = []
                for i, r in enumerate(questionRounds.tolist()):
                    startIdx = max(r - self.opts.last_n_rounds, 0)
                    endIdx = max(r, self.opts.last_n_rounds)
                    if history.dim() == 3:
                        assert endIdx - startIdx == self.opts.last_n_rounds
                        histBatch = history[i, :, :]
                        last_n_rounds_batch.append(histBatch[startIdx:endIdx, :])
                    elif history.dim() == 2:
                        startIdx *= 20
                        endIdx *= 20
                        histBatch = history[i, :]
                        temp = histBatch[startIdx:endIdx]
                        if r > self.opts.last_n_rounds:
                            last_n_rounds_batch.append(torch.cat([torch.tensor([1]), temp, torch.tensor([2])], 0))
                        else:
                            last_n_rounds_batch.append(torch.cat([temp, torch.tensor([2, 0])], 0))
                history = torch.stack(last_n_rounds_batch, dim=0)

            history = history.to(self.device)
            questionPrg = questionPrg.to(self.device)
            historiesProg = historiesProg.tolist()
            questionRounds = questionRounds.tolist()
            answer = answer.tolist()
            answers = list(map(lambda a: self.datasetTest.vocab["idx_text_to_token"][a], answer))
            questionImgIdx = questionImgIdx.tolist()
            # if "minecraft" in self.opts.scenesPath:
            #     questionImgIdx = [idx - 1 for idx in questionImgIdx]
            questProgsToksPred = self.QuestionNet.sample(question, history)
            capProgsToksPred = self.CaptionNet.sample(caption)

            questProgsPred = decodeProg(questProgsToksPred, self.datasetTest.vocab["idx_prog_to_token"])
            capProgsPred = decodeProg(capProgsToksPred, self.datasetTest.vocab["idx_prog_to_token"])

            targetProgs = decodeProg(questionPrg, self.datasetTest.vocab["idx_prog_to_token"], target=True)
            questionTypes = [targetProg[0] for targetProg in targetProgs]
            # progHistories = getProgHistories(historiesProg[0], dataset.vocab["idx_prog_to_token"])
            progHistories = [getProgHistories(progHistToks, self.datasetTest.vocab["idx_prog_to_token"]) for progHistToks in historiesProg]
            pred_answers = []
            all_pred_programs.append([capProgsPred[0]] + questProgsPred)
            all_gt_programs.append([progHistories[0]] + (targetProgs))

            for i in range(batchSize):
                # if capProgsPred[i][0] == "extreme-center":
                #     print("bla")
                # print("idx = {}".format(questionImgIdx[i]))
                ans = self.getPrediction(
                    questProgsPred[i],
                    capProgsPred[i],
                    progHistories[i],
                    questionImgIdx[i]
                )
                # if ans == "Error":
                #     print(capProgsPred[i])
                pred_answers.append(ans)
            # print(pred_answers)
            correct = [1 if pred == ans else 0 for (pred, ans) in zip(pred_answers, answers)]
            correct_prog = [1 if pred == ans else 0 for (pred, ans) in zip(questProgsPred, targetProgs)]
            idx_false = np.argwhere(np.array(correct) == 0).squeeze(-1)
            if idx_false.shape[-1] > 0:
                first_failure_round += idx_false[0] + 1
            else:
                first_failure_round += self.opts.dialogLen + 1

            correct = sum(correct)
            correct_prog = sum(correct_prog)
            total_correct += correct
            total_quest_prog_correct += correct_prog
            total += len(answers)
            all_pred_answers.append(pred_answers)
            all_gt_answers.append(answers)
            all_question_types.append(questionTypes)
            penalty = np.zeros(len(answers))  # no per-round penalty is applied in this evaluation
            all_penalties.append(penalty)
            _iterCur += 1
            if _iterCur % self.opts.display_every == 0:
                print("[Evaluation] step {0} / {1} | acc. = {2:.2f}".format(
                    _iterCur, _totalCur, 100.0 * (total_correct / total)))

        ffr = 1.0 * (first_failure_round / _totalCur) / (self.opts.dialogLen + 1)

        textOut = "\n --------------- Average First Failure Round --------------- \n"
        textOut += "{} / {}".format(ffr, self.opts.dialogLen)

        # print(total_correct, total)
        accuracy = total_correct / total
        vd_acc = total_acc_pen / total
        quest_prog_acc = total_quest_prog_correct / total
        textOut += "\n --------------- Overall acc. --------------- \n"
        textOut += "{}".format(100.0 * accuracy)
        textOut += "\n --------------- Overall VD acc. --------------- \n"
        textOut += "{}".format(100.0 * vd_acc)
        textOut += "\n --------------- Question Prog. Acc --------------- \n"
        textOut += "{}".format(100.0 * quest_prog_acc)
        textOut += get_per_round_acc(
            all_pred_answers, all_gt_answers, all_penalties)

        textOut += get_per_question_type_acc(
            all_pred_answers, all_gt_answers, all_question_types, all_penalties)

        # textOut += get_per_dependency_type_acc(
        #     all_pred_answers, all_gt_answers, all_penalties)

        textOut += "\n --------------- Done --------------- \n"
        print(textOut)
        fname = self.opts.questionNetPath.split("/")[-3] + "results_{}_{}.txt".format(self.opts.last_n_rounds, self.opts.dialogLen)
        pred_answers_fname = self.opts.questionNetPath.split("/")[-3] + "_pred_answers_{}_{}.pkl".format(self.opts.last_n_rounds, self.opts.dialogLen)
        pred_answers_fname = os.path.join("/projects/abdessaied/clevr-dialog/output/pred_answers", pred_answers_fname)
        model_name = "NSVD_stack" if "stack" in self.opts.questionNetPath else "NSVD_concat"
        experiment_name = "minecraft"
        # experiment_name += "_{}".format(self.opts.dialogLen)
        prog_output_fname = os.path.join("/projects/abdessaied/clevr-dialog/output/prog_output/{}_{}.pkl".format(model_name, experiment_name))

        fpath = os.path.join(self.opts.text_log_dir, fname)
        with open(fpath, "w") as f:
            f.writelines(textOut)
        with open(pred_answers_fname, "wb") as f:
            pickle.dump(all_pred_answers, f, protocol=pickle.HIGHEST_PROTOCOL)
        with open(prog_output_fname, "wb") as f:
            pickle.dump((all_gt_programs, all_pred_programs, all_pred_answers), f, protocol=pickle.HIGHEST_PROTOCOL)

# Evaluation
def eval_with_pred(self):
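        """Evaluate on the test split while feeding the model's own predicted
        answers back into the dialog history round by round."""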
        # Define the multi-gpu training if needed
        all_pred_answers = []
        all_gt_answers = []
        all_question_types = []
        all_penalties = []

        first_failure_round = 0
        total_correct = 0
        total_acc_pen = 0
        total = 0

        samples = {}

        if len(self.opts.gpu_ids) > 1:
            self.QuestionNet = nn.DataParallel(self.QuestionNet, device_ids=self.opts.gpu_ids)
        self.QuestionNet = self.QuestionNet.eval()
        self.CaptionNet = self.CaptionNet.eval()
        if self.opts.batch_size != self.opts.dialogLen:
            print("[INFO] Changed batch size from {} to {}".format(self.opts.batch_size, self.opts.dialogLen))
            self.opts.batch_size = self.opts.dialogLen
        dataloader = Data.DataLoader(
            self.datasetTest,
            batch_size=self.opts.batch_size,
            shuffle=False,
            num_workers=self.opts.num_workers,
            pin_memory=False
        )
        _iterCur = 0
        _totalCur = len(dataloader)
        step = 0
        for step, (question, questionPrg, questionImgIdx, questionRounds, history, historiesProg, answer) in enumerate(dataloader):
            question = question.tolist()
            questions = decode(question, self.datasetTest.vocab["idx_text_to_token"], target=True)
            questions = list(map(lambda q: " ".join(q), questions))
            targetProgs = decode(questionPrg, self.datasetTest.vocab["idx_prog_to_token"], target=True)

            questionTypes = [targetProg[0] for targetProg in targetProgs]
            targetProgs = list(map(lambda q: " ".join(q), targetProgs))

            historiesProg = historiesProg.tolist()
            progHistories = [getProgHistories(progHistToks, self.datasetTest.vocab["idx_prog_to_token"]) for progHistToks in historiesProg]

            answer = answer.tolist()
            answers = list(map(lambda a: self.datasetTest.vocab["idx_text_to_token"][a], answer))
            questionImgIdx = questionImgIdx.tolist()

            if self.opts.encoderType == 2:
                histories_eval = [history[0, 0, :].tolist()]
                caption = history.detach()
                caption = caption[0, 0, :].unsqueeze(0)
                caption = caption[:, :16].to(self.device)
            elif self.opts.encoderType == 1:
                caption = history.detach()
                histories_eval = [history[0, :20].tolist()]
                caption = caption[0, :16].unsqueeze(0).to(self.device)
            cap = decode(caption, self.datasetTest.vocab["idx_text_to_token"], target=False)
            capProgToksPred = self.CaptionNet.sample(caption)
            capProgPred = decode(capProgToksPred, self.datasetTest.vocab["idx_prog_to_token"])[0]

            pred_answers = []
            pred_quest_prog = []
            for i, (q, prog_hist, img_idx) in enumerate(zip(question, progHistories, questionImgIdx)):
                _round = i + 1
                if _round <= self.opts.last_n_rounds:
                    start = 0
                else:
                    start = _round - self.opts.last_n_rounds
                end = len(histories_eval)

                quest = torch.tensor(q).unsqueeze(0).to(self.device)
                if self.opts.encoderType == 3:
                    hist = torch.stack([torch.tensor(h) for h in histories_eval[start:end]], dim=0).unsqueeze(0).to(self.device)
                elif self.opts.encoderType == 1:
                    histories_eval_copy = deepcopy(histories_eval)
                    histories_eval_copy[-1].append(self.datasetTest.vocab["text_token_to_idx"]["<END>"])
                    hist = torch.cat([torch.tensor(h) for h in histories_eval_copy[start:end]], dim=-1).unsqueeze(0).to(self.device)

                questProgsToksPred = self.QuestionNet.sample(quest, hist)
                questProgsPred = decode(questProgsToksPred, self.datasetTest.vocab["idx_prog_to_token"])[0]
                pred_quest_prog.append(" ".join(questProgsPred))
                ans = self.getPrediction(
                    questProgsPred,
                    capProgPred,
                    prog_hist,
                    img_idx
                )
                ans_idx = self.datasetTest.vocab["text_token_to_idx"].get(
                    ans, self.datasetTest.vocab["text_token_to_idx"]["<UNK>"])
                q[q.index(self.datasetTest.vocab["text_token_to_idx"]["<END>"])] = self.datasetTest.vocab["text_token_to_idx"]["<NULL>"]
                q[-1] = self.datasetTest.vocab["text_token_to_idx"]["<END>"]
                q.insert(-1, ans_idx)
                if self.opts.encoderType == 3:
                    histories_eval.append(copy.deepcopy(q))
                elif self.opts.encoderType == 0:
                    del q[0]
                    del q[-1]
                    histories_eval.append(copy.deepcopy(q))

                pred_answers.append(ans)

            correct = [1 if pred == ans else 0 for (pred, ans) in zip(pred_answers, answers)]
            idx_false = np.argwhere(np.array(correct) == 0).squeeze(-1)
            if idx_false.shape[-1] > 0:
                first_failure_round += idx_false[0] + 1
            else:
                first_failure_round += self.opts.dialogLen + 1

            correct = sum(correct)
            total_correct += correct
            total += len(answers)
            all_pred_answers.append(pred_answers)
            all_gt_answers.append(answers)
            all_question_types.append(questionTypes)
            _iterCur += 1
            if _iterCur % self.opts.display_every == 0:
                print("[Evaluation] step {0} / {1} | acc. = {2:.2f}".format(
                    _iterCur, _totalCur, 100.0 * (total_correct / total)
                ))
            samples["{}_{}".format(questionImgIdx[0], (step % 5) + 1)] = {
                "caption": " ".join(cap[0]),
                "cap_prog_gt": " ".join(progHistories[0][0]),
                "cap_prog_pred": " ".join(capProgPred),

                "questions": questions,
                "quest_progs_gt": targetProgs,
                "quest_progs_pred": pred_quest_prog,

                "answers": answers,
                "preds": pred_answers,
                "acc": correct,
            }

        ffr = 1.0 * self.opts.dialogLen * (first_failure_round / total)

        textOut = "\n --------------- Average First Failure Round --------------- \n"
        textOut += "{} / {}".format(ffr, self.opts.dialogLen)

        # print(total_correct, total)
        accuracy = total_correct / total
        vd_acc = total_acc_pen / total
        textOut += "\n --------------- Overall acc. --------------- \n"
        textOut += "{}".format(100.0 * accuracy)
        textOut += "\n --------------- Overall VD acc. --------------- \n"
        textOut += "{}".format(100.0 * vd_acc)

        textOut += get_per_round_acc(
            all_pred_answers, all_gt_answers, all_penalties)

        textOut += get_per_question_type_acc(
            all_pred_answers, all_gt_answers, all_question_types, all_penalties)

        textOut += "\n --------------- Done --------------- \n"
        print(textOut)
        if step >= len(dataloader):
            fname = self.opts.questionNetPath.split("/")[-3] + "_results_{}_{}_{}.txt".format(self.opts.last_n_rounds, self.opts.dialogLen, self.acc_type)
            pred_answers_fname = self.opts.questionNetPath.split("/")[-3] + "_pred_answers_{}_{}.pkl".format(self.opts.last_n_rounds, self.opts.dialogLen)
            pred_answers_fname = os.path.join("/projects/abdessaied/clevr-dialog/output/pred_answers", pred_answers_fname)

            fpath = os.path.join(self.opts.text_log_dir, fname)
            with open(fpath, "w") as f:
                f.writelines(textOut)
            with open(pred_answers_fname, "wb") as f:
                pickle.dump(all_pred_answers, f, protocol=pickle.HIGHEST_PROTOCOL)

def getPrediction(self, questProgPred, capProgPred, historyProg, imgIndex):
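        """Execute the predicted programs with the symbolic executor on scene
        `imgIndex` and return the answer as a string, or "Error" if any program
        fails to execute."""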
        self.symbolicExecutor.reset(imgIndex)
        # if round one, execute the predicted caption program first then answer the question
        if len(historyProg) == 1:
            captionFuncLabel = capProgPred[0]
            captionFuncArgs = capProgPred[1:]

            questionFuncLabel = questProgPred[0]
            questionFuncArgs = questProgPred[1:]

            try:
                _ = self.symbolicExecutor.execute(captionFuncLabel, captionFuncArgs)
            except:
                return "Error"

            try:
                predAnswer = self.symbolicExecutor.execute(questionFuncLabel, questionFuncArgs)
            except:
                return "Error"

        # If it is not the first round, we have to execute the program history and
        # then answer the question.
        else:
            questionFuncLabel = questProgPred[0]
            questionFuncArgs = questProgPred[1:]
            for prg in historyProg:
                # prg = prg.split(" ")
                FuncLabel = prg[0]
                FuncArgs = prg[1:]
                try:
                    _ = self.symbolicExecutor.execute(FuncLabel, FuncArgs)
                except:
                    return "Error"

            try:
                predAnswer = self.symbolicExecutor.execute(questionFuncLabel, questionFuncArgs)
            except:
                return "Error"
        return str(predAnswer)

def run(self, run_mode, epoch=None):
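        """Dispatch to training or evaluation depending on `run_mode`
        ('train', 'test_with_gt', or 'test_with_pred')."""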
        self.set_seed(self.opts.seed)
        if run_mode == 'train':
            self.train()

        elif run_mode == 'test_with_gt':
            print('Testing with gt answers in history')
            print('Loading ckpt {}'.format(self.opts.questionNetPath))
            state_dict = torch.load(self.opts.questionNetPath)['state_dict']
            self.QuestionNet.load_state_dict(state_dict)
            self.eval_with_gt()

        elif run_mode == 'test_with_pred':
            print('Testing with predicted answers in history')
            print('Loading ckpt {}'.format(self.opts.questionNetPath))
            state_dict = torch.load(self.opts.questionNetPath)['state_dict']
            self.QuestionNet.load_state_dict(state_dict)
            self.eval_with_pred()
        else:
            exit(-1)

def set_seed(self, seed):
        """Sets the seed for reproducibility.

        Args:
            seed (int): The seed used
        """
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(seed)
        print('[INFO] Seed set to {}...'.format(seed))


def constructQuestionNet(opts, lenVocabText, lenVocabProg, maxLenProg):
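    """Build the seq-to-seq question parser: a question/history encoder
    (type 1 or 2, selected via `opts.encoderType`) followed by a program decoder."""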
    decoder = Decoder(opts, lenVocabProg, maxLenProg)
    if opts.encoderType == 1:
        encoder = QuestEncoder_1(opts, lenVocabText)
    elif opts.encoderType == 2:
        encoder = QuestEncoder_2(opts, lenVocabText)

    net = SeqToSeqQ(encoder, decoder)
    return net


def constructCaptionNet(opts, lenVocabText, lenVocabProg, maxLenProg):
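    """Build the seq-to-seq caption parser: a caption encoder followed by a program decoder."""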
    decoder = Decoder(opts, lenVocabProg, maxLenProg)
    encoder = CaptionEncoder(opts, lenVocabText)
    net = SeqToSeqC(encoder, decoder)
    return net


def getProgHistories(progHistToks, prgIdxToToken):
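    """Split a flat sequence of program-history token ids into one token list per
    round, skipping the special ids 0, 1, and 2 and closing a round at id 2 (<END>)."""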
    progHist = []
    temp = []
    for tok in progHistToks:
        if tok not in [0, 1, 2]:
            temp.append(prgIdxToToken[tok])
            # del progHistToks[i]
        if tok == 2:
            # del progHistToks[i]
            # progHist.append(" ".join(temp))
            progHist.append(temp)
            temp = []
    return progHist


def getHistoriesFromStack(histToks, textIdxToToken):
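    """Render a stacked (per-round) history tensor as human-readable text, one line
    per round: the caption first, then "question? | answer" pairs."""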
    histories = "\n"
    temp = []
    for i, roundToks in enumerate(histToks):
        for tok in roundToks:
            if tok not in [0, 1, 2]:
                temp.append(textIdxToToken[tok])
                # del progHistToks[i]
            if tok == 2:
                # del progHistToks[i]
                if i == 0:
                    histories += " ".join(temp) + ".\n"
                else:
                    histories += " ".join(temp[:-1]) + "? | {}.\n".format(temp[-1])
                # histories.append(temp)
                temp = []
                break
    return histories


def getHistoriesFromConcat(histToks, textIdxToToken):
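    """Render a concatenated history token sequence as a list of
    "question? | answer" strings, one entry per round."""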
    histories = []
    temp = []
    for tok in histToks:
        if tok not in [0, 1, 2]:
            temp.append(textIdxToToken[tok])
            # del progHistToks[i]
        if tok == 2:
            # del progHistToks[i]
            histories.append(" ".join(temp[:-1]) + "? | {}".format(temp[-1]))
            # histories.append(temp)
            temp = []
    return histories


def decodeProg(tokens, prgIdxToToken, target=False):
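    """Convert a batch of program token ids into lists of program tokens, stopping
    at <END> (index 2); for targets, the leading start token is dropped."""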
    tokensBatch = tokens.tolist()
    progsBatch = []
    for tokens in tokensBatch:
        prog = []
        for tok in tokens:
            if tok == 2:  # <END> has index 2
                break
            prog.append(prgIdxToToken.get(tok))
        if target:
            prog = prog[1:]
        # progsBatch.append(" ".join(prog))
        progsBatch.append(prog)
    return progsBatch


def printPred(predSoftmax, gts, prgIdxToToken):
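    """Format greedy-decoded predictions and their targets as a printable comparison string."""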
    assert predSoftmax.size(0) == gts.size(0)
    tokens = predSoftmax.topk(1)[1].squeeze(-1)
    tokens = tokens.tolist()
    gts = gts.tolist()
    message = "\n ------------------------ \n"
    for token, gt in zip(tokens, gts):
        message += "Prediction: "
        for tok in token:
            message += prgIdxToToken.get(tok) + " "
        message += "\n Target : "
        for tok in gt:
            message += prgIdxToToken.get(tok) + " "
        message += "\n ------------------------ \n"
    return message


def get_per_round_acc(preds, gts, penalties):
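    """Accumulate per-round accuracy, down-weighting a correct answer by
    0.5**penalty, and return a formatted text summary."""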
    res = {}
    for img_preds, img_gt, img_pen in zip(preds, gts, penalties):
        img_preds = list(img_preds)
        img_gt = list(img_gt)
        img_pen = list(img_pen)
        for i, (pred, gt, pen) in enumerate(zip(img_preds, img_gt, img_pen)):
            _round = str(i + 1)
            if _round not in res:
                res[_round] = {
                    "correct": 0,
                    "all": 0
                }
            res[_round]["all"] += 1
            if pred == gt:
                res[_round]["correct"] += 0.5**pen

    textOut = "\n --------------- Per round Acc --------------- \n"
    for k in res:
        textOut += "{}: {} %\n".format(k, 100.0 * (res[k]["correct"]/res[k]["all"]))
    return textOut


def get_per_question_type_acc(preds, gts, qtypes, penalties):
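    """Accumulate accuracy per question type and per question category (the prefix
    before the first '-'), with the same 0.5**penalty weighting, and return a text summary."""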
    res1 = {}
    res2 = {}

    for img_preds, img_gt, img_qtypes, img_pen in zip(preds, gts, qtypes, penalties):
        # img_preds = list(img_preds)
        # img_gt = list(img_gt)
        img_pen = list(img_pen)
        for pred, gt, temp, pen in zip(img_preds, img_gt, img_qtypes, img_pen):
            if temp not in res1:
                res1[temp] = {
                    "correct": 0,
                    "all": 0
                }
            temp_cat = temp.split("-")[0]
            if temp_cat not in res2:
                res2[temp_cat] = {
                    "correct": 0,
                    "all": 0
                }
            res1[temp]["all"] += 1
            res2[temp_cat]["all"] += 1

            if pred == gt:
                res1[temp]["correct"] += 0.5**pen
                res2[temp_cat]["correct"] += 0.5**pen

    textOut = "\n --------------- Per question Type Acc --------------- \n"
    for k in res1:
        textOut += "{}: {} %\n".format(k, 100.0 * (res1[k]["correct"]/res1[k]["all"]))

    textOut += "\n --------------- Per question Category Acc --------------- \n"
    for k in res2:
        textOut += "{}: {} %\n".format(k, 100.0 * (res2[k]["correct"]/res2[k]["all"]))
    return textOut


def decode(tokens, prgIdxToToken, target=False):
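    """Like decodeProg, but also accepts plain Python lists of token ids."""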
    if type(tokens) != list:
        tokens = tokens.tolist()

    progsBatch = []
    for token in tokens:
        prog = []
        for tok in token:
            if tok == 2:  # <END> has index 2
                break
            prog.append(prgIdxToToken.get(tok))
        if target:
            prog = prog[1:]
        # progsBatch.append(" ".join(prog))
        progsBatch.append(prog)
    return progsBatch


if __name__ == "__main__":
    optsC = OptionsC().parse()
    optsQ = OptionsQ().parse()

    exe = Execution(optsQ, optsC)
    # NOTE: Execution.run() expects 'train', 'test_with_gt', or 'test_with_pred'.
    exe.run("test")
    print("[INFO] Done ...")