VDGR/ensemble.py

import os
import os.path as osp
import numpy as np
import json
import argparse
import pyhocon
import glog as log
import torch
from tqdm import tqdm
from utils.data_utils import load_pickle_lines
from utils.visdial_metrics import scores_to_ranks
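
# Illustrative usage (assuming an experiment entry named `test` exists in config/ensemble.conf):
#   python ensemble.py --exp test --mode predict
# The script sums the answer probabilities predicted by several VisDial models and writes the
# ensembled ranks to <log_dir>/<split>_ensemble_preds.json.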

parser = argparse.ArgumentParser(description='Ensemble for VisDial')
parser.add_argument('--exp', type=str, default='test',
                    help='experiment name from .conf')
parser.add_argument('--mode', type=str, default='predict', choices=['eval', 'predict'],
                    help='eval or predict')
parser.add_argument('--ssh', action='store_true',
                    help='whether or not we are executing the command via ssh. '
                         'If set to True, we will not log.info anything to screen and '
                         'only redirect it to the log file')


if __name__ == '__main__':
    args = parser.parse_args()

    # initialization
    config = pyhocon.ConfigFactory.parse_file("config/ensemble.conf")[args.exp]
    config["log_dir"] = os.path.join(config["log_dir"], args.exp)
    if not os.path.exists(config["log_dir"]):
        os.makedirs(config["log_dir"])

    # set logs
    log_file = os.path.join(config["log_dir"], f'{args.mode}.log')
    # set_log_file is not imported above; it is assumed to be provided elsewhere in the repo
    set_log_file(log_file, file_only=args.ssh)

    # print environment info
    log.info(f"Running experiment: {args.exp}")
    log.info(f"Results saved to {config['log_dir']}")
    log.info(pyhocon.HOCONConverter.convert(config, "hocon"))
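
    # A minimal experiment entry in config/ensemble.conf is assumed to look roughly like the
    # following sketch (illustrative only; the key names are taken from how they are read below):
    #   test {
    #       log_dir = logs/ensemble
    #       pred_dir = logs
    #       models = [model_a, model_b]
    #       processed = false
    #       split = test
    #       visdial_test_data = data/visdial_1.0_test.json
    #   }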

    # 'processed' can be a single flag shared by all models or a per-model list
    if isinstance(config['processed'], list):
        assert len(config['models']) == len(config['processed'])
        processed = {model: pcd for model, pcd in zip(config['models'], config['processed'])}
    else:
        processed = {model: config['processed'] for model in config['models']}

    if config['split'] == 'test' and np.any(config['processed']):
        test_data = json.load(open(config['visdial_test_data']))['data']['dialogs']
        # map image_id -> number of rounds in the original (unprocessed) test dialog
        imid2rndid = {t['image_id']: len(t['dialog']) for t in test_data}
        del test_data
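
    # Each line of visdial_prediction.pkl is assumed to hold one dialog's prediction as a dict
    # with at least 'image_id', 'nsp_probs' (answer scores) and, depending on split/processing,
    # 'gt_relevance_round_id'; these are the only fields read below.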

    # load prediction files
    visdial_outputs = dict()
    if args.mode == 'eval':
        metrics = {}
    for model in config['models']:
        pred_filename = osp.join(config['pred_dir'], model, 'visdial_prediction.pkl')
        pred_dict = {p['image_id']: p for p in load_pickle_lines(pred_filename)}
        log.info(f'Loading {len(pred_dict)} predictions from {pred_filename}')
        visdial_outputs[model] = pred_dict

        if args.mode == 'eval':
            # num_dialogs (expected dialog count for the split) is assumed to be defined
            # elsewhere in the full file
            assert len(visdial_outputs[model]) >= num_dialogs
            metric = json.load(open(osp.join(config['pred_dir'], model, "metrics_epoch_best.json")))
            metrics[model] = metric['val']

    image_ids = visdial_outputs[model].keys()
    predictions = []

    # for each dialog
    for image_id in tqdm(image_ids):
        scores = []
        round_id = None
        for model in config['models']:
            pred = visdial_outputs[model][image_id]
            if config['split'] == 'test' and processed[model]:
                # when predicting on processed data, the first few rounds are deleted from some
                # dialogs, so the original round ids can only be found in the original test data
                round_id_in_pred = imid2rndid[image_id]
            else:
                round_id_in_pred = pred['gt_relevance_round_id']
            if not isinstance(round_id_in_pred, int):
                round_id_in_pred = int(round_id_in_pred)
            if round_id is None:
                round_id = round_id_in_pred
            else:
                # make sure all models refer to the same round
                assert round_id == round_id_in_pred
            scores.append(torch.from_numpy(pred['nsp_probs']).unsqueeze(0))
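
        # each nsp_probs entry is assumed to be a numpy array of shape [num_rounds, num_options],
        # so stacking the unsqueezed copies below yields [n_model, num_rounds, num_options]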
        # ensemble scores by summing the per-model answer probabilities
        scores = torch.cat(scores, 0)                    # [n_model, num_rounds, num_options]
        scores = torch.sum(scores, dim=0, keepdim=True)  # [1, num_rounds, num_options]
        if scores.size(1) > 1:
            # keep only the evaluated round when the predictions cover multiple rounds
            scores = scores[:, round_id - 1, :].unsqueeze(1)
        ranks = scores_to_ranks(scores)  # [eval_batch_size, num_rounds, num_options]
        ranks = ranks.squeeze(1)

        prediction = {
            "image_id": image_id,
            "round_id": round_id,
            "ranks": ranks[0].tolist()
        }
        predictions.append(prediction)
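
    # each entry follows the VisDial prediction format expected by the evaluation server:
    # {"image_id": ..., "round_id": ..., "ranks": [rank assigned to each answer option]}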
    filename = osp.join(config['log_dir'], f'{config["split"]}_ensemble_preds.json')
    with open(filename, 'w') as f:
        json.dump(predictions, f)
    log.info(f'{len(predictions)} predictions saved to {filename}')