# Experiment configuration (HOCON syntax).
#
# Three named configurations are defined:
#   P1         - base settings (train_on_dense = false, metric = mrr).
#   P2_CE      - starts from a copy of P1 via ${P1} and overrides the keys
#                listed in its body (train_on_dense = true, metric = ndcg,
#                dense_loss = ce).
#   P2_LISTNET - starts from a copy of P2_CE and only changes dense_loss
#                to listnet.
#
# NOTE: '#' starts a comment that runs to the end of the line, so every
# setting must stay on its own line; a setting placed after a '#' on the
# same line is silently ignored.

# Phase 1
P1 {
  use_cpu = false
  visdial_version = 1.0
  train_on_dense = false
  metrics_to_maximize = mrr

  # visdial data
  visdial_image_feats = data/visdial_img_feat.lmdb
  visdial_image_adj_matrices = data/img_adj_matrices
  visdial_question_adj_matrices = data/question_adj_matrices
  visdial_history_adj_matrices = data/history_adj_matrices

  visdial_train = data/visdial_1.0_train.json
  visdial_val = data/visdial_1.0_val.json
  visdial_test = data/visdial_1.0_test.json
  visdial_val_dense_annotations = data/visdial_1.0_val_dense_annotations.json

  visdial_train_09 = data/visdial_0.9_train.json
  visdial_val_09 = data/visdial_0.9_val.json
  visdial_test_09 = data/visdial_0.9_test.json

  visdialconv_val = data/visdial_conv.json
  visdialconv_val_dense_annotations = data/visdialconv_dense_annotations.json

  visdialvispro_val = data/vispro.json
  visdialvispro_val_dense_annotations = data/vispro_dense_annotations.json

  visdial_question_parse_vocab = data/parse_vocab.pkl

  # init
  start_path = ckpt/vdgr_visdial_v1.0_after_warmup_K2.ckpt
  model_config = config/bert_base_6layer_6conect.json

  # visdial training
  freeze_vilbert = false
  visdial_tot_rounds = 11
  num_negative_samples = 1
  sequences_per_image = 2
  batch_size = 8
  lm_loss_coeff = 1
  nsp_loss_coeff = 1
  img_loss_coeff = 1
  batch_multiply = 1
  use_trainval = false
  dense_loss = ce
  dense_loss_coeff = 0
  dataloader_text_only = false
  rlv_hst_only = false
  rlv_hst_dense_round = false

  # visdial model
  mask_prob = 0.1
  image_mask_prob = 0.1
  max_seq_len = 256
  num_options = 100
  num_options_dense = 100
  use_embedding = joint

  # visdial evaluation
  eval_visdial_on_test = true
  eval_batch_size = 1
  eval_line_batch_size = 200
  skip_mrr_eval = false
  skip_ndcg_eval = false
  skip_visdial_eval = false
  eval_visdial_every = 1
  # choices = [visdial, visdial_conv, visdial_vispro]
  eval_dataset = visdial # visdial_vispro
  continue_evaluation = false
  eval_at_start = false
  eval_before_training = false
  initializer = normal
  bert_cased = false

  # restore ckpt
  loads_best_ckpt = false
  loads_ckpt = false
  restarts = false
  resets_max_metric = false
  uses_new_optimizer = false
  sets_new_lr = false
  loads_start_path = false

  # logging
  random_seed = 42
  next_logging_pct = 1.0
  next_evaluating_pct = 50.0
  max_ckpt_to_keep = 1
  num_epochs = 20
  early_stop_epoch = 5
  skip_saving_ckpt = false
  dp_type = apex
  stack_gr_data = false
  master_port = 5122
  stop_epochs = -1
  train_each_round = false
  drop_last_answer = false
  num_samples = -1

  # predicting
  predict_split = test
  predict_each_round = false
  predict_dense_round = false
  num_test_dialogs = 8000
  num_val_dialogs = 2064
  save_score = false

  # optimizer
  reset_optim = none
  learning_rate_bert = 5e-6
  learning_rate_gnn = 2e-4
  gnn_weight_decay = 0.01
  use_diff_lr_gnn = true
  min_lr = 0
  decay_method_bert = linear
  decay_method_gnn = linear
  decay_exp = 2
  max_grad_norm = 1.0
  task_optimizer = adam
  warmup_ratio = 0.1

  # directory
  log_dir = logs/vdgr
  data_dir = data
  visdial_output_dir = visdial_output
  bert_cache_dir = transformers

  # keep track of other hparams in bert json
  v_gnn_edge_dim = 12 # 11 classes + hub_node
  q_gnn_edge_dim = 48 # 47 classes + hub_node
  num_v_gnn_layers = 2
  num_q_gnn_layers = 2
  num_h_gnn_layers = 2
  num_gnn_attention_heads = 4
  v_gnn_ids = [0, 1, 2, 3, 4, 5]
  t_gnn_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
}

# Phase 2
# Inherits every key from P1; only the keys below are added or overridden.
P2_CE = ${P1} {
  # basic
  train_on_dense = true
  use_trainval = true
  metrics_to_maximize = ndcg

  visdial_train_dense = data/visdial_1.0_train_dense.json
  visdial_train_dense_annotations = data/visdial_1.0_train_dense_annotations.json
  # Points at the same file as visdial_val.
  visdial_val_dense = data/visdial_1.0_val.json

  tr_graph_idx_mapping = data/tr_dense_mapping.json
  val_graph_idx_mapping = data/val_dense_mapping.json
  test_graph_idx_mapping = data/test_dense_mapping.json

  # The next two keys restate the values already set in P1.
  visdial_val = data/visdial_1.0_val.json
  visdial_val_dense_annotations = data/visdial_1.0_val_dense_annotations.json

  # data
  # Checkpoint path under P1's log_dir (logs/vdgr).
  start_path = logs/vdgr/P1_K2_v1.0/epoch_best.ckpt
  rlv_hst_only = false # same value as P1

  # visdial training
  nsp_loss_coeff = 0
  dense_loss_coeff = 1
  batch_multiply = 10
  batch_size = 1

  # visdial model
  num_options_dense = 100 # same value as P1

  # visdial evaluation
  eval_batch_size = 1 # same value as P1
  eval_line_batch_size = 100
  skip_mrr_eval = true

  # training
  stop_epochs = 3
  dp_type = dp
  dense_loss = ce # same value as P1

  # optimizer
  learning_rate_bert = 1e-4
}

# Same as P2_CE except for the dense loss.
P2_LISTNET = ${P2_CE} {
  dense_loss = listnet
}