# VDGR/config/vdgr.conf
#
# Repository web-view metadata captured with this file:
#   189 lines, 4.5 KiB, Text (Raw / Normal View / History)
#   2023-10-25 15:38:09 +02:00
# Phase 1
# Base configuration object. P2_CE below is declared as `${P1} { ... }`, so
# every key set here is also the starting value for the Phase 2 objects unless
# they override it.
P1 {
use_cpu = false
visdial_version = 1.0
# P2_CE overrides train_on_dense to true and metrics_to_maximize to ndcg.
train_on_dense = false
metrics_to_maximize = mrr
# visdial data
# All paths in this group are relative and sit under data/ (see data_dir below).
visdial_image_feats = data/visdial_img_feat.lmdb
visdial_image_adj_matrices = data/img_adj_matrices
visdial_question_adj_matrices = data/question_adj_matrices
visdial_history_adj_matrices = data/history_adj_matrices
visdial_train = data/visdial_1.0_train.json
visdial_val = data/visdial_1.0_val.json
visdial_test = data/visdial_1.0_test.json
visdial_val_dense_annotations = data/visdial_1.0_val_dense_annotations.json
visdial_train_09 = data/visdial_0.9_train.json
visdial_val_09 = data/visdial_0.9_val.json
visdial_test_09 = data/visdial_0.9_test.json
visdialconv_val = data/visdial_conv.json
visdialconv_val_dense_annotations = data/visdialconv_dense_annotations.json
visdialvispro_val = data/vispro.json
visdialvispro_val_dense_annotations = data/vispro_dense_annotations.json
visdial_question_parse_vocab = data/parse_vocab.pkl
# init
# Checkpoint and model JSON used as the starting point; P2_CE replaces start_path.
start_path = ckpt/vdgr_visdial_v1.0_after_warmup_K2.ckpt
model_config = config/bert_base_6layer_6conect.json
# visdial training
freeze_vilbert = false
# NOTE(review): 11 is presumably the caption plus 10 question-answer rounds - confirm.
visdial_tot_rounds = 11
num_negative_samples = 1
sequences_per_image = 2
# P2_CE uses batch_size = 1 together with batch_multiply = 10.
batch_size = 8
lm_loss_coeff = 1
nsp_loss_coeff = 1
img_loss_coeff = 1
batch_multiply = 1
use_trainval = false
dense_loss = ce
# Zero in Phase 1; P2_CE sets dense_loss_coeff = 1 and nsp_loss_coeff = 0.
dense_loss_coeff = 0
dataloader_text_only = false
rlv_hst_only = false
rlv_hst_dense_round = false
# visdial model
mask_prob = 0.1
image_mask_prob = 0.1
max_seq_len = 256
num_options = 100
num_options_dense = 100
use_embedding = joint
# visdial evaluation
eval_visdial_on_test = true
eval_batch_size = 1
# P2_CE lowers eval_line_batch_size to 100 and sets skip_mrr_eval = true.
eval_line_batch_size = 200
skip_mrr_eval = false
skip_ndcg_eval = false
skip_visdial_eval = false
eval_visdial_every = 1
# Active value is visdial; the trailing comment lists the accepted alternatives.
eval_dataset = visdial # visdial_vispro # choices = [visdial, visdial_conv, visdial_vispro ]
continue_evaluation = false
eval_at_start = false
eval_before_training = false
initializer = normal
bert_cased = false
# restore ckpt
# Every flag in this group is false in Phase 1.
loads_best_ckpt = false
loads_ckpt = false
restarts = false
resets_max_metric = false
uses_new_optimizer = false
sets_new_lr = false
loads_start_path = false
# logging
# NOTE(review): besides logging/checkpoint cadence this group also holds
# run-control keys (seed, epoch counts, dp_type, master_port).
random_seed = 42
# NOTE(review): the two *_pct values are presumably percentages of an epoch - confirm.
next_logging_pct = 1.0
next_evaluating_pct = 50.0
max_ckpt_to_keep = 1
num_epochs = 20
early_stop_epoch = 5
skip_saving_ckpt = false
# P2_CE switches dp_type to dp.
dp_type = apex
stack_gr_data = false
master_port = 5122
# P2_CE sets stop_epochs = 3.
# NOTE(review): -1 presumably disables the limit - confirm against the training code.
stop_epochs = -1
train_each_round = false
drop_last_answer = false
# NOTE(review): -1 presumably means "use all samples" - confirm.
num_samples = -1
# predicting
predict_split = test
predict_each_round = false
predict_dense_round = false
num_test_dialogs = 8000
num_val_dialogs = 2064
save_score = false
# optimizer
reset_optim = none
# P2_CE raises learning_rate_bert to 1e-4; the gnn rate is not overridden there.
learning_rate_bert = 5e-6
learning_rate_gnn = 2e-4
gnn_weight_decay = 0.01
use_diff_lr_gnn = true
min_lr = 0
decay_method_bert = linear
decay_method_gnn = linear
decay_exp = 2
max_grad_norm = 1.0
task_optimizer = adam
warmup_ratio = 0.1
# directory
# P2_CE's start_path points inside log_dir (logs/vdgr/...).
log_dir = logs/vdgr
data_dir = data
visdial_output_dir = visdial_output
bert_cache_dir = transformers
# keep track of other hparams in bert json
v_gnn_edge_dim = 12 # 11 classes + hub_node
q_gnn_edge_dim = 48 # 47 classes + hub_node
num_v_gnn_layers = 2
num_q_gnn_layers = 2
num_h_gnn_layers = 2
num_gnn_attention_heads = 4
# Six ids (0-5) and twelve ids (0-11) respectively.
# NOTE(review): presumably layer indices in the model described by model_config - confirm.
v_gnn_ids = [0, 1, 2, 3, 4, 5]
t_gnn_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
}
# Phase 2
# Trains on dense annotations (train_on_dense = true) with dense_loss = ce.
# Declared as `${P1} { ... }`: it starts from every P1 value and overrides or
# adds only the keys listed below. P2_LISTNET in turn builds on this object.
P2_CE = ${P1} {
# basic
train_on_dense = true
use_trainval = true
metrics_to_maximize = ndcg
# Keys not present in P1: the dense train files, visdial_val_dense and the
# three *_graph_idx_mapping files.
visdial_train_dense = data/visdial_1.0_train_dense.json
visdial_train_dense_annotations = data/visdial_1.0_train_dense_annotations.json
# Points at the same file as visdial_val.
visdial_val_dense = data/visdial_1.0_val.json
tr_graph_idx_mapping = data/tr_dense_mapping.json
val_graph_idx_mapping = data/val_dense_mapping.json
test_graph_idx_mapping = data/test_dense_mapping.json
# The next two keys restate P1's values unchanged.
visdial_val = data/visdial_1.0_val.json
visdial_val_dense_annotations = data/visdial_1.0_val_dense_annotations.json
# init
# Replaces P1's warm-up checkpoint. The path sits under P1's log_dir (logs/vdgr).
# NOTE(review): presumably the best checkpoint written by a Phase 1 run - confirm.
start_path = logs/vdgr/P1_K2_v1.0/epoch_best.ckpt
# Same value as P1, restated explicitly.
rlv_hst_only = false
# visdial training
# Relative to P1: nsp_loss_coeff goes 1 -> 0 and dense_loss_coeff goes 0 -> 1.
nsp_loss_coeff = 0
dense_loss_coeff = 1
# P1 uses batch_size = 8 with batch_multiply = 1.
# NOTE(review): batch_multiply is presumably a gradient-accumulation factor - confirm.
batch_multiply = 10
batch_size = 1
# visdial model
# Same value as P1, restated explicitly.
num_options_dense = 100
# visdial evaluation
# eval_batch_size matches P1; eval_line_batch_size is halved from P1's 200.
eval_batch_size = 1
eval_line_batch_size = 100
skip_mrr_eval = true
# training
# P1 has stop_epochs = -1 and dp_type = apex.
stop_epochs = 3
dp_type = dp
# Same value as P1; P2_LISTNET overrides it with listnet.
dense_loss = ce
# optimizer
# P1 uses 5e-6.
learning_rate_bert = 1e-4
}
# Phase 2 variant: starts from every P2_CE value (and therefore P1's as well)
# and changes only dense_loss, from ce to listnet.
P2_LISTNET = ${P2_CE} {
dense_loss = listnet
}