Code release
This commit is contained in:
commit
09fb25e339
29 changed files with 7162 additions and 0 deletions
40
config/bert_base_6layer_6conect.json
Normal file
40
config/bert_base_6layer_6conect.json
Normal file
|
@ -0,0 +1,40 @@
|
|||
{
|
||||
"attention_probs_dropout_prob": 0.1,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.1,
|
||||
"hidden_size": 768,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"max_position_embeddings": 512,
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12,
|
||||
"type_vocab_size": 2,
|
||||
"vocab_size": 30522,
|
||||
"v_feature_size": 2048,
|
||||
"v_target_size": 1601,
|
||||
"v_hidden_size": 1024,
|
||||
"v_num_hidden_layers": 6,
|
||||
"v_num_attention_heads": 8,
|
||||
"v_intermediate_size": 1024,
|
||||
"bi_hidden_size": 1024,
|
||||
"bi_num_attention_heads": 8,
|
||||
"bi_intermediate_size": 1024,
|
||||
"bi_attention_type": 1,
|
||||
"v_attention_probs_dropout_prob": 0.1,
|
||||
"v_hidden_act": "gelu",
|
||||
"v_hidden_dropout_prob": 0.1,
|
||||
"v_initializer_range": 0.02,
|
||||
"pooling_method": "mul",
|
||||
"v_biattention_id": [0, 1, 2, 3, 4, 5],
|
||||
"t_biattention_id": [6, 7, 8, 9, 10, 11],
|
||||
"gnn_act": "gelu",
|
||||
"num_v_gnn_layers": 2,
|
||||
"num_q_gnn_layers": 2,
|
||||
"num_h_gnn_layers": 2,
|
||||
"num_gnn_attention_heads": 4,
|
||||
"gnn_dropout_prob": 0.1,
|
||||
"v_gnn_edge_dim": 12,
|
||||
"q_gnn_edge_dim": 48,
|
||||
"v_gnn_ids": [0, 1, 2, 3, 4, 5],
|
||||
"t_gnn_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
||||
}
|
33
config/ensemble.conf
Normal file
33
config/ensemble.conf
Normal file
|
@ -0,0 +1,33 @@
|
|||
test = {
|
||||
split = test
|
||||
skip_mrr_eval = true
|
||||
# data
|
||||
visdial_test_data = data/visdial_1.0_test.json
|
||||
|
||||
# directory
|
||||
log_dir = logs/vdgr_ensemble
|
||||
pred_dir = logs/vdgr
|
||||
visdial_output_dir = visdial_output
|
||||
|
||||
processed = [
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false
|
||||
]
|
||||
|
||||
models = [
|
||||
"P2_K1_v1.0_CE",
|
||||
"P2_K2_v1.0_CE",
|
||||
"P2_K3_v1.0_CE",
|
||||
"P2_K4_v1.0_CE",
|
||||
"P2_K1_v1.0_LISTNET",
|
||||
"P2_K2_v1.0_LISTNET",
|
||||
"P2_K3_v1.0_LISTNET",
|
||||
"P2_K4_v1.0_LISTNET",
|
||||
]
|
||||
}
|
188
config/vdgr.conf
Normal file
188
config/vdgr.conf
Normal file
|
@ -0,0 +1,188 @@
|
|||
# Phase 1
|
||||
P1 {
|
||||
use_cpu = false
|
||||
visdial_version = 1.0
|
||||
train_on_dense = false
|
||||
metrics_to_maximize = mrr
|
||||
|
||||
# visdial data
|
||||
visdial_image_feats = data/visdial_img_feat.lmdb
|
||||
|
||||
visdial_image_adj_matrices = data/img_adj_matrices
|
||||
visdial_question_adj_matrices = data/question_adj_matrices
|
||||
visdial_history_adj_matrices = data/history_adj_matrices
|
||||
|
||||
visdial_train = data/visdial_1.0_train.json
|
||||
visdial_val = data/visdial_1.0_val.json
|
||||
visdial_test = data/visdial_1.0_test.json
|
||||
visdial_val_dense_annotations = data/visdial_1.0_val_dense_annotations.json
|
||||
|
||||
visdial_train_09 = data/visdial_0.9_train.json
|
||||
visdial_val_09 = data/visdial_0.9_val.json
|
||||
visdial_test_09 = data/visdial_0.9_test.json
|
||||
|
||||
visdialconv_val = data/visdial_conv.json
|
||||
visdialconv_val_dense_annotations = data/visdialconv_dense_annotations.json
|
||||
|
||||
visdialvispro_val = data/vispro.json
|
||||
visdialvispro_val_dense_annotations = data/vispro_dense_annotations.json
|
||||
|
||||
visdial_question_parse_vocab = data/parse_vocab.pkl
|
||||
|
||||
# init
|
||||
start_path = ckpt/vdgr_visdial_v1.0_after_warmup_K2.ckpt
|
||||
model_config = config/bert_base_6layer_6conect.json
|
||||
|
||||
# visdial training
|
||||
freeze_vilbert = false
|
||||
visdial_tot_rounds = 11
|
||||
num_negative_samples = 1
|
||||
sequences_per_image = 2
|
||||
batch_size = 8
|
||||
lm_loss_coeff = 1
|
||||
nsp_loss_coeff = 1
|
||||
img_loss_coeff = 1
|
||||
batch_multiply = 1
|
||||
use_trainval = false
|
||||
dense_loss = ce
|
||||
dense_loss_coeff = 0
|
||||
dataloader_text_only = false
|
||||
rlv_hst_only = false
|
||||
rlv_hst_dense_round = false
|
||||
|
||||
# visdial model
|
||||
mask_prob = 0.1
|
||||
image_mask_prob = 0.1
|
||||
max_seq_len = 256
|
||||
num_options = 100
|
||||
num_options_dense = 100
|
||||
use_embedding = joint
|
||||
|
||||
# visdial evaluation
|
||||
eval_visdial_on_test = true
|
||||
eval_batch_size = 1
|
||||
eval_line_batch_size = 200
|
||||
skip_mrr_eval = false
|
||||
skip_ndcg_eval = false
|
||||
skip_visdial_eval = false
|
||||
eval_visdial_every = 1
|
||||
eval_dataset = visdial # choices = [visdial, visdial_conv, visdial_vispro]
|
||||
|
||||
continue_evaluation = false
|
||||
eval_at_start = false
|
||||
eval_before_training = false
|
||||
initializer = normal
|
||||
bert_cased = false
|
||||
|
||||
# restore ckpt
|
||||
loads_best_ckpt = false
|
||||
loads_ckpt = false
|
||||
restarts = false
|
||||
resets_max_metric = false
|
||||
uses_new_optimizer = false
|
||||
sets_new_lr = false
|
||||
loads_start_path = false
|
||||
|
||||
# logging
|
||||
random_seed = 42
|
||||
next_logging_pct = 1.0
|
||||
next_evaluating_pct = 50.0
|
||||
max_ckpt_to_keep = 1
|
||||
num_epochs = 20
|
||||
early_stop_epoch = 5
|
||||
skip_saving_ckpt = false
|
||||
dp_type = apex
|
||||
stack_gr_data = false
|
||||
master_port = 5122
|
||||
stop_epochs = -1
|
||||
train_each_round = false
|
||||
drop_last_answer = false
|
||||
num_samples = -1
|
||||
|
||||
# predicting
|
||||
predict_split = test
|
||||
predict_each_round = false
|
||||
predict_dense_round = false
|
||||
num_test_dialogs = 8000
|
||||
num_val_dialogs = 2064
|
||||
save_score = false
|
||||
|
||||
# optimizer
|
||||
reset_optim = none
|
||||
learning_rate_bert = 5e-6
|
||||
learning_rate_gnn = 2e-4
|
||||
gnn_weight_decay = 0.01
|
||||
use_diff_lr_gnn = true
|
||||
min_lr = 0
|
||||
decay_method_bert = linear
|
||||
decay_method_gnn = linear
|
||||
decay_exp = 2
|
||||
max_grad_norm = 1.0
|
||||
task_optimizer = adam
|
||||
warmup_ratio = 0.1
|
||||
|
||||
# directory
|
||||
log_dir = logs/vdgr
|
||||
data_dir = data
|
||||
visdial_output_dir = visdial_output
|
||||
bert_cache_dir = transformers
|
||||
|
||||
# GNN hyperparameters duplicated from the BERT JSON config (see model_config) so they are tracked alongside the other hparams
|
||||
v_gnn_edge_dim = 12 # 11 classes + hub_node
|
||||
q_gnn_edge_dim = 48 # 47 classes + hub_node
|
||||
num_v_gnn_layers = 2
|
||||
num_q_gnn_layers = 2
|
||||
num_h_gnn_layers = 2
|
||||
num_gnn_attention_heads = 4
|
||||
v_gnn_ids = [0, 1, 2, 3, 4, 5]
|
||||
t_gnn_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
|
||||
}
|
||||
|
||||
# Phase 2
|
||||
P2_CE = ${P1} {
|
||||
# basic
|
||||
train_on_dense = true
|
||||
use_trainval = true
|
||||
metrics_to_maximize = ndcg
|
||||
|
||||
visdial_train_dense = data/visdial_1.0_train_dense.json
|
||||
visdial_train_dense_annotations = data/visdial_1.0_train_dense_annotations.json
|
||||
visdial_val_dense = data/visdial_1.0_val.json
|
||||
|
||||
tr_graph_idx_mapping = data/tr_dense_mapping.json
|
||||
val_graph_idx_mapping = data/val_dense_mapping.json
|
||||
test_graph_idx_mapping = data/test_dense_mapping.json
|
||||
|
||||
visdial_val = data/visdial_1.0_val.json
|
||||
visdial_val_dense_annotations = data/visdial_1.0_val_dense_annotations.json
|
||||
|
||||
# data
|
||||
start_path = logs/vdgr/P1_K2_v1.0/epoch_best.ckpt
|
||||
rlv_hst_only = false
|
||||
|
||||
# visdial training
|
||||
nsp_loss_coeff = 0
|
||||
dense_loss_coeff = 1
|
||||
batch_multiply = 10
|
||||
batch_size = 1
|
||||
|
||||
# visdial model
|
||||
num_options_dense = 100
|
||||
|
||||
# visdial evaluation
|
||||
eval_batch_size = 1
|
||||
eval_line_batch_size = 100
|
||||
skip_mrr_eval = true
|
||||
|
||||
# training
|
||||
stop_epochs = 3
|
||||
dp_type = dp
|
||||
dense_loss = ce
|
||||
|
||||
# optimizer
|
||||
learning_rate_bert = 1e-4
|
||||
}
|
||||
|
||||
P2_LISTNET = ${P2_CE} {
|
||||
dense_loss = listnet
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue