Code release

This commit is contained in:
Adnen Abdessaied 2023-10-25 15:38:09 +02:00
commit 09fb25e339
29 changed files with 7162 additions and 0 deletions

View file

@ -0,0 +1,40 @@
{
"attention_probs_dropout_prob": 0.1,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"max_position_embeddings": 512,
"num_attention_heads": 12,
"num_hidden_layers": 12,
"type_vocab_size": 2,
"vocab_size": 30522,
"v_feature_size": 2048,
"v_target_size": 1601,
"v_hidden_size": 1024,
"v_num_hidden_layers": 6,
"v_num_attention_heads": 8,
"v_intermediate_size": 1024,
"bi_hidden_size": 1024,
"bi_num_attention_heads": 8,
"bi_intermediate_size": 1024,
"bi_attention_type": 1,
"v_attention_probs_dropout_prob": 0.1,
"v_hidden_act": "gelu",
"v_hidden_dropout_prob": 0.1,
"v_initializer_range": 0.02,
"pooling_method": "mul",
"v_biattention_id": [0, 1, 2, 3, 4, 5],
"t_biattention_id": [6, 7, 8, 9, 10, 11],
"gnn_act": "gelu",
"num_v_gnn_layers": 2,
"num_q_gnn_layers": 2,
"num_h_gnn_layers": 2,
"num_gnn_attention_heads": 4,
"gnn_dropout_prob": 0.1,
"v_gnn_edge_dim": 12,
"q_gnn_edge_dim": 48,
"v_gnn_ids": [0, 1, 2, 3, 4, 5],
"t_gnn_ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
}

33
config/ensemble.conf Normal file
View file

@ -0,0 +1,33 @@
# `test` section of the ensemble configuration: split, input data, directories and the
# list of models involved.
test = {
split = test
skip_mrr_eval = true
# data
visdial_test_data = data/visdial_1.0_test.json
# directory
log_dir = logs/vdgr_ensemble
# NOTE(review): same path as `log_dir` in config/vdgr.conf; presumably where the
# per-model predictions are read from — confirm against the ensembling script.
pred_dir = logs/vdgr
visdial_output_dir = visdial_output
# One boolean per entry in `models` (both lists have 8 entries).
# NOTE(review): presumably index-aligned with `models` — confirm before editing either list.
processed = [
false,
false,
false,
false,
false,
false,
false,
false
]
# Model names: four "CE" and four "LISTNET" entries, K1..K4 each.
# The trailing comma after the last element is valid HOCON.
models = [
"P2_K1_v1.0_CE",
"P2_K2_v1.0_CE",
"P2_K3_v1.0_CE",
"P2_K4_v1.0_CE",
"P2_K1_v1.0_LISTNET",
"P2_K2_v1.0_LISTNET",
"P2_K3_v1.0_LISTNET",
"P2_K4_v1.0_LISTNET",
]
}

188
config/vdgr.conf Normal file
View file

@ -0,0 +1,188 @@
# Phase 1
# Base configuration. P2_CE is defined as `${P1} { ... }`, so every key set here is
# inherited by the phase-2 configurations unless they override it.
P1 {
use_cpu = false
# NOTE(review): unquoted, so a HOCON parser reads this as a number rather than the
# string "1.0" — confirm the consumer expects that.
visdial_version = 1.0
# Phase 2 flips this to true.
train_on_dense = false
# Phase 2 switches this to ndcg.
metrics_to_maximize = mrr
# visdial data
visdial_image_feats = data/visdial_img_feat.lmdb
visdial_image_adj_matrices = data/img_adj_matrices
visdial_question_adj_matrices = data/question_adj_matrices
visdial_history_adj_matrices = data/history_adj_matrices
visdial_train = data/visdial_1.0_train.json
visdial_val = data/visdial_1.0_val.json
visdial_test = data/visdial_1.0_test.json
visdial_val_dense_annotations = data/visdial_1.0_val_dense_annotations.json
# VisDial v0.9 splits
visdial_train_09 = data/visdial_0.9_train.json
visdial_val_09 = data/visdial_0.9_val.json
visdial_test_09 = data/visdial_0.9_test.json
# Additional validation sets (see the choices listed for `eval_dataset` below)
visdialconv_val = data/visdial_conv.json
visdialconv_val_dense_annotations = data/visdialconv_dense_annotations.json
visdialvispro_val = data/vispro.json
visdialvispro_val_dense_annotations = data/vispro_dense_annotations.json
visdial_question_parse_vocab = data/parse_vocab.pkl
# init
# NOTE(review): presumably the checkpoint training starts from; how it interacts with
# `loads_start_path = false` below is not visible here — confirm.
start_path = ckpt/vdgr_visdial_v1.0_after_warmup_K2.ckpt
model_config = config/bert_base_6layer_6conect.json
# visdial training
freeze_vilbert = false
visdial_tot_rounds = 11
num_negative_samples = 1
sequences_per_image = 2
batch_size = 8
lm_loss_coeff = 1
nsp_loss_coeff = 1
img_loss_coeff = 1
# NOTE(review): presumably a gradient-accumulation factor — confirm in the training loop.
batch_multiply = 1
use_trainval = false
dense_loss = ce
# Phase 2 raises this to 1 (and sets nsp_loss_coeff to 0).
dense_loss_coeff = 0
dataloader_text_only = false
rlv_hst_only = false
rlv_hst_dense_round = false
# visdial model
mask_prob = 0.1
image_mask_prob = 0.1
max_seq_len = 256
num_options = 100
num_options_dense = 100
use_embedding = joint
# visdial evaluation
eval_visdial_on_test = true
eval_batch_size = 1
eval_line_batch_size = 200
skip_mrr_eval = false
skip_ndcg_eval = false
skip_visdial_eval = false
eval_visdial_every = 1
eval_dataset = visdial # choices = [visdial, visdial_conv, visdial_vispro]
continue_evaluation = false
eval_at_start = false
eval_before_training = false
initializer = normal
bert_cased = false
# restore ckpt
loads_best_ckpt = false
loads_ckpt = false
restarts = false
resets_max_metric = false
uses_new_optimizer = false
sets_new_lr = false
loads_start_path = false
# logging
random_seed = 42
next_logging_pct = 1.0
next_evaluating_pct = 50.0
max_ckpt_to_keep = 1
num_epochs = 20
early_stop_epoch = 5
skip_saving_ckpt = false
# Phase 2 uses `dp` instead.
dp_type = apex
stack_gr_data = false
master_port = 5122
# NOTE(review): -1 presumably means "no limit" for stop_epochs and num_samples — confirm.
stop_epochs = -1
train_each_round = false
drop_last_answer = false
num_samples = -1
# predicting
predict_split = test
predict_each_round = false
predict_dense_round = false
num_test_dialogs = 8000
num_val_dialogs = 2064
save_score = false
# optimizer
reset_optim = none
learning_rate_bert = 5e-6
learning_rate_gnn = 2e-4
gnn_weight_decay = 0.01
use_diff_lr_gnn = true
min_lr = 0
decay_method_bert = linear
decay_method_gnn = linear
decay_exp = 2
max_grad_norm = 1.0
task_optimizer = adam
warmup_ratio = 0.1
# directory
log_dir = logs/vdgr
data_dir = data
visdial_output_dir = visdial_output
bert_cache_dir = transformers
# keep track of other hparams in bert json
# NOTE(review): these GNN settings are duplicated from the BERT model JSON (presumably
# the file named by `model_config`); keep both places in sync when changing them.
v_gnn_edge_dim = 12 # 11 classes + hub_node
q_gnn_edge_dim = 48 # 47 classes + hub_node
num_v_gnn_layers = 2
num_q_gnn_layers = 2
num_h_gnn_layers = 2
num_gnn_attention_heads = 4
v_gnn_ids = [0, 1, 2, 3, 4, 5]
t_gnn_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
}
# Phase 2
# Cross-entropy variant. `${P1} { ... }` starts from every P1 setting and applies the
# overrides and additions below.
P2_CE = ${P1} {
# basic
train_on_dense = true
use_trainval = true
metrics_to_maximize = ndcg
# Dense-annotation data and graph index mappings (keys not present in P1)
visdial_train_dense = data/visdial_1.0_train_dense.json
visdial_train_dense_annotations = data/visdial_1.0_train_dense_annotations.json
visdial_val_dense = data/visdial_1.0_val.json
tr_graph_idx_mapping = data/tr_dense_mapping.json
val_graph_idx_mapping = data/val_dense_mapping.json
test_graph_idx_mapping = data/test_dense_mapping.json
# The next two keys repeat the P1 values unchanged.
visdial_val = data/visdial_1.0_val.json
visdial_val_dense_annotations = data/visdial_1.0_val_dense_annotations.json
# init
# NOTE(review): path lies under P1's log_dir (logs/vdgr); presumably the best
# checkpoint of a phase-1 run — confirm.
start_path = logs/vdgr/P1_K2_v1.0/epoch_best.ckpt
# Same value as in P1.
rlv_hst_only = false
# visdial training
# P1 values: nsp_loss_coeff = 1, dense_loss_coeff = 0, batch_multiply = 1, batch_size = 8.
nsp_loss_coeff = 0
dense_loss_coeff = 1
batch_multiply = 10
batch_size = 1
# visdial model
# Same value as in P1.
num_options_dense = 100
# visdial evaluation
# eval_batch_size repeats the P1 value; eval_line_batch_size is halved from 200;
# skip_mrr_eval is false in P1.
eval_batch_size = 1
eval_line_batch_size = 100
skip_mrr_eval = true
# training
# P1 values: stop_epochs = -1, dp_type = apex; dense_loss repeats the P1 value.
stop_epochs = 3
dp_type = dp
dense_loss = ce
# optimizer
# P1 value: 5e-6.
learning_rate_bert = 1e-4
}
# ListNet variant of phase 2: identical to P2_CE except for the dense loss.
P2_LISTNET = ${P2_CE} {
dense_loss = listnet
}