94 lines
2.9 KiB
Text
94 lines
2.9 KiB
Text
# Settings grouped under the single top-level key `mixer`: dataset and feature
# paths (AVSD, NextQA), model config files, logging & checkpointing, data
# processing, training, generation, and miscellaneous options.
# All paths in this file are written as relative paths.
# NOTE(review): the lines containing only `|` are not key/value settings and look
# like copy/paste residue — confirm the config loader tolerates them, or remove.
mixer {
|
|
task = avsd
|
|
#################################################################################
|
|
# Datasets
|
|
# AVSD
|
|
|
|
avsd_processed = features/
|
|
avsd_train = raw_data/train_set4DSTC7-AVSD.json
|
|
avsd_val = raw_data/valid_set4DSTC7-AVSD.json
|
|
avsd_test_dstc7 = raw_data/test_set4DSTC7-AVSD.json
|
|
avsd_test_dstc8 = raw_data/test_set4DSTC8-AVSD.json
|
|
avsd_test_dstc10 = raw_data/test_set4DSTC10-AVSD.json
|
|
# NOTE(review): same value as `avsd_processed` above — confirm both keys are still read.
avsd_feature_path = features/
|
|
avsd_i3d_rgb = features/i3d_rgb
|
|
avsd_i3d_rgb_test = features/i3d_rgb_testset
|
|
avsd_i3d_flow = features/i3d_flow_all
|
|
avsd_i3d_flow_test = features/i3d_flow_testset
|
|
avsd_audio = features/vggish_all
|
|
avsd_audio_test = features/vggish_testset
|
|
avsd_objects = features/sam
|
|
avsd_objects_test = features/sam_testset
|
|
|
|
# NOTE(review): presumably selects which DSTC edition (7, 8 or 10) is used among the
# `avsd_test_dstc*` / `output_dir_dstc*` keys — confirm against the code that reads it.
dstc = 7
|
|
|
|
# NextQA
|
|
nextqa_root = processed/next_qa/annotations
|
|
nextqa_vid_feat = processed/next_qa/vid_feat
|
|
#################################################################################
|
|
# Model
|
|
bart_size = large # base, large
|
|
avsd_bart_base_config = config/avsd_bart_base.json
|
|
avsd_bart_large_config = config/avsd_bart_large.json
|
|
nextqa_bart_large_config = config/nextqa_bart_large.json
|
|
|
|
#################################################################################
|
|
# Logging & Checkpointing
|
|
log_dir = logs
|
|
output_dir_dstc7 = output/dstc7
|
|
output_dir_dstc8 = output/dstc8
|
|
output_dir_dstc10 = output/dstc10
|
|
output_dir_nextqa = output/nextqa
|
|
max_ckpt_to_keep = 5
|
|
# NOTE(review): if this file is parsed as HOCON, `none` is an unquoted string rather
# than a `null` value — confirm how the loader interprets it.
start_ckpt_for_generating = none
|
|
loads_start_path = false
|
|
next_logging_pct = 0.1
|
|
# NOTE(review): `save_ckpt` and `skip_saving_ckpt` (below) look like two switches for
# the same behaviour — confirm which one the code reads.
save_ckpt=true
|
|
skip_saving_ckpt = false
|
|
# NOTE(review): `stop_epochs` here and `stop_epoch` under "Training" both hold -1 —
# possibly duplicate keys; confirm which one the code reads.
stop_epochs = -1
|
|
resets_min_val_loss = false
|
|
restarts = false
|
|
uses_new_optimizer = true
|
|
sets_new_lr = false
|
|
################################################################################
|
|
# Data processing
|
|
expand_rnd = false
|
|
# NOTE(review): the value is the literal string `cap_sum` (identical to the key) —
# confirm the set of accepted values.
cap_sum = cap_sum
|
|
add_state_tokens = true
|
|
bart_max_input_len = 1024
|
|
num_workers = 0
|
|
n_history = 3
|
|
caption_drop_rate = 0.0
|
|
vis_feat_length = 36
|
|
#################################################################################
|
|
# Training
|
|
dp_type = ddp
|
|
batch_size = 16
|
|
num_epochs = 12
|
|
warmup_ratio = 0.1
|
|
batch_multiply = 1
|
|
skip_eval = false
|
|
# NOTE(review): see `stop_epochs` under "Logging & Checkpointing" — same value, similar name.
stop_epoch = -1
|
|
random_seed = 54
|
|
learning_rate_bart = 1e-5
|
|
learning_rate_other = 1e-4
|
|
min_lr = 0
|
|
clip_grad_value = 1.0
|
|
print_output = false
|
|
eval_first = false
|
|
overfit_size = -1
|
|
elbo_global_coeff = 100
|
|
elbo_local_coeff = 100
|
|
gen_coeff = 1
|
|
#################################################################################
|
|
# Generation
|
|
gen_batch_size = 1
|
|
beam_depth = 5
|
|
max_generation_length = 20
|
|
min_generation_length = 1
|
|
length_penalty = 0.3
|
|
#################################################################################
|
|
# Misc.
|
|
master_port = 5101
|
|
use_cpu = false
|
|
}
|