---
# Hyperparameter / checkpoint configuration.
# One key per line: a plain scalar may not contain ": ", so these pairs
# cannot share a single line and still parse as a mapping.
# NOTE(review): the groupings and descriptions below are inferred from the
# key names only — confirm against the code that consumes this file.

# Network dimensions (presumably layer count, hidden widths, attention heads).
LAYER: 6
HIDDEN_SIZE: 512
MEM_HIDDEN_SIZE: 2048
MULTI_HEAD: 8
DROPOUT_R: 0.1

# Flattening / output stage sizes (presumably; verify against the model code).
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024

# Optimisation settings. LR_BASE is kept in plain decimal form: exponent
# notation such as 1e-4 is read as a string by some YAML 1.1 loaders.
LR_BASE: 0.0001
LR_DECAY_R: 0.2
GRAD_ACCU_STEPS: 1

# Checkpoint selection. CKPT_VERSION is quoted so it always loads as a string.
CKPT_VERSION: 'small'
CKPT_EPOCH: 13