Initial commit
This commit is contained in:
commit
b5f3b728c3
53 changed files with 7008 additions and 0 deletions
0
cfgs/.gitkeep
Normal file
0
cfgs/.gitkeep
Normal file
267
cfgs/base_cfgs.py
Normal file
267
cfgs/base_cfgs.py
Normal file
|
@ -0,0 +1,267 @@
|
|||
# --------------------------------------------------------
|
||||
# mcan-vqa (Deep Modular Co-Attention Networks)
|
||||
# Licensed under The MIT License [see LICENSE for details]
|
||||
# Written by Yuhao Cui https://github.com/cuiyuhao1996
|
||||
# --------------------------------------------------------
|
||||
|
||||
from cfgs.path_cfgs import PATH
|
||||
|
||||
import os, torch, random
|
||||
import numpy as np
|
||||
from types import MethodType
|
||||
|
||||
|
||||
class Cfgs(PATH):
    """Global configuration for mcan-vqa training/evaluation.

    Extends :class:`PATH` (path layout) with device, RNG, data-provider,
    network, optimizer and DNC hyper-parameters.  Extra options are merged
    in from a parsed-argument namespace via :meth:`parse_to_dict` +
    :meth:`add_args`, and derived values are finalized by :meth:`proc`.
    """

    def __init__(self, EXP_NAME, DATASET_PATH):
        super(Cfgs, self).__init__(EXP_NAME, DATASET_PATH)

        # Set Devices
        # If use multi-gpu training, set e.g.'0, 1, 2' instead
        self.GPU = '0'

        # Set RNG For CPU And GPUs
        self.SEED = random.randint(0, 99999999)

        # -------------------------
        # ---- Version Control ----
        # -------------------------

        # Define a specific name to start new training
        # self.VERSION = 'Anonymous_' + str(self.SEED)
        self.VERSION = str(self.SEED)

        # Resume training
        self.RESUME = False

        # Used in Resume training and testing
        self.CKPT_VERSION = self.VERSION
        self.CKPT_EPOCH = 0

        # Absolutely checkpoint path, 'CKPT_VERSION' and 'CKPT_EPOCH' will be overridden
        self.CKPT_PATH = None

        # Print loss every step
        self.VERBOSE = True

        # ------------------------------
        # ---- Data Provider Params ----
        # ------------------------------

        # {'train', 'val', 'test'}
        self.RUN_MODE = 'train'

        # Set True to evaluate offline
        self.EVAL_EVERY_EPOCH = True

        # # Define the 'train' 'val' 'test' data split
        # # (EVAL_EVERY_EPOCH triggered when set {'train': 'train'})
        # self.SPLIT = {
        #     'train': '',
        #     'val': 'val',
        #     'test': 'test',
        # }
        # # A external method to set train split
        # self.TRAIN_SPLIT = 'train+val+vg'
        #
        # NOTE(review): SPLIT and TRAIN_SPLIT are commented out here but are
        # read in proc(); they must be injected externally (e.g. via
        # add_args) before proc() is called, or proc() raises AttributeError
        # — TODO confirm against the caller.

        # Set True to use pretrained word embedding
        # (GloVe: spaCy https://spacy.io/)
        self.USE_GLOVE = True

        # Word embedding matrix size
        # (token size x WORD_EMBED_SIZE)
        self.WORD_EMBED_SIZE = 300

        # Max length of question sentences
        self.MAX_TOKEN = 15

        # VGG 4096D features
        self.FRAME_FEAT_SIZE = 4096

        # C3D 4096D features
        self.CLIP_FEAT_SIZE = 4096

        # Size of the answer vocabulary (classification head output).
        self.NUM_ANS = 1000

        # Default training batch size: 64
        self.BATCH_SIZE = 64

        # Multi-thread I/O
        self.NUM_WORKERS = 8

        # Use pin memory
        # (Warning: pin memory can accelerate GPU loading but may
        # increase the CPU memory usage when NUM_WORKS is large)
        self.PIN_MEM = True

        # Large model can not training with batch size 64
        # Gradient accumulate can split batch to reduce gpu memory usage
        # (Warning: BATCH_SIZE should be divided by GRAD_ACCU_STEPS)
        self.GRAD_ACCU_STEPS = 1

        # Set 'external': use external shuffle method to implement training shuffle
        # Set 'internal': use pytorch dataloader default shuffle method
        self.SHUFFLE_MODE = 'external'

        # ------------------------
        # ---- Network Params ----
        # ------------------------

        # Model deeps
        # (Encoder and Decoder will be same deeps)
        self.LAYER = 6

        # Model hidden size
        # (512 as default, bigger will be a sharp increase of gpu memory usage)
        self.HIDDEN_SIZE = 512

        # Multi-head number in MCA layers
        # (Warning: HIDDEN_SIZE should be divided by MULTI_HEAD)
        self.MULTI_HEAD = 8

        # Dropout rate for all dropout layers
        # (dropout can prevent overfitting: [Dropout: a simple way to prevent neural networks from overfitting])
        self.DROPOUT_R = 0.1

        # MLP size in flatten layers
        self.FLAT_MLP_SIZE = 512

        # Flatten the last hidden to vector with {n} attention glimpses
        self.FLAT_GLIMPSES = 1
        self.FLAT_OUT_SIZE = 1024

        # --------------------------
        # ---- Optimizer Params ----
        # --------------------------

        # The base learning rate
        self.LR_BASE = 0.0001

        # Learning rate decay ratio
        self.LR_DECAY_R = 0.2

        # Learning rate decay at {x, y, z...} epoch
        self.LR_DECAY_LIST = [10, 12]

        # Max training epoch
        self.MAX_EPOCH = 30

        # Gradient clip
        # (default: -1 means not using)
        self.GRAD_NORM_CLIP = -1

        # Adam optimizer betas and eps
        self.OPT_BETAS = (0.9, 0.98)
        self.OPT_EPS = 1e-9
        self.OPT_WEIGHT_DECAY = 1e-5

        # --------------------------
        # ---- DNC Hyper-Params ----
        # --------------------------
        # Differentiable Neural Computer memory configuration.
        self.IN_SIZE_DNC = self.HIDDEN_SIZE
        self.OUT_SIZE_DNC = self.HIDDEN_SIZE
        self.WORD_LENGTH_DNC = 512
        self.CELL_COUNT_DNC = 64
        # Total memory width = cells x word length.
        self.MEM_HIDDEN_SIZE = self.CELL_COUNT_DNC * self.WORD_LENGTH_DNC
        self.N_READ_HEADS_DNC = 4

    def parse_to_dict(self, args):
        """Return a dict of all non-private, non-None, non-method
        attributes of *args* (typically an argparse Namespace)."""
        args_dict = {}
        for arg in dir(args):
            if not arg.startswith('_') and not isinstance(getattr(args, arg), MethodType):
                if getattr(args, arg) is not None:
                    args_dict[arg] = getattr(args, arg)

        return args_dict

    def add_args(self, args_dict):
        """Set each key of *args_dict* as an attribute on this config,
        overriding any default defined in __init__."""
        for arg in args_dict:
            setattr(self, arg, args_dict[arg])

    def proc(self):
        """Finalize the configuration: seed RNGs, resolve devices,
        splits, gradient accumulation and derived network sizes.

        Must be called after all external options have been merged in
        (see the SPLIT/TRAIN_SPLIT note in __init__).
        """
        assert self.RUN_MODE in ['train', 'val', 'test']

        # ------------ Devices setup
        # os.environ['CUDA_VISIBLE_DEVICES'] = self.GPU
        self.N_GPU = len(self.GPU.split(','))
        self.DEVICES = [_ for _ in range(self.N_GPU)]
        torch.set_num_threads(2)

        # ------------ Seed setup
        # fix pytorch seed
        torch.manual_seed(self.SEED)
        if self.N_GPU < 2:
            torch.cuda.manual_seed(self.SEED)
        else:
            torch.cuda.manual_seed_all(self.SEED)
        torch.backends.cudnn.deterministic = True

        # fix numpy seed
        np.random.seed(self.SEED)

        # fix random seed
        random.seed(self.SEED)

        if self.CKPT_PATH is not None:
            print('Warning: you are now using CKPT_PATH args, '
                  'CKPT_VERSION and CKPT_EPOCH will not work')
            # Derive a unique version tag from the checkpoint file name.
            self.CKPT_VERSION = self.CKPT_PATH.split('/')[-1] + '_' + str(random.randint(0, 99999999))

        # ------------ Split setup
        # NOTE(review): relies on self.SPLIT / self.TRAIN_SPLIT being set
        # externally — see __init__.
        self.SPLIT['train'] = self.TRAIN_SPLIT
        # Offline per-epoch eval is impossible when 'val' is folded into
        # the training split, and pointless outside train mode.
        if 'val' in self.SPLIT['train'].split('+') or self.RUN_MODE not in ['train']:
            self.EVAL_EVERY_EPOCH = False

        if self.RUN_MODE not in ['test']:
            self.TEST_SAVE_PRED = False

        # ------------ Gradient accumulate setup
        assert self.BATCH_SIZE % self.GRAD_ACCU_STEPS == 0
        self.SUB_BATCH_SIZE = int(self.BATCH_SIZE / self.GRAD_ACCU_STEPS)

        # Use a small eval batch will reduce gpu memory usage
        self.EVAL_BATCH_SIZE = 32

        # ------------ Networks setup
        # FeedForwardNet size in every MCA layer
        self.FF_SIZE = int(self.HIDDEN_SIZE * 4)
        # NOTE(review): int() is 0 — FF_MEM_SIZE is effectively unset here;
        # looks like a placeholder to be overridden elsewhere — TODO confirm.
        self.FF_MEM_SIZE = int()

        # A pipe line hidden size in attention compute
        assert self.HIDDEN_SIZE % self.MULTI_HEAD == 0
        self.HIDDEN_SIZE_HEAD = int(self.HIDDEN_SIZE / self.MULTI_HEAD)

    def __str__(self):
        """Print every public, non-method config attribute and return ''.

        NOTE(review): this prints as a side effect rather than building the
        string — callers use str(cfg)/print(cfg) purely for the console dump.
        """
        for attr in dir(self):
            if not attr.startswith('__') and not isinstance(getattr(self, attr), MethodType):
                print('{ %-17s }->' % attr, getattr(self, attr))

        return ''

    def check_path(self):
        """Verify that the feature directories and QA json files exist;
        exit the process with status -1 on the first missing path."""
        print('Checking dataset ...')

        if not os.path.exists(self.FRAMES):
            print(self.FRAMES + 'NOT EXIST')
            exit(-1)

        if not os.path.exists(self.CLIPS):
            print(self.CLIPS + 'NOT EXIST')
            exit(-1)

        for mode in self.QA_PATH:
            if not os.path.exists(self.QA_PATH[mode]):
                print(self.QA_PATH[mode] + 'NOT EXIST')
                exit(-1)

        print('Finished')
        print('')
|
6
cfgs/fusion_cfgs.yml
Normal file
6
cfgs/fusion_cfgs.yml
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Fusion-module hyper-parameters (overrides merged into Cfgs).
# Controller RNN dimensions.
CONTROLLER_INPUT_SIZE: 512
CONTROLLER_HIDDEN_SIZE: 512
CONTROLLER_NUM_LAYERS: 2
# Composition MLP dimensions.
HIDDEN_DIM_COMP: 1024
OUT_DIM_COMP: 512
COMP_NUM_LAYERS: 2
|
61
cfgs/path_cfgs.py
Normal file
61
cfgs/path_cfgs.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
# --------------------------------------------------------
|
||||
# mcan-vqa (Deep Modular Co-Attention Networks)
|
||||
# Licensed under The MIT License [see LICENSE for details]
|
||||
# Written by Yuhao Cui https://github.com/cuiyuhao1996
|
||||
# --------------------------------------------------------
|
||||
|
||||
import os
|
||||
|
||||
class PATH:
    """Filesystem layout for one experiment.

    Derives dataset feature/QA paths from ``DATASET_PATH`` and lays out a
    per-experiment output tree under ``./EXP_NAME`` (results, logs,
    tensorboard, checkpoints).
    """

    def __init__(self, EXP_NAME, DATASET_PATH):
        # name of the experiment (also the output directory name)
        self.EXP_NAME = EXP_NAME

        # Dataset root path
        # NOTE(review): later paths are built by plain string concatenation,
        # so DATASET_PATH is assumed to end with a path separator — confirm
        # against callers.
        self.DATASET_PATH = DATASET_PATH

        # Pre-extracted frame (VGG) and clip (C3D) feature directories.
        self.FRAMES = os.path.join(DATASET_PATH, 'frame_feat/')
        self.CLIPS = os.path.join(DATASET_PATH, 'clip_feat/')

    def init_path(self):
        """Define QA/result paths and create the output directory tree.

        Safe to call repeatedly; existing directories are left untouched.
        """
        self.QA_PATH = {
            'train': self.DATASET_PATH + 'train_qa.json',
            'val': self.DATASET_PATH + 'val_qa.json',
            'test': self.DATASET_PATH + 'test_qa.json',
        }
        self.C3D_PATH = self.DATASET_PATH + 'c3d.pickle'

        self.RESULT_PATH = './' + self.EXP_NAME + '/results/result_test/'
        self.PRED_PATH = './' + self.EXP_NAME + '/results/pred/'
        self.CACHE_PATH = './' + self.EXP_NAME + '/results/cache/'
        self.LOG_PATH = './' + self.EXP_NAME + '/results/log/'
        self.TB_PATH = './' + self.EXP_NAME + '/results/tensorboard/'
        self.CKPTS_PATH = './' + self.EXP_NAME + '/ckpts/'

        # Bug fix: the original guarded each os.mkdir() with an os.listdir()
        # membership test, which raised FileNotFoundError when the experiment
        # directory existed but 'results/' did not (mkdir does not create
        # parents), and was race-prone (check-then-create).  makedirs with
        # exist_ok=True creates any missing parents atomically per level and
        # is idempotent.
        for path in (self.RESULT_PATH, self.PRED_PATH, self.CACHE_PATH,
                     self.LOG_PATH, self.TB_PATH, self.CKPTS_PATH):
            os.makedirs(path, exist_ok=True)

    def check_path(self):
        # Subclasses (the concrete config) override this with a real
        # dataset-existence check.
        raise NotImplementedError
|
||||
|
13
cfgs/small_model.yml
Normal file
13
cfgs/small_model.yml
Normal file
|
@ -0,0 +1,13 @@
|
|||
# 'small' model preset (overrides merged into Cfgs).
# Encoder/decoder depth and widths.
LAYER: 6
HIDDEN_SIZE: 512
# NOTE(review): differs from the base default (CELL_COUNT_DNC *
# WORD_LENGTH_DNC = 32768) — presumably intentional for the small
# model; verify.
MEM_HIDDEN_SIZE: 2048
MULTI_HEAD: 8
DROPOUT_R: 0.1
# Flatten-layer sizes.
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 1024
# Optimizer settings.
LR_BASE: 0.0001
LR_DECAY_R: 0.2
GRAD_ACCU_STEPS: 1
# Checkpoint to resume/evaluate from.
CKPT_VERSION: 'small'
CKPT_EPOCH: 13
|
Loading…
Add table
Add a link
Reference in a new issue