# --------------------------------------------------------
# mcan-vqa (Deep Modular Co-Attention Networks)
# Licensed under The MIT License [see LICENSE for details]
# Written by Yuhao Cui https://github.com/cuiyuhao1996
# --------------------------------------------------------

from cfgs.path_cfgs import PATH

import os, torch, random
import numpy as np
from types import MethodType

class Cfgs(PATH):
    def __init__(self, EXP_NAME, DATASET_PATH):
        super(Cfgs, self).__init__(EXP_NAME, DATASET_PATH)

        # Set devices
        # For multi-GPU training, set e.g. '0, 1, 2' instead
        self.GPU = '0'

        # Seed the RNGs for CPU and GPUs
        self.SEED = random.randint(0, 99999999)

        # -------------------------
        # ---- Version Control ----
        # -------------------------

        # Define a specific version name to start a new training run
        # self.VERSION = 'Anonymous_' + str(self.SEED)
        self.VERSION = str(self.SEED)

        # Resume training
        self.RESUME = False

        # Used when resuming training, and in testing
        self.CKPT_VERSION = self.VERSION
        self.CKPT_EPOCH = 0

        # Absolute checkpoint path; if set, 'CKPT_VERSION' and
        # 'CKPT_EPOCH' are ignored
        self.CKPT_PATH = None

        # Print the loss at every step
        self.VERBOSE = True


        # ------------------------------
        # ---- Data Provider Params ----
        # ------------------------------

        # {'train', 'val', 'test'}
        self.RUN_MODE = 'train'

        # Set True to evaluate offline
        self.EVAL_EVERY_EPOCH = True

        # # Define the 'train' 'val' 'test' data splits
        # # (EVAL_EVERY_EPOCH is triggered when set to {'train': 'train'})
        # self.SPLIT = {
        #     'train': '',
        #     'val': 'val',
        #     'test': 'test',
        # }

        # # An external method to set the train split
        # self.TRAIN_SPLIT = 'train+val+vg'

        # Set True to use a pretrained word embedding
        # (GloVe: spaCy https://spacy.io/)
        self.USE_GLOVE = True

        # Word embedding matrix size
        # (token size x WORD_EMBED_SIZE)
        self.WORD_EMBED_SIZE = 300

        # Max length of question sentences
        self.MAX_TOKEN = 15

        # VGG 4096-D frame features
        self.FRAME_FEAT_SIZE = 4096

        # C3D 4096-D clip features
        self.CLIP_FEAT_SIZE = 4096

        # Answer vocabulary size
        self.NUM_ANS = 1000

        # Default training batch size: 64
        self.BATCH_SIZE = 64

        # Multi-thread I/O
        self.NUM_WORKERS = 8

        # Use pinned memory
        # (Warning: pinned memory can accelerate GPU loading but may
        # increase CPU memory usage when NUM_WORKERS is large)
        self.PIN_MEM = True

        # Large models cannot train with batch size 64;
        # gradient accumulation splits each batch to reduce GPU memory usage
        # (Warning: BATCH_SIZE should be divisible by GRAD_ACCU_STEPS)
        self.GRAD_ACCU_STEPS = 1
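        # A minimal sketch of how a training loop could consume the derived
        # SUB_BATCH_SIZE chunks (hypothetical names; the real loop lives in
        # the training code, not in this config):
        #
        #   optimizer.zero_grad()
        #   for sub_batch, sub_labels in split(batch, self.GRAD_ACCU_STEPS):
        #       loss = criterion(model(sub_batch), sub_labels) / self.GRAD_ACCU_STEPS
        #       loss.backward()   # gradients accumulate across sub-batches
        #   optimizer.step()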

        # Set 'external' to use an external shuffle method for training,
        # or 'internal' to use the PyTorch DataLoader's default shuffling
        self.SHUFFLE_MODE = 'external'


        # ------------------------
        # ---- Network Params ----
        # ------------------------

        # Model depth
        # (the encoder and decoder use the same number of layers)
        self.LAYER = 6

        # Model hidden size
        # (512 by default; larger values sharply increase GPU memory usage)
        self.HIDDEN_SIZE = 512

        # Number of attention heads in the MCA layers
        # (Warning: HIDDEN_SIZE should be divisible by MULTI_HEAD)
        self.MULTI_HEAD = 8
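        # With the defaults above, proc() derives the per-head size as
        # HIDDEN_SIZE / MULTI_HEAD = 512 / 8 = 64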

        # Dropout rate for all dropout layers
        # (dropout can prevent overfitting: [Dropout: a simple way to prevent neural networks from overfitting])
        self.DROPOUT_R = 0.1

        # MLP size in the flatten layers
        self.FLAT_MLP_SIZE = 512

        # Flatten the last hidden states to a vector with {n} attention glimpses
        self.FLAT_GLIMPSES = 1
        self.FLAT_OUT_SIZE = 1024


        # --------------------------
        # ---- Optimizer Params ----
        # --------------------------

        # The base learning rate
        self.LR_BASE = 0.0001

        # Learning rate decay ratio
        self.LR_DECAY_R = 0.2

        # Decay the learning rate at epochs {x, y, z, ...}
        self.LR_DECAY_LIST = [10, 12]
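        # With the defaults above, a schedule that multiplies the rate by
        # LR_DECAY_R at each listed epoch would give:
        #   before epoch 10: 1e-4
        #   after epoch 10 : 1e-4 * 0.2 = 2e-5
        #   after epoch 12 : 2e-5 * 0.2 = 4e-6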

        # Max training epochs
        self.MAX_EPOCH = 30

        # Gradient clipping
        # (default: -1 means disabled)
        self.GRAD_NORM_CLIP = -1
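        # When set to a positive value, the training loop would typically
        # apply it with PyTorch's built-in clipper, e.g.:
        #   torch.nn.utils.clip_grad_norm_(model.parameters(), self.GRAD_NORM_CLIP)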

        # Adam optimizer betas, eps, and weight decay
        self.OPT_BETAS = (0.9, 0.98)
        self.OPT_EPS = 1e-9
        self.OPT_WEIGHT_DECAY = 1e-5

        # --------------------------
        # ---- DNC Hyper-Params ----
        # --------------------------

        self.IN_SIZE_DNC = self.HIDDEN_SIZE
        self.OUT_SIZE_DNC = self.HIDDEN_SIZE
        self.WORD_LENGTH_DNC = 512
        self.CELL_COUNT_DNC = 64
        self.MEM_HIDDEN_SIZE = self.CELL_COUNT_DNC * self.WORD_LENGTH_DNC
        self.N_READ_HEADS_DNC = 4
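        # The DNC memory is a CELL_COUNT_DNC x WORD_LENGTH_DNC matrix, so with
        # the defaults above MEM_HIDDEN_SIZE = 64 * 512 = 32768; each timestep
        # reads N_READ_HEADS_DNC = 4 word vectors from that memory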

    def parse_to_dict(self, args):
        # Collect every non-private, non-method, non-None attribute of
        # 'args' into a plain dict
        args_dict = {}
        for arg in dir(args):
            if not arg.startswith('_') and not isinstance(getattr(args, arg), MethodType):
                if getattr(args, arg) is not None:
                    args_dict[arg] = getattr(args, arg)

        return args_dict


    def add_args(self, args_dict):
        for arg in args_dict:
            setattr(self, arg, args_dict[arg])
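
    # A typical round trip (a sketch; 'args' is assumed to be an
    # argparse.Namespace produced by the repo's run script):
    #
    #   __C = Cfgs(EXP_NAME, DATASET_PATH)
    #   __C.add_args(__C.parse_to_dict(args))
    #   __C.proc()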

    def proc(self):
        assert self.RUN_MODE in ['train', 'val', 'test']

        # ------------ Devices setup
        # os.environ['CUDA_VISIBLE_DEVICES'] = self.GPU
        self.N_GPU = len(self.GPU.split(','))
        self.DEVICES = [_ for _ in range(self.N_GPU)]
        torch.set_num_threads(2)


        # ------------ Seed setup
        # Fix the PyTorch seed
        torch.manual_seed(self.SEED)
        if self.N_GPU < 2:
            torch.cuda.manual_seed(self.SEED)
        else:
            torch.cuda.manual_seed_all(self.SEED)
        torch.backends.cudnn.deterministic = True

        # Fix the numpy seed
        np.random.seed(self.SEED)

        # Fix the random seed
        random.seed(self.SEED)

        if self.CKPT_PATH is not None:
            print('Warning: you are now using the CKPT_PATH arg, '
                  'CKPT_VERSION and CKPT_EPOCH will not work')
            self.CKPT_VERSION = self.CKPT_PATH.split('/')[-1] + '_' + str(random.randint(0, 99999999))


        # ------------ Split setup
        self.SPLIT['train'] = self.TRAIN_SPLIT
        if 'val' in self.SPLIT['train'].split('+') or self.RUN_MODE not in ['train']:
            self.EVAL_EVERY_EPOCH = False

        if self.RUN_MODE not in ['test']:
            self.TEST_SAVE_PRED = False

        # ------------ Gradient accumulation setup
        assert self.BATCH_SIZE % self.GRAD_ACCU_STEPS == 0
        self.SUB_BATCH_SIZE = int(self.BATCH_SIZE / self.GRAD_ACCU_STEPS)

        # Using a small eval batch reduces GPU memory usage
        self.EVAL_BATCH_SIZE = 32


        # ------------ Networks setup
        # FeedForwardNet size in every MCA layer
        self.FF_SIZE = int(self.HIDDEN_SIZE * 4)
        # (note: int() evaluates to 0, so FF_MEM_SIZE is effectively a placeholder)
        self.FF_MEM_SIZE = int()

        # Per-head hidden size in the attention computation
        assert self.HIDDEN_SIZE % self.MULTI_HEAD == 0
        self.HIDDEN_SIZE_HEAD = int(self.HIDDEN_SIZE / self.MULTI_HEAD)


    def __str__(self):
        # Print every attribute that is not a dunder or a method; return an
        # empty string so that print(cfg) does not append anything extra
        for attr in dir(self):
            if not attr.startswith('__') and not isinstance(getattr(self, attr), MethodType):
                print('{ %-17s }->' % attr, getattr(self, attr))

        return ''

    def check_path(self):
        print('Checking dataset ...')

        if not os.path.exists(self.FRAMES):
            print(self.FRAMES + ' NOT EXIST')
            exit(-1)

        if not os.path.exists(self.CLIPS):
            print(self.CLIPS + ' NOT EXIST')
            exit(-1)

        for mode in self.QA_PATH:
            if not os.path.exists(self.QA_PATH[mode]):
                print(self.QA_PATH[mode] + ' NOT EXIST')
                exit(-1)

        print('Finished')
        print('')
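
# A minimal usage sketch, assuming a hypothetical experiment name and
# dataset root (neither is shipped with the repo):
#
#   __C = Cfgs('exp0', './datasets')
#   __C.proc()          # validates RUN_MODE, seeds RNGs, derives sizes
#   __C.check_path()    # aborts if a feature or QA path is missing
#   print(__C)          # dumps every hyper-parameter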