neuro-symbolic-visual-dialog/prog_generator/options_caption_parser.py

284 lines
7.9 KiB
Python
Raw Normal View History

2022-08-10 16:49:55 +02:00
"""
author: Adnen Abdessaied
maintainer: "Adnen Abdessaied"
website: adnenabdessaied.de
version: 1.0.1
"""
# --------------------------------------------------------
# adapted from https://github.com/kexinyi/ns-vqa/blob/master/scene_parse/attr_net/options.py
# --------------------------------------------------------
import argparse
import os
import utils
import torch
class Options():
    """Command-line option handling for the CaptionNet program parser.

    Usage: construct, then call ``parse()`` to obtain the parsed
    ``argparse.Namespace``. ``parse()`` also post-processes composite
    string options (gpu ids, Adam betas, lr decay marks), prints all
    options, and persists them to ``<run_dir>/opts.txt``.
    """

    def __init__(self):
        self.parser = argparse.ArgumentParser()
        # Arguments are registered lazily in initialize(); this flag
        # prevents double registration if parse() is called twice.
        self.initialized = False

    def initialize(self):
        """Register every command-line argument on the parser."""
        self.parser.add_argument(
            '--mode',
            required=True,
            type=str,
            choices=['train', 'test'],
            help='The mode of the experiment')
        self.parser.add_argument(
            '--run_dir',
            required=True,
            type=str,
            help='The experiment directory')
        self.parser.add_argument(
            '--load_checkpoint_path',
            default=None,
            type=str,
            help='The path to the pretrained CaptionNet')
        self.parser.add_argument(
            '--res_path',
            required=True,
            type=str,
            help='Path where to log the predicted caption programs')
        self.parser.add_argument(
            '--gpu_ids',
            default='0',
            type=str,
            help='Id of the gpu to be used')
        self.parser.add_argument(
            '--seed',
            default=42,
            type=int,
            help='The seed used in training')
        self.parser.add_argument(
            '--dataPathTr',
            required=True,
            type=str,
            help='Path to the h5 file of the Clevr-Dialog preprocessed training data')
        self.parser.add_argument(
            '--dataPathVal',
            required=True,
            type=str,
            help='Path to the h5 file of the Clevr-Dialog preprocessed validation data')
        self.parser.add_argument(
            '--dataPathTest',
            required=True,
            type=str,
            help='Path to the h5 file of the Clevr-Dialog preprocessed test data')
        self.parser.add_argument(
            '--vocabPath',
            required=True,
            type=str,
            help='Path to the generated vocabulary')
        self.parser.add_argument(
            '--batch_size',
            default=64,
            type=int,
            help='Batch size')
        self.parser.add_argument(
            '--num_workers',
            default=0,
            type=int,
            help='Number of workers for loading')
        self.parser.add_argument(
            '--num_iters',
            default=5000,
            type=int,
            help='Total number of iterations')
        self.parser.add_argument(
            '--display_every',
            default=5,
            type=int,
            help='Display training information every N iterations')
        self.parser.add_argument(
            '--debug_every',
            default=100,
            type=int,
            help='Display debug message every N iterations')
        self.parser.add_argument(
            '--validate_every',
            default=1000,
            type=int,
            help='Validate every N iterations')
        self.parser.add_argument(
            '--shuffle_data',
            default=1,
            type=int,
            help='Activate to shuffle the training data')
        self.parser.add_argument(
            '--optim',
            default='adam',
            type=str,
            help='The name of the optimizer to be used')
        self.parser.add_argument(
            '--lr',
            default=1e-3,
            type=float,
            help='Base learning rate')
        self.parser.add_argument(
            '--betas',
            default='0.9, 0.98',
            type=str,
            help='Adam optimizer\'s betas')
        self.parser.add_argument(
            '--eps',
            # Was the string '1e-9'; a float default is consistent with the
            # other numeric options and does not rely on argparse's implicit
            # type-conversion of string defaults.
            default=1e-9,
            type=float,
            help='Adam optimizer\'s epsilon')
        self.parser.add_argument(
            '--lr_decay_marks',
            default='50000, 55000',
            type=str,
            help='Learning rate decay marks')
        self.parser.add_argument(
            '--lr_decay_factor',
            default=0.5,
            type=float,
            help='Learning rate decay factor')
        self.parser.add_argument(
            '--weight_decay',
            default=1e-6,
            type=float,
            help='Weight decay')
        self.parser.add_argument(
            '--embedDim',
            default=300,
            type=int,
            help='Embedding dimension')
        self.parser.add_argument(
            '--hiddenDim',
            default=512,
            type=int,
            help='LSTM hidden dimension')
        self.parser.add_argument(
            '--numLayers',
            default=2,
            type=int,
            help='Number of hidden LSTM layers')
        self.parser.add_argument(
            '--dropout',
            default=0.1,
            type=float,
            help='Dropout value')
        self.parser.add_argument(
            '--multiHead',
            default=8,
            type=int,
            help='Number of attention heads')
        self.parser.add_argument(
            '--hiddenSizeHead',
            default=64,
            type=int,
            help='Dimension of each attention head')
        self.parser.add_argument(
            '--FeedForwardSize',
            default=2048,
            type=int,
            help='Dimension of the feed forward layer')
        self.parser.add_argument(
            '--FlatMLPSize',
            default=512,
            type=int,
            help='MLP flatten size')
        self.parser.add_argument(
            '--FlatGlimpses',
            default=1,
            type=int,
            help='Number of flatten glimpses')
        self.parser.add_argument(
            '--FlatOutSize',
            default=512,
            type=int,
            help='Final attention reduction dimension')
        self.parser.add_argument(
            '--layers',
            default=6,
            type=int,
            help='Number of self attention layers')
        self.parser.add_argument(
            '--bidirectional',
            default=1,
            type=int,
            help='Activate to use bidirectional LSTMs')
        self.initialized = True

    def parse(self):
        """Parse sys.argv, post-process composite options, print and save them.

        Post-processing:
          * ``gpu_ids``: 'a,b,c' string -> list of non-negative ints;
            emptied when CUDA is unavailable (CPU mode).
          * ``betas``: 'x, y' string -> list of floats.
          * ``lr_decay_marks``: 'm1, m2' string -> strictly increasing
            list of ints.

        Returns:
            argparse.Namespace: the fully parsed and post-processed options.

        Raises:
            ValueError: if ``lr_decay_marks`` is not strictly increasing.
        """
        if not self.initialized:
            self.initialize()
        self.opts = self.parser.parse_args()

        # Parse the gpu id list; non-numeric entries are silently skipped
        # (isdigit() already excludes negative numbers).
        str_gpu_ids = self.opts.gpu_ids.split(',')
        self.opts.gpu_ids = [int(s) for s in str_gpu_ids if s.isdigit()]
        if self.opts.gpu_ids and torch.cuda.is_available():
            print('\n[INFO] Using {} CUDA device(s) ...'.format(len(self.opts.gpu_ids)))
        else:
            print('\n[INFO] Using cpu ...')
            self.opts.gpu_ids = []

        # Parse the optimizer's betas and lr decay marks from their
        # comma-separated string forms.
        self.opts.betas = [float(beta) for beta in self.opts.betas.split(',')]
        lr_decay_marks = [int(m) for m in self.opts.lr_decay_marks.split(',')]
        # Explicit check instead of `assert` so validation survives `python -O`.
        for prev, cur in zip(lr_decay_marks, lr_decay_marks[1:]):
            if cur <= prev:
                raise ValueError(
                    'lr_decay_marks must be strictly increasing, got {}'.format(
                        lr_decay_marks))
        self.opts.lr_decay_marks = lr_decay_marks

        # Print and save all options for reproducibility.
        args = vars(self.opts)
        print('\n ' + 30*'-' + 'Opts' + 30*'-')
        for k, v in args.items():
            print('%s: %s' % (str(k), str(v)))
        # exist_ok avoids the isdir()/makedirs() race on concurrent launches.
        os.makedirs(self.opts.run_dir, exist_ok=True)
        file_path = os.path.join(self.opts.run_dir, 'opts.txt')
        with open(file_path, 'wt') as fout:
            fout.write('| options\n')
            for k, v in sorted(args.items()):
                fout.write('%s: %s\n' % (str(k), str(v)))
        return self.opts