initial commit
commit a82bbc593e, 129 changed files with 33981 additions and 0 deletions

utils/scheduler.py (new file, 240 lines)
@@ -0,0 +1,240 @@
""" Scheduler Factory
|
||||
Hacked together by / Copyright 2020 Ross Wightman
|
||||
"""
|
||||
from torch.optim import Optimizer
|
||||
import math
|
||||
from torch.optim.lr_scheduler import LambdaLR, _LRScheduler
|
||||
import math
|
||||
|
||||
|
||||
# class LinearWarmupStepLRScheduler:
#     def __init__(
#         self,
#         optimizer,
#         max_epoch,
#         min_lr,
#         init_lr,
#         decay_rate=1,
#         warmup_start_lr=-1,
#         warmup_steps=0,
#         **kwargs
#     ):
#         self.optimizer = optimizer

#         self.max_epoch = max_epoch
#         self.min_lr = min_lr

#         self.decay_rate = decay_rate

#         self.init_lr = init_lr
#         self.warmup_steps = warmup_steps
#         self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr

#     def step(self, cur_epoch, cur_step):
#         if cur_epoch == 0:
#             warmup_lr_schedule(
#                 step=cur_step,
#                 optimizer=self.optimizer,
#                 max_step=self.warmup_steps,
#                 init_lr=self.warmup_start_lr,
#                 max_lr=self.init_lr,
#             )
#         else:
#             step_lr_schedule(
#                 epoch=cur_epoch,
#                 optimizer=self.optimizer,
#                 init_lr=self.init_lr,
#                 min_lr=self.min_lr,
#                 decay_rate=self.decay_rate,
#             )


# class LinearWarmupCosineLRScheduler:
#     def __init__(
#         self,
#         optimizer,
#         max_epoch,
#         min_lr,
#         init_lr,
#         warmup_steps=0,
#         warmup_start_lr=-1,
#         **kwargs
#     ):
#         self.optimizer = optimizer

#         self.max_epoch = max_epoch
#         self.min_lr = min_lr

#         self.init_lr = init_lr
#         self.warmup_steps = warmup_steps
#         self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr

#     def step(self, cur_epoch, cur_step):
#         # assuming the warmup iters less than one epoch
#         if cur_epoch == 0:
#             warmup_lr_schedule(
#                 step=cur_step,
#                 optimizer=self.optimizer,
#                 max_step=self.warmup_steps,
#                 init_lr=self.warmup_start_lr,
#                 max_lr=self.init_lr,
#             )
#         else:
#             cosine_lr_schedule(
#                 epoch=cur_epoch,
#                 optimizer=self.optimizer,
#                 max_epoch=self.max_epoch,
#                 init_lr=self.init_lr,
#                 min_lr=self.min_lr,
#             )


# class ConstantLRScheduler:
#     def __init__(self, optimizer, init_lr, warmup_start_lr=-1, warmup_steps=0, **kwargs):
#         self.optimizer = optimizer
#         self.lr = init_lr
#         self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr
#         self.warmup_steps = warmup_steps

#     def step(self, cur_epoch, cur_step):
#         if cur_epoch == 0:
#             warmup_lr_schedule(
#                 step=cur_step,
#                 optimizer=self.optimizer,
#                 max_step=self.warmup_steps,
#                 init_lr=self.warmup_start_lr,
#                 max_lr=self.lr,
#             )
#         else:
#             for param_group in self.optimizer.param_groups:
#                 param_group["lr"] = self.lr


# schedulers = {
#     'constant_lr': ConstantLRScheduler,
#     'linear_warmup_cosine_lr': LinearWarmupCosineLRScheduler,
#     'linear_warmup_step_lr': LinearWarmupStepLRScheduler
# }


# def cosine_lr_schedule(optimizer, epoch, max_epoch, init_lr, min_lr):
#     """Decay the learning rate"""
#     lr = (init_lr - min_lr) * 0.5 * (
#         1.0 + math.cos(math.pi * epoch / max_epoch)
#     ) + min_lr
#     for param_group in optimizer.param_groups:
#         param_group["lr"] = lr


# def warmup_lr_schedule(optimizer, step, max_step, init_lr, max_lr):
#     """Warmup the learning rate"""
#     lr = min(max_lr, init_lr + (max_lr - init_lr) * step / max(max_step, 1))
#     for param_group in optimizer.param_groups:
#         param_group["lr"] = lr


# def step_lr_schedule(optimizer, epoch, init_lr, min_lr, decay_rate):
#     """Decay the learning rate"""
#     lr = max(min_lr, init_lr * (decay_rate**epoch))
#     for param_group in optimizer.param_groups:
#         param_group["lr"] = lr


# def create_scheduler(config, optimizer):
#     scheduler_cls = schedulers[config.get('scheduler', 'constant_lr')]
#     max_epoch = config.epochs
#     min_lr = config.min_lr
#     init_lr = config.lr
#     warmup_start_lr = config.get('warmup_lr', -1)
#     warmup_steps = config.get('warmup_steps', 0)

#     scheduler = scheduler_cls(
#         optimizer=optimizer,
#         max_epoch=max_epoch,
#         min_lr=min_lr,
#         init_lr=init_lr,
#         decay_rate=None,
#         warmup_start_lr=warmup_start_lr,
#         warmup_steps=warmup_steps
#     )

#     return scheduler


class WarmupLinearScheduleNonZero(_LRScheduler):
    """ Linear warmup and then linear decay.
    Linearly increases the learning rate from 0 to the base lr set in the optimizer over `warmup_steps` training steps.
    Then linearly decays the learning rate over the remaining `t_total - warmup_steps` steps, never going below `min_lr`.
    """
    def __init__(self, optimizer, warmup_steps, t_total, min_lr=1e-5, last_epoch=-1):
        self.warmup_steps = warmup_steps
        self.t_total = t_total
        self.min_lr = min_lr
        super(WarmupLinearScheduleNonZero, self).__init__(optimizer, last_epoch=last_epoch)

    def get_lr(self):
        step = self.last_epoch
        if step < self.warmup_steps:
            # Linear warmup: scale the base lr by step / warmup_steps.
            lr_factor = float(step) / float(max(1, self.warmup_steps))
        else:
            # Linear decay of the scaling factor from 1 towards 0 over the remaining steps.
            lr_factor = max(0, float(self.t_total - step) / float(max(1.0, self.t_total - self.warmup_steps)))

        # Clamp at min_lr so the learning rate never reaches zero (hence "NonZero").
        return [base_lr * lr_factor if (base_lr * lr_factor) > self.min_lr else self.min_lr for base_lr in self.base_lrs]
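

# Illustrative usage sketch, not part of the original file: one way this scheduler might be
# driven in a per-iteration training loop. The tiny parameter list, SGD optimizer, and step
# counts below are placeholder assumptions chosen only for the example.
def _demo_warmup_linear_schedule_non_zero():
    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.SGD(params, lr=1e-4)
    scheduler = WarmupLinearScheduleNonZero(optimizer, warmup_steps=100, t_total=1000, min_lr=1e-6)
    for _ in range(1000):
        optimizer.step()   # the model update would happen here
        scheduler.step()   # advance the warmup/decay schedule once per iteration
    return scheduler.get_last_lr()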


def create_scheduler(config, optimizer):
    """Build the LR scheduler named by config['scheduler'] ('cosine' or 'linear')."""
    lr_scheduler = None
    if config['scheduler'] == 'cosine':
        lr_scheduler = get_cosine_schedule_with_warmup(
            optimizer,
            num_warmup_steps=config['num_warmup_steps'],
            num_training_steps=config['num_training_steps'],
            num_cycles=0.5,
            min_lr_multi=config['min_lr_multi']
        )
    elif config['scheduler'] == 'linear':
        lr_scheduler = WarmupLinearScheduleNonZero(
            optimizer,
            config['num_warmup_steps'],
            config['num_training_steps'],
            min_lr=config['min_lr']
        )
    # Any other scheduler name falls through and returns None.
    return lr_scheduler
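

# Illustrative usage sketch, not part of the original file: how create_scheduler might be
# called with a plain dict config. The key names mirror the lookups above; the numeric
# values and the AdamW optimizer are placeholder assumptions for the example only.
def _demo_create_scheduler():
    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.AdamW(params, lr=5e-5)
    config = {
        'scheduler': 'cosine',
        'num_warmup_steps': 500,
        'num_training_steps': 10000,
        'min_lr_multi': 0.01,
    }
    return create_scheduler(config, optimizer)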


def get_cosine_schedule_with_warmup(
        optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int,
        num_cycles: float = 0.5, min_lr_multi: float = 0., last_epoch: int = -1
):
    """
    Modified from https://github.com/huggingface/transformers/blob/v4.15.0/src/transformers/optimization.py

    Create a schedule with a learning rate that decreases following the values of the cosine function from the
    initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the
    initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the default is to just decrease from the max value to 0
            following a half-cosine).
        min_lr_multi (`float`, *optional*, defaults to 0):
            The minimum learning rate multiplier. Thus the minimum learning rate is base_lr * min_lr_multi.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    def lr_lambda(current_step):
        # Linear warmup, clamped from below by min_lr_multi.
        if current_step < num_warmup_steps:
            return max(min_lr_multi, float(current_step) / float(max(1, num_warmup_steps)))
        # Cosine decay over the remaining steps, also clamped from below by min_lr_multi.
        progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
        return max(min_lr_multi, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))

    return LambdaLR(optimizer, lr_lambda, last_epoch)
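

# Illustrative sketch, not part of the original file: inspecting the learning rates the cosine
# schedule produces. With min_lr_multi=0.1 the multiplier is clamped at 0.1 during early warmup,
# reaches 1.0 at the end of warmup, and then follows a half-cosine back down toward 0.1.
# The optimizer, base lr, and step counts are placeholder assumptions for the example only.
def _demo_cosine_schedule_values():
    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.AdamW(params, lr=1e-4)
    scheduler = get_cosine_schedule_with_warmup(
        optimizer, num_warmup_steps=10, num_training_steps=100, min_lr_multi=0.1
    )
    observed = []
    for step in range(100):
        optimizer.step()
        scheduler.step()
        if step in (0, 9, 49, 99):
            observed.append((step, scheduler.get_last_lr()[0]))
    return observed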