initial commit
commit a82bbc593e, 129 changed files with 33981 additions and 0 deletions

utils/scheduler.py (new file, 240 lines)
@@ -0,0 +1,240 @@
""" Scheduler Factory
|
||||
Hacked together by / Copyright 2020 Ross Wightman
|
||||
"""
|
||||
from torch.optim import Optimizer
|
||||
import math
|
||||
from torch.optim.lr_scheduler import LambdaLR, _LRScheduler
|
||||
import math
|
||||
|
||||
|
||||
# class LinearWarmupStepLRScheduler:
#     def __init__(
#         self,
#         optimizer,
#         max_epoch,
#         min_lr,
#         init_lr,
#         decay_rate=1,
#         warmup_start_lr=-1,
#         warmup_steps=0,
#         **kwargs
#     ):
#         self.optimizer = optimizer

#         self.max_epoch = max_epoch
#         self.min_lr = min_lr

#         self.decay_rate = decay_rate

#         self.init_lr = init_lr
#         self.warmup_steps = warmup_steps
#         self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr

#     def step(self, cur_epoch, cur_step):
#         if cur_epoch == 0:
#             warmup_lr_schedule(
#                 step=cur_step,
#                 optimizer=self.optimizer,
#                 max_step=self.warmup_steps,
#                 init_lr=self.warmup_start_lr,
#                 max_lr=self.init_lr,
#             )
#         else:
#             step_lr_schedule(
#                 epoch=cur_epoch,
#                 optimizer=self.optimizer,
#                 init_lr=self.init_lr,
#                 min_lr=self.min_lr,
#                 decay_rate=self.decay_rate,
#             )


# class LinearWarmupCosineLRScheduler:
#     def __init__(
#         self,
#         optimizer,
#         max_epoch,
#         min_lr,
#         init_lr,
#         warmup_steps=0,
#         warmup_start_lr=-1,
#         **kwargs
#     ):
#         self.optimizer = optimizer

#         self.max_epoch = max_epoch
#         self.min_lr = min_lr

#         self.init_lr = init_lr
#         self.warmup_steps = warmup_steps
#         self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr

#     def step(self, cur_epoch, cur_step):
#         # assuming the warmup iters less than one epoch
#         if cur_epoch == 0:
#             warmup_lr_schedule(
#                 step=cur_step,
#                 optimizer=self.optimizer,
#                 max_step=self.warmup_steps,
#                 init_lr=self.warmup_start_lr,
#                 max_lr=self.init_lr,
#             )
#         else:
#             cosine_lr_schedule(
#                 epoch=cur_epoch,
#                 optimizer=self.optimizer,
#                 max_epoch=self.max_epoch,
#                 init_lr=self.init_lr,
#                 min_lr=self.min_lr,
#             )


# class ConstantLRScheduler:
#     def __init__(self, optimizer, init_lr, warmup_start_lr=-1, warmup_steps=0, **kwargs):
#         self.optimizer = optimizer
#         self.lr = init_lr
#         self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr
#         self.warmup_steps = warmup_steps

#     def step(self, cur_epoch, cur_step):
#         if cur_epoch == 0:
#             warmup_lr_schedule(
#                 step=cur_step,
#                 optimizer=self.optimizer,
#                 max_step=self.warmup_steps,
#                 init_lr=self.warmup_start_lr,
#                 max_lr=self.lr,
#             )
#         else:
#             for param_group in self.optimizer.param_groups:
#                 param_group["lr"] = self.lr


# schedulers = {
#     'constant_lr': ConstantLRScheduler,
#     'linear_warmup_cosine_lr': LinearWarmupCosineLRScheduler,
#     'linear_warmup_step_lr': LinearWarmupStepLRScheduler
# }


# def cosine_lr_schedule(optimizer, epoch, max_epoch, init_lr, min_lr):
#     """Decay the learning rate"""
#     lr = (init_lr - min_lr) * 0.5 * (
#         1.0 + math.cos(math.pi * epoch / max_epoch)
#     ) + min_lr
#     for param_group in optimizer.param_groups:
#         param_group["lr"] = lr


# def warmup_lr_schedule(optimizer, step, max_step, init_lr, max_lr):
#     """Warmup the learning rate"""
#     lr = min(max_lr, init_lr + (max_lr - init_lr) * step / max(max_step, 1))
#     for param_group in optimizer.param_groups:
#         param_group["lr"] = lr


# def step_lr_schedule(optimizer, epoch, init_lr, min_lr, decay_rate):
#     """Decay the learning rate"""
#     lr = max(min_lr, init_lr * (decay_rate**epoch))
#     for param_group in optimizer.param_groups:
#         param_group["lr"] = lr


# def create_scheduler(config, optimizer):
#     scheduler_cls = schedulers[config.get('scheduler', 'constant_lr')]
#     max_epoch = config.epochs
#     min_lr = config.min_lr
#     init_lr = config.lr
#     warmup_start_lr = config.get('warmup_lr', -1)
#     warmup_steps = config.get('warmup_steps', 0)

#     scheduler = scheduler_cls(
#         optimizer=optimizer,
#         max_epoch=max_epoch,
#         min_lr=min_lr,
#         init_lr=init_lr,
#         decay_rate=None,
#         warmup_start_lr=warmup_start_lr,
#         warmup_steps=warmup_steps
#     )

#     return scheduler


class WarmupLinearScheduleNonZero(_LRScheduler):
    """ Linear warmup and then linear decay.
    Linearly increases the learning rate from 0 to the base lr set in the optimizer over `warmup_steps` training steps.
    Then linearly decays the learning rate over the remaining `t_total - warmup_steps` steps, never going below `min_lr`.
    """
    def __init__(self, optimizer, warmup_steps, t_total, min_lr=1e-5, last_epoch=-1):
        self.warmup_steps = warmup_steps
        self.t_total = t_total
        self.min_lr = min_lr
        super(WarmupLinearScheduleNonZero, self).__init__(optimizer, last_epoch=last_epoch)

    def get_lr(self):
        step = self.last_epoch
        if step < self.warmup_steps:
            # Linear warmup: scale the base lr by step / warmup_steps.
            lr_factor = float(step) / float(max(1, self.warmup_steps))
        else:
            # Linear decay of the scaling factor from 1 towards 0 over the remaining steps.
            lr_factor = max(0, float(self.t_total - step) / float(max(1.0, self.t_total - self.warmup_steps)))

        # Clamp at min_lr so the learning rate never reaches zero (hence "NonZero").
        return [base_lr * lr_factor if (base_lr * lr_factor) > self.min_lr else self.min_lr for base_lr in self.base_lrs]
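

# Illustrative usage sketch, not part of the original file: one way this scheduler might be
# driven in a per-iteration training loop. The tiny parameter list, SGD optimizer, and step
# counts below are placeholder assumptions chosen only for the example.
def _demo_warmup_linear_schedule_non_zero():
    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.SGD(params, lr=1e-4)
    scheduler = WarmupLinearScheduleNonZero(optimizer, warmup_steps=100, t_total=1000, min_lr=1e-6)
    for _ in range(1000):
        optimizer.step()   # the model update would happen here
        scheduler.step()   # advance the warmup/decay schedule once per iteration
    return scheduler.get_last_lr()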


def create_scheduler(config, optimizer):
    """Build the LR scheduler named by config['scheduler'] ('cosine' or 'linear')."""
    lr_scheduler = None
    if config['scheduler'] == 'cosine':
        lr_scheduler = get_cosine_schedule_with_warmup(
            optimizer,
            num_warmup_steps=config['num_warmup_steps'],
            num_training_steps=config['num_training_steps'],
            num_cycles=0.5,
            min_lr_multi=config['min_lr_multi']
        )
    elif config['scheduler'] == 'linear':
        lr_scheduler = WarmupLinearScheduleNonZero(
            optimizer,
            config['num_warmup_steps'],
            config['num_training_steps'],
            min_lr=config['min_lr']
        )
    # Any other scheduler name falls through and returns None.
    return lr_scheduler
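

# Illustrative usage sketch, not part of the original file: how create_scheduler might be
# called with a plain dict config. The key names mirror the lookups above; the numeric
# values and the AdamW optimizer are placeholder assumptions for the example only.
def _demo_create_scheduler():
    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.AdamW(params, lr=5e-5)
    config = {
        'scheduler': 'cosine',
        'num_warmup_steps': 500,
        'num_training_steps': 10000,
        'min_lr_multi': 0.01,
    }
    return create_scheduler(config, optimizer)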


def get_cosine_schedule_with_warmup(
        optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int,
        num_cycles: float = 0.5, min_lr_multi: float = 0., last_epoch: int = -1
):
    """
    Modified from https://github.com/huggingface/transformers/blob/v4.15.0/src/transformers/optimization.py

    Create a schedule with a learning rate that decreases following the values of the cosine function from the
    initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the
    initial lr set in the optimizer.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the default is to just decrease from the max value to 0
            following a half-cosine).
        min_lr_multi (`float`, *optional*, defaults to 0):
            The minimum learning rate multiplier. Thus the minimum learning rate is base_lr * min_lr_multi.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.
    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    def lr_lambda(current_step):
        # Linear warmup, clamped from below by min_lr_multi.
        if current_step < num_warmup_steps:
            return max(min_lr_multi, float(current_step) / float(max(1, num_warmup_steps)))
        # Cosine decay over the remaining steps, also clamped from below by min_lr_multi.
        progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
        return max(min_lr_multi, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))

    return LambdaLR(optimizer, lr_lambda, last_epoch)
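

# Illustrative sketch, not part of the original file: inspecting the learning rates the cosine
# schedule produces. With min_lr_multi=0.1 the multiplier is clamped at 0.1 during early warmup,
# reaches 1.0 at the end of warmup, and then follows a half-cosine back down toward 0.1.
# The optimizer, base lr, and step counts are placeholder assumptions for the example only.
def _demo_cosine_schedule_values():
    import torch

    params = [torch.nn.Parameter(torch.zeros(1))]
    optimizer = torch.optim.AdamW(params, lr=1e-4)
    scheduler = get_cosine_schedule_with_warmup(
        optimizer, num_warmup_steps=10, num_training_steps=100, min_lr_multi=0.1
    )
    observed = []
    for step in range(100):
        optimizer.step()
        scheduler.step()
        if step in (0, 9, 49, 99):
            observed.append((step, scheduler.get_last_lr()[0]))
    return observed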