4 changes: 3 additions & 1 deletion docs/source/optim.rst
@@ -128,7 +128,7 @@ How to adjust Learning Rate
 ---------------------------
 
 :mod:`torch.optim.lr_scheduler` provides several methods to adjust the learning
-rate based on the number of epoches. :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`
+rate based on the number of epochs. :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`
 allows dynamic learning rate reducing based on some validation measurements.
 
 .. autoclass:: torch.optim.lr_scheduler.LambdaLR
@@ -139,5 +139,7 @@ allows dynamic learning rate reducing based on some validation measurements.
     :members:
 .. autoclass:: torch.optim.lr_scheduler.ExponentialLR
     :members:
+.. autoclass:: torch.optim.lr_scheduler.CosineAnnealingLR
+    :members:
 .. autoclass:: torch.optim.lr_scheduler.ReduceLROnPlateau
     :members:
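
A usage note for the schedulers documented above: each one wraps an existing optimizer, and the schedule is advanced by calling scheduler.step() once per epoch; ReduceLROnPlateau is the exception, taking the monitored metric in step(). A minimal sketch follows (the model, optimizer settings, and val_loss are stand-ins for illustration, not part of this change):

    import torch
    from torch.optim import SGD
    from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

    model = torch.nn.Linear(10, 2)                 # stand-in model
    optimizer = SGD(model.parameters(), lr=0.05)

    scheduler = StepLR(optimizer, step_size=3, gamma=0.1)
    for epoch in range(10):
        scheduler.step()                           # advance the epoch-based schedule
        # ... train for one epoch ...

    plateau = ReduceLROnPlateau(optimizer, mode='min', patience=5)
    for epoch in range(10):
        # ... train, then compute a validation metric ...
        val_loss = 1.0                             # stand-in validation metric
        plateau.step(val_loss)                     # reduces lr when the metric stops improving
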
45 changes: 28 additions & 17 deletions test/test_optim.py
@@ -1,3 +1,4 @@
+import math
 import unittest
 import functools
 from copy import deepcopy
@@ -8,7 +9,7 @@
 from torch.optim import SGD
 from torch.autograd import Variable
 from torch import sparse
-from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau
+from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau
 from common import TestCase, run_tests


@@ -423,117 +424,127 @@ def test_step_lr(self):
         # lr = 0.05     if epoch < 3
         # lr = 0.005    if 3 <= epoch < 6
         # lr = 0.0005   if 6 <= epoch < 9
         # lr = 0.00005  if epoch >= 9
+        epochs = 10
         single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005] * 3
-        targets = [single_targets, list(map(lambda x: x * 10, single_targets))]
+        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
         scheduler = StepLR(self.opt, gamma=0.1, step_size=3)
-        epochs = 10
         self._test(scheduler, targets, epochs)

     def test_multi_step_lr(self):
         # lr = 0.05     if epoch < 2
         # lr = 0.005    if 2 <= epoch < 5
         # lr = 0.0005   if 5 <= epoch < 9
         # lr = 0.00005  if epoch >= 9
+        epochs = 10
         single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005] * 3
-        targets = [single_targets, list(map(lambda x: x * 10, single_targets))]
+        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
         scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9])
-        epochs = 10
         self._test(scheduler, targets, epochs)

     def test_exp_lr(self):
-        single_targets = [0.05 * (0.9 ** x) for x in range(10)]
-        targets = [single_targets, list(map(lambda x: x * 10, single_targets))]
         epochs = 10
+        single_targets = [0.05 * (0.9 ** x) for x in range(epochs)]
+        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
         scheduler = ExponentialLR(self.opt, gamma=0.9)
         self._test(scheduler, targets, epochs)

+    def test_cos_anneal_lr(self):
+        epochs = 10
+        eta_min = 1e-10
+        single_targets = [eta_min + (0.05 - eta_min) *
+                          (1 + math.cos(x / epochs * math.pi)) / 2
+                          for x in range(epochs)]
+        targets = [single_targets, list(map(lambda x: x * epochs, single_targets))]
+        scheduler = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min)
+        self._test(scheduler, targets, epochs)
+
     def test_reduce_lr_on_plateau1(self):
+        epochs = 10
         for param_group in self.opt.param_groups:
             param_group['lr'] = 0.5
         targets = [[0.5] * 20]
         metrics = [10 - i * 0.0167 for i in range(20)]
         scheduler = ReduceLROnPlateau(self.opt, threshold_mode='abs', mode='min',
                                       threshold=0.01, patience=5, cooldown=5)
-        epochs = 10
         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
 
     def test_reduce_lr_on_plateau2(self):
+        epochs = 22
         for param_group in self.opt.param_groups:
             param_group['lr'] = 0.5
         targets = [[0.5] * 6 + [0.05] * 7 + [0.005] * 7 + [0.0005] * 2]
         metrics = [10 - i * 0.0165 for i in range(22)]
         scheduler = ReduceLROnPlateau(self.opt, patience=5, cooldown=0, threshold_mode='abs',
                                       mode='min', threshold=0.1)
-        epochs = 22
         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
 
     def test_reduce_lr_on_plateau3(self):
+        epochs = 22
         for param_group in self.opt.param_groups:
             param_group['lr'] = 0.5
         targets = [[0.5] * (2 + 6) + [0.05] * (5 + 6) + [0.005] * 4]
         metrics = [-0.8] * 2 + [-0.234] * 20
         scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=5, cooldown=5,
                                       threshold_mode='abs')
-        epochs = 22
         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
 
     def test_reduce_lr_on_plateau4(self):
+        epochs = 20
         for param_group in self.opt.param_groups:
             param_group['lr'] = 0.5
         targets = [[0.5] * 20]
         metrics = [1.5 * (1.025 ** i) for i in range(20)]  # 1.025 > 1.1 ** 0.25
         scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=3,
                                       threshold_mode='rel', threshold=0.1)
-        epochs = 20
         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
 
     def test_reduce_lr_on_plateau5(self):
+        epochs = 20
         for param_group in self.opt.param_groups:
             param_group['lr'] = 0.5
         targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
         metrics = [1.5 * (1.005 ** i) for i in range(20)]
         scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel',
                                       threshold=0.1, patience=5, cooldown=5)
-        epochs = 20
         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
 
     def test_reduce_lr_on_plateau6(self):
+        epochs = 20
         for param_group in self.opt.param_groups:
             param_group['lr'] = 0.5
         targets = [[0.5] * 20]
         metrics = [1.5 * (0.85 ** i) for i in range(20)]
         scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
                                       threshold=0.1)
-        epochs = 20
         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
 
     def test_reduce_lr_on_plateau7(self):
+        epochs = 20
         for param_group in self.opt.param_groups:
             param_group['lr'] = 0.5
         targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4]
         metrics = [1] * 7 + [0.6] + [0.5] * 12
         scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel',
                                       threshold=0.1, patience=5, cooldown=5)
-        epochs = 20
         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
 
     def test_reduce_lr_on_plateau8(self):
+        epochs = 20
         for param_group in self.opt.param_groups:
             param_group['lr'] = 0.5
         targets = [[0.5] * 6 + [0.4] * 14, [0.5] * 6 + [0.3] * 14]
         metrics = [1.5 * (1.005 ** i) for i in range(20)]
         scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel', min_lr=[0.4, 0.3],
                                       threshold=0.1, patience=5, cooldown=5)
-        epochs = 20
         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs)
 
     def test_lambda_lr(self):
+        epochs = 10
         self.opt.param_groups[0]['lr'] = 0.05
         self.opt.param_groups[1]['lr'] = 0.4
-        targets = [[0.05 * (0.9 ** x) for x in range(10)], [0.4 * (0.8 ** x) for x in range(10)]]
+        targets = [[0.05 * (0.9 ** x) for x in range(epochs)], [0.4 * (0.8 ** x) for x in range(epochs)]]
        scheduler = LambdaLR(self.opt,
                             lr_lambda=[lambda x1: 0.9 ** x1, lambda x2: 0.8 ** x2])
-        epochs = 10
         self._test(scheduler, targets, epochs)
 
     def _test(self, scheduler, targets, epochs=10):
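Aside: the expected values in test_cos_anneal_lr above come straight from the closed-form schedule, so a standalone check of the endpoints is easy. This mirrors the test's formula rather than calling the scheduler, and assumes Python 3 float division:

    import math

    epochs, base_lr, eta_min = 10, 0.05, 1e-10
    lrs = [eta_min + (base_lr - eta_min) * (1 + math.cos(x / epochs * math.pi)) / 2
           for x in range(epochs)]
    print(lrs[0])   # 0.05: cos(0) == 1, so the schedule starts at the base lr
    print(lrs[-1])  # ~0.0012: the lr decays toward eta_min as x approaches T_max
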
38 changes: 38 additions & 0 deletions torch/optim/lr_scheduler.py
@@ -1,3 +1,4 @@
+import math
 from bisect import bisect_right
 from .optimizer import Optimizer

@@ -160,6 +161,43 @@ def get_lr(self):
                 for base_lr in self.base_lrs]
 
 
+class CosineAnnealingLR(_LRScheduler):
+    """Set the learning rate of each parameter group using a cosine annealing
+    schedule, where :math:`\eta_{max}` is set to the initial lr and
+    :math:`T_{cur}` is the number of epochs since the last restart in SGDR:
+
+    .. math::
+
+        \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 +
+        \cos(\frac{T_{cur}}{T_{max}}\pi))
+
+    When last_epoch=-1, sets initial lr as lr.
+
+    It has been proposed in
+    `SGDR: Stochastic Gradient Descent with Warm Restarts`_. Note that this
+    only implements the cosine annealing part of SGDR, and not the restarts.
+
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        T_max (int): Maximum number of iterations.
+        eta_min (float): Minimum learning rate. Default: 0.
+        last_epoch (int): The index of the last epoch. Default: -1.
+
+    .. _SGDR\: Stochastic Gradient Descent with Warm Restarts:
+        https://arxiv.org/abs/1608.03983
+    """
+
+    def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1):
+        self.T_max = T_max
+        self.eta_min = eta_min
+        super(CosineAnnealingLR, self).__init__(optimizer, last_epoch)
+
+    def get_lr(self):
+        return [self.eta_min + (base_lr - self.eta_min) *
+                (1 + math.cos(self.last_epoch / self.T_max * math.pi)) / 2
+                for base_lr in self.base_lrs]
+
+
 class ReduceLROnPlateau(object):
     """Reduce learning rate when a metric has stopped improving.
     Models often benefit from reducing the learning rate by a factor
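
Finally, a short usage sketch of the new class. The model and training loop are stand-ins, and the restart-emulation note at the end is an assumption, not something this PR provides (per the docstring, the class implements only the annealing part of SGDR):

    import torch
    from torch.optim import SGD
    from torch.optim.lr_scheduler import CosineAnnealingLR

    model = torch.nn.Linear(10, 2)                   # stand-in model
    optimizer = SGD(model.parameters(), lr=0.05)
    scheduler = CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-4)

    for epoch in range(10):
        scheduler.step()                             # anneal lr from 0.05 toward eta_min
        # ... train for one epoch ...

    # Warm restarts are not implemented by this class; one assumed workaround
    # is to construct a fresh scheduler when a new annealing cycle should begin.
    scheduler = CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-4)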