
Commit d9ee045

cpuhrsch authored and facebook-github-bot committed
Remove torch/legacy (#11823)
Summary: Largely unused and hinders current development

Pull Request resolved: #11823
Differential Revision: D9925094
Pulled By: cpuhrsch
fbshipit-source-id: 1f53f8f21da85c16ae32e75fe6f796718e39fc92
1 parent 8770128 commit d9ee045

File tree: 167 files changed, +1 −13,137 lines
(Large commits collapse most of their content by default; only a subset of the changed files is shown below.)

test/run_test.py

Lines changed: 0 additions & 1 deletion
@@ -27,7 +27,6 @@
     'distributions',
     'indexing',
     'jit',
-    'legacy_nn',
    'multiprocessing',
     'nccl',
     'nn',

test/test_legacy_nn.py

Lines changed: 0 additions & 1378 deletions
This file was deleted.

test/test_nn.py

Lines changed: 0 additions & 37 deletions
@@ -22,7 +22,6 @@
 import torch.nn.parallel as dp
 import torch.nn.init as init
 import torch.nn.utils.rnn as rnn_utils
-import torch.legacy.nn as legacy
 from torch.nn.utils import clip_grad_norm_, clip_grad_value_
 from torch.nn.utils import parameters_to_vector, vector_to_parameters
 from torch.autograd import Variable, gradcheck
@@ -5821,42 +5820,6 @@ def test_linear_broadcasting(self):
         expected = m(inp.view(6, 5)).view(2, 3, 8)
         self.assertEqual(expected, m(inp))

-    def test_bilinear(self):
-        module = nn.Bilinear(10, 10, 8)
-        module_legacy = legacy.Bilinear(10, 10, 8)
-
-        module_legacy.weight.copy_(module.weight.data)
-        module_legacy.bias.copy_(module.bias.data)
-
-        input1 = torch.randn(4, 10)
-        input2 = torch.randn(4, 10)
-
-        output = module(Variable(input1), Variable(input2))
-        output_legacy = module_legacy.forward([input1, input2])
-
-        self.assertEqual(output.data, output_legacy)
-
-        input1_1 = torch.tensor(input1, requires_grad=True)
-        input2_1 = torch.tensor(input2, requires_grad=True)
-
-        module.zero_grad()
-        module_legacy.zeroGradParameters()
-
-        output = module(input1_1, input2_1)
-        grad_output = torch.randn(*output.size())
-        gi1_legacy, gi2_legacy = module_legacy.backward([input1, input2], grad_output)
-        output.backward(grad_output)
-        gi1 = input1_1.grad.data.clone()
-        gi2 = input2_1.grad.data.clone()
-
-        self.assertEqual(gi1, gi1_legacy)
-        self.assertEqual(gi2, gi2_legacy)
-        self.assertEqual(module.weight.grad.data, module_legacy.gradWeight)
-        self.assertEqual(module.bias.grad.data, module_legacy.gradBias)
-
-        _assertGradAndGradgradChecks(self, lambda x1, x2: F.bilinear(x1, x2, module.weight, module.bias),
-                                     (input1_1, input2_1))
-
     def test_bilinear_no_bias(self):
         module = nn.Bilinear(10, 10, 8)
         module_no_bias = nn.Bilinear(10, 10, 8, False)
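
Note: with the legacy comparison gone, bilinear correctness can still be exercised without torch.legacy. The following is a minimal sketch, not taken from this diff, assuming the current nn.Bilinear, torch.einsum, F.bilinear, and torch.autograd.gradcheck APIs; the shapes mirror the removed test.

# Sketch (assumed current APIs): check nn.Bilinear against the explicit
# bilinear form and run a numerical gradient check on F.bilinear.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import gradcheck

module = nn.Bilinear(10, 10, 8).double()
x1 = torch.randn(4, 10, dtype=torch.double, requires_grad=True)
x2 = torch.randn(4, 10, dtype=torch.double, requires_grad=True)

# out[b, o] = sum_{i, j} x1[b, i] * W[o, i, j] * x2[b, j] + bias[o]
expected = torch.einsum('bi,oij,bj->bo', x1, module.weight, x2) + module.bias
assert torch.allclose(module(x1, x2), expected)

# Double precision keeps the finite-difference gradient check stable.
assert gradcheck(lambda a, b: F.bilinear(a, b, module.weight, module.bias), (x1, x2))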

test/test_optim.py

Lines changed: 0 additions & 136 deletions
@@ -5,7 +5,6 @@
 import torch
 from torch._six import inf
 import torch.optim as optim
-import torch.legacy.optim as old_optim
 import torch.nn.functional as F
 from torch.optim import SGD
 from torch.autograd import Variable
@@ -24,44 +23,7 @@ def drosenbrock(tensor):
     return torch.DoubleTensor((-400 * x * (y - x ** 2) - 2 * (1 - x), 200 * (y - x ** 2)))


-def wrap_old_fn(old_fn, **config):
-    def wrapper(closure, params, state):
-        return old_fn(closure, params, config, state)
-    return wrapper
-
-
 class TestOptim(TestCase):
-    def _test_rosenbrock(self, constructor, old_fn):
-        params_t = torch.Tensor([1.5, 1.5])
-        state = {}
-
-        params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True)
-        optimizer = constructor([params])
-
-        solution = torch.Tensor([1, 1])
-        initial_dist = params.data.dist(solution)
-
-        def eval():
-            optimizer.zero_grad()
-            loss = rosenbrock(params)
-            loss.backward()
-            # loss.backward() will give **slightly** different
-            # gradients than drosenbrock, because of a different ordering
-            # of floating point operations. In most cases it doesn't matter,
-            # but some optimizers are so sensitive that they can temporarily
-            # diverge up to 1e-4, just to converge again. This makes the
-            # comparison more stable.
-            params.grad.data.copy_(drosenbrock(params.data))
-            return loss
-
-        for i in range(2000):
-            optimizer.step(eval)
-            old_fn(lambda _: (rosenbrock(params_t), drosenbrock(params_t)),
-                   params_t, state)
-            self.assertEqual(params.data, params_t)
-
-        self.assertLessEqual(params.data.dist(solution), initial_dist)
-
     def _test_rosenbrock_sparse(self, constructor, sparse_only=False):
         params_t = torch.Tensor([1.5, 1.5])


@@ -237,16 +199,6 @@ def _build_params_dict_single(self, weight, bias, **kwargs):
         return [dict(params=bias, **kwargs)]

     def test_sgd(self):
-        self._test_rosenbrock(
-            lambda params: optim.SGD(params, lr=1e-3),
-            wrap_old_fn(old_optim.sgd, learningRate=1e-3)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.SGD(params, lr=1e-3, momentum=0.9,
-                                     dampening=0, weight_decay=1e-4),
-            wrap_old_fn(old_optim.sgd, learningRate=1e-3, momentum=0.9,
-                        dampening=0, weightDecay=1e-4)
-        )
         self._test_basic_cases(
             lambda weight, bias: optim.SGD([weight, bias], lr=1e-3)
         )
@@ -273,14 +225,6 @@ def test_sgd_sparse(self):
         )

     def test_adam(self):
-        self._test_rosenbrock(
-            lambda params: optim.Adam(params, lr=1e-2),
-            wrap_old_fn(old_optim.adam, learningRate=1e-2)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.Adam(params, lr=1e-2, weight_decay=1e-2),
-            wrap_old_fn(old_optim.adam, learningRate=1e-2, weightDecay=1e-2)
-        )
         self._test_basic_cases(
             lambda weight, bias: optim.Adam([weight, bias], lr=1e-3)
         )
@@ -310,18 +254,6 @@ def test_sparse_adam(self):
             optim.SparseAdam(None, lr=1e-2, betas=(1.0, 0.0))

     def test_adadelta(self):
-        self._test_rosenbrock(
-            lambda params: optim.Adadelta(params),
-            wrap_old_fn(old_optim.adadelta)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.Adadelta(params, rho=0.95),
-            wrap_old_fn(old_optim.adadelta, rho=0.95)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.Adadelta(params, weight_decay=1e-2),
-            wrap_old_fn(old_optim.adadelta, weightDecay=1e-2)
-        )
         self._test_basic_cases(
             lambda weight, bias: optim.Adadelta([weight, bias])
         )
@@ -333,18 +265,6 @@ def test_adadelta(self):
             optim.Adadelta(None, lr=1e-2, rho=1.1)

     def test_adagrad(self):
-        self._test_rosenbrock(
-            lambda params: optim.Adagrad(params, lr=1e-1),
-            wrap_old_fn(old_optim.adagrad, learningRate=1e-1)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.Adagrad(params, lr=1e-1, lr_decay=1e-3),
-            wrap_old_fn(old_optim.adagrad, learningRate=1e-1, learningRateDecay=1e-3)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.Adagrad(params, lr=1e-1, weight_decay=1e-2),
-            wrap_old_fn(old_optim.adagrad, learningRate=1e-1, weightDecay=1e-2)
-        )
         self._test_basic_cases(
             lambda weight, bias: optim.Adagrad([weight, bias], lr=1e-1)
         )
@@ -367,18 +287,6 @@ def test_adagrad_sparse(self):

     @skipIfRocm
     def test_adamax(self):
-        self._test_rosenbrock(
-            lambda params: optim.Adamax(params, lr=1e-1),
-            wrap_old_fn(old_optim.adamax, learningRate=1e-1)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.Adamax(params, lr=1e-1, weight_decay=1e-2),
-            wrap_old_fn(old_optim.adamax, learningRate=1e-1, weightDecay=1e-2)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.Adamax(params, lr=1e-1, betas=(0.95, 0.998)),
-            wrap_old_fn(old_optim.adamax, learningRate=1e-1, beta1=0.95, beta2=0.998)
-        )
         self._test_basic_cases(
             lambda weight, bias: optim.Adamax([weight, bias], lr=1e-1)
         )
@@ -391,18 +299,6 @@ def test_adamax(self):
             optim.Adamax(None, lr=1e-2, betas=(0.0, 1.0))

     def test_rmsprop(self):
-        self._test_rosenbrock(
-            lambda params: optim.RMSprop(params, lr=1e-2),
-            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.RMSprop(params, lr=1e-2, weight_decay=1e-2),
-            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, weightDecay=1e-2)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.RMSprop(params, lr=1e-2, alpha=0.95),
-            wrap_old_fn(old_optim.rmsprop, learningRate=1e-2, alpha=0.95)
-        )
         self._test_basic_cases(
             lambda weight, bias: optim.RMSprop([weight, bias], lr=1e-2)
         )
@@ -415,18 +311,6 @@ def test_rmsprop(self):
             optim.RMSprop(None, lr=1e-2, momentum=-1.0)

     def test_asgd(self):
-        self._test_rosenbrock(
-            lambda params: optim.ASGD(params, lr=1e-3),
-            wrap_old_fn(old_optim.asgd, eta0=1e-3)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.ASGD(params, lr=1e-3, alpha=0.8),
-            wrap_old_fn(old_optim.asgd, eta0=1e-3, alpha=0.8)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.ASGD(params, lr=1e-3, t0=1e3),
-            wrap_old_fn(old_optim.asgd, eta0=1e-3, t0=1e3)
-        )
         self._test_basic_cases(
             lambda weight, bias: optim.ASGD([weight, bias], lr=1e-3, t0=100)
         )
@@ -440,18 +324,6 @@ def test_asgd(self):

     @skipIfRocm
     def test_rprop(self):
-        self._test_rosenbrock(
-            lambda params: optim.Rprop(params, lr=1e-3),
-            wrap_old_fn(old_optim.rprop, stepsize=1e-3)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.Rprop(params, lr=1e-3, etas=(0.6, 1.1)),
-            wrap_old_fn(old_optim.rprop, stepsize=1e-3, etaminus=0.6, etaplus=1.1)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.Rprop(params, lr=1e-3, step_sizes=(1e-4, 3)),
-            wrap_old_fn(old_optim.rprop, stepsize=1e-3, stepsizemin=1e-4, stepsizemax=3)
-        )
         self._test_basic_cases(
             lambda weight, bias: optim.Rprop([weight, bias], lr=1e-3)
         )
@@ -464,14 +336,6 @@ def test_rprop(self):
             optim.Rprop(None, lr=1e-2, etas=(1.0, 0.5))

     def test_lbfgs(self):
-        self._test_rosenbrock(
-            lambda params: optim.LBFGS(params),
-            wrap_old_fn(old_optim.lbfgs)
-        )
-        self._test_rosenbrock(
-            lambda params: optim.LBFGS(params, lr=5e-2, max_iter=5),
-            wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
-        )
         self._test_basic_cases(
             lambda weight, bias: optim.LBFGS([weight, bias]),
             ignore_multidevice=True
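
Note: with the old_optim cross-checks removed, the Rosenbrock convergence property can still be checked against torch.optim alone. The following is an illustrative sketch, not the suite's code, assuming only the public torch and torch.optim APIs; the learning rate and iteration count mirror the removed _test_rosenbrock.

# Illustrative convergence check: run SGD on the Rosenbrock function and
# assert the iterate has moved toward the minimum at (1, 1).
import torch

def rosenbrock(t):
    x, y = t
    return (1 - x) ** 2 + 100 * (y - x ** 2) ** 2

params = torch.tensor([1.5, 1.5], requires_grad=True)
optimizer = torch.optim.SGD([params], lr=1e-3)
solution = torch.tensor([1.0, 1.0])
initial_dist = (params.detach() - solution).norm()

for _ in range(2000):
    optimizer.zero_grad()
    loss = rosenbrock(params)
    loss.backward()
    optimizer.step()

assert (params.detach() - solution).norm() <= initial_dist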

test/test_utils.py

Lines changed: 0 additions & 94 deletions
@@ -441,98 +441,6 @@ def test_gpu(self):
                           lambda: gpulib.cuda_func(ctensor.storage(), 2, 1.5))


-class TestLuaReader(TestCase):
-
-    @staticmethod
-    def _module_test(name, test):
-        def do_test(self):
-            module = test['module']
-            input = test['input']
-            grad_output = test['grad_output']
-            if hasattr(self, '_transform_' + name):
-                input = getattr(self, '_transform_' + name)(input)
-            output = module.forward(input)
-            module.zeroGradParameters()
-            grad_input = module.backward(input, grad_output)
-            self.assertEqual(output, test['output'])
-            self.assertEqual(grad_input, test['grad_input'])
-            if module.parameters() is not None:
-                params, d_params = module.parameters()
-                self.assertEqual(params, test['params'])
-                self.assertEqual(d_params, test['d_params'])
-            else:
-                self.assertFalse('params' in test and test['params'])
-                self.assertFalse('params' in test and test['d_params'])
-        return do_test
-
-    @staticmethod
-    def _criterion_test(name, test):
-        def do_test(self):
-            module = test['module']
-            input = test['input']
-            if name == 'L1Cost':
-                target = None
-            else:
-                target = test['target']
-            if hasattr(self, '_transform_' + name):
-                input, target = getattr(self, '_transform_' + name)(input, target)
-
-            output = module.forward(input, target)
-            grad_input = module.backward(input, target)
-            self.assertEqual(output, test['loss'])
-            self.assertEqual(grad_input, test['grad_input'])
-        return do_test
-
-    @classmethod
-    def init(cls):
-        try:
-            path = download_file('https://download.pytorch.org/test_data/legacy_modules.t7')
-        except unittest.SkipTest:
-            return
-        long_size = 8 if sys.platform == 'win32' else None
-        tests = load_lua(path, long_size=long_size)
-        for name, test in tests['modules'].items():
-            if name == "HardShrink":
-                continue
-            test_name = 'test_' + name.replace('nn.', '')
-            setattr(cls, test_name, cls._module_test(name, test))
-        for name, test in tests['criterions'].items():
-            if name == "HardShrink":
-                continue
-            test_name = 'test_' + name.replace('nn.', '')
-            setattr(cls, test_name, cls._criterion_test(name, test))
-
-    def _transform_Index(self, input):
-        return [input[0], input[1].sub(1)]
-
-    def _transform_LookupTable(self, input):
-        return input.sub(1)
-
-    def _transform_MultiLabelMarginCriterion(self, input, target):
-        return input, target.sub(1)
-
-    def _transform_ClassNLLCriterion(self, input, target):
-        return input, target.sub(1)
-
-    def _transform_SpatialClassNLLCriterion(self, input, target):
-        return input, target.sub(1)
-
-    def _transform_ClassSimplexCriterion(self, input, target):
-        return input, target.sub(1)
-
-    def _transform_CrossEntropyCriterion(self, input, target):
-        return input, target.sub(1)
-
-    def _transform_ParallelCriterion(self, input, target):
-        return input, [target[0].sub(1), target[1]]
-
-    def _transform_MultiCriterion(self, input, target):
-        return input, target.sub(1)
-
-    def _transform_MultiMarginCriterion(self, input, target):
-        return input, target.sub(1)
-
-
 @unittest.skipIf('SKIP_TEST_BOTTLENECK' in os.environ.keys(), 'SKIP_TEST_BOTTLENECK is set')
 class TestBottleneck(TestCase):
     def _run(self, command):
@@ -700,6 +608,4 @@ def try_check_onnx_broadcast(dims1, dims2, expect_broadcast, expect_fail):


 if __name__ == '__main__':
-    from torch.utils.serialization import load_lua
-    TestLuaReader.init()
     run_tests()
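
Note: the Lua reader itself (torch.utils.serialization.load_lua) is not touched by this commit; only the TestLuaReader tests that drove it through torch.legacy modules are dropped. A minimal usage sketch, assuming the pre-1.0 torch.utils.serialization module is still available; the file path is hypothetical.

# Usage sketch only: read a Lua torch .t7 archive with the pre-1.0 reader.
import sys
from torch.utils.serialization import load_lua

long_size = 8 if sys.platform == 'win32' else None  # mirrors the removed test's Windows handling
contents = load_lua('legacy_modules.t7', long_size=long_size)  # hypothetical local path
print(type(contents))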

torch/legacy/README.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+If you're looking for this legacy code please consider versions of PyTorch before 0.5

torch/legacy/__init__.py

Lines changed: 0 additions & 7 deletions
This file was deleted.
