Commit d66c320

lixinyu authored and facebook-github-bot committed
disable leaky_relu_ backward calculation with negative slope (#33639)
Summary: Pull Request resolved: #33639

Test Plan: Imported from OSS

Differential Revision: D20045735

Pulled By: glaringlee

fbshipit-source-id: b3becf30a8fe9ee178792bd88f6ee10102504ed5
1 parent 997b5b5 · commit d66c320

File tree

aten/src/ATen/native/Activation.cpp
aten/src/ATen/native/native_functions.yaml
test/backward_compatibility/check_backward_compatibility.py
test/test_autograd.py
tools/autograd/derivatives.yaml

5 files changed: +49 -60 lines changed

aten/src/ATen/native/Activation.cpp

Lines changed: 23 additions & 38 deletions
@@ -165,7 +165,7 @@ inline void _rrelu_with_noise_train(
     output.copy_(tmp_tensor);
   }
 }
-
+
 Tensor& rrelu_with_noise_out_cpu(
     Tensor& output,
     const Tensor& self,
@@ -209,42 +209,23 @@ Tensor& rrelu_with_noise_cpu_(
   return at::native::rrelu_with_noise_out_cpu(self, self, noise, lower, upper, training, generator);
 }

-Tensor& rrelu_with_noise_backward_out(
-    Tensor& grad_input,
-    const Tensor& grad_output,
-    const Tensor& self,
-    const Tensor& noise,
-    Scalar lower,
-    Scalar upper,
-    bool training) {
-  auto lower_tensor = scalar_to_tensor(lower, grad_output.device());
-  auto upper_tensor = scalar_to_tensor(upper, grad_output.device());
-  if (training && (upper_tensor - lower_tensor).item().to<float>() > 1E-6) {
-    grad_input = grad_output.mul(noise);
-  } else {
-    auto negative = (lower_tensor + upper_tensor) / 2;
-    Scalar negative_slope = negative.item();
-    grad_input = at::leaky_relu_backward(grad_output, self, negative_slope);
-  }
-  return grad_input;
-}
-
 Tensor rrelu_with_noise_backward(
     const Tensor& grad_output,
-    const Tensor& self,
+    const Tensor& self_or_result,
     const Tensor& noise,
     Scalar lower,
     Scalar upper,
-    bool training) {
+    bool training,
+    bool is_result) {
   auto lower_tensor = scalar_to_tensor(lower, grad_output.device());
   auto upper_tensor = scalar_to_tensor(upper, grad_output.device());
   if (training && (upper_tensor - lower_tensor).item().to<float>() > 1E-6) {
     return grad_output.mul(noise);
   } else {
     auto negative = (lower_tensor + upper_tensor) / 2;
     Scalar negative_slope = negative.item();
-    return at::leaky_relu_backward(grad_output, self, negative_slope);
-  }
+    return at::leaky_relu_backward(grad_output, self_or_result, negative_slope, is_result);
+  }
 }

 Tensor rrelu(const Tensor & self, Scalar lower, Scalar upper, bool training, Generator* generator) {
@@ -663,22 +644,26 @@ Tensor & leaky_relu_(
   return at::leaky_relu_out(self, self, neg_val);
 }

-Tensor& leaky_relu_backward_out(
-    Tensor& grad_input,
-    const Tensor& grad_output,
-    const Tensor& input,
-    Scalar negval) {
-  auto iter = TensorIterator::binary_op(grad_input, input, grad_output);
-  leaky_relu_backward_stub(iter.device_type(), iter, negval);
-  return grad_input;
-}
-
+// Note: leakyReLu backward calculation doesn't support in-place call with non-positive slope.
+// The reason is that for in-place forward call, the forward result will be saved into autograd
+// node instead of the input itself, when calculating backward gradient, there is no way to know
+// whether the original input for current node is positive or not if the input slope is
+// non-positive. eg. forward is 2, slope is -0.2, the original input for this node could be
+// either 2, or -10, so no way to get a correct backward gradient in this case.
 Tensor leaky_relu_backward(
     const Tensor& grad_output,
-    const Tensor& input,
-    Scalar negval) {
+    const Tensor& self_or_result,
+    Scalar negval,
+    bool is_result) {
+  TORCH_CHECK(
+    !is_result || negval.to<double>() > 0.0,
+    "In-place leakyReLu backward calculation is triggered with a non-positive slope which is not supported. "
+    "This is caused by calling in-place forward function with a non-positive slope, "
+    "please call out-of-place version instead. File an issue at https://github.com/pytorch/pytorch if you do "
+    "require supporting in-place leakRelu backward calculation with non-positive slope");
+
   Tensor result;
-  auto iter = TensorIterator::binary_op(result, input, grad_output);
+  auto iter = TensorIterator::binary_op(result, self_or_result, grad_output);
   leaky_relu_backward_stub(iter.device_type(), iter, negval);
   return iter.output();
 }
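The note added above can be checked numerically: with a negative slope, two different inputs can produce the same forward output, so a backward pass that only sees the saved result cannot recover the input's sign. A small illustrative sketch (not part of the commit):

import torch
import torch.nn.functional as F

slope = -0.2
a = torch.tensor([2.0])    # positive input: passes through unchanged -> 2.0
b = torch.tensor([-10.0])  # negative input: scaled by the slope, -10 * -0.2 -> 2.0

# Both forward results are 2.0, yet the true gradients differ (1.0 vs. -0.2),
# so the saved result alone cannot determine the correct backward value.
print(F.leaky_relu(a, slope))  # tensor([2.])
print(F.leaky_relu(b, slope))  # tensor([2.])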

aten/src/ATen/native/native_functions.yaml

Lines changed: 2 additions & 14 deletions
@@ -5610,13 +5610,7 @@
     CUDA: leaky_relu
     QuantizedCPU: quantized_leaky_relu

-- func: leaky_relu_backward.grad_input(Tensor grad_output, Tensor self, Scalar negative_slope, *, Tensor(a!) grad_input) -> Tensor(a!)
-  python_module: nn
-  dispatch:
-    CPU: leaky_relu_backward_out
-    CUDA: leaky_relu_backward_out
-
-- func: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope) -> Tensor
+- func: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
   use_c10_dispatcher: full
   python_module: nn

@@ -5671,13 +5665,7 @@
     CPU: rrelu_with_noise_cpu
     CUDA: legacy::cuda::_thnn_rrelu_with_noise_forward

-- func: rrelu_with_noise_backward.grad_input(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, *, Tensor(a!) grad_input) -> Tensor(a!)
-  python_module: nn
-  dispatch:
-    CPU: rrelu_with_noise_backward_out
-    CUDA: rrelu_with_noise_backward_out
-
-- func: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training) -> Tensor
+- func: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor
   use_c10_dispatcher: full
   python_module: nn

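For reference, the updated out-of-place schema can be exercised directly from Python via the dispatcher. A minimal sketch, assuming a build that contains this change (torch.ops.aten is the standard entry point to native ops; the exact printed values are illustrative):

import torch

grad_output = torch.ones(3)
x = torch.tensor([-2.0, 0.5, 3.0])

# Out-of-place case: `self` is the original input, so self_is_result=False.
grad_input = torch.ops.aten.leaky_relu_backward(grad_output, x, 0.01, False)

# Elements with x > 0 pass the gradient through; the rest are scaled by the slope.
print(grad_input)  # tensor([0.0100, 1.0000, 1.0000])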

test/backward_compatibility/check_backward_compatibility.py

Lines changed: 2 additions & 0 deletions
@@ -57,6 +57,8 @@
     ('quantized::add_(scalar_)?(relu_)?out', datetime.date(2020, 3, 1)),
     ('quantized::cat_(relu_)?out', datetime.date(2020, 3, 1)),
     ('quantized::mul_(scalar_)?(relu_)?out', datetime.date(2020, 3, 1)),
+    ('aten::leaky_relu_backward', datetime.date(2020, 3, 6)),
+    ('aten::rrelu_with_noise_backward', datetime.date(2020, 3, 6)),
 ]

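The two new entries extend the backward-compatibility allowlist, which pairs an operator-name pattern with an expiry date. A rough sketch of how such (pattern, date) entries can be consulted; the helper name below is illustrative, not the script's actual API:

import datetime
import re

# Same (pattern, expiry date) shape as the entries added above.
ALLOW_LIST = [
    ('aten::leaky_relu_backward', datetime.date(2020, 3, 6)),
    ('aten::rrelu_with_noise_backward', datetime.date(2020, 3, 6)),
]

def change_is_allowed(op_name, today=None):
    """Illustrative: a schema change is tolerated while a matching entry has not expired."""
    today = today or datetime.date.today()
    return any(re.match(pattern, op_name) and today < expiry
               for pattern, expiry in ALLOW_LIST)

print(change_is_allowed('aten::leaky_relu_backward', today=datetime.date(2020, 3, 1)))  # True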

test/test_autograd.py

Lines changed: 12 additions & 0 deletions
@@ -3886,6 +3886,18 @@ def forward(ctx, foo):
         with self.assertRaisesRegex(RuntimeError, "must implement the backward"):
             BadBw.apply(inp).sum().backward()

+    def test_leaky_relu_inplace_with_neg_slope(self):
+        for device in torch.testing.get_all_device_types():
+            a = torch.tensor([-1., 1.], device=device, requires_grad=True)
+            b = torch.nn.functional.leaky_relu_(a.clone(), -2)
+            with self.assertRaisesRegex(RuntimeError, "call out-of-place version"):
+                b.backward(torch.ones(2, device=device))
+
+            a = torch.tensor([-1., 1.], device=device, requires_grad=True)
+            b = torch.nn.functional.rrelu_(a.clone(), -5.0, 1.0)
+            with self.assertRaisesRegex(RuntimeError, "call out-of-place version"):
+                b.backward(torch.ones(2, device=device))
+
 def index_variable(shape, max_indices):
     if not isinstance(shape, tuple):
         shape = (shape,)
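From a user's perspective, the behavior locked in by this test looks roughly like the sketch below; the exact error text comes from the TORCH_CHECK added in Activation.cpp:

import torch
import torch.nn.functional as F

x = torch.tensor([-1.0, 1.0], requires_grad=True)

# Out-of-place leaky_relu with a negative slope still differentiates fine.
y = F.leaky_relu(x, -0.2)
y.backward(torch.ones(2))

# The in-place variant saves only the forward result for autograd, so with a
# non-positive slope the backward pass now raises instead of guessing.
z = F.leaky_relu_(x.clone(), -0.2)
try:
    z.backward(torch.ones(2))
except RuntimeError as err:
    print(err)  # "... please call out-of-place version instead ..."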

tools/autograd/derivatives.yaml

Lines changed: 10 additions & 8 deletions
@@ -1081,10 +1081,10 @@
   self: hardtanh_backward(grad, result, min_val, max_val)

 - name: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor
-  self: leaky_relu_backward(grad, self, negative_slope)
+  self: leaky_relu_backward(grad, self, negative_slope, false)

 - name: leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)
-  self: leaky_relu_backward(grad, result, negative_slope)
+  self: leaky_relu_backward(grad, result, negative_slope, true)

 - name: log_sigmoid_forward(Tensor self) -> (Tensor output, Tensor buffer)
   self: log_sigmoid_backward(grad, self, buffer)
@@ -1099,10 +1099,10 @@
   grad_output, self, weight: prelu_double_backward(grads[0], grads[1], grad_output, self, weight)

 - name: rrelu_with_noise(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
-  self: rrelu_with_noise_backward(grad, self, noise, lower, upper, training)
+  self: rrelu_with_noise_backward(grad, self, noise, lower, upper, training, false)

 - name: rrelu_with_noise_(Tensor(a!) self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
-  self: rrelu_with_noise_backward(grad, result, noise, lower, upper, training)
+  self: rrelu_with_noise_backward(grad, result, noise, lower, upper, training, true)

 - name: _softmax(Tensor self, int dim, bool half_to_float) -> Tensor
   self: _softmax_backward_data(grad, result, dim, self)
@@ -1313,8 +1313,9 @@
   grad_output: grad.to(output.dtype()) - (grad.to(output.dtype()) * output.exp()).sum(dim, true)
   self: log_softmax_double_backward(grad.to(output.dtype()), grad_output, dim, output).to(self.dtype())

-- name: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope) -> Tensor
-  grad_output: leaky_relu_backward(grad, self, negative_slope)
+- name: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
+  # self_is_result is always false here since double backward call is an out-of-place call, self is input itself
+  grad_output: leaky_relu_backward(grad, self, negative_slope, false)
   self: zeros_like(grad, at::MemoryFormat::Preserve)

 - name: max_pool2d_with_indices_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices) -> Tensor
@@ -1346,8 +1347,9 @@
   self: zeros_like(grad, at::MemoryFormat::Preserve)
   target: non_differentiable

-- name: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training) -> Tensor
-  grad_output: rrelu_with_noise_backward(grad, self, noise, lower, upper, training)
+- name: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor
+  # self_is_result is always false here since double backward call is an out-of-place call, self is input itself
+  grad_output: rrelu_with_noise_backward(grad, self, noise, lower, upper, training, false)
   self: zeros_like(grad, at::MemoryFormat::Preserve)

 - name: reflection_pad1d_backward(Tensor grad_output, Tensor self, int[2] padding) -> Tensor
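The comments above matter for double backward: the double-backward formula is an out-of-place call, so self_is_result is hard-coded to false and second-order gradients of the out-of-place ops keep working even with a negative slope. A hedged sketch using the standard gradcheck/gradgradcheck utilities:

import torch
import torch.nn.functional as F
from torch.autograd import gradcheck, gradgradcheck

# Points chosen away from zero, where leaky_relu is differentiable.
x = torch.tensor([-2.0, -0.5, 0.5, 2.0], dtype=torch.double, requires_grad=True)

# First- and second-order gradients of the out-of-place op with a negative
# slope route through leaky_relu_backward(..., self_is_result=false).
assert gradcheck(lambda t: F.leaky_relu(t, -0.2), (x,))
assert gradgradcheck(lambda t: F.leaky_relu(t, -0.2), (x,))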
