Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Don't copy on clamp, clamp_out
This makes clamp and relu faster (fixes #10276).

The extra copying was introduced when clamp moved to ATen and
the _th_clamp_ wrapper was used to forward to TH/THC;
we remove that and instead add _th_clamp(_out), which writes directly into the output tensor.
  • Loading branch information
t-vi committed Aug 8, 2018
commit ec2bbcd20e23a32b5a716fd850bbd557f2377105
15 changes: 9 additions & 6 deletions aten/src/ATen/Declarations.cwrap
Original file line number Diff line number Diff line change
Expand Up @@ -2267,39 +2267,42 @@
- THTensor* other
]]
[[
name: _th_clamp_
name: _th_clamp
cname: clamp
variants:
- method
- function
return: argument 0
arguments:
- THTensor* self
- arg: THTensor* result
output: True
- THTensor* self
- real min
- real max
]]
[[
name: _th_clamp_min_
name: _th_clamp_min
cname: cmaxValue
variants:
- method
- function
return: argument 0
arguments:
- THTensor* self
- arg: THTensor* result
output: True
- THTensor* self
- real min
]]
[[
name: _th_clamp_max_
name: _th_clamp_max
cname: cminValue
variants:
- method
- function
return: argument 0
arguments:
- THTensor* self
- arg: THTensor* result
output: True
- THTensor* self
- real max
]]
Expand Down
26 changes: 10 additions & 16 deletions aten/src/ATen/native/UnaryOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ Tensor clamp_min(const Tensor& self, Scalar min) {

Tensor& _clamp__cpu(Tensor& self, Scalar min, Scalar max) {
if (!std::isnan(min.toDouble()) && !std::isnan(max.toDouble())) {
return _th_clamp_(self, min, max);
return _th_clamp_out(self, self, min, max);
} else if (std::isnan(min.toDouble())) {
return _th_clamp_max_(self, max);
return _th_clamp_max_out(self, self, max);
} else if (std::isnan(max.toDouble())) {
return _th_clamp_min_(self, min);
return _th_clamp_min_out(self, self, min);
} else {
return self;
}
Expand All @@ -62,36 +62,30 @@ Tensor& _clamp_out_cpu(
const Tensor& self,
Scalar min,
Scalar max) {
result.resize_(self.sizes());
result.copy_(self);
if (!std::isnan(min.toDouble()) && !std::isnan(max.toDouble())) {
_th_clamp_(result, min, max);
_th_clamp_out(result, self, min, max);
} else if (std::isnan(min.toDouble())) {
_th_clamp_max_(result, max);
_th_clamp_max_out(result, self, max);
} else if (std::isnan(max.toDouble())) {
_th_clamp_min_(result, min);
_th_clamp_min_out(result, self, min);
}
return result;
}

Tensor& _clamp_max__cpu(Tensor& self, Scalar max) {
return _th_clamp_max_(self, max);
return _th_clamp_max_out(self, self, max);
}

Tensor& _clamp_max_out_cpu(Tensor& result, const Tensor& self, Scalar max) {
result.resize_(self.sizes());
result.copy_(self);
return _th_clamp_max_(result, max);
return _th_clamp_max_out(result, self, max);
}

Tensor& _clamp_min__cpu(Tensor& self, Scalar min) {
return _th_clamp_min_(self, min);
return _th_clamp_min_out(self, self, min);
}

Tensor& _clamp_min_out_cpu(Tensor& result, const Tensor& self, Scalar min) {
result.resize_(self.sizes());
result.copy_(self);
return _th_clamp_min_(result, min);
return _th_clamp_min_out(result, self, min);
}

Tensor& fill_(Tensor& self, Scalar value) {
Expand Down
26 changes: 10 additions & 16 deletions aten/src/ATen/native/cuda/CUDAUnaryOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ namespace at { namespace native {

Tensor& _clamp__cuda(Tensor& self, Scalar min, Scalar max) {
if (!std::isnan(min.toDouble()) && !std::isnan(max.toDouble())) {
return _th_clamp_(self, min, max);
return _th_clamp_out(self, self, min, max);
} else if (std::isnan(min.toDouble())) {
return _th_clamp_max_(self, max);
return _th_clamp_max_out(self, self, max);
} else if (std::isnan(max.toDouble())) {
return _th_clamp_min_(self, min);
return _th_clamp_min_out(self, self, min);
} else {
return self;
}
Expand All @@ -19,36 +19,30 @@ Tensor& _clamp_out_cuda(
const Tensor& self,
Scalar min,
Scalar max) {
result.resize_(self.sizes());
result.copy_(self);
if (!std::isnan(min.toDouble()) && !std::isnan(max.toDouble())) {
_th_clamp_(result, min, max);
_th_clamp_out(result, self, min, max);
} else if (std::isnan(min.toDouble())) {
_th_clamp_max_(result, max);
_th_clamp_max_out(result, self, max);
} else if (std::isnan(max.toDouble())) {
_th_clamp_min_(result, min);
_th_clamp_min_out(result, self, min);
}
return result;
}

Tensor& _clamp_max__cuda(Tensor& self, Scalar max) {
return _th_clamp_max_(self, max);
return _th_clamp_max_out(self, self, max);
}

Tensor& _clamp_max_out_cuda(Tensor& result, const Tensor& self, Scalar max) {
result.resize_(self.sizes());
result.copy_(self);
return _th_clamp_max_(result, max);
return _th_clamp_max_out(result, self, max);
}

Tensor& _clamp_min__cuda(Tensor& self, Scalar min) {
return _th_clamp_min_(self, min);
return _th_clamp_min_out(self, self, min);
}

Tensor& _clamp_min_out_cuda(Tensor& result, const Tensor& self, Scalar min) {
result.resize_(self.sizes());
result.copy_(self);
return _th_clamp_min_(result, min);
return _th_clamp_min_out(result, self, min);
}

// These are just forwarding stubs
Expand Down