42 changes: 21 additions & 21 deletions aten/src/ATen/BatchingRegistrations.cpp
@@ -187,19 +187,19 @@ std::vector<Tensor> chunk_batching_rule(const Tensor& self, int64_t chunks, int6
return result;
}

-Tensor clamp_batching_rule(const Tensor& self, optional<Scalar> min, optional<Scalar> max) {
+Tensor clamp_batching_rule(const Tensor& self, const optional<Scalar>& min, const optional<Scalar>& max) {
auto self_physical = MultiBatchVmapTransform::logicalToPhysical(self);
auto result = at::clamp(self_physical.tensor(), min, max);
return self_physical.getPhysicalToLogicalMap().apply(result);
}

-Tensor clamp_min_batching_rule(const Tensor& self, Scalar min) {
+Tensor clamp_min_batching_rule(const Tensor& self, const Scalar& min) {
auto self_physical = MultiBatchVmapTransform::logicalToPhysical(self);
auto result = at::clamp_min(self_physical.tensor(), min);
return self_physical.getPhysicalToLogicalMap().apply(result);
}

-Tensor clamp_max_batching_rule(const Tensor& self, Scalar max) {
+Tensor clamp_max_batching_rule(const Tensor& self, const Scalar& max) {
auto self_physical = MultiBatchVmapTransform::logicalToPhysical(self);
auto result = at::clamp_max(self_physical.tensor(), max);
return self_physical.getPhysicalToLogicalMap().apply(result);
@@ -233,7 +233,7 @@ Tensor unsqueeze_batching_rule(const Tensor& self, int64_t dim) {
return self_physical.getPhysicalToLogicalMap().apply(result);
}

-Tensor& fill_inplace_scalar_batching_rule(Tensor& self, Scalar value) {
+Tensor& fill_inplace_scalar_batching_rule(Tensor& self, const Scalar& value) {
auto self_physical = MultiBatchVmapTransform::logicalToPhysical(self);
self_physical.tensor().fill_(value);
return self;
@@ -708,7 +708,7 @@ Tensor unwrap_and_call_method(const Tensor& input, ExtraArgs... extra_args) {
return makeBatched(output_physical, BatchDims(old_bdims.begin(), old_bdims.end()));
}

-Tensor pow_scalar_Tensor_batching_rule(Scalar other, const Tensor& self) {
+Tensor pow_scalar_Tensor_batching_rule(const Scalar& other, const Tensor& self) {
auto* self_batched = unsafeGetBatchedImpl(self);
auto output_physical = at::pow(other, self_batched->value());
auto old_bdims = self_batched->bdims();
@@ -1120,36 +1120,36 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {
#undef TO_BATCHING_RULE
m.impl("clone", clone_batching_rule);

-using TensorTensorScalarType = Tensor (*)(const Tensor&, const Tensor&, Scalar);
+using TensorTensorScalarType = Tensor (*)(const Tensor&, const Tensor&, const Scalar&);
using TensorTensorType = Tensor (*)(const Tensor&, const Tensor&);
-using TensorScalarType = Tensor (*)(const Tensor&, Scalar);
+using TensorScalarType = Tensor (*)(const Tensor&, const Scalar&);

#define BINARY_POINTWISE(op) \
m.impl(#op".Tensor", binary_pointwise_batching_rule<TensorTensorType, at::op>); \
-m.impl(#op".Scalar", unwrap_and_call<TensorScalarType, at::op, Scalar>);
+m.impl(#op".Scalar", unwrap_and_call<TensorScalarType, at::op, const Scalar&>);
#define BINARY_POINTWISE_VA(op, ...) \
{ \
using Binop = Tensor (*)(const Tensor&, const Tensor&, __VA_ARGS__); \
-using Unop = Tensor (*)(const Tensor&, Scalar, __VA_ARGS__); \
+using Unop = Tensor (*)(const Tensor&, const Scalar&, __VA_ARGS__); \
m.impl(#op".Tensor", binary_pointwise_batching_rule<Binop, at::op, __VA_ARGS__>); \
-m.impl(#op".Scalar", unwrap_and_call<Unop, at::op, Scalar, __VA_ARGS__>); \
+m.impl(#op".Scalar", unwrap_and_call<Unop, at::op, const Scalar&, __VA_ARGS__>); \
}

-BINARY_POINTWISE_VA(add, Scalar);
-BINARY_POINTWISE_VA(sub, Scalar);
-BINARY_POINTWISE_VA(rsub, Scalar);
+BINARY_POINTWISE_VA(add, const Scalar&);
+BINARY_POINTWISE_VA(sub, const Scalar&);
+BINARY_POINTWISE_VA(rsub, const Scalar&);
BINARY_POINTWISE(mul);
BINARY_POINTWISE(div);
{
using Binop = Tensor (*)(const Tensor&, const Tensor&, std::string);
-using Unop = Tensor (*)(const Tensor&, Scalar, std::string);
+using Unop = Tensor (*)(const Tensor&, const Scalar&, std::string);
m.impl("div.Tensor_mode", binary_pointwise_batching_rule<Binop, at::div, std::string>);
-m.impl("div.Scalar_mode", unwrap_and_call<Unop, at::div, Scalar, std::string>);
+m.impl("div.Scalar_mode", unwrap_and_call<Unop, at::div, const Scalar&, std::string>);
}

// at::pow has three out-of-place overloads
m.impl("pow.Tensor_Tensor", binary_pointwise_batching_rule<TensorTensorType, at::pow>);
-m.impl("pow.Tensor_Scalar", unwrap_and_call<TensorScalarType, at::pow, Scalar>);
+m.impl("pow.Tensor_Scalar", unwrap_and_call<TensorScalarType, at::pow, const Scalar&>);
m.impl("pow.Scalar", pow_scalar_Tensor_batching_rule);

m.impl("sigmoid_backward", binary_pointwise_batching_rule<TensorTensorType, at::sigmoid_backward>);
@@ -1158,15 +1158,15 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {
binary_pointwise_batching_rule<
TensorTensorScalarType,
at::threshold_backward,
-Scalar>);
+const Scalar&>);

// for at::result_type, call the native::result_type implementation.
// We don't have to do anything special because native::result_type operates
// on the logical shape of the tensors.
m.impl("result_type.Tensor", static_cast<ScalarType (*)(const Tensor&, const Tensor&)>(native::result_type));
-m.impl("result_type.Scalar", static_cast<ScalarType (*)(const Tensor&, Scalar)>(native::result_type));
-m.impl("result_type.Scalar_Tensor", static_cast<ScalarType (*)(Scalar, const Tensor&)>(native::result_type));
-m.impl("result_type.Scalar_Scalar", static_cast<ScalarType (*)(Scalar, Scalar)>(native::result_type));
+m.impl("result_type.Scalar", static_cast<ScalarType (*)(const Tensor&, const Scalar&)>(native::result_type));
+m.impl("result_type.Scalar_Tensor", static_cast<ScalarType (*)(const Scalar&, const Tensor&)>(native::result_type));
+m.impl("result_type.Scalar_Scalar", static_cast<ScalarType (*)(const Scalar&, const Scalar&)>(native::result_type));

#undef BINARY_POINTWISE_VA
#undef BINARY_POINTWISE
@@ -1207,7 +1207,7 @@ TORCH_LIBRARY_IMPL(aten, Batched, m) {
// Comparison ops
#define COMPARISON_POINTWISE(op) \
m.impl(#op".Tensor", comparison_pointwise_batching_rule<TensorTensorType, at::op>); \
-m.impl(#op".Scalar", unwrap_and_call<TensorScalarType, at::op, Scalar>);
+m.impl(#op".Scalar", unwrap_and_call<TensorScalarType, at::op, const Scalar&>);

COMPARISON_POINTWISE(eq);
COMPARISON_POINTWISE(gt);
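Every hunk in BatchingRegistrations.cpp above makes the same change: Scalar arguments move from pass-by-value to pass-by-const-reference, and the function-pointer aliases and unwrap_and_call template arguments are updated to match. A minimal standalone sketch of the by-value vs. const-reference pattern, using an illustrative FakeScalar stand-in rather than the real c10::Scalar:

#include <cstdint>
#include <iostream>

// Illustrative stand-in for a Scalar-like value: a tagged union a couple of
// machine words wide, so copying it on every call is avoidable overhead.
struct FakeScalar {
  enum class Tag { Int, Double } tag;
  union { int64_t i; double d; } v;
  double to_double() const {
    return tag == Tag::Int ? static_cast<double>(v.i) : v.d;
  }
};

// Old style: the argument object is copied on every call.
double clamp_min_by_value(double x, FakeScalar min) {
  return x < min.to_double() ? min.to_double() : x;
}

// New style, as in the diff: a const reference binds to lvalues and
// temporaries alike, with no copy of the argument object.
double clamp_min_by_cref(double x, const FakeScalar& min) {
  return x < min.to_double() ? min.to_double() : x;
}

int main() {
  FakeScalar zero;
  zero.tag = FakeScalar::Tag::Double;
  zero.v.d = 0.0;
  std::cout << clamp_min_by_value(-1.5, zero) << "\n";  // prints 0
  std::cout << clamp_min_by_cref(-1.5, zero) << "\n";   // prints 0
}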
10 changes: 5 additions & 5 deletions aten/src/ATen/LegacyTHFunctionsCPU.cpp
@@ -442,7 +442,7 @@ Tensor _th_std(const Tensor & self, bool unbiased) {
AT_ERROR("_th_std not supported on CPUType for ", dispatch_scalar_type);
}
}
-Tensor & _th_renorm_out(Tensor & result, const Tensor & self, Scalar p, int64_t dim, Scalar maxnorm) {
+Tensor & _th_renorm_out(Tensor & result, const Tensor & self, const Scalar& p, int64_t dim, const Scalar& maxnorm) {
// DeviceGuard omitted
auto dispatch_scalar_type = infer_scalar_type(self);

@@ -468,7 +468,7 @@ Tensor & _th_renorm_out(Tensor & result, const Tensor & self, Scalar p, int64_t
}
return result;
}
-Tensor _th_renorm(const Tensor & self, Scalar p, int64_t dim, Scalar maxnorm) {
+Tensor _th_renorm(const Tensor & self, const Scalar& p, int64_t dim, const Scalar& maxnorm) {
// DeviceGuard omitted
auto dispatch_scalar_type = infer_scalar_type(self);
auto result_ = c10::make_intrusive<TensorImpl, UndefinedTensorImpl>(c10::Storage(c10::Storage::use_byte_size_t(), 0, allocator(), true),DispatchKey::CPU, scalarTypeToTypeMeta(dispatch_scalar_type)).release();
@@ -493,7 +493,7 @@ Tensor _th_renorm(const Tensor & self, Scalar p, int64_t dim, Scalar maxnorm) {
}
return result;
}
-Tensor & _th_renorm_(Tensor & self, Scalar p, int64_t dim, Scalar maxnorm) {
+Tensor & _th_renorm_(Tensor & self, const Scalar& p, int64_t dim, const Scalar& maxnorm) {
// DeviceGuard omitted
auto dispatch_scalar_type = infer_scalar_type(self);

@@ -517,7 +517,7 @@ Tensor & _th_renorm_(Tensor & self, Scalar p, int64_t dim, Scalar maxnorm) {
}
return self;
}
-Tensor & _th_histc_out(Tensor & result, const Tensor & self, int64_t bins, Scalar min, Scalar max) {
+Tensor & _th_histc_out(Tensor & result, const Tensor & self, int64_t bins, const Scalar& min, const Scalar& max) {
// DeviceGuard omitted
auto dispatch_scalar_type = infer_scalar_type(self);

@@ -543,7 +543,7 @@ Tensor & _th_histc_out(Tensor & result, const Tensor & self, int64_t bins, Scala
}
return result;
}
-Tensor _th_histc(const Tensor & self, int64_t bins, Scalar min, Scalar max) {
+Tensor _th_histc(const Tensor & self, int64_t bins, const Scalar& min, const Scalar& max) {
// DeviceGuard omitted
auto dispatch_scalar_type = infer_scalar_type(self);
auto result_ = c10::make_intrusive<TensorImpl, UndefinedTensorImpl>(c10::Storage(c10::Storage::use_byte_size_t(), 0, allocator(), true),DispatchKey::CPU, scalarTypeToTypeMeta(dispatch_scalar_type)).release();
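The _th_renorm* and _th_histc* hunks above only change how the Scalar arguments p, maxnorm, min, and max are passed; the semantics are untouched. For orientation, an illustrative plain-C++ sketch of what a renorm-style operation does with those scalar arguments (renorm_rows is a hypothetical helper, not the TH kernel):

#include <cmath>
#include <cstdio>
#include <vector>

// Rescale every row of a 2-D matrix whose p-norm exceeds maxnorm so that
// its norm becomes exactly maxnorm; rows already within the bound are kept.
void renorm_rows(std::vector<std::vector<double>>& m, double p, double maxnorm) {
  for (auto& row : m) {
    double norm = 0.0;
    for (double x : row) norm += std::pow(std::fabs(x), p);
    norm = std::pow(norm, 1.0 / p);
    if (norm > maxnorm) {
      const double scale = maxnorm / norm;
      for (double& x : row) x *= scale;
    }
  }
}

int main() {
  std::vector<std::vector<double>> m = {{3.0, 4.0}, {0.3, 0.4}};
  renorm_rows(m, 2.0, 1.0);                   // first row has L2 norm 5 -> rescaled to 1
  std::printf("%g %g\n", m[0][0], m[0][1]);   // 0.6 0.8
  std::printf("%g %g\n", m[1][0], m[1][1]);   // 0.3 0.4 (already within the bound)
}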
10 changes: 5 additions & 5 deletions aten/src/ATen/LegacyTHFunctionsCPU.h
@@ -30,11 +30,11 @@ std::tuple<Tensor &,Tensor &> _th_mode_out(Tensor & values, Tensor & indices, co
std::tuple<Tensor,Tensor> _th_mode(const Tensor & self, int64_t dim, bool keepdim);
Tensor _th_var(const Tensor & self, bool unbiased);
Tensor _th_std(const Tensor & self, bool unbiased);
-Tensor & _th_renorm_out(Tensor & result, const Tensor & self, Scalar p, int64_t dim, Scalar maxnorm);
-Tensor _th_renorm(const Tensor & self, Scalar p, int64_t dim, Scalar maxnorm);
-Tensor & _th_renorm_(Tensor & self, Scalar p, int64_t dim, Scalar maxnorm);
-Tensor & _th_histc_out(Tensor & result, const Tensor & self, int64_t bins, Scalar min, Scalar max);
-Tensor _th_histc(const Tensor & self, int64_t bins, Scalar min, Scalar max);
+Tensor & _th_renorm_out(Tensor & result, const Tensor & self, const Scalar& p, int64_t dim, const Scalar& maxnorm);
+Tensor _th_renorm(const Tensor & self, const Scalar& p, int64_t dim, const Scalar& maxnorm);
+Tensor & _th_renorm_(Tensor & self, const Scalar& p, int64_t dim, const Scalar& maxnorm);
+Tensor & _th_histc_out(Tensor & result, const Tensor & self, int64_t bins, const Scalar& min, const Scalar& max);
+Tensor _th_histc(const Tensor & self, int64_t bins, const Scalar& min, const Scalar& max);
std::tuple<Tensor &,Tensor &> _th_gels_out(Tensor & res1, Tensor & res2, const Tensor & self, const Tensor & A);
std::tuple<Tensor,Tensor> _th_gels(const Tensor & self, const Tensor & A);
std::tuple<Tensor &,Tensor &> _th_geqrf_out(Tensor & res1, Tensor & res2, const Tensor & self);
26 changes: 13 additions & 13 deletions aten/src/ATen/LegacyTHFunctionsCUDA.h
@@ -18,8 +18,8 @@ namespace native {
namespace legacy {
namespace cuda {

-Tensor & _th_masked_fill_(Tensor & self, const Tensor & mask, Scalar value);
-Tensor & _th_masked_fill_bool_(Tensor & self, const Tensor & mask, Scalar value);
+Tensor & _th_masked_fill_(Tensor & self, const Tensor & mask, const Scalar& value);
+Tensor & _th_masked_fill_bool_(Tensor & self, const Tensor & mask, const Scalar& value);
Tensor & _th_index_copy_(Tensor & self, int64_t dim, const Tensor & index, const Tensor & source);
Tensor & _th_take_out(Tensor & result, const Tensor & self, const Tensor & index);
Tensor _th_take(const Tensor & self, const Tensor & index);
@@ -32,9 +32,9 @@ std::tuple<Tensor &,Tensor &> _th_sort_out_stable(Tensor & values, Tensor & indi
std::tuple<Tensor,Tensor> _th_sort_stable(const Tensor & self, c10::optional<bool> stable, int64_t dim, bool descending);
std::tuple<Tensor &,Tensor &> _th_topk_out(Tensor & values, Tensor & indices, const Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted);
std::tuple<Tensor,Tensor> _th_topk(const Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted);
-Tensor & _th_renorm_out(Tensor & result, const Tensor & self, Scalar p, int64_t dim, Scalar maxnorm);
-Tensor _th_renorm(const Tensor & self, Scalar p, int64_t dim, Scalar maxnorm);
-Tensor & _th_renorm_(Tensor & self, Scalar p, int64_t dim, Scalar maxnorm);
+Tensor & _th_renorm_out(Tensor & result, const Tensor & self, const Scalar& p, int64_t dim, const Scalar& maxnorm);
+Tensor _th_renorm(const Tensor & self, const Scalar& p, int64_t dim, const Scalar& maxnorm);
+Tensor & _th_renorm_(Tensor & self, const Scalar& p, int64_t dim, const Scalar& maxnorm);
Tensor & _th_cross_kernel_out(Tensor & result, const Tensor & self, const Tensor & other, int64_t dim);
Tensor _th_cross_kernel(const Tensor & self, const Tensor & other, int64_t dim);
std::tuple<Tensor &,Tensor &> _th_gels_out(Tensor & res1, Tensor & res2, const Tensor & self, const Tensor & A);
@@ -44,10 +44,10 @@ Tensor _th_potri(const Tensor & self, bool upper);
std::tuple<Tensor &,Tensor &> _th_geqrf_out(Tensor & res1, Tensor & res2, const Tensor & self);
std::tuple<Tensor,Tensor> _th_geqrf(const Tensor & self);
Tensor & _th_copy_ignoring_overlaps_(Tensor & self, const Tensor & src);
-Tensor & _thnn_multi_margin_loss_forward_out(Tensor & output, const Tensor & self, const Tensor & target, Scalar p, Scalar margin, const Tensor & weight, int64_t reduction);
-Tensor _thnn_multi_margin_loss_forward(const Tensor & self, const Tensor & target, Scalar p, Scalar margin, const Tensor & weight, int64_t reduction);
-Tensor & _thnn_multi_margin_loss_backward_out(Tensor & grad_input, const Tensor & grad_output, const Tensor & self, const Tensor & target, Scalar p, Scalar margin, const Tensor & weight, int64_t reduction);
-Tensor _thnn_multi_margin_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, Scalar p, Scalar margin, const Tensor & weight, int64_t reduction);
+Tensor & _thnn_multi_margin_loss_forward_out(Tensor & output, const Tensor & self, const Tensor & target, const Scalar& p, const Scalar& margin, const Tensor & weight, int64_t reduction);
+Tensor _thnn_multi_margin_loss_forward(const Tensor & self, const Tensor & target, const Scalar& p, const Scalar& margin, const Tensor & weight, int64_t reduction);
+Tensor & _thnn_multi_margin_loss_backward_out(Tensor & grad_input, const Tensor & grad_output, const Tensor & self, const Tensor & target, const Scalar& p, const Scalar& margin, const Tensor & weight, int64_t reduction);
+Tensor _thnn_multi_margin_loss_backward(const Tensor & grad_output, const Tensor & self, const Tensor & target, const Scalar& p, const Scalar& margin, const Tensor & weight, int64_t reduction);
std::tuple<Tensor &,Tensor &> _thnn_multilabel_margin_loss_forward_out(Tensor & output, Tensor & is_target, const Tensor & self, const Tensor & target, int64_t reduction);
std::tuple<Tensor,Tensor> _thnn_multilabel_margin_loss_forward(const Tensor & self, const Tensor & target, int64_t reduction);
Tensor & _thnn_multilabel_margin_loss_backward_out(Tensor & grad_input, const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, const Tensor & is_target);
@@ -68,10 +68,10 @@ std::tuple<Tensor &,Tensor &> _thnn_log_sigmoid_forward_out(Tensor & output, Ten
std::tuple<Tensor,Tensor> _thnn_log_sigmoid_forward(const Tensor & self);
Tensor & _thnn_log_sigmoid_backward_out(Tensor & grad_input, const Tensor & grad_output, const Tensor & self, const Tensor & buffer);
Tensor _thnn_log_sigmoid_backward(const Tensor & grad_output, const Tensor & self, const Tensor & buffer);
-Tensor & _thnn_rrelu_with_noise_forward_out(Tensor & output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
-Tensor _thnn_rrelu_with_noise_forward(const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
-Tensor _thnn_rrelu_with_noise_backward(const Tensor & grad_output, const Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training);
-Tensor & _thnn_rrelu_with_noise_forward_(Tensor & self, const Tensor & noise, Scalar lower, Scalar upper, bool training, c10::optional<at::Generator> generator);
+Tensor & _thnn_rrelu_with_noise_forward_out(Tensor & output, const Tensor & self, const Tensor & noise, const Scalar& lower, const Scalar& upper, bool training, c10::optional<at::Generator> generator);
+Tensor _thnn_rrelu_with_noise_forward(const Tensor & self, const Tensor & noise, const Scalar& lower, const Scalar& upper, bool training, c10::optional<at::Generator> generator);
+Tensor _thnn_rrelu_with_noise_backward(const Tensor & grad_output, const Tensor & self, const Tensor & noise, const Scalar& lower, const Scalar& upper, bool training);
+Tensor & _thnn_rrelu_with_noise_forward_(Tensor & self, const Tensor & noise, const Scalar& lower, const Scalar& upper, bool training, c10::optional<at::Generator> generator);
std::tuple<Tensor &,Tensor &,Tensor &> _thnn_conv2d_forward_out(Tensor & output, Tensor & columns, Tensor & ones, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const Tensor & bias, IntArrayRef stride, IntArrayRef padding);
std::tuple<Tensor,Tensor,Tensor> _thnn_conv2d_forward(const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const Tensor & bias, IntArrayRef stride, IntArrayRef padding);
std::tuple<Tensor &,Tensor &,Tensor &> _thnn_conv2d_backward_out(Tensor & grad_input, Tensor & grad_weight, Tensor & grad_bias, const Tensor & grad_output, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, const Tensor & columns, const Tensor & ones);
6 changes: 3 additions & 3 deletions aten/src/ATen/ScalarOps.cpp
@@ -13,23 +13,23 @@
namespace at {
namespace {
template <typename scalar_t>
-inline void fill_inplace(Tensor& self, Scalar value_scalar) {
+inline void fill_inplace(Tensor& self, const Scalar& value_scalar) {
auto value = value_scalar.to<scalar_t>();
scalar_t* dptr = static_cast<scalar_t*>(self.data_ptr());
*dptr = value;
}
}

namespace detail {
-Tensor& scalar_fill(Tensor& self, Scalar value) {
+Tensor& scalar_fill(Tensor& self, const Scalar& value) {
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND3(
kHalf, kBool, kBFloat16, self.scalar_type(), "fill_out", [&]() {
fill_inplace<scalar_t>(self, value);
});
return self;
}

-Tensor scalar_tensor_static(Scalar s, c10::optional<ScalarType> dtype_opt, c10::optional<Device> device_opt) {
+Tensor scalar_tensor_static(const Scalar& s, c10::optional<ScalarType> dtype_opt, c10::optional<Device> device_opt) {
at::tracer::impl::NoTracerDispatchMode tracer_guard;
at::AutoNonVariableTypeMode non_var_type_mode(true);
auto result = at::detail::empty_cpu({}, dtype_opt, c10::nullopt, device_opt, c10::nullopt, c10::nullopt);
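scalar_fill above uses the AT_DISPATCH_* machinery to turn the runtime scalar type of self into a compile-time scalar_t before calling fill_inplace. A rough standalone sketch of that dispatch idea, with hypothetical names (MyScalarType, dispatch_by_type, fill_first_element) and a tag parameter instead of the macro-injected scalar_t:

#include <cstdint>
#include <stdexcept>

enum class MyScalarType { Float, Double, Long };

// Map a runtime type tag to a static type and invoke a generic callable.
template <typename F>
void dispatch_by_type(MyScalarType t, F&& f) {
  switch (t) {
    case MyScalarType::Float:  f(float{});   break;
    case MyScalarType::Double: f(double{});  break;
    case MyScalarType::Long:   f(int64_t{}); break;
    default: throw std::runtime_error("unsupported scalar type");
  }
}

// Analogous to fill_inplace: write one typed value into an untyped buffer.
void fill_first_element(void* data, MyScalarType t, double value) {
  dispatch_by_type(t, [&](auto tag) {
    using scalar_t = decltype(tag);
    *static_cast<scalar_t*>(data) = static_cast<scalar_t>(value);
  });
}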
6 changes: 3 additions & 3 deletions aten/src/ATen/ScalarOps.h
@@ -11,8 +11,8 @@ namespace detail {
// Ideally this fast pass should be implemented in TensorIterator,
// but we also want to skip compute_types which in not avoidable
// in TensorIterator for now.
-Tensor& scalar_fill(Tensor& self, Scalar value);
-TORCH_API Tensor scalar_tensor_static(Scalar s, c10::optional<ScalarType> dtype_opt, c10::optional<Device> device_opt);
+Tensor& scalar_fill(Tensor& self, const Scalar& value);
+TORCH_API Tensor scalar_tensor_static(const Scalar& s, c10::optional<ScalarType> dtype_opt, c10::optional<Device> device_opt);
} // namespace detail
} // namespace at

@@ -21,7 +21,7 @@ namespace c10 {

// FIXME: this should be (and was) Scalar::toTensor, but there is currently no way
// to implement this without going through Derived Types (which are not part of core).
-inline at::Tensor scalar_to_tensor(Scalar s, const Device device = at::kCPU) {
+inline at::Tensor scalar_to_tensor(const Scalar& s, const Device device = at::kCPU) {
// This is the fast track we have for CPU scalar tensors.
if (device == at::kCPU) {
if (s.isFloatingPoint()) {
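A call-site sketch for the const Scalar& version of scalar_to_tensor declared above (assumes a PyTorch/ATen development build where these headers and libtorch are available): numeric literals keep working because they convert to a Scalar temporary, and a temporary binds to a const reference, which it would not do for a non-const Scalar&.

#include <ATen/ATen.h>
#include <ATen/ScalarOps.h>
#include <iostream>

int main() {
  // Both calls read the same before and after the signature change;
  // the literal is implicitly converted to a Scalar temporary.
  at::Tensor a = c10::scalar_to_tensor(3.5);          // double -> Scalar
  at::Tensor b = c10::scalar_to_tensor(7, at::kCPU);  // int    -> Scalar
  std::cout << a.item<double>() << " " << b.item<int64_t>() << "\n";  // 3.5 7
  return 0;
}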