
Commit 010ec29

Update on "[RPC profiling] Add tests to ensure RPC profiling works on single threaded server"

* #44664 [RPC profiling] Extend RPC profiling to support async function execution over RPC.
* #44655 [RPC profiling] Don't wrap toHere() calls with profiling
* #44653 [RPC profiling] Allow disableProfiler() to be called from another thread.
* #44646 Remove thread_local RecordFunctionGuard from profiler.

This ensures that RPC profiling works in single-threaded server scenarios and that we won't make the assumption that we'll have multiple threads when working on this code. For example, this assumption resulted in a bug in the previous diff (which was fixed).

Differential Revision: [D23691304](https://our.internmc.facebook.com/intern/diff/D23691304/)

[ghstack-poisoned]
2 parents 968126a + fead225 commit 010ec29
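
As context for the tests being added, here is a minimal sketch of the usage pattern they exercise: issuing an RPC while the autograd profiler is enabled. This is illustrative only; the worker name, tensor arguments, and the single-threaded server configuration are assumptions, not the actual test code from this commit.

```python
# Illustrative sketch only -- not the test added in this commit.
# Assumes rpc.init_rpc(...) has already been called and a peer named
# "worker1" exists; the server side may be running a single request thread.
import torch
import torch.distributed.rpc as rpc
from torch.autograd.profiler import profile

with profile() as prof:
    # Issue an RPC while the profiler is enabled; the remote op should appear
    # in the collected events even when the server is single threaded.
    fut = rpc.rpc_async("worker1", torch.add, args=(torch.ones(2), torch.ones(2)))
    fut.wait()

print(prof.key_averages().table(sort_by="cpu_time_total"))
```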


57 files changed: +1323 / -533 lines

aten/src/ATen/native/Col2Im.cpp

Lines changed: 1 addition & 1 deletion
@@ -137,7 +137,7 @@ static void col2im_out_cpu_template(
   output.resize_({batch_size, n_output_plane, output_height, output_width});
   output.zero_();
 
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf,
       input.scalar_type(), "col2im_out_cpu", [&] {
         Tensor input_n = Tensor();
         Tensor output_n = Tensor();

aten/src/ATen/native/Im2Col.cpp

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ static void im2col_out_cpu_template(
   output.resize_({batch_size, n_output_plane, output_length});
   output.zero_();
 
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf,
       input.scalar_type(), "im2col_out_cpu", [&] {
         Tensor input_n;
         Tensor output_n;
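
The two CPU hunks above (and the matching CUDA hunk later in this commit) widen the im2col/col2im dispatch to complex dtypes. A rough sketch of the user-visible effect, assuming these kernels are reached through torch.nn.functional.unfold and fold; whether every surrounding check already accepts complex inputs is an assumption here.

```python
# Sketch: complex CPU tensors dispatching through im2col/col2im
# (torch.nn.functional.unfold / fold) after the macro change above.
import torch
import torch.nn.functional as F

x = torch.randn(1, 2, 6, 6, dtype=torch.cfloat)          # complex input on CPU
patches = F.unfold(x, kernel_size=3)                      # im2col path
y = F.fold(patches, output_size=(6, 6), kernel_size=3)    # col2im path
print(patches.shape, y.dtype)
```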

aten/src/ATen/native/LinearAlgebra.cpp

Lines changed: 34 additions & 62 deletions
@@ -1283,23 +1283,11 @@ Tensor frobenius_norm(const Tensor& self) {
 }
 
 Tensor frobenius_norm(const Tensor& self, IntArrayRef dim, bool keepdim) {
-  TORCH_CHECK(!self.is_complex(), "frobenius norm not supported for complex tensors");
-  TORCH_CHECK(
-      dim.size() <= 2,
-      "Expected at most 2 dimensions, but got ",
-      dim.size(),
-      " dimensions instead.");
-  if (dim.size() == 1 || dim.size() == 0) {
-    return at::norm(self, 2, dim, keepdim);
-  }
-  auto dim_ = dim.vec();
-  maybe_wrap_dims(dim_, self.dim());
-  TORCH_CHECK(dim_[0] != dim_[1], "Expected dims to be different, got ", dim, " instead");
-  if (self.is_complex()){
-    return at::sqrt(at::sum(at::real(self.conj() * self), dim_, keepdim));
-  } else {
-    return at::sqrt(at::sum((self * self), dim_, keepdim));
-  }
+  // NOTE: As frobenius_norm_out is currently implemented, it will always produce a
+  // strided tensor result, even if the input is sparse.
+  auto options = self.options().layout(c10::Layout::Strided);
+  Tensor result = at::empty({0}, options);
+  return at::native::frobenius_norm_out(result, self, dim, keepdim);
 }
 
 Tensor &frobenius_norm_out(
@@ -1313,65 +1301,46 @@ Tensor &frobenius_norm_out(
       "Expected at most 2 dimensions, but got ",
       dim.size(),
       " dimensions instead.");
+  Tensor result_;
   if (dim.size() == 1 || dim.size() == 0) {
-    return at::norm_out(result, self, 2, dim, keepdim, self.scalar_type());
-  }
-  auto dim_ = dim.vec();
-  maybe_wrap_dims(dim_, self.dim());
-  TORCH_CHECK(dim_[0] != dim_[1], "Expected dims to be different, got ", dim, " instead");
-  if (self.is_complex()){
-    return at::sqrt_out(result, at::sum(at::real(self.conj() * self), dim_, keepdim));
+    result_ = at::norm(self, 2, dim, keepdim);
   } else {
-    return at::sqrt_out(result, at::sum((self * self), dim_, keepdim));
+    auto dim_ = dim.vec();
+    maybe_wrap_dims(dim_, self.dim());
+    TORCH_CHECK(dim_[0] != dim_[1], "Expected dims to be different, got ", dim, " instead");
+    if (self.is_complex()){
+      result_ = at::sqrt(at::sum(at::real(self.conj() * self), dim_, keepdim));
+    } else {
+      result_ = at::sqrt(at::sum((self * self), dim_, keepdim));
+    }
   }
+  // NOTE: It would be better to avoid resize and copy by using norm_out and sqrt_out above.
+  // However, norm_out and sqrt_out do not support automatic differentiation.
+  // More details here: https://github.com/pytorch/pytorch/pull/44095#discussion_r486673947
+  resize_output(result, result_.sizes());
+  result.copy_(result_);
+  return result;
 }
 
 Tensor nuclear_norm(const Tensor& self, bool keepdim) {
   TORCH_CHECK(
       self.dim() == 2,
       "Expected a tensor with 2 dimensions, but got a tensor with ",
       self.dim(), " dimension", self.dim()==1 ? "" : "s", " instead.");
-  // Since we error out on svd_backward when we don't compute U and V, the backward pass for nuclear_norm
-  // would end up throwing an error as a result if U and V aren't computed.
-  // Due to this, we have to compute U and V conditionally.
-  Tensor result = at::sum(std::get<1>(at::svd(self, /*some=*/true,
-      /*compute_uv=*/at::GradMode::is_enabled() && self.requires_grad())), 0, keepdim);
-  if (keepdim) {
-    result.unsqueeze_(0);
-  }
-  return result;
+  return at::native::nuclear_norm(self, IntArrayRef({0, 1}), keepdim);
 }
 
 Tensor &nuclear_norm_out(Tensor& result, const Tensor& self, bool keepdim) {
   TORCH_CHECK(
       self.dim() == 2,
       "Expected a tensor with 2 dimensions, but got a tensor with ",
       self.dim(), " dimension", self.dim()==1 ? "" : "s", " instead.");
-  at::sum_out(result, std::get<1>(at::svd(self, /*some=*/true, /*compute_uv=*/false)), 0, keepdim);
-  if (keepdim) {
-    result.unsqueeze_(0);
-  }
-  return result;
+  return at::native::nuclear_norm_out(result, self, IntArrayRef({0, 1}), keepdim);
 }
 
 Tensor nuclear_norm(const Tensor& self, IntArrayRef dim, bool keepdim) {
-  TORCH_CHECK(dim.size() == 2, "nuclear norm requires a 'dim' argument of size 2");
-  auto dim_ = dim.vec();
-  maybe_wrap_dims(dim_, self.dim());
-
-  auto permutation = create_dim_backshift_permutation(dim_[0], dim_[1], self.dim());
-  auto permutation_reverse = create_reverse_permutation(permutation);
-  Tensor p = self.permute(permutation);
-  // Since we error out on svd_backward when we don't compute U and V, the backward pass for nuclear_norm
-  // would end up throwing an error as a result if U and V aren't computed.
-  // Due to this, we have to compute U and V conditionally.
-  Tensor result = at::sum(std::get<1>(at::svd(p, /*some=*/true,
-      /*compute_uv=*/at::GradMode::is_enabled() && self.requires_grad())), -1, keepdim);
-  if (keepdim) {
-    result.unsqueeze_(-1);
-    result = result.permute(permutation_reverse);
-  }
-  return result;
+  Tensor result = at::empty({0}, self.options());
+  return at::native::nuclear_norm_out(result, self, dim, keepdim);
 }
 
 Tensor& nuclear_norm_out(Tensor& result, const Tensor& self, IntArrayRef dim, bool keepdim) {
@@ -1380,15 +1349,18 @@ Tensor& nuclear_norm_out(Tensor& result, const Tensor& self, IntArrayRef dim, bo
   maybe_wrap_dims(dim_, self.dim());
 
   auto permutation = create_dim_backshift_permutation(dim_[0], dim_[1], self.dim());
-  auto permutation_reverse = create_reverse_permutation(permutation);
-
   Tensor p = self.permute(permutation);
-  at::sum_out(result, std::get<1>(at::svd(p, /*some=*/true, /*compute_uv=*/false)), -1, keepdim);
+  // NOTE: U and V are computed only if gradmode is enabled, since the backward for nuclear
+  // norm uses svd_backward, which requires them.
+  Tensor result_ = at::sum(std::get<1>(at::svd(p, /*some=*/true,
+      /*compute_uv=*/at::GradMode::is_enabled() && self.requires_grad())), -1, keepdim);
   if (keepdim) {
-    result.unsqueeze_(-1);
-    Tensor result_ = result.permute(permutation_reverse);
-    result.set_(result_);
+    result_.unsqueeze_(-1);
+    auto permutation_reverse = create_reverse_permutation(permutation);
+    result_ = result_.permute(permutation_reverse);
  }
+  resize_output(result, result_.sizes());
+  result.copy_(result_);
   return result;
 }
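
Net effect of the changes above: the functional frobenius_norm and nuclear_norm now delegate to their out= counterparts, and the out= paths build the result with differentiable ops (norm, sqrt, sum, svd) before resizing and copying into result. A rough sketch through the public torch.norm API; that these exact ATen functions are hit from Python is an assumption.

```python
# Sketch: Frobenius and nuclear norms with autograd, plus the out= variant.
import torch

x = torch.randn(4, 5, requires_grad=True)

fro = torch.norm(x, p='fro', dim=(0, 1))   # Frobenius norm over both dims
nuc = torch.norm(x, p='nuc')               # nuclear norm of a 2-D tensor
(fro + nuc).backward()                     # gradients flow through both paths

# The out= variants now resize the provided tensor and copy the result into it.
y = torch.randn(4, 5)                      # plain tensor for the out= call
out = torch.empty(0)
torch.norm(y, p='fro', dim=(0, 1), out=out)
print(out.shape, x.grad.shape)
```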

aten/src/ATen/native/RangeFactories.cpp

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ Tensor& logspace_cpu_out(Tensor& result, Scalar start, Scalar end, c10::optional
       });
     });
   } else {
-    AT_DISPATCH_ALL_TYPES(r.scalar_type(), "logspace_cpu", [&]() {
+    AT_DISPATCH_ALL_TYPES_AND(kBFloat16, r.scalar_type(), "logspace_cpu", [&]() {
       double scalar_base = static_cast<double>(base); // will be autopromoted anyway
       scalar_t scalar_start = start.to<scalar_t>();
       scalar_t scalar_end = end.to<scalar_t>();
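
With kBFloat16 added to the dispatch, torch.logspace should now be able to fill a bfloat16 tensor on CPU. A small hedged example, assuming the Python entry point routes through logspace_cpu_out shown above.

```python
# Sketch: logspace into a bfloat16 tensor on CPU after the dispatch change.
import torch

t = torch.logspace(0, 3, steps=4, dtype=torch.bfloat16)  # 10**0 .. 10**3
print(t)        # expected: 1., 10., 100., 1000. (up to bfloat16 rounding)
print(t.dtype)  # torch.bfloat16
```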

aten/src/ATen/native/SpectralOps.cpp

Lines changed: 101 additions & 25 deletions
@@ -183,10 +183,28 @@ Tensor irfft(const Tensor& self, const int64_t signal_ndim, const bool normalize
                normalized, onesided);
 }
 
+template <typename Stream, typename T>
+static Stream& write_opt(Stream& SS, const optional<T>& value) {
+  if (value) {
+    SS << *value;
+  } else {
+    SS << "None";
+  }
+  return SS;
+}
 
+/* Short-time Fourier Transform, for signal analysis.
+ *
+ * This is modeled after librosa but with support for complex time-domain
+ * signals and complex windows.
+ *
+ * NOTE: librosa's center and pad_mode arguments are currently only implemented
+ * in python because it uses torch.nn.functional.pad which is python-only.
+ */
 Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop_lengthOpt,
             const optional<int64_t> win_lengthOpt, const Tensor& window,
-            const bool normalized, const bool onesided) {
+            const bool normalized, const optional<bool> onesidedOpt,
+            const optional<bool> return_complexOpt) {
   #define REPR(SS) \
     SS << "stft(" << self.toString() << self.sizes() << ", n_fft=" << n_fft \
        << ", hop_length=" << hop_length << ", win_length=" << win_length \
@@ -196,15 +214,28 @@ Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop
     } else { \
       SS << "None"; \
     } \
-    SS << ", normalized=" << normalized << ", onesided=" << onesided << ")"
+    SS << ", normalized=" << normalized << ", onesided="; \
+    write_opt(SS, onesidedOpt) << ", return_complex="; \
+    write_opt(SS, return_complexOpt) << ") "
 
   // default_init hop_length and win_length
   auto hop_length = hop_lengthOpt.value_or(n_fft >> 2);
   auto win_length = win_lengthOpt.value_or(n_fft);
+  const bool return_complex = return_complexOpt.value_or(
+      self.is_complex() || (window.defined() && window.is_complex()));
+  if (!return_complexOpt && !return_complex) {
+    TORCH_WARN("stft will return complex tensors by default in future, use"
+               " return_complex=False to preserve the current output format.");
+  }
 
-  if (!at::isFloatingType(self.scalar_type()) || self.dim() > 2 || self.dim() < 1) {
+  if (!at::isFloatingType(self.scalar_type()) && !at::isComplexType(self.scalar_type())) {
     std::ostringstream ss;
-    REPR(ss) << ": expected a 1D or 2D tensor of floating types";
+    REPR(ss) << ": expected a tensor of floating point or complex values";
+    AT_ERROR(ss.str());
+  }
+  if (self.dim() > 2 || self.dim() < 1) {
+    std::ostringstream ss;
+    REPR(ss) << ": expected a 1D or 2D tensor";
     AT_ERROR(ss.str());
   }
   Tensor input = self;
@@ -240,11 +271,12 @@ Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop
   auto window_ = window;
   if (win_length < n_fft) {
     // pad center
-    window_ = at::zeros({n_fft}, self.options());
     auto left = (n_fft - win_length) / 2;
     if (window.defined()) {
+      window_ = at::zeros({n_fft}, window.options());
       window_.narrow(0, left, win_length).copy_(window);
     } else {
+      window_ = at::zeros({n_fft}, self.options());
      window_.narrow(0, left, win_length).fill_(1);
     }
   }
@@ -257,19 +289,40 @@ Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop
   if (window_.defined()) {
     input = input.mul(window_);
   }
-  // rfft and transpose to get (batch x fft_size x num_frames)
-  auto out = input.rfft(1, normalized, onesided).transpose_(1, 2);
+
+  // FFT and transpose to get (batch x fft_size x num_frames)
+  const bool complex_fft = input.is_complex();
+  const auto onesided = onesidedOpt.value_or(!complex_fft);
+
+  Tensor out;
+  if (complex_fft) {
+    TORCH_CHECK(!onesided, "Cannot have onesided output if window or input is complex");
+    out = at::native::fft(at::view_as_real(input), 1, normalized);
+  } else {
+    out = at::native::rfft(input, 1, normalized, onesided);
+  }
+  out.transpose_(1, 2);
+
   if (self.dim() == 1) {
-    return out.squeeze_(0);
+    out.squeeze_(0);
+  }
+
+  if (return_complex) {
+    return at::view_as_complex(out);
   } else {
     return out;
   }
 }
 
+/* Inverse Short-time Fourier Transform
+ *
+ * This is modeled after librosa but with support for complex time-domain
+ * signals and complex windows.
+ */
 Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop_lengthOpt,
              const optional<int64_t> win_lengthOpt, const Tensor& window,
-             const bool center, const bool normalized, const bool onesided,
-             const optional<int64_t> lengthOpt) {
+             const bool center, const bool normalized, const c10::optional<bool> onesidedOpt,
+             const optional<int64_t> lengthOpt, const bool return_complex) {
   #define REPR(SS) \
     SS << "istft(" << self.toString() << self.sizes() << ", n_fft=" << n_fft \
        << ", hop_length=" << hop_length << ", win_length=" << win_length \
@@ -279,26 +332,23 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> ho
     } else { \
       SS << "None"; \
     } \
-    SS << ", center=" << center << ", normalized=" << normalized << ", onesided=" << onesided << ", length="; \
-    if (lengthOpt.has_value()) { \
-      SS << lengthOpt.value(); \
-    } else { \
-      SS << "None"; \
-    } \
-    SS << ")"
+    SS << ", center=" << center << ", normalized=" << normalized << ", onesided="; \
+    write_opt(SS, onesidedOpt) << ", length="; \
+    write_opt(SS, lengthOpt) << ", return_complex=" << return_complex << ") "
 
   // default_init hop_length and win_length
   const auto hop_length = hop_lengthOpt.value_or(n_fft >> 2);
   const auto win_length = win_lengthOpt.value_or(n_fft);
 
-  const auto input_dim = self.dim();
-  const auto n_frames = self.size(-2);
-  const auto fft_size = self.size(-3);
+  Tensor input = self.is_complex() ? at::view_as_real(self) : self;
+  const auto input_dim = input.dim();
+  const auto n_frames = input.size(-2);
+  const auto fft_size = input.size(-3);
 
   const auto expected_output_signal_len = n_fft + hop_length * (n_frames - 1);
 
-  const auto options = at::device(self.device()).dtype(self.dtype());
-  if (self.numel() == 0) {
+  const auto options = at::device(input.device()).dtype(input.dtype());
+  if (input.numel() == 0) {
     std::ostringstream ss;
     REPR(ss) << ": input tensor cannot be empty.";
     AT_ERROR(ss.str());
@@ -308,12 +358,13 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> ho
     REPR(ss) << ": expected a tensor with 3 or 4 dimensions, but got " << input_dim;
     AT_ERROR(ss.str());
   }
-  if (self.size(-1) != 2) {
+  if (input.size(-1) != 2) {
     std::ostringstream ss;
     REPR(ss) << ": expected the last dimension to be 2 (corresponding to real and imaginary parts), but got " << self.size(-1);
     AT_ERROR(ss.str());
   }
 
+  const bool onesided = onesidedOpt.value_or(fft_size != n_fft);
   if (onesided) {
     if (n_fft / 2 + 1 != fft_size) {
       std::ostringstream ss;
@@ -355,13 +406,21 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> ho
     TORCH_INTERNAL_ASSERT(window_tmp.size(0) == n_fft);
   }
 
-  Tensor input = self;
   if (input_dim == 3) {
     input = input.unsqueeze(0);
   }
 
   input = input.transpose(1, 2);  // size: (channel, n_frames, fft_size, 2)
-  input = at::native::irfft(input, 1, normalized, onesided, {n_fft, });  // size: (channel, n_frames, n_fft)
+
+  if (return_complex) {
+    TORCH_CHECK(!onesided, "Cannot have onesided output if window or input is complex");
+    input = at::native::ifft(input, 1, normalized);  // size: (channel, n_frames, n_fft)
+    input = at::view_as_complex(input);
+  } else {
+    TORCH_CHECK(!window.defined() || !window.is_complex(),
+                "Complex windows are incompatible with return_complex=False");
+    input = at::native::irfft(input, 1, normalized, onesided, {n_fft,});  // size: (channel, n_frames, n_fft)
+  }
   TORCH_INTERNAL_ASSERT(input.size(2) == n_fft);
 
   Tensor y_tmp = input * window_tmp.view({1, 1, n_fft});  // size: (channel, n_frames, n_fft)
@@ -408,4 +467,21 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> ho
   #undef REPR
 }
 
+Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop_lengthOpt,
+            const optional<int64_t> win_lengthOpt, const Tensor& window,
+            const bool normalized, const optional<bool> onesidedOpt) {
+  return at::native::stft(
+      self, n_fft, hop_lengthOpt, win_lengthOpt, window, normalized, onesidedOpt,
+      /*return_complex=*/c10::nullopt);
+}
+
+Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop_lengthOpt,
+             const optional<int64_t> win_lengthOpt, const Tensor& window,
+             const bool center, const bool normalized, const optional<bool> onesidedOpt,
+             const optional<int64_t> lengthOpt) {
+  return at::native::istft(
+      self, n_fft, hop_lengthOpt, win_lengthOpt, window, center, normalized,
+      onesidedOpt, lengthOpt, /*return_complex=*/false);
+}
+
 }} // at::native
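
The stft/istft changes above thread optional onesided and return_complex arguments through both transforms, pick the FFT variant based on whether the input or window is complex, and warn when return_complex is left unspecified for a real input. A hedged round-trip sketch through the Python API; the keyword names mirror the C++ signature, and the exact Python-side defaults are an assumption.

```python
# Sketch: complex STFT output and reconstruction with the new return_complex flag.
import torch

x = torch.randn(1, 4000)                 # real, single-channel signal
window = torch.hann_window(400)

# Passing return_complex=True explicitly avoids the TORCH_WARN added above and
# yields a complex tensor of shape (channel, n_fft // 2 + 1, n_frames).
spec = torch.stft(x, n_fft=400, hop_length=100, window=window,
                  return_complex=True)

# istft accepts the complex spectrogram (viewed as real internally) and
# reconstructs the time-domain signal.
recon = torch.istft(spec, n_fft=400, hop_length=100, window=window,
                    length=x.size(-1))
print(spec.dtype, recon.shape)
```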

aten/src/ATen/native/cuda/Col2Im.cu

Lines changed: 2 additions & 1 deletion
@@ -93,7 +93,8 @@ void col2im_out_cuda_template(
   output.resize_({batch_size, n_output_plane, output_height, output_width});
   output.zero_();
 
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "col2im_out_cuda", [&] {
+  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf,
+      input.scalar_type(), "col2im_out_cuda", [&] {
     using accscalar_t = at::acc_type<scalar_t, true>;
 
     Tensor input_n;

0 commit comments
