
Commit 010ec29

Update on "[RPC profiling] Add tests to ensure RPC profiling works on single threaded server"

* #44664 [RPC profiling] Extend RPC profiling to support async function execution over RPC.
* #44655 [RPC profiling] Don't wrap toHere() calls with profiling
* #44653 [RPC profiling] Allow disableProfiler() to be called from another thread.
* #44646 Remove thread_local RecordFunctionGuard from profiler.

This ensures that RPC profiling works in single-threaded server scenarios and that we won't make the assumption that we'll have multiple threads when working on this code. For example, this assumption resulted in a bug in the previous diff (which was fixed).

Differential Revision: [D23691304](https://our.internmc.facebook.com/intern/diff/D23691304/)

[ghstack-poisoned]
2 parents 968126a + fead225 commit 010ec29
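
As context for the tests being added, here is a minimal sketch of the usage pattern they exercise: issuing an RPC while the autograd profiler is enabled. This is illustrative only; the worker name, tensor arguments, and the single-threaded server configuration are assumptions, not the actual test code from this commit.

```python
# Illustrative sketch only -- not the test added in this commit.
# Assumes rpc.init_rpc(...) has already been called and a peer named
# "worker1" exists; the server side may be running a single request thread.
import torch
import torch.distributed.rpc as rpc
from torch.autograd.profiler import profile

with profile() as prof:
    # Issue an RPC while the profiler is enabled; the remote op should appear
    # in the collected events even when the server is single threaded.
    fut = rpc.rpc_async("worker1", torch.add, args=(torch.ones(2), torch.ones(2)))
    fut.wait()

print(prof.key_averages().table(sort_by="cpu_time_total"))
```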


57 files changed: +1323 / -533 lines

aten/src/ATen/native/Col2Im.cpp

Lines changed: 1 addition & 1 deletion
@@ -137,7 +137,7 @@ static void col2im_out_cpu_template(
   output.resize_({batch_size, n_output_plane, output_height, output_width});
   output.zero_();
 
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf,
       input.scalar_type(), "col2im_out_cpu", [&] {
         Tensor input_n = Tensor();
         Tensor output_n = Tensor();

aten/src/ATen/native/Im2Col.cpp

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ static void im2col_out_cpu_template(
   output.resize_({batch_size, n_output_plane, output_length});
   output.zero_();
 
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf,
       input.scalar_type(), "im2col_out_cpu", [&] {
         Tensor input_n;
         Tensor output_n;
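
The two CPU hunks above (and the matching CUDA hunk later in this commit) widen the im2col/col2im dispatch to complex dtypes. A rough sketch of the user-visible effect, assuming these kernels are reached through torch.nn.functional.unfold and fold; whether every surrounding check already accepts complex inputs is an assumption here.

```python
# Sketch: complex CPU tensors dispatching through im2col/col2im
# (torch.nn.functional.unfold / fold) after the macro change above.
import torch
import torch.nn.functional as F

x = torch.randn(1, 2, 6, 6, dtype=torch.cfloat)          # complex input on CPU
patches = F.unfold(x, kernel_size=3)                      # im2col path
y = F.fold(patches, output_size=(6, 6), kernel_size=3)    # col2im path
print(patches.shape, y.dtype)
```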

aten/src/ATen/native/LinearAlgebra.cpp

Lines changed: 34 additions & 62 deletions
@@ -1283,23 +1283,11 @@ Tensor frobenius_norm(const Tensor& self) {
 }
 
 Tensor frobenius_norm(const Tensor& self, IntArrayRef dim, bool keepdim) {
-  TORCH_CHECK(!self.is_complex(), "frobenius norm not supported for complex tensors");
-  TORCH_CHECK(
-      dim.size() <= 2,
-      "Expected at most 2 dimensions, but got ",
-      dim.size(),
-      " dimensions instead.");
-  if (dim.size() == 1 || dim.size() == 0) {
-    return at::norm(self, 2, dim, keepdim);
-  }
-  auto dim_ = dim.vec();
-  maybe_wrap_dims(dim_, self.dim());
-  TORCH_CHECK(dim_[0] != dim_[1], "Expected dims to be different, got ", dim, " instead");
-  if (self.is_complex()){
-    return at::sqrt(at::sum(at::real(self.conj() * self), dim_, keepdim));
-  } else {
-    return at::sqrt(at::sum((self * self), dim_, keepdim));
-  }
+  // NOTE: As frobenius_norm_out is currently implemented, it will always produce a
+  // strided tensor result, even if the input is sparse.
+  auto options = self.options().layout(c10::Layout::Strided);
+  Tensor result = at::empty({0}, options);
+  return at::native::frobenius_norm_out(result, self, dim, keepdim);
 }
 
 Tensor &frobenius_norm_out(
@@ -1313,65 +1301,46 @@ Tensor &frobenius_norm_out(
       "Expected at most 2 dimensions, but got ",
       dim.size(),
       " dimensions instead.");
+  Tensor result_;
   if (dim.size() == 1 || dim.size() == 0) {
-    return at::norm_out(result, self, 2, dim, keepdim, self.scalar_type());
-  }
-  auto dim_ = dim.vec();
-  maybe_wrap_dims(dim_, self.dim());
-  TORCH_CHECK(dim_[0] != dim_[1], "Expected dims to be different, got ", dim, " instead");
-  if (self.is_complex()){
-    return at::sqrt_out(result, at::sum(at::real(self.conj() * self), dim_, keepdim));
+    result_ = at::norm(self, 2, dim, keepdim);
   } else {
-    return at::sqrt_out(result, at::sum((self * self), dim_, keepdim));
+    auto dim_ = dim.vec();
+    maybe_wrap_dims(dim_, self.dim());
+    TORCH_CHECK(dim_[0] != dim_[1], "Expected dims to be different, got ", dim, " instead");
+    if (self.is_complex()){
+      result_ = at::sqrt(at::sum(at::real(self.conj() * self), dim_, keepdim));
+    } else {
+      result_ = at::sqrt(at::sum((self * self), dim_, keepdim));
+    }
   }
+  // NOTE: It would be better to avoid resize and copy by using norm_out and sqrt_out above.
+  // However, norm_out and sqrt_out do not support automatic differentiation.
+  // More details here: https://github.com/pytorch/pytorch/pull/44095#discussion_r486673947
+  resize_output(result, result_.sizes());
+  result.copy_(result_);
+  return result;
 }
 
 Tensor nuclear_norm(const Tensor& self, bool keepdim) {
   TORCH_CHECK(
       self.dim() == 2,
       "Expected a tensor with 2 dimensions, but got a tensor with ",
       self.dim(), " dimension", self.dim()==1 ? "" : "s", " instead.");
-  // Since we error out on svd_backward when we don't compute U and V, the backward pass for nuclear_norm
-  // would end up throwing an error as a result if U and V aren't computed.
-  // Due to this, we have to compute U and V conditionally.
-  Tensor result = at::sum(std::get<1>(at::svd(self, /*some=*/true,
-      /*compute_uv=*/at::GradMode::is_enabled() && self.requires_grad())), 0, keepdim);
-  if (keepdim) {
-    result.unsqueeze_(0);
-  }
-  return result;
+  return at::native::nuclear_norm(self, IntArrayRef({0, 1}), keepdim);
 }
 
 Tensor &nuclear_norm_out(Tensor& result, const Tensor& self, bool keepdim) {
   TORCH_CHECK(
       self.dim() == 2,
       "Expected a tensor with 2 dimensions, but got a tensor with ",
       self.dim(), " dimension", self.dim()==1 ? "" : "s", " instead.");
-  at::sum_out(result, std::get<1>(at::svd(self, /*some=*/true, /*compute_uv=*/false)), 0, keepdim);
-  if (keepdim) {
-    result.unsqueeze_(0);
-  }
-  return result;
+  return at::native::nuclear_norm_out(result, self, IntArrayRef({0, 1}), keepdim);
 }
 
 Tensor nuclear_norm(const Tensor& self, IntArrayRef dim, bool keepdim) {
-  TORCH_CHECK(dim.size() == 2, "nuclear norm requires a 'dim' argument of size 2");
-  auto dim_ = dim.vec();
-  maybe_wrap_dims(dim_, self.dim());
-
-  auto permutation = create_dim_backshift_permutation(dim_[0], dim_[1], self.dim());
-  auto permutation_reverse = create_reverse_permutation(permutation);
-  Tensor p = self.permute(permutation);
-  // Since we error out on svd_backward when we don't compute U and V, the backward pass for nuclear_norm
-  // would end up throwing an error as a result if U and V aren't computed.
-  // Due to this, we have to compute U and V conditionally.
-  Tensor result = at::sum(std::get<1>(at::svd(p, /*some=*/true,
-      /*compute_uv=*/at::GradMode::is_enabled() && self.requires_grad())), -1, keepdim);
-  if (keepdim) {
-    result.unsqueeze_(-1);
-    result = result.permute(permutation_reverse);
-  }
-  return result;
+  Tensor result = at::empty({0}, self.options());
+  return at::native::nuclear_norm_out(result, self, dim, keepdim);
 }
 
 Tensor& nuclear_norm_out(Tensor& result, const Tensor& self, IntArrayRef dim, bool keepdim) {
@@ -1380,15 +1349,18 @@ Tensor& nuclear_norm_out(Tensor& result, const Tensor& self, IntArrayRef dim, bo
   maybe_wrap_dims(dim_, self.dim());
 
   auto permutation = create_dim_backshift_permutation(dim_[0], dim_[1], self.dim());
-  auto permutation_reverse = create_reverse_permutation(permutation);
-
   Tensor p = self.permute(permutation);
-  at::sum_out(result, std::get<1>(at::svd(p, /*some=*/true, /*compute_uv=*/false)), -1, keepdim);
+  // NOTE: U and V are computed only if gradmode is enabled, since the backward for nuclear
+  // norm uses svd_backward, which requires them.
+  Tensor result_ = at::sum(std::get<1>(at::svd(p, /*some=*/true,
+      /*compute_uv=*/at::GradMode::is_enabled() && self.requires_grad())), -1, keepdim);
   if (keepdim) {
-    result.unsqueeze_(-1);
-    Tensor result_ = result.permute(permutation_reverse);
-    result.set_(result_);
+    result_.unsqueeze_(-1);
+    auto permutation_reverse = create_reverse_permutation(permutation);
+    result_ = result_.permute(permutation_reverse);
  }
+  resize_output(result, result_.sizes());
+  result.copy_(result_);
   return result;
 }
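
Net effect of the changes above: the functional frobenius_norm and nuclear_norm now delegate to their out= counterparts, and the out= paths build the result with differentiable ops (norm, sqrt, sum, svd) before resizing and copying into result. A rough sketch through the public torch.norm API; that these exact ATen functions are hit from Python is an assumption.

```python
# Sketch: Frobenius and nuclear norms with autograd, plus the out= variant.
import torch

x = torch.randn(4, 5, requires_grad=True)

fro = torch.norm(x, p='fro', dim=(0, 1))   # Frobenius norm over both dims
nuc = torch.norm(x, p='nuc')               # nuclear norm of a 2-D tensor
(fro + nuc).backward()                     # gradients flow through both paths

# The out= variants now resize the provided tensor and copy the result into it.
y = torch.randn(4, 5)                      # plain tensor for the out= call
out = torch.empty(0)
torch.norm(y, p='fro', dim=(0, 1), out=out)
print(out.shape, x.grad.shape)
```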

aten/src/ATen/native/RangeFactories.cpp

Lines changed: 1 addition & 1 deletion
@@ -83,7 +83,7 @@ Tensor& logspace_cpu_out(Tensor& result, Scalar start, Scalar end, c10::optional
       });
     });
   } else {
-    AT_DISPATCH_ALL_TYPES(r.scalar_type(), "logspace_cpu", [&]() {
+    AT_DISPATCH_ALL_TYPES_AND(kBFloat16, r.scalar_type(), "logspace_cpu", [&]() {
       double scalar_base = static_cast<double>(base); // will be autopromoted anyway
       scalar_t scalar_start = start.to<scalar_t>();
       scalar_t scalar_end = end.to<scalar_t>();
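
With kBFloat16 added to the dispatch, torch.logspace should now be able to fill a bfloat16 tensor on CPU. A small hedged example, assuming the Python entry point routes through logspace_cpu_out shown above.

```python
# Sketch: logspace into a bfloat16 tensor on CPU after the dispatch change.
import torch

t = torch.logspace(0, 3, steps=4, dtype=torch.bfloat16)  # 10**0 .. 10**3
print(t)        # expected: 1., 10., 100., 1000. (up to bfloat16 rounding)
print(t.dtype)  # torch.bfloat16
```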

aten/src/ATen/native/SpectralOps.cpp

Lines changed: 101 additions & 25 deletions
@@ -183,10 +183,28 @@ Tensor irfft(const Tensor& self, const int64_t signal_ndim, const bool normalize
                normalized, onesided);
 }
 
+template <typename Stream, typename T>
+static Stream& write_opt(Stream& SS, const optional<T>& value) {
+  if (value) {
+    SS << *value;
+  } else {
+    SS << "None";
+  }
+  return SS;
+}
 
+/* Short-time Fourier Transform, for signal analysis.
+ *
+ * This is modeled after librosa but with support for complex time-domain
+ * signals and complex windows.
+ *
+ * NOTE: librosa's center and pad_mode arguments are currently only implemented
+ * in python because it uses torch.nn.functional.pad which is python-only.
+ */
 Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop_lengthOpt,
             const optional<int64_t> win_lengthOpt, const Tensor& window,
-            const bool normalized, const bool onesided) {
+            const bool normalized, const optional<bool> onesidedOpt,
+            const optional<bool> return_complexOpt) {
   #define REPR(SS) \
     SS << "stft(" << self.toString() << self.sizes() << ", n_fft=" << n_fft \
        << ", hop_length=" << hop_length << ", win_length=" << win_length \
@@ -196,15 +214,28 @@ Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop
     } else { \
       SS << "None"; \
     } \
-    SS << ", normalized=" << normalized << ", onesided=" << onesided << ")"
+    SS << ", normalized=" << normalized << ", onesided="; \
+    write_opt(SS, onesidedOpt) << ", return_complex="; \
+    write_opt(SS, return_complexOpt) << ") "
 
   // default_init hop_length and win_length
   auto hop_length = hop_lengthOpt.value_or(n_fft >> 2);
   auto win_length = win_lengthOpt.value_or(n_fft);
+  const bool return_complex = return_complexOpt.value_or(
+      self.is_complex() || (window.defined() && window.is_complex()));
+  if (!return_complexOpt && !return_complex) {
+    TORCH_WARN("stft will return complex tensors by default in future, use"
+               " return_complex=False to preserve the current output format.");
+  }
 
-  if (!at::isFloatingType(self.scalar_type()) || self.dim() > 2 || self.dim() < 1) {
+  if (!at::isFloatingType(self.scalar_type()) && !at::isComplexType(self.scalar_type())) {
     std::ostringstream ss;
-    REPR(ss) << ": expected a 1D or 2D tensor of floating types";
+    REPR(ss) << ": expected a tensor of floating point or complex values";
+    AT_ERROR(ss.str());
+  }
+  if (self.dim() > 2 || self.dim() < 1) {
+    std::ostringstream ss;
+    REPR(ss) << ": expected a 1D or 2D tensor";
     AT_ERROR(ss.str());
   }
   Tensor input = self;
@@ -240,11 +271,12 @@ Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop
   auto window_ = window;
   if (win_length < n_fft) {
     // pad center
-    window_ = at::zeros({n_fft}, self.options());
     auto left = (n_fft - win_length) / 2;
     if (window.defined()) {
+      window_ = at::zeros({n_fft}, window.options());
       window_.narrow(0, left, win_length).copy_(window);
     } else {
+      window_ = at::zeros({n_fft}, self.options());
      window_.narrow(0, left, win_length).fill_(1);
     }
   }
@@ -257,19 +289,40 @@ Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop
   if (window_.defined()) {
     input = input.mul(window_);
   }
-  // rfft and transpose to get (batch x fft_size x num_frames)
-  auto out = input.rfft(1, normalized, onesided).transpose_(1, 2);
+
+  // FFT and transpose to get (batch x fft_size x num_frames)
+  const bool complex_fft = input.is_complex();
+  const auto onesided = onesidedOpt.value_or(!complex_fft);
+
+  Tensor out;
+  if (complex_fft) {
+    TORCH_CHECK(!onesided, "Cannot have onesided output if window or input is complex");
+    out = at::native::fft(at::view_as_real(input), 1, normalized);
+  } else {
+    out = at::native::rfft(input, 1, normalized, onesided);
+  }
+  out.transpose_(1, 2);
+
   if (self.dim() == 1) {
-    return out.squeeze_(0);
+    out.squeeze_(0);
+  }
+
+  if (return_complex) {
+    return at::view_as_complex(out);
   } else {
     return out;
   }
 }
 
+/* Inverse Short-time Fourier Transform
+ *
+ * This is modeled after librosa but with support for complex time-domain
+ * signals and complex windows.
+ */
 Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop_lengthOpt,
              const optional<int64_t> win_lengthOpt, const Tensor& window,
-             const bool center, const bool normalized, const bool onesided,
-             const optional<int64_t> lengthOpt) {
+             const bool center, const bool normalized, const c10::optional<bool> onesidedOpt,
+             const optional<int64_t> lengthOpt, const bool return_complex) {
   #define REPR(SS) \
     SS << "istft(" << self.toString() << self.sizes() << ", n_fft=" << n_fft \
        << ", hop_length=" << hop_length << ", win_length=" << win_length \
@@ -279,26 +332,23 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> ho
     } else { \
       SS << "None"; \
     } \
-    SS << ", center=" << center << ", normalized=" << normalized << ", onesided=" << onesided << ", length="; \
-    if (lengthOpt.has_value()) { \
-      SS << lengthOpt.value(); \
-    } else { \
-      SS << "None"; \
-    } \
-    SS << ")"
+    SS << ", center=" << center << ", normalized=" << normalized << ", onesided="; \
+    write_opt(SS, onesidedOpt) << ", length="; \
+    write_opt(SS, lengthOpt) << ", return_complex=" << return_complex << ") "
 
   // default_init hop_length and win_length
   const auto hop_length = hop_lengthOpt.value_or(n_fft >> 2);
   const auto win_length = win_lengthOpt.value_or(n_fft);
 
-  const auto input_dim = self.dim();
-  const auto n_frames = self.size(-2);
-  const auto fft_size = self.size(-3);
+  Tensor input = self.is_complex() ? at::view_as_real(self) : self;
+  const auto input_dim = input.dim();
+  const auto n_frames = input.size(-2);
+  const auto fft_size = input.size(-3);
 
   const auto expected_output_signal_len = n_fft + hop_length * (n_frames - 1);
 
-  const auto options = at::device(self.device()).dtype(self.dtype());
-  if (self.numel() == 0) {
+  const auto options = at::device(input.device()).dtype(input.dtype());
+  if (input.numel() == 0) {
     std::ostringstream ss;
     REPR(ss) << ": input tensor cannot be empty.";
     AT_ERROR(ss.str());
@@ -308,12 +358,13 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> ho
     REPR(ss) << ": expected a tensor with 3 or 4 dimensions, but got " << input_dim;
     AT_ERROR(ss.str());
   }
-  if (self.size(-1) != 2) {
+  if (input.size(-1) != 2) {
     std::ostringstream ss;
     REPR(ss) << ": expected the last dimension to be 2 (corresponding to real and imaginary parts), but got " << self.size(-1);
     AT_ERROR(ss.str());
   }
 
+  const bool onesided = onesidedOpt.value_or(fft_size != n_fft);
   if (onesided) {
     if (n_fft / 2 + 1 != fft_size) {
       std::ostringstream ss;
@@ -355,13 +406,21 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> ho
     TORCH_INTERNAL_ASSERT(window_tmp.size(0) == n_fft);
   }
 
-  Tensor input = self;
   if (input_dim == 3) {
     input = input.unsqueeze(0);
   }
 
   input = input.transpose(1, 2);  // size: (channel, n_frames, fft_size, 2)
-  input = at::native::irfft(input, 1, normalized, onesided, {n_fft, });  // size: (channel, n_frames, n_fft)
+
+  if (return_complex) {
+    TORCH_CHECK(!onesided, "Cannot have onesided output if window or input is complex");
+    input = at::native::ifft(input, 1, normalized);  // size: (channel, n_frames, n_fft)
+    input = at::view_as_complex(input);
+  } else {
+    TORCH_CHECK(!window.defined() || !window.is_complex(),
+                "Complex windows are incompatible with return_complex=False");
+    input = at::native::irfft(input, 1, normalized, onesided, {n_fft,});  // size: (channel, n_frames, n_fft)
+  }
   TORCH_INTERNAL_ASSERT(input.size(2) == n_fft);
 
   Tensor y_tmp = input * window_tmp.view({1, 1, n_fft});  // size: (channel, n_frames, n_fft)
@@ -408,4 +467,21 @@ Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> ho
   #undef REPR
 }
 
+Tensor stft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop_lengthOpt,
+            const optional<int64_t> win_lengthOpt, const Tensor& window,
+            const bool normalized, const optional<bool> onesidedOpt) {
+  return at::native::stft(
+      self, n_fft, hop_lengthOpt, win_lengthOpt, window, normalized, onesidedOpt,
+      /*return_complex=*/c10::nullopt);
+}
+
+Tensor istft(const Tensor& self, const int64_t n_fft, const optional<int64_t> hop_lengthOpt,
+             const optional<int64_t> win_lengthOpt, const Tensor& window,
+             const bool center, const bool normalized, const optional<bool> onesidedOpt,
+             const optional<int64_t> lengthOpt) {
+  return at::native::istft(
+      self, n_fft, hop_lengthOpt, win_lengthOpt, window, center, normalized,
+      onesidedOpt, lengthOpt, /*return_complex=*/false);
+}
+
 }} // at::native
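
The stft/istft changes above thread optional onesided and return_complex arguments through both transforms, pick the FFT variant based on whether the input or window is complex, and warn when return_complex is left unspecified for a real input. A hedged round-trip sketch through the Python API; the keyword names mirror the C++ signature, and the exact Python-side defaults are an assumption.

```python
# Sketch: complex STFT output and reconstruction with the new return_complex flag.
import torch

x = torch.randn(1, 4000)                 # real, single-channel signal
window = torch.hann_window(400)

# Passing return_complex=True explicitly avoids the TORCH_WARN added above and
# yields a complex tensor of shape (channel, n_fft // 2 + 1, n_frames).
spec = torch.stft(x, n_fft=400, hop_length=100, window=window,
                  return_complex=True)

# istft accepts the complex spectrogram (viewed as real internally) and
# reconstructs the time-domain signal.
recon = torch.istft(spec, n_fft=400, hop_length=100, window=window,
                    length=x.size(-1))
print(spec.dtype, recon.shape)
```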

aten/src/ATen/native/cuda/Col2Im.cu

Lines changed: 2 additions & 1 deletion
@@ -93,7 +93,8 @@ void col2im_out_cuda_template(
   output.resize_({batch_size, n_output_plane, output_height, output_width});
   output.zero_();
 
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "col2im_out_cuda", [&] {
+  AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND1(kHalf,
+      input.scalar_type(), "col2im_out_cuda", [&] {
     using accscalar_t = at::acc_type<scalar_t, true>;
 
     Tensor input_n;

0 commit comments
