[CUDA] Bump tolerances for test_grad_pca_lowrank (#129902)

eqy · pytorchmergebot · commit 29ffa20bb13a · 2024-07-02T23:17:02.000Z
The revert of #127199 seems to surface an additional failure on A100---small tolerance bump to account for this. I did find what appears to be a race condition in the one of the kernels used in this workload but I'm not sure it's related here... CC @nWEIdia Pull Request resolved: #129902 Approved by: https://github.com/ezyang
diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
@@ -17822,6 +17822,8 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs):
                        DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-03, rtol=1e-03),
                                                        torch.complex64: tol(atol=1e-02, rtol=1e-02)}),
                                     'TestCommon', 'test_noncontiguous_samples'),
+                       DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-05, rtol=5e-05)}),
+                                    'TestOperators', 'test_grad'),
                        # FIXME This should be the following, but the toleranceOverride does not seem to do anything!
                        # DecorateInfo(toleranceOverride({torch.complex128: tol(atol=1e-04, rtol=1e-04)}),
                        #              'TestFwdGradients', 'test_fn_fwgrad_bwgrad'),