Skip to content

Commit cce669e

Browse files
authored
Merge branch 'pytorch:master' into jiterator/lerp
2 parents 02aa0f6 + 116d0be commit cce669e

File tree

28 files changed

+572
-213
lines changed

28 files changed

+572
-213
lines changed

.circleci/cimodel/data/pytorch_build_data.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def child_constructor(self):
7474
"mlc": MLCConfigNode,
7575
"vulkan": VulkanConfigNode,
7676
"parallel_tbb": ParallelTBBConfigNode,
77-
"noarch": NoarchConfigNode,
77+
"crossref": CrossRefConfigNode,
7878
"parallel_native": ParallelNativeConfigNode,
7979
"onnx": ONNXConfigNode,
8080
"libtorch": LibTorchConfigNode,
@@ -171,9 +171,9 @@ def child_constructor(self):
171171
return ImportantConfigNode
172172

173173

174-
class NoarchConfigNode(TreeConfigNode):
174+
class CrossRefConfigNode(TreeConfigNode):
175175
def init2(self, node_name):
176-
self.props["is_noarch"] = node_name
176+
self.props["is_crossref"] = node_name
177177

178178
def child_constructor(self):
179179
return ImportantConfigNode

.circleci/cimodel/data/pytorch_build_definitions.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ def instantiate_configs(only_slow_gradcheck):
239239
compiler_version = fc.find_prop("compiler_version")
240240
is_xla = fc.find_prop("is_xla") or False
241241
is_asan = fc.find_prop("is_asan") or False
242-
is_noarch = fc.find_prop("is_noarch") or False
242+
is_crossref = fc.find_prop("is_crossref") or False
243243
is_onnx = fc.find_prop("is_onnx") or False
244244
is_pure_torch = fc.find_prop("is_pure_torch") or False
245245
is_vulkan = fc.find_prop("is_vulkan") or False
@@ -283,8 +283,8 @@ def instantiate_configs(only_slow_gradcheck):
283283
python_version = fc.find_prop("pyver")
284284
parms_list[0] = fc.find_prop("abbreviated_pyver")
285285

286-
if is_noarch:
287-
parms_list_ignored_for_docker_image.append("noarch")
286+
if is_crossref:
287+
parms_list_ignored_for_docker_image.append("crossref")
288288

289289
if is_onnx:
290290
parms_list.append("onnx")

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ jobs:
138138
{ include: [
139139
{ config: "default", shard: 1, num_shards: 2, runner: "linux.2xlarge" },
140140
{ config: "default", shard: 2, num_shards: 2, runner: "linux.2xlarge" },
141-
{ config: "noarch", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
141+
{ config: "crossref", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
142142
]}
143143
144144
linux-bionic-cuda11_3-py3_7-clang9-build:

.jenkins/pytorch/macos-test.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
# shellcheck source=./macos-common.sh
55
source "$(dirname "${BASH_SOURCE[0]}")/macos-common.sh"
66

7-
export PYTORCH_TEST_SKIP_NOARCH=1
8-
97
conda install -y six
108
pip install -q hypothesis "expecttest==0.1.3" "librosa>=0.6.2" "numba<=0.49.1" psutil "scipy==1.6.3"
119

.jenkins/pytorch/test.sh

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,8 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
6262
export BUILD_SPLIT_CUDA=ON
6363
fi
6464

65-
if [[ "$BUILD_ENVIRONMENT" == *noarch* ]]; then
66-
export PYTORCH_TEST_SKIP_NOARCH=0
67-
else
68-
export PYTORCH_TEST_SKIP_NOARCH=1
65+
if [[ "$BUILD_ENVIRONMENT" == *crossref* ]]; then
66+
export PYTORCH_TEST_WITH_CROSSREF=1
6967
fi
7068

7169
if [[ -n "$PR_NUMBER" ]] && [[ -z "$CI_MASTER" || "$CI_MASTER" == "false" ]]; then

.jenkins/pytorch/win-test.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ export TEST_DIR_WIN
2626
export PYTORCH_FINAL_PACKAGE_DIR="${PYTORCH_FINAL_PACKAGE_DIR:-/c/users/circleci/workspace/build-results}"
2727
PYTORCH_FINAL_PACKAGE_DIR_WIN=$(cygpath -w "${PYTORCH_FINAL_PACKAGE_DIR}")
2828
export PYTORCH_FINAL_PACKAGE_DIR_WIN
29-
export PYTORCH_TEST_SKIP_NOARCH=1
3029

3130
mkdir -p "$TMP_DIR"/build/torch
3231

aten/src/ATen/native/quantized/cudnn/BinaryOps.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <ATen/native/quantized/cudnn/utils.h>
1313
#include <ATen/native/utils/ParamsHash.h>
1414
#include <ATen/TensorUtils.h>
15+
#include <c10/core/MemoryFormat.h>
1516
#include <c10/core/QScheme.h>
1617
#include <c10/cuda/CUDAFunctions.h>
1718
#include <c10/util/ArrayRef.h>
@@ -99,18 +100,19 @@ Tensor add(Tensor qa, Tensor qb, double output_scale, int64_t output_zero_point)
99100
}
100101
qa = qa.view(new_sizes);
101102
qb = qb.view(new_sizes);
103+
} else if (qa.dim() == 4) {
104+
qa = qa.contiguous(c10::MemoryFormat::ChannelsLast);
105+
qb = qb.contiguous(c10::MemoryFormat::ChannelsLast);
102106
}
103107

104-
at::Tensor add_output = at::empty(qa.sizes(), at::device(at::kCUDA).dtype(at::kFloat));
105-
at::Tensor quantized_output = at::_empty_affine_quantized(
106-
qa.sizes(),
107-
at::device(at::kCUDA).dtype(at::ScalarType::QInt8),
108-
output_scale,
109-
output_zero_point);
108+
auto memory_format = qa.dim() == 4 ? at::MemoryFormat::ChannelsLast : at::MemoryFormat::Contiguous;
109+
at::Tensor add_output = at::empty(qa.sizes(), at::device(at::kCUDA).dtype(at::kFloat), memory_format);
110+
at::Tensor quantized_output = at::_empty_affine_quantized(qa.sizes(), at::device(at::kCUDA).dtype(at::ScalarType::QInt8),
111+
output_scale, output_zero_point, memory_format);
110112
// TODO: When cudnn enables support for broadcasting, we can remove this tensor
111-
at::Tensor requantize_multiplier_tensor = at::empty(quantized_output.sizes(), at::device(at::kCUDA).dtype(at::kFloat));
113+
at::Tensor requantize_multiplier_tensor = at::empty(quantized_output.sizes(), at::device(at::kCUDA).dtype(at::kFloat), memory_format);
112114
requantize_multiplier_tensor.fill_(qa.q_scale() / output_scale);
113-
at::Tensor rhs_multiplier_tensor = at::empty(quantized_output.sizes(), at::device(at::kCUDA).dtype(at::kFloat));
115+
at::Tensor rhs_multiplier_tensor = at::empty(quantized_output.sizes(), at::device(at::kCUDA).dtype(at::kFloat), memory_format);
114116
rhs_multiplier_tensor.fill_(qb.q_scale() / qa.q_scale());
115117

116118
cudnnHandle_t handle = at::native::getCudnnHandle();

test/jit/test_tracer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
sys.path.append(pytorch_test_dir)
1818
from torch.testing._internal.common_utils import suppress_warnings, \
1919
skipIfCompiledWithoutNumpy, enable_profiling_mode_for_profiling_tests, \
20-
IS_SANDCASTLE, TemporaryFileName
20+
IS_SANDCASTLE, TemporaryFileName, skipIfCrossRef
2121
from torch.testing._internal.jit_utils import JitTestCase, enable_cpu_fuser, \
2222
_tmp_donotuse_dont_inline_everything, _trace, RUN_CUDA, \
2323
RUN_CUDA_MULTI_GPU, make_global
@@ -511,6 +511,7 @@ def to_tensor(x, y):
511511
self.assertEqual(to_tensor_trace(x, y), to_tensor(x, y))
512512

513513
@skipIfCompiledWithoutNumpy
514+
@skipIfCrossRef
514515
def test_trace_warn(self):
515516
def fn(x):
516517
int(x) # Warning 1.
@@ -1779,6 +1780,7 @@ def forward(self, x):
17791780

17801781
torch.jit.trace(Mod(), (torch.rand(3, 4),))
17811782

1783+
@skipIfCrossRef
17821784
def test_trace_records_names(self):
17831785
def foo(bar, baz):
17841786
baz = bar + 3

test/mobile/test_lite_script_type.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import torch
44
import torch.utils.bundled_inputs
55
import io
6-
from typing import List, NamedTuple
6+
from typing import Dict, List, NamedTuple
77

88
from torch.jit.mobile import _load_for_lite_interpreter
99
from torch.testing._internal.common_utils import TestCase, run_tests
@@ -33,6 +33,69 @@ def forward(self, a: torch.Tensor):
3333
mobile_module_result
3434
)
3535

36+
37+
def test_typing_dict_with_namedtuple(self):
38+
class Foo(NamedTuple):
39+
id: torch.Tensor
40+
41+
class Bar(torch.nn.Module):
42+
def __init__(self):
43+
super(Bar, self).__init__()
44+
self.foo = Foo(torch.tensor(1))
45+
46+
def forward(self, a: torch.Tensor):
47+
self.foo = Foo(a)
48+
re: Dict[str, Foo] = dict()
49+
re["test"] = Foo(a)
50+
return self.foo, re["test"]
51+
52+
# The corresponding bytecode is
53+
# (8,
54+
# ('__torch__.___torch_mangle_2.Bar.forward',
55+
# (('instructions',
56+
# (('STOREN', 1, 2),
57+
# ('DROPR', 1, 0),
58+
# ('DICT_CONSTRUCT', 0, 0),
59+
# ('STORE', 3, 0),
60+
# ('LOAD', 3, 0),
61+
# ('LOADC', 1, 0),
62+
# ('MOVE', 2, 0),
63+
# ('NAMED_TUPLE_CONSTRUCT', 1, 1),
64+
# ('OP', 0, 0),
65+
# ('MOVE', 3, 0),
66+
# ('LOADC', 1, 0),
67+
# ('DICT_INDEX', 0, 0),
68+
# ('LOADC', 0, 0),
69+
# ('TUPLE_INDEX', 0, 0),
70+
# ('RET', 0, 0))),
71+
# ('operators', (('aten::_set_item', 'str', 3),)),
72+
# ('constants', (0, 'test')),
73+
# ('types',
74+
# ('Dict[str,__torch__.Foo[NamedTuple, [[id, Tensor]]]]',
75+
# '__torch__.Foo[NamedTuple, [[id, Tensor]]]')),
76+
# ('register_size', 3)),
77+
# (('arguments',
78+
# ((('name', 'self'),
79+
# ('type', '__torch__.___torch_mangle_2.Bar'),
80+
# ('default_value', None)),
81+
# (('name', 'a'), ('type', 'Tensor'), ('default_value', None)))),
82+
# ('returns',
83+
# ((('name', ''), ('type', 'Tensor'), ('default_value', None)),)))))
84+
85+
sample_input = torch.tensor(5)
86+
script_module = torch.jit.script(Bar())
87+
88+
script_module_result = script_module(sample_input)
89+
90+
buffer_mobile = io.BytesIO(script_module._save_to_buffer_for_lite_interpreter())
91+
buffer_mobile.seek(0)
92+
mobile_module = _load_for_lite_interpreter(buffer_mobile)
93+
mobile_module_result = mobile_module(sample_input)
94+
torch.testing.assert_allclose(
95+
script_module_result,
96+
mobile_module_result
97+
)
98+
3699
def test_typing_namedtuple_custom_classtype(self):
37100
class Foo(NamedTuple):
38101
id: torch.Tensor

test/quantization/core/test_quantized_op.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -832,7 +832,7 @@ def test_qadd_relu_same_qparams(self):
832832
"""Tests the correctness of the cudnn add and add_relu op
833833
(Similar to test_qadd_relu_different_qparams, will probably merge in the future)"""
834834
@unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
835-
@unittest.skip("Local only - currently the qconv2d_cudnn op is bulid "
835+
@unittest.skip("Local only - currently the test_qadd_relu_cudnn op is bulid "
836836
"with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test "
837837
"after it is built by default")
838838
def test_qadd_relu_cudnn(self):
@@ -865,6 +865,41 @@ def test_qadd_relu_cudnn(self):
865865
np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
866866
"Quantized addition with ReLU failed.")
867867

868+
"""Tests the correctness of the cudnn add and add_relu op for nhwc format"""
869+
@unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
870+
@unittest.skip("Local only - currently the test_qadd_relu_cudnn_nhwc op is bulid "
871+
"with USE_EXPERIMENTAL_CUDNN_V8_API, we can enable the test "
872+
"after it is built by default")
873+
def test_qadd_relu_cudnn_nhwc(self):
874+
dtype = torch.qint8
875+
add_relu = torch.ops.quantized.add_relu
876+
add = torch.ops.quantized.add
877+
878+
A = torch.rand(16, 8, 4, 12).to(device="cuda")
879+
B = torch.rand(16, 8, 4, 12).to(device="cuda")
880+
scale_A = 2.5
881+
scale_B = 6.3
882+
scale_C = 12.9
883+
zero_point = 0
884+
qA = torch.quantize_per_tensor(A, scale=scale_A, zero_point=zero_point,
885+
dtype=dtype)
886+
qB = torch.quantize_per_tensor(B, scale=scale_B, zero_point=zero_point,
887+
dtype=dtype)
888+
# Add ground truth
889+
C = (qA.dequantize() + qB.dequantize()).to(device="cpu").numpy()
890+
qC = _quantize(C, scale_C, zero_point, dtype=np_dtype[dtype])
891+
qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point).to(device="cpu")
892+
np.testing.assert_equal(qC, qC_hat.int_repr(),
893+
"Quantized addition failed.")
894+
895+
# Add + ReLU ground truth
896+
Crelu = C.copy()
897+
Crelu[C < 0] = 0
898+
qCrelu = _quantize(Crelu, scale_C, zero_point, dtype=np_dtype[dtype])
899+
qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point).to(device="cpu")
900+
np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
901+
"Quantized addition with ReLU failed.")
902+
868903
"""Tests the correctness of the add and add_relu op."""
869904
def test_qadd_relu_different_qparams(self):
870905
for dtype in [torch.quint8, torch.qint8, torch.qint32]:

0 commit comments

Comments
 (0)