Change quantizer to account for input tensor's memory format. #42178
Changes from all commits
```diff
@@ -132,7 +132,16 @@ def test_qtensor_quant_dequant(self):
         scale = 0.02
         zero_point = 2
         for device in get_supported_device_types():
-            r = torch.rand(3, 2, dtype=torch.float, device=device) * 4 - 2
+            r = torch.rand(3, 2, 4, 5, dtype=torch.float, device=device) * 4 - 2
+            for memory_format in [torch.contiguous_format, torch.channels_last]:
+                r = r.contiguous(memory_format=memory_format)
+                for dtype in [torch.qint8, torch.quint8, torch.qint32]:
+                    qr = torch.quantize_per_tensor(r, scale, zero_point, dtype)
+                    rqr = qr.dequantize()
+                    self.assertTrue(np.allclose(r.cpu().numpy(), rqr.cpu().numpy(), atol=2 / scale))
+        # Also check 5D tensors work.
+        for device in get_supported_device_types():
+            r = torch.rand(3, 2, 4, 5, 6, dtype=torch.float, device=device) * 4 - 2
+            for dtype in [torch.qint8, torch.quint8, torch.qint32]:
+                qr = torch.quantize_per_tensor(r, scale, zero_point, dtype)
+                rqr = qr.dequantize()
```
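For context, here is a minimal standalone sketch of the round trip this test exercises, using the same shape and qparams as the new test case. The `is_contiguous` check reflects a reading of the PR title ("account for input tensor's memory format"); the test itself only asserts the numerical round trip:

```python
import torch

# Channels-last 4D input, quantized with the same qparams as the test above.
x = torch.rand(3, 2, 4, 5).contiguous(memory_format=torch.channels_last)
qx = torch.quantize_per_tensor(x, scale=0.02, zero_point=2, dtype=torch.quint8)
xd = qx.dequantize()

# Quantize -> dequantize should reproduce the input up to quantization error.
print((x - xd).abs().max().item())
# Assumed consequence of this PR: the quantized tensor follows the input's
# channels_last layout instead of silently becoming contiguous.
print(qx.is_contiguous(memory_format=torch.channels_last))
```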
```diff
@@ -217,6 +226,35 @@ def test_qtensor_dtypes(self):
             rqr = qr.dequantize()
             self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / scale))
 
+    def _test_quantize_per_channel(self, r, scales, zero_points, axis, float_params):
+
+        def _quantize_per_channel_ref_nd(data, scales, zero_points, float_params):
+            dims = data.size()
+            data = data.view(-1, dims[axis], np.prod(dims[axis + 1:]))
+            res = torch.empty_like(data)
+            quant_min, quant_max = 0, 255
+            for i in range(res.size()[0]):
+                for j in range(res.size()[1]):
+                    for k in range(res.size()[2]):
+                        if float_params:
+                            inv_scale = 1.0 / scales[j]
+                            res[i][j][k] = np.clip(
+                                np.round(data[i][j][k] * inv_scale + zero_points[j]), quant_min, quant_max)
+                        else:
+                            res[i][j][k] = np.clip(
+                                np.round(data[i][j][k] / scales[j]) + zero_points[j], quant_min, quant_max)
+            res = res.view(*dims)
+            return res
+
+        contig_format = torch.channels_last if r.ndim == 4 else torch.channels_last_3d
+        for memory_format in [torch.contiguous_format, contig_format]:
+            ref_res = _quantize_per_channel_ref_nd(r, scales, zero_points, float_params)
+            r_contig = r.contiguous(memory_format=memory_format)
+            qr = torch.quantize_per_channel(r_contig, scales, zero_points, axis, torch.quint8)
+            rqr = qr.dequantize()
+            self.assertTrue(np.allclose(qr.int_repr(), ref_res))
+            self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / np.min(scales.numpy())))
+
     def test_qtensor_quantize_per_channel(self):
         r = torch.rand(3, 2, dtype=torch.float) * 4 - 2
         scales = torch.tensor([0.2, 0.03], dtype=torch.double)
```
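As a reading aid for the loop-based reference helper above, a vectorized equivalent could be written as follows. This is a hypothetical sketch, not code from the PR:

```python
import torch

def quantize_per_channel_ref(data, scales, zero_points, axis, float_params=False,
                             quant_min=0, quant_max=255):
    # Broadcast per-channel scales/zero_points along `axis`, then round and clamp
    # to the quint8 range, mirroring _quantize_per_channel_ref_nd above.
    shape = [1] * data.dim()
    shape[axis] = -1
    s = scales.to(torch.double).reshape(shape)
    zp = zero_points.to(torch.double).reshape(shape)
    d = data.to(torch.double)
    if float_params:
        q = torch.round(d / s + zp)   # float qparams: add zero point before rounding
    else:
        q = torch.round(d / s) + zp   # integer qparams: add zero point after rounding
    return q.clamp(quant_min, quant_max)
```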
```diff
@@ -235,6 +273,26 @@ def quantize_c(data, scales, zero_points):
         self.assertTrue(np.allclose(qr.int_repr(), quantize_c(r, scales, zero_points)))
         self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / np.min(scales.numpy())))
 
+        # Check 4D tensor with 2 different memory formats.
+        r = torch.rand(3, 2, 4, 5, dtype=torch.float) * 4 - 2
+        scales = torch.tensor([0.2, 0.03], dtype=torch.double)
+        zero_points = torch.tensor([5, 10], dtype=torch.long)
+        self._test_quantize_per_channel(r, scales, zero_points, 1, False)
+
+        scales = torch.tensor([0.2, 0.03, 0.5], dtype=torch.double)
+        zero_points = torch.tensor([5, 10, 7], dtype=torch.long)
+        self._test_quantize_per_channel(r, scales, zero_points, 0, False)
+
+        # Check 5D tensor.
+        r = torch.rand(3, 2, 4, 5, 7, dtype=torch.float) * 4 - 2
+        scales = torch.tensor([0.2, 0.03], dtype=torch.double)
+        zero_points = torch.tensor([5, 10], dtype=torch.long)
+        self._test_quantize_per_channel(r, scales, zero_points, 1, False)
+
+        scales = torch.tensor([0.2, 0.03, 0.5], dtype=torch.double)
+        zero_points = torch.tensor([5, 10, 7], dtype=torch.long)
+        self._test_quantize_per_channel(r, scales, zero_points, 0, False)
+
     def test_quantize_per_channel_float_qparams(self):
         r = torch.rand(3, 2, dtype=torch.float) * 4
         scales = torch.tensor([0.2, 0.03], dtype=torch.float)
```

Contributor (review comment on lines +276 to +294): nit: these can be in a loop as well, with …
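A sketch of what that suggestion could look like; this is hypothetical and not part of the PR, which keeps the checks unrolled:

```python
# Run the 4D and 5D per-channel checks in one loop over input shapes.
for shape in [(3, 2, 4, 5), (3, 2, 4, 5, 7)]:
    r = torch.rand(*shape, dtype=torch.float) * 4 - 2
    # Per-channel along dim 1 (2 channels).
    self._test_quantize_per_channel(
        r, torch.tensor([0.2, 0.03], dtype=torch.double),
        torch.tensor([5, 10], dtype=torch.long), 1, False)
    # Per-channel along dim 0 (3 channels).
    self._test_quantize_per_channel(
        r, torch.tensor([0.2, 0.03, 0.5], dtype=torch.double),
        torch.tensor([5, 10, 7], dtype=torch.long), 0, False)
```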
```diff
@@ -257,6 +315,25 @@ def quantize_ref(data, scales, zero_points):
         self.assertTrue(np.allclose(qr.int_repr(), ref))
         self.assertTrue(np.allclose(r.numpy(), dequant_tensor.numpy(), atol=1))
 
+        # Check 4D tensor with 2 different memory formats.
+        r = torch.rand(3, 2, 4, 5, dtype=torch.float) * 4
+        scales = torch.tensor([0.2, 0.03], dtype=torch.float)
+        zero_points = torch.tensor([0.1, 0.2], dtype=torch.float)
+        self._test_quantize_per_channel(r, scales, zero_points, 1, True)
+
+        scales = torch.tensor([0.2, 0.03, 0.5], dtype=torch.float)
+        zero_points = torch.tensor([0.1, 0.2, 1.], dtype=torch.float)
+        self._test_quantize_per_channel(r, scales, zero_points, 0, True)
+
+        # Check 5D tensor.
+        r = torch.rand(3, 2, 4, 5, 7, dtype=torch.float) * 4 - 2
+        scales = torch.tensor([0.2, 0.03], dtype=torch.float)
+        zero_points = torch.tensor([0.1, 0.2], dtype=torch.float)
+        self._test_quantize_per_channel(r, scales, zero_points, 1, True)
+
+        scales = torch.tensor([0.2, 0.03, 0.5], dtype=torch.float)
+        zero_points = torch.tensor([0.1, 0.2, 1.], dtype=torch.float)
+        self._test_quantize_per_channel(r, scales, zero_points, 0, True)
+
     def test_qtensor_permute(self):
         scale = 0.02
```

Review thread on the "# Check 4D tensor with 2 different memory formats." line above:

Contributor: Same here; I think you could also merge this test with the previous one.
Contributor (Author): That introduces unrelated changes. If we want to do that, we should merge it with the previous test in a separate PR.
Contributor: Sure, sounds good.