
Commit abda069

b-koopman authored and facebook-github-bot committed
[quant][embedding qat] Support Embedding QAT via FX API (#68296)
Summary:
Pull Request resolved: #68296

Support the QAT workflow via the torch.fx QAT API, e.g. `prepare_qat_fx` and `convert_fx`.

Test Plan: `pytest test/quantization/fx/test_quantize_fx.py -v -k "test_qat_embedding_linear"`

Imported from OSS

Reviewed By: jingsh, supriyar

Differential Revision: D32404517

fbshipit-source-id: 0484df8c826b823b60dfecd9def77bf8cffe0527
1 parent 3157371 commit abda069
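
For context, the end-to-end workflow the summary refers to looks roughly like the sketch below, adapted from the test this commit adds. It assumes the qconfig_dict-based prepare_qat_fx/convert_fx signatures of this era's torch.ao.quantization API (later releases changed these interfaces), so treat it as illustrative rather than canonical.

import torch
import torch.nn.qat as nnqat
import torch.nn.quantized as nnq
from torch.ao.quantization import get_default_qat_qconfig, default_embedding_qat_qconfig
from torch.ao.quantization.quantize_fx import prepare_qat_fx, convert_fx

class EmbeddingLinear(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=12)
        self.linear = torch.nn.Linear(12, 1)

    def forward(self, indices):
        return self.linear(torch.sum(self.emb(indices), dim=1))

# Global QAT qconfig plus an embedding-specific override.
qconfig_dict = {
    "": get_default_qat_qconfig("qnnpack"),
    "object_type": [(torch.nn.Embedding, default_embedding_qat_qconfig)],
}

prepared = prepare_qat_fx(EmbeddingLinear().train(), qconfig_dict)
# ... run training steps on `prepared` here ...

# Map the QAT embedding onto its quantized counterpart at convert time,
# as the new test does.
convert_custom_config_dict = {
    "additional_object_mapping": {"static": {nnqat.Embedding: nnq.Embedding}},
}
quantized = convert_fx(prepared,
                       convert_custom_config_dict=convert_custom_config_dict,
                       qconfig_dict=qconfig_dict)
out = quantized(torch.randint(0, 10, (12, 12)))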

File tree

3 files changed: +49 −2


test/quantization/fx/test_quantize_fx.py

Lines changed: 46 additions & 0 deletions
@@ -62,6 +62,7 @@
     MovingAverageMinMaxObserver,
     HistogramObserver,
     QConfig,
+    default_embedding_qat_qconfig,
 )
 
 from torch.ao.quantization.fx.pattern_utils import (
@@ -5745,6 +5746,51 @@ def test_resnet18_ddp(self):
         self._test_model_impl(
             'ddp', 'resnet18', model, eager_quantizable_model)
 
+    def test_qat_embedding_linear(self):
+        for device in get_supported_device_types():
+            class EmbeddingLinear(torch.nn.Module):
+                def __init__(self):
+                    super(EmbeddingLinear, self).__init__()
+                    self.emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=12)
+                    self.linear = torch.nn.Linear(12, 1).to(dtype=torch.float)
+
+                def forward(self, input: torch.Tensor):
+                    x = torch.sum(self.emb(input), dim=1)
+                    x = self.linear(x)
+                    return x
+
+            qconfig_dict = {"": get_default_qat_qconfig("qnnpack"),
+                            "object_type": [(torch.nn.Embedding, default_embedding_qat_qconfig)]}
+
+
+            train_indices = [[torch.randint(0, 10, (12, 12)), torch.randn((12, 1))] for _ in range(2)]
+            eval_output = [[torch.randint(0, 10, (12, 1))]]
+
+            model = EmbeddingLinear().train()
+            prepared_fx_model = prepare_qat_fx(model, qconfig_dict)
+            test_only_train_fn(prepared_fx_model, train_indices)
+            convert_custom_config_dict = {
+                "additional_object_mapping": {
+                    "static": {
+                        torch.nn.qat.Embedding: nn.quantized.Embedding,
+                    }
+                }
+            }
+            quant_model = convert_fx(prepared_fx_model,
+                                     convert_custom_config_dict=convert_custom_config_dict,
+                                     qconfig_dict=qconfig_dict)
+
+            def checkQuantized(model):
+                # Make sure Embedding is now a quantized Embedding.
+                self.assertEqual(type(model.emb), nn.quantized.Embedding)
+                # Also test that Linear has been quantized.
+                self.assertEqual(type(model.linear), nnq.Linear)
+
+                test_only_eval_fn(model, eval_output)
+                self.checkScriptable(model, eval_output)
+                self.checkNoQconfig(model)
+            checkQuantized(quant_model)
+
     @given(
         device=st.sampled_from(
             ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"]

torch/ao/quantization/fx/quantization_patterns.py

Lines changed: 1 addition & 0 deletions
@@ -1161,6 +1161,7 @@ def convert(self,
                 load_arg(quantized=[0])(self.bn_node.args),
                 load_arg(quantized=torch.float)(self.bn_node.kwargs))
 
+@register_quant_pattern(torch.nn.qat.Embedding)
 @register_quant_pattern(torch.nn.Embedding)
 @register_quant_pattern(torch.nn.EmbeddingBag)
 class EmbeddingQuantizeHandler(QuantizeHandler):
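
The register_quant_pattern decorator used above maps a pattern (here, a module class) to the QuantizeHandler that convert_fx dispatches to when it meets that pattern in the traced graph; registering torch.nn.qat.Embedding is what lets prepared QAT embeddings reach EmbeddingQuantizeHandler. Below is a toy sketch of the decorator-registry idiom, not the actual pattern_utils implementation:

import torch
import torch.nn.qat  # ensure the qat namespace is loaded
from typing import Any, Callable, Dict, Type

# Toy registry in the spirit of pattern_utils (not the real code):
QUANTIZATION_PATTERNS: Dict[Any, Type] = {}

def register_quant_pattern(pattern: Any) -> Callable[[Type], Type]:
    """Record a pattern -> handler mapping when the class is defined."""
    def insert(handler_cls: Type) -> Type:
        QUANTIZATION_PATTERNS[pattern] = handler_cls
        return handler_cls
    return insert

@register_quant_pattern(torch.nn.qat.Embedding)
@register_quant_pattern(torch.nn.Embedding)
class ToyEmbeddingHandler:
    """Stand-in for EmbeddingQuantizeHandler in this sketch."""

assert QUANTIZATION_PATTERNS[torch.nn.qat.Embedding] is ToyEmbeddingHandler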

torch/ao/quantization/qconfig.py

Lines changed: 2 additions & 2 deletions
@@ -192,10 +192,10 @@ def get_default_qconfig(backend='fbgemm'):
         qconfig = default_qconfig
     return qconfig
 
-default_embedding_qat_qconfig = QConfig(activation=NoopObserver,
+default_embedding_qat_qconfig = QConfig(activation=NoopObserver.with_args(dtype=torch.float32),
                                         weight=default_embedding_fake_quant)
 
-default_embedding_qat_qconfig_4bit = QConfig(activation=NoopObserver,
+default_embedding_qat_qconfig_4bit = QConfig(activation=NoopObserver.with_args(dtype=torch.float32),
                                              weight=default_embedding_fake_quant_4bit)
 
 def get_default_qat_qconfig(backend='fbgemm', version=1):
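
This fix matters because QConfig fields are observer factories that the framework instantiates later; passing the bare NoopObserver class left the activation dtype at the class default, whereas with_args pins it to torch.float32 so embedding activations stay in floating point during QAT. A small sketch under those assumptions (import paths follow this era's module layout):

import torch
from torch.ao.quantization.observer import NoopObserver
from torch.ao.quantization.qconfig import QConfig
from torch.ao.quantization.fake_quantize import default_embedding_fake_quant

# QConfig holds observer *factories*: the framework later calls
# qconfig.activation() / qconfig.weight() to instantiate them, and
# .with_args(...) returns a factory with those ctor args baked in.
qconfig = QConfig(
    activation=NoopObserver.with_args(dtype=torch.float32),  # activations stay float
    weight=default_embedding_fake_quant,                     # only weights are fake-quantized
)

act = qconfig.activation()          # instantiate the activation observer
assert act.dtype == torch.float32   # previously: whatever NoopObserver defaults to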
