Skip to content

Commit 164b96c

Browse files
supriyar authored and facebook-github-bot committed
[quant][pyper] make embedding_bag quantization static (#44008)
Summary: Pull Request resolved: #44008. embedding_bag requires only quantization of weights (no dynamic quantization of inputs), so the type of quantization is essentially static (without calibration). This will enable pyper to do fc and embedding_bag quantization using the same API call. Test Plan: python test/test_quantization.py test_embedding_bag Imported from OSS Reviewed By: vkuzo Differential Revision: D23467019 fbshipit-source-id: 41a61a17ee34bcb737ba5b4e19fb7a576d4aeaf9
1 parent a0ae416 commit 164b96c

File tree

5 files changed

+30
-12
lines changed

5 files changed

+30
-12
lines changed

test/quantization/test_quantize_jit.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
fuse_modules,
2424
quantize_jit,
2525
quantize_dynamic_jit,
26+
PlaceholderObserver,
2627
)
2728

2829
# torch.quantization.quantize_jit
@@ -2947,14 +2948,14 @@ def forward(self, indices1, offsets1, indices2, offsets2):
29472948
m = torch.jit.trace(module, dummy_inputs)
29482949
else:
29492950
m = torch.jit.script(module)
2950-
from torch.quantization import QConfigDynamic, PlaceholderObserver
2951-
int4_dynamic_qconfig = QConfigDynamic(activation=PlaceholderObserver.with_args(dtype=torch.float,
2952-
custom_op_name="embedding_bag_4bit"),
2953-
weight=PlaceholderObserver.with_args(custom_op_name="embedding_bag_4bit"))
2954-
int8_dynamic_qconfig = QConfigDynamic(activation=PlaceholderObserver.with_args(dtype=torch.float,
2955-
custom_op_name="embedding_bag_byte"),
2956-
weight=PlaceholderObserver.with_args(custom_op_name="embedding_bag_byte"))
2957-
m = quantize_dynamic_jit(m, {'embedding1' : int4_dynamic_qconfig, 'embedding2' : int8_dynamic_qconfig})
2951+
int4_qconfig = QConfig(activation=PlaceholderObserver.with_args(dtype=torch.float,
2952+
custom_op_name="embedding_bag_4bit"),
2953+
weight=PlaceholderObserver.with_args(custom_op_name="embedding_bag_4bit"))
2954+
int8_qconfig = QConfig(activation=PlaceholderObserver.with_args(dtype=torch.float,
2955+
custom_op_name="embedding_bag_byte"),
2956+
weight=PlaceholderObserver.with_args(custom_op_name="embedding_bag_byte"))
2957+
m = prepare_jit(m, {'embedding1' : int4_qconfig, 'embedding2' : int8_qconfig})
2958+
m = convert_jit(m)
29582959
FileCheck().check("quantized::embedding_bag_4bit_rowwise_offsets") \
29592960
.check_next("quantized::embedding_bag_byte_rowwise_offsets") \
29602961
.run(m.graph)

torch/csrc/jit/passes/quantization/helper.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ std::vector<std::string> _static_quantizable_call_funcs = {
2525
"layer_norm",
2626
"group_norm",
2727
"instance_norm",
28+
"embedding_bag",
2829
};
2930

3031
std::vector<std::string> _static_quantizable_aten_funcs = {
@@ -42,15 +43,21 @@ std::vector<std::string> _static_quantizable_aten_funcs = {
4243
"layer_norm",
4344
"group_norm",
4445
"instance_norm",
46+
"embedding_bag",
4547
};
4648

4749
std::vector<std::string> _dynamic_quantizable_call_funcs = {
4850
"linear",
49-
"embedding_bag",
5051
};
5152

5253
std::vector<std::string> _dynamic_quantizable_aten_funcs = {
5354
"linear",
55+
};
56+
57+
std::vector<std::string> _static_weight_only_quant_aten_funcs = {
58+
"embedding_bag",
59+
};
60+
std::vector<std::string> _static_weight_only_quant_call_funcs = {
5461
"embedding_bag",
5562
};
5663

@@ -469,6 +476,13 @@ bool userDefinedCallFunction(Node* n) {
469476
!isFunctionNode(n, _static_quantizable_call_funcs, {});
470477
}
471478

479+
bool isWeightOnlyStaticQuantOp(Node* n) {
480+
return isFunctionNode(
481+
n,
482+
_static_weight_only_quant_call_funcs,
483+
_static_weight_only_quant_aten_funcs);
484+
}
485+
472486
bool nodeQuantizable(Node* n, QuantType quant_type) {
473487
bool is_dynamic = quant_type == QuantType::DYNAMIC;
474488
return isFunctionNode(

torch/csrc/jit/passes/quantization/helper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ TORCH_API bool nodeQuantizable(
100100
Node* n,
101101
QuantType quant_type = QuantType::STATIC);
102102

103+
// Nodes which only require quantization of weight value, eg. embedding_bag
104+
bool isWeightOnlyStaticQuantOp(Node* n);
105+
103106
// Check if a use of the value is quantizable, this depends on
104107
// both the use node and the offset
105108
TORCH_API bool useQuantizable(const Use& use, QuantType quant_type);

torch/csrc/jit/passes/quantization/insert_observers.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1170,7 +1170,8 @@ bool InsertObserversHelper::valueNeedsToBeQuantized(
11701170
// of the quantizable function.
11711171
if (quant_type_ == QuantType::STATIC) {
11721172
// Check whether producer is quantizable
1173-
if (nodeQuantizable(v->node()) || isPropagateQuantOp(v->node())) {
1173+
if (!isWeightOnlyStaticQuantOp(v->node()) &&
1174+
(nodeQuantizable(v->node()) || isPropagateQuantOp(v->node()))) {
11741175
return true;
11751176
}
11761177
}

torch/csrc/jit/passes/quantization/insert_quant_dequant.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,8 +403,7 @@ void insertQuantizationOps(
403403
// Temporary solution to quantize embedding_bag operators. Will be re-written
404404
// once we support quantization of embedding_bag weights.
405405
auto embedding_bag_name = getEmbeddingBagObsName(module, observer);
406-
if (quant_type == QuantType::DYNAMIC &&
407-
isEmbeddingBagOp(observer, embedding_bag_name)) {
406+
if (isEmbeddingBagOp(observer, embedding_bag_name)) {
408407
if (isWeight(module, observer_out)) {
409408
auto op_name = embedding_bag_name.value();
410409
Node* dequant = insertEmbeddingBagOps(observer, op_name);

0 commit comments

Comments (0)