
Commit a1382b8

[quant][fx] Support overriding observers and fake quantize modules in backend_config_dict

Summary: Some operators have fixed quantization parameters. This PR adds support for overriding the qconfig's observer and fake quantize constructors via backend_config_dict.

Test Plan:
python test/test_quantization.py TestQuantizeFx
python test/test_quantization.py TestQuantizeFxOps

ghstack-source-id: 1dbd088
Pull Request resolved: #75135
1 parent 4f78ca2 commit a1382b8
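
For context, the override is expressed through two new backend_config_dict keys, as in the _HARDSIGMOID_MODULE_CONFIG entry added to native.py below. A minimal sketch of such an entry (key names come from this diff; the absolute import paths mirror the relative imports in native.py):

import torch
from torch.ao.quantization.observer import default_affine_fixed_qparams_observer
from torch.ao.quantization.fake_quantize import FixedQParamsFakeQuantize

# Sketch: pin the output quantization parameters of torch.nn.Hardsigmoid,
# regardless of which observer the user's qconfig specifies.
hardsigmoid_config = {
    "pattern": torch.nn.Hardsigmoid,
    # used during QAT (is_training=True)
    "overwrite_output_fake_quantizer": FixedQParamsFakeQuantize.with_args(
        observer=default_affine_fixed_qparams_observer),
    # used during PTQ (is_training=False)
    "overwrite_output_observer": default_affine_fixed_qparams_observer,
}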

5 files changed: +56 −6 lines changed

test/quantization/fx/test_quantize_fx.py

Lines changed: 1 addition & 1 deletion
@@ -4034,7 +4034,7 @@ def _assertFixedQParamsFakeQuantizeEqual(self, fq1, fq2):
         self.assertEqual(fq1()._observer_ctr, fq2()._observer_ctr)
 
     def test_fixed_qparams_patterns(self):
-        hard_sigmoid_keys = [torch.nn.Hardsigmoid, torch.nn.functional.hardsigmoid, "hardsigmoid", "hardsigmoid_"]
+        hard_sigmoid_keys = [torch.nn.functional.hardsigmoid, "hardsigmoid", "hardsigmoid_"]
         sigmoid_keys = [torch.nn.Sigmoid, torch.sigmoid, "sigmoid", "sigmoid_"]
         tanh_keys = [torch.nn.Tanh, torch.tanh, "tanh", "tanh_"]
         for k in hard_sigmoid_keys + sigmoid_keys:

torch/ao/quantization/fx/backend_config/native.py

Lines changed: 12 additions & 0 deletions
@@ -2,6 +2,8 @@
 import torch.nn.qat as nnqat
 import operator
 from .observation_type import ObservationType
+from ...observer import default_affine_fixed_qparams_observer
+from ...fake_quantize import FixedQParamsFakeQuantize
 
 def _get_default_op_backend_config(op, dtype_configs):
     return {
@@ -91,6 +93,15 @@ def _get_default_op_backend_config(op, dtype_configs):
     ],
 }
 
+_HARDSIGMOID_MODULE_CONFIG = {
+    "pattern": torch.nn.Hardsigmoid,
+    "observation_type": ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
+    "overwrite_output_fake_quantizer": FixedQParamsFakeQuantize.with_args(observer=default_affine_fixed_qparams_observer),
+    "overwrite_output_observer": default_affine_fixed_qparams_observer,
+    "dtype_configs": [
+        weighted_op_int8_dtype_config,
+    ],
+}
 
 def get_native_backend_config_dict():
     """ Get backend for PyTorch Native backend_config_dict (fbgemm/qnnpack)
@@ -102,5 +113,6 @@ def get_native_backend_config_dict():
         _LINEAR_MODULE_CONFIG,
         *_DEFAULT_OP_INT8_CONFIGS,
         _ADD_CONFIG,
+        _HARDSIGMOID_MODULE_CONFIG,
     ],
 }
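
Why a fixed-qparams observer works here: Hardsigmoid's output always lies in [0, 1], so the output scale and zero point can be chosen up front instead of being estimated from calibration data. To my understanding, default_affine_fixed_qparams_observer is roughly equivalent to the following sketch:

import torch
from torch.ao.quantization.observer import FixedQParamsObserver

# [0, 1] mapped onto quint8's [0, 255] gives scale = 1/256, zero_point = 0,
# so no calibration pass is needed for this output.
fixed_observer = FixedQParamsObserver.with_args(
    scale=1.0 / 256.0, zero_point=0, dtype=torch.quint8,
    quant_min=0, quant_max=255)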

torch/ao/quantization/fx/backend_config/quantize_handler.py

Lines changed: 35 additions & 3 deletions
@@ -1,11 +1,18 @@
 import torch
-from typing import Dict, Callable
+from typing import Dict, Callable, Any, Optional
 from .observation_type import ObservationType
 from ..quantization_patterns import QuantizeHandler
-from ..quantization_types import NodePattern
+from ..quantization_types import Pattern, NodePattern
+from ...utils import (
+    activation_dtype,
+)
 
 def get_quantize_handler_cls(
-        observation_type, dtype_configs, num_tensor_args_to_observation_type):
+        observation_type,
+        dtype_configs,
+        num_tensor_args_to_observation_type,
+        overwrite_output_fake_quantizer,
+        overwrite_output_observer):
 
     class ConfigurableQuantizeHandler(QuantizeHandler):
         def __init__(
@@ -22,8 +29,33 @@ def __init__(
             else:
                 self.observation_type = observation_type
             self.dtype_configs = dtype_configs
+            self.overwrite_output_fake_quantizer = overwrite_output_fake_quantizer
+            self.overwrite_output_observer = overwrite_output_observer
 
         def is_general_tensor_value_op(self) -> bool:
             return self.observation_type == ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT
 
+        # TODO: change this to output activation
+        def get_activation_ctr(
+            self,
+            qconfig: Any,
+            pattern: Pattern,
+            is_training: bool,
+        ) -> Optional[Callable]:
+            """
+            Returns the constructor for the activation observer which should be
+            used for the pattern matched to this handler. Some handlers override
+            this to a different value than what is specified in the qconfig.
+            """
+            act_dtype = activation_dtype(qconfig)
+            # TODO: change to is_qat
+            if is_training:
+                if act_dtype == torch.quint8 and self.overwrite_output_fake_quantizer is not None:
+                    return self.overwrite_output_fake_quantizer
+            else:
+                if act_dtype == torch.quint8 and self.overwrite_output_observer is not None:
+                    return self.overwrite_output_observer
+            return qconfig.activation
+
     return ConfigurableQuantizeHandler
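
The selection logic in get_activation_ctr boils down to the following standalone paraphrase (not the actual handler method; activation_dtype(qconfig) simply instantiates qconfig.activation and reads its dtype):

import torch

def pick_activation_ctr(qconfig, is_training, overwrite_fq, overwrite_obs):
    # Paraphrase of ConfigurableQuantizeHandler.get_activation_ctr: the
    # overrides only apply to quint8 activations; in QAT the fake quantize
    # override wins, in PTQ the observer override wins, else the qconfig.
    act_dtype = qconfig.activation().dtype
    if is_training:
        if act_dtype == torch.quint8 and overwrite_fq is not None:
            return overwrite_fq
    else:
        if act_dtype == torch.quint8 and overwrite_obs is not None:
            return overwrite_obs
    return qconfig.activation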

torch/ao/quantization/fx/backend_config/utils.py

Lines changed: 8 additions & 1 deletion
@@ -19,8 +19,15 @@ def get_pattern_to_quantize_handlers(
         observation_type = config.get("observation_type", None)
         dtype_configs = config["dtype_configs"]
         num_tensor_args_to_observation_type = config.get("num_tensor_args_to_observation_type", {})
+        overwrite_fake_quantizer = config.get("overwrite_output_fake_quantizer", None)
+        overwrite_observer = config.get("overwrite_output_observer", None)
         pattern_to_quantize_handlers[pattern] = \
-            get_quantize_handler_cls(observation_type, dtype_configs, num_tensor_args_to_observation_type)
+            get_quantize_handler_cls(
+                observation_type,
+                dtype_configs,
+                num_tensor_args_to_observation_type,
+                overwrite_fake_quantizer,
+                overwrite_observer)
 
     return pattern_to_quantize_handlers
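
Because both keys are read with .get(..., None), existing backend configs without overrides keep working unchanged. A hypothetical end-to-end check (the prepare_fx signature shown matches this era of the API and has since changed; treat it as a sketch, not the canonical test):

import torch
from torch.ao.quantization import get_default_qconfig
from torch.ao.quantization.quantize_fx import prepare_fx
from torch.ao.quantization.fx.backend_config.native import get_native_backend_config_dict

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.hs = torch.nn.Hardsigmoid()

    def forward(self, x):
        return self.hs(x)

# After prepare_fx, the observer attached to the Hardsigmoid output should be
# the fixed-qparams observer, not the qconfig's default histogram observer.
prepared = prepare_fx(
    M().eval(),
    {"": get_default_qconfig("fbgemm")},
    backend_config_dict=get_native_backend_config_dict())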

torch/ao/quantization/fx/quantization_patterns.py

Lines changed: 0 additions & 1 deletion
@@ -239,7 +239,6 @@ class DefaultNodeQuantizeHandler(QuantizeHandler):
     """
     pass
 
-@register_quant_pattern(torch.nn.Hardsigmoid, default_affine_fixed_qparams_observer)
 @register_quant_pattern(torch.nn.functional.hardsigmoid, default_affine_fixed_qparams_observer)
 @register_quant_pattern('hardsigmoid', default_affine_fixed_qparams_observer)
 @register_quant_pattern('hardsigmoid_', default_affine_fixed_qparams_observer)
