
Commit d4a5bb2

Decouple quantization from model and serialize (official-stockfish#357)
* Decouple quantization from model and serialize
* fix type checking circular dependency
* fix more errors
* ruff format
* try fix
* revert no_grad changes
* revert model.eval change
* fix tensor type
* clarify name
1 parent d189a14 commit d4a5bb2

12 files changed: +283 additions, -119 deletions
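The practical effect for callers is that a QuantizationConfig now travels alongside ModelConfig wherever a model is constructed or deserialized. A minimal sketch of the updated calling convention, mirroring the call sites changed below (the feature-set name, checkpoint/net paths, and L1 size are placeholders, not values taken from this commit):

import model as M
from model import NNUE, NNUEReader, ModelConfig, QuantizationConfig

feature_set = M.get_feature_set_from_name("HalfKAv2_hm")  # placeholder name
config = ModelConfig(L1=3072)                              # placeholder L1 size
quantize_config = QuantizationConfig()                     # defaults: 600.0 / 64.0 / 16.0 / 127.0

# From a Lightning checkpoint:
nnue = NNUE.load_from_checkpoint(
    "last.ckpt",                                           # placeholder path
    feature_set=feature_set,
    config=config,
    quantize_config=quantize_config,
)

# From a serialized .nnue file:
with open("network.nnue", "rb") as f:                      # placeholder path
    model = NNUEReader(f, feature_set, config, quantize_config).model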

cross_check_eval.py
Lines changed: 15 additions & 4 deletions

@@ -12,12 +12,18 @@
     NNUE,
     NNUEReader,
     ModelConfig,
+    QuantizationConfig,
 )


-def read_model(nnue_path, feature_set: FeatureSet, config: ModelConfig):
+def read_model(
+    nnue_path,
+    feature_set: FeatureSet,
+    config: ModelConfig,
+    quantize_config: QuantizationConfig,
+):
     with open(nnue_path, "rb") as f:
-        reader = NNUEReader(f, feature_set, config)
+        reader = NNUEReader(f, feature_set, config, quantize_config)
     return reader.model


@@ -177,10 +183,15 @@ def main():
     feature_set = get_feature_set_from_name(args.features)
     if args.checkpoint:
         model = NNUE.load_from_checkpoint(
-            args.checkpoint, feature_set=feature_set, config=ModelConfig(L1=args.l1)
+            args.checkpoint,
+            feature_set=feature_set,
+            config=ModelConfig(L1=args.l1),
+            quantize_config=QuantizationConfig(),
         )
     else:
-        model = read_model(args.net, feature_set, ModelConfig(L1=args.l1))
+        model = read_model(
+            args.net, feature_set, ModelConfig(L1=args.l1), QuantizationConfig()
+        )
     model.eval()
     fen_batch_provider = make_fen_batch_provider(args.data, batch_size)

ftperm.py
Lines changed: 34 additions & 12 deletions

@@ -41,7 +41,14 @@

 import data_loader
 import model as M
-from model import FeatureSet, NNUE, NNUEModel, NNUEReader, ModelConfig
+from model import (
+    FeatureSet,
+    NNUE,
+    NNUEModel,
+    NNUEReader,
+    ModelConfig,
+    QuantizationConfig,
+)


 """
@@ -391,9 +398,14 @@ def find_perm_impl(actmat, use_cupy, L1: int):
 # -------------------------------------------------------------


-def read_model(nnue_path, feature_set: FeatureSet, config: ModelConfig):
+def read_model(
+    nnue_path,
+    feature_set: FeatureSet,
+    config: ModelConfig,
+    quantize_config: QuantizationConfig,
+):
     with open(nnue_path, "rb") as f:
-        reader = NNUEReader(f, feature_set, config)
+        reader = NNUEReader(f, feature_set, config, quantize_config)
     return reader.model


@@ -419,9 +431,13 @@ def filter_fens(fens):
     return filtered_fens


-def quantize_ft(model):
-    model.input.weight.data = model.input.weight.data.mul(model.quantized_one).round()
-    model.input.bias.data = model.input.bias.data.mul(model.quantized_one).round()
+def quantize_ft(model: NNUEModel):
+    model.input.weight.data = model.input.weight.data.mul(
+        model.quantization.quantized_one
+    ).round()
+    model.input.bias.data = model.input.bias.data.mul(
+        model.quantization.quantized_one
+    ).round()


 def forward_ft(
@@ -508,7 +524,7 @@ def ft_permute(model, ft_perm_path):
     ft_permute_impl(model, permutation)


-def gather_impl(model, dataset, count):
+def gather_impl(model: NNUEModel, dataset, count):
     ZERO_POINT = 0.0  # Vary this to check hypothetical forced larger truncation to zero
     BATCH_SIZE = 1000

@@ -546,11 +562,17 @@ def gather_impl(model, dataset, count):
 def command_gather(args):
     feature_set = M.get_feature_set_from_name(args.features)
     if args.checkpoint:
-        model = NNUE.load_from_checkpoint(
-            args.checkpoint, feature_set=feature_set, config=ModelConfig(L1=args.l1)
+        nnue = NNUE.load_from_checkpoint(
+            args.checkpoint,
+            feature_set=feature_set,
+            config=ModelConfig(L1=args.l1),
+            quantize_config=QuantizationConfig(),
         )
+        model = nnue.model
     else:
-        model = read_model(args.net, feature_set, ModelConfig(L1=args.l1))
+        model = read_model(
+            args.net, feature_set, ModelConfig(L1=args.l1), QuantizationConfig()
+        )

     model.eval()

@@ -595,7 +617,7 @@ def command_find_perm(args):
     with open(args.data, "rb") as file:
         actmat = np.load(file)

-    perm = find_perm_impl(actmat, args.use_cupy)
+    perm = find_perm_impl(actmat, args.use_cupy, args.l1)

     # perm = np.random.permutation([i for i in range(L1)])
     with open(args.out, "wb") as file:
@@ -618,7 +640,7 @@ def ft_optimize(

     print("Finding permutation...")
     perm = find_perm_impl(actmat, use_cupy, model.L1)
-    if actmat_save_path is not None:
+    if perm_save_path is not None:
         with open(perm_save_path, "wb") as file:
             np.save(file, perm)

model/__init__.py
Lines changed: 2 additions & 0 deletions

@@ -3,6 +3,7 @@
 from .features import add_feature_args, FeatureSet, get_feature_set_from_name
 from .lightning_module import NNUE
 from .model import NNUEModel
+from .quantize import QuantizationConfig
 from .utils import coalesce_ft_weights, load_model, NNUEReader, NNUEWriter


@@ -15,6 +16,7 @@
     "get_feature_set_from_name",
     "NNUE",
     "NNUEModel",
+    "QuantizationConfig",
     "coalesce_ft_weights",
     "load_model",
     "NNUEReader",
model/lightning_module.py
Lines changed: 4 additions & 2 deletions

@@ -6,6 +6,7 @@
 from .config import LossParams, ModelConfig
 from .features import FeatureSet
 from .model import NNUEModel
+from .quantize import QuantizationConfig


 def _get_parameters(layers: list[nn.Module]):
@@ -29,6 +30,7 @@ def __init__(
         self,
         feature_set: FeatureSet,
         config: ModelConfig,
+        quantize_config: QuantizationConfig,
         max_epoch=800,
         num_batches_per_epoch=int(100_000_000 / 16384),
         gamma=0.992,
@@ -40,7 +42,7 @@ def __init__(
     ):
         super().__init__()
         self.model: NNUEModel = NNUEModel(
-            feature_set, config, num_psqt_buckets, num_ls_buckets
+            feature_set, config, quantize_config, num_psqt_buckets, num_ls_buckets
         )
         self.loss_params = loss_params
         self.max_epoch = max_epoch
@@ -79,7 +81,7 @@ def step_(self, batch: tuple[Tensor, ...], batch_idx, loss_type):
                 psqt_indices,
                 layer_stack_indices,
             )
-            * self.model.nnue2score
+            * self.model.quantization.nnue2score
         )

         p = self.loss_params
model/model.py
Lines changed: 5 additions & 28 deletions

@@ -6,6 +6,7 @@
 from .config import ModelConfig
 from .feature_transformer import DoubleFeatureTransformerSlice
 from .features import FeatureSet
+from .quantize import QuantizationConfig, QuantizationManager


 class LayerStacks(nn.Module):
@@ -128,6 +129,7 @@ def __init__(
         self,
         feature_set: FeatureSet,
         config: ModelConfig,
+        quantize_config: QuantizationConfig,
         num_psqt_buckets: int = 8,
         num_ls_buckets: int = 8,
     ):
@@ -146,33 +148,8 @@ def __init__(
         self.feature_set = feature_set
         self.layer_stacks = LayerStacks(self.num_ls_buckets, config)

-        self.nnue2score = 600.0
-        self.weight_scale_hidden = 64.0
-        self.weight_scale_out = 16.0
-        self.quantized_one = 127.0
-
-        max_hidden_weight = self.quantized_one / self.weight_scale_hidden
-        max_out_weight = (self.quantized_one * self.quantized_one) / (
-            self.nnue2score * self.weight_scale_out
-        )
-        self.weight_clipping = [
-            {
-                "params": [self.layer_stacks.l1.weight],
-                "min_weight": -max_hidden_weight,
-                "max_weight": max_hidden_weight,
-                "virtual_params": self.layer_stacks.l1_fact.weight,
-            },
-            {
-                "params": [self.layer_stacks.l2.weight],
-                "min_weight": -max_hidden_weight,
-                "max_weight": max_hidden_weight,
-            },
-            {
-                "params": [self.layer_stacks.output.weight],
-                "min_weight": -max_out_weight,
-                "max_weight": max_out_weight,
-            },
-        ]
+        self.quantization = QuantizationManager(quantize_config)
+        self.weight_clipping = self.quantization.generate_weight_clipping_config(self)

         self._init_layers()

@@ -195,7 +172,7 @@ def _init_psqt(self):
         input_weights = self.input.weight
         input_bias = self.input.bias
         # 1.0 / kPonanzaConstant
-        scale = 1 / self.nnue2score
+        scale = 1 / self.quantization.nnue2score

         with torch.no_grad():
             initial_values = self.feature_set.get_initial_psqt_features()
model/quantize.py
Lines changed: 140 additions & 0 deletions

@@ -0,0 +1,140 @@
+from dataclasses import dataclass
+from typing import Callable, NotRequired, TypedDict, TYPE_CHECKING
+
+import torch
+
+if TYPE_CHECKING:
+    from .model import NNUEModel
+
+
+class WeightClippingConfig(TypedDict):
+    params: list[torch.Tensor]
+    min_weight: float
+    max_weight: float
+    virtual_params: NotRequired[torch.Tensor]
+
+
+@dataclass
+class QuantizationConfig:
+    nnue2score: float = 600.0
+    weight_scale_hidden: float = 64.0
+    weight_scale_out: float = 16.0
+    quantized_one: float = 127.0
+
+
+class QuantizationManager:
+    def __init__(self, config: QuantizationConfig):
+        self.nnue2score = config.nnue2score
+        self.weight_scale_hidden = config.weight_scale_hidden
+        self.weight_scale_out = config.weight_scale_out
+        self.quantized_one = config.quantized_one
+
+        self.max_hidden_weight = self.quantized_one / self.weight_scale_hidden
+        self.max_out_weight = (self.quantized_one * self.quantized_one) / (
+            self.nnue2score * self.weight_scale_out
+        )
+
+    def generate_weight_clipping_config(
+        self, model: "NNUEModel"
+    ) -> list[WeightClippingConfig]:
+        return [
+            {
+                "params": [model.layer_stacks.l1.weight],
+                "min_weight": -self.max_hidden_weight,
+                "max_weight": self.max_hidden_weight,
+                "virtual_params": model.layer_stacks.l1_fact.weight,
+            },
+            {
+                "params": [model.layer_stacks.l2.weight],
+                "min_weight": -self.max_hidden_weight,
+                "max_weight": self.max_hidden_weight,
+            },
+            {
+                "params": [model.layer_stacks.output.weight],
+                "min_weight": -self.max_out_weight,
+                "max_weight": self.max_out_weight,
+            },
+        ]
+
+    def quantize_feature_transformer(
+        self,
+        bias: torch.Tensor,
+        weight: torch.Tensor,
+        psqt_weight: torch.Tensor,
+        callback: Callable = lambda *args, **kwargs: None,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        bias = bias.mul(self.quantized_one).round().to(torch.int16)
+        weight = weight.mul(self.quantized_one).round().to(torch.int16)
+        psqt_weight = (
+            psqt_weight.mul(self.nnue2score * self.weight_scale_out)
+            .round()
+            .to(torch.int32)
+        )
+
+        callback(bias, weight, psqt_weight)
+
+        return bias, weight, psqt_weight
+
+    def dequantize_feature_transformer(
+        self,
+        bias: torch.Tensor,
+        weight: torch.Tensor,
+        psqt_weight: torch.Tensor,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        bias = bias.divide(self.quantized_one)
+        weight = weight.divide(self.quantized_one)
+        psqt_weight = psqt_weight.divide(self.nnue2score * self.weight_scale_out)
+
+        return bias, weight, psqt_weight
+
+    def quantize_fc_layer(
+        self,
+        bias: torch.Tensor,
+        weight: torch.Tensor,
+        output_layer: bool = False,
+        callback: Callable = lambda *args, **kwargs: None,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        kWeightScaleHidden = self.weight_scale_hidden
+        kWeightScaleOut = self.nnue2score * self.weight_scale_out / self.quantized_one
+        kWeightScale = kWeightScaleOut if output_layer else kWeightScaleHidden
+        kBiasScaleOut = self.weight_scale_out * self.nnue2score
+        kBiasScaleHidden = self.weight_scale_hidden * self.quantized_one
+        kBiasScale = kBiasScaleOut if output_layer else kBiasScaleHidden
+        kMaxWeight = self.quantized_one / kWeightScale
+
+        bias = bias.mul(kBiasScale).round().to(torch.int32)
+
+        clipped = torch.count_nonzero(weight.clamp(-kMaxWeight, kMaxWeight) - weight)
+        total_elements = torch.numel(weight)
+        clipped_max = torch.max(
+            torch.abs(weight.clamp(-kMaxWeight, kMaxWeight) - weight)
+        )
+
+        weight = (
+            weight.clamp(-kMaxWeight, kMaxWeight)
+            .mul(kWeightScale)
+            .round()
+            .to(torch.int8)
+        )
+
+        callback(bias, weight, clipped, total_elements, clipped_max, kMaxWeight)
+
+        return bias, weight
+
+    def dequantize_fc_layer(
+        self,
+        bias: torch.Tensor,
+        weight: torch.Tensor,
+        output_layer: bool = False,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        kWeightScaleHidden = self.weight_scale_hidden
+        kWeightScaleOut = self.nnue2score * self.weight_scale_out / self.quantized_one
+        kWeightScale = kWeightScaleOut if output_layer else kWeightScaleHidden
+        kBiasScaleOut = self.weight_scale_out * self.nnue2score
+        kBiasScaleHidden = self.weight_scale_hidden * self.quantized_one
+        kBiasScale = kBiasScaleOut if output_layer else kBiasScaleHidden
+
+        bias = bias.divide(kBiasScale)
+        weight = weight.divide(kWeightScale)
+
+        return bias, weight
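As a usage sketch of the new manager, the snippet below exercises only quantize_fc_layer and dequantize_fc_layer on a hypothetical hidden fully-connected layer; the tensor shapes and value scale are arbitrary, chosen so that no weight hits the clipping bound:

import torch
from model.quantize import QuantizationConfig, QuantizationManager

q = QuantizationManager(QuantizationConfig())

# Hypothetical hidden FC layer; small values so clamp(-kMaxWeight, kMaxWeight) is a no-op.
weight = torch.randn(32, 16) * 0.01
bias = torch.randn(32) * 0.01

q_bias, q_weight = q.quantize_fc_layer(bias, weight)       # int32 bias, int8 weight
d_bias, d_weight = q.dequantize_fc_layer(q_bias, q_weight)  # back to float

print(q_weight.dtype, q_bias.dtype)  # torch.int8 torch.int32
# Round-trip error is bounded by the rounding step, 0.5 / weight_scale_hidden.
print(bool((d_weight - weight).abs().max() <= 0.5 / q.weight_scale_hidden))  # True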
