
 """

-import time
 import argparse
-import chess
-import torch
 import copy
+from dataclasses import dataclass
+import time
+from typing import Callable, Generator, TypeAlias
+
+import chess
 import cupy as cp
 import numpy as np
+import numpy.typing as npt
+import torch

 import data_loader
 import model as M
@@ -61,7 +65,7 @@
 VERBOSE = False


-def batched(arr, batch_size):
+def batched(arr: npt.NDArray, batch_size: int) -> Generator[npt.NDArray, None, None]:
     """
     Utility generator that yields chunks of array `arr` of size `batch_size`
     Expects arr to be a numpy-like array
@@ -73,14 +77,14 @@ def batched(arr, batch_size):
         idx += batch_size


-def apply_swap(perm, i, j):
+def apply_swap(perm: npt.NDArray, i: int, j: int) -> None:
     """
     Swap `i`-th and `j`-th elements in the array `perm`.
     """
     perm[i], perm[j] = perm[j], perm[i]


-def apply_rotate_right(perm, indices):
+def apply_rotate_right(perm: npt.NDArray, indices: tuple[int, ...]) -> None:
     """
     Rotates right the values in `perm` at selected indices `indices`.
     The rotation is performed as-if the selected indices were laid out in the order
@@ -92,7 +96,9 @@ def apply_rotate_right(perm, indices):
         perm[i] = j


-def get_swapped_zero_positive_count(actmat_flat, use_cupy=True):
+def get_swapped_zero_positive_count(
+    actmat_flat: npt.NDArray[np.bool_], use_cupy: bool = True
+) -> int:
     if use_cupy:
         actmat_flat = cp.asarray(actmat_flat, dtype=cp.int8)

@@ -148,7 +154,9 @@ def get_swapped_zero_positive_count(actmat_flat, use_cupy=True):
     return swapped_zero_count


-def get_swapped_zero_increase(actmat, use_cupy=True):
+def get_swapped_zero_increase(
+    actmat: npt.NDArray[np.bool_], use_cupy: bool = True
+) -> npt.NDArray[np.int_]:
     n_neurons = actmat.shape[1]
     swapped_zero_count = 0

@@ -179,7 +187,9 @@ def get_swapped_zero_increase(actmat, use_cupy=True):
     return swapped_zero_increase


-def get_score_change(actmat, use_cupy=True):
+def get_score_change(
+    actmat: npt.NDArray[np.bool_], use_cupy: bool = True
+) -> npt.NDArray[np.int_]:
     # actmat is a boolean matrix of shape (N, L1) with "True" meaning 0

     n_neurons = actmat.shape[1]
@@ -193,7 +203,16 @@ def get_score_change(actmat, use_cupy=True):
     return score_change


-def make_swaps_2(actmat, use_cupy=True):
+@dataclass
+class SwapResult:
+    swaps: list[tuple[int, ...]]
+    score_change: float
+
+
+SwapFunction: TypeAlias = Callable[[npt.NDArray[np.bool_], bool], SwapResult]
+
+
+def make_swaps_2(actmat: npt.NDArray[np.bool_], use_cupy: bool = True) -> SwapResult:
     """
     Returns a series of independent 2-swap operations that collectively improve the objective function.
     """
@@ -210,7 +229,7 @@ def make_swaps_2(actmat, use_cupy=True):
     # Sum score_change[i, j] + score_change[j, i] to get the cumulative impact of the swap.
     score_change = score_change + score_change.T

-    def all_indices_in_same_block(i):
+    def all_indices_in_same_block(i: int) -> list[int]:
         """Returns a list of indices of all neurons in the same block as the i-th neuron."""
         # Floor to the start of the block.
         base = i // ZERO_BLOCK_SIZE * ZERO_BLOCK_SIZE
@@ -248,10 +267,10 @@ def all_indices_in_same_block(i):
     print(f"Time elapsed: {time.time() - start_time:0.3f}")
     print(f"Improvement this iteration: {total_improvement:0.3f}")

-    return swaps, total_improvement
+    return SwapResult(swaps, total_improvement)


-def make_swaps_3(actmat, use_cupy=True):
+def make_swaps_3(actmat: npt.NDArray[np.bool_], use_cupy: bool = True) -> SwapResult:
     """
     Returns a series of independent left-rotate operations that collectively improve the objective function.
     """
@@ -340,10 +359,12 @@ def make_swaps_3(actmat, use_cupy=True):
     total_improvement = total_score_change / n_samples / (n_neurons // 4) * 100
     print(f"Time elapsed: {time.time() - start_time:0.3f}")
     print(f"Improvement this iteration: {total_improvement:0.3f}")
-    return cycles, total_improvement
+    return SwapResult(cycles, total_improvement)


-def find_perm_impl(actmat, use_cupy, L1: int):
+def find_perm_impl(
+    actmat: npt.NDArray[np.bool_], use_cupy: bool, L1: int
+) -> npt.NDArray[np.int_]:
     actmat = np.reshape(actmat, (actmat.shape[0] * 2, actmat.shape[1] // 2))
     if use_cupy:
         actmat = cp.asarray(actmat, dtype=cp.int8)
@@ -352,7 +373,7 @@ def find_perm_impl(actmat, use_cupy, L1: int):
     total_score_change = 0
     perm = np.arange(L1 // 2)

-    stages = [make_swaps_2, make_swaps_3]
+    stages: list[SwapFunction] = [make_swaps_2, make_swaps_3]
     # The optimization routines are deterministic, so no need to retry.
     stages_max_fails = [0, 0]
     stage_id = 0
@@ -370,15 +391,15 @@ def find_perm_impl(actmat, use_cupy, L1: int):

         # Calculate a set of independent right rotates (so swaps for 2 element case)
         # that when applied improve the objective function
-        swaps, score_change = swap_fn(actmat, use_cupy)
-        for cycle in swaps:
+        swap_result = swap_fn(actmat, use_cupy)
+        for cycle in swap_result.swaps:
             # Update the current best permutation with the newly found adjustments.
             apply_rotate_right(perm, cycle)

-        total_score_change += score_change
+        total_score_change += swap_result.score_change
         print(f"Total improvement: {total_score_change}\n")

-        if score_change == 0:
+        if swap_result.score_change == 0:
             num_fails += 1
             if num_fails > stages_max_fails[stage_id]:
                 num_fails = 0
@@ -399,17 +420,19 @@ def find_perm_impl(actmat, use_cupy, L1: int):


 def read_model(
-    nnue_path,
+    nnue_path: str,
     feature_set: FeatureSet,
     config: ModelConfig,
     quantize_config: QuantizationConfig,
-):
+) -> NNUEModel:
     with open(nnue_path, "rb") as f:
         reader = NNUEReader(f, feature_set, config, quantize_config)
     return reader.model


-def make_fen_batch_provider(data_path, batch_size):
+def make_fen_batch_provider(
+    data_path: str, batch_size: int
+) -> data_loader.FenBatchProvider:
     return data_loader.FenBatchProvider(
         data_path,
         True,
@@ -421,7 +444,7 @@ def make_fen_batch_provider(data_path, batch_size):
     )


-def filter_fens(fens):
+def filter_fens(fens: list[str]) -> list[str]:
     # We don't want fens where a king is in check, as these cannot be evaluated by the engine.
     filtered_fens = []
     for fen in fens:
@@ -431,7 +454,7 @@ def filter_fens(fens):
     return filtered_fens


-def quantize_ft(model: NNUEModel):
+def quantize_ft(model: NNUEModel) -> None:
     model.input.weight.data = model.input.weight.data.mul(
         model.quantization.quantized_one
     ).round()
@@ -441,16 +464,16 @@ def quantize_ft(model: NNUEModel):


 def forward_ft(
-    model,
-    us,
-    them,
-    white_indices,
-    white_values,
-    black_indices,
-    black_values,
-    psqt_indices,
-    layer_stack_indices,
-):
+    model: NNUEModel,
+    us: torch.Tensor,
+    them: torch.Tensor,
+    white_indices: torch.Tensor,
+    white_values: torch.Tensor,
+    black_indices: torch.Tensor,
+    black_values: torch.Tensor,
+    psqt_indices: torch.Tensor,
+    layer_stack_indices: torch.Tensor,
+) -> torch.Tensor:
     wp, bp = model.input(white_indices, white_values, black_indices, black_values)
     w, _ = torch.split(wp, model.L1, dim=1)
     b, _ = torch.split(bp, model.L1, dim=1)
@@ -466,7 +489,7 @@ def forward_ft(
     return l0_.round()


-def eval_ft(model, batch: data_loader.SparseBatchPtr):
+def eval_ft(model: NNUEModel, batch: data_loader.SparseBatchPtr) -> torch.Tensor:
     with torch.no_grad():
         (
             us,
@@ -494,8 +517,8 @@ def eval_ft(model, batch: data_loader.SparseBatchPtr):
         return res


-def ft_permute_impl(model, permutation):
-    permutation = list(permutation)
+def ft_permute_impl(model: NNUEModel, perm: npt.NDArray[np.int_]) -> None:
+    permutation = list(perm)

     l1_size = model.layer_stacks.l1.in_features
     if l1_size != len(permutation) * 2:
@@ -517,14 +540,14 @@ def ft_permute_impl(model, permutation):
     ]


-def ft_permute(model, ft_perm_path):
+def ft_permute(model: NNUEModel, ft_perm_path: str) -> None:
     with open(ft_perm_path, "rb") as f:
         permutation = np.load(f)

     ft_permute_impl(model, permutation)


-def gather_impl(model: NNUEModel, dataset, count):
+def gather_impl(model: NNUEModel, dataset: str, count: int) -> npt.NDArray[np.bool_]:
     ZERO_POINT = 0.0  # Vary this to check hypothetical forced larger truncation to zero
     BATCH_SIZE = 1000

@@ -559,7 +582,7 @@ def gather_impl(model: NNUEModel, dataset, count):
     return np.concatenate(actmats, axis=0)


-def command_gather(args):
+def command_gather(args: argparse.Namespace) -> None:
     feature_set = M.get_feature_set_from_name(args.features)
     if args.checkpoint:
         nnue = NNUE.load_from_checkpoint(
@@ -582,13 +605,15 @@ def command_gather(args):
         np.save(file, actmat)


-def eval_act_mat(actmat):
+def eval_act_mat(actmat: npt.NDArray[np.bool_]) -> float:
     actmat = actmat.reshape((actmat.shape[0], actmat.shape[1] // 4, 4))
     r = np.all(actmat, axis=2)
     return np.count_nonzero(r) / r.shape[0] / r.shape[1]


-def eval_perm_impl(actmat, perm=None):
+def eval_perm_impl(
+    actmat: npt.NDArray[np.bool_], perm: npt.NDArray[np.int_] | None = None
+) -> None:
     actmat = np.reshape(actmat, (actmat.shape[0] * 2, actmat.shape[1] // 2))

     actmat_eval = eval_act_mat(actmat)
@@ -600,7 +625,7 @@ def eval_perm_impl(actmat, perm=None):
     print(f"Combined zeros in perm matrix: {perm_act_mat_eval * 100:0.6f}")


-def command_eval_perm(args):
+def command_eval_perm(args: argparse.Namespace) -> None:
     with open(args.data, "rb") as file:
         actmat = np.load(file)

@@ -613,7 +638,7 @@ def command_eval_perm(args):
     eval_perm_impl(actmat, perm)


-def command_find_perm(args):
+def command_find_perm(args: argparse.Namespace) -> None:
     with open(args.data, "rb") as file:
         actmat = np.load(file)

@@ -626,12 +651,12 @@ def command_find_perm(args):

 def ft_optimize(
     model: NNUEModel,
-    dataset_path,
-    count,
-    actmat_save_path=None,
-    perm_save_path=None,
-    use_cupy=True,
-):
+    dataset_path: str,
+    count: int,
+    actmat_save_path: str | None = None,
+    perm_save_path: str | None = None,
+    use_cupy: bool = True,
+) -> None:
     print("Gathering activation data...")
     actmat = gather_impl(model, dataset_path, count)
     if actmat_save_path is not None:
@@ -651,12 +676,12 @@ def ft_optimize(
     ft_permute_impl(model, perm)


-def set_cupy_device(device):
+def set_cupy_device(device: int | None) -> None:
     if device is not None:
         cp.cuda.runtime.setDevice(device)


-def main():
+def main() -> None:
     parser = argparse.ArgumentParser(description="")
     parser.add_argument(
         "--no-cupy",