Commit 00bdf75

Move NNUE-related code to model (official-stockfish#351)

* move features to model
* move NNUEReader, NNUEWriter, and load_model to model.utils
* fix
* fix
* ruff format
* revert serialize renaming
* remove FeatureSet dependency in data_loader
* fix
* revert description changes
* fix nnue reader
* .ckpt

1 parent a7beb37 commit 00bdf75

23 files changed: +472 −471 lines

cross_check_eval.py

Lines changed: 12 additions & 8 deletions

```diff
@@ -4,16 +4,20 @@
 
 import chess
 
-import features
-import serialize
 import data_loader
-from model import NNUE, ModelConfig
-from features import FeatureSet
+from model import (
+    add_feature_args,
+    FeatureSet,
+    get_feature_set_from_name,
+    NNUE,
+    NNUEReader,
+    ModelConfig,
+)
 
 
 def read_model(nnue_path, feature_set: FeatureSet, config: ModelConfig):
     with open(nnue_path, "rb") as f:
-        reader = serialize.NNUEReader(f, feature_set, config)
+        reader = NNUEReader(f, feature_set, config)
     return reader.model
 
 
@@ -165,12 +169,12 @@ def main():
         "--count", type=int, default=100, help="number of datapoints to process"
     )
     parser.add_argument("--l1", type=int, default=ModelConfig().L1)
-    features.add_argparse_args(parser)
+    add_feature_args(parser)
     args = parser.parse_args()
 
     batch_size = 1000
 
-    feature_set = features.get_feature_set_from_name(args.features)
+    feature_set = get_feature_set_from_name(args.features)
     if args.checkpoint:
         model = NNUE.load_from_checkpoint(
             args.checkpoint, feature_set=feature_set, config=ModelConfig(L1=args.l1)
@@ -189,7 +193,7 @@ def main():
         fens = filter_fens(next(fen_batch_provider))
 
         b = data_loader.get_sparse_batch_from_fens(
-            feature_set, fens, [0] * len(fens), [1] * len(fens), [0] * len(fens)
+            feature_set.name, fens, [0] * len(fens), [1] * len(fens), [0] * len(fens)
         )
         model_evals += eval_model_batch(model, b)
         data_loader.destroy_sparse_batch(b)
```
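
With `serialize` and the top-level `features` module folded into the `model` package, scripts like this one resolve every NNUE-related name from a single import. A minimal sketch of the new call pattern, mirroring `read_model` above (the .nnue path is a placeholder):

```python
from model import ModelConfig, NNUEReader, get_feature_set_from_name

feature_set = get_feature_set_from_name("HalfKAv2_hm^")  # default feature set name
with open("nn.nnue", "rb") as f:  # placeholder path
    model = NNUEReader(f, feature_set, ModelConfig()).model
```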

data_loader/stream.py

Lines changed: 2 additions & 3 deletions

```diff
@@ -2,7 +2,6 @@
 
 from ._native import c_lib, SparseBatchPtr, FenBatchPtr
 from .config import CDataloaderSkipConfig, DataloaderSkipConfig
-from features import FeatureSet
 
 
 def _to_c_str_array(str_list):
@@ -64,15 +63,15 @@ def destroy_sparse_batch_stream(stream: ctypes.c_void_p):
 
 
 def get_sparse_batch_from_fens(
-    feature_set: FeatureSet, fens, scores, plies, results
+    feature_set: str, fens, scores, plies, results
 ) -> SparseBatchPtr:
     assert len(fens) == len(scores) == len(plies) == len(results)
 
     def to_c_int_array(data):
         return (ctypes.c_int * len(data))(*data)
 
     return c_lib.dll.get_sparse_batch_from_fens(
-        feature_set.name.encode("utf-8"),
+        feature_set.encode("utf-8"),
         len(fens),
         _to_c_str_array(fens),
         to_c_int_array(scores),
```
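
The new signature removes `data_loader`'s last dependency on the features code: callers hand over the feature set's name as a plain `str`, and the function forwards the same UTF-8 bytes to the C library as before. A hedged sketch of the old versus new call styles (single-FEN batch for brevity):

```python
import data_loader
from model import get_feature_set_from_name

feature_set = get_feature_set_from_name("HalfKAv2_hm^")
fens = ["rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"]

# Before this commit, callers passed the FeatureSet object:
#   data_loader.get_sparse_batch_from_fens(feature_set, fens, [0], [1], [0])
# Now only the name crosses the module boundary:
b = data_loader.get_sparse_batch_from_fens(feature_set.name, fens, [0], [1], [0])
data_loader.destroy_sparse_batch(b)
```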

docs/features.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -12,4 +12,4 @@ The current semantics are as follows:
 2. When resuming training from a .pt model - `--features` specifies the feature set to use for learning. If the feature set specified doesn't match the feature set from the .pt model a conversion is attempted. Right now only a conversion of feature set with a single block from non-factorized to factorized is supported. The factorized block must have the non-factorized features as the first factor. The virtual feature weights are initialized to zero.
 3. When converting .ckpt to .nnue - `--features` specifies the features as stored in the .ckpt file. The user must pass the correct feature set through `--features` because it can't be inferred from the .ckpt. If the features from `--features` and the saved model don't match it'll likely stack trace on some dimension mismatch.
 4. When converting .pt to .nnue - `--features` is ignored, the `feature_set` from the saved model is used, the weights are coalesced when writing the .nnue file.
-5. When converting .nnue to .pt - `--features` specifies the features in the .nnue file. The resulting .pt model has the same feature_set. Note that when resuming training this model can be converted to a compatible feature_set, see point 2.
\ No newline at end of file
+5. When converting .nnue to .pt - `--features` specifies the features in the .nnue file. The resulting .pt model has the same feature_set. Note that when resuming training this model can be converted to a compatible feature_set, see point 2.
```

ftperm.py

Lines changed: 5 additions & 8 deletions

```diff
@@ -33,8 +33,6 @@
 
 import time
 import argparse
-import features
-import serialize
 import chess
 import torch
 import copy
@@ -43,8 +41,7 @@
 
 import data_loader
 import model as M
-from model import NNUE, NNUEModel, ModelConfig
-from features import FeatureSet
+from model import FeatureSet, NNUE, NNUEModel, NNUEReader, ModelConfig
 
 
 """
@@ -398,7 +395,7 @@ def find_perm_impl(actmat, use_cupy, L1: int):
 
 def read_model(nnue_path, feature_set: FeatureSet, config: ModelConfig):
     with open(nnue_path, "rb") as f:
-        reader = serialize.NNUEReader(f, feature_set, config)
+        reader = NNUEReader(f, feature_set, config)
     return reader.model
 
 
@@ -531,7 +528,7 @@ def gather_impl(model, dataset, count):
         fens = filter_fens(next(fen_batch_provider))
 
         b = data_loader.get_sparse_batch_from_fens(
-            quantized_model.feature_set,
+            quantized_model.feature_set.name,
             fens,
             [0] * len(fens),
             [1] * len(fens),
@@ -549,7 +546,7 @@ def gather_impl(model, dataset, count):
 
 
 def command_gather(args):
-    feature_set = features.get_feature_set_from_name(args.features)
+    feature_set = M.get_feature_set_from_name(args.features)
     if args.checkpoint:
         model = NNUE.load_from_checkpoint(
             args.checkpoint, feature_set=feature_set, config=ModelConfig(L1=args.l1)
@@ -669,7 +666,7 @@ def main():
         "--out", type=str, help="Filename under which to save the resulting ft matrix"
     )
     parser_gather.add_argument("--l1", type=int, default=M.ModelConfig().L1)
-    features.add_argparse_args(parser_gather)
+    M.add_feature_args(parser_gather)
     parser_gather.set_defaults(func=command_gather)
 
     parser_gather = subparsers.add_parser("find_perm", help="a help")
```
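
Because `ftperm.py` already aliases the package as `M`, the renamed argparse helper is reached through that alias. A small sketch of the wiring, assuming only what the features diff below shows (`add_feature_args` registers a `--features` option with a default):

```python
import argparse
import model as M

parser = argparse.ArgumentParser()
M.add_feature_args(parser)  # registers --features
args = parser.parse_args(["--features", "HalfKAv2_hm^"])

# The parsed name resolves to a concrete FeatureSet object.
feature_set = M.get_feature_set_from_name(args.features)
```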

model/__init__.py

Lines changed: 8 additions & 1 deletion

```diff
@@ -1,15 +1,22 @@
 from .callbacks import WeightClippingCallback
 from .config import ModelConfig, LossParams
+from .features import add_feature_args, FeatureSet, get_feature_set_from_name
 from .lightning_module import NNUE
 from .model import NNUEModel
-from .utils import coalesce_ft_weights
+from .utils import coalesce_ft_weights, load_model, NNUEReader, NNUEWriter
 
 
 __all__ = [
     "WeightClippingCallback",
     "ModelConfig",
     "LossParams",
+    "add_feature_args",
+    "FeatureSet",
+    "get_feature_set_from_name",
     "NNUE",
     "NNUEModel",
     "coalesce_ft_weights",
+    "load_model",
+    "NNUEReader",
+    "NNUEWriter",
 ]
```
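
These re-exports make `model` the single public entry point for the training scripts; the former top-level `features` and `serialize` imports disappear from call sites. A sketch restating that surface, assuming nothing beyond the `__all__` list above:

```python
# One package now covers features, the Lightning module, and serialization.
from model import (
    FeatureSet,
    NNUE,
    NNUEReader,
    NNUEWriter,
    add_feature_args,
    coalesce_ft_weights,
    get_feature_set_from_name,
    load_model,  # moved into model.utils by this commit
)
```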

Lines changed: 2 additions & 2 deletions

```diff
@@ -32,7 +32,7 @@ def get_feature_blocks_from_names(names):
     return [_feature_blocks_by_name[name] for name in names]
 
 
-def get_feature_set_from_name(name):
+def get_feature_set_from_name(name) -> FeatureSet:
     feature_block_names = name.split("+")
     blocks = get_feature_blocks_from_names(feature_block_names)
     return FeatureSet(blocks)
@@ -42,7 +42,7 @@ def get_available_feature_blocks_names():
     return list(iter(_feature_blocks_by_name))
 
 
-def add_argparse_args(parser):
+def add_feature_args(parser):
     _default_feature_set_name = "HalfKAv2_hm^"
     parser.add_argument(
         "--features",
```

Lines changed: 27 additions & 31 deletions

```diff
@@ -34,14 +34,13 @@ def __init__(self, features):
         )
         self.num_features = sum(feature.num_features for feature in features)
 
-    """
-    This method returns the feature ranges for the virtual factors of the
-    underlying feature blocks. This is useful to know during initialization,
-    when we want to zero initialize the virtual feature weights, but give some other
-    values to the real feature weights.
-    """
-
     def get_virtual_feature_ranges(self):
+        """
+        This method returns the feature ranges for the virtual factors of the
+        underlying feature blocks. This is useful to know during initialization,
+        when we want to zero initialize the virtual feature weights, but give some other
+        values to the real feature weights.
+        """
         ranges = []
         offset = 0
         for feature in self.features:
@@ -62,14 +61,13 @@ def get_real_feature_ranges(self):
 
         return ranges
 
-    """
-    This method goes over all of the feature blocks and gathers the active features.
-    Each block has its own index space assigned so the features from two different
-    blocks will never have the same index here. Basically the thing you would expect
-    to happen after concatenating many feature blocks.
-    """
-
     def get_active_features(self, board):
+        """
+        This method goes over all of the feature blocks and gathers the active features.
+        Each block has its own index space assigned so the features from two different
+        blocks will never have the same index here. Basically the thing you would expect
+        to happen after concatenating many feature blocks.
+        """
         w = torch.zeros(0)
         b = torch.zeros(0)
 
@@ -84,13 +82,12 @@
 
         return w, b
 
-    """
-    This method takes a feature idx and looks for the block that owns it.
-    If it found the block it asks it to factorize the index, otherwise
-    it throws and Exception. The idx must refer to a real feature.
-    """
-
     def get_feature_factors(self, idx):
+        """
+        This method takes a feature idx and looks for the block that owns it.
+        If it found the block it asks it to factorize the index, otherwise
+        it throws and Exception. The idx must refer to a real feature.
+        """
         offset = 0
         for feature in self.features:
             if idx < offset + feature.num_real_features:
@@ -99,18 +96,17 @@ def get_feature_factors(self, idx):
 
         raise Exception("No feature block to factorize {}".format(idx))
 
-    """
-    This method does what get_feature_factors does but for all
-    valid features at the same time. It returns a list of length
-    self.num_real_features with ith element being a list of factors
-    of the ith feature.
-    This method is technically redundant but it allows to perform the operation
-    slightly faster when there's many feature blocks. It might be worth
-    to add a similar method to the FeatureBlock itself - to make it faster
-    for feature blocks with many factors.
-    """
-
     def get_virtual_to_real_features_gather_indices(self):
+        """
+        This method does what get_feature_factors does but for all
+        valid features at the same time. It returns a list of length
+        self.num_real_features with ith element being a list of factors
+        of the ith feature.
+        This method is technically redundant but it allows to perform the operation
+        slightly faster when there's many feature blocks. It might be worth
+        to add a similar method to the FeatureBlock itself - to make it faster
+        for feature blocks with many factors.
+        """
         indices = []
         real_offset = 0
         offset = 0
```