Skip to content

Commit cc44eea

Browse files
authored
Backport changes needed for master nets (#375)
This introduces two changes needed for training SF master nets — in particular, for executing the recipes referenced in official-stockfish/Stockfish#6452 and official-stockfish/Stockfish#6457 — adding additional flexibility for shaping the piece count distribution and for weighting the individual configurations in the loss, respectively.
1 parent d3d0b07 commit cc44eea

File tree

5 files changed

+92
-24
lines changed

5 files changed

+92
-24
lines changed

data_loader/config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ class DataloaderSkipConfig:
1010
early_fen_skipping: int = -1
1111
simple_eval_skipping: int = -1
1212
param_index: int = 0
13+
pc_y1: float = 1.0
14+
pc_y2: float = 2.0
15+
pc_y3: float = 1.0
1316

1417

1518
class CDataloaderSkipConfig(ctypes.Structure):
@@ -20,6 +23,9 @@ class CDataloaderSkipConfig(ctypes.Structure):
2023
("early_fen_skipping", ctypes.c_int),
2124
("simple_eval_skipping", ctypes.c_int),
2225
("param_index", ctypes.c_int),
26+
("pc_y1", ctypes.c_double),
27+
("pc_y2", ctypes.c_double),
28+
("pc_y3", ctypes.c_double),
2329
]
2430

2531
def __init__(self, config: DataloaderSkipConfig):
@@ -30,4 +36,7 @@ def __init__(self, config: DataloaderSkipConfig):
3036
early_fen_skipping=config.early_fen_skipping,
3137
simple_eval_skipping=config.simple_eval_skipping,
3238
param_index=config.param_index,
39+
pc_y1=config.pc_y1,
40+
pc_y2=config.pc_y2,
41+
pc_y3=config.pc_y3,
3342
)

model/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,5 @@ class LossParams:
2020
end_lambda: float = 1.0
2121
pow_exp: float = 2.5
2222
qp_asymmetry: float = 0.0
23+
w1: float = 0.0
24+
w2: float = 0.5

model/lightning_module.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,9 @@ def step_(self, batch: tuple[Tensor, ...], batch_idx, loss_type):
106106
loss = torch.pow(torch.abs(pt - qf), p.pow_exp)
107107
if p.qp_asymmetry != 0.0:
108108
loss = loss * ((qf > pt) * p.qp_asymmetry + 1)
109-
loss = loss.mean()
109+
110+
weights = 1 + (2.0**p.w1 - 1) * torch.pow((pf - 0.5) ** 2 * pf * (1 - pf), p.w2)
111+
loss = (loss * weights).sum() / weights.sum()
110112

111113
self.log(loss_type, loss, prog_bar=True)
112114

train.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,42 @@ def main():
333333
dest="simple_eval_skipping",
334334
help="Skip positions that have abs(simple_eval(pos)) < n",
335335
)
336+
parser.add_argument(
337+
"--pc-y1",
338+
type=float,
339+
default=1.0,
340+
dest="pc_y1",
341+
help="piece count parameter y1 (default=1.0)",
342+
)
343+
parser.add_argument(
344+
"--pc-y2",
345+
type=float,
346+
default=2.0,
347+
dest="pc_y2",
348+
help="piece count parameter y2 (default=2.0)",
349+
)
350+
parser.add_argument(
351+
"--pc-y3",
352+
type=float,
353+
default=1.0,
354+
dest="pc_y3",
355+
help="piece count parameter y3 (default=1.0)",
356+
)
357+
parser.add_argument(
358+
"--w1",
359+
type=float,
360+
default=0.0,
361+
dest="w1",
362+
help="weight boost parameter 1 (default=0.0)",
363+
)
364+
parser.add_argument(
365+
"--w2",
366+
type=float,
367+
default=0.5,
368+
dest="w2",
369+
help="weight boost parameter 2 (default=0.5)",
370+
)
371+
336372
parser.add_argument("--l1", type=int, default=M.ModelConfig().L1)
337373
M.add_feature_args(parser)
338374
args = parser.parse_args()
@@ -377,6 +413,8 @@ def main():
377413
end_lambda=args.end_lambda or args.lambda_,
378414
pow_exp=args.pow_exp,
379415
qp_asymmetry=args.qp_asymmetry,
416+
w1=args.w1,
417+
w2=args.w2,
380418
)
381419
print("Loss parameters:")
382420
print(loss_params)
@@ -429,6 +467,11 @@ def main():
429467
print("Skip early plies: {}".format(args.early_fen_skipping))
430468
print("Skip simple eval : {}".format(args.simple_eval_skipping))
431469
print("Param index: {}".format(args.param_index))
470+
print("piececount param y1 : {}".format(args.pc_y1))
471+
print("piececount param y2 : {}".format(args.pc_y2))
472+
print("piececount param y3 : {}".format(args.pc_y3))
473+
print("Weighting param w1 : {}".format(args.w1))
474+
print("Weighting param w2 : {}".format(args.w2))
432475

433476
if args.threads > 0:
434477
print("limiting torch to {} threads.".format(args.threads))
@@ -481,6 +524,9 @@ def main():
481524
early_fen_skipping=args.early_fen_skipping,
482525
simple_eval_skipping=args.simple_eval_skipping,
483526
param_index=args.param_index,
527+
pc_y1=args.pc_y1,
528+
pc_y2=args.pc_y2,
529+
pc_y3=args.pc_y3,
484530
),
485531
args.epoch_size,
486532
args.validation_size,

training_data_loader.cpp

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,12 +1029,13 @@ struct FenBatchStream: Stream<FenBatch> {
10291029
};
10301030

10311031
struct DataloaderSkipConfig {
1032-
bool filtered;
1033-
int random_fen_skipping;
1034-
bool wld_filtered;
1035-
int early_fen_skipping;
1036-
int simple_eval_skipping;
1037-
int param_index;
1032+
bool filtered;
1033+
int random_fen_skipping;
1034+
bool wld_filtered;
1035+
int early_fen_skipping;
1036+
int simple_eval_skipping;
1037+
int param_index;
1038+
double pc_y1, pc_y2, pc_y3;
10381039
};
10391040

10401041
std::function<bool(const TrainingDataEntry&)> make_skip_predicate(DataloaderSkipConfig config) {
@@ -1049,19 +1050,17 @@ std::function<bool(const TrainingDataEntry&)> make_skip_predicate(DataloaderSkip
10491050
// compression ability.
10501051
static constexpr int VALUE_NONE = 32002;
10511052

1052-
static constexpr double desired_piece_count_weights[33] = {
1053-
1.000000, 1.121094, 1.234375, 1.339844, 1.437500, 1.527344, 1.609375,
1054-
1.683594, 1.750000, 1.808594, 1.859375, 1.902344, 1.937500, 1.964844,
1055-
1.984375, 1.996094, 2.000000, 1.996094, 1.984375, 1.964844, 1.937500,
1056-
1.902344, 1.859375, 1.808594, 1.750000, 1.683594, 1.609375, 1.527344,
1057-
1.437500, 1.339844, 1.234375, 1.121094, 1.000000};
1058-
1059-
static constexpr double desired_piece_count_weights_total = []() {
1060-
double tot = 0;
1061-
for (auto w : desired_piece_count_weights)
1062-
tot += w;
1063-
return tot;
1064-
}();
1053+
// lagrange interpolation weights for desired piece count distribution
1054+
auto desired_piece_count_weights = [&config](int pc) -> double {
1055+
double x = pc;
1056+
double x1 = 0, y1 = config.pc_y1;
1057+
double x2 = 16, y2 = config.pc_y2;
1058+
double x3 = 32, y3 = config.pc_y3;
1059+
double l1 = (x - x2) * (x - x3) / ((x1 - x2) * (x1 - x3));
1060+
double l2 = (x - x1) * (x - x3) / ((x2 - x1) * (x2 - x3));
1061+
double l3 = (x - x1) * (x - x2) / ((x3 - x1) * (x3 - x2));
1062+
return l1 * y1 + l2 * y2 + l3 * y3;
1063+
};
10651064

10661065
// keep stats on passing pieces
10671066
static thread_local double alpha = 1;
@@ -1123,16 +1122,23 @@ std::function<bool(const TrainingDataEntry&)> make_skip_predicate(DataloaderSkip
11231122
piece_count_history_all[pc] += 1;
11241123
piece_count_history_all_total += 1;
11251124

1125+
double desired_piece_count_weights_total = [&desired_piece_count_weights]() {
1126+
double tot = 0;
1127+
for (int i = 0; i < 33; i++)
1128+
tot += desired_piece_count_weights(i);
1129+
return tot;
1130+
}();
1131+
11261132
// update alpha, which scales the filtering probability, to a maximum rate.
11271133
if (uint64_t(piece_count_history_all_total) % 10000 == 0)
11281134
{
11291135
double pass = piece_count_history_all_total * desired_piece_count_weights_total;
11301136
for (int i = 0; i < 33; ++i)
11311137
{
1132-
if (desired_piece_count_weights[pc] > 0)
1138+
if (desired_piece_count_weights(pc) > 0)
11331139
{
11341140
double tmp =
1135-
piece_count_history_all_total * desired_piece_count_weights[pc]
1141+
piece_count_history_all_total * desired_piece_count_weights(pc)
11361142
/ (desired_piece_count_weights_total * piece_count_history_all[pc]);
11371143
if (tmp < pass)
11381144
pass = tmp;
@@ -1141,7 +1147,7 @@ std::function<bool(const TrainingDataEntry&)> make_skip_predicate(DataloaderSkip
11411147
alpha = 1.0 / (pass * max_skipping_rate);
11421148
}
11431149

1144-
double tmp = alpha * piece_count_history_all_total * desired_piece_count_weights[pc]
1150+
double tmp = alpha * piece_count_history_all_total * desired_piece_count_weights(pc)
11451151
/ (desired_piece_count_weights_total * piece_count_history_all[pc]);
11461152
tmp = std::min(1.0, tmp);
11471153
std::bernoulli_distribution distrib(1.0 - tmp);
@@ -1366,7 +1372,10 @@ int main(int argc, char** argv) {
13661372
.wld_filtered = true,
13671373
.early_fen_skipping = 5,
13681374
.simple_eval_skipping = 0,
1369-
.param_index = 0};
1375+
.param_index = 0,
1376+
.pc_y1 = 1.0,
1377+
.pc_y2 = 2.0,
1378+
.pc_y3 = 1.0};
13701379
auto stream = create_sparse_batch_stream("Full_Threats^", concurrency, file_count, files,
13711380
batch_size, cyclic, config);
13721381

0 commit comments

Comments (0)