@@ -74,7 +74,7 @@ def get_chebs(num_epochs):
 
 
 def normalize_gradient(x, use_channels=False, epsilon=1e-8):
-    """use stdev to normalize gradients"""
+    """ use stdev to normalize gradients """
     size = x.dim()
     # print(f"size = {size}")
 
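For context, normalize_gradient scales a gradient tensor by its standard deviation. A minimal standalone sketch of the tensor-wide case (the function above also supports a per-channel path; the helper name here is illustrative, not the repo's API):

    import torch

    def normalize_by_std(grad: torch.Tensor, epsilon: float = 1e-8) -> torch.Tensor:
        # scale the whole tensor so its standard deviation is ~1
        return grad / (grad.std() + epsilon)

    # example: a toy gradient with std ~5 comes out with std ~1
    g = torch.randn(4, 3, 3, 3) * 5.0
    print(g.std(), normalize_by_std(g).std())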
@@ -90,7 +90,7 @@ def normalize_gradient(x, use_channels=False, epsilon=1e-8):
 
 
 def centralize_gradient(x, gc_conv_only=False):
-    """credit - https://github.com/Yonghongwei/Gradient-Centralization"""
+    """credit - https://github.com/Yonghongwei/Gradient-Centralization """
 
     size = x.dim()
     # print(f"size = {size}")
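centralize_gradient applies Gradient Centralization (credited in the docstring): each filter's gradient has its mean over all non-output dimensions subtracted. A minimal sketch, assuming a plain conv/linear weight gradient and an illustrative helper name:

    import torch

    def centralize(grad: torch.Tensor) -> torch.Tensor:
        # only centralize conv/linear weight gradients (dim > 1); leave biases alone
        if grad.dim() > 1:
            mean = grad.mean(dim=tuple(range(1, grad.dim())), keepdim=True)
            grad = grad - mean
        return grad

    # example: per-filter means become ~0 after centralization
    g = torch.randn(8, 3, 3, 3) + 0.5
    print(centralize(g).mean(dim=(1, 2, 3)))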
@@ -144,6 +144,7 @@ def __init__(
         warmup_pct_default=0.22,
         logging_active=True,
     ):
+
         # todo - checks on incoming params
         defaults = dict(
             lr=lr, momentum=momentum, betas=betas, eps=eps, weight_decay=weight_decay
@@ -352,13 +353,13 @@ def show_settings(self):
 
         if self.warmdown_active:
             print(
-                f"\nWarm-down: Linear warmdown, starting at {self.warm_down_start_pct * 100}%, iteration {self.start_warm_down} of {self.total_iterations}"
+                f"\nWarm-down: Linear warmdown, starting at {self.warm_down_start_pct * 100}%, iteration {self.start_warm_down} of {self.total_iterations}"
             )
             print(f"warm down will decay until {self.min_lr} lr")
 
     # lookahead functions
     def clear_cache(self):
-        """clears the lookahead cached params"""
+        """clears the lookahead cached params """
 
         print(f"clearing lookahead cache...")
         for group in self.param_groups:
@@ -390,7 +391,7 @@ def backup_and_load_cache(self):
                 p.data.copy_(param_state["lookahead_params"])
 
     def unit_norm(self, x):
-        """axis-based Euclidean norm"""
+        """ axis-based Euclidean norm"""
         # verify shape
         keepdim = True
         dim = None
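unit_norm computes an axis-based Euclidean norm, which the agc method in the next hunk uses for adaptive gradient clipping in the NFNet style (cap the gradient norm relative to the parameter norm). A tensor-wise simplification as a sketch; the helper name and the clipping/eps defaults are illustrative, not the optimizer's own:

    import torch

    def agc_clip(param: torch.Tensor, grad: torch.Tensor,
                 clipping: float = 1e-2, eps: float = 1e-3) -> torch.Tensor:
        # adaptive gradient clipping: cap grad norm at clipping * param norm
        p_norm = param.norm().clamp(min=eps)
        g_norm = grad.norm()
        max_norm = p_norm * clipping
        if g_norm > max_norm:
            grad = grad * (max_norm / g_norm.clamp(min=1e-6))
        return grad

    # example: a large gradient gets scaled down relative to a small weight
    w = torch.full((4, 4), 0.1)
    g = torch.randn(4, 4) * 10.0
    print(g.norm(), agc_clip(w, g).norm())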
@@ -432,6 +433,7 @@ def agc(self, p):
             p.grad.detach().copy_(new_grads)
 
     def warmup_dampening(self, lr, step):
+
         style = self.warmup_type
         warmup = self.num_warmup_iters
 
@@ -440,6 +442,7 @@ def warmup_dampening(self, lr, step):
 
         if step > warmup:
             if not self.warmup_complete:
+
                 if not self.warmup_curr_pct == 1.0:
                     print(
                         f"Error - lr did not achieve full set point from warmup, currently {self.warmup_curr_pct}"
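warmup_dampening ramps the base lr up over the first num_warmup_iters steps. A minimal sketch of the linear variant, with illustrative names rather than the optimizer's actual method signature:

    def linear_warmup_lr(base_lr: float, step: int, warmup_iters: int) -> float:
        # ramp linearly from ~0 to base_lr, then hold at base_lr
        if step >= warmup_iters:
            return base_lr
        return base_lr * min(1.0, step / warmup_iters)

    # example: 5 warmup steps toward lr=1e-3
    print([round(linear_warmup_lr(1e-3, s, 5), 6) for s in range(1, 8)])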
@@ -462,7 +465,7 @@ def warmup_dampening(self, lr, step):
             raise ValueError(f"warmup type {style} not implemented.")
 
     def get_warm_down(self, lr, iteration):
-        """linear style warmdown"""
+        """ linear style warmdown """
         if iteration < self.start_warm_down:
             return lr
 
@@ -475,8 +478,8 @@ def get_warm_down(self, lr, iteration):
             self.warmdown_displayed = True
 
         warmdown_iteration = (
-            (iteration + 1) - self.start_warm_down
-        )  # to force the first iteration to be 1 instead of 0
+            iteration + 1
+        ) - self.start_warm_down  # to force the first iteration to be 1 instead of 0
 
         if warmdown_iteration < 1:
             print(
@@ -486,8 +489,8 @@ def get_warm_down(self, lr, iteration):
         # print(f"warmdown iteration = {warmdown_iteration}")
         # linear start 3672 5650 total iterations 1972 iterations
 
-        warmdown_pct = (
-            warmdown_iteration / (self.warmdown_total_iterations + 1)
+        warmdown_pct = warmdown_iteration / (
+            self.warmdown_total_iterations + 1
         )  # +1 to offset that we have to include first as an iteration to support 1 index instead of 0 based.
         if warmdown_pct > 1.00:
             print(f"error in warmdown pct calc. new pct = {warmdown_pct}")
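get_warm_down decays the lr linearly from its base value toward min_lr once start_warm_down is reached, using the 1-indexed warmdown_pct computed above. A compact sketch of that schedule with hypothetical parameter names:

    def linear_warmdown_lr(base_lr: float, min_lr: float, iteration: int,
                           start_warm_down: int, warmdown_total_iterations: int) -> float:
        # before the warm-down window: keep the base lr
        if iteration < start_warm_down:
            return base_lr
        # 1-indexed position inside the warm-down window
        warmdown_iteration = (iteration + 1) - start_warm_down
        warmdown_pct = min(1.0, warmdown_iteration / (warmdown_total_iterations + 1))
        # interpolate linearly from base_lr down toward min_lr
        return base_lr - warmdown_pct * (base_lr - min_lr)

    # example: decay from 1e-3 toward 1e-5 over the last 100 of 1000 iterations
    print(linear_warmdown_lr(1e-3, 1e-5, 999, 900, 100))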
@@ -534,6 +537,7 @@ def track_epochs(self, iteration):
             self.backup_and_load_cache()
 
     def get_cheb_lr(self, lr, iteration):
+
         # first confirm we are done with warmup
         if self.use_warmup:
             if iteration < self.num_warmup_iters + 1:
@@ -569,6 +573,7 @@ def get_state_values(self, group, state):
     # @staticmethod
     @torch.no_grad()
     def step(self, closure=None):
+
         loss = None
         if closure is not None and isinstance(closure, collections.abc.Callable):
             with torch.enable_grad():
@@ -693,15 +698,15 @@ def step(self, closure=None):
         if not self.param_size:
             self.param_size = param_size
             print(f"params size saved")
-            print(f"total param groups = {i + 1}")
-            print(f"total params in groups = {j + 1}")
+            print(f"total param groups = {i + 1}")
+            print(f"total params in groups = {j + 1}")
 
         if not self.param_size:
             raise ValueError("failed to set param size")
 
         # stable weight decay
         if self.use_madgrad:
-            variance_normalized = torch.pow(variance_ma_sum / param_size, 1 / 3)
+            variance_normalized = torch.pow(variance_ma_sum / param_size, 1 / 3)
         else:
             variance_normalized = math.sqrt(variance_ma_sum / param_size)
         # variance_mean = variance_ma_sum / param_size
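The stable weight decay block divides the decay term by an aggregate second-moment statistic of the gradients (cube root on the MADGRAD path, square root otherwise), so the effective decay stays roughly constant as gradient variance changes. A rough sketch of the Adam-style branch, assuming variance_ma_sum already holds the summed, bias-corrected second moments (the values below are placeholders):

    import math
    import torch

    # hypothetical values for illustration only
    variance_ma_sum = 42.7      # summed (exp_avg_sq / bias_correction2) over all params
    param_size = 1_000_000      # total number of scalar parameters
    lr, weight_decay = 1e-3, 1e-4

    # Adam-style branch: sqrt of the mean second moment
    variance_normalized = math.sqrt(variance_ma_sum / param_size)

    # decoupled weight decay, scaled by the variance statistic
    p = torch.randn(10, 10)
    p.mul_(1 - lr * weight_decay / variance_normalized)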
@@ -849,6 +854,7 @@ def step(self, closure=None):
                 variance_ma_belief = state["variance_ma_belief"]
 
                 if self.momentum_pnm:
+
                     max_variance_ma = state["max_variance_ma"]
 
                     if state["step"] % 2 == 1:
@@ -862,8 +868,8 @@ def step(self, closure=None):
                             state["grad_ma"],
                         )
 
-                bias_correction1 = 1 - beta1 ** step
-                bias_correction2 = 1 - beta2 ** step
+                bias_correction1 = 1 - beta1 ** step
+                bias_correction2 = 1 - beta2 ** step
 
                 if self.momentum_pnm:
                     # Maintains the maximum of all 2nd moment running avg. till now
@@ -883,9 +889,9 @@ def step(self, closure=None):
                     grad = normalize_gradient(grad)
 
                 if not self.use_adabelief:
-                    grad_ma.mul_(beta1 ** 2).add_(grad, alpha=1 - beta1 ** 2)
+                    grad_ma.mul_(beta1 ** 2).add_(grad, alpha=1 - beta1 ** 2)
 
-                noise_norm = math.sqrt((1 + beta2) ** 2 + beta2 ** 2)
+                noise_norm = math.sqrt((1 + beta2) ** 2 + beta2 ** 2)
 
                 step_size = lr / bias_correction1
 
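The last hunks cover the Adam-style bias corrections and the positive-negative momentum (PNM) noise normalization: the two momentum buffers are combined (typically with weights 1 + beta2 and -beta2) and divided by noise_norm = sqrt((1 + beta2)^2 + beta2^2) before the step. A self-contained sketch of one such update; the variable names and buffer bookkeeping are illustrative, not the optimizer's exact code:

    import math
    import torch

    beta1, beta2, eps, lr, step = 0.9, 0.999, 1e-8, 1e-3, 10

    param = torch.randn(10)
    grad = torch.randn(10)
    grad_ma = torch.zeros(10)      # "positive" momentum buffer
    neg_grad_ma = torch.zeros(10)  # "negative" momentum buffer (lags one step)
    variance_ma = torch.zeros(10)

    # Adam-style accumulators and bias corrections (as in the hunks above)
    grad_ma.mul_(beta1 ** 2).add_(grad, alpha=1 - beta1 ** 2)
    variance_ma.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
    bias_correction1 = 1 - beta1 ** step
    bias_correction2 = 1 - beta2 ** step

    # PNM: combine the two buffers and renormalize the injected noise
    noise_norm = math.sqrt((1 + beta2) ** 2 + beta2 ** 2)
    pn_momentum = grad_ma.mul(1 + beta2).add(neg_grad_ma, alpha=-beta2).mul(1 / noise_norm)

    denom = (variance_ma.sqrt() / math.sqrt(bias_correction2)).add_(eps)
    step_size = lr / bias_correction1
    param.addcdiv_(pn_momentum, denom, value=-step_size)
    print(param)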