|
import struct

import numpy as np
import torch

from model import PerspectiveNetwork
# Quantization scales: weights are stored as int16 after multiplying by these
# factors (and divided back out on load — see load_quantized_net).
QA = 403       # scale for the feature-transformer (hidden) layer
QB = 64        # scale for the output layer
QAB = QA * QB  # combined scale applied to the output bias
@@ -31,3 +33,20 @@ def quantize(model, bin_path): |
31 | 33 | bin_file.write(struct.pack('<' + 'h' * len(feature_bias_values), *feature_bias_values)) |
32 | 34 | bin_file.write(struct.pack('<' + 'h' * len(output_weight_values), *output_weight_values)) |
33 | 35 | bin_file.write(struct.pack('<' + 'h' * len(output_bias_values), *output_bias_values)) |
| 36 | + |
| 37 | + |
def load_quantized_net(bin_path, hl_size, qa, qb):
    """Load a quantized network from *bin_path* and de-quantize it to floats.

    Inverse of ``quantize``: reads little-endian int16 values in the same
    order ``quantize`` wrote them (feature weights, feature bias, output
    weights, output bias), divides each tensor by its quantization scale,
    and installs the results into a fresh ``PerspectiveNetwork``.

    Args:
        bin_path: Path to the binary file produced by ``quantize``.
        hl_size: Hidden-layer width of the network.
        qa: Quantization scale of the feature transformer (e.g. ``QA``).
        qb: Quantization scale of the output layer (e.g. ``QB``).

    Returns:
        A ``PerspectiveNetwork`` whose parameters are the de-quantized
        float32 tensors read from the file.
    """
    with open(bin_path, "rb") as bin_file:
        # '<' = little-endian, 'h' = int16 (2 bytes each); read in write order.
        feature_weights = struct.unpack(f'<{768 * hl_size}h', bin_file.read(768 * hl_size * 2))
        feature_bias = struct.unpack(f'<{hl_size}h', bin_file.read(hl_size * 2))
        output_weights = struct.unpack(f'<{2 * hl_size}h', bin_file.read(2 * hl_size * 2))
        output_bias = struct.unpack('<1h', bin_file.read(1 * 2))

    model = PerspectiveNetwork(hl_size)
    # Weights are serialized as (768, hl_size); the layer expects
    # (hl_size, 768), hence the transpose before de-scaling by qa.
    model.feature_transformer.weight.data = torch.tensor(
        np.array(feature_weights).reshape(768, hl_size).T / qa, dtype=torch.float32)
    model.feature_transformer.bias.data = torch.tensor(
        np.array(feature_bias) / qa, dtype=torch.float32)
    model.output_layer.weight.data = torch.tensor(
        np.array(output_weights).reshape(1, 2 * hl_size) / qb, dtype=torch.float32)
    # The stored output bias carries both scales, so divide by qa * qb.
    model.output_layer.bias.data = torch.tensor(
        np.array(output_bias) / (qa * qb), dtype=torch.float32)

    return model
0 commit comments