
Commit 95f82c4

Fully quantized with CReLU
1 parent 2dfb805 commit 95f82c4

2 files changed: +33, -26 lines

src/main.cu

Lines changed: 22 additions & 24 deletions
@@ -217,8 +217,7 @@ struct BerserkModel : ChessModel {
     SparseInput* in2;
 
     const float sigmoid_scale = 1.0 / 160.0;
-    const float quant_one = 32.0;
-    const float quant_two = 32.0;
+    const float quant = 32.0;
 
     const size_t n_features = 16 * 12 * 64;
     const size_t n_l1 = 16;
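
The two scales merged above were both already 32.0, so nothing changes numerically: a float parameter w maps to round(w * 32) under either name. A minimal sketch of that scale-and-round step, assuming a single shared scale as in this commit (the helper name quantize is illustrative, not part of the trainer):

    #include <cmath>
    #include <cstdint>

    constexpr float quant = 32.0f;

    // Round a float parameter to its fixed-point value under the shared
    // scale, e.g. w = 0.8125 -> 0.8125 * 32 = 26.
    int16_t quantize(float w) {
        return static_cast<int16_t>(std::lround(w * quant));
    }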
@@ -236,24 +235,26 @@ struct BerserkModel : ChessModel {
     ft->ft_regularization = 1.0 / 16384.0 / 4194304.0;
     fta->max = 127.0;
 
-    auto l1 = add<Affine>(fta, n_l1);
-    auto l1a = add<ReLU>(l1);
+    auto l1 = add<Affine>(fta, n_l1);
+    auto l1a = add<ClippedRelu>(l1);
+    l1a->max = 127.0;
 
-    auto l2 = add<Affine>(l1a, n_l2);
-    auto l2a = add<ReLU>(l2);
+    auto l2 = add<Affine>(l1a, n_l2);
+    auto l2a = add<ClippedRelu>(l2);
+    l2a->max = 127.0;
 
-    auto pos_eval = add<Affine>(l2a, n_out);
-    auto sigmoid = add<Sigmoid>(pos_eval, sigmoid_scale);
+    auto cp_eval = add<Affine>(l2a, n_out);
+    auto sigmoid = add<Sigmoid>(cp_eval, sigmoid_scale);
 
-    const float hidden_max = 127.0 / quant_two;
+    const float hidden_max = 127.0 / quant;
     add_optimizer(AdamWarmup({{OptimizerEntry {&ft->weights}},
                               {OptimizerEntry {&ft->bias}},
                               {OptimizerEntry {&l1->weights}.clamp(-hidden_max, hidden_max)},
                               {OptimizerEntry {&l1->bias}},
-                              {OptimizerEntry {&l2->weights}},
+                              {OptimizerEntry {&l2->weights}.clamp(-hidden_max, hidden_max)},
                               {OptimizerEntry {&l2->bias}},
-                              {OptimizerEntry {&pos_eval->weights}},
-                              {OptimizerEntry {&pos_eval->bias}}},
+                              {OptimizerEntry {&cp_eval->weights}.clamp(-hidden_max, hidden_max)},
+                              {OptimizerEntry {&cp_eval->bias}}},
                              0.95,
                              0.999,
                              1e-8,
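
Two constraints in this hunk work together: ClippedRelu with max = 127.0 bounds every hidden activation to the int8 range, and clamping hidden weights to ±hidden_max = ±127/32 = ±3.96875 ensures the quantized weight round(w * 32) also fits in [-127, 127]. A standalone sketch of both, assuming plain floats rather than the trainer's layer and optimizer classes:

    #include <algorithm>

    constexpr float quant      = 32.0f;
    constexpr float hidden_max = 127.0f / quant;   // 3.96875

    // Clipped ReLU: activations are held inside the int8 range.
    float clipped_relu(float x) { return std::clamp(x, 0.0f, 127.0f); }

    // Optimizer-side weight clamp: afterwards round(w * quant) is
    // guaranteed to lie in [-127, 127], so int8 storage cannot overflow.
    float clamp_weight(float w) { return std::clamp(w, -hidden_max, hidden_max); }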
@@ -263,14 +264,14 @@ struct BerserkModel : ChessModel {
     add_quantization(Quantizer {
         "quant",
         save_rate,
-        QuantizerEntry<int16_t>(&ft->weights.values, quant_one, true),
-        QuantizerEntry<int16_t>(&ft->bias.values, quant_one),
-        QuantizerEntry<int8_t>(&l1->weights.values, quant_two),
-        QuantizerEntry<int32_t>(&l1->bias.values, quant_two),
-        QuantizerEntry<float>(&l2->weights.values, 1.0),
-        QuantizerEntry<float>(&l2->bias.values, quant_two),
-        QuantizerEntry<float>(&pos_eval->weights.values, 1.0),
-        QuantizerEntry<float>(&pos_eval->bias.values, quant_two),
+        QuantizerEntry<int16_t>(&ft->weights.values, quant, true),
+        QuantizerEntry<int16_t>(&ft->bias.values, quant),
+        QuantizerEntry<int8_t>(&l1->weights.values, quant),
+        QuantizerEntry<int32_t>(&l1->bias.values, quant),
+        QuantizerEntry<int8_t>(&l2->weights.values, quant),
+        QuantizerEntry<int32_t>(&l2->bias.values, quant),
+        QuantizerEntry<int8_t>(&cp_eval->weights.values, quant),
+        QuantizerEntry<int32_t>(&cp_eval->bias.values, quant),
     });
 }
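
Every entry now shares the single quant scale: feature-transformer weights and biases become int16, hidden and output weights become int8, and their biases become int32 (the wider type absorbs sums of int8-by-int8 products at inference time). A hedged sketch of what such an entry plausibly does at save time; the real QuantizerEntry is defined by the trainer, and quantize_tensor here is an illustrative stand-in:

    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Scale, round, and narrow a float tensor to the integer type used
    // on the inference side.
    template <typename T>
    std::vector<T> quantize_tensor(const std::vector<float>& values, float scale) {
        std::vector<T> out(values.size());
        for (std::size_t i = 0; i < values.size(); i++)
            out[i] = static_cast<T>(std::lround(values[i] * scale));
        return out;
    }

    // e.g. hidden weights: quantize_tensor<int8_t>(l1_weights, 32.0f);
    //      hidden biases:  quantize_tensor<int32_t>(l1_biases, 32.0f);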

@@ -313,7 +314,7 @@ struct BerserkModel : ChessModel {
 
     auto& target = m_loss->target;
 
-#pragma omp parallel for schedule(static) num_threads(8)
+#pragma omp parallel for schedule(static) num_threads(16)
     for (int b = 0; b < positions->header.entry_count; b++) {
         chess::Position* pos = &positions->positions[b];
         // fill in the inputs and target values
@@ -356,9 +357,6 @@ struct BerserkModel : ChessModel {
         float w_target = (w_value + 1) / 2.0f;
 
         target(b) = lambda * p_target + (1.0 - lambda) * w_target;
-
-        // layer_selector->dense_output.values(b, 0) =
-        //     (int) ((chess::popcount(pos->m_occupancy) - 1) / 4);
     }
 }
 };
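
For context on the unchanged target code visible in this hunk: w_target maps the game result w_value from {-1, 0, 1} to {0, 0.5, 1}, and the final target interpolates between the search-score term and the game result with lambda. A small sketch under the assumption (not shown in this diff) that p_target is the search score squashed by the same 1/160 sigmoid the model applies to its output:

    #include <cmath>

    // Blend search-score and game-result targets as in the loop above.
    // The sigmoid form of p_target is an assumption; the 1/160 scale
    // matches the model's sigmoid_scale.
    float make_target(float score_cp, float w_value, float lambda) {
        float p_target = 1.0f / (1.0f + std::exp(-score_cp / 160.0f));
        float w_target = (w_value + 1.0f) / 2.0f;   // -1/0/1 -> 0/0.5/1
        return lambda * p_target + (1.0f - lambda) * w_target;
    }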

src/nn/layers/affine_multi.h

Lines changed: 11 additions & 2 deletions
@@ -6,7 +6,16 @@ namespace nn {
 
 struct AffineMulti : public nn::Affine {
 
-    AffineMulti(Layer* prev, size_t size, size_t counts)
-        : Affine(prev, size * counts) {}
+    AffineMulti(Layer* prev, size_t size, size_t batches)
+        : Affine(prev, size * batches) {
+
+        for (size_t i = size; i < this->size; i++)
+            for (size_t j = 0; j < prev->size; j++)
+                weights.values(i, j) = weights.values(i % size, j);
+
+        weights.values >> data::GPU;
+        bias.values >> data::GPU;
+    }
 };
+
 } // namespace nn
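
The new constructor body initializes every replicated output block identically: rows [size, 2*size), [2*size, 3*size), and so on are copied from the first block before the tensors are pushed to the GPU. A standalone sketch of the replication pattern, using a plain row-major array instead of the trainer's tensor type:

    #include <cstddef>
    #include <vector>

    // Copy the first `size` rows of a (size * batches) x in_dim weight
    // matrix into every later block, mirroring AffineMulti's constructor.
    void replicate_blocks(std::vector<float>& w,
                          std::size_t size, std::size_t batches, std::size_t in_dim) {
        for (std::size_t i = size; i < size * batches; i++)
            for (std::size_t j = 0; j < in_dim; j++)
                w[i * in_dim + j] = w[(i % size) * in_dim + j];
    }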
