
Commit d85ed49

Revert "Fully quantized with CReLU"
This reverts commit 95f82c4.
1 parent 95f82c4 commit d85ed49

2 files changed: +26 −33 lines

src/main.cu

Lines changed: 24 additions & 22 deletions
@@ -217,7 +217,8 @@ struct BerserkModel : ChessModel {
     SparseInput* in2;

     const float sigmoid_scale = 1.0 / 160.0;
-    const float quant = 32.0;
+    const float quant_one = 32.0;
+    const float quant_two = 32.0;

     const size_t n_features = 16 * 12 * 64;
     const size_t n_l1 = 16;
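For context: sigmoid_scale = 1.0 / 160.0 squashes the raw evaluation (roughly a centipawn scale) into a (0, 1) win-probability-style output via the Sigmoid node added further down. A minimal sketch of that mapping, with illustrative names rather than the trainer's API:

#include <cmath>

// Illustrative only: sigmoid(eval * sigmoid_scale) with scale 1/160
// turns a centipawn-scale score into a (0, 1) expected-result value.
float eval_to_expected_result(float cp_eval) {
    const float sigmoid_scale = 1.0f / 160.0f;
    return 1.0f / (1.0f + std::exp(-cp_eval * sigmoid_scale));
}

// eval_to_expected_result(0.0f)    == 0.5   (balanced position)
// eval_to_expected_result(160.0f)  ~= 0.731
// eval_to_expected_result(-320.0f) ~= 0.119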
@@ -235,26 +236,24 @@ struct BerserkModel : ChessModel {
     ft->ft_regularization = 1.0 / 16384.0 / 4194304.0;
     fta->max = 127.0;

-    auto l1 = add<Affine>(fta, n_l1);
-    auto l1a = add<ClippedRelu>(l1);
-    l1a->max = 127.0;
+    auto l1 = add<Affine>(fta, n_l1);
+    auto l1a = add<ReLU>(l1);

-    auto l2 = add<Affine>(l1a, n_l2);
-    auto l2a = add<ClippedRelu>(l2);
-    l2a->max = 127.0;
+    auto l2 = add<Affine>(l1a, n_l2);
+    auto l2a = add<ReLU>(l2);

-    auto cp_eval = add<Affine>(l2a, n_out);
-    auto sigmoid = add<Sigmoid>(cp_eval, sigmoid_scale);
+    auto pos_eval = add<Affine>(l2a, n_out);
+    auto sigmoid = add<Sigmoid>(pos_eval, sigmoid_scale);

-    const float hidden_max = 127.0 / quant;
+    const float hidden_max = 127.0 / quant_two;
     add_optimizer(AdamWarmup({{OptimizerEntry {&ft->weights}},
                               {OptimizerEntry {&ft->bias}},
                               {OptimizerEntry {&l1->weights}.clamp(-hidden_max, hidden_max)},
                               {OptimizerEntry {&l1->bias}},
-                              {OptimizerEntry {&l2->weights}.clamp(-hidden_max, hidden_max)},
+                              {OptimizerEntry {&l2->weights}},
                               {OptimizerEntry {&l2->bias}},
-                              {OptimizerEntry {&cp_eval->weights}.clamp(-hidden_max, hidden_max)},
-                              {OptimizerEntry {&cp_eval->bias}}},
+                              {OptimizerEntry {&pos_eval->weights}},
+                              {OptimizerEntry {&pos_eval->bias}}},
                               0.95,
                               0.999,
                               1e-8,
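A note on the clamp: a layer stored as int8 with scale quant can only represent weights with |w| * quant <= 127, so the optimizer clamps those weights to ±hidden_max = ±127 / quant_two during training. After this revert only l1 keeps the clamp, matching the quantizer below where l2 and pos_eval stay in float. A minimal sketch of the invariant (illustrative names, not the trainer's API):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative only: clamping to +/- 127/quant guarantees that the
// quantized value round(w * quant) fits in a signed 8-bit integer.
int8_t quantize_int8(float w, float quant) {
    const float hidden_max = 127.0f / quant;     // e.g. 127/32 ~= 3.97
    w = std::clamp(w, -hidden_max, hidden_max);  // same bound the optimizer enforces
    return static_cast<int8_t>(std::lround(w * quant));
}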
@@ -264,14 +263,14 @@ struct BerserkModel : ChessModel {
     add_quantization(Quantizer {
         "quant",
         save_rate,
-        QuantizerEntry<int16_t>(&ft->weights.values, quant, true),
-        QuantizerEntry<int16_t>(&ft->bias.values, quant),
-        QuantizerEntry<int8_t>(&l1->weights.values, quant),
-        QuantizerEntry<int32_t>(&l1->bias.values, quant),
-        QuantizerEntry<int8_t>(&l2->weights.values, quant),
-        QuantizerEntry<int32_t>(&l2->bias.values, quant),
-        QuantizerEntry<int8_t>(&cp_eval->weights.values, quant),
-        QuantizerEntry<int32_t>(&cp_eval->bias.values, quant),
+        QuantizerEntry<int16_t>(&ft->weights.values, quant_one, true),
+        QuantizerEntry<int16_t>(&ft->bias.values, quant_one),
+        QuantizerEntry<int8_t>(&l1->weights.values, quant_two),
+        QuantizerEntry<int32_t>(&l1->bias.values, quant_two),
+        QuantizerEntry<float>(&l2->weights.values, 1.0),
+        QuantizerEntry<float>(&l2->bias.values, quant_two),
+        QuantizerEntry<float>(&pos_eval->weights.values, 1.0),
+        QuantizerEntry<float>(&pos_eval->bias.values, quant_two),
     });
 }
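Each QuantizerEntry pairs a parameter tensor with a storage type and scale: the feature transformer stays int16 at quant_one, l1 uses int8 weights with int32 biases at quant_two, and l2 and pos_eval revert to float (weights at scale 1.0, i.e. effectively unquantized). A hedged sketch of what scale-and-round quantization does for the integer entries (QuantizerEntry's real implementation lives in the trainer; the helper below is illustrative):

#include <cmath>
#include <cstdint>
#include <vector>

// Illustrative only: scale and round float parameters into the integer
// type used in the exported network; float entries are written as-is.
template<typename T>
std::vector<T> quantize_tensor(const std::vector<float>& values, float scale) {
    std::vector<T> out;
    out.reserve(values.size());
    for (float v : values)
        out.push_back(static_cast<T>(std::lround(v * scale)));
    return out;
}

// e.g. quantize_tensor<int16_t>(ft_weights, quant_one);
//      quantize_tensor<int8_t>(l1_weights, quant_two);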

@@ -314,7 +313,7 @@ struct BerserkModel : ChessModel {

         auto& target = m_loss->target;

-#pragma omp parallel for schedule(static) num_threads(16)
+#pragma omp parallel for schedule(static) num_threads(8)
         for (int b = 0; b < positions->header.entry_count; b++) {
             chess::Position* pos = &positions->positions[b];
             // fill in the inputs and target values
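The loading loop is embarrassingly parallel: each batch entry b touches only its own inputs and target(b), so a static OpenMP partition is safe. A minimal sketch of the pattern (placeholder body; the real loop fills sparse inputs from positions):

#include <vector>

// Illustrative only: statically split independent batch entries
// across 8 threads, as the reverted pragma does.
void fill_batch(std::vector<float>& target) {
#pragma omp parallel for schedule(static) num_threads(8)
    for (int b = 0; b < (int) target.size(); b++)
        target[b] = 0.5f;  // stand-in for per-position work
}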
@@ -357,6 +356,9 @@ struct BerserkModel : ChessModel {
             float w_target = (w_value + 1) / 2.0f;

             target(b) = lambda * p_target + (1.0 - lambda) * w_target;
+
+            // layer_selector->dense_output.values(b, 0) =
+            //     (int) ((chess::popcount(pos->m_occupancy) - 1) / 4);
         }
     }
 };
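The target blends the search-score target p_target with the game-result target w_target. For example, with lambda = 0.7, p_target = 0.62, and a won game (w_target = 1.0), the label is 0.7 * 0.62 + 0.3 * 1.0 = 0.734; lambda = 1.0 would train purely on evaluations, lambda = 0.0 purely on game results. (The lambda value here is an assumed example, not the trainer's setting.)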

src/nn/layers/affine_multi.h

Lines changed: 2 additions & 11 deletions
@@ -6,16 +6,7 @@ namespace nn {

 struct AffineMulti : public nn::Affine {

-    AffineMulti(Layer* prev, size_t size, size_t batches)
-        : Affine(prev, size * batches) {
-
-        for (size_t i = size; i < this->size; i++)
-            for (size_t j = 0; j < prev->size; j++)
-                weights.values(i, j) = weights.values(i % size, j);
-
-        weights.values >> data::GPU;
-        bias.values >> data::GPU;
-    }
+    AffineMulti(Layer* prev, size_t size, size_t counts)
+        : Affine(prev, size * counts) {}
 };
-
 } // namespace nn
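For reference, the deleted constructor body replicated the first size rows of the weight matrix into every subsequent block (weights.values(i, j) = weights.values(i % size, j)) and pushed weights and biases to the GPU; after the revert, AffineMulti is just an Affine of width size * counts with default initialization. A sketch of the dropped replication, restated over a plain matrix (illustrative types, not the trainer's tensor API):

#include <cstddef>
#include <vector>

// Illustrative only: row i of a (size * counts)-row matrix copies
// row (i % size), so every block starts from identical weights.
void replicate_blocks(std::vector<std::vector<float>>& w, std::size_t size) {
    for (std::size_t i = size; i < w.size(); i++)
        w[i] = w[i % size];
}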
