Skip to content

Commit 696d4a8

Browse files
committed
Update to include Finny Grad Op
2 parents 45a0ec0 + e92b7c5 commit 696d4a8

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+905
-1965
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${CMAKE_COMMAND} -E time")
1717

1818
set_target_properties(Grapheus PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
1919

20-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -march=native -fopenmp")
20+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -march=native -fopenmp -Xcudafe --diag_suppress=68 -Xcudafe --diag_suppress=20050")
2121

2222
target_link_libraries(Grapheus ${CUDA_LIBRARIES})
2323
target_link_libraries(Grapheus ${CUDA_CUBLAS_LIBRARIES})

src/chess/bitboard.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@
66
#include <immintrin.h>
77
#endif
88

9-
namespace chess {
9+
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
10+
#include <intrin.h>
11+
#pragma intrinsic(__popcnt64) // For MSVC, this ensures the intrinsic is available.
12+
#endif
13+
14+
namespace chess{
1015

1116
/**
1217
* toggles the bit
@@ -54,8 +59,12 @@ inline bool has(BB number, Square index) {
5459
* @return
5560
*/
5661
inline Square lsb(BB bb) {
57-
// UCI_ASSERT(bb != 0);
62+
// UCI_ASSERT(bb != 0);
63+
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
64+
return _tzcnt_u64(bb);
65+
#else
5866
return __builtin_ctzll(bb);
67+
#endif
5968
}
6069

6170
/**
@@ -101,7 +110,11 @@ inline Square nlsb(BB bb, Square n) {
101110
* @return
102111
*/
103112
inline int popcount(BB bb) {
113+
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
114+
return __popcnt64(bb);
115+
#else
104116
return __builtin_popcountll(bb);
117+
#endif
105118
}
106119

107120
/**

src/data/matrix_dense.h

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "sarray.h"
99

1010
#include <iostream>
11+
#include <functional>
1112

1213
namespace data {
1314
template<typename TYPE = float>
@@ -67,8 +68,13 @@ struct DenseMatrix : public SArray<TYPE>, Matrix {
6768
inline DenseMatrix<TYPE> operator-(TYPE val);
6869
inline DenseMatrix<TYPE>& operator/=(TYPE val);
6970
inline DenseMatrix<TYPE> operator/(TYPE val);
71+
inline DenseMatrix<TYPE>& operator=(TYPE value);
72+
73+
// for each function which allows easy iteration over the content
74+
inline void for_each(std::function<void(size_t, size_t, TYPE&)> func);
7075
};
7176

77+
7278
template<typename TYPE>
7379
DenseMatrix<TYPE>::DenseMatrix(const size_t& m, const size_t& n)
7480
: SArray<TYPE>(m * n)
@@ -152,6 +158,7 @@ DenseMatrix<TYPE>& DenseMatrix<TYPE>::operator=(DenseMatrix<TYPE>&& other) {
152158
return *this;
153159
}
154160

161+
155162
template<typename TYPE>
156163
template<Device DEV>
157164
TYPE* DenseMatrix<TYPE>::first() const {
@@ -314,11 +321,7 @@ DenseMatrix<TYPE> DenseMatrix<TYPE>::operator-() const {
314321
template<typename TYPE>
315322
DenseMatrix<TYPE>& DenseMatrix<TYPE>::operator*=(TYPE val) {
316323
ASSERT(this->template is_allocated<CPU>());
317-
for (size_t m = 0; m < this->m; m++) {
318-
for (size_t n = 0; n < this->n; n++) {
319-
this->get(m, n) *= val;
320-
}
321-
}
324+
for_each([&val](size_t, size_t, TYPE& v) { v *= val; });  // element must be TYPE& (not float&) to match for_each's callback for every instantiation
322325
return *this;
323326
}
324327
template<typename TYPE>
@@ -329,11 +332,7 @@ DenseMatrix<TYPE> DenseMatrix<TYPE>::operator*(TYPE val) {
329332
template<typename TYPE>
330333
DenseMatrix<TYPE>& DenseMatrix<TYPE>::operator+=(TYPE val) {
331334
ASSERT(this->template is_allocated<CPU>());
332-
for (size_t m = 0; m < this->m; m++) {
333-
for (size_t n = 0; n < this->n; n++) {
334-
this->get(m, n) += val;
335-
}
336-
}
335+
for_each([&val](size_t, size_t, TYPE& v) { v += val; });  // element must be TYPE& (not float&) to match for_each's callback for every instantiation
337336
return *this;
338337
}
339338
template<typename TYPE>
@@ -343,12 +342,7 @@ DenseMatrix<TYPE> DenseMatrix<TYPE>::operator+(TYPE val) {
343342
}
344343
template<typename TYPE>
345344
DenseMatrix<TYPE>& DenseMatrix<TYPE>::operator-=(TYPE val) {
346-
ASSERT(this->template is_allocated<CPU>());
347-
for (size_t m = 0; m < this->m; m++) {
348-
for (size_t n = 0; n < this->n; n++) {
349-
this->get(m, n) -= val;
350-
}
351-
}
345+
for_each([&val](size_t, size_t, TYPE& v) { v -= val; });  // element must be TYPE& (not float&) to match for_each's callback for every instantiation
352346
return *this;
353347
}
354348
template<typename TYPE>
@@ -358,12 +352,7 @@ DenseMatrix<TYPE> DenseMatrix<TYPE>::operator-(TYPE val) {
358352
}
359353
template<typename TYPE>
360354
DenseMatrix<TYPE>& DenseMatrix<TYPE>::operator/=(TYPE val) {
361-
ASSERT(this->template is_allocated<CPU>());
362-
for (size_t m = 0; m < this->m; m++) {
363-
for (size_t n = 0; n < this->n; n++) {
364-
this->get(m, n) /= val;
365-
}
366-
}
355+
for_each([&val](size_t, size_t, TYPE& v) { v /= val; });  // element must be TYPE& (not float&) to match for_each's callback for every instantiation
367356
return *this;
368357
}
369358
template<typename TYPE>
@@ -372,4 +361,20 @@ DenseMatrix<TYPE> DenseMatrix<TYPE>::operator/(TYPE val) {
372361
return DenseMatrix<TYPE>(*this) /= val;
373362
}
374363

364+
template<typename TYPE>
365+
DenseMatrix<TYPE>& DenseMatrix<TYPE>::operator=(TYPE value){
366+
for_each([&value](size_t, size_t, TYPE& v) { v = value; });  // element must be TYPE& (not float&) to match for_each's callback for every instantiation
367+
return *this;
368+
}
369+
370+
template<typename TYPE>
371+
void DenseMatrix<TYPE>::for_each(std::function<void(size_t, size_t, TYPE&)> func) {
372+
ASSERT(this->template is_allocated<CPU>());
373+
for (size_t i = 0; i < this->m; ++i) {
374+
for (size_t j = 0; j < this->n; ++j) {
375+
func(i, j, this->get(i, j));
376+
}
377+
}
378+
}
379+
375380
} // namespace data

src/main.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,4 +478,4 @@ int main(int argc, char* argv[]) {
478478

479479
close();
480480
return 0;
481-
}
481+
}

src/math/random.h

Lines changed: 51 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -15,60 +15,80 @@ inline void seed(uint32_t seed_value) {
1515

1616
template<typename TYPE>
1717
inline void fill(data::DenseMatrix<TYPE>& matrix, TYPE value) {
18-
for (size_t i = 0; i < matrix.m; i++)
19-
for (size_t j = 0; j < matrix.n; j++)
20-
matrix.get(i, j) = value;
18+
if (!matrix.template is_allocated<data::CPU>()) {
19+
matrix.template malloc<data::CPU>();
20+
}
21+
matrix = value;
2122
}
2223

2324
template<typename TYPE>
2425
inline void kaiming(data::DenseMatrix<TYPE>& matrix, size_t expected_inputs) {
25-
std::uniform_real_distribution<> dis(0.0, 1.0);
26-
27-
for (size_t i = 0; i < matrix.m; i++) {
28-
for (size_t j = 0; j < matrix.n; j++) {
29-
auto r1 = dis(twister), r2 = dis(twister);
30-
auto r = std::sqrt(-2.0 * std::log(r1)) * std::cos(6.28318530718 * r2);
31-
matrix.get(i, j) = r * std::sqrt(2.0 / expected_inputs);
32-
}
26+
if (!matrix.template is_allocated<data::CPU>()) {
27+
matrix.template malloc<data::CPU>();
3328
}
29+
std::uniform_real_distribution<> dis(0.0, 1.0);
30+
matrix.for_each([&](size_t, size_t, TYPE& element) {
31+
auto r1 = dis(twister), r2 = dis(twister);
32+
auto r = std::sqrt(-2.0 * std::log(r1)) * std::cos(6.28318530718 * r2);
33+
element = r * std::sqrt(2.0 / expected_inputs);
34+
});
3435
}
3536

3637
template<typename TYPE>
3738
inline void normal(data::DenseMatrix<TYPE>& matrix, TYPE mean, TYPE dev) {
38-
std::normal_distribution<TYPE> distribution(mean, dev);
39-
for (int j = 0; j < matrix.n; j++) {
40-
for (int i = 0; i < matrix.m; i++) {
41-
matrix.get(i, j) = distribution(twister);
42-
}
39+
if (!matrix.template is_allocated<data::CPU>()) {
40+
matrix.template malloc<data::CPU>();
4341
}
42+
std::normal_distribution<TYPE> distribution(mean, dev);
43+
matrix.for_each([&](size_t, size_t, TYPE& element) { element = distribution(twister); });
4444
}
4545

4646
template<typename TYPE>
4747
inline void uniform(data::DenseMatrix<TYPE>& matrix, TYPE lower, TYPE upper) {
48+
if (!matrix.template is_allocated<data::CPU>()) {
49+
matrix.template malloc<data::CPU>();
50+
}
4851
if constexpr (std::is_integral_v<TYPE>) {
4952
std::uniform_int_distribution<TYPE> distribution(lower, upper);
50-
for (int i = 0; i < matrix.m; i++) {
51-
for (int j = 0; j < matrix.n; j++) {
52-
matrix.get(i, j) = distribution(twister);
53-
}
54-
}
53+
matrix.for_each([&](size_t, size_t, TYPE& element) { element = distribution(twister); });
5554
} else if constexpr (std::is_floating_point_v<TYPE>) {
5655
std::uniform_real_distribution<TYPE> distribution(lower, upper);
57-
for (int i = 0; i < matrix.m; i++) {
58-
for (int j = 0; j < matrix.n; j++) {
59-
matrix.get(i, j) = distribution(twister);
60-
}
61-
}
56+
matrix.for_each([&](size_t, size_t, TYPE& element) { element = distribution(twister); });
6257
}
6358
}
6459

6560
inline void uniform(data::DenseMatrix<bool>& matrix, bool lower, bool upper) {
66-
std::uniform_int_distribution<int> distribution(lower, upper);
67-
for (int i = 0; i < matrix.m; i++) {
68-
for (int j = 0; j < matrix.n; j++) {
69-
matrix.get(i, j) = (bool) distribution(twister);
70-
}
61+
if (!matrix.template is_allocated<data::CPU>()) {
62+
matrix.template malloc<data::CPU>();
7163
}
64+
std::uniform_int_distribution<int> distribution(lower ? 1 : 0, upper ? 1 : 0);
65+
matrix.for_each([&](size_t, size_t, bool& element) { element = distribution(twister) != 0; });
7266
}
7367

68+
template<typename TYPE>
69+
class Initialiser {
70+
public:
71+
virtual void operator()(data::DenseMatrix<TYPE>& matrix, size_t expected_inputs) const {};
72+
};
73+
74+
template<typename TYPE>
75+
class FillInitialiser : public Initialiser<TYPE> {
76+
TYPE value;
77+
78+
public:
79+
FillInitialiser(TYPE v)
80+
: value(v) {}
81+
82+
void operator()(data::DenseMatrix<TYPE>& matrix, size_t expected_inputs) const override {  // override: compiler verifies the signature matches the base
83+
fill(matrix, value);
84+
}
85+
};
86+
87+
template<typename TYPE>
88+
class KaimingInitializer : public Initialiser<TYPE> {
89+
void operator()(data::DenseMatrix<TYPE>& matrix, size_t expected_inputs) const {
90+
kaiming(matrix, expected_inputs);
91+
}
92+
};
93+
7494
} // namespace math

0 commit comments

Comments
 (0)