Skip to content

Commit f5338a1

Browse files
committed
Compile AVX and AVX2 intrinsic code in separate files. Clean up use of the USE_AVX and USE_AVX2 macros in favor of __AVX__ and __AVX2__.
1 parent d96ad41 commit f5338a1

File tree

8 files changed

+84
-38
lines changed

8 files changed

+84
-38
lines changed

CMakeLists.txt

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,18 +116,18 @@ IF(NOT NO_GCC_EBX_FPIC_BUG)
116116
ENDIF(NOT NO_GCC_EBX_FPIC_BUG)
117117

118118

119-
FIND_PACKAGE(SSE)
119+
FIND_PACKAGE(SSE) # checks SSE, AVX and AVX2
120120
IF(C_SSE2_FOUND)
121121
SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}")
122122
ENDIF(C_SSE2_FOUND)
123123
IF(C_SSE3_FOUND)
124124
SET(CMAKE_C_FLAGS "${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_C_FLAGS}")
125125
ENDIF(C_SSE3_FOUND)
126126
IF(C_AVX_FOUND)
127-
SET(CMAKE_C_FLAGS "${C_AVX_FLAGS} -DUSE_AVX ${CMAKE_C_FLAGS}")
127+
SET(CMAKE_C_FLAGS "${C_AVX_FLAGS} ${CMAKE_C_FLAGS}")
128128
ENDIF(C_AVX_FOUND)
129129
IF(C_AVX2_FOUND)
130-
SET(CMAKE_C_FLAGS "${C_AVX2_FLAGS} -DUSE_AVX2 ${CMAKE_C_FLAGS}")
130+
SET(CMAKE_C_FLAGS "${C_AVX2_FLAGS} ${CMAKE_C_FLAGS}")
131131
ENDIF(C_AVX2_FOUND)
132132

133133
CHECK_C_SOURCE_RUNS("
@@ -208,7 +208,6 @@ ELSE(MSVC)
208208
ENDIF(MSVC)
209209

210210
IF(C_AVX_FOUND OR C_AVX2_FOUND)
211-
SET(CMAKE_C_FLAGS "-DUSE_AVX ${CMAKE_C_FLAGS}")
212211
IF(MSVC)
213212
SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS "/Ox /fp:fast /arch:AVX /std:c99")
214213
ELSE(MSVC)
@@ -225,6 +224,8 @@ SET(src
225224
THGeneral.c THHalf.c THAllocator.c THStorage.c THTensor.c THBlas.c THLapack.c
226225
THLogAdd.c THRandom.c THFile.c THDiskFile.c THMemoryFile.c THAtomic.c THVector.c)
227226

227+
SET(src ${src} vector/AVX.c vector/AVX2.c)
228+
228229
SET(src ${src} ${hdr} ${simd})
229230

230231
#######################################################################
@@ -364,6 +365,11 @@ INSTALL(FILES
364365
THHalf.h
365366
DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH")
366367

368+
INSTALL(FILES
369+
vector/AVX.h
370+
vector/AVX2.h
371+
DESTINATION "${TH_INSTALL_INCLUDE_SUBDIR}/TH/vector")
372+
367373
INSTALL(FILES
368374
generic/THBlas.c
369375
generic/THBlas.h

THVector.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@
1515
#include "vector/SSE.c"
1616
#endif
1717

18-
#if defined(USE_AVX) || defined(USE_AVX2)
19-
#include "vector/AVX.c"
18+
#if defined(__AVX__) || defined(__AVX2__)
19+
#include "vector/AVX.h"
2020
#endif
2121

22-
#if defined(USE_AVX2)
23-
#include "vector/AVX2.c"
22+
#if defined(__AVX2__)
23+
#include "vector/AVX2.h"
2424
#endif
2525

2626
#include "generic/THVectorDefault.c"

generic/THVectorDispatch.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static FunctionDescription THVector_(fill_DISPATCHTABLE)[] = {
2626
#endif
2727
#endif
2828

29-
#if defined(USE_AVX)
29+
#if defined(__AVX__)
3030
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
3131
FUNCTION_IMPL(THVector_(fill_AVX), SIMDExtension_AVX),
3232
#endif
@@ -52,13 +52,13 @@ static FunctionDescription THVector_(cadd_DISPATCHTABLE)[] = {
5252
#endif
5353
#endif
5454

55-
#if defined(USE_AVX2)
55+
#if defined(__AVX2__)
5656
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
5757
FUNCTION_IMPL(THVector_(cadd_AVX2), SIMDExtension_AVX2),
5858
#endif
5959
#endif
6060

61-
#if defined(USE_AVX)
61+
#if defined(__AVX__)
6262
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
6363
FUNCTION_IMPL(THVector_(cadd_AVX), SIMDExtension_AVX),
6464
#endif
@@ -91,7 +91,7 @@ static FunctionDescription THVector_(adds_DISPATCHTABLE)[] = {
9191
#endif
9292
#endif
9393

94-
#if defined(USE_AVX)
94+
#if defined(__AVX__)
9595
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
9696
FUNCTION_IMPL(THVector_(adds_AVX), SIMDExtension_AVX),
9797
#endif
@@ -119,7 +119,7 @@ static FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = {
119119
#endif
120120
#endif
121121

122-
#if defined(USE_AVX)
122+
#if defined(__AVX__)
123123
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
124124
FUNCTION_IMPL(THVector_(cmul_AVX), SIMDExtension_AVX),
125125
#endif
@@ -152,7 +152,7 @@ static FunctionDescription THVector_(muls_DISPATCHTABLE)[] = {
152152
#endif
153153
#endif
154154

155-
#if defined(USE_AVX)
155+
#if defined(__AVX__)
156156
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
157157
FUNCTION_IMPL(THVector_(muls_AVX), SIMDExtension_AVX),
158158
#endif
@@ -179,7 +179,7 @@ static FunctionDescription THVector_(cdiv_DISPATCHTABLE)[] = {
179179
#endif
180180
#endif
181181

182-
#if defined(USE_AVX)
182+
#if defined(__AVX__)
183183
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
184184
FUNCTION_IMPL(THVector_(cdiv_AVX), SIMDExtension_AVX),
185185
#endif
@@ -206,7 +206,7 @@ static FunctionDescription THVector_(divs_DISPATCHTABLE)[] = {
206206
#endif
207207
#endif
208208

209-
#if defined(USE_AVX)
209+
#if defined(__AVX__)
210210
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
211211
FUNCTION_IMPL(THVector_(divs_AVX), SIMDExtension_AVX),
212212
#endif
@@ -227,7 +227,7 @@ void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n) {
227227

228228
static void (*THVector_(copy_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(copy_DEFAULT);
229229
static FunctionDescription THVector_(copy_DISPATCHTABLE)[] = {
230-
#if defined(USE_AVX)
230+
#if defined(__AVX__)
231231
#if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)
232232
FUNCTION_IMPL(THVector_(copy_AVX), SIMDExtension_AVX),
233233
#endif

generic/simd/convolve.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#if defined(USE_AVX)
1+
#if defined(__AVX__)
22

33
#ifdef _MSC_VER
44
#include <intrin.h>
@@ -113,7 +113,7 @@ void convolve_5x5_sse(float* output, float* input, float* kernel, long outRows,
113113
void convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols);
114114

115115
void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols) {
116-
#if defined(USE_AVX)
116+
#if defined(__AVX__)
117117
int avx = haveCPUFeature(kCPUFeature_AVX);
118118
if (avx)
119119
{
@@ -124,4 +124,4 @@ void convolve_5x5(float* output, float* input, float* kernel, long outRows, long
124124
{
125125
convolve_5x5_sse(output, input, kernel, outRows, outCols, outCols, inCols);
126126
}
127-
}
127+
}

vector/AVX.c

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
#if defined(__AVX__)
12
#ifndef _MSC_VER
23
#include <x86intrin.h>
34
#else
45
#include <intrin.h>
56
#endif
67

7-
static void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) {
8+
#include "AVX.h"
9+
10+
void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) {
811
ptrdiff_t i;
912
ptrdiff_t off;
1013
for (i=0; i<=((n)-8); i+=8) {
@@ -17,7 +20,7 @@ static void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t
1720
}
1821
}
1922

20-
static void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) {
23+
void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) {
2124
ptrdiff_t i;
2225
ptrdiff_t off;
2326
__m256d YMM0 = _mm256_set_pd(c, c, c, c);
@@ -33,7 +36,7 @@ static void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n
3336
}
3437
}
3538

36-
static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
39+
void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
3740
ptrdiff_t i;
3841
__m256d YMM0, YMM1, YMM2, YMM3;
3942
for (i=0; i<=((n)-8); i+=8) {
@@ -51,7 +54,7 @@ static void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y,
5154
}
5255
}
5356

54-
static void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
57+
void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
5558
ptrdiff_t i;
5659
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
5760
__m256d YMM0, YMM1;
@@ -68,7 +71,7 @@ static void THDoubleVector_divs_AVX(double *y, const double *x, const double c,
6871
}
6972
}
7073

71-
static void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
74+
void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {
7275
ptrdiff_t i;
7376
__m256d YMM0, YMM1, YMM2, YMM3;
7477
for (i=0; i<=((n)-8); i+=8) {
@@ -86,7 +89,7 @@ static void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y,
8689
}
8790
}
8891

89-
static void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
92+
void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
9093
ptrdiff_t i;
9194
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
9295
__m256d YMM0, YMM1;
@@ -103,7 +106,7 @@ static void THDoubleVector_muls_AVX(double *y, const double *x, const double c,
103106
}
104107
}
105108

106-
static void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
109+
void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
107110
ptrdiff_t i;
108111
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
109112
__m256d YMM0, YMM1, YMM2, YMM3;
@@ -119,7 +122,7 @@ static void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y,
119122
}
120123
}
121124

122-
static void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
125+
void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {
123126
ptrdiff_t i;
124127
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
125128
__m256d YMM0, YMM1;
@@ -136,7 +139,7 @@ static void THDoubleVector_adds_AVX(double *y, const double *x, const double c,
136139
}
137140
}
138141

139-
static void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n) {
142+
void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n) {
140143
ptrdiff_t i;
141144
ptrdiff_t off;
142145
for (i=0; i<=((n)-16); i+=16) {
@@ -149,7 +152,7 @@ static void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n)
149152
}
150153
}
151154

152-
static void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
155+
void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
153156
ptrdiff_t i;
154157
ptrdiff_t off;
155158
__m256 YMM0 = _mm256_set_ps(c, c, c, c, c, c, c, c);
@@ -165,7 +168,7 @@ static void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {
165168
}
166169
}
167170

168-
static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
171+
void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
169172
ptrdiff_t i;
170173
__m256 YMM0, YMM1, YMM2, YMM3;
171174
for (i=0; i<=((n)-16); i+=16) {
@@ -183,7 +186,7 @@ static void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, con
183186
}
184187
}
185188

186-
static void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
189+
void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
187190
ptrdiff_t i;
188191
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
189192
__m256 YMM0, YMM1;
@@ -200,7 +203,7 @@ static void THFloatVector_divs_AVX(float *y, const float *x, const float c, cons
200203
}
201204
}
202205

203-
static void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
206+
void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {
204207
ptrdiff_t i;
205208
__m256 YMM0, YMM1, YMM2, YMM3;
206209
for (i=0; i<=((n)-16); i+=16) {
@@ -218,7 +221,7 @@ static void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, con
218221
}
219222
}
220223

221-
static void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
224+
void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
222225
ptrdiff_t i;
223226
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
224227
__m256 YMM0, YMM1;
@@ -235,7 +238,7 @@ static void THFloatVector_muls_AVX(float *y, const float *x, const float c, cons
235238
}
236239
}
237240

238-
static void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
241+
void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
239242
ptrdiff_t i;
240243
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
241244
__m256 YMM0, YMM1, YMM2, YMM3;
@@ -251,7 +254,7 @@ static void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, con
251254
}
252255
}
253256

254-
static void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
257+
void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {
255258
ptrdiff_t i;
256259
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
257260
__m256 YMM0, YMM1;
@@ -267,3 +270,5 @@ static void THFloatVector_adds_AVX(float *y, const float *x, const float c, cons
267270
y[i] = x[i] + c;
268271
}
269272
}
273+
274+
#endif // defined(__AVX__)

vector/AVX.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#ifndef TH_AVX_H
2+
#define TH_AVX_H
3+
4+
#include <stddef.h>
5+
6+
void THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n);
7+
void THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n);
8+
void THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
9+
void THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
10+
void THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);
11+
void THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
12+
void THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
13+
void THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n);
14+
void THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n);
15+
void THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n);
16+
void THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
17+
void THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
18+
void THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);
19+
void THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
20+
void THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
21+
void THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n);
22+
23+
#endif

vector/AVX2.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
#if defined(__AVX2__)
12
#ifndef _MSC_VER
23
#include <x86intrin.h>
34
#else
45
#include <intrin.h>
56
#endif
7+
#include "AVX2.h"
68

7-
static void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
9+
void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {
810
ptrdiff_t i;
911
__m256d YMM15 = _mm256_set_pd(c, c, c, c);
1012
__m256d YMM0, YMM1, YMM2, YMM3;
@@ -23,7 +25,7 @@ static void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y
2325
}
2426
}
2527

26-
static void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
28+
void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {
2729
ptrdiff_t i;
2830
__m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);
2931
__m256 YMM0, YMM1, YMM2, YMM3;
@@ -42,3 +44,4 @@ static void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, co
4244
}
4345
}
4446

47+
#endif // defined(__AVX2__)

vector/AVX2.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#ifndef TH_AVX2_H
2+
#define TH_AVX2_H
3+
4+
#include <stddef.h>
5+
6+
void THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);
7+
void THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);
8+
9+
#endif

0 commit comments

Comments
 (0)