|
11 | 11 | #include <caffe2/core/hip/common_hip.h> |
12 | 12 | #endif |
13 | 13 |
|
14 | | -#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) |
| 14 | +#if __CUDA_ARCH__ || __HIP_DEVICE_COMPILE__ |
15 | 15 | #define CONVERSIONS_DECL __host__ __device__ inline |
16 | 16 | #else |
17 | 17 | #define CONVERSIONS_DECL inline |
@@ -175,38 +175,28 @@ CONVERSIONS_DECL OUT To(const IN in) { |
175 | 175 | // explicit for fp16 |
176 | 176 | template <> |
177 | 177 | CONVERSIONS_DECL float16 To(const float in) { |
178 | | -#if __CUDA_ARCH__ |
| 178 | +#if __CUDA_ARCH__ && CUDA_VERSION >= 9000 |
179 | 179 | // hacky interface between C2 fp16 and CUDA |
180 | | -#if CUDA_VERSION >= 9000 |
181 | 180 | half rh = __float2half(in); |
182 | 181 | return halfToFloat16(rh); |
183 | | -#else |
| 182 | +#elif __CUDA_ARCH__ || __HIP_DEVICE_COMPILE__ |
184 | 183 | float16 ret; |
185 | 184 | ret.x = __float2half(in).x; |
186 | 185 | return ret; |
187 | | -#endif // CUDA_VERSION >= 9000 |
188 | | -#elif __HIP_DEVICE_COMPILE__ |
189 | | - float16 ret; |
190 | | - ret.x = __float2half(in); |
191 | | - return ret; |
192 | 186 | #else |
193 | 187 | return cpu_float2half_rn(in); |
194 | 188 | #endif |
195 | 189 | } |
196 | 190 |
|
197 | 191 | template <> |
198 | 192 | CONVERSIONS_DECL float To(const float16 in) { |
199 | | -#if __CUDA_ARCH__ |
200 | | -#if CUDA_VERSION >= 9000 |
| 193 | +#if __CUDA_ARCH__ && CUDA_VERSION >= 9000 |
201 | 194 | __half_raw tmp; |
202 | | -#else |
203 | | - __half tmp; |
204 | | -#endif |
205 | 195 | tmp.x = in.x; |
206 | 196 | return __half2float(tmp); |
207 | | -#elif __HIP_DEVICE_COMPILE__ |
| 197 | +#elif __CUDA_ARCH__ || __HIP_DEVICE_COMPILE__ |
208 | 198 | __half tmp; |
209 | | - tmp = in.x; |
| 199 | + tmp.x = in.x; |
210 | 200 | return __half2float(tmp); |
211 | 201 | #else |
212 | 202 | return cpu_half2float(in); |
|
0 commit comments