|
| 1 | +#define CATCH_CONFIG_MAIN |
| 2 | +#include "catch.hpp" |
| 3 | + |
| 4 | +#include "ATen/ATen.h" |
| 5 | +#include "ATen/cuda/NumericLimits.cuh" |
| 6 | +#include "cuda.h" |
| 7 | +#include "cuda_fp16.h" |
| 8 | +#include "cuda_runtime.h" |
| 9 | + |
| 10 | +#include <assert.h> |
| 11 | + |
| 12 | +using namespace at; |
| 13 | + |
// Device-side checks for at::Half.
//
// Covers construction / conversion between Half, __half and float, and then
// the common math functions when they are handed a Half operand.  Every check
// is a device-side assert(), so this must be called from device code.
__device__ void test() {

  // ---- construction and conversions on the device ----
  // A Half built from an int and one built from a float compare equal.
  assert(Half(3) == Half(3.0f));
  assert(static_cast<Half>(3.0f) == Half(3.0f));
  // Comparison against a bare float (original note: there is no
  // float <=> __half implicit conversion).
  assert(static_cast<Half>(3.0f) == 3.0f);

  // Mixed __half / Half arithmetic: 3 - 2 has to come out as 1.
  __half lhs = __float2half(3.0f);
  __half rhs = __float2half(2.0f);
  __half c = lhs - Half(rhs);
  assert(static_cast<Half>(c) == Half(1.0));

  // ---- math functions ----
  // Each function is evaluated once with a Half argument and once with the
  // corresponding float argument; the two results must agree to within
  // `threshold`.  This exercises the device-side half API for the common
  // mathematical functions.
  //
  // Note: math functions are called through the global namespace ("::")
  // instead of std:: so that they resolve to the overloads from nvcc's
  // math_functions.hpp.

  float threshold = 0.00001;
  assert(::abs(::lgamma(Half(10.0)) - ::lgamma(10.0f)) <= threshold);
  assert(::abs(::exp(Half(1.0)) - ::exp(1.0f)) <= threshold);
  assert(::abs(::log(Half(1.0)) - ::log(1.0f)) <= threshold);
  assert(::abs(::log10(Half(1000.0)) - ::log10(1000.0f)) <= threshold);
  assert(::abs(::log1p(Half(0.0)) - ::log1p(0.0f)) <= threshold);
  assert(::abs(::log2(Half(1000.0)) - ::log2(1000.0f)) <= threshold);
  assert(::abs(::expm1(Half(1.0)) - ::expm1(1.0f)) <= threshold);
  assert(::abs(::cos(Half(0.0)) - ::cos(0.0f)) <= threshold);
  assert(::abs(::sin(Half(0.0)) - ::sin(0.0f)) <= threshold);
  assert(::abs(::sqrt(Half(100.0)) - ::sqrt(100.0f)) <= threshold);
  assert(::abs(::ceil(Half(2.4)) - ::ceil(2.4f)) <= threshold);
  assert(::abs(::floor(Half(2.7)) - ::floor(2.7f)) <= threshold);
  assert(::abs(::trunc(Half(2.7)) - ::trunc(2.7f)) <= threshold);
  assert(::abs(::acos(Half(-1.0)) - ::acos(-1.0f)) <= threshold);
  assert(::abs(::cosh(Half(1.0)) - ::cosh(1.0f)) <= threshold);
  assert(::abs(::acosh(Half(1.0)) - ::acosh(1.0f)) <= threshold);
  assert(::abs(::asin(Half(1.0)) - ::asin(1.0f)) <= threshold);
  assert(::abs(::sinh(Half(1.0)) - ::sinh(1.0f)) <= threshold);
  assert(::abs(::asinh(Half(1.0)) - ::asinh(1.0f)) <= threshold);
  assert(::abs(::tan(Half(0.0)) - ::tan(0.0f)) <= threshold);
  assert(::abs(::atan(Half(1.0)) - ::atan(1.0f)) <= threshold);
  assert(::abs(::tanh(Half(1.0)) - ::tanh(1.0f)) <= threshold);
  assert(::abs(::erf(Half(10.0)) - ::erf(10.0f)) <= threshold);
  assert(::abs(::erfc(Half(10.0)) - ::erfc(10.0f)) <= threshold);
  assert(::abs(::abs(Half(-3.0)) - ::abs(-3.0f)) <= threshold);
  assert(::abs(::round(Half(2.3)) - ::round(2.3f)) <= threshold);
  assert(::abs(::pow(Half(2.0), Half(10.0)) - ::pow(2.0f, 10.0f)) <= threshold);
  assert(::abs(::atan2(Half(7.0), Half(0.0)) - ::atan2(7.0f, 0.0f)) <= threshold);

  // ---- classification functions ----
  // Original author's note: can't use namespace on isnan and isinf in
  // device code.
  #ifndef _MSC_VER
  // Everywhere except MSVC the Half argument is passed directly.
  assert(::abs(::isnan(Half(0.0)) - ::isnan(0.0f)) <= threshold);
  assert(::abs(::isinf(Half(0.0)) - ::isinf(0.0f)) <= threshold);
  #else
  // Windows requires an explicit conversion to float here; the reason is
  // unclear.  Related issue with clang: https://reviews.llvm.org/D37906
  assert(::abs(::isnan((float)Half(0.0)) - ::isnan(0.0f)) <= threshold);
  assert(::abs(::isinf((float)Half(0.0)) - ::isinf(0.0f)) <= threshold);
  #endif
}
| 76 | + |
// Kernel entry point: the launching thread(s) simply run the device-side
// Half checks in test().  Takes no arguments and writes no output; a failed
// device assert is the only way it reports a problem.
__global__ void kernel() {
  test();
}
| 80 | + |
// Host-side helper that launches `kernel` with one block of one thread.
// The launch is asynchronous: this function neither waits for the kernel
// nor checks for errors, so the caller is expected to do both.
void launch_function() {
  const dim3 grid_dim(1);
  const dim3 block_dim(1);
  kernel<<<grid_dim, block_dim>>>();
}
| 84 | + |
// Runs the device-side Half checks and fails if the kernel could not be
// launched or did not finish cleanly.
TEST_CASE( "half common math functions tests in device", "[cuda]" ) {
  launch_function();

  // A kernel launch does not return a status itself; launch failures are
  // reported through cudaGetLastError().  Checking it here keeps the test
  // from passing when the kernel never actually ran.
  cudaError_t launch_err = cudaGetLastError();
  INFO("kernel launch: " << cudaGetErrorString(launch_err));
  REQUIRE(launch_err == cudaSuccess);

  // Wait for the kernel to finish; a failed device-side assert surfaces as
  // an error from this synchronizing call.
  cudaError_t err = cudaDeviceSynchronize();
  INFO("kernel execution: " << cudaGetErrorString(err));
  REQUIRE(err == cudaSuccess);
}
| 90 | + |
0 commit comments