9#ifndef __CLANG_HIP_MATH_H__
10#define __CLANG_HIP_MATH_H__
12#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
13#error "This file is for HIP and OpenMP AMDGCN device compilation only."
18#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS
20#if !defined(__HIPCC_RTC__)
23#ifdef __OPENMP_AMDGCN__
28#pragma push_macro("__DEVICE__")
30#ifdef __OPENMP_AMDGCN__
31#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
33#define __DEVICE__ static __device__ inline __attribute__((always_inline))
40#pragma push_macro("__FAST_OR_SLOW")
41#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
42#define __FAST_OR_SLOW(fast, slow) fast
44#define __FAST_OR_SLOW(fast, slow) slow
48#pragma push_macro("__RETURN_TYPE")
49#ifdef __OPENMP_AMDGCN__
50#define __RETURN_TYPE int
52#if defined(__cplusplus)
53#define __RETURN_TYPE bool
55#define __RETURN_TYPE int
59#if defined (__cplusplus) && __cplusplus < 201103L
62struct __compare_result{};
64struct __compare_result<
true> {
69void __suppress_unused_warning(
bool b){};
70template <
unsigned int S,
unsigned int T>
72 __suppress_unused_warning(__compare_result<S == T>::valid);
75#define __static_assert_type_size_equal(A, B) \
76 __static_assert_equal_size<A,B>()
79#define __static_assert_type_size_equal(A,B) \
80 static_assert((A) == (B), "")
87 while (*__tagp !=
'\0') {
90 if (__tmp >=
'0' && __tmp <=
'7')
91 __r = (__r * 8u) + __tmp -
'0';
104 while (*__tagp !=
'\0') {
105 char __tmp = *__tagp;
107 if (__tmp >=
'0' && __tmp <=
'9')
108 __r = (__r * 10u) + __tmp -
'0';
121 while (*__tagp !=
'\0') {
122 char __tmp = *__tagp;
124 if (__tmp >=
'0' && __tmp <=
'9')
125 __r = (__r * 16u) + __tmp -
'0';
126 else if (__tmp >=
'a' && __tmp <=
'f')
127 __r = (__r * 16u) + __tmp -
'a' + 10;
128 else if (__tmp >=
'A' && __tmp <=
'F')
129 __r = (__r * 16u) + __tmp -
'A' + 10;
141 if (*__tagp ==
'0') {
144 if (*__tagp ==
'x' || *__tagp ==
'X')
162 const float __log2_10 = 0x1.a934f0p+1f;
163 return __builtin_amdgcn_exp2f(__log2_10 * __x);
168 const float __log2_e = 0x1.715476p+0;
169 return __builtin_amdgcn_exp2f(__log2_e * __x);
172#if defined OCML_BASIC_ROUNDED_OPERATIONS
// __fadd_rd: single-precision add routed through __ocml_add_rtn_f32.
float __fadd_rd(float __x, float __y)
{
  return __ocml_add_rtn_f32(__x, __y);
}

// __fadd_rn: single-precision add routed through __ocml_add_rte_f32.
float __fadd_rn(float __x, float __y)
{
  return __ocml_add_rte_f32(__x, __y);
}

// __fadd_ru: single-precision add routed through __ocml_add_rtp_f32.
float __fadd_ru(float __x, float __y)
{
  return __ocml_add_rtp_f32(__x, __y);
}

// __fadd_rz: single-precision add routed through __ocml_add_rtz_f32.
float __fadd_rz(float __x, float __y)
{
  return __ocml_add_rtz_f32(__x, __y);
}
186#if defined OCML_BASIC_ROUNDED_OPERATIONS
// __fdiv_rd: __x / __y via __ocml_div_rtn_f32.
float __fdiv_rd(float __x, float __y)
{
  return __ocml_div_rtn_f32(__x, __y);
}

// __fdiv_rn: __x / __y via __ocml_div_rte_f32.
float __fdiv_rn(float __x, float __y)
{
  return __ocml_div_rte_f32(__x, __y);
}

// __fdiv_ru: __x / __y via __ocml_div_rtp_f32.
float __fdiv_ru(float __x, float __y)
{
  return __ocml_div_rtp_f32(__x, __y);
}

// __fdiv_rz: __x / __y via __ocml_div_rtz_f32.
float __fdiv_rz(float __x, float __y)
{
  return __ocml_div_rtz_f32(__x, __y);
}
203#if defined OCML_BASIC_ROUNDED_OPERATIONS
206 return __ocml_fma_rtn_f32(__x,
__y, __z);
210 return __ocml_fma_rte_f32(__x,
__y, __z);
214 return __ocml_fma_rtp_f32(__x,
__y, __z);
218 return __ocml_fma_rtz_f32(__x,
__y, __z);
223 return __builtin_fmaf(__x,
__y, __z);
227#if defined OCML_BASIC_ROUNDED_OPERATIONS
// __fmul_rd: product of __x and __y via __ocml_mul_rtn_f32.
float __fmul_rd(float __x, float __y)
{
  return __ocml_mul_rtn_f32(__x, __y);
}

// __fmul_rn: product of __x and __y via __ocml_mul_rte_f32.
float __fmul_rn(float __x, float __y)
{
  return __ocml_mul_rte_f32(__x, __y);
}

// __fmul_ru: product of __x and __y via __ocml_mul_rtp_f32.
float __fmul_ru(float __x, float __y)
{
  return __ocml_mul_rtp_f32(__x, __y);
}

// __fmul_rz: product of __x and __y via __ocml_mul_rtz_f32.
float __fmul_rz(float __x, float __y)
{
  return __ocml_mul_rtz_f32(__x, __y);
}
241#if defined OCML_BASIC_ROUNDED_OPERATIONS
// __frcp_rd: reciprocal expressed as 1.0f / __x through __ocml_div_rtn_f32.
float __frcp_rd(float __x)
{
  return __ocml_div_rtn_f32(1.0f, __x);
}

// __frcp_rn: reciprocal expressed as 1.0f / __x through __ocml_div_rte_f32.
float __frcp_rn(float __x)
{
  return __ocml_div_rte_f32(1.0f, __x);
}

// __frcp_ru: reciprocal expressed as 1.0f / __x through __ocml_div_rtp_f32.
float __frcp_ru(float __x)
{
  return __ocml_div_rtp_f32(1.0f, __x);
}

// __frcp_rz: reciprocal expressed as 1.0f / __x through __ocml_div_rtz_f32.
float __frcp_rz(float __x)
{
  return __ocml_div_rtz_f32(1.0f, __x);
}
// __frsqrt_rn: returns whatever the __builtin_amdgcn_rsqf builtin yields
// for __x.
float __frsqrt_rn(float __x)
{
  return __builtin_amdgcn_rsqf(__x);
}
258#if defined OCML_BASIC_ROUNDED_OPERATIONS
// __fsqrt_rd: square root of __x via __ocml_sqrt_rtn_f32.
float __fsqrt_rd(float __x)
{
  return __ocml_sqrt_rtn_f32(__x);
}

// __fsqrt_rn: square root of __x via __ocml_sqrt_rte_f32.
float __fsqrt_rn(float __x)
{
  return __ocml_sqrt_rte_f32(__x);
}

// __fsqrt_ru: square root of __x via __ocml_sqrt_rtp_f32.
float __fsqrt_ru(float __x)
{
  return __ocml_sqrt_rtp_f32(__x);
}

// __fsqrt_rz: square root of __x via __ocml_sqrt_rtz_f32.
float __fsqrt_rz(float __x)
{
  return __ocml_sqrt_rtz_f32(__x);
}
// __fsqrt_rn (alternate definition): uses __ocml_native_sqrt_f32 instead of
// the rte-specific OCML entry point.
float __fsqrt_rn(float __x)
{
  return __ocml_native_sqrt_f32(__x);
}
272#if defined OCML_BASIC_ROUNDED_OPERATIONS
// __fsub_rd: __x - __y via __ocml_sub_rtn_f32.
float __fsub_rd(float __x, float __y)
{
  return __ocml_sub_rtn_f32(__x, __y);
}

// __fsub_rn: __x - __y via __ocml_sub_rte_f32.
float __fsub_rn(float __x, float __y)
{
  return __ocml_sub_rte_f32(__x, __y);
}

// __fsub_ru: __x - __y via __ocml_sub_rtp_f32.
float __fsub_ru(float __x, float __y)
{
  return __ocml_sub_rtp_f32(__x, __y);
}

// __fsub_rz: __x - __y via __ocml_sub_rtz_f32.
float __fsub_rz(float __x, float __y)
{
  return __ocml_sub_rtz_f32(__x, __y);
}
// __log10f: delegates to the compiler's __builtin_log10f.
float __log10f(float __x)
{
  return __builtin_log10f(__x);
}

// __log2f: delegates to the target builtin __builtin_amdgcn_logf.
float __log2f(float __x)
{
  return __builtin_amdgcn_logf(__x);
}

// __logf: delegates to the compiler's __builtin_logf.
float __logf(float __x)
{
  return __builtin_logf(__x);
}
// __saturatef: clamp __x to the closed interval [+0.0f, 1.0f].
//
// Returns __x when 0 < __x < 1, 1.0f when __x >= 1 (including +inf) and
// +0.0f otherwise. "Otherwise" covers negative values, -0.0f and NaN: the
// comparison is written as `__x > 0` so that an unordered (NaN) operand and a
// negative zero both fall through to the +0.0f result instead of being
// returned unchanged, which is the behaviour CUDA documents for this
// intrinsic.
float __saturatef(float __x)
{
  if (__x > 0.0f)
    return (__x < 1.0f) ? __x : 1.0f;
  return 0.0f;
}
302void __sincosf(
float __x,
float *__sinptr,
float *__cosptr) {
312 return __sinf(__x) * __builtin_amdgcn_rcpf(
__cosf(__x));
316#if defined(__cplusplus)
319 return __builtin_abs(__x);
323 return __builtin_labs(__x);
326long long llabs(
long long __x) {
327 return __builtin_llabs(__x);
// acosf: forwards __x to __ocml_acos_f32.
float acosf(float __x)
{
  return __ocml_acos_f32(__x);
}

// acoshf: forwards __x to __ocml_acosh_f32.
float acoshf(float __x)
{
  return __ocml_acosh_f32(__x);
}

// asinf: forwards __x to __ocml_asin_f32.
float asinf(float __x)
{
  return __ocml_asin_f32(__x);
}

// asinhf: forwards __x to __ocml_asinh_f32.
float asinhf(float __x)
{
  return __ocml_asinh_f32(__x);
}

// atan2f: forwards (__x, __y), in that order, to __ocml_atan2_f32.
float atan2f(float __x, float __y)
{
  return __ocml_atan2_f32(__x, __y);
}
// atanf: forwards __x to __ocml_atan_f32.
float atanf(float __x)
{
  return __ocml_atan_f32(__x);
}

// atanhf: forwards __x to __ocml_atanh_f32.
float atanhf(float __x)
{
  return __ocml_atanh_f32(__x);
}

// cbrtf: forwards __x to __ocml_cbrt_f32.
float cbrtf(float __x)
{
  return __ocml_cbrt_f32(__x);
}

// ceilf: implemented directly with __builtin_ceilf.
float ceilf(float __x)
{
  return __builtin_ceilf(__x);
}

// coshf: forwards __x to __ocml_cosh_f32.
float coshf(float __x)
{
  return __ocml_cosh_f32(__x);
}
// erfcf: forwards __x to __ocml_erfc_f32.
float erfcf(float __x)
{
  return __ocml_erfc_f32(__x);
}

// erfcinvf: forwards __x to __ocml_erfcinv_f32.
float erfcinvf(float __x)
{
  return __ocml_erfcinv_f32(__x);
}

// erfcxf: forwards __x to __ocml_erfcx_f32.
float erfcxf(float __x)
{
  return __ocml_erfcx_f32(__x);
}

// erff: forwards __x to __ocml_erf_f32.
float erff(float __x)
{
  return __ocml_erf_f32(__x);
}

// erfinvf: forwards __x to __ocml_erfinv_f32.
float erfinvf(float __x)
{
  return __ocml_erfinv_f32(__x);
}
// exp10f: forwards __x to __ocml_exp10_f32.
float exp10f(float __x)
{
  return __ocml_exp10_f32(__x);
}

// exp2f: implemented directly with __builtin_exp2f.
float exp2f(float __x)
{
  return __builtin_exp2f(__x);
}

// expf: implemented directly with __builtin_expf.
float expf(float __x)
{
  return __builtin_expf(__x);
}

// expm1f: forwards __x to __ocml_expm1_f32.
float expm1f(float __x)
{
  return __ocml_expm1_f32(__x);
}

// fabsf: implemented directly with __builtin_fabsf.
float fabsf(float __x)
{
  return __builtin_fabsf(__x);
}
// fdimf: forwards (__x, __y) to __ocml_fdim_f32.
float fdimf(float __x, float __y)
{
  return __ocml_fdim_f32(__x, __y);
}

// floorf: implemented directly with __builtin_floorf.
float floorf(float __x)
{
  return __builtin_floorf(__x);
}
417 return __builtin_fmaf(__x,
__y, __z);
// fmaxf: implemented directly with __builtin_fmaxf.
float fmaxf(float __x, float __y)
{
  return __builtin_fmaxf(__x, __y);
}

// fminf: implemented directly with __builtin_fminf.
float fminf(float __x, float __y)
{
  return __builtin_fminf(__x, __y);
}

// fmodf: forwards (__x, __y) to __ocml_fmod_f32.
float fmodf(float __x, float __y)
{
  return __ocml_fmod_f32(__x, __y);
}
431 return __builtin_frexpf(__x, __nptr);
// hypotf: forwards (__x, __y) to __ocml_hypot_f32.
float hypotf(float __x, float __y)
{
  return __ocml_hypot_f32(__x, __y);
}

// ilogbf: returns the int produced by __ocml_ilogb_f32 for __x.
int ilogbf(float __x)
{
  return __ocml_ilogb_f32(__x);
}
456float jnf(
int __n,
float __x) {
465 float __x0 =
j0f(__x);
466 float __x1 =
j1f(__x);
467 for (
int __i = 1; __i < __n; ++__i) {
468 float __x2 = (2 * __i) / __x * __x1 - __x0;
// ldexpf: passes __x and the exponent __e to __builtin_amdgcn_ldexpf.
float ldexpf(float __x, int __e)
{
  return __builtin_amdgcn_ldexpf(__x, __e);
}

// llrintf: __builtin_rintf produces a float; the return statement converts
// it to long long int.
long long int llrintf(float __x)
{
  return __builtin_rintf(__x);
}

// llroundf: __builtin_roundf produces a float; the return statement converts
// it to long long int.
long long int llroundf(float __x)
{
  return __builtin_roundf(__x);
}
// log10f: implemented directly with __builtin_log10f.
float log10f(float __x)
{
  return __builtin_log10f(__x);
}

// log1pf: forwards __x to __ocml_log1p_f32.
float log1pf(float __x)
{
  return __ocml_log1p_f32(__x);
}

// logbf: forwards __x to __ocml_logb_f32.
float logbf(float __x)
{
  return __ocml_logb_f32(__x);
}
// lrintf: __builtin_rintf produces a float; the return statement converts it
// to long int.
long int lrintf(float __x)
{
  return __builtin_rintf(__x);
}

// lroundf: __builtin_roundf produces a float; the return statement converts
// it to long int.
long int lroundf(float __x)
{
  return __builtin_roundf(__x);
}
510float modff(
float __x,
float *__iptr) {
512#ifdef __OPENMP_AMDGCN__
513#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
526 unsigned int mantissa : 22;
527 unsigned int quiet : 1;
528 unsigned int exponent : 8;
529 unsigned int sign : 1;
534 __tmp.bits.sign = 0u;
535 __tmp.bits.exponent = ~0u;
536 __tmp.bits.quiet = 1u;
// nearbyintf: implemented directly with __builtin_nearbyintf.
float nearbyintf(float __x)
{
  return __builtin_nearbyintf(__x);
}
547 return __ocml_nextafter_f32(__x,
__y);
552 return __ocml_len3_f32(__x,
__y, __z);
557 return __ocml_len4_f32(__x,
__y, __z, __w);
// normcdff: forwards __x to __ocml_ncdf_f32.
float normcdff(float __x)
{
  return __ocml_ncdf_f32(__x);
}
575 return __builtin_sqrtf(__r);
// powf: forwards (__x, __y) to __ocml_pow_f32.
float powf(float __x, float __y)
{
  return __ocml_pow_f32(__x, __y);
}

// powif: float base with an int exponent, forwarded to __ocml_pown_f32.
float powif(float __x, int __y)
{
  return __ocml_pown_f32(__x, __y);
}

// rcbrtf: forwards __x to __ocml_rcbrt_f32.
float rcbrtf(float __x)
{
  return __ocml_rcbrt_f32(__x);
}
589 return __ocml_remainder_f32(__x,
__y);
595#ifdef __OPENMP_AMDGCN__
596#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
// rintf: implemented directly with __builtin_rintf.
float rintf(float __x)
{
  return __builtin_rintf(__x);
}
613 return __ocml_rlen3_f32(__x,
__y, __z);
618 return __ocml_rlen4_f32(__x,
__y, __z, __w);
630 return __ocml_rsqrt_f32(__r);
// roundf: implemented directly with __builtin_roundf.
float roundf(float __x)
{
  return __builtin_roundf(__x);
}

// rsqrtf: forwards __x to __ocml_rsqrt_f32.
float rsqrtf(float __x)
{
  return __ocml_rsqrt_f32(__x);
}
641 return (__n <
INT_MAX) ? __builtin_amdgcn_ldexpf(__x, __n)
642 : __ocml_scalb_f32(__x, __n);
// scalbnf: passes __x and the int exponent __n to __builtin_amdgcn_ldexpf.
float scalbnf(float __x, int __n)
{
  return __builtin_amdgcn_ldexpf(__x, __n);
}
652void sincosf(
float __x,
float *__sinptr,
float *__cosptr) {
654#ifdef __OPENMP_AMDGCN__
655#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
657#ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__
667void sincospif(
float __x,
float *__sinptr,
float *__cosptr) {
669#ifdef __OPENMP_AMDGCN__
670#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
// sinhf: forwards __x to __ocml_sinh_f32.
float sinhf(float __x)
{
  return __ocml_sinh_f32(__x);
}

// sqrtf: implemented directly with __builtin_sqrtf.
float sqrtf(float __x)
{
  return __builtin_sqrtf(__x);
}

// tanhf: forwards __x to __ocml_tanh_f32.
float tanhf(float __x)
{
  return __ocml_tanh_f32(__x);
}

// truncf: implemented directly with __builtin_truncf.
float truncf(float __x)
{
  return __builtin_truncf(__x);
}
708float ynf(
int __n,
float __x) {
718 float __x0 =
y0f(__x);
719 float __x1 =
y1f(__x);
720 for (
int __i = 1; __i < __n; ++__i) {
721 float __x2 = (2 * __i) / __x * __x1 - __x0;
// acos: forwards __x to __ocml_acos_f64.
double acos(double __x)
{
  return __ocml_acos_f64(__x);
}

// acosh: forwards __x to __ocml_acosh_f64.
double acosh(double __x)
{
  return __ocml_acosh_f64(__x);
}

// asin: forwards __x to __ocml_asin_f64.
double asin(double __x)
{
  return __ocml_asin_f64(__x);
}

// asinh: forwards __x to __ocml_asinh_f64.
double asinh(double __x)
{
  return __ocml_asinh_f64(__x);
}

// atan: forwards __x to __ocml_atan_f64.
double atan(double __x)
{
  return __ocml_atan_f64(__x);
}
// atan2: forwards (__x, __y), in that order, to __ocml_atan2_f64.
double atan2(double __x, double __y)
{
  return __ocml_atan2_f64(__x, __y);
}

// atanh: forwards __x to __ocml_atanh_f64.
double atanh(double __x)
{
  return __ocml_atanh_f64(__x);
}

// cbrt: forwards __x to __ocml_cbrt_f64.
double cbrt(double __x)
{
  return __ocml_cbrt_f64(__x);
}

// ceil: implemented directly with __builtin_ceil.
double ceil(double __x)
{
  return __builtin_ceil(__x);
}
762 return __builtin_copysign(__x,
__y);
// cosh: forwards __x to __ocml_cosh_f64.
double cosh(double __x)
{
  return __ocml_cosh_f64(__x);
}

// erf: forwards __x to __ocml_erf_f64.
double erf(double __x)
{
  return __ocml_erf_f64(__x);
}

// erfc: forwards __x to __ocml_erfc_f64.
double erfc(double __x)
{
  return __ocml_erfc_f64(__x);
}

// erfcinv: forwards __x to __ocml_erfcinv_f64.
double erfcinv(double __x)
{
  return __ocml_erfcinv_f64(__x);
}

// erfcx: forwards __x to __ocml_erfcx_f64.
double erfcx(double __x)
{
  return __ocml_erfcx_f64(__x);
}

// erfinv: forwards __x to __ocml_erfinv_f64.
double erfinv(double __x)
{
  return __ocml_erfinv_f64(__x);
}
// exp: forwards __x to __ocml_exp_f64.
double exp(double __x)
{
  return __ocml_exp_f64(__x);
}

// exp10: forwards __x to __ocml_exp10_f64.
double exp10(double __x)
{
  return __ocml_exp10_f64(__x);
}

// exp2: forwards __x to __ocml_exp2_f64.
double exp2(double __x)
{
  return __ocml_exp2_f64(__x);
}

// expm1: forwards __x to __ocml_expm1_f64.
double expm1(double __x)
{
  return __ocml_expm1_f64(__x);
}

// fabs: implemented directly with __builtin_fabs.
double fabs(double __x)
{
  return __builtin_fabs(__x);
}
// fdim: forwards (__x, __y) to __ocml_fdim_f64.
double fdim(double __x, double __y)
{
  return __ocml_fdim_f64(__x, __y);
}

// floor: implemented directly with __builtin_floor.
double floor(double __x)
{
  return __builtin_floor(__x);
}
817double fma(
double __x,
double __y,
double __z) {
818 return __builtin_fma(__x,
__y, __z);
// fmax: implemented directly with __builtin_fmax.
double fmax(double __x, double __y)
{
  return __builtin_fmax(__x, __y);
}

// fmin: implemented directly with __builtin_fmin.
double fmin(double __x, double __y)
{
  return __builtin_fmin(__x, __y);
}

// fmod: forwards (__x, __y) to __ocml_fmod_f64.
double fmod(double __x, double __y)
{
  return __ocml_fmod_f64(__x, __y);
}
831double frexp(
double __x,
int *__nptr) {
832 return __builtin_frexp(__x, __nptr);
// hypot: forwards (__x, __y) to __ocml_hypot_f64.
double hypot(double __x, double __y)
{
  return __ocml_hypot_f64(__x, __y);
}

// ilogb: returns the int produced by __ocml_ilogb_f64 for __x.
int ilogb(double __x)
{
  return __ocml_ilogb_f64(__x);
}
857double jn(
int __n,
double __x) {
867 double __x0 =
j0(__x);
868 double __x1 =
j1(__x);
869 for (
int __i = 1; __i < __n; ++__i) {
870 double __x2 = (2 * __i) / __x * __x1 - __x0;
// ldexp: passes __x and the exponent __e to __builtin_amdgcn_ldexp.
double ldexp(double __x, int __e)
{
  return __builtin_amdgcn_ldexp(__x, __e);
}

// llrint: __builtin_rint produces a double; the return statement converts it
// to long long int.
long long int llrint(double __x)
{
  return __builtin_rint(__x);
}

// llround: __builtin_round produces a double; the return statement converts
// it to long long int.
long long int llround(double __x)
{
  return __builtin_round(__x);
}
// log: forwards __x to __ocml_log_f64.
double log(double __x)
{
  return __ocml_log_f64(__x);
}

// log10: forwards __x to __ocml_log10_f64.
double log10(double __x)
{
  return __ocml_log10_f64(__x);
}

// log1p: forwards __x to __ocml_log1p_f64.
double log1p(double __x)
{
  return __ocml_log1p_f64(__x);
}

// log2: forwards __x to __ocml_log2_f64.
double log2(double __x)
{
  return __ocml_log2_f64(__x);
}

// logb: forwards __x to __ocml_logb_f64.
double logb(double __x)
{
  return __ocml_logb_f64(__x);
}
// lrint: __builtin_rint produces a double; the return statement converts it
// to long int.
long int lrint(double __x)
{
  return __builtin_rint(__x);
}

// lround: __builtin_round produces a double; the return statement converts
// it to long int.
long int lround(double __x)
{
  return __builtin_round(__x);
}
911double modf(
double __x,
double *__iptr) {
913#ifdef __OPENMP_AMDGCN__
914#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
924double nan(
const char *__tagp) {
929 uint64_t mantissa : 51;
931 uint32_t exponent : 11;
937 __tmp.bits.sign = 0u;
938 __tmp.bits.exponent = ~0u;
939 __tmp.bits.quiet = 1u;
946 __val |= 0xFFF << 51;
947 return *
reinterpret_cast<double *
>(&__val);
// nearbyint: implemented directly with __builtin_nearbyint.
double nearbyint(double __x)
{
  return __builtin_nearbyint(__x);
}
956 return __ocml_nextafter_f64(__x,
__y);
968 return __builtin_sqrt(__r);
973 return __ocml_len3_f64(__x,
__y, __z);
977double norm4d(
double __x,
double __y,
double __z,
double __w) {
978 return __ocml_len4_f64(__x,
__y, __z, __w);
// normcdf: forwards __x to __ocml_ncdf_f64.
double normcdf(double __x)
{
  return __ocml_ncdf_f64(__x);
}

// normcdfinv: forwards __x to __ocml_ncdfinv_f64.
double normcdfinv(double __x)
{
  return __ocml_ncdfinv_f64(__x);
}
// pow: forwards (__x, __y) to __ocml_pow_f64.
double pow(double __x, double __y)
{
  return __ocml_pow_f64(__x, __y);
}

// powi: double base with an int exponent, forwarded to __ocml_pown_f64.
double powi(double __x, int __y)
{
  return __ocml_pown_f64(__x, __y);
}

// rcbrt: forwards __x to __ocml_rcbrt_f64.
double rcbrt(double __x)
{
  return __ocml_rcbrt_f64(__x);
}
998 return __ocml_remainder_f64(__x,
__y);
1004#ifdef __OPENMP_AMDGCN__
1005#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
// rhypot: forwards (__x, __y) to __ocml_rhypot_f64.
double rhypot(double __x, double __y)
{
  return __ocml_rhypot_f64(__x, __y);
}

// rint: implemented directly with __builtin_rint.
double rint(double __x)
{
  return __builtin_rint(__x);
}
1022 const double *
__a) {
1029 return __ocml_rsqrt_f64(__r);
1034 return __ocml_rlen3_f64(__x,
__y, __z);
1039 return __ocml_rlen4_f64(__x,
__y, __z, __w);
// round: implemented directly with __builtin_round.
double round(double __x)
{
  return __builtin_round(__x);
}

// rsqrt: forwards __x to __ocml_rsqrt_f64.
double rsqrt(double __x)
{
  return __ocml_rsqrt_f64(__x);
}
1050 return (__n <
INT_MAX) ? __builtin_amdgcn_ldexp(__x, __n)
1051 : __ocml_scalb_f64(__x, __n);
// scalbn: passes __x and the int exponent __n to __builtin_amdgcn_ldexp.
double scalbn(double __x, int __n)
{
  return __builtin_amdgcn_ldexp(__x, __n);
}
1063void sincos(
double __x,
double *__sinptr,
double *__cosptr) {
1065#ifdef __OPENMP_AMDGCN__
1066#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1074void sincospi(
double __x,
double *__sinptr,
double *__cosptr) {
1076#ifdef __OPENMP_AMDGCN__
1077#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
// sinh: forwards __x to __ocml_sinh_f64.
double sinh(double __x)
{
  return __ocml_sinh_f64(__x);
}

// sqrt: implemented directly with __builtin_sqrt.
double sqrt(double __x)
{
  return __builtin_sqrt(__x);
}

// tanh: forwards __x to __ocml_tanh_f64.
double tanh(double __x)
{
  return __ocml_tanh_f64(__x);
}

// trunc: implemented directly with __builtin_trunc.
double trunc(double __x)
{
  return __builtin_trunc(__x);
}
1112double yn(
int __n,
double __x) {
1122 double __x0 =
y0(__x);
1123 double __x1 =
y1(__x);
1124 for (
int __i = 1; __i < __n; ++__i) {
1125 double __x2 = (2 * __i) / __x * __x1 - __x0;
1134#if defined OCML_BASIC_ROUNDED_OPERATIONS
1137 return __ocml_add_rtn_f64(__x,
__y);
1141 return __ocml_add_rte_f64(__x,
__y);
1145 return __ocml_add_rtp_f64(__x,
__y);
1149 return __ocml_add_rtz_f64(__x,
__y);
1156#if defined OCML_BASIC_ROUNDED_OPERATIONS
1159 return __ocml_div_rtn_f64(__x,
__y);
1163 return __ocml_div_rte_f64(__x,
__y);
1167 return __ocml_div_rtp_f64(__x,
__y);
1171 return __ocml_div_rtz_f64(__x,
__y);
1178#if defined OCML_BASIC_ROUNDED_OPERATIONS
1181 return __ocml_mul_rtn_f64(__x,
__y);
1185 return __ocml_mul_rte_f64(__x,
__y);
1189 return __ocml_mul_rtp_f64(__x,
__y);
1193 return __ocml_mul_rtz_f64(__x,
__y);
1200#if defined OCML_BASIC_ROUNDED_OPERATIONS
// __drcp_rd: reciprocal expressed as 1.0 / __x through __ocml_div_rtn_f64.
double __drcp_rd(double __x)
{
  return __ocml_div_rtn_f64(1.0, __x);
}

// __drcp_rn: reciprocal expressed as 1.0 / __x through __ocml_div_rte_f64.
double __drcp_rn(double __x)
{
  return __ocml_div_rte_f64(1.0, __x);
}

// __drcp_ru: reciprocal expressed as 1.0 / __x through __ocml_div_rtp_f64.
double __drcp_ru(double __x)
{
  return __ocml_div_rtp_f64(1.0, __x);
}

// __drcp_rz: reciprocal expressed as 1.0 / __x through __ocml_div_rtz_f64.
double __drcp_rz(double __x)
{
  return __ocml_div_rtz_f64(1.0, __x);
}
1214#if defined OCML_BASIC_ROUNDED_OPERATIONS
// __dsqrt_rd: square root of __x via __ocml_sqrt_rtn_f64.
double __dsqrt_rd(double __x)
{
  return __ocml_sqrt_rtn_f64(__x);
}

// __dsqrt_rn: square root of __x via __ocml_sqrt_rte_f64.
double __dsqrt_rn(double __x)
{
  return __ocml_sqrt_rte_f64(__x);
}

// __dsqrt_ru: square root of __x via __ocml_sqrt_rtp_f64.
double __dsqrt_ru(double __x)
{
  return __ocml_sqrt_rtp_f64(__x);
}

// __dsqrt_rz: square root of __x via __ocml_sqrt_rtz_f64.
double __dsqrt_rz(double __x)
{
  return __ocml_sqrt_rtz_f64(__x);
}
1228#if defined OCML_BASIC_ROUNDED_OPERATIONS
1231 return __ocml_sub_rtn_f64(__x,
__y);
1235 return __ocml_sub_rte_f64(__x,
__y);
1239 return __ocml_sub_rtp_f64(__x,
__y);
1243 return __ocml_sub_rtz_f64(__x,
__y);
1250#if defined OCML_BASIC_ROUNDED_OPERATIONS
1252double __fma_rd(
double __x,
double __y,
double __z) {
1253 return __ocml_fma_rtn_f64(__x,
__y, __z);
1256double __fma_rn(
double __x,
double __y,
double __z) {
1257 return __ocml_fma_rte_f64(__x,
__y, __z);
1260double __fma_ru(
double __x,
double __y,
double __z) {
1261 return __ocml_fma_rtp_f64(__x,
__y, __z);
1264double __fma_rz(
double __x,
double __y,
double __z) {
1265 return __ocml_fma_rtz_f64(__x,
__y, __z);
1270 return __builtin_fma(__x,
__y, __z);
1277#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1278#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1279#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1280#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1281#define signbit(__x) \
1282 _Generic((__x), float : __signbitf, double : __signbit)(__x)
1285#if defined(__cplusplus)
1287 return (__arg1 < __arg2) ? __arg1 : __arg2;
1291 return (__arg1 > __arg2) ? __arg1 : __arg2;
1295 return (__arg1 < __arg2) ? __arg1 : __arg2;
1298 return (__arg1 > __arg2) ? __arg1 : __arg2;
// max (float overload): implemented with __builtin_fmaxf.
float max(float __x, float __y)
{
  return __builtin_fmaxf(__x, __y);
}

// max (double overload): implemented with __builtin_fmax.
double max(double __x, double __y)
{
  return __builtin_fmax(__x, __y);
}

// min (float overload): implemented with __builtin_fminf.
float min(float __x, float __y)
{
  return __builtin_fminf(__x, __y);
}

// min (double overload): implemented with __builtin_fmin.
double min(double __x, double __y)
{
  return __builtin_fmin(__x, __y);
}
1313#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1314__host__ inline static int min(
int __arg1,
int __arg2) {
1315 return __arg1 < __arg2 ? __arg1 : __arg2;
1318__host__ inline static int max(
int __arg1,
int __arg2) {
1319 return __arg1 > __arg2 ? __arg1 : __arg2;
1324#pragma pop_macro("__DEVICE__")
1325#pragma pop_macro("__RETURN_TYPE")
1326#pragma pop_macro("__FAST_OR_SLOW")
__DEVICE__ long long abs(long long __n)
__DEVICE__ float __fsqrt_rd(float __a)
__DEVICE__ float __fdiv_rd(float __a, float __b)
__DEVICE__ double __dsub_ru(double __a, double __b)
__DEVICE__ double __drcp_ru(double __a)
__DEVICE__ float __frcp_rz(float __a)
__DEVICE__ float __fmul_ru(float __a, float __b)
__DEVICE__ double __dsub_rd(double __a, double __b)
__DEVICE__ float __frcp_ru(float __a)
__DEVICE__ float __frcp_rd(float __a)
__DEVICE__ double __dmul_ru(double __a, double __b)
__DEVICE__ float __fmaf_ru(float __a, float __b, float __c)
__DEVICE__ double __fma_rz(double __a, double __b, double __c)
__DEVICE__ double __fma_rd(double __a, double __b, double __c)
__DEVICE__ double __dmul_rd(double __a, double __b)
__DEVICE__ double __ddiv_ru(double __a, double __b)
__DEVICE__ double __ddiv_rd(double __a, double __b)
__DEVICE__ double __dadd_ru(double __a, double __b)
__DEVICE__ float __fmul_rd(float __a, float __b)
__DEVICE__ float __fsub_rd(float __a, float __b)
__DEVICE__ float __fsub_rz(float __a, float __b)
__DEVICE__ double __fma_ru(double __a, double __b, double __c)
__DEVICE__ double __dsqrt_ru(double __a)
__DEVICE__ float __fsqrt_rz(float __a)
__DEVICE__ double __dsub_rz(double __a, double __b)
__DEVICE__ float __fadd_rd(float __a, float __b)
__DEVICE__ float __fmul_rz(float __a, float __b)
__DEVICE__ float __fadd_rz(float __a, float __b)
__DEVICE__ double __dsqrt_rd(double __a)
__DEVICE__ float __fmaf_rd(float __a, float __b, float __c)
__DEVICE__ double __dadd_rd(double __a, double __b)
__DEVICE__ double __dsqrt_rz(double __a)
__DEVICE__ double __drcp_rd(double __a)
__DEVICE__ float __fdiv_rz(float __a, float __b)
__DEVICE__ float __fmaf_rz(float __a, float __b, float __c)
__DEVICE__ double __drcp_rz(double __a)
__DEVICE__ float __fsub_ru(float __a, float __b)
__DEVICE__ double __dmul_rz(double __a, double __b)
__DEVICE__ float __fsqrt_ru(float __a)
__DEVICE__ float __fadd_ru(float __a, float __b)
__DEVICE__ float __fdiv_ru(float __a, float __b)
__DEVICE__ double __ddiv_rz(double __a, double __b)
__DEVICE__ double __dadd_rz(double __a, double __b)
__DEVICE__ long labs(long __a)
__DEVICE__ long long llabs(long long __a)
__DEVICE__ int min(int __a, int __b)
__DEVICE__ int max(int __a, int __b)
__device__ double __ocml_i0_f64(double)
__device__ float __ocml_j1_f32(float)
__device__ double __ocml_remquo_f64(double, double, __attribute__((address_space(5))) int *)
__device__ double __ocml_modf_f64(double, __attribute__((address_space(5))) double *)
__device__ float __ocml_cospi_f32(float)
__device__ float __ocml_i0_f32(float)
__device__ double __ocml_lgamma_f64(double)
__device__ float __ocml_sincos_f32(float, __attribute__((address_space(5))) float *)
__device__ float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int *)
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
__device__ float __ocml_y0_f32(float)
__device__ float __ocml_modf_f32(float, __attribute__((address_space(5))) float *)
__device__ float __ocml_i1_f32(float)
__device__ float __ocml_lgamma_f32(float)
__device__ double __ocml_sinpi_f64(double)
__device__ double __ocml_cospi_f64(double)
__device__ double __ocml_tgamma_f64(double)
__device__ float __ocml_tan_f32(float)
__device__ float __ocml_tgamma_f32(float)
__device__ double __ocml_sincos_f64(double, __attribute__((address_space(5))) double *)
__device__ float __ocml_sinpi_f32(float)
__device__ double __ocml_j1_f64(double)
__device__ double __ocml_y1_f64(double)
__device__ double __ocml_j0_f64(double)
__device__ float __ocml_cos_f32(float)
__device__ float __ocml_y1_f32(float)
__device__ float __ocml_j0_f32(float)
__device__ double __ocml_cos_f64(double)
__device__ double __ocml_i1_f64(double)
__device__ double __ocml_sin_f64(double)
__device__ double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double *)
__device__ float __ocml_sin_f32(float)
__device__ float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float *)
__device__ float __ocml_native_sin_f32(float)
__device__ float __ocml_native_cos_f32(float)
__device__ double __ocml_y0_f64(double)
__device__ double __ocml_tan_f64(double)
__DEVICE__ double __dsub_rn(double __x, double __y)
__DEVICE__ __RETURN_TYPE __isinff(float __x)
__DEVICE__ float sinpif(float __x)
__DEVICE__ float tanf(float __x)
__DEVICE__ float log2f(float __x)
__DEVICE__ float y0f(float __x)
__DEVICE__ float tanhf(float __x)
__DEVICE__ float coshf(float __x)
__DEVICE__ float log10f(float __x)
__DEVICE__ float j1f(float __x)
__DEVICE__ __RETURN_TYPE __finitef(float __x)
__DEVICE__ float ldexpf(float __x, int __e)
__DEVICE__ long long int llroundf(float __x)
__DEVICE__ double rhypot(double __x, double __y)
__DEVICE__ double normcdfinv(double __x)
__DEVICE__ double norm3d(double __x, double __y, double __z)
__DEVICE__ float truncf(float __x)
__DEVICE__ float remainderf(float __x, float __y)
__DEVICE__ float fabsf(float __x)
__DEVICE__ float __fdiv_rn(float __x, float __y)
__DEVICE__ float scalbnf(float __x, int __n)
__DEVICE__ float cyl_bessel_i0f(float __x)
__DEVICE__ float nanf(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float lgammaf(float __x)
__DEVICE__ float cospif(float __x)
__DEVICE__ __RETURN_TYPE __signbitf(float __x)
__DEVICE__ double __dsqrt_rn(double __x)
__DEVICE__ float frexpf(float __x, int *__nptr)
__DEVICE__ float tgammaf(float __x)
__DEVICE__ float __sinf(float __x)
__DEVICE__ float erfinvf(float __x)
__DEVICE__ float modff(float __x, float *__iptr)
__DEVICE__ double erfinv(double __x)
__DEVICE__ float expm1f(float __x)
__DEVICE__ float sinhf(float __x)
__DEVICE__ double j0(double __x)
__DEVICE__ float y1f(float __x)
__DEVICE__ float acosf(float __x)
__DEVICE__ float fmaf(float __x, float __y, float __z)
__DEVICE__ float cyl_bessel_i1f(float __x)
__DEVICE__ float fmodf(float __x, float __y)
__DEVICE__ float log1pf(float __x)
__DEVICE__ float atan2f(float __x, float __y)
__DEVICE__ float copysignf(float __x, float __y)
__DEVICE__ double j1(double __x)
__DEVICE__ __RETURN_TYPE __isnan(double __x)
__DEVICE__ float rnormf(int __dim, const float *__a)
__DEVICE__ float rnorm4df(float __x, float __y, float __z, float __w)
__DEVICE__ float __cosf(float __x)
__DEVICE__ float erff(float __x)
__DEVICE__ float atanf(float __x)
__DEVICE__ float rnorm3df(float __x, float __y, float __z)
__DEVICE__ double norm(int __dim, const double *__a)
__DEVICE__ float erfcxf(float __x)
__DEVICE__ float erfcinvf(float __x)
__DEVICE__ float asinf(float __x)
__DEVICE__ long int lroundf(float __x)
__DEVICE__ float __fdividef(float __x, float __y)
__DEVICE__ float __frsqrt_rn(float __x)
__DEVICE__ float __log2f(float __x)
__DEVICE__ float norm4df(float __x, float __y, float __z, float __w)
__DEVICE__ __RETURN_TYPE __isnanf(float __x)
__DEVICE__ uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull)))
__DEVICE__ double jn(int __n, double __x)
__DEVICE__ float __exp10f(float __x)
__DEVICE__ float __frcp_rn(float __x)
__DEVICE__ float ynf(int __n, float __x)
__DEVICE__ float powf(float __x, float __y)
__DEVICE__ float __fsub_rn(float __x, float __y)
__DEVICE__ double __dadd_rn(double __x, double __y)
__DEVICE__ float sinf(float __x)
__DEVICE__ float __tanf(float __x)
__DEVICE__ float remquof(float __x, float __y, int *__quo)
__DEVICE__ double normcdf(double __x)
__DEVICE__ float __fsqrt_rn(float __x)
__DEVICE__ float hypotf(float __x, float __y)
__DEVICE__ float __fmaf_rn(float __x, float __y, float __z)
__DEVICE__ void sincosf(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float exp10f(float __x)
__DEVICE__ double y1(double __x)
#define __FAST_OR_SLOW(fast, slow)
__DEVICE__ float fmaxf(float __x, float __y)
__DEVICE__ float fminf(float __x, float __y)
__DEVICE__ double erfcinv(double __x)
__DEVICE__ double powi(double __x, int __y)
__DEVICE__ float logf(float __x)
__DEVICE__ float __fadd_rn(float __x, float __y)
__DEVICE__ double cospi(double __x)
__DEVICE__ double rsqrt(double __x)
__DEVICE__ float erfcf(float __x)
__DEVICE__ float atanhf(float __x)
__DEVICE__ float asinhf(float __x)
__DEVICE__ float __expf(float __x)
__DEVICE__ double norm4d(double __x, double __y, double __z, double __w)
__DEVICE__ float __logf(float __x)
__DEVICE__ double __fma_rn(double __x, double __y, double __z)
__DEVICE__ double nan(const char *__tagp)
__DEVICE__ double rnorm(int __dim, const double *__a)
__DEVICE__ float j0f(float __x)
__DEVICE__ float rsqrtf(float __x)
__DEVICE__ float jnf(int __n, float __x)
__DEVICE__ double sinpi(double __x)
__DEVICE__ float logbf(float __x)
__DEVICE__ double y0(double __x)
__DEVICE__ __RETURN_TYPE __finite(double __x)
__DEVICE__ void __sincosf(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ double yn(int __n, double __x)
__DEVICE__ float rhypotf(float __x, float __y)
__DEVICE__ float exp2f(float __x)
__DEVICE__ double cyl_bessel_i0(double __x)
__DEVICE__ float powif(float __x, int __y)
__DEVICE__ double __ddiv_rn(double __x, double __y)
__DEVICE__ double cyl_bessel_i1(double __x)
__DEVICE__ float ceilf(float __x)
__DEVICE__ double rcbrt(double __x)
__DEVICE__ double rnorm3d(double __x, double __y, double __z)
__DEVICE__ float normcdfinvf(float __x)
__DEVICE__ float norm3df(float __x, float __y, float __z)
__DEVICE__ void sincos(double __x, double *__sinptr, double *__cosptr)
#define __static_assert_type_size_equal(A, B)
__DEVICE__ __RETURN_TYPE __signbit(double __x)
__DEVICE__ float fdimf(float __x, float __y)
__DEVICE__ double __dmul_rn(double __x, double __y)
__DEVICE__ float normf(int __dim, const float *__a)
__DEVICE__ float nearbyintf(float __x)
__DEVICE__ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull)))
__DEVICE__ int ilogbf(float __x)
__DEVICE__ float floorf(float __x)
__DEVICE__ float sqrtf(float __x)
__DEVICE__ float roundf(float __x)
__DEVICE__ void sincospif(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ double __drcp_rn(double __x)
__DEVICE__ long int lrintf(float __x)
__DEVICE__ float acoshf(float __x)
__DEVICE__ double modf(double __x, double *__iptr)
__DEVICE__ float cosf(float __x)
__DEVICE__ float expf(float __x)
__DEVICE__ float nextafterf(float __x, float __y)
__DEVICE__ double rnorm4d(double __x, double __y, double __z, double __w)
__DEVICE__ long long int llrintf(float __x)
__DEVICE__ double erfcx(double __x)
__DEVICE__ float fdividef(float __x, float __y)
__DEVICE__ float rcbrtf(float __x)
__DEVICE__ double exp10(double __x)
__DEVICE__ float __log10f(float __x)
__DEVICE__ float cbrtf(float __x)
__DEVICE__ float __fmul_rn(float __x, float __y)
__DEVICE__ void sincospi(double __x, double *__sinptr, double *__cosptr)
__DEVICE__ float scalblnf(float __x, long int __n)
__DEVICE__ __RETURN_TYPE __isinf(double __x)
__DEVICE__ float rintf(float __x)
__DEVICE__ float normcdff(float __x)
__DEVICE__ uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float __saturatef(float __x)
__DEVICE__ float __powf(float __x, float __y)
static __inline__ uint32_t uint32_t __y
static __inline__ void int __a
const FunctionProtoType * T
float __ovld __cnfn sign(float)
Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x = +0.0, or -1.0 if x < 0.
#define scalbln(__x, __y)
#define copysign(__x, __y)
#define remquo(__x, __y, __z)
#define nextafter(__x, __y)
#define remainder(__x, __y)
#define fma(__x, __y, __z)