clang 20.0.0git
__clang_hip_math.h
Go to the documentation of this file.
1/*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __CLANG_HIP_MATH_H__
10#define __CLANG_HIP_MATH_H__
11
12#if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
13#error "This file is for HIP and OpenMP AMDGCN device compilation only."
14#endif
15
16// The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard
17// libcalls reach the link step instead of being eagerly replaced.
18#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS
19
20#if !defined(__HIPCC_RTC__)
21#include <limits.h>
22#include <stdint.h>
23#ifdef __OPENMP_AMDGCN__
24#include <omp.h>
25#endif
26#endif // !defined(__HIPCC_RTC__)
27
28#pragma push_macro("__DEVICE__")
29
30#ifdef __OPENMP_AMDGCN__
31#define __DEVICE__ static inline __attribute__((always_inline, nothrow))
32#else
33#define __DEVICE__ static __device__ inline __attribute__((always_inline))
34#endif
35
36// Device library provides fast low precision and slow full-precision
37// implementations for some functions. Which one gets selected depends on
38// __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if
39// -ffast-math or -fgpu-approx-transcendentals are in effect.
40#pragma push_macro("__FAST_OR_SLOW")
41#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
42#define __FAST_OR_SLOW(fast, slow) fast
43#else
44#define __FAST_OR_SLOW(fast, slow) slow
45#endif
46
47// A few functions return bool type starting only in C++11.
48#pragma push_macro("__RETURN_TYPE")
49#ifdef __OPENMP_AMDGCN__
50#define __RETURN_TYPE int
51#else
52#if defined(__cplusplus)
53#define __RETURN_TYPE bool
54#else
55#define __RETURN_TYPE int
56#endif
57#endif // __OPENMP_AMDGCN__
58
59#if defined (__cplusplus) && __cplusplus < 201103L
60// emulate static_assert on type sizes
61template<bool>
62struct __compare_result{};
63template<>
64struct __compare_result<true> {
65 static const __device__ bool valid;
66};
67
69void __suppress_unused_warning(bool b){};
70template <unsigned int S, unsigned int T>
71__DEVICE__ void __static_assert_equal_size() {
72 __suppress_unused_warning(__compare_result<S == T>::valid);
73}
74
75#define __static_assert_type_size_equal(A, B) \
76 __static_assert_equal_size<A,B>()
77
78#else
79#define __static_assert_type_size_equal(A,B) \
80 static_assert((A) == (B), "")
81
82#endif
83
// Parses __tagp as a string of octal digits and returns the accumulated value
// (arithmetic wraps modulo 2^64). Returns 0 for an empty string or as soon as
// a character outside '0'..'7' is seen.
uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  for (; *__tagp != '\0'; ++__tagp) {
    char __tmp = *__tagp;

    if (__tmp < '0' || __tmp > '7')
      return 0;

    __r = (__r * 8u) + __tmp - '0';
  }

  return __r;
}

// Parses __tagp as a string of decimal digits and returns the accumulated
// value (arithmetic wraps modulo 2^64). Returns 0 for an empty string or as
// soon as a character outside '0'..'9' is seen.
uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  for (; *__tagp != '\0'; ++__tagp) {
    char __tmp = *__tagp;

    if (__tmp < '0' || __tmp > '9')
      return 0;

    __r = (__r * 10u) + __tmp - '0';
  }

  return __r;
}

// Parses __tagp as a string of hexadecimal digits (either letter case, no
// "0x" prefix) and returns the accumulated value (arithmetic wraps modulo
// 2^64). Returns 0 for an empty string or on the first non-hex character.
uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
  uint64_t __r = 0;
  for (; *__tagp != '\0'; ++__tagp) {
    char __tmp = *__tagp;

    if (__tmp >= '0' && __tmp <= '9')
      __r = (__r * 16u) + __tmp - '0';
    else if (__tmp >= 'a' && __tmp <= 'f')
      __r = (__r * 16u) + __tmp - 'a' + 10;
    else if (__tmp >= 'A' && __tmp <= 'F')
      __r = (__r * 16u) + __tmp - 'A' + 10;
    else
      return 0;
  }

  return __r;
}

// Converts a NaN tag string into an integer payload. The radix is chosen from
// the prefix: "0x"/"0X" selects hexadecimal, any other leading '0' selects
// octal, everything else is decimal. Malformed input yields 0.
uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
  if (*__tagp == '0') {
    ++__tagp;

    // Step over the 'x'/'X' marker as well: the base-16 parser only accepts
    // hex digits, so handing it the marker would reject every hex tag.
    if (*__tagp == 'x' || *__tagp == 'X')
      return __make_mantissa_base16(__tagp + 1);

    return __make_mantissa_base8(__tagp);
  }

  return __make_mantissa_base10(__tagp);
}
152
153// BEGIN FLOAT
154
155// BEGIN INTRINSICS
156
158float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
159
161float __exp10f(float __x) {
162 const float __log2_10 = 0x1.a934f0p+1f;
163 return __builtin_amdgcn_exp2f(__log2_10 * __x);
164}
165
167float __expf(float __x) {
168 const float __log2_e = 0x1.715476p+0;
169 return __builtin_amdgcn_exp2f(__log2_e * __x);
170}
171
172#if defined OCML_BASIC_ROUNDED_OPERATIONS
174float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
176float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
178float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
180float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
181#else
183float __fadd_rn(float __x, float __y) { return __x + __y; }
184#endif
185
186#if defined OCML_BASIC_ROUNDED_OPERATIONS
188float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
190float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
192float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
194float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
195#else
197float __fdiv_rn(float __x, float __y) { return __x / __y; }
198#endif
199
201float __fdividef(float __x, float __y) { return __x / __y; }
202
203#if defined OCML_BASIC_ROUNDED_OPERATIONS
205float __fmaf_rd(float __x, float __y, float __z) {
206 return __ocml_fma_rtn_f32(__x, __y, __z);
207}
209float __fmaf_rn(float __x, float __y, float __z) {
210 return __ocml_fma_rte_f32(__x, __y, __z);
211}
213float __fmaf_ru(float __x, float __y, float __z) {
214 return __ocml_fma_rtp_f32(__x, __y, __z);
215}
217float __fmaf_rz(float __x, float __y, float __z) {
218 return __ocml_fma_rtz_f32(__x, __y, __z);
219}
220#else
222float __fmaf_rn(float __x, float __y, float __z) {
223 return __builtin_fmaf(__x, __y, __z);
224}
225#endif
226
227#if defined OCML_BASIC_ROUNDED_OPERATIONS
229float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
231float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
233float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
235float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
236#else
238float __fmul_rn(float __x, float __y) { return __x * __y; }
239#endif
240
241#if defined OCML_BASIC_ROUNDED_OPERATIONS
243float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
245float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
247float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
249float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
250#else
252float __frcp_rn(float __x) { return 1.0f / __x; }
253#endif
254
256float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
257
258#if defined OCML_BASIC_ROUNDED_OPERATIONS
260float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
262float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
264float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
266float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
267#else
269float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
270#endif
271
272#if defined OCML_BASIC_ROUNDED_OPERATIONS
274float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
276float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
278float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
280float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
281#else
283float __fsub_rn(float __x, float __y) { return __x - __y; }
284#endif
285
287float __log10f(float __x) { return __builtin_log10f(__x); }
288
290float __log2f(float __x) { return __builtin_amdgcn_logf(__x); }
291
293float __logf(float __x) { return __builtin_logf(__x); }
294
296float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
297
// Clamps __x to the closed interval [0, 1]. A NaN input fails both
// comparisons below and is therefore returned unchanged.
float __saturatef(float __x) {
  if (__x < 0)
    return 0;
  if (__x > 1)
    return 1;
  return __x;
}
300
302void __sincosf(float __x, float *__sinptr, float *__cosptr) {
303 *__sinptr = __ocml_native_sin_f32(__x);
304 *__cosptr = __ocml_native_cos_f32(__x);
305}
306
308float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
309
311float __tanf(float __x) {
312 return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x));
313}
314// END INTRINSICS
315
316#if defined(__cplusplus)
318int abs(int __x) {
319 return __builtin_abs(__x);
320}
322long labs(long __x) {
323 return __builtin_labs(__x);
324}
326long long llabs(long long __x) {
327 return __builtin_llabs(__x);
328}
329#endif
330
332float acosf(float __x) { return __ocml_acos_f32(__x); }
333
335float acoshf(float __x) { return __ocml_acosh_f32(__x); }
336
338float asinf(float __x) { return __ocml_asin_f32(__x); }
339
341float asinhf(float __x) { return __ocml_asinh_f32(__x); }
342
344float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
345
347float atanf(float __x) { return __ocml_atan_f32(__x); }
348
350float atanhf(float __x) { return __ocml_atanh_f32(__x); }
351
353float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
354
356float ceilf(float __x) { return __builtin_ceilf(__x); }
357
359float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }
360
362float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); }
363
365float coshf(float __x) { return __ocml_cosh_f32(__x); }
366
368float cospif(float __x) { return __ocml_cospi_f32(__x); }
369
371float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
372
374float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
375
377float erfcf(float __x) { return __ocml_erfc_f32(__x); }
378
380float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
381
383float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
384
386float erff(float __x) { return __ocml_erf_f32(__x); }
387
389float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
390
392float exp10f(float __x) { return __ocml_exp10_f32(__x); }
393
395float exp2f(float __x) { return __builtin_exp2f(__x); }
396
398float expf(float __x) { return __builtin_expf(__x); }
399
401float expm1f(float __x) { return __ocml_expm1_f32(__x); }
402
404float fabsf(float __x) { return __builtin_fabsf(__x); }
405
407float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
408
410float fdividef(float __x, float __y) { return __x / __y; }
411
413float floorf(float __x) { return __builtin_floorf(__x); }
414
416float fmaf(float __x, float __y, float __z) {
417 return __builtin_fmaf(__x, __y, __z);
418}
419
421float fmaxf(float __x, float __y) { return __builtin_fmaxf(__x, __y); }
422
424float fminf(float __x, float __y) { return __builtin_fminf(__x, __y); }
425
427float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
428
430float frexpf(float __x, int *__nptr) {
431 return __builtin_frexpf(__x, __nptr);
432}
433
435float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
436
438int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
439
441__RETURN_TYPE __finitef(float __x) { return __builtin_isfinite(__x); }
442
444__RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); }
445
447__RETURN_TYPE __isnanf(float __x) { return __builtin_isnan(__x); }
448
450float j0f(float __x) { return __ocml_j0_f32(__x); }
451
453float j1f(float __x) { return __ocml_j1_f32(__x); }
454
// Single-precision Bessel function of the first kind of integer order __n at
// __x. Orders 0 and 1 forward to j0f/j1f; larger orders are built with the
// upward recurrence J(k+1) = (2k / x) * J(k) - J(k-1), one step per order.
// NOTE(review): for __n < 0 the loop never runs and the j1f value is returned
// unchanged -- confirm the intended result for negative orders.
// TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if it'd be
// beneficial in this case.
float jnf(int __n, float __x) {
  switch (__n) {
  case 0:
    return j0f(__x);
  case 1:
    return j1f(__x);
  default:
    break;
  }

  float __prev = j0f(__x);
  float __curr = j1f(__x);
  int __k = 1;
  while (__k < __n) {
    float __next = (2 * __k) / __x * __curr - __prev;
    __prev = __curr;
    __curr = __next;
    ++__k;
  }

  return __curr;
}
475
477float ldexpf(float __x, int __e) { return __builtin_amdgcn_ldexpf(__x, __e); }
478
480float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
481
483long long int llrintf(float __x) { return __builtin_rintf(__x); }
484
486long long int llroundf(float __x) { return __builtin_roundf(__x); }
487
489float log10f(float __x) { return __builtin_log10f(__x); }
490
492float log1pf(float __x) { return __ocml_log1p_f32(__x); }
493
495float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); }
496
498float logbf(float __x) { return __ocml_logb_f32(__x); }
499
501float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); }
502
504long int lrintf(float __x) { return __builtin_rintf(__x); }
505
507long int lroundf(float __x) { return __builtin_roundf(__x); }
508
510float modff(float __x, float *__iptr) {
511 float __tmp;
512#ifdef __OPENMP_AMDGCN__
513#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
514#endif
515 float __r =
516 __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
517 *__iptr = __tmp;
518 return __r;
519}
520
522float nanf(const char *__tagp __attribute__((nonnull))) {
523 union {
524 float val;
525 struct ieee_float {
526 unsigned int mantissa : 22;
527 unsigned int quiet : 1;
528 unsigned int exponent : 8;
529 unsigned int sign : 1;
530 } bits;
531 } __tmp;
532 __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
533
534 __tmp.bits.sign = 0u;
535 __tmp.bits.exponent = ~0u;
536 __tmp.bits.quiet = 1u;
537 __tmp.bits.mantissa = __make_mantissa(__tagp);
538
539 return __tmp.val;
540}
541
543float nearbyintf(float __x) { return __builtin_nearbyintf(__x); }
544
546float nextafterf(float __x, float __y) {
547 return __ocml_nextafter_f32(__x, __y);
548}
549
551float norm3df(float __x, float __y, float __z) {
552 return __ocml_len3_f32(__x, __y, __z);
553}
554
556float norm4df(float __x, float __y, float __z, float __w) {
557 return __ocml_len4_f32(__x, __y, __z, __w);
558}
559
561float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
562
564float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
565
567float normf(int __dim,
568 const float *__a) { // TODO: placeholder until OCML adds support.
569 float __r = 0;
570 while (__dim--) {
571 __r += __a[0] * __a[0];
572 ++__a;
573 }
574
575 return __builtin_sqrtf(__r);
576}
577
579float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
580
582float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
583
585float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
586
588float remainderf(float __x, float __y) {
589 return __ocml_remainder_f32(__x, __y);
590}
591
593float remquof(float __x, float __y, int *__quo) {
594 int __tmp;
595#ifdef __OPENMP_AMDGCN__
596#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
597#endif
598 float __r = __ocml_remquo_f32(
599 __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
600 *__quo = __tmp;
601
602 return __r;
603}
604
606float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
607
609float rintf(float __x) { return __builtin_rintf(__x); }
610
612float rnorm3df(float __x, float __y, float __z) {
613 return __ocml_rlen3_f32(__x, __y, __z);
614}
615
617float rnorm4df(float __x, float __y, float __z, float __w) {
618 return __ocml_rlen4_f32(__x, __y, __z, __w);
619}
620
// Reciprocal Euclidean length of the __dim-element vector __a: the sum of the
// squared elements, accumulated in single precision in index order, passed to
// __ocml_rsqrt_f32. A zero or negative __dim reads nothing, so the sum handed
// to __ocml_rsqrt_f32 is 0.
// TODO: placeholder until OCML adds support.
float rnormf(int __dim, const float *__a) {
  float __r = 0;
  // Bounded index loop: counting down with `while (__dim--)` would walk far
  // past the array for a negative __dim.
  for (int __i = 0; __i < __dim; ++__i)
    __r += __a[__i] * __a[__i];

  return __ocml_rsqrt_f32(__r);
}
632
634float roundf(float __x) { return __builtin_roundf(__x); }
635
637float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
638
// Scales __x by 2 raised to the power __n, where __n is a long.
// The ldexp builtin takes an int exponent, so it is used only when __n is
// inside the int range; anything else goes to __ocml_scalb_f32. Checking the
// lower bound as well keeps a very negative long from being truncated to an
// unrelated int exponent.
float scalblnf(float __x, long int __n) {
  if (__n >= INT_MIN && __n < INT_MAX)
    return __builtin_amdgcn_ldexpf(__x, (int)__n);

  return __ocml_scalb_f32(__x, __n);
}
644
646float scalbnf(float __x, int __n) { return __builtin_amdgcn_ldexpf(__x, __n); }
647
649__RETURN_TYPE __signbitf(float __x) { return __builtin_signbitf(__x); }
650
// Computes the sine and cosine of __x, storing them through __sinptr and
// __cosptr.
// With approximate transcendentals enabled the native __sincosf is used;
// otherwise __ocml_sincos_f32 returns the sine and writes the cosine into a
// private-address-space temporary that is then copied out.
void sincosf(float __x, float *__sinptr, float *__cosptr) {
// Key on the same macro that selects __FAST_OR_SLOW in this file; the older
// CUDA-named spelling is still honoured in case a caller defines it.
#if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__) ||                          \
    defined(__CLANG_CUDA_APPROX_TRANSCENDENTALS__)
  __sincosf(__x, __sinptr, __cosptr);
#else
  float __tmp;
#ifdef __OPENMP_AMDGCN__
#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
#endif
  *__sinptr =
      __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
  *__cosptr = __tmp;
#endif
}
665
667void sincospif(float __x, float *__sinptr, float *__cosptr) {
668 float __tmp;
669#ifdef __OPENMP_AMDGCN__
670#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
671#endif
672 *__sinptr = __ocml_sincospi_f32(
673 __x, (__attribute__((address_space(5))) float *)&__tmp);
674 *__cosptr = __tmp;
675}
676
678float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); }
679
681float sinhf(float __x) { return __ocml_sinh_f32(__x); }
682
684float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
685
687float sqrtf(float __x) { return __builtin_sqrtf(__x); }
688
690float tanf(float __x) { return __ocml_tan_f32(__x); }
691
693float tanhf(float __x) { return __ocml_tanh_f32(__x); }
694
696float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
697
699float truncf(float __x) { return __builtin_truncf(__x); }
700
702float y0f(float __x) { return __ocml_y0_f32(__x); }
703
705float y1f(float __x) { return __ocml_y1_f32(__x); }
706
// Single-precision Bessel function of the second kind of integer order __n at
// __x. Orders 0 and 1 forward to y0f/y1f; larger orders are built with the
// upward recurrence Y(k+1) = (2k / x) * Y(k) - Y(k-1), one step per order.
// NOTE(review): for __n < 0 the loop never runs and the y1f value is returned
// unchanged -- confirm the intended result for negative orders.
// TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if it'd be
// beneficial in this case. Placeholder until OCML adds support.
float ynf(int __n, float __x) {
  switch (__n) {
  case 0:
    return y0f(__x);
  case 1:
    return y1f(__x);
  default:
    break;
  }

  float __prev = y0f(__x);
  float __curr = y1f(__x);
  int __k = 1;
  while (__k < __n) {
    float __next = (2 * __k) / __x * __curr - __prev;
    __prev = __curr;
    __curr = __next;
    ++__k;
  }

  return __curr;
}
728
729
730// END FLOAT
731
732// BEGIN DOUBLE
734double acos(double __x) { return __ocml_acos_f64(__x); }
735
737double acosh(double __x) { return __ocml_acosh_f64(__x); }
738
740double asin(double __x) { return __ocml_asin_f64(__x); }
741
743double asinh(double __x) { return __ocml_asinh_f64(__x); }
744
746double atan(double __x) { return __ocml_atan_f64(__x); }
747
749double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
750
752double atanh(double __x) { return __ocml_atanh_f64(__x); }
753
755double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
756
758double ceil(double __x) { return __builtin_ceil(__x); }
759
761double copysign(double __x, double __y) {
762 return __builtin_copysign(__x, __y);
763}
764
766double cos(double __x) { return __ocml_cos_f64(__x); }
767
769double cosh(double __x) { return __ocml_cosh_f64(__x); }
770
772double cospi(double __x) { return __ocml_cospi_f64(__x); }
773
775double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
776
778double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
779
781double erf(double __x) { return __ocml_erf_f64(__x); }
782
784double erfc(double __x) { return __ocml_erfc_f64(__x); }
785
787double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
788
790double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
791
793double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
794
796double exp(double __x) { return __ocml_exp_f64(__x); }
797
799double exp10(double __x) { return __ocml_exp10_f64(__x); }
800
802double exp2(double __x) { return __ocml_exp2_f64(__x); }
803
805double expm1(double __x) { return __ocml_expm1_f64(__x); }
806
808double fabs(double __x) { return __builtin_fabs(__x); }
809
811double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
812
814double floor(double __x) { return __builtin_floor(__x); }
815
817double fma(double __x, double __y, double __z) {
818 return __builtin_fma(__x, __y, __z);
819}
820
822double fmax(double __x, double __y) { return __builtin_fmax(__x, __y); }
823
825double fmin(double __x, double __y) { return __builtin_fmin(__x, __y); }
826
828double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
829
831double frexp(double __x, int *__nptr) {
832 return __builtin_frexp(__x, __nptr);
833}
834
836double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
837
839int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
840
842__RETURN_TYPE __finite(double __x) { return __builtin_isfinite(__x); }
843
845__RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); }
846
848__RETURN_TYPE __isnan(double __x) { return __builtin_isnan(__x); }
849
851double j0(double __x) { return __ocml_j0_f64(__x); }
852
854double j1(double __x) { return __ocml_j1_f64(__x); }
855
// Double-precision Bessel function of the first kind of integer order __n at
// __x. Orders 0 and 1 forward to j0/j1; larger orders are built with the
// upward recurrence J(k+1) = (2k / x) * J(k) - J(k-1), one step per order.
// NOTE(review): for __n < 0 the loop never runs and the j1 value is returned
// unchanged -- confirm the intended result for negative orders.
// TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if it'd be
// beneficial in this case. Placeholder until OCML adds support.
double jn(int __n, double __x) {
  switch (__n) {
  case 0:
    return j0(__x);
  case 1:
    return j1(__x);
  default:
    break;
  }

  double __prev = j0(__x);
  double __curr = j1(__x);
  int __k = 1;
  while (__k < __n) {
    double __next = (2 * __k) / __x * __curr - __prev;
    __prev = __curr;
    __curr = __next;
    ++__k;
  }

  return __curr;
}
876
878double ldexp(double __x, int __e) { return __builtin_amdgcn_ldexp(__x, __e); }
879
881double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
882
884long long int llrint(double __x) { return __builtin_rint(__x); }
885
887long long int llround(double __x) { return __builtin_round(__x); }
888
890double log(double __x) { return __ocml_log_f64(__x); }
891
893double log10(double __x) { return __ocml_log10_f64(__x); }
894
896double log1p(double __x) { return __ocml_log1p_f64(__x); }
897
899double log2(double __x) { return __ocml_log2_f64(__x); }
900
902double logb(double __x) { return __ocml_logb_f64(__x); }
903
905long int lrint(double __x) { return __builtin_rint(__x); }
906
908long int lround(double __x) { return __builtin_round(__x); }
909
911double modf(double __x, double *__iptr) {
912 double __tmp;
913#ifdef __OPENMP_AMDGCN__
914#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
915#endif
916 double __r =
917 __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
918 *__iptr = __tmp;
919
920 return __r;
921}
922
// Builds a quiet double NaN whose payload is taken from the tag string
// __tagp (parsed by __make_mantissa; only the low 51 bits are kept).
// The result has sign 0, an all-ones exponent and the quiet bit set.
double nan(const char *__tagp) {
#if !_WIN32
  // Assemble the value through a bit-field view of the double.
  union {
    double val;
    struct ieee_double {
      uint64_t mantissa : 51;
      uint32_t quiet : 1;
      uint32_t exponent : 11;
      uint32_t sign : 1;
    } bits;
  } __tmp;
  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));

  __tmp.bits.sign = 0u;
  __tmp.bits.exponent = ~0u;
  __tmp.bits.quiet = 1u;
  __tmp.bits.mantissa = __make_mantissa(__tagp);

  return __tmp.val;
#else
  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
  // Keep only the 51 payload bits so an oversized tag cannot spill into the
  // quiet, exponent or sign fields (the bit-field branch truncates likewise).
  uint64_t __val = __make_mantissa(__tagp) & (((uint64_t)1 << 51) - 1);
  // Quiet bit (51) plus the 11 exponent bits (52..62). The constant must be
  // 64 bits wide before shifting: shifting an int by 51 is undefined.
  __val |= (uint64_t)0xFFF << 51;
  // Copy the bit pattern instead of casting the pointer, which keeps this
  // valid in C and avoids accessing a uint64_t object through a double lvalue.
  double __res;
  __builtin_memcpy(&__res, &__val, sizeof(__res));
  return __res;
#endif
}
950
952double nearbyint(double __x) { return __builtin_nearbyint(__x); }
953
955double nextafter(double __x, double __y) {
956 return __ocml_nextafter_f64(__x, __y);
957}
958
960double norm(int __dim,
961 const double *__a) { // TODO: placeholder until OCML adds support.
962 double __r = 0;
963 while (__dim--) {
964 __r += __a[0] * __a[0];
965 ++__a;
966 }
967
968 return __builtin_sqrt(__r);
969}
970
972double norm3d(double __x, double __y, double __z) {
973 return __ocml_len3_f64(__x, __y, __z);
974}
975
977double norm4d(double __x, double __y, double __z, double __w) {
978 return __ocml_len4_f64(__x, __y, __z, __w);
979}
980
982double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
983
985double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
986
988double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
989
991double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
992
994double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
995
997double remainder(double __x, double __y) {
998 return __ocml_remainder_f64(__x, __y);
999}
1000
1002double remquo(double __x, double __y, int *__quo) {
1003 int __tmp;
1004#ifdef __OPENMP_AMDGCN__
1005#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1006#endif
1007 double __r = __ocml_remquo_f64(
1008 __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
1009 *__quo = __tmp;
1010
1011 return __r;
1012}
1013
1015double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
1016
1018double rint(double __x) { return __builtin_rint(__x); }
1019
// Reciprocal Euclidean length of the __dim-element vector __a: the sum of the
// squared elements, accumulated in double precision in index order, passed to
// __ocml_rsqrt_f64. A zero or negative __dim reads nothing, so the sum handed
// to __ocml_rsqrt_f64 is 0.
// TODO: placeholder until OCML adds support.
double rnorm(int __dim, const double *__a) {
  double __r = 0;
  // Bounded index loop: counting down with `while (__dim--)` would walk far
  // past the array for a negative __dim.
  for (int __i = 0; __i < __dim; ++__i)
    __r += __a[__i] * __a[__i];

  return __ocml_rsqrt_f64(__r);
}
1031
1033double rnorm3d(double __x, double __y, double __z) {
1034 return __ocml_rlen3_f64(__x, __y, __z);
1035}
1036
1038double rnorm4d(double __x, double __y, double __z, double __w) {
1039 return __ocml_rlen4_f64(__x, __y, __z, __w);
1040}
1041
1043double round(double __x) { return __builtin_round(__x); }
1044
1046double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
1047
// Scales __x by 2 raised to the power __n, where __n is a long.
// The ldexp builtin takes an int exponent, so it is used only when __n is
// inside the int range; anything else goes to __ocml_scalb_f64. Checking the
// lower bound as well keeps a very negative long from being truncated to an
// unrelated int exponent.
double scalbln(double __x, long int __n) {
  if (__n >= INT_MIN && __n < INT_MAX)
    return __builtin_amdgcn_ldexp(__x, (int)__n);

  return __ocml_scalb_f64(__x, __n);
}
1054double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); }
1055
1057__RETURN_TYPE __signbit(double __x) { return __builtin_signbit(__x); }
1058
1060double sin(double __x) { return __ocml_sin_f64(__x); }
1061
1063void sincos(double __x, double *__sinptr, double *__cosptr) {
1064 double __tmp;
1065#ifdef __OPENMP_AMDGCN__
1066#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1067#endif
1068 *__sinptr = __ocml_sincos_f64(
1069 __x, (__attribute__((address_space(5))) double *)&__tmp);
1070 *__cosptr = __tmp;
1071}
1072
1074void sincospi(double __x, double *__sinptr, double *__cosptr) {
1075 double __tmp;
1076#ifdef __OPENMP_AMDGCN__
1077#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1078#endif
1079 *__sinptr = __ocml_sincospi_f64(
1080 __x, (__attribute__((address_space(5))) double *)&__tmp);
1081 *__cosptr = __tmp;
1082}
1083
1085double sinh(double __x) { return __ocml_sinh_f64(__x); }
1086
1088double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
1089
1091double sqrt(double __x) { return __builtin_sqrt(__x); }
1092
1094double tan(double __x) { return __ocml_tan_f64(__x); }
1095
1097double tanh(double __x) { return __ocml_tanh_f64(__x); }
1098
1100double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
1101
1103double trunc(double __x) { return __builtin_trunc(__x); }
1104
1106double y0(double __x) { return __ocml_y0_f64(__x); }
1107
1109double y1(double __x) { return __ocml_y1_f64(__x); }
1110
// Double-precision Bessel function of the second kind of integer order __n at
// __x. Orders 0 and 1 forward to y0/y1; larger orders are built with the
// upward recurrence Y(k+1) = (2k / x) * Y(k) - Y(k-1), one step per order.
// NOTE(review): for __n < 0 the loop never runs and the y1 value is returned
// unchanged -- confirm the intended result for negative orders.
// TODO: we could use Ahmes multiplication and the Miller & Brown algorithm
// for linear recurrences to get O(log n) steps, but it's unclear if it'd be
// beneficial in this case. Placeholder until OCML adds support.
double yn(int __n, double __x) {
  switch (__n) {
  case 0:
    return y0(__x);
  case 1:
    return y1(__x);
  default:
    break;
  }

  double __prev = y0(__x);
  double __curr = y1(__x);
  int __k = 1;
  while (__k < __n) {
    double __next = (2 * __k) / __x * __curr - __prev;
    __prev = __curr;
    __curr = __next;
    ++__k;
  }

  return __curr;
}
1132
1133// BEGIN INTRINSICS
1134#if defined OCML_BASIC_ROUNDED_OPERATIONS
1136double __dadd_rd(double __x, double __y) {
1137 return __ocml_add_rtn_f64(__x, __y);
1138}
1140double __dadd_rn(double __x, double __y) {
1141 return __ocml_add_rte_f64(__x, __y);
1142}
1144double __dadd_ru(double __x, double __y) {
1145 return __ocml_add_rtp_f64(__x, __y);
1146}
1148double __dadd_rz(double __x, double __y) {
1149 return __ocml_add_rtz_f64(__x, __y);
1150}
1151#else
1153double __dadd_rn(double __x, double __y) { return __x + __y; }
1154#endif
1155
1156#if defined OCML_BASIC_ROUNDED_OPERATIONS
1158double __ddiv_rd(double __x, double __y) {
1159 return __ocml_div_rtn_f64(__x, __y);
1160}
1162double __ddiv_rn(double __x, double __y) {
1163 return __ocml_div_rte_f64(__x, __y);
1164}
1166double __ddiv_ru(double __x, double __y) {
1167 return __ocml_div_rtp_f64(__x, __y);
1168}
1170double __ddiv_rz(double __x, double __y) {
1171 return __ocml_div_rtz_f64(__x, __y);
1172}
1173#else
1175double __ddiv_rn(double __x, double __y) { return __x / __y; }
1176#endif
1177
1178#if defined OCML_BASIC_ROUNDED_OPERATIONS
1180double __dmul_rd(double __x, double __y) {
1181 return __ocml_mul_rtn_f64(__x, __y);
1182}
1184double __dmul_rn(double __x, double __y) {
1185 return __ocml_mul_rte_f64(__x, __y);
1186}
1188double __dmul_ru(double __x, double __y) {
1189 return __ocml_mul_rtp_f64(__x, __y);
1190}
1192double __dmul_rz(double __x, double __y) {
1193 return __ocml_mul_rtz_f64(__x, __y);
1194}
1195#else
1197double __dmul_rn(double __x, double __y) { return __x * __y; }
1198#endif
1199
1200#if defined OCML_BASIC_ROUNDED_OPERATIONS
1202double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
1204double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
1206double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
1208double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
1209#else
1211double __drcp_rn(double __x) { return 1.0 / __x; }
1212#endif
1213
1214#if defined OCML_BASIC_ROUNDED_OPERATIONS
1216double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
1218double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
1220double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
1222double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
1223#else
1225double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); }
1226#endif
1227
1228#if defined OCML_BASIC_ROUNDED_OPERATIONS
1230double __dsub_rd(double __x, double __y) {
1231 return __ocml_sub_rtn_f64(__x, __y);
1232}
1234double __dsub_rn(double __x, double __y) {
1235 return __ocml_sub_rte_f64(__x, __y);
1236}
1238double __dsub_ru(double __x, double __y) {
1239 return __ocml_sub_rtp_f64(__x, __y);
1240}
1242double __dsub_rz(double __x, double __y) {
1243 return __ocml_sub_rtz_f64(__x, __y);
1244}
1245#else
1247double __dsub_rn(double __x, double __y) { return __x - __y; }
1248#endif
1249
1250#if defined OCML_BASIC_ROUNDED_OPERATIONS
1252double __fma_rd(double __x, double __y, double __z) {
1253 return __ocml_fma_rtn_f64(__x, __y, __z);
1254}
1256double __fma_rn(double __x, double __y, double __z) {
1257 return __ocml_fma_rte_f64(__x, __y, __z);
1258}
1260double __fma_ru(double __x, double __y, double __z) {
1261 return __ocml_fma_rtp_f64(__x, __y, __z);
1262}
1264double __fma_rz(double __x, double __y, double __z) {
1265 return __ocml_fma_rtz_f64(__x, __y, __z);
1266}
1267#else
1269double __fma_rn(double __x, double __y, double __z) {
1270 return __builtin_fma(__x, __y, __z);
1271}
1272#endif
1273// END INTRINSICS
1274// END DOUBLE
1275
1276// C only macros
1277#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1278#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1279#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1280#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1281#define signbit(__x) \
1282 _Generic((__x), float : __signbitf, double : __signbit)(__x)
1283#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1284
1285#if defined(__cplusplus)
1286template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
1287 return (__arg1 < __arg2) ? __arg1 : __arg2;
1288}
1289
1290template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
1291 return (__arg1 > __arg2) ? __arg1 : __arg2;
1292}
1293
1294__DEVICE__ int min(int __arg1, int __arg2) {
1295 return (__arg1 < __arg2) ? __arg1 : __arg2;
1296}
1297__DEVICE__ int max(int __arg1, int __arg2) {
1298 return (__arg1 > __arg2) ? __arg1 : __arg2;
1299}
1300
1302float max(float __x, float __y) { return __builtin_fmaxf(__x, __y); }
1303
1305double max(double __x, double __y) { return __builtin_fmax(__x, __y); }
1306
1308float min(float __x, float __y) { return __builtin_fminf(__x, __y); }
1309
1311double min(double __x, double __y) { return __builtin_fmin(__x, __y); }
1312
1313#if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1314__host__ inline static int min(int __arg1, int __arg2) {
1315 return __arg1 < __arg2 ? __arg1 : __arg2;
1316}
1317
1318__host__ inline static int max(int __arg1, int __arg2) {
1319 return __arg1 > __arg2 ? __arg1 : __arg2;
1320}
1321#endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1322#endif
1323
// Restore the three helper macros that were saved with `#pragma push_macro`
// near the top of this header. Each pop affects only the macro it names, so
// the order is not significant; they are listed in reverse push order.
#pragma pop_macro("__RETURN_TYPE")
#pragma pop_macro("__FAST_OR_SLOW")
#pragma pop_macro("__DEVICE__")
1327
1328#endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS
1329#endif // __CLANG_HIP_MATH_H__
__DEVICE__ long long abs(long long __n)
__DEVICE__ float __fsqrt_rd(float __a)
__DEVICE__ float __fdiv_rd(float __a, float __b)
__DEVICE__ double __dsub_ru(double __a, double __b)
__DEVICE__ double __drcp_ru(double __a)
__DEVICE__ float __frcp_rz(float __a)
__DEVICE__ float __fmul_ru(float __a, float __b)
__DEVICE__ double __dsub_rd(double __a, double __b)
__DEVICE__ float __frcp_ru(float __a)
__DEVICE__ float __frcp_rd(float __a)
__DEVICE__ double __dmul_ru(double __a, double __b)
__DEVICE__ float __fmaf_ru(float __a, float __b, float __c)
__DEVICE__ double __fma_rz(double __a, double __b, double __c)
__DEVICE__ double __fma_rd(double __a, double __b, double __c)
__DEVICE__ double __dmul_rd(double __a, double __b)
__DEVICE__ double __ddiv_ru(double __a, double __b)
__DEVICE__ double __ddiv_rd(double __a, double __b)
__DEVICE__ double __dadd_ru(double __a, double __b)
__DEVICE__ float __fmul_rd(float __a, float __b)
__DEVICE__ float __fsub_rd(float __a, float __b)
__DEVICE__ float __fsub_rz(float __a, float __b)
__DEVICE__ double __fma_ru(double __a, double __b, double __c)
__DEVICE__ double __dsqrt_ru(double __a)
__DEVICE__ float __fsqrt_rz(float __a)
__DEVICE__ double __dsub_rz(double __a, double __b)
__DEVICE__ float __fadd_rd(float __a, float __b)
__DEVICE__ float __fmul_rz(float __a, float __b)
__DEVICE__ float __fadd_rz(float __a, float __b)
__DEVICE__ double __dsqrt_rd(double __a)
__DEVICE__ float __fmaf_rd(float __a, float __b, float __c)
__DEVICE__ double __dadd_rd(double __a, double __b)
__DEVICE__ double __dsqrt_rz(double __a)
__DEVICE__ double __drcp_rd(double __a)
__DEVICE__ float __fdiv_rz(float __a, float __b)
__DEVICE__ float __fmaf_rz(float __a, float __b, float __c)
__DEVICE__ double __drcp_rz(double __a)
__DEVICE__ float __fsub_ru(float __a, float __b)
__DEVICE__ double __dmul_rz(double __a, double __b)
__DEVICE__ float __fsqrt_ru(float __a)
__DEVICE__ float __fadd_ru(float __a, float __b)
__DEVICE__ float __fdiv_ru(float __a, float __b)
__DEVICE__ double __ddiv_rz(double __a, double __b)
__DEVICE__ double __dadd_rz(double __a, double __b)
__DEVICE__ long labs(long __a)
__DEVICE__ long long llabs(long long __a)
__DEVICE__ int min(int __a, int __b)
__DEVICE__ int max(int __a, int __b)
__device__ double __ocml_i0_f64(double)
__device__ float __ocml_j1_f32(float)
__device__ double __ocml_remquo_f64(double, double, __attribute__((address_space(5))) int *)
__device__ double __ocml_modf_f64(double, __attribute__((address_space(5))) double *)
__device__ float __ocml_cospi_f32(float)
__device__ float __ocml_i0_f32(float)
__device__ double __ocml_lgamma_f64(double)
__device__ __2f16 b
__device__ float __ocml_sincos_f32(float, __attribute__((address_space(5))) float *)
__device__ float __ocml_remquo_f32(float, float, __attribute__((address_space(5))) int *)
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
__device__ float __ocml_y0_f32(float)
__device__ float __ocml_modf_f32(float, __attribute__((address_space(5))) float *)
__device__ float __ocml_i1_f32(float)
__device__ float __ocml_lgamma_f32(float)
__device__ double __ocml_sinpi_f64(double)
__device__ double __ocml_cospi_f64(double)
__device__ double __ocml_tgamma_f64(double)
__device__ float __ocml_tan_f32(float)
__device__ float __ocml_tgamma_f32(float)
__device__ double __ocml_sincos_f64(double, __attribute__((address_space(5))) double *)
__device__ float __ocml_sinpi_f32(float)
__device__ double __ocml_j1_f64(double)
__device__ double __ocml_y1_f64(double)
__device__ double __ocml_j0_f64(double)
__device__ float __ocml_cos_f32(float)
__device__ float __ocml_y1_f32(float)
__device__ float __ocml_j0_f32(float)
__device__ double __ocml_cos_f64(double)
__device__ double __ocml_i1_f64(double)
__device__ double __ocml_sin_f64(double)
__device__ double __ocml_sincospi_f64(double, __attribute__((address_space(5))) double *)
__device__ float __ocml_sin_f32(float)
__device__ float __ocml_sincospi_f32(float, __attribute__((address_space(5))) float *)
__device__ float __ocml_native_sin_f32(float)
__device__ float __ocml_native_cos_f32(float)
__device__ double __ocml_y0_f64(double)
__device__ double __ocml_tan_f64(double)
__DEVICE__ double __dsub_rn(double __x, double __y)
__DEVICE__ __RETURN_TYPE __isinff(float __x)
__DEVICE__ float sinpif(float __x)
__DEVICE__ float tanf(float __x)
__DEVICE__ float log2f(float __x)
__DEVICE__ float y0f(float __x)
__DEVICE__ float tanhf(float __x)
__DEVICE__ float coshf(float __x)
__DEVICE__ float log10f(float __x)
__DEVICE__ float j1f(float __x)
__DEVICE__ __RETURN_TYPE __finitef(float __x)
__DEVICE__ float ldexpf(float __x, int __e)
__DEVICE__ long long int llroundf(float __x)
__DEVICE__ double rhypot(double __x, double __y)
__DEVICE__ double normcdfinv(double __x)
__DEVICE__ double norm3d(double __x, double __y, double __z)
__DEVICE__ float truncf(float __x)
__DEVICE__ float remainderf(float __x, float __y)
__DEVICE__ float fabsf(float __x)
__DEVICE__ float __fdiv_rn(float __x, float __y)
__DEVICE__ float scalbnf(float __x, int __n)
__DEVICE__ float cyl_bessel_i0f(float __x)
__DEVICE__ float nanf(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float lgammaf(float __x)
__DEVICE__ float cospif(float __x)
__DEVICE__ __RETURN_TYPE __signbitf(float __x)
__DEVICE__ double __dsqrt_rn(double __x)
__DEVICE__ float frexpf(float __x, int *__nptr)
__DEVICE__ float tgammaf(float __x)
__DEVICE__ float __sinf(float __x)
__DEVICE__ float erfinvf(float __x)
__DEVICE__ float modff(float __x, float *__iptr)
__DEVICE__ double erfinv(double __x)
__DEVICE__ float expm1f(float __x)
__DEVICE__ float sinhf(float __x)
__DEVICE__ double j0(double __x)
__DEVICE__ float y1f(float __x)
__DEVICE__ float acosf(float __x)
__DEVICE__ float fmaf(float __x, float __y, float __z)
__DEVICE__ float cyl_bessel_i1f(float __x)
__DEVICE__ float fmodf(float __x, float __y)
__DEVICE__ float log1pf(float __x)
__DEVICE__ float atan2f(float __x, float __y)
__DEVICE__ float copysignf(float __x, float __y)
__DEVICE__ double j1(double __x)
__DEVICE__ __RETURN_TYPE __isnan(double __x)
__DEVICE__ float rnormf(int __dim, const float *__a)
__DEVICE__ float rnorm4df(float __x, float __y, float __z, float __w)
__DEVICE__ float __cosf(float __x)
__DEVICE__ float erff(float __x)
__DEVICE__ float atanf(float __x)
__DEVICE__ float rnorm3df(float __x, float __y, float __z)
__DEVICE__ double norm(int __dim, const double *__a)
__DEVICE__ float erfcxf(float __x)
__DEVICE__ float erfcinvf(float __x)
__DEVICE__ float asinf(float __x)
__DEVICE__ long int lroundf(float __x)
__DEVICE__ float __fdividef(float __x, float __y)
__DEVICE__ float __frsqrt_rn(float __x)
__DEVICE__ float __log2f(float __x)
__DEVICE__ float norm4df(float __x, float __y, float __z, float __w)
__DEVICE__ __RETURN_TYPE __isnanf(float __x)
__DEVICE__ uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull)))
__DEVICE__ double jn(int __n, double __x)
__DEVICE__ float __exp10f(float __x)
__DEVICE__ float __frcp_rn(float __x)
#define __DEVICE__
__DEVICE__ float ynf(int __n, float __x)
__DEVICE__ float powf(float __x, float __y)
__DEVICE__ float __fsub_rn(float __x, float __y)
__DEVICE__ double __dadd_rn(double __x, double __y)
__DEVICE__ float sinf(float __x)
__DEVICE__ float __tanf(float __x)
__DEVICE__ float remquof(float __x, float __y, int *__quo)
__DEVICE__ double normcdf(double __x)
__DEVICE__ float __fsqrt_rn(float __x)
__DEVICE__ float hypotf(float __x, float __y)
__DEVICE__ float __fmaf_rn(float __x, float __y, float __z)
__DEVICE__ void sincosf(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float exp10f(float __x)
__DEVICE__ double y1(double __x)
#define __FAST_OR_SLOW(fast, slow)
__DEVICE__ float fmaxf(float __x, float __y)
__DEVICE__ float fminf(float __x, float __y)
__DEVICE__ double erfcinv(double __x)
__DEVICE__ double powi(double __x, int __y)
__DEVICE__ float logf(float __x)
__DEVICE__ float __fadd_rn(float __x, float __y)
__DEVICE__ double cospi(double __x)
__DEVICE__ double rsqrt(double __x)
__DEVICE__ float erfcf(float __x)
__DEVICE__ float atanhf(float __x)
__DEVICE__ float asinhf(float __x)
__DEVICE__ float __expf(float __x)
__DEVICE__ double norm4d(double __x, double __y, double __z, double __w)
#define __RETURN_TYPE
__DEVICE__ float __logf(float __x)
__DEVICE__ double __fma_rn(double __x, double __y, double __z)
__DEVICE__ double nan(const char *__tagp)
__DEVICE__ double rnorm(int __dim, const double *__a)
__DEVICE__ float j0f(float __x)
__DEVICE__ float rsqrtf(float __x)
__DEVICE__ float jnf(int __n, float __x)
__DEVICE__ double sinpi(double __x)
__DEVICE__ float logbf(float __x)
__DEVICE__ double y0(double __x)
__DEVICE__ __RETURN_TYPE __finite(double __x)
__DEVICE__ void __sincosf(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ double yn(int __n, double __x)
__DEVICE__ float rhypotf(float __x, float __y)
__DEVICE__ float exp2f(float __x)
__DEVICE__ double cyl_bessel_i0(double __x)
__DEVICE__ float powif(float __x, int __y)
__DEVICE__ double __ddiv_rn(double __x, double __y)
__DEVICE__ double cyl_bessel_i1(double __x)
__DEVICE__ float ceilf(float __x)
__DEVICE__ double rcbrt(double __x)
__DEVICE__ double rnorm3d(double __x, double __y, double __z)
__DEVICE__ float normcdfinvf(float __x)
__DEVICE__ float norm3df(float __x, float __y, float __z)
__DEVICE__ void sincos(double __x, double *__sinptr, double *__cosptr)
#define __static_assert_type_size_equal(A, B)
__DEVICE__ __RETURN_TYPE __signbit(double __x)
__DEVICE__ float fdimf(float __x, float __y)
__DEVICE__ double __dmul_rn(double __x, double __y)
__DEVICE__ float normf(int __dim, const float *__a)
__DEVICE__ float nearbyintf(float __x)
__DEVICE__ uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull)))
__DEVICE__ int ilogbf(float __x)
__DEVICE__ float floorf(float __x)
__DEVICE__ float sqrtf(float __x)
__DEVICE__ float roundf(float __x)
__DEVICE__ void sincospif(float __x, float *__sinptr, float *__cosptr)
__DEVICE__ double __drcp_rn(double __x)
__DEVICE__ long int lrintf(float __x)
__DEVICE__ float acoshf(float __x)
__DEVICE__ double modf(double __x, double *__iptr)
__DEVICE__ float cosf(float __x)
__DEVICE__ float expf(float __x)
__DEVICE__ float nextafterf(float __x, float __y)
__DEVICE__ double rnorm4d(double __x, double __y, double __z, double __w)
__DEVICE__ long long int llrintf(float __x)
__DEVICE__ double erfcx(double __x)
__DEVICE__ float fdividef(float __x, float __y)
__DEVICE__ float rcbrtf(float __x)
__DEVICE__ double exp10(double __x)
__DEVICE__ float __log10f(float __x)
__DEVICE__ float cbrtf(float __x)
__DEVICE__ float __fmul_rn(float __x, float __y)
__DEVICE__ void sincospi(double __x, double *__sinptr, double *__cosptr)
__DEVICE__ float scalblnf(float __x, long int __n)
__DEVICE__ __RETURN_TYPE __isinf(double __x)
__DEVICE__ float rintf(float __x)
__DEVICE__ float normcdff(float __x)
__DEVICE__ uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull)))
__DEVICE__ float __saturatef(float __x)
__DEVICE__ float __powf(float __x, float __y)
#define __host__
#define __device__
static __inline__ uint32_t uint32_t __y
Definition: arm_acle.h:130
static __inline__ void int __a
Definition: emmintrin.h:4079
#define INT_MAX
Definition: limits.h:50
const FunctionProtoType * T
float __ovld __cnfn sign(float)
Returns 1.0 if x > 0, -0.0 if x = -0.0, +0.0 if x = +0.0, or -1.0 if x < 0.
#define true
Definition: stdbool.h:25
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742