11 "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead."
16#ifndef __AVX10_2CONVERTINTRIN_H
17#define __AVX10_2CONVERTINTRIN_H
/* Function attributes applied to every inline intrinsic wrapper in this
 * header: always inline into the caller, emit no debug info for the wrapper
 * itself, require the "avx10.2-256" target feature, and declare the minimum
 * vector register width (128 or 256 bits) the wrapper needs. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
                 __min_vector_width__(256)))
29 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
30 (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (
__mmask8)(-1));
34_mm_mask_cvtx2ps_ph(__m128h __W,
__mmask8 __U, __m128 __A, __m128 __B) {
35 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
36 (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (
__mmask8)__U);
40_mm_maskz_cvtx2ps_ph(
__mmask8 __U, __m128 __A, __m128 __B) {
41 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
42 (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (
__mmask8)__U);
47 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
48 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (
__mmask16)(-1),
53_mm256_mask_cvtx2ps_ph(__m256h __W,
__mmask16 __U, __m256 __A, __m256 __B) {
54 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
55 (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (
__mmask16)__U,
60_mm256_maskz_cvtx2ps_ph(
__mmask16 __U, __m256 __A, __m256 __B) {
61 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
62 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (
__mmask16)__U,
/* Convert two 256-bit vectors of packed single-precision floats (A, B) into
 * one 256-bit vector of 16 packed FP16 values, using the rounding-control
 * immediate R. The mask is all-ones (no lane is masked off), so the
 * passthrough operand is left undefined. */
#define _mm256_cvtx_round2ps_ph(A, B, R)                                       \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(),                \
      (__mmask16)(-1), (const int)(R)))
/* Merge-masking variant of _mm256_cvtx_round2ps_ph: result lanes whose bit in
 * mask U is clear take their value from passthrough vector W (per the _mask
 * builtin convention used throughout this header). R is the rounding-control
 * immediate. */
#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R)                            \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R)))
/* Zero-masking variant of _mm256_cvtx_round2ps_ph: result lanes whose bit in
 * mask U is clear are zeroed (the passthrough operand is
 * _mm256_setzero_ph()). R is the rounding-control immediate. */
#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R)                              \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()),                \
      (__mmask16)(U), (const int)(R)))
81_mm_cvtbiasph_pbf8(__m128i __A, __m128h __B) {
82 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
87_mm_mask_cvtbiasph_pbf8(__m128i __W,
__mmask8 __U, __m128i __A, __m128h __B) {
88 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
89 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (
__mmask8)__U);
93_mm_maskz_cvtbiasph_pbf8(
__mmask8 __U, __m128i __A, __m128h __B) {
94 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
100_mm256_cvtbiasph_pbf8(__m256i __A, __m256h __B) {
101 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
107 __m128i __W,
__mmask16 __U, __m256i __A, __m256h __B) {
108 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
109 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (
__mmask16)__U);
113_mm256_maskz_cvtbiasph_pbf8(
__mmask16 __U, __m256i __A, __m256h __B) {
114 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
120_mm_cvtbiassph_pbf8(__m128i __A, __m128h __B) {
121 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
126_mm_mask_cvtbiassph_pbf8(__m128i __W,
__mmask8 __U, __m128i __A, __m128h __B) {
127 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
128 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (
__mmask8)__U);
132_mm_maskz_cvtbiassph_pbf8(
__mmask8 __U, __m128i __A, __m128h __B) {
133 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
139_mm256_cvtbiassph_pbf8(__m256i __A, __m256h __B) {
140 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
146 __m128i __W,
__mmask16 __U, __m256i __A, __m256h __B) {
147 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
148 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (
__mmask16)__U);
152_mm256_maskz_cvtbiassph_pbf8(
__mmask16 __U, __m256i __A, __m256h __B) {
153 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
159_mm_cvtbiasph_phf8(__m128i __A, __m128h __B) {
160 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
165_mm_mask_cvtbiasph_phf8(__m128i __W,
__mmask8 __U, __m128i __A, __m128h __B) {
166 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
167 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (
__mmask8)__U);
171_mm_maskz_cvtbiasph_phf8(
__mmask8 __U, __m128i __A, __m128h __B) {
172 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
178_mm256_cvtbiasph_phf8(__m256i __A, __m256h __B) {
179 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
185 __m128i __W,
__mmask16 __U, __m256i __A, __m256h __B) {
186 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
187 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (
__mmask16)__U);
191_mm256_maskz_cvtbiasph_phf8(
__mmask16 __U, __m256i __A, __m256h __B) {
192 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
198_mm_cvtbiassph_phf8(__m128i __A, __m128h __B) {
199 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
204_mm_mask_cvtbiassph_phf8(__m128i __W,
__mmask8 __U, __m128i __A, __m128h __B) {
205 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
206 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (
__mmask8)__U);
210_mm_maskz_cvtbiassph_phf8(
__mmask8 __U, __m128i __A, __m128h __B) {
211 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
217_mm256_cvtbiassph_phf8(__m256i __A, __m256h __B) {
218 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
224 __m128i __W,
__mmask16 __U, __m256i __A, __m256h __B) {
225 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
226 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (
__mmask16)__U);
230_mm256_maskz_cvtbiassph_phf8(
__mmask16 __U, __m256i __A, __m256h __B) {
231 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
238 return (__m128i)__builtin_ia32_vcvtne2ph2bf8_128((__v8hf)(__A),
243_mm_mask_cvtne2ph_pbf8(__m128i __W,
__mmask16 __U, __m128h __A, __m128h __B) {
244 return (__m128i)__builtin_ia32_selectb_128(
245 (
__mmask16)__U, (__v16qi)_mm_cvtne2ph_pbf8(__A, __B), (__v16qi)__W);
249_mm_maskz_cvtne2ph_pbf8(
__mmask16 __U, __m128h __A, __m128h __B) {
250 return (__m128i)__builtin_ia32_selectb_128(
251 (
__mmask16)__U, (__v16qi)_mm_cvtne2ph_pbf8(__A, __B),
256_mm256_cvtne2ph_pbf8(__m256h __A, __m256h __B) {
257 return (__m256i)__builtin_ia32_vcvtne2ph2bf8_256((__v16hf)(__A),
262 __m256i __W,
__mmask32 __U, __m256h __A, __m256h __B) {
263 return (__m256i)__builtin_ia32_selectb_256(
264 (
__mmask16)__U, (__v32qi)_mm256_cvtne2ph_pbf8(__A, __B), (__v32qi)__W);
268_mm256_maskz_cvtne2ph_pbf8(
__mmask32 __U, __m256h __A, __m256h __B) {
269 return (__m256i)__builtin_ia32_selectb_256(
270 (
__mmask16)__U, (__v32qi)_mm256_cvtne2ph_pbf8(__A, __B),
275_mm_cvtnes2ph_pbf8(__m128h __A, __m128h __B) {
276 return (__m128i)__builtin_ia32_vcvtne2ph2bf8s_128((__v8hf)(__A),
281_mm_mask_cvtnes2ph_pbf8(__m128i __W,
__mmask16 __U, __m128h __A, __m128h __B) {
282 return (__m128i)__builtin_ia32_selectb_128(
283 (
__mmask16)__U, (__v16qi)_mm_cvtnes2ph_pbf8(__A, __B), (__v16qi)__W);
287_mm_maskz_cvtnes2ph_pbf8(
__mmask16 __U, __m128h __A, __m128h __B) {
288 return (__m128i)__builtin_ia32_selectb_128(
289 (
__mmask16)__U, (__v16qi)_mm_cvtnes2ph_pbf8(__A, __B),
294_mm256_cvtnes2ph_pbf8(__m256h __A, __m256h __B) {
295 return (__m256i)__builtin_ia32_vcvtne2ph2bf8s_256((__v16hf)(__A),
300 __m256i __W,
__mmask32 __U, __m256h __A, __m256h __B) {
301 return (__m256i)__builtin_ia32_selectb_256(
302 (
__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_pbf8(__A, __B), (__v32qi)__W);
306_mm256_maskz_cvtnes2ph_pbf8(
__mmask32 __U, __m256h __A, __m256h __B) {
307 return (__m256i)__builtin_ia32_selectb_256(
308 (
__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_pbf8(__A, __B),
314 return (__m128i)__builtin_ia32_vcvtne2ph2hf8_128((__v8hf)(__A),
319_mm_mask_cvtne2ph_phf8(__m128i __W,
__mmask16 __U, __m128h __A, __m128h __B) {
320 return (__m128i)__builtin_ia32_selectb_128(
321 (
__mmask16)__U, (__v16qi)_mm_cvtne2ph_phf8(__A, __B), (__v16qi)__W);
325_mm_maskz_cvtne2ph_phf8(
__mmask16 __U, __m128h __A, __m128h __B) {
326 return (__m128i)__builtin_ia32_selectb_128(
327 (
__mmask16)__U, (__v16qi)_mm_cvtne2ph_phf8(__A, __B),
332_mm256_cvtne2ph_phf8(__m256h __A, __m256h __B) {
333 return (__m256i)__builtin_ia32_vcvtne2ph2hf8_256((__v16hf)(__A),
338 __m256i __W,
__mmask32 __U, __m256h __A, __m256h __B) {
339 return (__m256i)__builtin_ia32_selectb_256(
340 (
__mmask16)__U, (__v32qi)_mm256_cvtne2ph_phf8(__A, __B), (__v32qi)__W);
344_mm256_maskz_cvtne2ph_phf8(
__mmask32 __U, __m256h __A, __m256h __B) {
345 return (__m256i)__builtin_ia32_selectb_256(
346 (
__mmask16)__U, (__v32qi)_mm256_cvtne2ph_phf8(__A, __B),
351_mm_cvtnes2ph_phf8(__m128h __A, __m128h __B) {
352 return (__m128i)__builtin_ia32_vcvtne2ph2hf8s_128((__v8hf)(__A),
357_mm_mask_cvtnes2ph_phf8(__m128i __W,
__mmask16 __U, __m128h __A, __m128h __B) {
358 return (__m128i)__builtin_ia32_selectb_128(
359 (
__mmask16)__U, (__v16qi)_mm_cvtnes2ph_phf8(__A, __B), (__v16qi)__W);
363_mm_maskz_cvtnes2ph_phf8(
__mmask16 __U, __m128h __A, __m128h __B) {
364 return (__m128i)__builtin_ia32_selectb_128(
365 (
__mmask16)__U, (__v16qi)_mm_cvtnes2ph_phf8(__A, __B),
370_mm256_cvtnes2ph_phf8(__m256h __A, __m256h __B) {
371 return (__m256i)__builtin_ia32_vcvtne2ph2hf8s_256((__v16hf)(__A),
376 __m256i __W,
__mmask32 __U, __m256h __A, __m256h __B) {
377 return (__m256i)__builtin_ia32_selectb_256(
378 (
__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_phf8(__A, __B), (__v32qi)__W);
382_mm256_maskz_cvtnes2ph_phf8(
__mmask32 __U, __m256h __A, __m256h __B) {
383 return (__m256i)__builtin_ia32_selectb_256(
384 (
__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_phf8(__A, __B),
389 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
390 (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (
__mmask8)-1);
394_mm_mask_cvtnehf8_ph(__m128h __W,
__mmask8 __U, __m128i __A) {
395 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
396 (__v16qi)__A, (__v8hf)(__m128h)__W, (
__mmask8)__U);
400_mm_maskz_cvtnehf8_ph(
__mmask8 __U, __m128i __A) {
401 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
402 (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (
__mmask8)__U);
406_mm256_cvtnehf8_ph(__m128i __A) {
407 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
408 (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (
__mmask16)-1);
412_mm256_mask_cvtnehf8_ph(__m256h __W,
__mmask16 __U, __m128i __A) {
413 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
414 (__v16qi)__A, (__v16hf)(__m256h)__W, (
__mmask16)__U);
418_mm256_maskz_cvtnehf8_ph(
__mmask16 __U, __m128i __A) {
419 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
420 (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (
__mmask16)__U);
424 return (__m128i)__builtin_ia32_vcvtneph2bf8_128_mask(
429_mm_mask_cvtneph_pbf8(__m128i __W,
__mmask8 __U, __m128h __A) {
430 return (__m128i)__builtin_ia32_vcvtneph2bf8_128_mask(
431 (__v8hf)__A, (__v16qi)(__m128i)__W, (
__mmask8)__U);
435_mm_maskz_cvtneph_pbf8(
__mmask8 __U, __m128h __A) {
436 return (__m128i)__builtin_ia32_vcvtneph2bf8_128_mask(
441_mm256_cvtneph_pbf8(__m256h __A) {
442 return (__m128i)__builtin_ia32_vcvtneph2bf8_256_mask(
447_mm256_mask_cvtneph_pbf8(__m128i __W,
__mmask16 __U, __m256h __A) {
448 return (__m128i)__builtin_ia32_vcvtneph2bf8_256_mask(
449 (__v16hf)__A, (__v16qi)(__m128i)__W, (
__mmask16)__U);
453_mm256_maskz_cvtneph_pbf8(
__mmask16 __U, __m256h __A) {
454 return (__m128i)__builtin_ia32_vcvtneph2bf8_256_mask(
459 return (__m128i)__builtin_ia32_vcvtneph2bf8s_128_mask(
464_mm_mask_cvtnesph_pbf8(__m128i __W,
__mmask8 __U, __m128h __A) {
465 return (__m128i)__builtin_ia32_vcvtneph2bf8s_128_mask(
466 (__v8hf)__A, (__v16qi)(__m128i)__W, (
__mmask8)__U);
470_mm_maskz_cvtnesph_pbf8(
__mmask8 __U, __m128h __A) {
471 return (__m128i)__builtin_ia32_vcvtneph2bf8s_128_mask(
476_mm256_cvtnesph_pbf8(__m256h __A) {
477 return (__m128i)__builtin_ia32_vcvtneph2bf8s_256_mask(
482_mm256_mask_cvtnesph_pbf8(__m128i __W,
__mmask16 __U, __m256h __A) {
483 return (__m128i)__builtin_ia32_vcvtneph2bf8s_256_mask(
484 (__v16hf)__A, (__v16qi)(__m128i)__W, (
__mmask16)__U);
488_mm256_maskz_cvtnesph_pbf8(
__mmask16 __U, __m256h __A) {
489 return (__m128i)__builtin_ia32_vcvtneph2bf8s_256_mask(
494 return (__m128i)__builtin_ia32_vcvtneph2hf8_128_mask(
499_mm_mask_cvtneph_phf8(__m128i __W,
__mmask8 __U, __m128h __A) {
500 return (__m128i)__builtin_ia32_vcvtneph2hf8_128_mask(
501 (__v8hf)__A, (__v16qi)(__m128i)__W, (
__mmask8)__U);
505_mm_maskz_cvtneph_phf8(
__mmask8 __U, __m128h __A) {
506 return (__m128i)__builtin_ia32_vcvtneph2hf8_128_mask(
511_mm256_cvtneph_phf8(__m256h __A) {
512 return (__m128i)__builtin_ia32_vcvtneph2hf8_256_mask(
517_mm256_mask_cvtneph_phf8(__m128i __W,
__mmask16 __U, __m256h __A) {
518 return (__m128i)__builtin_ia32_vcvtneph2hf8_256_mask(
519 (__v16hf)__A, (__v16qi)(__m128i)__W, (
__mmask16)__U);
523_mm256_maskz_cvtneph_phf8(
__mmask16 __U, __m256h __A) {
524 return (__m128i)__builtin_ia32_vcvtneph2hf8_256_mask(
529 return (__m128i)__builtin_ia32_vcvtneph2hf8s_128_mask(
534_mm_mask_cvtnesph_phf8(__m128i __W,
__mmask8 __U, __m128h __A) {
535 return (__m128i)__builtin_ia32_vcvtneph2hf8s_128_mask(
536 (__v8hf)__A, (__v16qi)(__m128i)__W, (
__mmask8)__U);
540_mm_maskz_cvtnesph_phf8(
__mmask8 __U, __m128h __A) {
541 return (__m128i)__builtin_ia32_vcvtneph2hf8s_128_mask(
546_mm256_cvtnesph_phf8(__m256h __A) {
547 return (__m128i)__builtin_ia32_vcvtneph2hf8s_256_mask(
552_mm256_mask_cvtnesph_phf8(__m128i __W,
__mmask16 __U, __m256h __A) {
553 return (__m128i)__builtin_ia32_vcvtneph2hf8s_256_mask(
554 (__v16hf)__A, (__v16qi)(__m128i)__W, (
__mmask16)__U);
558_mm256_maskz_cvtnesph_phf8(
__mmask16 __U, __m256h __A) {
559 return (__m128i)__builtin_ia32_vcvtneph2hf8s_256_mask(
568_mm_mask_cvtpbf8_ph(__m128h __S,
__mmask8 __U, __m128i __A) {
569 return _mm_castsi128_ph(
574_mm_maskz_cvtpbf8_ph(
__mmask8 __U, __m128i __A) {
583_mm256_mask_cvtpbf8_ph(__m256h __S,
__mmask8 __U, __m128i __A) {
584 return _mm256_castsi256_ph(
589_mm256_maskz_cvtpbf8_ph(
__mmask8 __U, __m128i __A) {
590 return _mm256_castsi256_ph(
594#undef __DEFAULT_FN_ATTRS128
595#undef __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi16(__m128i __V)
Sign-extends bytes from the 128-bit integer vector in __V and returns the 16-bit values in the corresponding elements of a 256-bit vector of [16 x i16].
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a, int __count)
Shifts each 16-bit element of the 256-bit vector of [16 x i16] in __a left by __count bits,...
#define __DEFAULT_FN_ATTRS256
#define __DEFAULT_FN_ATTRS128
#define _MM_FROUND_CUR_DIRECTION
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V)
Sign-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bit values and returns them in a 128-bit vector of [8 x i16].