clang 20.0.0git
avx10_2_512bf16intrin.h
Go to the documentation of this file.
1/*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error \
11 "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifdef __SSE2__
15
16#ifndef __AVX10_2_512BF16INTRIN_H
17#define __AVX10_2_512BF16INTRIN_H
18
19/* Variant of the 64-byte __bf16 vector type declared with 1-byte alignment;
   the unaligned load/store helpers in this file wrap it in a packed struct. */
20typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));
21
22/* Default attributes for the functions in this file: always inlined, no debug
   info, target feature "avx10.2-512", minimum vector width 512. The macro is
   #undef'd again at the end of the header. */
23#define __DEFAULT_FN_ATTRS512 \
24 __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
25 __min_vector_width__(512)))
26
27static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
28 return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
29}
30
31static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) {
32 return (__m512bh)__builtin_ia32_undef512();
33}
34
35static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) {
36 return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
37 bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
38 bf, bf, bf, bf, bf, bf, bf, bf, bf, bf};
39}
40
41static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh(
42 __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6,
43 __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
44 __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17,
45 __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22,
46 __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27,
47 __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
48 return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25,
49 bf24, bf23, bf22, bf21, bf20, bf19, bf18, bf17,
50 bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9,
51 bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1};
52}
53
/* Reversed-order counterpart of _mm512_set_pbh: forwards its 32 arguments to
   _mm512_set_pbh in the opposite order, each wrapped in parentheses, so the
   first macro argument is passed as _mm512_set_pbh's last parameter. */
54#define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \
55 bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19, \
56 bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28, \
57 bf29, bf30, bf31, bf32) \
58 _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26), \
59 (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19), \
60 (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12), \
61 (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), \
62 (bf3), (bf2), (bf1))
63
64static __inline__ __m512 __DEFAULT_FN_ATTRS512
65_mm512_castpbf16_ps(__m512bh __a) {
66 return (__m512)__a;
67}
68
69static __inline__ __m512d __DEFAULT_FN_ATTRS512
70_mm512_castpbf16_pd(__m512bh __a) {
71 return (__m512d)__a;
72}
73
74static __inline__ __m512i __DEFAULT_FN_ATTRS512
75_mm512_castpbf16_si512(__m512bh __a) {
76 return (__m512i)__a;
77}
78
79static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) {
80 return (__m512bh)__a;
81}
82
83static __inline__ __m512bh __DEFAULT_FN_ATTRS512
84_mm512_castpd_pbh(__m512d __a) {
85 return (__m512bh)__a;
86}
87
88static __inline__ __m512bh __DEFAULT_FN_ATTRS512
89_mm512_castsi512_pbh(__m512i __a) {
90 return (__m512bh)__a;
91}
92
93static __inline__ __m128bh __DEFAULT_FN_ATTRS512
94_mm512_castpbf16512_pbh128(__m512bh __a) {
95 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
96}
97
98static __inline__ __m256bh __DEFAULT_FN_ATTRS512
99_mm512_castpbf16512_pbh256(__m512bh __a) {
100 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
101 12, 13, 14, 15);
102}
103
104static __inline__ __m512bh __DEFAULT_FN_ATTRS512
105_mm512_castpbf16128_pbh512(__m128bh __a) {
106 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
107 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
108 -1, -1, -1, -1, -1, -1, -1, -1, -1);
109}
110
111static __inline__ __m512bh __DEFAULT_FN_ATTRS512
112_mm512_castpbf16256_pbh512(__m256bh __a) {
113 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
114 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
115 -1, -1, -1, -1, -1, -1, -1, -1);
116}
117
118static __inline__ __m512bh __DEFAULT_FN_ATTRS512
119_mm512_zextpbf16128_pbh512(__m128bh __a) {
120 return __builtin_shufflevector(
121 __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
122 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
123}
124
125static __inline__ __m512bh __DEFAULT_FN_ATTRS512
126_mm512_zextpbf16256_pbh512(__m256bh __a) {
127 return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3,
128 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
129 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
130 29, 30, 31);
131}
132
133static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) {
134 return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF),
135 (__m512i)__A);
136}
137
138static __inline__ __m512bh __DEFAULT_FN_ATTRS512
139_mm512_load_pbh(void const *__p) {
140 return *(const __m512bh *)__p;
141}
142
143static __inline__ __m512bh __DEFAULT_FN_ATTRS512
144_mm512_loadu_pbh(void const *__p) {
145 struct __loadu_pbh {
146 __m512bh_u __v;
147 } __attribute__((__packed__, __may_alias__));
148 return ((const struct __loadu_pbh *)__p)->__v;
149}
150
151static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P,
152 __m512bh __A) {
153 *(__m512bh *)__P = __A;
154}
155
156static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P,
157 __m512bh __A) {
158 struct __storeu_pbh {
159 __m512bh_u __v;
160 } __attribute__((__packed__, __may_alias__));
161 ((struct __storeu_pbh *)__P)->__v = __A;
162}
163
164static __inline__ __m512bh __DEFAULT_FN_ATTRS512
165_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
166 return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W,
167 (__v32bf)__A);
168}
169
170static __inline__ __m512bh __DEFAULT_FN_ATTRS512
171_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
172 return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
173 (__v32hi)__B);
174}
175
176static __inline__ __m512bh __DEFAULT_FN_ATTRS512
177_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
178 return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
179}
180
181static __inline__ __m512bh __DEFAULT_FN_ATTRS512
182_mm512_addne_pbh(__m512bh __A, __m512bh __B) {
183 return (__m512bh)((__v32bf)__A + (__v32bf)__B);
184}
185
186static __inline__ __m512bh __DEFAULT_FN_ATTRS512
187_mm512_mask_addne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
188 return (__m512bh)__builtin_ia32_selectpbf_512(
189 (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B), (__v32bf)__W);
190}
191
192static __inline__ __m512bh __DEFAULT_FN_ATTRS512
193_mm512_maskz_addne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
194 return (__m512bh)__builtin_ia32_selectpbf_512(
195 (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B),
196 (__v32bf)_mm512_setzero_pbh());
197}
198
199static __inline__ __m512bh __DEFAULT_FN_ATTRS512
200_mm512_subne_pbh(__m512bh __A, __m512bh __B) {
201 return (__m512bh)((__v32bf)__A - (__v32bf)__B);
202}
203
204static __inline__ __m512bh __DEFAULT_FN_ATTRS512
205_mm512_mask_subne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
206 return (__m512bh)__builtin_ia32_selectpbf_512(
207 (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B), (__v32bf)__W);
208}
209
210static __inline__ __m512bh __DEFAULT_FN_ATTRS512
211_mm512_maskz_subne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
212 return (__m512bh)__builtin_ia32_selectpbf_512(
213 (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B),
214 (__v32bf)_mm512_setzero_pbh());
215}
216
217static __inline__ __m512bh __DEFAULT_FN_ATTRS512
218_mm512_mulne_pbh(__m512bh __A, __m512bh __B) {
219 return (__m512bh)((__v32bf)__A * (__v32bf)__B);
220}
221
222static __inline__ __m512bh __DEFAULT_FN_ATTRS512
223_mm512_mask_mulne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
224 return (__m512bh)__builtin_ia32_selectpbf_512(
225 (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B), (__v32bf)__W);
226}
227
228static __inline__ __m512bh __DEFAULT_FN_ATTRS512
229_mm512_maskz_mulne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
230 return (__m512bh)__builtin_ia32_selectpbf_512(
231 (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B),
232 (__v32bf)_mm512_setzero_pbh());
233}
234
235static __inline__ __m512bh __DEFAULT_FN_ATTRS512
236_mm512_divne_pbh(__m512bh __A, __m512bh __B) {
237 return (__m512bh)((__v32bf)__A / (__v32bf)__B);
238}
239
240static __inline__ __m512bh __DEFAULT_FN_ATTRS512
241_mm512_mask_divne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
242 return (__m512bh)__builtin_ia32_selectpbf_512(
243 (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B), (__v32bf)__W);
244}
245
246static __inline__ __m512bh __DEFAULT_FN_ATTRS512
247_mm512_maskz_divne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
248 return (__m512bh)__builtin_ia32_selectpbf_512(
249 (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B),
250 (__v32bf)_mm512_setzero_pbh());
251}
252
253static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A,
254 __m512bh __B) {
255 return (__m512bh)__builtin_ia32_vmaxpbf16512((__v32bf)__A, (__v32bf)__B);
256}
257
258static __inline__ __m512bh __DEFAULT_FN_ATTRS512
259_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
260 return (__m512bh)__builtin_ia32_selectpbf_512(
261 (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W);
262}
263
264static __inline__ __m512bh __DEFAULT_FN_ATTRS512
265_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
266 return (__m512bh)__builtin_ia32_selectpbf_512(
267 (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B),
268 (__v32bf)_mm512_setzero_pbh());
269}
270
271static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A,
272 __m512bh __B) {
273 return (__m512bh)__builtin_ia32_vminpbf16512((__v32bf)__A, (__v32bf)__B);
274}
275
276static __inline__ __m512bh __DEFAULT_FN_ATTRS512
277_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
278 return (__m512bh)__builtin_ia32_selectpbf_512(
279 (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W);
280}
281
282static __inline__ __m512bh __DEFAULT_FN_ATTRS512
283_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
284 return (__m512bh)__builtin_ia32_selectpbf_512(
285 (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B),
286 (__v32bf)_mm512_setzero_pbh());
287}
288
/* Expands to a call of the vcmppbf16512 mask builtin on __A and __B with
   predicate __P (cast to int) and an all-ones input mask; the result is cast
   to __mmask32. NOTE(review): presumably a macro so that __P reaches the
   builtin as a constant expression - confirm. */
289#define _mm512_cmp_pbh_mask(__A, __B, __P) \
290 ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \
291 (__v32bf)(__m512bh)(__B), \
292 (int)(__P), (__mmask32) - 1))
293
/* Expands to a call of the vcmppbf16512 mask builtin on __A and __B with
   predicate __P (cast to int) and __U as the input mask; the result is cast
   to __mmask32. */
294#define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P) \
295 ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \
296 (__v32bf)(__m512bh)(__B), \
297 (int)(__P), (__mmask32)(__U)))
298
/* Expands to a call of the vfpclasspbf16512 mask builtin on __A with the
   immediate imm (cast to int) and __U as the input mask; the result is cast
   to __mmask32. */
299#define _mm512_mask_fpclass_pbh_mask(__U, __A, imm) \
300 ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
301 (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U)))
302
/* Expands to a call of the vfpclasspbf16512 mask builtin on __A with the
   immediate imm (cast to int) and an all-ones input mask; the result is cast
   to __mmask32. */
303#define _mm512_fpclass_pbh_mask(__A, imm) \
304 ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
305 (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1))
306
307static __inline__ __m512bh __DEFAULT_FN_ATTRS512
308_mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
309 return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
310 (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(),
311 (__mmask32)-1);
312}
313
314static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh(
315 __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
316 return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
317 (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U);
318}
319
320static __inline__ __m512bh __DEFAULT_FN_ATTRS512
321_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
322 return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
323 (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(),
324 (__mmask32)__U);
325}
326
327static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) {
328 return (__m512bh)__builtin_ia32_vrcppbf16512_mask(
329 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
330}
331
332static __inline__ __m512bh __DEFAULT_FN_ATTRS512
333_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
334 return (__m512bh)__builtin_ia32_vrcppbf16512_mask((__v32bf)__A, (__v32bf)__W,
335 (__mmask32)__U);
336}
337
338static __inline__ __m512bh __DEFAULT_FN_ATTRS512
339_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) {
340 return (__m512bh)__builtin_ia32_vrcppbf16512_mask(
341 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
342}
343
344static __inline__ __m512bh __DEFAULT_FN_ATTRS512
345_mm512_getexp_pbh(__m512bh __A) {
346 return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
347 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
348}
349
350static __inline__ __m512bh __DEFAULT_FN_ATTRS512
351_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
352 return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
353 (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
354}
355
356static __inline__ __m512bh __DEFAULT_FN_ATTRS512
357_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) {
358 return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
359 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
360}
361
362static __inline__ __m512bh __DEFAULT_FN_ATTRS512
363_mm512_rsqrt_pbh(__m512bh __A) {
364 return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
365 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
366}
367
368static __inline__ __m512bh __DEFAULT_FN_ATTRS512
369_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
370 return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
371 (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
372}
373
374static __inline__ __m512bh __DEFAULT_FN_ATTRS512
375_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
376 return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
377 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
378}
379
/* Expands to a call of the vreducenepbf16512 mask builtin on __A with the
   immediate imm (cast to int), an undefined vector as the third operand and
   an all-ones mask; the result is cast to __m512bh. */
380#define _mm512_reducene_pbh(__A, imm) \
381 ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
382 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \
383 (__mmask32) - 1))
384
/* Expands to a call of the vreducenepbf16512 mask builtin on __A with the
   immediate imm (cast to int), __W as the third operand and __U as the mask;
   the result is cast to __m512bh. */
385#define _mm512_mask_reducene_pbh(__W, __U, __A, imm) \
386 ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
387 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
388 (__mmask32)(__U)))
389
/* Expands to a call of the vreducenepbf16512 mask builtin on __A with the
   immediate imm (cast to int), the zero vector as the third operand and __U
   as the mask; the result is cast to __m512bh. */
390#define _mm512_maskz_reducene_pbh(__U, __A, imm) \
391 ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
392 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
393 (__mmask32)(__U)))
394
/* Expands to a call of the vrndscalenepbf16 mask builtin on __A with the
   immediate imm (cast to int), the zero vector as the third operand and an
   all-ones mask; the result is cast to __m512bh.
   NOTE(review): other unmasked forms in this file pass
   _mm512_undefined_pbh() in that slot; with an all-ones mask the difference
   is presumably not observable - confirm before changing. */
395#define _mm512_roundscalene_pbh(__A, imm) \
396 ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
397 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
398 (__mmask32) - 1))
399
/* Expands to a call of the vrndscalenepbf16 mask builtin on __A with the
   immediate imm (cast to int), __W as the third operand and __U as the mask;
   the result is cast to __m512bh. */
400#define _mm512_mask_roundscalene_pbh(__W, __U, __A, imm) \
401 ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
402 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
403 (__mmask32)(__U)))
404
/* Expands to a call of the vrndscalenepbf16 mask builtin on __A with the
   immediate imm (cast to int), the zero vector as the third operand and __U
   as the mask; the result is cast to __m512bh. */
405#define _mm512_maskz_roundscalene_pbh(__U, __A, imm) \
406 ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
407 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
408 (__mmask32)(__U)))
409
/* Expands to a call of the vgetmantpbf16512 mask builtin on __A. The
   immediate is assembled as ((__C) << 2) | (__B) and cast to int; the third
   operand is an undefined vector and the mask is all ones. The result is
   cast to __m512bh. */
410#define _mm512_getmant_pbh(__A, __B, __C) \
411 ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
412 (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
413 (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1))
414
/* Expands to a call of the vgetmantpbf16512 mask builtin on __A. The
   immediate is assembled as ((__C) << 2) | (__B) and cast to int; the third
   operand is __W and the mask is __U. The result is cast to __m512bh. */
415#define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C) \
416 ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
417 (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
418 (__v32bf)(__m512bh)(__W), (__mmask32)(__U)))
419
/* Expands to a call of the vgetmantpbf16512 mask builtin on __A. The
   immediate is assembled as ((__C) << 2) | (__B) and cast to int; the third
   operand is the zero vector and the mask is __U. The result is cast to
   __m512bh. */
420#define _mm512_maskz_getmant_pbh(__U, __A, __B, __C) \
421 ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
422 (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
423 (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))
424
425static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
426 return (__m512bh)__builtin_ia32_vsqrtnepbf16512((__v32bf)__A);
427}
428
429static __inline__ __m512bh __DEFAULT_FN_ATTRS512
430_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
431 return (__m512bh)__builtin_ia32_selectpbf_512(
432 (__mmask32)__U, (__v32bf)_mm512_sqrt_pbh(__A), (__v32bf)__W);
433}
434
435static __inline__ __m512bh __DEFAULT_FN_ATTRS512
436_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
437 return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U,
438 (__v32bf)_mm512_sqrt_pbh(__A),
439 (__v32bf)_mm512_setzero_pbh());
440}
441
442static __inline__ __m512bh __DEFAULT_FN_ATTRS512
443_mm512_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
444 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
445 (__v32bf)__C);
446}
447
448static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddne_pbh(
449 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
450 return (__m512bh)__builtin_ia32_selectpbf_512(
451 (__mmask32)__U,
452 _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
453 (__v32bf)__A);
454}
455
456static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddne_pbh(
457 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
458 return (__m512bh)__builtin_ia32_selectpbf_512(
459 (__mmask32)__U,
460 _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
461 (__v32bf)__C);
462}
463
464static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddne_pbh(
465 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
466 return (__m512bh)__builtin_ia32_selectpbf_512(
467 (__mmask32)__U,
468 _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
469 (__v32bf)_mm512_setzero_pbh());
470}
471
472static __inline__ __m512bh __DEFAULT_FN_ATTRS512
473_mm512_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
474 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
475 -(__v32bf)__C);
476}
477
478static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubne_pbh(
479 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
480 return (__m512bh)__builtin_ia32_selectpbf_512(
481 (__mmask32)__U,
482 _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
483 (__v32bf)__A);
484}
485
486static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubne_pbh(
487 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
488 return (__m512bh)__builtin_ia32_selectpbf_512(
489 (__mmask32)__U,
490 _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
491 (__v32bf)__C);
492}
493
494static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubne_pbh(
495 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
496 return (__m512bh)__builtin_ia32_selectpbf_512(
497 (__mmask32)__U,
498 _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
499 (__v32bf)_mm512_setzero_pbh());
500}
501
502static __inline__ __m512bh __DEFAULT_FN_ATTRS512
503_mm512_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
504 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
505 (__v32bf)__C);
506}
507
508static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmaddne_pbh(
509 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
510 return (__m512bh)__builtin_ia32_selectpbf_512(
511 (__mmask32)__U,
512 _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
513 (__v32bf)__A);
514}
515
516static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmaddne_pbh(
517 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
518 return (__m512bh)__builtin_ia32_selectpbf_512(
519 (__mmask32)__U,
520 _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
521 (__v32bf)__C);
522}
523
524static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmaddne_pbh(
525 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
526 return (__m512bh)__builtin_ia32_selectpbf_512(
527 (__mmask32)__U,
528 _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
529 (__v32bf)_mm512_setzero_pbh());
530}
531
532static __inline__ __m512bh __DEFAULT_FN_ATTRS512
533_mm512_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
534 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
535 -(__v32bf)__C);
536}
537
538static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsubne_pbh(
539 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
540 return (__m512bh)__builtin_ia32_selectpbf_512(
541 (__mmask32)__U,
542 _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
543 (__v32bf)__A);
544}
545
546static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsubne_pbh(
547 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
548 return (__m512bh)__builtin_ia32_selectpbf_512(
549 (__mmask32)__U,
550 _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
551 (__v32bf)__C);
552}
553
554static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsubne_pbh(
555 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
556 return (__m512bh)__builtin_ia32_selectpbf_512(
557 (__mmask32)__U,
558 _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
559 (__v32bf)_mm512_setzero_pbh());
560}
561
562#undef __DEFAULT_FN_ATTRS512
563
564#endif
565#endif
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
static __inline__ uint32_t volatile uint32_t * __p
Definition: arm_acle.h:88
#define __DEFAULT_FN_ATTRS512
unsigned int __mmask32
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void)
static __inline__ void int __a
Definition: emmintrin.h:4079
struct __storeu_i16 *__P __v
Definition: immintrin.h:472
__inline unsigned int unsigned int unsigned int * __P
Definition: bmi2intrin.h:25