/* Source: clang 20.0.0git, lib/Headers/avx10_2satcvtintrin.h
 * (documentation-viewer banner removed). */
1/*===----------- avx10_2satcvtintrin.h - AVX10_2SATCVT intrinsics ----------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error \
11 "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead."
12#endif // __IMMINTRIN_H
13
14#ifndef __AVX10_2SATCVTINTRIN_H
15#define __AVX10_2SATCVTINTRIN_H
16
/* Convert packed bf16 elements to packed saturated signed 8-bit integers
 * (results held in 16-bit lanes).  The bf16 builtins take no mask operand,
 * so masked/zero-masked forms are built with a word-wise select over the
 * unmasked conversion. */
#define _mm_ipcvtnebf16_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epi8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvtnebf16_epi8(A), \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvtnebf16_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtnebf16_epi8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epi8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtnebf16_epi8(A), \
                                       (__v16hi)_mm256_setzero_si256()))
41
/* Convert packed bf16 elements to packed saturated unsigned 8-bit integers
 * (results held in 16-bit lanes).  Masking is implemented with a word-wise
 * select, mirroring the signed bf16 variants above. */
#define _mm_ipcvtnebf16_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epu8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvtnebf16_epu8(A), \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvtnebf16_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtnebf16_epu8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epu8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvtnebf16_epu8(A), \
                                       (__v16hi)_mm256_setzero_si256()))
66
/* Convert packed fp16 elements to packed saturated signed 8-bit integers
 * (results held in 16-bit lanes).  These builtins carry their own mask and
 * passthrough operands; the 256-bit forms also take a rounding-control
 * argument (_MM_FROUND_CUR_DIRECTION, or immediate R in *_round* forms).
 * NOTE: R is parenthesized before the cast so expressions like A|B are not
 * split by the cast's higher precedence. */
#define _mm_ipcvtph_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtph_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \
                                              (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtph_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtph_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtph_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
                                              (__v16hu)(W), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtph_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundph_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
                                              (__v16hu)_mm256_setzero_si256(), \
                                              (__mmask16)-1, (const int)(R)))

#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)(R)))

#define _mm256_maskz_ipcvt_roundph_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
                                              (__v16hu)_mm256_setzero_si256(), \
                                              (__mmask16)(U), (const int)(R)))
107
/* Convert packed fp16 elements to packed saturated unsigned 8-bit integers
 * (results held in 16-bit lanes).  Same shape as the signed fp16 family:
 * builtin-level masking, rounding control on the 256-bit forms, and the
 * rounding immediate R parenthesized before the cast. */
#define _mm_ipcvtph_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtph_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \
                                               (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtph_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtph_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A), \
                                               (__v16hu)(W), (__mmask16)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtph_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundph_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      (const int)(R)))

#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)(R)))

#define _mm256_maskz_ipcvt_roundph_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
      (const int)(R)))
148
/* Convert packed single-precision elements to packed saturated signed 8-bit
 * integers (results held in 32-bit lanes).  Builtin-level masking; rounding
 * control on the 256-bit forms, with R parenthesized before the cast. */
#define _mm_ipcvtps_epi8(A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \
                                              (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtps_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtps_epi8(A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
                                              (__v8su)(W), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtps_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundps_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
                                              (__v8su)_mm256_setzero_si256(), \
                                              (__mmask8)-1, (const int)(R)))

#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)(R)))

#define _mm256_maskz_ipcvt_roundps_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
                                              (__v8su)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (const int)(R)))
189
/* Convert packed single-precision elements to packed saturated unsigned 8-bit
 * integers (results held in 32-bit lanes).  Mirrors the signed ps family. */
#define _mm_ipcvtps_epu8(A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtps_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \
                                               (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtps_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtps_epu8(A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtps_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)(W), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtps_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundps_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)-1, (const int)(R)))

#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)(R)))

#define _mm256_maskz_ipcvt_roundps_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)(U), (const int)(R)))
230
/* Truncating ("tt") conversion of packed bf16 elements to packed saturated
 * signed 8-bit integers (results held in 16-bit lanes).  Masking via
 * word-wise select, as with the non-truncating bf16 forms. */
#define _mm_ipcvttnebf16_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epi8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvttnebf16_epi8(A), \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvttnebf16_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvttnebf16_epi8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epi8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvttnebf16_epi8(A), \
                                       (__v16hi)_mm256_setzero_si256()))
255
/* Truncating ("tt") conversion of packed bf16 elements to packed saturated
 * unsigned 8-bit integers (results held in 16-bit lanes). */
#define _mm_ipcvttnebf16_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epu8(U, A) \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
                                       (__v8hi)_mm_ipcvttnebf16_epu8(A), \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvttnebf16_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvttnebf16_epu8(A), \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epu8(U, A) \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
                                       (__v16hi)_mm256_ipcvttnebf16_epu8(A), \
                                       (__v16hi)_mm256_setzero_si256()))
280
/* Truncating ("tt") conversion of packed fp16 elements to packed saturated
 * signed 8-bit integers (results held in 16-bit lanes).  Builtin-level
 * masking; 256-bit forms carry rounding/SAE control, with R parenthesized
 * before the cast. */
#define _mm_ipcvttph_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttph_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \
                                               (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttph_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttph_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttph_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A), \
                                               (__v16hu)(W), (__mmask16)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttph_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundph_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      (const int)(R)))

#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)(R)))

#define _mm256_maskz_ipcvtt_roundph_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
      (const int)(R)))
321
/* Truncating ("tt") conversion of packed fp16 elements to packed saturated
 * unsigned 8-bit integers (results held in 16-bit lanes). */
#define _mm_ipcvttph_epu8(A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttph_epu8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \
                                                (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttph_epu8(U, A) \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttph_epu8(A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttph_epu8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A), \
                                                (__v16hu)(W), (__mmask16)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttph_epu8(U, A) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundph_epu8(A, R) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
      (const int)(R)))

#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)(R)))

#define _mm256_maskz_ipcvtt_roundph_epu8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
      (const int)(R)))
362
/* Truncating ("tt") conversion of packed single-precision elements to packed
 * saturated signed 8-bit integers (results held in 32-bit lanes). */
#define _mm_ipcvttps_epi8(A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttps_epi8(W, U, A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \
                                               (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttps_epi8(U, A) \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttps_epi8(A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttps_epi8(W, U, A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)(W), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttps_epi8(U, A) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundps_epi8(A, R) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)-1, (const int)(R)))

#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)(R)))

#define _mm256_maskz_ipcvtt_roundps_epi8(U, A, R) \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)(U), (const int)(R)))
403
404#define _mm_ipcvttps_epu8(A) \
405 ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
406 (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
407
408#define _mm_mask_ipcvttps_epu8(W, U, A) \
409 ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \
410 (__v4su)(W), (__mmask8)(U)))
411
412#define _mm_maskz_ipcvttps_epu8(U, A) \
413 ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
414 (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
415
416#define _mm256_ipcvttps_epu8(A) \
417 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
418 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
419 _MM_FROUND_CUR_DIRECTION))
420
421#define _mm256_mask_ipcvttps_epu8(W, U, A) \
422 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A), \
423 (__v8su)(W), (__mmask8)(U), \
424 _MM_FROUND_CUR_DIRECTION))
425
426#define _mm256_maskz_ipcvttps_epu8(U, A) \
427 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
428 (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
429 _MM_FROUND_CUR_DIRECTION))
430
431#define _mm256_ipcvtt_roundps_epu8(A, R) \
432 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
433 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
434 (const int)R))
435
436#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R) \
437 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
438 (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))
439
440#define _mm256_maskz_ipcvtt_roundps_epu8(U, A, R) \
441 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
442 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \
443 (const int)R))
444#endif // __AVX10_2SATCVTINTRIN_H