/* avx10_2satcvtintrin.h - AVX10.2 saturating integer conversion intrinsics. */
11 "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead."
14#ifndef __AVX10_2SATCVTINTRIN_H
15#define __AVX10_2SATCVTINTRIN_H
17#define _mm_ipcvtnebf16_epi8(A) \
18 ((__m128i)__builtin_ia32_vcvtnebf162ibs128((__v8bf)(__m128bh)(A)))
20#define _mm_mask_ipcvtnebf16_epi8(W, U, A) \
21 ((__m128i)__builtin_ia32_selectw_128( \
22 (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epi8(A), (__v8hi)(__m128i)(W)))
24#define _mm_maskz_ipcvtnebf16_epi8(U, A) \
25 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
26 (__v8hi)_mm_ipcvtnebf16_epi8(A), \
27 (__v8hi)_mm_setzero_si128()))
29#define _mm256_ipcvtnebf16_epi8(A) \
30 ((__m256i)__builtin_ia32_vcvtnebf162ibs256((__v16bf)(__m256bh)(A)))
32#define _mm256_mask_ipcvtnebf16_epi8(W, U, A) \
33 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
34 (__v16hi)_mm256_ipcvtnebf16_epi8(A), \
35 (__v16hi)(__m256i)(W)))
37#define _mm256_maskz_ipcvtnebf16_epi8(U, A) \
38 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
39 (__v16hi)_mm256_ipcvtnebf16_epi8(A), \
40 (__v16hi)_mm256_setzero_si256()))
42#define _mm_ipcvtnebf16_epu8(A) \
43 ((__m128i)__builtin_ia32_vcvtnebf162iubs128((__v8bf)(__m128bh)(A)))
45#define _mm_mask_ipcvtnebf16_epu8(W, U, A) \
46 ((__m128i)__builtin_ia32_selectw_128( \
47 (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epu8(A), (__v8hi)(__m128i)(W)))
49#define _mm_maskz_ipcvtnebf16_epu8(U, A) \
50 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
51 (__v8hi)_mm_ipcvtnebf16_epu8(A), \
52 (__v8hi)_mm_setzero_si128()))
54#define _mm256_ipcvtnebf16_epu8(A) \
55 ((__m256i)__builtin_ia32_vcvtnebf162iubs256((__v16bf)(__m256bh)(A)))
57#define _mm256_mask_ipcvtnebf16_epu8(W, U, A) \
58 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
59 (__v16hi)_mm256_ipcvtnebf16_epu8(A), \
60 (__v16hi)(__m256i)(W)))
62#define _mm256_maskz_ipcvtnebf16_epu8(U, A) \
63 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
64 (__v16hi)_mm256_ipcvtnebf16_epu8(A), \
65 (__v16hi)_mm256_setzero_si256()))
67#define _mm_ipcvtph_epi8(A) \
68 ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
69 (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
71#define _mm_mask_ipcvtph_epi8(W, U, A) \
72 ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \
73 (__v8hu)(W), (__mmask8)(U)))
75#define _mm_maskz_ipcvtph_epi8(U, A) \
76 ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \
77 (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
79#define _mm256_ipcvtph_epi8(A) \
80 ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
81 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
82 _MM_FROUND_CUR_DIRECTION))
84#define _mm256_mask_ipcvtph_epi8(W, U, A) \
85 ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
86 (__v16hu)(W), (__mmask16)(U), \
87 _MM_FROUND_CUR_DIRECTION))
89#define _mm256_maskz_ipcvtph_epi8(U, A) \
90 ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
91 (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
92 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
94#define _mm256_ipcvt_roundph_epi8(A, R) \
95 ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
96 (__v16hu)_mm256_setzero_si256(), \
97 (__mmask16)-1, (const int)R))
99#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R) \
100 ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \
101 (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))
103#define _mm256_maskz_ipcvt_roundph_epi8(U, A, R) \
104 ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \
105 (__v16hu)_mm256_setzero_si256(), \
106 (__mmask16)(U), (const int)R))
108#define _mm_ipcvtph_epu8(A) \
109 ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
110 (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
112#define _mm_mask_ipcvtph_epu8(W, U, A) \
113 ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \
114 (__v8hu)(W), (__mmask8)(U)))
116#define _mm_maskz_ipcvtph_epu8(U, A) \
117 ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \
118 (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
120#define _mm256_ipcvtph_epu8(A) \
121 ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
122 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
123 _MM_FROUND_CUR_DIRECTION))
125#define _mm256_mask_ipcvtph_epu8(W, U, A) \
126 ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A), \
127 (__v16hu)(W), (__mmask16)(U), \
128 _MM_FROUND_CUR_DIRECTION))
130#define _mm256_maskz_ipcvtph_epu8(U, A) \
131 ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
132 (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
133 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
135#define _mm256_ipcvt_roundph_epu8(A, R) \
136 ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
137 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
140#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R) \
141 ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
142 (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))
144#define _mm256_maskz_ipcvt_roundph_epu8(U, A, R) \
145 ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \
146 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
149#define _mm_ipcvtps_epi8(A) \
150 ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
151 (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
153#define _mm_mask_ipcvtps_epi8(W, U, A) \
154 ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \
155 (__v4su)(W), (__mmask8)(U)))
157#define _mm_maskz_ipcvtps_epi8(U, A) \
158 ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \
159 (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
161#define _mm256_ipcvtps_epi8(A) \
162 ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
163 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
164 _MM_FROUND_CUR_DIRECTION))
166#define _mm256_mask_ipcvtps_epi8(W, U, A) \
167 ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
168 (__v8su)(W), (__mmask8)(U), \
169 _MM_FROUND_CUR_DIRECTION))
171#define _mm256_maskz_ipcvtps_epi8(U, A) \
172 ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
173 (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
174 _MM_FROUND_CUR_DIRECTION))
176#define _mm256_ipcvt_roundps_epi8(A, R) \
177 ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
178 (__v8su)_mm256_setzero_si256(), \
179 (__mmask8)-1, (const int)R))
181#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R) \
182 ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \
183 (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))
185#define _mm256_maskz_ipcvt_roundps_epi8(U, A, R) \
186 ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \
187 (__v8su)_mm256_setzero_si256(), \
188 (__mmask8)(U), (const int)R))
190#define _mm_ipcvtps_epu8(A) \
191 ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
192 (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
194#define _mm_mask_ipcvtps_epu8(W, U, A) \
195 ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \
196 (__v4su)(W), (__mmask8)(U)))
198#define _mm_maskz_ipcvtps_epu8(U, A) \
199 ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \
200 (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
202#define _mm256_ipcvtps_epu8(A) \
203 ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
204 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
205 _MM_FROUND_CUR_DIRECTION))
207#define _mm256_mask_ipcvtps_epu8(W, U, A) \
208 ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
209 (__v8su)(W), (__mmask8)(U), \
210 _MM_FROUND_CUR_DIRECTION))
212#define _mm256_maskz_ipcvtps_epu8(U, A) \
213 ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
214 (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
215 _MM_FROUND_CUR_DIRECTION))
217#define _mm256_ipcvt_roundps_epu8(A, R) \
218 ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
219 (__v8su)_mm256_setzero_si256(), \
220 (__mmask8)-1, (const int)R))
222#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R) \
223 ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \
224 (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))
226#define _mm256_maskz_ipcvt_roundps_epu8(U, A, R) \
227 ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \
228 (__v8su)_mm256_setzero_si256(), \
229 (__mmask8)(U), (const int)R))
231#define _mm_ipcvttnebf16_epi8(A) \
232 ((__m128i)__builtin_ia32_vcvttnebf162ibs128((__v8bf)(__m128bh)(A)))
234#define _mm_mask_ipcvttnebf16_epi8(W, U, A) \
235 ((__m128i)__builtin_ia32_selectw_128( \
236 (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epi8(A), (__v8hi)(__m128i)(W)))
238#define _mm_maskz_ipcvttnebf16_epi8(U, A) \
239 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
240 (__v8hi)_mm_ipcvttnebf16_epi8(A), \
241 (__v8hi)_mm_setzero_si128()))
243#define _mm256_ipcvttnebf16_epi8(A) \
244 ((__m256i)__builtin_ia32_vcvttnebf162ibs256((__v16bf)(__m256bh)(A)))
246#define _mm256_mask_ipcvttnebf16_epi8(W, U, A) \
247 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
248 (__v16hi)_mm256_ipcvttnebf16_epi8(A), \
249 (__v16hi)(__m256i)(W)))
251#define _mm256_maskz_ipcvttnebf16_epi8(U, A) \
252 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
253 (__v16hi)_mm256_ipcvttnebf16_epi8(A), \
254 (__v16hi)_mm256_setzero_si256()))
256#define _mm_ipcvttnebf16_epu8(A) \
257 ((__m128i)__builtin_ia32_vcvttnebf162iubs128((__v8bf)(__m128bh)(A)))
259#define _mm_mask_ipcvttnebf16_epu8(W, U, A) \
260 ((__m128i)__builtin_ia32_selectw_128( \
261 (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epu8(A), (__v8hi)(__m128i)(W)))
263#define _mm_maskz_ipcvttnebf16_epu8(U, A) \
264 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
265 (__v8hi)_mm_ipcvttnebf16_epu8(A), \
266 (__v8hi)_mm_setzero_si128()))
268#define _mm256_ipcvttnebf16_epu8(A) \
269 ((__m256i)__builtin_ia32_vcvttnebf162iubs256((__v16bf)(__m256bh)(A)))
271#define _mm256_mask_ipcvttnebf16_epu8(W, U, A) \
272 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
273 (__v16hi)_mm256_ipcvttnebf16_epu8(A), \
274 (__v16hi)(__m256i)(W)))
276#define _mm256_maskz_ipcvttnebf16_epu8(U, A) \
277 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
278 (__v16hi)_mm256_ipcvttnebf16_epu8(A), \
279 (__v16hi)_mm256_setzero_si256()))
281#define _mm_ipcvttph_epi8(A) \
282 ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
283 (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
285#define _mm_mask_ipcvttph_epi8(W, U, A) \
286 ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \
287 (__v8hu)(W), (__mmask8)(U)))
289#define _mm_maskz_ipcvttph_epi8(U, A) \
290 ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \
291 (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
293#define _mm256_ipcvttph_epi8(A) \
294 ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
295 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
296 _MM_FROUND_CUR_DIRECTION))
298#define _mm256_mask_ipcvttph_epi8(W, U, A) \
299 ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A), \
300 (__v16hu)(W), (__mmask16)(U), \
301 _MM_FROUND_CUR_DIRECTION))
303#define _mm256_maskz_ipcvttph_epi8(U, A) \
304 ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
305 (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
306 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
308#define _mm256_ipcvtt_roundph_epi8(A, R) \
309 ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
310 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
313#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R) \
314 ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
315 (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))
317#define _mm256_maskz_ipcvtt_roundph_epi8(U, A, R) \
318 ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \
319 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
322#define _mm_ipcvttph_epu8(A) \
323 ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
324 (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
326#define _mm_mask_ipcvttph_epu8(W, U, A) \
327 ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \
328 (__v8hu)(W), (__mmask8)(U)))
330#define _mm_maskz_ipcvttph_epu8(U, A) \
331 ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \
332 (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))
334#define _mm256_ipcvttph_epu8(A) \
335 ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
336 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
337 _MM_FROUND_CUR_DIRECTION))
339#define _mm256_mask_ipcvttph_epu8(W, U, A) \
340 ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A), \
341 (__v16hu)(W), (__mmask16)(U), \
342 _MM_FROUND_CUR_DIRECTION))
344#define _mm256_maskz_ipcvttph_epu8(U, A) \
345 ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
346 (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \
347 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
349#define _mm256_ipcvtt_roundph_epu8(A, R) \
350 ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
351 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \
354#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R) \
355 ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
356 (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))
358#define _mm256_maskz_ipcvtt_roundph_epu8(U, A, R) \
359 ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \
360 (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \
363#define _mm_ipcvttps_epi8(A) \
364 ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
365 (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
367#define _mm_mask_ipcvttps_epi8(W, U, A) \
368 ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \
369 (__v4su)(W), (__mmask8)(U)))
371#define _mm_maskz_ipcvttps_epi8(U, A) \
372 ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \
373 (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
375#define _mm256_ipcvttps_epi8(A) \
376 ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
377 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
378 _MM_FROUND_CUR_DIRECTION))
380#define _mm256_mask_ipcvttps_epi8(W, U, A) \
381 ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
382 (__v8su)(W), (__mmask8)(U), \
383 _MM_FROUND_CUR_DIRECTION))
385#define _mm256_maskz_ipcvttps_epi8(U, A) \
386 ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
387 (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
388 _MM_FROUND_CUR_DIRECTION))
390#define _mm256_ipcvtt_roundps_epi8(A, R) \
391 ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
392 (__v8su)_mm256_setzero_si256(), \
393 (__mmask8)-1, (const int)R))
395#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R) \
396 ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \
397 (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))
399#define _mm256_maskz_ipcvtt_roundps_epi8(U, A, R) \
400 ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \
401 (__v8su)_mm256_setzero_si256(), \
402 (__mmask8)(U), (const int)R))
404#define _mm_ipcvttps_epu8(A) \
405 ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
406 (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
408#define _mm_mask_ipcvttps_epu8(W, U, A) \
409 ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \
410 (__v4su)(W), (__mmask8)(U)))
412#define _mm_maskz_ipcvttps_epu8(U, A) \
413 ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \
414 (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))
416#define _mm256_ipcvttps_epu8(A) \
417 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
418 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
419 _MM_FROUND_CUR_DIRECTION))
421#define _mm256_mask_ipcvttps_epu8(W, U, A) \
422 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A), \
423 (__v8su)(W), (__mmask8)(U), \
424 _MM_FROUND_CUR_DIRECTION))
426#define _mm256_maskz_ipcvttps_epu8(U, A) \
427 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
428 (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \
429 _MM_FROUND_CUR_DIRECTION))
431#define _mm256_ipcvtt_roundps_epu8(A, R) \
432 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
433 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \
436#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R) \
437 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
438 (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))
440#define _mm256_maskz_ipcvtt_roundps_epu8(U, A, R) \
441 ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \
442 (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \