#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.  */
#error                                                                         \
    "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif

#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_

#if defined(__powerpc64__) &&                                                  \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>

/* We need definitions from the SSE header files.  */
#include <pmmintrin.h>

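/* Absolute value: compute |x| for each packed signed integer element; the
   __m64 variants operate on a doubled copy of the 64-bit operand.  */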
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi16(__m128i __A) {
  return (__m128i)vec_abs((__v8hi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi32(__m128i __A) {
  return (__m128i)vec_abs((__v4si)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi8(__m128i __A) {
  return (__m128i)vec_abs((__v16qi)__A);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi16(__m64 __A) {
  __v8hi __B = (__v8hi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi32(__m64 __A) {
  __v4si __B = (__v4si)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi8(__m64 __A) {
  __v16qi __B = (__v16qi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

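/* Align-right: concatenate the two operands (first operand in the high
   half) and right-shift the result by __count bytes, returning the low
   half.  */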
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) {
  if (__builtin_constant_p(__count) && __count < 16) {
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
    __B = (__m128i)vec_reve((__v16qu)__B);
#endif
    __A = (__m128i)vec_sld((__v16qu)__B, (__v16qu)__A, __count);
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
#endif
    return __A;
  }

  if (__count == 0)
    return __B;

  if (__count >= 16) {
    if (__count >= 32) {
      const __v16qu __zero = {0};
      return (__m128i)__zero;
    } else {
      const __v16qu __shift = vec_splats((unsigned char)((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
      return (__m128i)vec_sro((__v16qu)__A, __shift);
#else
      return (__m128i)vec_slo((__v16qu)__A, __shift);
#endif
    }
  } else {
    const __v16qu __shiftA = vec_splats((unsigned char)((16 - __count) * 8));
    const __v16qu __shiftB = vec_splats((unsigned char)(__count * 8));
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_slo((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_sro((__v16qu)__B, __shiftB);
#else
    __A = (__m128i)vec_sro((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_slo((__v16qu)__B, __shiftB);
#endif
    return (__m128i)vec_or((__v16qu)__A, (__v16qu)__B);
  }
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) {
  if (__count < 16) {
    __v2du __C = {__B, __A};
#ifdef __LITTLE_ENDIAN__
    const __v4su __shift = {__count << 3, 0, 0, 0};
    __C = (__v2du)vec_sro((__v16qu)__C, (__v16qu)__shift);
#else
    const __v4su __shift = {0, 0, 0, __count << 3};
    __C = (__v2du)vec_slo((__v16qu)__C, (__v16qu)__shift);
#endif
    return (__m64)__C[0];
  } else {
    const __m64 __zero = {0};
    return __zero;
  }
}

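/* Horizontal add: sum the adjacent element pairs of the two operands; the
   "hadds" forms saturate the 16-bit sums.  */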
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi32(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_epi16(__m128i __A, __m128i __B) {
  __v4si __C = {0}, __D = {0};
  __C = vec_sum4s((__v8hi)__A, __C);
  __D = vec_sum4s((__v8hi)__B, __D);
  __C = (__v4si)vec_packs(__C, __D);
  return (__m128i)__C;
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_pi16(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v4si __D = vec_sum4s(__C, __zero);
  __C = vec_packs(__D, __D);
  return (__m64)((__v2du)__C)[1];
}

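/* Horizontal subtract: subtract the second element of each adjacent pair
   from the first; the "hsubs" forms saturate the 16-bit differences.  */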
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi32(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __C = (__v4si)(__v2du){__A, __B};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_subs(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __P);
  __v8hi __E = vec_perm(__C, __C, __Q);
  __C = vec_subs(__D, __E);
  return (__m64)((__v2du)__C)[1];
}

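/* Byte shuffle: select bytes of the first operand using the low bits of
   each control byte in the second; a set sign bit in the control byte
   clears the corresponding result byte.  */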
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __vector __bool char __select = vec_cmplt((__v16qi)__B, __zero);
  __v16qi __C = vec_perm((__v16qi)__A, (__v16qi)__A, (__v16qu)__B);
  return (__m128i)vec_sel(__C, __zero, __select);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __vector __bool char __select = vec_cmplt((__v16qi)__D, __zero);
  __C = vec_perm((__v16qi)__C, (__v16qi)__C, (__v16qu)__D);
  __C = vec_sel(__C, __zero, __select);
  return (__m64)((__v2du)(__C))[0];
}

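/* Sign operations: negate, keep, or zero each element of the first operand
   according to whether the corresponding element of the second operand is
   negative, positive, or zero.  These rely on the POWER8 integer vec_neg
   and vec_mul built-ins, hence the _ARCH_PWR8 guards.  */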
#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __v16qi __selectneg = (__v16qi)vec_cmplt((__v16qi)__B, __zero);
  __v16qi __selectpos =
      (__v16qi)vec_neg((__v16qi)vec_cmpgt((__v16qi)__B, __zero));
  __v16qi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v16qi)__A, (__v16qi)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi16(__m128i __A, __m128i __B) {
  const __v8hi __zero = {0};
  __v8hi __selectneg = (__v8hi)vec_cmplt((__v8hi)__B, __zero);
  __v8hi __selectpos = (__v8hi)vec_neg((__v8hi)vec_cmpgt((__v8hi)__B, __zero));
  __v8hi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v8hi)__A, (__v8hi)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi32(__m128i __A, __m128i __B) {
  const __v4si __zero = {0};
  __v4si __selectneg = (__v4si)vec_cmplt((__v4si)__B, __zero);
  __v4si __selectpos = (__v4si)vec_neg((__v4si)vec_cmpgt((__v4si)__B, __zero));
  __v4si __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v4si)__A, (__v4si)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi16(__m64 __A, __m64 __B) {
  const __v8hi __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __C = (__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi32(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

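/* Multiply-add: multiply unsigned bytes of the first operand by the
   corresponding signed bytes of the second and add adjacent 16-bit
   products with signed saturation.  */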
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_epi16(__m128i __A, __m128i __B) {
  __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __v8hi __C = vec_and(vec_unpackh((__v16qi)__A), __unsigned);
  __v8hi __D = vec_and(vec_unpackl((__v16qi)__A), __unsigned);
  __v8hi __E = vec_unpackh((__v16qi)__B);
  __v8hi __F = vec_unpackl((__v16qi)__B);
  __C = vec_mul(__C, __E);
  __D = vec_mul(__D, __F);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __E = vec_perm(__C, __D, __odds);
  __F = vec_perm(__C, __D, __evens);
  return (__m128i)vec_adds(__E, __F);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __C = vec_unpackl((__v16qi)__C);
  const __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __C = vec_and(__C, __unsigned);
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __D = vec_unpackl((__v16qi)__D);
  __D = vec_mul(__C, __D);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __C = vec_perm(__D, __D, __odds);
  __D = vec_perm(__D, __D, __evens);
  __C = vec_adds(__C, __D);
  return (__m64)((__v2du)(__C))[0];
}

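/* Rounded high-part multiply: multiply packed signed 16-bit values and
   return the rounded value of bits [30:15] of each 32-bit product,
   i.e. (((a * b) >> 14) + 1) >> 1.  */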
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_epi16(__m128i __A, __m128i __B) {
  __v4si __C = vec_unpackh((__v8hi)__A);
  __v4si __D = vec_unpackh((__v8hi)__B);
  __C = vec_mul(__C, __D);
  __D = vec_unpackl((__v8hi)__A);
  __v4si __E = vec_unpackl((__v8hi)__B);
  __D = vec_mul(__D, __E);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  __D = vec_sr(__D, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __D = vec_add(__D, __ones);
  __D = vec_sr(__D, (__v4su)__ones);
  return (__m128i)vec_pack(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_pi16(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __C = vec_unpackh((__v8hi)__C);
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __D = vec_unpackh((__v8hi)__D);
  __C = vec_mul(__C, __D);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __v8hi __E = vec_pack(__C, __D);
  return (__m64)((__v2du)(__E))[0];
}

#else
#include_next <tmmintrin.h>
#endif /* defined(__powerpc64__) &&
          (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* TMMINTRIN_H_ */