/*===---- tmmintrin.h - Implementation of SSSE3 intrinsics on PowerPC ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0. */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets. */
#endif
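
/* Typical porting flow (a sketch, not prescribed by this header): x86_64
   sources that include <tmmintrin.h> can often be rebuilt for powerpc64le
   unchanged, along the lines of

     clang --target=powerpc64le-linux-gnu -DNO_WARN_X86_INTRINSICS -c port.c

   with port.c keeping its Intel intrinsic calls such as _mm_abs_epi16().
   The exact target and feature flags depend on the toolchain setup.  */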

#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_

#if defined(__powerpc64__) && \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>

/* We need definitions from the SSE header files. */
#include <pmmintrin.h>

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi16(__m128i __A) {
  return (__m128i)vec_abs((__v8hi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi32(__m128i __A) {
  return (__m128i)vec_abs((__v4si)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi8(__m128i __A) {
  return (__m128i)vec_abs((__v16qi)__A);
}
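
/* The three 128-bit absolute-value intrinsics above map directly onto
   vec_abs; only the element-type cast differs.  A usage sketch with
   illustrative values:

     __m128i __v = _mm_set_epi16(-5, 4, -3, 2, -1, 0, 7, -8);
     __m128i __r = _mm_abs_epi16(__v);   // lanes 0..7: {8, 7, 0, 1, 2, 3, 4, 5}

   As on x86, the absolute value of the most negative element (e.g. -32768
   for 16-bit lanes) wraps back to itself.  */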

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi16(__m64 __A) {
  __v8hi __B = (__v8hi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi32(__m64 __A) {
  __v4si __B = (__v4si)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi8(__m64 __A) {
  __v16qi __B = (__v16qi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}
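
/* The __m64 variants widen the 64-bit operand by splatting it into both
   doublewords of a 128-bit vector, run the same AltiVec operation on all
   lanes, and extract one 64-bit half of the result.  This
   widen/operate/extract pattern recurs throughout the __m64 intrinsics
   below.  */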

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) {
  if (__builtin_constant_p(__count) && __count < 16) {
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
    __B = (__m128i)vec_reve((__v16qu)__B);
#endif
    __A = (__m128i)vec_sld((__v16qu)__B, (__v16qu)__A, __count);
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
#endif
    return __A;
  }

  if (__count == 0)
    return __B;

  if (__count >= 16) {
    if (__count >= 32) {
      const __v16qu __zero = {0};
      return (__m128i)__zero;
    } else {
      const __v16qu __shift = vec_splats((unsigned char)((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
      return (__m128i)vec_sro((__v16qu)__A, __shift);
#else
      return (__m128i)vec_slo((__v16qu)__A, __shift);
#endif
    }
  } else {
    const __v16qu __shiftA = vec_splats((unsigned char)((16 - __count) * 8));
    const __v16qu __shiftB = vec_splats((unsigned char)(__count * 8));
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_slo((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_sro((__v16qu)__B, __shiftB);
#else
    __A = (__m128i)vec_sro((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_slo((__v16qu)__B, __shiftB);
#endif
    return (__m128i)vec_or((__v16qu)__A, (__v16qu)__B);
  }
}
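
/* _mm_alignr_epi8 concatenates __A (high) with __B (low) and shifts the
   32-byte intermediate right by __count bytes, keeping the low 16 bytes.
   A compile-time __count < 16 becomes a single vec_sld on (for little
   endian, byte-reversed) operands.  Otherwise: a zero count returns __B, a
   count of 32 or more returns zero, a count in [16, 32) leaves only bytes
   of __A and so needs one octet shift, and the remaining (0, 16) case
   shifts each operand into place and ORs the halves, with the shift
   directions swapped between endiannesses.  A sketch with illustrative
   values:

     __m128i __hi = _mm_set1_epi8(0x11);
     __m128i __lo = _mm_set1_epi8(0x22);
     __m128i __r = _mm_alignr_epi8(__hi, __lo, 4);
     // bytes 0..11 of __r come from __lo, bytes 12..15 from __hi
*/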

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) {
  if (__count < 16) {
    __v2du __C = {__B, __A};
#ifdef __LITTLE_ENDIAN__
    const __v4su __shift = {__count << 3, 0, 0, 0};
    __C = (__v2du)vec_sro((__v16qu)__C, (__v16qu)__shift);
#else
    const __v4su __shift = {0, 0, 0, __count << 3};
    __C = (__v2du)vec_slo((__v16qu)__C, (__v16qu)__shift);
#endif
    return (__m64)__C[0];
  } else {
    const __m64 __zero = {0};
    return __zero;
  }
}
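
/* The __m64 form needs no doubling trick: {__B, __A} is already the full
   16-byte composite, so a single octet shift recovers the result for any
   __count < 16, and larger counts yield zero, matching the SSSE3
   specification for the 64-bit PALIGNR.  */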

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}
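
/* The horizontal adds are two permutes plus one add: __P gathers the
   even-indexed elements of the concatenated inputs and __Q the odd-indexed
   ones, so vec_add sums each adjacent pair.  For __A = {a0..a7} and
   __B = {b0..b7}, _mm_hadd_epi16 returns
   {a0+a1, a2+a3, a4+a5, a6+a7, b0+b1, b2+b3, b4+b5, b6+b7}.  */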

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi32(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_epi16(__m128i __A, __m128i __B) {
  __v4si __C = {0}, __D = {0};
  __C = vec_sum4s((__v8hi)__A, __C);
  __D = vec_sum4s((__v8hi)__B, __D);
  __C = (__v4si)vec_packs(__C, __D);
  return (__m128i)__C;
}
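
/* _mm_hadds_epi16 takes a shortcut: vec_sum4s already sums adjacent
   halfword pairs into 32-bit lanes, where the pair sums cannot overflow,
   and vec_packs then saturates them back to 16 bits.  E.g. a pair of
   0x7fff elements adds to 0xfffe in 32 bits and packs to 0x7fff instead
   of wrapping.  */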

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_pi16(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v4si __D = vec_sum4s(__C, __zero);
  __C = vec_packs(__D, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}
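
/* The horizontal subtractions mirror the adds: the same even/odd permute
   pair feeds vec_sub (or vec_subs in the saturating forms below), so each
   result element is the first element of a pair minus the second.  */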

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi32(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __C = (__v4si)(__v2du){__A, __B};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_subs(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __P);
  __v8hi __E = vec_perm(__C, __C, __Q);
  __C = vec_subs(__D, __E);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __vector __bool char __select = vec_cmplt((__v16qi)__B, __zero);
  __v16qi __C = vec_perm((__v16qi)__A, (__v16qi)__A, (__v16qu)__B);
  return (__m128i)vec_sel(__C, __zero, __select);
}
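
/* This emulates PSHUFB: result byte i is __A[__B[i] & 15], except that a
   control byte with its high bit set zeroes that lane.  vec_perm with __A
   as both inputs provides the index-modulo-16 lookup, and vec_sel clears
   the lanes whose control byte compared negative.  A sketch with
   illustrative values:

     // Reverse the 16 bytes of some vector __v.
     __m128i __idx = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                  8, 9, 10, 11, 12, 13, 14, 15);
     __m128i __r = _mm_shuffle_epi8(__v, __idx);
*/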

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __vector __bool char __select = vec_cmplt((__v16qi)__D, __zero);
  __C = vec_perm((__v16qi)__C, (__v16qi)__C, (__v16qu)__D);
  __C = vec_sel(__C, __zero, __select);
  return (__m64)((__v2du)(__C))[0];
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __v16qi __selectneg = (__v16qi)vec_cmplt((__v16qi)__B, __zero);
  __v16qi __selectpos =
      (__v16qi)vec_neg((__v16qi)vec_cmpgt((__v16qi)__B, __zero));
  __v16qi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v16qi)__A, (__v16qi)__conv);
}
#endif
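
/* The sign intrinsics multiply __A by the sign of __B: __selectneg is -1
   in lanes where __B < 0, __selectpos is +1 in lanes where __B > 0
   (vec_neg of the all-ones compare mask), so their sum __conv is -1, 0,
   or +1 per lane and the final vec_mul negates, zeroes, or passes through
   each element of __A.  The 16- and 32-bit versions below repeat the same
   construction.  */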

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi16(__m128i __A, __m128i __B) {
  const __v8hi __zero = {0};
  __v8hi __selectneg = (__v8hi)vec_cmplt((__v8hi)__B, __zero);
  __v8hi __selectpos = (__v8hi)vec_neg((__v8hi)vec_cmpgt((__v8hi)__B, __zero));
  __v8hi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v8hi)__A, (__v8hi)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi32(__m128i __A, __m128i __B) {
  const __v4si __zero = {0};
  __v4si __selectneg = (__v4si)vec_cmplt((__v4si)__B, __zero);
  __v4si __selectpos = (__v4si)vec_neg((__v4si)vec_cmpgt((__v4si)__B, __zero));
  __v4si __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v4si)__A, (__v4si)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi16(__m64 __A, __m64 __B) {
  const __v8hi __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __C = (__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi32(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_epi16(__m128i __A, __m128i __B) {
  __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __v8hi __C = vec_and(vec_unpackh((__v16qi)__A), __unsigned);
  __v8hi __D = vec_and(vec_unpackl((__v16qi)__A), __unsigned);
  __v8hi __E = vec_unpackh((__v16qi)__B);
  __v8hi __F = vec_unpackl((__v16qi)__B);
  __C = vec_mul(__C, __E);
  __D = vec_mul(__D, __F);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __E = vec_perm(__C, __D, __odds);
  __F = vec_perm(__C, __D, __evens);
  return (__m128i)vec_adds(__E, __F);
}
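
/* _mm_maddubs_epi16 multiplies unsigned bytes of __A by signed bytes of
   __B and adds adjacent product pairs with signed saturation.  vec_unpackh
   and vec_unpackl sign-extend to 16 bits, so the 0x00ff mask is what turns
   the unpacked __A lanes back into zero-extended (unsigned) byte values.
   Each individual product fits in 16 bits (the extremes are 255 * 127 =
   32385 and 255 * -128 = -32640); only the final pairwise vec_adds can
   saturate.  Despite their names, __odds and __evens select the first and
   second product of each adjacent pair, respectively.  */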

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __C = vec_unpackl((__v16qi)__C);
  const __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __C = vec_and(__C, __unsigned);
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __D = vec_unpackl((__v16qi)__D);
  __D = vec_mul(__C, __D);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __C = vec_perm(__D, __D, __odds);
  __D = vec_perm(__D, __D, __evens);
  __C = vec_adds(__C, __D);
  return (__m64)((__v2du)(__C))[0];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_epi16(__m128i __A, __m128i __B) {
  __v4si __C = vec_unpackh((__v8hi)__A);
  __v4si __D = vec_unpackh((__v8hi)__B);
  __C = vec_mul(__C, __D);
  __D = vec_unpackl((__v8hi)__A);
  __v4si __E = vec_unpackl((__v8hi)__B);
  __D = vec_mul(__D, __E);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  __D = vec_sr(__D, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __D = vec_add(__D, __ones);
  __D = vec_sr(__D, (__v4su)__ones);
  return (__m128i)vec_pack(__C, __D);
}
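
/* PMULHRSW semantics: each result element is the rounded high part of the
   scaled 32-bit product, (__a * __b + 0x4000) >> 15, computed above as
   ((__a * __b >> 14) + 1) >> 1.  The vec_sr shifts are logical rather than
   arithmetic, which is harmless here: vec_pack keeps only the low 16 bits
   of each 32-bit lane, and those bits are identical either way.  A quick
   check in Q15: 0x4000 * 0x4000 = 0x10000000, and
   (0x10000000 + 0x4000) >> 15 = 0x2000, i.e. 0.5 * 0.5 = 0.25.  */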

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_pi16(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __C = vec_unpackh((__v8hi)__C);
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __D = vec_unpackh((__v8hi)__D);
  __C = vec_mul(__C, __D);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __v8hi __E = vec_pack(__C, __D);
  return (__m64)((__v2du)(__E))[0];
}

#else
#include_next <tmmintrin.h>
#endif /* defined(__powerpc64__) && \
        * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* TMMINTRIN_H_ */