clang 20.0.0git
tmmintrin.h
Go to the documentation of this file.
1/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __TMMINTRIN_H
11#define __TMMINTRIN_H
12
13#if !defined(__i386__) && !defined(__x86_64__)
14#error "This header is only meant to be used on x86 and x64 architecture"
15#endif
16
17#include <pmmintrin.h>
18
/* Attribute set applied to every inline function defined in this file.
 * Both variants request always_inline, nodebug, the "ssse3" target feature
 * and a minimum vector width of 128. The first variant, selected when
 * __EVEX512__ is defined but __AVX10_1_512__ is not, additionally puts
 * "no-evex512" in the target string. */
#if defined(__EVEX512__) && !defined(__AVX10_1_512__)
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("ssse3,no-evex512"), __min_vector_width__(128)))
#else
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("ssse3"),          \
                 __min_vector_width__(128)))
#endif
29
30#define __trunc64(x) \
31 (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
32#define __anyext128(x) \
33 (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
34 1, -1, -1)
35
36/// Computes the absolute value of each of the packed 8-bit signed
37/// integers in the source operand and stores the 8-bit unsigned integer
38/// results in the destination.
39///
40/// \headerfile <x86intrin.h>
41///
42/// This intrinsic corresponds to the \c PABSB instruction.
43///
44/// \param __a
45/// A 64-bit vector of [8 x i8].
46/// \returns A 64-bit integer vector containing the absolute values of the
47/// elements in the operand.
48static __inline__ __m64 __DEFAULT_FN_ATTRS
50{
51 return (__m64)__builtin_elementwise_abs((__v8qs)__a);
52}
53
54/// Computes the absolute value of each of the packed 8-bit signed
55/// integers in the source operand and stores the 8-bit unsigned integer
56/// results in the destination.
57///
58/// \headerfile <x86intrin.h>
59///
60/// This intrinsic corresponds to the \c VPABSB instruction.
61///
62/// \param __a
63/// A 128-bit vector of [16 x i8].
64/// \returns A 128-bit integer vector containing the absolute values of the
65/// elements in the operand.
66static __inline__ __m128i __DEFAULT_FN_ATTRS
68{
69 return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
70}
71
72/// Computes the absolute value of each of the packed 16-bit signed
73/// integers in the source operand and stores the 16-bit unsigned integer
74/// results in the destination.
75///
76/// \headerfile <x86intrin.h>
77///
78/// This intrinsic corresponds to the \c PABSW instruction.
79///
80/// \param __a
81/// A 64-bit vector of [4 x i16].
82/// \returns A 64-bit integer vector containing the absolute values of the
83/// elements in the operand.
84static __inline__ __m64 __DEFAULT_FN_ATTRS
86{
87 return (__m64)__builtin_elementwise_abs((__v4hi)__a);
88}
89
90/// Computes the absolute value of each of the packed 16-bit signed
91/// integers in the source operand and stores the 16-bit unsigned integer
92/// results in the destination.
93///
94/// \headerfile <x86intrin.h>
95///
96/// This intrinsic corresponds to the \c VPABSW instruction.
97///
98/// \param __a
99/// A 128-bit vector of [8 x i16].
100/// \returns A 128-bit integer vector containing the absolute values of the
101/// elements in the operand.
102static __inline__ __m128i __DEFAULT_FN_ATTRS
104{
105 return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
106}
107
108/// Computes the absolute value of each of the packed 32-bit signed
109/// integers in the source operand and stores the 32-bit unsigned integer
110/// results in the destination.
111///
112/// \headerfile <x86intrin.h>
113///
114/// This intrinsic corresponds to the \c PABSD instruction.
115///
116/// \param __a
117/// A 64-bit vector of [2 x i32].
118/// \returns A 64-bit integer vector containing the absolute values of the
119/// elements in the operand.
120static __inline__ __m64 __DEFAULT_FN_ATTRS
122{
123 return (__m64)__builtin_elementwise_abs((__v2si)__a);
124}
125
126/// Computes the absolute value of each of the packed 32-bit signed
127/// integers in the source operand and stores the 32-bit unsigned integer
128/// results in the destination.
129///
130/// \headerfile <x86intrin.h>
131///
132/// This intrinsic corresponds to the \c VPABSD instruction.
133///
134/// \param __a
135/// A 128-bit vector of [4 x i32].
136/// \returns A 128-bit integer vector containing the absolute values of the
137/// elements in the operand.
138static __inline__ __m128i __DEFAULT_FN_ATTRS
140{
141 return (__m128i)__builtin_elementwise_abs((__v4si)__a);
142}
143
/// Joins the two 128-bit integer vector operands into one value and shifts
///    that value right by the byte count given in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_epi8(a, b, n)                                               \
  ((__m128i)__builtin_ia32_palignr128(                                         \
      (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (n)))
167
/// Joins the two 64-bit integer vector operands into one value and shifts
///    that value right by the byte count given in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
/* The inner shuffle (indices 1, 0) builds a 128-bit value with b in element 0
 * and a in element 1; that value is byte-shifted right by n, and the outer
 * shuffle keeps element 0 as the 64-bit result. */
#define _mm_alignr_pi8(a, b, n)                                                \
  ((__m64)__builtin_shufflevector(                                             \
      __builtin_ia32_psrldqi128_byteshift(                                     \
          __builtin_shufflevector((__v1di)(a), (__v1di)(b), 1, 0), (n)),       \
      __extension__ (__v2di){}, 0))
192
193/// Horizontally adds the adjacent pairs of values contained in 2 packed
194/// 128-bit vectors of [8 x i16].
195///
196/// \headerfile <x86intrin.h>
197///
198/// This intrinsic corresponds to the \c VPHADDW instruction.
199///
200/// \param __a
201/// A 128-bit vector of [8 x i16] containing one of the source operands. The
202/// horizontal sums of the values are stored in the lower bits of the
203/// destination.
204/// \param __b
205/// A 128-bit vector of [8 x i16] containing one of the source operands. The
206/// horizontal sums of the values are stored in the upper bits of the
207/// destination.
208/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
209/// both operands.
210static __inline__ __m128i __DEFAULT_FN_ATTRS
211_mm_hadd_epi16(__m128i __a, __m128i __b)
212{
213 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
214}
215
216/// Horizontally adds the adjacent pairs of values contained in 2 packed
217/// 128-bit vectors of [4 x i32].
218///
219/// \headerfile <x86intrin.h>
220///
221/// This intrinsic corresponds to the \c VPHADDD instruction.
222///
223/// \param __a
224/// A 128-bit vector of [4 x i32] containing one of the source operands. The
225/// horizontal sums of the values are stored in the lower bits of the
226/// destination.
227/// \param __b
228/// A 128-bit vector of [4 x i32] containing one of the source operands. The
229/// horizontal sums of the values are stored in the upper bits of the
230/// destination.
231/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
232/// both operands.
233static __inline__ __m128i __DEFAULT_FN_ATTRS
234_mm_hadd_epi32(__m128i __a, __m128i __b)
235{
236 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
237}
238
239/// Horizontally adds the adjacent pairs of values contained in 2 packed
240/// 64-bit vectors of [4 x i16].
241///
242/// \headerfile <x86intrin.h>
243///
244/// This intrinsic corresponds to the \c PHADDW instruction.
245///
246/// \param __a
247/// A 64-bit vector of [4 x i16] containing one of the source operands. The
248/// horizontal sums of the values are stored in the lower bits of the
249/// destination.
250/// \param __b
251/// A 64-bit vector of [4 x i16] containing one of the source operands. The
252/// horizontal sums of the values are stored in the upper bits of the
253/// destination.
254/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
255/// operands.
256static __inline__ __m64 __DEFAULT_FN_ATTRS
257_mm_hadd_pi16(__m64 __a, __m64 __b)
258{
259 return __trunc64(__builtin_ia32_phaddw128(
260 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
261}
262
263/// Horizontally adds the adjacent pairs of values contained in 2 packed
264/// 64-bit vectors of [2 x i32].
265///
266/// \headerfile <x86intrin.h>
267///
268/// This intrinsic corresponds to the \c PHADDD instruction.
269///
270/// \param __a
271/// A 64-bit vector of [2 x i32] containing one of the source operands. The
272/// horizontal sums of the values are stored in the lower bits of the
273/// destination.
274/// \param __b
275/// A 64-bit vector of [2 x i32] containing one of the source operands. The
276/// horizontal sums of the values are stored in the upper bits of the
277/// destination.
278/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
279/// operands.
280static __inline__ __m64 __DEFAULT_FN_ATTRS
281_mm_hadd_pi32(__m64 __a, __m64 __b)
282{
283 return __trunc64(__builtin_ia32_phaddd128(
284 (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
285}
286
287/// Horizontally adds, with saturation, the adjacent pairs of values contained
288/// in two packed 128-bit vectors of [8 x i16].
289///
290/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
291/// less than 0x8000 are saturated to 0x8000.
292///
293/// \headerfile <x86intrin.h>
294///
295/// This intrinsic corresponds to the \c VPHADDSW instruction.
296///
297/// \param __a
298/// A 128-bit vector of [8 x i16] containing one of the source operands. The
299/// horizontal sums of the values are stored in the lower bits of the
300/// destination.
301/// \param __b
302/// A 128-bit vector of [8 x i16] containing one of the source operands. The
303/// horizontal sums of the values are stored in the upper bits of the
304/// destination.
305/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
306/// sums of both operands.
307static __inline__ __m128i __DEFAULT_FN_ATTRS
308_mm_hadds_epi16(__m128i __a, __m128i __b)
309{
310 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
311}
312
313/// Horizontally adds, with saturation, the adjacent pairs of values contained
314/// in two packed 64-bit vectors of [4 x i16].
315///
316/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
317/// less than 0x8000 are saturated to 0x8000.
318///
319/// \headerfile <x86intrin.h>
320///
321/// This intrinsic corresponds to the \c PHADDSW instruction.
322///
323/// \param __a
324/// A 64-bit vector of [4 x i16] containing one of the source operands. The
325/// horizontal sums of the values are stored in the lower bits of the
326/// destination.
327/// \param __b
328/// A 64-bit vector of [4 x i16] containing one of the source operands. The
329/// horizontal sums of the values are stored in the upper bits of the
330/// destination.
331/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
332/// sums of both operands.
333static __inline__ __m64 __DEFAULT_FN_ATTRS
334_mm_hadds_pi16(__m64 __a, __m64 __b)
335{
336 return __trunc64(__builtin_ia32_phaddsw128(
337 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
338}
339
340/// Horizontally subtracts the adjacent pairs of values contained in 2
341/// packed 128-bit vectors of [8 x i16].
342///
343/// \headerfile <x86intrin.h>
344///
345/// This intrinsic corresponds to the \c VPHSUBW instruction.
346///
347/// \param __a
348/// A 128-bit vector of [8 x i16] containing one of the source operands. The
349/// horizontal differences between the values are stored in the lower bits of
350/// the destination.
351/// \param __b
352/// A 128-bit vector of [8 x i16] containing one of the source operands. The
353/// horizontal differences between the values are stored in the upper bits of
354/// the destination.
355/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
356/// of both operands.
357static __inline__ __m128i __DEFAULT_FN_ATTRS
358_mm_hsub_epi16(__m128i __a, __m128i __b)
359{
360 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
361}
362
363/// Horizontally subtracts the adjacent pairs of values contained in 2
364/// packed 128-bit vectors of [4 x i32].
365///
366/// \headerfile <x86intrin.h>
367///
368/// This intrinsic corresponds to the \c VPHSUBD instruction.
369///
370/// \param __a
371/// A 128-bit vector of [4 x i32] containing one of the source operands. The
372/// horizontal differences between the values are stored in the lower bits of
373/// the destination.
374/// \param __b
375/// A 128-bit vector of [4 x i32] containing one of the source operands. The
376/// horizontal differences between the values are stored in the upper bits of
377/// the destination.
378/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
379/// of both operands.
380static __inline__ __m128i __DEFAULT_FN_ATTRS
381_mm_hsub_epi32(__m128i __a, __m128i __b)
382{
383 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
384}
385
386/// Horizontally subtracts the adjacent pairs of values contained in 2
387/// packed 64-bit vectors of [4 x i16].
388///
389/// \headerfile <x86intrin.h>
390///
391/// This intrinsic corresponds to the \c PHSUBW instruction.
392///
393/// \param __a
394/// A 64-bit vector of [4 x i16] containing one of the source operands. The
395/// horizontal differences between the values are stored in the lower bits of
396/// the destination.
397/// \param __b
398/// A 64-bit vector of [4 x i16] containing one of the source operands. The
399/// horizontal differences between the values are stored in the upper bits of
400/// the destination.
401/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
402/// of both operands.
403static __inline__ __m64 __DEFAULT_FN_ATTRS
404_mm_hsub_pi16(__m64 __a, __m64 __b)
405{
406 return __trunc64(__builtin_ia32_phsubw128(
407 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
408}
409
410/// Horizontally subtracts the adjacent pairs of values contained in 2
411/// packed 64-bit vectors of [2 x i32].
412///
413/// \headerfile <x86intrin.h>
414///
415/// This intrinsic corresponds to the \c PHSUBD instruction.
416///
417/// \param __a
418/// A 64-bit vector of [2 x i32] containing one of the source operands. The
419/// horizontal differences between the values are stored in the lower bits of
420/// the destination.
421/// \param __b
422/// A 64-bit vector of [2 x i32] containing one of the source operands. The
423/// horizontal differences between the values are stored in the upper bits of
424/// the destination.
425/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
426/// of both operands.
427static __inline__ __m64 __DEFAULT_FN_ATTRS
428_mm_hsub_pi32(__m64 __a, __m64 __b)
429{
430 return __trunc64(__builtin_ia32_phsubd128(
431 (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){}));
432}
433
434/// Horizontally subtracts, with saturation, the adjacent pairs of values
435/// contained in two packed 128-bit vectors of [8 x i16].
436///
437/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
438/// Negative differences less than 0x8000 are saturated to 0x8000.
439///
440/// \headerfile <x86intrin.h>
441///
442/// This intrinsic corresponds to the \c VPHSUBSW instruction.
443///
444/// \param __a
445/// A 128-bit vector of [8 x i16] containing one of the source operands. The
446/// horizontal differences between the values are stored in the lower bits of
447/// the destination.
448/// \param __b
449/// A 128-bit vector of [8 x i16] containing one of the source operands. The
450/// horizontal differences between the values are stored in the upper bits of
451/// the destination.
452/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
453/// differences of both operands.
454static __inline__ __m128i __DEFAULT_FN_ATTRS
455_mm_hsubs_epi16(__m128i __a, __m128i __b)
456{
457 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
458}
459
460/// Horizontally subtracts, with saturation, the adjacent pairs of values
461/// contained in two packed 64-bit vectors of [4 x i16].
462///
463/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
464/// Negative differences less than 0x8000 are saturated to 0x8000.
465///
466/// \headerfile <x86intrin.h>
467///
468/// This intrinsic corresponds to the \c PHSUBSW instruction.
469///
470/// \param __a
471/// A 64-bit vector of [4 x i16] containing one of the source operands. The
472/// horizontal differences between the values are stored in the lower bits of
473/// the destination.
474/// \param __b
475/// A 64-bit vector of [4 x i16] containing one of the source operands. The
476/// horizontal differences between the values are stored in the upper bits of
477/// the destination.
478/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
479/// differences of both operands.
480static __inline__ __m64 __DEFAULT_FN_ATTRS
481_mm_hsubs_pi16(__m64 __a, __m64 __b)
482{
483 return __trunc64(__builtin_ia32_phsubsw128(
484 (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){}));
485}
486
487/// Multiplies corresponding pairs of packed 8-bit unsigned integer
488/// values contained in the first source operand and packed 8-bit signed
489/// integer values contained in the second source operand, adds pairs of
490/// contiguous products with signed saturation, and writes the 16-bit sums to
491/// the corresponding bits in the destination.
492///
493/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
494/// both operands are multiplied, and the sum of both results is written to
495/// bits [15:0] of the destination.
496///
497/// \headerfile <x86intrin.h>
498///
499/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
500///
501/// \param __a
502/// A 128-bit integer vector containing the first source operand.
503/// \param __b
504/// A 128-bit integer vector containing the second source operand.
505/// \returns A 128-bit integer vector containing the sums of products of both
506/// operands: \n
507/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
508/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
509/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
510/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
511/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
512/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
513/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
514/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
515static __inline__ __m128i __DEFAULT_FN_ATTRS
516_mm_maddubs_epi16(__m128i __a, __m128i __b)
517{
518 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
519}
520
521/// Multiplies corresponding pairs of packed 8-bit unsigned integer
522/// values contained in the first source operand and packed 8-bit signed
523/// integer values contained in the second source operand, adds pairs of
524/// contiguous products with signed saturation, and writes the 16-bit sums to
525/// the corresponding bits in the destination.
526///
527/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
528/// both operands are multiplied, and the sum of both results is written to
529/// bits [15:0] of the destination.
530///
531/// \headerfile <x86intrin.h>
532///
533/// This intrinsic corresponds to the \c PMADDUBSW instruction.
534///
535/// \param __a
536/// A 64-bit integer vector containing the first source operand.
537/// \param __b
538/// A 64-bit integer vector containing the second source operand.
539/// \returns A 64-bit integer vector containing the sums of products of both
540/// operands: \n
541/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
542/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
543/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
544/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
545static __inline__ __m64 __DEFAULT_FN_ATTRS
547{
548 return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a),
549 (__v16qi)__anyext128(__b)));
550}
551
552/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
553/// products to the 18 most significant bits by right-shifting, rounds the
554/// truncated value by adding 1, and writes bits [16:1] to the destination.
555///
556/// \headerfile <x86intrin.h>
557///
558/// This intrinsic corresponds to the \c VPMULHRSW instruction.
559///
560/// \param __a
561/// A 128-bit vector of [8 x i16] containing one of the source operands.
562/// \param __b
563/// A 128-bit vector of [8 x i16] containing one of the source operands.
564/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
565/// products of both operands.
566static __inline__ __m128i __DEFAULT_FN_ATTRS
567_mm_mulhrs_epi16(__m128i __a, __m128i __b)
568{
569 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
570}
571
572/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
573/// products to the 18 most significant bits by right-shifting, rounds the
574/// truncated value by adding 1, and writes bits [16:1] to the destination.
575///
576/// \headerfile <x86intrin.h>
577///
578/// This intrinsic corresponds to the \c PMULHRSW instruction.
579///
580/// \param __a
581/// A 64-bit vector of [4 x i16] containing one of the source operands.
582/// \param __b
583/// A 64-bit vector of [4 x i16] containing one of the source operands.
584/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
585/// products of both operands.
586static __inline__ __m64 __DEFAULT_FN_ATTRS
588{
589 return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a),
590 (__v8hi)__anyext128(__b)));
591}
592
593/// Copies the 8-bit integers from a 128-bit integer vector to the
594/// destination or clears 8-bit values in the destination, as specified by
595/// the second source operand.
596///
597/// \headerfile <x86intrin.h>
598///
599/// This intrinsic corresponds to the \c VPSHUFB instruction.
600///
601/// \param __a
602/// A 128-bit integer vector containing the values to be copied.
603/// \param __b
604/// A 128-bit integer vector containing control bytes corresponding to
605/// positions in the destination:
606/// Bit 7: \n
607/// 1: Clear the corresponding byte in the destination. \n
608/// 0: Copy the selected source byte to the corresponding byte in the
609/// destination. \n
610/// Bits [6:4] Reserved. \n
611/// Bits [3:0] select the source byte to be copied.
612/// \returns A 128-bit integer vector containing the copied or cleared values.
613static __inline__ __m128i __DEFAULT_FN_ATTRS
614_mm_shuffle_epi8(__m128i __a, __m128i __b)
615{
616 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
617}
618
619/// Copies the 8-bit integers from a 64-bit integer vector to the
620/// destination or clears 8-bit values in the destination, as specified by
621/// the second source operand.
622///
623/// \headerfile <x86intrin.h>
624///
625/// This intrinsic corresponds to the \c PSHUFB instruction.
626///
627/// \param __a
628/// A 64-bit integer vector containing the values to be copied.
629/// \param __b
630/// A 64-bit integer vector containing control bytes corresponding to
631/// positions in the destination:
632/// Bit 7: \n
633/// 1: Clear the corresponding byte in the destination. \n
634/// 0: Copy the selected source byte to the corresponding byte in the
635/// destination. \n
636/// Bits [2:0] select the source byte to be copied.
637/// \returns A 64-bit integer vector containing the copied or cleared values.
638static __inline__ __m64 __DEFAULT_FN_ATTRS
640{
641 return __trunc64(__builtin_ia32_pshufb128(
642 (__v16qi)__builtin_shufflevector(
643 (__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1),
644 (__v16qi)__anyext128(__b)));
645}
646
647/// For each 8-bit integer in the first source operand, perform one of
648/// the following actions as specified by the second source operand.
649///
650/// If the byte in the second source is negative, calculate the two's
651/// complement of the corresponding byte in the first source, and write that
652/// value to the destination. If the byte in the second source is positive,
653/// copy the corresponding byte from the first source to the destination. If
654/// the byte in the second source is zero, clear the corresponding byte in
655/// the destination.
656///
657/// \headerfile <x86intrin.h>
658///
659/// This intrinsic corresponds to the \c VPSIGNB instruction.
660///
661/// \param __a
662/// A 128-bit integer vector containing the values to be copied.
663/// \param __b
664/// A 128-bit integer vector containing control bytes corresponding to
665/// positions in the destination.
666/// \returns A 128-bit integer vector containing the resultant values.
667static __inline__ __m128i __DEFAULT_FN_ATTRS
668_mm_sign_epi8(__m128i __a, __m128i __b)
669{
670 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
671}
672
673/// For each 16-bit integer in the first source operand, perform one of
674/// the following actions as specified by the second source operand.
675///
676/// If the word in the second source is negative, calculate the two's
677/// complement of the corresponding word in the first source, and write that
678/// value to the destination. If the word in the second source is positive,
679/// copy the corresponding word from the first source to the destination. If
680/// the word in the second source is zero, clear the corresponding word in
681/// the destination.
682///
683/// \headerfile <x86intrin.h>
684///
685/// This intrinsic corresponds to the \c VPSIGNW instruction.
686///
687/// \param __a
688/// A 128-bit integer vector containing the values to be copied.
689/// \param __b
690/// A 128-bit integer vector containing control words corresponding to
691/// positions in the destination.
692/// \returns A 128-bit integer vector containing the resultant values.
693static __inline__ __m128i __DEFAULT_FN_ATTRS
694_mm_sign_epi16(__m128i __a, __m128i __b)
695{
696 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
697}
698
699/// For each 32-bit integer in the first source operand, perform one of
700/// the following actions as specified by the second source operand.
701///
702/// If the doubleword in the second source is negative, calculate the two's
703/// complement of the corresponding word in the first source, and write that
704/// value to the destination. If the doubleword in the second source is
705/// positive, copy the corresponding word from the first source to the
706/// destination. If the doubleword in the second source is zero, clear the
707/// corresponding word in the destination.
708///
709/// \headerfile <x86intrin.h>
710///
711/// This intrinsic corresponds to the \c VPSIGND instruction.
712///
713/// \param __a
714/// A 128-bit integer vector containing the values to be copied.
715/// \param __b
716/// A 128-bit integer vector containing control doublewords corresponding to
717/// positions in the destination.
718/// \returns A 128-bit integer vector containing the resultant values.
719static __inline__ __m128i __DEFAULT_FN_ATTRS
720_mm_sign_epi32(__m128i __a, __m128i __b)
721{
722 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
723}
724
725/// For each 8-bit integer in the first source operand, perform one of
726/// the following actions as specified by the second source operand.
727///
728/// If the byte in the second source is negative, calculate the two's
729/// complement of the corresponding byte in the first source, and write that
730/// value to the destination. If the byte in the second source is positive,
731/// copy the corresponding byte from the first source to the destination. If
732/// the byte in the second source is zero, clear the corresponding byte in
733/// the destination.
734///
735/// \headerfile <x86intrin.h>
736///
737/// This intrinsic corresponds to the \c PSIGNB instruction.
738///
739/// \param __a
740/// A 64-bit integer vector containing the values to be copied.
741/// \param __b
742/// A 64-bit integer vector containing control bytes corresponding to
743/// positions in the destination.
744/// \returns A 64-bit integer vector containing the resultant values.
745static __inline__ __m64 __DEFAULT_FN_ATTRS
746_mm_sign_pi8(__m64 __a, __m64 __b)
747{
748 return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a),
749 (__v16qi)__anyext128(__b)));
750}
751
752/// For each 16-bit integer in the first source operand, perform one of
753/// the following actions as specified by the second source operand.
754///
755/// If the word in the second source is negative, calculate the two's
756/// complement of the corresponding word in the first source, and write that
757/// value to the destination. If the word in the second source is positive,
758/// copy the corresponding word from the first source to the destination. If
759/// the word in the second source is zero, clear the corresponding word in
760/// the destination.
761///
762/// \headerfile <x86intrin.h>
763///
764/// This intrinsic corresponds to the \c PSIGNW instruction.
765///
766/// \param __a
767/// A 64-bit integer vector containing the values to be copied.
768/// \param __b
769/// A 64-bit integer vector containing control words corresponding to
770/// positions in the destination.
771/// \returns A 64-bit integer vector containing the resultant values.
772static __inline__ __m64 __DEFAULT_FN_ATTRS
773_mm_sign_pi16(__m64 __a, __m64 __b)
774{
775 return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a),
776 (__v8hi)__anyext128(__b)));
777}
778
779/// For each 32-bit integer in the first source operand, perform one of
780/// the following actions as specified by the second source operand.
781///
782/// If the doubleword in the second source is negative, calculate the two's
783/// complement of the corresponding doubleword in the first source, and
784/// write that value to the destination. If the doubleword in the second
785/// source is positive, copy the corresponding doubleword from the first
786/// source to the destination. If the doubleword in the second source is
787/// zero, clear the corresponding doubleword in the destination.
788///
789/// \headerfile <x86intrin.h>
790///
791/// This intrinsic corresponds to the \c PSIGND instruction.
792///
793/// \param __a
794/// A 64-bit integer vector containing the values to be copied.
795/// \param __b
796/// A 64-bit integer vector containing two control doublewords corresponding
797/// to positions in the destination.
798/// \returns A 64-bit integer vector containing the resultant values.
799static __inline__ __m64 __DEFAULT_FN_ATTRS
800_mm_sign_pi32(__m64 __a, __m64 __b)
801{
802 return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a),
803 (__v4si)__anyext128(__b)));
804}
805
806#undef __anyext128
807#undef __trunc64
808#undef __DEFAULT_FN_ATTRS
809
810#endif /* __TMMINTRIN_H */
static __inline__ vector float vector float __b
Definition: altivec.h:578
static __inline__ void int __a
Definition: emmintrin.h:4064
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi16(__m128i __a, __m128i __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:694
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhrs_pi16(__m64 __a, __m64 __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:587
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi32(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32].
Definition: tmmintrin.h:234
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi16(__m64 __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:85
#define __anyext128(x)
Definition: tmmintrin.h:32
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi8(__m64 __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:49
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi32(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32]...
Definition: tmmintrin.h:428
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_maddubs_pi16(__m64 __a, __m64 __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:546
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsubs_pi16(__m64 __a, __m64 __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 64-bit ...
Definition: tmmintrin.h:481
#define __DEFAULT_FN_ATTRS
Definition: tmmintrin.h:25
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi32(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [4 x i32...
Definition: tmmintrin.h:381
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
Definition: tmmintrin.h:67
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadd_pi32(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [2 x i32].
Definition: tmmintrin.h:281
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi16(__m64 __a, __m64 __b)
For each 16-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:773
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most signific...
Definition: tmmintrin.h:567
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
Definition: tmmintrin.h:103
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi8(__m64 __a, __m64 __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:746
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
Definition: tmmintrin.h:516
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi32(__m64 __a, __m64 __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:800
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi8(__m128i __a, __m128i __b)
For each 8-bit integer in the first source operand, perform one of the following actions as specified...
Definition: tmmintrin.h:668
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsub_epi16(__m128i __a, __m128i __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16...
Definition: tmmintrin.h:358
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hsubs_epi16(__m128i __a, __m128i __b)
Horizontally subtracts, with saturation, the adjacent pairs of values contained in two packed 128-bit...
Definition: tmmintrin.h:455
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadd_pi16(__m64 __a, __m64 __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16].
Definition: tmmintrin.h:257
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi32(__m64 __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:121
#define __trunc64(x)
Definition: tmmintrin.h:30
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi16(__m64 __a, __m64 __b)
Horizontally subtracts the adjacent pairs of values contained in 2 packed 64-bit vectors of [4 x i16]...
Definition: tmmintrin.h:404
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadds_epi16(__m128i __a, __m128i __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 128-bit vect...
Definition: tmmintrin.h:308
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sign_epi32(__m128i __a, __m128i __b)
For each 32-bit integer in the first source operand, perform one of the following actions as specifie...
Definition: tmmintrin.h:720
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_hadd_epi16(__m128i __a, __m128i __b)
Horizontally adds the adjacent pairs of values contained in 2 packed 128-bit vectors of [8 x i16].
Definition: tmmintrin.h:211
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
Definition: tmmintrin.h:614
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadds_pi16(__m64 __a, __m64 __b)
Horizontally adds, with saturation, the adjacent pairs of values contained in two packed 64-bit vecto...
Definition: tmmintrin.h:334
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition: tmmintrin.h:139
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_shuffle_pi8(__m64 __a, __m64 __b)
Copies the 8-bit integers from a 64-bit integer vector to the destination or clears 8-bit values in t...
Definition: tmmintrin.h:639