#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.  */
#error                                                                         \
    "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif

#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_

#if defined(__powerpc64__) &&                                                  \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>

/* We need definitions from the SSE header files.  */
#include <pmmintrin.h>

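/* Absolute value: compute |x| for each packed signed integer element; the
   __m64 variants operate on a doubled copy of the 64-bit operand.  */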
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi16(__m128i __A) {
  return (__m128i)vec_abs((__v8hi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi32(__m128i __A) {
  return (__m128i)vec_abs((__v4si)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_epi8(__m128i __A) {
  return (__m128i)vec_abs((__v16qi)__A);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi16(__m64 __A) {
  __v8hi __B = (__v8hi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi32(__m64 __A) {
  __v4si __B = (__v4si)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_abs_pi8(__m64 __A) {
  __v16qi __B = (__v16qi)(__v2du){__A, __A};
  return (__m64)((__v2du)vec_abs(__B))[0];
}

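/* Align-right: concatenate the two operands (first operand in the high
   half) and right-shift the result by __count bytes, returning the low
   half.  */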
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_epi8(__m128i __A, __m128i __B, const unsigned int __count) {
  if (__builtin_constant_p(__count) && __count < 16) {
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
    __B = (__m128i)vec_reve((__v16qu)__B);
#endif
    __A = (__m128i)vec_sld((__v16qu)__B, (__v16qu)__A, __count);
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_reve((__v16qu)__A);
#endif
    return __A;
  }

  if (__count == 0)
    return __B;

  if (__count >= 16) {
    if (__count >= 32) {
      const __v16qu __zero = {0};
      return (__m128i)__zero;
    } else {
      const __v16qu __shift = vec_splats((unsigned char)((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
      return (__m128i)vec_sro((__v16qu)__A, __shift);
#else
      return (__m128i)vec_slo((__v16qu)__A, __shift);
#endif
    }
  } else {
    const __v16qu __shiftA = vec_splats((unsigned char)((16 - __count) * 8));
    const __v16qu __shiftB = vec_splats((unsigned char)(__count * 8));
#ifdef __LITTLE_ENDIAN__
    __A = (__m128i)vec_slo((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_sro((__v16qu)__B, __shiftB);
#else
    __A = (__m128i)vec_sro((__v16qu)__A, __shiftA);
    __B = (__m128i)vec_slo((__v16qu)__B, __shiftB);
#endif
    return (__m128i)vec_or((__v16qu)__A, (__v16qu)__B);
  }
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_alignr_pi8(__m64 __A, __m64 __B, unsigned int __count) {
  if (__count < 16) {
    __v2du __C = {__B, __A};
#ifdef __LITTLE_ENDIAN__
    const __v4su __shift = {__count << 3, 0, 0, 0};
    __C = (__v2du)vec_sro((__v16qu)__C, (__v16qu)__shift);
#else
    const __v4su __shift = {0, 0, 0, __count << 3};
    __C = (__v2du)vec_slo((__v16qu)__C, (__v16qu)__shift);
#endif
    return (__m64)__C[0];
  } else {
    const __m64 __zero = {0};
    return __zero;
  }
}

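/* Horizontal add: sum the adjacent element pairs of the two operands; the
   "hadds" forms saturate the 16-bit sums.  */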
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_add(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadd_pi32(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __B};
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_add(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_epi16(__m128i __A, __m128i __B) {
  __v4si __C = {0}, __D = {0};
  __C = vec_sum4s((__v8hi)__A, __C);
  __D = vec_sum4s((__v8hi)__B, __D);
  __C = (__v4si)vec_packs(__C, __D);
  return (__m128i)__C;
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hadds_pi16(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v4si __D = vec_sum4s(__C, __zero);
  __C = vec_packs(__D, __D);
  return (__m64)((__v2du)__C)[1];
}

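/* Horizontal subtract: subtract the second element of each adjacent pair
   from the first; the "hsubs" forms saturate the 16-bit differences.  */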
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_epi32(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  2,  3,  8,  9,  10, 11,
                       16, 17, 18, 19, 24, 25, 26, 27};
  const __v16qu __Q = {4,  5,  6,  7,  12, 13, 14, 15,
                       20, 21, 22, 23, 28, 29, 30, 31};
  __v4si __C = vec_perm((__v4si)__A, (__v4si)__B, __P);
  __v4si __D = vec_perm((__v4si)__A, (__v4si)__B, __Q);
  return (__m128i)vec_sub(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsub_pi32(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11};
  const __v16qu __Q = {4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15};
  __v4si __C = (__v4si)(__v2du){__A, __B};
  __v4si __D = vec_perm(__C, __C, __Q);
  __C = vec_perm(__C, __C, __P);
  __C = vec_sub(__C, __D);
  return (__m64)((__v2du)__C)[1];
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_epi16(__m128i __A, __m128i __B) {
  const __v16qu __P = {0,  1,  4,  5,  8,  9,  12, 13,
                       16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __Q = {2,  3,  6,  7,  10, 11, 14, 15,
                       18, 19, 22, 23, 26, 27, 30, 31};
  __v8hi __C = vec_perm((__v8hi)__A, (__v8hi)__B, __P);
  __v8hi __D = vec_perm((__v8hi)__A, (__v8hi)__B, __Q);
  return (__m128i)vec_subs(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_hsubs_pi16(__m64 __A, __m64 __B) {
  const __v16qu __P = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
  const __v16qu __Q = {2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15};
  __v8hi __C = (__v8hi)(__v2du){__A, __B};
  __v8hi __D = vec_perm(__C, __C, __P);
  __v8hi __E = vec_perm(__C, __C, __Q);
  __C = vec_subs(__D, __E);
  return (__m64)((__v2du)__C)[1];
}

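/* Byte shuffle: select bytes of the first operand using the low bits of
   each control byte in the second; a set sign bit in the control byte
   clears the corresponding result byte.  */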
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __vector __bool char __select = vec_cmplt((__v16qi)__B, __zero);
  __v16qi __C = vec_perm((__v16qi)__A, (__v16qi)__A, (__v16qu)__B);
  return (__m128i)vec_sel(__C, __zero, __select);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_shuffle_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __vector __bool char __select = vec_cmplt((__v16qi)__D, __zero);
  __C = vec_perm((__v16qi)__C, (__v16qi)__C, (__v16qu)__D);
  __C = vec_sel(__C, __zero, __select);
  return (__m64)((__v2du)(__C))[0];
}

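/* Sign operations: negate, keep, or zero each element of the first operand
   according to whether the corresponding element of the second operand is
   negative, positive, or zero.  These rely on the POWER8 integer vec_neg
   and vec_mul built-ins, hence the _ARCH_PWR8 guards.  */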
#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi8(__m128i __A, __m128i __B) {
  const __v16qi __zero = {0};
  __v16qi __selectneg = (__v16qi)vec_cmplt((__v16qi)__B, __zero);
  __v16qi __selectpos =
      (__v16qi)vec_neg((__v16qi)vec_cmpgt((__v16qi)__B, __zero));
  __v16qi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v16qi)__A, (__v16qi)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi16(__m128i __A, __m128i __B) {
  const __v8hi __zero = {0};
  __v8hi __selectneg = (__v8hi)vec_cmplt((__v8hi)__B, __zero);
  __v8hi __selectpos = (__v8hi)vec_neg((__v8hi)vec_cmpgt((__v8hi)__B, __zero));
  __v8hi __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v8hi)__A, (__v8hi)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_epi32(__m128i __A, __m128i __B) {
  const __v4si __zero = {0};
  __v4si __selectneg = (__v4si)vec_cmplt((__v4si)__B, __zero);
  __v4si __selectpos = (__v4si)vec_neg((__v4si)vec_cmpgt((__v4si)__B, __zero));
  __v4si __conv = vec_add(__selectneg, __selectpos);
  return (__m128i)vec_mul((__v4si)__A, (__v4si)__conv);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi8(__m64 __A, __m64 __B) {
  const __v16qi __zero = {0};
  __v16qi __C = (__v16qi)(__v2du){__A, __A};
  __v16qi __D = (__v16qi)(__v2du){__B, __B};
  __C = (__v16qi)_mm_sign_epi8((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi16(__m64 __A, __m64 __B) {
  const __v8hi __zero = {0};
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __C = (__v8hi)_mm_sign_epi16((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_sign_pi32(__m64 __A, __m64 __B) {
  const __v4si __zero = {0};
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __C = (__v4si)_mm_sign_epi32((__m128i)__C, (__m128i)__D);
  return (__m64)((__v2du)(__C))[0];
}
#endif

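/* Multiply-add: multiply unsigned bytes of the first operand by the
   corresponding signed bytes of the second and add adjacent 16-bit
   products with signed saturation.  */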
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_epi16(__m128i __A, __m128i __B) {
  __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __v8hi __C = vec_and(vec_unpackh((__v16qi)__A), __unsigned);
  __v8hi __D = vec_and(vec_unpackl((__v16qi)__A), __unsigned);
  __v8hi __E = vec_unpackh((__v16qi)__B);
  __v8hi __F = vec_unpackl((__v16qi)__B);
  __C = vec_mul(__C, __E);
  __D = vec_mul(__D, __F);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __E = vec_perm(__C, __D, __odds);
  __F = vec_perm(__C, __D, __evens);
  return (__m128i)vec_adds(__E, __F);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_maddubs_pi16(__m64 __A, __m64 __B) {
  __v8hi __C = (__v8hi)(__v2du){__A, __A};
  __C = vec_unpackl((__v16qi)__C);
  const __v8hi __unsigned = vec_splats((signed short)0x00ff);
  __C = vec_and(__C, __unsigned);
  __v8hi __D = (__v8hi)(__v2du){__B, __B};
  __D = vec_unpackl((__v16qi)__D);
  __D = vec_mul(__C, __D);
  const __v16qu __odds = {0,  1,  4,  5,  8,  9,  12, 13,
                          16, 17, 20, 21, 24, 25, 28, 29};
  const __v16qu __evens = {2,  3,  6,  7,  10, 11, 14, 15,
                           18, 19, 22, 23, 26, 27, 30, 31};
  __C = vec_perm(__D, __D, __odds);
  __D = vec_perm(__D, __D, __evens);
  __C = vec_adds(__C, __D);
  return (__m64)((__v2du)(__C))[0];
}

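/* Rounded high-part multiply: multiply packed signed 16-bit values and
   return the rounded value of bits [30:15] of each 32-bit product,
   i.e. (((a * b) >> 14) + 1) >> 1.  */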
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_epi16(__m128i __A, __m128i __B) {
  __v4si __C = vec_unpackh((__v8hi)__A);
  __v4si __D = vec_unpackh((__v8hi)__B);
  __C = vec_mul(__C, __D);
  __D = vec_unpackl((__v8hi)__A);
  __v4si __E = vec_unpackl((__v8hi)__B);
  __D = vec_mul(__D, __E);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  __D = vec_sr(__D, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __D = vec_add(__D, __ones);
  __D = vec_sr(__D, (__v4su)__ones);
  return (__m128i)vec_pack(__C, __D);
}

extern __inline __m64
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mulhrs_pi16(__m64 __A, __m64 __B) {
  __v4si __C = (__v4si)(__v2du){__A, __A};
  __C = vec_unpackh((__v8hi)__C);
  __v4si __D = (__v4si)(__v2du){__B, __B};
  __D = vec_unpackh((__v8hi)__D);
  __C = vec_mul(__C, __D);
  const __v4su __shift = vec_splats((unsigned int)14);
  __C = vec_sr(__C, __shift);
  const __v4si __ones = vec_splats((signed int)1);
  __C = vec_add(__C, __ones);
  __C = vec_sr(__C, (__v4su)__ones);
  __v8hi __E = vec_pack(__C, __D);
  return (__m64)((__v2du)(__E))[0];
}

#else
#include_next <tmmintrin.h>
#endif /* defined(__powerpc64__) &&
          (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* TMMINTRIN_H_ */