12 "Never use <amxmovrstransposeintrin.h> directly; use <immintrin.h> instead."
15#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H
16#define __AMX_MOVRS_TRANSPOSEINTRIN_H
/*
 * Attribute set applied to the function definitions in this header:
 * force inlining, omit debug info, and compile the function with the
 * "amx-transpose" and "amx-movrs" target features enabled.
 */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("amx-transpose,amx-movrs")))
/*
 * Expands to __builtin_ia32_t2rpntlvwz0rs with the same three arguments in
 * the same order.
 * NOTE(review): tdst is presumably a constant tile-register index and
 * base/stride describe the source memory; confirm against the builtin.
 */
#define _tile_2rpntlvwz0rs(tdst, base, stride)                                 \
  __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride)
/*
 * Expands to __builtin_ia32_t2rpntlvwz0rst1 with the same three arguments
 * in the same order.
 * NOTE(review): the "t1" suffix presumably selects a different cache hint
 * than the non-suffixed form; confirm against the ISA reference.
 */
#define _tile_2rpntlvwz0rst1(tdst, base, stride)                               \
  __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride)
/*
 * Expands to __builtin_ia32_t2rpntlvwz1rs with the same three arguments in
 * the same order.
 * NOTE(review): tdst is presumably a constant tile-register index and
 * base/stride describe the source memory; confirm against the builtin.
 */
#define _tile_2rpntlvwz1rs(tdst, base, stride)                                 \
  __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride)
/*
 * Expands to __builtin_ia32_t2rpntlvwz1rst1 with the same three arguments
 * in the same order.
 * NOTE(review): the "t1" suffix presumably selects a different cache hint
 * than the non-suffixed form; confirm against the ISA reference.
 */
#define _tile_2rpntlvwz1rst1(tdst, base, stride)                               \
  __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride)
33 unsigned short row,
unsigned short col0,
unsigned short col1,
34 _tile1024i *dst0, _tile1024i *dst1,
const void *base,
35 __SIZE_TYPE__ stride) {
38 __builtin_ia32_t2rpntlvwz0rs_internal(
39 row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
40 (__SIZE_TYPE__)(stride));
44 unsigned short row,
unsigned short col0,
unsigned short col1,
45 _tile1024i *dst0, _tile1024i *dst1,
const void *base,
46 __SIZE_TYPE__ stride) {
47 __builtin_ia32_t2rpntlvwz0rst1_internal(
48 row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
49 (__SIZE_TYPE__)(stride));
53 unsigned short row,
unsigned short col0,
unsigned short col1,
54 _tile1024i *dst0, _tile1024i *dst1,
const void *base,
55 __SIZE_TYPE__ stride) {
56 __builtin_ia32_t2rpntlvwz1rs_internal(
57 row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
58 (__SIZE_TYPE__)(stride));
62 unsigned short row,
unsigned short col0,
unsigned short col1,
63 _tile1024i *dst0, _tile1024i *dst1,
const void *base,
64 __SIZE_TYPE__ stride) {
65 __builtin_ia32_t2rpntlvwz1rst1_internal(
66 row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base,
67 (__SIZE_TYPE__)(stride));
96static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1,
97 const void *base, __SIZE_TYPE__ stride) {
98 _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
99 &dst1->tile, base, stride);
126static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1,
127 const void *base, __SIZE_TYPE__ stride) {
128 _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
129 &dst1->tile, base, stride);
159static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1,
160 const void *base, __SIZE_TYPE__ stride) {
161 _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
162 &dst1->tile, base, stride);
192static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1,
193 const void *base, __SIZE_TYPE__ stride) {
194 _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile,
195 &dst1->tile, base, stride);
/*
 * Retire the helper attribute macro so it does not leak out of this header.
 * It must stay undefined afterwards: leaving an (empty) definition behind
 * would collide with any later header that defines the same name with a
 * different replacement list.
 */
#undef __DEFAULT_FN_ATTRS