1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CodeGenFunction.h"
21#include "CodeGenModule.h"
22#include "ConstantEmitter.h"
23#include "PatternInit.h"
24#include "TargetInfo.h"
26#include "clang/AST/Attr.h"
27#include "clang/AST/Decl.h"
28#include "clang/AST/OSLog.h"
35#include "llvm/ADT/APFloat.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/FloatingPointMode.h"
38#include "llvm/ADT/SmallPtrSet.h"
39#include "llvm/ADT/StringExtras.h"
40#include "llvm/Analysis/ValueTracking.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/IR/Intrinsics.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/IntrinsicsAMDGPU.h"
46#include "llvm/IR/IntrinsicsARM.h"
47#include "llvm/IR/IntrinsicsBPF.h"
48#include "llvm/IR/IntrinsicsDirectX.h"
49#include "llvm/IR/IntrinsicsHexagon.h"
50#include "llvm/IR/IntrinsicsNVPTX.h"
51#include "llvm/IR/IntrinsicsPowerPC.h"
52#include "llvm/IR/IntrinsicsR600.h"
53#include "llvm/IR/IntrinsicsRISCV.h"
54#include "llvm/IR/IntrinsicsS390.h"
55#include "llvm/IR/IntrinsicsVE.h"
56#include "llvm/IR/IntrinsicsWebAssembly.h"
57#include "llvm/IR/IntrinsicsX86.h"
58#include "llvm/IR/MDBuilder.h"
59#include "llvm/IR/MatrixBuilder.h"
60#include "llvm/Support/ConvertUTF.h"
61#include "llvm/Support/MathExtras.h"
62#include "llvm/Support/ScopedPrinter.h"
63#include "llvm/TargetParser/AArch64TargetParser.h"
64#include "llvm/TargetParser/X86TargetParser.h"
65#include <optional>
66#include <sstream>
67
68using namespace clang;
69using namespace CodeGen;
70using namespace llvm;
71
72static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
73 Align AlignmentInBytes) {
74 ConstantInt *Byte;
75 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
76 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
77 // Nothing to initialize.
78 return;
79 case LangOptions::TrivialAutoVarInitKind::Zero:
80 Byte = CGF.Builder.getInt8(0x00);
81 break;
82 case LangOptions::TrivialAutoVarInitKind::Pattern: {
83 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
84 Byte = llvm::dyn_cast<llvm::ConstantInt>(
85 initializationPatternFor(CGF.CGM, Int8));
86 break;
87 }
88 }
89 if (CGF.CGM.stopAutoInit())
90 return;
91 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
92 I->addAnnotationMetadata("auto-init");
93}
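// For illustration, a sketch of what the helper above produces, assuming it is
// reached from a __builtin_alloca call and the 'pattern' trivial-auto-var-init
// mode is active:
//
//   // clang -ftrivial-auto-var-init=pattern
//   void *p = __builtin_alloca(n);
//   // ...the alloca is followed by a memset of the target's repeated pattern
//   // byte over its n bytes, and the memset carries "auto-init" metadata.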
94
95/// getBuiltinLibFunction - Given a builtin id for a function like
96/// "__builtin_fabsf", return a Function* for "fabsf".
97llvm::Function *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
98 unsigned BuiltinID) {
99 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
100
101 // Get the name, skip over the __builtin_ prefix (if necessary).
102 StringRef Name;
103 GlobalDecl D(FD);
104
105 // TODO: This list should be expanded or refactored after all GCC-compatible
106 // std libcall builtins are implemented.
107 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
108 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
109 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
110 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
111 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
112 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
113 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
114 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
115 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
116 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
117 {Builtin::BI__builtin_printf, "__printfieee128"},
118 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
119 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
120 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
121 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
122 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
123 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
124 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
125 {Builtin::BI__builtin_scanf, "__scanfieee128"},
126 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
127 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
128 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
129 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
130 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
131 };
132
133 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
134 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
135 // if it is 64-bit 'long double' mode.
136 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
137 {Builtin::BI__builtin_frexpl, "frexp"},
138 {Builtin::BI__builtin_ldexpl, "ldexp"},
139 {Builtin::BI__builtin_modfl, "modf"},
140 };
141
142 // If the builtin has been declared explicitly with an assembler label,
143 // use the mangled name. This differs from the plain label on platforms
144 // that prefix labels.
145 if (FD->hasAttr<AsmLabelAttr>())
146 Name = getMangledName(D);
147 else {
148 // TODO: This mutation should also be applied to targets other than PPC,
149 // once the backend supports IEEE 128-bit style libcalls.
150 if (getTriple().isPPC64() &&
151 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
152 F128Builtins.contains(BuiltinID))
153 Name = F128Builtins[BuiltinID];
154 else if (getTriple().isOSAIX() &&
155 &getTarget().getLongDoubleFormat() ==
156 &llvm::APFloat::IEEEdouble() &&
157 AIXLongDouble64Builtins.contains(BuiltinID))
158 Name = AIXLongDouble64Builtins[BuiltinID];
159 else
160 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
161 }
162
163 llvm::FunctionType *Ty =
164 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
165
166 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
167}
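// For illustration, the name mapping performed above (a sketch; the exact
// libcall chosen depends on the target's long double format):
//
//   __builtin_fabsf(x);    // plain targets: call to "fabsf" (the 10-character
//                          // "__builtin_" prefix is stripped)
//   __builtin_printf(fmt); // PPC64 with IEEE-quad long double: call to
//                          // "__printfieee128" via the F128Builtins table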
168
169/// Emit the conversions required to turn the given value into an
170/// integer of the given size.
171static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
172 QualType T, llvm::IntegerType *IntType) {
173 V = CGF.EmitToMemory(V, T);
174
175 if (V->getType()->isPointerTy())
176 return CGF.Builder.CreatePtrToInt(V, IntType);
177
178 assert(V->getType() == IntType);
179 return V;
180}
181
182static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
183 QualType T, llvm::Type *ResultType) {
184 V = CGF.EmitFromMemory(V, T);
185
186 if (ResultType->isPointerTy())
187 return CGF.Builder.CreateIntToPtr(V, ResultType);
188
189 assert(V->getType() == ResultType);
190 return V;
191}
192
193static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
194 ASTContext &Ctx = CGF.getContext();
195 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
196 unsigned Bytes = Ptr.getElementType()->isPointerTy()
197 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
198 : Ptr.getElementType()->getScalarSizeInBits() / 8;
199 unsigned Align = Ptr.getAlignment().getQuantity();
200 if (Align % Bytes != 0) {
201 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
202 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
203 // Force address to be at least naturally-aligned.
204 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
205 }
206 return Ptr;
207}
208
209/// Utility to insert an atomic instruction based on Intrinsic::ID
210/// and the expression node.
211static Value *MakeBinaryAtomicValue(
212 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
213 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
214
215 QualType T = E->getType();
216 assert(E->getArg(0)->getType()->isPointerType());
217 assert(CGF.getContext().hasSameUnqualifiedType(T,
218 E->getArg(0)->getType()->getPointeeType()));
219 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
220
221 Address DestAddr = CheckAtomicAlignment(CGF, E);
222
223 llvm::IntegerType *IntType = llvm::IntegerType::get(
224 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
225
226 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
227 llvm::Type *ValueType = Val->getType();
228 Val = EmitToInt(CGF, Val, T, IntType);
229
230 llvm::Value *Result =
231 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
232 return EmitFromInt(CGF, Result, T, ValueType);
233}
234
235static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
236 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
237 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
238
239 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
240 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
241 LV.setNontemporal(true);
242 CGF.EmitStoreOfScalar(Val, LV, false);
243 return nullptr;
244}
245
246static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
247 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
248
249 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
250 LV.setNontemporal(true);
251 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
252}
253
254static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
255 llvm::AtomicRMWInst::BinOp Kind,
256 const CallExpr *E) {
257 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
258}
259
260/// Utility to insert an atomic instruction based on Intrinsic::ID and
261/// the expression node, where the return value is the result of the
262/// operation.
263static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
264 llvm::AtomicRMWInst::BinOp Kind,
265 const CallExpr *E,
266 Instruction::BinaryOps Op,
267 bool Invert = false) {
268 QualType T = E->getType();
269 assert(E->getArg(0)->getType()->isPointerType());
270 assert(CGF.getContext().hasSameUnqualifiedType(T,
271 E->getArg(0)->getType()->getPointeeType()));
272 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
273
274 Address DestAddr = CheckAtomicAlignment(CGF, E);
275
276 llvm::IntegerType *IntType = llvm::IntegerType::get(
277 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
278
279 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
280 llvm::Type *ValueType = Val->getType();
281 Val = EmitToInt(CGF, Val, T, IntType);
282
283 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
284 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
285 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
286 if (Invert)
287 Result =
288 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
289 llvm::ConstantInt::getAllOnesValue(IntType));
290 Result = EmitFromInt(CGF, Result, T, ValueType);
291 return RValue::get(Result);
292}
293
294/// Utility to insert an atomic cmpxchg instruction.
295///
296/// @param CGF The current codegen function.
297/// @param E Builtin call expression to convert to cmpxchg.
298/// arg0 - address to operate on
299/// arg1 - value to compare with
300/// arg2 - new value
301/// @param ReturnBool Specifies whether to return success flag of
302/// cmpxchg result or the old value.
303///
304/// @returns result of cmpxchg, according to ReturnBool
305///
306/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics,
307/// invoke the function EmitAtomicCmpXchgForMSIntrin.
308static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
309 bool ReturnBool) {
310 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
311 Address DestAddr = CheckAtomicAlignment(CGF, E);
312
313 llvm::IntegerType *IntType = llvm::IntegerType::get(
314 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
315
316 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
317 llvm::Type *ValueType = Cmp->getType();
318 Cmp = EmitToInt(CGF, Cmp, T, IntType);
319 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
320
321 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
322 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
323 llvm::AtomicOrdering::SequentiallyConsistent);
324 if (ReturnBool)
325 // Extract boolean success flag and zext it to int.
326 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
327 CGF.ConvertType(E->getType()));
328 else
329 // Extract old value and emit it using the same type as compare value.
330 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
331 ValueType);
332}
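// For illustration, the two result flavours of the helper above (a sketch,
// assuming the GNU __sync builtins that are lowered through it):
//
//   __sync_bool_compare_and_swap(p, oldv, newv); // ReturnBool = true:
//                                                // success flag, zext'ed to int
//   __sync_val_compare_and_swap(p, oldv, newv);  // ReturnBool = false:
//                                                // the previously stored value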
333
334/// This function should be invoked to emit atomic cmpxchg for Microsoft's
335/// _InterlockedCompareExchange* intrinsics which have the following signature:
336/// T _InterlockedCompareExchange(T volatile *Destination,
337/// T Exchange,
338/// T Comparand);
339///
340/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
341/// cmpxchg *Destination, Comparand, Exchange.
342/// So we need to swap Comparand and Exchange when invoking
343/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
344/// function MakeAtomicCmpXchgValue since it expects the arguments to be
345/// already swapped.
346
347static
348Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
349 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
350 assert(E->getArg(0)->getType()->isPointerType());
351 assert(CGF.getContext().hasSameUnqualifiedType(
352 E->getType(), E->getArg(0)->getType()->getPointeeType()));
353 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
354 E->getArg(1)->getType()));
355 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
356 E->getArg(2)->getType()));
357
358 Address DestAddr = CheckAtomicAlignment(CGF, E);
359
360 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
361 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
362
363 // For Release ordering, the failure ordering should be Monotonic.
364 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
365 AtomicOrdering::Monotonic :
366 SuccessOrdering;
367
368 // The atomic instruction is marked volatile for consistency with MSVC. This
369 // blocks the few atomics optimizations that LLVM has. If we want to optimize
370 // _Interlocked* operations in the future, we will have to remove the volatile
371 // marker.
372 auto *Result = CGF.Builder.CreateAtomicCmpXchg(
373 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
374 Result->setVolatile(true);
375 return CGF.Builder.CreateExtractValue(Result, 0);
376}
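// For illustration, the operand swap described in the comment above (a rough
// sketch of the emitted IR):
//
//   long prev = _InterlockedCompareExchange(&Dest, Exch, Comp);
//   // ==> %pair = cmpxchg volatile ptr %Dest, i32 %Comp, i32 %Exch seq_cst seq_cst
//   //     %prev = extractvalue { i32, i1 } %pair, 0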
377
378// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
379// prototyped like this:
380//
381// unsigned char _InterlockedCompareExchange128...(
382// __int64 volatile * _Destination,
383// __int64 _ExchangeHigh,
384// __int64 _ExchangeLow,
385// __int64 * _ComparandResult);
386//
387// Note that Destination is assumed to be at least 16-byte aligned, despite
388// being typed int64.
389
390static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
391 const CallExpr *E,
392 AtomicOrdering SuccessOrdering) {
393 assert(E->getNumArgs() == 4);
394 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
395 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
396 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
397 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
398
399 assert(DestPtr->getType()->isPointerTy());
400 assert(!ExchangeHigh->getType()->isPointerTy());
401 assert(!ExchangeLow->getType()->isPointerTy());
402
403 // For Release ordering, the failure ordering should be Monotonic.
404 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
405 ? AtomicOrdering::Monotonic
406 : SuccessOrdering;
407
408 // Convert to i128 pointers and values. Alignment is also overridden for
409 // destination pointer.
410 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
411 Address DestAddr(DestPtr, Int128Ty,
412 CGF.getContext().toCharUnitsFromBits(128));
413 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
414
415 // (((i128)hi) << 64) | ((i128)lo)
416 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
417 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
418 ExchangeHigh =
419 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
420 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
421
422 // Load the comparand for the instruction.
423 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
424
425 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
426 SuccessOrdering, FailureOrdering);
427
428 // The atomic instruction is marked volatile for consistency with MSVC. This
429 // blocks the few atomics optimizations that LLVM has. If we want to optimize
430 // _Interlocked* operations in the future, we will have to remove the volatile
431 // marker.
432 CXI->setVolatile(true);
433
434 // Store the result as an outparameter.
435 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
436 ComparandAddr);
437
438 // Get the success boolean and zero extend it to i8.
439 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
440 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
441}
442
443static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
444 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
445 assert(E->getArg(0)->getType()->isPointerType());
446
447 auto *IntTy = CGF.ConvertType(E->getType());
448 Address DestAddr = CheckAtomicAlignment(CGF, E);
449 auto *Result = CGF.Builder.CreateAtomicRMW(
450 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
451 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
452}
453
454static Value *EmitAtomicDecrementValue(
455 CodeGenFunction &CGF, const CallExpr *E,
456 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
457 assert(E->getArg(0)->getType()->isPointerType());
458
459 auto *IntTy = CGF.ConvertType(E->getType());
460 Address DestAddr = CheckAtomicAlignment(CGF, E);
461 auto *Result = CGF.Builder.CreateAtomicRMW(
462 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
463 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
464}
465
466// Build a plain volatile load.
467static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
468 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
469 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
470 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
471 llvm::Type *ITy =
472 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
473 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
474 Load->setVolatile(true);
475 return Load;
476}
477
478// Build a plain volatile store.
479static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
480 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
481 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
482 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
483 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
484 llvm::StoreInst *Store =
485 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
486 Store->setVolatile(true);
487 return Store;
488}
489
490// Emit a simple mangled intrinsic that has 1 argument and a return type
491// matching the argument type. Depending on mode, this may be a constrained
492// floating-point intrinsic.
493static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
494 const CallExpr *E, unsigned IntrinsicID,
495 unsigned ConstrainedIntrinsicID) {
496 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
497
498 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
499 if (CGF.Builder.getIsFPConstrained()) {
500 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
501 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
502 } else {
503 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
504 return CGF.Builder.CreateCall(F, Src0);
505 }
506}
507
508// Emit an intrinsic that has 2 operands of the same type as its result.
509// Depending on mode, this may be a constrained floating-point intrinsic.
510static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
511 const CallExpr *E, unsigned IntrinsicID,
512 unsigned ConstrainedIntrinsicID) {
513 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
514 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
515
516 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
517 if (CGF.Builder.getIsFPConstrained()) {
518 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
519 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
520 } else {
521 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
522 return CGF.Builder.CreateCall(F, { Src0, Src1 });
523 }
524}
525
526// Has second type mangled argument.
527static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
528 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
529 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
530 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
531 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
532
533 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
534 if (CGF.Builder.getIsFPConstrained()) {
535 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
536 {Src0->getType(), Src1->getType()});
537 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
538 }
539
540 Function *F =
541 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
542 return CGF.Builder.CreateCall(F, {Src0, Src1});
543}
544
545// Emit an intrinsic that has 3 operands of the same type as its result.
546// Depending on mode, this may be a constrained floating-point intrinsic.
547static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
548 const CallExpr *E, unsigned IntrinsicID,
549 unsigned ConstrainedIntrinsicID) {
550 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
551 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
552 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
553
554 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
555 if (CGF.Builder.getIsFPConstrained()) {
556 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
557 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
558 } else {
559 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
560 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
561 }
562}
563
564// Emit an intrinsic where all operands are of the same type as the result.
565// Depending on mode, this may be a constrained floating-point intrinsic.
566static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
567 unsigned IntrinsicID,
568 unsigned ConstrainedIntrinsicID,
569 llvm::Type *Ty,
570 ArrayRef<Value *> Args) {
571 Function *F;
572 if (CGF.Builder.getIsFPConstrained())
573 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
574 else
575 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
576
577 if (CGF.Builder.getIsFPConstrained())
578 return CGF.Builder.CreateConstrainedFPCall(F, Args);
579 else
580 return CGF.Builder.CreateCall(F, Args);
581}
582
583// Emit a simple mangled intrinsic that has 1 argument and a return type
584// matching the argument type.
585static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E,
586 unsigned IntrinsicID,
587 llvm::StringRef Name = "") {
588 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
589
590 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
591 return CGF.Builder.CreateCall(F, Src0, Name);
592}
593
594// Emit an intrinsic that has 2 operands of the same type as its result.
595static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
596 const CallExpr *E,
597 unsigned IntrinsicID) {
598 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
599 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
600
601 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
602 return CGF.Builder.CreateCall(F, { Src0, Src1 });
603}
604
605// Emit an intrinsic that has 3 operands of the same type as its result.
606static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
607 const CallExpr *E,
608 unsigned IntrinsicID) {
609 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
610 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
611 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
612
613 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
614 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
615}
616
617// Emit an intrinsic that has 1 float or double operand, and 1 integer.
618static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
619 const CallExpr *E,
620 unsigned IntrinsicID) {
621 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
622 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
623
624 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
625 return CGF.Builder.CreateCall(F, {Src0, Src1});
626}
627
628// Emit an intrinsic that has overloaded integer result and fp operand.
629static Value *
630emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
631 unsigned IntrinsicID,
632 unsigned ConstrainedIntrinsicID) {
633 llvm::Type *ResultType = CGF.ConvertType(E->getType());
634 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
635
636 if (CGF.Builder.getIsFPConstrained()) {
637 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
638 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
639 {ResultType, Src0->getType()});
640 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
641 } else {
642 Function *F =
643 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
644 return CGF.Builder.CreateCall(F, Src0);
645 }
646}
647
648static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
649 llvm::Intrinsic::ID IntrinsicID) {
650 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
651 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
652
653 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
654 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
655 llvm::Function *F =
656 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
657 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
658
659 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
660 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
661 CGF.EmitStoreOfScalar(Exp, LV);
662
663 return CGF.Builder.CreateExtractValue(Call, 0);
664}
665
666/// EmitFAbs - Emit a call to @llvm.fabs().
667static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
668 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
669 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
670 Call->setDoesNotAccessMemory();
671 return Call;
672}
673
674/// Emit the computation of the sign bit for a floating point value. Returns
675/// the i1 sign bit value.
676static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
677 LLVMContext &C = CGF.CGM.getLLVMContext();
678
679 llvm::Type *Ty = V->getType();
680 int Width = Ty->getPrimitiveSizeInBits();
681 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
682 V = CGF.Builder.CreateBitCast(V, IntTy);
683 if (Ty->isPPC_FP128Ty()) {
684 // We want the sign bit of the higher-order double. The bitcast we just
685 // did works as if the double-double was stored to memory and then
686 // read as an i128. The "store" will put the higher-order double in the
687 // lower address in both little- and big-Endian modes, but the "load"
688 // will treat those bits as a different part of the i128: the low bits in
689 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
690 // we need to shift the high bits down to the low before truncating.
691 Width >>= 1;
692 if (CGF.getTarget().isBigEndian()) {
693 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
694 V = CGF.Builder.CreateLShr(V, ShiftCst);
695 }
696 // We are truncating value in order to extract the higher-order
697 // double, which we will be using to extract the sign from.
698 IntTy = llvm::IntegerType::get(C, Width);
699 V = CGF.Builder.CreateTrunc(V, IntTy);
700 }
701 Value *Zero = llvm::Constant::getNullValue(IntTy);
702 return CGF.Builder.CreateICmpSLT(V, Zero);
703}
704
705static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
706 const CallExpr *E, llvm::Constant *calleeValue) {
707 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
708 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
709 return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
710}
711
712/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
713/// depending on IntrinsicID.
714///
715/// \arg CGF The current codegen function.
716/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
717/// \arg X The first argument to the llvm.*.with.overflow.*.
718/// \arg Y The second argument to the llvm.*.with.overflow.*.
719/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
720/// \returns The result (i.e. sum/product) returned by the intrinsic.
721static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
722 const llvm::Intrinsic::ID IntrinsicID,
723 llvm::Value *X, llvm::Value *Y,
724 llvm::Value *&Carry) {
725 // Make sure we have integers of the same width.
726 assert(X->getType() == Y->getType() &&
727 "Arguments must be the same type. (Did you forget to make sure both "
728 "arguments have the same integer width?)");
729
730 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
731 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
732 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
733 return CGF.Builder.CreateExtractValue(Tmp, 0);
734}
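// For illustration, a sketch of how a checked-arithmetic builtin uses the
// helper above (assuming the 32-bit signed add case):
//
//   int sum;
//   bool ov = __builtin_sadd_overflow(a, b, &sum);
//   // ==> %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   //     the i32 element is stored to sum, the i1 element becomes ov.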
735
736static Value *emitRangedBuiltin(CodeGenFunction &CGF,
737 unsigned IntrinsicID,
738 int low, int high) {
739 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
740 llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
741 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
742 llvm::Instruction *Call = CGF.Builder.CreateCall(F);
743 Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
744 Call->setMetadata(llvm::LLVMContext::MD_noundef,
745 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
746 return Call;
747}
748
749namespace {
750 struct WidthAndSignedness {
751 unsigned Width;
752 bool Signed;
753 };
754}
755
756static WidthAndSignedness
757getIntegerWidthAndSignedness(const clang::ASTContext &context,
758 const clang::QualType Type) {
759 assert(Type->isIntegerType() && "Given type is not an integer.");
760 unsigned Width = Type->isBooleanType() ? 1
761 : Type->isBitIntType() ? context.getIntWidth(Type)
762 : context.getTypeInfo(Type).Width;
763 bool Signed = Type->isSignedIntegerType();
764 return {Width, Signed};
765}
766
767// Given one or more integer types, this function produces an integer type that
768// encompasses them: any value in one of the given types could be expressed in
769// the encompassing type.
770static struct WidthAndSignedness
771EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
772 assert(Types.size() > 0 && "Empty list of types.");
773
774 // If any of the given types is signed, we must return a signed type.
775 bool Signed = false;
776 for (const auto &Type : Types) {
777 Signed |= Type.Signed;
778 }
779
780 // The encompassing type must have a width greater than or equal to the width
781 // of the specified types. Additionally, if the encompassing type is signed,
782 // its width must be strictly greater than the width of any unsigned types
783 // given.
784 unsigned Width = 0;
785 for (const auto &Type : Types) {
786 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
787 if (Width < MinWidth) {
788 Width = MinWidth;
789 }
790 }
791
792 return {Width, Signed};
793}
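// Worked example for the rule above: for {32, unsigned} and {32, signed} the
// result must be signed, the unsigned member then needs 32 + 1 bits, and so
//
//   EncompassingIntegerType({{32, false}, {32, true}}) == {33, true}
//
// i.e. a 33-bit signed type can represent every value of both inputs.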
794
795Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
796 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
797 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
798 ArgValue);
799}
800
801/// Checks if using the result of __builtin_object_size(p, @p From) in place of
802/// __builtin_object_size(p, @p To) is correct
803static bool areBOSTypesCompatible(int From, int To) {
804 // Note: Our __builtin_object_size implementation currently treats Type=0 and
805 // Type=2 identically. Encoding this implementation detail here may make
806 // improving __builtin_object_size difficult in the future, so it's omitted.
807 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
808}
809
810static llvm::Value *
811getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
812 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
813}
814
815llvm::Value *
816CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
817 llvm::IntegerType *ResType,
818 llvm::Value *EmittedE,
819 bool IsDynamic) {
820 uint64_t ObjectSize;
821 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
822 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
823 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
824}
825
826const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
827 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
828 uint64_t &Offset) {
829 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
830 getLangOpts().getStrictFlexArraysLevel();
831 uint32_t FieldNo = 0;
832
833 if (RD->isImplicit())
834 return nullptr;
835
836 for (const FieldDecl *FD : RD->fields()) {
837 if ((!FAMDecl || FD == FAMDecl) &&
838 Decl::isFlexibleArrayMemberLike(
839 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
840 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
841 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
842 Offset += Layout.getFieldOffset(FieldNo);
843 return FD;
844 }
845
846 QualType Ty = FD->getType();
847 if (Ty->isRecordType()) {
848 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
849 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
850 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
851 Offset += Layout.getFieldOffset(FieldNo);
852 return Field;
853 }
854 }
855
856 if (!RD->isUnion())
857 ++FieldNo;
858 }
859
860 return nullptr;
861}
862
863static unsigned CountCountedByAttrs(const RecordDecl *RD) {
864 unsigned Num = 0;
865
866 for (const FieldDecl *FD : RD->fields()) {
867 if (FD->getType()->isCountAttributedType())
868 return ++Num;
869
870 QualType Ty = FD->getType();
871 if (Ty->isRecordType())
872 Num += CountCountedByAttrs(Ty->getAsRecordDecl());
873 }
874
875 return Num;
876}
877
878llvm::Value *
879CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
880 llvm::IntegerType *ResType) {
881 // The code generated here calculates the size of a struct with a flexible
882 // array member that uses the counted_by attribute. There are three instances
883 // we handle:
884 //
885 // struct s {
886 // unsigned long flags;
887 // int count;
888 // int array[] __attribute__((counted_by(count)));
889 // }
890 //
891 // 1) bdos of the flexible array itself:
892 //
893 // __builtin_dynamic_object_size(p->array, 1) ==
894 // p->count * sizeof(*p->array)
895 //
896 // 2) bdos of a pointer into the flexible array:
897 //
898 // __builtin_dynamic_object_size(&p->array[42], 1) ==
899 // (p->count - 42) * sizeof(*p->array)
900 //
901 // 3) bdos of the whole struct, including the flexible array:
902 //
903 // __builtin_dynamic_object_size(p, 1) ==
904 // max(sizeof(struct s),
905 // offsetof(struct s, array) + p->count * sizeof(*p->array))
906 //
907 ASTContext &Ctx = getContext();
908 const Expr *Base = E->IgnoreParenImpCasts();
909 const Expr *Idx = nullptr;
910
911 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
912 UO && UO->getOpcode() == UO_AddrOf) {
913 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
914 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
915 Base = ASE->getBase()->IgnoreParenImpCasts();
916 Idx = ASE->getIdx()->IgnoreParenImpCasts();
917
918 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
919 int64_t Val = IL->getValue().getSExtValue();
920 if (Val < 0)
921 return getDefaultBuiltinObjectSizeResult(Type, ResType);
922
923 if (Val == 0)
924 // The index is 0, so we don't need to take it into account.
925 Idx = nullptr;
926 }
927 } else {
928 // Potential pointer to another element in the struct.
929 Base = SubExpr;
930 }
931 }
932
933 // Get the flexible array member Decl.
934 const RecordDecl *OuterRD = nullptr;
935 const FieldDecl *FAMDecl = nullptr;
936 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
937 // Check if \p Base is referencing the FAM itself.
938 const ValueDecl *VD = ME->getMemberDecl();
939 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
940 FAMDecl = dyn_cast<FieldDecl>(VD);
941 if (!FAMDecl)
942 return nullptr;
943 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
944 // Check if we're pointing to the whole struct.
945 QualType Ty = DRE->getDecl()->getType();
946 if (Ty->isPointerType())
947 Ty = Ty->getPointeeType();
948 OuterRD = Ty->getAsRecordDecl();
949
950 // If we have a situation like this:
951 //
952 // struct union_of_fams {
953 // int flags;
954 // union {
955 // signed char normal_field;
956 // struct {
957 // int count1;
958 // int arr1[] __counted_by(count1);
959 // };
960 // struct {
961 // signed char count2;
962 // int arr2[] __counted_by(count2);
963 // };
964 // };
965 // };
966 //
967 // We don't know which 'count' to use in this scenario:
968 //
969 // size_t get_size(struct union_of_fams *p) {
970 // return __builtin_dynamic_object_size(p, 1);
971 // }
972 //
973 // Instead of calculating a wrong number, we give up.
974 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
975 return nullptr;
976 }
977
978 if (!OuterRD)
979 return nullptr;
980
981 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
982 // get its offset.
983 uint64_t Offset = 0;
984 FAMDecl =
985 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
986 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
987
988 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
989 // No flexible array member found or it doesn't have the "counted_by"
990 // attribute.
991 return nullptr;
992
993 const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
994 if (!CountedByFD)
995 // Can't find the field referenced by the "counted_by" attribute.
996 return nullptr;
997
998 // Build a load of the counted_by field.
999 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1000 Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);
1001 if (!CountedByInst)
1002 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1003
1004 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1005
1006 // Build a load of the index and subtract it from the count.
1007 Value *IdxInst = nullptr;
1008 if (Idx) {
1009 if (Idx->HasSideEffects(getContext()))
1010 // We can't have side-effects.
1011 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1012
1013 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1014 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1015 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1016
1017 // We go ahead with the calculation here. If the index turns out to be
1018 // negative, we'll catch it at the end.
1019 CountedByInst =
1020 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1021 }
1022
1023 // Calculate how large the flexible array member is in bytes.
1024 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1025 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1026 llvm::Constant *ElemSize =
1027 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1028 Value *FAMSize =
1029 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1030 FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
1031 Value *Res = FAMSize;
1032
1033 if (isa<DeclRefExpr>(Base)) {
1034 // The whole struct is specified in the __bdos.
1035 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
1036
1037 // Get the offset of the FAM.
1038 llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
1039 Value *OffsetAndFAMSize =
1040 Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
1041
1042 // Get the full size of the struct.
1043 llvm::Constant *SizeofStruct =
1044 ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
1045
1046 // max(sizeof(struct s),
1047 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1048 Res = IsSigned
1049 ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
1050 OffsetAndFAMSize, SizeofStruct)
1051 : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
1052 OffsetAndFAMSize, SizeofStruct);
1053 }
1054
1055 // A negative \p IdxInst or \p CountedByInst means that the index lands
1056 // outside of the flexible array member. If that's the case, we want to
1057 // return 0.
1058 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1059 if (IdxInst)
1060 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1061
1062 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1063}
1064
1065/// Returns a Value corresponding to the size of the given expression.
1066/// This Value may be either of the following:
1067/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1068/// it)
1069/// - A call to the @llvm.objectsize intrinsic
1070///
1071/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1072/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1073/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1074llvm::Value *
1075CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1076 llvm::IntegerType *ResType,
1077 llvm::Value *EmittedE, bool IsDynamic) {
1078 // We need to reference an argument if the pointer is a parameter with the
1079 // pass_object_size attribute.
1080 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1081 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1082 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1083 if (Param != nullptr && PS != nullptr &&
1084 areBOSTypesCompatible(PS->getType(), Type)) {
1085 auto Iter = SizeArguments.find(Param);
1086 assert(Iter != SizeArguments.end());
1087
1088 const ImplicitParamDecl *D = Iter->second;
1089 auto DIter = LocalDeclMap.find(D);
1090 assert(DIter != LocalDeclMap.end());
1091
1092 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1093 getContext().getSizeType(), E->getBeginLoc());
1094 }
1095 }
1096
1097 if (IsDynamic) {
1098 // Emit special code for a flexible array member with the "counted_by"
1099 // attribute.
1100 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1101 return V;
1102 }
1103
1104 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1105 // evaluate E for side-effects. In either case, we shouldn't lower to
1106 // @llvm.objectsize.
1107 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1108 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1109
1110 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1111 assert(Ptr->getType()->isPointerTy() &&
1112 "Non-pointer passed to __builtin_object_size?");
1113
1114 Function *F =
1115 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1116
1117 // LLVM only supports 0 and 2; make sure that we pass that along as a boolean.
1118 Value *Min = Builder.getInt1((Type & 2) != 0);
1119 // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1120 Value *NullIsUnknown = Builder.getTrue();
1121 Value *Dynamic = Builder.getInt1(IsDynamic);
1122 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1123}
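// For illustration, a sketch of the @llvm.objectsize call emitted above for a
// plain pointer argument (no pass_object_size parameter, no side effects),
// assuming a 64-bit size_t:
//
//   size_t n = __builtin_object_size(p, 2);
//   // ==> call i64 @llvm.objectsize.i64.p0(ptr %p,
//   //         i1 true,   ; Min: (Type & 2) != 0
//   //         i1 true,   ; NullIsUnknown
//   //         i1 false)  ; Dynamic: false for __builtin_object_size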
1124
1125namespace {
1126/// A struct to generically describe a bit test intrinsic.
1127struct BitTest {
1128 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1129 enum InterlockingKind : uint8_t {
1130 Unlocked,
1131 Sequential,
1132 Acquire,
1133 Release,
1134 NoFence
1135 };
1136
1137 ActionKind Action;
1138 InterlockingKind Interlocking;
1139 bool Is64Bit;
1140
1141 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1142};
1143
1144} // namespace
1145
1146BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1147 switch (BuiltinID) {
1148 // Main portable variants.
1149 case Builtin::BI_bittest:
1150 return {TestOnly, Unlocked, false};
1151 case Builtin::BI_bittestandcomplement:
1152 return {Complement, Unlocked, false};
1153 case Builtin::BI_bittestandreset:
1154 return {Reset, Unlocked, false};
1155 case Builtin::BI_bittestandset:
1156 return {Set, Unlocked, false};
1157 case Builtin::BI_interlockedbittestandreset:
1158 return {Reset, Sequential, false};
1159 case Builtin::BI_interlockedbittestandset:
1160 return {Set, Sequential, false};
1161
1162 // X86-specific 64-bit variants.
1163 case Builtin::BI_bittest64:
1164 return {TestOnly, Unlocked, true};
1165 case Builtin::BI_bittestandcomplement64:
1166 return {Complement, Unlocked, true};
1167 case Builtin::BI_bittestandreset64:
1168 return {Reset, Unlocked, true};
1169 case Builtin::BI_bittestandset64:
1170 return {Set, Unlocked, true};
1171 case Builtin::BI_interlockedbittestandreset64:
1172 return {Reset, Sequential, true};
1173 case Builtin::BI_interlockedbittestandset64:
1174 return {Set, Sequential, true};
1175
1176 // ARM/AArch64-specific ordering variants.
1177 case Builtin::BI_interlockedbittestandset_acq:
1178 return {Set, Acquire, false};
1179 case Builtin::BI_interlockedbittestandset_rel:
1180 return {Set, Release, false};
1181 case Builtin::BI_interlockedbittestandset_nf:
1182 return {Set, NoFence, false};
1183 case Builtin::BI_interlockedbittestandreset_acq:
1184 return {Reset, Acquire, false};
1185 case Builtin::BI_interlockedbittestandreset_rel:
1186 return {Reset, Release, false};
1187 case Builtin::BI_interlockedbittestandreset_nf:
1188 return {Reset, NoFence, false};
1189 }
1190 llvm_unreachable("expected only bittest intrinsics");
1191}
1192
1193static char bitActionToX86BTCode(BitTest::ActionKind A) {
1194 switch (A) {
1195 case BitTest::TestOnly: return '\0';
1196 case BitTest::Complement: return 'c';
1197 case BitTest::Reset: return 'r';
1198 case BitTest::Set: return 's';
1199 }
1200 llvm_unreachable("invalid action");
1201}
1202
1203static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1204 BitTest BT,
1205 const CallExpr *E, Value *BitBase,
1206 Value *BitPos) {
1207 char Action = bitActionToX86BTCode(BT.Action);
1208 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1209
1210 // Build the assembly.
1211 SmallString<64> Asm;
1212 raw_svector_ostream AsmOS(Asm);
1213 if (BT.Interlocking != BitTest::Unlocked)
1214 AsmOS << "lock ";
1215 AsmOS << "bt";
1216 if (Action)
1217 AsmOS << Action;
1218 AsmOS << SizeSuffix << " $2, ($1)";
1219
1220 // Build the constraints. FIXME: We should support immediates when possible.
1221 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1222 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1223 if (!MachineClobbers.empty()) {
1224 Constraints += ',';
1225 Constraints += MachineClobbers;
1226 }
1227 llvm::IntegerType *IntType = llvm::IntegerType::get(
1228 CGF.getLLVMContext(),
1229 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1230 llvm::FunctionType *FTy =
1231 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1232
1233 llvm::InlineAsm *IA =
1234 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1235 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1236}
1237
1238static llvm::AtomicOrdering
1239getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1240 switch (I) {
1241 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1242 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1243 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1244 case BitTest::Release: return llvm::AtomicOrdering::Release;
1245 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1246 }
1247 llvm_unreachable("invalid interlocking");
1248}
1249
1250/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1251/// bits and a bit position and read and optionally modify the bit at that
1252/// position. The position index can be arbitrarily large, i.e. it can be larger
1253/// than 31 or 63, so we need an indexed load in the general case.
1254static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1255 unsigned BuiltinID,
1256 const CallExpr *E) {
1257 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1258 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1259
1260 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1261
1262 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1263 // indexing operation internally. Use them if possible.
1264 if (CGF.getTarget().getTriple().isX86())
1265 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1266
1267 // Otherwise, use generic code to load one byte and test the bit. Use all but
1268 // the bottom three bits as the array index, and the bottom three bits to form
1269 // a mask.
1270 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1271 Value *ByteIndex = CGF.Builder.CreateAShr(
1272 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1273 Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1274 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1275 ByteIndex, "bittest.byteaddr"),
1276 CGF.Int8Ty, CharUnits::One());
1277 Value *PosLow =
1278 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1279 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1280
1281 // The updating instructions will need a mask.
1282 Value *Mask = nullptr;
1283 if (BT.Action != BitTest::TestOnly) {
1284 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1285 "bittest.mask");
1286 }
1287
1288 // Check the action and ordering of the interlocked intrinsics.
1289 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1290
1291 Value *OldByte = nullptr;
1292 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1293 // Emit a combined atomicrmw load/store operation for the interlocked
1294 // intrinsics.
1295 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1296 if (BT.Action == BitTest::Reset) {
1297 Mask = CGF.Builder.CreateNot(Mask);
1298 RMWOp = llvm::AtomicRMWInst::And;
1299 }
1300 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1301 } else {
1302 // Emit a plain load for the non-interlocked intrinsics.
1303 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1304 Value *NewByte = nullptr;
1305 switch (BT.Action) {
1306 case BitTest::TestOnly:
1307 // Don't store anything.
1308 break;
1309 case BitTest::Complement:
1310 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1311 break;
1312 case BitTest::Reset:
1313 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1314 break;
1315 case BitTest::Set:
1316 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1317 break;
1318 }
1319 if (NewByte)
1320 CGF.Builder.CreateStore(NewByte, ByteAddr);
1321 }
1322
1323 // However we loaded the old byte, either by plain load or atomicrmw, shift
1324 // the bit into the low position and mask it to 0 or 1.
1325 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1326 return CGF.Builder.CreateAnd(
1327 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1328}
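// Worked example of the generic (non-x86) path above: for _bittest(base, 100)
// the byte index is 100 >> 3 == 12 and the in-byte position is 100 & 0x7 == 4,
// so the emitted code loads byte 12 of the array, shifts it right by 4, and
// masks with 1 to produce the result bit:
//
//   unsigned char bit = _bittest(base, 100); // reads base byte 12, bit 4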
1329
1330static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1331 unsigned BuiltinID,
1332 const CallExpr *E) {
1333 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1334
1335 SmallString<64> Asm;
1336 raw_svector_ostream AsmOS(Asm);
1337 llvm::IntegerType *RetType = CGF.Int32Ty;
1338
1339 switch (BuiltinID) {
1340 case clang::PPC::BI__builtin_ppc_ldarx:
1341 AsmOS << "ldarx ";
1342 RetType = CGF.Int64Ty;
1343 break;
1344 case clang::PPC::BI__builtin_ppc_lwarx:
1345 AsmOS << "lwarx ";
1346 RetType = CGF.Int32Ty;
1347 break;
1348 case clang::PPC::BI__builtin_ppc_lharx:
1349 AsmOS << "lharx ";
1350 RetType = CGF.Int16Ty;
1351 break;
1352 case clang::PPC::BI__builtin_ppc_lbarx:
1353 AsmOS << "lbarx ";
1354 RetType = CGF.Int8Ty;
1355 break;
1356 default:
1357 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1358 }
1359
1360 AsmOS << "$0, ${1:y}";
1361
1362 std::string Constraints = "=r,*Z,~{memory}";
1363 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1364 if (!MachineClobbers.empty()) {
1365 Constraints += ',';
1366 Constraints += MachineClobbers;
1367 }
1368
1369 llvm::Type *PtrType = CGF.UnqualPtrTy;
1370 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1371
1372 llvm::InlineAsm *IA =
1373 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1374 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1375 CI->addParamAttr(
1376 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1377 return CI;
1378}
1379
1380namespace {
1381enum class MSVCSetJmpKind {
1382 _setjmpex,
1383 _setjmp3,
1384 _setjmp
1385};
1386}
1387
1388/// MSVC handles setjmp a bit differently on different platforms. On every
1389/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1390/// parameters can be passed as variadic arguments, but we always pass none.
1391static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1392 const CallExpr *E) {
1393 llvm::Value *Arg1 = nullptr;
1394 llvm::Type *Arg1Ty = nullptr;
1395 StringRef Name;
1396 bool IsVarArg = false;
1397 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1398 Name = "_setjmp3";
1399 Arg1Ty = CGF.Int32Ty;
1400 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1401 IsVarArg = true;
1402 } else {
1403 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1404 Arg1Ty = CGF.Int8PtrTy;
1405 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1406 Arg1 = CGF.Builder.CreateCall(
1407 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1408 } else
1409 Arg1 = CGF.Builder.CreateCall(
1410 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1411 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1412 }
1413
1414 // Mark the call site and declaration with ReturnsTwice.
1415 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1416 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1417 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1418 llvm::Attribute::ReturnsTwice);
1419 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1420 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1421 ReturnsTwiceAttr, /*Local=*/true);
1422
1423 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1424 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1425 llvm::Value *Args[] = {Buf, Arg1};
1426 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1427 CB->setAttributes(ReturnsTwiceAttr);
1428 return RValue::get(CB);
1429}
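// For illustration, a sketch of the two call shapes built above:
//
//   _setjmp3 (32-bit x86): variadic call "_setjmp3(jmp_buf, 0, ...)"
//   _setjmp/_setjmpex:     call "name(jmp_buf, frame-address)", where the
//                          second argument comes from @llvm.sponentry on
//                          AArch64 and @llvm.frameaddress(0) elsewhere.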
1430
1431// Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,
1432// we handle them here.
1433enum class CodeGenFunction::MSVCIntrin {
1472 __fastfail,
1473};
1474
1475static std::optional<CodeGenFunction::MSVCIntrin>
1476translateArmToMsvcIntrin(unsigned BuiltinID) {
1477 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1478 switch (BuiltinID) {
1479 default:
1480 return std::nullopt;
1481 case clang::ARM::BI_BitScanForward:
1482 case clang::ARM::BI_BitScanForward64:
1483 return MSVCIntrin::_BitScanForward;
1484 case clang::ARM::BI_BitScanReverse:
1485 case clang::ARM::BI_BitScanReverse64:
1486 return MSVCIntrin::_BitScanReverse;
1487 case clang::ARM::BI_InterlockedAnd64:
1488 return MSVCIntrin::_InterlockedAnd;
1489 case clang::ARM::BI_InterlockedExchange64:
1490 return MSVCIntrin::_InterlockedExchange;
1491 case clang::ARM::BI_InterlockedExchangeAdd64:
1492 return MSVCIntrin::_InterlockedExchangeAdd;
1493 case clang::ARM::BI_InterlockedExchangeSub64:
1494 return MSVCIntrin::_InterlockedExchangeSub;
1495 case clang::ARM::BI_InterlockedOr64:
1496 return MSVCIntrin::_InterlockedOr;
1497 case clang::ARM::BI_InterlockedXor64:
1498 return MSVCIntrin::_InterlockedXor;
1499 case clang::ARM::BI_InterlockedDecrement64:
1500 return MSVCIntrin::_InterlockedDecrement;
1501 case clang::ARM::BI_InterlockedIncrement64:
1502 return MSVCIntrin::_InterlockedIncrement;
1503 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1504 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1505 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1506 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1507 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1508 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1509 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1510 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1511 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1512 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1513 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1514 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1515 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1516 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1517 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1518 case clang::ARM::BI_InterlockedExchange8_acq:
1519 case clang::ARM::BI_InterlockedExchange16_acq:
1520 case clang::ARM::BI_InterlockedExchange_acq:
1521 case clang::ARM::BI_InterlockedExchange64_acq:
1522 return MSVCIntrin::_InterlockedExchange_acq;
1523 case clang::ARM::BI_InterlockedExchange8_rel:
1524 case clang::ARM::BI_InterlockedExchange16_rel:
1525 case clang::ARM::BI_InterlockedExchange_rel:
1526 case clang::ARM::BI_InterlockedExchange64_rel:
1527 return MSVCIntrin::_InterlockedExchange_rel;
1528 case clang::ARM::BI_InterlockedExchange8_nf:
1529 case clang::ARM::BI_InterlockedExchange16_nf:
1530 case clang::ARM::BI_InterlockedExchange_nf:
1531 case clang::ARM::BI_InterlockedExchange64_nf:
1532 return MSVCIntrin::_InterlockedExchange_nf;
1533 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1534 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1535 case clang::ARM::BI_InterlockedCompareExchange_acq:
1536 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1537 return MSVCIntrin::_InterlockedCompareExchange_acq;
1538 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1539 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1540 case clang::ARM::BI_InterlockedCompareExchange_rel:
1541 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1542 return MSVCIntrin::_InterlockedCompareExchange_rel;
1543 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1544 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1545 case clang::ARM::BI_InterlockedCompareExchange_nf:
1546 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1547 return MSVCIntrin::_InterlockedCompareExchange_nf;
1548 case clang::ARM::BI_InterlockedOr8_acq:
1549 case clang::ARM::BI_InterlockedOr16_acq:
1550 case clang::ARM::BI_InterlockedOr_acq:
1551 case clang::ARM::BI_InterlockedOr64_acq:
1552 return MSVCIntrin::_InterlockedOr_acq;
1553 case clang::ARM::BI_InterlockedOr8_rel:
1554 case clang::ARM::BI_InterlockedOr16_rel:
1555 case clang::ARM::BI_InterlockedOr_rel:
1556 case clang::ARM::BI_InterlockedOr64_rel:
1557 return MSVCIntrin::_InterlockedOr_rel;
1558 case clang::ARM::BI_InterlockedOr8_nf:
1559 case clang::ARM::BI_InterlockedOr16_nf:
1560 case clang::ARM::BI_InterlockedOr_nf:
1561 case clang::ARM::BI_InterlockedOr64_nf:
1562 return MSVCIntrin::_InterlockedOr_nf;
1563 case clang::ARM::BI_InterlockedXor8_acq:
1564 case clang::ARM::BI_InterlockedXor16_acq:
1565 case clang::ARM::BI_InterlockedXor_acq:
1566 case clang::ARM::BI_InterlockedXor64_acq:
1567 return MSVCIntrin::_InterlockedXor_acq;
1568 case clang::ARM::BI_InterlockedXor8_rel:
1569 case clang::ARM::BI_InterlockedXor16_rel:
1570 case clang::ARM::BI_InterlockedXor_rel:
1571 case clang::ARM::BI_InterlockedXor64_rel:
1572 return MSVCIntrin::_InterlockedXor_rel;
1573 case clang::ARM::BI_InterlockedXor8_nf:
1574 case clang::ARM::BI_InterlockedXor16_nf:
1575 case clang::ARM::BI_InterlockedXor_nf:
1576 case clang::ARM::BI_InterlockedXor64_nf:
1577 return MSVCIntrin::_InterlockedXor_nf;
1578 case clang::ARM::BI_InterlockedAnd8_acq:
1579 case clang::ARM::BI_InterlockedAnd16_acq:
1580 case clang::ARM::BI_InterlockedAnd_acq:
1581 case clang::ARM::BI_InterlockedAnd64_acq:
1582 return MSVCIntrin::_InterlockedAnd_acq;
1583 case clang::ARM::BI_InterlockedAnd8_rel:
1584 case clang::ARM::BI_InterlockedAnd16_rel:
1585 case clang::ARM::BI_InterlockedAnd_rel:
1586 case clang::ARM::BI_InterlockedAnd64_rel:
1587 return MSVCIntrin::_InterlockedAnd_rel;
1588 case clang::ARM::BI_InterlockedAnd8_nf:
1589 case clang::ARM::BI_InterlockedAnd16_nf:
1590 case clang::ARM::BI_InterlockedAnd_nf:
1591 case clang::ARM::BI_InterlockedAnd64_nf:
1592 return MSVCIntrin::_InterlockedAnd_nf;
1593 case clang::ARM::BI_InterlockedIncrement16_acq:
1594 case clang::ARM::BI_InterlockedIncrement_acq:
1595 case clang::ARM::BI_InterlockedIncrement64_acq:
1596 return MSVCIntrin::_InterlockedIncrement_acq;
1597 case clang::ARM::BI_InterlockedIncrement16_rel:
1598 case clang::ARM::BI_InterlockedIncrement_rel:
1599 case clang::ARM::BI_InterlockedIncrement64_rel:
1600 return MSVCIntrin::_InterlockedIncrement_rel;
1601 case clang::ARM::BI_InterlockedIncrement16_nf:
1602 case clang::ARM::BI_InterlockedIncrement_nf:
1603 case clang::ARM::BI_InterlockedIncrement64_nf:
1604 return MSVCIntrin::_InterlockedIncrement_nf;
1605 case clang::ARM::BI_InterlockedDecrement16_acq:
1606 case clang::ARM::BI_InterlockedDecrement_acq:
1607 case clang::ARM::BI_InterlockedDecrement64_acq:
1608 return MSVCIntrin::_InterlockedDecrement_acq;
1609 case clang::ARM::BI_InterlockedDecrement16_rel:
1610 case clang::ARM::BI_InterlockedDecrement_rel:
1611 case clang::ARM::BI_InterlockedDecrement64_rel:
1612 return MSVCIntrin::_InterlockedDecrement_rel;
1613 case clang::ARM::BI_InterlockedDecrement16_nf:
1614 case clang::ARM::BI_InterlockedDecrement_nf:
1615 case clang::ARM::BI_InterlockedDecrement64_nf:
1616 return MSVCIntrin::_InterlockedDecrement_nf;
1617 }
1618 llvm_unreachable("must return from switch");
1619}
1620
1621static std::optional<CodeGenFunction::MSVCIntrin>
1622translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1623 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1624 switch (BuiltinID) {
1625 default:
1626 return std::nullopt;
1627 case clang::AArch64::BI_BitScanForward:
1628 case clang::AArch64::BI_BitScanForward64:
1629 return MSVCIntrin::_BitScanForward;
1630 case clang::AArch64::BI_BitScanReverse:
1631 case clang::AArch64::BI_BitScanReverse64:
1632 return MSVCIntrin::_BitScanReverse;
1633 case clang::AArch64::BI_InterlockedAnd64:
1634 return MSVCIntrin::_InterlockedAnd;
1635 case clang::AArch64::BI_InterlockedExchange64:
1636 return MSVCIntrin::_InterlockedExchange;
1637 case clang::AArch64::BI_InterlockedExchangeAdd64:
1638 return MSVCIntrin::_InterlockedExchangeAdd;
1639 case clang::AArch64::BI_InterlockedExchangeSub64:
1640 return MSVCIntrin::_InterlockedExchangeSub;
1641 case clang::AArch64::BI_InterlockedOr64:
1642 return MSVCIntrin::_InterlockedOr;
1643 case clang::AArch64::BI_InterlockedXor64:
1644 return MSVCIntrin::_InterlockedXor;
1645 case clang::AArch64::BI_InterlockedDecrement64:
1646 return MSVCIntrin::_InterlockedDecrement;
1647 case clang::AArch64::BI_InterlockedIncrement64:
1648 return MSVCIntrin::_InterlockedIncrement;
1649 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1650 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1651 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1652 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1653 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1654 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1655 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1656 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1657 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1658 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1659 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1660 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1661 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1662 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1663 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1664 case clang::AArch64::BI_InterlockedExchange8_acq:
1665 case clang::AArch64::BI_InterlockedExchange16_acq:
1666 case clang::AArch64::BI_InterlockedExchange_acq:
1667 case clang::AArch64::BI_InterlockedExchange64_acq:
1668 return MSVCIntrin::_InterlockedExchange_acq;
1669 case clang::AArch64::BI_InterlockedExchange8_rel:
1670 case clang::AArch64::BI_InterlockedExchange16_rel:
1671 case clang::AArch64::BI_InterlockedExchange_rel:
1672 case clang::AArch64::BI_InterlockedExchange64_rel:
1673 return MSVCIntrin::_InterlockedExchange_rel;
1674 case clang::AArch64::BI_InterlockedExchange8_nf:
1675 case clang::AArch64::BI_InterlockedExchange16_nf:
1676 case clang::AArch64::BI_InterlockedExchange_nf:
1677 case clang::AArch64::BI_InterlockedExchange64_nf:
1678 return MSVCIntrin::_InterlockedExchange_nf;
1679 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1680 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1681 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1682 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1683 return MSVCIntrin::_InterlockedCompareExchange_acq;
1684 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1685 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1686 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1687 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1688 return MSVCIntrin::_InterlockedCompareExchange_rel;
1689 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1690 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1691 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1692 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1693 return MSVCIntrin::_InterlockedCompareExchange_nf;
1694 case clang::AArch64::BI_InterlockedCompareExchange128:
1695 return MSVCIntrin::_InterlockedCompareExchange128;
1696 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1697 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1698 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1699 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1700 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1701 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1702 case clang::AArch64::BI_InterlockedOr8_acq:
1703 case clang::AArch64::BI_InterlockedOr16_acq:
1704 case clang::AArch64::BI_InterlockedOr_acq:
1705 case clang::AArch64::BI_InterlockedOr64_acq:
1706 return MSVCIntrin::_InterlockedOr_acq;
1707 case clang::AArch64::BI_InterlockedOr8_rel:
1708 case clang::AArch64::BI_InterlockedOr16_rel:
1709 case clang::AArch64::BI_InterlockedOr_rel:
1710 case clang::AArch64::BI_InterlockedOr64_rel:
1711 return MSVCIntrin::_InterlockedOr_rel;
1712 case clang::AArch64::BI_InterlockedOr8_nf:
1713 case clang::AArch64::BI_InterlockedOr16_nf:
1714 case clang::AArch64::BI_InterlockedOr_nf:
1715 case clang::AArch64::BI_InterlockedOr64_nf:
1716 return MSVCIntrin::_InterlockedOr_nf;
1717 case clang::AArch64::BI_InterlockedXor8_acq:
1718 case clang::AArch64::BI_InterlockedXor16_acq:
1719 case clang::AArch64::BI_InterlockedXor_acq:
1720 case clang::AArch64::BI_InterlockedXor64_acq:
1721 return MSVCIntrin::_InterlockedXor_acq;
1722 case clang::AArch64::BI_InterlockedXor8_rel:
1723 case clang::AArch64::BI_InterlockedXor16_rel:
1724 case clang::AArch64::BI_InterlockedXor_rel:
1725 case clang::AArch64::BI_InterlockedXor64_rel:
1726 return MSVCIntrin::_InterlockedXor_rel;
1727 case clang::AArch64::BI_InterlockedXor8_nf:
1728 case clang::AArch64::BI_InterlockedXor16_nf:
1729 case clang::AArch64::BI_InterlockedXor_nf:
1730 case clang::AArch64::BI_InterlockedXor64_nf:
1731 return MSVCIntrin::_InterlockedXor_nf;
1732 case clang::AArch64::BI_InterlockedAnd8_acq:
1733 case clang::AArch64::BI_InterlockedAnd16_acq:
1734 case clang::AArch64::BI_InterlockedAnd_acq:
1735 case clang::AArch64::BI_InterlockedAnd64_acq:
1736 return MSVCIntrin::_InterlockedAnd_acq;
1737 case clang::AArch64::BI_InterlockedAnd8_rel:
1738 case clang::AArch64::BI_InterlockedAnd16_rel:
1739 case clang::AArch64::BI_InterlockedAnd_rel:
1740 case clang::AArch64::BI_InterlockedAnd64_rel:
1741 return MSVCIntrin::_InterlockedAnd_rel;
1742 case clang::AArch64::BI_InterlockedAnd8_nf:
1743 case clang::AArch64::BI_InterlockedAnd16_nf:
1744 case clang::AArch64::BI_InterlockedAnd_nf:
1745 case clang::AArch64::BI_InterlockedAnd64_nf:
1746 return MSVCIntrin::_InterlockedAnd_nf;
1747 case clang::AArch64::BI_InterlockedIncrement16_acq:
1748 case clang::AArch64::BI_InterlockedIncrement_acq:
1749 case clang::AArch64::BI_InterlockedIncrement64_acq:
1750 return MSVCIntrin::_InterlockedIncrement_acq;
1751 case clang::AArch64::BI_InterlockedIncrement16_rel:
1752 case clang::AArch64::BI_InterlockedIncrement_rel:
1753 case clang::AArch64::BI_InterlockedIncrement64_rel:
1754 return MSVCIntrin::_InterlockedIncrement_rel;
1755 case clang::AArch64::BI_InterlockedIncrement16_nf:
1756 case clang::AArch64::BI_InterlockedIncrement_nf:
1757 case clang::AArch64::BI_InterlockedIncrement64_nf:
1758 return MSVCIntrin::_InterlockedIncrement_nf;
1759 case clang::AArch64::BI_InterlockedDecrement16_acq:
1760 case clang::AArch64::BI_InterlockedDecrement_acq:
1761 case clang::AArch64::BI_InterlockedDecrement64_acq:
1762 return MSVCIntrin::_InterlockedDecrement_acq;
1763 case clang::AArch64::BI_InterlockedDecrement16_rel:
1764 case clang::AArch64::BI_InterlockedDecrement_rel:
1765 case clang::AArch64::BI_InterlockedDecrement64_rel:
1766 return MSVCIntrin::_InterlockedDecrement_rel;
1767 case clang::AArch64::BI_InterlockedDecrement16_nf:
1768 case clang::AArch64::BI_InterlockedDecrement_nf:
1769 case clang::AArch64::BI_InterlockedDecrement64_nf:
1770 return MSVCIntrin::_InterlockedDecrement_nf;
1771 }
1772 llvm_unreachable("must return from switch");
1773}
1774
1775static std::optional<CodeGenFunction::MSVCIntrin>
1776translateX86ToMsvcIntrin(unsigned BuiltinID) {
1777 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1778 switch (BuiltinID) {
1779 default:
1780 return std::nullopt;
1781 case clang::X86::BI_BitScanForward:
1782 case clang::X86::BI_BitScanForward64:
1783 return MSVCIntrin::_BitScanForward;
1784 case clang::X86::BI_BitScanReverse:
1785 case clang::X86::BI_BitScanReverse64:
1786 return MSVCIntrin::_BitScanReverse;
1787 case clang::X86::BI_InterlockedAnd64:
1788 return MSVCIntrin::_InterlockedAnd;
1789 case clang::X86::BI_InterlockedCompareExchange128:
1790 return MSVCIntrin::_InterlockedCompareExchange128;
1791 case clang::X86::BI_InterlockedExchange64:
1792 return MSVCIntrin::_InterlockedExchange;
1793 case clang::X86::BI_InterlockedExchangeAdd64:
1794 return MSVCIntrin::_InterlockedExchangeAdd;
1795 case clang::X86::BI_InterlockedExchangeSub64:
1796 return MSVCIntrin::_InterlockedExchangeSub;
1797 case clang::X86::BI_InterlockedOr64:
1798 return MSVCIntrin::_InterlockedOr;
1799 case clang::X86::BI_InterlockedXor64:
1800 return MSVCIntrin::_InterlockedXor;
1801 case clang::X86::BI_InterlockedDecrement64:
1802 return MSVCIntrin::_InterlockedDecrement;
1803 case clang::X86::BI_InterlockedIncrement64:
1804 return MSVCIntrin::_InterlockedIncrement;
1805 }
1806 llvm_unreachable("must return from switch");
1807}
1808
1809// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1810Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1811 const CallExpr *E) {
1812 switch (BuiltinID) {
1813 case MSVCIntrin::_BitScanForward:
1814 case MSVCIntrin::_BitScanReverse: {
1815 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1816 Value *ArgValue = EmitScalarExpr(E->getArg(1));
1817
1818 llvm::Type *ArgType = ArgValue->getType();
1819 llvm::Type *IndexType = IndexAddress.getElementType();
1820 llvm::Type *ResultType = ConvertType(E->getType());
1821
1822 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1823 Value *ResZero = llvm::Constant::getNullValue(ResultType);
1824 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1825
1826 BasicBlock *Begin = Builder.GetInsertBlock();
1827 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1828 Builder.SetInsertPoint(End);
1829 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1830
1831 Builder.SetInsertPoint(Begin);
1832 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1833 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1834 Builder.CreateCondBr(IsZero, End, NotZero);
1835 Result->addIncoming(ResZero, Begin);
1836
1837 Builder.SetInsertPoint(NotZero);
1838
1839 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1840 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1841 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1842 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1843 Builder.CreateStore(ZeroCount, IndexAddress, false);
1844 } else {
1845 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1846 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1847
1848 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1849 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1850 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1851 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1852 Builder.CreateStore(Index, IndexAddress, false);
1853 }
1854 Builder.CreateBr(End);
1855 Result->addIncoming(ResOne, NotZero);
1856
1857 Builder.SetInsertPoint(End);
1858 return Result;
1859 }
1860 case MSVCIntrin::_InterlockedAnd:
1861 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1862 case MSVCIntrin::_InterlockedExchange:
1863 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1864 case MSVCIntrin::_InterlockedExchangeAdd:
1865 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1866 case MSVCIntrin::_InterlockedExchangeSub:
1867 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1868 case MSVCIntrin::_InterlockedOr:
1869 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1870 case MSVCIntrin::_InterlockedXor:
1871 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
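// The _acq/_rel/_nf ("no fence") suffixed variants below differ only in the
// atomic ordering they request: acquire, release, and monotonic respectively.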
1872 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1873 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1874 AtomicOrdering::Acquire);
1875 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1876 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1877 AtomicOrdering::Release);
1878 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1879 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1880 AtomicOrdering::Monotonic);
1881 case MSVCIntrin::_InterlockedExchange_acq:
1882 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1883 AtomicOrdering::Acquire);
1884 case MSVCIntrin::_InterlockedExchange_rel:
1885 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1886 AtomicOrdering::Release);
1887 case MSVCIntrin::_InterlockedExchange_nf:
1888 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1889 AtomicOrdering::Monotonic);
1890 case MSVCIntrin::_InterlockedCompareExchange_acq:
1891 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1892 case MSVCIntrin::_InterlockedCompareExchange_rel:
1893 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1894 case MSVCIntrin::_InterlockedCompareExchange_nf:
1895 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1896 case MSVCIntrin::_InterlockedCompareExchange128:
1897 return EmitAtomicCmpXchg128ForMSIntrin(
1898 *this, E, AtomicOrdering::SequentiallyConsistent);
1899 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1900 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1901 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1902 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1903 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1904 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1905 case MSVCIntrin::_InterlockedOr_acq:
1906 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1907 AtomicOrdering::Acquire);
1908 case MSVCIntrin::_InterlockedOr_rel:
1909 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1910 AtomicOrdering::Release);
1911 case MSVCIntrin::_InterlockedOr_nf:
1912 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1913 AtomicOrdering::Monotonic);
1914 case MSVCIntrin::_InterlockedXor_acq:
1915 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1916 AtomicOrdering::Acquire);
1917 case MSVCIntrin::_InterlockedXor_rel:
1918 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1919 AtomicOrdering::Release);
1920 case MSVCIntrin::_InterlockedXor_nf:
1921 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1922 AtomicOrdering::Monotonic);
1923 case MSVCIntrin::_InterlockedAnd_acq:
1924 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1925 AtomicOrdering::Acquire);
1926 case MSVCIntrin::_InterlockedAnd_rel:
1927 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1928 AtomicOrdering::Release);
1929 case MSVCIntrin::_InterlockedAnd_nf:
1930 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1931 AtomicOrdering::Monotonic);
1932 case MSVCIntrin::_InterlockedIncrement_acq:
1933 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1934 case MSVCIntrin::_InterlockedIncrement_rel:
1935 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1936 case MSVCIntrin::_InterlockedIncrement_nf:
1937 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1938 case MSVCIntrin::_InterlockedDecrement_acq:
1939 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1940 case MSVCIntrin::_InterlockedDecrement_rel:
1941 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1942 case MSVCIntrin::_InterlockedDecrement_nf:
1943 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1944
1945 case MSVCIntrin::_InterlockedDecrement:
1946 return EmitAtomicDecrementValue(*this, E);
1947 case MSVCIntrin::_InterlockedIncrement:
1948 return EmitAtomicIncrementValue(*this, E);
1949
1950 case MSVCIntrin::__fastfail: {
1951 // Request immediate process termination from the kernel. The instruction
1952 // sequences to do this are documented on MSDN:
1953 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
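// In each case the fail-fast code argument is passed in the register named by
// Constraints, and the call is marked noreturn since the instruction
// terminates the process.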
1954 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1955 StringRef Asm, Constraints;
1956 switch (ISA) {
1957 default:
1958 ErrorUnsupported(E, "__fastfail call for this architecture");
1959 break;
1960 case llvm::Triple::x86:
1961 case llvm::Triple::x86_64:
1962 Asm = "int $$0x29";
1963 Constraints = "{cx}";
1964 break;
1965 case llvm::Triple::thumb:
1966 Asm = "udf #251";
1967 Constraints = "{r0}";
1968 break;
1969 case llvm::Triple::aarch64:
1970 Asm = "brk #0xF003";
1971 Constraints = "{w0}";
1972 }
1973 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1974 llvm::InlineAsm *IA =
1975 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1976 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1977 getLLVMContext(), llvm::AttributeList::FunctionIndex,
1978 llvm::Attribute::NoReturn);
1979 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1980 CI->setAttributes(NoReturnAttr);
1981 return CI;
1982 }
1983 }
1984 llvm_unreachable("Incorrect MSVC intrinsic!");
1985}
1986
1987namespace {
1988// ARC cleanup for __builtin_os_log_format
1989struct CallObjCArcUse final : EHScopeStack::Cleanup {
1990 CallObjCArcUse(llvm::Value *object) : object(object) {}
1991 llvm::Value *object;
1992
1993 void Emit(CodeGenFunction &CGF, Flags flags) override {
1994 CGF.EmitARCIntrinsicUse(object);
1995 }
1996};
1997}
1998
1999 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2000 BuiltinCheckKind Kind) {
2001 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
2002 && "Unsupported builtin check kind");
2003
2004 Value *ArgValue = EmitScalarExpr(E);
2005 if (!SanOpts.has(SanitizerKind::Builtin))
2006 return ArgValue;
2007
2008 SanitizerScope SanScope(this);
2009 Value *Cond = Builder.CreateICmpNE(
2010 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2011 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2012 SanitizerHandler::InvalidBuiltin,
2013 {EmitCheckSourceLocation(E->getExprLoc()),
2014 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2015 std::nullopt);
2016 return ArgValue;
2017}
2018
2019static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2020 return CGF.Builder.CreateBinaryIntrinsic(
2021 Intrinsic::abs, ArgValue,
2022 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2023}
2024
2025 static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2026 bool SanitizeOverflow) {
2027 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2028
2029 // Try to eliminate overflow check.
2030 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2031 if (!VCI->isMinSignedValue())
2032 return EmitAbs(CGF, ArgValue, true);
2033 }
2034
2035 CodeGenFunction::SanitizerScope SanScope(&CGF);
2036
2037 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2038 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2039 Intrinsic::ssub_with_overflow, Zero, ArgValue);
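// Negating as 0 - x can only overflow for x == INT_MIN, since |INT_MIN| is not
// representable in the signed type; that is the sole case the check catches.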
2040 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2041 Value *NotOverflow = CGF.Builder.CreateNot(
2042 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2043
2044 // TODO: support -ftrapv-handler.
2045 if (SanitizeOverflow) {
2046 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2047 SanitizerHandler::NegateOverflow,
2048 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2049 CGF.EmitCheckTypeDescriptor(E->getType())},
2050 {ArgValue});
2051 } else
2052 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2053
2054 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2055 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2056}
2057
2058/// Get the argument type for arguments to os_log_helper.
2059 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2060 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2061 return C.getCanonicalType(UnsignedTy);
2062}
2063
2064 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2065 const analyze_os_log::OSLogBufferLayout &Layout,
2066 CharUnits BufferAlignment) {
2067 ASTContext &Ctx = getContext();
2068
2069 llvm::SmallString<64> Name;
2070 {
2071 raw_svector_ostream OS(Name);
2072 OS << "__os_log_helper";
2073 OS << "_" << BufferAlignment.getQuantity();
2074 OS << "_" << int(Layout.getSummaryByte());
2075 OS << "_" << int(Layout.getNumArgsByte());
2076 for (const auto &Item : Layout.Items)
2077 OS << "_" << int(Item.getSizeByte()) << "_"
2078 << int(Item.getDescriptorByte());
2079 }
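// The helper name encodes the buffer layout:
// __os_log_helper_<align>_<summary>_<numArgs>(_<argSize>_<argDescriptor>)*
// e.g. "__os_log_helper_16_0_2_4_0_8_0" for a 16-byte-aligned buffer with a
// 4-byte and an 8-byte argument (byte values here are purely illustrative).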
2080
2081 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2082 return F;
2083
2084 llvm::SmallVector<QualType, 4> ArgTys;
2085 FunctionArgList Args;
2086 Args.push_back(ImplicitParamDecl::Create(
2087 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2088 ImplicitParamKind::Other));
2089 ArgTys.emplace_back(Ctx.VoidPtrTy);
2090
2091 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2092 char Size = Layout.Items[I].getSizeByte();
2093 if (!Size)
2094 continue;
2095
2096 QualType ArgTy = getOSLogArgType(Ctx, Size);
2097 Args.push_back(ImplicitParamDecl::Create(
2098 Ctx, nullptr, SourceLocation(),
2099 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2100 ImplicitParamKind::Other));
2101 ArgTys.emplace_back(ArgTy);
2102 }
2103
2104 QualType ReturnTy = Ctx.VoidTy;
2105
2106 // The helper function has linkonce_odr linkage to enable the linker to merge
2107 // identical functions. To ensure the merging always happens, 'noinline' is
2108 // attached to the function when compiling with -Oz.
2109 const CGFunctionInfo &FI =
2110 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2111 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2112 llvm::Function *Fn = llvm::Function::Create(
2113 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2114 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2115 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2116 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2117 Fn->setDoesNotThrow();
2118
2119 // Attach 'noinline' at -Oz.
2120 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2121 Fn->addFnAttr(llvm::Attribute::NoInline);
2122
2123 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2124 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2125
2126 // Create a scope with an artificial location for the body of this function.
2127 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2128
2129 CharUnits Offset;
2130 Address BufAddr = makeNaturalAddressForPointer(
2131 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2132 BufferAlignment);
2133 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2134 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2135 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2136 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2137
2138 unsigned I = 1;
2139 for (const auto &Item : Layout.Items) {
2140 Builder.CreateStore(
2141 Builder.getInt8(Item.getDescriptorByte()),
2142 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2143 Builder.CreateStore(
2144 Builder.getInt8(Item.getSizeByte()),
2145 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2146
2147 CharUnits Size = Item.size();
2148 if (!Size.getQuantity())
2149 continue;
2150
2151 Address Arg = GetAddrOfLocalVar(Args[I]);
2152 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2153 Addr = Addr.withElementType(Arg.getElementType());
2154 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2155 Offset += Size;
2156 ++I;
2157 }
2158
2159 FinishFunction();
2160
2161 return Fn;
2162}
2163
2164 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2165 assert(E.getNumArgs() >= 2 &&
2166 "__builtin_os_log_format takes at least 2 arguments");
2167 ASTContext &Ctx = getContext();
2168 analyze_os_log::OSLogBufferLayout Layout;
2169 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2170 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2171 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2172
2173 // Ignore argument 1, the format string. It is not currently used.
2174 CallArgList Args;
2175 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2176
2177 for (const auto &Item : Layout.Items) {
2178 int Size = Item.getSizeByte();
2179 if (!Size)
2180 continue;
2181
2182 llvm::Value *ArgVal;
2183
2184 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2185 uint64_t Val = 0;
2186 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2187 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2188 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2189 } else if (const Expr *TheExpr = Item.getExpr()) {
2190 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2191
2192 // If a temporary object that requires destruction after the full
2193 // expression is passed, push a lifetime-extended cleanup to extend its
2194 // lifetime to the end of the enclosing block scope.
2195 auto LifetimeExtendObject = [&](const Expr *E) {
2196 E = E->IgnoreParenCasts();
2197 // Extend lifetimes of objects returned by function calls and message
2198 // sends.
2199
2200 // FIXME: We should do this in other cases in which temporaries are
2201 // created including arguments of non-ARC types (e.g., C++
2202 // temporaries).
2203 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2204 return true;
2205 return false;
2206 };
2207
2208 if (TheExpr->getType()->isObjCRetainableType() &&
2209 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2210 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2211 "Only scalar can be a ObjC retainable type");
2212 if (!isa<Constant>(ArgVal)) {
2213 CleanupKind Cleanup = getARCCleanupKind();
2214 QualType Ty = TheExpr->getType();
2215 RawAddress Alloca = RawAddress::invalid();
2216 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2217 ArgVal = EmitARCRetain(Ty, ArgVal);
2218 Builder.CreateStore(ArgVal, Addr);
2219 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2220 CodeGenFunction::destroyARCStrongPrecise,
2221 Cleanup & EHCleanup);
2222
2223 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2224 // argument has to be alive.
2225 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2226 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2227 }
2228 }
2229 } else {
2230 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2231 }
2232
2233 unsigned ArgValSize =
2234 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2235 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2236 ArgValSize);
2237 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2238 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2239 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2240 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2241 Args.add(RValue::get(ArgVal), ArgTy);
2242 }
2243
2244 const CGFunctionInfo &FI =
2245 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2246 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2247 Layout, BufAddr.getAlignment());
2248 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2249 return RValue::get(BufAddr, *this);
2250}
2251
2252 static bool isSpecialUnsignedMultiplySignedResult(
2253 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2254 WidthAndSignedness ResultInfo) {
2255 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2256 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2257 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2258}
2259
2260 static RValue EmitCheckedUnsignedMultiplySignedResult(
2261 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2262 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2263 const clang::Expr *ResultArg, QualType ResultQTy,
2264 WidthAndSignedness ResultInfo) {
2265 assert(isSpecialUnsignedMultiplySignedResult(
2266 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2267 "Cannot specialize this multiply");
2268
2269 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2270 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2271
2272 llvm::Value *HasOverflow;
2273 llvm::Value *Result = EmitOverflowIntrinsic(
2274 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2275
2276 // The intrinsic call will detect overflow when the value is > UINT_MAX,
2277 // however, since the original builtin had a signed result, we need to report
2278 // an overflow when the result is greater than INT_MAX.
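// For example, with 32-bit operands 0x80000000u * 1u does not overflow the
// unsigned multiply, yet the product exceeds INT_MAX, so the extra comparison
// below must still report overflow for the signed result.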
2279 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2280 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2281
2282 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2283 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2284
2285 bool isVolatile =
2286 ResultArg->getType()->getPointeeType().isVolatileQualified();
2287 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2288 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2289 isVolatile);
2290 return RValue::get(HasOverflow);
2291}
2292
2293/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2294static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2295 WidthAndSignedness Op1Info,
2296 WidthAndSignedness Op2Info,
2297 WidthAndSignedness ResultInfo) {
2298 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2299 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2300 Op1Info.Signed != Op2Info.Signed;
2301}
2302
2303/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2304/// the generic checked-binop irgen.
2305static RValue
2306 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2307 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2308 WidthAndSignedness Op2Info,
2309 const clang::Expr *ResultArg, QualType ResultQTy,
2310 WidthAndSignedness ResultInfo) {
2311 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2312 Op2Info, ResultInfo) &&
2313 "Not a mixed-sign multipliction we can specialize");
2314
2315 // Emit the signed and unsigned operands.
2316 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2317 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2318 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2319 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2320 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2321 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2322
2323 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2324 if (SignedOpWidth < UnsignedOpWidth)
2325 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2326 if (UnsignedOpWidth < SignedOpWidth)
2327 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2328
2329 llvm::Type *OpTy = Signed->getType();
2330 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2331 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2332 llvm::Type *ResTy = ResultPtr.getElementType();
2333 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2334
2335 // Take the absolute value of the signed operand.
2336 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2337 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2338 llvm::Value *AbsSigned =
2339 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2340
2341 // Perform a checked unsigned multiplication.
2342 llvm::Value *UnsignedOverflow;
2343 llvm::Value *UnsignedResult =
2344 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2345 Unsigned, UnsignedOverflow);
2346
2347 llvm::Value *Overflow, *Result;
2348 if (ResultInfo.Signed) {
2349 // Signed overflow occurs if the result is greater than INT_MAX or less
2350 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
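// The "+ IsNegative" term accounts for |INT_MIN| == INT_MAX + 1: e.g. for a
// 32-bit result, a product of magnitude 2147483648 is representable as
// INT_MIN but not as a positive value.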
2351 auto IntMax =
2352 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2353 llvm::Value *MaxResult =
2354 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2355 CGF.Builder.CreateZExt(IsNegative, OpTy));
2356 llvm::Value *SignedOverflow =
2357 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2358 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2359
2360 // Prepare the signed result (possibly by negating it).
2361 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2362 llvm::Value *SignedResult =
2363 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2364 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2365 } else {
2366 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2367 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2368 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2369 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2370 if (ResultInfo.Width < OpWidth) {
2371 auto IntMax =
2372 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2373 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2374 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2375 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2376 }
2377
2378 // Negate the product if it would be negative in infinite precision.
2379 Result = CGF.Builder.CreateSelect(
2380 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2381
2382 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2383 }
2384 assert(Overflow && Result && "Missing overflow or result");
2385
2386 bool isVolatile =
2387 ResultArg->getType()->getPointeeType().isVolatileQualified();
2388 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2389 isVolatile);
2390 return RValue::get(Overflow);
2391}
2392
2393static bool
2394 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2395 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2396 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2397 Ty = Ctx.getBaseElementType(Arr);
2398
2399 const auto *Record = Ty->getAsCXXRecordDecl();
2400 if (!Record)
2401 return false;
2402
2403 // We've already checked this type, or are in the process of checking it.
2404 if (!Seen.insert(Record).second)
2405 return false;
2406
2407 assert(Record->hasDefinition() &&
2408 "Incomplete types should already be diagnosed");
2409
2410 if (Record->isDynamicClass())
2411 return true;
2412
2413 for (FieldDecl *F : Record->fields()) {
2414 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2415 return true;
2416 }
2417 return false;
2418}
2419
2420/// Determine if the specified type requires laundering by checking if it is a
2421/// dynamic class type or contains a subobject which is a dynamic class type.
2422 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2423 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2424 return false;
2425 llvm::SmallPtrSet<const Decl *, 16> Seen;
2426 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2427}
2428
2429RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2430 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2431 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2432
2433 // The builtin's shift arg may have a different type than the source arg and
2434 // result, but the LLVM intrinsic uses the same type for all values.
2435 llvm::Type *Ty = Src->getType();
2436 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2437
2438 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
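// e.g. fshl(x, x, n) == rotl(x, n): the funnel shift selects from the
// concatenation of its first two operands, so with identical operands the
// bits shifted out of one end re-enter at the other.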
2439 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2440 Function *F = CGM.getIntrinsic(IID, Ty);
2441 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2442}
2443
2444// Map math builtins for long-double to f128 version.
2445static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2446 switch (BuiltinID) {
2447#define MUTATE_LDBL(func) \
2448 case Builtin::BI__builtin_##func##l: \
2449 return Builtin::BI__builtin_##func##f128;
2480 MUTATE_LDBL(nans)
2481 MUTATE_LDBL(inf)
2500 MUTATE_LDBL(huge_val)
2510#undef MUTATE_LDBL
2511 default:
2512 return BuiltinID;
2513 }
2514}
2515
2516static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2517 Value *V) {
2518 if (CGF.Builder.getIsFPConstrained() &&
2519 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2520 if (Value *Result =
2521 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2522 return Result;
2523 }
2524 return nullptr;
2525}
2526
2527 static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2528 const FunctionDecl *FD) {
2529 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2530 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2531 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2532
2533 SmallVector<Value *, 16> Args;
2534 for (auto &&FormalTy : FnTy->params())
2535 Args.push_back(llvm::PoisonValue::get(FormalTy));
2536
2537 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2538}
2539
2540RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2541 const CallExpr *E,
2542 ReturnValueSlot ReturnValue) {
2543 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2544 // See if we can constant fold this builtin. If so, don't emit it at all.
2545 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2546 Expr::EvalResult Result;
2547 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2548 !Result.hasSideEffects()) {
2549 if (Result.Val.isInt())
2550 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2551 Result.Val.getInt()));
2552 if (Result.Val.isFloat())
2553 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2554 Result.Val.getFloat()));
2555 }
2556
2557 // If current long-double semantics is IEEE 128-bit, replace math builtins
2558 // of long-double with f128 equivalent.
2559 // TODO: This mutation should also be applied to other targets other than PPC,
2560 // after backend supports IEEE 128-bit style libcalls.
2561 if (getTarget().getTriple().isPPC64() &&
2562 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2563 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
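// e.g. __builtin_huge_vall is handled as __builtin_huge_valf128 from here on
// (see the MUTATE_LDBL list above).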
2564
2565 // If the builtin has been declared explicitly with an assembler label,
2566 // disable the specialized emitting below. Ideally we should communicate the
2567 // rename in IR, or at least avoid generating the intrinsic calls that are
2568 // likely to get lowered to the renamed library functions.
2569 const unsigned BuiltinIDIfNoAsmLabel =
2570 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2571
2572 std::optional<bool> ErrnoOverriden;
2573 // ErrnoOverriden is true if math-errno is overridden via the
2574 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2575 // which implies math-errno.
2576 if (E->hasStoredFPFeatures()) {
2577 FPOptionsOverride OP = E->getFPFeatures();
2578 if (OP.hasMathErrnoOverride())
2579 ErrnoOverriden = OP.getMathErrnoOverride();
2580 }
2581 // True if '__attribute__((optnone))' is used. This attribute overrides
2582 // fast-math which implies math-errno.
2583 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2584
2585 // True if we are compiling at -O2 and errno has been disabled
2586 // using the '#pragma float_control(precise, off)', and
2587 // attribute opt-none hasn't been seen.
2588 bool ErrnoOverridenToFalseWithOpt =
2589 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2590 CGM.getCodeGenOpts().OptimizationLevel != 0;
2591
2592 // There are LLVM math intrinsics/instructions corresponding to math library
2593 // functions except the LLVM op will never set errno while the math library
2594 // might. Also, math builtins have the same semantics as their math library
2595 // twins. Thus, we can transform math library and builtin calls to their
2596 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2597 // In case FP exceptions are enabled, the experimental versions of the
2598 // intrinsics model those.
2599 bool ConstAlways =
2600 getContext().BuiltinInfo.isConst(BuiltinID);
2601
2602 // There's a special case with the fma builtins where they are always const
2603 // if the target environment is GNU or the target OS is Windows and we're
2604 // targeting the MSVCRT.dll environment.
2605 // FIXME: This list can become outdated. Need to find a way to get it some
2606 // other way.
2607 switch (BuiltinID) {
2608 case Builtin::BI__builtin_fma:
2609 case Builtin::BI__builtin_fmaf:
2610 case Builtin::BI__builtin_fmal:
2611 case Builtin::BIfma:
2612 case Builtin::BIfmaf:
2613 case Builtin::BIfmal: {
2614 auto &Trip = CGM.getTriple();
2615 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2616 ConstAlways = true;
2617 break;
2618 }
2619 default:
2620 break;
2621 }
2622
2623 bool ConstWithoutErrnoAndExceptions =
2624 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2625 bool ConstWithoutExceptions =
2626 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2627
2628 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2629 // disabled.
2630 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2631 // or attributes that affect math-errno should prevent or allow math
2632 // intrinsics to be generated. Intrinsics are generated:
2633 // 1- In fast math mode, unless math-errno is overridden
2634 // via '#pragma float_control(precise, on)', or via an
2635 // '__attribute__((optnone))'.
2636 // 2- If math-errno was enabled on the command line but overridden
2637 // to false via '#pragma float_control(precise, off)' and
2638 // '__attribute__((optnone))' hasn't been used.
2639 // 3- If we are compiling with optimization and errno has been disabled
2640 // via '#pragma float_control(precise, off)', and
2641 // '__attribute__((optnone))' hasn't been used.
2642
2643 bool ConstWithoutErrnoOrExceptions =
2644 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2645 bool GenerateIntrinsics =
2646 (ConstAlways && !OptNone) ||
2647 (!getLangOpts().MathErrno &&
2648 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2649 if (!GenerateIntrinsics) {
2650 GenerateIntrinsics =
2651 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2652 if (!GenerateIntrinsics)
2653 GenerateIntrinsics =
2654 ConstWithoutErrnoOrExceptions &&
2655 (!getLangOpts().MathErrno &&
2656 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2657 if (!GenerateIntrinsics)
2658 GenerateIntrinsics =
2659 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2660 }
2661 if (GenerateIntrinsics) {
2662 switch (BuiltinIDIfNoAsmLabel) {
2663 case Builtin::BIceil:
2664 case Builtin::BIceilf:
2665 case Builtin::BIceill:
2666 case Builtin::BI__builtin_ceil:
2667 case Builtin::BI__builtin_ceilf:
2668 case Builtin::BI__builtin_ceilf16:
2669 case Builtin::BI__builtin_ceill:
2670 case Builtin::BI__builtin_ceilf128:
2671 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2672 Intrinsic::ceil,
2673 Intrinsic::experimental_constrained_ceil));
2674
2675 case Builtin::BIcopysign:
2676 case Builtin::BIcopysignf:
2677 case Builtin::BIcopysignl:
2678 case Builtin::BI__builtin_copysign:
2679 case Builtin::BI__builtin_copysignf:
2680 case Builtin::BI__builtin_copysignf16:
2681 case Builtin::BI__builtin_copysignl:
2682 case Builtin::BI__builtin_copysignf128:
2683 return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
2684
2685 case Builtin::BIcos:
2686 case Builtin::BIcosf:
2687 case Builtin::BIcosl:
2688 case Builtin::BI__builtin_cos:
2689 case Builtin::BI__builtin_cosf:
2690 case Builtin::BI__builtin_cosf16:
2691 case Builtin::BI__builtin_cosl:
2692 case Builtin::BI__builtin_cosf128:
2693 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2694 Intrinsic::cos,
2695 Intrinsic::experimental_constrained_cos));
2696
2697 case Builtin::BIexp:
2698 case Builtin::BIexpf:
2699 case Builtin::BIexpl:
2700 case Builtin::BI__builtin_exp:
2701 case Builtin::BI__builtin_expf:
2702 case Builtin::BI__builtin_expf16:
2703 case Builtin::BI__builtin_expl:
2704 case Builtin::BI__builtin_expf128:
2705 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2706 Intrinsic::exp,
2707 Intrinsic::experimental_constrained_exp));
2708
2709 case Builtin::BIexp2:
2710 case Builtin::BIexp2f:
2711 case Builtin::BIexp2l:
2712 case Builtin::BI__builtin_exp2:
2713 case Builtin::BI__builtin_exp2f:
2714 case Builtin::BI__builtin_exp2f16:
2715 case Builtin::BI__builtin_exp2l:
2716 case Builtin::BI__builtin_exp2f128:
2717 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2718 Intrinsic::exp2,
2719 Intrinsic::experimental_constrained_exp2));
2720 case Builtin::BI__builtin_exp10:
2721 case Builtin::BI__builtin_exp10f:
2722 case Builtin::BI__builtin_exp10f16:
2723 case Builtin::BI__builtin_exp10l:
2724 case Builtin::BI__builtin_exp10f128: {
2725 // TODO: strictfp support
2726 if (Builder.getIsFPConstrained())
2727 break;
2728 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp10));
2729 }
2730 case Builtin::BIfabs:
2731 case Builtin::BIfabsf:
2732 case Builtin::BIfabsl:
2733 case Builtin::BI__builtin_fabs:
2734 case Builtin::BI__builtin_fabsf:
2735 case Builtin::BI__builtin_fabsf16:
2736 case Builtin::BI__builtin_fabsl:
2737 case Builtin::BI__builtin_fabsf128:
2738 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
2739
2740 case Builtin::BIfloor:
2741 case Builtin::BIfloorf:
2742 case Builtin::BIfloorl:
2743 case Builtin::BI__builtin_floor:
2744 case Builtin::BI__builtin_floorf:
2745 case Builtin::BI__builtin_floorf16:
2746 case Builtin::BI__builtin_floorl:
2747 case Builtin::BI__builtin_floorf128:
2748 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2749 Intrinsic::floor,
2750 Intrinsic::experimental_constrained_floor));
2751
2752 case Builtin::BIfma:
2753 case Builtin::BIfmaf:
2754 case Builtin::BIfmal:
2755 case Builtin::BI__builtin_fma:
2756 case Builtin::BI__builtin_fmaf:
2757 case Builtin::BI__builtin_fmaf16:
2758 case Builtin::BI__builtin_fmal:
2759 case Builtin::BI__builtin_fmaf128:
2760 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
2761 Intrinsic::fma,
2762 Intrinsic::experimental_constrained_fma));
2763
2764 case Builtin::BIfmax:
2765 case Builtin::BIfmaxf:
2766 case Builtin::BIfmaxl:
2767 case Builtin::BI__builtin_fmax:
2768 case Builtin::BI__builtin_fmaxf:
2769 case Builtin::BI__builtin_fmaxf16:
2770 case Builtin::BI__builtin_fmaxl:
2771 case Builtin::BI__builtin_fmaxf128:
2772 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2773 Intrinsic::maxnum,
2774 Intrinsic::experimental_constrained_maxnum));
2775
2776 case Builtin::BIfmin:
2777 case Builtin::BIfminf:
2778 case Builtin::BIfminl:
2779 case Builtin::BI__builtin_fmin:
2780 case Builtin::BI__builtin_fminf:
2781 case Builtin::BI__builtin_fminf16:
2782 case Builtin::BI__builtin_fminl:
2783 case Builtin::BI__builtin_fminf128:
2784 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2785 Intrinsic::minnum,
2786 Intrinsic::experimental_constrained_minnum));
2787
2788 // fmod() is a special-case. It maps to the frem instruction rather than an
2789 // LLVM intrinsic.
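// frem computes the same value as libm fmod (a remainder with the sign of the
// dividend) but can never set errno.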
2790 case Builtin::BIfmod:
2791 case Builtin::BIfmodf:
2792 case Builtin::BIfmodl:
2793 case Builtin::BI__builtin_fmod:
2794 case Builtin::BI__builtin_fmodf:
2795 case Builtin::BI__builtin_fmodf16:
2796 case Builtin::BI__builtin_fmodl:
2797 case Builtin::BI__builtin_fmodf128: {
2798 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2799 Value *Arg1 = EmitScalarExpr(E->getArg(0));
2800 Value *Arg2 = EmitScalarExpr(E->getArg(1));
2801 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2802 }
2803
2804 case Builtin::BIlog:
2805 case Builtin::BIlogf:
2806 case Builtin::BIlogl:
2807 case Builtin::BI__builtin_log:
2808 case Builtin::BI__builtin_logf:
2809 case Builtin::BI__builtin_logf16:
2810 case Builtin::BI__builtin_logl:
2811 case Builtin::BI__builtin_logf128:
2812 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2813 Intrinsic::log,
2814 Intrinsic::experimental_constrained_log));
2815
2816 case Builtin::BIlog10:
2817 case Builtin::BIlog10f:
2818 case Builtin::BIlog10l:
2819 case Builtin::BI__builtin_log10:
2820 case Builtin::BI__builtin_log10f:
2821 case Builtin::BI__builtin_log10f16:
2822 case Builtin::BI__builtin_log10l:
2823 case Builtin::BI__builtin_log10f128:
2824 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2825 Intrinsic::log10,
2826 Intrinsic::experimental_constrained_log10));
2827
2828 case Builtin::BIlog2:
2829 case Builtin::BIlog2f:
2830 case Builtin::BIlog2l:
2831 case Builtin::BI__builtin_log2:
2832 case Builtin::BI__builtin_log2f:
2833 case Builtin::BI__builtin_log2f16:
2834 case Builtin::BI__builtin_log2l:
2835 case Builtin::BI__builtin_log2f128:
2836 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2837 Intrinsic::log2,
2838 Intrinsic::experimental_constrained_log2));
2839
2840 case Builtin::BInearbyint:
2841 case Builtin::BInearbyintf:
2842 case Builtin::BInearbyintl:
2843 case Builtin::BI__builtin_nearbyint:
2844 case Builtin::BI__builtin_nearbyintf:
2845 case Builtin::BI__builtin_nearbyintl:
2846 case Builtin::BI__builtin_nearbyintf128:
2847 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2848 Intrinsic::nearbyint,
2849 Intrinsic::experimental_constrained_nearbyint));
2850
2851 case Builtin::BIpow:
2852 case Builtin::BIpowf:
2853 case Builtin::BIpowl:
2854 case Builtin::BI__builtin_pow:
2855 case Builtin::BI__builtin_powf:
2856 case Builtin::BI__builtin_powf16:
2857 case Builtin::BI__builtin_powl:
2858 case Builtin::BI__builtin_powf128:
2859 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
2860 Intrinsic::pow,
2861 Intrinsic::experimental_constrained_pow));
2862
2863 case Builtin::BIrint:
2864 case Builtin::BIrintf:
2865 case Builtin::BIrintl:
2866 case Builtin::BI__builtin_rint:
2867 case Builtin::BI__builtin_rintf:
2868 case Builtin::BI__builtin_rintf16:
2869 case Builtin::BI__builtin_rintl:
2870 case Builtin::BI__builtin_rintf128:
2871 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2872 Intrinsic::rint,
2873 Intrinsic::experimental_constrained_rint));
2874
2875 case Builtin::BIround:
2876 case Builtin::BIroundf:
2877 case Builtin::BIroundl:
2878 case Builtin::BI__builtin_round:
2879 case Builtin::BI__builtin_roundf:
2880 case Builtin::BI__builtin_roundf16:
2881 case Builtin::BI__builtin_roundl:
2882 case Builtin::BI__builtin_roundf128:
2883 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2884 Intrinsic::round,
2885 Intrinsic::experimental_constrained_round));
2886
2887 case Builtin::BIroundeven:
2888 case Builtin::BIroundevenf:
2889 case Builtin::BIroundevenl:
2890 case Builtin::BI__builtin_roundeven:
2891 case Builtin::BI__builtin_roundevenf:
2892 case Builtin::BI__builtin_roundevenf16:
2893 case Builtin::BI__builtin_roundevenl:
2894 case Builtin::BI__builtin_roundevenf128:
2895 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2896 Intrinsic::roundeven,
2897 Intrinsic::experimental_constrained_roundeven));
2898
2899 case Builtin::BIsin:
2900 case Builtin::BIsinf:
2901 case Builtin::BIsinl:
2902 case Builtin::BI__builtin_sin:
2903 case Builtin::BI__builtin_sinf:
2904 case Builtin::BI__builtin_sinf16:
2905 case Builtin::BI__builtin_sinl:
2906 case Builtin::BI__builtin_sinf128:
2907 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2908 Intrinsic::sin,
2909 Intrinsic::experimental_constrained_sin));
2910
2911 case Builtin::BIsqrt:
2912 case Builtin::BIsqrtf:
2913 case Builtin::BIsqrtl:
2914 case Builtin::BI__builtin_sqrt:
2915 case Builtin::BI__builtin_sqrtf:
2916 case Builtin::BI__builtin_sqrtf16:
2917 case Builtin::BI__builtin_sqrtl:
2918 case Builtin::BI__builtin_sqrtf128:
2919 case Builtin::BI__builtin_elementwise_sqrt: {
2920 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
2921 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2922 SetSqrtFPAccuracy(Call);
2923 return RValue::get(Call);
2924 }
2925 case Builtin::BItrunc:
2926 case Builtin::BItruncf:
2927 case Builtin::BItruncl:
2928 case Builtin::BI__builtin_trunc:
2929 case Builtin::BI__builtin_truncf:
2930 case Builtin::BI__builtin_truncf16:
2931 case Builtin::BI__builtin_truncl:
2932 case Builtin::BI__builtin_truncf128:
2933 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2934 Intrinsic::trunc,
2935 Intrinsic::experimental_constrained_trunc));
2936
2937 case Builtin::BIlround:
2938 case Builtin::BIlroundf:
2939 case Builtin::BIlroundl:
2940 case Builtin::BI__builtin_lround:
2941 case Builtin::BI__builtin_lroundf:
2942 case Builtin::BI__builtin_lroundl:
2943 case Builtin::BI__builtin_lroundf128:
2944 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2945 *this, E, Intrinsic::lround,
2946 Intrinsic::experimental_constrained_lround));
2947
2948 case Builtin::BIllround:
2949 case Builtin::BIllroundf:
2950 case Builtin::BIllroundl:
2951 case Builtin::BI__builtin_llround:
2952 case Builtin::BI__builtin_llroundf:
2953 case Builtin::BI__builtin_llroundl:
2954 case Builtin::BI__builtin_llroundf128:
2955 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2956 *this, E, Intrinsic::llround,
2957 Intrinsic::experimental_constrained_llround));
2958
2959 case Builtin::BIlrint:
2960 case Builtin::BIlrintf:
2961 case Builtin::BIlrintl:
2962 case Builtin::BI__builtin_lrint:
2963 case Builtin::BI__builtin_lrintf:
2964 case Builtin::BI__builtin_lrintl:
2965 case Builtin::BI__builtin_lrintf128:
2966 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2967 *this, E, Intrinsic::lrint,
2968 Intrinsic::experimental_constrained_lrint));
2969
2970 case Builtin::BIllrint:
2971 case Builtin::BIllrintf:
2972 case Builtin::BIllrintl:
2973 case Builtin::BI__builtin_llrint:
2974 case Builtin::BI__builtin_llrintf:
2975 case Builtin::BI__builtin_llrintl:
2976 case Builtin::BI__builtin_llrintf128:
2977 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
2978 *this, E, Intrinsic::llrint,
2979 Intrinsic::experimental_constrained_llrint));
2980 case Builtin::BI__builtin_ldexp:
2981 case Builtin::BI__builtin_ldexpf:
2982 case Builtin::BI__builtin_ldexpl:
2983 case Builtin::BI__builtin_ldexpf16:
2984 case Builtin::BI__builtin_ldexpf128: {
2985 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
2986 *this, E, Intrinsic::ldexp,
2987 Intrinsic::experimental_constrained_ldexp));
2988 }
2989 default:
2990 break;
2991 }
2992 }
2993
2994 // Check NonnullAttribute/NullabilityArg and Alignment.
2995 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
2996 unsigned ParmNum) {
2997 Value *Val = A.emitRawPointer(*this);
2998 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
2999 ParmNum);
3000
3001 if (SanOpts.has(SanitizerKind::Alignment)) {
3002 SanitizerSet SkippedChecks;
3003 SkippedChecks.set(SanitizerKind::All);
3004 SkippedChecks.clear(SanitizerKind::Alignment);
3005 SourceLocation Loc = Arg->getExprLoc();
3006 // Strip an implicit cast.
3007 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3008 if (CE->getCastKind() == CK_BitCast)
3009 Arg = CE->getSubExpr();
3010 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3011 SkippedChecks);
3012 }
3013 };
3014
3015 switch (BuiltinIDIfNoAsmLabel) {
3016 default: break;
3017 case Builtin::BI__builtin___CFStringMakeConstantString:
3018 case Builtin::BI__builtin___NSStringMakeConstantString:
3019 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3020 case Builtin::BI__builtin_stdarg_start:
3021 case Builtin::BI__builtin_va_start:
3022 case Builtin::BI__va_start:
3023 case Builtin::BI__builtin_va_end:
3024 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3025 ? EmitScalarExpr(E->getArg(0))
3026 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3027 BuiltinID != Builtin::BI__builtin_va_end);
3028 return RValue::get(nullptr);
3029 case Builtin::BI__builtin_va_copy: {
3030 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3031 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3032 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3033 {DstPtr, SrcPtr});
3034 return RValue::get(nullptr);
3035 }
3036 case Builtin::BIabs:
3037 case Builtin::BIlabs:
3038 case Builtin::BIllabs:
3039 case Builtin::BI__builtin_abs:
3040 case Builtin::BI__builtin_labs:
3041 case Builtin::BI__builtin_llabs: {
3042 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3043
3044 Value *Result;
3045 switch (getLangOpts().getSignedOverflowBehavior()) {
3046 case LangOptions::SOB_Defined:
3047 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3048 break;
3049 case LangOptions::SOB_Undefined:
3050 if (!SanitizeOverflow) {
3051 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3052 break;
3053 }
3054 [[fallthrough]];
3055 case LangOptions::SOB_Trapping:
3056 // TODO: Somehow handle the corner case when the address of abs is taken.
3057 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3058 break;
3059 }
3060 return RValue::get(Result);
3061 }
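// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// A minimal sketch of the corner case the SOB_Trapping / sanitizer path above
// guards against: |INT_MIN| is not representable in a signed int. Helper name
// is hypothetical.
#include <climits>
static bool absWouldOverflow(int X) {
  return X == INT_MIN; // the only input for which the checked abs must trap
}
// ----------------------------------------------------------------------------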
3062 case Builtin::BI__builtin_complex: {
3063 Value *Real = EmitScalarExpr(E->getArg(0));
3064 Value *Imag = EmitScalarExpr(E->getArg(1));
3065 return RValue::getComplex({Real, Imag});
3066 }
3067 case Builtin::BI__builtin_conj:
3068 case Builtin::BI__builtin_conjf:
3069 case Builtin::BI__builtin_conjl:
3070 case Builtin::BIconj:
3071 case Builtin::BIconjf:
3072 case Builtin::BIconjl: {
3073 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3074 Value *Real = ComplexVal.first;
3075 Value *Imag = ComplexVal.second;
3076 Imag = Builder.CreateFNeg(Imag, "neg");
3077 return RValue::getComplex(std::make_pair(Real, Imag));
3078 }
3079 case Builtin::BI__builtin_creal:
3080 case Builtin::BI__builtin_crealf:
3081 case Builtin::BI__builtin_creall:
3082 case Builtin::BIcreal:
3083 case Builtin::BIcrealf:
3084 case Builtin::BIcreall: {
3085 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3086 return RValue::get(ComplexVal.first);
3087 }
3088
3089 case Builtin::BI__builtin_preserve_access_index: {
3090 // Only enable the preserved access index region when debuginfo
3091 // is available, as debuginfo is needed to preserve the user-level
3092 // access pattern.
3093 if (!getDebugInfo()) {
3094 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3095 return RValue::get(EmitScalarExpr(E->getArg(0)));
3096 }
3097
3098 // Nested builtin_preserve_access_index() not supported
3099 if (IsInPreservedAIRegion) {
3100 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3101 return RValue::get(EmitScalarExpr(E->getArg(0)));
3102 }
3103
3104 IsInPreservedAIRegion = true;
3105 Value *Res = EmitScalarExpr(E->getArg(0));
3106 IsInPreservedAIRegion = false;
3107 return RValue::get(Res);
3108 }
3109
3110 case Builtin::BI__builtin_cimag:
3111 case Builtin::BI__builtin_cimagf:
3112 case Builtin::BI__builtin_cimagl:
3113 case Builtin::BIcimag:
3114 case Builtin::BIcimagf:
3115 case Builtin::BIcimagl: {
3116 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3117 return RValue::get(ComplexVal.second);
3118 }
3119
3120 case Builtin::BI__builtin_clrsb:
3121 case Builtin::BI__builtin_clrsbl:
3122 case Builtin::BI__builtin_clrsbll: {
3123 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or
3124 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3125
3126 llvm::Type *ArgType = ArgValue->getType();
3127 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3128
3129 llvm::Type *ResultType = ConvertType(E->getType());
3130 Value *Zero = llvm::Constant::getNullValue(ArgType);
3131 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3132 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3133 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3134 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3135 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3136 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3137 "cast");
3138 return RValue::get(Result);
3139 }
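// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// A plain-C++ reference for the identity lowered above,
// clrsb(x) == ctlz(x < 0 ? ~x : x, /*zero is defined*/) - 1, assuming 32-bit
// int. Helper name is hypothetical.
static int clrsbReference(int X) {
  unsigned UX = X < 0 ? ~static_cast<unsigned>(X) : static_cast<unsigned>(X);
  // __builtin_clz(0) is undefined, so the all-sign-bits inputs (0 and -1) are
  // handled explicitly here; the IR above instead calls ctlz with a defined
  // result for zero.
  return UX == 0 ? 31 : __builtin_clz(UX) - 1;
}
// ----------------------------------------------------------------------------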
3140 case Builtin::BI__builtin_ctzs:
3141 case Builtin::BI__builtin_ctz:
3142 case Builtin::BI__builtin_ctzl:
3143 case Builtin::BI__builtin_ctzll:
3144 case Builtin::BI__builtin_ctzg: {
3145 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3146 E->getNumArgs() > 1;
3147
3148 Value *ArgValue =
3149 HasFallback ? EmitScalarExpr(E->getArg(0))
3150 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3151
3152 llvm::Type *ArgType = ArgValue->getType();
3153 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3154
3155 llvm::Type *ResultType = ConvertType(E->getType());
3156 Value *ZeroUndef =
3157 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3158 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3159 if (Result->getType() != ResultType)
3160 Result =
3161 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3162 if (!HasFallback)
3163 return RValue::get(Result);
3164
3165 Value *Zero = Constant::getNullValue(ArgType);
3166 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3167 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3168 Value *ResultOrFallback =
3169 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3170 return RValue::get(ResultOrFallback);
3171 }
3172 case Builtin::BI__builtin_clzs:
3173 case Builtin::BI__builtin_clz:
3174 case Builtin::BI__builtin_clzl:
3175 case Builtin::BI__builtin_clzll:
3176 case Builtin::BI__builtin_clzg: {
3177 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3178 E->getNumArgs() > 1;
3179
3180 Value *ArgValue =
3181 HasFallback ? EmitScalarExpr(E->getArg(0))
3182 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3183
3184 llvm::Type *ArgType = ArgValue->getType();
3185 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3186
3187 llvm::Type *ResultType = ConvertType(E->getType());
3188 Value *ZeroUndef =
3189 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3190 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3191 if (Result->getType() != ResultType)
3192 Result =
3193 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3194 if (!HasFallback)
3195 return RValue::get(Result);
3196
3197 Value *Zero = Constant::getNullValue(ArgType);
3198 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3199 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3200 Value *ResultOrFallback =
3201 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3202 return RValue::get(ResultOrFallback);
3203 }
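// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// The fallback semantics implemented by the two cases above, written as plain
// C++: with a second argument, __builtin_ctzg / __builtin_clzg return it for a
// zero input instead of producing an undefined count. Names are hypothetical.
static int ctzgReference(unsigned X, int Fallback) {
  return X == 0 ? Fallback : __builtin_ctz(X);
}
static int clzgReference(unsigned X, int Fallback) {
  return X == 0 ? Fallback : __builtin_clz(X);
}
// ----------------------------------------------------------------------------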
3204 case Builtin::BI__builtin_ffs:
3205 case Builtin::BI__builtin_ffsl:
3206 case Builtin::BI__builtin_ffsll: {
3207 // ffs(x) -> x ? cttz(x) + 1 : 0
3208 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3209
3210 llvm::Type *ArgType = ArgValue->getType();
3211 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3212
3213 llvm::Type *ResultType = ConvertType(E->getType());
3214 Value *Tmp =
3215 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3216 llvm::ConstantInt::get(ArgType, 1));
3217 Value *Zero = llvm::Constant::getNullValue(ArgType);
3218 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3219 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3220 if (Result->getType() != ResultType)
3221 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3222 "cast");
3223 return RValue::get(Result);
3224 }
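// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// Reference form of the select emitted above: ffs(x) -> x ? cttz(x) + 1 : 0.
// Helper name is hypothetical.
static int ffsReference(int X) {
  unsigned UX = static_cast<unsigned>(X);
  // __builtin_ctz(0) is undefined, hence the explicit zero check.
  return UX == 0 ? 0 : __builtin_ctz(UX) + 1;
}
// ----------------------------------------------------------------------------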
3225 case Builtin::BI__builtin_parity:
3226 case Builtin::BI__builtin_parityl:
3227 case Builtin::BI__builtin_parityll: {
3228 // parity(x) -> ctpop(x) & 1
3229 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3230
3231 llvm::Type *ArgType = ArgValue->getType();
3232 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3233
3234 llvm::Type *ResultType = ConvertType(E->getType());
3235 Value *Tmp = Builder.CreateCall(F, ArgValue);
3236 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3237 if (Result->getType() != ResultType)
3238 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3239 "cast");
3240 return RValue::get(Result);
3241 }
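// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// Reference form of the lowering above: parity(x) -> ctpop(x) & 1.
// Helper name is hypothetical.
static int parityReference(unsigned X) {
  return __builtin_popcount(X) & 1;
}
// ----------------------------------------------------------------------------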
3242 case Builtin::BI__lzcnt16:
3243 case Builtin::BI__lzcnt:
3244 case Builtin::BI__lzcnt64: {
3245 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3246
3247 llvm::Type *ArgType = ArgValue->getType();
3248 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3249
3250 llvm::Type *ResultType = ConvertType(E->getType());
3251 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3252 if (Result->getType() != ResultType)
3253 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3254 "cast");
3255 return RValue::get(Result);
3256 }
3257 case Builtin::BI__popcnt16:
3258 case Builtin::BI__popcnt:
3259 case Builtin::BI__popcnt64:
3260 case Builtin::BI__builtin_popcount:
3261 case Builtin::BI__builtin_popcountl:
3262 case Builtin::BI__builtin_popcountll:
3263 case Builtin::BI__builtin_popcountg: {
3264 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3265
3266 llvm::Type *ArgType = ArgValue->getType();
3267 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3268
3269 llvm::Type *ResultType = ConvertType(E->getType());
3270 Value *Result = Builder.CreateCall(F, ArgValue);
3271 if (Result->getType() != ResultType)
3272 Result =
3273 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3274 return RValue::get(Result);
3275 }
3276 case Builtin::BI__builtin_unpredictable: {
3277 // Always return the argument of __builtin_unpredictable. LLVM does not
3278 // handle this builtin. Metadata for this builtin should be added directly
3279 // to instructions such as branches or switches that use it.
3280 return RValue::get(EmitScalarExpr(E->getArg(0)));
3281 }
3282 case Builtin::BI__builtin_expect: {
3283 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3284 llvm::Type *ArgType = ArgValue->getType();
3285
3286 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3287 // Don't generate llvm.expect on -O0 as the backend won't use it for
3288 // anything.
3289 // Note, we still IRGen ExpectedValue because it could have side-effects.
3290 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3291 return RValue::get(ArgValue);
3292
3293 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3294 Value *Result =
3295 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3296 return RValue::get(Result);
3297 }
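// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// Typical use the case above supports: the expression's value is always the
// first argument; llvm.expect only attaches a hint, and is skipped entirely at
// -O0. Function name is hypothetical.
static int isLikelyPositive(int N) {
  if (__builtin_expect(N > 0, 1)) // hint: the branch is usually taken
    return 1;
  return 0;
}
// ----------------------------------------------------------------------------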
3298 case Builtin::BI__builtin_expect_with_probability: {
3299 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3300 llvm::Type *ArgType = ArgValue->getType();
3301
3302 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3303 llvm::APFloat Probability(0.0);
3304 const Expr *ProbArg = E->getArg(2);
3305 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3306 assert(EvalSucceed && "probability should be able to evaluate as float");
3307 (void)EvalSucceed;
3308 bool LoseInfo = false;
3309 Probability.convert(llvm::APFloat::IEEEdouble(),
3310 llvm::RoundingMode::Dynamic, &LoseInfo);
3311 llvm::Type *Ty = ConvertType(ProbArg->getType());
3312 Constant *Confidence = ConstantFP::get(Ty, Probability);
3313 // Don't generate llvm.expect.with.probability on -O0 as the backend
3314 // won't use it for anything.
3315 // Note, we still IRGen ExpectedValue because it could have side-effects.
3316 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3317 return RValue::get(ArgValue);
3318
3319 Function *FnExpect =
3320 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3321 Value *Result = Builder.CreateCall(
3322 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3323 return RValue::get(Result);
3324 }
3325 case Builtin::BI__builtin_assume_aligned: {
3326 const Expr *Ptr = E->getArg(0);
3327 Value *PtrValue = EmitScalarExpr(Ptr);
3328 Value *OffsetValue =
3329 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3330
3331 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3332 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3333 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3334 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3335 llvm::Value::MaximumAlignment);
3336
3337 emitAlignmentAssumption(PtrValue, Ptr,
3338 /*The expr loc is sufficient.*/ SourceLocation(),
3339 AlignmentCI, OffsetValue);
3340 return RValue::get(PtrValue);
3341 }
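// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// __builtin_assume_aligned returns its pointer argument unchanged; the case
// above only records an alignment assumption (clamped to LLVM's maximum
// alignment). A typical caller, with a hypothetical name:
static float sumAligned64(const float *P, int N) {
  const auto *AP =
      static_cast<const float *>(__builtin_assume_aligned(P, 64));
  float S = 0.0f;
  for (int I = 0; I < N; ++I)
    S += AP[I]; // optimizers may now assume 64-byte alignment of AP
  return S;
}
// ----------------------------------------------------------------------------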
3342 case Builtin::BI__assume:
3343 case Builtin::BI__builtin_assume: {
3344 if (E->getArg(0)->HasSideEffects(getContext()))
3345 return RValue::get(nullptr);
3346
3347 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3348 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3349 Builder.CreateCall(FnAssume, ArgValue);
3350 return RValue::get(nullptr);
3351 }
3352 case Builtin::BI__builtin_assume_separate_storage: {
3353 const Expr *Arg0 = E->getArg(0);
3354 const Expr *Arg1 = E->getArg(1);
3355
3356 Value *Value0 = EmitScalarExpr(Arg0);
3357 Value *Value1 = EmitScalarExpr(Arg1);
3358
3359 Value *Values[] = {Value0, Value1};
3360 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3361 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3362 return RValue::get(nullptr);
3363 }
3364 case Builtin::BI__builtin_allow_runtime_check: {
3365 StringRef Kind =
3366 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3367 LLVMContext &Ctx = CGM.getLLVMContext();
3368 llvm::Value *Allow = Builder.CreateCall(
3369 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3370 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3371 return RValue::get(Allow);
3372 }
3373 case Builtin::BI__arithmetic_fence: {
3374 // Create the builtin call if FastMath is selected, and the target
3375 // supports the builtin, otherwise just return the argument.
3376 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3377 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3378 bool isArithmeticFenceEnabled =
3379 FMF.allowReassoc() &&
3380 getContext().getTargetInfo().checkArithmeticFenceSupported();
3381 QualType ArgType = E->getArg(0)->getType();
3382 if (ArgType->isComplexType()) {
3383 if (isArithmeticFenceEnabled) {
3384 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3385 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3386 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3387 ConvertType(ElementType));
3388 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3389 ConvertType(ElementType));
3390 return RValue::getComplex(std::make_pair(Real, Imag));
3391 }
3392 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3393 Value *Real = ComplexVal.first;
3394 Value *Imag = ComplexVal.second;
3395 return RValue::getComplex(std::make_pair(Real, Imag));
3396 }
3397 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3398 if (isArithmeticFenceEnabled)
3399 return RValue::get(
3400 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3401 return RValue::get(ArgValue);
3402 }
3403 case Builtin::BI__builtin_bswap16:
3404 case Builtin::BI__builtin_bswap32:
3405 case Builtin::BI__builtin_bswap64:
3406 case Builtin::BI_byteswap_ushort:
3407 case Builtin::BI_byteswap_ulong:
3408 case Builtin::BI_byteswap_uint64: {
3409 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
3410 }
3411 case Builtin::BI__builtin_bitreverse8:
3412 case Builtin::BI__builtin_bitreverse16:
3413 case Builtin::BI__builtin_bitreverse32:
3414 case Builtin::BI__builtin_bitreverse64: {
3415 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
3416 }
3417 case Builtin::BI__builtin_rotateleft8:
3418 case Builtin::BI__builtin_rotateleft16:
3419 case Builtin::BI__builtin_rotateleft32:
3420 case Builtin::BI__builtin_rotateleft64:
3421 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3422 case Builtin::BI_rotl16:
3423 case Builtin::BI_rotl:
3424 case Builtin::BI_lrotl:
3425 case Builtin::BI_rotl64:
3426 return emitRotate(E, false);
3427
3428 case Builtin::BI__builtin_rotateright8:
3429 case Builtin::BI__builtin_rotateright16:
3430 case Builtin::BI__builtin_rotateright32:
3431 case Builtin::BI__builtin_rotateright64:
3432 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3433 case Builtin::BI_rotr16:
3434 case Builtin::BI_rotr:
3435 case Builtin::BI_lrotr:
3436 case Builtin::BI_rotr64:
3437 return emitRotate(E, true);
3438
3439 case Builtin::BI__builtin_constant_p: {
3440 llvm::Type *ResultType = ConvertType(E->getType());
3441
3442 const Expr *Arg = E->getArg(0);
3443 QualType ArgType = Arg->getType();
3444 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3445 // and likely a mistake.
3446 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3447 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3448 // Per the GCC documentation, only numeric constants are recognized after
3449 // inlining.
3450 return RValue::get(ConstantInt::get(ResultType, 0));
3451
3452 if (Arg->HasSideEffects(getContext()))
3453 // The argument is unevaluated, so be conservative if it might have
3454 // side-effects.
3455 return RValue::get(ConstantInt::get(ResultType, 0));
3456
3457 Value *ArgValue = EmitScalarExpr(Arg);
3458 if (ArgType->isObjCObjectPointerType()) {
3459 // Convert Objective-C objects to id because we cannot distinguish between
3460 // LLVM types for Obj-C classes as they are opaque.
3461 ArgType = CGM.getContext().getObjCIdType();
3462 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3463 }
3464 Function *F =
3465 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3466 Value *Result = Builder.CreateCall(F, ArgValue);
3467 if (Result->getType() != ResultType)
3468 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3469 return RValue::get(Result);
3470 }
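// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// Sketch of how llvm.is.constant behaves from the user's side: the builtin
// folds to 1 only for arguments that become numeric constants after inlining,
// and is conservatively 0 for side-effecting arguments, as checked above.
// Function name is hypothetical.
static long shiftOrKeep(long V, int Amount) {
  if (__builtin_constant_p(Amount) && Amount == 0)
    return V; // this branch folds away whenever Amount is a known constant 0
  return V << (Amount & 63);
}
// ----------------------------------------------------------------------------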
3471 case Builtin::BI__builtin_dynamic_object_size:
3472 case Builtin::BI__builtin_object_size: {
3473 unsigned Type =
3474 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3475 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3476
3477 // We pass this builtin onto the optimizer so that it can figure out the
3478 // object size in more complex cases.
3479 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3480 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3481 /*EmittedE=*/nullptr, IsDynamic));
3482 }
3483 case Builtin::BI__builtin_prefetch: {
3484 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3485 // FIXME: Technically these constants should be of type 'int', yes?
3486 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3487 llvm::ConstantInt::get(Int32Ty, 0);
3488 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3489 llvm::ConstantInt::get(Int32Ty, 3);
3490 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3491 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3492 Builder.CreateCall(F, {Address, RW, Locality, Data});
3493 return RValue::get(nullptr);
3494 }
3495 case Builtin::BI__builtin_readcyclecounter: {
3496 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3497 return RValue::get(Builder.CreateCall(F));
3498 }
3499 case Builtin::BI__builtin_readsteadycounter: {
3500 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3501 return RValue::get(Builder.CreateCall(F));
3502 }
3503 case Builtin::BI__builtin___clear_cache: {
3504 Value *Begin = EmitScalarExpr(E->getArg(0));
3505 Value *End = EmitScalarExpr(E->getArg(1));
3506 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3507 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3508 }
3509 case Builtin::BI__builtin_trap:
3510 EmitTrapCall(Intrinsic::trap);
3511 return RValue::get(nullptr);
3512 case Builtin::BI__debugbreak:
3513 EmitTrapCall(Intrinsic::debugtrap);
3514 return RValue::get(nullptr);
3515 case Builtin::BI__builtin_unreachable: {
3516 EmitUnreachable(E->getExprLoc());
3517
3518 // We do need to preserve an insertion point.
3519 EmitBlock(createBasicBlock("unreachable.cont"));
3520
3521 return RValue::get(nullptr);
3522 }
3523
3524 case Builtin::BI__builtin_powi:
3525 case Builtin::BI__builtin_powif:
3526 case Builtin::BI__builtin_powil: {
3527 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3528 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3529
3530 if (Builder.getIsFPConstrained()) {
3531 // FIXME: llvm.powi has 2 mangling types,
3532 // llvm.experimental.constrained.powi has one.
3533 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3534 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3535 Src0->getType());
3536 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3537 }
3538
3539 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3540 { Src0->getType(), Src1->getType() });
3541 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3542 }
3543 case Builtin::BI__builtin_frexpl: {
3544 // Linux PPC will not be adding additional PPCDoubleDouble support.
3545 // WIP to switch default to IEEE long double. Will emit libcall for
3546 // frexpl instead of legalizing this type in the BE.
3547 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3548 break;
3549 [[fallthrough]];
3550 }
3551 case Builtin::BI__builtin_frexp:
3552 case Builtin::BI__builtin_frexpf:
3553 case Builtin::BI__builtin_frexpf128:
3554 case Builtin::BI__builtin_frexpf16:
3555 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3556 case Builtin::BI__builtin_isgreater:
3557 case Builtin::BI__builtin_isgreaterequal:
3558 case Builtin::BI__builtin_isless:
3559 case Builtin::BI__builtin_islessequal:
3560 case Builtin::BI__builtin_islessgreater:
3561 case Builtin::BI__builtin_isunordered: {
3562 // Ordered comparisons: we know the arguments to these are matching scalar
3563 // floating point values.
3564 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3565 Value *LHS = EmitScalarExpr(E->getArg(0));
3566 Value *RHS = EmitScalarExpr(E->getArg(1));
3567
3568 switch (BuiltinID) {
3569 default: llvm_unreachable("Unknown ordered comparison");
3570 case Builtin::BI__builtin_isgreater:
3571 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3572 break;
3573 case Builtin::BI__builtin_isgreaterequal:
3574 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3575 break;
3576 case Builtin::BI__builtin_isless:
3577 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3578 break;
3579 case Builtin::BI__builtin_islessequal:
3580 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3581 break;
3582 case Builtin::BI__builtin_islessgreater:
3583 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3584 break;
3585 case Builtin::BI__builtin_isunordered:
3586 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3587 break;
3588 }
3589 // ZExt bool to int type.
3590 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3591 }
3592
3593 case Builtin::BI__builtin_isnan: {
3594 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3595 Value *V = EmitScalarExpr(E->getArg(0));
3596 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3597 return RValue::get(Result);
3598 return RValue::get(
3599 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3600 ConvertType(E->getType())));
3601 }
3602
3603 case Builtin::BI__builtin_issignaling: {
3604 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3605 Value *V = EmitScalarExpr(E->getArg(0));
3606 return RValue::get(
3607 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3608 ConvertType(E->getType())));
3609 }
3610
3611 case Builtin::BI__builtin_isinf: {
3612 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3613 Value *V = EmitScalarExpr(E->getArg(0));
3614 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3615 return RValue::get(Result);
3616 return RValue::get(
3617 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3618 ConvertType(E->getType())));
3619 }
3620
3621 case Builtin::BIfinite:
3622 case Builtin::BI__finite:
3623 case Builtin::BIfinitef:
3624 case Builtin::BI__finitef:
3625 case Builtin::BIfinitel:
3626 case Builtin::BI__finitel:
3627 case Builtin::BI__builtin_isfinite: {
3628 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3629 Value *V = EmitScalarExpr(E->getArg(0));
3630 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3631 return RValue::get(Result);
3632 return RValue::get(
3633 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3634 ConvertType(E->getType())));
3635 }
3636
3637 case Builtin::BI__builtin_isnormal: {
3638 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3639 Value *V = EmitScalarExpr(E->getArg(0));
3640 return RValue::get(
3641 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3642 ConvertType(E->getType())));
3643 }
3644
3645 case Builtin::BI__builtin_issubnormal: {
3646 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3647 Value *V = EmitScalarExpr(E->getArg(0));
3648 return RValue::get(
3649 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3650 ConvertType(E->getType())));
3651 }
3652
3653 case Builtin::BI__builtin_iszero: {
3654 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3655 Value *V = EmitScalarExpr(E->getArg(0));
3656 return RValue::get(
3657 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3658 ConvertType(E->getType())));
3659 }
3660
3661 case Builtin::BI__builtin_isfpclass: {
3662 Expr::EvalResult Result;
3663 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3664 break;
3665 uint64_t Test = Result.Val.getInt().getLimitedValue();
3666 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3667 Value *V = EmitScalarExpr(E->getArg(0));
3668 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3669 ConvertType(E->getType())));
3670 }
3671
3672 case Builtin::BI__builtin_nondeterministic_value: {
3673 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3674
3675 Value *Result = PoisonValue::get(Ty);
3676 Result = Builder.CreateFreeze(Result);
3677
3678 return RValue::get(Result);
3679 }
3680
3681 case Builtin::BI__builtin_elementwise_abs: {
3682 Value *Result;
3683 QualType QT = E->getArg(0)->getType();
3684
3685 if (auto *VecTy = QT->getAs<VectorType>())
3686 QT = VecTy->getElementType();
3687 if (QT->isIntegerType())
3688 Result = Builder.CreateBinaryIntrinsic(
3689 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3690 Builder.getFalse(), nullptr, "elt.abs");
3691 else
3692 Result = emitUnaryBuiltin(*this, E, llvm::Intrinsic::fabs, "elt.abs");
3693
3694 return RValue::get(Result);
3695 }
3696
3697 case Builtin::BI__builtin_elementwise_ceil:
3698 return RValue::get(
3699 emitUnaryBuiltin(*this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3700 case Builtin::BI__builtin_elementwise_exp:
3701 return RValue::get(
3702 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp, "elt.exp"));
3703 case Builtin::BI__builtin_elementwise_exp2:
3704 return RValue::get(
3705 emitUnaryBuiltin(*this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3706 case Builtin::BI__builtin_elementwise_log:
3707 return RValue::get(
3708 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log, "elt.log"));
3709 case Builtin::BI__builtin_elementwise_log2:
3710 return RValue::get(
3711 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log2, "elt.log2"));
3712 case Builtin::BI__builtin_elementwise_log10:
3713 return RValue::get(
3714 emitUnaryBuiltin(*this, E, llvm::Intrinsic::log10, "elt.log10"));
3715 case Builtin::BI__builtin_elementwise_pow: {
3716 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::pow));
3717 }
3718 case Builtin::BI__builtin_elementwise_bitreverse:
3719 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::bitreverse,
3720 "elt.bitreverse"));
3721 case Builtin::BI__builtin_elementwise_cos:
3722 return RValue::get(
3723 emitUnaryBuiltin(*this, E, llvm::Intrinsic::cos, "elt.cos"));
3724 case Builtin::BI__builtin_elementwise_floor:
3725 return RValue::get(
3726 emitUnaryBuiltin(*this, E, llvm::Intrinsic::floor, "elt.floor"));
3727 case Builtin::BI__builtin_elementwise_roundeven:
3728 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::roundeven,
3729 "elt.roundeven"));
3730 case Builtin::BI__builtin_elementwise_round:
3731 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::round,
3732 "elt.round"));
3733 case Builtin::BI__builtin_elementwise_rint:
3734 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::rint,
3735 "elt.rint"));
3736 case Builtin::BI__builtin_elementwise_nearbyint:
3737 return RValue::get(emitUnaryBuiltin(*this, E, llvm::Intrinsic::nearbyint,
3738 "elt.nearbyint"));
3739 case Builtin::BI__builtin_elementwise_sin:
3740 return RValue::get(
3741 emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
3742 case Builtin::BI__builtin_elementwise_tan:
3743 return RValue::get(
3744 emitUnaryBuiltin(*this, E, llvm::Intrinsic::tan, "elt.tan"));
3745 case Builtin::BI__builtin_elementwise_trunc:
3746 return RValue::get(
3747 emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3748 case Builtin::BI__builtin_elementwise_canonicalize:
3749 return RValue::get(
3750 emitUnaryBuiltin(*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3751 case Builtin::BI__builtin_elementwise_copysign:
3752 return RValue::get(emitBinaryBuiltin(*this, E, llvm::Intrinsic::copysign));
3753 case Builtin::BI__builtin_elementwise_fma:
3754 return RValue::get(emitTernaryBuiltin(*this, E, llvm::Intrinsic::fma));
3755 case Builtin::BI__builtin_elementwise_add_sat:
3756 case Builtin::BI__builtin_elementwise_sub_sat: {
3757 Value *Op0 = EmitScalarExpr(E->getArg(0));
3758 Value *Op1 = EmitScalarExpr(E->getArg(1));
3759 Value *Result;
3760 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3761 QualType Ty = E->getArg(0)->getType();
3762 if (auto *VecTy = Ty->getAs<VectorType>())
3763 Ty = VecTy->getElementType();
3764 bool IsSigned = Ty->isSignedIntegerType();
3765 unsigned Opc;
3766 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3767 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3768 else
3769 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3770 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3771 return RValue::get(Result);
3772 }
3773
3774 case Builtin::BI__builtin_elementwise_max: {
3775 Value *Op0 = EmitScalarExpr(E->getArg(0));
3776 Value *Op1 = EmitScalarExpr(E->getArg(1));
3777 Value *Result;
3778 if (Op0->getType()->isIntOrIntVectorTy()) {
3779 QualType Ty = E->getArg(0)->getType();
3780 if (auto *VecTy = Ty->getAs<VectorType>())
3781 Ty = VecTy->getElementType();
3782 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3783 ? llvm::Intrinsic::smax
3784 : llvm::Intrinsic::umax,
3785 Op0, Op1, nullptr, "elt.max");
3786 } else
3787 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3788 return RValue::get(Result);
3789 }
3790 case Builtin::BI__builtin_elementwise_min: {
3791 Value *Op0 = EmitScalarExpr(E->getArg(0));
3792 Value *Op1 = EmitScalarExpr(E->getArg(1));
3793 Value *Result;
3794 if (Op0->getType()->isIntOrIntVectorTy()) {
3795 QualType Ty = E->getArg(0)->getType();
3796 if (auto *VecTy = Ty->getAs<VectorType>())
3797 Ty = VecTy->getElementType();
3798 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3799 ? llvm::Intrinsic::smin
3800 : llvm::Intrinsic::umin,
3801 Op0, Op1, nullptr, "elt.min");
3802 } else
3803 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3804 return RValue::get(Result);
3805 }
3806
3807 case Builtin::BI__builtin_reduce_max: {
3808 auto GetIntrinsicID = [this](QualType QT) {
3809 if (auto *VecTy = QT->getAs<VectorType>())
3810 QT = VecTy->getElementType();
3811 else if (QT->isSizelessVectorType())
3812 QT = QT->getSizelessVectorEltType(CGM.getContext());
3813
3814 if (QT->isSignedIntegerType())
3815 return llvm::Intrinsic::vector_reduce_smax;
3816 if (QT->isUnsignedIntegerType())
3817 return llvm::Intrinsic::vector_reduce_umax;
3818 assert(QT->isFloatingType() && "must have a float here");
3819 return llvm::Intrinsic::vector_reduce_fmax;
3820 };
3821 return RValue::get(emitUnaryBuiltin(
3822 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3823 }
3824
3825 case Builtin::BI__builtin_reduce_min: {
3826 auto GetIntrinsicID = [this](QualType QT) {
3827 if (auto *VecTy = QT->getAs<VectorType>())
3828 QT = VecTy->getElementType();
3829 else if (QT->isSizelessVectorType())
3830 QT = QT->getSizelessVectorEltType(CGM.getContext());
3831
3832 if (QT->isSignedIntegerType())
3833 return llvm::Intrinsic::vector_reduce_smin;
3834 if (QT->isUnsignedIntegerType())
3835 return llvm::Intrinsic::vector_reduce_umin;
3836 assert(QT->isFloatingType() && "must have a float here");
3837 return llvm::Intrinsic::vector_reduce_fmin;
3838 };
3839
3840 return RValue::get(emitUnaryBuiltin(
3841 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3842 }
3843
3844 case Builtin::BI__builtin_reduce_add:
3845 return RValue::get(emitUnaryBuiltin(
3846 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3847 case Builtin::BI__builtin_reduce_mul:
3848 return RValue::get(emitUnaryBuiltin(
3849 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3850 case Builtin::BI__builtin_reduce_xor:
3851 return RValue::get(emitUnaryBuiltin(
3852 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3853 case Builtin::BI__builtin_reduce_or:
3854 return RValue::get(emitUnaryBuiltin(
3855 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3856 case Builtin::BI__builtin_reduce_and:
3857 return RValue::get(emitUnaryBuiltin(
3858 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3859
3860 case Builtin::BI__builtin_matrix_transpose: {
3861 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3862 Value *MatValue = EmitScalarExpr(E->getArg(0));
3863 MatrixBuilder MB(Builder);
3864 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3865 MatrixTy->getNumColumns());
3866 return RValue::get(Result);
3867 }
3868
3869 case Builtin::BI__builtin_matrix_column_major_load: {
3870 MatrixBuilder MB(Builder);
3871 // Emit everything that isn't dependent on the first parameter type
3872 Value *Stride = EmitScalarExpr(E->getArg(3));
3873 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3874 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3875 assert(PtrTy && "arg0 must be of pointer type");
3876 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3877
3878 Address Src = EmitPointerWithAlignment(E->getArg(0));
3879 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
3880 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
3881 0);
3882 Value *Result = MB.CreateColumnMajorLoad(
3883 Src.getElementType(), Src.emitRawPointer(*this),
3884 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
3885 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
3886 return RValue::get(Result);
3887 }
3888
3889 case Builtin::BI__builtin_matrix_column_major_store: {
3890 MatrixBuilder MB(Builder);
3891 Value *Matrix = EmitScalarExpr(E->getArg(0));
3892 Address Dst = EmitPointerWithAlignment(E->getArg(1));
3893 Value *Stride = EmitScalarExpr(E->getArg(2));
3894
3895 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
3896 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
3897 assert(PtrTy && "arg1 must be of pointer type");
3898 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3899
3900 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
3901 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
3902 0);
3903 Value *Result = MB.CreateColumnMajorStore(
3904 Matrix, Dst.emitRawPointer(*this),
3905 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
3906 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
3907 return RValue::get(Result);
3908 }
3909
3910 case Builtin::BI__builtin_isinf_sign: {
3911 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
3912 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3913 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3914 Value *Arg = EmitScalarExpr(E->getArg(0));
3915 Value *AbsArg = EmitFAbs(*this, Arg);
3916 Value *IsInf = Builder.CreateFCmpOEQ(
3917 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
3918 Value *IsNeg = EmitSignBit(*this, Arg);
3919
3920 llvm::Type *IntTy = ConvertType(E->getType());
3921 Value *Zero = Constant::getNullValue(IntTy);
3922 Value *One = ConstantInt::get(IntTy, 1);
3923 Value *NegativeOne = ConstantInt::get(IntTy, -1);
3924 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
3925 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
3926 return RValue::get(Result);
3927 }
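// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// Reference form of the select chain above:
// isinf_sign(x) -> fabs(x) == inf ? (signbit(x) ? -1 : 1) : 0.
// Helper name is hypothetical.
#include <cmath>
static int isinfSignReference(double X) {
  if (!std::isinf(X))
    return 0;
  return std::signbit(X) ? -1 : 1;
}
// ----------------------------------------------------------------------------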
3928
3929 case Builtin::BI__builtin_flt_rounds: {
3930 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
3931
3932 llvm::Type *ResultType = ConvertType(E->getType());
3933 Value *Result = Builder.CreateCall(F);
3934 if (Result->getType() != ResultType)
3935 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3936 "cast");
3937 return RValue::get(Result);
3938 }
3939
3940 case Builtin::BI__builtin_set_flt_rounds: {
3941 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
3942
3943 Value *V = EmitScalarExpr(E->getArg(0));
3944 Builder.CreateCall(F, V);
3945 return RValue::get(nullptr);
3946 }
3947
3948 case Builtin::BI__builtin_fpclassify: {
3949 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3950 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
3951 Value *V = EmitScalarExpr(E->getArg(5));
3952 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
3953
3954 // Create Result
3955 BasicBlock *Begin = Builder.GetInsertBlock();
3956 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
3957 Builder.SetInsertPoint(End);
3958 PHINode *Result =
3959 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
3960 "fpclassify_result");
3961
3962 // if (V==0) return FP_ZERO
3963 Builder.SetInsertPoint(Begin);
3964 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
3965 "iszero");
3966 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
3967 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
3968 Builder.CreateCondBr(IsZero, End, NotZero);
3969 Result->addIncoming(ZeroLiteral, Begin);
3970
3971 // if (V != V) return FP_NAN
3972 Builder.SetInsertPoint(NotZero);
3973 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
3974 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
3975 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
3976 Builder.CreateCondBr(IsNan, End, NotNan);
3977 Result->addIncoming(NanLiteral, NotZero);
3978
3979 // if (fabs(V) == infinity) return FP_INFINITY
3980 Builder.SetInsertPoint(NotNan);
3981 Value *VAbs = EmitFAbs(*this, V);
3982 Value *IsInf =
3983 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
3984 "isinf");
3985 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
3986 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
3987 Builder.CreateCondBr(IsInf, End, NotInf);
3988 Result->addIncoming(InfLiteral, NotNan);
3989
3990 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
3991 Builder.SetInsertPoint(NotInf);
3992 APFloat Smallest = APFloat::getSmallestNormalized(
3993 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
3994 Value *IsNormal =
3995 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
3996 "isnormal");
3997 Value *NormalResult =
3998 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
3999 EmitScalarExpr(E->getArg(3)));
4000 Builder.CreateBr(End);
4001 Result->addIncoming(NormalResult, NotInf);
4002
4003 // return Result
4004 Builder.SetInsertPoint(End);
4005 return RValue::get(Result);
4006 }
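// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// The control flow built above, restated as straight-line C++. The argument
// order matches the builtin: literals for NaN, infinity, normal, subnormal and
// zero come first, the tested value last. Helper name is hypothetical.
#include <cfloat>
#include <cmath>
static int fpclassifyReference(int Nan, int Inf, int Normal, int Subnormal,
                               int Zero, double V) {
  if (V == 0.0)
    return Zero;
  if (V != V) // unordered self-compare detects NaN
    return Nan;
  double A = std::fabs(V);
  if (std::isinf(A))
    return Inf;
  return A >= DBL_MIN ? Normal : Subnormal; // DBL_MIN: smallest normalized
}
// ----------------------------------------------------------------------------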
4007
4008 // An alloca will always return a pointer to the alloca (stack) address
4009 // space. This address space need not be the same as the AST / Language
4010 // default (e.g. in C / C++ auto vars are in the generic address space). At
4011 // the AST level this is handled within CreateTempAlloca et al., but for the
4012 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4013 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4014 case Builtin::BIalloca:
4015 case Builtin::BI_alloca:
4016 case Builtin::BI__builtin_alloca_uninitialized:
4017 case Builtin::BI__builtin_alloca: {
4018 Value *Size = EmitScalarExpr(E->getArg(0));
4019 const TargetInfo &TI = getContext().getTargetInfo();
4020 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4021 const Align SuitableAlignmentInBytes =
4022 CGM.getContext()
4023 .toCharUnitsFromBits(TI.getSuitableAlign())
4024 .getAsAlign();
4025 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4026 AI->setAlignment(SuitableAlignmentInBytes);
4027 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4028 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4029 LangAS AAS = getASTAllocaAddressSpace();
4030 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4031 if (AAS != EAS) {
4032 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4033 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4034 EAS, Ty));
4035 }
4036 return RValue::get(AI);
4037 }
4038
4039 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4040 case Builtin::BI__builtin_alloca_with_align: {
4041 Value *Size = EmitScalarExpr(E->getArg(0));
4042 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4043 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4044 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4045 const Align AlignmentInBytes =
4046 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4047 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4048 AI->setAlignment(AlignmentInBytes);
4049 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4050 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4051 LangAS AAS = getASTAllocaAddressSpace();
4052 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4053 if (AAS != EAS) {
4054 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4055 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4056 EAS, Ty));
4057 }
4058 return RValue::get(AI);
4059 }
4060
4061 case Builtin::BIbzero:
4062 case Builtin::BI__builtin_bzero: {
4063 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4064 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4065 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4066 E->getArg(0)->getExprLoc(), FD, 0);
4067 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4068 return RValue::get(nullptr);
4069 }
4070
4071 case Builtin::BIbcopy:
4072 case Builtin::BI__builtin_bcopy: {
4073 Address Src = EmitPointerWithAlignment(E->getArg(0));
4074 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4075 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4076 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4077 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4078 0);
4079 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4080 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4081 0);
4082 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4083 return RValue::get(nullptr);
4084 }
4085
4086 case Builtin::BImemcpy:
4087 case Builtin::BI__builtin_memcpy:
4088 case Builtin::BImempcpy:
4089 case Builtin::BI__builtin_mempcpy: {
4090 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4091 Address Src = EmitPointerWithAlignment(E->getArg(1));
4092 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4093 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4094 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4095 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4096 if (BuiltinID == Builtin::BImempcpy ||
4097 BuiltinID == Builtin::BI__builtin_mempcpy)
4098 return RValue::get(Builder.CreateInBoundsGEP(
4099 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4100 else
4101 return RValue::get(Dest, *this);
4102 }
4103
4104 case Builtin::BI__builtin_memcpy_inline: {
4105 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4106 Address Src = EmitPointerWithAlignment(E->getArg(1));
4107 uint64_t Size =
4108 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4109 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4110 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4111 Builder.CreateMemCpyInline(Dest, Src, Size);
4112 return RValue::get(nullptr);
4113 }
4114
4115 case Builtin::BI__builtin_char_memchr:
4116 BuiltinID = Builtin::BI__builtin_memchr;
4117 break;
4118
4119 case Builtin::BI__builtin___memcpy_chk: {
4120 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4121 Expr::EvalResult SizeResult, DstSizeResult;
4122 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4123 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4124 break;
4125 llvm::APSInt Size = SizeResult.Val.getInt();
4126 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4127 if (Size.ugt(DstSize))
4128 break;
4129 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4130 Address Src = EmitPointerWithAlignment(E->getArg(1));
4131 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4132 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4133 return RValue::get(Dest, *this);
4134 }
4135
4136 case Builtin::BI__builtin_objc_memmove_collectable: {
4137 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4138 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4139 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4140 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4141 DestAddr, SrcAddr, SizeVal);
4142 return RValue::get(DestAddr, *this);
4143 }
4144
4145 case Builtin::BI__builtin___memmove_chk: {
4146 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4147 Expr::EvalResult SizeResult, DstSizeResult;
4148 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4149 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4150 break;
4151 llvm::APSInt Size = SizeResult.Val.getInt();
4152 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4153 if (Size.ugt(DstSize))
4154 break;
4155 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4156 Address Src = EmitPointerWithAlignment(E->getArg(1));
4157 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4158 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4159 return RValue::get(Dest, *this);
4160 }
4161
4162 case Builtin::BImemmove:
4163 case Builtin::BI__builtin_memmove: {
4164 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4165 Address Src = EmitPointerWithAlignment(E->getArg(1));
4166 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4167 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4168 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4169 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4170 return RValue::get(Dest, *this);
4171 }
4172 case Builtin::BImemset:
4173 case Builtin::BI__builtin_memset: {
4174 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4175 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4176 Builder.getInt8Ty());
4177 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4178 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4179 E->getArg(0)->getExprLoc(), FD, 0);
4180 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4181 return RValue::get(Dest, *this);
4182 }
4183 case Builtin::BI__builtin_memset_inline: {
4184 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4185 Value *ByteVal =
4186 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4187 uint64_t Size =
4188 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4189 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4190 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4191 0);
4192 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4193 return RValue::get(nullptr);
4194 }
4195 case Builtin::BI__builtin___memset_chk: {
4196 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4197 Expr::EvalResult SizeResult, DstSizeResult;
4198 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4199 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4200 break;
4201 llvm::APSInt Size = SizeResult.Val.getInt();
4202 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4203 if (Size.ugt(DstSize))
4204 break;
4205 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4206 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4207 Builder.getInt8Ty());
4208 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4209 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4210 return RValue::get(Dest, *this);
4211 }
4212 case Builtin::BI__builtin_wmemchr: {
4213 // The MSVC runtime library does not provide a definition of wmemchr, so we
4214 // need an inline implementation.
4215 if (!getTarget().getTriple().isOSMSVCRT())
4216 break;
4217
4218 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4219 Value *Str = EmitScalarExpr(E->getArg(0));
4220 Value *Chr = EmitScalarExpr(E->getArg(1));
4221 Value *Size = EmitScalarExpr(E->getArg(2));
4222
4223 BasicBlock *Entry = Builder.GetInsertBlock();
4224 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4225 BasicBlock *Next = createBasicBlock("wmemchr.next");
4226 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4227 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4228 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4229
4230 EmitBlock(CmpEq);
4231 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4232 StrPhi->addIncoming(Str, Entry);
4233 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4234 SizePhi->addIncoming(Size, Entry);
4235 CharUnits WCharAlign =
4236 getContext().getTypeAlignInChars(getContext().WCharTy);
4237 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4238 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4239 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4240 Builder.CreateCondBr(StrEqChr, Exit, Next);
4241
4242 EmitBlock(Next);
4243 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4244 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4245 Value *NextSizeEq0 =
4246 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4247 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4248 StrPhi->addIncoming(NextStr, Next);
4249 SizePhi->addIncoming(NextSize, Next);
4250
4251 EmitBlock(Exit);
4252 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4253 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4254 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4255 Ret->addIncoming(FoundChr, CmpEq);
4256 return RValue::get(Ret);
4257 }
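// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// The loop the basic blocks above implement, as plain C++ (the inline form is
// only used when the MSVC runtime provides no wmemchr). Name is hypothetical.
#include <cstddef>
static const wchar_t *wmemchrReference(const wchar_t *S, wchar_t C, size_t N) {
  for (; N != 0; --N, ++S)
    if (*S == C)
      return S;
  return nullptr;
}
// ----------------------------------------------------------------------------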
4258 case Builtin::BI__builtin_wmemcmp: {
4259 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4260 // need an inline implementation.
4261 if (!getTarget().getTriple().isOSMSVCRT())
4262 break;
4263
4264 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4265
4266 Value *Dst = EmitScalarExpr(E->getArg(0));
4267 Value *Src = EmitScalarExpr(E->getArg(1));
4268 Value *Size = EmitScalarExpr(E->getArg(2));
4269
4270 BasicBlock *Entry = Builder.GetInsertBlock();
4271 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4272 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4273 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4274 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4275 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4276 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4277
4278 EmitBlock(CmpGT);
4279 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4280 DstPhi->addIncoming(Dst, Entry);
4281 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4282 SrcPhi->addIncoming(Src, Entry);
4283 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4284 SizePhi->addIncoming(Size, Entry);
4285 CharUnits WCharAlign =
4286 getContext().getTypeAlignInChars(getContext().WCharTy);
4287 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4288 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4289 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4290 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4291
4292 EmitBlock(CmpLT);
4293 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4294 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4295
4296 EmitBlock(Next);
4297 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4298 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4299 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4300 Value *NextSizeEq0 =
4301 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4302 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4303 DstPhi->addIncoming(NextDst, Next);
4304 SrcPhi->addIncoming(NextSrc, Next);
4305 SizePhi->addIncoming(NextSize, Next);
4306
4307 EmitBlock(Exit);
4308 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4309 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4310 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4311 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4312 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4313 return RValue::get(Ret);
4314 }
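// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// The comparison loop above in plain C++; note the element compare is
// unsigned (ICmpUGT / ICmpULT), matching wmemcmp semantics. Name hypothetical.
#include <cstddef>
static int wmemcmpReference(const wchar_t *D, const wchar_t *S, size_t N) {
  for (; N != 0; --N, ++D, ++S) {
    if (static_cast<unsigned>(*D) > static_cast<unsigned>(*S))
      return 1;
    if (static_cast<unsigned>(*D) < static_cast<unsigned>(*S))
      return -1;
  }
  return 0;
}
// ----------------------------------------------------------------------------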
4315 case Builtin::BI__builtin_dwarf_cfa: {
4316 // The offset in bytes from the first argument to the CFA.
4317 //
4318 // Why on earth is this in the frontend? Is there any reason at
4319 // all that the backend can't reasonably determine this while
4320 // lowering llvm.eh.dwarf.cfa()?
4321 //
4322 // TODO: If there's a satisfactory reason, add a target hook for
4323 // this instead of hard-coding 0, which is correct for most targets.
4324 int32_t Offset = 0;
4325
4326 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4327 return RValue::get(Builder.CreateCall(F,
4328 llvm::ConstantInt::get(Int32Ty, Offset)));
4329 }
4330 case Builtin::BI__builtin_return_address: {
4331 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4332 getContext().UnsignedIntTy);
4333 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4334 return RValue::get(Builder.CreateCall(F, Depth));
4335 }
4336 case Builtin::BI_ReturnAddress: {
4337 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4338 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4339 }
4340 case Builtin::BI__builtin_frame_address: {
4341 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4342 getContext().UnsignedIntTy);
4343 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4344 return RValue::get(Builder.CreateCall(F, Depth));
4345 }
4346 case Builtin::BI__builtin_extract_return_addr: {
4347 Value *Address = EmitScalarExpr(E->getArg(0));
4348 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4349 return RValue::get(Result);
4350 }
4351 case Builtin::BI__builtin_frob_return_addr: {
4352 Value *Address = EmitScalarExpr(E->getArg(0));
4353 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4354 return RValue::get(Result);
4355 }
4356 case Builtin::BI__builtin_dwarf_sp_column: {
4357 llvm::IntegerType *Ty
4358 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4359 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4360 if (Column == -1) {
4361 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4362 return RValue::get(llvm::UndefValue::get(Ty));
4363 }
4364 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4365 }
4366 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4367 Value *Address = EmitScalarExpr(E->getArg(0));
4368 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4369 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4370 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4371 }
4372 case Builtin::BI__builtin_eh_return: {
4373 Value *Int = EmitScalarExpr(E->getArg(0));
4374 Value *Ptr = EmitScalarExpr(E->getArg(1));
4375
4376 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4377 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4378 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4379 Function *F =
4380 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4381 : Intrinsic::eh_return_i64);
4382 Builder.CreateCall(F, {Int, Ptr});
4383 Builder.CreateUnreachable();
4384
4385 // We do need to preserve an insertion point.
4386 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4387
4388 return RValue::get(nullptr);
4389 }
4390 case Builtin::BI__builtin_unwind_init: {
4391 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4392 Builder.CreateCall(F);
4393 return RValue::get(nullptr);
4394 }
4395 case Builtin::BI__builtin_extend_pointer: {
4396 // Extends a pointer to the size of an _Unwind_Word, which is
4397 // uint64_t on all platforms. Generally this gets poked into a
4398 // register and eventually used as an address, so if the
4399 // addressing registers are wider than pointers and the platform
4400 // doesn't implicitly ignore high-order bits when doing
4401 // addressing, we need to make sure we zext / sext based on
4402 // the platform's expectations.
4403 //
4404 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4405
4406 // Cast the pointer to intptr_t.
4407 Value *Ptr = EmitScalarExpr(E->getArg(0));
4408 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4409
4410 // If that's 64 bits, we're done.
4411 if (IntPtrTy->getBitWidth() == 64)
4412 return RValue::get(Result);
4413
4414 // Otherwise, ask the codegen data what to do.
4415 if (getTargetHooks().extendPointerWithSExt())
4416 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4417 else
4418 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4419 }
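// --- Editorial note (illustration, not part of CGBuiltin.cpp) ---------------
// The width adjustment described in the comment above, for a hypothetical
// 32-bit pointer value: the target hook decides between sign- and
// zero-extension to the 64-bit _Unwind_Word. Names are hypothetical.
#include <cstdint>
static uint64_t extendPointerReference(uint32_t PtrBits, bool UseSExt) {
  if (UseSExt)
    return static_cast<uint64_t>(
        static_cast<int64_t>(static_cast<int32_t>(PtrBits)));
  return static_cast<uint64_t>(PtrBits);
}
// ----------------------------------------------------------------------------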
4420 case Builtin::BI__builtin_setjmp: {
4421 // Buffer is a void**.
4422 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4423
4424 // Store the frame pointer to the setjmp buffer.
4425 Value *FrameAddr = Builder.CreateCall(
4426 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4427 ConstantInt::get(Int32Ty, 0));
4428 Builder.CreateStore(FrameAddr, Buf);
4429
4430 // Store the stack pointer to the setjmp buffer.
4431 Value *StackAddr = Builder.CreateStackSave();
4432 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4433
4434 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4435 Builder.CreateStore(StackAddr, StackSaveSlot);
4436
4437 // Call LLVM's EH setjmp, which is lightweight.
4438 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4439 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4440 }
4441 case Builtin::BI__builtin_longjmp: {
4442 Value *Buf = EmitScalarExpr(E->getArg(0));
4443
4444 // Call LLVM's EH longjmp, which is lightweight.
4445 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4446
4447 // longjmp doesn't return; mark this as unreachable.
4448 Builder.CreateUnreachable();
4449
4450 // We do need to preserve an insertion point.
4451 EmitBlock(createBasicBlock("longjmp.cont"));
4452
4453 return RValue::get(nullptr);
4454 }
4455 case Builtin::BI__builtin_launder: {
4456 const Expr *Arg = E->getArg(0);
4457 QualType ArgTy = Arg->getType()->getPointeeType();
4458 Value *Ptr = EmitScalarExpr(Arg);
4459 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4460 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4461
4462 return RValue::get(Ptr);
4463 }
4464 case Builtin::BI__sync_fetch_and_add:
4465 case Builtin::BI__sync_fetch_and_sub:
4466 case Builtin::BI__sync_fetch_and_or:
4467 case Builtin::BI__sync_fetch_and_and:
4468 case Builtin::BI__sync_fetch_and_xor:
4469 case Builtin::BI__sync_fetch_and_nand:
4470 case Builtin::BI__sync_add_and_fetch:
4471 case Builtin::BI__sync_sub_and_fetch:
4472 case Builtin::BI__sync_and_and_fetch:
4473 case Builtin::BI__sync_or_and_fetch:
4474 case Builtin::BI__sync_xor_and_fetch:
4475 case Builtin::BI__sync_nand_and_fetch:
4476 case Builtin::BI__sync_val_compare_and_swap:
4477 case Builtin::BI__sync_bool_compare_and_swap:
4478 case Builtin::BI__sync_lock_test_and_set:
4479 case Builtin::BI__sync_lock_release:
4480 case Builtin::BI__sync_swap:
4481 llvm_unreachable("Shouldn't make it through sema");
4482 case Builtin::BI__sync_fetch_and_add_1:
4483 case Builtin::BI__sync_fetch_and_add_2:
4484 case Builtin::BI__sync_fetch_and_add_4:
4485 case Builtin::BI__sync_fetch_and_add_8:
4486 case Builtin::BI__sync_fetch_and_add_16:
4487 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4488 case Builtin::BI__sync_fetch_and_sub_1:
4489 case Builtin::BI__sync_fetch_and_sub_2:
4490 case Builtin::BI__sync_fetch_and_sub_4:
4491 case Builtin::BI__sync_fetch_and_sub_8:
4492 case Builtin::BI__sync_fetch_and_sub_16:
4493 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4494 case Builtin::BI__sync_fetch_and_or_1:
4495 case Builtin::BI__sync_fetch_and_or_2:
4496 case Builtin::BI__sync_fetch_and_or_4:
4497 case Builtin::BI__sync_fetch_and_or_8:
4498 case Builtin::BI__sync_fetch_and_or_16:
4499 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4500 case Builtin::BI__sync_fetch_and_and_1:
4501 case Builtin::BI__sync_fetch_and_and_2:
4502 case Builtin::BI__sync_fetch_and_and_4:
4503 case Builtin::BI__sync_fetch_and_and_8:
4504 case Builtin::BI__sync_fetch_and_and_16:
4505 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4506 case Builtin::BI__sync_fetch_and_xor_1:
4507 case Builtin::BI__sync_fetch_and_xor_2:
4508 case Builtin::BI__sync_fetch_and_xor_4:
4509 case Builtin::BI__sync_fetch_and_xor_8:
4510 case Builtin::BI__sync_fetch_and_xor_16:
4511 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4512 case Builtin::BI__sync_fetch_and_nand_1:
4513 case Builtin::BI__sync_fetch_and_nand_2:
4514 case Builtin::BI__sync_fetch_and_nand_4:
4515 case Builtin::BI__sync_fetch_and_nand_8:
4516 case Builtin::BI__sync_fetch_and_nand_16:
4517 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4518
4519 // Clang extensions: not overloaded yet.
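// For illustration, __sync_fetch_and_min(&i, v) on a signed int lowers
// roughly to
//   %old = atomicrmw min ptr %i, i32 %v seq_cst
// returning the old value; the umin/umax variants use unsigned comparisons.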
4520 case Builtin::BI__sync_fetch_and_min:
4521 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4522 case Builtin::BI__sync_fetch_and_max:
4523 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4524 case Builtin::BI__sync_fetch_and_umin:
4525 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4526 case Builtin::BI__sync_fetch_and_umax:
4527 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4528
4529 case Builtin::BI__sync_add_and_fetch_1:
4530 case Builtin::BI__sync_add_and_fetch_2:
4531 case Builtin::BI__sync_add_and_fetch_4:
4532 case Builtin::BI__sync_add_and_fetch_8:
4533 case Builtin::BI__sync_add_and_fetch_16:
4534 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4535 llvm::Instruction::Add);
4536 case Builtin::BI__sync_sub_and_fetch_1:
4537 case Builtin::BI__sync_sub_and_fetch_2:
4538 case Builtin::BI__sync_sub_and_fetch_4:
4539 case Builtin::BI__sync_sub_and_fetch_8:
4540 case Builtin::BI__sync_sub_and_fetch_16:
4541 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4542 llvm::Instruction::Sub);
4543 case Builtin::BI__sync_and_and_fetch_1:
4544 case Builtin::BI__sync_and_and_fetch_2:
4545 case Builtin::BI__sync_and_and_fetch_4:
4546 case Builtin::BI__sync_and_and_fetch_8:
4547 case Builtin::BI__sync_and_and_fetch_16:
4548 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4549 llvm::Instruction::And);
4550 case Builtin::BI__sync_or_and_fetch_1:
4551 case Builtin::BI__sync_or_and_fetch_2:
4552 case Builtin::BI__sync_or_and_fetch_4:
4553 case Builtin::BI__sync_or_and_fetch_8:
4554 case Builtin::BI__sync_or_and_fetch_16:
4555 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4556 llvm::Instruction::Or);
4557 case Builtin::BI__sync_xor_and_fetch_1:
4558 case Builtin::BI__sync_xor_and_fetch_2:
4559 case Builtin::BI__sync_xor_and_fetch_4:
4560 case Builtin::BI__sync_xor_and_fetch_8:
4561 case Builtin::BI__sync_xor_and_fetch_16:
4562 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4563 llvm::Instruction::Xor);
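// __sync_nand_and_fetch is expected to return ~(*ptr & val), so the post-op
// below recomputes the AND and the trailing 'true' asks EmitBinaryAtomicPost
// to invert that result.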
4564 case Builtin::BI__sync_nand_and_fetch_1:
4565 case Builtin::BI__sync_nand_and_fetch_2:
4566 case Builtin::BI__sync_nand_and_fetch_4:
4567 case Builtin::BI__sync_nand_and_fetch_8:
4568 case Builtin::BI__sync_nand_and_fetch_16:
4569 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4570 llvm::Instruction::And, true);
4571
4572 case Builtin::BI__sync_val_compare_and_swap_1:
4573 case Builtin::BI__sync_val_compare_and_swap_2:
4574 case Builtin::BI__sync_val_compare_and_swap_4:
4575 case Builtin::BI__sync_val_compare_and_swap_8:
4576 case Builtin::BI__sync_val_compare_and_swap_16:
4577 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4578
4579 case Builtin::BI__sync_bool_compare_and_swap_1:
4580 case Builtin::BI__sync_bool_compare_and_swap_2:
4581 case Builtin::BI__sync_bool_compare_and_swap_4:
4582 case Builtin::BI__sync_bool_compare_and_swap_8:
4583 case Builtin::BI__sync_bool_compare_and_swap_16:
4584 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4585
4586 case Builtin::BI__sync_swap_1:
4587 case Builtin::BI__sync_swap_2:
4588 case Builtin::BI__sync_swap_4:
4589 case Builtin::BI__sync_swap_8:
4590 case Builtin::BI__sync_swap_16:
4591 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4592
4593 case Builtin::BI__sync_lock_test_and_set_1:
4594 case Builtin::BI__sync_lock_test_and_set_2:
4595 case Builtin::BI__sync_lock_test_and_set_4:
4596 case Builtin::BI__sync_lock_test_and_set_8:
4597 case Builtin::BI__sync_lock_test_and_set_16:
4598 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4599
4600 case Builtin::BI__sync_lock_release_1:
4601 case Builtin::BI__sync_lock_release_2:
4602 case Builtin::BI__sync_lock_release_4:
4603 case Builtin::BI__sync_lock_release_8:
4604 case Builtin::BI__sync_lock_release_16: {
4605 Address Ptr = CheckAtomicAlignment(*this, E);
4606 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4607
4608 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4609 getContext().getTypeSize(ElTy));
4610 llvm::StoreInst *Store =
4611 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
4612 Store->setAtomic(llvm::AtomicOrdering::Release);
4613 return RValue::get(nullptr);
4614 }
4615
4616 case Builtin::BI__sync_synchronize: {
4617 // We assume this is supposed to correspond to a C++0x-style
4618 // sequentially-consistent fence (i.e. this is only usable for
4619 // synchronization, not device I/O or anything like that). This intrinsic
4620 // is really badly designed in the sense that, in theory, there isn't
4621 // any way to safely use it... but in practice, it mostly works when
4622 // used with non-atomic loads and stores to get acquire/release
4623 // semantics.
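// The emitted IR is a single instruction: fence seq_cst.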
4624 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4625 return RValue::get(nullptr);
4626 }
4627
4628 case Builtin::BI__builtin_nontemporal_load:
4629 return RValue::get(EmitNontemporalLoad(*this, E));
4630 case Builtin::BI__builtin_nontemporal_store:
4631 return RValue::get(EmitNontemporalStore(*this, E));
4632 case Builtin::BI__c11_atomic_is_lock_free:
4633 case Builtin::BI__atomic_is_lock_free: {
4634 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4635 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4636 // _Atomic(T) is always properly-aligned.
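// For example, __atomic_is_lock_free(sizeof(T), p) becomes a call to the
// runtime routine __atomic_is_lock_free(size_t, void *); the __c11 form
// passes a null pointer for the second argument.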
4637 const char *LibCallName = "__atomic_is_lock_free";
4638 CallArgList Args;
4639 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4640 getContext().getSizeType());
4641 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4642 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4643 getContext().VoidPtrTy);
4644 else
4645 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4646 getContext().VoidPtrTy);
4647 const CGFunctionInfo &FuncInfo =
4648 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4649 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4650 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4651 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4652 ReturnValueSlot(), Args);
4653 }
4654
4655 case Builtin::BI__atomic_test_and_set: {
4656 // Look at the argument type to determine whether this is a volatile
4657 // operation. The parameter type is always volatile.
4658 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4659 bool Volatile =
4660 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4661
4662 Address Ptr =
4663 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
4664
4665 Value *NewVal = Builder.getInt8(1);
4666 Value *Order = EmitScalarExpr(E->getArg(1));
4667 if (isa<llvm::ConstantInt>(Order)) {
4668 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4669 AtomicRMWInst *Result = nullptr;
4670 switch (ord) {
4671 case 0: // memory_order_relaxed
4672 default: // invalid order
4673 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4674 llvm::AtomicOrdering::Monotonic);
4675 break;
4676 case 1: // memory_order_consume
4677 case 2: // memory_order_acquire
4678 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4679 llvm::AtomicOrdering::Acquire);
4680 break;
4681 case 3: // memory_order_release
4682 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4683 llvm::AtomicOrdering::Release);
4684 break;
4685 case 4: // memory_order_acq_rel
4686
4687 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4688 llvm::AtomicOrdering::AcquireRelease);
4689 break;
4690 case 5: // memory_order_seq_cst
4691 Result = Builder.CreateAtomicRMW(
4692 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4693 llvm::AtomicOrdering::SequentiallyConsistent);
4694 break;
4695 }
4696 Result->setVolatile(Volatile);
4697 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4698 }
4699
4700 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4701
4702 llvm::BasicBlock *BBs[5] = {
4703 createBasicBlock("monotonic", CurFn),
4704 createBasicBlock("acquire", CurFn),
4705 createBasicBlock("release", CurFn),
4706 createBasicBlock("acqrel", CurFn),
4707 createBasicBlock("seqcst", CurFn)
4708 };
4709 llvm::AtomicOrdering Orders[5] = {
4710 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4711 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4712 llvm::AtomicOrdering::SequentiallyConsistent};
4713
4714 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4715 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4716
4717 Builder.SetInsertPoint(ContBB);
4718 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4719
4720 for (unsigned i = 0; i < 5; ++i) {
4721 Builder.SetInsertPoint(BBs[i]);
4722 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4723 Ptr, NewVal, Orders[i]);
4724 RMW->setVolatile(Volatile);
4725 Result->addIncoming(RMW, BBs[i]);
4726 Builder.CreateBr(ContBB);
4727 }
4728
4729 SI->addCase(Builder.getInt32(0), BBs[0]);
4730 SI->addCase(Builder.getInt32(1), BBs[1]);
4731 SI->addCase(Builder.getInt32(2), BBs[1]);
4732 SI->addCase(Builder.getInt32(3), BBs[2]);
4733 SI->addCase(Builder.getInt32(4), BBs[3]);
4734 SI->addCase(Builder.getInt32(5), BBs[4]);
4735
4736 Builder.SetInsertPoint(ContBB);
4737 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4738 }
4739
4740 case Builtin::BI__atomic_clear: {
4741 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4742 bool Volatile =
4743 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4744
4745 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4746 Ptr = Ptr.withElementType(Int8Ty);
4747 Value *NewVal = Builder.getInt8(0);
4748 Value *Order = EmitScalarExpr(E->getArg(1));
4749 if (isa<llvm::ConstantInt>(Order)) {
4750 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4751 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4752 switch (ord) {
4753 case 0: // memory_order_relaxed
4754 default: // invalid order
4755 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4756 break;
4757 case 3: // memory_order_release
4758 Store->setOrdering(llvm::AtomicOrdering::Release);
4759 break;
4760 case 5: // memory_order_seq_cst
4761 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4762 break;
4763 }
4764 return RValue::get(nullptr);
4765 }
4766
4767 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4768
4769 llvm::BasicBlock *BBs[3] = {
4770 createBasicBlock("monotonic", CurFn),
4771 createBasicBlock("release", CurFn),
4772 createBasicBlock("seqcst", CurFn)
4773 };
4774 llvm::AtomicOrdering Orders[3] = {
4775 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4776 llvm::AtomicOrdering::SequentiallyConsistent};
4777
4778 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4779 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4780
4781 for (unsigned i = 0; i < 3; ++i) {
4782 Builder.SetInsertPoint(BBs[i]);
4783 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4784 Store->setOrdering(Orders[i]);
4785 Builder.CreateBr(ContBB);
4786 }
4787
4788 SI->addCase(Builder.getInt32(0), BBs[0]);
4789 SI->addCase(Builder.getInt32(3), BBs[1]);
4790 SI->addCase(Builder.getInt32(5), BBs[2]);
4791
4792 Builder.SetInsertPoint(ContBB);
4793 return RValue::get(nullptr);
4794 }
4795
4796 case Builtin::BI__atomic_thread_fence:
4797 case Builtin::BI__atomic_signal_fence:
4798 case Builtin::BI__c11_atomic_thread_fence:
4799 case Builtin::BI__c11_atomic_signal_fence: {
4800 llvm::SyncScope::ID SSID;
4801 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4802 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4803 SSID = llvm::SyncScope::SingleThread;
4804 else
4805 SSID = llvm::SyncScope::System;
4806 Value *Order = EmitScalarExpr(E->getArg(0));
4807 if (isa<llvm::ConstantInt>(Order)) {
4808 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4809 switch (ord) {
4810 case 0: // memory_order_relaxed
4811 default: // invalid order
4812 break;
4813 case 1: // memory_order_consume
4814 case 2: // memory_order_acquire
4815 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4816 break;
4817 case 3: // memory_order_release
4818 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4819 break;
4820 case 4: // memory_order_acq_rel
4821 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4822 break;
4823 case 5: // memory_order_seq_cst
4824 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4825 break;
4826 }
4827 return RValue::get(nullptr);
4828 }
4829
4830 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4831 AcquireBB = createBasicBlock("acquire", CurFn);
4832 ReleaseBB = createBasicBlock("release", CurFn);
4833 AcqRelBB = createBasicBlock("acqrel", CurFn);
4834 SeqCstBB = createBasicBlock("seqcst", CurFn);
4835 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4836
4837 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4838 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4839
4840 Builder.SetInsertPoint(AcquireBB);
4841 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4842 Builder.CreateBr(ContBB);
4843 SI->addCase(Builder.getInt32(1), AcquireBB);
4844 SI->addCase(Builder.getInt32(2), AcquireBB);
4845
4846 Builder.SetInsertPoint(ReleaseBB);
4847 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4848 Builder.CreateBr(ContBB);
4849 SI->addCase(Builder.getInt32(3), ReleaseBB);
4850
4851 Builder.SetInsertPoint(AcqRelBB);
4852 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4853 Builder.CreateBr(ContBB);
4854 SI->addCase(Builder.getInt32(4), AcqRelBB);
4855
4856 Builder.SetInsertPoint(SeqCstBB);
4857 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4858 Builder.CreateBr(ContBB);
4859 SI->addCase(Builder.getInt32(5), SeqCstBB);
4860
4861 Builder.SetInsertPoint(ContBB);
4862 return RValue::get(nullptr);
4863 }
4864
4865 case Builtin::BI__builtin_signbit:
4866 case Builtin::BI__builtin_signbitf:
4867 case Builtin::BI__builtin_signbitl: {
4868 return RValue::get(
4869 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4870 ConvertType(E->getType())));
4871 }
4872 case Builtin::BI__warn_memset_zero_len:
4873 return RValue::getIgnored();
4874 case Builtin::BI__annotation: {
4875 // Re-encode each wide string to UTF8 and make an MDString.
4876 SmallVector<Metadata *, 1> Strings;
4877 for (const Expr *Arg : E->arguments()) {
4878 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4879 assert(Str->getCharByteWidth() == 2);
4880 StringRef WideBytes = Str->getBytes();
4881 std::string StrUtf8;
4882 if (!convertUTF16ToUTF8String(
4883 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
4884 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
4885 continue;
4886 }
4887 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
4888 }
4889
4890 // Build an MDTuple of MDStrings and emit the intrinsic call.
4891 llvm::Function *F =
4892 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
4893 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
4894 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
4895 return RValue::getIgnored();
4896 }
4897 case Builtin::BI__builtin_annotation: {
4898 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
4899 llvm::Function *F =
4900 CGM.getIntrinsic(llvm::Intrinsic::annotation,
4901 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
4902
4903 // Get the annotation string, looking through casts. Sema requires this to
4904 // be a non-wide string literal, potentially cast, so the cast<> is safe.
4905 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
4906 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
4907 return RValue::get(
4908 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
4909 }
4910 case Builtin::BI__builtin_addcb:
4911 case Builtin::BI__builtin_addcs:
4912 case Builtin::BI__builtin_addc:
4913 case Builtin::BI__builtin_addcl:
4914 case Builtin::BI__builtin_addcll:
4915 case Builtin::BI__builtin_subcb:
4916 case Builtin::BI__builtin_subcs:
4917 case Builtin::BI__builtin_subc:
4918 case Builtin::BI__builtin_subcl:
4919 case Builtin::BI__builtin_subcll: {
4920
4921 // We translate all of these builtins from expressions of the form:
4922 // int x = ..., y = ..., carryin = ..., carryout, result;
4923 // result = __builtin_addc(x, y, carryin, &carryout);
4924 //
4925 // to LLVM IR of the form:
4926 //
4927 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
4928 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
4929 // %carry1 = extractvalue {i32, i1} %tmp1, 1
4930 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
4931 // i32 %carryin)
4932 // %result = extractvalue {i32, i1} %tmp2, 0
4933 // %carry2 = extractvalue {i32, i1} %tmp2, 1
4934 // %tmp3 = or i1 %carry1, %carry2
4935 // %tmp4 = zext i1 %tmp3 to i32
4936 // store i32 %tmp4, i32* %carryout
4937
4938 // Scalarize our inputs.
4939 llvm::Value *X = EmitScalarExpr(E->getArg(0));
4940 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
4941 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
4942 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
4943
4944 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
4945 llvm::Intrinsic::ID IntrinsicId;
4946 switch (BuiltinID) {
4947 default: llvm_unreachable("Unknown multiprecision builtin id.");
4948 case Builtin::BI__builtin_addcb:
4949 case Builtin::BI__builtin_addcs:
4950 case Builtin::BI__builtin_addc:
4951 case Builtin::BI__builtin_addcl:
4952 case Builtin::BI__builtin_addcll:
4953 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
4954 break;
4955 case Builtin::BI__builtin_subcb:
4956 case Builtin::BI__builtin_subcs:
4957 case Builtin::BI__builtin_subc:
4958 case Builtin::BI__builtin_subcl:
4959 case Builtin::BI__builtin_subcll:
4960 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
4961 break;
4962 }
4963
4964 // Construct our resulting LLVM IR expression.
4965 llvm::Value *Carry1;
4966 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
4967 X, Y, Carry1);
4968 llvm::Value *Carry2;
4969 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
4970 Sum1, Carryin, Carry2);
4971 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
4972 X->getType());
4973 Builder.CreateStore(CarryOut, CarryOutPtr);
4974 return RValue::get(Sum2);
4975 }
4976
4977 case Builtin::BI__builtin_add_overflow:
4978 case Builtin::BI__builtin_sub_overflow:
4979 case Builtin::BI__builtin_mul_overflow: {
4980 const clang::Expr *LeftArg = E->getArg(0);
4981 const clang::Expr *RightArg = E->getArg(1);
4982 const clang::Expr *ResultArg = E->getArg(2);
4983
4984 clang::QualType ResultQTy =
4985 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
4986
4987 WidthAndSignedness LeftInfo =
4988 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
4989 WidthAndSignedness RightInfo =
4990 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
4991 WidthAndSignedness ResultInfo =
4992 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
4993
4994 // Handle mixed-sign multiplication as a special case, because adding
4995 // runtime or backend support for our generic irgen would be too expensive.
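// E.g. __builtin_mul_overflow(int, unsigned, int *), where one operand is
// signed and the other unsigned at the same width, takes this path.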
4996 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
4997 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
4998 RightInfo, ResultArg, ResultQTy,
4999 ResultInfo);
5000
5001 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5002 ResultInfo))
5003 return EmitCheckedUnsignedMultiplySignedResult(
5004 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5005 ResultInfo);
5006
5007 WidthAndSignedness EncompassingInfo =
5008 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
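// E.g. for __builtin_add_overflow(unsigned, int, long *) on a typical 64-bit
// target the encompassing type is a 64-bit signed integer: signed because at
// least one of the types is, and wide enough to represent every value of all
// three types.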
5009
5010 llvm::Type *EncompassingLLVMTy =
5011 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5012
5013 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5014
5015 llvm::Intrinsic::ID IntrinsicId;
5016 switch (BuiltinID) {
5017 default:
5018 llvm_unreachable("Unknown overflow builtin id.");
5019 case Builtin::BI__builtin_add_overflow:
5020 IntrinsicId = EncompassingInfo.Signed
5021 ? llvm::Intrinsic::sadd_with_overflow
5022 : llvm::Intrinsic::uadd_with_overflow;
5023 break;
5024 case Builtin::BI__builtin_sub_overflow:
5025 IntrinsicId = EncompassingInfo.Signed
5026 ? llvm::Intrinsic::ssub_with_overflow
5027 : llvm::Intrinsic::usub_with_overflow;
5028 break;
5029 case Builtin::BI__builtin_mul_overflow:
5030 IntrinsicId = EncompassingInfo.Signed
5031 ? llvm::Intrinsic::smul_with_overflow
5032 : llvm::Intrinsic::umul_with_overflow;
5033 break;
5034 }
5035
5036 llvm::Value *Left = EmitScalarExpr(LeftArg);
5037 llvm::Value *Right = EmitScalarExpr(RightArg);
5038 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5039
5040 // Extend each operand to the encompassing type.
5041 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5042 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5043
5044 // Perform the operation on the extended values.
5045 llvm::Value *Overflow, *Result;
5046 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5047
5048 if (EncompassingInfo.Width > ResultInfo.Width) {
5049 // The encompassing type is wider than the result type, so we need to
5050 // truncate it.
5051 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5052
5053 // To see if the truncation caused an overflow, we will extend
5054 // the result and then compare it to the original result.
5055 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5056 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5057 llvm::Value *TruncationOverflow =
5058 Builder.CreateICmpNE(Result, ResultTruncExt);
5059
5060 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5061 Result = ResultTrunc;
5062 }
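// E.g. __builtin_add_overflow(long, long, int *) computes the sum as an i64,
// truncates it to i32, and the compare above reports any sum that does not
// fit in int as an overflow.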
5063
5064 // Finally, store the result using the pointer.
5065 bool isVolatile =
5066 ResultArg->getType()->getPointeeType().isVolatileQualified();
5067 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5068
5069 return RValue::get(Overflow);
5070 }
5071
5072 case Builtin::BI__builtin_uadd_overflow:
5073 case Builtin::BI__builtin_uaddl_overflow:
5074 case Builtin::BI__builtin_uaddll_overflow:
5075 case Builtin::BI__builtin_usub_overflow:
5076 case Builtin::BI__builtin_usubl_overflow:
5077 case Builtin::BI__builtin_usubll_overflow:
5078 case Builtin::BI__builtin_umul_overflow:
5079 case Builtin::BI__builtin_umull_overflow:
5080 case Builtin::BI__builtin_umulll_overflow:
5081 case Builtin::BI__builtin_sadd_overflow:
5082 case Builtin::BI__builtin_saddl_overflow:
5083 case Builtin::BI__builtin_saddll_overflow:
5084 case Builtin::BI__builtin_ssub_overflow:
5085 case Builtin::BI__builtin_ssubl_overflow:
5086 case Builtin::BI__builtin_ssubll_overflow:
5087 case Builtin::BI__builtin_smul_overflow:
5088 case Builtin::BI__builtin_smull_overflow:
5089 case Builtin::BI__builtin_smulll_overflow: {
5090
5091 // We translate all of these builtins directly to the relevant llvm IR node.
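// E.g. __builtin_sadd_overflow(x, y, &r) becomes roughly
//   %pair = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
// with the i32 field stored to r and the i1 overflow bit returned.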
5092
5093 // Scalarize our inputs.
5094 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5095 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5096 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5097
5098 // Decide which of the overflow intrinsics we are lowering to:
5099 llvm::Intrinsic::ID IntrinsicId;
5100 switch (BuiltinID) {
5101 default: llvm_unreachable("Unknown overflow builtin id.");
5102 case Builtin::BI__builtin_uadd_overflow:
5103 case Builtin::BI__builtin_uaddl_overflow:
5104 case Builtin::BI__builtin_uaddll_overflow:
5105 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5106 break;
5107 case Builtin::BI__builtin_usub_overflow:
5108 case Builtin::BI__builtin_usubl_overflow:
5109 case Builtin::BI__builtin_usubll_overflow:
5110 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5111 break;
5112 case Builtin::BI__builtin_umul_overflow:
5113 case Builtin::BI__builtin_umull_overflow:
5114 case Builtin::BI__builtin_umulll_overflow:
5115 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5116 break;
5117 case Builtin::BI__builtin_sadd_overflow:
5118 case Builtin::BI__builtin_saddl_overflow:
5119 case Builtin::BI__builtin_saddll_overflow:
5120 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5121 break;
5122 case Builtin::BI__builtin_ssub_overflow:
5123 case Builtin::BI__builtin_ssubl_overflow:
5124 case Builtin::BI__builtin_ssubll_overflow:
5125 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5126 break;
5127 case Builtin::BI__builtin_smul_overflow:
5128 case Builtin::BI__builtin_smull_overflow:
5129 case Builtin::BI__builtin_smulll_overflow:
5130 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5131 break;
5132 }
5133
5134
5135 llvm::Value *Carry;
5136 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5137 Builder.CreateStore(Sum, SumOutPtr);
5138
5139 return RValue::get(Carry);
5140 }
5141 case Builtin::BIaddressof:
5142 case Builtin::BI__addressof:
5143 case Builtin::BI__builtin_addressof:
5144 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5145 case Builtin::BI__builtin_function_start:
5146 return RValue::get(CGM.GetFunctionStart(
5147 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5148 case Builtin::BI__builtin_operator_new:
5149 return EmitBuiltinNewDeleteCall(
5150 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5151 case Builtin::BI__builtin_operator_delete:
5152 EmitBuiltinNewDeleteCall(
5153 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5154 return RValue::get(nullptr);
5155
5156 case Builtin::BI__builtin_is_aligned:
5157 return EmitBuiltinIsAligned(E);
5158 case Builtin::BI__builtin_align_up:
5159 return EmitBuiltinAlignTo(E, true);
5160 case Builtin::BI__builtin_align_down:
5161 return EmitBuiltinAlignTo(E, false);
5162
5163 case Builtin::BI__noop:
5164 // __noop always evaluates to an integer literal zero.
5165 return RValue::get(ConstantInt::get(IntTy, 0));
5166 case Builtin::BI__builtin_call_with_static_chain: {
5167 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5168 const Expr *Chain = E->getArg(1);
5169 return EmitCall(Call->getCallee()->getType(),
5170 EmitCallee(Call->getCallee()), Call, ReturnValue,
5171 EmitScalarExpr(Chain));
5172 }
5173 case Builtin::BI_InterlockedExchange8:
5174 case Builtin::BI_InterlockedExchange16:
5175 case Builtin::BI_InterlockedExchange:
5176 case Builtin::BI_InterlockedExchangePointer:
5177 return RValue::get(
5178 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5179 case Builtin::BI_InterlockedCompareExchangePointer:
5180 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5181 llvm::Type *RTy;
5182 llvm::IntegerType *IntType = IntegerType::get(
5183 getLLVMContext(), getContext().getTypeSize(E->getType()));
5184
5185 Address DestAddr = CheckAtomicAlignment(*this, E);
5186
5187 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5188 RTy = Exchange->getType();
5189 Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5190
5191 llvm::Value *Comparand =
5192 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5193
5194 auto Ordering =
5195 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5196 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5197
5198 auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
5199 Ordering, Ordering);
5200 Result->setVolatile(true);
5201
5202 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
5203 0),
5204 RTy));
5205 }
5206 case Builtin::BI_InterlockedCompareExchange8:
5207 case Builtin::BI_InterlockedCompareExchange16:
5208 case Builtin::BI_InterlockedCompareExchange:
5209 case Builtin::BI_InterlockedCompareExchange64:
5210 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5211 case Builtin::BI_InterlockedIncrement16:
5212 case Builtin::BI_InterlockedIncrement:
5213 return RValue::get(
5214 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5215 case Builtin::BI_InterlockedDecrement16:
5216 case Builtin::BI_InterlockedDecrement:
5217 return RValue::get(
5218 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5219 case Builtin::BI_InterlockedAnd8:
5220 case Builtin::BI_InterlockedAnd16:
5221 case Builtin::BI_InterlockedAnd:
5222 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5223 case Builtin::BI_InterlockedExchangeAdd8:
5224 case Builtin::BI_InterlockedExchangeAdd16:
5225 case Builtin::BI_InterlockedExchangeAdd:
5226 return RValue::get(
5227 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5228 case Builtin::BI_InterlockedExchangeSub8:
5229 case Builtin::BI_InterlockedExchangeSub16:
5230 case Builtin::BI_InterlockedExchangeSub:
5231 return RValue::get(
5232 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5233 case Builtin::BI_InterlockedOr8:
5234 case Builtin::BI_InterlockedOr16:
5235 case Builtin::BI_InterlockedOr:
5236 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5237 case Builtin::BI_InterlockedXor8:
5238 case Builtin::BI_InterlockedXor16:
5239 case Builtin::BI_InterlockedXor:
5240 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5241
5242 case Builtin::BI_bittest64:
5243 case Builtin::BI_bittest:
5244 case Builtin::BI_bittestandcomplement64:
5245 case Builtin::BI_bittestandcomplement:
5246 case Builtin::BI_bittestandreset64:
5247 case Builtin::BI_bittestandreset:
5248 case Builtin::BI_bittestandset64:
5249 case Builtin::BI_bittestandset:
5250 case Builtin::BI_interlockedbittestandreset:
5251 case Builtin::BI_interlockedbittestandreset64:
5252 case Builtin::BI_interlockedbittestandset64:
5253 case Builtin::BI_interlockedbittestandset:
5254 case Builtin::BI_interlockedbittestandset_acq:
5255 case Builtin::BI_interlockedbittestandset_rel:
5256 case Builtin::BI_interlockedbittestandset_nf:
5257 case Builtin::BI_interlockedbittestandreset_acq:
5258 case Builtin::BI_interlockedbittestandreset_rel:
5259 case Builtin::BI_interlockedbittestandreset_nf:
5260 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5261
5262 // These builtins exist to emit regular volatile loads and stores not
5263 // affected by the -fms-volatile setting.
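// For example, __iso_volatile_load32(p) always emits a plain
//   %v = load volatile i32, ptr %p
// even under /volatile:ms, where an ordinary volatile access might otherwise
// be given acquire/release semantics.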
5264 case Builtin::BI__iso_volatile_load8:
5265 case Builtin::BI__iso_volatile_load16:
5266 case Builtin::BI__iso_volatile_load32:
5267 case Builtin::BI__iso_volatile_load64:
5268 return RValue::get(EmitISOVolatileLoad(*this, E));
5269 case Builtin::BI__iso_volatile_store8:
5270 case Builtin::BI__iso_volatile_store16:
5271 case Builtin::BI__iso_volatile_store32:
5272 case Builtin::BI__iso_volatile_store64:
5273 return RValue::get(EmitISOVolatileStore(*this, E));
5274
5275 case Builtin::BI__builtin_ptrauth_auth:
5276 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5277 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5278 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5279 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5280 case Builtin::BI__builtin_ptrauth_strip: {
5281 // Emit the arguments.
5282 SmallVector<llvm::Value *, 5> Args;
5283 for (auto argExpr : E->arguments())
5284 Args.push_back(EmitScalarExpr(argExpr));
5285
5286 // Cast the value to intptr_t, saving its original type.
5287 llvm::Type *OrigValueType = Args[0]->getType();
5288 if (OrigValueType->isPointerTy())
5289 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5290
5291 switch (BuiltinID) {
5292 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5293 if (Args[4]->getType()->isPointerTy())
5294 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5295 [[fallthrough]];
5296
5297 case Builtin::BI__builtin_ptrauth_auth:
5298 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5299 if (Args[2]->getType()->isPointerTy())
5300 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5301 break;
5302
5303 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5304 if (Args[1]->getType()->isPointerTy())
5305 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5306 break;
5307
5308 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5309 case Builtin::BI__builtin_ptrauth_strip:
5310 break;
5311 }
5312
5313 // Call the intrinsic.
5314 auto IntrinsicID = [&]() -> unsigned {
5315 switch (BuiltinID) {
5316 case Builtin::BI__builtin_ptrauth_auth:
5317 return llvm::Intrinsic::ptrauth_auth;
5318 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5319 return llvm::Intrinsic::ptrauth_resign;
5320 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5321 return llvm::Intrinsic::ptrauth_blend;
5322 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5323 return llvm::Intrinsic::ptrauth_sign_generic;
5324 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5325 return llvm::Intrinsic::ptrauth_sign;
5326 case Builtin::BI__builtin_ptrauth_strip:
5327 return llvm::Intrinsic::ptrauth_strip;
5328 }
5329 llvm_unreachable("bad ptrauth intrinsic");
5330 }();
5331 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5332 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5333
5334 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5335 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5336 OrigValueType->isPointerTy()) {
5337 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5338 }
5339 return RValue::get(Result);
5340 }
5341
5342 case Builtin::BI__exception_code:
5343 case Builtin::BI_exception_code:
5344 return RValue::get(EmitSEHExceptionCode());
5345 case Builtin::BI__exception_info:
5346 case Builtin::BI_exception_info:
5347 return RValue::get(EmitSEHExceptionInfo());
5348 case Builtin::BI__abnormal_termination:
5349 case Builtin::BI_abnormal_termination:
5350 return RValue::get(EmitSEHAbnormalTermination());
5351 case Builtin::BI_setjmpex:
5352 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5353 E->getArg(0)->getType()->isPointerType())
5354 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5355 break;
5356 case Builtin::BI_setjmp:
5357 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5358 E->getArg(0)->getType()->isPointerType()) {
5359 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5360 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5361 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5362 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5363 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5364 }
5365 break;
5366
5367 // C++ std:: builtins.
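// These calls are transparent to code generation: each simply yields the
// address of its argument's lvalue, so no call is emitted.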
5368 case Builtin::BImove:
5369 case Builtin::BImove_if_noexcept:
5370 case Builtin::BIforward:
5371 case Builtin::BIforward_like:
5372 case Builtin::BIas_const:
5373 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5374 case Builtin::BI__GetExceptionInfo: {
5375 if (llvm::GlobalVariable *GV =
5376 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5377 return RValue::get(GV);
5378 break;
5379 }
5380
5381 case Builtin::BI__fastfail:
5382 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5383
5384 case Builtin::BI__builtin_coro_id:
5385 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5386 case Builtin::BI__builtin_coro_promise:
5387 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5388 case Builtin::BI__builtin_coro_resume:
5389 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5390 return RValue::get(nullptr);
5391 case Builtin::BI__builtin_coro_frame:
5392 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5393 case Builtin::BI__builtin_coro_noop:
5394 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5395 case Builtin::BI__builtin_coro_free:
5396 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5397 case Builtin::BI__builtin_coro_destroy:
5398 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5399 return RValue::get(nullptr);
5400 case Builtin::BI__builtin_coro_done:
5401 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5402 case Builtin::BI__builtin_coro_alloc:
5403 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5404 case Builtin::BI__builtin_coro_begin:
5405 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5406 case Builtin::BI__builtin_coro_end:
5407 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5408 case Builtin::BI__builtin_coro_suspend:
5409 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5410 case Builtin::BI__builtin_coro_size:
5411 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5412 case Builtin::BI__builtin_coro_align:
5413 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5414
5415 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5416 case Builtin::BIread_pipe:
5417 case Builtin::BIwrite_pipe: {
5418 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5419 *Arg1 = EmitScalarExpr(E->getArg(1));
5420 CGOpenCLRuntime OpenCLRT(CGM);
5421 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5422 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5423
5424 // Type of the generic packet parameter.
5425 unsigned GenericAS =
5426 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5427 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5428
5429 // Determine which overloaded version we should generate the call for.
5430 if (2U == E->getNumArgs()) {
5431 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5432 : "__write_pipe_2";
5433 // Create a generic function type that can be called with any builtin or
5434 // user-defined type.
5435 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5436 llvm::FunctionType *FTy = llvm::FunctionType::get(
5437 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5438 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5439 return RValue::get(
5440 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5441 {Arg0, BCast, PacketSize, PacketAlign}));
5442 } else {
5443 assert(4 == E->getNumArgs() &&
5444 "Illegal number of parameters to pipe function");
5445 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5446 : "__write_pipe_4";
5447
5448 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5449 Int32Ty, Int32Ty};
5450 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5451 *Arg3 = EmitScalarExpr(E->getArg(3));
5452 llvm::FunctionType *FTy = llvm::FunctionType::get(
5453 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5454 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5455 // We know the third argument is an integer type, but we may need to cast
5456 // it to i32.
5457 if (Arg2->getType() != Int32Ty)
5458 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5459 return RValue::get(
5460 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5461 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5462 }
5463 }
5464 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
5465 // functions
5466 case Builtin::BIreserve_read_pipe:
5467 case Builtin::BIreserve_write_pipe:
5468 case Builtin::BIwork_group_reserve_read_pipe:
5469 case Builtin::BIwork_group_reserve_write_pipe:
5470 case Builtin::BIsub_group_reserve_read_pipe:
5471 case Builtin::BIsub_group_reserve_write_pipe: {
5472 // Composing the mangled name for the function.
5473 const char *Name;
5474 if (BuiltinID == Builtin::BIreserve_read_pipe)
5475 Name = "__reserve_read_pipe";
5476 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5477 Name = "__reserve_write_pipe";
5478 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5479 Name = "__work_group_reserve_read_pipe";
5480 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5481 Name = "__work_group_reserve_write_pipe";
5482 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5483 Name = "__sub_group_reserve_read_pipe";
5484 else
5485 Name = "__sub_group_reserve_write_pipe";
5486
5487 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5488 *Arg1 = EmitScalarExpr(E->getArg(1));
5489 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5490 CGOpenCLRuntime OpenCLRT(CGM);
5491 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5492 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5493
5494 // Building the generic function prototype.
5495 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5496 llvm::FunctionType *FTy = llvm::FunctionType::get(
5497 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5498 // We know the second argument is an integer type, but we may need to cast
5499 // it to i32.
5500 if (Arg1->getType() != Int32Ty)
5501 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5502 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5503 {Arg0, Arg1, PacketSize, PacketAlign}));
5504 }
5505 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5506 // functions
5507 case Builtin::BIcommit_read_pipe:
5508 case Builtin::BIcommit_write_pipe:
5509 case Builtin::BIwork_group_commit_read_pipe:
5510 case Builtin::BIwork_group_commit_write_pipe:
5511 case Builtin::BIsub_group_commit_read_pipe:
5512 case Builtin::BIsub_group_commit_write_pipe: {
5513 const char *Name;
5514 if (BuiltinID == Builtin::BIcommit_read_pipe)
5515 Name = "__commit_read_pipe";
5516 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5517 Name = "__commit_write_pipe";
5518 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5519 Name = "__work_group_commit_read_pipe";
5520 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5521 Name = "__work_group_commit_write_pipe";
5522 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5523 Name = "__sub_group_commit_read_pipe";
5524 else
5525 Name = "__sub_group_commit_write_pipe";
5526
5527 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5528 *Arg1 = EmitScalarExpr(E->getArg(1));
5529 CGOpenCLRuntime OpenCLRT(CGM);
5530 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5531 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5532
5533 // Building the generic function prototype.
5534 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5535 llvm::FunctionType *FTy =
5536 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5537 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5538
5539 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5540 {Arg0, Arg1, PacketSize, PacketAlign}));
5541 }
5542 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5543 case Builtin::BIget_pipe_num_packets:
5544 case Builtin::BIget_pipe_max_packets: {
5545 const char *BaseName;
5546 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5547 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5548 BaseName = "__get_pipe_num_packets";
5549 else
5550 BaseName = "__get_pipe_max_packets";
5551 std::string Name = std::string(BaseName) +
5552 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5553
5554 // Building the generic function prototype.
5555 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5556 CGOpenCLRuntime OpenCLRT(CGM);
5557 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5558 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5559 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5560 llvm::FunctionType *FTy = llvm::FunctionType::get(
5561 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5562
5563 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5564 {Arg0, PacketSize, PacketAlign}));
5565 }
5566
5567 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
5568 case Builtin::BIto_global:
5569 case Builtin::BIto_local:
5570 case Builtin::BIto_private: {
5571 auto Arg0 = EmitScalarExpr(E->getArg(0));
5572 auto NewArgT = llvm::PointerType::get(
5573 getLLVMContext(),
5574 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5575 auto NewRetT = llvm::PointerType::get(
5576 getLLVMContext(),
5577 CGM.getContext().getTargetAddressSpace(
5578 E->getType()->getPointeeType().getAddressSpace()));
5579 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5580 llvm::Value *NewArg;
5581 if (Arg0->getType()->getPointerAddressSpace() !=
5582 NewArgT->getPointerAddressSpace())
5583 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5584 else
5585 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5586 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5587 auto NewCall =
5588 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5589 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5590 ConvertType(E->getType())));
5591 }
5592
5593 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5594 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
5595 // The code below expands the builtin call to a call to one of the following
5596 // functions that an OpenCL runtime library will have to provide:
5597 // __enqueue_kernel_basic
5598 // __enqueue_kernel_varargs
5599 // __enqueue_kernel_basic_events
5600 // __enqueue_kernel_events_varargs
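// For example, the simplest form
//   enqueue_kernel(queue, flags, ndrange, block);
// lowers roughly to
//   call i32 @__enqueue_kernel_basic(queue_t, i32, ptr byval(ndrange_t),
//                                    ptr, ptr)
// where the last two pointers are the block invoke function and the block
// literal, cast to the generic address space.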
5601 case Builtin::BIenqueue_kernel: {
5602 StringRef Name; // Generated function call name
5603 unsigned NumArgs = E->getNumArgs();
5604
5605 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5606 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5607 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5608
5609 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5610 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5611 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5612 llvm::Value *Range = NDRangeL.getAddress(*this).emitRawPointer(*this);
5613 llvm::Type *RangeTy = NDRangeL.getAddress(*this).getType();
5614
5615 if (NumArgs == 4) {
5616 // The most basic form of the call with parameters:
5617 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5618 Name = "__enqueue_kernel_basic";
5619 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5620 GenericVoidPtrTy};
5621 llvm::FunctionType *FTy = llvm::FunctionType::get(
5622 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5623
5624 auto Info =
5625 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5626 llvm::Value *Kernel =
5627 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5628 llvm::Value *Block =
5629 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5630
5631 AttrBuilder B(Builder.getContext());
5632 B.addByValAttr(NDRangeL.getAddress(*this).getElementType());
5633 llvm::AttributeList ByValAttrSet =
5634 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5635
5636 auto RTCall =
5637 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5638 {Queue, Flags, Range, Kernel, Block});
5639 RTCall->setAttributes(ByValAttrSet);
5640 return RValue::get(RTCall);
5641 }
5642 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5643
5644 // Create a temporary array to hold the sizes of local pointer arguments
5645 // for the block. \p First is the position of the first size argument.
5646 auto CreateArrayForSizeVar = [=](unsigned First)
5647 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5648 llvm::APInt ArraySize(32, NumArgs - First);
5649 QualType SizeArrayTy = getContext().getConstantArrayType(
5650 getContext().getSizeType(), ArraySize, nullptr,
5651 ArraySizeModifier::Normal,
5652 /*IndexTypeQuals=*/0);
5653 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5654 llvm::Value *TmpPtr = Tmp.getPointer();
5655 llvm::Value *TmpSize = EmitLifetimeStart(
5656 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5657 llvm::Value *ElemPtr;
5658 // Each of the following arguments specifies the size of the corresponding
5659 // argument passed to the enqueued block.
5660 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5661 for (unsigned I = First; I < NumArgs; ++I) {
5662 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5663 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5664 {Zero, Index});
5665 if (I == First)
5666 ElemPtr = GEP;
5667 auto *V =
5668 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5669 Builder.CreateAlignedStore(
5670 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5671 }
5672 return std::tie(ElemPtr, TmpSize, TmpPtr);
5673 };
5674
5675 // Could have events and/or varargs.
5676 if (E->getArg(3)->getType()->isBlockPointerType()) {
5677 // No events passed, but has variadic arguments.
5678 Name = "__enqueue_kernel_varargs";
5679 auto Info =
5680 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5681 llvm::Value *Kernel =
5682 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5683 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5684 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5685 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5686
5687 // Create a vector of the arguments, as well as a constant value to
5688 // express to the runtime the number of variadic arguments.
5689 llvm::Value *const Args[] = {Queue, Flags,
5690 Range, Kernel,
5691 Block, ConstantInt::get(IntTy, NumArgs - 4),
5692 ElemPtr};
5693 llvm::Type *const ArgTys[] = {
5694 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5695 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5696
5697 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5698 auto Call = RValue::get(
5699 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5700 if (TmpSize)
5701 EmitLifetimeEnd(TmpSize, TmpPtr);
5702 return Call;
5703 }
5704 // Any calls now have event arguments passed.
5705 if (NumArgs >= 7) {
5706 llvm::PointerType *PtrTy = llvm::PointerType::get(
5707 CGM.getLLVMContext(),
5708 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5709
5710 llvm::Value *NumEvents =
5711 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5712
5713 // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
5714 // to be a null pointer constant (including a `0` literal), we can take that
5715 // into account and emit a null pointer directly.
5716 llvm::Value *EventWaitList = nullptr;
5717 if (E->getArg(4)->isNullPointerConstant(
5718 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5719 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5720 } else {
5721 EventWaitList =
5722 E->getArg(4)->getType()->isArrayType()
5723 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
5724 : EmitScalarExpr(E->getArg(4));
5725 // Convert to generic address space.
5726 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5727 }
5728 llvm::Value *EventRet = nullptr;
5729 if (E->getArg(5)->isNullPointerConstant(
5730 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5731 EventRet = llvm::ConstantPointerNull::get(PtrTy);
5732 } else {
5733 EventRet =
5734 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5735 }
5736
5737 auto Info =
5738 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5739 llvm::Value *Kernel =
5740 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5741 llvm::Value *Block =
5742 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5743
5744 std::vector<llvm::Type *> ArgTys = {
5745 QueueTy, Int32Ty, RangeTy, Int32Ty,
5746 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5747
5748 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5749 NumEvents, EventWaitList, EventRet,
5750 Kernel, Block};
5751
5752 if (NumArgs == 7) {
5753 // Has events but no variadics.
5754 Name = "__enqueue_kernel_basic_events";
5755 llvm::FunctionType *FTy = llvm::FunctionType::get(
5756 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5757 return RValue::get(
5758 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5759 llvm::ArrayRef<llvm::Value *>(Args)));
5760 }
5761 // Has event info and variadics
5762 // Pass the number of variadics to the runtime function too.
5763 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5764 ArgTys.push_back(Int32Ty);
5765 Name = "__enqueue_kernel_events_varargs";
5766
5767 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5768 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5769 Args.push_back(ElemPtr);
5770 ArgTys.push_back(ElemPtr->getType());
5771
5772 llvm::FunctionType *FTy = llvm::FunctionType::get(
5773 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5774 auto Call =
5775 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5776 llvm::ArrayRef<llvm::Value *>(Args)));
5777 if (TmpSize)
5778 EmitLifetimeEnd(TmpSize, TmpPtr);
5779 return Call;
5780 }
5781 llvm_unreachable("Unexpected enqueue_kernel signature");
5782 }
5783 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5784 // parameter.
5785 case Builtin::BIget_kernel_work_group_size: {
5786 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5787 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5788 auto Info =
5789 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5790 Value *Kernel =
5791 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5792 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5793 return RValue::get(EmitRuntimeCall(
5794 CGM.CreateRuntimeFunction(
5795 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5796 false),
5797 "__get_kernel_work_group_size_impl"),
5798 {Kernel, Arg}));
5799 }
5800 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5801 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5802 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5803 auto Info =
5804 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5805 Value *Kernel =
5806 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5807 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5808 return RValue::get(EmitRuntimeCall(
5809 CGM.CreateRuntimeFunction(
5810 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5811 false),
5812 "__get_kernel_preferred_work_group_size_multiple_impl"),
5813 {Kernel, Arg}));
5814 }
5815 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5816 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5817 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5818 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5819 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5820 llvm::Value *NDRange = NDRangeL.getAddress(*this).emitRawPointer(*this);
5821 auto Info =
5822 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5823 Value *Kernel =
5824 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5825 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5826 const char *Name =
5827 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5828 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5829 : "__get_kernel_sub_group_count_for_ndrange_impl";
5830 return RValue::get(EmitRuntimeCall(
5831 CGM.CreateRuntimeFunction(
5832 llvm::FunctionType::get(
5833 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5834 false),
5835 Name),
5836 {NDRange, Kernel, Block}));
5837 }
5838 case Builtin::BI__builtin_store_half:
5839 case Builtin::BI__builtin_store_halff: {
5840 Value *Val = EmitScalarExpr(E->getArg(0));
5841 Address Address = EmitPointerWithAlignment(E->getArg(1));
5842 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5843 Builder.CreateStore(HalfVal, Address);
5844 return RValue::get(nullptr);
5845 }
5846 case Builtin::BI__builtin_load_half: {
5847 Address Address = EmitPointerWithAlignment(E->getArg(0));
5848 Value *HalfVal = Builder.CreateLoad(Address);
5849 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5850 }
5851 case Builtin::BI__builtin_load_halff: {
5852 Address Address = EmitPointerWithAlignment(E->getArg(0));
5853 Value *HalfVal = Builder.CreateLoad(Address);
5854 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5855 }
5856 case Builtin::BI__builtin_printf:
5857 case Builtin::BIprintf:
5858 if (getTarget().getTriple().isNVPTX() ||
5859 getTarget().getTriple().isAMDGCN()) {
5860 if (getLangOpts().OpenMPIsTargetDevice)
5861 return EmitOpenMPDevicePrintfCallExpr(E);
5862 if (getTarget().getTriple().isNVPTX())
5863 return EmitNVPTXDevicePrintfCallExpr(E);
5864 if (getTarget().getTriple().isAMDGCN() && getLangOpts().HIP)
5865 return EmitAMDGPUDevicePrintfCallExpr(E);
5866 }
5867
5868 break;
5869 case Builtin::BI__builtin_canonicalize:
5870 case Builtin::BI__builtin_canonicalizef:
5871 case Builtin::BI__builtin_canonicalizef16:
5872 case Builtin::BI__builtin_canonicalizel:
5873 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
5874
5875 case Builtin::BI__builtin_thread_pointer: {
5876 if (!getContext().getTargetInfo().isTLSSupported())
5877 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5878 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
5879 break;
5880 }
5881 case Builtin::BI__builtin_os_log_format:
5882 return emitBuiltinOSLogFormat(*E);
5883
5884 case Builtin::BI__xray_customevent: {
5885 if (!ShouldXRayInstrumentFunction())
5886 return RValue::getIgnored();
5887
5888 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5889 XRayInstrKind::Custom))
5890 return RValue::getIgnored();
5891
5892 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5893 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
5894 return RValue::getIgnored();
5895
5896 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
5897 auto FTy = F->getFunctionType();
5898 auto Arg0 = E->getArg(0);
5899 auto Arg0Val = EmitScalarExpr(Arg0);
5900 auto Arg0Ty = Arg0->getType();
5901 auto PTy0 = FTy->getParamType(0);
5902 if (PTy0 != Arg0Val->getType()) {
5903 if (Arg0Ty->isArrayType())
5904 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
5905 else
5906 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
5907 }
5908 auto Arg1 = EmitScalarExpr(E->getArg(1));
5909 auto PTy1 = FTy->getParamType(1);
5910 if (PTy1 != Arg1->getType())
5911 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
5912 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
5913 }
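// Editorial note (illustrative, not part of the source): the case above only
// lowers __xray_customevent when the current function is eligible for XRay
// instrumentation; otherwise the call is silently ignored. When it is emitted,
// the two user arguments (event buffer pointer and size) are coerced to the
// parameter types expected by the llvm.xray.customevent intrinsic, decaying
// arrays to pointers and truncating or bitcasting the size as needed.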
5914
5915 case Builtin::BI__xray_typedevent: {
5916 // TODO: There should be a way to always emit events even if the current
5917 // function is not instrumented. Losing events in a stream can cripple
5918 // a trace.
5919 if (!ShouldXRayInstrumentFunction())
5920 return RValue::getIgnored();
5921
5922 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
5923 XRayInstrKind::Typed))
5924 return RValue::getIgnored();
5925
5926 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
5927 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
5928 return RValue::getIgnored();
5929
5930 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
5931 auto FTy = F->getFunctionType();
5932 auto Arg0 = EmitScalarExpr(E->getArg(0));
5933 auto PTy0 = FTy->getParamType(0);
5934 if (PTy0 != Arg0->getType())
5935 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
5936 auto Arg1 = E->getArg(1);
5937 auto Arg1Val = EmitScalarExpr(Arg1);
5938 auto Arg1Ty = Arg1->getType();
5939 auto PTy1 = FTy->getParamType(1);
5940 if (PTy1 != Arg1Val->getType()) {
5941 if (Arg1Ty->isArrayType())
5942 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
5943 else
5944 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
5945 }
5946 auto Arg2 = EmitScalarExpr(E->getArg(2));
5947 auto PTy2 = FTy->getParamType(2);
5948 if (PTy2 != Arg2->getType())
5949 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
5950 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
5951 }
5952
5953 case Builtin::BI__builtin_ms_va_start:
5954 case Builtin::BI__builtin_ms_va_end:
5955 return RValue::get(
5956 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
5957 BuiltinID == Builtin::BI__builtin_ms_va_start));
5958
5959 case Builtin::BI__builtin_ms_va_copy: {
5960 // Lower this manually. We can't reliably determine whether or not any
5961 // given va_copy() is for a Win64 va_list from the calling convention
5962 // alone, because it's legal to do this from a System V ABI function.
5963 // With opaque pointer types, we won't have enough information in LLVM
5964 // IR to determine this from the argument types, either. Best to do it
5965 // now, while we have enough information.
5966 Address DestAddr = EmitMSVAListRef(E->getArg(0));
5967 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
5968
5969 DestAddr = DestAddr.withElementType(Int8PtrTy);
5970 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
5971
5972 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
5973 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
5974 }
5975
5976 case Builtin::BI__builtin_get_device_side_mangled_name: {
5977 auto Name = CGM.getCUDARuntime().getDeviceSideName(
5978 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
5979 auto Str = CGM.GetAddrOfConstantCString(Name, "");
5980 llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
5981 llvm::ConstantInt::get(SizeTy, 0)};
5982 auto *Ptr = llvm::ConstantExpr::getGetElementPtr(Str.getElementType(),
5983 Str.getPointer(), Zeros);
5984 return RValue::get(Ptr);
5985 }
5986 }
5987
5988 // If this is an alias for a lib function (e.g. __builtin_sin), emit
5989 // the call using the normal call path, but using the unmangled
5990 // version of the function name.
5991 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
5992 return emitLibraryCall(*this, FD, E,
5993 CGM.getBuiltinLibFunction(FD, BuiltinID));
5994
5995 // If this is a predefined lib function (e.g. malloc), emit the call
5996 // using exactly the normal call path.
5997 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
5998 return emitLibraryCall(
5999 *this, FD, E, cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
6000
6001 // Check that a call to a target specific builtin has the correct target
6002 // features.
6003 // This is down here so that non-target-specific builtins skip the check;
6004 // however, if generic builtins start to require generic target features,
6005 // then we can move this up to the beginning of the function.
6006 checkTargetFeatures(E, FD);
6007
6008 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6009 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6010
6011 // See if we have a target specific intrinsic.
6012 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6013 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6014 StringRef Prefix =
6015 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6016 if (!Prefix.empty()) {
6017 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6018 // NOTE: we don't need to perform a compatibility flag check here since the
6019 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
6020 // MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
6021 if (IntrinsicID == Intrinsic::not_intrinsic)
6022 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6023 }
6024
6025 if (IntrinsicID != Intrinsic::not_intrinsic) {
6026 SmallVector<Value*, 16> Args;
6027
6028 // Find out if any arguments are required to be integer constant
6029 // expressions.
6030 unsigned ICEArguments = 0;
6031 ASTContext::GetBuiltinTypeError Error;
6032 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6033 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6034
6035 Function *F = CGM.getIntrinsic(IntrinsicID);
6036 llvm::FunctionType *FTy = F->getFunctionType();
6037
6038 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6039 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6040 // If the intrinsic arg type is different from the builtin arg type
6041 // we need to do a bit cast.
6042 llvm::Type *PTy = FTy->getParamType(i);
6043 if (PTy != ArgValue->getType()) {
6044 // XXX - vector of pointers?
6045 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6046 if (PtrTy->getAddressSpace() !=
6047 ArgValue->getType()->getPointerAddressSpace()) {
6048 ArgValue = Builder.CreateAddrSpaceCast(
6049 ArgValue, llvm::PointerType::get(getLLVMContext(),
6050 PtrTy->getAddressSpace()));
6051 }
6052 }
6053
6054 // Cast the vector type (e.g., v256i32) to x86_amx; this only happens
6055 // in AMX intrinsics.
6056 if (PTy->isX86_AMXTy())
6057 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6058 {ArgValue->getType()}, {ArgValue});
6059 else
6060 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6061 }
6062
6063 Args.push_back(ArgValue);
6064 }
6065
6066 Value *V = Builder.CreateCall(F, Args);
6067 QualType BuiltinRetType = E->getType();
6068
6069 llvm::Type *RetTy = VoidTy;
6070 if (!BuiltinRetType->isVoidType())
6071 RetTy = ConvertType(BuiltinRetType);
6072
6073 if (RetTy != V->getType()) {
6074 // XXX - vector of pointers?
6075 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6076 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6077 V = Builder.CreateAddrSpaceCast(
6078 V, llvm::PointerType::get(getLLVMContext(),
6079 PtrTy->getAddressSpace()));
6080 }
6081 }
6082
6083 // Cast x86_amx back to the vector type (e.g., v256i32); this only happens
6084 // in AMX intrinsics.
6085 if (V->getType()->isX86_AMXTy())
6086 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6087 {V});
6088 else
6089 V = Builder.CreateBitCast(V, RetTy);
6090 }
6091
6092 if (RetTy->isVoidTy())
6093 return RValue::get(nullptr);
6094
6095 return RValue::get(V);
6096 }
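// Editorial note (not part of the source): the block above is the generic
// "builtin maps directly to an LLVM intrinsic" path. Argument and result
// mismatches are patched up with address-space casts, bitcasts, or the AMX
// tile<->vector cast intrinsics, so a builtin declared with vector types can
// still feed an intrinsic that traffics in x86_amx values, e.g. (illustrative
// IR shape only, exact overload suffix assumed):
//   %t = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %v)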
6097
6098 // Some target-specific builtins can have aggregate return values, e.g.
6099 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6100 // ReturnValue to be non-null, so that the target-specific emission code can
6101 // always just emit into it.
6102 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6103 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6104 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6105 ReturnValue = ReturnValueSlot(DestPtr, false);
6106 }
6107
6108 // Now see if we can emit a target-specific builtin.
6109 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6110 switch (EvalKind) {
6111 case TEK_Scalar:
6112 if (V->getType()->isVoidTy())
6113 return RValue::get(nullptr);
6114 return RValue::get(V);
6115 case TEK_Aggregate:
6116 return RValue::getAggregate(ReturnValue.getAddress(),
6117 ReturnValue.isVolatile());
6118 case TEK_Complex:
6119 llvm_unreachable("No current target builtin returns complex");
6120 }
6121 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6122 }
6123
6124 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6125 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6126 return RValue::get(V);
6127
6128 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6129 return EmitHipStdParUnsupportedBuiltin(this, FD);
6130
6131 ErrorUnsupported(E, "builtin function");
6132
6133 // Unknown builtin, for now just dump it out and return undef.
6134 return GetUndefRValue(E->getType());
6135}
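// Editorial summary (not in the original): taken together, the tail of
// EmitBuiltinExpr above falls back in this order: library-function builtins
// are re-emitted as ordinary libcalls, target builtins are checked for the
// required target features and vector width, a directly-mapped LLVM intrinsic
// is used when one exists, then the per-architecture Emit*BuiltinExpr hooks
// and the HLSL hook get a chance, and anything still unhandled is diagnosed
// as an unsupported builtin and lowered to undef.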
6136
6137 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6138 unsigned BuiltinID, const CallExpr *E,
6139 ReturnValueSlot ReturnValue,
6140 llvm::Triple::ArchType Arch) {
6141 // When compiling in HipStdPar mode we have to be conservative about rejecting
6142 // target-specific features in the FE, and defer the possible error to the
6143 // AcceleratorCodeSelection pass, which emits an error only if an unsupported
6144 // target builtin is referenced by an accelerator executable function.
6145 // Returning nullptr here leads to the builtin being handled in
6146 // EmitHipStdParUnsupportedBuiltin.
6147 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6148 Arch != CGF->getTarget().getTriple().getArch())
6149 return nullptr;
6150
6151 switch (Arch) {
6152 case llvm::Triple::arm:
6153 case llvm::Triple::armeb:
6154 case llvm::Triple::thumb:
6155 case llvm::Triple::thumbeb:
6156 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6157 case llvm::Triple::aarch64:
6158 case llvm::Triple::aarch64_32:
6159 case llvm::Triple::aarch64_be:
6160 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6161 case llvm::Triple::bpfeb:
6162 case llvm::Triple::bpfel:
6163 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6164 case llvm::Triple::x86:
6165 case llvm::Triple::x86_64:
6166 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6167 case llvm::Triple::ppc:
6168 case llvm::Triple::ppcle:
6169 case llvm::Triple::ppc64:
6170 case llvm::Triple::ppc64le:
6171 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6172 case llvm::Triple::r600:
6173 case llvm::Triple::amdgcn:
6174 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6175 case llvm::Triple::systemz:
6176 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6177 case llvm::Triple::nvptx:
6178 case llvm::Triple::nvptx64:
6179 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6180 case llvm::Triple::wasm32:
6181 case llvm::Triple::wasm64:
6182 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6183 case llvm::Triple::hexagon:
6184 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6185 case llvm::Triple::riscv32:
6186 case llvm::Triple::riscv64:
6187 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6188 default:
6189 return nullptr;
6190 }
6191}
6192
6193 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6194 const CallExpr *E,
6195 ReturnValueSlot ReturnValue) {
6196 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6197 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6198 return EmitTargetArchBuiltinExpr(
6199 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6200 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6201 }
6202
6203 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6204 getTarget().getTriple().getArch());
6205}
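// Editorial note (not part of the source): isAuxBuiltinID() is true for
// builtins that belong to the auxiliary target -- e.g. host builtins seen
// while compiling CUDA/HIP device code -- so EmitTargetBuiltinExpr above
// reroutes those to the aux triple's architecture hook and dispatches all
// other builtins on the current target triple.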
6206
6207static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6208 NeonTypeFlags TypeFlags,
6209 bool HasLegalHalfType = true,
6210 bool V1Ty = false,
6211 bool AllowBFloatArgsAndRet = true) {
6212 int IsQuad = TypeFlags.isQuad();
6213 switch (TypeFlags.getEltType()) {
6214 case NeonTypeFlags::Int8:
6215 case NeonTypeFlags::Poly8:
6216 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6217 case NeonTypeFlags::Int16:
6218 case NeonTypeFlags::Poly16:
6219 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6220 case NeonTypeFlags::BFloat16:
6221 if (AllowBFloatArgsAndRet)
6222 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6223 else
6224 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6225 case NeonTypeFlags::Float16:
6226 if (HasLegalHalfType)
6227 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6228 else
6229 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6230 case NeonTypeFlags::Int32:
6231 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6232 case NeonTypeFlags::Int64:
6233 case NeonTypeFlags::Poly64:
6234 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6235 case NeonTypeFlags::Poly128:
6236 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
6237 // a lot of the i128 and f128 API is missing,
6238 // so we use v16i8 to represent poly128 and get it pattern-matched.
6239 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6240 case NeonTypeFlags::Float32:
6241 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6242 case NeonTypeFlags::Float64:
6243 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6244 }
6245 llvm_unreachable("Unknown vector element type!");
6246}
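// Editorial example (not in the source): the Q ("quad") bit simply doubles the
// lane count, and V1Ty forces a single-lane vector. For instance, with
// TypeFlags.getEltType() == NeonTypeFlags::Int8 the function returns
//   <8 x i8>  for a 64-bit D register (IsQuad == 0), and
//   <16 x i8> for a 128-bit Q register (IsQuad == 1),
// which is exactly the `8 << IsQuad` computation above.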
6247
6248static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6249 NeonTypeFlags IntTypeFlags) {
6250 int IsQuad = IntTypeFlags.isQuad();
6251 switch (IntTypeFlags.getEltType()) {
6252 case NeonTypeFlags::Int16:
6253 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6254 case NeonTypeFlags::Int32:
6255 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6256 case NeonTypeFlags::Int64:
6257 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6258 default:
6259 llvm_unreachable("Type can't be converted to floating-point!");
6260 }
6261}
6262
6263 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6264 const ElementCount &Count) {
6265 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6266 return Builder.CreateShuffleVector(V, V, SV, "lane");
6267}
6268
6269 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6270 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6271 return EmitNeonSplat(V, C, EC);
6272}
6273
6274 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6275 const char *name,
6276 unsigned shift, bool rightshift) {
6277 unsigned j = 0;
6278 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6279 ai != ae; ++ai, ++j) {
6280 if (F->isConstrainedFPIntrinsic())
6281 if (ai->getType()->isMetadataTy())
6282 continue;
6283 if (shift > 0 && shift == j)
6284 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6285 else
6286 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6287 }
6288
6289 if (F->isConstrainedFPIntrinsic())
6290 return Builder.CreateConstrainedFPCall(F, Ops, name);
6291 else
6292 return Builder.CreateCall(F, Ops, name);
6293}
6294
6295 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6296 bool neg) {
6297 int SV = cast<ConstantInt>(V)->getSExtValue();
6298 return ConstantInt::get(Ty, neg ? -SV : SV);
6299}
6300
6301// Right-shift a vector by a constant.
6302 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6303 llvm::Type *Ty, bool usgn,
6304 const char *name) {
6305 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6306
6307 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6308 int EltSize = VTy->getScalarSizeInBits();
6309
6310 Vec = Builder.CreateBitCast(Vec, Ty);
6311
6312 // lshr/ashr are undefined when the shift amount is equal to the vector
6313 // element size.
6314 if (ShiftAmt == EltSize) {
6315 if (usgn) {
6316 // Right-shifting an unsigned value by its size yields 0.
6317 return llvm::ConstantAggregateZero::get(VTy);
6318 } else {
6319 // Right-shifting a signed value by its size is equivalent
6320 // to a shift of size-1.
6321 --ShiftAmt;
6322 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6323 }
6324 }
6325
6326 Shift = EmitNeonShiftVector(Shift, Ty, false);
6327 if (usgn)
6328 return Builder.CreateLShr(Vec, Shift, name);
6329 else
6330 return Builder.CreateAShr(Vec, Shift, name);
6331}
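// Editorial example (not in the source): for a <8 x i8> operand shifted right
// by 8 -- a shift amount equal to the element size -- the code above returns
// zeroinitializer in the unsigned case, and clamps the amount to 7 in the
// signed case, so the arithmetic shift still reproduces each lane's sign bit
// instead of being undefined.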
6332
6333enum {
6334 AddRetType = (1 << 0),
6335 Add1ArgType = (1 << 1),
6336 Add2ArgTypes = (1 << 2),
6337
6338 VectorizeRetType = (1 << 3),
6339 VectorizeArgTypes = (1 << 4),
6340
6341 InventFloatType = (1 << 5),
6342 UnsignedAlts = (1 << 6),
6343
6344 Use64BitVectors = (1 << 7),
6345 Use128BitVectors = (1 << 8),
6346
6347 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6348 VectorRet = AddRetType | VectorizeRetType,
6349 VectorRetGetArgs01 =
6350 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6351 FpCmpzModifiers =
6352 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6353 };
6354
6355namespace {
6356struct ARMVectorIntrinsicInfo {
6357 const char *NameHint;
6358 unsigned BuiltinID;
6359 unsigned LLVMIntrinsic;
6360 unsigned AltLLVMIntrinsic;
6361 unsigned TypeModifier;
6362
6363 bool operator<(unsigned RHSBuiltinID) const {
6364 return BuiltinID < RHSBuiltinID;
6365 }
6366 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6367 return BuiltinID < TE.BuiltinID;
6368 }
6369};
6370} // end anonymous namespace
6371
6372#define NEONMAP0(NameBase) \
6373 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6374
6375#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6376 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6377 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6378
6379#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6380 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6381 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6382 TypeModifier }
6383
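// Editorial example (not in the source): a table entry written with these
// macros, e.g.
//   NEONMAP1(vabs_v, arm_neon_vabs, 0)
// expands to the ARMVectorIntrinsicInfo initializer
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 },
// i.e. { NameHint, BuiltinID, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier },
// while NEONMAP0 leaves both intrinsic fields 0 and NEONMAP2 also fills in the
// alternative (typically signed/unsigned) intrinsic.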
6384static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6385 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6386 NEONMAP0(splat_lane_v),
6387 NEONMAP0(splat_laneq_v),
6388 NEONMAP0(splatq_lane_v),
6389 NEONMAP0(splatq_laneq_v),
6390 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6391 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6392 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6393 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6394 NEONMAP0(vadd_v),
6395 NEONMAP0(vaddhn_v),
6396 NEONMAP0(vaddq_v),
6397 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6398 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6399 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6400 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6401 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6402 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6403 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6404 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6405 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6406 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6407 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6408 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6409 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6410 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6411 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6412 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6413 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6414 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6415 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6416 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6417 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6418 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6419 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6420 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6421 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6422 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6423 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6424 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6425 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6426 NEONMAP0(vceqz_v),
6427 NEONMAP0(vceqzq_v),
6428 NEONMAP0(vcgez_v),
6429 NEONMAP0(vcgezq_v),
6430 NEONMAP0(vcgtz_v),
6431 NEONMAP0(vcgtzq_v),
6432 NEONMAP0(vclez_v),
6433 NEONMAP0(vclezq_v),
6434 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6435 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6436 NEONMAP0(vcltz_v),
6437 NEONMAP0(vcltzq_v),
6438 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6439 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6440 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6441 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6442 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6443 NEONMAP0(vcvt_f16_s16),
6444 NEONMAP0(vcvt_f16_u16),
6445 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6446 NEONMAP0(vcvt_f32_v),
6447 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6448 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6449 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6450 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6451 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6452 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6453 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6454 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6455 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6456 NEONMAP0(vcvt_s16_f16),
6457 NEONMAP0(vcvt_s32_v),
6458 NEONMAP0(vcvt_s64_v),
6459 NEONMAP0(vcvt_u16_f16),
6460 NEONMAP0(vcvt_u32_v),
6461 NEONMAP0(vcvt_u64_v),
6462 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6463 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6464 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6465 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6466 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6467 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6468 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6469 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6470 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6471 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6472 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6473 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6474 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6475 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6476 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6477 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6478 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6479 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6480 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6481 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6482 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6483 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6484 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6485 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6486 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6487 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6488 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6489 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6490 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6491 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6492 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6493 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6494 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6495 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6496 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6497 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6498 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6499 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6500 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6501 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6502 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6503 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6504 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6505 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6506 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6507 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6508 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6509 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6510 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6511 NEONMAP0(vcvtq_f16_s16),
6512 NEONMAP0(vcvtq_f16_u16),
6513 NEONMAP0(vcvtq_f32_v),
6514 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6515 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6516 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6517 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6518 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6519 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6520 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6521 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6522 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6523 NEONMAP0(vcvtq_s16_f16),
6524 NEONMAP0(vcvtq_s32_v),
6525 NEONMAP0(vcvtq_s64_v),
6526 NEONMAP0(vcvtq_u16_f16),
6527 NEONMAP0(vcvtq_u32_v),
6528 NEONMAP0(vcvtq_u64_v),
6529 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6530 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6531 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6532 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6533 NEONMAP0(vext_v),
6534 NEONMAP0(vextq_v),
6535 NEONMAP0(vfma_v),
6536 NEONMAP0(vfmaq_v),
6537 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6538 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6539 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6540 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6541 NEONMAP0(vld1_dup_v),
6542 NEONMAP1(vld1_v, arm_neon_vld1, 0),
6543 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6544 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6545 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6546 NEONMAP0(vld1q_dup_v),
6547 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6548 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6549 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6550 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6551 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6552 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6553 NEONMAP1(vld2_v, arm_neon_vld2, 0),
6554 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6555 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6556 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6557 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6558 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6559 NEONMAP1(vld3_v, arm_neon_vld3, 0),
6560 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6561 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6562 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6563 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6564 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6565 NEONMAP1(vld4_v, arm_neon_vld4, 0),
6566 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6567 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6568 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6569 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6570 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6571 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6572 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6573 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6574 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6575 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6576 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6577 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6578 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6579 NEONMAP0(vmovl_v),
6580 NEONMAP0(vmovn_v),
6581 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6582 NEONMAP0(vmull_v),
6583 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6584 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6585 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6586 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6587 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6588 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6589 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6590 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6591 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6592 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6593 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6594 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6595 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6596 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6597 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6598 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6599 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6600 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6601 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6602 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6603 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6604 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6605 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6606 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6607 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6608 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6609 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6610 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6611 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6612 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6613 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6614 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6615 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6616 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6617 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6618 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6619 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6620 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6621 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6622 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6623 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6624 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6625 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6626 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6627 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6628 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6629 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6630 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6631 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6632 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6633 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6634 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6635 NEONMAP0(vrndi_v),
6636 NEONMAP0(vrndiq_v),
6637 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6638 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6639 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6640 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6641 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6642 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6643 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6644 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6645 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6646 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6647 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6648 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6649 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6650 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6651 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6652 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6653 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6654 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6655 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6656 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6657 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6658 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6659 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6660 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6661 NEONMAP0(vshl_n_v),
6662 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6663 NEONMAP0(vshll_n_v),
6664 NEONMAP0(vshlq_n_v),
6665 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6666 NEONMAP0(vshr_n_v),
6667 NEONMAP0(vshrn_n_v),
6668 NEONMAP0(vshrq_n_v),
6669 NEONMAP1(vst1_v, arm_neon_vst1, 0),
6670 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6671 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6672 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6673 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6674 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6675 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6676 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6677 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6678 NEONMAP1(vst2_v, arm_neon_vst2, 0),
6679 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6680 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6681 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6682 NEONMAP1(vst3_v, arm_neon_vst3, 0),
6683 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6684 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6685 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6686 NEONMAP1(vst4_v, arm_neon_vst4, 0),
6687 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6688 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6689 NEONMAP0(vsubhn_v),
6690 NEONMAP0(vtrn_v),
6691 NEONMAP0(vtrnq_v),
6692 NEONMAP0(vtst_v),
6693 NEONMAP0(vtstq_v),
6694 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6695 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6696 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6697 NEONMAP0(vuzp_v),
6698 NEONMAP0(vuzpq_v),
6699 NEONMAP0(vzip_v),
6700 NEONMAP0(vzipq_v)
6701};
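#if 0 // Editorial sketch, not part of CGBuiltin.cpp: how a sorted
      // ARMVectorIntrinsicInfo table such as ARMSIMDIntrinsicMap above can be
      // searched by builtin ID, relying on the operator<(unsigned) comparator
      // defined on the struct. The helper name is hypothetical, and the sketch
      // assumes the table is kept sorted by BuiltinID and that <algorithm> and
      // llvm/ADT/ArrayRef.h are available.
static const ARMVectorIntrinsicInfo *
findVectorIntrinsicInfo(llvm::ArrayRef<ARMVectorIntrinsicInfo> Map,
                        unsigned BuiltinID) {
  const ARMVectorIntrinsicInfo *It =
      std::lower_bound(Map.begin(), Map.end(), BuiltinID);
  if (It != Map.end() && It->BuiltinID == BuiltinID)
    return It;    // exact match: this entry describes the builtin
  return nullptr; // no table entry for this builtin
}
#endif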
6702
6703static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6704 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6705 NEONMAP0(splat_lane_v),
6706 NEONMAP0(splat_laneq_v),
6707 NEONMAP0(splatq_lane_v),
6708 NEONMAP0(splatq_laneq_v),
6709 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6710 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6711 NEONMAP0(vadd_v),
6712 NEONMAP0(vaddhn_v),
6713 NEONMAP0(vaddq_p128),
6714 NEONMAP0(vaddq_v),
6715 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6716 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6717 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6718 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6719 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6720 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6721 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6722 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6723 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6724 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6725 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6726 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6727 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6728 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6729 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6730 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6731 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6732 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6733 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6734 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6735 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6736 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6737 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6738 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6739 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6740 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6741 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6742 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6743 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6744 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6745 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6746 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6747 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6748 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6749 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6750 NEONMAP0(vceqz_v),
6751 NEONMAP0(vceqzq_v),
6752 NEONMAP0(vcgez_v),
6753 NEONMAP0(vcgezq_v),
6754 NEONMAP0(vcgtz_v),
6755 NEONMAP0(vcgtzq_v),
6756 NEONMAP0(vclez_v),
6757 NEONMAP0(vclezq_v),
6758 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6759 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6760 NEONMAP0(vcltz_v),
6761 NEONMAP0(vcltzq_v),
6762 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6763 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6764 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6765 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6766 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6767 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6768 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6769 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6770 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6771 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6772 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6773 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6774 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6775 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6776 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6777 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6778 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6779 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6780 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6781 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6782 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6783 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6784 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6785 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6786 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6787 NEONMAP0(vcvt_f16_s16),
6788 NEONMAP0(vcvt_f16_u16),
6789 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6790 NEONMAP0(vcvt_f32_v),
6791 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6792 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6793 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6794 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6795 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6796 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6797 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6798 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6799 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6800 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6801 NEONMAP0(vcvtq_f16_s16),
6802 NEONMAP0(vcvtq_f16_u16),
6803 NEONMAP0(vcvtq_f32_v),
6804 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6805 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6806 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6807 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6808 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6809 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6810 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6811 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6812 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6813 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6814 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6815 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6816 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6817 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6818 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6819 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6820 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6821 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6822 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6823 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6824 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6825 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6826 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6827 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6828 NEONMAP0(vext_v),
6829 NEONMAP0(vextq_v),
6830 NEONMAP0(vfma_v),
6831 NEONMAP0(vfmaq_v),
6832 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6833 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6834 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6835 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6836 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6837 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6838 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6839 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6840 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6841 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6842 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6843 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6844 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6845 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6846 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6847 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6848 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6849 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6850 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6851 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6852 NEONMAP0(vmovl_v),
6853 NEONMAP0(vmovn_v),
6854 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6855 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6856 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6857 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6858 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6859 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6860 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6861 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6862 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6863 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6864 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6865 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6866 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6867 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6868 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6869 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6870 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6871 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6872 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6873 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6874 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6875 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6876 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
6877 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6878 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6879 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
6880 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
6881 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6882 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6883 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
6884 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
6885 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6886 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6887 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
6888 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
6889 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
6890 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
6891 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6892 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
6893 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
6894 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6895 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
6896 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
6897 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
6898 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
6899 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6900 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
6901 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
6902 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
6903 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6904 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
6905 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
6906 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
6907 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6908 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
6909 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
6910 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
6911 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
6912 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
6913 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
6914 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
6915 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
6916 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
6917 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
6918 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
6919 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
6920 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
6921 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
6922 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
6923 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
6924 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
6925 NEONMAP0(vrndi_v),
6926 NEONMAP0(vrndiq_v),
6927 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6928 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
6929 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6930 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
6931 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6932 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
6933 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
6934 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
6935 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
6936 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
6937 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
6938 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
6939 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
6940 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
6941 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
6942 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
6943 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
6944 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
6945 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
6946 NEONMAP0(vshl_n_v),
6947 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6948 NEONMAP0(vshll_n_v),
6949 NEONMAP0(vshlq_n_v),
6950 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
6951 NEONMAP0(vshr_n_v),
6952 NEONMAP0(vshrn_n_v),
6953 NEONMAP0(vshrq_n_v),
6954 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
6955 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
6956 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
6957 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
6958 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
6959 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
6960 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
6961 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
6962 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
6963 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
6964 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
6965 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
6966 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
6967 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
6968 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
6969 NEONMAP0(vsubhn_v),
6970 NEONMAP0(vtst_v),
6971 NEONMAP0(vtstq_v),
6972 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
6973 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
6974 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
6975 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
6976};
6977
6978static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
6979 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
6980 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
6981 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
6982 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6983 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6984 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
6985 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
6986 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6987 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6988 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6989 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
6990 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
6991 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
6992 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
6993 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6994 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
6995 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
6996 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
6997 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
6998 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
6999 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7000 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7001 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7002 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7003 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7004 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7005 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7006 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7007 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7008 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7009 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7010 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7011 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7012 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7013 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7014 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7015 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7016 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7017 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7018 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7019 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7020 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7021 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7022 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7023 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7024 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7025 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7026 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7027 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7028 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7029 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7030 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7031 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7032 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7033 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7034 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7035 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7036 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7037 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7038 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7039 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7040 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7041 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7042 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7043 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7044 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7045 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7046 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7047 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7048 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7049 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7050 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7051 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7052 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7053 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7054 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7055 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7056 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7057 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7058 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7059 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7060 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7061 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7062 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7063 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7064 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7065 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7066 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7067 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7068 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7069 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7070 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7071 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7072 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7073 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7074 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7075 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7076 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7077 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7078 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7079 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7080 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7081 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7082 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7083 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7084 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7085 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7086 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7087 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7088 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7089 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7090 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7091 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7092 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7093 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7094 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7095 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7096 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7097 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7098 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7099 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7100 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7101 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7102 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7103 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7104 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7105 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7106 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7107 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7108 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7109 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7110 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7111 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7112 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7113 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7114 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7115 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7116 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7117 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7118 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7119 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7120 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7121 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7122 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7123 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7124 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7125 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7126 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7127 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7128 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7129 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7130 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7131 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7132 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7133 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7134 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7135 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7136 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7137 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7138 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7139 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7140 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7141 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7142 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7143 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7144 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7145 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7146 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7147 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7148 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7149 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7150 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7151 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7152 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7153 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7154 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7155 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7156 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7157 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7158 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7159 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7160 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7161 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7162 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7163 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7164 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7165 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7166 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7167 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7168 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7169 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7170 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7171 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7172 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7173 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7174 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7175 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7176 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7177 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7178 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7179 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7180 // FP16 scalar intrinsics go here.
7181 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7182 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7183 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7184 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7185 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7186 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7187 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7188 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7189 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7190 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7191 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7192 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7193 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7194 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7195 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7196 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7197 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7198 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7199 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7200 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7201 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7202 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7203 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7204 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7205 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7206 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7207 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7208 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7209 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7210 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7211 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7212 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7213 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7214 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7215};
7216
7217// Some intrinsics are equivalent for codegen.
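// A minimal sketch of how this table is meant to be consumed (the actual
// lookup code lives elsewhere in this file and is assumed here): when a
// builtin ID such as NEON::BI__builtin_neon_vabs_f16 comes in, it is remapped
// to NEON::BI__builtin_neon_vabs_v before the normal table-driven lowering
// runs, so the generic _v handler covers the f16/bf16 variants as well. Only
// the builtin ID changes; operands and NEON type flags are left untouched.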
7218static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7219 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7220 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7221 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7222 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7223 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7224 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7225 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7226 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7227 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7228 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7229 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7230 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7231 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7232 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7233 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7234 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7235 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7236 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7237 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7238 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7239 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7240 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7241 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7242 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7243 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7244 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7245 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7246 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7247 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7248 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7249 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7250 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7251 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7252 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7253 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7254 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7255 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7256 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7257 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7258 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7259 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7260 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7261 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7262 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7263 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7264 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7265 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7266 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7267 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7268 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7269 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7270 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7271 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7272 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7273 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7274 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7275 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7276 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7277 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7278 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7279 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7280 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7281 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7282 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7283 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7284 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7285 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7286 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7287 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7288 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7289 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7290 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7291 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7292 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7293 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7294 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7295 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7296 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7297 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7298 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7299 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7300 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7301 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7302 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7303 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7304 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7305 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7306 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7307 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7308 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7309 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7310 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7311 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7312 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7313 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7314 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7315 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7316 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7317 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7318 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7319 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7320 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7321 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7322 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7323 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7324 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7325 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7326 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7327 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7328 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7329 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7330 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7331 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7332 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7333 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7334 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7335 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7336 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7337 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7338 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7339 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7340 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7341 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7342 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7343 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7344 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7345 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7346 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7347 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7348 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7349 // arbitrary one to be handled as the canonical variation.
7350 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7351 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7352 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7353 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7354 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7355 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7356 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7357 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7358 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7359 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7360 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7361 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7362};
7363
7364#undef NEONMAP0
7365#undef NEONMAP1
7366#undef NEONMAP2
7367
7368#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7369 { \
7370 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7371 TypeModifier \
7372 }
7373
7374#define SVEMAP2(NameBase, TypeModifier) \
7375 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
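// Purely illustrative expansion, using a hypothetical builtin name: an entry
// written as SVEMAP1(svfoo, aarch64_sve_foo, 0) would expand to
//   { "svfoo", SVE::BI__builtin_sve_svfoo, Intrinsic::aarch64_sve_foo, 0, 0 },
// i.e. name string, clang builtin ID, LLVM intrinsic ID, alternate intrinsic
// slot (unused here) and type-modifier flags, in the same
// ARMVectorIntrinsicInfo layout as the NEON maps above. SVEMAP2 entries carry
// no LLVM intrinsic ID and are presumably lowered by dedicated code rather
// than the generic table-driven path.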
7376static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7377#define GET_SVE_LLVM_INTRINSIC_MAP
7378#include "clang/Basic/arm_sve_builtin_cg.inc"
7379#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7380#undef GET_SVE_LLVM_INTRINSIC_MAP
7381};
7382
7383#undef SVEMAP1
7384#undef SVEMAP2
7385
7386#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7387 { \
7388 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7389 TypeModifier \
7390 }
7391
7392#define SMEMAP2(NameBase, TypeModifier) \
7393 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7394static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7395#define GET_SME_LLVM_INTRINSIC_MAP
7396#include "clang/Basic/arm_sme_builtin_cg.inc"
7397#undef GET_SME_LLVM_INTRINSIC_MAP
7398};
7399
7400#undef SMEMAP1
7401#undef SMEMAP2
7402
7403static bool NEONSIMDIntrinsicsProvenSorted = false;
7404
7405static bool AArch64SIMDIntrinsicsProvenSorted = false;
7406static bool AArch64SISDIntrinsicsProvenSorted = false;
7407static bool AArch64SVEIntrinsicsProvenSorted = false;
7408static bool AArch64SMEIntrinsicsProvenSorted = false;
7409
7410static const ARMVectorIntrinsicInfo *
7411findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7412 unsigned BuiltinID, bool &MapProvenSorted) {
7413
7414#ifndef NDEBUG
7415 if (!MapProvenSorted) {
7416 assert(llvm::is_sorted(IntrinsicMap));
7417 MapProvenSorted = true;
7418 }
7419#endif
7420
7421 const ARMVectorIntrinsicInfo *Builtin =
7422 llvm::lower_bound(IntrinsicMap, BuiltinID);
7423
7424 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7425 return Builtin;
7426
7427 return nullptr;
7428}
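// Usage sketch for the lookup helper above, assuming one of the intrinsic
// tables and its matching "proven sorted" flag from elsewhere in this file
// (e.g. AArch64SIMDIntrinsicMap / AArch64SIMDIntrinsicsProvenSorted):
//   if (const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//           AArch64SIMDIntrinsicMap, BuiltinID,
//           AArch64SIMDIntrinsicsProvenSorted))
//     /* dispatch on Info->LLVMIntrinsic and Info->TypeModifier */;
// The NDEBUG-only is_sorted assert runs once per table, which keeps the
// llvm::lower_bound binary search valid.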
7429
7430Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7431 unsigned Modifier,
7432 llvm::Type *ArgType,
7433 const CallExpr *E) {
7434 int VectorSize = 0;
7435 if (Modifier & Use64BitVectors)
7436 VectorSize = 64;
7437 else if (Modifier & Use128BitVectors)
7438 VectorSize = 128;
7439
7440 // Return type.
7441 SmallVector<llvm::Type *, 3> Tys;
7442 if (Modifier & AddRetType) {
7443 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7444 if (Modifier & VectorizeRetType)
7445 Ty = llvm::FixedVectorType::get(
7446 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7447
7448 Tys.push_back(Ty);
7449 }
7450
7451 // Arguments.
7452 if (Modifier & VectorizeArgTypes) {
7453 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7454 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7455 }
7456
7457 if (Modifier & (Add1ArgType | Add2ArgTypes))
7458 Tys.push_back(ArgType);
7459
7460 if (Modifier & Add2ArgTypes)
7461 Tys.push_back(ArgType);
7462
7463 if (Modifier & InventFloatType)
7464 Tys.push_back(FloatTy);
7465
7466 return CGM.getIntrinsic(IntrinsicID, Tys);
7467}
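// Worked example of the modifier handling above, using entries from the
// scalar map: vmaxv_f32 is tagged AddRetType | Add1ArgType, so Tys becomes
// { float, <2 x float> } and the overloaded intrinsic resolves to something
// like llvm.aarch64.neon.fmaxv.f32.v2f32. Entries combining the
// VectorizeArgTypes handling with Use64BitVectors (e.g. vqaddb_s8) instead
// get their scalar argument type rewritten to a 64-bit vector such as
// <8 x i8> before it is pushed onto Tys.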
7468
7469static Value *EmitCommonNeonSISDBuiltinExpr(
7470 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7471 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7472 unsigned BuiltinID = SISDInfo.BuiltinID;
7473 unsigned int Int = SISDInfo.LLVMIntrinsic;
7474 unsigned Modifier = SISDInfo.TypeModifier;
7475 const char *s = SISDInfo.NameHint;
7476
7477 switch (BuiltinID) {
7478 case NEON::BI__builtin_neon_vcled_s64:
7479 case NEON::BI__builtin_neon_vcled_u64:
7480 case NEON::BI__builtin_neon_vcles_f32:
7481 case NEON::BI__builtin_neon_vcled_f64:
7482 case NEON::BI__builtin_neon_vcltd_s64:
7483 case NEON::BI__builtin_neon_vcltd_u64:
7484 case NEON::BI__builtin_neon_vclts_f32:
7485 case NEON::BI__builtin_neon_vcltd_f64:
7486 case NEON::BI__builtin_neon_vcales_f32:
7487 case NEON::BI__builtin_neon_vcaled_f64:
7488 case NEON::BI__builtin_neon_vcalts_f32:
7489 case NEON::BI__builtin_neon_vcaltd_f64:
7490 // Only one direction of comparisons actually exists; cmle is actually a cmge
7491 // with swapped operands. The table gives us the right intrinsic but we
7492 // still need to do the swap.
7493 std::swap(Ops[0], Ops[1]);
7494 break;
7495 }
7496
7497 assert(Int && "Generic code assumes a valid intrinsic");
7498
7499 // Determine the type(s) of this overloaded AArch64 intrinsic.
7500 const Expr *Arg = E->getArg(0);
7501 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7502 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7503
7504 int j = 0;
7505 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7506 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7507 ai != ae; ++ai, ++j) {
7508 llvm::Type *ArgTy = ai->getType();
7509 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7510 ArgTy->getPrimitiveSizeInBits())
7511 continue;
7512
7513 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7514 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7515 // it before inserting.
7516 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7517 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7518 Ops[j] =
7519 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7520 }
7521
7522 Value *Result = CGF.EmitNeonCall(F, Ops, s);
7523 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7524 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7525 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7526 return CGF.Builder.CreateExtractElement(Result, C0);
7527
7528 return CGF.Builder.CreateBitCast(Result, ResultType, s);
7529}
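// Rough end-to-end shape for a scalar saturating builtin such as vqaddb_s8
// (mapped above with Vectorize1ArgType | Use64BitVectors): both i8 operands
// are inserted into lane 0 of poison <8 x i8> vectors, a full
// llvm.aarch64.neon.sqadd.v8i8 call is emitted, and lane 0 of the result is
// extracted because the builtin's i8 result is narrower than the vector the
// intrinsic returns.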
7530
7531Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7532 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7533 const char *NameHint, unsigned Modifier, const CallExpr *E,
7534 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7535 llvm::Triple::ArchType Arch) {
7536 // Get the last argument, which specifies the vector type.
7537 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7538 std::optional<llvm::APSInt> NeonTypeConst =
7539 Arg->getIntegerConstantExpr(getContext());
7540 if (!NeonTypeConst)
7541 return nullptr;
7542
7543 // Determine the type of this overloaded NEON intrinsic.
7544 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7545 bool Usgn = Type.isUnsigned();
7546 bool Quad = Type.isQuad();
7547 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7548 const bool AllowBFloatArgsAndRet =
7549 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7550
7551 llvm::FixedVectorType *VTy =
7552 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7553 llvm::Type *Ty = VTy;
7554 if (!Ty)
7555 return nullptr;
7556
7557 auto getAlignmentValue32 = [&](Address addr) -> Value* {
7558 return Builder.getInt32(addr.getAlignment().getQuantity());
7559 };
7560
7561 unsigned Int = LLVMIntrinsic;
7562 if ((Modifier & UnsignedAlts) && !Usgn)
7563 Int = AltLLVMIntrinsic;
7564
7565 switch (BuiltinID) {
7566 default: break;
7567 case NEON::BI__builtin_neon_splat_lane_v:
7568 case NEON::BI__builtin_neon_splat_laneq_v:
7569 case NEON::BI__builtin_neon_splatq_lane_v:
7570 case NEON::BI__builtin_neon_splatq_laneq_v: {
7571 auto NumElements = VTy->getElementCount();
7572 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7573 NumElements = NumElements * 2;
7574 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7575 NumElements = NumElements.divideCoefficientBy(2);
7576
7577 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7578 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7579 }
7580 case NEON::BI__builtin_neon_vpadd_v:
7581 case NEON::BI__builtin_neon_vpaddq_v:
7582 // We don't allow fp/int overloading of intrinsics.
7583 if (VTy->getElementType()->isFloatingPointTy() &&
7584 Int == Intrinsic::aarch64_neon_addp)
7585 Int = Intrinsic::aarch64_neon_faddp;
7586 break;
7587 case NEON::BI__builtin_neon_vabs_v:
7588 case NEON::BI__builtin_neon_vabsq_v:
7589 if (VTy->getElementType()->isFloatingPointTy())
7590 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7591 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7592 case NEON::BI__builtin_neon_vadd_v:
7593 case NEON::BI__builtin_neon_vaddq_v: {
7594 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7595 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7596 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
7597 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
7598 return Builder.CreateBitCast(Ops[0], Ty);
7599 }
7600 case NEON::BI__builtin_neon_vaddhn_v: {
7601 llvm::FixedVectorType *SrcTy =
7602 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7603
7604 // %sum = add <4 x i32> %lhs, %rhs
7605 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7606 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7607 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7608
7609 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7610 Constant *ShiftAmt =
7611 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7612 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7613
7614 // %res = trunc <4 x i32> %high to <4 x i16>
7615 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7616 }
7617 case NEON::BI__builtin_neon_vcale_v:
7618 case NEON::BI__builtin_neon_vcaleq_v:
7619 case NEON::BI__builtin_neon_vcalt_v:
7620 case NEON::BI__builtin_neon_vcaltq_v:
7621 std::swap(Ops[0], Ops[1]);
7622 [[fallthrough]];
7623 case NEON::BI__builtin_neon_vcage_v:
7624 case NEON::BI__builtin_neon_vcageq_v:
7625 case NEON::BI__builtin_neon_vcagt_v:
7626 case NEON::BI__builtin_neon_vcagtq_v: {
7627 llvm::Type *Ty;
7628 switch (VTy->getScalarSizeInBits()) {
7629 default: llvm_unreachable("unexpected type");
7630 case 32:
7631 Ty = FloatTy;
7632 break;
7633 case 64:
7634 Ty = DoubleTy;
7635 break;
7636 case 16:
7637 Ty = HalfTy;
7638 break;
7639 }
7640 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7641 llvm::Type *Tys[] = { VTy, VecFlt };
7642 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7643 return EmitNeonCall(F, Ops, NameHint);
7644 }
7645 case NEON::BI__builtin_neon_vceqz_v:
7646 case NEON::BI__builtin_neon_vceqzq_v:
7647 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7648 ICmpInst::ICMP_EQ, "vceqz");
7649 case NEON::BI__builtin_neon_vcgez_v:
7650 case NEON::BI__builtin_neon_vcgezq_v:
7651 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7652 ICmpInst::ICMP_SGE, "vcgez");
7653 case NEON::BI__builtin_neon_vclez_v:
7654 case NEON::BI__builtin_neon_vclezq_v:
7655 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7656 ICmpInst::ICMP_SLE, "vclez");
7657 case NEON::BI__builtin_neon_vcgtz_v:
7658 case NEON::BI__builtin_neon_vcgtzq_v:
7659 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7660 ICmpInst::ICMP_SGT, "vcgtz");
7661 case NEON::BI__builtin_neon_vcltz_v:
7662 case NEON::BI__builtin_neon_vcltzq_v:
7663 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7664 ICmpInst::ICMP_SLT, "vcltz");
7665 case NEON::BI__builtin_neon_vclz_v:
7666 case NEON::BI__builtin_neon_vclzq_v:
7667 // We generate a target-independent intrinsic, which needs a second argument
7668 // for whether or not clz of zero is undefined; on ARM it isn't.
7669 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7670 break;
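// For example (assuming the map points vclz at the generic ctlz intrinsic, as
// the comment above implies), vclz_s32 on a <2 x i32> operand becomes roughly
//   %vclz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// with the i1 false being the "zero input is poison" flag pushed here.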
7671 case NEON::BI__builtin_neon_vcvt_f32_v:
7672 case NEON::BI__builtin_neon_vcvtq_f32_v:
7673 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7674 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7675 HasLegalHalfType);
7676 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7677 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7678 case NEON::BI__builtin_neon_vcvt_f16_s16:
7679 case NEON::BI__builtin_neon_vcvt_f16_u16:
7680 case NEON::BI__builtin_neon_vcvtq_f16_s16:
7681 case NEON::BI__builtin_neon_vcvtq_f16_u16:
7682 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7683 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7684 HasLegalHalfType);
7685 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7686 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7687 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7688 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7689 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7690 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7691 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7692 Function *F = CGM.getIntrinsic(Int, Tys);
7693 return EmitNeonCall(F, Ops, "vcvt_n");
7694 }
7695 case NEON::BI__builtin_neon_vcvt_n_f32_v:
7696 case NEON::BI__builtin_neon_vcvt_n_f64_v:
7697 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7698 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7699 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7700 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7701 Function *F = CGM.getIntrinsic(Int, Tys);
7702 return EmitNeonCall(F, Ops, "vcvt_n");
7703 }
7704 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7705 case NEON::BI__builtin_neon_vcvt_n_s32_v:
7706 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7707 case NEON::BI__builtin_neon_vcvt_n_u32_v:
7708 case NEON::BI__builtin_neon_vcvt_n_s64_v:
7709 case NEON::BI__builtin_neon_vcvt_n_u64_v:
7710 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7711 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7712 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7713 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7714 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7715 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7716 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7717 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7718 return EmitNeonCall(F, Ops, "vcvt_n");
7719 }
7720 case NEON::BI__builtin_neon_vcvt_s32_v:
7721 case NEON::BI__builtin_neon_vcvt_u32_v:
7722 case NEON::BI__builtin_neon_vcvt_s64_v:
7723 case NEON::BI__builtin_neon_vcvt_u64_v:
7724 case NEON::BI__builtin_neon_vcvt_s16_f16:
7725 case NEON::BI__builtin_neon_vcvt_u16_f16:
7726 case NEON::BI__builtin_neon_vcvtq_s32_v:
7727 case NEON::BI__builtin_neon_vcvtq_u32_v:
7728 case NEON::BI__builtin_neon_vcvtq_s64_v:
7729 case NEON::BI__builtin_neon_vcvtq_u64_v:
7730 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7731 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7732 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7733 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7734 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7735 }
7736 case NEON::BI__builtin_neon_vcvta_s16_f16:
7737 case NEON::BI__builtin_neon_vcvta_s32_v:
7738 case NEON::BI__builtin_neon_vcvta_s64_v:
7739 case NEON::BI__builtin_neon_vcvta_u16_f16:
7740 case NEON::BI__builtin_neon_vcvta_u32_v:
7741 case NEON::BI__builtin_neon_vcvta_u64_v:
7742 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7743 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7744 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7745 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7746 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7747 case NEON::BI__builtin_neon_vcvtaq_u64_v:
7748 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7749 case NEON::BI__builtin_neon_vcvtn_s32_v:
7750 case NEON::BI__builtin_neon_vcvtn_s64_v:
7751 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7752 case NEON::BI__builtin_neon_vcvtn_u32_v:
7753 case NEON::BI__builtin_neon_vcvtn_u64_v:
7754 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7755 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7756 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7757 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7758 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7759 case NEON::BI__builtin_neon_vcvtnq_u64_v:
7760 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7761 case NEON::BI__builtin_neon_vcvtp_s32_v:
7762 case NEON::BI__builtin_neon_vcvtp_s64_v:
7763 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7764 case NEON::BI__builtin_neon_vcvtp_u32_v:
7765 case NEON::BI__builtin_neon_vcvtp_u64_v:
7766 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7767 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7768 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7769 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7770 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7771 case NEON::BI__builtin_neon_vcvtpq_u64_v:
7772 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7773 case NEON::BI__builtin_neon_vcvtm_s32_v:
7774 case NEON::BI__builtin_neon_vcvtm_s64_v:
7775 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7776 case NEON::BI__builtin_neon_vcvtm_u32_v:
7777 case NEON::BI__builtin_neon_vcvtm_u64_v:
7778 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7779 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7780 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7781 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7782 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7783 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7784 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7785 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7786 }
7787 case NEON::BI__builtin_neon_vcvtx_f32_v: {
7788 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7789 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7790
7791 }
7792 case NEON::BI__builtin_neon_vext_v:
7793 case NEON::BI__builtin_neon_vextq_v: {
7794 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7795 SmallVector<int, 16> Indices;
7796 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7797 Indices.push_back(i+CV);
7798
7799 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7800 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7801 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7802 }
7803 case NEON::BI__builtin_neon_vfma_v:
7804 case NEON::BI__builtin_neon_vfmaq_v: {
7805 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7806 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7807 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7808
7809 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7810 return emitCallMaybeConstrainedFPBuiltin(
7811 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7812 {Ops[1], Ops[2], Ops[0]});
7813 }
7814 case NEON::BI__builtin_neon_vld1_v:
7815 case NEON::BI__builtin_neon_vld1q_v: {
7816 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7817 Ops.push_back(getAlignmentValue32(PtrOp0));
7818 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7819 }
7820 case NEON::BI__builtin_neon_vld1_x2_v:
7821 case NEON::BI__builtin_neon_vld1q_x2_v:
7822 case NEON::BI__builtin_neon_vld1_x3_v:
7823 case NEON::BI__builtin_neon_vld1q_x3_v:
7824 case NEON::BI__builtin_neon_vld1_x4_v:
7825 case NEON::BI__builtin_neon_vld1q_x4_v: {
7826 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7827 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7828 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7829 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7830 }
7831 case NEON::BI__builtin_neon_vld2_v:
7832 case NEON::BI__builtin_neon_vld2q_v:
7833 case NEON::BI__builtin_neon_vld3_v:
7834 case NEON::BI__builtin_neon_vld3q_v:
7835 case NEON::BI__builtin_neon_vld4_v:
7836 case NEON::BI__builtin_neon_vld4q_v:
7837 case NEON::BI__builtin_neon_vld2_dup_v:
7838 case NEON::BI__builtin_neon_vld2q_dup_v:
7839 case NEON::BI__builtin_neon_vld3_dup_v:
7840 case NEON::BI__builtin_neon_vld3q_dup_v:
7841 case NEON::BI__builtin_neon_vld4_dup_v:
7842 case NEON::BI__builtin_neon_vld4q_dup_v: {
7843 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7844 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7845 Value *Align = getAlignmentValue32(PtrOp1);
7846 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7847 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7848 }
7849 case NEON::BI__builtin_neon_vld1_dup_v:
7850 case NEON::BI__builtin_neon_vld1q_dup_v: {
7851 Value *V = PoisonValue::get(Ty);
7852 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7853 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7854 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7855 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7856 return EmitNeonSplat(Ops[0], CI);
7857 }
7858 case NEON::BI__builtin_neon_vld2_lane_v:
7859 case NEON::BI__builtin_neon_vld2q_lane_v:
7860 case NEON::BI__builtin_neon_vld3_lane_v:
7861 case NEON::BI__builtin_neon_vld3q_lane_v:
7862 case NEON::BI__builtin_neon_vld4_lane_v:
7863 case NEON::BI__builtin_neon_vld4q_lane_v: {
7864 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7865 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7866 for (unsigned I = 2; I < Ops.size() - 1; ++I)
7867 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
7868 Ops.push_back(getAlignmentValue32(PtrOp1));
7869 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
7870 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7871 }
7872 case NEON::BI__builtin_neon_vmovl_v: {
7873 llvm::FixedVectorType *DTy =
7874 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7875 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
7876 if (Usgn)
7877 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
7878 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
7879 }
7880 case NEON::BI__builtin_neon_vmovn_v: {
7881 llvm::FixedVectorType *QTy =
7882 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7883 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
7884 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
7885 }
7886 case NEON::BI__builtin_neon_vmull_v:
7887 // FIXME: the integer vmull operations could be emitted in terms of pure
7888 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
7889 // hoisting the exts outside loops. Until global ISel comes along that can
7890 // see through such movement this leads to bad CodeGen. So we need an
7891 // intrinsic for now.
7892 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
7893 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
7894 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
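// The pure-IR alternative the FIXME above refers to would look roughly like
//   %l = sext <4 x i16> %lhs to <4 x i32>
//   %r = sext <4 x i16> %rhs to <4 x i32>
//   %p = mul <4 x i32> %l, %r
// (zext for the unsigned forms); keeping the widening multiply behind one
// intrinsic stops the extensions from being hoisted away from the multiply.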
7895 case NEON::BI__builtin_neon_vpadal_v:
7896 case NEON::BI__builtin_neon_vpadalq_v: {
7897 // The source operand type has twice as many elements of half the size.
7898 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7899 llvm::Type *EltTy =
7900 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7901 auto *NarrowTy =
7902 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7903 llvm::Type *Tys[2] = { Ty, NarrowTy };
7904 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7905 }
7906 case NEON::BI__builtin_neon_vpaddl_v:
7907 case NEON::BI__builtin_neon_vpaddlq_v: {
7908 // The source operand type has twice as many elements of half the size.
7909 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
7910 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
7911 auto *NarrowTy =
7912 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
7913 llvm::Type *Tys[2] = { Ty, NarrowTy };
7914 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
7915 }
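// Illustrative types for the two pairwise-widening cases above: for
// vpaddl_s8, Ty is <4 x i16> and NarrowTy is <8 x i8>, so the selected
// intrinsic is overloaded on both, e.g. llvm.arm.neon.vpaddls.v4i16.v8i8
// (the exact intrinsic comes from the per-target map); vpadal additionally
// passes the <4 x i16> accumulator as its first operand.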
7916 case NEON::BI__builtin_neon_vqdmlal_v:
7917 case NEON::BI__builtin_neon_vqdmlsl_v: {
7918 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
7919 Ops[1] =
7920 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
7921 Ops.resize(2);
7922 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
7923 }
7924 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
7925 case NEON::BI__builtin_neon_vqdmulh_lane_v:
7926 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
7927 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
7928 auto *RTy = cast<llvm::FixedVectorType>(Ty);
7929 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
7930 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
7931 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
7932 RTy->getNumElements() * 2);
7933 llvm::Type *Tys[2] = {
7934 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7935 /*isQuad*/ false))};
7936 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7937 }
7938 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
7939 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
7940 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
7941 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
7942 llvm::Type *Tys[2] = {
7943 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
7944 /*isQuad*/ true))};
7945 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
7946 }
7947 case NEON::BI__builtin_neon_vqshl_n_v:
7948 case NEON::BI__builtin_neon_vqshlq_n_v:
7949 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
7950 1, false);
7951 case NEON::BI__builtin_neon_vqshlu_n_v:
7952 case NEON::BI__builtin_neon_vqshluq_n_v:
7953 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
7954 1, false);
7955 case NEON::BI__builtin_neon_vrecpe_v:
7956 case NEON::BI__builtin_neon_vrecpeq_v:
7957 case NEON::BI__builtin_neon_vrsqrte_v:
7958 case NEON::BI__builtin_neon_vrsqrteq_v:
7959 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
7960 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7961 case NEON::BI__builtin_neon_vrndi_v:
7962 case NEON::BI__builtin_neon_vrndiq_v:
7963 Int = Builder.getIsFPConstrained()
7964 ? Intrinsic::experimental_constrained_nearbyint
7965 : Intrinsic::nearbyint;
7966 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
7967 case NEON::BI__builtin_neon_vrshr_n_v:
7968 case NEON::BI__builtin_neon_vrshrq_n_v:
7969 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
7970 1, true);
7971 case NEON::BI__builtin_neon_vsha512hq_u64:
7972 case NEON::BI__builtin_neon_vsha512h2q_u64:
7973 case NEON::BI__builtin_neon_vsha512su0q_u64:
7974 case NEON::BI__builtin_neon_vsha512su1q_u64: {
7975 Function *F = CGM.getIntrinsic(Int);
7976 return EmitNeonCall(F, Ops, "");
7977 }
7978 case NEON::BI__builtin_neon_vshl_n_v:
7979 case NEON::BI__builtin_neon_vshlq_n_v:
7980 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
7981 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
7982 "vshl_n");
7983 case NEON::BI__builtin_neon_vshll_n_v: {
7984 llvm::FixedVectorType *SrcTy =
7985 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7986 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7987 if (Usgn)
7988 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
7989 else
7990 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
7991 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
7992 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
7993 }
7994 case NEON::BI__builtin_neon_vshrn_n_v: {
7995 llvm::FixedVectorType *SrcTy =
7996 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7997 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7998 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
7999 if (Usgn)
8000 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8001 else
8002 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8003 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8004 }
8005 case NEON::BI__builtin_neon_vshr_n_v:
8006 case NEON::BI__builtin_neon_vshrq_n_v:
8007 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8008 case NEON::BI__builtin_neon_vst1_v:
8009 case NEON::BI__builtin_neon_vst1q_v:
8010 case NEON::BI__builtin_neon_vst2_v:
8011 case NEON::BI__builtin_neon_vst2q_v:
8012 case NEON::BI__builtin_neon_vst3_v:
8013 case NEON::BI__builtin_neon_vst3q_v:
8014 case NEON::BI__builtin_neon_vst4_v:
8015 case NEON::BI__builtin_neon_vst4q_v:
8016 case NEON::BI__builtin_neon_vst2_lane_v:
8017 case NEON::BI__builtin_neon_vst2q_lane_v:
8018 case NEON::BI__builtin_neon_vst3_lane_v:
8019 case NEON::BI__builtin_neon_vst3q_lane_v:
8020 case NEON::BI__builtin_neon_vst4_lane_v:
8021 case NEON::BI__builtin_neon_vst4q_lane_v: {
8022 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8023 Ops.push_back(getAlignmentValue32(PtrOp0));
8024 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8025 }
8026 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8027 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8028 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8029 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8030 case NEON::BI__builtin_neon_vsm4eq_u32: {
8031 Function *F = CGM.getIntrinsic(Int);
8032 return EmitNeonCall(F, Ops, "");
8033 }
8034 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8035 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8036 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8037 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8038 Function *F = CGM.getIntrinsic(Int);
8039 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8040 return EmitNeonCall(F, Ops, "");
8041 }
8042 case NEON::BI__builtin_neon_vst1_x2_v:
8043 case NEON::BI__builtin_neon_vst1q_x2_v:
8044 case NEON::BI__builtin_neon_vst1_x3_v:
8045 case NEON::BI__builtin_neon_vst1q_x3_v:
8046 case NEON::BI__builtin_neon_vst1_x4_v:
8047 case NEON::BI__builtin_neon_vst1q_x4_v: {
8048 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8049 // in AArch64 it comes last. We may want to stick to one or the other.
8050 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8051 Arch == llvm::Triple::aarch64_32) {
8052 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8053 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8054 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8055 }
8056 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8057 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8058 }
8059 case NEON::BI__builtin_neon_vsubhn_v: {
8060 llvm::FixedVectorType *SrcTy =
8061 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8062
8063 // %diff = sub <4 x i32> %lhs, %rhs
8064 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8065 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8066 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8067
8068 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8069 Constant *ShiftAmt =
8070 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8071 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8072
8073 // %res = trunc <4 x i32> %high to <4 x i16>
8074 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8075 }
8076 case NEON::BI__builtin_neon_vtrn_v:
8077 case NEON::BI__builtin_neon_vtrnq_v: {
8078 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8079 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8080 Value *SV = nullptr;
8081
8082 for (unsigned vi = 0; vi != 2; ++vi) {
8083 SmallVector<int, 16> Indices;
8084 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8085 Indices.push_back(i+vi);
8086 Indices.push_back(i+e+vi);
8087 }
8088 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8089 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8090 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8091 }
8092 return SV;
8093 }
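// Concrete masks for the loop above, assuming <4 x i16> operands: the two
// shufflevector masks are <0, 4, 2, 6> and <1, 5, 3, 7>, i.e. the even lanes
// of both inputs interleaved, then the odd lanes, with each result stored to
// a consecutive vector-sized slot of the result pointer in Ops[0].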
8094 case NEON::BI__builtin_neon_vtst_v:
8095 case NEON::BI__builtin_neon_vtstq_v: {
8096 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8097 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8098 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8099 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8100 ConstantAggregateZero::get(Ty));
8101 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8102 }
8103 case NEON::BI__builtin_neon_vuzp_v:
8104 case NEON::BI__builtin_neon_vuzpq_v: {
8105 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8106 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8107 Value *SV = nullptr;
8108
8109 for (unsigned vi = 0; vi != 2; ++vi) {
8110 SmallVector<int, 16> Indices;
8111 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8112 Indices.push_back(2*i+vi);
8113
8114 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8115 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8116 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8117 }
8118 return SV;
8119 }
8120 case NEON::BI__builtin_neon_vxarq_u64: {
8121 Function *F = CGM.getIntrinsic(Int);
8122 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8123 return EmitNeonCall(F, Ops, "");
8124 }
8125 case NEON::BI__builtin_neon_vzip_v:
8126 case NEON::BI__builtin_neon_vzipq_v: {
8127 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8128 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8129 Value *SV = nullptr;
8130
8131 for (unsigned vi = 0; vi != 2; ++vi) {
8132 SmallVector<int, 16> Indices;
8133 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8134 Indices.push_back((i + vi*e) >> 1);
8135 Indices.push_back(((i + vi*e) >> 1)+e);
8136 }
8137 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8138 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8139 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8140 }
8141 return SV;
8142 }
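// With <4 x i16> operands the two masks built above are <0, 4, 1, 5> and
// <2, 6, 3, 7>, i.e. the low halves of both inputs interleaved lane by lane,
// then the high halves, stored to the two result slots as in the vtrn/vuzp
// cases.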
8143 case NEON::BI__builtin_neon_vdot_s32:
8144 case NEON::BI__builtin_neon_vdot_u32:
8145 case NEON::BI__builtin_neon_vdotq_s32:
8146 case NEON::BI__builtin_neon_vdotq_u32: {
8147 auto *InputTy =
8148 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8149 llvm::Type *Tys[2] = { Ty, InputTy };
8150 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8151 }
8152 case NEON::BI__builtin_neon_vfmlal_low_f16:
8153 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8154 auto *InputTy =
8155 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8156 llvm::Type *Tys[2] = { Ty, InputTy };
8157 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8158 }
8159 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8160 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8161 auto *InputTy =
8162 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8163 llvm::Type *Tys[2] = { Ty, InputTy };
8164 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8165 }
8166 case NEON::BI__builtin_neon_vfmlal_high_f16:
8167 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8168 auto *InputTy =
8169 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8170 llvm::Type *Tys[2] = { Ty, InputTy };
8171 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8172 }
8173 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8174 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8175 auto *InputTy =
8176 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8177 llvm::Type *Tys[2] = { Ty, InputTy };
8178 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8179 }
8180 case NEON::BI__builtin_neon_vmmlaq_s32:
8181 case NEON::BI__builtin_neon_vmmlaq_u32: {
8182 auto *InputTy =
8183 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8184 llvm::Type *Tys[2] = { Ty, InputTy };
8185 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8186 }
8187 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8188 auto *InputTy =
8189 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8190 llvm::Type *Tys[2] = { Ty, InputTy };
8191 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8192 }
8193 case NEON::BI__builtin_neon_vusdot_s32:
8194 case NEON::BI__builtin_neon_vusdotq_s32: {
8195 auto *InputTy =
8196 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8197 llvm::Type *Tys[2] = { Ty, InputTy };
8198 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8199 }
8200 case NEON::BI__builtin_neon_vbfdot_f32:
8201 case NEON::BI__builtin_neon_vbfdotq_f32: {
8202 llvm::Type *InputTy =
8203 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8204 llvm::Type *Tys[2] = { Ty, InputTy };
8205 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8206 }
8207 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8208 llvm::Type *Tys[1] = { Ty };
8209 Function *F = CGM.getIntrinsic(Int, Tys);
8210 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8211 }
8212
8213 }
8214
8215 assert(Int && "Expected valid intrinsic number");
8216
8217 // Determine the type(s) of this overloaded AArch64 intrinsic.
8218 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8219
8220 Value *Result = EmitNeonCall(F, Ops, NameHint);
8221 llvm::Type *ResultType = ConvertType(E->getType());
8222 // Cast the one-element vector returned by the AArch64 intrinsic back to
8223 // the scalar type expected by the builtin.
8224 return Builder.CreateBitCast(Result, ResultType, NameHint);
8225}
8226
8227Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8228 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8229 const CmpInst::Predicate Ip, const Twine &Name) {
8230 llvm::Type *OTy = Op->getType();
8231
8232 // FIXME: this is utterly horrific. We should not be looking at previous
8233 // codegen context to find out what needs doing. Unfortunately TableGen
8234 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8235 // (etc).
8236 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8237 OTy = BI->getOperand(0)->getType();
8238
8239 Op = Builder.CreateBitCast(Op, OTy);
8240 if (OTy->getScalarType()->isFloatingPointTy()) {
8241 if (Fp == CmpInst::FCMP_OEQ)
8242 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8243 else
8244 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8245 } else {
8246 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8247 }
8248 return Builder.CreateSExt(Op, Ty, Name);
8249}
8250
8251static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8252 Value *ExtOp, Value *IndexOp,
8253 llvm::Type *ResTy, unsigned IntID,
8254 const char *Name) {
8255 SmallVector<Value *, 2> TblOps;
8256 if (ExtOp)
8257 TblOps.push_back(ExtOp);
8258
8259 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8260 SmallVector<int, 16> Indices;
8261 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8262 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8263 Indices.push_back(2*i);
8264 Indices.push_back(2*i+1);
8265 }
8266
8267 int PairPos = 0, End = Ops.size() - 1;
8268 while (PairPos < End) {
8269 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8270 Ops[PairPos+1], Indices,
8271 Name));
8272 PairPos += 2;
8273 }
8274
8275 // If there's an odd number of 64-bit lookup-table vectors, fill the high
8276 // 64 bits of the last 128-bit lookup table with zero.
8277 if (PairPos == End) {
8278 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8279 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8280 ZeroTbl, Indices, Name));
8281 }
8282
8283 Function *TblF;
8284 TblOps.push_back(IndexOp);
8285 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8286
8287 return CGF.EmitNeonCall(TblF, TblOps, Name);
8288}
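// For example, a vtbl3 lookup passes three 64-bit table vectors here: the
// first two are concatenated into one 128-bit table by the shuffle above, and
// the odd third one is widened with a zero vector, so the AArch64 TBL
// intrinsic always sees full 128-bit table operands plus the index vector.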
8289
8290Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8291 unsigned Value;
8292 switch (BuiltinID) {
8293 default:
8294 return nullptr;
8295 case clang::ARM::BI__builtin_arm_nop:
8296 Value = 0;
8297 break;
8298 case clang::ARM::BI__builtin_arm_yield:
8299 case clang::ARM::BI__yield:
8300 Value = 1;
8301 break;
8302 case clang::ARM::BI__builtin_arm_wfe:
8303 case clang::ARM::BI__wfe:
8304 Value = 2;
8305 break;
8306 case clang::ARM::BI__builtin_arm_wfi:
8307 case clang::ARM::BI__wfi:
8308 Value = 3;
8309 break;
8310 case clang::ARM::BI__builtin_arm_sev:
8311 case clang::ARM::BI__sev:
8312 Value = 4;
8313 break;
8314 case clang::ARM::BI__builtin_arm_sevl:
8315 case clang::ARM::BI__sevl:
8316 Value = 5;
8317 break;
8318 }
8319
8320 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8321 llvm::ConstantInt::get(Int32Ty, Value));
8322}
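// Illustration of the mapping above: __builtin_arm_wfi() (or __wfi()) becomes
// a call to @llvm.arm.hint with the constant 3, __yield() uses 1, and
// __builtin_arm_nop() uses 0.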
8323
8324enum SpecialRegisterAccessKind {
8325 NormalRead,
8326 VolatileRead,
8327 Write,
8328};
8329
8330// Generates the IR for __builtin_read_exec_*.
8331// Lowers the builtin to amdgcn_ballot intrinsic.
8332static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8333 llvm::Type *RegisterType,
8334 llvm::Type *ValueType, bool isExecHi) {
8335 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8336 CodeGen::CodeGenModule &CGM = CGF.CGM;
8337
8338 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8339 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8340
8341 if (isExecHi) {
8342 Value *Rt2 = Builder.CreateLShr(Call, 32);
8343 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8344 return Rt2;
8345 }
8346
8347 return Call;
8348}
8349
8350// Generates the IR for the read/write special register builtin,
8351// ValueType is the type of the value that is to be written or read,
8352// RegisterType is the type of the register being written to or read from.
8353static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8354 const CallExpr *E,
8355 llvm::Type *RegisterType,
8356 llvm::Type *ValueType,
8357 SpecialRegisterAccessKind AccessKind,
8358 StringRef SysReg = "") {
8359 // The read and write register intrinsics only support 32-, 64- and 128-bit operations.
8360 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8361 RegisterType->isIntegerTy(128)) &&
8362 "Unsupported size for register.");
8363
8364 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8365 CodeGen::CodeGenModule &CGM = CGF.CGM;
8366 LLVMContext &Context = CGM.getLLVMContext();
8367
8368 if (SysReg.empty()) {
8369 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8370 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8371 }
8372
8373 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8374 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8375 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8376
8377 llvm::Type *Types[] = { RegisterType };
8378
8379 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8380 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8381 && "Can't fit 64-bit value in 32-bit register");
8382
8383 if (AccessKind != Write) {
8384 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8385 llvm::Function *F = CGM.getIntrinsic(
8386 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8387 : llvm::Intrinsic::read_register,
8388 Types);
8389 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8390
8391 if (MixedTypes)
8392 // Read into 64 bit register and then truncate result to 32 bit.
8393 return Builder.CreateTrunc(Call, ValueType);
8394
8395 if (ValueType->isPointerTy())
8396 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8397 return Builder.CreateIntToPtr(Call, ValueType);
8398
8399 return Call;
8400 }
8401
8402 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8403 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8404 if (MixedTypes) {
8405 // Extend 32 bit write value to 64 bit to pass to write.
8406 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8407 return Builder.CreateCall(F, { Metadata, ArgValue });
8408 }
8409
8410 if (ValueType->isPointerTy()) {
8411 // Have VoidPtrTy ArgValue but want to return an i32/i64.
8412 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8413 return Builder.CreateCall(F, { Metadata, ArgValue });
8414 }
8415
8416 return Builder.CreateCall(F, { Metadata, ArgValue });
8417}
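// Rough shape of the emitted IR: a 64-bit volatile read such as
// __builtin_arm_rsr64 produces
//   %v = call i64 @llvm.read_volatile_register.i64(metadata <sysreg name>)
// and in the "MixedTypes" case the i64 result is truncated to i32 (or, for the
// pointer variants, converted with inttoptr/ptrtoint as shown above).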
8418
8419/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8420/// argument that specifies the vector type.
8421static bool HasExtraNeonArgument(unsigned BuiltinID) {
8422 switch (BuiltinID) {
8423 default: break;
8424 case NEON::BI__builtin_neon_vget_lane_i8:
8425 case NEON::BI__builtin_neon_vget_lane_i16:
8426 case NEON::BI__builtin_neon_vget_lane_bf16:
8427 case NEON::BI__builtin_neon_vget_lane_i32:
8428 case NEON::BI__builtin_neon_vget_lane_i64:
8429 case NEON::BI__builtin_neon_vget_lane_f32:
8430 case NEON::BI__builtin_neon_vgetq_lane_i8:
8431 case NEON::BI__builtin_neon_vgetq_lane_i16:
8432 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8433 case NEON::BI__builtin_neon_vgetq_lane_i32:
8434 case NEON::BI__builtin_neon_vgetq_lane_i64:
8435 case NEON::BI__builtin_neon_vgetq_lane_f32:
8436 case NEON::BI__builtin_neon_vduph_lane_bf16:
8437 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8438 case NEON::BI__builtin_neon_vset_lane_i8:
8439 case NEON::BI__builtin_neon_vset_lane_i16:
8440 case NEON::BI__builtin_neon_vset_lane_bf16:
8441 case NEON::BI__builtin_neon_vset_lane_i32:
8442 case NEON::BI__builtin_neon_vset_lane_i64:
8443 case NEON::BI__builtin_neon_vset_lane_f32:
8444 case NEON::BI__builtin_neon_vsetq_lane_i8:
8445 case NEON::BI__builtin_neon_vsetq_lane_i16:
8446 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8447 case NEON::BI__builtin_neon_vsetq_lane_i32:
8448 case NEON::BI__builtin_neon_vsetq_lane_i64:
8449 case NEON::BI__builtin_neon_vsetq_lane_f32:
8450 case NEON::BI__builtin_neon_vsha1h_u32:
8451 case NEON::BI__builtin_neon_vsha1cq_u32:
8452 case NEON::BI__builtin_neon_vsha1pq_u32:
8453 case NEON::BI__builtin_neon_vsha1mq_u32:
8454 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8455 case clang::ARM::BI_MoveToCoprocessor:
8456 case clang::ARM::BI_MoveToCoprocessor2:
8457 return false;
8458 }
8459 return true;
8460}
8461
8462Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8463 const CallExpr *E,
8464 ReturnValueSlot ReturnValue,
8465 llvm::Triple::ArchType Arch) {
8466 if (auto Hint = GetValueForARMHint(BuiltinID))
8467 return Hint;
8468
8469 if (BuiltinID == clang::ARM::BI__emit) {
8470 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8471 llvm::FunctionType *FTy =
8472 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8473
8474 Expr::EvalResult Result;
8475 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8476 llvm_unreachable("Sema will ensure that the parameter is constant");
8477
8478 llvm::APSInt Value = Result.Val.getInt();
8479 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8480
8481 llvm::InlineAsm *Emit =
8482 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8483 /*hasSideEffects=*/true)
8484 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8485 /*hasSideEffects=*/true);
8486
8487 return Builder.CreateCall(Emit);
8488 }
8489
8490 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8491 Value *Option = EmitScalarExpr(E->getArg(0));
8492 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8493 }
8494
8495 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8496 Value *Address = EmitScalarExpr(E->getArg(0));
8497 Value *RW = EmitScalarExpr(E->getArg(1));
8498 Value *IsData = EmitScalarExpr(E->getArg(2));
8499
8500 // Locality is not supported on ARM target
8501 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8502
8503 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8504 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8505 }
8506
8507 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8508 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8509 return Builder.CreateCall(
8510 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8511 }
8512
8513 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8514 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8515 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8516 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8517 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8518 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8519 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8520 return Res;
8521 }
8522
8523
8524 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8525 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8526 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8527 }
8528 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8529 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8530 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8531 "cls");
8532 }
8533
8534 if (BuiltinID == clang::ARM::BI__clear_cache) {
8535 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8536 const FunctionDecl *FD = E->getDirectCallee();
8537 Value *Ops[2];
8538 for (unsigned i = 0; i < 2; i++)
8539 Ops[i] = EmitScalarExpr(E->getArg(i));
8540 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8541 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8542 StringRef Name = FD->getName();
8543 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8544 }
8545
8546 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8547 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8548 Function *F;
8549
8550 switch (BuiltinID) {
8551 default: llvm_unreachable("unexpected builtin");
8552 case clang::ARM::BI__builtin_arm_mcrr:
8553 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8554 break;
8555 case clang::ARM::BI__builtin_arm_mcrr2:
8556 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8557 break;
8558 }
8559
8560 // The MCRR{2} instruction has 5 operands, but
8561 // the ACLE builtin has only 4 because Rt and Rt2
8562 // are passed to it as a single unsigned 64-bit
8563 // integer; the LLVM intrinsic, like the
8564 // instruction, still takes them as 2 separate
8565 // 32-bit integers.
8566
8567 Value *Coproc = EmitScalarExpr(E->getArg(0));
8568 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8569 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8570 Value *CRm = EmitScalarExpr(E->getArg(3));
8571
8572 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8573 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8574 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8575 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8576
8577 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8578 }
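// The splitting above means that, e.g., __builtin_arm_mcrr(cp, opc1, v64, crm)
// passes trunc(v64) as Rt and trunc(v64 >> 32) as Rt2 to @llvm.arm.mcrr.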
8579
8580 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8581 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8582 Function *F;
8583
8584 switch (BuiltinID) {
8585 default: llvm_unreachable("unexpected builtin");
8586 case clang::ARM::BI__builtin_arm_mrrc:
8587 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8588 break;
8589 case clang::ARM::BI__builtin_arm_mrrc2:
8590 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8591 break;
8592 }
8593
8594 Value *Coproc = EmitScalarExpr(E->getArg(0));
8595 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8596 Value *CRm = EmitScalarExpr(E->getArg(2));
8597 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8598
8599 // Returns an unsigned 64 bit integer, represented
8600 // as two 32 bit integers.
8601
8602 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8603 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8604 Rt = Builder.CreateZExt(Rt, Int64Ty);
8605 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8606
8607 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8608 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8609 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8610
8611 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8612 }
8613
8614 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8615 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8616 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8617 getContext().getTypeSize(E->getType()) == 64) ||
8618 BuiltinID == clang::ARM::BI__ldrexd) {
8619 Function *F;
8620
8621 switch (BuiltinID) {
8622 default: llvm_unreachable("unexpected builtin");
8623 case clang::ARM::BI__builtin_arm_ldaex:
8624 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8625 break;
8626 case clang::ARM::BI__builtin_arm_ldrexd:
8627 case clang::ARM::BI__builtin_arm_ldrex:
8628 case clang::ARM::BI__ldrexd:
8629 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8630 break;
8631 }
8632
8633 Value *LdPtr = EmitScalarExpr(E->getArg(0));
8634 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
8635
8636 Value *Val0 = Builder.CreateExtractValue(Val, 1);
8637 Value *Val1 = Builder.CreateExtractValue(Val, 0);
8638 Val0 = Builder.CreateZExt(Val0, Int64Ty);
8639 Val1 = Builder.CreateZExt(Val1, Int64Ty);
8640
8641 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8642 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8643 Val = Builder.CreateOr(Val, Val1);
8644 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8645 }
8646
8647 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8648 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8649 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8650
8651 QualType Ty = E->getType();
8652 llvm::Type *RealResTy = ConvertType(Ty);
8653 llvm::Type *IntTy =
8654 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8655
8656 Function *F = CGM.getIntrinsic(
8657 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8658 : Intrinsic::arm_ldrex,
8659 UnqualPtrTy);
8660 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8661 Val->addParamAttr(
8662 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8663
8664 if (RealResTy->isPointerTy())
8665 return Builder.CreateIntToPtr(Val, RealResTy);
8666 else {
8667 llvm::Type *IntResTy = llvm::IntegerType::get(
8668 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8669 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8670 RealResTy);
8671 }
8672 }
8673
8674 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8675 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8676 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8677 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8678 Function *F = CGM.getIntrinsic(
8679 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8680 : Intrinsic::arm_strexd);
8681 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8682
8683 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8684 Value *Val = EmitScalarExpr(E->getArg(0));
8685 Builder.CreateStore(Val, Tmp);
8686
8687 Address LdPtr = Tmp.withElementType(STy);
8688 Val = Builder.CreateLoad(LdPtr);
8689
8690 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8691 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8692 Value *StPtr = EmitScalarExpr(E->getArg(1));
8693 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8694 }
8695
8696 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8697 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8698 Value *StoreVal = EmitScalarExpr(E->getArg(0));
8699 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8700
8701 QualType Ty = E->getArg(0)->getType();
8702 llvm::Type *StoreTy =
8703 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8704
8705 if (StoreVal->getType()->isPointerTy())
8706 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8707 else {
8708 llvm::Type *IntTy = llvm::IntegerType::get(
8709 getLLVMContext(),
8710 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8711 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8712 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8713 }
8714
8715 Function *F = CGM.getIntrinsic(
8716 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8717 : Intrinsic::arm_strex,
8718 StoreAddr->getType());
8719
8720 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8721 CI->addParamAttr(
8722 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8723 return CI;
8724 }
8725
8726 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8727 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8728 return Builder.CreateCall(F);
8729 }
8730
8731 // CRC32
8732 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8733 switch (BuiltinID) {
8734 case clang::ARM::BI__builtin_arm_crc32b:
8735 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8736 case clang::ARM::BI__builtin_arm_crc32cb:
8737 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8738 case clang::ARM::BI__builtin_arm_crc32h:
8739 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8740 case clang::ARM::BI__builtin_arm_crc32ch:
8741 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8742 case clang::ARM::BI__builtin_arm_crc32w:
8743 case clang::ARM::BI__builtin_arm_crc32d:
8744 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8745 case clang::ARM::BI__builtin_arm_crc32cw:
8746 case clang::ARM::BI__builtin_arm_crc32cd:
8747 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8748 }
8749
8750 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8751 Value *Arg0 = EmitScalarExpr(E->getArg(0));
8752 Value *Arg1 = EmitScalarExpr(E->getArg(1));
8753
8754 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8755 // intrinsics, hence we need different codegen for these cases.
8756 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8757 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8758 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8759 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8760 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8761 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8762
8763 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8764 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8765 return Builder.CreateCall(F, {Res, Arg1b});
8766 } else {
8767 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8768
8769 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8770 return Builder.CreateCall(F, {Arg0, Arg1});
8771 }
8772 }
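// In other words, the 64-bit variants above are emitted as
//   crc32w(crc32w(acc, trunc(data)), trunc(data >> 32))
// so each half of the 64-bit operand is fed through the 32-bit CRC intrinsic
// in turn.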
8773
8774 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8775 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8776 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8777 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8778 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8779 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8780
8781 SpecialRegisterAccessKind AccessKind = Write;
8782 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8783 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8784 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8785 AccessKind = VolatileRead;
8786
8787 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8788 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8789
8790 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8791 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8792
8793 llvm::Type *ValueType;
8794 llvm::Type *RegisterType;
8795 if (IsPointerBuiltin) {
8796 ValueType = VoidPtrTy;
8797 RegisterType = Int32Ty;
8798 } else if (Is64Bit) {
8799 ValueType = RegisterType = Int64Ty;
8800 } else {
8801 ValueType = RegisterType = Int32Ty;
8802 }
8803
8804 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8805 AccessKind);
8806 }
8807
8808 if (BuiltinID == ARM::BI__builtin_sponentry) {
8809 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8810 return Builder.CreateCall(F);
8811 }
8812
8813 // Handle MSVC intrinsics before argument evaluation to prevent double
8814 // evaluation.
8815 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8816 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8817
8818 // Deal with MVE builtins
8819 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8820 return Result;
8821 // Handle CDE builtins
8822 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8823 return Result;
8824
8825 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
8826 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8827 return P.first == BuiltinID;
8828 });
8829 if (It != end(NEONEquivalentIntrinsicMap))
8830 BuiltinID = It->second;
8831
8832 // Find out if any arguments are required to be integer constant
8833 // expressions.
8834 unsigned ICEArguments = 0;
8835 ASTContext::GetBuiltinTypeError Error;
8836 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8837 assert(Error == ASTContext::GE_None && "Should not codegen an error");
8838
8839 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8840 return Builder.getInt32(addr.getAlignment().getQuantity());
8841 };
8842
8843 Address PtrOp0 = Address::invalid();
8844 Address PtrOp1 = Address::invalid();
8845 SmallVector<Value*, 4> Ops;
8846 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8847 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8848 for (unsigned i = 0, e = NumArgs; i != e; i++) {
8849 if (i == 0) {
8850 switch (BuiltinID) {
8851 case NEON::BI__builtin_neon_vld1_v:
8852 case NEON::BI__builtin_neon_vld1q_v:
8853 case NEON::BI__builtin_neon_vld1q_lane_v:
8854 case NEON::BI__builtin_neon_vld1_lane_v:
8855 case NEON::BI__builtin_neon_vld1_dup_v:
8856 case NEON::BI__builtin_neon_vld1q_dup_v:
8857 case NEON::BI__builtin_neon_vst1_v:
8858 case NEON::BI__builtin_neon_vst1q_v:
8859 case NEON::BI__builtin_neon_vst1q_lane_v:
8860 case NEON::BI__builtin_neon_vst1_lane_v:
8861 case NEON::BI__builtin_neon_vst2_v:
8862 case NEON::BI__builtin_neon_vst2q_v:
8863 case NEON::BI__builtin_neon_vst2_lane_v:
8864 case NEON::BI__builtin_neon_vst2q_lane_v:
8865 case NEON::BI__builtin_neon_vst3_v:
8866 case NEON::BI__builtin_neon_vst3q_v:
8867 case NEON::BI__builtin_neon_vst3_lane_v:
8868 case NEON::BI__builtin_neon_vst3q_lane_v:
8869 case NEON::BI__builtin_neon_vst4_v:
8870 case NEON::BI__builtin_neon_vst4q_v:
8871 case NEON::BI__builtin_neon_vst4_lane_v:
8872 case NEON::BI__builtin_neon_vst4q_lane_v:
8873 // Get the alignment for the argument in addition to the value;
8874 // we'll use it later.
8875 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
8876 Ops.push_back(PtrOp0.emitRawPointer(*this));
8877 continue;
8878 }
8879 }
8880 if (i == 1) {
8881 switch (BuiltinID) {
8882 case NEON::BI__builtin_neon_vld2_v:
8883 case NEON::BI__builtin_neon_vld2q_v:
8884 case NEON::BI__builtin_neon_vld3_v:
8885 case NEON::BI__builtin_neon_vld3q_v:
8886 case NEON::BI__builtin_neon_vld4_v:
8887 case NEON::BI__builtin_neon_vld4q_v:
8888 case NEON::BI__builtin_neon_vld2_lane_v:
8889 case NEON::BI__builtin_neon_vld2q_lane_v:
8890 case NEON::BI__builtin_neon_vld3_lane_v:
8891 case NEON::BI__builtin_neon_vld3q_lane_v:
8892 case NEON::BI__builtin_neon_vld4_lane_v:
8893 case NEON::BI__builtin_neon_vld4q_lane_v:
8894 case NEON::BI__builtin_neon_vld2_dup_v:
8895 case NEON::BI__builtin_neon_vld2q_dup_v:
8896 case NEON::BI__builtin_neon_vld3_dup_v:
8897 case NEON::BI__builtin_neon_vld3q_dup_v:
8898 case NEON::BI__builtin_neon_vld4_dup_v:
8899 case NEON::BI__builtin_neon_vld4q_dup_v:
8900 // Get the alignment for the argument in addition to the value;
8901 // we'll use it later.
8902 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
8903 Ops.push_back(PtrOp1.emitRawPointer(*this));
8904 continue;
8905 }
8906 }
8907
8908 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
8909 }
8910
8911 switch (BuiltinID) {
8912 default: break;
8913
8914 case NEON::BI__builtin_neon_vget_lane_i8:
8915 case NEON::BI__builtin_neon_vget_lane_i16:
8916 case NEON::BI__builtin_neon_vget_lane_i32:
8917 case NEON::BI__builtin_neon_vget_lane_i64:
8918 case NEON::BI__builtin_neon_vget_lane_bf16:
8919 case NEON::BI__builtin_neon_vget_lane_f32:
8920 case NEON::BI__builtin_neon_vgetq_lane_i8:
8921 case NEON::BI__builtin_neon_vgetq_lane_i16:
8922 case NEON::BI__builtin_neon_vgetq_lane_i32:
8923 case NEON::BI__builtin_neon_vgetq_lane_i64:
8924 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8925 case NEON::BI__builtin_neon_vgetq_lane_f32:
8926 case NEON::BI__builtin_neon_vduph_lane_bf16:
8927 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8928 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
8929
8930 case NEON::BI__builtin_neon_vrndns_f32: {
8931 Value *Arg = EmitScalarExpr(E->getArg(0));
8932 llvm::Type *Tys[] = {Arg->getType()};
8933 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
8934 return Builder.CreateCall(F, {Arg}, "vrndn"); }
8935
8936 case NEON::BI__builtin_neon_vset_lane_i8:
8937 case NEON::BI__builtin_neon_vset_lane_i16:
8938 case NEON::BI__builtin_neon_vset_lane_i32:
8939 case NEON::BI__builtin_neon_vset_lane_i64:
8940 case NEON::BI__builtin_neon_vset_lane_bf16:
8941 case NEON::BI__builtin_neon_vset_lane_f32:
8942 case NEON::BI__builtin_neon_vsetq_lane_i8:
8943 case NEON::BI__builtin_neon_vsetq_lane_i16:
8944 case NEON::BI__builtin_neon_vsetq_lane_i32:
8945 case NEON::BI__builtin_neon_vsetq_lane_i64:
8946 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8947 case NEON::BI__builtin_neon_vsetq_lane_f32:
8948 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
8949
8950 case NEON::BI__builtin_neon_vsha1h_u32:
8951 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
8952 "vsha1h");
8953 case NEON::BI__builtin_neon_vsha1cq_u32:
8954 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
8955 "vsha1h");
8956 case NEON::BI__builtin_neon_vsha1pq_u32:
8957 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
8958 "vsha1h");
8959 case NEON::BI__builtin_neon_vsha1mq_u32:
8960 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
8961 "vsha1h");
8962
8963 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
8964 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
8965 "vcvtbfp2bf");
8966 }
8967
8968 // The ARM _MoveToCoprocessor builtins put the input register value as
8969 // the first argument, but the LLVM intrinsic expects it as the third one.
8970 case clang::ARM::BI_MoveToCoprocessor:
8971 case clang::ARM::BI_MoveToCoprocessor2: {
8972 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
8973 ? Intrinsic::arm_mcr
8974 : Intrinsic::arm_mcr2);
8975 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
8976 Ops[3], Ops[4], Ops[5]});
8977 }
8978 }
8979
8980 // Get the last argument, which specifies the vector type.
8981 assert(HasExtraArg);
8982 const Expr *Arg = E->getArg(E->getNumArgs()-1);
8983 std::optional<llvm::APSInt> Result =
8984 Arg->getIntegerConstantExpr(getContext());
8985 if (!Result)
8986 return nullptr;
8987
8988 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
8989 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
8990 // Determine the overloaded type of this builtin.
8991 llvm::Type *Ty;
8992 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
8993 Ty = FloatTy;
8994 else
8995 Ty = DoubleTy;
8996
8997 // Determine whether this is an unsigned conversion or not.
8998 bool usgn = Result->getZExtValue() == 1;
8999 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9000
9001 // Call the appropriate intrinsic.
9002 Function *F = CGM.getIntrinsic(Int, Ty);
9003 return Builder.CreateCall(F, Ops, "vcvtr");
9004 }
9005
9006 // Determine the type of this overloaded NEON intrinsic.
9007 NeonTypeFlags Type = Result->getZExtValue();
9008 bool usgn = Type.isUnsigned();
9009 bool rightShift = false;
9010
9011 llvm::FixedVectorType *VTy =
9012 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9013 getTarget().hasBFloat16Type());
9014 llvm::Type *Ty = VTy;
9015 if (!Ty)
9016 return nullptr;
9017
9018 // Many NEON builtins have identical semantics and uses in ARM and
9019 // AArch64. Emit these in a single function.
9020 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9021 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9022 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9023 if (Builtin)
9024 return EmitCommonNeonBuiltinExpr(
9025 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9026 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9027
9028 unsigned Int;
9029 switch (BuiltinID) {
9030 default: return nullptr;
9031 case NEON::BI__builtin_neon_vld1q_lane_v:
9032 // Handle 64-bit integer elements as a special case. Use shuffles of
9033 // one-element vectors to avoid poor code for i64 in the backend.
9034 if (VTy->getElementType()->isIntegerTy(64)) {
9035 // Extract the other lane.
9036 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9037 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9038 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9039 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9040 // Load the value as a one-element vector.
9041 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9042 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9043 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9044 Value *Align = getAlignmentValue32(PtrOp0);
9045 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9046 // Combine them.
9047 int Indices[] = {1 - Lane, Lane};
9048 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9049 }
9050 [[fallthrough]];
9051 case NEON::BI__builtin_neon_vld1_lane_v: {
9052 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9053 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9054 Value *Ld = Builder.CreateLoad(PtrOp0);
9055 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9056 }
9057 case NEON::BI__builtin_neon_vqrshrn_n_v:
9058 Int =
9059 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9060 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9061 1, true);
9062 case NEON::BI__builtin_neon_vqrshrun_n_v:
9063 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9064 Ops, "vqrshrun_n", 1, true);
9065 case NEON::BI__builtin_neon_vqshrn_n_v:
9066 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9067 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9068 1, true);
9069 case NEON::BI__builtin_neon_vqshrun_n_v:
9070 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9071 Ops, "vqshrun_n", 1, true);
9072 case NEON::BI__builtin_neon_vrecpe_v:
9073 case NEON::BI__builtin_neon_vrecpeq_v:
9074 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9075 Ops, "vrecpe");
9076 case NEON::BI__builtin_neon_vrshrn_n_v:
9077 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9078 Ops, "vrshrn_n", 1, true);
9079 case NEON::BI__builtin_neon_vrsra_n_v:
9080 case NEON::BI__builtin_neon_vrsraq_n_v:
9081 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9082 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9083 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9084 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9085 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9086 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9087 case NEON::BI__builtin_neon_vsri_n_v:
9088 case NEON::BI__builtin_neon_vsriq_n_v:
9089 rightShift = true;
9090 [[fallthrough]];
9091 case NEON::BI__builtin_neon_vsli_n_v:
9092 case NEON::BI__builtin_neon_vsliq_n_v:
9093 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9094 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9095 Ops, "vsli_n");
9096 case NEON::BI__builtin_neon_vsra_n_v:
9097 case NEON::BI__builtin_neon_vsraq_n_v:
9098 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9099 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9100 return Builder.CreateAdd(Ops[0], Ops[1]);
9101 case NEON::BI__builtin_neon_vst1q_lane_v:
9102 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9103 // a one-element vector and avoid poor code for i64 in the backend.
9104 if (VTy->getElementType()->isIntegerTy(64)) {
9105 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9106 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9107 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9108 Ops[2] = getAlignmentValue32(PtrOp0);
9109 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9110 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9111 Tys), Ops);
9112 }
9113 [[fallthrough]];
9114 case NEON::BI__builtin_neon_vst1_lane_v: {
9115 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9116 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9117 return Builder.CreateStore(Ops[1],
9118 PtrOp0.withElementType(Ops[1]->getType()));
9119 }
9120 case NEON::BI__builtin_neon_vtbl1_v:
9121 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9122 Ops, "vtbl1");
9123 case NEON::BI__builtin_neon_vtbl2_v:
9124 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9125 Ops, "vtbl2");
9126 case NEON::BI__builtin_neon_vtbl3_v:
9127 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9128 Ops, "vtbl3");
9129 case NEON::BI__builtin_neon_vtbl4_v:
9130 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9131 Ops, "vtbl4");
9132 case NEON::BI__builtin_neon_vtbx1_v:
9133 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9134 Ops, "vtbx1");
9135 case NEON::BI__builtin_neon_vtbx2_v:
9136 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9137 Ops, "vtbx2");
9138 case NEON::BI__builtin_neon_vtbx3_v:
9139 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9140 Ops, "vtbx3");
9141 case NEON::BI__builtin_neon_vtbx4_v:
9142 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9143 Ops, "vtbx4");
9144 }
9145}
9146
9147template<typename Integer>
9148static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9149 return E->getIntegerConstantExpr(Context)->getExtValue();
9150}
9151
9152static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9153 llvm::Type *T, bool Unsigned) {
9154 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9155 // which finds it convenient to specify signed/unsigned as a boolean flag.
9156 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9157}
9158
9159static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9160 uint32_t Shift, bool Unsigned) {
9161 // MVE helper function for integer shift right. This must handle signed vs
9162 // unsigned, and also deal specially with the case where the shift count is
9163 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9164 // undefined behavior, but in MVE it's legal, so we must convert it to code
9165 // that is not undefined in IR.
9166 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9167 ->getElementType()
9168 ->getPrimitiveSizeInBits();
9169 if (Shift == LaneBits) {
9170 // An unsigned shift of the full lane size always generates zero, so we can
9171 // simply emit a zero vector. A signed shift of the full lane size does the
9172 // same thing as shifting by one bit fewer.
9173 if (Unsigned)
9174 return llvm::Constant::getNullValue(V->getType());
9175 else
9176 --Shift;
9177 }
9178 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9179}
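// For example, an unsigned right shift of <8 x i16> lanes by 16 folds to an
// all-zero vector here, while the signed form is emitted as an ashr by 15,
// keeping the IR shift amount below the lane width.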
9180
9181static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9182 // MVE-specific helper function for a vector splat, which infers the element
9183 // count of the output vector by knowing that MVE vectors are all 128 bits
9184 // wide.
9185 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9186 return Builder.CreateVectorSplat(Elements, V);
9187}
9188
9189static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9190 CodeGenFunction *CGF,
9191 llvm::Value *V,
9192 llvm::Type *DestType) {
9193 // Convert one MVE vector type into another by reinterpreting its in-register
9194 // format.
9195 //
9196 // Little-endian, this is identical to a bitcast (which reinterprets the
9197 // memory format). But big-endian, they're not necessarily the same, because
9198 // the register and memory formats map to each other differently depending on
9199 // the lane size.
9200 //
9201 // We generate a bitcast whenever we can (if we're little-endian, or if the
9202 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9203 // that performs the different kind of reinterpretation.
9204 if (CGF->getTarget().isBigEndian() &&
9205 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9206 return Builder.CreateCall(
9207 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9208 {DestType, V->getType()}),
9209 V);
9210 } else {
9211 return Builder.CreateBitCast(V, DestType);
9212 }
9213}
9214
9215static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9216 // Make a shufflevector that extracts every other element of a vector (evens
9217 // or odds, as desired).
9218 SmallVector<int, 16> Indices;
9219 unsigned InputElements =
9220 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9221 for (unsigned i = 0; i < InputElements; i += 2)
9222 Indices.push_back(i + Odd);
9223 return Builder.CreateShuffleVector(V, Indices);
9224}
9225
9226static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9227 llvm::Value *V1) {
9228 // Make a shufflevector that interleaves two vectors element by element.
9229 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9230 SmallVector<int, 16> Indices;
9231 unsigned InputElements =
9232 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9233 for (unsigned i = 0; i < InputElements; i++) {
9234 Indices.push_back(i);
9235 Indices.push_back(i + InputElements);
9236 }
9237 return Builder.CreateShuffleVector(V0, V1, Indices);
9238}
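// For 4-element inputs, VectorZip interleaves V0 and V1 with the shuffle mask
// <0,4,1,5,2,6,3,7>; for an 8-element input, VectorUnzip extracts lanes
// <0,2,4,6> (evens) or <1,3,5,7> (odds).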
9239
9240template<unsigned HighBit, unsigned OtherBits>
9241static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9242 // MVE-specific helper function to make a vector splat of a constant such as
9243 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9244 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9245 unsigned LaneBits = T->getPrimitiveSizeInBits();
9246 uint32_t Value = HighBit << (LaneBits - 1);
9247 if (OtherBits)
9248 Value |= (1UL << (LaneBits - 1)) - 1;
9249 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9250 return ARMMVEVectorSplat(Builder, Lane);
9251}
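// For a vector of i16 lanes, ARMMVEConstantSplat<1, 0> splats 0x8000 (the
// INT16_MIN bit pattern) and ARMMVEConstantSplat<0, 1> splats 0x7fff
// (INT16_MAX) across every lane.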
9252
9253static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9254 llvm::Value *V,
9255 unsigned ReverseWidth) {
9256 // MVE-specific helper function which reverses the elements of a
9257 // vector within every (ReverseWidth)-bit collection of lanes.
9258 SmallVector<int, 16> Indices;
9259 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9260 unsigned Elements = 128 / LaneSize;
9261 unsigned Mask = ReverseWidth / LaneSize - 1;
9262 for (unsigned i = 0; i < Elements; i++)
9263 Indices.push_back(i ^ Mask);
9264 return Builder.CreateShuffleVector(V, Indices);
9265}
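// For example, reversing a vector of i8 lanes within 32-bit chunks uses
// Mask = 3, giving indices 3,2,1,0, 7,6,5,4, ... so every group of four bytes
// is reversed in place (the VREV32.8 behaviour).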
9266
9267Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9268 const CallExpr *E,
9269 ReturnValueSlot ReturnValue,
9270 llvm::Triple::ArchType Arch) {
9271 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9272 Intrinsic::ID IRIntr;
9273 unsigned NumVectors;
9274
9275 // Code autogenerated by Tablegen will handle all the simple builtins.
9276 switch (BuiltinID) {
9277 #include "clang/Basic/arm_mve_builtin_cg.inc"
9278
9279 // If we didn't match an MVE builtin id at all, go back to the
9280 // main EmitARMBuiltinExpr.
9281 default:
9282 return nullptr;
9283 }
9284
9285 // Anything that breaks from that switch is an MVE builtin that
9286 // needs handwritten code to generate.
9287
9288 switch (CustomCodeGenType) {
9289
9290 case CustomCodeGen::VLD24: {
9291 llvm::SmallVector<Value *, 4> Ops;
9292 llvm::SmallVector<llvm::Type *, 2> Tys;
9293
9294 auto MvecCType = E->getType();
9295 auto MvecLType = ConvertType(MvecCType);
9296 assert(MvecLType->isStructTy() &&
9297 "Return type for vld[24]q should be a struct");
9298 assert(MvecLType->getStructNumElements() == 1 &&
9299 "Return-type struct for vld[24]q should have one element");
9300 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9301 assert(MvecLTypeInner->isArrayTy() &&
9302 "Return-type struct for vld[24]q should contain an array");
9303 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9304 "Array member of return-type struct vld[24]q has wrong length");
9305 auto VecLType = MvecLTypeInner->getArrayElementType();
9306
9307 Tys.push_back(VecLType);
9308
9309 auto Addr = E->getArg(0);
9310 Ops.push_back(EmitScalarExpr(Addr));
9311 Tys.push_back(ConvertType(Addr->getType()));
9312
9313 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9314 Value *LoadResult = Builder.CreateCall(F, Ops);
9315 Value *MvecOut = PoisonValue::get(MvecLType);
9316 for (unsigned i = 0; i < NumVectors; ++i) {
9317 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9318 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9319 }
9320
9321 if (ReturnValue.isNull())
9322 return MvecOut;
9323 else
9324 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9325 }
9326
9327 case CustomCodeGen::VST24: {
9328 llvm::SmallVector<Value *, 4> Ops;
9329 llvm::SmallVector<llvm::Type *, 2> Tys;
9330
9331 auto Addr = E->getArg(0);
9332 Ops.push_back(EmitScalarExpr(Addr));
9333 Tys.push_back(ConvertType(Addr->getType()));
9334
9335 auto MvecCType = E->getArg(1)->getType();
9336 auto MvecLType = ConvertType(MvecCType);
9337 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9338 assert(MvecLType->getStructNumElements() == 1 &&
9339 "Data-type struct for vst2q should have one element");
9340 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9341 assert(MvecLTypeInner->isArrayTy() &&
9342 "Data-type struct for vst2q should contain an array");
9343 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9344 "Array member of return-type struct vld[24]q has wrong length");
9345 auto VecLType = MvecLTypeInner->getArrayElementType();
9346
9347 Tys.push_back(VecLType);
9348
9349 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9350 EmitAggExpr(E->getArg(1), MvecSlot);
9351 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9352 for (unsigned i = 0; i < NumVectors; i++)
9353 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9354
9355 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9356 Value *ToReturn = nullptr;
9357 for (unsigned i = 0; i < NumVectors; i++) {
9358 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9359 ToReturn = Builder.CreateCall(F, Ops);
9360 Ops.pop_back();
9361 }
9362 return ToReturn;
9363 }
9364 }
9365 llvm_unreachable("unknown custom codegen type.");
9366}
9367
9368Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9369 const CallExpr *E,
9370 ReturnValueSlot ReturnValue,
9371 llvm::Triple::ArchType Arch) {
9372 switch (BuiltinID) {
9373 default:
9374 return nullptr;
9375#include "clang/Basic/arm_cde_builtin_cg.inc"
9376 }
9377}
9378
9379static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9380 const CallExpr *E,
9381 SmallVectorImpl<Value *> &Ops,
9382 llvm::Triple::ArchType Arch) {
9383 unsigned int Int = 0;
9384 const char *s = nullptr;
9385
9386 switch (BuiltinID) {
9387 default:
9388 return nullptr;
9389 case NEON::BI__builtin_neon_vtbl1_v:
9390 case NEON::BI__builtin_neon_vqtbl1_v:
9391 case NEON::BI__builtin_neon_vqtbl1q_v:
9392 case NEON::BI__builtin_neon_vtbl2_v:
9393 case NEON::BI__builtin_neon_vqtbl2_v:
9394 case NEON::BI__builtin_neon_vqtbl2q_v:
9395 case NEON::BI__builtin_neon_vtbl3_v:
9396 case NEON::BI__builtin_neon_vqtbl3_v:
9397 case NEON::BI__builtin_neon_vqtbl3q_v:
9398 case NEON::BI__builtin_neon_vtbl4_v:
9399 case NEON::BI__builtin_neon_vqtbl4_v:
9400 case NEON::BI__builtin_neon_vqtbl4q_v:
9401 break;
9402 case NEON::BI__builtin_neon_vtbx1_v:
9403 case NEON::BI__builtin_neon_vqtbx1_v:
9404 case NEON::BI__builtin_neon_vqtbx1q_v:
9405 case NEON::BI__builtin_neon_vtbx2_v:
9406 case NEON::BI__builtin_neon_vqtbx2_v:
9407 case NEON::BI__builtin_neon_vqtbx2q_v:
9408 case NEON::BI__builtin_neon_vtbx3_v:
9409 case NEON::BI__builtin_neon_vqtbx3_v:
9410 case NEON::BI__builtin_neon_vqtbx3q_v:
9411 case NEON::BI__builtin_neon_vtbx4_v:
9412 case NEON::BI__builtin_neon_vqtbx4_v:
9413 case NEON::BI__builtin_neon_vqtbx4q_v:
9414 break;
9415 }
9416
9417 assert(E->getNumArgs() >= 3);
9418
9419 // Get the last argument, which specifies the vector type.
9420 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9421 std::optional<llvm::APSInt> Result =
9422 Arg->getIntegerConstantExpr(CGF.getContext());
9423 if (!Result)
9424 return nullptr;
9425
9426 // Determine the type of this overloaded NEON intrinsic.
9427 NeonTypeFlags Type = Result->getZExtValue();
9428 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9429 if (!Ty)
9430 return nullptr;
9431
9432 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9433
9434 // AArch64 scalar builtins are not overloaded; they do not have an extra
9435 // argument that specifies the vector type, so we need to handle each case.
9436 switch (BuiltinID) {
9437 case NEON::BI__builtin_neon_vtbl1_v: {
9438 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9439 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9440 }
9441 case NEON::BI__builtin_neon_vtbl2_v: {
9442 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9443 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9444 }
9445 case NEON::BI__builtin_neon_vtbl3_v: {
9446 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9447 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9448 }
9449 case NEON::BI__builtin_neon_vtbl4_v: {
9450 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9451 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9452 }
9453 case NEON::BI__builtin_neon_vtbx1_v: {
9454 Value *TblRes =
9455 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9456 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9457
9458 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9459 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9460 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9461
9462 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9463 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9464 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9465 }
9466 case NEON::BI__builtin_neon_vtbx2_v: {
9467 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9468 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9469 }
9470 case NEON::BI__builtin_neon_vtbx3_v: {
9471 Value *TblRes =
9472 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9473 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9474
9475 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9476 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9477 TwentyFourV);
9478 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9479
9480 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9481 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9482 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9483 }
9484 case NEON::BI__builtin_neon_vtbx4_v: {
9485 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9486 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9487 }
9488 case NEON::BI__builtin_neon_vqtbl1_v:
9489 case NEON::BI__builtin_neon_vqtbl1q_v:
9490 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9491 case NEON::BI__builtin_neon_vqtbl2_v:
9492 case NEON::BI__builtin_neon_vqtbl2q_v: {
9493 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9494 case NEON::BI__builtin_neon_vqtbl3_v:
9495 case NEON::BI__builtin_neon_vqtbl3q_v:
9496 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9497 case NEON::BI__builtin_neon_vqtbl4_v:
9498 case NEON::BI__builtin_neon_vqtbl4q_v:
9499 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9500 case NEON::BI__builtin_neon_vqtbx1_v:
9501 case NEON::BI__builtin_neon_vqtbx1q_v:
9502 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9503 case NEON::BI__builtin_neon_vqtbx2_v:
9504 case NEON::BI__builtin_neon_vqtbx2q_v:
9505 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9506 case NEON::BI__builtin_neon_vqtbx3_v:
9507 case NEON::BI__builtin_neon_vqtbx3q_v:
9508 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9509 case NEON::BI__builtin_neon_vqtbx4_v:
9510 case NEON::BI__builtin_neon_vqtbx4q_v:
9511 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9512 }
9513 }
9514
9515 if (!Int)
9516 return nullptr;
9517
9518 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9519 return CGF.EmitNeonCall(F, Ops, s);
9520}
9521
9522Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9523 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9524 Op = Builder.CreateBitCast(Op, Int16Ty);
9525 Value *V = PoisonValue::get(VTy);
9526 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9527 Op = Builder.CreateInsertElement(V, Op, CI);
9528 return Op;
9529}
9530
9531/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9532/// access builtin. Only required if it can't be inferred from the base pointer
9533/// operand.
9534llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9535 switch (TypeFlags.getMemEltType()) {
9536 case SVETypeFlags::MemEltTyDefault:
9537 return getEltType(TypeFlags);
9538 case SVETypeFlags::MemEltTyInt8:
9539 return Builder.getInt8Ty();
9540 case SVETypeFlags::MemEltTyInt16:
9541 return Builder.getInt16Ty();
9542 case SVETypeFlags::MemEltTyInt32:
9543 return Builder.getInt32Ty();
9544 case SVETypeFlags::MemEltTyInt64:
9545 return Builder.getInt64Ty();
9546 }
9547 llvm_unreachable("Unknown MemEltType");
9548}
9549
9550llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9551 switch (TypeFlags.getEltType()) {
9552 default:
9553 llvm_unreachable("Invalid SVETypeFlag!");
9554
9555 case SVETypeFlags::EltTyInt8:
9556 return Builder.getInt8Ty();
9557 case SVETypeFlags::EltTyInt16:
9558 return Builder.getInt16Ty();
9559 case SVETypeFlags::EltTyInt32:
9560 return Builder.getInt32Ty();
9561 case SVETypeFlags::EltTyInt64:
9562 return Builder.getInt64Ty();
9563 case SVETypeFlags::EltTyInt128:
9564 return Builder.getInt128Ty();
9565
9566 case SVETypeFlags::EltTyFloat16:
9567 return Builder.getHalfTy();
9568 case SVETypeFlags::EltTyFloat32:
9569 return Builder.getFloatTy();
9570 case SVETypeFlags::EltTyFloat64:
9571 return Builder.getDoubleTy();
9572
9573 case SVETypeFlags::EltTyBFloat16:
9574 return Builder.getBFloatTy();
9575
9576 case SVETypeFlags::EltTyBool8:
9577 case SVETypeFlags::EltTyBool16:
9578 case SVETypeFlags::EltTyBool32:
9579 case SVETypeFlags::EltTyBool64:
9580 return Builder.getInt1Ty();
9581 }
9582}
9583
9584// Return the llvm predicate vector type corresponding to the specified element
9585// TypeFlags.
9586llvm::ScalableVectorType *
9587CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9588 switch (TypeFlags.getEltType()) {
9589 default: llvm_unreachable("Unhandled SVETypeFlag!");
9590
9591 case SVETypeFlags::EltTyInt8:
9592 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9593 case SVETypeFlags::EltTyInt16:
9594 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9595 case SVETypeFlags::EltTyInt32:
9596 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9597 case SVETypeFlags::EltTyInt64:
9598 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9599
9600 case SVETypeFlags::EltTyBFloat16:
9601 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9602 case SVETypeFlags::EltTyFloat16:
9603 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9604 case SVETypeFlags::EltTyFloat32:
9605 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9606 case SVETypeFlags::EltTyFloat64:
9607 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9608
9609 case SVETypeFlags::EltTyBool8:
9610 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9611 case SVETypeFlags::EltTyBool16:
9612 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9613 case SVETypeFlags::EltTyBool32:
9614 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9615 case SVETypeFlags::EltTyBool64:
9616 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9617 }
9618}
9619
9620// Return the llvm vector type corresponding to the specified element TypeFlags.
9621llvm::ScalableVectorType *
9622CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9623 switch (TypeFlags.getEltType()) {
9624 default:
9625 llvm_unreachable("Invalid SVETypeFlag!");
9626
9627 case SVETypeFlags::EltTyInt8:
9628 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9629 case SVETypeFlags::EltTyInt16:
9630 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9631 case SVETypeFlags::EltTyInt32:
9632 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9633 case SVETypeFlags::EltTyInt64:
9634 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9635
9636 case SVETypeFlags::EltTyFloat16:
9637 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9638 case SVETypeFlags::EltTyBFloat16:
9639 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9640 case SVETypeFlags::EltTyFloat32:
9641 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9642 case SVETypeFlags::EltTyFloat64:
9643 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9644
9645 case SVETypeFlags::EltTyBool8:
9646 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9647 case SVETypeFlags::EltTyBool16:
9648 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9649 case SVETypeFlags::EltTyBool32:
9650 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9651 case SVETypeFlags::EltTyBool64:
9652 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9653 }
9654}
9655
9656llvm::Value *
9657CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9658 Function *Ptrue =
9659 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9660 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9661}
9662
9663constexpr unsigned SVEBitsPerBlock = 128;
9664
9665static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9666 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9667 return llvm::ScalableVectorType::get(EltTy, NumElts);
9668}
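// Since SVE vectors are built from 128-bit blocks, this maps e.g. i8 to
// <vscale x 16 x i8>, i32 to <vscale x 4 x i32> and double to
// <vscale x 2 x double>.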
9669
9670// Reinterpret the input predicate so that it can be used to correctly isolate
9671// the elements of the specified datatype.
9672llvm::Value *CodeGenFunction::EmitSVEPredicateCast(llvm::Value *Pred,
9673 llvm::ScalableVectorType *VTy) {
9674
9675 if (isa<TargetExtType>(Pred->getType()) &&
9676 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9677 return Pred;
9678
9679 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9680 if (Pred->getType() == RTy)
9681 return Pred;
9682
9683 unsigned IntID;
9684 llvm::Type *IntrinsicTy;
9685 switch (VTy->getMinNumElements()) {
9686 default:
9687 llvm_unreachable("unsupported element count!");
9688 case 1:
9689 case 2:
9690 case 4:
9691 case 8:
9692 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9693 IntrinsicTy = RTy;
9694 break;
9695 case 16:
9696 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9697 IntrinsicTy = Pred->getType();
9698 break;
9699 }
9700
9701 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9702 Value *C = Builder.CreateCall(F, Pred);
9703 assert(C->getType() == RTy && "Unexpected return type!");
9704 return C;
9705}
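// Illustrative sketch of the narrowing case above (overload names follow the
// convert_from_svbool intrinsic used in the 1/2/4/8 arm of the switch):
//   %p = call <vscale x 2 x i1>
//        @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
// The 16-element case goes in the opposite direction via convert.to.svbool.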
9706
9707Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9708 SmallVectorImpl<Value *> &Ops,
9709 unsigned IntID) {
9710 auto *ResultTy = getSVEType(TypeFlags);
9711 auto *OverloadedTy =
9712 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9713
9714 Function *F = nullptr;
9715 if (Ops[1]->getType()->isVectorTy())
9716 // This is the "vector base, scalar offset" case. In order to uniquely
9717 // map this built-in to an LLVM IR intrinsic, we need both the return type
9718 // and the type of the vector base.
9719 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9720 else
9721 // This is the "scalar base, vector offset" case. The type of the offset
9722 // is encoded in the name of the intrinsic. We only need to specify the
9723 // return type in order to uniquely map this built-in to an LLVM IR
9724 // intrinsic.
9725 F = CGM.getIntrinsic(IntID, OverloadedTy);
9726
9727 // At the ACLE level there's only one predicate type, svbool_t, which is
9728 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9729 // actual type being loaded. For example, when loading doubles (i64) the
9730 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9731 // the predicate and the data being loaded must match. Cast to the type
9732 // expected by the intrinsic. The intrinsic itself should be defined in
9733 // a way that enforces relations between parameter types.
9734 Ops[0] = EmitSVEPredicateCast(
9735 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9736
9737 // Pass 0 when the offset is missing. This can only be applied when using
9738 // the "vector base" addressing mode for which ACLE allows no offset. The
9739 // corresponding LLVM IR always requires an offset.
9740 if (Ops.size() == 2) {
9741 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9742 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9743 }
9744
9745 // For "vector base, scalar index" scale the index so that it becomes a
9746 // scalar offset.
9747 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9748 unsigned BytesPerElt =
9749 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9750 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9751 }
9752
9753 Value *Call = Builder.CreateCall(F, Ops);
9754
9755 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9756 // other cases it's folded into a nop.
9757 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9758 : Builder.CreateSExt(Call, ResultTy);
9759}
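// Hedged example of the two addressing modes handled above (ACLE spellings
// given only for orientation and assumed, not quoted from a test):
//   svld1_gather_u64base_offset_u64(pg, bases, off);  // vector base, scalar offset
//   svld1_gather_s64offset_u64(pg, base, offsets);    // scalar base, vector offsets
// Both forms collapse onto one gather intrinsic whose predicate has been
// recast to <vscale x 2 x i1> and whose omitted offset, if any, becomes 0.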
9760
9761Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9762 SmallVectorImpl<Value *> &Ops,
9763 unsigned IntID) {
9764 auto *SrcDataTy = getSVEType(TypeFlags);
9765 auto *OverloadedTy =
9766 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9767
9768 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9769 // it's the first argument. Move it accordingly.
9770 Ops.insert(Ops.begin(), Ops.pop_back_val());
9771
9772 Function *F = nullptr;
9773 if (Ops[2]->getType()->isVectorTy())
9774 // This is the "vector base, scalar offset" case. In order to uniquely
9775 // map this built-in to an LLVM IR intrinsic, we need both the return type
9776 // and the type of the vector base.
9777 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9778 else
9779 // This is the "scalar base, vector offset" case. The type of the offset
9780 // is encoded in the name of the intrinsic. We only need to specify the
9781 // return type in order to uniquely map this built-in to an LLVM IR
9782 // intrinsic.
9783 F = CGM.getIntrinsic(IntID, OverloadedTy);
9784
9785 // Pass 0 when the offset is missing. This can only be applied when using
9786 // the "vector base" addressing mode for which ACLE allows no offset. The
9787 // corresponding LLVM IR always requires an offset.
9788 if (Ops.size() == 3) {
9789 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9790 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9791 }
9792
9793 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9794 // folded into a nop.
9795 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9796
9797 // At the ACLE level there's only one predicate type, svbool_t, which is
9798 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9799 // actual type being stored. For example, when storing doubles (i64) the
9800 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9801 // the predicate and the data being stored must match. Cast to the type
9802 // expected by the intrinsic. The intrinsic itself should be defined in
9803 // a way that enforces relations between parameter types.
9804 Ops[1] = EmitSVEPredicateCast(
9805 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9806
9807 // For "vector base, scalar index" scale the index so that it becomes a
9808 // scalar offset.
9809 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9810 unsigned BytesPerElt =
9811 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9812 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9813 }
9814
9815 return Builder.CreateCall(F, Ops);
9816}
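// The scatter side mirrors the gather case; e.g. (ACLE name assumed)
//   svst1_scatter_u64base_offset_u64(pg, bases, off, data);
// becomes a single scatter intrinsic call with the data moved to the front,
// truncated to the memory element type, and the predicate recast to match.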
9817
9818Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9819 SmallVectorImpl<Value *> &Ops,
9820 unsigned IntID) {
9821 // The gather prefetches are overloaded on the vector input - this can either
9822 // be the vector of base addresses or vector of offsets.
9823 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9824 if (!OverloadedTy)
9825 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9826
9827 // Cast the predicate from svbool_t to the right number of elements.
9828 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9829
9830 // vector + imm addressing modes
9831 if (Ops[1]->getType()->isVectorTy()) {
9832 if (Ops.size() == 3) {
9833 // Pass 0 for 'vector+imm' when the index is omitted.
9834 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9835
9836 // The sv_prfop is the last operand in the builtin and IR intrinsic.
9837 std::swap(Ops[2], Ops[3]);
9838 } else {
9839 // Index needs to be passed as scaled offset.
9840 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9841 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9842 if (BytesPerElt > 1)
9843 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9844 }
9845 }
9846
9847 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9848 return Builder.CreateCall(F, Ops);
9849}
9850
9851Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9852 SmallVectorImpl<Value*> &Ops,
9853 unsigned IntID) {
9854 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9855
9856 unsigned N;
9857 switch (IntID) {
9858 case Intrinsic::aarch64_sve_ld2_sret:
9859 case Intrinsic::aarch64_sve_ld1_pn_x2:
9860 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9861 case Intrinsic::aarch64_sve_ld2q_sret:
9862 N = 2;
9863 break;
9864 case Intrinsic::aarch64_sve_ld3_sret:
9865 case Intrinsic::aarch64_sve_ld3q_sret:
9866 N = 3;
9867 break;
9868 case Intrinsic::aarch64_sve_ld4_sret:
9869 case Intrinsic::aarch64_sve_ld1_pn_x4:
9870 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
9871 case Intrinsic::aarch64_sve_ld4q_sret:
9872 N = 4;
9873 break;
9874 default:
9875 llvm_unreachable("unknown intrinsic!");
9876 }
9877 auto RetTy = llvm::VectorType::get(VTy->getElementType(),
9878 VTy->getElementCount() * N);
9879
9880 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9881 Value *BasePtr = Ops[1];
9882
9883 // Does the load have an offset?
9884 if (Ops.size() > 2)
9885 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9886
9887 Function *F = CGM.getIntrinsic(IntID, {VTy});
9888 Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
9889 unsigned MinElts = VTy->getMinNumElements();
9890 Value *Ret = llvm::PoisonValue::get(RetTy);
9891 for (unsigned I = 0; I < N; I++) {
9892 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
9893 Value *SRet = Builder.CreateExtractValue(Call, I);
9894 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
9895 }
9896 return Ret;
9897}
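// Sketch of the concatenation above for an svld2 of 32-bit elements: the sret
// intrinsic returns two <vscale x 4 x i32> parts, which are inserted at
// element offsets 0 and 4 of one <vscale x 8 x i32> value representing the
// svint32x2_t tuple.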
9898
9899Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
9900 SmallVectorImpl<Value*> &Ops,
9901 unsigned IntID) {
9902 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9903
9904 unsigned N;
9905 switch (IntID) {
9906 case Intrinsic::aarch64_sve_st2:
9907 case Intrinsic::aarch64_sve_st1_pn_x2:
9908 case Intrinsic::aarch64_sve_stnt1_pn_x2:
9909 case Intrinsic::aarch64_sve_st2q:
9910 N = 2;
9911 break;
9912 case Intrinsic::aarch64_sve_st3:
9913 case Intrinsic::aarch64_sve_st3q:
9914 N = 3;
9915 break;
9916 case Intrinsic::aarch64_sve_st4:
9917 case Intrinsic::aarch64_sve_st1_pn_x4:
9918 case Intrinsic::aarch64_sve_stnt1_pn_x4:
9919 case Intrinsic::aarch64_sve_st4q:
9920 N = 4;
9921 break;
9922 default:
9923 llvm_unreachable("unknown intrinsic!");
9924 }
9925
9926 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
9927 Value *BasePtr = Ops[1];
9928
9929 // Does the store have an offset?
9930 if (Ops.size() > (2 + N))
9931 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
9932
9933 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
9934 // need to break up the tuple vector.
9935 SmallVector<llvm::Value*, 5> Operands;
9936 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
9937 Operands.push_back(Ops[I]);
9938 Operands.append({Predicate, BasePtr});
9939 Function *F = CGM.getIntrinsic(IntID, { VTy });
9940
9941 return Builder.CreateCall(F, Operands);
9942}
9943
9944// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
9945// svpmullt_pair intrinsics, with the exception that their results are bitcast
9946// to a wider type.
9947Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
9948 SmallVectorImpl<Value*> &Ops,
9949 unsigned BuiltinID) {
9950 // Splat scalar operand to vector (intrinsics with _n infix)
9951 if (TypeFlags.hasSplatOperand()) {
9952 unsigned OpNo = TypeFlags.getSplatOperand();
9953 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
9954 }
9955
9956 // The pair-wise function has a narrower overloaded type.
9957 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
9958 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
9959
9960 // Now bitcast to the wider result type.
9961 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
9962 return EmitSVEReinterpret(Call, Ty);
9963}
9964
9965Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
9966 ArrayRef<Value *> Ops, unsigned BuiltinID) {
9967 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
9968 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
9969 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
9970}
9971
9972Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
9973 SmallVectorImpl<Value *> &Ops,
9974 unsigned BuiltinID) {
9975 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9976 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
9977 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
9978
9979 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
9980 Value *BasePtr = Ops[1];
9981
9982 // Implement the index operand if not omitted.
9983 if (Ops.size() > 3)
9984 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
9985
9986 Value *PrfOp = Ops.back();
9987
9988 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
9989 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
9990}
9991
9992Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
9993 llvm::Type *ReturnTy,
9994 SmallVectorImpl<Value *> &Ops,
9995 unsigned IntrinsicID,
9996 bool IsZExtReturn) {
9997 QualType LangPTy = E->getArg(1)->getType();
9998 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
9999 LangPTy->castAs<PointerType>()->getPointeeType());
10000
10001 // The vector type that is returned may be different from the
10002 // eventual type loaded from memory.
10003 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10004 llvm::ScalableVectorType *MemoryTy = nullptr;
10005 llvm::ScalableVectorType *PredTy = nullptr;
10006 bool IsQuadLoad = false;
10007 switch (IntrinsicID) {
10008 case Intrinsic::aarch64_sve_ld1uwq:
10009 case Intrinsic::aarch64_sve_ld1udq:
10010 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10011 PredTy = llvm::ScalableVectorType::get(
10012 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10013 IsQuadLoad = true;
10014 break;
10015 default:
10016 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10017 PredTy = MemoryTy;
10018 break;
10019 }
10020
10021 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10022 Value *BasePtr = Ops[1];
10023
10024 // Does the load have an offset?
10025 if (Ops.size() > 2)
10026 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10027
10028 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10029 auto *Load =
10030 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10031 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10032 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10033
10034 if (IsQuadLoad)
10035 return Load;
10036
10037 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10038 : Builder.CreateSExt(Load, VectorTy);
10039}
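// For example (ACLE name assumed), svld1sb_u32 loads <vscale x 4 x i8> under
// a <vscale x 4 x i1> predicate and the result is sign-extended here to
// <vscale x 4 x i32>; for a plain svld1_u32 the memory type already matches
// and the extension folds away.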
10040
10041Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10042 SmallVectorImpl<Value *> &Ops,
10043 unsigned IntrinsicID) {
10044 QualType LangPTy = E->getArg(1)->getType();
10045 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10046 LangPTy->castAs<PointerType>()->getPointeeType());
10047
10048 // The vector type that is stored may be different from the
10049 // eventual type stored to memory.
10050 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10051 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10052
10053 auto PredTy = MemoryTy;
10054 auto AddrMemoryTy = MemoryTy;
10055 bool IsQuadStore = false;
10056
10057 switch (IntrinsicID) {
10058 case Intrinsic::aarch64_sve_st1wq:
10059 case Intrinsic::aarch64_sve_st1dq:
10060 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10061 PredTy =
10062 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10063 IsQuadStore = true;
10064 break;
10065 default:
10066 break;
10067 }
10068 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10069 Value *BasePtr = Ops[1];
10070
10071 // Does the store have an offset?
10072 if (Ops.size() == 4)
10073 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10074
10075 // Last value is always the data
10076 Value *Val =
10077 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10078
10079 Function *F =
10080 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10081 auto *Store =
10082 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10083 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10084 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10085 return Store;
10086}
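// The truncating counterpart, e.g. svst1b_s32 (ACLE name assumed): the
// <vscale x 4 x i32> data is truncated to <vscale x 4 x i8> before the call,
// while the st1wq/st1dq quad forms keep the full-width value and use a
// single-element predicate instead.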
10087
10088Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10089 SmallVectorImpl<Value *> &Ops,
10090 unsigned IntID) {
10091 Ops[2] = EmitSVEPredicateCast(
10092 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10093
10094 SmallVector<Value *> NewOps;
10095 NewOps.push_back(Ops[2]);
10096
10097 llvm::Value *BasePtr = Ops[3];
10098
10099 // If the intrinsic contains the vnum parameter, multiply it with the vector
10100 // size in bytes.
10101 if (Ops.size() == 5) {
10102 Function *StreamingVectorLength =
10103 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10104 llvm::Value *StreamingVectorLengthCall =
10105 Builder.CreateCall(StreamingVectorLength);
10106 llvm::Value *Mulvl =
10107 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10108 // The type of the ptr parameter is void *, so use Int8Ty here.
10109 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10110 }
10111 NewOps.push_back(BasePtr);
10112 NewOps.push_back(Ops[0]);
10113 NewOps.push_back(Ops[1]);
10114 Function *F = CGM.getIntrinsic(IntID);
10115 return Builder.CreateCall(F, NewOps);
10116}
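// A sketch of the vnum scaling above, with names abbreviated and the builtin
// mentioned only as an assumed example (an svld1_hor_vnum_za* form):
//   %svlb = call i64 @llvm.aarch64.sme.cntsb()
//   %mulvl = mul i64 %svlb, %vnum
//   %ptr = getelementptr i8, ptr %base, i64 %mulvl
// The tile and slice operands are then appended unchanged.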
10117
10118Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10119 SmallVectorImpl<Value *> &Ops,
10120 unsigned IntID) {
10121 auto *VecTy = getSVEType(TypeFlags);
10122 Function *F = CGM.getIntrinsic(IntID, VecTy);
10123 if (TypeFlags.isReadZA())
10124 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10125 else if (TypeFlags.isWriteZA())
10126 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10127 return Builder.CreateCall(F, Ops);
10128}
10129
10130Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10131 SmallVectorImpl<Value *> &Ops,
10132 unsigned IntID) {
10133 // The svzero_za() intrinsic zeros the entire ZA array and has no parameters.
10134 if (Ops.size() == 0)
10135 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10136 Function *F = CGM.getIntrinsic(IntID, {});
10137 return Builder.CreateCall(F, Ops);
10138}
10139
10140Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10141 SmallVectorImpl<Value *> &Ops,
10142 unsigned IntID) {
10143 if (Ops.size() == 2)
10144 Ops.push_back(Builder.getInt32(0));
10145 else
10146 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10147 Function *F = CGM.getIntrinsic(IntID, {});
10148 return Builder.CreateCall(F, Ops);
10149}
10150
10151// Splat a scalar across all lanes of an SVE vector; the element count comes
10152// from the requested scalable type, so IRBuilder::CreateVectorSplat is used.
10153Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10154 return Builder.CreateVectorSplat(
10155 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10156}
10157
10158Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10159 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10160}
10161
10162Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10163 // FIXME: For big endian this needs an additional REV, or needs a separate
10164 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10165 // instruction is defined as 'bitwise' equivalent from memory point of
10166 // view (when storing/reloading), whereas the svreinterpret builtin
10167 // implements bitwise equivalent cast from register point of view.
10168 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10169 return Builder.CreateBitCast(Val, Ty);
10170}
10171
10172static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10173 SmallVectorImpl<Value *> &Ops) {
10174 auto *SplatZero = Constant::getNullValue(Ty);
10175 Ops.insert(Ops.begin(), SplatZero);
10176}
10177
10178static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10179 SmallVectorImpl<Value *> &Ops) {
10180 auto *SplatUndef = UndefValue::get(Ty);
10181 Ops.insert(Ops.begin(), SplatUndef);
10182}
10183
10184SmallVector<llvm::Type *, 2>
10185CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10186 llvm::Type *ResultType,
10187 ArrayRef<Value *> Ops) {
10188 if (TypeFlags.isOverloadNone())
10189 return {};
10190
10191 llvm::Type *DefaultType = getSVEType(TypeFlags);
10192
10193 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10194 return {DefaultType, Ops[1]->getType()};
10195
10196 if (TypeFlags.isOverloadWhileRW())
10197 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10198
10199 if (TypeFlags.isOverloadCvt())
10200 return {Ops[0]->getType(), Ops.back()->getType()};
10201
10202 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10203 ResultType->isVectorTy())
10204 return {ResultType, Ops[1]->getType()};
10205
10206 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10207 return {DefaultType};
10208}
10209
10210Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10211 llvm::Type *Ty,
10212 ArrayRef<Value *> Ops) {
10213 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10214 "Expects TypleFlag isTupleSet or TypeFlags.isTupleSet()");
10215
10216 unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10217 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10218 TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10219 Value *Idx = ConstantInt::get(CGM.Int64Ty,
10220 I * SingleVecTy->getMinNumElements());
10221
10222 if (TypeFlags.isTupleSet())
10223 return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10224 return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10225}
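// Worked example of the index math above: svget2 on an svint32x2_t with
// index 1 extracts a <vscale x 4 x i32> starting at element 1 * 4 = 4 of the
// wide tuple vector, and the corresponding svset2 inserts at the same offset.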
10226
10227Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10228 llvm::Type *Ty,
10229 ArrayRef<Value *> Ops) {
10230 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10231
10232 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10233 unsigned MinElts = SrcTy->getMinNumElements();
10234 Value *Call = llvm::PoisonValue::get(Ty);
10235 for (unsigned I = 0; I < Ops.size(); I++) {
10236 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10237 Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10238 }
10239
10240 return Call;
10241}
10242
10243Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10244 // Multi-vector results should be broken up into a single (wide) result
10245 // vector.
10246 auto *StructTy = dyn_cast<StructType>(Call->getType());
10247 if (!StructTy)
10248 return Call;
10249
10250 auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10251 if (!VTy)
10252 return Call;
10253 unsigned N = StructTy->getNumElements();
10254
10255 // We may need to emit a cast to an svbool_t.
10256 bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10257 unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10258
10259 ScalableVectorType *WideVTy =
10260 ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10261 Value *Ret = llvm::PoisonValue::get(WideVTy);
10262 for (unsigned I = 0; I < N; ++I) {
10263 Value *SRet = Builder.CreateExtractValue(Call, I);
10264 assert(SRet->getType() == VTy && "Unexpected type for result value");
10265 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10266
10267 if (IsPredTy)
10268 SRet = EmitSVEPredicateCast(
10269 SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10270
10271 Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10272 }
10273 Call = Ret;
10274
10275 return Call;
10276}
10277
10278void CodeGenFunction::GetAArch64SVEProcessedOperands(
10279 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10280 SVETypeFlags TypeFlags) {
10281 // Find out if any arguments are required to be integer constant expressions.
10282 unsigned ICEArguments = 0;
10283 ASTContext::GetBuiltinTypeError Error;
10284 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10285 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10286
10287 // Tuple set/get only requires one insert/extract vector, which is
10288 // created by EmitSVETupleSetOrGet.
10289 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10290
10291 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10292 bool IsICE = ICEArguments & (1 << i);
10293 Value *Arg = EmitScalarExpr(E->getArg(i));
10294
10295 if (IsICE) {
10296 // If this is required to be a constant, constant fold it so that we know
10297 // that the generated intrinsic gets a ConstantInt.
10298 std::optional<llvm::APSInt> Result =
10299 E->getArg(i)->getIntegerConstantExpr(getContext());
10300 assert(Result && "Expected argument to be a constant");
10301
10302 // Immediates for SVE LLVM intrinsics are always 32 bits wide. We can safely
10303 // truncate because the immediate has been range checked and no valid
10304 // immediate requires more than a handful of bits.
10305 *Result = Result->extOrTrunc(32);
10306 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10307 continue;
10308 }
10309
10310 if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10311 Ops.push_back(Arg);
10312 continue;
10313 }
10314
10315 auto *VTy = cast<ScalableVectorType>(Arg->getType());
10316 unsigned MinElts = VTy->getMinNumElements();
10317 bool IsPred = VTy->getElementType()->isIntegerTy(1);
10318 unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10319
10320 if (N == 1) {
10321 Ops.push_back(Arg);
10322 continue;
10323 }
10324
10325 for (unsigned I = 0; I < N; ++I) {
10326 Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10327 auto *NewVTy =
10328 ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10329 Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10330 }
10331 }
10332}
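// For instance, an svint32x2_t argument (lowered as <vscale x 8 x i32>)
// arrives as one value with N == 2 above and is split into two
// <vscale x 4 x i32> extracts at element offsets 0 and 4 before being pushed
// onto Ops; single vectors and svbool_t predicates are passed through as-is.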
10333
10334Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10335 const CallExpr *E) {
10336 llvm::Type *Ty = ConvertType(E->getType());
10337 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10338 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10339 Value *Val = EmitScalarExpr(E->getArg(0));
10340 return EmitSVEReinterpret(Val, Ty);
10341 }
10342
10343 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10344 AArch64SVEIntrinsicsProvenSorted);
10345
10346 llvm::SmallVector<Value *, 4> Ops;
10347 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10348 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10349
10350 if (TypeFlags.isLoad())
10351 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10352 TypeFlags.isZExtReturn());
10353 else if (TypeFlags.isStore())
10354 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10355 else if (TypeFlags.isGatherLoad())
10356 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10357 else if (TypeFlags.isScatterStore())
10358 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10359 else if (TypeFlags.isPrefetch())
10360 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10361 else if (TypeFlags.isGatherPrefetch())
10362 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10363 else if (TypeFlags.isStructLoad())
10364 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10365 else if (TypeFlags.isStructStore())
10366 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10367 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10368 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10369 else if (TypeFlags.isTupleCreate())
10370 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10371 else if (TypeFlags.isUndef())
10372 return UndefValue::get(Ty);
10373 else if (Builtin->LLVMIntrinsic != 0) {
10374 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10375 InsertExplicitZeroOperand(Builder, Ty, Ops);
10376
10377 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10378 InsertExplicitUndefOperand(Builder, Ty, Ops);
10379
10380 // Some ACLE builtins leave out the argument to specify the predicate
10381 // pattern, which is expected to be expanded to an SV_ALL pattern.
10382 if (TypeFlags.isAppendSVALL())
10383 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10384 if (TypeFlags.isInsertOp1SVALL())
10385 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10386
10387 // Predicates must match the main datatype.
10388 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10389 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10390 if (PredTy->getElementType()->isIntegerTy(1))
10391 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10392
10393 // Splat scalar operand to vector (intrinsics with _n infix)
10394 if (TypeFlags.hasSplatOperand()) {
10395 unsigned OpNo = TypeFlags.getSplatOperand();
10396 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10397 }
10398
10399 if (TypeFlags.isReverseCompare())
10400 std::swap(Ops[1], Ops[2]);
10401 else if (TypeFlags.isReverseUSDOT())
10402 std::swap(Ops[1], Ops[2]);
10403 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10404 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10405 std::swap(Ops[1], Ops[2]);
10406 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10407 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10408 std::swap(Ops[1], Ops[3]);
10409
10410 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10411 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10412 llvm::Type *OpndTy = Ops[1]->getType();
10413 auto *SplatZero = Constant::getNullValue(OpndTy);
10414 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10415 }
10416
10417 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10418 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10419 Value *Call = Builder.CreateCall(F, Ops);
10420
10421 // Predicate results must be converted to svbool_t.
10422 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10423 if (PredTy->getScalarType()->isIntegerTy(1))
10424 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10425
10426 return FormSVEBuiltinResult(Call);
10427 }
10428
10429 switch (BuiltinID) {
10430 default:
10431 return nullptr;
10432
10433 case SVE::BI__builtin_sve_svreinterpret_b: {
10434 auto SVCountTy =
10435 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10436 Function *CastFromSVCountF =
10437 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10438 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10439 }
10440 case SVE::BI__builtin_sve_svreinterpret_c: {
10441 auto SVCountTy =
10442 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10443 Function *CastToSVCountF =
10444 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10445 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10446 }
10447
10448 case SVE::BI__builtin_sve_svpsel_lane_b8:
10449 case SVE::BI__builtin_sve_svpsel_lane_b16:
10450 case SVE::BI__builtin_sve_svpsel_lane_b32:
10451 case SVE::BI__builtin_sve_svpsel_lane_b64:
10452 case SVE::BI__builtin_sve_svpsel_lane_c8:
10453 case SVE::BI__builtin_sve_svpsel_lane_c16:
10454 case SVE::BI__builtin_sve_svpsel_lane_c32:
10455 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10456 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10457 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10458 "aarch64.svcount")) &&
10459 "Unexpected TargetExtType");
10460 auto SVCountTy =
10461 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10462 Function *CastFromSVCountF =
10463 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10464 Function *CastToSVCountF =
10465 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10466
10467 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10468 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10469 llvm::Value *Ops0 =
10470 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10471 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10472 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10473 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10474 }
10475 case SVE::BI__builtin_sve_svmov_b_z: {
10476 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10477 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10478 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10479 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10480 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10481 }
10482
10483 case SVE::BI__builtin_sve_svnot_b_z: {
10484 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10485 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10486 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10487 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10488 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10489 }
10490
10491 case SVE::BI__builtin_sve_svmovlb_u16:
10492 case SVE::BI__builtin_sve_svmovlb_u32:
10493 case SVE::BI__builtin_sve_svmovlb_u64:
10494 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10495
10496 case SVE::BI__builtin_sve_svmovlb_s16:
10497 case SVE::BI__builtin_sve_svmovlb_s32:
10498 case SVE::BI__builtin_sve_svmovlb_s64:
10499 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10500
10501 case SVE::BI__builtin_sve_svmovlt_u16:
10502 case SVE::BI__builtin_sve_svmovlt_u32:
10503 case SVE::BI__builtin_sve_svmovlt_u64:
10504 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10505
10506 case SVE::BI__builtin_sve_svmovlt_s16:
10507 case SVE::BI__builtin_sve_svmovlt_s32:
10508 case SVE::BI__builtin_sve_svmovlt_s64:
10509 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10510
10511 case SVE::BI__builtin_sve_svpmullt_u16:
10512 case SVE::BI__builtin_sve_svpmullt_u64:
10513 case SVE::BI__builtin_sve_svpmullt_n_u16:
10514 case SVE::BI__builtin_sve_svpmullt_n_u64:
10515 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10516
10517 case SVE::BI__builtin_sve_svpmullb_u16:
10518 case SVE::BI__builtin_sve_svpmullb_u64:
10519 case SVE::BI__builtin_sve_svpmullb_n_u16:
10520 case SVE::BI__builtin_sve_svpmullb_n_u64:
10521 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10522
10523 case SVE::BI__builtin_sve_svdup_n_b8:
10524 case SVE::BI__builtin_sve_svdup_n_b16:
10525 case SVE::BI__builtin_sve_svdup_n_b32:
10526 case SVE::BI__builtin_sve_svdup_n_b64: {
10527 Value *CmpNE =
10528 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10529 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10530 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10531 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10532 }
10533
10534 case SVE::BI__builtin_sve_svdupq_n_b8:
10535 case SVE::BI__builtin_sve_svdupq_n_b16:
10536 case SVE::BI__builtin_sve_svdupq_n_b32:
10537 case SVE::BI__builtin_sve_svdupq_n_b64:
10538 case SVE::BI__builtin_sve_svdupq_n_u8:
10539 case SVE::BI__builtin_sve_svdupq_n_s8:
10540 case SVE::BI__builtin_sve_svdupq_n_u64:
10541 case SVE::BI__builtin_sve_svdupq_n_f64:
10542 case SVE::BI__builtin_sve_svdupq_n_s64:
10543 case SVE::BI__builtin_sve_svdupq_n_u16:
10544 case SVE::BI__builtin_sve_svdupq_n_f16:
10545 case SVE::BI__builtin_sve_svdupq_n_bf16:
10546 case SVE::BI__builtin_sve_svdupq_n_s16:
10547 case SVE::BI__builtin_sve_svdupq_n_u32:
10548 case SVE::BI__builtin_sve_svdupq_n_f32:
10549 case SVE::BI__builtin_sve_svdupq_n_s32: {
10550 // These builtins are implemented by building a fixed vector from the scalar
10551 // operands and broadcasting it to all 128-bit segments via dupq_lane.
10552 unsigned NumOpnds = Ops.size();
10553
10554 bool IsBoolTy =
10555 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10556
10557 // For svdupq_n_b* the element type is an integer of width 128/NumOpnds,
10558 // so that the compare can use the width that is natural for the expected
10559 // number of predicate lanes.
10560 llvm::Type *EltTy = Ops[0]->getType();
10561 if (IsBoolTy)
10562 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
10563
10564 SmallVector<llvm::Value *, 16> VecOps;
10565 for (unsigned I = 0; I < NumOpnds; ++I)
10566 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10567 Value *Vec = BuildVector(VecOps);
10568
10569 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10570 Value *InsertSubVec = Builder.CreateInsertVector(
10571 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10572
10573 Function *F =
10574 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10575 Value *DupQLane =
10576 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10577
10578 if (!IsBoolTy)
10579 return DupQLane;
10580
10581 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10582 Value *Pred = EmitSVEAllTruePred(TypeFlags);
10583
10584 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10585 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10586 : Intrinsic::aarch64_sve_cmpne_wide,
10587 OverloadedTy);
10588 Value *Call = Builder.CreateCall(
10589 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10590 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10591 }
10592
10593 case SVE::BI__builtin_sve_svpfalse_b:
10594 return ConstantInt::getFalse(Ty);
10595
10596 case SVE::BI__builtin_sve_svpfalse_c: {
10597 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10598 Function *CastToSVCountF =
10599 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10600 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10601 }
10602
10603 case SVE::BI__builtin_sve_svlen_bf16:
10604 case SVE::BI__builtin_sve_svlen_f16:
10605 case SVE::BI__builtin_sve_svlen_f32:
10606 case SVE::BI__builtin_sve_svlen_f64:
10607 case SVE::BI__builtin_sve_svlen_s8:
10608 case SVE::BI__builtin_sve_svlen_s16:
10609 case SVE::BI__builtin_sve_svlen_s32:
10610 case SVE::BI__builtin_sve_svlen_s64:
10611 case SVE::BI__builtin_sve_svlen_u8:
10612 case SVE::BI__builtin_sve_svlen_u16:
10613 case SVE::BI__builtin_sve_svlen_u32:
10614 case SVE::BI__builtin_sve_svlen_u64: {
10615 SVETypeFlags TF(Builtin->TypeModifier);
10616 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10617 auto *NumEls =
10618 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10619
10620 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10621 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10622 }
10623
10624 case SVE::BI__builtin_sve_svtbl2_u8:
10625 case SVE::BI__builtin_sve_svtbl2_s8:
10626 case SVE::BI__builtin_sve_svtbl2_u16:
10627 case SVE::BI__builtin_sve_svtbl2_s16:
10628 case SVE::BI__builtin_sve_svtbl2_u32:
10629 case SVE::BI__builtin_sve_svtbl2_s32:
10630 case SVE::BI__builtin_sve_svtbl2_u64:
10631 case SVE::BI__builtin_sve_svtbl2_s64:
10632 case SVE::BI__builtin_sve_svtbl2_f16:
10633 case SVE::BI__builtin_sve_svtbl2_bf16:
10634 case SVE::BI__builtin_sve_svtbl2_f32:
10635 case SVE::BI__builtin_sve_svtbl2_f64: {
10636 SVETypeFlags TF(Builtin->TypeModifier);
10637 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10638 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10639 return Builder.CreateCall(F, Ops);
10640 }
10641
10642 case SVE::BI__builtin_sve_svset_neonq_s8:
10643 case SVE::BI__builtin_sve_svset_neonq_s16:
10644 case SVE::BI__builtin_sve_svset_neonq_s32:
10645 case SVE::BI__builtin_sve_svset_neonq_s64:
10646 case SVE::BI__builtin_sve_svset_neonq_u8:
10647 case SVE::BI__builtin_sve_svset_neonq_u16:
10648 case SVE::BI__builtin_sve_svset_neonq_u32:
10649 case SVE::BI__builtin_sve_svset_neonq_u64:
10650 case SVE::BI__builtin_sve_svset_neonq_f16:
10651 case SVE::BI__builtin_sve_svset_neonq_f32:
10652 case SVE::BI__builtin_sve_svset_neonq_f64:
10653 case SVE::BI__builtin_sve_svset_neonq_bf16: {
10654 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10655 }
10656
10657 case SVE::BI__builtin_sve_svget_neonq_s8:
10658 case SVE::BI__builtin_sve_svget_neonq_s16:
10659 case SVE::BI__builtin_sve_svget_neonq_s32:
10660 case SVE::BI__builtin_sve_svget_neonq_s64:
10661 case SVE::BI__builtin_sve_svget_neonq_u8:
10662 case SVE::BI__builtin_sve_svget_neonq_u16:
10663 case SVE::BI__builtin_sve_svget_neonq_u32:
10664 case SVE::BI__builtin_sve_svget_neonq_u64:
10665 case SVE::BI__builtin_sve_svget_neonq_f16:
10666 case SVE::BI__builtin_sve_svget_neonq_f32:
10667 case SVE::BI__builtin_sve_svget_neonq_f64:
10668 case SVE::BI__builtin_sve_svget_neonq_bf16: {
10669 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10670 }
10671
10672 case SVE::BI__builtin_sve_svdup_neonq_s8:
10673 case SVE::BI__builtin_sve_svdup_neonq_s16:
10674 case SVE::BI__builtin_sve_svdup_neonq_s32:
10675 case SVE::BI__builtin_sve_svdup_neonq_s64:
10676 case SVE::BI__builtin_sve_svdup_neonq_u8:
10677 case SVE::BI__builtin_sve_svdup_neonq_u16:
10678 case SVE::BI__builtin_sve_svdup_neonq_u32:
10679 case SVE::BI__builtin_sve_svdup_neonq_u64:
10680 case SVE::BI__builtin_sve_svdup_neonq_f16:
10681 case SVE::BI__builtin_sve_svdup_neonq_f32:
10682 case SVE::BI__builtin_sve_svdup_neonq_f64:
10683 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10684 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10685 Builder.getInt64(0));
10686 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10687 {Insert, Builder.getInt64(0)});
10688 }
10689 }
10690
10691 // Should not happen.
10692 return nullptr;
10693}
10694
10695static void swapCommutativeSMEOperands(unsigned BuiltinID,
10696 SmallVectorImpl<Value *> &Ops) {
10697 unsigned MultiVec;
10698 switch (BuiltinID) {
10699 default:
10700 return;
10701 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10702 MultiVec = 1;
10703 break;
10704 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10705 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10706 MultiVec = 2;
10707 break;
10708 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10709 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10710 MultiVec = 4;
10711 break;
10712 }
10713
10714 if (MultiVec > 0)
10715 for (unsigned I = 0; I < MultiVec; ++I)
10716 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
10717}
10718
10719Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10720 const CallExpr *E) {
10721 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10722 AArch64SMEIntrinsicsProvenSorted);
10723
10724 llvm::SmallVector<Value *, 4> Ops;
10725 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10726 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10727
10728 if (TypeFlags.isLoad() || TypeFlags.isStore())
10729 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10730 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10731 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10732 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10733 BuiltinID == SME::BI__builtin_sme_svzero_za)
10734 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10735 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10736 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10737 BuiltinID == SME::BI__builtin_sme_svldr_za ||
10738 BuiltinID == SME::BI__builtin_sme_svstr_za)
10739 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10740
10741 // Handle builtins which require their multi-vector operands to be swapped
10742 swapCommutativeSMEOperands(BuiltinID, Ops);
10743
10744 // Should not happen!
10745 if (Builtin->LLVMIntrinsic == 0)
10746 return nullptr;
10747
10748 // Predicates must match the main datatype.
10749 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10750 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10751 if (PredTy->getElementType()->isIntegerTy(1))
10752 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10753
10754 Function *F =
10755 TypeFlags.isOverloadNone()
10756 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10757 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10758 Value *Call = Builder.CreateCall(F, Ops);
10759
10760 return FormSVEBuiltinResult(Call);
10761}
10762
10763Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10764 const CallExpr *E,
10765 llvm::Triple::ArchType Arch) {
10766 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10767 BuiltinID <= clang::AArch64::LastSVEBuiltin)
10768 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10769
10770 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10771 BuiltinID <= clang::AArch64::LastSMEBuiltin)
10772 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10773
10774 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
10775 return EmitAArch64CpuSupports(E);
10776
10777 unsigned HintID = static_cast<unsigned>(-1);
10778 switch (BuiltinID) {
10779 default: break;
10780 case clang::AArch64::BI__builtin_arm_nop:
10781 HintID = 0;
10782 break;
10783 case clang::AArch64::BI__builtin_arm_yield:
10784 case clang::AArch64::BI__yield:
10785 HintID = 1;
10786 break;
10787 case clang::AArch64::BI__builtin_arm_wfe:
10788 case clang::AArch64::BI__wfe:
10789 HintID = 2;
10790 break;
10791 case clang::AArch64::BI__builtin_arm_wfi:
10792 case clang::AArch64::BI__wfi:
10793 HintID = 3;
10794 break;
10795 case clang::AArch64::BI__builtin_arm_sev:
10796 case clang::AArch64::BI__sev:
10797 HintID = 4;
10798 break;
10799 case clang::AArch64::BI__builtin_arm_sevl:
10800 case clang::AArch64::BI__sevl:
10801 HintID = 5;
10802 break;
10803 }
10804
10805 if (HintID != static_cast<unsigned>(-1)) {
10806 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10807 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10808 }
10809
10810 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
10811 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
10812 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10813 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
10814 }
10815
10816 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10817 // Create call to __arm_sme_state and store the results to the two pointers.
10818 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10819 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10820 false),
10821 "__arm_sme_state"));
10822 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
10823 "aarch64_pstate_sm_compatible");
10824 CI->setAttributes(Attrs);
10825 CI->setCallingConv(
10826 llvm::CallingConv::
10827 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10828 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10829 EmitPointerWithAlignment(E->getArg(0)));
10830 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10831 EmitPointerWithAlignment(E->getArg(1)));
10832 }
10833
10834 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10835 assert((getContext().getTypeSize(E->getType()) == 32) &&
10836 "rbit of unusual size!");
10837 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10838 return Builder.CreateCall(
10839 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10840 }
10841 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10842 assert((getContext().getTypeSize(E->getType()) == 64) &&
10843 "rbit of unusual size!");
10844 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10845 return Builder.CreateCall(
10846 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10847 }
10848
10849 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10850 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10851 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10852 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10853 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10854 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10855 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10856 return Res;
10857 }
10858
10859 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10860 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10861 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10862 "cls");
10863 }
10864 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
10865 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10866 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
10867 "cls");
10868 }
10869
10870 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
10871 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
10872 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10873 llvm::Type *Ty = Arg->getType();
10874 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
10875 Arg, "frint32z");
10876 }
10877
10878 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
10879 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
10880 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10881 llvm::Type *Ty = Arg->getType();
10882 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
10883 Arg, "frint64z");
10884 }
10885
10886 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
10887 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
10888 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10889 llvm::Type *Ty = Arg->getType();
10890 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
10891 Arg, "frint32x");
10892 }
10893
10894 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
10895 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
10896 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10897 llvm::Type *Ty = Arg->getType();
10898 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
10899 Arg, "frint64x");
10900 }
10901
10902 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
10903 assert((getContext().getTypeSize(E->getType()) == 32) &&
10904 "__jcvt of unusual size!");
10905 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10906 return Builder.CreateCall(
10907 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
10908 }
10909
10910 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
10911 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
10912 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
10913 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
10914 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
10915 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
10916
10917 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
10918 // Load from the address via an LLVM intrinsic, receiving a
10919 // tuple of 8 i64 words, and store each one to ValPtr.
10920 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
10921 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
10922 llvm::Value *ToRet;
10923 for (size_t i = 0; i < 8; i++) {
10924 llvm::Value *ValOffsetPtr =
10925 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10926 Address Addr =
10927 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10928 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
10929 }
10930 return ToRet;
10931 } else {
10932 // Load 8 i64 words from ValPtr, and store them to the address
10933 // via an LLVM intrinsic.
10934 llvm::SmallVector<llvm::Value *, 9> Args;
10935 Args.push_back(MemAddr);
10936 for (size_t i = 0; i < 8; i++) {
10937 llvm::Value *ValOffsetPtr =
10938 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
10939 Address Addr =
10940 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
10941 Args.push_back(Builder.CreateLoad(Addr));
10942 }
10943
10944 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
10945 ? Intrinsic::aarch64_st64b
10946 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
10947 ? Intrinsic::aarch64_st64bv
10948 : Intrinsic::aarch64_st64bv0);
10949 Function *F = CGM.getIntrinsic(Intr);
10950 return Builder.CreateCall(F, Args);
10951 }
10952 }
10953
10954 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
10955 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
10956
10957 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
10958 ? Intrinsic::aarch64_rndr
10959 : Intrinsic::aarch64_rndrrs);
10960 Function *F = CGM.getIntrinsic(Intr);
10961 llvm::Value *Val = Builder.CreateCall(F);
10962 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
10963 Value *Status = Builder.CreateExtractValue(Val, 1);
10964
10965 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
10966 Builder.CreateStore(RandomValue, MemAddress);
10967 Status = Builder.CreateZExt(Status, Int32Ty);
10968 return Status;
10969 }
10970
10971 if (BuiltinID == clang::AArch64::BI__clear_cache) {
10972 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
10973 const FunctionDecl *FD = E->getDirectCallee();
10974 Value *Ops[2];
10975 for (unsigned i = 0; i < 2; i++)
10976 Ops[i] = EmitScalarExpr(E->getArg(i));
10977 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
10978 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
10979 StringRef Name = FD->getName();
10980 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
10981 }
10982
10983 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
10984 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
10985 getContext().getTypeSize(E->getType()) == 128) {
10986 Function *F =
10987 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
10988 ? Intrinsic::aarch64_ldaxp
10989 : Intrinsic::aarch64_ldxp);
10990
10991 Value *LdPtr = EmitScalarExpr(E->getArg(0));
10992 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
10993
10994 Value *Val0 = Builder.CreateExtractValue(Val, 1);
10995 Value *Val1 = Builder.CreateExtractValue(Val, 0);
10996 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
10997 Val0 = Builder.CreateZExt(Val0, Int128Ty);
10998 Val1 = Builder.CreateZExt(Val1, Int128Ty);
10999
11000 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11001 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11002 Val = Builder.CreateOr(Val, Val1);
11003 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11004 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11005 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11006 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11007
11008 QualType Ty = E->getType();
11009 llvm::Type *RealResTy = ConvertType(Ty);
11010 llvm::Type *IntTy =
11011 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11012
11013 Function *F =
11014 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11015 ? Intrinsic::aarch64_ldaxr
11016 : Intrinsic::aarch64_ldxr,
11017 UnqualPtrTy);
11018 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11019 Val->addParamAttr(
11020 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11021
11022 if (RealResTy->isPointerTy())
11023 return Builder.CreateIntToPtr(Val, RealResTy);
11024
11025 llvm::Type *IntResTy = llvm::IntegerType::get(
11026 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11027 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11028 RealResTy);
11029 }
11030
11031 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11032 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11033 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11034 Function *F =
11035 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11036 ? Intrinsic::aarch64_stlxp
11037 : Intrinsic::aarch64_stxp);
11038 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11039
11040 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11041 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11042
11043 Tmp = Tmp.withElementType(STy);
11044 llvm::Value *Val = Builder.CreateLoad(Tmp);
11045
11046 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11047 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11048 Value *StPtr = EmitScalarExpr(E->getArg(1));
11049 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11050 }
11051
11052 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11053 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11054 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11055 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11056
11057 QualType Ty = E->getArg(0)->getType();
11058 llvm::Type *StoreTy =
11059 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11060
11061 if (StoreVal->getType()->isPointerTy())
11062 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11063 else {
11064 llvm::Type *IntTy = llvm::IntegerType::get(
11065 getLLVMContext(),
11066 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11067 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11068 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11069 }
11070
11071 Function *F =
11072 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11073 ? Intrinsic::aarch64_stlxr
11074 : Intrinsic::aarch64_stxr,
11075 StoreAddr->getType());
11076 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11077 CI->addParamAttr(
11078 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11079 return CI;
11080 }
11081
11082 if (BuiltinID == clang::AArch64::BI__getReg) {
11083 Expr::EvalResult Result;
11084 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11085 llvm_unreachable("Sema will ensure that the parameter is constant");
11086
11087 llvm::APSInt Value = Result.Val.getInt();
11088 LLVMContext &Context = CGM.getLLVMContext();
11089 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11090
11091 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11092 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11093 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11094
11095 llvm::Function *F =
11096 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11097 return Builder.CreateCall(F, Metadata);
11098 }
11099
11100 if (BuiltinID == clang::AArch64::BI__break) {
11101 Expr::EvalResult Result;
11102 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11103 llvm_unreachable("Sema will ensure that the parameter is constant");
11104
11105 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11106 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11107 }
11108
11109 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11110 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11111 return Builder.CreateCall(F);
11112 }
11113
11114 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11115 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11116 llvm::SyncScope::SingleThread);
11117
11118 // CRC32
11119 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11120 switch (BuiltinID) {
11121 case clang::AArch64::BI__builtin_arm_crc32b:
11122 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11123 case clang::AArch64::BI__builtin_arm_crc32cb:
11124 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11125 case clang::AArch64::BI__builtin_arm_crc32h:
11126 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11127 case clang::AArch64::BI__builtin_arm_crc32ch:
11128 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11129 case clang::AArch64::BI__builtin_arm_crc32w:
11130 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11131 case clang::AArch64::BI__builtin_arm_crc32cw:
11132 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11133 case clang::AArch64::BI__builtin_arm_crc32d:
11134 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11135 case clang::AArch64::BI__builtin_arm_crc32cd:
11136 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11137 }
11138
11139 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11140 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11141 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11142 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11143
11144 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11145 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11146
11147 return Builder.CreateCall(F, {Arg0, Arg1});
11148 }
11149
11150 // Memory Operations (MOPS)
11151 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11152 Value *Dst = EmitScalarExpr(E->getArg(0));
11153 Value *Val = EmitScalarExpr(E->getArg(1));
11154 Value *Size = EmitScalarExpr(E->getArg(2));
11155 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
11156 Val = Builder.CreateTrunc(Val, Int8Ty);
11157 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11158 return Builder.CreateCall(
11159 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11160 }
11161
11162 // Memory Tagging Extensions (MTE) Intrinsics
11163 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11164 switch (BuiltinID) {
11165 case clang::AArch64::BI__builtin_arm_irg:
11166 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11167 case clang::AArch64::BI__builtin_arm_addg:
11168 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11169 case clang::AArch64::BI__builtin_arm_gmi:
11170 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11171 case clang::AArch64::BI__builtin_arm_ldg:
11172 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11173 case clang::AArch64::BI__builtin_arm_stg:
11174 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11175 case clang::AArch64::BI__builtin_arm_subp:
11176 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11177 }
11178
11179 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11180 llvm::Type *T = ConvertType(E->getType());
11181
11182 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11183 Value *Pointer = EmitScalarExpr(E->getArg(0));
11184 Value *Mask = EmitScalarExpr(E->getArg(1));
11185
11186 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11187 Mask = Builder.CreateZExt(Mask, Int64Ty);
11188 Value *RV = Builder.CreateCall(
11189 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
11190 return Builder.CreatePointerCast(RV, T);
11191 }
11192 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11193 Value *Pointer = EmitScalarExpr(E->getArg(0));
11194 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11195
11196 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11197 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11198 Value *RV = Builder.CreateCall(
11199 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
11200 return Builder.CreatePointerCast(RV, T);
11201 }
11202 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11203 Value *Pointer = EmitScalarExpr(E->getArg(0));
11204 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11205
11206 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11207 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11208 return Builder.CreateCall(
11209 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11210 }
11211 // Although it is possible to supply a different return
11212 // address (first arg) to this intrinsic, for now we set the
11213 // return address to be the same as the input address.
11214 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11215 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11216 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11217 Value *RV = Builder.CreateCall(
11218 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11219 return Builder.CreatePointerCast(RV, T);
11220 }
11221 // Although it is possible to supply a different tag (to set)
11222 // to this intrinsic (as first arg), for now we supply the tag
11223 // that is in the input address argument (the common use case).
11224 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11225 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11226 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11227 return Builder.CreateCall(
11228 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11229 }
11230 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11231 Value *PointerA = EmitScalarExpr(E->getArg(0));
11232 Value *PointerB = EmitScalarExpr(E->getArg(1));
11233 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
11234 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
11235 return Builder.CreateCall(
11236 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11237 }
11238 }
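// Rough shape of the MTE lowering above, e.g. for __builtin_arm_irg(p, mask):
// the pointer is cast to i8*, the mask is zero-extended to i64, and the result
// of @llvm.aarch64.irg is cast back to the builtin's pointer type. As the
// comments note, the ldg/stg cases currently pass the same value for both the
// tag operand and the address operand.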
11239
11240 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11241 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11242 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11243 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11244 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11245 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11246 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11247 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11248
11249 SpecialRegisterAccessKind AccessKind = Write;
11250 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11251 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11252 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11253 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11254 AccessKind = VolatileRead;
11255
11256 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11257 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11258
11259 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11260 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11261
11262 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11263 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11264
11265 llvm::Type *ValueType;
11266 llvm::Type *RegisterType = Int64Ty;
11267 if (Is32Bit) {
11268 ValueType = Int32Ty;
11269 } else if (Is128Bit) {
11270 llvm::Type *Int128Ty =
11271 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11272 ValueType = Int128Ty;
11273 RegisterType = Int128Ty;
11274 } else if (IsPointerBuiltin) {
11275 ValueType = VoidPtrTy;
11276 } else {
11277 ValueType = Int64Ty;
11278 };
11279
11280 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11281 AccessKind);
11282 }
11283
11284 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11285 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11286 LLVMContext &Context = CGM.getLLVMContext();
11287
11288 unsigned SysReg =
11289 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11290
11291 std::string SysRegStr;
11292 llvm::raw_string_ostream(SysRegStr) <<
11293 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11294 ((SysReg >> 11) & 7) << ":" <<
11295 ((SysReg >> 7) & 15) << ":" <<
11296 ((SysReg >> 3) & 15) << ":" <<
11297 ( SysReg & 7);
11298
11299 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11300 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11301 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11302
11303 llvm::Type *RegisterType = Int64Ty;
11304 llvm::Type *Types[] = { RegisterType };
11305
11306 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11307 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11308
11309 return Builder.CreateCall(F, Metadata);
11310 }
11311
11312 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11313 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11314
11315 return Builder.CreateCall(F, { Metadata, ArgValue });
11316 }
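// The metadata string built above has the form "op0:op1:CRn:CRm:op2", where
// op0 is reconstructed as 2 | (bit 14 of the packed value). For example
// (a sketch), the packed encoding of TPIDR_EL0 (S3_3_C13_C0_2),
//   (1 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2,
// yields the register-name string "3:3:13:0:2".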
11317
11318 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11319 llvm::Function *F =
11320 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11321 return Builder.CreateCall(F);
11322 }
11323
11324 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11325 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11326 return Builder.CreateCall(F);
11327 }
11328
11329 if (BuiltinID == clang::AArch64::BI__mulh ||
11330 BuiltinID == clang::AArch64::BI__umulh) {
11331 llvm::Type *ResType = ConvertType(E->getType());
11332 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11333
11334 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11335 Value *LHS =
11336 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11337 Value *RHS =
11338 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11339
11340 Value *MulResult, *HigherBits;
11341 if (IsSigned) {
11342 MulResult = Builder.CreateNSWMul(LHS, RHS);
11343 HigherBits = Builder.CreateAShr(MulResult, 64);
11344 } else {
11345 MulResult = Builder.CreateNUWMul(LHS, RHS);
11346 HigherBits = Builder.CreateLShr(MulResult, 64);
11347 }
11348 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11349
11350 return HigherBits;
11351 }
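// Scalar model of the lowering above (a sketch): __mulh(a, b) computes
// ((__int128)a * b) >> 64 with an arithmetic shift, and __umulh(a, b) computes
// ((unsigned __int128)a * b) >> 64 with a logical shift.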
11352
11353 if (BuiltinID == AArch64::BI__writex18byte ||
11354 BuiltinID == AArch64::BI__writex18word ||
11355 BuiltinID == AArch64::BI__writex18dword ||
11356 BuiltinID == AArch64::BI__writex18qword) {
11357 // Read x18 as i8*
11358 LLVMContext &Context = CGM.getLLVMContext();
11359 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11360 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11361 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11362 llvm::Function *F =
11363 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11364 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11365 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11366
11367 // Store val at x18 + offset
11368 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11369 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11370 Value *Val = EmitScalarExpr(E->getArg(1));
11371 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11372 return Store;
11373 }
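// e.g. __writex18word(8, v) is expected to lower roughly to: read x18 via
// @llvm.read_register.i64, form x18 + 8 with an i8 GEP, and store the 16-bit
// value there with alignment 1.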
11374
11375 if (BuiltinID == AArch64::BI__readx18byte ||
11376 BuiltinID == AArch64::BI__readx18word ||
11377 BuiltinID == AArch64::BI__readx18dword ||
11378 BuiltinID == AArch64::BI__readx18qword) {
11379 llvm::Type *IntTy = ConvertType(E->getType());
11380
11381 // Read x18 as i8*
11382 LLVMContext &Context = CGM.getLLVMContext();
11383 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11384 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11385 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11386 llvm::Function *F =
11387 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11388 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11389 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11390
11391 // Load x18 + offset
11392 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11393 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11394 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11395 return Load;
11396 }
11397
11398 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11399 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11400 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11401 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11402 Value *Arg = EmitScalarExpr(E->getArg(0));
11403 llvm::Type *RetTy = ConvertType(E->getType());
11404 return Builder.CreateBitCast(Arg, RetTy);
11405 }
11406
11407 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11408 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11409 BuiltinID == AArch64::BI_CountLeadingZeros ||
11410 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11411 Value *Arg = EmitScalarExpr(E->getArg(0));
11412 llvm::Type *ArgType = Arg->getType();
11413
11414 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11415 BuiltinID == AArch64::BI_CountLeadingOnes64)
11416 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11417
11418 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11419 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11420
11421 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11422 BuiltinID == AArch64::BI_CountLeadingZeros64)
11423 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11424 return Result;
11425 }
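// The identity used above: the number of leading one bits of x equals the
// number of leading zero bits of ~x. For example,
// _CountLeadingOnes(0xF0000000) == ctlz(0x0FFFFFFF) == 4.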
11426
11427 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11428 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11429 Value *Arg = EmitScalarExpr(E->getArg(0));
11430
11431 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11432 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11433 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11434
11435 Value *Result = Builder.CreateCall(F, Arg, "cls");
11436 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11437 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11438 return Result;
11439 }
11440
11441 if (BuiltinID == AArch64::BI_CountOneBits ||
11442 BuiltinID == AArch64::BI_CountOneBits64) {
11443 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11444 llvm::Type *ArgType = ArgValue->getType();
11445 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11446
11447 Value *Result = Builder.CreateCall(F, ArgValue);
11448 if (BuiltinID == AArch64::BI_CountOneBits64)
11449 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11450 return Result;
11451 }
11452
11453 if (BuiltinID == AArch64::BI__prefetch) {
11454 Value *Address = EmitScalarExpr(E->getArg(0));
11455 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11456 Value *Locality = ConstantInt::get(Int32Ty, 3);
11457 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11458 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11459 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11460 }
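// i.e. __prefetch(p) is emitted as a read prefetch of data memory with
// maximal temporal locality, roughly:
//   call void @llvm.prefetch.p0(ptr %p, i32 0, i32 3, i32 1)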
11461
11462 // Handle MSVC intrinsics before argument evaluation to prevent double
11463 // evaluation.
11464 if (std::optional<MSVCIntrin> MsvcIntId =
11465 translateAarch64ToMsvcIntrin(BuiltinID))
11466 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11467
11468 // Some intrinsics are equivalent - if they are, use the base intrinsic ID.
11469 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11470 return P.first == BuiltinID;
11471 });
11472 if (It != end(NEONEquivalentIntrinsicMap))
11473 BuiltinID = It->second;
11474
11475 // Find out if any arguments are required to be integer constant
11476 // expressions.
11477 unsigned ICEArguments = 0;
11478 ASTContext::GetBuiltinTypeError Error;
11479 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11480 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11481
11482 llvm::SmallVector<Value*, 4> Ops;
11483 Address PtrOp0 = Address::invalid();
11484 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11485 if (i == 0) {
11486 switch (BuiltinID) {
11487 case NEON::BI__builtin_neon_vld1_v:
11488 case NEON::BI__builtin_neon_vld1q_v:
11489 case NEON::BI__builtin_neon_vld1_dup_v:
11490 case NEON::BI__builtin_neon_vld1q_dup_v:
11491 case NEON::BI__builtin_neon_vld1_lane_v:
11492 case NEON::BI__builtin_neon_vld1q_lane_v:
11493 case NEON::BI__builtin_neon_vst1_v:
11494 case NEON::BI__builtin_neon_vst1q_v:
11495 case NEON::BI__builtin_neon_vst1_lane_v:
11496 case NEON::BI__builtin_neon_vst1q_lane_v:
11497 case NEON::BI__builtin_neon_vldap1_lane_s64:
11498 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11499 case NEON::BI__builtin_neon_vstl1_lane_s64:
11500 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11501 // Get the alignment for the argument in addition to the value;
11502 // we'll use it later.
11503 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11504 Ops.push_back(PtrOp0.emitRawPointer(*this));
11505 continue;
11506 }
11507 }
11508 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11509 }
11510
11511 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11512 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11513 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11514
11515 if (Builtin) {
11516 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11517 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11518 assert(Result && "SISD intrinsic should have been handled");
11519 return Result;
11520 }
11521
11522 const Expr *Arg = E->getArg(E->getNumArgs()-1);
11523 NeonTypeFlags Type(0);
11524 if (std::optional<llvm::APSInt> Result =
11525 Arg->getIntegerConstantExpr(getContext()))
11526 // Determine the type of this overloaded NEON intrinsic.
11527 Type = NeonTypeFlags(Result->getZExtValue());
11528
11529 bool usgn = Type.isUnsigned();
11530 bool quad = Type.isQuad();
11531
11532 // Handle non-overloaded intrinsics first.
11533 switch (BuiltinID) {
11534 default: break;
11535 case NEON::BI__builtin_neon_vabsh_f16:
11536 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11537 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11538 case NEON::BI__builtin_neon_vaddq_p128: {
11539 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11540 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11541 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11542 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11543 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
11544 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11545 return Builder.CreateBitCast(Ops[0], Int128Ty);
11546 }
11547 case NEON::BI__builtin_neon_vldrq_p128: {
11548 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11549 Value *Ptr = EmitScalarExpr(E->getArg(0));
11550 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11551 CharUnits::fromQuantity(16));
11552 }
11553 case NEON::BI__builtin_neon_vstrq_p128: {
11554 Value *Ptr = Ops[0];
11555 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11556 }
11557 case NEON::BI__builtin_neon_vcvts_f32_u32:
11558 case NEON::BI__builtin_neon_vcvtd_f64_u64:
11559 usgn = true;
11560 [[fallthrough]];
11561 case NEON::BI__builtin_neon_vcvts_f32_s32:
11562 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11563 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11564 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11565 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11566 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11567 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11568 if (usgn)
11569 return Builder.CreateUIToFP(Ops[0], FTy);
11570 return Builder.CreateSIToFP(Ops[0], FTy);
11571 }
11572 case NEON::BI__builtin_neon_vcvth_f16_u16:
11573 case NEON::BI__builtin_neon_vcvth_f16_u32:
11574 case NEON::BI__builtin_neon_vcvth_f16_u64:
11575 usgn = true;
11576 [[fallthrough]];
11577 case NEON::BI__builtin_neon_vcvth_f16_s16:
11578 case NEON::BI__builtin_neon_vcvth_f16_s32:
11579 case NEON::BI__builtin_neon_vcvth_f16_s64: {
11580 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11581 llvm::Type *FTy = HalfTy;
11582 llvm::Type *InTy;
11583 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11584 InTy = Int64Ty;
11585 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11586 InTy = Int32Ty;
11587 else
11588 InTy = Int16Ty;
11589 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11590 if (usgn)
11591 return Builder.CreateUIToFP(Ops[0], FTy);
11592 return Builder.CreateSIToFP(Ops[0], FTy);
11593 }
11594 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11595 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11596 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11597 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11598 case NEON::BI__builtin_neon_vcvth_u16_f16:
11599 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11600 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11601 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11602 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11603 case NEON::BI__builtin_neon_vcvth_s16_f16: {
11604 unsigned Int;
11605 llvm::Type* InTy = Int32Ty;
11606 llvm::Type* FTy = HalfTy;
11607 llvm::Type *Tys[2] = {InTy, FTy};
11608 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11609 switch (BuiltinID) {
11610 default: llvm_unreachable("missing builtin ID in switch!");
11611 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11612 Int = Intrinsic::aarch64_neon_fcvtau; break;
11613 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11614 Int = Intrinsic::aarch64_neon_fcvtmu; break;
11615 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11616 Int = Intrinsic::aarch64_neon_fcvtnu; break;
11617 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11618 Int = Intrinsic::aarch64_neon_fcvtpu; break;
11619 case NEON::BI__builtin_neon_vcvth_u16_f16:
11620 Int = Intrinsic::aarch64_neon_fcvtzu; break;
11621 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11622 Int = Intrinsic::aarch64_neon_fcvtas; break;
11623 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11624 Int = Intrinsic::aarch64_neon_fcvtms; break;
11625 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11626 Int = Intrinsic::aarch64_neon_fcvtns; break;
11627 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11628 Int = Intrinsic::aarch64_neon_fcvtps; break;
11629 case NEON::BI__builtin_neon_vcvth_s16_f16:
11630 Int = Intrinsic::aarch64_neon_fcvtzs; break;
11631 }
11632 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11633 return Builder.CreateTrunc(Ops[0], Int16Ty);
11634 }
11635 case NEON::BI__builtin_neon_vcaleh_f16:
11636 case NEON::BI__builtin_neon_vcalth_f16:
11637 case NEON::BI__builtin_neon_vcageh_f16:
11638 case NEON::BI__builtin_neon_vcagth_f16: {
11639 unsigned Int;
11640 llvm::Type* InTy = Int32Ty;
11641 llvm::Type* FTy = HalfTy;
11642 llvm::Type *Tys[2] = {InTy, FTy};
11643 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11644 switch (BuiltinID) {
11645 default: llvm_unreachable("missing builtin ID in switch!");
11646 case NEON::BI__builtin_neon_vcageh_f16:
11647 Int = Intrinsic::aarch64_neon_facge; break;
11648 case NEON::BI__builtin_neon_vcagth_f16:
11649 Int = Intrinsic::aarch64_neon_facgt; break;
11650 case NEON::BI__builtin_neon_vcaleh_f16:
11651 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11652 case NEON::BI__builtin_neon_vcalth_f16:
11653 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11654 }
11655 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11656 return Builder.CreateTrunc(Ops[0], Int16Ty);
11657 }
11658 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11659 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11660 unsigned Int;
11661 llvm::Type* InTy = Int32Ty;
11662 llvm::Type* FTy = HalfTy;
11663 llvm::Type *Tys[2] = {InTy, FTy};
11664 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11665 switch (BuiltinID) {
11666 default: llvm_unreachable("missing builtin ID in switch!");
11667 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11668 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11669 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11670 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11671 }
11672 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11673 return Builder.CreateTrunc(Ops[0], Int16Ty);
11674 }
11675 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11676 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11677 unsigned Int;
11678 llvm::Type* FTy = HalfTy;
11679 llvm::Type* InTy = Int32Ty;
11680 llvm::Type *Tys[2] = {FTy, InTy};
11681 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11682 switch (BuiltinID) {
11683 default: llvm_unreachable("missing builtin ID in switch!");
11684 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11685 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11686 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11687 break;
11688 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11689 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11690 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11691 break;
11692 }
11693 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11694 }
11695 case NEON::BI__builtin_neon_vpaddd_s64: {
11696 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11697 Value *Vec = EmitScalarExpr(E->getArg(0));
11699 // The vector is v2i64, so make sure it's bitcast to that.
11699 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11700 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11701 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11702 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11703 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11704 // Pairwise addition of a v2i64 into a scalar i64.
11705 return Builder.CreateAdd(Op0, Op1, "vpaddd");
11706 }
11707 case NEON::BI__builtin_neon_vpaddd_f64: {
11708 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11709 Value *Vec = EmitScalarExpr(E->getArg(0));
11710 // The vector is v2f64, so make sure it's bitcast to that.
11711 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11712 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11713 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11714 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11715 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11716 // Pairwise addition of a v2f64 into a scalar f64.
11717 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11718 }
11719 case NEON::BI__builtin_neon_vpadds_f32: {
11720 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11721 Value *Vec = EmitScalarExpr(E->getArg(0));
11722 // The vector is v2f32, so make sure it's bitcast to that.
11723 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11724 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11725 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11726 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11727 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11728 // Pairwise addition of a v2f32 into a scalar f32.
11729 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11730 }
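// The three pairwise cases above share one scalar pattern (sketch):
//   vpaddd_s64(v) == v[0] + v[1], and likewise for vpaddd_f64 / vpadds_f32;
// i.e. extract both lanes and emit a single (f)add.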
11731 case NEON::BI__builtin_neon_vceqzd_s64:
11732 case NEON::BI__builtin_neon_vceqzd_f64:
11733 case NEON::BI__builtin_neon_vceqzs_f32:
11734 case NEON::BI__builtin_neon_vceqzh_f16:
11735 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11736 return EmitAArch64CompareBuiltinExpr(
11737 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11738 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11739 case NEON::BI__builtin_neon_vcgezd_s64:
11740 case NEON::BI__builtin_neon_vcgezd_f64:
11741 case NEON::BI__builtin_neon_vcgezs_f32:
11742 case NEON::BI__builtin_neon_vcgezh_f16:
11743 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11744 return EmitAArch64CompareBuiltinExpr(
11745 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11746 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11747 case NEON::BI__builtin_neon_vclezd_s64:
11748 case NEON::BI__builtin_neon_vclezd_f64:
11749 case NEON::BI__builtin_neon_vclezs_f32:
11750 case NEON::BI__builtin_neon_vclezh_f16:
11751 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11752 return EmitAArch64CompareBuiltinExpr(
11753 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11754 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11755 case NEON::BI__builtin_neon_vcgtzd_s64:
11756 case NEON::BI__builtin_neon_vcgtzd_f64:
11757 case NEON::BI__builtin_neon_vcgtzs_f32:
11758 case NEON::BI__builtin_neon_vcgtzh_f16:
11759 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11760 return EmitAArch64CompareBuiltinExpr(
11761 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11762 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11763 case NEON::BI__builtin_neon_vcltzd_s64:
11764 case NEON::BI__builtin_neon_vcltzd_f64:
11765 case NEON::BI__builtin_neon_vcltzs_f32:
11766 case NEON::BI__builtin_neon_vcltzh_f16:
11767 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11768 return EmitAArch64CompareBuiltinExpr(
11769 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11770 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
11771
11772 case NEON::BI__builtin_neon_vceqzd_u64: {
11773 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11774 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11775 Ops[0] =
11776 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11777 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11778 }
11779 case NEON::BI__builtin_neon_vceqd_f64:
11780 case NEON::BI__builtin_neon_vcled_f64:
11781 case NEON::BI__builtin_neon_vcltd_f64:
11782 case NEON::BI__builtin_neon_vcged_f64:
11783 case NEON::BI__builtin_neon_vcgtd_f64: {
11784 llvm::CmpInst::Predicate P;
11785 switch (BuiltinID) {
11786 default: llvm_unreachable("missing builtin ID in switch!");
11787 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11788 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11789 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11790 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11791 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11792 }
11793 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11794 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11795 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11796 if (P == llvm::FCmpInst::FCMP_OEQ)
11797 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11798 else
11799 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11800 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11801 }
11802 case NEON::BI__builtin_neon_vceqs_f32:
11803 case NEON::BI__builtin_neon_vcles_f32:
11804 case NEON::BI__builtin_neon_vclts_f32:
11805 case NEON::BI__builtin_neon_vcges_f32:
11806 case NEON::BI__builtin_neon_vcgts_f32: {
11807 llvm::CmpInst::Predicate P;
11808 switch (BuiltinID) {
11809 default: llvm_unreachable("missing builtin ID in switch!");
11810 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11811 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11812 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11813 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11814 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11815 }
11816 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11817 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11818 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11819 if (P == llvm::FCmpInst::FCMP_OEQ)
11820 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11821 else
11822 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11823 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11824 }
11825 case NEON::BI__builtin_neon_vceqh_f16:
11826 case NEON::BI__builtin_neon_vcleh_f16:
11827 case NEON::BI__builtin_neon_vclth_f16:
11828 case NEON::BI__builtin_neon_vcgeh_f16:
11829 case NEON::BI__builtin_neon_vcgth_f16: {
11830 llvm::CmpInst::Predicate P;
11831 switch (BuiltinID) {
11832 default: llvm_unreachable("missing builtin ID in switch!");
11833 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11834 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11835 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11836 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11837 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11838 }
11839 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11840 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11841 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11842 if (P == llvm::FCmpInst::FCMP_OEQ)
11843 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11844 else
11845 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11846 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11847 }
11848 case NEON::BI__builtin_neon_vceqd_s64:
11849 case NEON::BI__builtin_neon_vceqd_u64:
11850 case NEON::BI__builtin_neon_vcgtd_s64:
11851 case NEON::BI__builtin_neon_vcgtd_u64:
11852 case NEON::BI__builtin_neon_vcltd_s64:
11853 case NEON::BI__builtin_neon_vcltd_u64:
11854 case NEON::BI__builtin_neon_vcged_u64:
11855 case NEON::BI__builtin_neon_vcged_s64:
11856 case NEON::BI__builtin_neon_vcled_u64:
11857 case NEON::BI__builtin_neon_vcled_s64: {
11858 llvm::CmpInst::Predicate P;
11859 switch (BuiltinID) {
11860 default: llvm_unreachable("missing builtin ID in switch!");
11861 case NEON::BI__builtin_neon_vceqd_s64:
11862 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
11863 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
11864 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
11865 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
11866 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
11867 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
11868 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
11869 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
11870 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
11871 }
11872 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11873 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11874 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11875 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
11876 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
11877 }
11878 case NEON::BI__builtin_neon_vtstd_s64:
11879 case NEON::BI__builtin_neon_vtstd_u64: {
11880 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11881 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11882 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
11883 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
11884 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
11885 llvm::Constant::getNullValue(Int64Ty));
11886 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
11887 }
11888 case NEON::BI__builtin_neon_vset_lane_i8:
11889 case NEON::BI__builtin_neon_vset_lane_i16:
11890 case NEON::BI__builtin_neon_vset_lane_i32:
11891 case NEON::BI__builtin_neon_vset_lane_i64:
11892 case NEON::BI__builtin_neon_vset_lane_bf16:
11893 case NEON::BI__builtin_neon_vset_lane_f32:
11894 case NEON::BI__builtin_neon_vsetq_lane_i8:
11895 case NEON::BI__builtin_neon_vsetq_lane_i16:
11896 case NEON::BI__builtin_neon_vsetq_lane_i32:
11897 case NEON::BI__builtin_neon_vsetq_lane_i64:
11898 case NEON::BI__builtin_neon_vsetq_lane_bf16:
11899 case NEON::BI__builtin_neon_vsetq_lane_f32:
11900 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11901 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11902 case NEON::BI__builtin_neon_vset_lane_f64:
11903 // The vector type needs a cast for the v1f64 variant.
11904 Ops[1] =
11905 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
11906 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11907 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11908 case NEON::BI__builtin_neon_vsetq_lane_f64:
11909 // The vector type needs a cast for the v2f64 variant.
11910 Ops[1] =
11911 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
11912 Ops.push_back(EmitScalarExpr(E->getArg(2)));
11913 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
11914
11915 case NEON::BI__builtin_neon_vget_lane_i8:
11916 case NEON::BI__builtin_neon_vdupb_lane_i8:
11917 Ops[0] =
11918 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
11919 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11920 "vget_lane");
11921 case NEON::BI__builtin_neon_vgetq_lane_i8:
11922 case NEON::BI__builtin_neon_vdupb_laneq_i8:
11923 Ops[0] =
11924 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
11925 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11926 "vgetq_lane");
11927 case NEON::BI__builtin_neon_vget_lane_i16:
11928 case NEON::BI__builtin_neon_vduph_lane_i16:
11929 Ops[0] =
11930 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
11931 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11932 "vget_lane");
11933 case NEON::BI__builtin_neon_vgetq_lane_i16:
11934 case NEON::BI__builtin_neon_vduph_laneq_i16:
11935 Ops[0] =
11936 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
11937 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11938 "vgetq_lane");
11939 case NEON::BI__builtin_neon_vget_lane_i32:
11940 case NEON::BI__builtin_neon_vdups_lane_i32:
11941 Ops[0] =
11942 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
11943 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11944 "vget_lane");
11945 case NEON::BI__builtin_neon_vdups_lane_f32:
11946 Ops[0] =
11947 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11948 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11949 "vdups_lane");
11950 case NEON::BI__builtin_neon_vgetq_lane_i32:
11951 case NEON::BI__builtin_neon_vdups_laneq_i32:
11952 Ops[0] =
11953 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
11954 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11955 "vgetq_lane");
11956 case NEON::BI__builtin_neon_vget_lane_i64:
11957 case NEON::BI__builtin_neon_vdupd_lane_i64:
11958 Ops[0] =
11959 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
11960 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11961 "vget_lane");
11962 case NEON::BI__builtin_neon_vdupd_lane_f64:
11963 Ops[0] =
11964 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11965 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11966 "vdupd_lane");
11967 case NEON::BI__builtin_neon_vgetq_lane_i64:
11968 case NEON::BI__builtin_neon_vdupd_laneq_i64:
11969 Ops[0] =
11970 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
11971 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11972 "vgetq_lane");
11973 case NEON::BI__builtin_neon_vget_lane_f32:
11974 Ops[0] =
11975 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
11976 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11977 "vget_lane");
11978 case NEON::BI__builtin_neon_vget_lane_f64:
11979 Ops[0] =
11980 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
11981 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11982 "vget_lane");
11983 case NEON::BI__builtin_neon_vgetq_lane_f32:
11984 case NEON::BI__builtin_neon_vdups_laneq_f32:
11985 Ops[0] =
11986 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
11987 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11988 "vgetq_lane");
11989 case NEON::BI__builtin_neon_vgetq_lane_f64:
11990 case NEON::BI__builtin_neon_vdupd_laneq_f64:
11991 Ops[0] =
11992 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
11993 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
11994 "vgetq_lane");
11995 case NEON::BI__builtin_neon_vaddh_f16:
11996 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11997 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
11998 case NEON::BI__builtin_neon_vsubh_f16:
11999 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12000 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12001 case NEON::BI__builtin_neon_vmulh_f16:
12002 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12003 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12004 case NEON::BI__builtin_neon_vdivh_f16:
12005 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12006 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12007 case NEON::BI__builtin_neon_vfmah_f16:
12008 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12009 return emitCallMaybeConstrainedFPBuiltin(
12010 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12011 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12012 case NEON::BI__builtin_neon_vfmsh_f16: {
12013 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12014
12015 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12016 return emitCallMaybeConstrainedFPBuiltin(
12017 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12018 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12019 }
12020 case NEON::BI__builtin_neon_vaddd_s64:
12021 case NEON::BI__builtin_neon_vaddd_u64:
12022 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12023 case NEON::BI__builtin_neon_vsubd_s64:
12024 case NEON::BI__builtin_neon_vsubd_u64:
12025 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12026 case NEON::BI__builtin_neon_vqdmlalh_s16:
12027 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12028 SmallVector<Value *, 2> ProductOps;
12029 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12030 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12031 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12032 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12033 ProductOps, "vqdmlXl");
12034 Constant *CI = ConstantInt::get(SizeTy, 0);
12035 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12036
12037 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12038 ? Intrinsic::aarch64_neon_sqadd
12039 : Intrinsic::aarch64_neon_sqsub;
12040 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12041 }
12042 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12043 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12044 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12045 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12046 Ops, "vqshlu_n");
12047 }
12048 case NEON::BI__builtin_neon_vqshld_n_u64:
12049 case NEON::BI__builtin_neon_vqshld_n_s64: {
12050 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12051 ? Intrinsic::aarch64_neon_uqshl
12052 : Intrinsic::aarch64_neon_sqshl;
12053 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12054 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12055 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12056 }
12057 case NEON::BI__builtin_neon_vrshrd_n_u64:
12058 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12059 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12060 ? Intrinsic::aarch64_neon_urshl
12061 : Intrinsic::aarch64_neon_srshl;
12062 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12063 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12064 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12065 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12066 }
12067 case NEON::BI__builtin_neon_vrsrad_n_u64:
12068 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12069 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12070 ? Intrinsic::aarch64_neon_urshl
12071 : Intrinsic::aarch64_neon_srshl;
12072 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12073 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12074 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12075 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12076 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12077 }
12078 case NEON::BI__builtin_neon_vshld_n_s64:
12079 case NEON::BI__builtin_neon_vshld_n_u64: {
12080 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12081 return Builder.CreateShl(
12082 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12083 }
12084 case NEON::BI__builtin_neon_vshrd_n_s64: {
12085 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12086 return Builder.CreateAShr(
12087 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12088 Amt->getZExtValue())),
12089 "shrd_n");
12090 }
12091 case NEON::BI__builtin_neon_vshrd_n_u64: {
12092 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12093 uint64_t ShiftAmt = Amt->getZExtValue();
12094 // Right-shifting an unsigned value by its size yields 0.
12095 if (ShiftAmt == 64)
12096 return ConstantInt::get(Int64Ty, 0);
12097 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12098 "shrd_n");
12099 }
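// Note the asymmetry handled above (sketch): vshrd_n_u64(x, 64) folds to 0,
// while the signed variant clamps the shift amount to 63, so
// vshrd_n_s64(x, 64) behaves like x >> 63 (i.e. 0 or -1).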
12100 case NEON::BI__builtin_neon_vsrad_n_s64: {
12101 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12102 Ops[1] = Builder.CreateAShr(
12103 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12104 Amt->getZExtValue())),
12105 "shrd_n");
12106 return Builder.CreateAdd(Ops[0], Ops[1]);
12107 }
12108 case NEON::BI__builtin_neon_vsrad_n_u64: {
12109 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12110 uint64_t ShiftAmt = Amt->getZExtValue();
12111 // Right-shifting an unsigned value by its size yields 0.
12112 // As Op + 0 = Op, return Ops[0] directly.
12113 if (ShiftAmt == 64)
12114 return Ops[0];
12115 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12116 "shrd_n");
12117 return Builder.CreateAdd(Ops[0], Ops[1]);
12118 }
12119 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12120 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12121 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12122 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12123 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12124 "lane");
12125 SmallVector<Value *, 2> ProductOps;
12126 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12127 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12128 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12129 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12130 ProductOps, "vqdmlXl");
12131 Constant *CI = ConstantInt::get(SizeTy, 0);
12132 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12133 Ops.pop_back();
12134
12135 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12136 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12137 ? Intrinsic::aarch64_neon_sqadd
12138 : Intrinsic::aarch64_neon_sqsub;
12139 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12140 }
12141 case NEON::BI__builtin_neon_vqdmlals_s32:
12142 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12143 SmallVector<Value *, 2> ProductOps;
12144 ProductOps.push_back(Ops[1]);
12145 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12146 Ops[1] =
12147 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12148 ProductOps, "vqdmlXl");
12149
12150 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12151 ? Intrinsic::aarch64_neon_sqadd
12152 : Intrinsic::aarch64_neon_sqsub;
12153 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12154 }
12155 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12156 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12157 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12158 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12159 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12160 "lane");
12161 SmallVector<Value *, 2> ProductOps;
12162 ProductOps.push_back(Ops[1]);
12163 ProductOps.push_back(Ops[2]);
12164 Ops[1] =
12165 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12166 ProductOps, "vqdmlXl");
12167 Ops.pop_back();
12168
12169 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12170 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12171 ? Intrinsic::aarch64_neon_sqadd
12172 : Intrinsic::aarch64_neon_sqsub;
12173 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12174 }
12175 case NEON::BI__builtin_neon_vget_lane_bf16:
12176 case NEON::BI__builtin_neon_vduph_lane_bf16:
12177 case NEON::BI__builtin_neon_vduph_lane_f16: {
12178 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12179 "vget_lane");
12180 }
12181 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12182 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12183 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12184 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12185 "vgetq_lane");
12186 }
12187
12188 case clang::AArch64::BI_InterlockedAdd:
12189 case clang::AArch64::BI_InterlockedAdd64: {
12190 Address DestAddr = CheckAtomicAlignment(*this, E);
12191 Value *Val = EmitScalarExpr(E->getArg(1));
12192 AtomicRMWInst *RMWI =
12193 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12194 llvm::AtomicOrdering::SequentiallyConsistent);
12195 return Builder.CreateAdd(RMWI, Val);
12196 }
12197 }
12198
12199 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12200 llvm::Type *Ty = VTy;
12201 if (!Ty)
12202 return nullptr;
12203
12204 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12205 // defer to common code if it's been added to our special map.
12206 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12207 AArch64SIMDIntrinsicsProvenSorted);
12208
12209 if (Builtin)
12210 return EmitCommonNeonBuiltinExpr(
12211 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12212 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12213 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12214
12215 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12216 return V;
12217
12218 unsigned Int;
12219 switch (BuiltinID) {
12220 default: return nullptr;
12221 case NEON::BI__builtin_neon_vbsl_v:
12222 case NEON::BI__builtin_neon_vbslq_v: {
12223 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12224 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12225 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12226 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12227
12228 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12229 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12230 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12231 return Builder.CreateBitCast(Ops[0], Ty);
12232 }
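// This is the classic bitwise-select expansion; per element (sketch):
//   vbsl(mask, a, b) == (mask & a) | (~mask & b)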
12233 case NEON::BI__builtin_neon_vfma_lane_v:
12234 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12235 // The ARM builtins (and instructions) have the addend as the first
12236 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12237 Value *Addend = Ops[0];
12238 Value *Multiplicand = Ops[1];
12239 Value *LaneSource = Ops[2];
12240 Ops[0] = Multiplicand;
12241 Ops[1] = LaneSource;
12242 Ops[2] = Addend;
12243
12244 // Now adjust things to handle the lane access.
12245 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12246 ? llvm::FixedVectorType::get(VTy->getElementType(),
12247 VTy->getNumElements() / 2)
12248 : VTy;
12249 llvm::Constant *cst = cast<Constant>(Ops[3]);
12250 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12251 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12252 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12253
12254 Ops.pop_back();
12255 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12256 : Intrinsic::fma;
12257 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12258 }
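// After the operand swap and lane splat above, the emitted fma computes,
// per element (sketch):
//   result[i] = fma(multiplicand[i], lanesource[lane], addend[i])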
12259 case NEON::BI__builtin_neon_vfma_laneq_v: {
12260 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12261 // v1f64 fma should be mapped to Neon scalar f64 fma
12262 if (VTy && VTy->getElementType() == DoubleTy) {
12263 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12264 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12265 llvm::FixedVectorType *VTy =
12266 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12267 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12268 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12269 Value *Result;
12270 Result = emitCallMaybeConstrainedFPBuiltin(
12271 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12272 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12273 return Builder.CreateBitCast(Result, Ty);
12274 }
12275 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12276 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12277
12278 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12279 VTy->getNumElements() * 2);
12280 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12281 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12282 cast<ConstantInt>(Ops[3]));
12283 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12284
12285 return emitCallMaybeConstrainedFPBuiltin(
12286 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12287 {Ops[2], Ops[1], Ops[0]});
12288 }
12289 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12290 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12291 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12292
12293 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12294 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12295 return emitCallMaybeConstrainedFPBuiltin(
12296 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12297 {Ops[2], Ops[1], Ops[0]});
12298 }
12299 case NEON::BI__builtin_neon_vfmah_lane_f16:
12300 case NEON::BI__builtin_neon_vfmas_lane_f32:
12301 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12302 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12303 case NEON::BI__builtin_neon_vfmad_lane_f64:
12304 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12305 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12306 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12307 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12308 return emitCallMaybeConstrainedFPBuiltin(
12309 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12310 {Ops[1], Ops[2], Ops[0]});
12311 }
12312 case NEON::BI__builtin_neon_vmull_v:
12313 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12314 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12315 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12316 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12317 case NEON::BI__builtin_neon_vmax_v:
12318 case NEON::BI__builtin_neon_vmaxq_v:
12319 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12320 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12321 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12322 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12323 case NEON::BI__builtin_neon_vmaxh_f16: {
12324 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12325 Int = Intrinsic::aarch64_neon_fmax;
12326 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12327 }
12328 case NEON::BI__builtin_neon_vmin_v:
12329 case NEON::BI__builtin_neon_vminq_v:
12330 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12331 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12332 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12333 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12334 case NEON::BI__builtin_neon_vminh_f16: {
12335 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12336 Int = Intrinsic::aarch64_neon_fmin;
12337 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12338 }
12339 case NEON::BI__builtin_neon_vabd_v:
12340 case NEON::BI__builtin_neon_vabdq_v:
12341 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12342 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12343 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12344 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12345 case NEON::BI__builtin_neon_vpadal_v:
12346 case NEON::BI__builtin_neon_vpadalq_v: {
12347 unsigned ArgElts = VTy->getNumElements();
12348 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12349 unsigned BitWidth = EltTy->getBitWidth();
12350 auto *ArgTy = llvm::FixedVectorType::get(
12351 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12352 llvm::Type* Tys[2] = { VTy, ArgTy };
12353 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12354 llvm::SmallVector<llvm::Value*, 1> TmpOps;
12355 TmpOps.push_back(Ops[1]);
12356 Function *F = CGM.getIntrinsic(Int, Tys);
12357 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12358 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12359 return Builder.CreateAdd(tmp, addend);
12360 }
12361 case NEON::BI__builtin_neon_vpmin_v:
12362 case NEON::BI__builtin_neon_vpminq_v:
12363 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12364 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12365 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12366 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12367 case NEON::BI__builtin_neon_vpmax_v:
12368 case NEON::BI__builtin_neon_vpmaxq_v:
12369 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12370 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12371 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12372 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12373 case NEON::BI__builtin_neon_vminnm_v:
12374 case NEON::BI__builtin_neon_vminnmq_v:
12375 Int = Intrinsic::aarch64_neon_fminnm;
12376 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12377 case NEON::BI__builtin_neon_vminnmh_f16:
12378 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12379 Int = Intrinsic::aarch64_neon_fminnm;
12380 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12381 case NEON::BI__builtin_neon_vmaxnm_v:
12382 case NEON::BI__builtin_neon_vmaxnmq_v:
12383 Int = Intrinsic::aarch64_neon_fmaxnm;
12384 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12385 case NEON::BI__builtin_neon_vmaxnmh_f16:
12386 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12387 Int = Intrinsic::aarch64_neon_fmaxnm;
12388 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12389 case NEON::BI__builtin_neon_vrecpss_f32: {
12390 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12391 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12392 Ops, "vrecps");
12393 }
12394 case NEON::BI__builtin_neon_vrecpsd_f64:
12395 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12396 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12397 Ops, "vrecps");
12398 case NEON::BI__builtin_neon_vrecpsh_f16:
12399 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12400 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12401 Ops, "vrecps");
12402 case NEON::BI__builtin_neon_vqshrun_n_v:
12403 Int = Intrinsic::aarch64_neon_sqshrun;
12404 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12405 case NEON::BI__builtin_neon_vqrshrun_n_v:
12406 Int = Intrinsic::aarch64_neon_sqrshrun;
12407 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12408 case NEON::BI__builtin_neon_vqshrn_n_v:
12409 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12410 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12411 case NEON::BI__builtin_neon_vrshrn_n_v:
12412 Int = Intrinsic::aarch64_neon_rshrn;
12413 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12414 case NEON::BI__builtin_neon_vqrshrn_n_v:
12415 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12416 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12417 case NEON::BI__builtin_neon_vrndah_f16: {
12418 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12419 Int = Builder.getIsFPConstrained()
12420 ? Intrinsic::experimental_constrained_round
12421 : Intrinsic::round;
12422 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12423 }
12424 case NEON::BI__builtin_neon_vrnda_v:
12425 case NEON::BI__builtin_neon_vrndaq_v: {
12426 Int = Builder.getIsFPConstrained()
12427 ? Intrinsic::experimental_constrained_round
12428 : Intrinsic::round;
12429 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12430 }
12431 case NEON::BI__builtin_neon_vrndih_f16: {
12432 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12433 Int = Builder.getIsFPConstrained()
12434 ? Intrinsic::experimental_constrained_nearbyint
12435 : Intrinsic::nearbyint;
12436 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12437 }
12438 case NEON::BI__builtin_neon_vrndmh_f16: {
12439 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12440 Int = Builder.getIsFPConstrained()
12441 ? Intrinsic::experimental_constrained_floor
12442 : Intrinsic::floor;
12443 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12444 }
12445 case NEON::BI__builtin_neon_vrndm_v:
12446 case NEON::BI__builtin_neon_vrndmq_v: {
12447 Int = Builder.getIsFPConstrained()
12448 ? Intrinsic::experimental_constrained_floor
12449 : Intrinsic::floor;
12450 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12451 }
12452 case NEON::BI__builtin_neon_vrndnh_f16: {
12453 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12454 Int = Builder.getIsFPConstrained()
12455 ? Intrinsic::experimental_constrained_roundeven
12456 : Intrinsic::roundeven;
12457 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12458 }
12459 case NEON::BI__builtin_neon_vrndn_v:
12460 case NEON::BI__builtin_neon_vrndnq_v: {
12461 Int = Builder.getIsFPConstrained()
12462 ? Intrinsic::experimental_constrained_roundeven
12463 : Intrinsic::roundeven;
12464 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12465 }
12466 case NEON::BI__builtin_neon_vrndns_f32: {
12467 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12468 Int = Builder.getIsFPConstrained()
12469 ? Intrinsic::experimental_constrained_roundeven
12470 : Intrinsic::roundeven;
12471 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12472 }
12473 case NEON::BI__builtin_neon_vrndph_f16: {
12474 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12475 Int = Builder.getIsFPConstrained()
12476 ? Intrinsic::experimental_constrained_ceil
12477 : Intrinsic::ceil;
12478 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12479 }
12480 case NEON::BI__builtin_neon_vrndp_v:
12481 case NEON::BI__builtin_neon_vrndpq_v: {
12482 Int = Builder.getIsFPConstrained()
12483 ? Intrinsic::experimental_constrained_ceil
12484 : Intrinsic::ceil;
12485 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12486 }
12487 case NEON::BI__builtin_neon_vrndxh_f16: {
12488 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12489 Int = Builder.getIsFPConstrained()
12490 ? Intrinsic::experimental_constrained_rint
12491 : Intrinsic::rint;
12492 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12493 }
12494 case NEON::BI__builtin_neon_vrndx_v:
12495 case NEON::BI__builtin_neon_vrndxq_v: {
12496 Int = Builder.getIsFPConstrained()
12497 ? Intrinsic::experimental_constrained_rint
12498 : Intrinsic::rint;
12499 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12500 }
12501 case NEON::BI__builtin_neon_vrndh_f16: {
12502 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12503 Int = Builder.getIsFPConstrained()
12504 ? Intrinsic::experimental_constrained_trunc
12505 : Intrinsic::trunc;
12506 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12507 }
12508 case NEON::BI__builtin_neon_vrnd32x_f32:
12509 case NEON::BI__builtin_neon_vrnd32xq_f32:
12510 case NEON::BI__builtin_neon_vrnd32x_f64:
12511 case NEON::BI__builtin_neon_vrnd32xq_f64: {
12512 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12513 Int = Intrinsic::aarch64_neon_frint32x;
12514 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12515 }
12516 case NEON::BI__builtin_neon_vrnd32z_f32:
12517 case NEON::BI__builtin_neon_vrnd32zq_f32:
12518 case NEON::BI__builtin_neon_vrnd32z_f64:
12519 case NEON::BI__builtin_neon_vrnd32zq_f64: {
12520 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12521 Int = Intrinsic::aarch64_neon_frint32z;
12522 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12523 }
12524 case NEON::BI__builtin_neon_vrnd64x_f32:
12525 case NEON::BI__builtin_neon_vrnd64xq_f32:
12526 case NEON::BI__builtin_neon_vrnd64x_f64:
12527 case NEON::BI__builtin_neon_vrnd64xq_f64: {
12528 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12529 Int = Intrinsic::aarch64_neon_frint64x;
12530 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12531 }
12532 case NEON::BI__builtin_neon_vrnd64z_f32:
12533 case NEON::BI__builtin_neon_vrnd64zq_f32:
12534 case NEON::BI__builtin_neon_vrnd64z_f64:
12535 case NEON::BI__builtin_neon_vrnd64zq_f64: {
12536 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12537 Int = Intrinsic::aarch64_neon_frint64z;
12538 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12539 }
12540 case NEON::BI__builtin_neon_vrnd_v:
12541 case NEON::BI__builtin_neon_vrndq_v: {
12542 Int = Builder.getIsFPConstrained()
12543 ? Intrinsic::experimental_constrained_trunc
12544 : Intrinsic::trunc;
12545 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12546 }
12547 case NEON::BI__builtin_neon_vcvt_f64_v:
12548 case NEON::BI__builtin_neon_vcvtq_f64_v:
12549 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12550 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12551 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12552 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12553 case NEON::BI__builtin_neon_vcvt_f64_f32: {
12554 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12555 "unexpected vcvt_f64_f32 builtin");
12556 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12557 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12558
12559 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12560 }
12561 case NEON::BI__builtin_neon_vcvt_f32_f64: {
12562 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12563 "unexpected vcvt_f32_f64 builtin");
12564 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12565 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12566
12567 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12568 }
12569 case NEON::BI__builtin_neon_vcvt_s32_v:
12570 case NEON::BI__builtin_neon_vcvt_u32_v:
12571 case NEON::BI__builtin_neon_vcvt_s64_v:
12572 case NEON::BI__builtin_neon_vcvt_u64_v:
12573 case NEON::BI__builtin_neon_vcvt_s16_f16:
12574 case NEON::BI__builtin_neon_vcvt_u16_f16:
12575 case NEON::BI__builtin_neon_vcvtq_s32_v:
12576 case NEON::BI__builtin_neon_vcvtq_u32_v:
12577 case NEON::BI__builtin_neon_vcvtq_s64_v:
12578 case NEON::BI__builtin_neon_vcvtq_u64_v:
12579 case NEON::BI__builtin_neon_vcvtq_s16_f16:
12580 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12581 Int =
12582 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12583 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12584 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12585 }
12586 case NEON::BI__builtin_neon_vcvta_s16_f16:
12587 case NEON::BI__builtin_neon_vcvta_u16_f16:
12588 case NEON::BI__builtin_neon_vcvta_s32_v:
12589 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12590 case NEON::BI__builtin_neon_vcvtaq_s32_v:
12591 case NEON::BI__builtin_neon_vcvta_u32_v:
12592 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12593 case NEON::BI__builtin_neon_vcvtaq_u32_v:
12594 case NEON::BI__builtin_neon_vcvta_s64_v:
12595 case NEON::BI__builtin_neon_vcvtaq_s64_v:
12596 case NEON::BI__builtin_neon_vcvta_u64_v:
12597 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12598 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12599 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12600 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12601 }
12602 case NEON::BI__builtin_neon_vcvtm_s16_f16:
12603 case NEON::BI__builtin_neon_vcvtm_s32_v:
12604 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12605 case NEON::BI__builtin_neon_vcvtmq_s32_v:
12606 case NEON::BI__builtin_neon_vcvtm_u16_f16:
12607 case NEON::BI__builtin_neon_vcvtm_u32_v:
12608 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12609 case NEON::BI__builtin_neon_vcvtmq_u32_v:
12610 case NEON::BI__builtin_neon_vcvtm_s64_v:
12611 case NEON::BI__builtin_neon_vcvtmq_s64_v:
12612 case NEON::BI__builtin_neon_vcvtm_u64_v:
12613 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12614 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12615 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12616 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12617 }
12618 case NEON::BI__builtin_neon_vcvtn_s16_f16:
12619 case NEON::BI__builtin_neon_vcvtn_s32_v:
12620 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12621 case NEON::BI__builtin_neon_vcvtnq_s32_v:
12622 case NEON::BI__builtin_neon_vcvtn_u16_f16:
12623 case NEON::BI__builtin_neon_vcvtn_u32_v:
12624 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12625 case NEON::BI__builtin_neon_vcvtnq_u32_v:
12626 case NEON::BI__builtin_neon_vcvtn_s64_v:
12627 case NEON::BI__builtin_neon_vcvtnq_s64_v:
12628 case NEON::BI__builtin_neon_vcvtn_u64_v:
12629 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12630 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12631 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12632 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12633 }
12634 case NEON::BI__builtin_neon_vcvtp_s16_f16:
12635 case NEON::BI__builtin_neon_vcvtp_s32_v:
12636 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12637 case NEON::BI__builtin_neon_vcvtpq_s32_v:
12638 case NEON::BI__builtin_neon_vcvtp_u16_f16:
12639 case NEON::BI__builtin_neon_vcvtp_u32_v:
12640 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12641 case NEON::BI__builtin_neon_vcvtpq_u32_v:
12642 case NEON::BI__builtin_neon_vcvtp_s64_v:
12643 case NEON::BI__builtin_neon_vcvtpq_s64_v:
12644 case NEON::BI__builtin_neon_vcvtp_u64_v:
12645 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12646 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12647 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12648 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12649 }
12650 case NEON::BI__builtin_neon_vmulx_v:
12651 case NEON::BI__builtin_neon_vmulxq_v: {
12652 Int = Intrinsic::aarch64_neon_fmulx;
12653 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12654 }
12655 case NEON::BI__builtin_neon_vmulxh_lane_f16:
12656 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12657 // vmulx_lane should be mapped to Neon scalar mulx after
12658 // extracting the scalar element
12659 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12660 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12661 Ops.pop_back();
12662 Int = Intrinsic::aarch64_neon_fmulx;
12663 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12664 }
12665 case NEON::BI__builtin_neon_vmul_lane_v:
12666 case NEON::BI__builtin_neon_vmul_laneq_v: {
12667 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12668 bool Quad = false;
12669 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12670 Quad = true;
12671 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12672 llvm::FixedVectorType *VTy =
12673 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12674 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12675 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12676 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12677 return Builder.CreateBitCast(Result, Ty);
12678 }
12679 case NEON::BI__builtin_neon_vnegd_s64:
12680 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12681 case NEON::BI__builtin_neon_vnegh_f16:
12682 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12683 case NEON::BI__builtin_neon_vpmaxnm_v:
12684 case NEON::BI__builtin_neon_vpmaxnmq_v: {
12685 Int = Intrinsic::aarch64_neon_fmaxnmp;
12686 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12687 }
12688 case NEON::BI__builtin_neon_vpminnm_v:
12689 case NEON::BI__builtin_neon_vpminnmq_v: {
12690 Int = Intrinsic::aarch64_neon_fminnmp;
12691 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12692 }
12693 case NEON::BI__builtin_neon_vsqrth_f16: {
12694 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12695 Int = Builder.getIsFPConstrained()
12696 ? Intrinsic::experimental_constrained_sqrt
12697 : Intrinsic::sqrt;
12698 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12699 }
12700 case NEON::BI__builtin_neon_vsqrt_v:
12701 case NEON::BI__builtin_neon_vsqrtq_v: {
12702 Int = Builder.getIsFPConstrained()
12703 ? Intrinsic::experimental_constrained_sqrt
12704 : Intrinsic::sqrt;
12705 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12706 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12707 }
12708 case NEON::BI__builtin_neon_vrbit_v:
12709 case NEON::BI__builtin_neon_vrbitq_v: {
12710 Int = Intrinsic::bitreverse;
12711 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12712 }
12713 case NEON::BI__builtin_neon_vaddv_u8:
12714 // FIXME: These are handled by the AArch64 scalar code.
12715 usgn = true;
12716 [[fallthrough]];
12717 case NEON::BI__builtin_neon_vaddv_s8: {
12718 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12719 Ty = Int32Ty;
12720 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12721 llvm::Type *Tys[2] = { Ty, VTy };
12722 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12723 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12724 return Builder.CreateTrunc(Ops[0], Int8Ty);
12725 }
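// Illustration of the vaddv reduction pattern used in the cases above and
// below: the across-vector intrinsic is overloaded on {result, source} types,
// always returns an i32 here, and the result is truncated back to the element
// width. Roughly, for vaddv_u8 (value names are made up for the example):
//   %sum = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a)
//   %res = trunc i32 %sum to i8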
12726 case NEON::BI__builtin_neon_vaddv_u16:
12727 usgn = true;
12728 [[fallthrough]];
12729 case NEON::BI__builtin_neon_vaddv_s16: {
12730 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12731 Ty = Int32Ty;
12732 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12733 llvm::Type *Tys[2] = { Ty, VTy };
12734 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12735 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12736 return Builder.CreateTrunc(Ops[0], Int16Ty);
12737 }
12738 case NEON::BI__builtin_neon_vaddvq_u8:
12739 usgn = true;
12740 [[fallthrough]];
12741 case NEON::BI__builtin_neon_vaddvq_s8: {
12742 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12743 Ty = Int32Ty;
12744 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12745 llvm::Type *Tys[2] = { Ty, VTy };
12746 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12747 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12748 return Builder.CreateTrunc(Ops[0], Int8Ty);
12749 }
12750 case NEON::BI__builtin_neon_vaddvq_u16:
12751 usgn = true;
12752 [[fallthrough]];
12753 case NEON::BI__builtin_neon_vaddvq_s16: {
12754 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12755 Ty = Int32Ty;
12756 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12757 llvm::Type *Tys[2] = { Ty, VTy };
12758 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12759 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12760 return Builder.CreateTrunc(Ops[0], Int16Ty);
12761 }
12762 case NEON::BI__builtin_neon_vmaxv_u8: {
12763 Int = Intrinsic::aarch64_neon_umaxv;
12764 Ty = Int32Ty;
12765 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12766 llvm::Type *Tys[2] = { Ty, VTy };
12767 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12768 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12769 return Builder.CreateTrunc(Ops[0], Int8Ty);
12770 }
12771 case NEON::BI__builtin_neon_vmaxv_u16: {
12772 Int = Intrinsic::aarch64_neon_umaxv;
12773 Ty = Int32Ty;
12774 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12775 llvm::Type *Tys[2] = { Ty, VTy };
12776 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12777 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12778 return Builder.CreateTrunc(Ops[0], Int16Ty);
12779 }
12780 case NEON::BI__builtin_neon_vmaxvq_u8: {
12781 Int = Intrinsic::aarch64_neon_umaxv;
12782 Ty = Int32Ty;
12783 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12784 llvm::Type *Tys[2] = { Ty, VTy };
12785 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12786 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12787 return Builder.CreateTrunc(Ops[0], Int8Ty);
12788 }
12789 case NEON::BI__builtin_neon_vmaxvq_u16: {
12790 Int = Intrinsic::aarch64_neon_umaxv;
12791 Ty = Int32Ty;
12792 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12793 llvm::Type *Tys[2] = { Ty, VTy };
12794 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12795 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12796 return Builder.CreateTrunc(Ops[0], Int16Ty);
12797 }
12798 case NEON::BI__builtin_neon_vmaxv_s8: {
12799 Int = Intrinsic::aarch64_neon_smaxv;
12800 Ty = Int32Ty;
12801 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12802 llvm::Type *Tys[2] = { Ty, VTy };
12803 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12804 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12805 return Builder.CreateTrunc(Ops[0], Int8Ty);
12806 }
12807 case NEON::BI__builtin_neon_vmaxv_s16: {
12808 Int = Intrinsic::aarch64_neon_smaxv;
12809 Ty = Int32Ty;
12810 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12811 llvm::Type *Tys[2] = { Ty, VTy };
12812 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12813 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12814 return Builder.CreateTrunc(Ops[0], Int16Ty);
12815 }
12816 case NEON::BI__builtin_neon_vmaxvq_s8: {
12817 Int = Intrinsic::aarch64_neon_smaxv;
12818 Ty = Int32Ty;
12819 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12820 llvm::Type *Tys[2] = { Ty, VTy };
12821 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12822 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12823 return Builder.CreateTrunc(Ops[0], Int8Ty);
12824 }
12825 case NEON::BI__builtin_neon_vmaxvq_s16: {
12826 Int = Intrinsic::aarch64_neon_smaxv;
12827 Ty = Int32Ty;
12828 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12829 llvm::Type *Tys[2] = { Ty, VTy };
12830 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12831 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12832 return Builder.CreateTrunc(Ops[0], Int16Ty);
12833 }
12834 case NEON::BI__builtin_neon_vmaxv_f16: {
12835 Int = Intrinsic::aarch64_neon_fmaxv;
12836 Ty = HalfTy;
12837 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12838 llvm::Type *Tys[2] = { Ty, VTy };
12839 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12840 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12841 return Builder.CreateTrunc(Ops[0], HalfTy);
12842 }
12843 case NEON::BI__builtin_neon_vmaxvq_f16: {
12844 Int = Intrinsic::aarch64_neon_fmaxv;
12845 Ty = HalfTy;
12846 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12847 llvm::Type *Tys[2] = { Ty, VTy };
12848 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12849 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12850 return Builder.CreateTrunc(Ops[0], HalfTy);
12851 }
12852 case NEON::BI__builtin_neon_vminv_u8: {
12853 Int = Intrinsic::aarch64_neon_uminv;
12854 Ty = Int32Ty;
12855 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12856 llvm::Type *Tys[2] = { Ty, VTy };
12857 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12858 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12859 return Builder.CreateTrunc(Ops[0], Int8Ty);
12860 }
12861 case NEON::BI__builtin_neon_vminv_u16: {
12862 Int = Intrinsic::aarch64_neon_uminv;
12863 Ty = Int32Ty;
12864 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12865 llvm::Type *Tys[2] = { Ty, VTy };
12866 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12867 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12868 return Builder.CreateTrunc(Ops[0], Int16Ty);
12869 }
12870 case NEON::BI__builtin_neon_vminvq_u8: {
12871 Int = Intrinsic::aarch64_neon_uminv;
12872 Ty = Int32Ty;
12873 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12874 llvm::Type *Tys[2] = { Ty, VTy };
12875 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12876 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12877 return Builder.CreateTrunc(Ops[0], Int8Ty);
12878 }
12879 case NEON::BI__builtin_neon_vminvq_u16: {
12880 Int = Intrinsic::aarch64_neon_uminv;
12881 Ty = Int32Ty;
12882 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12883 llvm::Type *Tys[2] = { Ty, VTy };
12884 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12885 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12886 return Builder.CreateTrunc(Ops[0], Int16Ty);
12887 }
12888 case NEON::BI__builtin_neon_vminv_s8: {
12889 Int = Intrinsic::aarch64_neon_sminv;
12890 Ty = Int32Ty;
12891 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12892 llvm::Type *Tys[2] = { Ty, VTy };
12893 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12894 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12895 return Builder.CreateTrunc(Ops[0], Int8Ty);
12896 }
12897 case NEON::BI__builtin_neon_vminv_s16: {
12898 Int = Intrinsic::aarch64_neon_sminv;
12899 Ty = Int32Ty;
12900 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12901 llvm::Type *Tys[2] = { Ty, VTy };
12902 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12903 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12904 return Builder.CreateTrunc(Ops[0], Int16Ty);
12905 }
12906 case NEON::BI__builtin_neon_vminvq_s8: {
12907 Int = Intrinsic::aarch64_neon_sminv;
12908 Ty = Int32Ty;
12909 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12910 llvm::Type *Tys[2] = { Ty, VTy };
12911 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12912 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12913 return Builder.CreateTrunc(Ops[0], Int8Ty);
12914 }
12915 case NEON::BI__builtin_neon_vminvq_s16: {
12916 Int = Intrinsic::aarch64_neon_sminv;
12917 Ty = Int32Ty;
12918 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12919 llvm::Type *Tys[2] = { Ty, VTy };
12920 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12921 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12922 return Builder.CreateTrunc(Ops[0], Int16Ty);
12923 }
12924 case NEON::BI__builtin_neon_vminv_f16: {
12925 Int = Intrinsic::aarch64_neon_fminv;
12926 Ty = HalfTy;
12927 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12928 llvm::Type *Tys[2] = { Ty, VTy };
12929 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12930 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12931 return Builder.CreateTrunc(Ops[0], HalfTy);
12932 }
12933 case NEON::BI__builtin_neon_vminvq_f16: {
12934 Int = Intrinsic::aarch64_neon_fminv;
12935 Ty = HalfTy;
12936 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12937 llvm::Type *Tys[2] = { Ty, VTy };
12938 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12939 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12940 return Builder.CreateTrunc(Ops[0], HalfTy);
12941 }
12942 case NEON::BI__builtin_neon_vmaxnmv_f16: {
12943 Int = Intrinsic::aarch64_neon_fmaxnmv;
12944 Ty = HalfTy;
12945 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12946 llvm::Type *Tys[2] = { Ty, VTy };
12947 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12948 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12949 return Builder.CreateTrunc(Ops[0], HalfTy);
12950 }
12951 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
12952 Int = Intrinsic::aarch64_neon_fmaxnmv;
12953 Ty = HalfTy;
12954 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12955 llvm::Type *Tys[2] = { Ty, VTy };
12956 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12957 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
12958 return Builder.CreateTrunc(Ops[0], HalfTy);
12959 }
12960 case NEON::BI__builtin_neon_vminnmv_f16: {
12961 Int = Intrinsic::aarch64_neon_fminnmv;
12962 Ty = HalfTy;
12963 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12964 llvm::Type *Tys[2] = { Ty, VTy };
12965 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12966 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12967 return Builder.CreateTrunc(Ops[0], HalfTy);
12968 }
12969 case NEON::BI__builtin_neon_vminnmvq_f16: {
12970 Int = Intrinsic::aarch64_neon_fminnmv;
12971 Ty = HalfTy;
12972 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12973 llvm::Type *Tys[2] = { Ty, VTy };
12974 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12975 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
12976 return Builder.CreateTrunc(Ops[0], HalfTy);
12977 }
12978 case NEON::BI__builtin_neon_vmul_n_f64: {
12979 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12980 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
12981 return Builder.CreateFMul(Ops[0], RHS);
12982 }
12983 case NEON::BI__builtin_neon_vaddlv_u8: {
12984 Int = Intrinsic::aarch64_neon_uaddlv;
12985 Ty = Int32Ty;
12986 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12987 llvm::Type *Tys[2] = { Ty, VTy };
12988 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12989 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12990 return Builder.CreateTrunc(Ops[0], Int16Ty);
12991 }
12992 case NEON::BI__builtin_neon_vaddlv_u16: {
12993 Int = Intrinsic::aarch64_neon_uaddlv;
12994 Ty = Int32Ty;
12995 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12996 llvm::Type *Tys[2] = { Ty, VTy };
12997 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12998 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
12999 }
13000 case NEON::BI__builtin_neon_vaddlvq_u8: {
13001 Int = Intrinsic::aarch64_neon_uaddlv;
13002 Ty = Int32Ty;
13003 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13004 llvm::Type *Tys[2] = { Ty, VTy };
13005 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13006 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13007 return Builder.CreateTrunc(Ops[0], Int16Ty);
13008 }
13009 case NEON::BI__builtin_neon_vaddlvq_u16: {
13010 Int = Intrinsic::aarch64_neon_uaddlv;
13011 Ty = Int32Ty;
13012 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13013 llvm::Type *Tys[2] = { Ty, VTy };
13014 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13015 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13016 }
13017 case NEON::BI__builtin_neon_vaddlv_s8: {
13018 Int = Intrinsic::aarch64_neon_saddlv;
13019 Ty = Int32Ty;
13020 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13021 llvm::Type *Tys[2] = { Ty, VTy };
13022 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13023 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13024 return Builder.CreateTrunc(Ops[0], Int16Ty);
13025 }
13026 case NEON::BI__builtin_neon_vaddlv_s16: {
13027 Int = Intrinsic::aarch64_neon_saddlv;
13028 Ty = Int32Ty;
13029 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13030 llvm::Type *Tys[2] = { Ty, VTy };
13031 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13032 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13033 }
13034 case NEON::BI__builtin_neon_vaddlvq_s8: {
13035 Int = Intrinsic::aarch64_neon_saddlv;
13036 Ty = Int32Ty;
13037 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13038 llvm::Type *Tys[2] = { Ty, VTy };
13039 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13040 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13041 return Builder.CreateTrunc(Ops[0], Int16Ty);
13042 }
13043 case NEON::BI__builtin_neon_vaddlvq_s16: {
13044 Int = Intrinsic::aarch64_neon_saddlv;
13045 Ty = Int32Ty;
13046 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13047 llvm::Type *Tys[2] = { Ty, VTy };
13048 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13049 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13050 }
13051 case NEON::BI__builtin_neon_vsri_n_v:
13052 case NEON::BI__builtin_neon_vsriq_n_v: {
13053 Int = Intrinsic::aarch64_neon_vsri;
13054 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13055 return EmitNeonCall(Intrin, Ops, "vsri_n");
13056 }
13057 case NEON::BI__builtin_neon_vsli_n_v:
13058 case NEON::BI__builtin_neon_vsliq_n_v: {
13059 Int = Intrinsic::aarch64_neon_vsli;
13060 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13061 return EmitNeonCall(Intrin, Ops, "vsli_n");
13062 }
13063 case NEON::BI__builtin_neon_vsra_n_v:
13064 case NEON::BI__builtin_neon_vsraq_n_v:
13065 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13066 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13067 return Builder.CreateAdd(Ops[0], Ops[1]);
13068 case NEON::BI__builtin_neon_vrsra_n_v:
13069 case NEON::BI__builtin_neon_vrsraq_n_v: {
13070 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13071 SmallVector<llvm::Value*,2> TmpOps;
13072 TmpOps.push_back(Ops[1]);
13073 TmpOps.push_back(Ops[2]);
13074 Function* F = CGM.getIntrinsic(Int, Ty);
13075 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13076 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13077 return Builder.CreateAdd(Ops[0], tmp);
13078 }
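// Sketch of the vrsra_n lowering above: the accumulate-with-rounding-shift is
// split into a rounding shift followed by a plain add, i.e. roughly
// a + rounding_shift_right(b, n). The shift goes through the
// aarch64.neon.{u,s}rshl intrinsic; EmitNeonCall's trailing (1, true)
// arguments appear to mark operand 1 as a shift immediate to be negated, so a
// "shift left by -n" acts as the rounding right shift by n.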
13079 case NEON::BI__builtin_neon_vld1_v:
13080 case NEON::BI__builtin_neon_vld1q_v: {
13081 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13082 }
13083 case NEON::BI__builtin_neon_vst1_v:
13084 case NEON::BI__builtin_neon_vst1q_v:
13085 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13086 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13087 case NEON::BI__builtin_neon_vld1_lane_v:
13088 case NEON::BI__builtin_neon_vld1q_lane_v: {
13089 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13090 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13091 PtrOp0.getAlignment());
13092 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13093 }
13094 case NEON::BI__builtin_neon_vldap1_lane_s64:
13095 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13096 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13097 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13098 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13099 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13100 Ops[0] = LI;
13101 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13102 }
13103 case NEON::BI__builtin_neon_vld1_dup_v:
13104 case NEON::BI__builtin_neon_vld1q_dup_v: {
13105 Value *V = PoisonValue::get(Ty);
13106 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13107 PtrOp0.getAlignment());
13108 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13109 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13110 return EmitNeonSplat(Ops[0], CI);
13111 }
13112 case NEON::BI__builtin_neon_vst1_lane_v:
13113 case NEON::BI__builtin_neon_vst1q_lane_v:
13114 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13115 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13116 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13117 case NEON::BI__builtin_neon_vstl1_lane_s64:
13118 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13119 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13120 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13121 llvm::StoreInst *SI =
13122 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13123 SI->setAtomic(llvm::AtomicOrdering::Release);
13124 return SI;
13125 }
13126 case NEON::BI__builtin_neon_vld2_v:
13127 case NEON::BI__builtin_neon_vld2q_v: {
13128 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13129 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13130 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13131 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13132 }
13133 case NEON::BI__builtin_neon_vld3_v:
13134 case NEON::BI__builtin_neon_vld3q_v: {
13135 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13136 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13137 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13138 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13139 }
13140 case NEON::BI__builtin_neon_vld4_v:
13141 case NEON::BI__builtin_neon_vld4q_v: {
13142 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13143 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13144 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13145 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13146 }
13147 case NEON::BI__builtin_neon_vld2_dup_v:
13148 case NEON::BI__builtin_neon_vld2q_dup_v: {
13149 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13150 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13151 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13152 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13153 }
13154 case NEON::BI__builtin_neon_vld3_dup_v:
13155 case NEON::BI__builtin_neon_vld3q_dup_v: {
13156 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13157 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13158 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13159 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13160 }
13161 case NEON::BI__builtin_neon_vld4_dup_v:
13162 case NEON::BI__builtin_neon_vld4q_dup_v: {
13163 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13164 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13165 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13166 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13167 }
13168 case NEON::BI__builtin_neon_vld2_lane_v:
13169 case NEON::BI__builtin_neon_vld2q_lane_v: {
13170 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13171 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13172 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13173 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13174 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13175 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13176 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13177 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13178 }
13179 case NEON::BI__builtin_neon_vld3_lane_v:
13180 case NEON::BI__builtin_neon_vld3q_lane_v: {
13181 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13182 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13183 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13184 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13185 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13186 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13187 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13188 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13189 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13190 }
13191 case NEON::BI__builtin_neon_vld4_lane_v:
13192 case NEON::BI__builtin_neon_vld4q_lane_v: {
13193 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13194 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13195 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13196 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13197 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13198 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13199 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13200 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13201 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13202 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13203 }
13204 case NEON::BI__builtin_neon_vst2_v:
13205 case NEON::BI__builtin_neon_vst2q_v: {
13206 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13207 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13208 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13209 Ops, "");
13210 }
13211 case NEON::BI__builtin_neon_vst2_lane_v:
13212 case NEON::BI__builtin_neon_vst2q_lane_v: {
13213 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13214 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13215 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13216 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13217 Ops, "");
13218 }
13219 case NEON::BI__builtin_neon_vst3_v:
13220 case NEON::BI__builtin_neon_vst3q_v: {
13221 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13222 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13223 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13224 Ops, "");
13225 }
13226 case NEON::BI__builtin_neon_vst3_lane_v:
13227 case NEON::BI__builtin_neon_vst3q_lane_v: {
13228 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13229 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13230 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13231 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13232 Ops, "");
13233 }
13234 case NEON::BI__builtin_neon_vst4_v:
13235 case NEON::BI__builtin_neon_vst4q_v: {
13236 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13237 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13238 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13239 Ops, "");
13240 }
13241 case NEON::BI__builtin_neon_vst4_lane_v:
13242 case NEON::BI__builtin_neon_vst4q_lane_v: {
13243 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13244 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13245 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13246 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13247 Ops, "");
13248 }
13249 case NEON::BI__builtin_neon_vtrn_v:
13250 case NEON::BI__builtin_neon_vtrnq_v: {
13251 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13252 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13253 Value *SV = nullptr;
13254
13255 for (unsigned vi = 0; vi != 2; ++vi) {
13256 SmallVector<int, 16> Indices;
13257 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13258 Indices.push_back(i+vi);
13259 Indices.push_back(i+e+vi);
13260 }
13261 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13262 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13263 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13264 }
13265 return SV;
13266 }
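// Example of the vtrn shuffle masks built above, assuming a 4-element vector
// (e = 4): iteration vi = 0 uses indices {0, 4, 2, 6} and vi = 1 uses
// {1, 5, 3, 7}, i.e. the even lanes of both inputs interleaved, then the odd
// lanes; each half is stored into the vi-th slot of the result pointed to by
// Ops[0].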
13267 case NEON::BI__builtin_neon_vuzp_v:
13268 case NEON::BI__builtin_neon_vuzpq_v: {
13269 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13270 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13271 Value *SV = nullptr;
13272
13273 for (unsigned vi = 0; vi != 2; ++vi) {
13274 SmallVector<int, 16> Indices;
13275 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13276 Indices.push_back(2*i+vi);
13277
13278 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13279 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13280 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13281 }
13282 return SV;
13283 }
13284 case NEON::BI__builtin_neon_vzip_v:
13285 case NEON::BI__builtin_neon_vzipq_v: {
13286 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13287 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13288 Value *SV = nullptr;
13289
13290 for (unsigned vi = 0; vi != 2; ++vi) {
13291 SmallVector<int, 16> Indices;
13292 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13293 Indices.push_back((i + vi*e) >> 1);
13294 Indices.push_back(((i + vi*e) >> 1)+e);
13295 }
13296 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13297 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13298 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13299 }
13300 return SV;
13301 }
13302 case NEON::BI__builtin_neon_vqtbl1q_v: {
13303 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13304 Ops, "vtbl1");
13305 }
13306 case NEON::BI__builtin_neon_vqtbl2q_v: {
13307 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13308 Ops, "vtbl2");
13309 }
13310 case NEON::BI__builtin_neon_vqtbl3q_v: {
13311 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13312 Ops, "vtbl3");
13313 }
13314 case NEON::BI__builtin_neon_vqtbl4q_v: {
13315 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13316 Ops, "vtbl4");
13317 }
13318 case NEON::BI__builtin_neon_vqtbx1q_v: {
13319 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13320 Ops, "vtbx1");
13321 }
13322 case NEON::BI__builtin_neon_vqtbx2q_v: {
13323 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13324 Ops, "vtbx2");
13325 }
13326 case NEON::BI__builtin_neon_vqtbx3q_v: {
13327 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13328 Ops, "vtbx3");
13329 }
13330 case NEON::BI__builtin_neon_vqtbx4q_v: {
13331 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13332 Ops, "vtbx4");
13333 }
13334 case NEON::BI__builtin_neon_vsqadd_v:
13335 case NEON::BI__builtin_neon_vsqaddq_v: {
13336 Int = Intrinsic::aarch64_neon_usqadd;
13337 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13338 }
13339 case NEON::BI__builtin_neon_vuqadd_v:
13340 case NEON::BI__builtin_neon_vuqaddq_v: {
13341 Int = Intrinsic::aarch64_neon_suqadd;
13342 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13343 }
13344 }
13345}
13346
13347Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13348 const CallExpr *E) {
13349 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13350 BuiltinID == BPF::BI__builtin_btf_type_id ||
13351 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13352 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13353 "unexpected BPF builtin");
13354
13355 // A sequence number, injected into the IR builtin calls, to
13356 // prevent CSE when the only difference between otherwise
13357 // identical calls is their debuginfo metadata.
13358 static uint32_t BuiltinSeqNum;
13359
13360 switch (BuiltinID) {
13361 default:
13362 llvm_unreachable("Unexpected BPF builtin");
13363 case BPF::BI__builtin_preserve_field_info: {
13364 const Expr *Arg = E->getArg(0);
13365 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13366
13367 if (!getDebugInfo()) {
13368 CGM.Error(E->getExprLoc(),
13369 "using __builtin_preserve_field_info() without -g");
13370 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13371 : EmitLValue(Arg).emitRawPointer(*this);
13372 }
13373
13374 // Enable underlying preserve_*_access_index() generation.
13375 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13376 IsInPreservedAIRegion = true;
13377 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13378 : EmitLValue(Arg).emitRawPointer(*this);
13379 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13380
13381 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13382 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13383
13384 // Build the IR for the preserve_field_info intrinsic.
13385 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13386 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13387 {FieldAddr->getType()});
13388 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13389 }
13390 case BPF::BI__builtin_btf_type_id:
13391 case BPF::BI__builtin_preserve_type_info: {
13392 if (!getDebugInfo()) {
13393 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13394 return nullptr;
13395 }
13396
13397 const Expr *Arg0 = E->getArg(0);
13398 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13399 Arg0->getType(), Arg0->getExprLoc());
13400
13401 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13402 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13403 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13404
13405 llvm::Function *FnDecl;
13406 if (BuiltinID == BPF::BI__builtin_btf_type_id)
13407 FnDecl = llvm::Intrinsic::getDeclaration(
13408 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13409 else
13410 FnDecl = llvm::Intrinsic::getDeclaration(
13411 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13412 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13413 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13414 return Fn;
13415 }
13416 case BPF::BI__builtin_preserve_enum_value: {
13417 if (!getDebugInfo()) {
13418 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13419 return nullptr;
13420 }
13421
13422 const Expr *Arg0 = E->getArg(0);
13423 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13424 Arg0->getType(), Arg0->getExprLoc());
13425
13426 // Find enumerator
13427 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13428 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13429 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13430 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13431
13432 auto InitVal = Enumerator->getInitVal();
13433 std::string InitValStr;
13434 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13435 InitValStr = std::to_string(InitVal.getSExtValue());
13436 else
13437 InitValStr = std::to_string(InitVal.getZExtValue());
13438 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13439 Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13440
13441 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13442 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13443 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13444
13445 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13446 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13447 CallInst *Fn =
13448 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13449 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13450 return Fn;
13451 }
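// For reference, the case above encodes the enumerator as a "Name:Value"
// string; e.g. a hypothetical enumerator FOO = 2 would produce roughly
//   call @llvm.bpf.preserve.enum.value(<seq>, <ptr to "FOO:2">, <flag>)
// with the standalone type debug info attached as preserve_access_index
// metadata for the BPF CO-RE relocation machinery to consume.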
13452 }
13453}
13454
13455llvm::Value *CodeGenFunction::
13456BuildVector(ArrayRef<llvm::Value*> Ops) {
13457 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13458 "Not a power-of-two sized vector!");
13459 bool AllConstants = true;
13460 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13461 AllConstants &= isa<Constant>(Ops[i]);
13462
13463 // If this is a constant vector, create a ConstantVector.
13464 if (AllConstants) {
13465 SmallVector<llvm::Constant*, 16> CstOps;
13466 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13467 CstOps.push_back(cast<Constant>(Ops[i]));
13468 return llvm::ConstantVector::get(CstOps);
13469 }
13470
13471 // Otherwise, insertelement the values to build the vector.
13472 Value *Result = llvm::PoisonValue::get(
13473 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13474
13475 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13476 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13477
13478 return Result;
13479}
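// Usage sketch for BuildVector (operand names are made up): with all-constant
// inputs it folds directly to a ConstantVector; otherwise it emits a chain of
// insertelement instructions into a poison vector, e.g.
//   %v0 = insertelement <4 x i32> poison, i32 %a, i64 0
//   %v1 = insertelement <4 x i32> %v0, i32 %b, i64 1
//   ...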
13480
13481// Convert the mask from an integer type to a vector of i1.
13482static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13483 unsigned NumElts) {
13484
13485 auto *MaskTy = llvm::FixedVectorType::get(
13486 CGF.Builder.getInt1Ty(),
13487 cast<IntegerType>(Mask->getType())->getBitWidth());
13488 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13489
13490 // If we have less than 8 elements, then the starting mask was an i8 and
13491 // we need to extract down to the right number of elements.
13492 if (NumElts < 8) {
13493 int Indices[4];
13494 for (unsigned i = 0; i != NumElts; ++i)
13495 Indices[i] = i;
13496 MaskVec = CGF.Builder.CreateShuffleVector(
13497 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13498 }
13499 return MaskVec;
13500}
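// Example of the conversion above: for an i8 mask and NumElts == 4, the mask
// is bitcast to <8 x i1> and then narrowed with a shufflevector over indices
// {0, 1, 2, 3}, giving the <4 x i1> value that the masked load/store, select
// and compare helpers below expect.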
13501
13502static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13503 Align Alignment) {
13504 Value *Ptr = Ops[0];
13505
13506 Value *MaskVec = getMaskVecValue(
13507 CGF, Ops[2],
13508 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13509
13510 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13511}
13512
13513static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13514 Align Alignment) {
13515 llvm::Type *Ty = Ops[1]->getType();
13516 Value *Ptr = Ops[0];
13517
13518 Value *MaskVec = getMaskVecValue(
13519 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13520
13521 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13522}
13523
13524static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13525 ArrayRef<Value *> Ops) {
13526 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13527 Value *Ptr = Ops[0];
13528
13529 Value *MaskVec = getMaskVecValue(
13530 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13531
13532 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13533 ResultTy);
13534 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13535}
13536
13537static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13538 ArrayRef<Value *> Ops,
13539 bool IsCompress) {
13540 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13541
13542 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13543
13544 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13545 : Intrinsic::x86_avx512_mask_expand;
13546 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13547 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13548}
13549
13550static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13551 ArrayRef<Value *> Ops) {
13552 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13553 Value *Ptr = Ops[0];
13554
13555 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13556
13557 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13558 ResultTy);
13559 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13560}
13561
13562static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13563 ArrayRef<Value *> Ops,
13564 bool InvertLHS = false) {
13565 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13566 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13567 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13568
13569 if (InvertLHS)
13570 LHS = CGF.Builder.CreateNot(LHS);
13571
13572 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13573 Ops[0]->getType());
13574}
13575
13576static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13577 Value *Amt, bool IsRight) {
13578 llvm::Type *Ty = Op0->getType();
13579
13580 // Amount may be scalar immediate, in which case create a splat vector.
13581 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
13582 // we only care about the lowest log2 bits anyway.
13583 if (Amt->getType() != Ty) {
13584 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13585 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13586 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13587 }
13588
13589 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13590 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13591 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13592}
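// Illustration for the helper above: a scalar shift amount is splatted to the
// vector type and the operation maps onto the generic funnel-shift intrinsics,
// e.g. @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %amt) for a
// left shift (the types here are only an example); passing the same value as
// Op0 and Op1 turns the funnel shift into a rotate.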
13593
13594static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13595 bool IsSigned) {
13596 Value *Op0 = Ops[0];
13597 Value *Op1 = Ops[1];
13598 llvm::Type *Ty = Op0->getType();
13599 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13600
13601 CmpInst::Predicate Pred;
13602 switch (Imm) {
13603 case 0x0:
13604 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13605 break;
13606 case 0x1:
13607 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13608 break;
13609 case 0x2:
13610 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13611 break;
13612 case 0x3:
13613 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13614 break;
13615 case 0x4:
13616 Pred = ICmpInst::ICMP_EQ;
13617 break;
13618 case 0x5:
13619 Pred = ICmpInst::ICMP_NE;
13620 break;
13621 case 0x6:
13622 return llvm::Constant::getNullValue(Ty); // FALSE
13623 case 0x7:
13624 return llvm::Constant::getAllOnesValue(Ty); // TRUE
13625 default:
13626 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13627 }
13628
13629 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13630 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13631 return Res;
13632}
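// Example of the immediate decoding above: for the signed form, imm 0..5 map
// to slt/sle/sgt/sge/eq/ne, so imm == 2 on a v16i8 operand (example type)
// yields roughly
//   %c = icmp sgt <16 x i8> %a, %b          ; <16 x i1>
//   %r = sext <16 x i1> %c to <16 x i8>     ; all-ones / all-zero lanes
// while imm 6 and 7 fold to constant all-false / all-true vectors.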
13633
13634static Value *EmitX86Select(CodeGenFunction &CGF,
13635 Value *Mask, Value *Op0, Value *Op1) {
13636
13637 // If the mask is all ones, just return the first argument.
13638 if (const auto *C = dyn_cast<Constant>(Mask))
13639 if (C->isAllOnesValue())
13640 return Op0;
13641
13642 Mask = getMaskVecValue(
13643 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13644
13645 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13646}
13647
13648static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13649 Value *Mask, Value *Op0, Value *Op1) {
13650 // If the mask is all ones, just return the first argument.
13651 if (const auto *C = dyn_cast<Constant>(Mask))
13652 if (C->isAllOnesValue())
13653 return Op0;
13654
13655 auto *MaskTy = llvm::FixedVectorType::get(
13656 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13657 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13658 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13659 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13660}
13661
13661
13662static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13663 unsigned NumElts, Value *MaskIn) {
13664 if (MaskIn) {
13665 const auto *C = dyn_cast<Constant>(MaskIn);
13666 if (!C || !C->isAllOnesValue())
13667 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13668 }
13669
13670 if (NumElts < 8) {
13671 int Indices[8];
13672 for (unsigned i = 0; i != NumElts; ++i)
13673 Indices[i] = i;
13674 for (unsigned i = NumElts; i != 8; ++i)
13675 Indices[i] = i % NumElts + NumElts;
13676 Cmp = CGF.Builder.CreateShuffleVector(
13677 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13678 }
13679
13680 return CGF.Builder.CreateBitCast(Cmp,
13681 IntegerType::get(CGF.getLLVMContext(),
13682 std::max(NumElts, 8U)));
13683}
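// Illustration of the result packing above: with NumElts == 4, the <4 x i1>
// compare is widened by shuffling against a zero vector with indices
// {0, 1, 2, 3, 4, 5, 6, 7} (lanes 4..7 come from the null operand), and the
// resulting <8 x i1> is bitcast to i8, the minimum mask width used here.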
13684
13685static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13686 bool Signed, ArrayRef<Value *> Ops) {
13687 assert((Ops.size() == 2 || Ops.size() == 4) &&
13688 "Unexpected number of arguments");
13689 unsigned NumElts =
13690 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13691 Value *Cmp;
13692
13693 if (CC == 3) {
13694 Cmp = Constant::getNullValue(
13695 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13696 } else if (CC == 7) {
13697 Cmp = Constant::getAllOnesValue(
13698 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13699 } else {
13700 ICmpInst::Predicate Pred;
13701 switch (CC) {
13702 default: llvm_unreachable("Unknown condition code");
13703 case 0: Pred = ICmpInst::ICMP_EQ; break;
13704 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13705 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13706 case 4: Pred = ICmpInst::ICMP_NE; break;
13707 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13708 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13709 }
13710 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13711 }
13712
13713 Value *MaskIn = nullptr;
13714 if (Ops.size() == 4)
13715 MaskIn = Ops[3];
13716
13717 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13718}
13719
13720static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13721 Value *Zero = Constant::getNullValue(In->getType());
13722 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13723}
13724
13725static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13726 ArrayRef<Value *> Ops, bool IsSigned) {
13727 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13728 llvm::Type *Ty = Ops[1]->getType();
13729
13730 Value *Res;
13731 if (Rnd != 4) {
13732 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13733 : Intrinsic::x86_avx512_uitofp_round;
13734 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13735 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13736 } else {
13737 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13738 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13739 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13740 }
13741
13742 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13743}
13744
13745// Lowers X86 FMA intrinsics to IR.
13746static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13747 ArrayRef<Value *> Ops, unsigned BuiltinID,
13748 bool IsAddSub) {
13749
13750 bool Subtract = false;
13751 Intrinsic::ID IID = Intrinsic::not_intrinsic;
13752 switch (BuiltinID) {
13753 default: break;
13754 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13755 Subtract = true;
13756 [[fallthrough]];
13757 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13758 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13759 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13760 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13761 break;
13762 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13763 Subtract = true;
13764 [[fallthrough]];
13765 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13766 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13767 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13768 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13769 break;
13770 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13771 Subtract = true;
13772 [[fallthrough]];
13773 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13774 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13775 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13776 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13777 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13778 Subtract = true;
13779 [[fallthrough]];
13780 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13781 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13782 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13783 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13784 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13785 Subtract = true;
13786 [[fallthrough]];
13787 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13788 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13789 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13790 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13791 break;
13792 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13793 Subtract = true;
13794 [[fallthrough]];
13795 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13796 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13797 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13798 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13799 break;
13800 }
13801
13802 Value *A = Ops[0];
13803 Value *B = Ops[1];
13804 Value *C = Ops[2];
13805
13806 if (Subtract)
13807 C = CGF.Builder.CreateFNeg(C);
13808
13809 Value *Res;
13810
13811 // Only fall through to the generic fma when rounding is _MM_FROUND_CUR_DIRECTION/4 (no explicit rounding) and this is not an addsub form; otherwise keep the target intrinsic.
13812 if (IID != Intrinsic::not_intrinsic &&
13813 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
13814 IsAddSub)) {
13815 Function *Intr = CGF.CGM.getIntrinsic(IID);
13816 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
13817 } else {
13818 llvm::Type *Ty = A->getType();
13819 Function *FMA;
13820 if (CGF.Builder.getIsFPConstrained()) {
13821 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13822 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
13823 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
13824 } else {
13825 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
13826 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
13827 }
13828 }
13829
13830 // Handle any required masking.
13831 Value *MaskFalseVal = nullptr;
13832 switch (BuiltinID) {
13833 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13834 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13835 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13836 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13837 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13838 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13839 MaskFalseVal = Ops[0];
13840 break;
13841 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13842 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13843 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13844 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13845 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13846 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13847 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
13848 break;
13849 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13850 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13851 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13852 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13853 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13854 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13855 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13856 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13857 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13858 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13859 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13860 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13861 MaskFalseVal = Ops[2];
13862 break;
13863 }
13864
13865 if (MaskFalseVal)
13866 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
13867
13868 return Res;
13869}
13870
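// Illustrative sketch (value names are for illustration only): with default rounding (imm 4) and no
// addsub, e.g. __builtin_ia32_vfmaddps512_mask lowers to
//   %r = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
// and the _mask/_maskz/_mask3 variants differ only in the select's false
// operand (Ops[0], zero, or Ops[2], as chosen above).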
13871static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13872 MutableArrayRef<Value *> Ops, Value *Upper,
13873 bool ZeroMask = false, unsigned PTIdx = 0,
13874 bool NegAcc = false) {
13875 unsigned Rnd = 4;
13876 if (Ops.size() > 4)
13877 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
13878
13879 if (NegAcc)
13880 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
13881
13882 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
13883 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
13884 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
13885 Value *Res;
13886 if (Rnd != 4) {
13887 Intrinsic::ID IID;
13888
13889 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
13890 case 16:
13891 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
13892 break;
13893 case 32:
13894 IID = Intrinsic::x86_avx512_vfmadd_f32;
13895 break;
13896 case 64:
13897 IID = Intrinsic::x86_avx512_vfmadd_f64;
13898 break;
13899 default:
13900 llvm_unreachable("Unexpected size");
13901 }
13902 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13903 {Ops[0], Ops[1], Ops[2], Ops[4]});
13904 } else if (CGF.Builder.getIsFPConstrained()) {
13905 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13906 Function *FMA = CGF.CGM.getIntrinsic(
13907 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
13908 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
13909 } else {
13910 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
13911 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
13912 }
13913 // If we have more than 3 arguments, we need to do masking.
13914 if (Ops.size() > 3) {
13915 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
13916 : Ops[PTIdx];
13917
13918 // If we negated the accumulator and it is the PassThru value, we need to
13919 // bypass the negate. Conveniently, Upper should be the same thing in this
13920 // case.
13921 if (NegAcc && PTIdx == 2)
13922 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
13923
13924 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
13925 }
13926 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
13927}
13928
13929static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
13930 ArrayRef<Value *> Ops) {
13931 llvm::Type *Ty = Ops[0]->getType();
13932 // Arguments have a vXi32 type so cast to vXi64.
13933 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
13934 Ty->getPrimitiveSizeInBits() / 64);
13935 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
13936 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
13937
13938 if (IsSigned) {
13939 // Shift left then arithmetic shift right.
13940 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
13941 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
13942 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
13943 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
13944 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
13945 } else {
13946 // Clear the upper bits.
13947 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
13948 LHS = CGF.Builder.CreateAnd(LHS, Mask);
13949 RHS = CGF.Builder.CreateAnd(RHS, Mask);
13950 }
13951
13952 return CGF.Builder.CreateMul(LHS, RHS);
13953}
13954
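// Illustrative sketch (value names are for illustration only): pmuldq/pmuludq multiply the low 32 bits
// of each 64-bit lane, so the 128-bit signed form becomes roughly
//   %a64 = bitcast <4 x i32> %a to <2 x i64>
//   %ash = shl <2 x i64> %a64, <i64 32, i64 32>
//   %as  = ashr <2 x i64> %ash, <i64 32, i64 32>   ; sign-extend the low half
//   ... same for %b ..., then %res = mul <2 x i64> %as, %bs
// while the unsigned form masks each lane with 0xffffffff instead.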
13955// Emit a masked pternlog intrinsic. This only exists because the header has to
13956// use a macro and we aren't able to pass the input argument to a pternlog
13957// builtin and a select builtin without evaluating it twice.
13958static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
13959 ArrayRef<Value *> Ops) {
13960 llvm::Type *Ty = Ops[0]->getType();
13961
13962 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
13963 unsigned EltWidth = Ty->getScalarSizeInBits();
13964 Intrinsic::ID IID;
13965 if (VecWidth == 128 && EltWidth == 32)
13966 IID = Intrinsic::x86_avx512_pternlog_d_128;
13967 else if (VecWidth == 256 && EltWidth == 32)
13968 IID = Intrinsic::x86_avx512_pternlog_d_256;
13969 else if (VecWidth == 512 && EltWidth == 32)
13970 IID = Intrinsic::x86_avx512_pternlog_d_512;
13971 else if (VecWidth == 128 && EltWidth == 64)
13972 IID = Intrinsic::x86_avx512_pternlog_q_128;
13973 else if (VecWidth == 256 && EltWidth == 64)
13974 IID = Intrinsic::x86_avx512_pternlog_q_256;
13975 else if (VecWidth == 512 && EltWidth == 64)
13976 IID = Intrinsic::x86_avx512_pternlog_q_512;
13977 else
13978 llvm_unreachable("Unexpected intrinsic");
13979
13980 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
13981 Ops.drop_back());
13982 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
13983 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
13984}
13985
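// Illustrative note: the trailing immediate is an 8-bit truth table indexed by
// the bits of (Ops[0], Ops[1], Ops[2]); e.g. imm 0xCA computes the bitwise
// select (a & b) | (~a & c). The result then goes through the usual mask
// select, with Ops[0] (or zero for the maskz form) as the pass-through value.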
13986static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
13987 llvm::Type *DstTy) {
13988 unsigned NumberOfElements =
13989 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
13990 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
13991 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
13992}
13993
13994Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
13995 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
13996 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
13997 return EmitX86CpuIs(CPUStr);
13998}
13999
14000// Convert F16 halves to floats.
14001static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14002 ArrayRef<Value *> Ops,
14003 llvm::Type *DstTy) {
14004 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14005 "Unknown cvtph2ps intrinsic");
14006
14007 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14008 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14009 Function *F =
14010 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14011 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14012 }
14013
14014 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14015 Value *Src = Ops[0];
14016
14017 // Extract the subvector.
14018 if (NumDstElts !=
14019 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14020 assert(NumDstElts == 4 && "Unexpected vector size");
14021 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14022 }
14023
14024 // Bitcast from vXi16 to vXf16.
14025 auto *HalfTy = llvm::FixedVectorType::get(
14026 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14027 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14028
14029 // Perform the fp-extension.
14030 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14031
14032 if (Ops.size() >= 3)
14033 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14034 return Res;
14035}
14036
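// Illustrative sketch (assumptions: the user-level intrinsic name and the IR value names): for a
// 128-bit source converted to four floats, e.g. _mm_cvtph_ps, the helper above takes the low 4 x i16,
// reinterprets them as half and widens:
//   %lo  = low 4 elements of %src        ; shufflevector
//   %h   = bitcast <4 x i16> %lo to <4 x half>
//   %res = fpext <4 x half> %h to <4 x float>
// A non-default SAE/rounding operand keeps the AVX-512 intrinsic instead.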
14037Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14038
14039 llvm::Type *Int32Ty = Builder.getInt32Ty();
14040
14041 // Matching the struct layout from the compiler-rt/libgcc structure that is
14042 // filled in:
14043 // unsigned int __cpu_vendor;
14044 // unsigned int __cpu_type;
14045 // unsigned int __cpu_subtype;
14046 // unsigned int __cpu_features[1];
14047 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14048 llvm::ArrayType::get(Int32Ty, 1));
14049
14050 // Grab the global __cpu_model.
14051 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14052 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14053
14054 // Calculate the index needed to access the correct field based on the
14055 // range. Also adjust the expected value.
14056 unsigned Index;
14057 unsigned Value;
14058 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14059#define X86_VENDOR(ENUM, STRING) \
14060 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14061#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14062 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14063#define X86_CPU_TYPE(ENUM, STR) \
14064 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14065#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14066 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14067#define X86_CPU_SUBTYPE(ENUM, STR) \
14068 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14069#include "llvm/TargetParser/X86TargetParser.def"
14070 .Default({0, 0});
14071 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14072
14073 // Grab the appropriate field from __cpu_model.
14074 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14075 ConstantInt::get(Int32Ty, Index)};
14076 llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
14077 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14078 CharUnits::fromQuantity(4));
14079
14080 // Check the value of the field against the requested value.
14081 return Builder.CreateICmpEQ(CpuValue,
14082 llvm::ConstantInt::get(Int32Ty, Value));
14083}
14084
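// Illustrative note (example strings are assumptions about the .def tables): __builtin_cpu_is("intel")
// loads field 0 (__cpu_vendor) of __cpu_model and compares it against llvm::X86::VENDOR_INTEL;
// CPU type/subtype strings such as "sandybridge" select field 1 or 2
// (__cpu_type/__cpu_subtype) via the StringSwitch above.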
14085Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14086 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14087 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14088 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14089 return Builder.getFalse();
14090 return EmitX86CpuSupports(FeatureStr);
14091}
14092
14093Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14094 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14095}
14096
14097llvm::Value *
14098CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14099 Value *Result = Builder.getTrue();
14100 if (FeatureMask[0] != 0) {
14101 // Matching the struct layout from the compiler-rt/libgcc structure that is
14102 // filled in:
14103 // unsigned int __cpu_vendor;
14104 // unsigned int __cpu_type;
14105 // unsigned int __cpu_subtype;
14106 // unsigned int __cpu_features[1];
14107 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14108 llvm::ArrayType::get(Int32Ty, 1));
14109
14110 // Grab the global __cpu_model.
14111 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14112 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14113
14114 // Grab the first (0th) element from the field __cpu_features off of the
14115 // global in the struct STy.
14116 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14117 Builder.getInt32(0)};
14118 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
14119 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14120 CharUnits::fromQuantity(4));
14121
14122 // Check the value of the bit corresponding to the feature requested.
14123 Value *Mask = Builder.getInt32(FeatureMask[0]);
14124 Value *Bitset = Builder.CreateAnd(Features, Mask);
14125 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14126 Result = Builder.CreateAnd(Result, Cmp);
14127 }
14128
14129 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14130 llvm::Constant *CpuFeatures2 =
14131 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14132 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14133 for (int i = 1; i != 4; ++i) {
14134 const uint32_t M = FeatureMask[i];
14135 if (!M)
14136 continue;
14137 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14138 Value *Features = Builder.CreateAlignedLoad(
14139 Int32Ty, Builder.CreateGEP(ATy, CpuFeatures2, Idxs),
14140 CharUnits::fromQuantity(4));
14141 // Check the value of the bit corresponding to the feature requested.
14142 Value *Mask = Builder.getInt32(M);
14143 Value *Bitset = Builder.CreateAnd(Features, Mask);
14144 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14145 Result = Builder.CreateAnd(Result, Cmp);
14146 }
14147
14148 return Result;
14149}
14150
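// Illustrative note: word 0 of the mask is tested against
// __cpu_model.__cpu_features[0] and words 1-3 against the __cpu_features2
// array, so e.g. __builtin_cpu_supports("avx2") becomes a load, an 'and'
// with the feature bit, and an 'icmp eq' against the same mask (all
// requested bits must be set).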
14151Value *CodeGenFunction::EmitAArch64CpuInit() {
14152 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14153 llvm::FunctionCallee Func =
14154 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14155 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14156 cast<llvm::GlobalValue>(Func.getCallee())
14157 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14158 return Builder.CreateCall(Func);
14159}
14160
14161Value *CodeGenFunction::EmitX86CpuInit() {
14162 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14163 /*Variadic*/ false);
14164 llvm::FunctionCallee Func =
14165 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14166 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14167 cast<llvm::GlobalValue>(Func.getCallee())
14168 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14169 return Builder.CreateCall(Func);
14170}
14171
14172Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14173 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14174 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14175 llvm::SmallVector<StringRef, 8> Features;
14176 ArgStr.split(Features, "+");
14177 for (auto &Feature : Features) {
14178 Feature = Feature.trim();
14179 if (!llvm::AArch64::parseArchExtension(Feature))
14180 return Builder.getFalse();
14181 if (Feature != "default")
14182 Features.push_back(Feature);
14183 }
14184 return EmitAArch64CpuSupports(Features);
14185}
14186
14187llvm::Value *
14188CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14189 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14190 Value *Result = Builder.getTrue();
14191 if (FeaturesMask != 0) {
14192 // Get features from structure in runtime library
14193 // struct {
14194 // unsigned long long features;
14195 // } __aarch64_cpu_features;
14196 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14197 llvm::Constant *AArch64CPUFeatures =
14198 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14199 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14200 llvm::Value *CpuFeatures = Builder.CreateGEP(
14201 STy, AArch64CPUFeatures,
14202 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14203 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14204 CharUnits::fromQuantity(8));
14205 Value *Mask = Builder.getInt64(FeaturesMask);
14206 Value *Bitset = Builder.CreateAnd(Features, Mask);
14207 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14208 Result = Builder.CreateAnd(Result, Cmp);
14209 }
14210 return Result;
14211}
14212
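// Illustrative note: the AArch64 variant reads the single 64-bit
// __aarch64_cpu_features.features word and requires every bit of the computed
// mask to be set, mirroring the x86 logic above.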
14213Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14214 const CallExpr *E) {
14215 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14216 return EmitX86CpuIs(E);
14217 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14218 return EmitX86CpuSupports(E);
14219 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14220 return EmitX86CpuInit();
14221
14222 // Handle MSVC intrinsics before argument evaluation to prevent double
14223 // evaluation.
14224 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14225 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
14226
14227 SmallVector<Value*, 4> Ops;
14228 bool IsMaskFCmp = false;
14229 bool IsConjFMA = false;
14230
14231 // Find out if any arguments are required to be integer constant expressions.
14232 unsigned ICEArguments = 0;
14233 ASTContext::GetBuiltinTypeError Error;
14234 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
14235 assert(Error == ASTContext::GE_None && "Should not codegen an error");
14236
14237 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14238 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
14239 }
14240
14241 // These exist so that the builtin that takes an immediate can be bounds
14242 // checked by clang to avoid passing bad immediates to the backend. Since
14243 // AVX has a larger immediate than SSE we would need separate builtins to
14244 // do the different bounds checking. Rather than create a clang-specific
14245 // SSE-only builtin, this implements eight separate builtins to match the
14246 // gcc implementation.
14247 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14248 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
14249 llvm::Function *F = CGM.getIntrinsic(ID);
14250 return Builder.CreateCall(F, Ops);
14251 };
14252
14253 // For the vector forms of FP comparisons, translate the builtins directly to
14254 // IR.
14255 // TODO: The builtins could be removed if the SSE header files used vector
14256 // extension comparisons directly (vector ordered/unordered may need
14257 // additional support via __builtin_isnan()).
14258 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14259 bool IsSignaling) {
14260 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14261 Value *Cmp;
14262 if (IsSignaling)
14263 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14264 else
14265 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14266 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
14267 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
14268 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
14269 return Builder.CreateBitCast(Sext, FPVecTy);
14270 };
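// Illustrative sketch (value names are for illustration only): a packed FP compare such as
// _mm_cmpeq_ps goes through getVectorFCmpIR and becomes roughly
//   %cmp  = fcmp oeq <4 x float> %a, %b
//   %sext = sext <4 x i1> %cmp to <4 x i32>
//   %res  = bitcast <4 x i32> %sext to <4 x float>
// which yields the all-ones / all-zeros lanes the SSE semantics expect.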
14271
14272 switch (BuiltinID) {
14273 default: return nullptr;
14274 case X86::BI_mm_prefetch: {
14275 Value *Address = Ops[0];
14276 ConstantInt *C = cast<ConstantInt>(Ops[1]);
14277 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
14278 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
14279 Value *Data = ConstantInt::get(Int32Ty, 1);
14280 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14281 return Builder.CreateCall(F, {Address, RW, Locality, Data});
14282 }
14283 case X86::BI_mm_clflush: {
14284 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14285 Ops[0]);
14286 }
14287 case X86::BI_mm_lfence: {
14288 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14289 }
14290 case X86::BI_mm_mfence: {
14291 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14292 }
14293 case X86::BI_mm_sfence: {
14294 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14295 }
14296 case X86::BI_mm_pause: {
14297 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14298 }
14299 case X86::BI__rdtsc: {
14300 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14301 }
14302 case X86::BI__builtin_ia32_rdtscp: {
14303 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14304 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14305 Ops[0]);
14306 return Builder.CreateExtractValue(Call, 0);
14307 }
14308 case X86::BI__builtin_ia32_lzcnt_u16:
14309 case X86::BI__builtin_ia32_lzcnt_u32:
14310 case X86::BI__builtin_ia32_lzcnt_u64: {
14311 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14312 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14313 }
14314 case X86::BI__builtin_ia32_tzcnt_u16:
14315 case X86::BI__builtin_ia32_tzcnt_u32:
14316 case X86::BI__builtin_ia32_tzcnt_u64: {
14317 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14318 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14319 }
14320 case X86::BI__builtin_ia32_undef128:
14321 case X86::BI__builtin_ia32_undef256:
14322 case X86::BI__builtin_ia32_undef512:
14323 // The x86 definition of "undef" is not the same as the LLVM definition
14324 // (PR32176). We leave optimizing away an unnecessary zero constant to the
14325 // IR optimizer and backend.
14326 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14327 // value, we should use that here instead of a zero.
14328 return llvm::Constant::getNullValue(ConvertType(E->getType()));
14329 case X86::BI__builtin_ia32_vec_init_v8qi:
14330 case X86::BI__builtin_ia32_vec_init_v4hi:
14331 case X86::BI__builtin_ia32_vec_init_v2si:
14332 return Builder.CreateBitCast(BuildVector(Ops),
14333 llvm::Type::getX86_MMXTy(getLLVMContext()));
14334 case X86::BI__builtin_ia32_vec_ext_v2si:
14335 case X86::BI__builtin_ia32_vec_ext_v16qi:
14336 case X86::BI__builtin_ia32_vec_ext_v8hi:
14337 case X86::BI__builtin_ia32_vec_ext_v4si:
14338 case X86::BI__builtin_ia32_vec_ext_v4sf:
14339 case X86::BI__builtin_ia32_vec_ext_v2di:
14340 case X86::BI__builtin_ia32_vec_ext_v32qi:
14341 case X86::BI__builtin_ia32_vec_ext_v16hi:
14342 case X86::BI__builtin_ia32_vec_ext_v8si:
14343 case X86::BI__builtin_ia32_vec_ext_v4di: {
14344 unsigned NumElts =
14345 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14346 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14347 Index &= NumElts - 1;
14348 // These builtins exist so we can ensure the index is an ICE and in range.
14349 // Otherwise we could just do this in the header file.
14350 return Builder.CreateExtractElement(Ops[0], Index);
14351 }
14352 case X86::BI__builtin_ia32_vec_set_v16qi:
14353 case X86::BI__builtin_ia32_vec_set_v8hi:
14354 case X86::BI__builtin_ia32_vec_set_v4si:
14355 case X86::BI__builtin_ia32_vec_set_v2di:
14356 case X86::BI__builtin_ia32_vec_set_v32qi:
14357 case X86::BI__builtin_ia32_vec_set_v16hi:
14358 case X86::BI__builtin_ia32_vec_set_v8si:
14359 case X86::BI__builtin_ia32_vec_set_v4di: {
14360 unsigned NumElts =
14361 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14362 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14363 Index &= NumElts - 1;
14364 // These builtins exist so we can ensure the index is an ICE and in range.
14365 // Otherwise we could just do this in the header file.
14366 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14367 }
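// Illustrative note (assuming _mm_extract_epi16 routes through
// __builtin_ia32_vec_ext_v8hi): these builtins exist only so the index can be
// validated as a constant; e.g. _mm_extract_epi16(a, 3) lowers to
// 'extractelement <8 x i16> %a, i64 3', and the vec_set forms lower to the
// matching insertelement.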
14368 case X86::BI_mm_setcsr:
14369 case X86::BI__builtin_ia32_ldmxcsr: {
14370 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
14371 Builder.CreateStore(Ops[0], Tmp);
14372 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14373 Tmp.getPointer());
14374 }
14375 case X86::BI_mm_getcsr:
14376 case X86::BI__builtin_ia32_stmxcsr: {
14377 RawAddress Tmp = CreateMemTemp(E->getType());
14378 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14379 Tmp.getPointer());
14380 return Builder.CreateLoad(Tmp, "stmxcsr");
14381 }
14382 case X86::BI__builtin_ia32_xsave:
14383 case X86::BI__builtin_ia32_xsave64:
14384 case X86::BI__builtin_ia32_xrstor:
14385 case X86::BI__builtin_ia32_xrstor64:
14386 case X86::BI__builtin_ia32_xsaveopt:
14387 case X86::BI__builtin_ia32_xsaveopt64:
14388 case X86::BI__builtin_ia32_xrstors:
14389 case X86::BI__builtin_ia32_xrstors64:
14390 case X86::BI__builtin_ia32_xsavec:
14391 case X86::BI__builtin_ia32_xsavec64:
14392 case X86::BI__builtin_ia32_xsaves:
14393 case X86::BI__builtin_ia32_xsaves64:
14394 case X86::BI__builtin_ia32_xsetbv:
14395 case X86::BI_xsetbv: {
14396 Intrinsic::ID ID;
14397#define INTRINSIC_X86_XSAVE_ID(NAME) \
14398 case X86::BI__builtin_ia32_##NAME: \
14399 ID = Intrinsic::x86_##NAME; \
14400 break
14401 switch (BuiltinID) {
14402 default: llvm_unreachable("Unsupported intrinsic!");
14403 INTRINSIC_X86_XSAVE_ID(xsave);
14404 INTRINSIC_X86_XSAVE_ID(xsave64);
14405 INTRINSIC_X86_XSAVE_ID(xrstor);
14406 INTRINSIC_X86_XSAVE_ID(xrstor64);
14407 INTRINSIC_X86_XSAVE_ID(xsaveopt);
14408 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14409 INTRINSIC_X86_XSAVE_ID(xrstors);
14410 INTRINSIC_X86_XSAVE_ID(xrstors64);
14411 INTRINSIC_X86_XSAVE_ID(xsavec);
14412 INTRINSIC_X86_XSAVE_ID(xsavec64);
14413 INTRINSIC_X86_XSAVE_ID(xsaves);
14414 INTRINSIC_X86_XSAVE_ID(xsaves64);
14415 INTRINSIC_X86_XSAVE_ID(xsetbv);
14416 case X86::BI_xsetbv:
14417 ID = Intrinsic::x86_xsetbv;
14418 break;
14419 }
14420#undef INTRINSIC_X86_XSAVE_ID
14421 Value *Mhi = Builder.CreateTrunc(
14422 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14423 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14424 Ops[1] = Mhi;
14425 Ops.push_back(Mlo);
14426 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14427 }
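// Illustrative note (value names are for illustration only): the 64-bit XSAVE component mask is
// split into its high and low 32-bit halves (EDX:EAX for the underlying
// instruction), so e.g. _xsave(p, m) becomes roughly
//   call void @llvm.x86.xsave(ptr %p, i32 %hi, i32 %lo)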
14428 case X86::BI__builtin_ia32_xgetbv:
14429 case X86::BI_xgetbv:
14430 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14431 case X86::BI__builtin_ia32_storedqudi128_mask:
14432 case X86::BI__builtin_ia32_storedqusi128_mask:
14433 case X86::BI__builtin_ia32_storedquhi128_mask:
14434 case X86::BI__builtin_ia32_storedquqi128_mask:
14435 case X86::BI__builtin_ia32_storeupd128_mask:
14436 case X86::BI__builtin_ia32_storeups128_mask:
14437 case X86::BI__builtin_ia32_storedqudi256_mask:
14438 case X86::BI__builtin_ia32_storedqusi256_mask:
14439 case X86::BI__builtin_ia32_storedquhi256_mask:
14440 case X86::BI__builtin_ia32_storedquqi256_mask:
14441 case X86::BI__builtin_ia32_storeupd256_mask:
14442 case X86::BI__builtin_ia32_storeups256_mask:
14443 case X86::BI__builtin_ia32_storedqudi512_mask:
14444 case X86::BI__builtin_ia32_storedqusi512_mask:
14445 case X86::BI__builtin_ia32_storedquhi512_mask:
14446 case X86::BI__builtin_ia32_storedquqi512_mask:
14447 case X86::BI__builtin_ia32_storeupd512_mask:
14448 case X86::BI__builtin_ia32_storeups512_mask:
14449 return EmitX86MaskedStore(*this, Ops, Align(1));
14450
14451 case X86::BI__builtin_ia32_storesh128_mask:
14452 case X86::BI__builtin_ia32_storess128_mask:
14453 case X86::BI__builtin_ia32_storesd128_mask:
14454 return EmitX86MaskedStore(*this, Ops, Align(1));
14455
14456 case X86::BI__builtin_ia32_vpopcntb_128:
14457 case X86::BI__builtin_ia32_vpopcntd_128:
14458 case X86::BI__builtin_ia32_vpopcntq_128:
14459 case X86::BI__builtin_ia32_vpopcntw_128:
14460 case X86::BI__builtin_ia32_vpopcntb_256:
14461 case X86::BI__builtin_ia32_vpopcntd_256:
14462 case X86::BI__builtin_ia32_vpopcntq_256:
14463 case X86::BI__builtin_ia32_vpopcntw_256:
14464 case X86::BI__builtin_ia32_vpopcntb_512:
14465 case X86::BI__builtin_ia32_vpopcntd_512:
14466 case X86::BI__builtin_ia32_vpopcntq_512:
14467 case X86::BI__builtin_ia32_vpopcntw_512: {
14468 llvm::Type *ResultType = ConvertType(E->getType());
14469 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14470 return Builder.CreateCall(F, Ops);
14471 }
14472 case X86::BI__builtin_ia32_cvtmask2b128:
14473 case X86::BI__builtin_ia32_cvtmask2b256:
14474 case X86::BI__builtin_ia32_cvtmask2b512:
14475 case X86::BI__builtin_ia32_cvtmask2w128:
14476 case X86::BI__builtin_ia32_cvtmask2w256:
14477 case X86::BI__builtin_ia32_cvtmask2w512:
14478 case X86::BI__builtin_ia32_cvtmask2d128:
14479 case X86::BI__builtin_ia32_cvtmask2d256:
14480 case X86::BI__builtin_ia32_cvtmask2d512:
14481 case X86::BI__builtin_ia32_cvtmask2q128:
14482 case X86::BI__builtin_ia32_cvtmask2q256:
14483 case X86::BI__builtin_ia32_cvtmask2q512:
14484 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14485
14486 case X86::BI__builtin_ia32_cvtb2mask128:
14487 case X86::BI__builtin_ia32_cvtb2mask256:
14488 case X86::BI__builtin_ia32_cvtb2mask512:
14489 case X86::BI__builtin_ia32_cvtw2mask128:
14490 case X86::BI__builtin_ia32_cvtw2mask256:
14491 case X86::BI__builtin_ia32_cvtw2mask512:
14492 case X86::BI__builtin_ia32_cvtd2mask128:
14493 case X86::BI__builtin_ia32_cvtd2mask256:
14494 case X86::BI__builtin_ia32_cvtd2mask512:
14495 case X86::BI__builtin_ia32_cvtq2mask128:
14496 case X86::BI__builtin_ia32_cvtq2mask256:
14497 case X86::BI__builtin_ia32_cvtq2mask512:
14498 return EmitX86ConvertToMask(*this, Ops[0]);
14499
14500 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14501 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14502 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14503 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14504 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14505 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14506 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14507 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14508 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14509 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14510 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14511 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14512 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14513 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14514
14515 case X86::BI__builtin_ia32_vfmaddss3:
14516 case X86::BI__builtin_ia32_vfmaddsd3:
14517 case X86::BI__builtin_ia32_vfmaddsh3_mask:
14518 case X86::BI__builtin_ia32_vfmaddss3_mask:
14519 case X86::BI__builtin_ia32_vfmaddsd3_mask:
14520 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14521 case X86::BI__builtin_ia32_vfmaddss:
14522 case X86::BI__builtin_ia32_vfmaddsd:
14523 return EmitScalarFMAExpr(*this, E, Ops,
14524 Constant::getNullValue(Ops[0]->getType()));
14525 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14526 case X86::BI__builtin_ia32_vfmaddss3_maskz:
14527 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14528 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14529 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14530 case X86::BI__builtin_ia32_vfmaddss3_mask3:
14531 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14532 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14533 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14534 case X86::BI__builtin_ia32_vfmsubss3_mask3:
14535 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14536 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14537 /*NegAcc*/ true);
14538 case X86::BI__builtin_ia32_vfmaddph:
14539 case X86::BI__builtin_ia32_vfmaddps:
14540 case X86::BI__builtin_ia32_vfmaddpd:
14541 case X86::BI__builtin_ia32_vfmaddph256:
14542 case X86::BI__builtin_ia32_vfmaddps256:
14543 case X86::BI__builtin_ia32_vfmaddpd256:
14544 case X86::BI__builtin_ia32_vfmaddph512_mask:
14545 case X86::BI__builtin_ia32_vfmaddph512_maskz:
14546 case X86::BI__builtin_ia32_vfmaddph512_mask3:
14547 case X86::BI__builtin_ia32_vfmaddps512_mask:
14548 case X86::BI__builtin_ia32_vfmaddps512_maskz:
14549 case X86::BI__builtin_ia32_vfmaddps512_mask3:
14550 case X86::BI__builtin_ia32_vfmsubps512_mask3:
14551 case X86::BI__builtin_ia32_vfmaddpd512_mask:
14552 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14553 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14554 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14555 case X86::BI__builtin_ia32_vfmsubph512_mask3:
14556 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14557 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14558 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14559 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14560 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14561 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14562 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14563 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14564 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14565 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14566 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14567 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14568 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14569 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14570
14571 case X86::BI__builtin_ia32_movdqa32store128_mask:
14572 case X86::BI__builtin_ia32_movdqa64store128_mask:
14573 case X86::BI__builtin_ia32_storeaps128_mask:
14574 case X86::BI__builtin_ia32_storeapd128_mask:
14575 case X86::BI__builtin_ia32_movdqa32store256_mask:
14576 case X86::BI__builtin_ia32_movdqa64store256_mask:
14577 case X86::BI__builtin_ia32_storeaps256_mask:
14578 case X86::BI__builtin_ia32_storeapd256_mask:
14579 case X86::BI__builtin_ia32_movdqa32store512_mask:
14580 case X86::BI__builtin_ia32_movdqa64store512_mask:
14581 case X86::BI__builtin_ia32_storeaps512_mask:
14582 case X86::BI__builtin_ia32_storeapd512_mask:
14583 return EmitX86MaskedStore(
14584 *this, Ops,
14585 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14586
14587 case X86::BI__builtin_ia32_loadups128_mask:
14588 case X86::BI__builtin_ia32_loadups256_mask:
14589 case X86::BI__builtin_ia32_loadups512_mask:
14590 case X86::BI__builtin_ia32_loadupd128_mask:
14591 case X86::BI__builtin_ia32_loadupd256_mask:
14592 case X86::BI__builtin_ia32_loadupd512_mask:
14593 case X86::BI__builtin_ia32_loaddquqi128_mask:
14594 case X86::BI__builtin_ia32_loaddquqi256_mask:
14595 case X86::BI__builtin_ia32_loaddquqi512_mask:
14596 case X86::BI__builtin_ia32_loaddquhi128_mask:
14597 case X86::BI__builtin_ia32_loaddquhi256_mask:
14598 case X86::BI__builtin_ia32_loaddquhi512_mask:
14599 case X86::BI__builtin_ia32_loaddqusi128_mask:
14600 case X86::BI__builtin_ia32_loaddqusi256_mask:
14601 case X86::BI__builtin_ia32_loaddqusi512_mask:
14602 case X86::BI__builtin_ia32_loaddqudi128_mask:
14603 case X86::BI__builtin_ia32_loaddqudi256_mask:
14604 case X86::BI__builtin_ia32_loaddqudi512_mask:
14605 return EmitX86MaskedLoad(*this, Ops, Align(1));
14606
14607 case X86::BI__builtin_ia32_loadsh128_mask:
14608 case X86::BI__builtin_ia32_loadss128_mask:
14609 case X86::BI__builtin_ia32_loadsd128_mask:
14610 return EmitX86MaskedLoad(*this, Ops, Align(1));
14611
14612 case X86::BI__builtin_ia32_loadaps128_mask:
14613 case X86::BI__builtin_ia32_loadaps256_mask:
14614 case X86::BI__builtin_ia32_loadaps512_mask:
14615 case X86::BI__builtin_ia32_loadapd128_mask:
14616 case X86::BI__builtin_ia32_loadapd256_mask:
14617 case X86::BI__builtin_ia32_loadapd512_mask:
14618 case X86::BI__builtin_ia32_movdqa32load128_mask:
14619 case X86::BI__builtin_ia32_movdqa32load256_mask:
14620 case X86::BI__builtin_ia32_movdqa32load512_mask:
14621 case X86::BI__builtin_ia32_movdqa64load128_mask:
14622 case X86::BI__builtin_ia32_movdqa64load256_mask:
14623 case X86::BI__builtin_ia32_movdqa64load512_mask:
14624 return EmitX86MaskedLoad(
14625 *this, Ops,
14626 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14627
14628 case X86::BI__builtin_ia32_expandloaddf128_mask:
14629 case X86::BI__builtin_ia32_expandloaddf256_mask:
14630 case X86::BI__builtin_ia32_expandloaddf512_mask:
14631 case X86::BI__builtin_ia32_expandloadsf128_mask:
14632 case X86::BI__builtin_ia32_expandloadsf256_mask:
14633 case X86::BI__builtin_ia32_expandloadsf512_mask:
14634 case X86::BI__builtin_ia32_expandloaddi128_mask:
14635 case X86::BI__builtin_ia32_expandloaddi256_mask:
14636 case X86::BI__builtin_ia32_expandloaddi512_mask:
14637 case X86::BI__builtin_ia32_expandloadsi128_mask:
14638 case X86::BI__builtin_ia32_expandloadsi256_mask:
14639 case X86::BI__builtin_ia32_expandloadsi512_mask:
14640 case X86::BI__builtin_ia32_expandloadhi128_mask:
14641 case X86::BI__builtin_ia32_expandloadhi256_mask:
14642 case X86::BI__builtin_ia32_expandloadhi512_mask:
14643 case X86::BI__builtin_ia32_expandloadqi128_mask:
14644 case X86::BI__builtin_ia32_expandloadqi256_mask:
14645 case X86::BI__builtin_ia32_expandloadqi512_mask:
14646 return EmitX86ExpandLoad(*this, Ops);
14647
14648 case X86::BI__builtin_ia32_compressstoredf128_mask:
14649 case X86::BI__builtin_ia32_compressstoredf256_mask:
14650 case X86::BI__builtin_ia32_compressstoredf512_mask:
14651 case X86::BI__builtin_ia32_compressstoresf128_mask:
14652 case X86::BI__builtin_ia32_compressstoresf256_mask:
14653 case X86::BI__builtin_ia32_compressstoresf512_mask:
14654 case X86::BI__builtin_ia32_compressstoredi128_mask:
14655 case X86::BI__builtin_ia32_compressstoredi256_mask:
14656 case X86::BI__builtin_ia32_compressstoredi512_mask:
14657 case X86::BI__builtin_ia32_compressstoresi128_mask:
14658 case X86::BI__builtin_ia32_compressstoresi256_mask:
14659 case X86::BI__builtin_ia32_compressstoresi512_mask:
14660 case X86::BI__builtin_ia32_compressstorehi128_mask:
14661 case X86::BI__builtin_ia32_compressstorehi256_mask:
14662 case X86::BI__builtin_ia32_compressstorehi512_mask:
14663 case X86::BI__builtin_ia32_compressstoreqi128_mask:
14664 case X86::BI__builtin_ia32_compressstoreqi256_mask:
14665 case X86::BI__builtin_ia32_compressstoreqi512_mask:
14666 return EmitX86CompressStore(*this, Ops);
14667
14668 case X86::BI__builtin_ia32_expanddf128_mask:
14669 case X86::BI__builtin_ia32_expanddf256_mask:
14670 case X86::BI__builtin_ia32_expanddf512_mask:
14671 case X86::BI__builtin_ia32_expandsf128_mask:
14672 case X86::BI__builtin_ia32_expandsf256_mask:
14673 case X86::BI__builtin_ia32_expandsf512_mask:
14674 case X86::BI__builtin_ia32_expanddi128_mask:
14675 case X86::BI__builtin_ia32_expanddi256_mask:
14676 case X86::BI__builtin_ia32_expanddi512_mask:
14677 case X86::BI__builtin_ia32_expandsi128_mask:
14678 case X86::BI__builtin_ia32_expandsi256_mask:
14679 case X86::BI__builtin_ia32_expandsi512_mask:
14680 case X86::BI__builtin_ia32_expandhi128_mask:
14681 case X86::BI__builtin_ia32_expandhi256_mask:
14682 case X86::BI__builtin_ia32_expandhi512_mask:
14683 case X86::BI__builtin_ia32_expandqi128_mask:
14684 case X86::BI__builtin_ia32_expandqi256_mask:
14685 case X86::BI__builtin_ia32_expandqi512_mask:
14686 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
14687
14688 case X86::BI__builtin_ia32_compressdf128_mask:
14689 case X86::BI__builtin_ia32_compressdf256_mask:
14690 case X86::BI__builtin_ia32_compressdf512_mask:
14691 case X86::BI__builtin_ia32_compresssf128_mask:
14692 case X86::BI__builtin_ia32_compresssf256_mask:
14693 case X86::BI__builtin_ia32_compresssf512_mask:
14694 case X86::BI__builtin_ia32_compressdi128_mask:
14695 case X86::BI__builtin_ia32_compressdi256_mask:
14696 case X86::BI__builtin_ia32_compressdi512_mask:
14697 case X86::BI__builtin_ia32_compresssi128_mask:
14698 case X86::BI__builtin_ia32_compresssi256_mask:
14699 case X86::BI__builtin_ia32_compresssi512_mask:
14700 case X86::BI__builtin_ia32_compresshi128_mask:
14701 case X86::BI__builtin_ia32_compresshi256_mask:
14702 case X86::BI__builtin_ia32_compresshi512_mask:
14703 case X86::BI__builtin_ia32_compressqi128_mask:
14704 case X86::BI__builtin_ia32_compressqi256_mask:
14705 case X86::BI__builtin_ia32_compressqi512_mask:
14706 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
14707
14708 case X86::BI__builtin_ia32_gather3div2df:
14709 case X86::BI__builtin_ia32_gather3div2di:
14710 case X86::BI__builtin_ia32_gather3div4df:
14711 case X86::BI__builtin_ia32_gather3div4di:
14712 case X86::BI__builtin_ia32_gather3div4sf:
14713 case X86::BI__builtin_ia32_gather3div4si:
14714 case X86::BI__builtin_ia32_gather3div8sf:
14715 case X86::BI__builtin_ia32_gather3div8si:
14716 case X86::BI__builtin_ia32_gather3siv2df:
14717 case X86::BI__builtin_ia32_gather3siv2di:
14718 case X86::BI__builtin_ia32_gather3siv4df:
14719 case X86::BI__builtin_ia32_gather3siv4di:
14720 case X86::BI__builtin_ia32_gather3siv4sf:
14721 case X86::BI__builtin_ia32_gather3siv4si:
14722 case X86::BI__builtin_ia32_gather3siv8sf:
14723 case X86::BI__builtin_ia32_gather3siv8si:
14724 case X86::BI__builtin_ia32_gathersiv8df:
14725 case X86::BI__builtin_ia32_gathersiv16sf:
14726 case X86::BI__builtin_ia32_gatherdiv8df:
14727 case X86::BI__builtin_ia32_gatherdiv16sf:
14728 case X86::BI__builtin_ia32_gathersiv8di:
14729 case X86::BI__builtin_ia32_gathersiv16si:
14730 case X86::BI__builtin_ia32_gatherdiv8di:
14731 case X86::BI__builtin_ia32_gatherdiv16si: {
14732 Intrinsic::ID IID;
14733 switch (BuiltinID) {
14734 default: llvm_unreachable("Unexpected builtin");
14735 case X86::BI__builtin_ia32_gather3div2df:
14736 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
14737 break;
14738 case X86::BI__builtin_ia32_gather3div2di:
14739 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
14740 break;
14741 case X86::BI__builtin_ia32_gather3div4df:
14742 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
14743 break;
14744 case X86::BI__builtin_ia32_gather3div4di:
14745 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
14746 break;
14747 case X86::BI__builtin_ia32_gather3div4sf:
14748 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
14749 break;
14750 case X86::BI__builtin_ia32_gather3div4si:
14751 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
14752 break;
14753 case X86::BI__builtin_ia32_gather3div8sf:
14754 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
14755 break;
14756 case X86::BI__builtin_ia32_gather3div8si:
14757 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
14758 break;
14759 case X86::BI__builtin_ia32_gather3siv2df:
14760 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
14761 break;
14762 case X86::BI__builtin_ia32_gather3siv2di:
14763 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
14764 break;
14765 case X86::BI__builtin_ia32_gather3siv4df:
14766 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
14767 break;
14768 case X86::BI__builtin_ia32_gather3siv4di:
14769 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
14770 break;
14771 case X86::BI__builtin_ia32_gather3siv4sf:
14772 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
14773 break;
14774 case X86::BI__builtin_ia32_gather3siv4si:
14775 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
14776 break;
14777 case X86::BI__builtin_ia32_gather3siv8sf:
14778 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
14779 break;
14780 case X86::BI__builtin_ia32_gather3siv8si:
14781 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
14782 break;
14783 case X86::BI__builtin_ia32_gathersiv8df:
14784 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
14785 break;
14786 case X86::BI__builtin_ia32_gathersiv16sf:
14787 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
14788 break;
14789 case X86::BI__builtin_ia32_gatherdiv8df:
14790 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
14791 break;
14792 case X86::BI__builtin_ia32_gatherdiv16sf:
14793 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
14794 break;
14795 case X86::BI__builtin_ia32_gathersiv8di:
14796 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
14797 break;
14798 case X86::BI__builtin_ia32_gathersiv16si:
14799 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
14800 break;
14801 case X86::BI__builtin_ia32_gatherdiv8di:
14802 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
14803 break;
14804 case X86::BI__builtin_ia32_gatherdiv16si:
14805 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
14806 break;
14807 }
14808
14809 unsigned MinElts = std::min(
14810 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
14811 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
14812 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
14813 Function *Intr = CGM.getIntrinsic(IID);
14814 return Builder.CreateCall(Intr, Ops);
14815 }
14816
14817 case X86::BI__builtin_ia32_scattersiv8df:
14818 case X86::BI__builtin_ia32_scattersiv16sf:
14819 case X86::BI__builtin_ia32_scatterdiv8df:
14820 case X86::BI__builtin_ia32_scatterdiv16sf:
14821 case X86::BI__builtin_ia32_scattersiv8di:
14822 case X86::BI__builtin_ia32_scattersiv16si:
14823 case X86::BI__builtin_ia32_scatterdiv8di:
14824 case X86::BI__builtin_ia32_scatterdiv16si:
14825 case X86::BI__builtin_ia32_scatterdiv2df:
14826 case X86::BI__builtin_ia32_scatterdiv2di:
14827 case X86::BI__builtin_ia32_scatterdiv4df:
14828 case X86::BI__builtin_ia32_scatterdiv4di:
14829 case X86::BI__builtin_ia32_scatterdiv4sf:
14830 case X86::BI__builtin_ia32_scatterdiv4si:
14831 case X86::BI__builtin_ia32_scatterdiv8sf:
14832 case X86::BI__builtin_ia32_scatterdiv8si:
14833 case X86::BI__builtin_ia32_scattersiv2df:
14834 case X86::BI__builtin_ia32_scattersiv2di:
14835 case X86::BI__builtin_ia32_scattersiv4df:
14836 case X86::BI__builtin_ia32_scattersiv4di:
14837 case X86::BI__builtin_ia32_scattersiv4sf:
14838 case X86::BI__builtin_ia32_scattersiv4si:
14839 case X86::BI__builtin_ia32_scattersiv8sf:
14840 case X86::BI__builtin_ia32_scattersiv8si: {
14841 Intrinsic::ID IID;
14842 switch (BuiltinID) {
14843 default: llvm_unreachable("Unexpected builtin");
14844 case X86::BI__builtin_ia32_scattersiv8df:
14845 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
14846 break;
14847 case X86::BI__builtin_ia32_scattersiv16sf:
14848 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
14849 break;
14850 case X86::BI__builtin_ia32_scatterdiv8df:
14851 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
14852 break;
14853 case X86::BI__builtin_ia32_scatterdiv16sf:
14854 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
14855 break;
14856 case X86::BI__builtin_ia32_scattersiv8di:
14857 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
14858 break;
14859 case X86::BI__builtin_ia32_scattersiv16si:
14860 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
14861 break;
14862 case X86::BI__builtin_ia32_scatterdiv8di:
14863 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
14864 break;
14865 case X86::BI__builtin_ia32_scatterdiv16si:
14866 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
14867 break;
14868 case X86::BI__builtin_ia32_scatterdiv2df:
14869 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
14870 break;
14871 case X86::BI__builtin_ia32_scatterdiv2di:
14872 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
14873 break;
14874 case X86::BI__builtin_ia32_scatterdiv4df:
14875 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
14876 break;
14877 case X86::BI__builtin_ia32_scatterdiv4di:
14878 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
14879 break;
14880 case X86::BI__builtin_ia32_scatterdiv4sf:
14881 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
14882 break;
14883 case X86::BI__builtin_ia32_scatterdiv4si:
14884 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
14885 break;
14886 case X86::BI__builtin_ia32_scatterdiv8sf:
14887 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
14888 break;
14889 case X86::BI__builtin_ia32_scatterdiv8si:
14890 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
14891 break;
14892 case X86::BI__builtin_ia32_scattersiv2df:
14893 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
14894 break;
14895 case X86::BI__builtin_ia32_scattersiv2di:
14896 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
14897 break;
14898 case X86::BI__builtin_ia32_scattersiv4df:
14899 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
14900 break;
14901 case X86::BI__builtin_ia32_scattersiv4di:
14902 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
14903 break;
14904 case X86::BI__builtin_ia32_scattersiv4sf:
14905 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
14906 break;
14907 case X86::BI__builtin_ia32_scattersiv4si:
14908 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
14909 break;
14910 case X86::BI__builtin_ia32_scattersiv8sf:
14911 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
14912 break;
14913 case X86::BI__builtin_ia32_scattersiv8si:
14914 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
14915 break;
14916 }
14917
14918 unsigned MinElts = std::min(
14919 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
14920 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
14921 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
14922 Function *Intr = CGM.getIntrinsic(IID);
14923 return Builder.CreateCall(Intr, Ops);
14924 }
14925
14926 case X86::BI__builtin_ia32_vextractf128_pd256:
14927 case X86::BI__builtin_ia32_vextractf128_ps256:
14928 case X86::BI__builtin_ia32_vextractf128_si256:
14929 case X86::BI__builtin_ia32_extract128i256:
14930 case X86::BI__builtin_ia32_extractf64x4_mask:
14931 case X86::BI__builtin_ia32_extractf32x4_mask:
14932 case X86::BI__builtin_ia32_extracti64x4_mask:
14933 case X86::BI__builtin_ia32_extracti32x4_mask:
14934 case X86::BI__builtin_ia32_extractf32x8_mask:
14935 case X86::BI__builtin_ia32_extracti32x8_mask:
14936 case X86::BI__builtin_ia32_extractf32x4_256_mask:
14937 case X86::BI__builtin_ia32_extracti32x4_256_mask:
14938 case X86::BI__builtin_ia32_extractf64x2_256_mask:
14939 case X86::BI__builtin_ia32_extracti64x2_256_mask:
14940 case X86::BI__builtin_ia32_extractf64x2_512_mask:
14941 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
14942 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
14943 unsigned NumElts = DstTy->getNumElements();
14944 unsigned SrcNumElts =
14945 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14946 unsigned SubVectors = SrcNumElts / NumElts;
14947 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14948 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14949 Index &= SubVectors - 1; // Remove any extra bits.
14950 Index *= NumElts;
14951
14952 int Indices[16];
14953 for (unsigned i = 0; i != NumElts; ++i)
14954 Indices[i] = i + Index;
14955
14956 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
14957 "extract");
14958
14959 if (Ops.size() == 4)
14960 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
14961
14962 return Res;
14963 }
14964 case X86::BI__builtin_ia32_vinsertf128_pd256:
14965 case X86::BI__builtin_ia32_vinsertf128_ps256:
14966 case X86::BI__builtin_ia32_vinsertf128_si256:
14967 case X86::BI__builtin_ia32_insert128i256:
14968 case X86::BI__builtin_ia32_insertf64x4:
14969 case X86::BI__builtin_ia32_insertf32x4:
14970 case X86::BI__builtin_ia32_inserti64x4:
14971 case X86::BI__builtin_ia32_inserti32x4:
14972 case X86::BI__builtin_ia32_insertf32x8:
14973 case X86::BI__builtin_ia32_inserti32x8:
14974 case X86::BI__builtin_ia32_insertf32x4_256:
14975 case X86::BI__builtin_ia32_inserti32x4_256:
14976 case X86::BI__builtin_ia32_insertf64x2_256:
14977 case X86::BI__builtin_ia32_inserti64x2_256:
14978 case X86::BI__builtin_ia32_insertf64x2_512:
14979 case X86::BI__builtin_ia32_inserti64x2_512: {
14980 unsigned DstNumElts =
14981 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14982 unsigned SrcNumElts =
14983 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
14984 unsigned SubVectors = DstNumElts / SrcNumElts;
14985 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14986 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
14987 Index &= SubVectors - 1; // Remove any extra bits.
14988 Index *= SrcNumElts;
14989
14990 int Indices[16];
14991 for (unsigned i = 0; i != DstNumElts; ++i)
14992 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
14993
14994 Value *Op1 = Builder.CreateShuffleVector(
14995 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
14996
14997 for (unsigned i = 0; i != DstNumElts; ++i) {
14998 if (i >= Index && i < (Index + SrcNumElts))
14999 Indices[i] = (i - Index) + DstNumElts;
15000 else
15001 Indices[i] = i;
15002 }
15003
15004 return Builder.CreateShuffleVector(Ops[0], Op1,
15005 ArrayRef(Indices, DstNumElts), "insert");
15006 }
15007 case X86::BI__builtin_ia32_pmovqd512_mask:
15008 case X86::BI__builtin_ia32_pmovwb512_mask: {
15009 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15010 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15011 }
15012 case X86::BI__builtin_ia32_pmovdb512_mask:
15013 case X86::BI__builtin_ia32_pmovdw512_mask:
15014 case X86::BI__builtin_ia32_pmovqw512_mask: {
15015 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15016 if (C->isAllOnesValue())
15017 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15018
15019 Intrinsic::ID IID;
15020 switch (BuiltinID) {
15021 default: llvm_unreachable("Unsupported intrinsic!");
15022 case X86::BI__builtin_ia32_pmovdb512_mask:
15023 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15024 break;
15025 case X86::BI__builtin_ia32_pmovdw512_mask:
15026 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15027 break;
15028 case X86::BI__builtin_ia32_pmovqw512_mask:
15029 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15030 break;
15031 }
15032
15033 Function *Intr = CGM.getIntrinsic(IID);
15034 return Builder.CreateCall(Intr, Ops);
15035 }
15036 case X86::BI__builtin_ia32_pblendw128:
15037 case X86::BI__builtin_ia32_blendpd:
15038 case X86::BI__builtin_ia32_blendps:
15039 case X86::BI__builtin_ia32_blendpd256:
15040 case X86::BI__builtin_ia32_blendps256:
15041 case X86::BI__builtin_ia32_pblendw256:
15042 case X86::BI__builtin_ia32_pblendd128:
15043 case X86::BI__builtin_ia32_pblendd256: {
15044 unsigned NumElts =
15045 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15046 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15047
15048 int Indices[16];
15049 // If there are more than 8 elements, the immediate is used twice so make
15050 // sure we handle that.
15051 for (unsigned i = 0; i != NumElts; ++i)
15052 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15053
15054 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15055 ArrayRef(Indices, NumElts), "blend");
15056 }
15057 case X86::BI__builtin_ia32_pshuflw:
15058 case X86::BI__builtin_ia32_pshuflw256:
15059 case X86::BI__builtin_ia32_pshuflw512: {
15060 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15061 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15062 unsigned NumElts = Ty->getNumElements();
15063
15064 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15065 Imm = (Imm & 0xff) * 0x01010101;
15066
15067 int Indices[32];
15068 for (unsigned l = 0; l != NumElts; l += 8) {
15069 for (unsigned i = 0; i != 4; ++i) {
15070 Indices[l + i] = l + (Imm & 3);
15071 Imm >>= 2;
15072 }
15073 for (unsigned i = 4; i != 8; ++i)
15074 Indices[l + i] = l + i;
15075 }
15076
15077 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15078 "pshuflw");
15079 }
15080 case X86::BI__builtin_ia32_pshufhw:
15081 case X86::BI__builtin_ia32_pshufhw256:
15082 case X86::BI__builtin_ia32_pshufhw512: {
15083 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15084 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15085 unsigned NumElts = Ty->getNumElements();
15086
15087 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15088 Imm = (Imm & 0xff) * 0x01010101;
15089
15090 int Indices[32];
15091 for (unsigned l = 0; l != NumElts; l += 8) {
15092 for (unsigned i = 0; i != 4; ++i)
15093 Indices[l + i] = l + i;
15094 for (unsigned i = 4; i != 8; ++i) {
15095 Indices[l + i] = l + 4 + (Imm & 3);
15096 Imm >>= 2;
15097 }
15098 }
15099
15100 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15101 "pshufhw");
15102 }
15103 case X86::BI__builtin_ia32_pshufd:
15104 case X86::BI__builtin_ia32_pshufd256:
15105 case X86::BI__builtin_ia32_pshufd512:
15106 case X86::BI__builtin_ia32_vpermilpd:
15107 case X86::BI__builtin_ia32_vpermilps:
15108 case X86::BI__builtin_ia32_vpermilpd256:
15109 case X86::BI__builtin_ia32_vpermilps256:
15110 case X86::BI__builtin_ia32_vpermilpd512:
15111 case X86::BI__builtin_ia32_vpermilps512: {
15112 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15113 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15114 unsigned NumElts = Ty->getNumElements();
15115 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15116 unsigned NumLaneElts = NumElts / NumLanes;
15117
15118 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15119 Imm = (Imm & 0xff) * 0x01010101;
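// For example, pshufd on <4 x i32> with Imm = 0x4E (0b01001110) produces
// indices {2,3,0,1}, i.e. the two 64-bit halves of the vector are swapped.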
15120
15121 int Indices[16];
15122 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15123 for (unsigned i = 0; i != NumLaneElts; ++i) {
15124 Indices[i + l] = (Imm % NumLaneElts) + l;
15125 Imm /= NumLaneElts;
15126 }
15127 }
15128
15129 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15130 "permil");
15131 }
15132 case X86::BI__builtin_ia32_shufpd:
15133 case X86::BI__builtin_ia32_shufpd256:
15134 case X86::BI__builtin_ia32_shufpd512:
15135 case X86::BI__builtin_ia32_shufps:
15136 case X86::BI__builtin_ia32_shufps256:
15137 case X86::BI__builtin_ia32_shufps512: {
15138 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15139 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15140 unsigned NumElts = Ty->getNumElements();
15141 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15142 unsigned NumLaneElts = NumElts / NumLanes;
15143
15144 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15145 Imm = (Imm & 0xff) * 0x01010101;
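// For example, shufps with Imm = 0x4E produces indices {2,3,4,5}, i.e. the
// result is {Ops[0][2], Ops[0][3], Ops[1][0], Ops[1][1]}; indices >= NumElts
// refer to the second source.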
15146
15147 int Indices[16];
15148 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15149 for (unsigned i = 0; i != NumLaneElts; ++i) {
15150 unsigned Index = Imm % NumLaneElts;
15151 Imm /= NumLaneElts;
15152 if (i >= (NumLaneElts / 2))
15153 Index += NumElts;
15154 Indices[l + i] = l + Index;
15155 }
15156 }
15157
15158 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15159 ArrayRef(Indices, NumElts), "shufp");
15160 }
15161 case X86::BI__builtin_ia32_permdi256:
15162 case X86::BI__builtin_ia32_permdf256:
15163 case X86::BI__builtin_ia32_permdi512:
15164 case X86::BI__builtin_ia32_permdf512: {
15165 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15166 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15167 unsigned NumElts = Ty->getNumElements();
15168
15169 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15170 int Indices[8];
15171 for (unsigned l = 0; l != NumElts; l += 4)
15172 for (unsigned i = 0; i != 4; ++i)
15173 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15174
15175 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15176 "perm");
15177 }
15178 case X86::BI__builtin_ia32_palignr128:
15179 case X86::BI__builtin_ia32_palignr256:
15180 case X86::BI__builtin_ia32_palignr512: {
15181 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15182
15183 unsigned NumElts =
15184 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15185 assert(NumElts % 16 == 0);
15186
15187 // If palignr is shifting the pair of vectors more than the size of two
15188 // lanes, emit zero.
15189 if (ShiftVal >= 32)
15190 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15191
15192 // If palignr is shifting the pair of input vectors more than one lane,
15193 // but less than two lanes, convert to shifting in zeroes.
15194 if (ShiftVal > 16) {
15195 ShiftVal -= 16;
15196 Ops[1] = Ops[0];
15197 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15198 }
15199
15200 int Indices[64];
15201 // 256-bit palignr operates on 128-bit lanes so we need to handle that
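// For example, with ShiftVal = 4 on 128-bit palignr the indices are
// {4..15,16..19}: bytes 4-15 of Ops[1] followed by bytes 0-3 of Ops[0],
// which is the concatenation Ops[0]:Ops[1] (Ops[1] in the low half)
// shifted right by 4 bytes.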
15202 for (unsigned l = 0; l != NumElts; l += 16) {
15203 for (unsigned i = 0; i != 16; ++i) {
15204 unsigned Idx = ShiftVal + i;
15205 if (Idx >= 16)
15206 Idx += NumElts - 16; // End of lane, switch operand.
15207 Indices[l + i] = Idx + l;
15208 }
15209 }
15210
15211 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15212 ArrayRef(Indices, NumElts), "palignr");
15213 }
15214 case X86::BI__builtin_ia32_alignd128:
15215 case X86::BI__builtin_ia32_alignd256:
15216 case X86::BI__builtin_ia32_alignd512:
15217 case X86::BI__builtin_ia32_alignq128:
15218 case X86::BI__builtin_ia32_alignq256:
15219 case X86::BI__builtin_ia32_alignq512: {
15220 unsigned NumElts =
15221 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15222 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15223
15224 // Mask the shift amount to width of a vector.
15225 ShiftVal &= NumElts - 1;
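// For example, valignd on <4 x i32> with ShiftVal = 1 gives indices
// {1,2,3,4}: elements 1-3 of Ops[1] followed by element 0 of Ops[0],
// since the shuffle concatenates Ops[1] (low) and Ops[0] (high).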
15226
15227 int Indices[16];
15228 for (unsigned i = 0; i != NumElts; ++i)
15229 Indices[i] = i + ShiftVal;
15230
15231 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15232 ArrayRef(Indices, NumElts), "valign");
15233 }
15234 case X86::BI__builtin_ia32_shuf_f32x4_256:
15235 case X86::BI__builtin_ia32_shuf_f64x2_256:
15236 case X86::BI__builtin_ia32_shuf_i32x4_256:
15237 case X86::BI__builtin_ia32_shuf_i64x2_256:
15238 case X86::BI__builtin_ia32_shuf_f32x4:
15239 case X86::BI__builtin_ia32_shuf_f64x2:
15240 case X86::BI__builtin_ia32_shuf_i32x4:
15241 case X86::BI__builtin_ia32_shuf_i64x2: {
15242 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15243 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15244 unsigned NumElts = Ty->getNumElements();
15245 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15246 unsigned NumLaneElts = NumElts / NumLanes;
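// For example, shuf_f64x2_256 with Imm = 0x1 gives indices {2,3,4,5}: the
// low 128-bit result lane is the high lane of Ops[0] and the high result
// lane is the low lane of Ops[1] (the second source always supplies the
// upper result lanes).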
15247
15248 int Indices[16];
15249 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15250 unsigned Index = (Imm % NumLanes) * NumLaneElts;
15251 Imm /= NumLanes; // Discard the bits we just used.
15252 if (l >= (NumElts / 2))
15253 Index += NumElts; // Switch to other source.
15254 for (unsigned i = 0; i != NumLaneElts; ++i) {
15255 Indices[l + i] = Index + i;
15256 }
15257 }
15258
15259 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15260 ArrayRef(Indices, NumElts), "shuf");
15261 }
15262
15263 case X86::BI__builtin_ia32_vperm2f128_pd256:
15264 case X86::BI__builtin_ia32_vperm2f128_ps256:
15265 case X86::BI__builtin_ia32_vperm2f128_si256:
15266 case X86::BI__builtin_ia32_permti256: {
15267 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15268 unsigned NumElts =
15269 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15270
15271 // This takes a very simple approach since there are two lanes and a
15272 // shuffle can have 2 inputs. So we reserve the first input for the first
15273 // lane and the second input for the second lane. This may result in
15274 // duplicate sources, but this can be dealt with in the backend.
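// For example, vperm2f128 with Imm = 0x31 on <4 x double> selects the high
// 128-bit lane of Ops[0] for the low result lane and the high lane of
// Ops[1] for the high result lane, i.e. indices {2,3,6,7}.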
15275
15276 Value *OutOps[2];
15277 int Indices[8];
15278 for (unsigned l = 0; l != 2; ++l) {
15279 // Determine the source for this lane.
15280 if (Imm & (1 << ((l * 4) + 3)))
15281 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
15282 else if (Imm & (1 << ((l * 4) + 1)))
15283 OutOps[l] = Ops[1];
15284 else
15285 OutOps[l] = Ops[0];
15286
15287 for (unsigned i = 0; i != NumElts/2; ++i) {
15288 // Start with ith element of the source for this lane.
15289 unsigned Idx = (l * NumElts) + i;
15290 // If bit 0 of the immediate half is set, switch to the high half of
15291 // the source.
15292 if (Imm & (1 << (l * 4)))
15293 Idx += NumElts/2;
15294 Indices[(l * (NumElts/2)) + i] = Idx;
15295 }
15296 }
15297
15298 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
15299 ArrayRef(Indices, NumElts), "vperm");
15300 }
15301
15302 case X86::BI__builtin_ia32_pslldqi128_byteshift:
15303 case X86::BI__builtin_ia32_pslldqi256_byteshift:
15304 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15305 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15306 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15307 // Builtin type is vXi64 so multiply by 8 to get bytes.
15308 unsigned NumElts = ResultType->getNumElements() * 8;
15309
15310 // If pslldq is shifting the vector more than 15 bytes, emit zero.
15311 if (ShiftVal >= 16)
15312 return llvm::Constant::getNullValue(ResultType);
15313
15314 int Indices[64];
15315 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
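// For example, a 3-byte shift of a 128-bit vector gives indices
// {13,14,15,16..28}: three zero bytes from the first shuffle operand
// followed by bytes 0-12 of the source, i.e. a left shift by 3 bytes.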
15316 for (unsigned l = 0; l != NumElts; l += 16) {
15317 for (unsigned i = 0; i != 16; ++i) {
15318 unsigned Idx = NumElts + i - ShiftVal;
15319 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15320 Indices[l + i] = Idx + l;
15321 }
15322 }
15323
15324 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15325 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15326 Value *Zero = llvm::Constant::getNullValue(VecTy);
15327 Value *SV = Builder.CreateShuffleVector(
15328 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
15329 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
15330 }
15331 case X86::BI__builtin_ia32_psrldqi128_byteshift:
15332 case X86::BI__builtin_ia32_psrldqi256_byteshift:
15333 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15334 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15335 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15336 // Builtin type is vXi64 so multiply by 8 to get bytes.
15337 unsigned NumElts = ResultType->getNumElements() * 8;
15338
15339 // If psrldq is shifting the vector more than 15 bytes, emit zero.
15340 if (ShiftVal >= 16)
15341 return llvm::Constant::getNullValue(ResultType);
15342
15343 int Indices[64];
15344 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
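// For example, a 3-byte shift of a 128-bit vector gives indices
// {3..15,16,17,18}: bytes 3-15 of the source followed by three zero
// bytes from the second shuffle operand, i.e. a right shift by 3 bytes.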
15345 for (unsigned l = 0; l != NumElts; l += 16) {
15346 for (unsigned i = 0; i != 16; ++i) {
15347 unsigned Idx = i + ShiftVal;
15348 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15349 Indices[l + i] = Idx + l;
15350 }
15351 }
15352
15353 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15354 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15355 Value *Zero = llvm::Constant::getNullValue(VecTy);
15356 Value *SV = Builder.CreateShuffleVector(
15357 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15358 return Builder.CreateBitCast(SV, ResultType, "cast");
15359 }
15360 case X86::BI__builtin_ia32_kshiftliqi:
15361 case X86::BI__builtin_ia32_kshiftlihi:
15362 case X86::BI__builtin_ia32_kshiftlisi:
15363 case X86::BI__builtin_ia32_kshiftlidi: {
15364 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15365 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15366
15367 if (ShiftVal >= NumElts)
15368 return llvm::Constant::getNullValue(Ops[0]->getType());
15369
15370 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15371
15372 int Indices[64];
15373 for (unsigned i = 0; i != NumElts; ++i)
15374 Indices[i] = NumElts + i - ShiftVal;
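// For example, kshiftliqi with ShiftVal = 2 produces indices {6..13}: the
// low two result bits come from the zero vector and bit i of the result
// equals bit i-2 of the input, i.e. a left shift of the mask.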
15375
15376 Value *Zero = llvm::Constant::getNullValue(In->getType());
15377 Value *SV = Builder.CreateShuffleVector(
15378 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15379 return Builder.CreateBitCast(SV, Ops[0]->getType());
15380 }
15381 case X86::BI__builtin_ia32_kshiftriqi:
15382 case X86::BI__builtin_ia32_kshiftrihi:
15383 case X86::BI__builtin_ia32_kshiftrisi:
15384 case X86::BI__builtin_ia32_kshiftridi: {
15385 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15386 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15387
15388 if (ShiftVal >= NumElts)
15389 return llvm::Constant::getNullValue(Ops[0]->getType());
15390
15391 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15392
15393 int Indices[64];
15394 for (unsigned i = 0; i != NumElts; ++i)
15395 Indices[i] = i + ShiftVal;
15396
15397 Value *Zero = llvm::Constant::getNullValue(In->getType());
15398 Value *SV = Builder.CreateShuffleVector(
15399 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15400 return Builder.CreateBitCast(SV, Ops[0]->getType());
15401 }
15402 case X86::BI__builtin_ia32_movnti:
15403 case X86::BI__builtin_ia32_movnti64:
15404 case X86::BI__builtin_ia32_movntsd:
15405 case X86::BI__builtin_ia32_movntss: {
15406 llvm::MDNode *Node = llvm::MDNode::get(
15407 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
15408
15409 Value *Ptr = Ops[0];
15410 Value *Src = Ops[1];
15411
15412 // Extract the 0'th element of the source vector.
15413 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15414 BuiltinID == X86::BI__builtin_ia32_movntss)
15415 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15416
15417 // Unaligned nontemporal store of the scalar value.
15418 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15419 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15420 SI->setAlignment(llvm::Align(1));
15421 return SI;
15422 }
15423 // Rotate is a special case of funnel shift - 1st 2 args are the same.
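// e.g. a left rotate by C is fshl(X, X, C): funnel-shifting a value with
// itself feeds the shifted-out bits back in at the other end.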
15424 case X86::BI__builtin_ia32_vprotb:
15425 case X86::BI__builtin_ia32_vprotw:
15426 case X86::BI__builtin_ia32_vprotd:
15427 case X86::BI__builtin_ia32_vprotq:
15428 case X86::BI__builtin_ia32_vprotbi:
15429 case X86::BI__builtin_ia32_vprotwi:
15430 case X86::BI__builtin_ia32_vprotdi:
15431 case X86::BI__builtin_ia32_vprotqi:
15432 case X86::BI__builtin_ia32_prold128:
15433 case X86::BI__builtin_ia32_prold256:
15434 case X86::BI__builtin_ia32_prold512:
15435 case X86::BI__builtin_ia32_prolq128:
15436 case X86::BI__builtin_ia32_prolq256:
15437 case X86::BI__builtin_ia32_prolq512:
15438 case X86::BI__builtin_ia32_prolvd128:
15439 case X86::BI__builtin_ia32_prolvd256:
15440 case X86::BI__builtin_ia32_prolvd512:
15441 case X86::BI__builtin_ia32_prolvq128:
15442 case X86::BI__builtin_ia32_prolvq256:
15443 case X86::BI__builtin_ia32_prolvq512:
15444 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15445 case X86::BI__builtin_ia32_prord128:
15446 case X86::BI__builtin_ia32_prord256:
15447 case X86::BI__builtin_ia32_prord512:
15448 case X86::BI__builtin_ia32_prorq128:
15449 case X86::BI__builtin_ia32_prorq256:
15450 case X86::BI__builtin_ia32_prorq512:
15451 case X86::BI__builtin_ia32_prorvd128:
15452 case X86::BI__builtin_ia32_prorvd256:
15453 case X86::BI__builtin_ia32_prorvd512:
15454 case X86::BI__builtin_ia32_prorvq128:
15455 case X86::BI__builtin_ia32_prorvq256:
15456 case X86::BI__builtin_ia32_prorvq512:
15457 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
15458 case X86::BI__builtin_ia32_selectb_128:
15459 case X86::BI__builtin_ia32_selectb_256:
15460 case X86::BI__builtin_ia32_selectb_512:
15461 case X86::BI__builtin_ia32_selectw_128:
15462 case X86::BI__builtin_ia32_selectw_256:
15463 case X86::BI__builtin_ia32_selectw_512:
15464 case X86::BI__builtin_ia32_selectd_128:
15465 case X86::BI__builtin_ia32_selectd_256:
15466 case X86::BI__builtin_ia32_selectd_512:
15467 case X86::BI__builtin_ia32_selectq_128:
15468 case X86::BI__builtin_ia32_selectq_256:
15469 case X86::BI__builtin_ia32_selectq_512:
15470 case X86::BI__builtin_ia32_selectph_128:
15471 case X86::BI__builtin_ia32_selectph_256:
15472 case X86::BI__builtin_ia32_selectph_512:
15473 case X86::BI__builtin_ia32_selectpbf_128:
15474 case X86::BI__builtin_ia32_selectpbf_256:
15475 case X86::BI__builtin_ia32_selectpbf_512:
15476 case X86::BI__builtin_ia32_selectps_128:
15477 case X86::BI__builtin_ia32_selectps_256:
15478 case X86::BI__builtin_ia32_selectps_512:
15479 case X86::BI__builtin_ia32_selectpd_128:
15480 case X86::BI__builtin_ia32_selectpd_256:
15481 case X86::BI__builtin_ia32_selectpd_512:
15482 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15483 case X86::BI__builtin_ia32_selectsh_128:
15484 case X86::BI__builtin_ia32_selectsbf_128:
15485 case X86::BI__builtin_ia32_selectss_128:
15486 case X86::BI__builtin_ia32_selectsd_128: {
15487 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15488 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15489 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15490 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15491 }
15492 case X86::BI__builtin_ia32_cmpb128_mask:
15493 case X86::BI__builtin_ia32_cmpb256_mask:
15494 case X86::BI__builtin_ia32_cmpb512_mask:
15495 case X86::BI__builtin_ia32_cmpw128_mask:
15496 case X86::BI__builtin_ia32_cmpw256_mask:
15497 case X86::BI__builtin_ia32_cmpw512_mask:
15498 case X86::BI__builtin_ia32_cmpd128_mask:
15499 case X86::BI__builtin_ia32_cmpd256_mask:
15500 case X86::BI__builtin_ia32_cmpd512_mask:
15501 case X86::BI__builtin_ia32_cmpq128_mask:
15502 case X86::BI__builtin_ia32_cmpq256_mask:
15503 case X86::BI__builtin_ia32_cmpq512_mask: {
15504 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15505 return EmitX86MaskedCompare(*this, CC, true, Ops);
15506 }
15507 case X86::BI__builtin_ia32_ucmpb128_mask:
15508 case X86::BI__builtin_ia32_ucmpb256_mask:
15509 case X86::BI__builtin_ia32_ucmpb512_mask:
15510 case X86::BI__builtin_ia32_ucmpw128_mask:
15511 case X86::BI__builtin_ia32_ucmpw256_mask:
15512 case X86::BI__builtin_ia32_ucmpw512_mask:
15513 case X86::BI__builtin_ia32_ucmpd128_mask:
15514 case X86::BI__builtin_ia32_ucmpd256_mask:
15515 case X86::BI__builtin_ia32_ucmpd512_mask:
15516 case X86::BI__builtin_ia32_ucmpq128_mask:
15517 case X86::BI__builtin_ia32_ucmpq256_mask:
15518 case X86::BI__builtin_ia32_ucmpq512_mask: {
15519 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15520 return EmitX86MaskedCompare(*this, CC, false, Ops);
15521 }
15522 case X86::BI__builtin_ia32_vpcomb:
15523 case X86::BI__builtin_ia32_vpcomw:
15524 case X86::BI__builtin_ia32_vpcomd:
15525 case X86::BI__builtin_ia32_vpcomq:
15526 return EmitX86vpcom(*this, Ops, true);
15527 case X86::BI__builtin_ia32_vpcomub:
15528 case X86::BI__builtin_ia32_vpcomuw:
15529 case X86::BI__builtin_ia32_vpcomud:
15530 case X86::BI__builtin_ia32_vpcomuq:
15531 return EmitX86vpcom(*this, Ops, false);
15532
15533 case X86::BI__builtin_ia32_kortestcqi:
15534 case X86::BI__builtin_ia32_kortestchi:
15535 case X86::BI__builtin_ia32_kortestcsi:
15536 case X86::BI__builtin_ia32_kortestcdi: {
15537 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15538 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15539 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15540 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15541 }
15542 case X86::BI__builtin_ia32_kortestzqi:
15543 case X86::BI__builtin_ia32_kortestzhi:
15544 case X86::BI__builtin_ia32_kortestzsi:
15545 case X86::BI__builtin_ia32_kortestzdi: {
15546 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15547 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15548 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15549 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15550 }
15551
15552 case X86::BI__builtin_ia32_ktestcqi:
15553 case X86::BI__builtin_ia32_ktestzqi:
15554 case X86::BI__builtin_ia32_ktestchi:
15555 case X86::BI__builtin_ia32_ktestzhi:
15556 case X86::BI__builtin_ia32_ktestcsi:
15557 case X86::BI__builtin_ia32_ktestzsi:
15558 case X86::BI__builtin_ia32_ktestcdi:
15559 case X86::BI__builtin_ia32_ktestzdi: {
15560 Intrinsic::ID IID;
15561 switch (BuiltinID) {
15562 default: llvm_unreachable("Unsupported intrinsic!");
15563 case X86::BI__builtin_ia32_ktestcqi:
15564 IID = Intrinsic::x86_avx512_ktestc_b;
15565 break;
15566 case X86::BI__builtin_ia32_ktestzqi:
15567 IID = Intrinsic::x86_avx512_ktestz_b;
15568 break;
15569 case X86::BI__builtin_ia32_ktestchi:
15570 IID = Intrinsic::x86_avx512_ktestc_w;
15571 break;
15572 case X86::BI__builtin_ia32_ktestzhi:
15573 IID = Intrinsic::x86_avx512_ktestz_w;
15574 break;
15575 case X86::BI__builtin_ia32_ktestcsi:
15576 IID = Intrinsic::x86_avx512_ktestc_d;
15577 break;
15578 case X86::BI__builtin_ia32_ktestzsi:
15579 IID = Intrinsic::x86_avx512_ktestz_d;
15580 break;
15581 case X86::BI__builtin_ia32_ktestcdi:
15582 IID = Intrinsic::x86_avx512_ktestc_q;
15583 break;
15584 case X86::BI__builtin_ia32_ktestzdi:
15585 IID = Intrinsic::x86_avx512_ktestz_q;
15586 break;
15587 }
15588
15589 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15590 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15591 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15592 Function *Intr = CGM.getIntrinsic(IID);
15593 return Builder.CreateCall(Intr, {LHS, RHS});
15594 }
15595
15596 case X86::BI__builtin_ia32_kaddqi:
15597 case X86::BI__builtin_ia32_kaddhi:
15598 case X86::BI__builtin_ia32_kaddsi:
15599 case X86::BI__builtin_ia32_kadddi: {
15600 Intrinsic::ID IID;
15601 switch (BuiltinID) {
15602 default: llvm_unreachable("Unsupported intrinsic!");
15603 case X86::BI__builtin_ia32_kaddqi:
15604 IID = Intrinsic::x86_avx512_kadd_b;
15605 break;
15606 case X86::BI__builtin_ia32_kaddhi:
15607 IID = Intrinsic::x86_avx512_kadd_w;
15608 break;
15609 case X86::BI__builtin_ia32_kaddsi:
15610 IID = Intrinsic::x86_avx512_kadd_d;
15611 break;
15612 case X86::BI__builtin_ia32_kadddi:
15613 IID = Intrinsic::x86_avx512_kadd_q;
15614 break;
15615 }
15616
15617 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15618 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15619 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15620 Function *Intr = CGM.getIntrinsic(IID);
15621 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15622 return Builder.CreateBitCast(Res, Ops[0]->getType());
15623 }
15624 case X86::BI__builtin_ia32_kandqi:
15625 case X86::BI__builtin_ia32_kandhi:
15626 case X86::BI__builtin_ia32_kandsi:
15627 case X86::BI__builtin_ia32_kanddi:
15628 return EmitX86MaskLogic(*this, Instruction::And, Ops);
15629 case X86::BI__builtin_ia32_kandnqi:
15630 case X86::BI__builtin_ia32_kandnhi:
15631 case X86::BI__builtin_ia32_kandnsi:
15632 case X86::BI__builtin_ia32_kandndi:
15633 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15634 case X86::BI__builtin_ia32_korqi:
15635 case X86::BI__builtin_ia32_korhi:
15636 case X86::BI__builtin_ia32_korsi:
15637 case X86::BI__builtin_ia32_kordi:
15638 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15639 case X86::BI__builtin_ia32_kxnorqi:
15640 case X86::BI__builtin_ia32_kxnorhi:
15641 case X86::BI__builtin_ia32_kxnorsi:
15642 case X86::BI__builtin_ia32_kxnordi:
15643 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15644 case X86::BI__builtin_ia32_kxorqi:
15645 case X86::BI__builtin_ia32_kxorhi:
15646 case X86::BI__builtin_ia32_kxorsi:
15647 case X86::BI__builtin_ia32_kxordi:
15648 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
15649 case X86::BI__builtin_ia32_knotqi:
15650 case X86::BI__builtin_ia32_knothi:
15651 case X86::BI__builtin_ia32_knotsi:
15652 case X86::BI__builtin_ia32_knotdi: {
15653 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15654 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15655 return Builder.CreateBitCast(Builder.CreateNot(Res),
15656 Ops[0]->getType());
15657 }
15658 case X86::BI__builtin_ia32_kmovb:
15659 case X86::BI__builtin_ia32_kmovw:
15660 case X86::BI__builtin_ia32_kmovd:
15661 case X86::BI__builtin_ia32_kmovq: {
15662 // Bitcast to vXi1 type and then back to integer. This gets the mask
15663 // register type into the IR, but might be optimized out depending on
15664 // what's around it.
15665 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15666 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15667 return Builder.CreateBitCast(Res, Ops[0]->getType());
15668 }
15669
15670 case X86::BI__builtin_ia32_kunpckdi:
15671 case X86::BI__builtin_ia32_kunpcksi:
15672 case X86::BI__builtin_ia32_kunpckhi: {
15673 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15674 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15675 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15676 int Indices[64];
15677 for (unsigned i = 0; i != NumElts; ++i)
15678 Indices[i] = i;
15679
15680 // First extract half of each vector. This gives better codegen than
15681 // doing it in a single shuffle.
15682 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15683 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
15684 // Concat the vectors.
15685 // NOTE: Operands are swapped to match the intrinsic definition.
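// For example, for kunpckhi (16-bit masks) the result's low 8 bits are the
// low 8 bits of Ops[1] and its high 8 bits are the low 8 bits of Ops[0],
// matching the KUNPCKBW instruction.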
15686 Value *Res =
15687 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
15688 return Builder.CreateBitCast(Res, Ops[0]->getType());
15689 }
15690
15691 case X86::BI__builtin_ia32_vplzcntd_128:
15692 case X86::BI__builtin_ia32_vplzcntd_256:
15693 case X86::BI__builtin_ia32_vplzcntd_512:
15694 case X86::BI__builtin_ia32_vplzcntq_128:
15695 case X86::BI__builtin_ia32_vplzcntq_256:
15696 case X86::BI__builtin_ia32_vplzcntq_512: {
15697 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15698 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
15699 }
15700 case X86::BI__builtin_ia32_sqrtss:
15701 case X86::BI__builtin_ia32_sqrtsd: {
15702 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
15703 Function *F;
15704 if (Builder.getIsFPConstrained()) {
15705 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15706 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15707 A->getType());
15708 A = Builder.CreateConstrainedFPCall(F, {A});
15709 } else {
15710 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15711 A = Builder.CreateCall(F, {A});
15712 }
15713 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15714 }
15715 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15716 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15717 case X86::BI__builtin_ia32_sqrtss_round_mask: {
15718 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
15719 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15720 // otherwise keep the intrinsic.
15721 if (CC != 4) {
15722 Intrinsic::ID IID;
15723
15724 switch (BuiltinID) {
15725 default:
15726 llvm_unreachable("Unsupported intrinsic!");
15727 case X86::BI__builtin_ia32_sqrtsh_round_mask:
15728 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
15729 break;
15730 case X86::BI__builtin_ia32_sqrtsd_round_mask:
15731 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
15732 break;
15733 case X86::BI__builtin_ia32_sqrtss_round_mask:
15734 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
15735 break;
15736 }
15737 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15738 }
15739 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15740 Function *F;
15741 if (Builder.getIsFPConstrained()) {
15742 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15743 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15744 A->getType());
15745 A = Builder.CreateConstrainedFPCall(F, A);
15746 } else {
15747 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
15748 A = Builder.CreateCall(F, A);
15749 }
15750 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15751 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
15752 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
15753 }
15754 case X86::BI__builtin_ia32_sqrtpd256:
15755 case X86::BI__builtin_ia32_sqrtpd:
15756 case X86::BI__builtin_ia32_sqrtps256:
15757 case X86::BI__builtin_ia32_sqrtps:
15758 case X86::BI__builtin_ia32_sqrtph256:
15759 case X86::BI__builtin_ia32_sqrtph:
15760 case X86::BI__builtin_ia32_sqrtph512:
15761 case X86::BI__builtin_ia32_sqrtps512:
15762 case X86::BI__builtin_ia32_sqrtpd512: {
15763 if (Ops.size() == 2) {
15764 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15765 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
15766 // otherwise keep the intrinsic.
15767 if (CC != 4) {
15768 Intrinsic::ID IID;
15769
15770 switch (BuiltinID) {
15771 default:
15772 llvm_unreachable("Unsupported intrinsic!");
15773 case X86::BI__builtin_ia32_sqrtph512:
15774 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
15775 break;
15776 case X86::BI__builtin_ia32_sqrtps512:
15777 IID = Intrinsic::x86_avx512_sqrt_ps_512;
15778 break;
15779 case X86::BI__builtin_ia32_sqrtpd512:
15780 IID = Intrinsic::x86_avx512_sqrt_pd_512;
15781 break;
15782 }
15783 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
15784 }
15785 }
15786 if (Builder.getIsFPConstrained()) {
15787 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15788 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
15789 Ops[0]->getType());
15790 return Builder.CreateConstrainedFPCall(F, Ops[0]);
15791 } else {
15792 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
15793 return Builder.CreateCall(F, Ops[0]);
15794 }
15795 }
15796
15797 case X86::BI__builtin_ia32_pmuludq128:
15798 case X86::BI__builtin_ia32_pmuludq256:
15799 case X86::BI__builtin_ia32_pmuludq512:
15800 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
15801
15802 case X86::BI__builtin_ia32_pmuldq128:
15803 case X86::BI__builtin_ia32_pmuldq256:
15804 case X86::BI__builtin_ia32_pmuldq512:
15805 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
15806
15807 case X86::BI__builtin_ia32_pternlogd512_mask:
15808 case X86::BI__builtin_ia32_pternlogq512_mask:
15809 case X86::BI__builtin_ia32_pternlogd128_mask:
15810 case X86::BI__builtin_ia32_pternlogd256_mask:
15811 case X86::BI__builtin_ia32_pternlogq128_mask:
15812 case X86::BI__builtin_ia32_pternlogq256_mask:
15813 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
15814
15815 case X86::BI__builtin_ia32_pternlogd512_maskz:
15816 case X86::BI__builtin_ia32_pternlogq512_maskz:
15817 case X86::BI__builtin_ia32_pternlogd128_maskz:
15818 case X86::BI__builtin_ia32_pternlogd256_maskz:
15819 case X86::BI__builtin_ia32_pternlogq128_maskz:
15820 case X86::BI__builtin_ia32_pternlogq256_maskz:
15821 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
15822
15823 case X86::BI__builtin_ia32_vpshldd128:
15824 case X86::BI__builtin_ia32_vpshldd256:
15825 case X86::BI__builtin_ia32_vpshldd512:
15826 case X86::BI__builtin_ia32_vpshldq128:
15827 case X86::BI__builtin_ia32_vpshldq256:
15828 case X86::BI__builtin_ia32_vpshldq512:
15829 case X86::BI__builtin_ia32_vpshldw128:
15830 case X86::BI__builtin_ia32_vpshldw256:
15831 case X86::BI__builtin_ia32_vpshldw512:
15832 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15833
15834 case X86::BI__builtin_ia32_vpshrdd128:
15835 case X86::BI__builtin_ia32_vpshrdd256:
15836 case X86::BI__builtin_ia32_vpshrdd512:
15837 case X86::BI__builtin_ia32_vpshrdq128:
15838 case X86::BI__builtin_ia32_vpshrdq256:
15839 case X86::BI__builtin_ia32_vpshrdq512:
15840 case X86::BI__builtin_ia32_vpshrdw128:
15841 case X86::BI__builtin_ia32_vpshrdw256:
15842 case X86::BI__builtin_ia32_vpshrdw512:
15843 // Ops 0 and 1 are swapped.
15844 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15845
15846 case X86::BI__builtin_ia32_vpshldvd128:
15847 case X86::BI__builtin_ia32_vpshldvd256:
15848 case X86::BI__builtin_ia32_vpshldvd512:
15849 case X86::BI__builtin_ia32_vpshldvq128:
15850 case X86::BI__builtin_ia32_vpshldvq256:
15851 case X86::BI__builtin_ia32_vpshldvq512:
15852 case X86::BI__builtin_ia32_vpshldvw128:
15853 case X86::BI__builtin_ia32_vpshldvw256:
15854 case X86::BI__builtin_ia32_vpshldvw512:
15855 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
15856
15857 case X86::BI__builtin_ia32_vpshrdvd128:
15858 case X86::BI__builtin_ia32_vpshrdvd256:
15859 case X86::BI__builtin_ia32_vpshrdvd512:
15860 case X86::BI__builtin_ia32_vpshrdvq128:
15861 case X86::BI__builtin_ia32_vpshrdvq256:
15862 case X86::BI__builtin_ia32_vpshrdvq512:
15863 case X86::BI__builtin_ia32_vpshrdvw128:
15864 case X86::BI__builtin_ia32_vpshrdvw256:
15865 case X86::BI__builtin_ia32_vpshrdvw512:
15866 // Ops 0 and 1 are swapped.
15867 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
15868
15869 // Reductions
15870 case X86::BI__builtin_ia32_reduce_fadd_pd512:
15871 case X86::BI__builtin_ia32_reduce_fadd_ps512:
15872 case X86::BI__builtin_ia32_reduce_fadd_ph512:
15873 case X86::BI__builtin_ia32_reduce_fadd_ph256:
15874 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
15875 Function *F =
15876 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
15877 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15878 Builder.getFastMathFlags().setAllowReassoc();
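// Note: llvm.vector.reduce.fadd without the reassoc flag is a strictly
// ordered (sequential) reduction; allowing reassociation here matches the
// builtin's unordered reduction semantics and permits a tree reduction.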
15879 return Builder.CreateCall(F, {Ops[0], Ops[1]});
15880 }
15881 case X86::BI__builtin_ia32_reduce_fmul_pd512:
15882 case X86::BI__builtin_ia32_reduce_fmul_ps512:
15883 case X86::BI__builtin_ia32_reduce_fmul_ph512:
15884 case X86::BI__builtin_ia32_reduce_fmul_ph256:
15885 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
15886 Function *F =
15887 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
15888 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15889 Builder.getFastMathFlags().setAllowReassoc();
15890 return Builder.CreateCall(F, {Ops[0], Ops[1]});
15891 }
15892 case X86::BI__builtin_ia32_reduce_fmax_pd512:
15893 case X86::BI__builtin_ia32_reduce_fmax_ps512:
15894 case X86::BI__builtin_ia32_reduce_fmax_ph512:
15895 case X86::BI__builtin_ia32_reduce_fmax_ph256:
15896 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
15897 Function *F =
15898 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
15899 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15900 Builder.getFastMathFlags().setNoNaNs();
15901 return Builder.CreateCall(F, {Ops[0]});
15902 }
15903 case X86::BI__builtin_ia32_reduce_fmin_pd512:
15904 case X86::BI__builtin_ia32_reduce_fmin_ps512:
15905 case X86::BI__builtin_ia32_reduce_fmin_ph512:
15906 case X86::BI__builtin_ia32_reduce_fmin_ph256:
15907 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
15908 Function *F =
15909 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
15910 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
15911 Builder.getFastMathFlags().setNoNaNs();
15912 return Builder.CreateCall(F, {Ops[0]});
15913 }
15914
15915 // 3DNow!
15916 case X86::BI__builtin_ia32_pswapdsf:
15917 case X86::BI__builtin_ia32_pswapdsi: {
15918 llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
15919 Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
15920 llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
15921 return Builder.CreateCall(F, Ops, "pswapd");
15922 }
15923 case X86::BI__builtin_ia32_rdrand16_step:
15924 case X86::BI__builtin_ia32_rdrand32_step:
15925 case X86::BI__builtin_ia32_rdrand64_step:
15926 case X86::BI__builtin_ia32_rdseed16_step:
15927 case X86::BI__builtin_ia32_rdseed32_step:
15928 case X86::BI__builtin_ia32_rdseed64_step: {
15929 Intrinsic::ID ID;
15930 switch (BuiltinID) {
15931 default: llvm_unreachable("Unsupported intrinsic!");
15932 case X86::BI__builtin_ia32_rdrand16_step:
15933 ID = Intrinsic::x86_rdrand_16;
15934 break;
15935 case X86::BI__builtin_ia32_rdrand32_step:
15936 ID = Intrinsic::x86_rdrand_32;
15937 break;
15938 case X86::BI__builtin_ia32_rdrand64_step:
15939 ID = Intrinsic::x86_rdrand_64;
15940 break;
15941 case X86::BI__builtin_ia32_rdseed16_step:
15942 ID = Intrinsic::x86_rdseed_16;
15943 break;
15944 case X86::BI__builtin_ia32_rdseed32_step:
15945 ID = Intrinsic::x86_rdseed_32;
15946 break;
15947 case X86::BI__builtin_ia32_rdseed64_step:
15948 ID = Intrinsic::x86_rdseed_64;
15949 break;
15950 }
15951
15952 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
15953 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
15954 Ops[0]);
15955 return Builder.CreateExtractValue(Call, 1);
15956 }
15957 case X86::BI__builtin_ia32_addcarryx_u32:
15958 case X86::BI__builtin_ia32_addcarryx_u64:
15959 case X86::BI__builtin_ia32_subborrow_u32:
15960 case X86::BI__builtin_ia32_subborrow_u64: {
15961 Intrinsic::ID IID;
15962 switch (BuiltinID) {
15963 default: llvm_unreachable("Unsupported intrinsic!");
15964 case X86::BI__builtin_ia32_addcarryx_u32:
15965 IID = Intrinsic::x86_addcarry_32;
15966 break;
15967 case X86::BI__builtin_ia32_addcarryx_u64:
15968 IID = Intrinsic::x86_addcarry_64;
15969 break;
15970 case X86::BI__builtin_ia32_subborrow_u32:
15971 IID = Intrinsic::x86_subborrow_32;
15972 break;
15973 case X86::BI__builtin_ia32_subborrow_u64:
15974 IID = Intrinsic::x86_subborrow_64;
15975 break;
15976 }
15977
15978 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
15979 { Ops[0], Ops[1], Ops[2] });
15980 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15981 Ops[3]);
15982 return Builder.CreateExtractValue(Call, 0);
15983 }
15984
15985 case X86::BI__builtin_ia32_fpclassps128_mask:
15986 case X86::BI__builtin_ia32_fpclassps256_mask:
15987 case X86::BI__builtin_ia32_fpclassps512_mask:
15988 case X86::BI__builtin_ia32_fpclassph128_mask:
15989 case X86::BI__builtin_ia32_fpclassph256_mask:
15990 case X86::BI__builtin_ia32_fpclassph512_mask:
15991 case X86::BI__builtin_ia32_fpclasspd128_mask:
15992 case X86::BI__builtin_ia32_fpclasspd256_mask:
15993 case X86::BI__builtin_ia32_fpclasspd512_mask: {
15994 unsigned NumElts =
15995 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15996 Value *MaskIn = Ops[2];
15997 Ops.erase(&Ops[2]);
15998
15999 Intrinsic::ID ID;
16000 switch (BuiltinID) {
16001 default: llvm_unreachable("Unsupported intrinsic!");
16002 case X86::BI__builtin_ia32_fpclassph128_mask:
16003 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16004 break;
16005 case X86::BI__builtin_ia32_fpclassph256_mask:
16006 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16007 break;
16008 case X86::BI__builtin_ia32_fpclassph512_mask:
16009 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16010 break;
16011 case X86::BI__builtin_ia32_fpclassps128_mask:
16012 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16013 break;
16014 case X86::BI__builtin_ia32_fpclassps256_mask:
16015 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16016 break;
16017 case X86::BI__builtin_ia32_fpclassps512_mask:
16018 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16019 break;
16020 case X86::BI__builtin_ia32_fpclasspd128_mask:
16021 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16022 break;
16023 case X86::BI__builtin_ia32_fpclasspd256_mask:
16024 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16025 break;
16026 case X86::BI__builtin_ia32_fpclasspd512_mask:
16027 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16028 break;
16029 }
16030
16031 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16032 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16033 }
16034
16035 case X86::BI__builtin_ia32_vp2intersect_q_512:
16036 case X86::BI__builtin_ia32_vp2intersect_q_256:
16037 case X86::BI__builtin_ia32_vp2intersect_q_128:
16038 case X86::BI__builtin_ia32_vp2intersect_d_512:
16039 case X86::BI__builtin_ia32_vp2intersect_d_256:
16040 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16041 unsigned NumElts =
16042 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16043 Intrinsic::ID ID;
16044
16045 switch (BuiltinID) {
16046 default: llvm_unreachable("Unsupported intrinsic!");
16047 case X86::BI__builtin_ia32_vp2intersect_q_512:
16048 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16049 break;
16050 case X86::BI__builtin_ia32_vp2intersect_q_256:
16051 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16052 break;
16053 case X86::BI__builtin_ia32_vp2intersect_q_128:
16054 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16055 break;
16056 case X86::BI__builtin_ia32_vp2intersect_d_512:
16057 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16058 break;
16059 case X86::BI__builtin_ia32_vp2intersect_d_256:
16060 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16061 break;
16062 case X86::BI__builtin_ia32_vp2intersect_d_128:
16063 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16064 break;
16065 }
16066
16067 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16068 Value *Result = Builder.CreateExtractValue(Call, 0);
16069 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16070 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16071
16072 Result = Builder.CreateExtractValue(Call, 1);
16073 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16074 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16075 }
16076
16077 case X86::BI__builtin_ia32_vpmultishiftqb128:
16078 case X86::BI__builtin_ia32_vpmultishiftqb256:
16079 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16080 Intrinsic::ID ID;
16081 switch (BuiltinID) {
16082 default: llvm_unreachable("Unsupported intrinsic!");
16083 case X86::BI__builtin_ia32_vpmultishiftqb128:
16084 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16085 break;
16086 case X86::BI__builtin_ia32_vpmultishiftqb256:
16087 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16088 break;
16089 case X86::BI__builtin_ia32_vpmultishiftqb512:
16090 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16091 break;
16092 }
16093
16094 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16095 }
16096
16097 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16098 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16099 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16100 unsigned NumElts =
16101 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16102 Value *MaskIn = Ops[2];
16103 Ops.erase(&Ops[2]);
16104
16105 Intrinsic::ID ID;
16106 switch (BuiltinID) {
16107 default: llvm_unreachable("Unsupported intrinsic!");
16108 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16109 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16110 break;
16111 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16112 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16113 break;
16114 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16115 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16116 break;
16117 }
16118
16119 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16120 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16121 }
16122
16123 // packed comparison intrinsics
16124 case X86::BI__builtin_ia32_cmpeqps:
16125 case X86::BI__builtin_ia32_cmpeqpd:
16126 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16127 case X86::BI__builtin_ia32_cmpltps:
16128 case X86::BI__builtin_ia32_cmpltpd:
16129 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16130 case X86::BI__builtin_ia32_cmpleps:
16131 case X86::BI__builtin_ia32_cmplepd:
16132 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16133 case X86::BI__builtin_ia32_cmpunordps:
16134 case X86::BI__builtin_ia32_cmpunordpd:
16135 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16136 case X86::BI__builtin_ia32_cmpneqps:
16137 case X86::BI__builtin_ia32_cmpneqpd:
16138 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16139 case X86::BI__builtin_ia32_cmpnltps:
16140 case X86::BI__builtin_ia32_cmpnltpd:
16141 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16142 case X86::BI__builtin_ia32_cmpnleps:
16143 case X86::BI__builtin_ia32_cmpnlepd:
16144 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16145 case X86::BI__builtin_ia32_cmpordps:
16146 case X86::BI__builtin_ia32_cmpordpd:
16147 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16148 case X86::BI__builtin_ia32_cmpph128_mask:
16149 case X86::BI__builtin_ia32_cmpph256_mask:
16150 case X86::BI__builtin_ia32_cmpph512_mask:
16151 case X86::BI__builtin_ia32_cmpps128_mask:
16152 case X86::BI__builtin_ia32_cmpps256_mask:
16153 case X86::BI__builtin_ia32_cmpps512_mask:
16154 case X86::BI__builtin_ia32_cmppd128_mask:
16155 case X86::BI__builtin_ia32_cmppd256_mask:
16156 case X86::BI__builtin_ia32_cmppd512_mask:
16157 IsMaskFCmp = true;
16158 [[fallthrough]];
16159 case X86::BI__builtin_ia32_cmpps:
16160 case X86::BI__builtin_ia32_cmpps256:
16161 case X86::BI__builtin_ia32_cmppd:
16162 case X86::BI__builtin_ia32_cmppd256: {
16163 // Lower vector comparisons to fcmp instructions, while
16164 // ignoring the requested signalling behaviour and
16165 // the requested rounding mode.
16166 // This is only possible if fp-model is not strict and FENV_ACCESS is off.
16167
16168 // The third argument is the comparison condition, an integer in the
16169 // range [0, 31].
16170 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16171
16172 // Lowering to IR fcmp instruction.
16173 // Ignoring requested signaling behaviour,
16174 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16175 FCmpInst::Predicate Pred;
16176 bool IsSignaling;
16177 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16178 // behavior is inverted. We'll handle that after the switch.
16179 switch (CC & 0xf) {
16180 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16181 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16182 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16183 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16184 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16185 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16186 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16187 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16188 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
16189 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
16190 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
16191 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16192 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
16193 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
16194 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
16195 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
16196 default: llvm_unreachable("Unhandled CC");
16197 }
16198
16199 // Invert the signalling behavior for 16-31.
16200 if (CC & 0x10)
16201 IsSignaling = !IsSignaling;
16202
16203 // If the predicate is true or false and we're using constrained intrinsics,
16204 // we don't have a compare intrinsic we can use. Just use the legacy X86
16205 // specific intrinsic.
16206 // If the intrinsic is mask enabled and we're using constrained intrinsics,
16207 // use the legacy X86 specific intrinsic.
16208 if (Builder.getIsFPConstrained() &&
16209 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16210 IsMaskFCmp)) {
16211
16212 Intrinsic::ID IID;
16213 switch (BuiltinID) {
16214 default: llvm_unreachable("Unexpected builtin");
16215 case X86::BI__builtin_ia32_cmpps:
16216 IID = Intrinsic::x86_sse_cmp_ps;
16217 break;
16218 case X86::BI__builtin_ia32_cmpps256:
16219 IID = Intrinsic::x86_avx_cmp_ps_256;
16220 break;
16221 case X86::BI__builtin_ia32_cmppd:
16222 IID = Intrinsic::x86_sse2_cmp_pd;
16223 break;
16224 case X86::BI__builtin_ia32_cmppd256:
16225 IID = Intrinsic::x86_avx_cmp_pd_256;
16226 break;
16227 case X86::BI__builtin_ia32_cmpph128_mask:
16228 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16229 break;
16230 case X86::BI__builtin_ia32_cmpph256_mask:
16231 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16232 break;
16233 case X86::BI__builtin_ia32_cmpph512_mask:
16234 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16235 break;
16236 case X86::BI__builtin_ia32_cmpps512_mask:
16237 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16238 break;
16239 case X86::BI__builtin_ia32_cmppd512_mask:
16240 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16241 break;
16242 case X86::BI__builtin_ia32_cmpps128_mask:
16243 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16244 break;
16245 case X86::BI__builtin_ia32_cmpps256_mask:
16246 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16247 break;
16248 case X86::BI__builtin_ia32_cmppd128_mask:
16249 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16250 break;
16251 case X86::BI__builtin_ia32_cmppd256_mask:
16252 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16253 break;
16254 }
16255
16256 Function *Intr = CGM.getIntrinsic(IID);
16257 if (IsMaskFCmp) {
16258 unsigned NumElts =
16259 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16260 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
16261 Value *Cmp = Builder.CreateCall(Intr, Ops);
16262 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
16263 }
16264
16265 return Builder.CreateCall(Intr, Ops);
16266 }
16267
16268 // Builtins without the _mask suffix return a vector of integers
16269 // of the same width as the input vectors
16270 if (IsMaskFCmp) {
16271 // We ignore SAE if strict FP is disabled. We only keep precise
16272 // exception behavior under strict FP.
16273 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16274 // object will be required.
16275 unsigned NumElts =
16276 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16277 Value *Cmp;
16278 if (IsSignaling)
16279 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
16280 else
16281 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
16282 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
16283 }
16284
16285 return getVectorFCmpIR(Pred, IsSignaling);
16286 }
16287
16288 // SSE scalar comparison intrinsics
16289 case X86::BI__builtin_ia32_cmpeqss:
16290 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16291 case X86::BI__builtin_ia32_cmpltss:
16292 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16293 case X86::BI__builtin_ia32_cmpless:
16294 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16295 case X86::BI__builtin_ia32_cmpunordss:
16296 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16297 case X86::BI__builtin_ia32_cmpneqss:
16298 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16299 case X86::BI__builtin_ia32_cmpnltss:
16300 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16301 case X86::BI__builtin_ia32_cmpnless:
16302 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16303 case X86::BI__builtin_ia32_cmpordss:
16304 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16305 case X86::BI__builtin_ia32_cmpeqsd:
16306 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16307 case X86::BI__builtin_ia32_cmpltsd:
16308 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16309 case X86::BI__builtin_ia32_cmplesd:
16310 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16311 case X86::BI__builtin_ia32_cmpunordsd:
16312 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16313 case X86::BI__builtin_ia32_cmpneqsd:
16314 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16315 case X86::BI__builtin_ia32_cmpnltsd:
16316 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16317 case X86::BI__builtin_ia32_cmpnlesd:
16318 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16319 case X86::BI__builtin_ia32_cmpordsd:
16320 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
16321
16322 // f16c half2float intrinsics
16323 case X86::BI__builtin_ia32_vcvtph2ps:
16324 case X86::BI__builtin_ia32_vcvtph2ps256:
16325 case X86::BI__builtin_ia32_vcvtph2ps_mask:
16326 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16327 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16328 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16329 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16330 }
16331
16332 // AVX512 bf16 intrinsics
16333 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16334 Ops[2] = getMaskVecValue(
16335 *this, Ops[2],
16336 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
16337 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16338 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16339 }
16340 case X86::BI__builtin_ia32_cvtsbf162ss_32:
16341 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16342
16343 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16344 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16345 Intrinsic::ID IID;
16346 switch (BuiltinID) {
16347 default: llvm_unreachable("Unsupported intrinsic!");
16348 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16349 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16350 break;
16351 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16352 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16353 break;
16354 }
16355 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16356 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16357 }
16358
16359 case X86::BI__cpuid:
16360 case X86::BI__cpuidex: {
16361 Value *FuncId = EmitScalarExpr(E->getArg(1));
16362 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16363 ? EmitScalarExpr(E->getArg(2))
16364 : llvm::ConstantInt::get(Int32Ty, 0);
16365
16366 llvm::StructType *CpuidRetTy =
16367 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16368 llvm::FunctionType *FTy =
16369 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16370
16371 StringRef Asm, Constraints;
16372 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16373 Asm = "cpuid";
16374 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16375 } else {
16376 // x86-64 uses %rbx as the base register, so preserve it.
16377 Asm = "xchgq %rbx, ${1:q}\n"
16378 "cpuid\n"
16379 "xchgq %rbx, ${1:q}";
16380 Constraints = "={ax},=r,={cx},={dx},0,2";
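// Outputs: eax, a scratch register that receives cpuid's ebx value via the
// xchg sequence (so rbx itself is preserved), ecx and edx; the two inputs
// are tied to operands 0 (eax) and 2 (ecx).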
16381 }
16382
16383 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16384 /*hasSideEffects=*/false);
16385 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16386 Value *BasePtr = EmitScalarExpr(E->getArg(0));
16387 Value *Store = nullptr;
16388 for (unsigned i = 0; i < 4; i++) {
16389 Value *Extracted = Builder.CreateExtractValue(IACall, i);
16390 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16391 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16392 }
16393
16394 // Return the last store instruction to signal that we have emitted
16395 // the intrinsic.
16396 return Store;
16397 }
16398
16399 case X86::BI__emul:
16400 case X86::BI__emulu: {
16401 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16402 bool isSigned = (BuiltinID == X86::BI__emul);
16403 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16404 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16405 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16406 }
16407 case X86::BI__mulh:
16408 case X86::BI__umulh:
16409 case X86::BI_mul128:
16410 case X86::BI_umul128: {
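// __mulh/__umulh return the high 64 bits of the 64x64->128-bit product;
// _mul128/_umul128 additionally store the high half through the pointer
// argument (the third argument) and return the low 64 bits.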
16411 llvm::Type *ResType = ConvertType(E->getType());
16412 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16413
16414 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16415 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16416 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16417
16418 Value *MulResult, *HigherBits;
16419 if (IsSigned) {
16420 MulResult = Builder.CreateNSWMul(LHS, RHS);
16421 HigherBits = Builder.CreateAShr(MulResult, 64);
16422 } else {
16423 MulResult = Builder.CreateNUWMul(LHS, RHS);
16424 HigherBits = Builder.CreateLShr(MulResult, 64);
16425 }
16426 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16427
16428 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16429 return HigherBits;
16430
16431 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16432 Builder.CreateStore(HigherBits, HighBitsAddress);
16433 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16434 }
16435
16436 case X86::BI__faststorefence: {
16437 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16438 llvm::SyncScope::System);
16439 }
16440 case X86::BI__shiftleft128:
16441 case X86::BI__shiftright128: {
16442 llvm::Function *F = CGM.getIntrinsic(
16443 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16444 Int64Ty);
16445 // Flip low/high ops and zero-extend amount to matching type.
16446 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16447 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
16448 std::swap(Ops[0], Ops[1]);
16449 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16450 return Builder.CreateCall(F, Ops);
16451 }
16452 case X86::BI_ReadWriteBarrier:
16453 case X86::BI_ReadBarrier:
16454 case X86::BI_WriteBarrier: {
16455 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16456 llvm::SyncScope::SingleThread);
16457 }
16458
16459 case X86::BI_AddressOfReturnAddress: {
16460 Function *F =
16461 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16462 return Builder.CreateCall(F);
16463 }
16464 case X86::BI__stosb: {
16465 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16466 // instruction, but it will create a memset that won't be optimized away.
16467 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16468 }
16469 case X86::BI__ud2:
16470 // llvm.trap makes a ud2a instruction on x86.
16471 return EmitTrapCall(Intrinsic::trap);
16472 case X86::BI__int2c: {
16473 // This syscall signals a driver assertion failure in x86 NT kernels.
16474 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16475 llvm::InlineAsm *IA =
16476 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16477 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16478 getLLVMContext(), llvm::AttributeList::FunctionIndex,
16479 llvm::Attribute::NoReturn);
16480 llvm::CallInst *CI = Builder.CreateCall(IA);
16481 CI->setAttributes(NoReturnAttr);
16482 return CI;
16483 }
16484 case X86::BI__readfsbyte:
16485 case X86::BI__readfsword:
16486 case X86::BI__readfsdword:
16487 case X86::BI__readfsqword: {
16488 llvm::Type *IntTy = ConvertType(E->getType());
16489 Value *Ptr = Builder.CreateIntToPtr(
16490 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16491 LoadInst *Load = Builder.CreateAlignedLoad(
16492 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16493 Load->setVolatile(true);
16494 return Load;
16495 }
16496 case X86::BI__readgsbyte:
16497 case X86::BI__readgsword:
16498 case X86::BI__readgsdword:
16499 case X86::BI__readgsqword: {
16500 llvm::Type *IntTy = ConvertType(E->getType());
16501 Value *Ptr = Builder.CreateIntToPtr(
16502 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16503 LoadInst *Load = Builder.CreateAlignedLoad(
16504 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16505 Load->setVolatile(true);
16506 return Load;
16507 }
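  // Note (hedged): address space 257 above corresponds to the x86 FS segment
  // and 256 to the GS segment, so e.g. __readgsqword(0x60) is emitted roughly
  // as:
  //   %p = inttoptr i32 96 to ptr addrspace(256)
  //   %v = load volatile i64, ptr addrspace(256) %p, align 8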
16508 case X86::BI__builtin_ia32_encodekey128_u32: {
16509 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16510
16511 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16512
16513 for (int i = 0; i < 3; ++i) {
16514 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16515 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16516 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16517 }
16518
16519 return Builder.CreateExtractValue(Call, 0);
16520 }
16521 case X86::BI__builtin_ia32_encodekey256_u32: {
16522 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16523
16524 Value *Call =
16525 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16526
16527 for (int i = 0; i < 4; ++i) {
16528 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16529 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16530 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16531 }
16532
16533 return Builder.CreateExtractValue(Call, 0);
16534 }
16535 case X86::BI__builtin_ia32_aesenc128kl_u8:
16536 case X86::BI__builtin_ia32_aesdec128kl_u8:
16537 case X86::BI__builtin_ia32_aesenc256kl_u8:
16538 case X86::BI__builtin_ia32_aesdec256kl_u8: {
16539 Intrinsic::ID IID;
16540 StringRef BlockName;
16541 switch (BuiltinID) {
16542 default:
16543 llvm_unreachable("Unexpected builtin");
16544 case X86::BI__builtin_ia32_aesenc128kl_u8:
16545 IID = Intrinsic::x86_aesenc128kl;
16546 BlockName = "aesenc128kl";
16547 break;
16548 case X86::BI__builtin_ia32_aesdec128kl_u8:
16549 IID = Intrinsic::x86_aesdec128kl;
16550 BlockName = "aesdec128kl";
16551 break;
16552 case X86::BI__builtin_ia32_aesenc256kl_u8:
16553 IID = Intrinsic::x86_aesenc256kl;
16554 BlockName = "aesenc256kl";
16555 break;
16556 case X86::BI__builtin_ia32_aesdec256kl_u8:
16557 IID = Intrinsic::x86_aesdec256kl;
16558 BlockName = "aesdec256kl";
16559 break;
16560 }
16561
16562 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16563
16564 BasicBlock *NoError =
16565 createBasicBlock(BlockName + "_no_error", this->CurFn);
16566 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16567 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16568
16569 Value *Ret = Builder.CreateExtractValue(Call, 0);
16570 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16571 Value *Out = Builder.CreateExtractValue(Call, 1);
16572 Builder.CreateCondBr(Succ, NoError, Error);
16573
16574 Builder.SetInsertPoint(NoError);
16575 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16576 Builder.CreateBr(End);
16577
16578 Builder.SetInsertPoint(Error);
16579 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16580 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16581 Builder.CreateBr(End);
16582
16583 Builder.SetInsertPoint(End);
16584 return Builder.CreateExtractValue(Call, 0);
16585 }
16586 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16587 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16588 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16589 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16590 Intrinsic::ID IID;
16591 StringRef BlockName;
16592 switch (BuiltinID) {
16593 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16594 IID = Intrinsic::x86_aesencwide128kl;
16595 BlockName = "aesencwide128kl";
16596 break;
16597 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16598 IID = Intrinsic::x86_aesdecwide128kl;
16599 BlockName = "aesdecwide128kl";
16600 break;
16601 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16602 IID = Intrinsic::x86_aesencwide256kl;
16603 BlockName = "aesencwide256kl";
16604 break;
16605 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16606 IID = Intrinsic::x86_aesdecwide256kl;
16607 BlockName = "aesdecwide256kl";
16608 break;
16609 }
16610
16611 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16612 Value *InOps[9];
16613 InOps[0] = Ops[2];
16614 for (int i = 0; i != 8; ++i) {
16615 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16616 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16617 }
16618
16619 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16620
16621 BasicBlock *NoError =
16622 createBasicBlock(BlockName + "_no_error", this->CurFn);
16623 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16624 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16625
16626 Value *Ret = Builder.CreateExtractValue(Call, 0);
16627 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16628 Builder.CreateCondBr(Succ, NoError, Error);
16629
16630 Builder.SetInsertPoint(NoError);
16631 for (int i = 0; i != 8; ++i) {
16632 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16633 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16634 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16635 }
16636 Builder.CreateBr(End);
16637
16638 Builder.SetInsertPoint(Error);
16639 for (int i = 0; i != 8; ++i) {
16640 Value *Out = Builder.CreateExtractValue(Call, i + 1);
16641 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16642 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16643 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16644 }
16645 Builder.CreateBr(End);
16646
16647 Builder.SetInsertPoint(End);
16648 return Builder.CreateExtractValue(Call, 0);
16649 }
16650 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16651 IsConjFMA = true;
16652 [[fallthrough]];
16653 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16654 Intrinsic::ID IID = IsConjFMA
16655 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16656 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16657 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16658 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16659 }
16660 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16661 IsConjFMA = true;
16662 [[fallthrough]];
16663 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16664 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16665 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16666 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16667 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16668 return EmitX86Select(*this, And, Call, Ops[0]);
16669 }
16670 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16671 IsConjFMA = true;
16672 [[fallthrough]];
16673 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16674 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16675 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16676 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16677 static constexpr int Mask[] = {0, 5, 6, 7};
16678 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16679 }
16680 case X86::BI__builtin_ia32_prefetchi:
16681 return Builder.CreateCall(
16682 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
16683 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
16684 llvm::ConstantInt::get(Int32Ty, 0)});
16685 }
16686}
16687
16688Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
16689 const CallExpr *E) {
16690 // Do not emit the builtin arguments directly inside the argument list of a
16691 // function call, because the evaluation order of function arguments is not
16692 // specified in C++. This is important when testing, to ensure the arguments
16693 // are emitted in the same order every time. E.g.:
16694 // Instead of:
16695 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
16696 // EmitScalarExpr(E->getArg(1)), "swdiv");
16697 // Use:
16698 // Value *Op0 = EmitScalarExpr(E->getArg(0));
16699 // Value *Op1 = EmitScalarExpr(E->getArg(1));
16700 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
16701
16702 Intrinsic::ID ID = Intrinsic::not_intrinsic;
16703
16704#include "llvm/TargetParser/PPCTargetParser.def"
16705 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
16706 unsigned Mask, CmpInst::Predicate CompOp,
16707 unsigned OpValue) -> Value * {
16708 if (SupportMethod == AIX_BUILTIN_PPC_FALSE)
16709 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
16710
16711 if (SupportMethod == AIX_BUILTIN_PPC_TRUE)
16712 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
16713
16714 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
16715
16716 llvm::Value *FieldValue = nullptr;
16717 if (SupportMethod == USE_SYS_CONF) {
16718 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
16719 llvm::Constant *SysConf =
16720 CGM.CreateRuntimeVariable(STy, "_system_configuration");
16721
16722 // Grab the appropriate field from _system_configuration.
16723 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
16724 ConstantInt::get(Int32Ty, FieldIdx)};
16725
16726 FieldValue = Builder.CreateGEP(STy, SysConf, Idxs);
16727 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
16728 CharUnits::fromQuantity(4).getAsAlign());
16729 } else if (SupportMethod == SYS_CALL) {
16730 llvm::FunctionType *FTy =
16731 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
16732 llvm::FunctionCallee Func =
16733 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
16734
16735 FieldValue =
16736 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
16737 }
16738 assert(FieldValue &&
16739 "SupportMethod value is not defined in PPCTargetParser.def.");
16740
16741 if (Mask)
16742 FieldValue = Builder.CreateAnd(FieldValue, Mask);
16743
16744 llvm::Type *ValueType = FieldValue->getType();
16745 bool IsValueType64Bit = ValueType->isIntegerTy(64);
16746 assert(
16747 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
16748 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
16749
16750 return Builder.CreateICmp(
16751 CompOp, FieldValue,
16752 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
16753 };
16754
16755 switch (BuiltinID) {
16756 default: return nullptr;
16757
16758 case Builtin::BI__builtin_cpu_is: {
16759 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16760 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16761 llvm::Triple Triple = getTarget().getTriple();
16762
16763 if (Triple.isOSAIX()) {
16764 unsigned SupportMethod, FieldIdx, CpuIdValue;
16765 CmpInst::Predicate CompareOp;
16766 typedef std::tuple<unsigned, unsigned, CmpInst::Predicate, unsigned>
16767 CPUType;
16768 std::tie(SupportMethod, FieldIdx, CompareOp, CpuIdValue) =
16769 static_cast<CPUType>(StringSwitch<CPUType>(CPUStr)
16770#define PPC_AIX_CPU(NAME, SUPPORT_METHOD, INDEX, COMPARE_OP, VALUE) \
16771 .Case(NAME, {SUPPORT_METHOD, INDEX, COMPARE_OP, VALUE})
16772#include "llvm/TargetParser/PPCTargetParser.def"
16773 .Default({AIX_BUILTIN_PPC_FALSE, 0,
16774 CmpInst::Predicate(), 0}));
16775 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, 0, CompareOp,
16776 CpuIdValue);
16777 }
16778
16779 assert(Triple.isOSLinux() &&
16780 "__builtin_cpu_is() is only supported for AIX and Linux.");
16781 unsigned NumCPUID = StringSwitch<unsigned>(CPUStr)
16782#define PPC_LNX_CPU(Name, NumericID) .Case(Name, NumericID)
16783#include "llvm/TargetParser/PPCTargetParser.def"
16784 .Default(-1U);
16785 assert(NumCPUID < -1U && "Invalid CPU name. Missed by SemaChecking?");
16786 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
16787 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16788 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
16789 return Builder.CreateICmpEQ(TheCall,
16790 llvm::ConstantInt::get(Int32Ty, NumCPUID));
16791 }
16792 case Builtin::BI__builtin_cpu_supports: {
16793 llvm::Triple Triple = getTarget().getTriple();
16794 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
16795 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
16796 if (Triple.isOSAIX()) {
16797 unsigned SupportMethod, FieldIdx, Mask, Value;
16798 CmpInst::Predicate CompOp;
16799 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
16800 unsigned>
16801 CPUSupportType;
16802 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
16803 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
16804#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
16805 VALUE) \
16806 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
16807#include "llvm/TargetParser/PPCTargetParser.def"
16808 .Default({AIX_BUILTIN_PPC_FALSE, 0, 0,
16809 CmpInst::Predicate(), 0}));
16810 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
16811 Value);
16812 }
16813
16814 assert(Triple.isOSLinux() &&
16815 "__builtin_cpu_supports() is only supported for AIX and Linux.");
16816 unsigned FeatureWord;
16817 unsigned BitMask;
16818 std::tie(FeatureWord, BitMask) =
16819 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
16820#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
16821 .Case(Name, {FA_WORD, Bitmask})
16822#include "llvm/TargetParser/PPCTargetParser.def"
16823 .Default({0, 0});
16824 if (!BitMask)
16825 return Builder.getFalse();
16826 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
16827 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
16828 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
16829 Value *Mask =
16830 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
16831 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
16832#undef PPC_FAWORD_HWCAP
16833#undef PPC_FAWORD_HWCAP2
16834#undef PPC_FAWORD_CPUID
16835 }
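  // Hedged caller-side sketch: on Linux a query such as
  //   if (__builtin_cpu_supports("vsx")) { /* VSX code path */ }
  // lowers to a ppc_fixed_addr_ld of the matching feature word followed by the
  // mask-and-compare emitted above.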
16836
16837 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
16838 // call __builtin_readcyclecounter.
16839 case PPC::BI__builtin_ppc_get_timebase:
16840 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
16841
16842 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
16843 case PPC::BI__builtin_altivec_lvx:
16844 case PPC::BI__builtin_altivec_lvxl:
16845 case PPC::BI__builtin_altivec_lvebx:
16846 case PPC::BI__builtin_altivec_lvehx:
16847 case PPC::BI__builtin_altivec_lvewx:
16848 case PPC::BI__builtin_altivec_lvsl:
16849 case PPC::BI__builtin_altivec_lvsr:
16850 case PPC::BI__builtin_vsx_lxvd2x:
16851 case PPC::BI__builtin_vsx_lxvw4x:
16852 case PPC::BI__builtin_vsx_lxvd2x_be:
16853 case PPC::BI__builtin_vsx_lxvw4x_be:
16854 case PPC::BI__builtin_vsx_lxvl:
16855 case PPC::BI__builtin_vsx_lxvll:
16856 {
16857 SmallVector<Value *, 2> Ops;
16858 Ops.push_back(EmitScalarExpr(E->getArg(0)));
16859 Ops.push_back(EmitScalarExpr(E->getArg(1)));
16860 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
16861 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
16862 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
16863 Ops.pop_back();
16864 }
16865
16866 switch (BuiltinID) {
16867 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
16868 case PPC::BI__builtin_altivec_lvx:
16869 ID = Intrinsic::ppc_altivec_lvx;
16870 break;
16871 case PPC::BI__builtin_altivec_lvxl:
16872 ID = Intrinsic::ppc_altivec_lvxl;
16873 break;
16874 case PPC::BI__builtin_altivec_lvebx:
16875 ID = Intrinsic::ppc_altivec_lvebx;
16876 break;
16877 case PPC::BI__builtin_altivec_lvehx:
16878 ID = Intrinsic::ppc_altivec_lvehx;
16879 break;
16880 case PPC::BI__builtin_altivec_lvewx:
16881 ID = Intrinsic::ppc_altivec_lvewx;
16882 break;
16883 case PPC::BI__builtin_altivec_lvsl:
16884 ID = Intrinsic::ppc_altivec_lvsl;
16885 break;
16886 case PPC::BI__builtin_altivec_lvsr:
16887 ID = Intrinsic::ppc_altivec_lvsr;
16888 break;
16889 case PPC::BI__builtin_vsx_lxvd2x:
16890 ID = Intrinsic::ppc_vsx_lxvd2x;
16891 break;
16892 case PPC::BI__builtin_vsx_lxvw4x:
16893 ID = Intrinsic::ppc_vsx_lxvw4x;
16894 break;
16895 case PPC::BI__builtin_vsx_lxvd2x_be:
16896 ID = Intrinsic::ppc_vsx_lxvd2x_be;
16897 break;
16898 case PPC::BI__builtin_vsx_lxvw4x_be:
16899 ID = Intrinsic::ppc_vsx_lxvw4x_be;
16900 break;
16901 case PPC::BI__builtin_vsx_lxvl:
16902 ID = Intrinsic::ppc_vsx_lxvl;
16903 break;
16904 case PPC::BI__builtin_vsx_lxvll:
16905 ID = Intrinsic::ppc_vsx_lxvll;
16906 break;
16907 }
16908 llvm::Function *F = CGM.getIntrinsic(ID);
16909 return Builder.CreateCall(F, Ops, "");
16910 }
16911
16912 // vec_st, vec_xst_be
16913 case PPC::BI__builtin_altivec_stvx:
16914 case PPC::BI__builtin_altivec_stvxl:
16915 case PPC::BI__builtin_altivec_stvebx:
16916 case PPC::BI__builtin_altivec_stvehx:
16917 case PPC::BI__builtin_altivec_stvewx:
16918 case PPC::BI__builtin_vsx_stxvd2x:
16919 case PPC::BI__builtin_vsx_stxvw4x:
16920 case PPC::BI__builtin_vsx_stxvd2x_be:
16921 case PPC::BI__builtin_vsx_stxvw4x_be:
16922 case PPC::BI__builtin_vsx_stxvl:
16923 case PPC::BI__builtin_vsx_stxvll:
16924 {
16925 SmallVector<Value *, 3> Ops;
16926 Ops.push_back(EmitScalarExpr(E->getArg(0)));
16927 Ops.push_back(EmitScalarExpr(E->getArg(1)));
16928 Ops.push_back(EmitScalarExpr(E->getArg(2)));
16929 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
16930 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
16931 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
16932 Ops.pop_back();
16933 }
16934
16935 switch (BuiltinID) {
16936 default: llvm_unreachable("Unsupported st intrinsic!");
16937 case PPC::BI__builtin_altivec_stvx:
16938 ID = Intrinsic::ppc_altivec_stvx;
16939 break;
16940 case PPC::BI__builtin_altivec_stvxl:
16941 ID = Intrinsic::ppc_altivec_stvxl;
16942 break;
16943 case PPC::BI__builtin_altivec_stvebx:
16944 ID = Intrinsic::ppc_altivec_stvebx;
16945 break;
16946 case PPC::BI__builtin_altivec_stvehx:
16947 ID = Intrinsic::ppc_altivec_stvehx;
16948 break;
16949 case PPC::BI__builtin_altivec_stvewx:
16950 ID = Intrinsic::ppc_altivec_stvewx;
16951 break;
16952 case PPC::BI__builtin_vsx_stxvd2x:
16953 ID = Intrinsic::ppc_vsx_stxvd2x;
16954 break;
16955 case PPC::BI__builtin_vsx_stxvw4x:
16956 ID = Intrinsic::ppc_vsx_stxvw4x;
16957 break;
16958 case PPC::BI__builtin_vsx_stxvd2x_be:
16959 ID = Intrinsic::ppc_vsx_stxvd2x_be;
16960 break;
16961 case PPC::BI__builtin_vsx_stxvw4x_be:
16962 ID = Intrinsic::ppc_vsx_stxvw4x_be;
16963 break;
16964 case PPC::BI__builtin_vsx_stxvl:
16965 ID = Intrinsic::ppc_vsx_stxvl;
16966 break;
16967 case PPC::BI__builtin_vsx_stxvll:
16968 ID = Intrinsic::ppc_vsx_stxvll;
16969 break;
16970 }
16971 llvm::Function *F = CGM.getIntrinsic(ID);
16972 return Builder.CreateCall(F, Ops, "");
16973 }
16974 case PPC::BI__builtin_vsx_ldrmb: {
16975 // This essentially boils down to performing an unaligned VMX load sequence
16976 // (so as to avoid crossing a page boundary) and then shuffling the elements
16977 // into the right side of the vector register.
16978 Value *Op0 = EmitScalarExpr(E->getArg(0));
16979 Value *Op1 = EmitScalarExpr(E->getArg(1));
16980 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
16981 llvm::Type *ResTy = ConvertType(E->getType());
16982 bool IsLE = getTarget().isLittleEndian();
16983
16984 // If the user wants the entire vector, just load the entire vector.
16985 if (NumBytes == 16) {
16986 Value *LD =
16987 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
16988 if (!IsLE)
16989 return LD;
16990
16991 // Reverse the bytes on LE.
16992 SmallVector<int, 16> RevMask;
16993 for (int Idx = 0; Idx < 16; Idx++)
16994 RevMask.push_back(15 - Idx);
16995 return Builder.CreateShuffleVector(LD, LD, RevMask);
16996 }
16997
16998 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
16999 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17000 : Intrinsic::ppc_altivec_lvsl);
17001 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17002 Value *HiMem = Builder.CreateGEP(
17003 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17004 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17005 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17006 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17007
17008 Op0 = IsLE ? HiLd : LoLd;
17009 Op1 = IsLE ? LoLd : HiLd;
17010 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17011 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17012
17013 if (IsLE) {
17014 SmallVector<int, 16> Consts;
17015 for (int Idx = 0; Idx < 16; Idx++) {
17016 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17017 : 16 - (NumBytes - Idx);
17018 Consts.push_back(Val);
17019 }
17020 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17021 Zero, Consts);
17022 }
17023 SmallVector<Constant *, 16> Consts;
17024 for (int Idx = 0; Idx < 16; Idx++)
17025 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17026 Value *Mask2 = ConstantVector::get(Consts);
17027 return Builder.CreateBitCast(
17028 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17029 }
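  // Hedged illustration of the partial-load path above: for a hypothetical
  // call such as __builtin_vsx_ldrmb(p, 7), two lvx loads (each confined to a
  // 16-byte-aligned block, so no page boundary is crossed) are combined with
  // vperm so that the 7 requested bytes end up right-justified in the result
  // vector.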
17030 case PPC::BI__builtin_vsx_strmb: {
17031 Value *Op0 = EmitScalarExpr(E->getArg(0));
17032 Value *Op1 = EmitScalarExpr(E->getArg(1));
17033 Value *Op2 = EmitScalarExpr(E->getArg(2));
17034 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17035 bool IsLE = getTarget().isLittleEndian();
17036 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17037 // When storing the whole vector, simply store it on BE; on LE, reverse the
17038 // bytes first and then store.
17039 if (Width == 16) {
17040 Value *StVec = Op2;
17041 if (IsLE) {
17042 SmallVector<int, 16> RevMask;
17043 for (int Idx = 0; Idx < 16; Idx++)
17044 RevMask.push_back(15 - Idx);
17045 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17046 }
17047 return Builder.CreateStore(
17048 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17049 }
17050 auto *ConvTy = Int64Ty;
17051 unsigned NumElts = 0;
17052 switch (Width) {
17053 default:
17054 llvm_unreachable("width for stores must be a power of 2");
17055 case 8:
17056 ConvTy = Int64Ty;
17057 NumElts = 2;
17058 break;
17059 case 4:
17060 ConvTy = Int32Ty;
17061 NumElts = 4;
17062 break;
17063 case 2:
17064 ConvTy = Int16Ty;
17065 NumElts = 8;
17066 break;
17067 case 1:
17068 ConvTy = Int8Ty;
17069 NumElts = 16;
17070 break;
17071 }
17072 Value *Vec = Builder.CreateBitCast(
17073 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
17074 Value *Ptr =
17075 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
17076 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
17077 if (IsLE && Width > 1) {
17078 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17079 Elt = Builder.CreateCall(F, Elt);
17080 }
17081 return Builder.CreateStore(
17082 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
17083 };
17084 unsigned Stored = 0;
17085 unsigned RemainingBytes = NumBytes;
17086 Value *Result;
17087 if (NumBytes == 16)
17088 return StoreSubVec(16, 0, 0);
17089 if (NumBytes >= 8) {
17090 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17091 RemainingBytes -= 8;
17092 Stored += 8;
17093 }
17094 if (RemainingBytes >= 4) {
17095 Result = StoreSubVec(4, NumBytes - Stored - 4,
17096 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17097 RemainingBytes -= 4;
17098 Stored += 4;
17099 }
17100 if (RemainingBytes >= 2) {
17101 Result = StoreSubVec(2, NumBytes - Stored - 2,
17102 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17103 RemainingBytes -= 2;
17104 Stored += 2;
17105 }
17106 if (RemainingBytes)
17107 Result =
17108 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
17109 return Result;
17110 }
17111 // Square root
17112 case PPC::BI__builtin_vsx_xvsqrtsp:
17113 case PPC::BI__builtin_vsx_xvsqrtdp: {
17114 llvm::Type *ResultType = ConvertType(E->getType());
17115 Value *X = EmitScalarExpr(E->getArg(0));
17116 if (Builder.getIsFPConstrained()) {
17117 llvm::Function *F = CGM.getIntrinsic(
17118 Intrinsic::experimental_constrained_sqrt, ResultType);
17119 return Builder.CreateConstrainedFPCall(F, X);
17120 } else {
17121 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17122 return Builder.CreateCall(F, X);
17123 }
17124 }
17125 // Count leading zeros
17126 case PPC::BI__builtin_altivec_vclzb:
17127 case PPC::BI__builtin_altivec_vclzh:
17128 case PPC::BI__builtin_altivec_vclzw:
17129 case PPC::BI__builtin_altivec_vclzd: {
17130 llvm::Type *ResultType = ConvertType(E->getType());
17131 Value *X = EmitScalarExpr(E->getArg(0));
17132 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17133 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17134 return Builder.CreateCall(F, {X, Undef});
17135 }
17136 case PPC::BI__builtin_altivec_vctzb:
17137 case PPC::BI__builtin_altivec_vctzh:
17138 case PPC::BI__builtin_altivec_vctzw:
17139 case PPC::BI__builtin_altivec_vctzd: {
17140 llvm::Type *ResultType = ConvertType(E->getType());
17141 Value *X = EmitScalarExpr(E->getArg(0));
17142 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17143 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17144 return Builder.CreateCall(F, {X, Undef});
17145 }
17146 case PPC::BI__builtin_altivec_vinsd:
17147 case PPC::BI__builtin_altivec_vinsw:
17148 case PPC::BI__builtin_altivec_vinsd_elt:
17149 case PPC::BI__builtin_altivec_vinsw_elt: {
17150 llvm::Type *ResultType = ConvertType(E->getType());
17151 Value *Op0 = EmitScalarExpr(E->getArg(0));
17152 Value *Op1 = EmitScalarExpr(E->getArg(1));
17153 Value *Op2 = EmitScalarExpr(E->getArg(2));
17154
17155 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17156 BuiltinID == PPC::BI__builtin_altivec_vinsd);
17157
17158 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17159 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
17160
17161 // The third argument must be a compile time constant.
17162 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17163 assert(ArgCI &&
17164 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
17165
17166 // The valid range for the third argument depends on the input type and the
17167 // builtin being called.
17168 int ValidMaxValue = 0;
17169 if (IsUnaligned)
17170 ValidMaxValue = (Is32bit) ? 12 : 8;
17171 else
17172 ValidMaxValue = (Is32bit) ? 3 : 1;
17173
17174 // Get value of third argument.
17175 int64_t ConstArg = ArgCI->getSExtValue();
17176
17177 // Compose range checking error message.
17178 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
17179 RangeErrMsg += " number " + llvm::to_string(ConstArg);
17180 RangeErrMsg += " is outside of the valid range [0, ";
17181 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
17182
17183 // Issue error if third argument is not within the valid range.
17184 if (ConstArg < 0 || ConstArg > ValidMaxValue)
17185 CGM.Error(E->getExprLoc(), RangeErrMsg);
17186
17187 // Input to vec_replace_elt is an element index, convert to byte index.
17188 if (!IsUnaligned) {
17189 ConstArg *= Is32bit ? 4 : 8;
17190 // Fix the constant according to endianness.
17191 if (getTarget().isLittleEndian())
17192 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
17193 }
17194
17195 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
17196 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
17197 // Casting input to vector int as per intrinsic definition.
17198 Op0 =
17199 Is32bit
17200 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
17201 : Builder.CreateBitCast(Op0,
17202 llvm::FixedVectorType::get(Int64Ty, 2));
17203 return Builder.CreateBitCast(
17204 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
17205 }
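  // Worked example of the index conversion above (hedged, hypothetical
  // operands): for a 32-bit element insert with element index 1, the byte
  // index becomes 1 * 4 = 4, and on little-endian targets it is then flipped
  // to 12 - 4 = 8 before calling ppc_altivec_vinsw.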
17206 case PPC::BI__builtin_altivec_vpopcntb:
17207 case PPC::BI__builtin_altivec_vpopcnth:
17208 case PPC::BI__builtin_altivec_vpopcntw:
17209 case PPC::BI__builtin_altivec_vpopcntd: {
17210 llvm::Type *ResultType = ConvertType(E->getType());
17211 Value *X = EmitScalarExpr(E->getArg(0));
17212 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17213 return Builder.CreateCall(F, X);
17214 }
17215 case PPC::BI__builtin_altivec_vadduqm:
17216 case PPC::BI__builtin_altivec_vsubuqm: {
17217 Value *Op0 = EmitScalarExpr(E->getArg(0));
17218 Value *Op1 = EmitScalarExpr(E->getArg(1));
17219 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17220 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
17221 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
17222 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
17223 return Builder.CreateAdd(Op0, Op1, "vadduqm");
17224 else
17225 return Builder.CreateSub(Op0, Op1, "vsubuqm");
17226 }
17227 case PPC::BI__builtin_altivec_vaddcuq_c:
17228 case PPC::BI__builtin_altivec_vsubcuq_c: {
17229 SmallVector<Value *, 2> Ops;
17230 Value *Op0 = EmitScalarExpr(E->getArg(0));
17231 Value *Op1 = EmitScalarExpr(E->getArg(1));
17232 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17233 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17234 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17235 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17236 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
17237 ? Intrinsic::ppc_altivec_vaddcuq
17238 : Intrinsic::ppc_altivec_vsubcuq;
17239 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17240 }
17241 case PPC::BI__builtin_altivec_vaddeuqm_c:
17242 case PPC::BI__builtin_altivec_vaddecuq_c:
17243 case PPC::BI__builtin_altivec_vsubeuqm_c:
17244 case PPC::BI__builtin_altivec_vsubecuq_c: {
17245 SmallVector<Value *, 3> Ops;
17246 Value *Op0 = EmitScalarExpr(E->getArg(0));
17247 Value *Op1 = EmitScalarExpr(E->getArg(1));
17248 Value *Op2 = EmitScalarExpr(E->getArg(2));
17249 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17250 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17251 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17252 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17253 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
17254 switch (BuiltinID) {
17255 default:
17256 llvm_unreachable("Unsupported intrinsic!");
17257 case PPC::BI__builtin_altivec_vaddeuqm_c:
17258 ID = Intrinsic::ppc_altivec_vaddeuqm;
17259 break;
17260 case PPC::BI__builtin_altivec_vaddecuq_c:
17261 ID = Intrinsic::ppc_altivec_vaddecuq;
17262 break;
17263 case PPC::BI__builtin_altivec_vsubeuqm_c:
17264 ID = Intrinsic::ppc_altivec_vsubeuqm;
17265 break;
17266 case PPC::BI__builtin_altivec_vsubecuq_c:
17267 ID = Intrinsic::ppc_altivec_vsubecuq;
17268 break;
17269 }
17270 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17271 }
17272 case PPC::BI__builtin_ppc_rldimi:
17273 case PPC::BI__builtin_ppc_rlwimi: {
17274 Value *Op0 = EmitScalarExpr(E->getArg(0));
17275 Value *Op1 = EmitScalarExpr(E->getArg(1));
17276 Value *Op2 = EmitScalarExpr(E->getArg(2));
17277 Value *Op3 = EmitScalarExpr(E->getArg(3));
17278 // rldimi is a 64-bit instruction; on 32-bit targets, expand the intrinsic
17279 // before isel to leverage peepholes and avoid legalization effort.
17280 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
17281 !getTarget().getTriple().isPPC64()) {
17282 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
17283 Op2 = Builder.CreateZExt(Op2, Int64Ty);
17284 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
17285 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
17286 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
17287 }
17288 return Builder.CreateCall(
17289 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
17290 ? Intrinsic::ppc_rldimi
17291 : Intrinsic::ppc_rlwimi),
17292 {Op0, Op1, Op2, Op3});
17293 }
17294 case PPC::BI__builtin_ppc_rlwnm: {
17295 Value *Op0 = EmitScalarExpr(E->getArg(0));
17296 Value *Op1 = EmitScalarExpr(E->getArg(1));
17297 Value *Op2 = EmitScalarExpr(E->getArg(2));
17298 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
17299 {Op0, Op1, Op2});
17300 }
17301 case PPC::BI__builtin_ppc_poppar4:
17302 case PPC::BI__builtin_ppc_poppar8: {
17303 Value *Op0 = EmitScalarExpr(E->getArg(0));
17304 llvm::Type *ArgType = Op0->getType();
17305 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17306 Value *Tmp = Builder.CreateCall(F, Op0);
17307
17308 llvm::Type *ResultType = ConvertType(E->getType());
17309 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
17310 if (Result->getType() != ResultType)
17311 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
17312 "cast");
17313 return Result;
17314 }
17315 case PPC::BI__builtin_ppc_cmpb: {
17316 Value *Op0 = EmitScalarExpr(E->getArg(0));
17317 Value *Op1 = EmitScalarExpr(E->getArg(1));
17318 if (getTarget().getTriple().isPPC64()) {
17319 Function *F =
17320 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17321 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
17322 }
17323 // For 32-bit, emit the code as below:
17324 // %conv = trunc i64 %a to i32
17325 // %conv1 = trunc i64 %b to i32
17326 // %shr = lshr i64 %a, 32
17327 // %conv2 = trunc i64 %shr to i32
17328 // %shr3 = lshr i64 %b, 32
17329 // %conv4 = trunc i64 %shr3 to i32
17330 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17331 // %conv5 = zext i32 %0 to i64
17332 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17333 // %conv614 = zext i32 %1 to i64
17334 // %shl = shl nuw i64 %conv614, 32
17335 // %or = or i64 %shl, %conv5
17336 // ret i64 %or
17337 Function *F =
17338 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17339 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
17340 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
17341 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
17342 Value *ArgOneHi =
17343 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
17344 Value *ArgTwoHi =
17345 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
17346 Value *ResLo = Builder.CreateZExt(
17347 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
17348 Value *ResHiShift = Builder.CreateZExt(
17349 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
17350 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
17351 return Builder.CreateOr(ResLo, ResHi);
17352 }
17353 // Copy sign
17354 case PPC::BI__builtin_vsx_xvcpsgnsp:
17355 case PPC::BI__builtin_vsx_xvcpsgndp: {
17356 llvm::Type *ResultType = ConvertType(E->getType());
17357 Value *X = EmitScalarExpr(E->getArg(0));
17358 Value *Y = EmitScalarExpr(E->getArg(1));
17359 ID = Intrinsic::copysign;
17360 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17361 return Builder.CreateCall(F, {X, Y});
17362 }
17363 // Rounding/truncation
17364 case PPC::BI__builtin_vsx_xvrspip:
17365 case PPC::BI__builtin_vsx_xvrdpip:
17366 case PPC::BI__builtin_vsx_xvrdpim:
17367 case PPC::BI__builtin_vsx_xvrspim:
17368 case PPC::BI__builtin_vsx_xvrdpi:
17369 case PPC::BI__builtin_vsx_xvrspi:
17370 case PPC::BI__builtin_vsx_xvrdpic:
17371 case PPC::BI__builtin_vsx_xvrspic:
17372 case PPC::BI__builtin_vsx_xvrdpiz:
17373 case PPC::BI__builtin_vsx_xvrspiz: {
17374 llvm::Type *ResultType = ConvertType(E->getType());
17375 Value *X = EmitScalarExpr(E->getArg(0));
17376 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17377 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17378 ID = Builder.getIsFPConstrained()
17379 ? Intrinsic::experimental_constrained_floor
17380 : Intrinsic::floor;
17381 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17382 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17383 ID = Builder.getIsFPConstrained()
17384 ? Intrinsic::experimental_constrained_round
17385 : Intrinsic::round;
17386 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17387 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17388 ID = Builder.getIsFPConstrained()
17389 ? Intrinsic::experimental_constrained_rint
17390 : Intrinsic::rint;
17391 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17392 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17393 ID = Builder.getIsFPConstrained()
17394 ? Intrinsic::experimental_constrained_ceil
17395 : Intrinsic::ceil;
17396 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17397 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17398 ID = Builder.getIsFPConstrained()
17399 ? Intrinsic::experimental_constrained_trunc
17400 : Intrinsic::trunc;
17401 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17402 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
17403 : Builder.CreateCall(F, X);
17404 }
17405
17406 // Absolute value
17407 case PPC::BI__builtin_vsx_xvabsdp:
17408 case PPC::BI__builtin_vsx_xvabssp: {
17409 llvm::Type *ResultType = ConvertType(E->getType());
17410 Value *X = EmitScalarExpr(E->getArg(0));
17411 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17412 return Builder.CreateCall(F, X);
17413 }
17414
17415 // Fastmath by default
17416 case PPC::BI__builtin_ppc_recipdivf:
17417 case PPC::BI__builtin_ppc_recipdivd:
17418 case PPC::BI__builtin_ppc_rsqrtf:
17419 case PPC::BI__builtin_ppc_rsqrtd: {
17420 FastMathFlags FMF = Builder.getFastMathFlags();
17421 Builder.getFastMathFlags().setFast();
17422 llvm::Type *ResultType = ConvertType(E->getType());
17423 Value *X = EmitScalarExpr(E->getArg(0));
17424
17425 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17426 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17427 Value *Y = EmitScalarExpr(E->getArg(1));
17428 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
17429 Builder.getFastMathFlags() &= (FMF);
17430 return FDiv;
17431 }
17432 auto *One = ConstantFP::get(ResultType, 1.0);
17433 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17434 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
17435 Builder.getFastMathFlags() &= (FMF);
17436 return FDiv;
17437 }
17438 case PPC::BI__builtin_ppc_alignx: {
17439 Value *Op0 = EmitScalarExpr(E->getArg(0));
17440 Value *Op1 = EmitScalarExpr(E->getArg(1));
17441 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
17442 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
17443 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
17444 llvm::Value::MaximumAlignment);
17445
17446 emitAlignmentAssumption(Op1, E->getArg(1),
17447 /*The expr loc is sufficient.*/ SourceLocation(),
17448 AlignmentCI, nullptr);
17449 return Op1;
17450 }
17451 case PPC::BI__builtin_ppc_rdlam: {
17452 Value *Op0 = EmitScalarExpr(E->getArg(0));
17453 Value *Op1 = EmitScalarExpr(E->getArg(1));
17454 Value *Op2 = EmitScalarExpr(E->getArg(2));
17455 llvm::Type *Ty = Op0->getType();
17456 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
17457 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17458 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
17459 return Builder.CreateAnd(Rotate, Op2);
17460 }
17461 case PPC::BI__builtin_ppc_load2r: {
17462 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17463 Value *Op0 = EmitScalarExpr(E->getArg(0));
17464 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
17465 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
17466 }
17467 // FMA variations
17468 case PPC::BI__builtin_ppc_fnmsub:
17469 case PPC::BI__builtin_ppc_fnmsubs:
17470 case PPC::BI__builtin_vsx_xvmaddadp:
17471 case PPC::BI__builtin_vsx_xvmaddasp:
17472 case PPC::BI__builtin_vsx_xvnmaddadp:
17473 case PPC::BI__builtin_vsx_xvnmaddasp:
17474 case PPC::BI__builtin_vsx_xvmsubadp:
17475 case PPC::BI__builtin_vsx_xvmsubasp:
17476 case PPC::BI__builtin_vsx_xvnmsubadp:
17477 case PPC::BI__builtin_vsx_xvnmsubasp: {
17478 llvm::Type *ResultType = ConvertType(E->getType());
17479 Value *X = EmitScalarExpr(E->getArg(0));
17480 Value *Y = EmitScalarExpr(E->getArg(1));
17481 Value *Z = EmitScalarExpr(E->getArg(2));
17482 llvm::Function *F;
17483 if (Builder.getIsFPConstrained())
17484 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17485 else
17486 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17487 switch (BuiltinID) {
17488 case PPC::BI__builtin_vsx_xvmaddadp:
17489 case PPC::BI__builtin_vsx_xvmaddasp:
17490 if (Builder.getIsFPConstrained())
17491 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17492 else
17493 return Builder.CreateCall(F, {X, Y, Z});
17494 case PPC::BI__builtin_vsx_xvnmaddadp:
17495 case PPC::BI__builtin_vsx_xvnmaddasp:
17496 if (Builder.getIsFPConstrained())
17497 return Builder.CreateFNeg(
17498 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17499 else
17500 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17501 case PPC::BI__builtin_vsx_xvmsubadp:
17502 case PPC::BI__builtin_vsx_xvmsubasp:
17503 if (Builder.getIsFPConstrained())
17504 return Builder.CreateConstrainedFPCall(
17505 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17506 else
17507 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17508 case PPC::BI__builtin_ppc_fnmsub:
17509 case PPC::BI__builtin_ppc_fnmsubs:
17510 case PPC::BI__builtin_vsx_xvnmsubadp:
17511 case PPC::BI__builtin_vsx_xvnmsubasp:
17512 if (Builder.getIsFPConstrained())
17513 return Builder.CreateFNeg(
17514 Builder.CreateConstrainedFPCall(
17515 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17516 "neg");
17517 else
17518 return Builder.CreateCall(
17519 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17520 }
17521 llvm_unreachable("Unknown FMA operation");
17522 return nullptr; // Suppress no-return warning
17523 }
17524
17525 case PPC::BI__builtin_vsx_insertword: {
17526 Value *Op0 = EmitScalarExpr(E->getArg(0));
17527 Value *Op1 = EmitScalarExpr(E->getArg(1));
17528 Value *Op2 = EmitScalarExpr(E->getArg(2));
17529 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17530
17531 // The third argument is a compile-time constant int. It must be clamped
17532 // to the range [0, 12].
17533 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17534 assert(ArgCI &&
17535 "Third arg to xxinsertw intrinsic must be constant integer");
17536 const int64_t MaxIndex = 12;
17537 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17538
17539 // The builtin semantics don't exactly match the xxinsertw instruction's
17540 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17541 // word from the first argument, and inserts it in the second argument. The
17542 // instruction extracts the word from its second input register and inserts
17543 // it into its first input register, so swap the first and second arguments.
17544 std::swap(Op0, Op1);
17545
17546 // Need to cast the second argument from a vector of unsigned int to a
17547 // vector of long long.
17548 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17549
17550 if (getTarget().isLittleEndian()) {
17551 // Reverse the double words in the vector we will extract from.
17552 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17553 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17554
17555 // Reverse the index.
17556 Index = MaxIndex - Index;
17557 }
17558
17559 // Intrinsic expects the first arg to be a vector of int.
17560 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17561 Op2 = ConstantInt::getSigned(Int32Ty, Index);
17562 return Builder.CreateCall(F, {Op0, Op1, Op2});
17563 }
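  // Worked example of the xxinsertw lowering above (hedged, hypothetical
  // index): for index 4 on a little-endian target, the first two operands are
  // swapped, the doublewords of the extract source are reversed, and the index
  // is rewritten to MaxIndex - 4 = 8 before calling ppc_vsx_xxinsertw.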
17564
17565 case PPC::BI__builtin_vsx_extractuword: {
17566 Value *Op0 = EmitScalarExpr(E->getArg(0));
17567 Value *Op1 = EmitScalarExpr(E->getArg(1));
17568 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17569
17570 // Intrinsic expects the first argument to be a vector of doublewords.
17571 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17572
17573 // The second argument is a compile time constant int that needs to
17574 // be clamped to the range [0, 12].
17575 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17576 assert(ArgCI &&
17577 "Second Arg to xxextractuw intrinsic must be a constant integer!");
17578 const int64_t MaxIndex = 12;
17579 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17580
17581 if (getTarget().isLittleEndian()) {
17582 // Reverse the index.
17583 Index = MaxIndex - Index;
17584 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17585
17586 // Emit the call, then reverse the double words of the results vector.
17587 Value *Call = Builder.CreateCall(F, {Op0, Op1});
17588
17589 Value *ShuffleCall =
17590 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17591 return ShuffleCall;
17592 } else {
17593 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17594 return Builder.CreateCall(F, {Op0, Op1});
17595 }
17596 }
17597
17598 case PPC::BI__builtin_vsx_xxpermdi: {
17599 Value *Op0 = EmitScalarExpr(E->getArg(0));
17600 Value *Op1 = EmitScalarExpr(E->getArg(1));
17601 Value *Op2 = EmitScalarExpr(E->getArg(2));
17602 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17603 assert(ArgCI && "Third arg must be constant integer!");
17604
17605 unsigned Index = ArgCI->getZExtValue();
17606 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17607 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17608
17609 // Account for endianness by treating this as just a shuffle. So we use the
17610 // same indices for both LE and BE in order to produce expected results in
17611 // both cases.
17612 int ElemIdx0 = (Index & 2) >> 1;
17613 int ElemIdx1 = 2 + (Index & 1);
17614
17615 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17616 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17617 QualType BIRetType = E->getType();
17618 auto RetTy = ConvertType(BIRetType);
17619 return Builder.CreateBitCast(ShuffleCall, RetTy);
17620 }
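  // Worked example of the xxpermdi mask decoding above: for Index = 3,
  // ElemIdx0 = (3 & 2) >> 1 = 1 and ElemIdx1 = 2 + (3 & 1) = 3, so the shuffle
  // selects doubleword 1 of the first source and doubleword 1 of the second
  // source from the concatenated pair.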
17621
17622 case PPC::BI__builtin_vsx_xxsldwi: {
17623 Value *Op0 = EmitScalarExpr(E->getArg(0));
17624 Value *Op1 = EmitScalarExpr(E->getArg(1));
17625 Value *Op2 = EmitScalarExpr(E->getArg(2));
17626 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17627 assert(ArgCI && "Third argument must be a compile time constant");
17628 unsigned Index = ArgCI->getZExtValue() & 0x3;
17629 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17630 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17631
17632 // Create a shuffle mask
17633 int ElemIdx0;
17634 int ElemIdx1;
17635 int ElemIdx2;
17636 int ElemIdx3;
17637 if (getTarget().isLittleEndian()) {
17638 // Little endian element N comes from element 8+N-Index of the
17639 // concatenated wide vector (of course, using modulo arithmetic on
17640 // the total number of elements).
17641 ElemIdx0 = (8 - Index) % 8;
17642 ElemIdx1 = (9 - Index) % 8;
17643 ElemIdx2 = (10 - Index) % 8;
17644 ElemIdx3 = (11 - Index) % 8;
17645 } else {
17646 // Big endian ElemIdx<N> = Index + N
17647 ElemIdx0 = Index;
17648 ElemIdx1 = Index + 1;
17649 ElemIdx2 = Index + 2;
17650 ElemIdx3 = Index + 3;
17651 }
17652
17653 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17654 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17655 QualType BIRetType = E->getType();
17656 auto RetTy = ConvertType(BIRetType);
17657 return Builder.CreateBitCast(ShuffleCall, RetTy);
17658 }
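  // Worked example of the xxsldwi shuffle mask above: for Index = 1, big
  // endian produces {1, 2, 3, 4}, while little endian produces
  // {(8-1)%8, (9-1)%8, (10-1)%8, (11-1)%8} = {7, 0, 1, 2} over the
  // concatenated eight-element vector.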
17659
17660 case PPC::BI__builtin_pack_vector_int128: {
17661 Value *Op0 = EmitScalarExpr(E->getArg(0));
17662 Value *Op1 = EmitScalarExpr(E->getArg(1));
17663 bool isLittleEndian = getTarget().isLittleEndian();
17664 Value *PoisonValue =
17665 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17666 Value *Res = Builder.CreateInsertElement(
17667 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17668 Res = Builder.CreateInsertElement(Res, Op1,
17669 (uint64_t)(isLittleEndian ? 0 : 1));
17670 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17671 }
17672
17673 case PPC::BI__builtin_unpack_vector_int128: {
17674 Value *Op0 = EmitScalarExpr(E->getArg(0));
17675 Value *Op1 = EmitScalarExpr(E->getArg(1));
17676 ConstantInt *Index = cast<ConstantInt>(Op1);
17677 Value *Unpacked = Builder.CreateBitCast(
17678 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
17679
17680 if (getTarget().isLittleEndian())
17681 Index =
17682 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
17683
17684 return Builder.CreateExtractElement(Unpacked, Index);
17685 }
17686
17687 case PPC::BI__builtin_ppc_sthcx: {
17688 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
17689 Value *Op0 = EmitScalarExpr(E->getArg(0));
17690 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
17691 return Builder.CreateCall(F, {Op0, Op1});
17692 }
17693
17694 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
17695 // Some of the MMA instructions accumulate their result into an existing
17696 // accumulator whereas the others generate a new accumulator. So we need to
17697 // use custom code generation to expand a builtin call with a pointer to a
17698 // load (if the corresponding instruction accumulates its result) followed by
17699 // the call to the intrinsic and a store of the result.
17700#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
17701 case PPC::BI__builtin_##Name:
17702#include "clang/Basic/BuiltinsPPC.def"
17703 {
17704 SmallVector<Value *, 4> Ops;
17705 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
17706 if (E->getArg(i)->getType()->isArrayType())
17707 Ops.push_back(
17708 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
17709 else
17710 Ops.push_back(EmitScalarExpr(E->getArg(i)));
17711 // The first argument of these disassemble builtins is a pointer used to
17712 // store their result. However, the llvm intrinsics return their result in
17713 // multiple return values. So, here we emit code extracting these values from
17714 // the intrinsic results and storing them using that pointer.
17715 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
17716 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
17717 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
17718 unsigned NumVecs = 2;
17719 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
17720 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
17721 NumVecs = 4;
17722 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
17723 }
17724 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
17725 Address Addr = EmitPointerWithAlignment(E->getArg(1));
17726 Value *Vec = Builder.CreateLoad(Addr);
17727 Value *Call = Builder.CreateCall(F, {Vec});
17728 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
17729 Value *Ptr = Ops[0];
17730 for (unsigned i=0; i<NumVecs; i++) {
17731 Value *Vec = Builder.CreateExtractValue(Call, i);
17732 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
17733 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
17734 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
17735 }
17736 return Call;
17737 }
17738 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
17739 BuiltinID == PPC::BI__builtin_mma_build_acc) {
17740 // Reverse the order of the operands for LE, so the
17741 // same builtin call can be used on both LE and BE
17742 // without the need for the programmer to swap operands.
17743 // The operands are reversed starting from the second argument;
17744 // the first operand is the pointer to the pair/accumulator
17745 // that is being built.
17746 if (getTarget().isLittleEndian())
17747 std::reverse(Ops.begin() + 1, Ops.end());
17748 }
17749 bool Accumulate;
17750 switch (BuiltinID) {
17751 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
17752 case PPC::BI__builtin_##Name: \
17753 ID = Intrinsic::ppc_##Intr; \
17754 Accumulate = Acc; \
17755 break;
17756 #include "clang/Basic/BuiltinsPPC.def"
17757 }
17758 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17759 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
17760 BuiltinID == PPC::BI__builtin_mma_lxvp ||
17761 BuiltinID == PPC::BI__builtin_mma_stxvp) {
17762 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
17763 BuiltinID == PPC::BI__builtin_mma_lxvp) {
17764 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17765 } else {
17766 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17767 }
17768 Ops.pop_back();
17769 llvm::Function *F = CGM.getIntrinsic(ID);
17770 return Builder.CreateCall(F, Ops, "");
17771 }
17772 SmallVector<Value*, 4> CallOps;
17773 if (Accumulate) {
17774 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17775 Value *Acc = Builder.CreateLoad(Addr);
17776 CallOps.push_back(Acc);
17777 }
17778 for (unsigned i=1; i<Ops.size(); i++)
17779 CallOps.push_back(Ops[i]);
17780 llvm::Function *F = CGM.getIntrinsic(ID);
17781 Value *Call = Builder.CreateCall(F, CallOps);
17782 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
17783 }
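  // Hedged sketch of the accumulating path above, for a hypothetical call:
  //   __vector_quad acc;
  //   __builtin_mma_xvf32gerpp(&acc, va, vb);
  // This expands into a load of the accumulator through &acc, a call to the
  // corresponding ppc_mma intrinsic with the loaded accumulator prepended to
  // the remaining operands, and an aligned store of the result back to &acc.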
17784
17785 case PPC::BI__builtin_ppc_compare_and_swap:
17786 case PPC::BI__builtin_ppc_compare_and_swaplp: {
17787 Address Addr = EmitPointerWithAlignment(E->getArg(0));
17788 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
17789 Value *OldVal = Builder.CreateLoad(OldValAddr);
17790 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
17791 LValue LV = MakeAddrLValue(Addr, AtomicTy);
17792 Value *Op2 = EmitScalarExpr(E->getArg(2));
17793 auto Pair = EmitAtomicCompareExchange(
17794 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
17795 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
17796 // Unlike c11's atomic_compare_exchange, according to
17797 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
17798 // > In either case, the contents of the memory location specified by addr
17799 // > are copied into the memory location specified by old_val_addr.
17800 // But it doesn't specify whether the store to OldValAddr is atomic or
17801 // which ordering to use. Following XL's codegen, treat it as a normal
17802 // store.
17803 Value *LoadedVal = Pair.first.getScalarVal();
17804 Builder.CreateStore(LoadedVal, OldValAddr);
17805 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
17806 }
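  // Hedged caller-side sketch of the semantics implemented above:
  //   int old = expected;
  //   int ok = __builtin_ppc_compare_and_swap(&shared, &old, desired);
  // On return, 'old' holds the value observed in 'shared' whether or not the
  // exchange succeeded, and 'ok' is the zero-extended success flag of the
  // monotonic cmpxchg.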
17807 case PPC::BI__builtin_ppc_fetch_and_add:
17808 case PPC::BI__builtin_ppc_fetch_and_addlp: {
17809 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
17810 llvm::AtomicOrdering::Monotonic);
17811 }
17812 case PPC::BI__builtin_ppc_fetch_and_and:
17813 case PPC::BI__builtin_ppc_fetch_and_andlp: {
17814 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
17815 llvm::AtomicOrdering::Monotonic);
17816 }
17817
17818 case PPC::BI__builtin_ppc_fetch_and_or:
17819 case PPC::BI__builtin_ppc_fetch_and_orlp: {
17820 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
17821 llvm::AtomicOrdering::Monotonic);
17822 }
17823 case PPC::BI__builtin_ppc_fetch_and_swap:
17824 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
17825 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
17826 llvm::AtomicOrdering::Monotonic);
17827 }
17828 case PPC::BI__builtin_ppc_ldarx:
17829 case PPC::BI__builtin_ppc_lwarx:
17830 case PPC::BI__builtin_ppc_lharx:
17831 case PPC::BI__builtin_ppc_lbarx:
17832 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
17833 case PPC::BI__builtin_ppc_mfspr: {
17834 Value *Op0 = EmitScalarExpr(E->getArg(0));
17835 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17836 ? Int32Ty
17837 : Int64Ty;
17838 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
17839 return Builder.CreateCall(F, {Op0});
17840 }
17841 case PPC::BI__builtin_ppc_mtspr: {
17842 Value *Op0 = EmitScalarExpr(E->getArg(0));
17843 Value *Op1 = EmitScalarExpr(E->getArg(1));
17844 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
17845 ? Int32Ty
17846 : Int64Ty;
17847 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
17848 return Builder.CreateCall(F, {Op0, Op1});
17849 }
17850 case PPC::BI__builtin_ppc_popcntb: {
17851 Value *ArgValue = EmitScalarExpr(E->getArg(0));
17852 llvm::Type *ArgType = ArgValue->getType();
17853 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
17854 return Builder.CreateCall(F, {ArgValue}, "popcntb");
17855 }
17856 case PPC::BI__builtin_ppc_mtfsf: {
17857 // The builtin takes a uint32 that needs to be cast to an
17858 // f64 to be passed to the intrinsic.
17859 Value *Op0 = EmitScalarExpr(E->getArg(0));
17860 Value *Op1 = EmitScalarExpr(E->getArg(1));
17861 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
17862 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
17863 return Builder.CreateCall(F, {Op0, Cast}, "");
17864 }
17865
17866 case PPC::BI__builtin_ppc_swdiv_nochk:
17867 case PPC::BI__builtin_ppc_swdivs_nochk: {
17868 Value *Op0 = EmitScalarExpr(E->getArg(0));
17869 Value *Op1 = EmitScalarExpr(E->getArg(1));
17870 FastMathFlags FMF = Builder.getFastMathFlags();
17871 Builder.getFastMathFlags().setFast();
17872 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
17873 Builder.getFastMathFlags() &= (FMF);
17874 return FDiv;
17875 }
17876 case PPC::BI__builtin_ppc_fric:
17877 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17878 *this, E, Intrinsic::rint,
17879 Intrinsic::experimental_constrained_rint))
17880 .getScalarVal();
17881 case PPC::BI__builtin_ppc_frim:
17882 case PPC::BI__builtin_ppc_frims:
17883 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17884 *this, E, Intrinsic::floor,
17885 Intrinsic::experimental_constrained_floor))
17886 .getScalarVal();
17887 case PPC::BI__builtin_ppc_frin:
17888 case PPC::BI__builtin_ppc_frins:
17889 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17890 *this, E, Intrinsic::round,
17891 Intrinsic::experimental_constrained_round))
17892 .getScalarVal();
17893 case PPC::BI__builtin_ppc_frip:
17894 case PPC::BI__builtin_ppc_frips:
17895 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17896 *this, E, Intrinsic::ceil,
17897 Intrinsic::experimental_constrained_ceil))
17898 .getScalarVal();
17899 case PPC::BI__builtin_ppc_friz:
17900 case PPC::BI__builtin_ppc_frizs:
17901 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17902 *this, E, Intrinsic::trunc,
17903 Intrinsic::experimental_constrained_trunc))
17904 .getScalarVal();
17905 case PPC::BI__builtin_ppc_fsqrt:
17906 case PPC::BI__builtin_ppc_fsqrts:
17907 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
17908 *this, E, Intrinsic::sqrt,
17909 Intrinsic::experimental_constrained_sqrt))
17910 .getScalarVal();
17911 case PPC::BI__builtin_ppc_test_data_class: {
17912 Value *Op0 = EmitScalarExpr(E->getArg(0));
17913 Value *Op1 = EmitScalarExpr(E->getArg(1));
17914 return Builder.CreateCall(
17915 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
17916 {Op0, Op1}, "test_data_class");
17917 }
17918 case PPC::BI__builtin_ppc_maxfe: {
17919 Value *Op0 = EmitScalarExpr(E->getArg(0));
17920 Value *Op1 = EmitScalarExpr(E->getArg(1));
17921 Value *Op2 = EmitScalarExpr(E->getArg(2));
17922 Value *Op3 = EmitScalarExpr(E->getArg(3));
17923 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
17924 {Op0, Op1, Op2, Op3});
17925 }
17926 case PPC::BI__builtin_ppc_maxfl: {
17927 Value *Op0 = EmitScalarExpr(E->getArg(0));
17928 Value *Op1 = EmitScalarExpr(E->getArg(1));
17929 Value *Op2 = EmitScalarExpr(E->getArg(2));
17930 Value *Op3 = EmitScalarExpr(E->getArg(3));
17931 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
17932 {Op0, Op1, Op2, Op3});
17933 }
17934 case PPC::BI__builtin_ppc_maxfs: {
17935 Value *Op0 = EmitScalarExpr(E->getArg(0));
17936 Value *Op1 = EmitScalarExpr(E->getArg(1));
17937 Value *Op2 = EmitScalarExpr(E->getArg(2));
17938 Value *Op3 = EmitScalarExpr(E->getArg(3));
17939 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
17940 {Op0, Op1, Op2, Op3});
17941 }
17942 case PPC::BI__builtin_ppc_minfe: {
17943 Value *Op0 = EmitScalarExpr(E->getArg(0));
17944 Value *Op1 = EmitScalarExpr(E->getArg(1));
17945 Value *Op2 = EmitScalarExpr(E->getArg(2));
17946 Value *Op3 = EmitScalarExpr(E->getArg(3));
17947 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
17948 {Op0, Op1, Op2, Op3});
17949 }
17950 case PPC::BI__builtin_ppc_minfl: {
17951 Value *Op0 = EmitScalarExpr(E->getArg(0));
17952 Value *Op1 = EmitScalarExpr(E->getArg(1));
17953 Value *Op2 = EmitScalarExpr(E->getArg(2));
17954 Value *Op3 = EmitScalarExpr(E->getArg(3));
17955 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
17956 {Op0, Op1, Op2, Op3});
17957 }
17958 case PPC::BI__builtin_ppc_minfs: {
17959 Value *Op0 = EmitScalarExpr(E->getArg(0));
17960 Value *Op1 = EmitScalarExpr(E->getArg(1));
17961 Value *Op2 = EmitScalarExpr(E->getArg(2));
17962 Value *Op3 = EmitScalarExpr(E->getArg(3));
17963 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
17964 {Op0, Op1, Op2, Op3});
17965 }
17966 case PPC::BI__builtin_ppc_swdiv:
17967 case PPC::BI__builtin_ppc_swdivs: {
17968 Value *Op0 = EmitScalarExpr(E->getArg(0));
17969 Value *Op1 = EmitScalarExpr(E->getArg(1));
17970 return Builder.CreateFDiv(Op0, Op1, "swdiv");
17971 }
17972 case PPC::BI__builtin_ppc_set_fpscr_rn:
17973 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
17974 {EmitScalarExpr(E->getArg(0))});
17975 case PPC::BI__builtin_ppc_mffs:
17976 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
17977 }
17978}
17979
17980namespace {
17981 // If \p E is not a null pointer, insert an address space cast to match the
17982 // return type of \p E if necessary.
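// The dispatch pointer points at the current launch's HSA
// kernel_dispatch_packet; the helpers below read workgroup and grid sizes
// from fixed byte offsets within it.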
17983Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
17984 const CallExpr *E = nullptr) {
17985 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
17986 auto *Call = CGF.Builder.CreateCall(F);
17987 Call->addRetAttr(
17988 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
17989 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
17990 if (!E)
17991 return Call;
17992 QualType BuiltinRetType = E->getType();
17993 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
17994 if (RetTy == Call->getType())
17995 return Call;
17996 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
17997}
17998
17999Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18000 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18001 auto *Call = CGF.Builder.CreateCall(F);
18002 Call->addRetAttr(
18003 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18004 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18005 return Call;
18006}
18007
18008 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
18009 /// Emit code based on Code Object ABI version.
18010 /// COV_4 : Emit code to use dispatch ptr
18011 /// COV_5+ : Emit code to use implicitarg ptr
18012 /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18013 /// and use its value to select the COV_4 or COV_5+ approach. It is
18014 /// used for compiling device libraries in an ABI-agnostic way.
18015 ///
18016 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18017 /// clang during compilation of user code.
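/// For example, with COV_5+ the x/y/z workgroup sizes are 16-bit fields at
/// byte offsets 12/14/16 of the implicit kernarg segment, while with COV_4
/// they sit at offsets 4/6/8 of the kernel_dispatch_packet (see the GEPs
/// below).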
18018Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18019 llvm::LoadInst *LD;
18020
18021 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18022
18023 if (Cov == CodeObjectVersionKind::COV_None) {
18024 StringRef Name = "__oclc_ABI_version";
18025 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18026 if (!ABIVersionC)
18027 ABIVersionC = new llvm::GlobalVariable(
18028 CGF.CGM.getModule(), CGF.Int32Ty, false,
18029 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18030 llvm::GlobalVariable::NotThreadLocal,
18031 CGF.CGM.getContext().getTargetAddressSpace(
18032 LangAS::opencl_constant));
18033 // This load will be eliminated by IPSCCP because it is constant
18034 // weak_odr without externally_initialized. Either changing it to weak or
18035 // adding externally_initialized will keep the load.
18036 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18037 CGF.CGM.getIntAlign());
18038
18039 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18040 ABIVersion,
18041 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18042
18043 // Indexing the implicit kernarg segment.
18044 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18045 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18046
18047 // Indexing the HSA kernel_dispatch_packet struct.
18048 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18049 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18050
18051 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18052 LD = CGF.Builder.CreateLoad(
18053 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18054 } else {
18055 Value *GEP = nullptr;
18056 if (Cov >= CodeObjectVersionKind::COV_5) {
18057 // Indexing the implicit kernarg segment.
18058 GEP = CGF.Builder.CreateConstGEP1_32(
18059 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18060 } else {
18061 // Indexing the HSA kernel_dispatch_packet struct.
18062 GEP = CGF.Builder.CreateConstGEP1_32(
18063 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18064 }
18065 LD = CGF.Builder.CreateLoad(
18066 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
18067 }
18068
18069 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18070 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
18071 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18072 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18073 LD->setMetadata(llvm::LLVMContext::MD_noundef,
18074 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18075 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18076 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18077 return LD;
18078}
18079
18080 // \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
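// The x/y/z grid sizes are 32-bit fields at byte offsets 12/16/20 of the
// kernel_dispatch_packet, hence XOffset = 12 and a stride of 4 below.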
18081Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18082 const unsigned XOffset = 12;
18083 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18084 // Indexing the HSA kernel_dispatch_packet struct.
18085 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
18086 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
18087 auto *LD = CGF.Builder.CreateLoad(
18088 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
18089 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18090 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18091 return LD;
18092}
18093} // namespace
18094
18095// For processing memory ordering and memory scope arguments of various
18096// amdgcn builtins.
18097 // \p Order takes a C++11-compatible memory-ordering specifier and converts
18098 // it into LLVM's memory-ordering specifier using the atomic C ABI, writing
18099 // the result to \p AO. \p Scope takes a const char * and converts it into
18100 // an AMDGCN-specific SyncScopeID, which is written to \p SSID.
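// For example, __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup") yields
// AO = Release and the "workgroup" sync scope (see the fence case below).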
18101 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
18102 llvm::AtomicOrdering &AO,
18103 llvm::SyncScope::ID &SSID) {
18104 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18105
18106 // Map C11/C++11 memory ordering to LLVM memory ordering
18107 assert(llvm::isValidAtomicOrderingCABI(ord));
18108 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18109 case llvm::AtomicOrderingCABI::acquire:
18110 case llvm::AtomicOrderingCABI::consume:
18111 AO = llvm::AtomicOrdering::Acquire;
18112 break;
18113 case llvm::AtomicOrderingCABI::release:
18114 AO = llvm::AtomicOrdering::Release;
18115 break;
18116 case llvm::AtomicOrderingCABI::acq_rel:
18117 AO = llvm::AtomicOrdering::AcquireRelease;
18118 break;
18119 case llvm::AtomicOrderingCABI::seq_cst:
18120 AO = llvm::AtomicOrdering::SequentiallyConsistent;
18121 break;
18122 case llvm::AtomicOrderingCABI::relaxed:
18123 AO = llvm::AtomicOrdering::Monotonic;
18124 break;
18125 }
18126
18127 StringRef scp;
18128 llvm::getConstantStringInfo(Scope, scp);
18129 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
18130}
18131
18132llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18133 unsigned Idx,
18134 const CallExpr *E) {
18135 llvm::Value *Arg = nullptr;
18136 if ((ICEArguments & (1 << Idx)) == 0) {
18137 Arg = EmitScalarExpr(E->getArg(Idx));
18138 } else {
18139 // If this is required to be a constant, constant fold it so that we
18140 // know that the generated intrinsic gets a ConstantInt.
18141 std::optional<llvm::APSInt> Result =
18142 E->getArg(Idx)->getIntegerConstantExpr(getContext());
18143 assert(Result && "Expected argument to be a constant");
18144 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
18145 }
18146 return Arg;
18147}
18148
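// Maps an HLSL dot() operand type and element count to the matching DirectX
// intrinsic: dx_dot2/3/4 for floating-point vectors, dx_sdot / dx_udot for
// signed / unsigned integer vectors.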
18149Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
18150 if (QT->hasFloatingRepresentation()) {
18151 switch (elementCount) {
18152 case 2:
18153 return Intrinsic::dx_dot2;
18154 case 3:
18155 return Intrinsic::dx_dot3;
18156 case 4:
18157 return Intrinsic::dx_dot4;
18158 }
18159 }
18160 if (QT->hasSignedIntegerRepresentation())
18161 return Intrinsic::dx_sdot;
18162
18163 assert(QT->hasUnsignedIntegerRepresentation());
18164 return Intrinsic::dx_udot;
18165}
18166
18167 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
18168 const CallExpr *E) {
18169 if (!getLangOpts().HLSL)
18170 return nullptr;
18171
18172 switch (BuiltinID) {
18173 case Builtin::BI__builtin_hlsl_elementwise_all: {
18174 Value *Op0 = EmitScalarExpr(E->getArg(0));
18175 return Builder.CreateIntrinsic(
18176 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18177 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18178 "hlsl.all");
18179 }
18180 case Builtin::BI__builtin_hlsl_elementwise_any: {
18181 Value *Op0 = EmitScalarExpr(E->getArg(0));
18182 return Builder.CreateIntrinsic(
18183 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18184 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18185 "hlsl.any");
18186 }
18187 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
18188 Value *OpX = EmitScalarExpr(E->getArg(0));
18189 Value *OpMin = EmitScalarExpr(E->getArg(1));
18190 Value *OpMax = EmitScalarExpr(E->getArg(2));
18191
18192 QualType Ty = E->getArg(0)->getType();
18193 bool IsUnsigned = false;
18194 if (auto *VecTy = Ty->getAs<VectorType>())
18195 Ty = VecTy->getElementType();
18196 IsUnsigned = Ty->isUnsignedIntegerType();
18197 return Builder.CreateIntrinsic(
18198 /*ReturnType=*/OpX->getType(),
18199 IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18200 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18201 }
18202 case Builtin::BI__builtin_hlsl_dot: {
18203 Value *Op0 = EmitScalarExpr(E->getArg(0));
18204 Value *Op1 = EmitScalarExpr(E->getArg(1));
18205 llvm::Type *T0 = Op0->getType();
18206 llvm::Type *T1 = Op1->getType();
18207 if (!T0->isVectorTy() && !T1->isVectorTy()) {
18208 if (T0->isFloatingPointTy())
18209 return Builder.CreateFMul(Op0, Op1, "dx.dot");
18210
18211 if (T0->isIntegerTy())
18212 return Builder.CreateMul(Op0, Op1, "dx.dot");
18213
18214 // Bools should have been promoted
18215 llvm_unreachable(
18216 "Scalar dot product is only supported on ints and floats.");
18217 }
18218 // A VectorSplat should have happened
18219 assert(T0->isVectorTy() && T1->isVectorTy() &&
18220 "Dot product of vector and scalar is not supported.");
18221
18222 // A vector sext or sitofp should have happened
18223 assert(T0->getScalarType() == T1->getScalarType() &&
18224 "Dot product of vectors need the same element types.");
18225
18226 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
18227 [[maybe_unused]] auto *VecTy1 =
18228 E->getArg(1)->getType()->getAs<VectorType>();
18229 // An HLSLVectorTruncation should have happened
18230 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18231 "Dot product requires vectors to be of the same size.");
18232
18233 return Builder.CreateIntrinsic(
18234 /*ReturnType=*/T0->getScalarType(),
18235 getDotProductIntrinsic(E->getArg(0)->getType(),
18236 VecTy0->getNumElements()),
18237 ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18238 } break;
18239 case Builtin::BI__builtin_hlsl_lerp: {
18240 Value *X = EmitScalarExpr(E->getArg(0));
18241 Value *Y = EmitScalarExpr(E->getArg(1));
18242 Value *S = EmitScalarExpr(E->getArg(2));
18243 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18244 llvm_unreachable("lerp operand must have a float representation");
18245 return Builder.CreateIntrinsic(
18246 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
18247 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
18248 }
18249 case Builtin::BI__builtin_hlsl_elementwise_frac: {
18250 Value *Op0 = EmitScalarExpr(E->getArg(0));
18251 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18252 llvm_unreachable("frac operand must have a float representation");
18253 return Builder.CreateIntrinsic(
18254 /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,
18255 ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
18256 }
18257 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18258 Value *Op0 = EmitScalarExpr(E->getArg(0));
18259 llvm::Type *Xty = Op0->getType();
18260 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
18261 if (Xty->isVectorTy()) {
18262 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
18263 retType = llvm::VectorType::get(
18264 retType, ElementCount::getFixed(XVecTy->getNumElements()));
18265 }
18266 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18267 llvm_unreachable("isinf operand must have a float representation");
18268 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18269 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18270 }
18271 case Builtin::BI__builtin_hlsl_mad: {
18272 Value *M = EmitScalarExpr(E->getArg(0));
18273 Value *A = EmitScalarExpr(E->getArg(1));
18274 Value *B = EmitScalarExpr(E->getArg(2));
18275 if (E->getArg(0)->getType()->hasFloatingRepresentation())
18276 return Builder.CreateIntrinsic(
18277 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
18278 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
18279
18280 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
18281 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18282 return Builder.CreateIntrinsic(
18283 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
18284 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
18285
18286 Value *Mul = Builder.CreateNSWMul(M, A);
18287 return Builder.CreateNSWAdd(Mul, B);
18288 }
18289 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
18290 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18291 return Builder.CreateIntrinsic(
18292 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
18293 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
18294
18295 Value *Mul = Builder.CreateNUWMul(M, A);
18296 return Builder.CreateNUWAdd(Mul, B);
18297 }
18298 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
18299 Value *Op0 = EmitScalarExpr(E->getArg(0));
18300 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18301 llvm_unreachable("rcp operand must have a float representation");
18302 llvm::Type *Ty = Op0->getType();
18303 llvm::Type *EltTy = Ty->getScalarType();
18304 Constant *One =
18305 Ty->isVectorTy()
18306 ? ConstantVector::getSplat(
18307 ElementCount::getFixed(
18308 dyn_cast<FixedVectorType>(Ty)->getNumElements()),
18309 ConstantFP::get(EltTy, 1.0))
18310 : ConstantFP::get(EltTy, 1.0);
18311 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
18312 }
18313 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
18314 Value *Op0 = EmitScalarExpr(E->getArg(0));
18315 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18316 llvm_unreachable("rsqrt operand must have a float representation");
18317 return Builder.CreateIntrinsic(
18318 /*ReturnType=*/Op0->getType(), Intrinsic::dx_rsqrt,
18319 ArrayRef<Value *>{Op0}, nullptr, "dx.rsqrt");
18320 }
18321 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
18322 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
18323 llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
18324 {}, false, true));
18325 }
18326 }
18327 return nullptr;
18328}
18329
18330 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18331 const CallExpr *E) {
18332 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
18333 llvm::SyncScope::ID SSID;
18334 switch (BuiltinID) {
18335 case AMDGPU::BI__builtin_amdgcn_div_scale:
18336 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
18337 // Translate from the intrinsic's struct return to the builtin's out
18338 // argument.
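// amdgcn_div_scale returns a {value, flag} pair; the i1 flag is zero-extended
// and stored through the builtin's fourth (pointer) argument.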
18339
18340 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
18341
18342 llvm::Value *X = EmitScalarExpr(E->getArg(0));
18343 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
18344 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
18345
18346 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
18347 X->getType());
18348
18349 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
18350
18351 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
18352 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
18353
18354 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
18355
18356 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
18357 Builder.CreateStore(FlagExt, FlagOutPtr);
18358 return Result;
18359 }
18360 case AMDGPU::BI__builtin_amdgcn_div_fmas:
18361 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
18362 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18363 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18364 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18365 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18366
18367 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
18368 Src0->getType());
18369 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
18370 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
18371 }
18372
18373 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
18374 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
18375 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
18376 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
18377 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
18378 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
18379 llvm::SmallVector<llvm::Value *, 6> Args;
18380 // Find out if any arguments are required to be integer constant
18381 // expressions.
18382 unsigned ICEArguments = 0;
18383 ASTContext::GetBuiltinTypeError Error;
18384 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
18385 assert(Error == ASTContext::GE_None && "Should not codegen an error");
18386 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
18387 Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
18388 }
18389 assert(Args.size() == 5 || Args.size() == 6);
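// __builtin_amdgcn_mov_dpp has no 'old' operand, so prepend a poison value
// and lower both builtins through the same amdgcn.update.dpp intrinsic.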
18390 if (Args.size() == 5)
18391 Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
18392 Function *F =
18393 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
18394 return Builder.CreateCall(F, Args);
18395 }
18396 case AMDGPU::BI__builtin_amdgcn_div_fixup:
18397 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
18398 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
18399 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
18400 case AMDGPU::BI__builtin_amdgcn_trig_preop:
18401 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
18402 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
18403 case AMDGPU::BI__builtin_amdgcn_rcp:
18404 case AMDGPU::BI__builtin_amdgcn_rcpf:
18405 case AMDGPU::BI__builtin_amdgcn_rcph:
18406 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
18407 case AMDGPU::BI__builtin_amdgcn_sqrt:
18408 case AMDGPU::BI__builtin_amdgcn_sqrtf:
18409 case AMDGPU::BI__builtin_amdgcn_sqrth:
18410 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sqrt);
18411 case AMDGPU::BI__builtin_amdgcn_rsq:
18412 case AMDGPU::BI__builtin_amdgcn_rsqf:
18413 case AMDGPU::BI__builtin_amdgcn_rsqh:
18414 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
18415 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
18416 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
18417 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
18418 case AMDGPU::BI__builtin_amdgcn_sinf:
18419 case AMDGPU::BI__builtin_amdgcn_sinh:
18420 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
18421 case AMDGPU::BI__builtin_amdgcn_cosf:
18422 case AMDGPU::BI__builtin_amdgcn_cosh:
18423 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
18424 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
18425 return EmitAMDGPUDispatchPtr(*this, E);
18426 case AMDGPU::BI__builtin_amdgcn_logf:
18427 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log);
18428 case AMDGPU::BI__builtin_amdgcn_exp2f:
18429 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_exp2);
18430 case AMDGPU::BI__builtin_amdgcn_log_clampf:
18431 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
18432 case AMDGPU::BI__builtin_amdgcn_ldexp:
18433 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
18434 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18435 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18436 llvm::Function *F =
18437 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
18438 return Builder.CreateCall(F, {Src0, Src1});
18439 }
18440 case AMDGPU::BI__builtin_amdgcn_ldexph: {
18441 // The raw instruction behaves differently for out-of-bounds exponent
18442 // values (implicit truncation instead of saturating to short_min/short_max).
18443 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18444 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18445 llvm::Function *F =
18446 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
18447 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
18448 }
18449 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
18450 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
18451 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
18452 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
18453 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
18454 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
18455 Value *Src0 = EmitScalarExpr(E->getArg(0));
18456 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18457 { Builder.getInt32Ty(), Src0->getType() });
18458 return Builder.CreateCall(F, Src0);
18459 }
18460 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
18461 Value *Src0 = EmitScalarExpr(E->getArg(0));
18462 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18463 { Builder.getInt16Ty(), Src0->getType() });
18464 return Builder.CreateCall(F, Src0);
18465 }
18466 case AMDGPU::BI__builtin_amdgcn_fract:
18467 case AMDGPU::BI__builtin_amdgcn_fractf:
18468 case AMDGPU::BI__builtin_amdgcn_fracth:
18469 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
18470 case AMDGPU::BI__builtin_amdgcn_lerp:
18471 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
18472 case AMDGPU::BI__builtin_amdgcn_ubfe:
18473 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
18474 case AMDGPU::BI__builtin_amdgcn_sbfe:
18475 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
18476 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18477 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18478 llvm::Type *ResultType = ConvertType(E->getType());
18479 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18480 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18481 return Builder.CreateCall(F, { Src });
18482 }
18483 case AMDGPU::BI__builtin_amdgcn_uicmp:
18484 case AMDGPU::BI__builtin_amdgcn_uicmpl:
18485 case AMDGPU::BI__builtin_amdgcn_sicmp:
18486 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18487 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18488 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18489 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18490
18491 // FIXME-GFX10: How should 32 bit mask be handled?
18492 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18493 { Builder.getInt64Ty(), Src0->getType() });
18494 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18495 }
18496 case AMDGPU::BI__builtin_amdgcn_fcmp:
18497 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18498 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18499 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18500 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18501
18502 // FIXME-GFX10: How should 32 bit mask be handled?
18503 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18504 { Builder.getInt64Ty(), Src0->getType() });
18505 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18506 }
18507 case AMDGPU::BI__builtin_amdgcn_class:
18508 case AMDGPU::BI__builtin_amdgcn_classf:
18509 case AMDGPU::BI__builtin_amdgcn_classh:
18510 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18511 case AMDGPU::BI__builtin_amdgcn_fmed3f:
18512 case AMDGPU::BI__builtin_amdgcn_fmed3h:
18513 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
18514 case AMDGPU::BI__builtin_amdgcn_ds_append:
18515 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18516 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18517 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18518 Value *Src0 = EmitScalarExpr(E->getArg(0));
18519 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
18520 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
18521 }
18522 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18523 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18524 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
18525 Intrinsic::ID Intrin;
18526 switch (BuiltinID) {
18527 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
18528 Intrin = Intrinsic::amdgcn_ds_fadd;
18529 break;
18530 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
18531 Intrin = Intrinsic::amdgcn_ds_fmin;
18532 break;
18533 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
18534 Intrin = Intrinsic::amdgcn_ds_fmax;
18535 break;
18536 }
18537 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18538 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18539 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18540 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18541 llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
18542 llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
18543 llvm::FunctionType *FTy = F->getFunctionType();
18544 llvm::Type *PTy = FTy->getParamType(0);
18545 Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
18546 return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
18547 }
18548 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18549 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18550 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18551 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18552 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18553 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18554 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18555 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18556 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18557 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {
18558 Intrinsic::ID IID;
18559 llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18560 switch (BuiltinID) {
18561 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
18562 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18563 IID = Intrinsic::amdgcn_global_atomic_fadd;
18564 break;
18565 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
18566 ArgTy = llvm::FixedVectorType::get(
18567 llvm::Type::getHalfTy(getLLVMContext()), 2);
18568 IID = Intrinsic::amdgcn_global_atomic_fadd;
18569 break;
18570 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
18571 IID = Intrinsic::amdgcn_global_atomic_fadd;
18572 break;
18573 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
18574 IID = Intrinsic::amdgcn_global_atomic_fmin;
18575 break;
18576 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
18577 IID = Intrinsic::amdgcn_global_atomic_fmax;
18578 break;
18579 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
18580 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18581 break;
18582 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
18583 IID = Intrinsic::amdgcn_flat_atomic_fmin;
18584 break;
18585 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
18586 IID = Intrinsic::amdgcn_flat_atomic_fmax;
18587 break;
18588 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
18589 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18590 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18591 break;
18592 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
18593 ArgTy = llvm::FixedVectorType::get(
18594 llvm::Type::getHalfTy(getLLVMContext()), 2);
18595 IID = Intrinsic::amdgcn_flat_atomic_fadd;
18596 break;
18597 }
18598 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18599 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18600 llvm::Function *F =
18601 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
18602 return Builder.CreateCall(F, {Addr, Val});
18603 }
18604 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18605 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18606 Intrinsic::ID IID;
18607 switch (BuiltinID) {
18608 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18609 IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18610 break;
18611 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18612 IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18613 break;
18614 }
18615 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18616 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18617 llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18618 return Builder.CreateCall(F, {Addr, Val});
18619 }
18620 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18621 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18622 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
18623 Intrinsic::ID IID;
18624 llvm::Type *ArgTy;
18625 switch (BuiltinID) {
18626 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
18627 ArgTy = llvm::Type::getFloatTy(getLLVMContext());
18628 IID = Intrinsic::amdgcn_ds_fadd;
18629 break;
18630 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
18631 ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
18632 IID = Intrinsic::amdgcn_ds_fadd;
18633 break;
18634 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
18635 ArgTy = llvm::FixedVectorType::get(
18636 llvm::Type::getHalfTy(getLLVMContext()), 2);
18637 IID = Intrinsic::amdgcn_ds_fadd;
18638 break;
18639 }
18640 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18641 llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18642 llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
18643 llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
18644 llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
18645 llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
18646 llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
18647 return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
18648 }
18649 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18650 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18651 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18652 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: {
18653
18654 Intrinsic::ID IID;
18655 switch (BuiltinID) {
18656 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18657 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18658 IID = Intrinsic::amdgcn_global_load_tr_b64;
18659 break;
18660 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18661 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18662 IID = Intrinsic::amdgcn_global_load_tr_b128;
18663 break;
18664 }
18665 llvm::Type *LoadTy = ConvertType(E->getType());
18666 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18667 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
18668 return Builder.CreateCall(F, {Addr});
18669 }
18670 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
18671 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
18672 {llvm::Type::getInt64Ty(getLLVMContext())});
18673 return Builder.CreateCall(F);
18674 }
18675 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
18676 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
18677 {llvm::Type::getInt64Ty(getLLVMContext())});
18678 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
18679 return Builder.CreateCall(F, {Env});
18680 }
18681 case AMDGPU::BI__builtin_amdgcn_read_exec:
18682 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
18683 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
18684 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
18685 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
18686 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
18687 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
18688 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
18689 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
18690 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
18691 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
18692 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
18693 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
18694 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
18695 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
18696 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
18697
18698 // The builtins take these arguments as vec4 where the last element is
18699 // ignored. The intrinsic takes them as vec3.
18700 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
18701 ArrayRef<int>{0, 1, 2});
18702 RayDir =
18703 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
18704 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
18705 ArrayRef<int>{0, 1, 2});
18706
18707 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
18708 {NodePtr->getType(), RayDir->getType()});
18709 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
18710 RayInverseDir, TextureDescr});
18711 }
18712
18713 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
18714 SmallVector<Value *, 4> Args;
18715 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18716 Args.push_back(EmitScalarExpr(E->getArg(i)));
18717
18718 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
18719 Value *Call = Builder.CreateCall(F, Args);
18720 Value *Rtn = Builder.CreateExtractValue(Call, 0);
18721 Value *A = Builder.CreateExtractValue(Call, 1);
18722 llvm::Type *RetTy = ConvertType(E->getType());
18723 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
18724 (uint64_t)0);
18725 return Builder.CreateInsertElement(I0, A, 1);
18726 }
18727
18728 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18729 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18730 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18731 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18732 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18733 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18734 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18735 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18736 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18737 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18738 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18739 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18740 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18741 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18742 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18743 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18744 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18745 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18746 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18747 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18748 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18749 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18750 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18751 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18752 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18753 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18754 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18755 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18756 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18757 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18758 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18759 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18760 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18761 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18762 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18763 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18764 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18765 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18766 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18767 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18768 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18769 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18770 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18771 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18772 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18773 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18774 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18775 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18776 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18777 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18778 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18779 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18780 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18781 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18782 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18783 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18784 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18785 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18786 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18787 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
18788
18789 // These operations perform a matrix multiplication and accumulation of
18790 // the form:
18791 // D = A * B + C
18792 // We need to specify one type for matrices AB and one for matrices CD.
18793 // Sparse matrix operations can have different types for A and B as well as
18794 // an additional type for the sparsity index.
18795 // The destination type should come before the types of the source operands.
18796 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
18797 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
18798 // There is no need for the variable opsel argument, so always set it to
18799 // "false".
18800 bool AppendFalseForOpselArg = false;
18801 unsigned BuiltinWMMAOp;
18802
18803 switch (BuiltinID) {
18804 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
18805 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18806 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18807 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18808 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18809 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
18810 break;
18811 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18812 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18813 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18814 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18815 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18816 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
18817 break;
18818 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18819 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18820 AppendFalseForOpselArg = true;
18821 [[fallthrough]];
18822 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18823 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18824 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18825 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
18826 break;
18827 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18828 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18829 AppendFalseForOpselArg = true;
18830 [[fallthrough]];
18831 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18832 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18833 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18834 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
18835 break;
18836 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18837 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18838 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18839 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
18840 break;
18841 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18842 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18843 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18844 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
18845 break;
18846 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18847 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18848 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18849 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18850 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18851 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
18852 break;
18853 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18854 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18855 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18856 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18857 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18858 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
18859 break;
18860 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18861 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18862 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18863 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
18864 break;
18865 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18866 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18867 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18868 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
18869 break;
18870 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18871 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18872 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18873 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
18874 break;
18875 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18876 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18877 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18878 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
18879 break;
18880 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18881 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18882 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18883 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
18884 break;
18885 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18886 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18887 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18888 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
18889 break;
18890 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18891 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18892 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18893 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
18894 break;
18895 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18896 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18897 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18898 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
18899 break;
18900 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18901 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18902 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18903 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
18904 break;
18905 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18906 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18907 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18908 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
18909 break;
18910 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18911 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18912 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18913 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
18914 break;
18915 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18916 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18917 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18918 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
18919 break;
18920 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18921 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18922 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18923 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
18924 break;
18925 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18926 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18927 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18928 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
18929 break;
18930 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18931 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18932 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18933 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
18934 break;
18935 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18936 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
18937 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18938 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
18939 break;
18940 }
18941
18942 SmallVector<Value *, 6> Args;
18943 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18944 Args.push_back(EmitScalarExpr(E->getArg(i)));
18945 if (AppendFalseForOpselArg)
18946 Args.push_back(Builder.getFalse());
18947
18948 SmallVector<llvm::Type *, 6> ArgTypes;
18949 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
18950 ArgTypes.push_back(Args[ArgIdx]->getType());
18951
18952 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
18953 return Builder.CreateCall(F, Args);
18954 }
18955
18956 // amdgcn workitem
18957 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
18958 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
18959 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
18960 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
18961 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
18962 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
18963
18964 // amdgcn workgroup size
18965 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
18966 return EmitAMDGPUWorkGroupSize(*this, 0);
18967 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
18968 return EmitAMDGPUWorkGroupSize(*this, 1);
18969 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
18970 return EmitAMDGPUWorkGroupSize(*this, 2);
18971
18972 // amdgcn grid size
18973 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
18974 return EmitAMDGPUGridSize(*this, 0);
18975 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
18976 return EmitAMDGPUGridSize(*this, 1);
18977 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
18978 return EmitAMDGPUGridSize(*this, 2);
18979
18980 // r600 intrinsics
18981 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
18982 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
18983 return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
18984 case AMDGPU::BI__builtin_r600_read_tidig_x:
18985 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
18986 case AMDGPU::BI__builtin_r600_read_tidig_y:
18987 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
18988 case AMDGPU::BI__builtin_r600_read_tidig_z:
18989 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
18990 case AMDGPU::BI__builtin_amdgcn_alignbit: {
18991 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18992 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18993 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18994 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
18995 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18996 }
18997 case AMDGPU::BI__builtin_amdgcn_fence: {
18998 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
18999 EmitScalarExpr(E->getArg(1)), AO, SSID);
19000 return Builder.CreateFence(AO, SSID);
19001 }
19002 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19003 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19004 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19005 case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
19006 llvm::AtomicRMWInst::BinOp BinOp;
19007 switch (BuiltinID) {
19008 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19009 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19010 BinOp = llvm::AtomicRMWInst::UIncWrap;
19011 break;
19012 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19013 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19014 BinOp = llvm::AtomicRMWInst::UDecWrap;
19015 break;
19016 }
19017
19018 Address Ptr = CheckAtomicAlignment(*this, E);
19019 Value *Val = EmitScalarExpr(E->getArg(1));
19020
19021 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
19022 EmitScalarExpr(E->getArg(3)), AO, SSID);
19023
19024 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
19025 bool Volatile =
19026 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
19027
19028 llvm::AtomicRMWInst *RMW =
19029 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
19030 if (Volatile)
19031 RMW->setVolatile(true);
19032 return RMW;
19033 }
19034 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
19035 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
19036 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
19037 llvm::Type *ResultType = ConvertType(E->getType());
19038 // s_sendmsg_rtn is mangled using return type only.
19039 Function *F =
19040 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
19041 return Builder.CreateCall(F, {Arg});
19042 }
19043 default:
19044 return nullptr;
19045 }
19046}
19047
19048/// Handle a SystemZ function in which the final argument is a pointer
19049/// to an int that receives the post-instruction CC value. At the LLVM level
19050/// this is represented as a function that returns a {result, cc} pair.
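/// Illustrative shape (hypothetical builtin): for T __builtin_foo(args..., int *cc)
/// the helper emits a call returning {T, i32}, stores element 1 through 'cc',
/// and returns element 0 as the builtin's value.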
19051 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
19052 unsigned IntrinsicID,
19053 const CallExpr *E) {
19054 unsigned NumArgs = E->getNumArgs() - 1;
19055 SmallVector<Value *, 8> Args(NumArgs);
19056 for (unsigned I = 0; I < NumArgs; ++I)
19057 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
19058 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
19059 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
19060 Value *Call = CGF.Builder.CreateCall(F, Args);
19061 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
19062 CGF.Builder.CreateStore(CC, CCPtr);
19063 return CGF.Builder.CreateExtractValue(Call, 0);
19064}
19065
19066 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
19067 const CallExpr *E) {
19068 switch (BuiltinID) {
19069 case SystemZ::BI__builtin_tbegin: {
19070 Value *TDB = EmitScalarExpr(E->getArg(0));
19071 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19072 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
19073 return Builder.CreateCall(F, {TDB, Control});
19074 }
19075 case SystemZ::BI__builtin_tbegin_nofloat: {
19076 Value *TDB = EmitScalarExpr(E->getArg(0));
19077 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19078 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
19079 return Builder.CreateCall(F, {TDB, Control});
19080 }
19081 case SystemZ::BI__builtin_tbeginc: {
19082 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
19083 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
19084 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
19085 return Builder.CreateCall(F, {TDB, Control});
19086 }
19087 case SystemZ::BI__builtin_tabort: {
19088 Value *Data = EmitScalarExpr(E->getArg(0));
19089 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
19090 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
19091 }
19092 case SystemZ::BI__builtin_non_tx_store: {
19093 Value *Address = EmitScalarExpr(E->getArg(0));
19094 Value *Data = EmitScalarExpr(E->getArg(1));
19095 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
19096 return Builder.CreateCall(F, {Data, Address});
19097 }
19098
19099 // Vector builtins. Note that most vector builtins are mapped automatically
19100 // to target-specific LLVM intrinsics. The ones handled specially here can
19101 // be represented via standard LLVM IR, which is preferable to enable common
19102 // LLVM optimizations.
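// For example, __builtin_s390_vpopct[b|h|f|g] below becomes a plain
// llvm.ctpop call, which generic optimizations understand, instead of an
// opaque target-specific intrinsic.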
19103
19104 case SystemZ::BI__builtin_s390_vpopctb:
19105 case SystemZ::BI__builtin_s390_vpopcth:
19106 case SystemZ::BI__builtin_s390_vpopctf:
19107 case SystemZ::BI__builtin_s390_vpopctg: {
19108 llvm::Type *ResultType = ConvertType(E->getType());
19109 Value *X = EmitScalarExpr(E->getArg(0));
19110 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
19111 return Builder.CreateCall(F, X);
19112 }
19113
19114 case SystemZ::BI__builtin_s390_vclzb:
19115 case SystemZ::BI__builtin_s390_vclzh:
19116 case SystemZ::BI__builtin_s390_vclzf:
19117 case SystemZ::BI__builtin_s390_vclzg: {
19118 llvm::Type *ResultType = ConvertType(E->getType());
19119 Value *X = EmitScalarExpr(E->getArg(0));
19120 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19121 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
19122 return Builder.CreateCall(F, {X, Undef});
19123 }
19124
19125 case SystemZ::BI__builtin_s390_vctzb:
19126 case SystemZ::BI__builtin_s390_vctzh:
19127 case SystemZ::BI__builtin_s390_vctzf:
19128 case SystemZ::BI__builtin_s390_vctzg: {
19129 llvm::Type *ResultType = ConvertType(E->getType());
19130 Value *X = EmitScalarExpr(E->getArg(0));
19131 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19132 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
19133 return Builder.CreateCall(F, {X, Undef});
19134 }
19135
19136 case SystemZ::BI__builtin_s390_verllb:
19137 case SystemZ::BI__builtin_s390_verllh:
19138 case SystemZ::BI__builtin_s390_verllf:
19139 case SystemZ::BI__builtin_s390_verllg: {
19140 llvm::Type *ResultType = ConvertType(E->getType());
19141 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19142 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19143 // Splat scalar rotate amount to vector type.
19144 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
19145 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
19146 Amt = Builder.CreateVectorSplat(NumElts, Amt);
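 // A rotate left is a funnel shift with both inputs equal: for each element,
 // fshl(Src, Src, Amt) == rotl(Src, Amt), with the shift amount taken modulo
 // the element width.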
19147 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19148 return Builder.CreateCall(F, { Src, Src, Amt });
19149 }
19150
19151 case SystemZ::BI__builtin_s390_verllvb:
19152 case SystemZ::BI__builtin_s390_verllvh:
19153 case SystemZ::BI__builtin_s390_verllvf:
19154 case SystemZ::BI__builtin_s390_verllvg: {
19155 llvm::Type *ResultType = ConvertType(E->getType());
19156 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19157 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19158 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19159 return Builder.CreateCall(F, { Src, Src, Amt });
19160 }
19161
19162 case SystemZ::BI__builtin_s390_vfsqsb:
19163 case SystemZ::BI__builtin_s390_vfsqdb: {
19164 llvm::Type *ResultType = ConvertType(E->getType());
19165 Value *X = EmitScalarExpr(E->getArg(0));
19166 if (Builder.getIsFPConstrained()) {
19167 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
19168 return Builder.CreateConstrainedFPCall(F, { X });
19169 } else {
19170 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
19171 return Builder.CreateCall(F, X);
19172 }
19173 }
19174 case SystemZ::BI__builtin_s390_vfmasb:
19175 case SystemZ::BI__builtin_s390_vfmadb: {
19176 llvm::Type *ResultType = ConvertType(E->getType());
19177 Value *X = EmitScalarExpr(E->getArg(0));
19178 Value *Y = EmitScalarExpr(E->getArg(1));
19179 Value *Z = EmitScalarExpr(E->getArg(2));
19180 if (Builder.getIsFPConstrained()) {
19181 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19182 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
19183 } else {
19184 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19185 return Builder.CreateCall(F, {X, Y, Z});
19186 }
19187 }
19188 case SystemZ::BI__builtin_s390_vfmssb:
19189 case SystemZ::BI__builtin_s390_vfmsdb: {
19190 llvm::Type *ResultType = ConvertType(E->getType());
19191 Value *X = EmitScalarExpr(E->getArg(0));
19192 Value *Y = EmitScalarExpr(E->getArg(1));
19193 Value *Z = EmitScalarExpr(E->getArg(2));
19194 if (Builder.getIsFPConstrained()) {
19195 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19196 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19197 } else {
19198 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19199 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19200 }
19201 }
19202 case SystemZ::BI__builtin_s390_vfnmasb:
19203 case SystemZ::BI__builtin_s390_vfnmadb: {
19204 llvm::Type *ResultType = ConvertType(E->getType());
19205 Value *X = EmitScalarExpr(E->getArg(0));
19206 Value *Y = EmitScalarExpr(E->getArg(1));
19207 Value *Z = EmitScalarExpr(E->getArg(2));
19208 if (Builder.getIsFPConstrained()) {
19209 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19210 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
19211 } else {
19212 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19213 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
19214 }
19215 }
19216 case SystemZ::BI__builtin_s390_vfnmssb:
19217 case SystemZ::BI__builtin_s390_vfnmsdb: {
19218 llvm::Type *ResultType = ConvertType(E->getType());
19219 Value *X = EmitScalarExpr(E->getArg(0));
19220 Value *Y = EmitScalarExpr(E->getArg(1));
19221 Value *Z = EmitScalarExpr(E->getArg(2));
19222 if (Builder.getIsFPConstrained()) {
19223 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19224 Value *NegZ = Builder.CreateFNeg(Z, "sub");
19225 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
19226 } else {
19227 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19228 Value *NegZ = Builder.CreateFNeg(Z, "neg");
19229 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
19230 }
19231 }
19232 case SystemZ::BI__builtin_s390_vflpsb:
19233 case SystemZ::BI__builtin_s390_vflpdb: {
19234 llvm::Type *ResultType = ConvertType(E->getType());
19235 Value *X = EmitScalarExpr(E->getArg(0));
19236 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19237 return Builder.CreateCall(F, X);
19238 }
19239 case SystemZ::BI__builtin_s390_vflnsb:
19240 case SystemZ::BI__builtin_s390_vflndb: {
19241 llvm::Type *ResultType = ConvertType(E->getType());
19242 Value *X = EmitScalarExpr(E->getArg(0));
19243 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19244 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
19245 }
19246 case SystemZ::BI__builtin_s390_vfisb:
19247 case SystemZ::BI__builtin_s390_vfidb: {
19248 llvm::Type *ResultType = ConvertType(E->getType());
19249 Value *X = EmitScalarExpr(E->getArg(0));
19250 // Constant-fold the M4 and M5 mask arguments.
19251 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
19252 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19253 // Check whether this instance can be represented via an LLVM standard
19254 // intrinsic. We only support some combinations of M4 and M5.
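 // For illustration: (M4=0, M5=0) is plain llvm.rint, while (M4=4, M5=5),
 // i.e. inexact suppressed with rounding toward zero, maps to llvm.trunc; any
 // other combination falls through to the target-specific intrinsic below.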
19255 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19256 Intrinsic::ID CI;
19257 switch (M4.getZExtValue()) {
19258 default: break;
19259 case 0: // IEEE-inexact exception allowed
19260 switch (M5.getZExtValue()) {
19261 default: break;
19262 case 0: ID = Intrinsic::rint;
19263 CI = Intrinsic::experimental_constrained_rint; break;
19264 }
19265 break;
19266 case 4: // IEEE-inexact exception suppressed
19267 switch (M5.getZExtValue()) {
19268 default: break;
19269 case 0: ID = Intrinsic::nearbyint;
19270 CI = Intrinsic::experimental_constrained_nearbyint; break;
19271 case 1: ID = Intrinsic::round;
19272 CI = Intrinsic::experimental_constrained_round; break;
19273 case 5: ID = Intrinsic::trunc;
19274 CI = Intrinsic::experimental_constrained_trunc; break;
19275 case 6: ID = Intrinsic::ceil;
19276 CI = Intrinsic::experimental_constrained_ceil; break;
19277 case 7: ID = Intrinsic::floor;
19278 CI = Intrinsic::experimental_constrained_floor; break;
19279 }
19280 break;
19281 }
19282 if (ID != Intrinsic::not_intrinsic) {
19283 if (Builder.getIsFPConstrained()) {
19284 Function *F = CGM.getIntrinsic(CI, ResultType);
19285 return Builder.CreateConstrainedFPCall(F, X);
19286 } else {
19287 Function *F = CGM.getIntrinsic(ID, ResultType);
19288 return Builder.CreateCall(F, X);
19289 }
19290 }
19291 switch (BuiltinID) { // FIXME: constrained version?
19292 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
19293 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
19294 default: llvm_unreachable("Unknown BuiltinID");
19295 }
19296 Function *F = CGM.getIntrinsic(ID);
19297 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19298 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
19299 return Builder.CreateCall(F, {X, M4Value, M5Value});
19300 }
19301 case SystemZ::BI__builtin_s390_vfmaxsb:
19302 case SystemZ::BI__builtin_s390_vfmaxdb: {
19303 llvm::Type *ResultType = ConvertType(E->getType());
19304 Value *X = EmitScalarExpr(E->getArg(0));
19305 Value *Y = EmitScalarExpr(E->getArg(1));
19306 // Constant-fold the M4 mask argument.
19307 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19308 // Check whether this instance can be represented via an LLVM standard
19309 // intrinsic. We only support some values of M4.
19310 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19311 Intrinsic::ID CI;
19312 switch (M4.getZExtValue()) {
19313 default: break;
19314 case 4: ID = Intrinsic::maxnum;
19315 CI = Intrinsic::experimental_constrained_maxnum; break;
19316 }
19317 if (ID != Intrinsic::not_intrinsic) {
19318 if (Builder.getIsFPConstrained()) {
19319 Function *F = CGM.getIntrinsic(CI, ResultType);
19320 return Builder.CreateConstrainedFPCall(F, {X, Y});
19321 } else {
19322 Function *F = CGM.getIntrinsic(ID, ResultType);
19323 return Builder.CreateCall(F, {X, Y});
19324 }
19325 }
19326 switch (BuiltinID) {
19327 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
19328 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
19329 default: llvm_unreachable("Unknown BuiltinID");
19330 }
19331 Function *F = CGM.getIntrinsic(ID);
19332 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19333 return Builder.CreateCall(F, {X, Y, M4Value});
19334 }
19335 case SystemZ::BI__builtin_s390_vfminsb:
19336 case SystemZ::BI__builtin_s390_vfmindb: {
19337 llvm::Type *ResultType = ConvertType(E->getType());
19338 Value *X = EmitScalarExpr(E->getArg(0));
19339 Value *Y = EmitScalarExpr(E->getArg(1));
19340 // Constant-fold the M4 mask argument.
19341 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19342 // Check whether this instance can be represented via an LLVM standard
19343 // intrinsic. We only support some values of M4.
19344 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19345 Intrinsic::ID CI;
19346 switch (M4.getZExtValue()) {
19347 default: break;
19348 case 4: ID = Intrinsic::minnum;
19349 CI = Intrinsic::experimental_constrained_minnum; break;
19350 }
19351 if (ID != Intrinsic::not_intrinsic) {
19352 if (Builder.getIsFPConstrained()) {
19353 Function *F = CGM.getIntrinsic(CI, ResultType);
19354 return Builder.CreateConstrainedFPCall(F, {X, Y});
19355 } else {
19356 Function *F = CGM.getIntrinsic(ID, ResultType);
19357 return Builder.CreateCall(F, {X, Y});
19358 }
19359 }
19360 switch (BuiltinID) {
19361 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
19362 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
19363 default: llvm_unreachable("Unknown BuiltinID");
19364 }
19365 Function *F = CGM.getIntrinsic(ID);
19366 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19367 return Builder.CreateCall(F, {X, Y, M4Value});
19368 }
19369
19370 case SystemZ::BI__builtin_s390_vlbrh:
19371 case SystemZ::BI__builtin_s390_vlbrf:
19372 case SystemZ::BI__builtin_s390_vlbrg: {
19373 llvm::Type *ResultType = ConvertType(E->getType());
19374 Value *X = EmitScalarExpr(E->getArg(0));
19375 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
19376 return Builder.CreateCall(F, X);
19377 }
19378
19379 // Vector intrinsics that output the post-instruction CC value.
19380
19381#define INTRINSIC_WITH_CC(NAME) \
19382 case SystemZ::BI__builtin_##NAME: \
19383 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
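// For illustration, INTRINSIC_WITH_CC(s390_vceqbs) expands to
//   case SystemZ::BI__builtin_s390_vceqbs:
//     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vceqbs, E);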
19384
19385 INTRINSIC_WITH_CC(s390_vpkshs);
19386 INTRINSIC_WITH_CC(s390_vpksfs);
19387 INTRINSIC_WITH_CC(s390_vpksgs);
19388
19389 INTRINSIC_WITH_CC(s390_vpklshs);
19390 INTRINSIC_WITH_CC(s390_vpklsfs);
19391 INTRINSIC_WITH_CC(s390_vpklsgs);
19392
19393 INTRINSIC_WITH_CC(s390_vceqbs);
19394 INTRINSIC_WITH_CC(s390_vceqhs);
19395 INTRINSIC_WITH_CC(s390_vceqfs);
19396 INTRINSIC_WITH_CC(s390_vceqgs);
19397
19398 INTRINSIC_WITH_CC(s390_vchbs);
19399 INTRINSIC_WITH_CC(s390_vchhs);
19400 INTRINSIC_WITH_CC(s390_vchfs);
19401 INTRINSIC_WITH_CC(s390_vchgs);
19402
19403 INTRINSIC_WITH_CC(s390_vchlbs);
19404 INTRINSIC_WITH_CC(s390_vchlhs);
19405 INTRINSIC_WITH_CC(s390_vchlfs);
19406 INTRINSIC_WITH_CC(s390_vchlgs);
19407
19408 INTRINSIC_WITH_CC(s390_vfaebs);
19409 INTRINSIC_WITH_CC(s390_vfaehs);
19410 INTRINSIC_WITH_CC(s390_vfaefs);
19411
19412 INTRINSIC_WITH_CC(s390_vfaezbs);
19413 INTRINSIC_WITH_CC(s390_vfaezhs);
19414 INTRINSIC_WITH_CC(s390_vfaezfs);
19415
19416 INTRINSIC_WITH_CC(s390_vfeebs);
19417 INTRINSIC_WITH_CC(s390_vfeehs);
19418 INTRINSIC_WITH_CC(s390_vfeefs);
19419
19420 INTRINSIC_WITH_CC(s390_vfeezbs);
19421 INTRINSIC_WITH_CC(s390_vfeezhs);
19422 INTRINSIC_WITH_CC(s390_vfeezfs);
19423
19424 INTRINSIC_WITH_CC(s390_vfenebs);
19425 INTRINSIC_WITH_CC(s390_vfenehs);
19426 INTRINSIC_WITH_CC(s390_vfenefs);
19427
19428 INTRINSIC_WITH_CC(s390_vfenezbs);
19429 INTRINSIC_WITH_CC(s390_vfenezhs);
19430 INTRINSIC_WITH_CC(s390_vfenezfs);
19431
19432 INTRINSIC_WITH_CC(s390_vistrbs);
19433 INTRINSIC_WITH_CC(s390_vistrhs);
19434 INTRINSIC_WITH_CC(s390_vistrfs);
19435
19436 INTRINSIC_WITH_CC(s390_vstrcbs);
19437 INTRINSIC_WITH_CC(s390_vstrchs);
19438 INTRINSIC_WITH_CC(s390_vstrcfs);
19439
19440 INTRINSIC_WITH_CC(s390_vstrczbs);
19441 INTRINSIC_WITH_CC(s390_vstrczhs);
19442 INTRINSIC_WITH_CC(s390_vstrczfs);
19443
19444 INTRINSIC_WITH_CC(s390_vfcesbs);
19445 INTRINSIC_WITH_CC(s390_vfcedbs);
19446 INTRINSIC_WITH_CC(s390_vfchsbs);
19447 INTRINSIC_WITH_CC(s390_vfchdbs);
19448 INTRINSIC_WITH_CC(s390_vfchesbs);
19449 INTRINSIC_WITH_CC(s390_vfchedbs);
19450
19451 INTRINSIC_WITH_CC(s390_vftcisb);
19452 INTRINSIC_WITH_CC(s390_vftcidb);
19453
19454 INTRINSIC_WITH_CC(s390_vstrsb);
19455 INTRINSIC_WITH_CC(s390_vstrsh);
19456 INTRINSIC_WITH_CC(s390_vstrsf);
19457
19458 INTRINSIC_WITH_CC(s390_vstrszb);
19459 INTRINSIC_WITH_CC(s390_vstrszh);
19460 INTRINSIC_WITH_CC(s390_vstrszf);
19461
19462#undef INTRINSIC_WITH_CC
19463
19464 default:
19465 return nullptr;
19466 }
19467}
19468
19469namespace {
19470// Helper classes for mapping MMA builtins to particular LLVM intrinsic variants.
19471struct NVPTXMmaLdstInfo {
19472 unsigned NumResults; // Number of elements to load/store
19473 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
19474 unsigned IID_col;
19475 unsigned IID_row;
19476};
19477
19478#define MMA_INTR(geom_op_type, layout) \
19479 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
19480#define MMA_LDST(n, geom_op_type) \
19481 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
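// For illustration, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }
// i.e. 8 result elements plus the column- and row-major intrinsic IDs.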
19482
19483static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
19484 switch (BuiltinID) {
19485 // FP MMA loads
19486 case NVPTX::BI__hmma_m16n16k16_ld_a:
19487 return MMA_LDST(8, m16n16k16_load_a_f16);
19488 case NVPTX::BI__hmma_m16n16k16_ld_b:
19489 return MMA_LDST(8, m16n16k16_load_b_f16);
19490 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19491 return MMA_LDST(4, m16n16k16_load_c_f16);
19492 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19493 return MMA_LDST(8, m16n16k16_load_c_f32);
19494 case NVPTX::BI__hmma_m32n8k16_ld_a:
19495 return MMA_LDST(8, m32n8k16_load_a_f16);
19496 case NVPTX::BI__hmma_m32n8k16_ld_b:
19497 return MMA_LDST(8, m32n8k16_load_b_f16);
19498 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19499 return MMA_LDST(4, m32n8k16_load_c_f16);
19500 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19501 return MMA_LDST(8, m32n8k16_load_c_f32);
19502 case NVPTX::BI__hmma_m8n32k16_ld_a:
19503 return MMA_LDST(8, m8n32k16_load_a_f16);
19504 case NVPTX::BI__hmma_m8n32k16_ld_b:
19505 return MMA_LDST(8, m8n32k16_load_b_f16);
19506 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19507 return MMA_LDST(4, m8n32k16_load_c_f16);
19508 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19509 return MMA_LDST(8, m8n32k16_load_c_f32);
19510
19511 // Integer MMA loads
19512 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19513 return MMA_LDST(2, m16n16k16_load_a_s8);
19514 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19515 return MMA_LDST(2, m16n16k16_load_a_u8);
19516 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19517 return MMA_LDST(2, m16n16k16_load_b_s8);
19518 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19519 return MMA_LDST(2, m16n16k16_load_b_u8);
19520 case NVPTX::BI__imma_m16n16k16_ld_c:
19521 return MMA_LDST(8, m16n16k16_load_c_s32);
19522 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19523 return MMA_LDST(4, m32n8k16_load_a_s8);
19524 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19525 return MMA_LDST(4, m32n8k16_load_a_u8);
19526 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19527 return MMA_LDST(1, m32n8k16_load_b_s8);
19528 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19529 return MMA_LDST(1, m32n8k16_load_b_u8);
19530 case NVPTX::BI__imma_m32n8k16_ld_c:
19531 return MMA_LDST(8, m32n8k16_load_c_s32);
19532 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19533 return MMA_LDST(1, m8n32k16_load_a_s8);
19534 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19535 return MMA_LDST(1, m8n32k16_load_a_u8);
19536 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19537 return MMA_LDST(4, m8n32k16_load_b_s8);
19538 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19539 return MMA_LDST(4, m8n32k16_load_b_u8);
19540 case NVPTX::BI__imma_m8n32k16_ld_c:
19541 return MMA_LDST(8, m8n32k16_load_c_s32);
19542
19543 // Sub-integer MMA loads.
19544 // Only row/col layout is supported by A/B fragments.
19545 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
19546 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
19547 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
19548 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
19549 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
19550 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
19551 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
19552 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
19553 case NVPTX::BI__imma_m8n8k32_ld_c:
19554 return MMA_LDST(2, m8n8k32_load_c_s32);
19555 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
19556 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
19557 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
19558 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
19559 case NVPTX::BI__bmma_m8n8k128_ld_c:
19560 return MMA_LDST(2, m8n8k128_load_c_s32);
19561
19562 // Double MMA loads
19563 case NVPTX::BI__dmma_m8n8k4_ld_a:
19564 return MMA_LDST(1, m8n8k4_load_a_f64);
19565 case NVPTX::BI__dmma_m8n8k4_ld_b:
19566 return MMA_LDST(1, m8n8k4_load_b_f64);
19567 case NVPTX::BI__dmma_m8n8k4_ld_c:
19568 return MMA_LDST(2, m8n8k4_load_c_f64);
19569
19570 // Alternate float MMA loads
19571 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
19572 return MMA_LDST(4, m16n16k16_load_a_bf16);
19573 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
19574 return MMA_LDST(4, m16n16k16_load_b_bf16);
19575 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
19576 return MMA_LDST(2, m8n32k16_load_a_bf16);
19577 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
19578 return MMA_LDST(8, m8n32k16_load_b_bf16);
19579 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
19580 return MMA_LDST(8, m32n8k16_load_a_bf16);
19581 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
19582 return MMA_LDST(2, m32n8k16_load_b_bf16);
19583 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
19584 return MMA_LDST(4, m16n16k8_load_a_tf32);
19585 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
19586 return MMA_LDST(4, m16n16k8_load_b_tf32);
19587 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
19588 return MMA_LDST(8, m16n16k8_load_c_f32);
19589
19590 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
19591 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
19592 // use fragment C for both loads and stores.
19593 // FP MMA stores.
19594 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
19595 return MMA_LDST(4, m16n16k16_store_d_f16);
19596 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
19597 return MMA_LDST(8, m16n16k16_store_d_f32);
19598 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
19599 return MMA_LDST(4, m32n8k16_store_d_f16);
19600 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
19601 return MMA_LDST(8, m32n8k16_store_d_f32);
19602 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
19603 return MMA_LDST(4, m8n32k16_store_d_f16);
19604 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
19605 return MMA_LDST(8, m8n32k16_store_d_f32);
19606
19607 // Integer and sub-integer MMA stores.
19608 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
19609 // name, integer loads/stores use LLVM's i32.
19610 case NVPTX::BI__imma_m16n16k16_st_c_i32:
19611 return MMA_LDST(8, m16n16k16_store_d_s32);
19612 case NVPTX::BI__imma_m32n8k16_st_c_i32:
19613 return MMA_LDST(8, m32n8k16_store_d_s32);
19614 case NVPTX::BI__imma_m8n32k16_st_c_i32:
19615 return MMA_LDST(8, m8n32k16_store_d_s32);
19616 case NVPTX::BI__imma_m8n8k32_st_c_i32:
19617 return MMA_LDST(2, m8n8k32_store_d_s32);
19618 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
19619 return MMA_LDST(2, m8n8k128_store_d_s32);
19620
19621 // Double MMA store
19622 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
19623 return MMA_LDST(2, m8n8k4_store_d_f64);
19624
19625 // Alternate float MMA store
19626 case NVPTX::BI__mma_m16n16k8_st_c_f32:
19627 return MMA_LDST(8, m16n16k8_store_d_f32);
19628
19629 default:
19630 llvm_unreachable("Unknown MMA builtin");
19631 }
19632}
19633#undef MMA_LDST
19634#undef MMA_INTR
19635
19636
19637struct NVPTXMmaInfo {
19638 unsigned NumEltsA;
19639 unsigned NumEltsB;
19640 unsigned NumEltsC;
19641 unsigned NumEltsD;
19642
19643 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
19644 // over 'col' for layout. The index of non-satf variants is expected to match
19645 // the undocumented layout constants used by CUDA's mma.hpp.
19646 std::array<unsigned, 8> Variants;
19647
19648 unsigned getMMAIntrinsic(int Layout, bool Satf) {
19649 unsigned Index = Layout + 4 * Satf;
19650 if (Index >= Variants.size())
19651 return 0;
19652 return Variants[Index];
19653 }
19654};
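// For illustration: with the MMA_SATF_VARIANTS ordering used below, Layout 1
// (row-major A, column-major B) without satf selects Variants[1], and the same
// layout with satf selects Variants[1 + 4] == Variants[5].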
19655
19656 // Returns an intrinsic that matches Layout and Satf for valid combinations of
19657 // Layout and Satf, 0 otherwise.
19658static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
19659 // clang-format off
19660#define MMA_VARIANTS(geom, type) \
19661 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
19662 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19663 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
19664 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
19665#define MMA_SATF_VARIANTS(geom, type) \
19666 MMA_VARIANTS(geom, type), \
19667 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
19668 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19669 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
19670 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
19671// Sub-integer MMA only supports row.col layout.
19672#define MMA_VARIANTS_I4(geom, type) \
19673 0, \
19674 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
19675 0, \
19676 0, \
19677 0, \
19678 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
19679 0, \
19680 0
19681// b1 MMA does not support .satfinite.
19682#define MMA_VARIANTS_B1_XOR(geom, type) \
19683 0, \
19684 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
19685 0, \
19686 0, \
19687 0, \
19688 0, \
19689 0, \
19690 0
19691#define MMA_VARIANTS_B1_AND(geom, type) \
19692 0, \
19693 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
19694 0, \
19695 0, \
19696 0, \
19697 0, \
19698 0, \
19699 0
19700 // clang-format on
19701 switch (BuiltinID) {
19702 // FP MMA
19703 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
19704 // the NumEltsA..NumEltsD fields of the returned value are ordered as A,B,C,D.
19705 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
19706 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
19707 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
19708 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
19709 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
19710 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
19711 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
19712 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
19713 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
19714 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
19715 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
19716 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
19717 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
19718 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
19719 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
19720 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
19721 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
19722 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
19723 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
19724 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
19725 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
19726 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
19727 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
19728 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
19729
19730 // Integer MMA
19731 case NVPTX::BI__imma_m16n16k16_mma_s8:
19732 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
19733 case NVPTX::BI__imma_m16n16k16_mma_u8:
19734 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
19735 case NVPTX::BI__imma_m32n8k16_mma_s8:
19736 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
19737 case NVPTX::BI__imma_m32n8k16_mma_u8:
19738 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
19739 case NVPTX::BI__imma_m8n32k16_mma_s8:
19740 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
19741 case NVPTX::BI__imma_m8n32k16_mma_u8:
19742 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
19743
19744 // Sub-integer MMA
19745 case NVPTX::BI__imma_m8n8k32_mma_s4:
19746 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
19747 case NVPTX::BI__imma_m8n8k32_mma_u4:
19748 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
19749 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
19750 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
19751 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
19752 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
19753
19754 // Double MMA
19755 case NVPTX::BI__dmma_m8n8k4_mma_f64:
19756 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
19757
19758 // Alternate FP MMA
19759 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
19760 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
19761 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
19762 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
19763 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
19764 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
19765 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
19766 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
19767 default:
19768 llvm_unreachable("Unexpected builtin ID.");
19769 }
19770#undef MMA_VARIANTS
19771#undef MMA_SATF_VARIANTS
19772#undef MMA_VARIANTS_I4
19773#undef MMA_VARIANTS_B1_AND
19774#undef MMA_VARIANTS_B1_XOR
19775}
19776
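// Emit an ldg/ldu intrinsic call for an __nvvm_ldg_*/__nvvm_ldu_* builtin: the
// intrinsic is overloaded on the pointee and pointer types, and the natural
// alignment of the pointee is passed as an explicit i32 operand. For
// illustration (mangling approximate), __nvvm_ldg_f4(p) becomes roughly
//   call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0(ptr %p, i32 16)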
19777static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
19778 const CallExpr *E) {
19779 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19780 QualType ArgType = E->getArg(0)->getType();
19781 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
19782 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
19783 return CGF.Builder.CreateCall(
19784 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19785 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
19786}
19787
19788static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
19789 const CallExpr *E) {
19790 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
19791 llvm::Type *ElemTy =
19792 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
19793 return CGF.Builder.CreateCall(
19794 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
19795 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
19796}
19797
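// Emit a cp.async copy builtin. If the builtin was invoked with three
// arguments, the *_s intrinsic variant that takes the extra (source-size)
// operand is used; otherwise the two-operand form is emitted. The SrcSize
// parameter itself is not referenced in this helper.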
19798static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
19799 CodeGenFunction &CGF, const CallExpr *E,
19800 int SrcSize) {
19801 return E->getNumArgs() == 3
19802 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
19803 {CGF.EmitScalarExpr(E->getArg(0)),
19804 CGF.EmitScalarExpr(E->getArg(1)),
19805 CGF.EmitScalarExpr(E->getArg(2))})
19806 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
19807 {CGF.EmitScalarExpr(E->getArg(0)),
19808 CGF.EmitScalarExpr(E->getArg(1))});
19809}
19810
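// Emit one of the f16/f16x2 NVVM builtins. When the language has no native
// half type and the target still relies on FP16 conversion intrinsics, an
// error is diagnosed; the ldg/ldu variants are routed through MakeLdgLdu, and
// everything else is a direct call to the corresponding intrinsic with the
// arguments bitcast to the parameter types where needed.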
19811static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
19812 const CallExpr *E, CodeGenFunction &CGF) {
19813 auto &C = CGF.CGM.getContext();
19814 if (!(C.getLangOpts().NativeHalfType ||
19815 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
19816 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
19817 " requires native half type support.");
19818 return nullptr;
19819 }
19820
19821 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
19822 IntrinsicID == Intrinsic::nvvm_ldu_global_f)
19823 return MakeLdgLdu(IntrinsicID, CGF, E);
19824
19825 SmallVector<Value *, 16> Args;
19826 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
19827 auto *FTy = F->getFunctionType();
19828 unsigned ICEArguments = 0;
19829 ASTContext::GetBuiltinTypeError Error;
19830 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
19831 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19832 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
19833 assert((ICEArguments & (1 << i)) == 0);
19834 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
19835 auto *PTy = FTy->getParamType(i);
19836 if (PTy != ArgValue->getType())
19837 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
19838 Args.push_back(ArgValue);
19839 }
19840
19841 return CGF.Builder.CreateCall(F, Args);
19842}
19843} // namespace
19844
19845Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
19846 const CallExpr *E) {
19847 switch (BuiltinID) {
19848 case NVPTX::BI__nvvm_atom_add_gen_i:
19849 case NVPTX::BI__nvvm_atom_add_gen_l:
19850 case NVPTX::BI__nvvm_atom_add_gen_ll:
19851 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
19852
19853 case NVPTX::BI__nvvm_atom_sub_gen_i:
19854 case NVPTX::BI__nvvm_atom_sub_gen_l:
19855 case NVPTX::BI__nvvm_atom_sub_gen_ll:
19856 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
19857
19858 case NVPTX::BI__nvvm_atom_and_gen_i:
19859 case NVPTX::BI__nvvm_atom_and_gen_l:
19860 case NVPTX::BI__nvvm_atom_and_gen_ll:
19861 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
19862
19863 case NVPTX::BI__nvvm_atom_or_gen_i:
19864 case NVPTX::BI__nvvm_atom_or_gen_l:
19865 case NVPTX::BI__nvvm_atom_or_gen_ll:
19866 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
19867
19868 case NVPTX::BI__nvvm_atom_xor_gen_i:
19869 case NVPTX::BI__nvvm_atom_xor_gen_l:
19870 case NVPTX::BI__nvvm_atom_xor_gen_ll:
19871 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
19872
19873 case NVPTX::BI__nvvm_atom_xchg_gen_i:
19874 case NVPTX::BI__nvvm_atom_xchg_gen_l:
19875 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
19876 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
19877
19878 case NVPTX::BI__nvvm_atom_max_gen_i:
19879 case NVPTX::BI__nvvm_atom_max_gen_l:
19880 case NVPTX::BI__nvvm_atom_max_gen_ll:
19881 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
19882
19883 case NVPTX::BI__nvvm_atom_max_gen_ui:
19884 case NVPTX::BI__nvvm_atom_max_gen_ul:
19885 case NVPTX::BI__nvvm_atom_max_gen_ull:
19886 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
19887
19888 case NVPTX::BI__nvvm_atom_min_gen_i:
19889 case NVPTX::BI__nvvm_atom_min_gen_l:
19890 case NVPTX::BI__nvvm_atom_min_gen_ll:
19891 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
19892
19893 case NVPTX::BI__nvvm_atom_min_gen_ui:
19894 case NVPTX::BI__nvvm_atom_min_gen_ul:
19895 case NVPTX::BI__nvvm_atom_min_gen_ull:
19896 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
19897
19898 case NVPTX::BI__nvvm_atom_cas_gen_i:
19899 case NVPTX::BI__nvvm_atom_cas_gen_l:
19900 case NVPTX::BI__nvvm_atom_cas_gen_ll:
19901 // __nvvm_atom_cas_gen_* should return the old value rather than the
19902 // success flag.
19903 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
19904
19905 case NVPTX::BI__nvvm_atom_add_gen_f:
19906 case NVPTX::BI__nvvm_atom_add_gen_d: {
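 // Both the float and double flavors are expressed as a plain atomicrmw fadd
 // with seq_cst ordering rather than a target-specific intrinsic.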
19907 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
19908 Value *Val = EmitScalarExpr(E->getArg(1));
19909
19910 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
19911 AtomicOrdering::SequentiallyConsistent);
19912 }
19913
19914 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
19915 Value *Ptr = EmitScalarExpr(E->getArg(0));
19916 Value *Val = EmitScalarExpr(E->getArg(1));
19917 Function *FnALI32 =
19918 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
19919 return Builder.CreateCall(FnALI32, {Ptr, Val});
19920 }
19921
19922 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
19923 Value *Ptr = EmitScalarExpr(E->getArg(0));
19924 Value *Val = EmitScalarExpr(E->getArg(1));
19925 Function *FnALD32 =
19926 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
19927 return Builder.CreateCall(FnALD32, {Ptr, Val});
19928 }
19929
19930 case NVPTX::BI__nvvm_ldg_c:
19931 case NVPTX::BI__nvvm_ldg_sc:
19932 case NVPTX::BI__nvvm_ldg_c2:
19933 case NVPTX::BI__nvvm_ldg_sc2:
19934 case NVPTX::BI__nvvm_ldg_c4:
19935 case NVPTX::BI__nvvm_ldg_sc4:
19936 case NVPTX::BI__nvvm_ldg_s:
19937 case NVPTX::BI__nvvm_ldg_s2:
19938 case NVPTX::BI__nvvm_ldg_s4:
19939 case NVPTX::BI__nvvm_ldg_i:
19940 case NVPTX::BI__nvvm_ldg_i2:
19941 case NVPTX::BI__nvvm_ldg_i4:
19942 case NVPTX::BI__nvvm_ldg_l:
19943 case NVPTX::BI__nvvm_ldg_l2:
19944 case NVPTX::BI__nvvm_ldg_ll:
19945 case NVPTX::BI__nvvm_ldg_ll2:
19946 case NVPTX::BI__nvvm_ldg_uc:
19947 case NVPTX::BI__nvvm_ldg_uc2:
19948 case NVPTX::BI__nvvm_ldg_uc4:
19949 case NVPTX::BI__nvvm_ldg_us:
19950 case NVPTX::BI__nvvm_ldg_us2:
19951 case NVPTX::BI__nvvm_ldg_us4:
19952 case NVPTX::BI__nvvm_ldg_ui:
19953 case NVPTX::BI__nvvm_ldg_ui2:
19954 case NVPTX::BI__nvvm_ldg_ui4:
19955 case NVPTX::BI__nvvm_ldg_ul:
19956 case NVPTX::BI__nvvm_ldg_ul2:
19957 case NVPTX::BI__nvvm_ldg_ull:
19958 case NVPTX::BI__nvvm_ldg_ull2:
19959 // PTX Interoperability section 2.2: "For a vector with an even number of
19960 // elements, its alignment is set to number of elements times the alignment
19961 // of its member: n*alignof(t)."
19962 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
19963 case NVPTX::BI__nvvm_ldg_f:
19964 case NVPTX::BI__nvvm_ldg_f2:
19965 case NVPTX::BI__nvvm_ldg_f4:
19966 case NVPTX::BI__nvvm_ldg_d:
19967 case NVPTX::BI__nvvm_ldg_d2:
19968 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
19969
19970 case NVPTX::BI__nvvm_ldu_c:
19971 case NVPTX::BI__nvvm_ldu_sc:
19972 case NVPTX::BI__nvvm_ldu_c2:
19973 case NVPTX::BI__nvvm_ldu_sc2:
19974 case NVPTX::BI__nvvm_ldu_c4:
19975 case NVPTX::BI__nvvm_ldu_sc4:
19976 case NVPTX::BI__nvvm_ldu_s:
19977 case NVPTX::BI__nvvm_ldu_s2:
19978 case NVPTX::BI__nvvm_ldu_s4:
19979 case NVPTX::BI__nvvm_ldu_i:
19980 case NVPTX::BI__nvvm_ldu_i2:
19981 case NVPTX::BI__nvvm_ldu_i4:
19982 case NVPTX::BI__nvvm_ldu_l:
19983 case NVPTX::BI__nvvm_ldu_l2:
19984 case NVPTX::BI__nvvm_ldu_ll:
19985 case NVPTX::BI__nvvm_ldu_ll2:
19986 case NVPTX::BI__nvvm_ldu_uc:
19987 case NVPTX::BI__nvvm_ldu_uc2:
19988 case NVPTX::BI__nvvm_ldu_uc4:
19989 case NVPTX::BI__nvvm_ldu_us:
19990 case NVPTX::BI__nvvm_ldu_us2:
19991 case NVPTX::BI__nvvm_ldu_us4:
19992 case NVPTX::BI__nvvm_ldu_ui:
19993 case NVPTX::BI__nvvm_ldu_ui2:
19994 case NVPTX::BI__nvvm_ldu_ui4:
19995 case NVPTX::BI__nvvm_ldu_ul:
19996 case NVPTX::BI__nvvm_ldu_ul2:
19997 case NVPTX::BI__nvvm_ldu_ull:
19998 case NVPTX::BI__nvvm_ldu_ull2:
19999 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
20000 case NVPTX::BI__nvvm_ldu_f:
20001 case NVPTX::BI__nvvm_ldu_f2:
20002 case NVPTX::BI__nvvm_ldu_f4:
20003 case NVPTX::BI__nvvm_ldu_d:
20004 case NVPTX::BI__nvvm_ldu_d2:
20005 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
20006
20007 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
20008 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
20009 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
20010 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
20011 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
20012 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
20013 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
20014 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
20015 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
20016 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
20017 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
20018 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
20019 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
20020 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
20021 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
20022 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
20023 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
20024 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
20025 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
20026 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
20027 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
20028 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
20029 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
20030 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
20031 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
20032 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
20033 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
20034 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
20035 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
20036 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
20037 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
20038 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
20039 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
20040 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
20041 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
20042 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
20043 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
20044 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
20045 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
20046 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
20047 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
20048 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
20049 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
20050 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
20051 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
20052 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
20053 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
20054 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
20055 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
20056 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
20057 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
20058 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
20059 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
20060 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
20061 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
20062 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
20063 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
20064 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
20065 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
20066 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
20067 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
20068 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
20069 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
20070 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
20071 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
20072 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
20073 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
20074 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
20075 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
20076 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
20077 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
20078 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
20079 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
20080 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
20081 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
20082 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
20083 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
20084 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
20085 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
20086 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
20087 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
20088 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
20089 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
20090 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
20091 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
20092 Value *Ptr = EmitScalarExpr(E->getArg(0));
20093 llvm::Type *ElemTy =
20094 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20095 return Builder.CreateCall(
20096 CGM.getIntrinsic(
20097 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
20098 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20099 }
20100 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
20101 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
20102 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
20103 Value *Ptr = EmitScalarExpr(E->getArg(0));
20104 llvm::Type *ElemTy =
20105 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20106 return Builder.CreateCall(
20107 CGM.getIntrinsic(
20108 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
20109 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20110 }
20111 case NVPTX::BI__nvvm_match_all_sync_i32p:
20112 case NVPTX::BI__nvvm_match_all_sync_i64p: {
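 // The intrinsic returns an {i32/i64, i1} pair: the first element is the
 // builtin's result, and the zero-extended predicate is stored through the
 // third (pointer) argument.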
20113 Value *Mask = EmitScalarExpr(E->getArg(0));
20114 Value *Val = EmitScalarExpr(E->getArg(1));
20115 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
20116 Value *ResultPair = Builder.CreateCall(
20117 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
20118 ? Intrinsic::nvvm_match_all_sync_i32p
20119 : Intrinsic::nvvm_match_all_sync_i64p),
20120 {Mask, Val});
20121 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
20122 PredOutPtr.getElementType());
20123 Builder.CreateStore(Pred, PredOutPtr);
20124 return Builder.CreateExtractValue(ResultPair, 0);
20125 }
20126
20127 // FP MMA loads
20128 case NVPTX::BI__hmma_m16n16k16_ld_a:
20129 case NVPTX::BI__hmma_m16n16k16_ld_b:
20130 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20131 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20132 case NVPTX::BI__hmma_m32n8k16_ld_a:
20133 case NVPTX::BI__hmma_m32n8k16_ld_b:
20134 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20135 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20136 case NVPTX::BI__hmma_m8n32k16_ld_a:
20137 case NVPTX::BI__hmma_m8n32k16_ld_b:
20138 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20139 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20140 // Integer MMA loads.
20141 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20142 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20143 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20144 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20145 case NVPTX::BI__imma_m16n16k16_ld_c:
20146 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20147 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20148 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20149 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20150 case NVPTX::BI__imma_m32n8k16_ld_c:
20151 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20152 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20153 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20154 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20155 case NVPTX::BI__imma_m8n32k16_ld_c:
20156 // Sub-integer MMA loads.
20157 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20158 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20159 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20160 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20161 case NVPTX::BI__imma_m8n8k32_ld_c:
20162 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20163 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20164 case NVPTX::BI__bmma_m8n8k128_ld_c:
20165 // Double MMA loads.
20166 case NVPTX::BI__dmma_m8n8k4_ld_a:
20167 case NVPTX::BI__dmma_m8n8k4_ld_b:
20168 case NVPTX::BI__dmma_m8n8k4_ld_c:
20169 // Alternate float MMA loads.
20170 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20171 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20172 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20173 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20174 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20175 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20176 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20177 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20178 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
20179 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20180 Value *Src = EmitScalarExpr(E->getArg(1));
20181 Value *Ldm = EmitScalarExpr(E->getArg(2));
20182 std::optional<llvm::APSInt> isColMajorArg =
20183 E->getArg(3)->getIntegerConstantExpr(getContext());
20184 if (!isColMajorArg)
20185 return nullptr;
20186 bool isColMajor = isColMajorArg->getSExtValue();
20187 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20188 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20189 if (IID == 0)
20190 return nullptr;
20191
20192 Value *Result =
20193 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
20194
20195 // Save returned values.
20196 assert(II.NumResults);
20197 if (II.NumResults == 1) {
20198 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
20199 CharUnits::fromQuantity(4));
20200 } else {
20201 for (unsigned i = 0; i < II.NumResults; ++i) {
20202 Builder.CreateAlignedStore(
20203 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
20204 Dst.getElementType()),
20205 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20206 llvm::ConstantInt::get(IntTy, i)),
20207 CharUnits::fromQuantity(4));
20208 }
20209 }
20210 return Result;
20211 }
20212
20213 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20214 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20215 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20216 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20217 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20218 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20219 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20220 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20221 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20222 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20223 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20224 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20225 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
20226 Value *Dst = EmitScalarExpr(E->getArg(0));
20227 Address Src = EmitPointerWithAlignment(E->getArg(1));
20228 Value *Ldm = EmitScalarExpr(E->getArg(2));
20229 std::optional<llvm::APSInt> isColMajorArg =
20230 E->getArg(3)->getIntegerConstantExpr(getContext());
20231 if (!isColMajorArg)
20232 return nullptr;
20233 bool isColMajor = isColMajorArg->getSExtValue();
20234 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20235 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20236 if (IID == 0)
20237 return nullptr;
20238 Function *Intrinsic =
20239 CGM.getIntrinsic(IID, Dst->getType());
20240 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
20241 SmallVector<Value *, 10> Values = {Dst};
20242 for (unsigned i = 0; i < II.NumResults; ++i) {
20243 Value *V = Builder.CreateAlignedLoad(
20244 Src.getElementType(),
20245 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
20246 llvm::ConstantInt::get(IntTy, i)),
20247 CharUnits::fromQuantity(4));
20248 Values.push_back(Builder.CreateBitCast(V, ParamType));
20249 }
20250 Values.push_back(Ldm);
20251 Value *Result = Builder.CreateCall(Intrinsic, Values);
20252 return Result;
20253 }
20254
20255 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
20256 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
20257 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20258 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20259 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20260 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20261 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20262 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20263 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20264 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20265 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20266 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20267 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20268 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20269 case NVPTX::BI__imma_m16n16k16_mma_s8:
20270 case NVPTX::BI__imma_m16n16k16_mma_u8:
20271 case NVPTX::BI__imma_m32n8k16_mma_s8:
20272 case NVPTX::BI__imma_m32n8k16_mma_u8:
20273 case NVPTX::BI__imma_m8n32k16_mma_s8:
20274 case NVPTX::BI__imma_m8n32k16_mma_u8:
20275 case NVPTX::BI__imma_m8n8k32_mma_s4:
20276 case NVPTX::BI__imma_m8n8k32_mma_u4:
20277 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20278 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20279 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20280 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20281 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20282 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20283 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
20284 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20285 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
20286 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
20287 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
20288 std::optional<llvm::APSInt> LayoutArg =
20289 E->getArg(4)->getIntegerConstantExpr(getContext());
20290 if (!LayoutArg)
20291 return nullptr;
20292 int Layout = LayoutArg->getSExtValue();
20293 if (Layout < 0 || Layout > 3)
20294 return nullptr;
20295 llvm::APSInt SatfArg;
20296 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
20297 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
20298 SatfArg = 0; // .b1 does not have satf argument.
20299 else if (std::optional<llvm::APSInt> OptSatfArg =
20300 E->getArg(5)->getIntegerConstantExpr(getContext()))
20301 SatfArg = *OptSatfArg;
20302 else
20303 return nullptr;
20304 bool Satf = SatfArg.getSExtValue();
20305 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
20306 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
20307 if (IID == 0) // Unsupported combination of Layout/Satf.
20308 return nullptr;
20309
20310 SmallVector<Value *, 24> Values;
20311 Function *Intrinsic = CGM.getIntrinsic(IID);
20312 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
20313 // Load A
20314 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
20315 Value *V = Builder.CreateAlignedLoad(
20316 SrcA.getElementType(),
20317 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
20318 llvm::ConstantInt::get(IntTy, i)),
20319 CharUnits::fromQuantity(4));
20320 Values.push_back(Builder.CreateBitCast(V, AType));
20321 }
20322 // Load B
20323 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
20324 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
20325 Value *V = Builder.CreateAlignedLoad(
20326 SrcB.getElementType(),
20327 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
20328 llvm::ConstantInt::get(IntTy, i)),
20329 CharUnits::fromQuantity(4));
20330 Values.push_back(Builder.CreateBitCast(V, BType));
20331 }
20332 // Load C
20333 llvm::Type *CType =
20334 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
20335 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
20336 Value *V = Builder.CreateAlignedLoad(
20337 SrcC.getElementType(),
20338 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
20339 llvm::ConstantInt::get(IntTy, i)),
20340 CharUnits::fromQuantity(4));
20341 Values.push_back(Builder.CreateBitCast(V, CType));
20342 }
20343 Value *Result = Builder.CreateCall(Intrinsic, Values);
20344 llvm::Type *DType = Dst.getElementType();
20345 for (unsigned i = 0; i < MI.NumEltsD; ++i)
20346 Builder.CreateAlignedStore(
20347 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
20348 Builder.CreateGEP(DType, Dst.emitRawPointer(*this),
20349 llvm::ConstantInt::get(IntTy, i)),
20350 CharUnits::fromQuantity(4));
20351 return Result;
20352 }
20353 // The following builtins require half type support
20354 case NVPTX::BI__nvvm_ex2_approx_f16:
20355 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
20356 case NVPTX::BI__nvvm_ex2_approx_f16x2:
20357 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
20358 case NVPTX::BI__nvvm_ff2f16x2_rn:
20359 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
20360 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
20361 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
20362 case NVPTX::BI__nvvm_ff2f16x2_rz:
20363 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
20364 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
20365 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
20366 case NVPTX::BI__nvvm_fma_rn_f16:
20367 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
20368 case NVPTX::BI__nvvm_fma_rn_f16x2:
20369 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
20370 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
20371 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
20372 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
20373 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
20374 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
20375 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
20376 *this);
20377 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
20378 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
20379 *this);
20380 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
20381 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
20382 *this);
20383 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
20384 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
20385 *this);
20386 case NVPTX::BI__nvvm_fma_rn_relu_f16:
20387 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
20388 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
20389 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
20390 case NVPTX::BI__nvvm_fma_rn_sat_f16:
20391 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
20392 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
20393 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
20394 case NVPTX::BI__nvvm_fmax_f16:
20395 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
20396 case NVPTX::BI__nvvm_fmax_f16x2:
20397 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
20398 case NVPTX::BI__nvvm_fmax_ftz_f16:
20399 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
20400 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
20401 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
20402 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
20403 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
20404 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
20405 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
20406 *this);
20407 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
20408 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
20409 E, *this);
20410 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
20411 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
20412 BuiltinID, E, *this);
20413 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
20414 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
20415 *this);
20416 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
20417 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
20418 E, *this);
20419 case NVPTX::BI__nvvm_fmax_nan_f16:
20420 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
20421 case NVPTX::BI__nvvm_fmax_nan_f16x2:
20422 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
20423 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
20424 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
20425 *this);
20426 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
20427 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
20428 E, *this);
20429 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
20430 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
20431 *this);
20432 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
20433 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
20434 *this);
20435 case NVPTX::BI__nvvm_fmin_f16:
20436 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
20437 case NVPTX::BI__nvvm_fmin_f16x2:
20438 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
20439 case NVPTX::BI__nvvm_fmin_ftz_f16:
20440 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
20441 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
20442 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
20443 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
20444 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
20445 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
20446 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
20447 *this);
20448 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
20449 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
20450 E, *this);
20451 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
20452 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
20453 BuiltinID, E, *this);
20454 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
20455 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
20456 *this);
20457 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
20458 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
20459 E, *this);
20460 case NVPTX::BI__nvvm_fmin_nan_f16:
20461 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
20462 case NVPTX::BI__nvvm_fmin_nan_f16x2:
20463 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
20464 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
20465 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
20466 *this);
20467 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
20468 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
20469 E, *this);
20470 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
20471 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
20472 *this);
20473 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
20474 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
20475 *this);
20476 case NVPTX::BI__nvvm_ldg_h:
20477 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20478 case NVPTX::BI__nvvm_ldg_h2:
20479 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20480 case NVPTX::BI__nvvm_ldu_h:
20481 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20482 case NVPTX::BI__nvvm_ldu_h2: {
20483 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20484 }
20485 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
20486 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
20487 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
20488 4);
20489 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
20490 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
20491 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
20492 8);
20493 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
20494 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
20495 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
20496 16);
20497 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
20498 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
20499 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
20500 16);
20501 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
20502 return Builder.CreateCall(
20503 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
20504 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
20505 return Builder.CreateCall(
20506 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
20507 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
20508 return Builder.CreateCall(
20509 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
20510 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
20511 return Builder.CreateCall(
20512 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
20513 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
20514 return Builder.CreateCall(
20515 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
20516 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
20517 return Builder.CreateCall(
20518 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
20519 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
20520 return Builder.CreateCall(
20521 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
20522 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
20523 return Builder.CreateCall(
20524 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
20525 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
20526 return Builder.CreateCall(
20527 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
20528 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
20529 return Builder.CreateCall(
20530 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
20531 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
20532 return Builder.CreateCall(
20533 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
20534 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
20535 return Builder.CreateCall(
20536 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
20537 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
20538 return Builder.CreateCall(
20539 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
20540 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
20541 return Builder.CreateCall(
20542 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
20543 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
20544 return Builder.CreateCall(
20545 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
20546 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
20547 return Builder.CreateCall(
20548 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
20549 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
20550 return Builder.CreateCall(
20551 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
20552 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
20553 return Builder.CreateCall(
20554 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
20555 case NVPTX::BI__nvvm_is_explicit_cluster:
20556 return Builder.CreateCall(
20557 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
20558 case NVPTX::BI__nvvm_isspacep_shared_cluster:
20559 return Builder.CreateCall(
20560 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
20561 EmitScalarExpr(E->getArg(0)));
20562 case NVPTX::BI__nvvm_mapa:
20563 return Builder.CreateCall(
20564 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
20565 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20566 case NVPTX::BI__nvvm_mapa_shared_cluster:
20567 return Builder.CreateCall(
20568 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
20569 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
20570 case NVPTX::BI__nvvm_getctarank:
20571 return Builder.CreateCall(
20572 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
20573 EmitScalarExpr(E->getArg(0)));
20574 case NVPTX::BI__nvvm_getctarank_shared_cluster:
20575 return Builder.CreateCall(
20576 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
20577 EmitScalarExpr(E->getArg(0)));
20578 case NVPTX::BI__nvvm_barrier_cluster_arrive:
20579 return Builder.CreateCall(
20580 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
20581 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
20582 return Builder.CreateCall(
20583 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
20584 case NVPTX::BI__nvvm_barrier_cluster_wait:
20585 return Builder.CreateCall(
20586 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
20587 case NVPTX::BI__nvvm_fence_sc_cluster:
20588 return Builder.CreateCall(
20589 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
20590 default:
20591 return nullptr;
20592 }
20593}
20594
20595namespace {
20596struct BuiltinAlignArgs {
20597 llvm::Value *Src = nullptr;
20598 llvm::Type *SrcType = nullptr;
20599 llvm::Value *Alignment = nullptr;
20600 llvm::Value *Mask = nullptr;
20601 llvm::IntegerType *IntType = nullptr;
20602
20603 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
20604 QualType AstType = E->getArg(0)->getType();
20605 if (AstType->isArrayType())
20606 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
20607 else
20608 Src = CGF.EmitScalarExpr(E->getArg(0));
20609 SrcType = Src->getType();
20610 if (SrcType->isPointerTy()) {
20611 IntType = IntegerType::get(
20612 CGF.getLLVMContext(),
20613 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
20614 } else {
20615 assert(SrcType->isIntegerTy());
20616 IntType = cast<llvm::IntegerType>(SrcType);
20617 }
20618 Alignment = CGF.EmitScalarExpr(E->getArg(1));
20619 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
20620 auto *One = llvm::ConstantInt::get(IntType, 1);
20621 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
20622 }
20623};
20624} // namespace
20625
20626/// Generate (x & (y-1)) == 0.
20627RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
20628 BuiltinAlignArgs Args(E, *this);
20629 llvm::Value *SrcAddress = Args.Src;
20630 if (Args.SrcType->isPointerTy())
20631 SrcAddress =
20632 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
20633 return RValue::get(Builder.CreateICmpEQ(
20634 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
20635 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
20636}
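// Illustrative sketch (not part of the upstream source): assuming a 64-bit
// index type, a call such as __builtin_is_aligned(p, 16) lowers roughly to
//   %src_addr   = ptrtoint ptr %p to i64
//   %set_bits   = and i64 %src_addr, 15          ; mask = alignment - 1
//   %is_aligned = icmp eq i64 %set_bits, 0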
20637
20638/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
20639/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
20640/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
20641RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
20642 BuiltinAlignArgs Args(E, *this);
20643 llvm::Value *SrcForMask = Args.Src;
20644 if (AlignUp) {
20645 // When aligning up we have to first add the mask to ensure we go over the
20646 // next alignment value and then align down to the next valid multiple.
20647 // By adding the mask, we ensure that align_up on an already aligned
20648 // value will not change the value.
20649 if (Args.Src->getType()->isPointerTy()) {
20650 if (getLangOpts().isSignedOverflowDefined())
20651 SrcForMask =
20652 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
20653 else
20654 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
20655 /*SignedIndices=*/true,
20656 /*isSubtraction=*/false,
20657 E->getExprLoc(), "over_boundary");
20658 } else {
20659 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
20660 }
20661 }
20662 // Invert the mask to only clear the lower bits.
20663 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
20664 llvm::Value *Result = nullptr;
20665 if (Args.Src->getType()->isPointerTy()) {
20666 Result = Builder.CreateIntrinsic(
20667 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
20668 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
20669 } else {
20670 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
20671 }
20672 assert(Result->getType() == Args.SrcType);
20673 return RValue::get(Result);
20674}
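// Illustrative sketch (not part of the upstream source): for a pointer
// argument, __builtin_align_up(p, 16) avoids a ptrtoint/inttoptr pair and is
// emitted roughly as
//   %over_boundary  = getelementptr inbounds i8, ptr %p, i64 15
//   %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr %over_boundary, i64 -16)
// Adding mask = alignment - 1 first means an already-aligned pointer is left
// unchanged; __builtin_align_down skips the GEP and masks %p directly.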
20675
20676Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
20677 const CallExpr *E) {
20678 switch (BuiltinID) {
20679 case WebAssembly::BI__builtin_wasm_memory_size: {
20680 llvm::Type *ResultType = ConvertType(E->getType());
20681 Value *I = EmitScalarExpr(E->getArg(0));
20682 Function *Callee =
20683 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
20684 return Builder.CreateCall(Callee, I);
20685 }
20686 case WebAssembly::BI__builtin_wasm_memory_grow: {
20687 llvm::Type *ResultType = ConvertType(E->getType());
20688 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
20689 EmitScalarExpr(E->getArg(1))};
20690 Function *Callee =
20691 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
20692 return Builder.CreateCall(Callee, Args);
20693 }
20694 case WebAssembly::BI__builtin_wasm_tls_size: {
20695 llvm::Type *ResultType = ConvertType(E->getType());
20696 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
20697 return Builder.CreateCall(Callee);
20698 }
20699 case WebAssembly::BI__builtin_wasm_tls_align: {
20700 llvm::Type *ResultType = ConvertType(E->getType());
20701 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
20702 return Builder.CreateCall(Callee);
20703 }
20704 case WebAssembly::BI__builtin_wasm_tls_base: {
20705 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
20706 return Builder.CreateCall(Callee);
20707 }
20708 case WebAssembly::BI__builtin_wasm_throw: {
20709 Value *Tag = EmitScalarExpr(E->getArg(0));
20710 Value *Obj = EmitScalarExpr(E->getArg(1));
20711 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
20712 return Builder.CreateCall(Callee, {Tag, Obj});
20713 }
20714 case WebAssembly::BI__builtin_wasm_rethrow: {
20715 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
20716 return Builder.CreateCall(Callee);
20717 }
20718 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
20719 Value *Addr = EmitScalarExpr(E->getArg(0));
20720 Value *Expected = EmitScalarExpr(E->getArg(1));
20721 Value *Timeout = EmitScalarExpr(E->getArg(2));
20722 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
20723 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20724 }
20725 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
20726 Value *Addr = EmitScalarExpr(E->getArg(0));
20727 Value *Expected = EmitScalarExpr(E->getArg(1));
20728 Value *Timeout = EmitScalarExpr(E->getArg(2));
20729 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
20730 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
20731 }
20732 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
20733 Value *Addr = EmitScalarExpr(E->getArg(0));
20734 Value *Count = EmitScalarExpr(E->getArg(1));
20735 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
20736 return Builder.CreateCall(Callee, {Addr, Count});
20737 }
20738 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
20739 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
20740 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
20741 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
20742 Value *Src = EmitScalarExpr(E->getArg(0));
20743 llvm::Type *ResT = ConvertType(E->getType());
20744 Function *Callee =
20745 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
20746 return Builder.CreateCall(Callee, {Src});
20747 }
20748 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
20749 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
20750 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
20751 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
20752 Value *Src = EmitScalarExpr(E->getArg(0));
20753 llvm::Type *ResT = ConvertType(E->getType());
20754 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
20755 {ResT, Src->getType()});
20756 return Builder.CreateCall(Callee, {Src});
20757 }
20758 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
20759 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
20760 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
20761 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
20762 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
20763 Value *Src = EmitScalarExpr(E->getArg(0));
20764 llvm::Type *ResT = ConvertType(E->getType());
20765 Function *Callee =
20766 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
20767 return Builder.CreateCall(Callee, {Src});
20768 }
20769 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
20770 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
20771 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
20772 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
20773 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
20774 Value *Src = EmitScalarExpr(E->getArg(0));
20775 llvm::Type *ResT = ConvertType(E->getType());
20776 Function *Callee =
20777 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
20778 return Builder.CreateCall(Callee, {Src});
20779 }
20780 case WebAssembly::BI__builtin_wasm_min_f32:
20781 case WebAssembly::BI__builtin_wasm_min_f64:
20782 case WebAssembly::BI__builtin_wasm_min_f32x4:
20783 case WebAssembly::BI__builtin_wasm_min_f64x2: {
20784 Value *LHS = EmitScalarExpr(E->getArg(0));
20785 Value *RHS = EmitScalarExpr(E->getArg(1));
20786 Function *Callee =
20787 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
20788 return Builder.CreateCall(Callee, {LHS, RHS});
20789 }
20790 case WebAssembly::BI__builtin_wasm_max_f32:
20791 case WebAssembly::BI__builtin_wasm_max_f64:
20792 case WebAssembly::BI__builtin_wasm_max_f32x4:
20793 case WebAssembly::BI__builtin_wasm_max_f64x2: {
20794 Value *LHS = EmitScalarExpr(E->getArg(0));
20795 Value *RHS = EmitScalarExpr(E->getArg(1));
20796 Function *Callee =
20797 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
20798 return Builder.CreateCall(Callee, {LHS, RHS});
20799 }
20800 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
20801 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
20802 Value *LHS = EmitScalarExpr(E->getArg(0));
20803 Value *RHS = EmitScalarExpr(E->getArg(1));
20804 Function *Callee =
20805 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
20806 return Builder.CreateCall(Callee, {LHS, RHS});
20807 }
20808 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
20809 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
20810 Value *LHS = EmitScalarExpr(E->getArg(0));
20811 Value *RHS = EmitScalarExpr(E->getArg(1));
20812 Function *Callee =
20813 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
20814 return Builder.CreateCall(Callee, {LHS, RHS});
20815 }
20816 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20817 case WebAssembly::BI__builtin_wasm_floor_f32x4:
20818 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20819 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20820 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20821 case WebAssembly::BI__builtin_wasm_floor_f64x2:
20822 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20823 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
20824 unsigned IntNo;
20825 switch (BuiltinID) {
20826 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
20827 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
20828 IntNo = Intrinsic::ceil;
20829 break;
20830 case WebAssembly::BI__builtin_wasm_floor_f32x4:
20831 case WebAssembly::BI__builtin_wasm_floor_f64x2:
20832 IntNo = Intrinsic::floor;
20833 break;
20834 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
20835 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
20836 IntNo = Intrinsic::trunc;
20837 break;
20838 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
20839 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
20840 IntNo = Intrinsic::nearbyint;
20841 break;
20842 default:
20843 llvm_unreachable("unexpected builtin ID");
20844 }
20845 Value *Value = EmitScalarExpr(E->getArg(0));
20846 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20847 return Builder.CreateCall(Callee, Value);
20848 }
20849 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
20850 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
20851 return Builder.CreateCall(Callee);
20852 }
20853 case WebAssembly::BI__builtin_wasm_ref_null_func: {
20854 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
20855 return Builder.CreateCall(Callee);
20856 }
20857 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
20858 Value *Src = EmitScalarExpr(E->getArg(0));
20859 Value *Indices = EmitScalarExpr(E->getArg(1));
20860 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
20861 return Builder.CreateCall(Callee, {Src, Indices});
20862 }
20863 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20864 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20865 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20866 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20867 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20868 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20869 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20870 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
20871 unsigned IntNo;
20872 switch (BuiltinID) {
20873 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
20874 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
20875 IntNo = Intrinsic::sadd_sat;
20876 break;
20877 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
20878 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
20879 IntNo = Intrinsic::uadd_sat;
20880 break;
20881 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
20882 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
20883 IntNo = Intrinsic::wasm_sub_sat_signed;
20884 break;
20885 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
20886 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
20887 IntNo = Intrinsic::wasm_sub_sat_unsigned;
20888 break;
20889 default:
20890 llvm_unreachable("unexpected builtin ID");
20891 }
20892 Value *LHS = EmitScalarExpr(E->getArg(0));
20893 Value *RHS = EmitScalarExpr(E->getArg(1));
20894 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20895 return Builder.CreateCall(Callee, {LHS, RHS});
20896 }
20897 case WebAssembly::BI__builtin_wasm_abs_i8x16:
20898 case WebAssembly::BI__builtin_wasm_abs_i16x8:
20899 case WebAssembly::BI__builtin_wasm_abs_i32x4:
20900 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
20901 Value *Vec = EmitScalarExpr(E->getArg(0));
20902 Value *Neg = Builder.CreateNeg(Vec, "neg");
20903 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
20904 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
20905 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
20906 }
20907 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20908 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20909 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20910 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20911 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20912 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20913 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20914 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20915 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20916 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20917 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20918 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
20919 Value *LHS = EmitScalarExpr(E->getArg(0));
20920 Value *RHS = EmitScalarExpr(E->getArg(1));
20921 Value *ICmp;
20922 switch (BuiltinID) {
20923 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
20924 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
20925 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
20926 ICmp = Builder.CreateICmpSLT(LHS, RHS);
20927 break;
20928 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
20929 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
20930 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
20931 ICmp = Builder.CreateICmpULT(LHS, RHS);
20932 break;
20933 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
20934 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
20935 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
20936 ICmp = Builder.CreateICmpSGT(LHS, RHS);
20937 break;
20938 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
20939 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
20940 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
20941 ICmp = Builder.CreateICmpUGT(LHS, RHS);
20942 break;
20943 default:
20944 llvm_unreachable("unexpected builtin ID");
20945 }
20946 return Builder.CreateSelect(ICmp, LHS, RHS);
20947 }
20948 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
20949 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
20950 Value *LHS = EmitScalarExpr(E->getArg(0));
20951 Value *RHS = EmitScalarExpr(E->getArg(1));
20952 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
20953 ConvertType(E->getType()));
20954 return Builder.CreateCall(Callee, {LHS, RHS});
20955 }
20956 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
20957 Value *LHS = EmitScalarExpr(E->getArg(0));
20958 Value *RHS = EmitScalarExpr(E->getArg(1));
20959 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
20960 return Builder.CreateCall(Callee, {LHS, RHS});
20961 }
20962 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20963 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20964 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20965 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
20966 Value *Vec = EmitScalarExpr(E->getArg(0));
20967 unsigned IntNo;
20968 switch (BuiltinID) {
20969 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
20970 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
20971 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
20972 break;
20973 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
20974 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
20975 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
20976 break;
20977 default:
20978 llvm_unreachable("unexpected builtin ID");
20979 }
20980
20981 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
20982 return Builder.CreateCall(Callee, Vec);
20983 }
20984 case WebAssembly::BI__builtin_wasm_bitselect: {
20985 Value *V1 = EmitScalarExpr(E->getArg(0));
20986 Value *V2 = EmitScalarExpr(E->getArg(1));
20987 Value *C = EmitScalarExpr(E->getArg(2));
20988 Function *Callee =
20989 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
20990 return Builder.CreateCall(Callee, {V1, V2, C});
20991 }
20992 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
20993 Value *LHS = EmitScalarExpr(E->getArg(0));
20994 Value *RHS = EmitScalarExpr(E->getArg(1));
20995 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
20996 return Builder.CreateCall(Callee, {LHS, RHS});
20997 }
20998 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
20999 Value *Vec = EmitScalarExpr(E->getArg(0));
21000 Function *Callee =
21001 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
21002 return Builder.CreateCall(Callee, {Vec});
21003 }
21004 case WebAssembly::BI__builtin_wasm_any_true_v128:
21005 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21006 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21007 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21008 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
21009 unsigned IntNo;
21010 switch (BuiltinID) {
21011 case WebAssembly::BI__builtin_wasm_any_true_v128:
21012 IntNo = Intrinsic::wasm_anytrue;
21013 break;
21014 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21015 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21016 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21017 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
21018 IntNo = Intrinsic::wasm_alltrue;
21019 break;
21020 default:
21021 llvm_unreachable("unexpected builtin ID");
21022 }
21023 Value *Vec = EmitScalarExpr(E->getArg(0));
21024 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
21025 return Builder.CreateCall(Callee, {Vec});
21026 }
21027 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
21028 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
21029 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
21030 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
21031 Value *Vec = EmitScalarExpr(E->getArg(0));
21032 Function *Callee =
21033 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
21034 return Builder.CreateCall(Callee, {Vec});
21035 }
21036 case WebAssembly::BI__builtin_wasm_abs_f32x4:
21037 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
21038 Value *Vec = EmitScalarExpr(E->getArg(0));
21039 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
21040 return Builder.CreateCall(Callee, {Vec});
21041 }
21042 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
21043 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
21044 Value *Vec = EmitScalarExpr(E->getArg(0));
21045 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
21046 return Builder.CreateCall(Callee, {Vec});
21047 }
21048 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21049 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21050 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21051 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
21052 Value *Low = EmitScalarExpr(E->getArg(0));
21053 Value *High = EmitScalarExpr(E->getArg(1));
21054 unsigned IntNo;
21055 switch (BuiltinID) {
21056 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21057 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21058 IntNo = Intrinsic::wasm_narrow_signed;
21059 break;
21060 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21061 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
21062 IntNo = Intrinsic::wasm_narrow_unsigned;
21063 break;
21064 default:
21065 llvm_unreachable("unexpected builtin ID");
21066 }
21067 Function *Callee =
21068 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
21069 return Builder.CreateCall(Callee, {Low, High});
21070 }
21071 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21072 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
21073 Value *Vec = EmitScalarExpr(E->getArg(0));
21074 unsigned IntNo;
21075 switch (BuiltinID) {
21076 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21077 IntNo = Intrinsic::fptosi_sat;
21078 break;
21079 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
21080 IntNo = Intrinsic::fptoui_sat;
21081 break;
21082 default:
21083 llvm_unreachable("unexpected builtin ID");
21084 }
21085 llvm::Type *SrcT = Vec->getType();
21086 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
21087 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
21088 Value *Trunc = Builder.CreateCall(Callee, Vec);
21089 Value *Splat = Constant::getNullValue(TruncT);
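// The shuffle below concatenates the <2 x i32> truncation result with the
// zero vector, producing the <4 x i32> value [t0, t1, 0, 0] that the
// *_zero builtins return.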
21090 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
21091 }
21092 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
21093 Value *Ops[18];
21094 size_t OpIdx = 0;
21095 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
21096 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
21097 while (OpIdx < 18) {
21098 std::optional<llvm::APSInt> LaneConst =
21099 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
21100 assert(LaneConst && "Constant arg isn't actually constant?");
21101 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
21102 }
21103 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
21104 return Builder.CreateCall(Callee, Ops);
21105 }
21106 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21107 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21108 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21109 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
21110 Value *A = EmitScalarExpr(E->getArg(0));
21111 Value *B = EmitScalarExpr(E->getArg(1));
21112 Value *C = EmitScalarExpr(E->getArg(2));
21113 unsigned IntNo;
21114 switch (BuiltinID) {
21115 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21116 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21117 IntNo = Intrinsic::wasm_relaxed_madd;
21118 break;
21119 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21120 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
21121 IntNo = Intrinsic::wasm_relaxed_nmadd;
21122 break;
21123 default:
21124 llvm_unreachable("unexpected builtin ID");
21125 }
21126 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
21127 return Builder.CreateCall(Callee, {A, B, C});
21128 }
21129 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
21130 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
21131 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
21132 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
21133 Value *A = EmitScalarExpr(E->getArg(0));
21134 Value *B = EmitScalarExpr(E->getArg(1));
21135 Value *C = EmitScalarExpr(E->getArg(2));
21136 Function *Callee =
21137 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
21138 return Builder.CreateCall(Callee, {A, B, C});
21139 }
21140 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
21141 Value *Src = EmitScalarExpr(E->getArg(0));
21142 Value *Indices = EmitScalarExpr(E->getArg(1));
21143 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
21144 return Builder.CreateCall(Callee, {Src, Indices});
21145 }
21146 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21147 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21148 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21149 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
21150 Value *LHS = EmitScalarExpr(E->getArg(0));
21151 Value *RHS = EmitScalarExpr(E->getArg(1));
21152 unsigned IntNo;
21153 switch (BuiltinID) {
21154 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21155 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21156 IntNo = Intrinsic::wasm_relaxed_min;
21157 break;
21158 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21159 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
21160 IntNo = Intrinsic::wasm_relaxed_max;
21161 break;
21162 default:
21163 llvm_unreachable("unexpected builtin ID");
21164 }
21165 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
21166 return Builder.CreateCall(Callee, {LHS, RHS});
21167 }
21168 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21169 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21170 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21171 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
21172 Value *Vec = EmitScalarExpr(E->getArg(0));
21173 unsigned IntNo;
21174 switch (BuiltinID) {
21175 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21176 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
21177 break;
21178 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21179 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
21180 break;
21181 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21182 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
21183 break;
21184 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
21185 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
21186 break;
21187 default:
21188 llvm_unreachable("unexpected builtin ID");
21189 }
21190 Function *Callee = CGM.getIntrinsic(IntNo);
21191 return Builder.CreateCall(Callee, {Vec});
21192 }
21193 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
21194 Value *LHS = EmitScalarExpr(E->getArg(0));
21195 Value *RHS = EmitScalarExpr(E->getArg(1));
21196 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
21197 return Builder.CreateCall(Callee, {LHS, RHS});
21198 }
21199 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
21200 Value *LHS = EmitScalarExpr(E->getArg(0));
21201 Value *RHS = EmitScalarExpr(E->getArg(1));
21202 Function *Callee =
21203 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
21204 return Builder.CreateCall(Callee, {LHS, RHS});
21205 }
21206 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
21207 Value *LHS = EmitScalarExpr(E->getArg(0));
21208 Value *RHS = EmitScalarExpr(E->getArg(1));
21209 Value *Acc = EmitScalarExpr(E->getArg(2));
21210 Function *Callee =
21211 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
21212 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21213 }
21214 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
21215 Value *LHS = EmitScalarExpr(E->getArg(0));
21216 Value *RHS = EmitScalarExpr(E->getArg(1));
21217 Value *Acc = EmitScalarExpr(E->getArg(2));
21218 Function *Callee =
21219 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
21220 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21221 }
21222 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
21223 Value *Addr = EmitScalarExpr(E->getArg(0));
21224 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
21225 return Builder.CreateCall(Callee, {Addr});
21226 }
21227 case WebAssembly::BI__builtin_wasm_storef16_f32: {
21228 Value *Val = EmitScalarExpr(E->getArg(0));
21229 Value *Addr = EmitScalarExpr(E->getArg(1));
21230 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
21231 return Builder.CreateCall(Callee, {Val, Addr});
21232 }
21233 case WebAssembly::BI__builtin_wasm_table_get: {
21234 assert(E->getArg(0)->getType()->isArrayType());
21235 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21236 Value *Index = EmitScalarExpr(E->getArg(1));
21237 Function *Callee;
21238 if (E->getType().isWebAssemblyExternrefType())
21239 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
21240 else if (E->getType().isWebAssemblyFuncrefType())
21241 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
21242 else
21243 llvm_unreachable(
21244 "Unexpected reference type for __builtin_wasm_table_get");
21245 return Builder.CreateCall(Callee, {Table, Index});
21246 }
21247 case WebAssembly::BI__builtin_wasm_table_set: {
21248 assert(E->getArg(0)->getType()->isArrayType());
21249 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21250 Value *Index = EmitScalarExpr(E->getArg(1));
21251 Value *Val = EmitScalarExpr(E->getArg(2));
21252 Function *Callee;
21253 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21254 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
21255 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21256 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
21257 else
21258 llvm_unreachable(
21259 "Unexpected reference type for __builtin_wasm_table_set");
21260 return Builder.CreateCall(Callee, {Table, Index, Val});
21261 }
21262 case WebAssembly::BI__builtin_wasm_table_size: {
21263 assert(E->getArg(0)->getType()->isArrayType());
21264 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21265 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
21266 return Builder.CreateCall(Callee, Value);
21267 }
21268 case WebAssembly::BI__builtin_wasm_table_grow: {
21269 assert(E->getArg(0)->getType()->isArrayType());
21270 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21271 Value *Val = EmitScalarExpr(E->getArg(1));
21272 Value *NElems = EmitScalarExpr(E->getArg(2));
21273
21274 Function *Callee;
21275 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
21276 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
21277 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21278 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21279 else
21280 llvm_unreachable(
21281 "Unexpected reference type for __builtin_wasm_table_grow");
21282
21283 return Builder.CreateCall(Callee, {Table, Val, NElems});
21284 }
21285 case WebAssembly::BI__builtin_wasm_table_fill: {
21286 assert(E->getArg(0)->getType()->isArrayType());
21287 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21288 Value *Index = EmitScalarExpr(E->getArg(1));
21289 Value *Val = EmitScalarExpr(E->getArg(2));
21290 Value *NElems = EmitScalarExpr(E->getArg(3));
21291
21292 Function *Callee;
21293 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21294 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
21295 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21296 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21297 else
21298 llvm_unreachable(
21299 "Unexpected reference type for __builtin_wasm_table_fill");
21300
21301 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
21302 }
21303 case WebAssembly::BI__builtin_wasm_table_copy: {
21304 assert(E->getArg(0)->getType()->isArrayType());
21305 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21306 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
21307 Value *DstIdx = EmitScalarExpr(E->getArg(2));
21308 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
21309 Value *NElems = EmitScalarExpr(E->getArg(4));
21310
21311 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
21312
21313 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
21314 }
21315 default:
21316 return nullptr;
21317 }
21318}
21319
21320static std::pair<Intrinsic::ID, unsigned>
21321getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
21322 struct Info {
21323 unsigned BuiltinID;
21324 Intrinsic::ID IntrinsicID;
21325 unsigned VecLen;
21326 };
21327 static Info Infos[] = {
21328#define CUSTOM_BUILTIN_MAPPING(x,s) \
21329 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
21330 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
21331 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
21332 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
21333 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
21334 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
21335 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
21336 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
21337 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
21338 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
21339 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
21340 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
21341 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
21342 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
21343 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
21344 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
21345 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
21346 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
21347 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
21348 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
21349 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
21350 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
21351 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
21352 // Legacy builtins that take a vector in place of a vector predicate.
21353 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
21354 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
21355 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
21356 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
21357 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
21358 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
21359 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
21360 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
21361#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
21362#undef CUSTOM_BUILTIN_MAPPING
21363 };
21364
21365 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
21366 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
21367 (void)SortOnce;
21368
21369 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
21370 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
21371 return {Intrinsic::not_intrinsic, 0};
21372
21373 return {F->IntrinsicID, F->VecLen};
21374}
21375
21376Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
21377 const CallExpr *E) {
21378 Intrinsic::ID ID;
21379 unsigned VecLen;
21380 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
21381
21382 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
21383 // The base pointer is passed by address, so it needs to be loaded.
21384 Address A = EmitPointerWithAlignment(E->getArg(0));
21385 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
21386 llvm::Value *Base = Builder.CreateLoad(BP);
21387 // The treatment of both loads and stores is the same: the arguments for
21388 // the builtin are the same as the arguments for the intrinsic.
21389 // Load:
21390 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
21391 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
21392 // Store:
21393 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
21394 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
21395 llvm::SmallVector<llvm::Value*,5> Ops = { Base };
21396 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
21397 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21398
21399 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
21400 // The load intrinsics generate two results (Value, NewBase), stores
21401 // generate one (NewBase). The new base address needs to be stored.
21402 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
21403 : Result;
21404 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
21405 Address Dest = EmitPointerWithAlignment(E->getArg(0));
21406 llvm::Value *RetVal =
21407 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
21408 if (IsLoad)
21409 RetVal = Builder.CreateExtractValue(Result, 0);
21410 return RetVal;
21411 };
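// Rough usage sketch (illustrative only; exact Hexagon parameter types
// abbreviated): for a circular load such as __builtin_HEXAGON_L2_loadri_pci
// the base pointer is passed by address and updated in place:
//   int v = __builtin_HEXAGON_L2_loadri_pci(&base, inc, mod, start);
//   // v holds the loaded element; base now holds the post-incremented,
//   // wrapped address for the next iteration.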
21412
21413 // Handle the conversion of bit-reverse load intrinsics to bit code.
21414 // The intrinsic call emitted below only reads from memory; the write to
21415 // memory is handled by the explicit store instruction.
21416 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
21417 // The intrinsic generates one result, which is the new value for the base
21418 // pointer. It needs to be returned. The result of the load instruction is
21419 // passed to intrinsic by address, so the value needs to be stored.
21420 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
21421
21422 // Expressions like &(*pt++) increment the pointer on each evaluation, so
21423 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
21424 // only once per call.
21425 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
21426 DestAddr = DestAddr.withElementType(Int8Ty);
21427 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
21428
21429 // Operands are Base, Dest, Modifier.
21430 // The intrinsic format in LLVM IR is defined as
21431 // { ValueType, i8* } (i8*, i32).
21432 llvm::Value *Result = Builder.CreateCall(
21433 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
21434
21435 // The value needs to be stored as the variable is passed by reference.
21436 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
21437
21438 // The stored value needs to be truncated to fit the destination type.
21439 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
21440 // to be handled with stores of the respective destination type.
21441 DestVal = Builder.CreateTrunc(DestVal, DestTy);
21442
21443 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
21444 // The updated value of the base pointer is returned.
21445 return Builder.CreateExtractValue(Result, 1);
21446 };
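// Rough usage sketch (illustrative only; exact parameter and return types
// abbreviated): a bit-reverse load such as __builtin_brev_ldw takes the
// current base address, a pointer that receives the loaded element, and the
// modifier, and returns the updated base address:
//   int elem;
//   void *next = __builtin_brev_ldw(cur, &elem, mod);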
21447
21448 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
21449 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
21450 : Intrinsic::hexagon_V6_vandvrt;
21451 return Builder.CreateCall(CGM.getIntrinsic(ID),
21452 {Vec, Builder.getInt32(-1)});
21453 };
21454 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
21455 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
21456 : Intrinsic::hexagon_V6_vandqrt;
21457 return Builder.CreateCall(CGM.getIntrinsic(ID),
21458 {Pred, Builder.getInt32(-1)});
21459 };
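// V2Q and Q2V above bridge between a plain HVX vector and the HVX predicate
// type by and-ing with an all-ones scalar (V6_vandvrt / V6_vandqrt), using
// the _128B variants when the 128-byte vector length is in effect.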
21460
21461 switch (BuiltinID) {
21462 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
21463 // and the corresponding C/C++ builtins use loads/stores to update
21464 // the predicate.
21465 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
21466 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
21467 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
21468 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
21469 // Get the type from the 0-th argument.
21470 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21471 Address PredAddr =
21472 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21473 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
21474 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21475 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
21476
21477 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21478 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21479 PredAddr.getAlignment());
21480 return Builder.CreateExtractValue(Result, 0);
21481 }
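// Rough usage sketch (illustrative only, assuming the usual HVX_Vector /
// HVX_VectorPred typedefs): the carry predicate lives in memory and is both
// read and updated by the call:
//   HVX_VectorPred carry = ...;
//   HVX_Vector sum = __builtin_HEXAGON_V6_vaddcarry(a, b, &carry);
//   // 'carry' now holds the carry-out for the next vector chunk.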
21482 // These are identical to the builtins above, except they don't consume
21483 // input carry, only generate carry-out. Since they still produce two
21484 // outputs, generate the store of the predicate, but no load.
21485 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
21486 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
21487 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
21488 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
21489 // Get the type from the 0-th argument.
21490 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21491 Address PredAddr =
21492 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21493 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21494 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21495
21496 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21497 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21498 PredAddr.getAlignment());
21499 return Builder.CreateExtractValue(Result, 0);
21500 }
21501
21502 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
21503 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
21504 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
21505 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
21506 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
21507 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
21508 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
21509 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
21510 SmallVector<llvm::Value*,4> Ops;
21511 const Expr *PredOp = E->getArg(0);
21512 // There will be an implicit cast to a boolean vector. Strip it.
21513 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
21514 if (Cast->getCastKind() == CK_BitCast)
21515 PredOp = Cast->getSubExpr();
21516 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
21517 }
21518 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
21519 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21520 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
21521 }
21522
21523 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
21524 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
21525 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
21526 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
21527 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
21528 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
21529 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
21530 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
21531 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
21532 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
21533 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
21534 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
21535 return MakeCircOp(ID, /*IsLoad=*/true);
21536 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
21537 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
21538 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
21539 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
21540 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
21541 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
21542 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
21543 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
21544 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
21545 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
21546 return MakeCircOp(ID, /*IsLoad=*/false);
21547 case Hexagon::BI__builtin_brev_ldub:
21548 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
21549 case Hexagon::BI__builtin_brev_ldb:
21550 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
21551 case Hexagon::BI__builtin_brev_lduh:
21552 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
21553 case Hexagon::BI__builtin_brev_ldh:
21554 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
21555 case Hexagon::BI__builtin_brev_ldw:
21556 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
21557 case Hexagon::BI__builtin_brev_ldd:
21558 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
21559 } // switch
21560
21561 return nullptr;
21562}
21563
21564Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
21565 const CallExpr *E,
21566 ReturnValueSlot ReturnValue) {
21567 SmallVector<Value *, 4> Ops;
21568 llvm::Type *ResultType = ConvertType(E->getType());
21569
21570 // Find out if any arguments are required to be integer constant expressions.
21571 unsigned ICEArguments = 0;
21572 ASTContext::GetBuiltinTypeError Error;
21573 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
21574 if (Error == ASTContext::GE_Missing_type) {
21575 // Vector intrinsics don't have a type string.
21576 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
21577 BuiltinID <= clang::RISCV::LastRVVBuiltin);
21578 ICEArguments = 0;
21579 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
21580 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
21581 ICEArguments = 1 << 1;
21582 } else {
21583 assert(Error == ASTContext::GE_None && "Unexpected error");
21584 }
21585
21586 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
21587 ICEArguments |= (1 << 1);
21588 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
21589 ICEArguments |= (1 << 2);
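// ICEArguments is a bit mask: bit i being set means argument i of the builtin
// must be an integer constant expression (e.g. 1 << 1 above marks the optional
// domain operand of __builtin_riscv_ntl_load).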
21590
21591 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
21592 // Handle aggregate argument, namely RVV tuple types in segment load/store
21593 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
21594 LValue L = EmitAggExprToLValue(E->getArg(i));
21595 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress(*this));
21596 Ops.push_back(AggValue);
21597 continue;
21598 }
21599 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
21600 }
21601
21602 Intrinsic::ID ID = Intrinsic::not_intrinsic;
21603 unsigned NF = 1;
21604 // The 0th bit simulates the `vta` of RVV
21605 // The 1st bit simulates the `vma` of RVV
21606 constexpr unsigned RVV_VTA = 0x1;
21607 constexpr unsigned RVV_VMA = 0x2;
21608 int PolicyAttrs = 0;
21609 bool IsMasked = false;
21610
21611 // Required for overloaded intrinsics.
21612 SmallVector<llvm::Type *, 2> IntrinsicTypes;
21613 switch (BuiltinID) {
21614 default: llvm_unreachable("unexpected builtin ID");
21615 case RISCV::BI__builtin_riscv_orc_b_32:
21616 case RISCV::BI__builtin_riscv_orc_b_64:
21617 case RISCV::BI__builtin_riscv_clz_32:
21618 case RISCV::BI__builtin_riscv_clz_64:
21619 case RISCV::BI__builtin_riscv_ctz_32:
21620 case RISCV::BI__builtin_riscv_ctz_64:
21621 case RISCV::BI__builtin_riscv_clmul_32:
21622 case RISCV::BI__builtin_riscv_clmul_64:
21623 case RISCV::BI__builtin_riscv_clmulh_32:
21624 case RISCV::BI__builtin_riscv_clmulh_64:
21625 case RISCV::BI__builtin_riscv_clmulr_32:
21626 case RISCV::BI__builtin_riscv_clmulr_64:
21627 case RISCV::BI__builtin_riscv_xperm4_32:
21628 case RISCV::BI__builtin_riscv_xperm4_64:
21629 case RISCV::BI__builtin_riscv_xperm8_32:
21630 case RISCV::BI__builtin_riscv_xperm8_64:
21631 case RISCV::BI__builtin_riscv_brev8_32:
21632 case RISCV::BI__builtin_riscv_brev8_64:
21633 case RISCV::BI__builtin_riscv_zip_32:
21634 case RISCV::BI__builtin_riscv_unzip_32: {
21635 switch (BuiltinID) {
21636 default: llvm_unreachable("unexpected builtin ID");
21637 // Zbb
21638 case RISCV::BI__builtin_riscv_orc_b_32:
21639 case RISCV::BI__builtin_riscv_orc_b_64:
21640 ID = Intrinsic::riscv_orc_b;
21641 break;
21642 case RISCV::BI__builtin_riscv_clz_32:
21643 case RISCV::BI__builtin_riscv_clz_64: {
21644 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
21645 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
21646 if (Result->getType() != ResultType)
21647 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
21648 "cast");
21649 return Result;
21650 }
21651 case RISCV::BI__builtin_riscv_ctz_32:
21652 case RISCV::BI__builtin_riscv_ctz_64: {
21653 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
21654 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
21655 if (Result->getType() != ResultType)
21656 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
21657 "cast");
21658 return Result;
21659 }
21660
21661 // Zbc
21662 case RISCV::BI__builtin_riscv_clmul_32:
21663 case RISCV::BI__builtin_riscv_clmul_64:
21664 ID = Intrinsic::riscv_clmul;
21665 break;
21666 case RISCV::BI__builtin_riscv_clmulh_32:
21667 case RISCV::BI__builtin_riscv_clmulh_64:
21668 ID = Intrinsic::riscv_clmulh;
21669 break;
21670 case RISCV::BI__builtin_riscv_clmulr_32:
21671 case RISCV::BI__builtin_riscv_clmulr_64:
21672 ID = Intrinsic::riscv_clmulr;
21673 break;
21674
21675 // Zbkx
21676 case RISCV::BI__builtin_riscv_xperm8_32:
21677 case RISCV::BI__builtin_riscv_xperm8_64:
21678 ID = Intrinsic::riscv_xperm8;
21679 break;
21680 case RISCV::BI__builtin_riscv_xperm4_32:
21681 case RISCV::BI__builtin_riscv_xperm4_64:
21682 ID = Intrinsic::riscv_xperm4;
21683 break;
21684
21685 // Zbkb
21686 case RISCV::BI__builtin_riscv_brev8_32:
21687 case RISCV::BI__builtin_riscv_brev8_64:
21688 ID = Intrinsic::riscv_brev8;
21689 break;
21690 case RISCV::BI__builtin_riscv_zip_32:
21691 ID = Intrinsic::riscv_zip;
21692 break;
21693 case RISCV::BI__builtin_riscv_unzip_32:
21694 ID = Intrinsic::riscv_unzip;
21695 break;
21696 }
21697
21698 IntrinsicTypes = {ResultType};
21699 break;
21700 }
21701
21702 // Zk builtins
21703
21704 // Zknh
21705 case RISCV::BI__builtin_riscv_sha256sig0:
21706 ID = Intrinsic::riscv_sha256sig0;
21707 break;
21708 case RISCV::BI__builtin_riscv_sha256sig1:
21709 ID = Intrinsic::riscv_sha256sig1;
21710 break;
21711 case RISCV::BI__builtin_riscv_sha256sum0:
21712 ID = Intrinsic::riscv_sha256sum0;
21713 break;
21714 case RISCV::BI__builtin_riscv_sha256sum1:
21715 ID = Intrinsic::riscv_sha256sum1;
21716 break;
21717
21718 // Zksed
21719 case RISCV::BI__builtin_riscv_sm4ks:
21720 ID = Intrinsic::riscv_sm4ks;
21721 break;
21722 case RISCV::BI__builtin_riscv_sm4ed:
21723 ID = Intrinsic::riscv_sm4ed;
21724 break;
21725
21726 // Zksh
21727 case RISCV::BI__builtin_riscv_sm3p0:
21728 ID = Intrinsic::riscv_sm3p0;
21729 break;
21730 case RISCV::BI__builtin_riscv_sm3p1:
21731 ID = Intrinsic::riscv_sm3p1;
21732 break;
21733
21734 // Zihintntl
21735 case RISCV::BI__builtin_riscv_ntl_load: {
21736 llvm::Type *ResTy = ConvertType(E->getType());
21737 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21738 if (Ops.size() == 2)
21739 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
21740
21741 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21742 getLLVMContext(),
21743 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
21744 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21745 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
21746
21747 int Width;
21748 if(ResTy->isScalableTy()) {
21749 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
21750 llvm::Type *ScalarTy = ResTy->getScalarType();
21751 Width = ScalarTy->getPrimitiveSizeInBits() *
21752 SVTy->getElementCount().getKnownMinValue();
21753 } else
21754 Width = ResTy->getPrimitiveSizeInBits();
21755 LoadInst *Load = Builder.CreateLoad(
21756 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
21757
21758 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
21759 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
21760 RISCVDomainNode);
21761
21762 return Load;
21763 }
21764 case RISCV::BI__builtin_riscv_ntl_store: {
21765 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
21766 if (Ops.size() == 3)
21767 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
21768
21769 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
21770 getLLVMContext(),
21771 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
21772 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
21773 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
21774
21775 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
21776 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
21777 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
21778 RISCVDomainNode);
21779
21780 return Store;
21781 }
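// Rough usage sketch (illustrative only): both hint builtins wrap an ordinary
// load/store and attach !nontemporal plus "riscv-nontemporal-domain" metadata:
//   int x = __builtin_riscv_ntl_load(p, __RISCV_NTLH_ALL);
//   __builtin_riscv_ntl_store(q, x, __RISCV_NTLH_ALL);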
21782
21783 // Vector builtins are handled from here.
21784#include "clang/Basic/riscv_vector_builtin_cg.inc"
21785 // SiFive Vector builtins are handled from here.
21786#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
21787 }
21788
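// Common tail: by the time control reaches here, ID names the chosen RISC-V
// intrinsic and, for overloaded intrinsics, IntrinsicTypes supplies the
// concrete overload types; cases that need custom IR (e.g. the Zihintntl
// builtins above) returned earlier.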
21789 assert(ID != Intrinsic::not_intrinsic);
21790
21791 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
21792 return Builder.CreateCall(F, Ops, "");
21793}
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3285
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8353
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9189
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:211
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1193
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6379
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:390
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2059
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2025
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6248
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2516
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9159
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:803
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9152
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7394
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9379
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7406
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7376
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8421
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2394
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:443
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:736
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:493
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6375
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7407
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1391
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7411
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:863
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:630
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2422
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:467
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:771
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9148
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7408
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9226
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6137
constexpr unsigned SVEBitsPerBlock
Definition: CGBuiltin.cpp:9663
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1776
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7218
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6372
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * emitBinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:595
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:510
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:263
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9253
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1622
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1476
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1254
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6384
@ UnsignedAlts
Definition: CGBuiltin.cpp:6342
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6347
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6351
@ Use64BitVectors
Definition: CGBuiltin.cpp:6344
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6339
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6349
@ InventFloatType
Definition: CGBuiltin.cpp:6341
@ AddRetType
Definition: CGBuiltin.cpp:6334
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6336
@ VectorizeRetType
Definition: CGBuiltin.cpp:6338
@ VectorRet
Definition: CGBuiltin.cpp:6348
@ Add1ArgType
Definition: CGBuiltin.cpp:6335
@ Use128BitVectors
Definition: CGBuiltin.cpp:6345
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:676
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:667
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2252
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:811
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1330
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9215
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:547
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:757
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2306
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
Definition: CGBuiltin.cpp:9665
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2445
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6207
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:254
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9241
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:182
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8332
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:72
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8324
@ VolatileRead
Definition: CGBuiltin.cpp:8326
@ NormalRead
Definition: CGBuiltin.cpp:8325
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:348
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:193
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2294
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:308
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:527
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:171
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9181
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7403
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6703
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:235
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:721
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:7469
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:618
static Value * emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:606
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:648
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:479
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2527
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1203
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2260
static Value * emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:585
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:566
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:705
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:246
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1239
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8251
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2019
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:454
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7405
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:6978
CodeGenFunction::ComplexPairTy ComplexPairTy
const Environment & Env
Definition: HTMLLogger.cpp:148
unsigned Iter
Definition: HTMLLogger.cpp:154
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:143
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
SourceRange Range
Definition: SemaObjC.cpp:754
SourceLocation Loc
Definition: SemaObjC.cpp:755
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:423
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:182
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1118
IdentifierTable & Idents
Definition: ASTContext.h:644
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:646
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
TypeInfo getTypeInfo(const Type *T) const
Get the size and alignment of the specified complete type in bits.
QualType getObjCIdType() const
Represents the Objective-CC id type.
Definition: ASTContext.h:2064
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2618
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2341
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1091
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:757
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2243
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2246
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
CharUnits getSize() const
getSize - Get the record size in characters.
Definition: RecordLayout.h:193
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3518
QualType getElementType() const
Definition: Type.h:3530
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:149
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:247
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:251
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:122
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2820
Expr * getArg(unsigned Arg)
getArg - Return the specified argument.
Definition: Expr.h:3011
bool hasStoredFPFeatures() const
Definition: Expr.h:2982
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Expr.cpp:1638
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition: Expr.h:2990
Expr * getCallee()
Definition: Expr.h:2970
FPOptionsOverride getFPFeatures() const
Definition: Expr.h:3102
unsigned getNumArgs() const
getNumArgs - Return the number of actual arguments to this call.
Definition: Expr.h:2998
arg_range arguments()
Definition: Expr.h:3059
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition: Expr.cpp:1590
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:111
static Address invalid()
Definition: Address.h:153
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:220
CharUnits getAlignment() const
Definition: Address.h:166
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:184
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:241
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:234
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:176
An aggregate value slot.
Definition: CGValue.h:512
Address getAddress() const
Definition: CGValue.h:652
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:864
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:881
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:143
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:388
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:203
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:380
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:180
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:404
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:151
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:397
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:169
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:315
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:436
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:364
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:128
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:189
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:261
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:345
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:62
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:129
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
CGFunctionInfo - Class to encapsulate the information about a function definition.
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:258
void add(RValue rvalue, QualType type)
Definition: CGCall.h:282
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * FormSVEBuiltinResult(llvm::Value *Call)
FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider vector.
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
@ Default
! No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
llvm::Value * EmitCountedByFieldExpr(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E)
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside presereved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
const FieldDecl * FindCountedByField(const FieldDecl *FD)
Find the FieldDecl specified in a FAM's "counted_by" attribute.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:97
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1632
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:680
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:668
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:352
LValue - This represents an lvalue references.
Definition: CGValue.h:181
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:427
Address getAddress(CodeGenFunction &CGF) const
Definition: CGValue.h:370
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:365
void setNontemporal(bool Value)
Definition: CGValue.h:322
llvm::Value * getPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:361
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:41
static RValue getIgnored()
Definition: CGValue.h:92
static RValue get(llvm::Value *V)
Definition: CGValue.h:97
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:124
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:107
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:70
An abstract representation of an aligned address.
Definition: Address.h:41
llvm::Value * getPointer() const
Definition: Address.h:65
static RawAddress invalid()
Definition: Address.h:60
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:356
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:151
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:141
const T & getABIInfo() const
Definition: TargetInfo.h:56
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:123
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:160
Complex values, per C99 6.2.5p11.
Definition: Type.h:3086
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4167
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:1964
T * getAttr() const
Definition: DeclBase.h:579
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:599
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:227
DeclContext * getDeclContext()
Definition: DeclBase.h:454
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:413
bool hasAttr() const
Definition: DeclBase.h:583
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3064
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3059
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3055
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:825
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3556
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3039
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3923
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available where the pointer points.
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_function_start, return that declaration.
Definition: Expr.cpp:226
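Taken together, the Expr evaluation entries above (IgnoreParenCasts, getIntegerConstantExpr, EvaluateAsInt, HasSideEffects) are what allows a builtin argument to be folded at compile time. A hedged sketch of how they are commonly combined; the helper name is an assumption for illustration only:

#include "clang/AST/ASTContext.h"
#include "clang/AST/Expr.h"
#include "llvm/ADT/APSInt.h"
#include <optional>

// Hypothetical helper: fold a builtin argument to an integer if possible.
static std::optional<llvm::APSInt>
tryFoldIntArgument(const clang::Expr *Arg, const clang::ASTContext &Ctx) {
  const clang::Expr *E = Arg->IgnoreParenCasts(); // strip parens and casts first
  // Strict integer-constant-expression path; fails cleanly on non-constants.
  if (std::optional<llvm::APSInt> ICE = E->getIntegerConstantExpr(Ctx))
    return ICE;
  // General constant folding, but reject anything with side effects.
  clang::Expr::EvalResult Result;
  if (!E->HasSideEffects(Ctx) && E->EvaluateAsInt(Result, Ctx))
    return Result.Val.getInt();
  return std::nullopt;
}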
Represents difference between two FPOptions values.
Definition: LangOptions.h:915
Represents a member of a struct/union/class.
Definition: Decl.h:3057
Represents a function declaration or definition.
Definition: Decl.h:1971
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2706
Represents a prototype with parameter type info, e.g.
Definition: Type.h:4656
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5379
StringRef getName() const
Get the name of the identifier for this declaration as a StringRef.
Definition: Decl.h:276
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ constructor, Objective-C selector, etc.).
Definition: Decl.h:292
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL 2.0 pipe type.
Definition: Type.h:7208
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3139
QualType getPointeeType() const
Definition: Type.h:3149
A (possibly-)qualified type.
Definition: Type.h:940
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:7443
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2849
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:7485
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2845
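The QualType queries above (volatility, address space, WebAssembly reference types) typically decide whether a shortcut lowering is safe for a pointer operand. A hedged sketch, assuming PtrTy is the QualType of a pointer-typed argument; the predicate name and policy are illustrative:

#include "clang/AST/Type.h"
#include "clang/Basic/AddressSpaces.h"

// Hypothetical predicate: does this pointer operand need conservative lowering?
static bool needsConservativeLowering(clang::QualType PtrTy) {
  clang::QualType Pointee = PtrTy->getPointeeType();
  if (Pointee.isNull()) // not actually a pointer-like type
    return true;
  // Volatile pointees and non-default address spaces disable shortcuts.
  return Pointee.isVolatileQualified() ||
         Pointee.getAddressSpace() != clang::LangAS::Default;
}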
The collection of all-type qualifiers we support.
Definition: Type.h:318
Represents a struct/union/class.
Definition: Decl.h:4168
field_range fields() const
Definition: Decl.h:4374
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
bool isUnion() const
Definition: Decl.h:3790
Exposes information about the current target.
Definition: TargetInfo.h:218
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:312
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:687
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1256
bool isLittleEndian() const
Definition: TargetInfo.h:1651
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:851
bool isBigEndian() const
Definition: TargetInfo.h:1650
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1657
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the target machine you are compiling for.
Definition: TargetInfo.h:723
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
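The TargetInfo hooks above let builtin expansion adapt to the target. A small hedged sketch of two typical queries (the helper names and the bits-to-bytes conversion policy are assumptions for illustration): getSuitableAlign() is reported in bits, and hasLegalHalfType() gates native _Float16 lowering.

#include "clang/Basic/TargetInfo.h"
#include "llvm/Support/Alignment.h"

// Hypothetical helpers built on the TargetInfo queries listed above.
static llvm::Align suitableAllocaAlignment(const clang::TargetInfo &TI) {
  // getSuitableAlign() is expressed in bits; llvm::Align expects bytes.
  return llvm::Align(TI.getSuitableAlign() / 8);
}

static bool canUseNativeHalf(const clang::TargetInfo &TI) {
  // Emit native half-precision arithmetic only when _Float16 is legal.
  return TI.hasLegalHalfType();
}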
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1813
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or because it is the injected-class-name type of a class template or class template partial specialization.
Definition: Type.cpp:1871
bool isBlockPointerType() const
Definition: Type.h:7620
bool isVoidType() const
Definition: Type.h:7905
bool isBooleanType() const
Definition: Type.h:8033
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long, long long], or an enum decl which has a signed representation.
Definition: Type.cpp:2135
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:667
bool isArrayType() const
Definition: Type.h:7678
bool isCountAttributedType() const
Definition: Type.cpp:684
bool isPointerType() const
Definition: Type.h:7612
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:7945
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8193
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:695
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8020
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e.g., it is an unsigned integer type or a vector of unsigned integers.
Definition: Type.cpp:2225
bool isBitIntType() const
Definition: Type.h:7840
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g., it is a signed integer type or a vector of signed integers.
Definition: Type.cpp:2175
bool isObjCObjectPointerType() const
Definition: Type.h:7744
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g., it is a floating-point type or a vector of floating-point types.
Definition: Type.cpp:2247
bool isFloatingType() const
Definition: Type.cpp:2238
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true for _Bool], or an enum decl which has an unsigned representation.
Definition: Type.cpp:2185
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8126
bool isRecordType() const
Definition: Type.h:7706
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2457
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2514
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1875
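The Type predicates and the getAs<>/castAs<> member templates above drive most operand classification during builtin lowering. A hedged sketch; the classification buckets and the function name are illustrative, not this file's logic:

#include "clang/AST/Type.h"

// Hypothetical classifier using the Type queries listed above.
static const char *classifyOperandType(clang::QualType T) {
  if (const auto *PT = T->getAs<clang::PointerType>())
    return PT->getPointeeType()->isVoidType() ? "pointer-to-void" : "pointer";
  if (T->isIntegerType())
    return T->hasSignedIntegerRepresentation() ? "signed-int" : "unsigned-int";
  if (T->isFloatingType())
    return "floating";
  return "other";
}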
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it is a function designator), or an enum constant.
Definition: Decl.h:706
QualType getType() const
Definition: Decl.h:717
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:3969
unsigned getNumElements() const
Definition: Type.h:3984
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw statement, stack unwinding, etc.).
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:181
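computeOSLogBufferLayout, together with the OSLogBufferLayout accessors listed earlier (Items, getSummaryByte, getNumArgsByte), describes the buffer written by __builtin_os_log_format. A hedged sketch of querying that layout, assuming Ctx is the ASTContext and CE is the call expression:

#include "clang/AST/Expr.h"
#include "clang/AST/OSLog.h"
#include <cstddef>

// Hypothetical use: inspect the computed os_log buffer layout.
static std::size_t describeOSLogCall(clang::ASTContext &Ctx,
                                     const clang::CallExpr *CE) {
  clang::analyze_os_log::OSLogBufferLayout Layout;
  if (!clang::analyze_os_log::computeOSLogBufferLayout(Ctx, CE, Layout))
    return 0;
  // The summary and argument-count bytes lead the buffer; items follow.
  unsigned char Summary = Layout.getSummaryByte();
  unsigned char NumArgs = Layout.getNumArgsByte();
  (void)Summary;
  (void)NumArgs;
  return Layout.Items.size();
}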
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: Integral.h:29
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:923
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:217
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1873
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:348
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:489
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1393
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1713
The JSON file list parser is used to communicate input to InstallAPI.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:151
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
unsigned long uint64_t
long int64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:159
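The SanitizerSet operations above (set, has, clear) are how individual checks get toggled around a builtin expansion. A minimal hedged sketch; the choice of SanitizerKind::Alignment is only an example:

#include "clang/Basic/Sanitizers.h"

// Hypothetical: enable one check, query it, then drop it again.
static bool toggleAlignmentCheck() {
  clang::SanitizerSet Checks;
  Checks.set(clang::SanitizerKind::Alignment, true);
  bool WasEnabled = Checks.has(clang::SanitizerKind::Alignment);
  Checks.clear(clang::SanitizerKind::Alignment);
  return WasEnabled;
}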
uint64_t Width
Definition: ASTContext.h:153
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742