1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CGValue.h"
21#include "CodeGenFunction.h"
22#include "CodeGenModule.h"
23#include "ConstantEmitter.h"
24#include "PatternInit.h"
25#include "TargetInfo.h"
27#include "clang/AST/Attr.h"
28#include "clang/AST/Decl.h"
29#include "clang/AST/Expr.h"
30#include "clang/AST/OSLog.h"
32#include "clang/AST/Type.h"
38#include "llvm/ADT/APFloat.h"
39#include "llvm/ADT/APInt.h"
40#include "llvm/ADT/FloatingPointMode.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/StringExtras.h"
43#include "llvm/Analysis/ValueTracking.h"
44#include "llvm/IR/DataLayout.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsBPF.h"
51#include "llvm/IR/IntrinsicsDirectX.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/IntrinsicsNVPTX.h"
54#include "llvm/IR/IntrinsicsPowerPC.h"
55#include "llvm/IR/IntrinsicsR600.h"
56#include "llvm/IR/IntrinsicsRISCV.h"
57#include "llvm/IR/IntrinsicsS390.h"
58#include "llvm/IR/IntrinsicsWebAssembly.h"
59#include "llvm/IR/IntrinsicsX86.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/MatrixBuilder.h"
62#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63#include "llvm/Support/AMDGPUAddrSpace.h"
64#include "llvm/Support/ConvertUTF.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/ScopedPrinter.h"
67#include "llvm/TargetParser/AArch64TargetParser.h"
68#include "llvm/TargetParser/RISCVISAInfo.h"
69#include "llvm/TargetParser/RISCVTargetParser.h"
70#include "llvm/TargetParser/X86TargetParser.h"
71#include <optional>
72#include <utility>
73
74using namespace clang;
75using namespace CodeGen;
76using namespace llvm;
77
78static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
79 Align AlignmentInBytes) {
80 ConstantInt *Byte;
81 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
82 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
83 // Nothing to initialize.
84 return;
85 case LangOptions::TrivialAutoVarInitKind::Zero:
86 Byte = CGF.Builder.getInt8(0x00);
87 break;
88 case LangOptions::TrivialAutoVarInitKind::Pattern: {
89 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
90 Byte = llvm::dyn_cast<llvm::ConstantInt>(
91 initializationPatternFor(CGF.CGM, Int8));
92 break;
93 }
94 }
95 if (CGF.CGM.stopAutoInit())
96 return;
97 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
98 I->addAnnotationMetadata("auto-init");
99}
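// Illustrative sketch (assumed lowering; exact IR may differ): for
//   void f(unsigned n) { char buf[n]; use(buf); }
// built with -ftrivial-auto-var-init=zero, the dynamic alloca for 'buf' is
// followed by a zero memset over 'n' bytes carrying the "auto-init"
// annotation added above, roughly:
//   %buf = alloca i8, i64 %n
//   call void @llvm.memset.p0.i64(ptr %buf, i8 0, i64 %n, i1 false), !annotation !N
//   ; !N = !{!"auto-init"}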
100
102 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
103
104 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
105 Value *CMP;
106 Value *LastInstr;
107
108 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
109 FZeroConst = ConstantVector::getSplat(
110 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
111 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
112 CMP = CGF->Builder.CreateIntrinsic(
113 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
114 {FCompInst}, nullptr);
115 } else
116 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
117
118 if (CGF->CGM.getTarget().getTriple().isDXIL())
119 LastInstr = CGF->Builder.CreateIntrinsic(
120 CGF->VoidTy, llvm::Intrinsic::dx_discard, {CMP}, nullptr);
121 else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
122 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
123 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
124
125 CGF->Builder.CreateCondBr(CMP, LT0, End);
126
127 CGF->Builder.SetInsertPoint(LT0);
128
129 CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_discard, {},
130 nullptr);
131
132 LastInstr = CGF->Builder.CreateBr(End);
133
134 CGF->Builder.SetInsertPoint(End);
135 } else {
136 llvm_unreachable("Backend Codegen not supported.");
137 }
138
139 return LastInstr;
140}
141
143 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
144 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
145 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
146
147 CallArgList Args;
148 LValue Op1TmpLValue =
149 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
150 LValue Op2TmpLValue =
151 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
152
154 Args.reverseWritebacks();
155
156 Value *LowBits = nullptr;
157 Value *HighBits = nullptr;
158
159 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
160
161 llvm::Type *RetElementTy = CGF->Int32Ty;
162 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
163 RetElementTy = llvm::VectorType::get(
164 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
165 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
166
167 CallInst *CI = CGF->Builder.CreateIntrinsic(
168 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
169
170 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
171 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
172
173 } else {
 174 // For non-DXIL targets, we generate the instructions directly.
175
176 if (!Op0->getType()->isVectorTy()) {
177 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
178 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
179
180 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
181 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
182 } else {
183 int NumElements = 1;
184 if (const auto *VecTy =
185 E->getArg(0)->getType()->getAs<clang::VectorType>())
186 NumElements = VecTy->getNumElements();
187
188 FixedVectorType *Uint32VecTy =
189 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
190 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
191 if (NumElements == 1) {
192 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
193 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
194 } else {
195 SmallVector<int> EvenMask, OddMask;
196 for (int I = 0, E = NumElements; I != E; ++I) {
197 EvenMask.push_back(I * 2);
198 OddMask.push_back(I * 2 + 1);
199 }
200 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
201 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
202 }
203 }
204 }
205 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
206 auto *LastInst =
207 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
208 CGF->EmitWritebacks(Args);
209 return LastInst;
210}
211
213 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
214 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
215 "asdouble operands types mismatch");
216 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
217 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
218
219 llvm::Type *ResultType = CGF.DoubleTy;
220 int N = 1;
221 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
222 N = VTy->getNumElements();
223 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
224 }
225
226 if (CGF.CGM.getTarget().getTriple().isDXIL())
227 return CGF.Builder.CreateIntrinsic(
228 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
229 ArrayRef<Value *>{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
230
231 if (!E->getArg(0)->getType()->isVectorType()) {
232 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
233 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
234 }
235
237 for (int i = 0; i < N; i++) {
238 Mask.push_back(i);
239 Mask.push_back(i + N);
240 }
241
242 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
243
244 return CGF.Builder.CreateBitCast(BitVec, ResultType);
245}
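// Illustrative sketch (assumed, not taken from the original source): on a
// non-DXIL target, asdouble(lowbits, highbits) with scalar uint operands is
// lowered by splatting each operand to a <1 x i32>, interleaving them with a
// shufflevector, and bitcasting the <2 x i32> result to double, roughly:
//   %lo = insertelement <1 x i32> poison, i32 %lowbits, i64 0
//   %hi = insertelement <1 x i32> poison, i32 %highbits, i64 0
//   %v  = shufflevector <1 x i32> %lo, <1 x i32> %hi, <2 x i32> <i32 0, i32 1>
//   %d  = bitcast <2 x i32> %v to double
// On DXIL targets the dx.asdouble intrinsic is emitted instead.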
246
247/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
248/// return it as an i8 pointer.
250 LLVMContext &Context = CGF.CGM.getLLVMContext();
251 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
252 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
253 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
254 llvm::Function *F =
255 CGF.CGM.getIntrinsic(llvm::Intrinsic::read_register, {CGF.Int64Ty});
256 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
257 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
258}
259
260/// getBuiltinLibFunction - Given a builtin id for a function like
261/// "__builtin_fabsf", return a Function* for "fabsf".
263 unsigned BuiltinID) {
264 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
265
266 // Get the name, skip over the __builtin_ prefix (if necessary).
267 StringRef Name;
268 GlobalDecl D(FD);
269
270 // TODO: This list should be expanded or refactored after all GCC-compatible
271 // std libcall builtins are implemented.
272 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
273 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
274 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
275 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
276 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
277 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
278 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
279 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
280 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
281 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
282 {Builtin::BI__builtin_printf, "__printfieee128"},
283 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
284 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
285 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
286 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
287 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
288 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
289 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
290 {Builtin::BI__builtin_scanf, "__scanfieee128"},
291 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
292 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
293 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
294 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
295 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
296 };
297
 298 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
 299 // IBM 'long double' (i.e. __ibm128). Map them to the 'double' versions
 300 // when 'long double' is 64 bits.
301 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
302 {Builtin::BI__builtin_frexpl, "frexp"},
303 {Builtin::BI__builtin_ldexpl, "ldexp"},
304 {Builtin::BI__builtin_modfl, "modf"},
305 };
306
307 // If the builtin has been declared explicitly with an assembler label,
308 // use the mangled name. This differs from the plain label on platforms
309 // that prefix labels.
310 if (FD->hasAttr<AsmLabelAttr>())
311 Name = getMangledName(D);
312 else {
 313 // TODO: This mutation should also be applied to targets other than PPC,
 314 // once their backends support IEEE 128-bit style libcalls.
315 if (getTriple().isPPC64() &&
316 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
317 F128Builtins.contains(BuiltinID))
318 Name = F128Builtins[BuiltinID];
319 else if (getTriple().isOSAIX() &&
320 &getTarget().getLongDoubleFormat() ==
321 &llvm::APFloat::IEEEdouble() &&
322 AIXLongDouble64Builtins.contains(BuiltinID))
323 Name = AIXLongDouble64Builtins[BuiltinID];
324 else
325 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
326 }
327
328 llvm::FunctionType *Ty =
329 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
330
331 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
332}
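// Examples of the resulting name mapping (derived from the tables and the
// substr(10) prefix strip above; "__builtin_" is exactly 10 characters):
//   __builtin_fabsf                                    -> fabsf
//   __builtin_printf (PPC64, IEEE quad 'long double')  -> __printfieee128
//   __builtin_frexpl (AIX, 64-bit 'long double')       -> frexp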
333
334/// Emit the conversions required to turn the given value into an
335/// integer of the given size.
336static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
337 QualType T, llvm::IntegerType *IntType) {
338 V = CGF.EmitToMemory(V, T);
339
340 if (V->getType()->isPointerTy())
341 return CGF.Builder.CreatePtrToInt(V, IntType);
342
343 assert(V->getType() == IntType);
344 return V;
345}
346
347static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
348 QualType T, llvm::Type *ResultType) {
349 V = CGF.EmitFromMemory(V, T);
350
351 if (ResultType->isPointerTy())
352 return CGF.Builder.CreateIntToPtr(V, ResultType);
353
354 assert(V->getType() == ResultType);
355 return V;
356}
357
359 ASTContext &Ctx = CGF.getContext();
360 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
361 unsigned Bytes = Ptr.getElementType()->isPointerTy()
363 : Ptr.getElementType()->getScalarSizeInBits() / 8;
364 unsigned Align = Ptr.getAlignment().getQuantity();
365 if (Align % Bytes != 0) {
366 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
367 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
368 // Force address to be at least naturally-aligned.
369 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
370 }
371 return Ptr;
372}
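// Illustrative example (assumed): the misalignment warning fires for code like
//   struct __attribute__((packed)) S { char c; int i; };
//   void inc(struct S *s) { __sync_fetch_and_add(&s->i, 1); }
// where '&s->i' only has alignment 1 but the 4-byte operation requires natural
// alignment; the returned address is then forced to the natural alignment.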
373
374/// Utility to insert an atomic instruction based on Intrinsic::ID
375/// and the expression node.
377 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
378 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
379
380 QualType T = E->getType();
381 assert(E->getArg(0)->getType()->isPointerType());
383 E->getArg(0)->getType()->getPointeeType()));
384 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
385
386 Address DestAddr = CheckAtomicAlignment(CGF, E);
387
388 llvm::IntegerType *IntType = llvm::IntegerType::get(
389 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
390
391 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
392 llvm::Type *ValueType = Val->getType();
393 Val = EmitToInt(CGF, Val, T, IntType);
394
395 llvm::Value *Result =
396 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
397 return EmitFromInt(CGF, Result, T, ValueType);
398}
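// Illustrative sketch (assumed lowering): a call such as
//   long old = __sync_fetch_and_add(p, v);
// reaches this helper with Kind == AtomicRMWInst::Add and becomes roughly
//   %old = atomicrmw add ptr %p, i64 %v seq_cst
// with pointer-typed operands round-tripped through ptrtoint/inttoptr by
// EmitToInt/EmitFromInt.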
399
401 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
402 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
403
404 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
405 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
406 LV.setNontemporal(true);
407 CGF.EmitStoreOfScalar(Val, LV, false);
408 return nullptr;
409}
410
412 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
413
414 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
415 LV.setNontemporal(true);
416 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
417}
418
420 llvm::AtomicRMWInst::BinOp Kind,
421 const CallExpr *E) {
422 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
423}
424
425/// Utility to insert an atomic instruction based Intrinsic::ID and
426/// the expression node, where the return value is the result of the
427/// operation.
429 llvm::AtomicRMWInst::BinOp Kind,
430 const CallExpr *E,
431 Instruction::BinaryOps Op,
432 bool Invert = false) {
433 QualType T = E->getType();
434 assert(E->getArg(0)->getType()->isPointerType());
436 E->getArg(0)->getType()->getPointeeType()));
437 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
438
439 Address DestAddr = CheckAtomicAlignment(CGF, E);
440
441 llvm::IntegerType *IntType = llvm::IntegerType::get(
442 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
443
444 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
445 llvm::Type *ValueType = Val->getType();
446 Val = EmitToInt(CGF, Val, T, IntType);
447
448 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
449 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
450 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
451 if (Invert)
452 Result =
453 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
454 llvm::ConstantInt::getAllOnesValue(IntType));
455 Result = EmitFromInt(CGF, Result, T, ValueType);
456 return RValue::get(Result);
457}
458
459/// Utility to insert an atomic cmpxchg instruction.
460///
461/// @param CGF The current codegen function.
462/// @param E Builtin call expression to convert to cmpxchg.
463/// arg0 - address to operate on
464/// arg1 - value to compare with
465/// arg2 - new value
466/// @param ReturnBool Specifies whether to return success flag of
467/// cmpxchg result or the old value.
468///
469/// @returns result of cmpxchg, according to ReturnBool
470///
 471 /// Note: to lower Microsoft's _InterlockedCompareExchange* intrinsics,
 472 /// invoke EmitAtomicCmpXchgForMSIntrin instead.
474 bool ReturnBool) {
475 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
476 Address DestAddr = CheckAtomicAlignment(CGF, E);
477
478 llvm::IntegerType *IntType = llvm::IntegerType::get(
479 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
480
481 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
482 llvm::Type *ValueType = Cmp->getType();
483 Cmp = EmitToInt(CGF, Cmp, T, IntType);
484 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
485
487 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
488 llvm::AtomicOrdering::SequentiallyConsistent);
489 if (ReturnBool)
490 // Extract boolean success flag and zext it to int.
491 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
492 CGF.ConvertType(E->getType()));
493 else
494 // Extract old value and emit it using the same type as compare value.
495 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
496 ValueType);
497}
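// Illustrative sketch (assumed): the GCC-style CAS builtins funnel through
// this helper, with ReturnBool selecting which half of the cmpxchg pair is
// returned:
//   bool ok  = __sync_bool_compare_and_swap(p, expected, desired); // ReturnBool = true
//   long old = __sync_val_compare_and_swap(p, expected, desired);  // ReturnBool = false
//   ; %pair = cmpxchg ptr %p, i64 %expected, i64 %desired seq_cst seq_cst
//   ; %ok  = zext(extractvalue %pair, 1)   or   %old = extractvalue %pair, 0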
498
499/// This function should be invoked to emit atomic cmpxchg for Microsoft's
500/// _InterlockedCompareExchange* intrinsics which have the following signature:
501/// T _InterlockedCompareExchange(T volatile *Destination,
502/// T Exchange,
503/// T Comparand);
504///
 505 /// Whereas the llvm 'cmpxchg' instruction has the following syntax:
 506 /// cmpxchg *Destination, Comparand, Exchange.
 507 /// We therefore need to swap Comparand and Exchange when invoking
 508 /// CreateAtomicCmpXchg. This is why we cannot use the utility function
 509 /// MakeAtomicCmpXchgValue above: it expects the arguments in the
 510 /// already-swapped order.
511
512static
514 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
515 assert(E->getArg(0)->getType()->isPointerType());
517 E->getType(), E->getArg(0)->getType()->getPointeeType()));
519 E->getArg(1)->getType()));
521 E->getArg(2)->getType()));
522
523 Address DestAddr = CheckAtomicAlignment(CGF, E);
524
525 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
526 auto *RTy = Exchange->getType();
527
528 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
529
530 if (RTy->isPointerTy()) {
531 Exchange = CGF.Builder.CreatePtrToInt(Exchange, CGF.IntPtrTy);
532 Comparand = CGF.Builder.CreatePtrToInt(Comparand, CGF.IntPtrTy);
533 }
534
535 // For Release ordering, the failure ordering should be Monotonic.
536 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
537 AtomicOrdering::Monotonic :
538 SuccessOrdering;
539
540 // The atomic instruction is marked volatile for consistency with MSVC. This
541 // blocks the few atomics optimizations that LLVM has. If we want to optimize
542 // _Interlocked* operations in the future, we will have to remove the volatile
543 // marker.
544 auto *CmpXchg = CGF.Builder.CreateAtomicCmpXchg(
545 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
546 CmpXchg->setVolatile(true);
547
548 auto *Result = CGF.Builder.CreateExtractValue(CmpXchg, 0);
549 if (RTy->isPointerTy()) {
550 Result = CGF.Builder.CreateIntToPtr(Result, RTy);
551 }
552
553 return Result;
554}
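// Illustrative sketch (assumed): note the operand swap relative to the source
// order of the MSVC intrinsic:
//   long prev = _InterlockedCompareExchange(&x, exchange, comparand);
//   ; %pair = cmpxchg volatile ptr %x, i32 %comparand, i32 %exchange seq_cst seq_cst
//   ; %prev = extractvalue { i32, i1 } %pair, 0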
555
 556 // 64-bit Microsoft platforms support 128-bit cmpxchg operations. They are
557// prototyped like this:
558//
559// unsigned char _InterlockedCompareExchange128...(
560// __int64 volatile * _Destination,
561// __int64 _ExchangeHigh,
562// __int64 _ExchangeLow,
563// __int64 * _ComparandResult);
564//
 565 // Note that Destination is assumed to be at least 16-byte aligned, despite
 566 // being typed as __int64.
567
569 const CallExpr *E,
570 AtomicOrdering SuccessOrdering) {
571 assert(E->getNumArgs() == 4);
572 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
573 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
574 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
575 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
576
577 assert(DestPtr->getType()->isPointerTy());
578 assert(!ExchangeHigh->getType()->isPointerTy());
579 assert(!ExchangeLow->getType()->isPointerTy());
580
581 // For Release ordering, the failure ordering should be Monotonic.
582 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
583 ? AtomicOrdering::Monotonic
584 : SuccessOrdering;
585
586 // Convert to i128 pointers and values. Alignment is also overridden for
587 // destination pointer.
588 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
589 Address DestAddr(DestPtr, Int128Ty,
591 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
592
593 // (((i128)hi) << 64) | ((i128)lo)
594 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
595 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
596 ExchangeHigh =
597 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
598 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
599
600 // Load the comparand for the instruction.
601 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
602
603 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
604 SuccessOrdering, FailureOrdering);
605
606 // The atomic instruction is marked volatile for consistency with MSVC. This
607 // blocks the few atomics optimizations that LLVM has. If we want to optimize
608 // _Interlocked* operations in the future, we will have to remove the volatile
609 // marker.
610 CXI->setVolatile(true);
611
612 // Store the result as an outparameter.
613 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
614 ComparandAddr);
615
616 // Get the success boolean and zero extend it to i8.
617 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
618 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
619}
620
622 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
623 assert(E->getArg(0)->getType()->isPointerType());
624
625 auto *IntTy = CGF.ConvertType(E->getType());
626 Address DestAddr = CheckAtomicAlignment(CGF, E);
627 auto *Result = CGF.Builder.CreateAtomicRMW(
628 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
629 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
630}
631
633 CodeGenFunction &CGF, const CallExpr *E,
634 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
635 assert(E->getArg(0)->getType()->isPointerType());
636
637 auto *IntTy = CGF.ConvertType(E->getType());
638 Address DestAddr = CheckAtomicAlignment(CGF, E);
639 auto *Result = CGF.Builder.CreateAtomicRMW(
640 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
641 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
642}
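// Illustrative sketch (assumed): _InterlockedIncrement/_InterlockedDecrement
// return the *new* value, so the old value produced by the atomicrmw is
// adjusted afterwards:
//   long v = _InterlockedIncrement(&ctr);
//   ; %old = atomicrmw add ptr %ctr, i32 1 seq_cst
//   ; %v   = add i32 %old, 1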
643
644// Build a plain volatile load.
646 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
647 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
648 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
649 llvm::Type *ITy =
650 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
651 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
652 Load->setVolatile(true);
653 return Load;
654}
655
656// Build a plain volatile store.
658 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
659 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
660 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
661 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
662 llvm::StoreInst *Store =
663 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
664 Store->setVolatile(true);
665 return Store;
666}
667
668// Emit a simple mangled intrinsic that has 1 argument and a return type
669// matching the argument type. Depending on mode, this may be a constrained
670// floating-point intrinsic.
672 const CallExpr *E, unsigned IntrinsicID,
673 unsigned ConstrainedIntrinsicID) {
674 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
675
676 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
677 if (CGF.Builder.getIsFPConstrained()) {
678 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
679 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
680 } else {
681 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
682 return CGF.Builder.CreateCall(F, Src0);
683 }
684}
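// Illustrative sketch (assumed): with default FP semantics a builtin such as
// __builtin_sqrt maps to the plain intrinsic, while under strict FP semantics
// (e.g. -ffp-exception-behavior=strict) the constrained form is used instead:
//   %r = call double @llvm.sqrt.f64(double %x)
//   %r = call double @llvm.experimental.constrained.sqrt.f64(
//            double %x, metadata !"round.dynamic", metadata !"fpexcept.strict")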
685
686// Emit an intrinsic that has 2 operands of the same type as its result.
687// Depending on mode, this may be a constrained floating-point intrinsic.
689 const CallExpr *E, unsigned IntrinsicID,
690 unsigned ConstrainedIntrinsicID) {
691 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
692 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
693
694 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
695 if (CGF.Builder.getIsFPConstrained()) {
696 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
697 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
698 } else {
699 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
700 return CGF.Builder.CreateCall(F, { Src0, Src1 });
701 }
702}
703
704// Has second type mangled argument.
706 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
707 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
708 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
709 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
710
711 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
712 if (CGF.Builder.getIsFPConstrained()) {
713 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
714 {Src0->getType(), Src1->getType()});
715 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
716 }
717
718 Function *F =
719 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
720 return CGF.Builder.CreateCall(F, {Src0, Src1});
721}
722
723// Emit an intrinsic that has 3 operands of the same type as its result.
724// Depending on mode, this may be a constrained floating-point intrinsic.
726 const CallExpr *E, unsigned IntrinsicID,
727 unsigned ConstrainedIntrinsicID) {
728 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
729 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
730 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
731
732 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
733 if (CGF.Builder.getIsFPConstrained()) {
734 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
735 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
736 } else {
737 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
738 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
739 }
740}
741
742// Emit an intrinsic where all operands are of the same type as the result.
743// Depending on mode, this may be a constrained floating-point intrinsic.
745 unsigned IntrinsicID,
746 unsigned ConstrainedIntrinsicID,
747 llvm::Type *Ty,
748 ArrayRef<Value *> Args) {
749 Function *F;
750 if (CGF.Builder.getIsFPConstrained())
751 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
752 else
753 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
754
755 if (CGF.Builder.getIsFPConstrained())
756 return CGF.Builder.CreateConstrainedFPCall(F, Args);
757 else
758 return CGF.Builder.CreateCall(F, Args);
759}
760
761// Emit a simple intrinsic that has N scalar arguments and a return type
762// matching the argument type. It is assumed that only the first argument is
763// overloaded.
764template <unsigned N>
766 const CallExpr *E,
767 unsigned IntrinsicID,
768 llvm::StringRef Name = "") {
769 static_assert(N, "expect non-empty argument");
771 for (unsigned I = 0; I < N; ++I)
772 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
773 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
774 return CGF.Builder.CreateCall(F, Args, Name);
775}
776
777// Emit an intrinsic that has 4 operands of the same type as its result.
779 unsigned IntrinsicID) {
780 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
781 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
782 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
783 llvm::Value *Src3 = CGF.EmitScalarExpr(E->getArg(3));
784
785 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
786 return CGF.Builder.CreateCall(F, {Src0, Src1, Src2, Src3});
787}
788
789// Emit an intrinsic that has 1 float or double operand, and 1 integer.
791 const CallExpr *E,
792 unsigned IntrinsicID) {
793 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
794 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
795
796 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
797 return CGF.Builder.CreateCall(F, {Src0, Src1});
798}
799
800// Emit an intrinsic that has overloaded integer result and fp operand.
801static Value *
803 unsigned IntrinsicID,
804 unsigned ConstrainedIntrinsicID) {
805 llvm::Type *ResultType = CGF.ConvertType(E->getType());
806 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
807
808 if (CGF.Builder.getIsFPConstrained()) {
809 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
810 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
811 {ResultType, Src0->getType()});
812 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
813 } else {
814 Function *F =
815 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
816 return CGF.Builder.CreateCall(F, Src0);
817 }
818}
819
821 llvm::Intrinsic::ID IntrinsicID) {
822 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
823 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
824
825 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
826 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
827 llvm::Function *F =
828 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
829 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
830
831 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
832 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
833 CGF.EmitStoreOfScalar(Exp, LV);
834
835 return CGF.Builder.CreateExtractValue(Call, 0);
836}
837
839 llvm::Intrinsic::ID IntrinsicID) {
840 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
841 llvm::Value *Dest0 = CGF.EmitScalarExpr(E->getArg(1));
842 llvm::Value *Dest1 = CGF.EmitScalarExpr(E->getArg(2));
843
844 llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Val->getType()});
845 llvm::Value *Call = CGF.Builder.CreateCall(F, Val);
846
847 llvm::Value *SinResult = CGF.Builder.CreateExtractValue(Call, 0);
848 llvm::Value *CosResult = CGF.Builder.CreateExtractValue(Call, 1);
849
850 QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
851 LValue SinLV = CGF.MakeNaturalAlignAddrLValue(Dest0, DestPtrType);
852 LValue CosLV = CGF.MakeNaturalAlignAddrLValue(Dest1, DestPtrType);
853
854 llvm::StoreInst *StoreSin =
855 CGF.Builder.CreateStore(SinResult, SinLV.getAddress());
856 llvm::StoreInst *StoreCos =
857 CGF.Builder.CreateStore(CosResult, CosLV.getAddress());
858
 859 // Mark the two stores as non-aliasing with each other. The order of stores
 860 // emitted by this builtin is arbitrary; enforcing a particular order would
 861 // prevent later optimizations.
862 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
863 MDNode *Domain = MDHelper.createAnonymousAliasScopeDomain();
864 MDNode *AliasScope = MDHelper.createAnonymousAliasScope(Domain);
865 MDNode *AliasScopeList = MDNode::get(Call->getContext(), AliasScope);
866 StoreSin->setMetadata(LLVMContext::MD_alias_scope, AliasScopeList);
867 StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
868}
869
870/// EmitFAbs - Emit a call to @llvm.fabs().
872 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
873 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
874 Call->setDoesNotAccessMemory();
875 return Call;
876}
877
878/// Emit the computation of the sign bit for a floating point value. Returns
879/// the i1 sign bit value.
881 LLVMContext &C = CGF.CGM.getLLVMContext();
882
883 llvm::Type *Ty = V->getType();
884 int Width = Ty->getPrimitiveSizeInBits();
885 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
886 V = CGF.Builder.CreateBitCast(V, IntTy);
887 if (Ty->isPPC_FP128Ty()) {
888 // We want the sign bit of the higher-order double. The bitcast we just
889 // did works as if the double-double was stored to memory and then
890 // read as an i128. The "store" will put the higher-order double in the
891 // lower address in both little- and big-Endian modes, but the "load"
892 // will treat those bits as a different part of the i128: the low bits in
893 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
894 // we need to shift the high bits down to the low before truncating.
895 Width >>= 1;
896 if (CGF.getTarget().isBigEndian()) {
897 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
898 V = CGF.Builder.CreateLShr(V, ShiftCst);
899 }
 900 // We are truncating the value in order to extract the higher-order
 901 // double, from which we will extract the sign.
902 IntTy = llvm::IntegerType::get(C, Width);
903 V = CGF.Builder.CreateTrunc(V, IntTy);
904 }
905 Value *Zero = llvm::Constant::getNullValue(IntTy);
906 return CGF.Builder.CreateICmpSLT(V, Zero);
907}
908
 909 /// Checks that no arguments or results are passed indirectly in the ABI (i.e.
 910 /// via a hidden pointer). This is used to check that annotating FP libcalls
 911 /// (which could set `errno`) with "int" TBAA metadata is safe. If any
 912 /// floating-point arguments are passed indirectly, setup for the call could
 913 /// be incorrectly optimized out.
915 auto IsIndirect = [&](ABIArgInfo const &info) {
916 return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca();
917 };
918 return !IsIndirect(FnInfo.getReturnInfo()) &&
919 llvm::none_of(FnInfo.arguments(),
920 [&](CGFunctionInfoArgInfo const &ArgInfo) {
921 return IsIndirect(ArgInfo.info);
922 });
923}
924
926 const CallExpr *E, llvm::Constant *calleeValue) {
927 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
928 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
929 llvm::CallBase *callOrInvoke = nullptr;
930 CGFunctionInfo const *FnInfo = nullptr;
931 RValue Call =
932 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(),
933 /*Chain=*/nullptr, &callOrInvoke, &FnInfo);
934
935 if (unsigned BuiltinID = FD->getBuiltinID()) {
 936 // Check whether this is an FP math builtin function, such as BI__builtin_expf.
937 ASTContext &Context = CGF.getContext();
938 bool ConstWithoutErrnoAndExceptions =
 940 // Restrict to targets with errno; for example, macOS doesn't set errno.
 941 // TODO: Support builtin functions with complex return types, e.g. cacosh.
942 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
943 !CGF.Builder.getIsFPConstrained() && Call.isScalar() &&
945 // Emit "int" TBAA metadata on FP math libcalls.
946 clang::QualType IntTy = Context.IntTy;
947 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
948 CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo);
949 }
950 }
951 return Call;
952}
953
954/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
955/// depending on IntrinsicID.
956///
957/// \arg CGF The current codegen function.
958/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
959/// \arg X The first argument to the llvm.*.with.overflow.*.
960/// \arg Y The second argument to the llvm.*.with.overflow.*.
961/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
962/// \returns The result (i.e. sum/product) returned by the intrinsic.
963static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
964 const llvm::Intrinsic::ID IntrinsicID,
965 llvm::Value *X, llvm::Value *Y,
966 llvm::Value *&Carry) {
967 // Make sure we have integers of the same width.
968 assert(X->getType() == Y->getType() &&
969 "Arguments must be the same type. (Did you forget to make sure both "
970 "arguments have the same integer width?)");
971
972 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
973 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
974 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
975 return CGF.Builder.CreateExtractValue(Tmp, 0);
976}
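// Illustrative sketch (assumed): for same-width operands,
//   bool ov = __builtin_sadd_overflow(a, b, &sum);
// lowers through this helper to roughly
//   %pair = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %sum  = extractvalue { i32, i1 } %pair, 0
//   %ov   = extractvalue { i32, i1 } %pair, 1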
977
978static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
979 int low, int high) {
980 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
981 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
982 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
983 Call->addRangeRetAttr(CR);
984 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
985 return Call;
986}
987
988namespace {
989 struct WidthAndSignedness {
990 unsigned Width;
991 bool Signed;
992 };
993}
994
995static WidthAndSignedness
997 const clang::QualType Type) {
998 assert(Type->isIntegerType() && "Given type is not an integer.");
999 unsigned Width = context.getIntWidth(Type);
1001 return {Width, Signed};
1002}
1003
1004// Given one or more integer types, this function produces an integer type that
1005// encompasses them: any value in one of the given types could be expressed in
1006// the encompassing type.
1007static struct WidthAndSignedness
1008EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
1009 assert(Types.size() > 0 && "Empty list of types.");
1010
1011 // If any of the given types is signed, we must return a signed type.
1012 bool Signed = false;
1013 for (const auto &Type : Types) {
1014 Signed |= Type.Signed;
1015 }
1016
1017 // The encompassing type must have a width greater than or equal to the width
1018 // of the specified types. Additionally, if the encompassing type is signed,
1019 // its width must be strictly greater than the width of any unsigned types
1020 // given.
1021 unsigned Width = 0;
1022 for (const auto &Type : Types) {
1023 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
1024 if (Width < MinWidth) {
1025 Width = MinWidth;
1026 }
1027 }
1028
1029 return {Width, Signed};
1030}
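// Worked example of the width rule above: the type encompassing
// {int32_t, uint32_t} must be signed (one input is signed) and must hold every
// uint32_t value, so the unsigned input needs 32 + 1 = 33 bits while the
// signed input needs only 32; the result is {Width = 33, Signed = true}.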
1031
1032Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
1033 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
1034 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
1035 ArgValue);
1036}
1037
1038/// Checks if using the result of __builtin_object_size(p, @p From) in place of
1039/// __builtin_object_size(p, @p To) is correct
1040static bool areBOSTypesCompatible(int From, int To) {
1041 // Note: Our __builtin_object_size implementation currently treats Type=0 and
1042 // Type=2 identically. Encoding this implementation detail here may make
1043 // improving __builtin_object_size difficult in the future, so it's omitted.
1044 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
1045}
1046
1047static llvm::Value *
1048getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
1049 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
1050}
1051
1052llvm::Value *
1053CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
1054 llvm::IntegerType *ResType,
1055 llvm::Value *EmittedE,
1056 bool IsDynamic) {
1057 uint64_t ObjectSize;
1058 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
1059 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
1060 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
1061}
1062
1064 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
1065 uint64_t &Offset) {
1066 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
1067 getLangOpts().getStrictFlexArraysLevel();
1068 uint32_t FieldNo = 0;
1069
1070 if (RD->isImplicit())
1071 return nullptr;
1072
1073 for (const FieldDecl *FD : RD->fields()) {
1074 if ((!FAMDecl || FD == FAMDecl) &&
1076 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
1077 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
1078 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1079 Offset += Layout.getFieldOffset(FieldNo);
1080 return FD;
1081 }
1082
1083 QualType Ty = FD->getType();
1084 if (Ty->isRecordType()) {
1086 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
1087 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1088 Offset += Layout.getFieldOffset(FieldNo);
1089 return Field;
1090 }
1091 }
1092
1093 if (!RD->isUnion())
1094 ++FieldNo;
1095 }
1096
1097 return nullptr;
1098}
1099
1100static unsigned CountCountedByAttrs(const RecordDecl *RD) {
1101 unsigned Num = 0;
1102
1103 for (const FieldDecl *FD : RD->fields()) {
1104 if (FD->getType()->isCountAttributedType())
1105 return ++Num;
1106
1107 QualType Ty = FD->getType();
1108 if (Ty->isRecordType())
1110 }
1111
1112 return Num;
1113}
1114
1115llvm::Value *
1116CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
1117 llvm::IntegerType *ResType) {
1118 // The code generated here calculates the size of a struct with a flexible
1119 // array member that uses the counted_by attribute. There are two instances
1120 // we handle:
1121 //
1122 // struct s {
1123 // unsigned long flags;
1124 // int count;
1125 // int array[] __attribute__((counted_by(count)));
1126 // }
1127 //
1128 // 1) bdos of the flexible array itself:
1129 //
1130 // __builtin_dynamic_object_size(p->array, 1) ==
1131 // p->count * sizeof(*p->array)
1132 //
1133 // 2) bdos of a pointer into the flexible array:
1134 //
1135 // __builtin_dynamic_object_size(&p->array[42], 1) ==
1136 // (p->count - 42) * sizeof(*p->array)
1137 //
 1138 // 3) bdos of the whole struct, including the flexible array:
1139 //
1140 // __builtin_dynamic_object_size(p, 1) ==
1141 // max(sizeof(struct s),
1142 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1143 //
1144 ASTContext &Ctx = getContext();
1145 const Expr *Base = E->IgnoreParenImpCasts();
1146 const Expr *Idx = nullptr;
1147
1148 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
1149 UO && UO->getOpcode() == UO_AddrOf) {
1150 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
1151 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
1152 Base = ASE->getBase()->IgnoreParenImpCasts();
1153 Idx = ASE->getIdx()->IgnoreParenImpCasts();
1154
1155 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
1156 int64_t Val = IL->getValue().getSExtValue();
1157 if (Val < 0)
1158 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1159
1160 if (Val == 0)
1161 // The index is 0, so we don't need to take it into account.
1162 Idx = nullptr;
1163 }
1164 } else {
1165 // Potential pointer to another element in the struct.
1166 Base = SubExpr;
1167 }
1168 }
1169
1170 // Get the flexible array member Decl.
1171 const RecordDecl *OuterRD = nullptr;
1172 const FieldDecl *FAMDecl = nullptr;
1173 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
1174 // Check if \p Base is referencing the FAM itself.
1175 const ValueDecl *VD = ME->getMemberDecl();
1177 FAMDecl = dyn_cast<FieldDecl>(VD);
1178 if (!FAMDecl)
1179 return nullptr;
1180 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
1181 // Check if we're pointing to the whole struct.
1182 QualType Ty = DRE->getDecl()->getType();
1183 if (Ty->isPointerType())
1184 Ty = Ty->getPointeeType();
1185 OuterRD = Ty->getAsRecordDecl();
1186
1187 // If we have a situation like this:
1188 //
1189 // struct union_of_fams {
1190 // int flags;
1191 // union {
1192 // signed char normal_field;
1193 // struct {
1194 // int count1;
1195 // int arr1[] __counted_by(count1);
1196 // };
1197 // struct {
1198 // signed char count2;
1199 // int arr2[] __counted_by(count2);
1200 // };
1201 // };
1202 // };
1203 //
1204 // We don't know which 'count' to use in this scenario:
1205 //
1206 // size_t get_size(struct union_of_fams *p) {
1207 // return __builtin_dynamic_object_size(p, 1);
1208 // }
1209 //
1210 // Instead of calculating a wrong number, we give up.
1211 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
1212 return nullptr;
1213 }
1214
1215 if (!OuterRD)
1216 return nullptr;
1217
1218 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
1219 // get its offset.
1220 uint64_t Offset = 0;
1221 FAMDecl =
1222 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
1223 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
1224
1225 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
1226 // No flexible array member found or it doesn't have the "counted_by"
1227 // attribute.
1228 return nullptr;
1229
1230 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
1231 if (!CountedByFD)
1232 // Can't find the field referenced by the "counted_by" attribute.
1233 return nullptr;
1234
1235 if (isa<DeclRefExpr>(Base))
 1236 // The whole struct is specified in the __bdos. The calculation of the
1237 // whole size of the structure can be done in two ways:
1238 //
1239 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1240 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1241 //
 1242 // The first will add additional padding after the end of the array
 1243 // allocation, while the second method is more precise but not quite what
 1244 // programmers expect. See
1245 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1246 // discussion of the topic.
1247 //
1248 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1249 // structure. Therefore, because of the above issue, we'll choose to match
1250 // what GCC does for consistency's sake.
1251 return nullptr;
1252
1253 // Build a load of the counted_by field.
1254 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1255 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
1256 if (!CountedByInst)
1257 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1258
1259 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1260
1261 // Build a load of the index and subtract it from the count.
1262 Value *IdxInst = nullptr;
1263 if (Idx) {
1264 if (Idx->HasSideEffects(getContext()))
1265 // We can't have side-effects.
1266 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1267
1268 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1269 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1270 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1271
1272 // We go ahead with the calculation here. If the index turns out to be
1273 // negative, we'll catch it at the end.
1274 CountedByInst =
1275 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1276 }
1277
1278 // Calculate how large the flexible array member is in bytes.
1279 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1281 llvm::Constant *ElemSize =
1282 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1283 Value *Res =
1284 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1285 Res = Builder.CreateIntCast(Res, ResType, IsSigned);
1286
1287 // A negative \p IdxInst or \p CountedByInst means that the index lands
1288 // outside of the flexible array member. If that's the case, we want to
1289 // return 0.
1290 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1291 if (IdxInst)
1292 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1293
1294 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1295}
1296
1297/// Returns a Value corresponding to the size of the given expression.
1298/// This Value may be either of the following:
1299/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1300/// it)
1301/// - A call to the @llvm.objectsize intrinsic
1302///
1303/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1304/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1305/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1306llvm::Value *
1307CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1308 llvm::IntegerType *ResType,
1309 llvm::Value *EmittedE, bool IsDynamic) {
1310 // We need to reference an argument if the pointer is a parameter with the
1311 // pass_object_size attribute.
1312 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1313 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1314 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1315 if (Param != nullptr && PS != nullptr &&
1316 areBOSTypesCompatible(PS->getType(), Type)) {
1317 auto Iter = SizeArguments.find(Param);
1318 assert(Iter != SizeArguments.end());
1319
1320 const ImplicitParamDecl *D = Iter->second;
1321 auto DIter = LocalDeclMap.find(D);
1322 assert(DIter != LocalDeclMap.end());
1323
1324 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1325 getContext().getSizeType(), E->getBeginLoc());
1326 }
1327 }
1328
1329 if (IsDynamic) {
1330 // Emit special code for a flexible array member with the "counted_by"
1331 // attribute.
1332 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1333 return V;
1334 }
1335
1336 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1337 // evaluate E for side-effects. In either case, we shouldn't lower to
1338 // @llvm.objectsize.
1339 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1340 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1341
1342 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1343 assert(Ptr->getType()->isPointerTy() &&
1344 "Non-pointer passed to __builtin_object_size?");
1345
1346 Function *F =
1347 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1348
 1349 // LLVM only supports 0 and 2; make sure we pass that along as a boolean.
1350 Value *Min = Builder.getInt1((Type & 2) != 0);
 1351 // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1352 Value *NullIsUnknown = Builder.getTrue();
1353 Value *Dynamic = Builder.getInt1(IsDynamic);
1354 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1355}
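// Illustrative sketch (assumed): when no pass_object_size parameter applies,
//   size_t n = __builtin_object_size(p, 2);
// lowers to an @llvm.objectsize call with Min = true (Type & 2), null treated
// as unknown, and Dynamic = false:
//   %n = call i64 @llvm.objectsize.i64.p0(ptr %p, i1 true, i1 true, i1 false)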
1356
1357namespace {
1358/// A struct to generically describe a bit test intrinsic.
1359struct BitTest {
1360 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1361 enum InterlockingKind : uint8_t {
1362 Unlocked,
1363 Sequential,
1364 Acquire,
1365 Release,
1366 NoFence
1367 };
1368
1369 ActionKind Action;
1370 InterlockingKind Interlocking;
1371 bool Is64Bit;
1372
1373 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1374};
1375
1376} // namespace
1377
1378BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1379 switch (BuiltinID) {
1380 // Main portable variants.
1381 case Builtin::BI_bittest:
1382 return {TestOnly, Unlocked, false};
1383 case Builtin::BI_bittestandcomplement:
1384 return {Complement, Unlocked, false};
1385 case Builtin::BI_bittestandreset:
1386 return {Reset, Unlocked, false};
1387 case Builtin::BI_bittestandset:
1388 return {Set, Unlocked, false};
1389 case Builtin::BI_interlockedbittestandreset:
1390 return {Reset, Sequential, false};
1391 case Builtin::BI_interlockedbittestandset:
1392 return {Set, Sequential, false};
1393
1394 // X86-specific 64-bit variants.
1395 case Builtin::BI_bittest64:
1396 return {TestOnly, Unlocked, true};
1397 case Builtin::BI_bittestandcomplement64:
1398 return {Complement, Unlocked, true};
1399 case Builtin::BI_bittestandreset64:
1400 return {Reset, Unlocked, true};
1401 case Builtin::BI_bittestandset64:
1402 return {Set, Unlocked, true};
1403 case Builtin::BI_interlockedbittestandreset64:
1404 return {Reset, Sequential, true};
1405 case Builtin::BI_interlockedbittestandset64:
1406 return {Set, Sequential, true};
1407
1408 // ARM/AArch64-specific ordering variants.
1409 case Builtin::BI_interlockedbittestandset_acq:
1410 return {Set, Acquire, false};
1411 case Builtin::BI_interlockedbittestandset_rel:
1412 return {Set, Release, false};
1413 case Builtin::BI_interlockedbittestandset_nf:
1414 return {Set, NoFence, false};
1415 case Builtin::BI_interlockedbittestandreset_acq:
1416 return {Reset, Acquire, false};
1417 case Builtin::BI_interlockedbittestandreset_rel:
1418 return {Reset, Release, false};
1419 case Builtin::BI_interlockedbittestandreset_nf:
1420 return {Reset, NoFence, false};
1421 }
1422 llvm_unreachable("expected only bittest intrinsics");
1423}
1424
1425static char bitActionToX86BTCode(BitTest::ActionKind A) {
1426 switch (A) {
1427 case BitTest::TestOnly: return '\0';
1428 case BitTest::Complement: return 'c';
1429 case BitTest::Reset: return 'r';
1430 case BitTest::Set: return 's';
1431 }
1432 llvm_unreachable("invalid action");
1433}
1434
1436 BitTest BT,
1437 const CallExpr *E, Value *BitBase,
1438 Value *BitPos) {
1439 char Action = bitActionToX86BTCode(BT.Action);
1440 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1441
1442 // Build the assembly.
1444 raw_svector_ostream AsmOS(Asm);
1445 if (BT.Interlocking != BitTest::Unlocked)
1446 AsmOS << "lock ";
1447 AsmOS << "bt";
1448 if (Action)
1449 AsmOS << Action;
1450 AsmOS << SizeSuffix << " $2, ($1)";
1451
1452 // Build the constraints. FIXME: We should support immediates when possible.
1453 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1454 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1455 if (!MachineClobbers.empty()) {
1456 Constraints += ',';
1457 Constraints += MachineClobbers;
1458 }
1459 llvm::IntegerType *IntType = llvm::IntegerType::get(
1460 CGF.getLLVMContext(),
1461 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1462 llvm::FunctionType *FTy =
1463 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1464
1465 llvm::InlineAsm *IA =
1466 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1467 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1468}
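// Examples of the asm templates built above (derived from the string
// construction): _interlockedbittestandset produces
//   "lock btsl $2, ($1)"
// with constraints "={@ccc},r,r,~{cc},~{memory}" (plus target clobbers), while
// _bittest64 (test-only, unlocked, 64-bit) produces
//   "btq $2, ($1)"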
1469
1470static llvm::AtomicOrdering
1471getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1472 switch (I) {
1473 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1474 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1475 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1476 case BitTest::Release: return llvm::AtomicOrdering::Release;
1477 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1478 }
1479 llvm_unreachable("invalid interlocking");
1480}
1481
1482/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1483/// bits and a bit position and read and optionally modify the bit at that
1484/// position. The position index can be arbitrarily large, i.e. it can be larger
1485/// than 31 or 63, so we need an indexed load in the general case.
1486static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1487 unsigned BuiltinID,
1488 const CallExpr *E) {
1489 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1490 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1491
1492 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1493
1494 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1495 // indexing operation internally. Use them if possible.
1496 if (CGF.getTarget().getTriple().isX86())
1497 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1498
1499 // Otherwise, use generic code to load one byte and test the bit. Use all but
1500 // the bottom three bits as the array index, and the bottom three bits to form
1501 // a mask.
1502 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1503 Value *ByteIndex = CGF.Builder.CreateAShr(
1504 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1505 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBase, ByteIndex,
1506 "bittest.byteaddr"),
1507 CGF.Int8Ty, CharUnits::One());
1508 Value *PosLow =
1509 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1510 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1511
1512 // The updating instructions will need a mask.
1513 Value *Mask = nullptr;
1514 if (BT.Action != BitTest::TestOnly) {
1515 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1516 "bittest.mask");
1517 }
1518
1519 // Check the action and ordering of the interlocked intrinsics.
1520 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1521
1522 Value *OldByte = nullptr;
1523 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1524 // Emit a combined atomicrmw load/store operation for the interlocked
1525 // intrinsics.
1526 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1527 if (BT.Action == BitTest::Reset) {
1528 Mask = CGF.Builder.CreateNot(Mask);
1529 RMWOp = llvm::AtomicRMWInst::And;
1530 }
1531 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1532 } else {
1533 // Emit a plain load for the non-interlocked intrinsics.
1534 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1535 Value *NewByte = nullptr;
1536 switch (BT.Action) {
1537 case BitTest::TestOnly:
1538 // Don't store anything.
1539 break;
1540 case BitTest::Complement:
1541 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1542 break;
1543 case BitTest::Reset:
1544 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1545 break;
1546 case BitTest::Set:
1547 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1548 break;
1549 }
1550 if (NewByte)
1551 CGF.Builder.CreateStore(NewByte, ByteAddr);
1552 }
1553
1554 // However we loaded the old byte, either by plain load or atomicrmw, shift
1555 // the bit into the low position and mask it to 0 or 1.
1556 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1557 return CGF.Builder.CreateAnd(
1558 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1559}
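// For example, on a non-x86 target a call such as
//   unsigned char r = _bittestandset(Bits, 40);
// takes the generic path above and lowers to roughly:
//   %idx  = ashr i64 40, 3                      ; bittest.byteidx
//   %addr = getelementptr inbounds i8, ptr %Bits, i64 %idx
//   %old  = load i8, ptr %addr                  ; bittest.byte (plain, not atomic)
//   %new  = or i8 %old, (1 << (40 & 7))         ; bittest.mask applied
//   store i8 %new, ptr %addr
//   %r    = and i8 (lshr i8 %old, 40 & 7), 1    ; bittest.res
// The interlocked variants replace the load/store pair with an atomicrmw.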
1560
1561 static Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1562 unsigned BuiltinID,
1563 const CallExpr *E) {
1564 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1565
1566 SmallString<64> Asm;
1567 raw_svector_ostream AsmOS(Asm);
1568 llvm::IntegerType *RetType = CGF.Int32Ty;
1569
1570 switch (BuiltinID) {
1571 case clang::PPC::BI__builtin_ppc_ldarx:
1572 AsmOS << "ldarx ";
1573 RetType = CGF.Int64Ty;
1574 break;
1575 case clang::PPC::BI__builtin_ppc_lwarx:
1576 AsmOS << "lwarx ";
1577 RetType = CGF.Int32Ty;
1578 break;
1579 case clang::PPC::BI__builtin_ppc_lharx:
1580 AsmOS << "lharx ";
1581 RetType = CGF.Int16Ty;
1582 break;
1583 case clang::PPC::BI__builtin_ppc_lbarx:
1584 AsmOS << "lbarx ";
1585 RetType = CGF.Int8Ty;
1586 break;
1587 default:
1588 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1589 }
1590
1591 AsmOS << "$0, ${1:y}";
1592
1593 std::string Constraints = "=r,*Z,~{memory}";
1594 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1595 if (!MachineClobbers.empty()) {
1596 Constraints += ',';
1597 Constraints += MachineClobbers;
1598 }
1599
1600 llvm::Type *PtrType = CGF.UnqualPtrTy;
1601 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1602
1603 llvm::InlineAsm *IA =
1604 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1605 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1606 CI->addParamAttr(
1607 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1608 return CI;
1609}
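// For example, __builtin_ppc_lwarx(p) goes through this helper and becomes an
// inline-asm call along the lines of:
//   %v = call i32 asm sideeffect "lwarx $0, ${1:y}", "=r,*Z,~{memory}"(ptr elementtype(i32) %p)
// (any target-specific machine clobbers are appended to the constraint string).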
1610
1611namespace {
1612enum class MSVCSetJmpKind {
1613 _setjmpex,
1614 _setjmp3,
1615 _setjmp
1616};
1617}
1618
1619/// MSVC handles setjmp a bit differently on different platforms. On every
1620/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1621/// parameters can be passed as variadic arguments, but we always pass none.
1622static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1623 const CallExpr *E) {
1624 llvm::Value *Arg1 = nullptr;
1625 llvm::Type *Arg1Ty = nullptr;
1626 StringRef Name;
1627 bool IsVarArg = false;
1628 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1629 Name = "_setjmp3";
1630 Arg1Ty = CGF.Int32Ty;
1631 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1632 IsVarArg = true;
1633 } else {
1634 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1635 Arg1Ty = CGF.Int8PtrTy;
1636 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1637 Arg1 = CGF.Builder.CreateCall(
1638 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1639 } else
1640 Arg1 = CGF.Builder.CreateCall(
1641 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1642 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1643 }
1644
1645 // Mark the call site and declaration with ReturnsTwice.
1646 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1647 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1648 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1649 llvm::Attribute::ReturnsTwice);
1650 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1651 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1652 ReturnsTwiceAttr, /*Local=*/true);
1653
1654 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1655 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1656 llvm::Value *Args[] = {Buf, Arg1};
1657 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1658 CB->setAttributes(ReturnsTwiceAttr);
1659 return RValue::get(CB);
1660}
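// For example, on 32-bit x86 a call to _setjmp(buf) is emitted here as a
// returns_twice call along the lines of:
//   call i32 (ptr, i32, ...) @_setjmp3(ptr %buf, i32 0)
// For the _setjmp/_setjmpex kinds the second argument is instead the value of
// llvm.sponentry on AArch64 or llvm.frameaddress(0) on other targets.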
1661
1662 // Many of the MSVC builtins are on x64, ARM and AArch64; to avoid repeating
1663 // code, we handle them here.
1704 __fastfail,
1705};
1706
1707static std::optional<CodeGenFunction::MSVCIntrin>
1708translateArmToMsvcIntrin(unsigned BuiltinID) {
1709 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1710 switch (BuiltinID) {
1711 default:
1712 return std::nullopt;
1713 case clang::ARM::BI_BitScanForward:
1714 case clang::ARM::BI_BitScanForward64:
1715 return MSVCIntrin::_BitScanForward;
1716 case clang::ARM::BI_BitScanReverse:
1717 case clang::ARM::BI_BitScanReverse64:
1718 return MSVCIntrin::_BitScanReverse;
1719 case clang::ARM::BI_InterlockedAnd64:
1720 return MSVCIntrin::_InterlockedAnd;
1721 case clang::ARM::BI_InterlockedExchange64:
1722 return MSVCIntrin::_InterlockedExchange;
1723 case clang::ARM::BI_InterlockedExchangeAdd64:
1724 return MSVCIntrin::_InterlockedExchangeAdd;
1725 case clang::ARM::BI_InterlockedExchangeSub64:
1726 return MSVCIntrin::_InterlockedExchangeSub;
1727 case clang::ARM::BI_InterlockedOr64:
1728 return MSVCIntrin::_InterlockedOr;
1729 case clang::ARM::BI_InterlockedXor64:
1730 return MSVCIntrin::_InterlockedXor;
1731 case clang::ARM::BI_InterlockedDecrement64:
1732 return MSVCIntrin::_InterlockedDecrement;
1733 case clang::ARM::BI_InterlockedIncrement64:
1734 return MSVCIntrin::_InterlockedIncrement;
1735 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1736 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1737 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1738 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1739 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1740 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1741 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1742 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1743 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1744 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1745 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1746 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1747 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1748 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1749 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1750 case clang::ARM::BI_InterlockedExchange8_acq:
1751 case clang::ARM::BI_InterlockedExchange16_acq:
1752 case clang::ARM::BI_InterlockedExchange_acq:
1753 case clang::ARM::BI_InterlockedExchange64_acq:
1754 case clang::ARM::BI_InterlockedExchangePointer_acq:
1755 return MSVCIntrin::_InterlockedExchange_acq;
1756 case clang::ARM::BI_InterlockedExchange8_rel:
1757 case clang::ARM::BI_InterlockedExchange16_rel:
1758 case clang::ARM::BI_InterlockedExchange_rel:
1759 case clang::ARM::BI_InterlockedExchange64_rel:
1760 case clang::ARM::BI_InterlockedExchangePointer_rel:
1761 return MSVCIntrin::_InterlockedExchange_rel;
1762 case clang::ARM::BI_InterlockedExchange8_nf:
1763 case clang::ARM::BI_InterlockedExchange16_nf:
1764 case clang::ARM::BI_InterlockedExchange_nf:
1765 case clang::ARM::BI_InterlockedExchange64_nf:
1766 case clang::ARM::BI_InterlockedExchangePointer_nf:
1767 return MSVCIntrin::_InterlockedExchange_nf;
1768 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1769 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1770 case clang::ARM::BI_InterlockedCompareExchange_acq:
1771 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1772 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
1773 return MSVCIntrin::_InterlockedCompareExchange_acq;
1774 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1775 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1776 case clang::ARM::BI_InterlockedCompareExchange_rel:
1777 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1778 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
1779 return MSVCIntrin::_InterlockedCompareExchange_rel;
1780 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1781 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1782 case clang::ARM::BI_InterlockedCompareExchange_nf:
1783 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1784 return MSVCIntrin::_InterlockedCompareExchange_nf;
1785 case clang::ARM::BI_InterlockedOr8_acq:
1786 case clang::ARM::BI_InterlockedOr16_acq:
1787 case clang::ARM::BI_InterlockedOr_acq:
1788 case clang::ARM::BI_InterlockedOr64_acq:
1789 return MSVCIntrin::_InterlockedOr_acq;
1790 case clang::ARM::BI_InterlockedOr8_rel:
1791 case clang::ARM::BI_InterlockedOr16_rel:
1792 case clang::ARM::BI_InterlockedOr_rel:
1793 case clang::ARM::BI_InterlockedOr64_rel:
1794 return MSVCIntrin::_InterlockedOr_rel;
1795 case clang::ARM::BI_InterlockedOr8_nf:
1796 case clang::ARM::BI_InterlockedOr16_nf:
1797 case clang::ARM::BI_InterlockedOr_nf:
1798 case clang::ARM::BI_InterlockedOr64_nf:
1799 return MSVCIntrin::_InterlockedOr_nf;
1800 case clang::ARM::BI_InterlockedXor8_acq:
1801 case clang::ARM::BI_InterlockedXor16_acq:
1802 case clang::ARM::BI_InterlockedXor_acq:
1803 case clang::ARM::BI_InterlockedXor64_acq:
1804 return MSVCIntrin::_InterlockedXor_acq;
1805 case clang::ARM::BI_InterlockedXor8_rel:
1806 case clang::ARM::BI_InterlockedXor16_rel:
1807 case clang::ARM::BI_InterlockedXor_rel:
1808 case clang::ARM::BI_InterlockedXor64_rel:
1809 return MSVCIntrin::_InterlockedXor_rel;
1810 case clang::ARM::BI_InterlockedXor8_nf:
1811 case clang::ARM::BI_InterlockedXor16_nf:
1812 case clang::ARM::BI_InterlockedXor_nf:
1813 case clang::ARM::BI_InterlockedXor64_nf:
1814 return MSVCIntrin::_InterlockedXor_nf;
1815 case clang::ARM::BI_InterlockedAnd8_acq:
1816 case clang::ARM::BI_InterlockedAnd16_acq:
1817 case clang::ARM::BI_InterlockedAnd_acq:
1818 case clang::ARM::BI_InterlockedAnd64_acq:
1819 return MSVCIntrin::_InterlockedAnd_acq;
1820 case clang::ARM::BI_InterlockedAnd8_rel:
1821 case clang::ARM::BI_InterlockedAnd16_rel:
1822 case clang::ARM::BI_InterlockedAnd_rel:
1823 case clang::ARM::BI_InterlockedAnd64_rel:
1824 return MSVCIntrin::_InterlockedAnd_rel;
1825 case clang::ARM::BI_InterlockedAnd8_nf:
1826 case clang::ARM::BI_InterlockedAnd16_nf:
1827 case clang::ARM::BI_InterlockedAnd_nf:
1828 case clang::ARM::BI_InterlockedAnd64_nf:
1829 return MSVCIntrin::_InterlockedAnd_nf;
1830 case clang::ARM::BI_InterlockedIncrement16_acq:
1831 case clang::ARM::BI_InterlockedIncrement_acq:
1832 case clang::ARM::BI_InterlockedIncrement64_acq:
1833 return MSVCIntrin::_InterlockedIncrement_acq;
1834 case clang::ARM::BI_InterlockedIncrement16_rel:
1835 case clang::ARM::BI_InterlockedIncrement_rel:
1836 case clang::ARM::BI_InterlockedIncrement64_rel:
1837 return MSVCIntrin::_InterlockedIncrement_rel;
1838 case clang::ARM::BI_InterlockedIncrement16_nf:
1839 case clang::ARM::BI_InterlockedIncrement_nf:
1840 case clang::ARM::BI_InterlockedIncrement64_nf:
1841 return MSVCIntrin::_InterlockedIncrement_nf;
1842 case clang::ARM::BI_InterlockedDecrement16_acq:
1843 case clang::ARM::BI_InterlockedDecrement_acq:
1844 case clang::ARM::BI_InterlockedDecrement64_acq:
1845 return MSVCIntrin::_InterlockedDecrement_acq;
1846 case clang::ARM::BI_InterlockedDecrement16_rel:
1847 case clang::ARM::BI_InterlockedDecrement_rel:
1848 case clang::ARM::BI_InterlockedDecrement64_rel:
1849 return MSVCIntrin::_InterlockedDecrement_rel;
1850 case clang::ARM::BI_InterlockedDecrement16_nf:
1851 case clang::ARM::BI_InterlockedDecrement_nf:
1852 case clang::ARM::BI_InterlockedDecrement64_nf:
1853 return MSVCIntrin::_InterlockedDecrement_nf;
1854 }
1855 llvm_unreachable("must return from switch");
1856}
1857
1858static std::optional<CodeGenFunction::MSVCIntrin>
1859translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1860 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1861 switch (BuiltinID) {
1862 default:
1863 return std::nullopt;
1864 case clang::AArch64::BI_BitScanForward:
1865 case clang::AArch64::BI_BitScanForward64:
1866 return MSVCIntrin::_BitScanForward;
1867 case clang::AArch64::BI_BitScanReverse:
1868 case clang::AArch64::BI_BitScanReverse64:
1869 return MSVCIntrin::_BitScanReverse;
1870 case clang::AArch64::BI_InterlockedAnd64:
1871 return MSVCIntrin::_InterlockedAnd;
1872 case clang::AArch64::BI_InterlockedExchange64:
1873 return MSVCIntrin::_InterlockedExchange;
1874 case clang::AArch64::BI_InterlockedExchangeAdd64:
1875 return MSVCIntrin::_InterlockedExchangeAdd;
1876 case clang::AArch64::BI_InterlockedExchangeSub64:
1877 return MSVCIntrin::_InterlockedExchangeSub;
1878 case clang::AArch64::BI_InterlockedOr64:
1879 return MSVCIntrin::_InterlockedOr;
1880 case clang::AArch64::BI_InterlockedXor64:
1881 return MSVCIntrin::_InterlockedXor;
1882 case clang::AArch64::BI_InterlockedDecrement64:
1883 return MSVCIntrin::_InterlockedDecrement;
1884 case clang::AArch64::BI_InterlockedIncrement64:
1885 return MSVCIntrin::_InterlockedIncrement;
1886 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1887 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1888 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1889 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1890 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1891 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1892 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1893 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1894 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1895 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1896 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1897 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1898 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1899 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1900 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1901 case clang::AArch64::BI_InterlockedExchange8_acq:
1902 case clang::AArch64::BI_InterlockedExchange16_acq:
1903 case clang::AArch64::BI_InterlockedExchange_acq:
1904 case clang::AArch64::BI_InterlockedExchange64_acq:
1905 case clang::AArch64::BI_InterlockedExchangePointer_acq:
1906 return MSVCIntrin::_InterlockedExchange_acq;
1907 case clang::AArch64::BI_InterlockedExchange8_rel:
1908 case clang::AArch64::BI_InterlockedExchange16_rel:
1909 case clang::AArch64::BI_InterlockedExchange_rel:
1910 case clang::AArch64::BI_InterlockedExchange64_rel:
1911 case clang::AArch64::BI_InterlockedExchangePointer_rel:
1912 return MSVCIntrin::_InterlockedExchange_rel;
1913 case clang::AArch64::BI_InterlockedExchange8_nf:
1914 case clang::AArch64::BI_InterlockedExchange16_nf:
1915 case clang::AArch64::BI_InterlockedExchange_nf:
1916 case clang::AArch64::BI_InterlockedExchange64_nf:
1917 case clang::AArch64::BI_InterlockedExchangePointer_nf:
1918 return MSVCIntrin::_InterlockedExchange_nf;
1919 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1920 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1921 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1922 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1923 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
1924 return MSVCIntrin::_InterlockedCompareExchange_acq;
1925 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1926 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1927 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1928 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1929 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
1930 return MSVCIntrin::_InterlockedCompareExchange_rel;
1931 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1932 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1933 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1934 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1935 return MSVCIntrin::_InterlockedCompareExchange_nf;
1936 case clang::AArch64::BI_InterlockedCompareExchange128:
1937 return MSVCIntrin::_InterlockedCompareExchange128;
1938 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1939 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1940 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1941 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1942 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1943 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1944 case clang::AArch64::BI_InterlockedOr8_acq:
1945 case clang::AArch64::BI_InterlockedOr16_acq:
1946 case clang::AArch64::BI_InterlockedOr_acq:
1947 case clang::AArch64::BI_InterlockedOr64_acq:
1948 return MSVCIntrin::_InterlockedOr_acq;
1949 case clang::AArch64::BI_InterlockedOr8_rel:
1950 case clang::AArch64::BI_InterlockedOr16_rel:
1951 case clang::AArch64::BI_InterlockedOr_rel:
1952 case clang::AArch64::BI_InterlockedOr64_rel:
1953 return MSVCIntrin::_InterlockedOr_rel;
1954 case clang::AArch64::BI_InterlockedOr8_nf:
1955 case clang::AArch64::BI_InterlockedOr16_nf:
1956 case clang::AArch64::BI_InterlockedOr_nf:
1957 case clang::AArch64::BI_InterlockedOr64_nf:
1958 return MSVCIntrin::_InterlockedOr_nf;
1959 case clang::AArch64::BI_InterlockedXor8_acq:
1960 case clang::AArch64::BI_InterlockedXor16_acq:
1961 case clang::AArch64::BI_InterlockedXor_acq:
1962 case clang::AArch64::BI_InterlockedXor64_acq:
1963 return MSVCIntrin::_InterlockedXor_acq;
1964 case clang::AArch64::BI_InterlockedXor8_rel:
1965 case clang::AArch64::BI_InterlockedXor16_rel:
1966 case clang::AArch64::BI_InterlockedXor_rel:
1967 case clang::AArch64::BI_InterlockedXor64_rel:
1968 return MSVCIntrin::_InterlockedXor_rel;
1969 case clang::AArch64::BI_InterlockedXor8_nf:
1970 case clang::AArch64::BI_InterlockedXor16_nf:
1971 case clang::AArch64::BI_InterlockedXor_nf:
1972 case clang::AArch64::BI_InterlockedXor64_nf:
1973 return MSVCIntrin::_InterlockedXor_nf;
1974 case clang::AArch64::BI_InterlockedAnd8_acq:
1975 case clang::AArch64::BI_InterlockedAnd16_acq:
1976 case clang::AArch64::BI_InterlockedAnd_acq:
1977 case clang::AArch64::BI_InterlockedAnd64_acq:
1978 return MSVCIntrin::_InterlockedAnd_acq;
1979 case clang::AArch64::BI_InterlockedAnd8_rel:
1980 case clang::AArch64::BI_InterlockedAnd16_rel:
1981 case clang::AArch64::BI_InterlockedAnd_rel:
1982 case clang::AArch64::BI_InterlockedAnd64_rel:
1983 return MSVCIntrin::_InterlockedAnd_rel;
1984 case clang::AArch64::BI_InterlockedAnd8_nf:
1985 case clang::AArch64::BI_InterlockedAnd16_nf:
1986 case clang::AArch64::BI_InterlockedAnd_nf:
1987 case clang::AArch64::BI_InterlockedAnd64_nf:
1988 return MSVCIntrin::_InterlockedAnd_nf;
1989 case clang::AArch64::BI_InterlockedIncrement16_acq:
1990 case clang::AArch64::BI_InterlockedIncrement_acq:
1991 case clang::AArch64::BI_InterlockedIncrement64_acq:
1992 return MSVCIntrin::_InterlockedIncrement_acq;
1993 case clang::AArch64::BI_InterlockedIncrement16_rel:
1994 case clang::AArch64::BI_InterlockedIncrement_rel:
1995 case clang::AArch64::BI_InterlockedIncrement64_rel:
1996 return MSVCIntrin::_InterlockedIncrement_rel;
1997 case clang::AArch64::BI_InterlockedIncrement16_nf:
1998 case clang::AArch64::BI_InterlockedIncrement_nf:
1999 case clang::AArch64::BI_InterlockedIncrement64_nf:
2000 return MSVCIntrin::_InterlockedIncrement_nf;
2001 case clang::AArch64::BI_InterlockedDecrement16_acq:
2002 case clang::AArch64::BI_InterlockedDecrement_acq:
2003 case clang::AArch64::BI_InterlockedDecrement64_acq:
2004 return MSVCIntrin::_InterlockedDecrement_acq;
2005 case clang::AArch64::BI_InterlockedDecrement16_rel:
2006 case clang::AArch64::BI_InterlockedDecrement_rel:
2007 case clang::AArch64::BI_InterlockedDecrement64_rel:
2008 return MSVCIntrin::_InterlockedDecrement_rel;
2009 case clang::AArch64::BI_InterlockedDecrement16_nf:
2010 case clang::AArch64::BI_InterlockedDecrement_nf:
2011 case clang::AArch64::BI_InterlockedDecrement64_nf:
2012 return MSVCIntrin::_InterlockedDecrement_nf;
2013 }
2014 llvm_unreachable("must return from switch");
2015}
2016
2017static std::optional<CodeGenFunction::MSVCIntrin>
2018translateX86ToMsvcIntrin(unsigned BuiltinID) {
2019 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
2020 switch (BuiltinID) {
2021 default:
2022 return std::nullopt;
2023 case clang::X86::BI_BitScanForward:
2024 case clang::X86::BI_BitScanForward64:
2025 return MSVCIntrin::_BitScanForward;
2026 case clang::X86::BI_BitScanReverse:
2027 case clang::X86::BI_BitScanReverse64:
2028 return MSVCIntrin::_BitScanReverse;
2029 case clang::X86::BI_InterlockedAnd64:
2030 return MSVCIntrin::_InterlockedAnd;
2031 case clang::X86::BI_InterlockedCompareExchange128:
2032 return MSVCIntrin::_InterlockedCompareExchange128;
2033 case clang::X86::BI_InterlockedExchange64:
2034 return MSVCIntrin::_InterlockedExchange;
2035 case clang::X86::BI_InterlockedExchangeAdd64:
2036 return MSVCIntrin::_InterlockedExchangeAdd;
2037 case clang::X86::BI_InterlockedExchangeSub64:
2038 return MSVCIntrin::_InterlockedExchangeSub;
2039 case clang::X86::BI_InterlockedOr64:
2040 return MSVCIntrin::_InterlockedOr;
2041 case clang::X86::BI_InterlockedXor64:
2042 return MSVCIntrin::_InterlockedXor;
2043 case clang::X86::BI_InterlockedDecrement64:
2044 return MSVCIntrin::_InterlockedDecrement;
2045 case clang::X86::BI_InterlockedIncrement64:
2046 return MSVCIntrin::_InterlockedIncrement;
2047 }
2048 llvm_unreachable("must return from switch");
2049}
2050
2051// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
2052Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
2053 const CallExpr *E) {
2054 switch (BuiltinID) {
2055 case MSVCIntrin::_BitScanForward:
2056 case MSVCIntrin::_BitScanReverse: {
2057 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
2058 Value *ArgValue = EmitScalarExpr(E->getArg(1));
2059
2060 llvm::Type *ArgType = ArgValue->getType();
2061 llvm::Type *IndexType = IndexAddress.getElementType();
2062 llvm::Type *ResultType = ConvertType(E->getType());
2063
2064 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
2065 Value *ResZero = llvm::Constant::getNullValue(ResultType);
2066 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
2067
2068 BasicBlock *Begin = Builder.GetInsertBlock();
2069 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
2070 Builder.SetInsertPoint(End);
2071 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
2072
2073 Builder.SetInsertPoint(Begin);
2074 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
2075 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
2076 Builder.CreateCondBr(IsZero, End, NotZero);
2077 Result->addIncoming(ResZero, Begin);
2078
2079 Builder.SetInsertPoint(NotZero);
2080
2081 if (BuiltinID == MSVCIntrin::_BitScanForward) {
2082 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2083 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2084 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2085 Builder.CreateStore(ZeroCount, IndexAddress, false);
2086 } else {
2087 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2088 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2089
2090 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2091 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2092 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2093 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
2094 Builder.CreateStore(Index, IndexAddress, false);
2095 }
2096 Builder.CreateBr(End);
2097 Result->addIncoming(ResOne, NotZero);
2098
2099 Builder.SetInsertPoint(End);
2100 return Result;
2101 }
2102 case MSVCIntrin::_InterlockedAnd:
2103 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
2104 case MSVCIntrin::_InterlockedExchange:
2105 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
2106 case MSVCIntrin::_InterlockedExchangeAdd:
2107 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
2108 case MSVCIntrin::_InterlockedExchangeSub:
2109 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
2110 case MSVCIntrin::_InterlockedOr:
2111 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
2112 case MSVCIntrin::_InterlockedXor:
2113 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
2114 case MSVCIntrin::_InterlockedExchangeAdd_acq:
2115 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2116 AtomicOrdering::Acquire);
2117 case MSVCIntrin::_InterlockedExchangeAdd_rel:
2118 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2119 AtomicOrdering::Release);
2120 case MSVCIntrin::_InterlockedExchangeAdd_nf:
2121 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2122 AtomicOrdering::Monotonic);
2123 case MSVCIntrin::_InterlockedExchange_acq:
2124 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2125 AtomicOrdering::Acquire);
2126 case MSVCIntrin::_InterlockedExchange_rel:
2127 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2128 AtomicOrdering::Release);
2129 case MSVCIntrin::_InterlockedExchange_nf:
2130 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2131 AtomicOrdering::Monotonic);
2132 case MSVCIntrin::_InterlockedCompareExchange:
2133 return EmitAtomicCmpXchgForMSIntrin(*this, E);
2134 case MSVCIntrin::_InterlockedCompareExchange_acq:
2135 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
2136 case MSVCIntrin::_InterlockedCompareExchange_rel:
2137 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
2138 case MSVCIntrin::_InterlockedCompareExchange_nf:
2139 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2140 case MSVCIntrin::_InterlockedCompareExchange128:
2141 return EmitAtomicCmpXchg128ForMSIntrin(
2142 *this, E, AtomicOrdering::SequentiallyConsistent);
2143 case MSVCIntrin::_InterlockedCompareExchange128_acq:
2144 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
2145 case MSVCIntrin::_InterlockedCompareExchange128_rel:
2146 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
2147 case MSVCIntrin::_InterlockedCompareExchange128_nf:
2148 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2149 case MSVCIntrin::_InterlockedOr_acq:
2150 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2151 AtomicOrdering::Acquire);
2152 case MSVCIntrin::_InterlockedOr_rel:
2153 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2154 AtomicOrdering::Release);
2155 case MSVCIntrin::_InterlockedOr_nf:
2156 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2157 AtomicOrdering::Monotonic);
2158 case MSVCIntrin::_InterlockedXor_acq:
2159 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2160 AtomicOrdering::Acquire);
2161 case MSVCIntrin::_InterlockedXor_rel:
2162 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2163 AtomicOrdering::Release);
2164 case MSVCIntrin::_InterlockedXor_nf:
2165 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2166 AtomicOrdering::Monotonic);
2167 case MSVCIntrin::_InterlockedAnd_acq:
2168 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2169 AtomicOrdering::Acquire);
2170 case MSVCIntrin::_InterlockedAnd_rel:
2171 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2172 AtomicOrdering::Release);
2173 case MSVCIntrin::_InterlockedAnd_nf:
2174 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2175 AtomicOrdering::Monotonic);
2176 case MSVCIntrin::_InterlockedIncrement_acq:
2177 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2178 case MSVCIntrin::_InterlockedIncrement_rel:
2179 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2180 case MSVCIntrin::_InterlockedIncrement_nf:
2181 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2182 case MSVCIntrin::_InterlockedDecrement_acq:
2183 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2184 case MSVCIntrin::_InterlockedDecrement_rel:
2185 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2186 case MSVCIntrin::_InterlockedDecrement_nf:
2187 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2188
2189 case MSVCIntrin::_InterlockedDecrement:
2190 return EmitAtomicDecrementValue(*this, E);
2191 case MSVCIntrin::_InterlockedIncrement:
2192 return EmitAtomicIncrementValue(*this, E);
2193
2194 case MSVCIntrin::__fastfail: {
2195 // Request immediate process termination from the kernel. The instruction
2196 // sequences to do this are documented on MSDN:
2197 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2198 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2199 StringRef Asm, Constraints;
2200 switch (ISA) {
2201 default:
2202 ErrorUnsupported(E, "__fastfail call for this architecture");
2203 break;
2204 case llvm::Triple::x86:
2205 case llvm::Triple::x86_64:
2206 Asm = "int $$0x29";
2207 Constraints = "{cx}";
2208 break;
2209 case llvm::Triple::thumb:
2210 Asm = "udf #251";
2211 Constraints = "{r0}";
2212 break;
2213 case llvm::Triple::aarch64:
2214 Asm = "brk #0xF003";
2215 Constraints = "{w0}";
2216 }
2217 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2218 llvm::InlineAsm *IA =
2219 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2220 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2221 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2222 llvm::Attribute::NoReturn);
2223 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2224 CI->setAttributes(NoReturnAttr);
2225 return CI;
2226 }
2227 }
2228 llvm_unreachable("Incorrect MSVC intrinsic!");
2229}
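// For example, __fastfail(7) on x86-64 is emitted as a noreturn inline-asm
// call along the lines of:
//   call void asm sideeffect "int $$0x29", "{cx}"(i32 7)
// with "brk #0xF003"/{w0} on AArch64 and "udf #251"/{r0} on Thumb, as chosen
// by the switch above.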
2230
2231namespace {
2232// ARC cleanup for __builtin_os_log_format
2233struct CallObjCArcUse final : EHScopeStack::Cleanup {
2234 CallObjCArcUse(llvm::Value *object) : object(object) {}
2235 llvm::Value *object;
2236
2237 void Emit(CodeGenFunction &CGF, Flags flags) override {
2238 CGF.EmitARCIntrinsicUse(object);
2239 }
2240};
2241}
2242
2243 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2244 BuiltinCheckKind Kind) {
2245 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
2246 "Unsupported builtin check kind");
2247
2248 Value *ArgValue = EmitScalarExpr(E);
2249 if (!SanOpts.has(SanitizerKind::Builtin))
2250 return ArgValue;
2251
2252 SanitizerScope SanScope(this);
2253 Value *Cond = Builder.CreateICmpNE(
2254 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2255 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2256 SanitizerHandler::InvalidBuiltin,
2257 {EmitCheckSourceLocation(E->getExprLoc()),
2258 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2259 {});
2260 return ArgValue;
2261}
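// For example, with -fsanitize=builtin a call like __builtin_clz(x) routes its
// argument through this helper, which emits roughly:
//   %nonzero = icmp ne i32 %x, 0
//   br i1 %nonzero, label %cont, label %handler ; invalid-builtin diagnostic
// before the value reaches the llvm.ctlz lowering.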
2262
2263 Value *CodeGenFunction::EmitCheckedArgForAssume(const Expr *E) {
2264 Value *ArgValue = EvaluateExprAsBool(E);
2265 if (!SanOpts.has(SanitizerKind::Builtin))
2266 return ArgValue;
2267
2268 SanitizerScope SanScope(this);
2269 EmitCheck(
2270 std::make_pair(ArgValue, SanitizerKind::Builtin),
2271 SanitizerHandler::InvalidBuiltin,
2272 {EmitCheckSourceLocation(E->getExprLoc()),
2273 llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)},
2274 std::nullopt);
2275 return ArgValue;
2276}
2277
2278static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2279 return CGF.Builder.CreateBinaryIntrinsic(
2280 Intrinsic::abs, ArgValue,
2281 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2282}
2283
2284 static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2285 bool SanitizeOverflow) {
2286 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2287
2288 // Try to eliminate overflow check.
2289 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2290 if (!VCI->isMinSignedValue())
2291 return EmitAbs(CGF, ArgValue, true);
2292 }
2293
2294 CodeGenFunction::SanitizerScope SanScope(&CGF);
2295
2296 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2297 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2298 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2299 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2300 Value *NotOverflow = CGF.Builder.CreateNot(
2301 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2302
2303 // TODO: support -ftrapv-handler.
2304 if (SanitizeOverflow) {
2305 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2306 SanitizerHandler::NegateOverflow,
2307 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2308 CGF.EmitCheckTypeDescriptor(E->getType())},
2309 {ArgValue});
2310 } else
2311 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2312
2313 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2314 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2315}
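// For example, with -fsanitize=signed-integer-overflow, __builtin_abs(x) is
// emitted through this helper as roughly:
//   %pair  = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 0, i32 %x)
//   %neg   = extractvalue { i32, i1 } %pair, 0
//   %ovf   = extractvalue { i32, i1 } %pair, 1
//   ... diagnose if %ovf is set ...
//   %isneg = icmp slt i32 %x, 0                     ; abscond
//   %abs   = select i1 %isneg, i32 %neg, i32 %x     ; abs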
2316
2317/// Get the argument type for arguments to os_log_helper.
2318 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2319 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2320 return C.getCanonicalType(UnsignedTy);
2321}
2322
2323 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2324 const analyze_os_log::OSLogBufferLayout &Layout,
2325 CharUnits BufferAlignment) {
2326 ASTContext &Ctx = getContext();
2327
2328 llvm::SmallString<64> Name;
2329 {
2330 raw_svector_ostream OS(Name);
2331 OS << "__os_log_helper";
2332 OS << "_" << BufferAlignment.getQuantity();
2333 OS << "_" << int(Layout.getSummaryByte());
2334 OS << "_" << int(Layout.getNumArgsByte());
2335 for (const auto &Item : Layout.Items)
2336 OS << "_" << int(Item.getSizeByte()) << "_"
2337 << int(Item.getDescriptorByte());
2338 }
2339
2340 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2341 return F;
2342
2343 llvm::SmallVector<QualType, 4> ArgTys;
2344 FunctionArgList Args;
2345 Args.push_back(ImplicitParamDecl::Create(
2346 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2347 ImplicitParamKind::Other));
2348 ArgTys.emplace_back(Ctx.VoidPtrTy);
2349
2350 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2351 char Size = Layout.Items[I].getSizeByte();
2352 if (!Size)
2353 continue;
2354
2355 QualType ArgTy = getOSLogArgType(Ctx, Size);
2356 Args.push_back(ImplicitParamDecl::Create(
2357 Ctx, nullptr, SourceLocation(),
2358 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2359 ImplicitParamKind::Other));
2360 ArgTys.emplace_back(ArgTy);
2361 }
2362
2363 QualType ReturnTy = Ctx.VoidTy;
2364
2365 // The helper function has linkonce_odr linkage to enable the linker to merge
2366 // identical functions. To ensure the merging always happens, 'noinline' is
2367 // attached to the function when compiling with -Oz.
2368 const CGFunctionInfo &FI =
2369 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2370 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2371 llvm::Function *Fn = llvm::Function::Create(
2372 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2373 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2374 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2375 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
2376 Fn->setDoesNotThrow();
2377
2378 // Attach 'noinline' at -Oz.
2379 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2380 Fn->addFnAttr(llvm::Attribute::NoInline);
2381
2382 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2383 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2384
2385 // Create a scope with an artificial location for the body of this function.
2386 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2387
2388 CharUnits Offset;
2389 Address BufAddr = makeNaturalAddressForPointer(
2390 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2391 BufferAlignment);
2392 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2393 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2394 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2395 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2396
2397 unsigned I = 1;
2398 for (const auto &Item : Layout.Items) {
2399 Builder.CreateStore(
2400 Builder.getInt8(Item.getDescriptorByte()),
2401 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2402 Builder.CreateStore(
2403 Builder.getInt8(Item.getSizeByte()),
2404 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2405
2406 CharUnits Size = Item.size();
2407 if (!Size.getQuantity())
2408 continue;
2409
2410 Address Arg = GetAddrOfLocalVar(Args[I]);
2411 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2412 Addr = Addr.withElementType(Arg.getElementType());
2413 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2414 Offset += Size;
2415 ++I;
2416 }
2417
2418 FinishFunction();
2419
2420 return Fn;
2421}
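// The mangled name encodes the buffer alignment, the summary and argument-count
// bytes, and each item's (size, descriptor) pair, so identical layouts share one
// linkonce_odr helper across translation units. A purely illustrative name for a
// two-item layout might look like:
//   __os_log_helper_16_2_2_4_0_8_34
// (the concrete numbers depend on the format string and argument types).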
2422
2423 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2424 assert(E.getNumArgs() >= 2 &&
2425 "__builtin_os_log_format takes at least 2 arguments");
2426 ASTContext &Ctx = getContext();
2427 analyze_os_log::OSLogBufferLayout Layout;
2428 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2429 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2430 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2431
2432 // Ignore argument 1, the format string. It is not currently used.
2433 CallArgList Args;
2434 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2435
2436 for (const auto &Item : Layout.Items) {
2437 int Size = Item.getSizeByte();
2438 if (!Size)
2439 continue;
2440
2441 llvm::Value *ArgVal;
2442
2443 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2444 uint64_t Val = 0;
2445 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2446 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2447 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2448 } else if (const Expr *TheExpr = Item.getExpr()) {
2449 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2450
2451 // If a temporary object that requires destruction after the full
2452 // expression is passed, push a lifetime-extended cleanup to extend its
2453 // lifetime to the end of the enclosing block scope.
2454 auto LifetimeExtendObject = [&](const Expr *E) {
2455 E = E->IgnoreParenCasts();
2456 // Extend lifetimes of objects returned by function calls and message
2457 // sends.
2458
2459 // FIXME: We should do this in other cases in which temporaries are
2460 // created including arguments of non-ARC types (e.g., C++
2461 // temporaries).
2462 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2463 return true;
2464 return false;
2465 };
2466
2467 if (TheExpr->getType()->isObjCRetainableType() &&
2468 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2469 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2470 "Only scalar can be a ObjC retainable type");
2471 if (!isa<Constant>(ArgVal)) {
2472 CleanupKind Cleanup = getARCCleanupKind();
2473 QualType Ty = TheExpr->getType();
2474 RawAddress Alloca = RawAddress::invalid();
2475 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2476 ArgVal = EmitARCRetain(Ty, ArgVal);
2477 Builder.CreateStore(ArgVal, Addr);
2478 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2479 destroyARCStrongPrecise,
2480 Cleanup & EHCleanup);
2481
2482 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2483 // argument has to be alive.
2484 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2485 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2486 }
2487 }
2488 } else {
2489 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2490 }
2491
2492 unsigned ArgValSize =
2493 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2494 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2495 ArgValSize);
2496 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2497 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2498 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2499 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2500 Args.add(RValue::get(ArgVal), ArgTy);
2501 }
2502
2503 const CGFunctionInfo &FI =
2504 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2505 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2506 Layout, BufAddr.getAlignment());
2507 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2508 return RValue::get(BufAddr, *this);
2509}
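// For example, a call such as
//   __builtin_os_log_format(buf, "%d", x);
// computes the buffer layout, packs x into the integer slot width chosen by
// getOSLogArgType, calls the generated __os_log_helper_* function with the
// buffer pointer and the packed arguments, and yields the buffer pointer as
// the builtin's result (RValue::get(BufAddr, *this) above).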
2510
2511 static bool isSpecialUnsignedMultiplySignedResult(
2512 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2513 WidthAndSignedness ResultInfo) {
2514 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2515 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2516 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2517}
2518
2519 static RValue EmitCheckedUnsignedMultiplySignedResult(
2520 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2521 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2522 const clang::Expr *ResultArg, QualType ResultQTy,
2523 WidthAndSignedness ResultInfo) {
2524 assert(isSpecialUnsignedMultiplySignedResult(
2525 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2526 "Cannot specialize this multiply");
2527
2528 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2529 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2530
2531 llvm::Value *HasOverflow;
2532 llvm::Value *Result = EmitOverflowIntrinsic(
2533 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2534
2535 // The intrinsic call will detect overflow when the value is > UINT_MAX,
2536 // however, since the original builtin had a signed result, we need to report
2537 // an overflow when the result is greater than INT_MAX.
2538 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2539 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2540
2541 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2542 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2543
2544 bool isVolatile =
2545 ResultArg->getType()->getPointeeType().isVolatileQualified();
2546 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2547 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2548 isVolatile);
2549 return RValue::get(HasOverflow);
2550}
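// For example, with unsigned operands and a signed result of equal width:
//   unsigned a, b; int r;
//   bool ovf = __builtin_mul_overflow(a, b, &r);
// this helper emits llvm.umul.with.overflow.i32 and ORs in an extra
//   icmp ugt i32 %prod, 2147483647   ; INT_MAX
// so that products above INT_MAX also count as overflow before %prod is
// stored to r.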
2551
2552/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2553static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2554 WidthAndSignedness Op1Info,
2555 WidthAndSignedness Op2Info,
2556 WidthAndSignedness ResultInfo) {
2557 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2558 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2559 Op1Info.Signed != Op2Info.Signed;
2560}
2561
2562/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2563/// the generic checked-binop irgen.
2564static RValue
2565 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2566 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2567 WidthAndSignedness Op2Info,
2568 const clang::Expr *ResultArg, QualType ResultQTy,
2569 WidthAndSignedness ResultInfo) {
2570 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2571 Op2Info, ResultInfo) &&
2572 "Not a mixed-sign multipliction we can specialize");
2573
2574 // Emit the signed and unsigned operands.
2575 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2576 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2577 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2578 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2579 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2580 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2581
2582 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2583 if (SignedOpWidth < UnsignedOpWidth)
2584 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2585 if (UnsignedOpWidth < SignedOpWidth)
2586 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2587
2588 llvm::Type *OpTy = Signed->getType();
2589 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2590 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2591 llvm::Type *ResTy = ResultPtr.getElementType();
2592 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2593
2594 // Take the absolute value of the signed operand.
2595 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2596 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2597 llvm::Value *AbsSigned =
2598 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2599
2600 // Perform a checked unsigned multiplication.
2601 llvm::Value *UnsignedOverflow;
2602 llvm::Value *UnsignedResult =
2603 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2604 Unsigned, UnsignedOverflow);
2605
2606 llvm::Value *Overflow, *Result;
2607 if (ResultInfo.Signed) {
2608 // Signed overflow occurs if the result is greater than INT_MAX or less
2609 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2610 auto IntMax =
2611 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2612 llvm::Value *MaxResult =
2613 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2614 CGF.Builder.CreateZExt(IsNegative, OpTy));
2615 llvm::Value *SignedOverflow =
2616 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2617 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2618
2619 // Prepare the signed result (possibly by negating it).
2620 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2621 llvm::Value *SignedResult =
2622 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2623 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2624 } else {
2625 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2626 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2627 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2628 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2629 if (ResultInfo.Width < OpWidth) {
2630 auto IntMax =
2631 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2632 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2633 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2634 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2635 }
2636
2637 // Negate the product if it would be negative in infinite precision.
2638 Result = CGF.Builder.CreateSelect(
2639 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2640
2641 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2642 }
2643 assert(Overflow && Result && "Missing overflow or result");
2644
2645 bool isVolatile =
2646 ResultArg->getType()->getPointeeType().isVolatileQualified();
2647 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2648 isVolatile);
2649 return RValue::get(Overflow);
2650}
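// For example, with mixed signedness:
//   int a; unsigned b; int r;
//   bool ovf = __builtin_mul_overflow(a, b, &r);
// this path multiplies |a| by b with llvm.umul.with.overflow, compares the
// product against INT_MAX + (a < 0), negates the product back when a was
// negative, truncates to the result width, stores it to r, and returns the
// combined overflow bit.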
2651
2652static bool
2653 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2654 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2655 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2656 Ty = Ctx.getBaseElementType(Arr);
2657
2658 const auto *Record = Ty->getAsCXXRecordDecl();
2659 if (!Record)
2660 return false;
2661
2662 // We've already checked this type, or are in the process of checking it.
2663 if (!Seen.insert(Record).second)
2664 return false;
2665
2666 assert(Record->hasDefinition() &&
2667 "Incomplete types should already be diagnosed");
2668
2669 if (Record->isDynamicClass())
2670 return true;
2671
2672 for (FieldDecl *F : Record->fields()) {
2673 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2674 return true;
2675 }
2676 return false;
2677}
2678
2679/// Determine if the specified type requires laundering by checking if it is a
2680/// dynamic class type or contains a subobject which is a dynamic class type.
2681 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2682 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2683 return false;
2684 llvm::SmallPtrSet<const Decl *, 16> Seen;
2685 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2686}
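// For example, under -fstrict-vtable-pointers a type such as
//   struct Dyn { virtual void f(); };
// (or any array/aggregate containing such a subobject) reports true here,
// while a plain struct of scalars reports false, in which case
// __builtin_launder does not need an llvm.launder.invariant.group call.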
2687
2688RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2689 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2690 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2691
2692 // The builtin's shift arg may have a different type than the source arg and
2693 // result, but the LLVM intrinsic uses the same type for all values.
2694 llvm::Type *Ty = Src->getType();
2695 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2696
2697 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2698 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2699 Function *F = CGM.getIntrinsic(IID, Ty);
2700 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2701}
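// For example, __builtin_rotateleft32(x, n) is emitted as
//   call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
// and __builtin_rotateright8(x, n) as @llvm.fshr.i8, with the shift amount
// first cast to the value type.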
2702
2703// Map math builtins for long-double to f128 version.
2704static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2705 switch (BuiltinID) {
2706#define MUTATE_LDBL(func) \
2707 case Builtin::BI__builtin_##func##l: \
2708 return Builtin::BI__builtin_##func##f128;
2739 MUTATE_LDBL(nans)
2740 MUTATE_LDBL(inf)
2759 MUTATE_LDBL(huge_val)
2769#undef MUTATE_LDBL
2770 default:
2771 return BuiltinID;
2772 }
2773}
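// For example, on a PPC64 target whose long double is IEEE quad (e.g. built
// with -mabi=ieeelongdouble), __builtin_nansl is remapped to
// __builtin_nansf128 and __builtin_huge_vall to __builtin_huge_valf128 before
// the normal lowering below runs.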
2774
2775static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2776 Value *V) {
2777 if (CGF.Builder.getIsFPConstrained() &&
2778 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2779 if (Value *Result =
2780 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2781 return Result;
2782 }
2783 return nullptr;
2784}
2785
2786 static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2787 const FunctionDecl *FD) {
2788 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2789 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2790 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2791
2792 SmallVector<Value *, 16> Args;
2793 for (auto &&FormalTy : FnTy->params())
2794 Args.push_back(llvm::PoisonValue::get(FormalTy));
2795
2796 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2797}
2798
2799RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2800 const CallExpr *E,
2801 ReturnValueSlot ReturnValue) {
2802 assert(!getContext().BuiltinInfo.isImmediate(BuiltinID) &&
2803 "Should not codegen for consteval builtins");
2804
2805 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2806 // See if we can constant fold this builtin. If so, don't emit it at all.
2807 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2808 Expr::EvalResult Result;
2809 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2810 !Result.hasSideEffects()) {
2811 if (Result.Val.isInt())
2812 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2813 Result.Val.getInt()));
2814 if (Result.Val.isFloat())
2815 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2816 Result.Val.getFloat()));
2817 }
2818
2819 // If current long-double semantics is IEEE 128-bit, replace math builtins
2820 // of long-double with f128 equivalent.
2821 // TODO: This mutation should also be applied to other targets other than PPC,
2822 // after backend supports IEEE 128-bit style libcalls.
2823 if (getTarget().getTriple().isPPC64() &&
2824 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2825 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2826
2827 // If the builtin has been declared explicitly with an assembler label,
2828 // disable the specialized emitting below. Ideally we should communicate the
2829 // rename in IR, or at least avoid generating the intrinsic calls that are
2830 // likely to get lowered to the renamed library functions.
2831 const unsigned BuiltinIDIfNoAsmLabel =
2832 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2833
2834 std::optional<bool> ErrnoOverriden;
2835 // ErrnoOverriden is true if math-errno is overridden via the
2836 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2837 // which implies math-errno.
2838 if (E->hasStoredFPFeatures()) {
2839 FPOptionsOverride OP = E->getFPFeatures();
2840 if (OP.hasMathErrnoOverride())
2841 ErrnoOverriden = OP.getMathErrnoOverride();
2842 }
2843 // True if '__attribute__((optnone))' is used. This attribute overrides
2844 // fast-math which implies math-errno.
2845 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2846
2847 // True if we are compiling at -O2 and errno has been disabled
2848 // using the '#pragma float_control(precise, off)', and
2849 // attribute opt-none hasn't been seen.
2850 bool ErrnoOverridenToFalseWithOpt =
2851 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2852 CGM.getCodeGenOpts().OptimizationLevel != 0;
2853
2854 // There are LLVM math intrinsics/instructions corresponding to math library
2855 // functions except the LLVM op will never set errno while the math library
2856 // might. Also, math builtins have the same semantics as their math library
2857 // twins. Thus, we can transform math library and builtin calls to their
2858 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2859 // In case FP exceptions are enabled, the experimental versions of the
2860 // intrinsics model those.
2861 bool ConstAlways =
2862 getContext().BuiltinInfo.isConst(BuiltinID);
2863
2864 // There's a special case with the fma builtins where they are always const
2865 // if the target environment is GNU or the target OS is Windows and we're
2866 // targeting the MSVCRT.dll environment.
2867 // FIXME: This list can become outdated. Need to find a way to get it some
2868 // other way.
2869 switch (BuiltinID) {
2870 case Builtin::BI__builtin_fma:
2871 case Builtin::BI__builtin_fmaf:
2872 case Builtin::BI__builtin_fmal:
2873 case Builtin::BI__builtin_fmaf16:
2874 case Builtin::BIfma:
2875 case Builtin::BIfmaf:
2876 case Builtin::BIfmal: {
2877 auto &Trip = CGM.getTriple();
2878 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2879 ConstAlways = true;
2880 break;
2881 }
2882 default:
2883 break;
2884 }
2885
2886 bool ConstWithoutErrnoAndExceptions =
2887 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2888 bool ConstWithoutExceptions =
2889 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2890
2891 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2892 // disabled.
2893 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2894 // or attributes that affect math-errno should prevent or allow math
2895 // intrinsics to be generated. Intrinsics are generated:
2896 // 1- In fast math mode, unless math-errno is overridden
2897 // via '#pragma float_control(precise, on)', or via an
2898 // '__attribute__((optnone))'.
2899 // 2- If math-errno was enabled on the command line but overridden
2900 // to false via '#pragma float_control(precise, off)' and
2901 // '__attribute__((optnone))' hasn't been used.
2902 // 3- If we are compiling with optimization and errno has been disabled
2903 // via '#pragma float_control(precise, off)', and
2904 // '__attribute__((optnone))' hasn't been used.
2905
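// For example, at -O2 with -fno-math-errno (or -ffast-math), a call to sin(x)
// is emitted as @llvm.sin.f64, whereas with -fmath-errno in effect and no
// overriding pragma or attribute the library call is kept so errno can be set.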
2906 bool ConstWithoutErrnoOrExceptions =
2907 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2908 bool GenerateIntrinsics =
2909 (ConstAlways && !OptNone) ||
2910 (!getLangOpts().MathErrno &&
2911 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2912 if (!GenerateIntrinsics) {
2913 GenerateIntrinsics =
2914 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2915 if (!GenerateIntrinsics)
2916 GenerateIntrinsics =
2917 ConstWithoutErrnoOrExceptions &&
2918 (!getLangOpts().MathErrno &&
2919 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2920 if (!GenerateIntrinsics)
2921 GenerateIntrinsics =
2922 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2923 }
2924 if (GenerateIntrinsics) {
2925 switch (BuiltinIDIfNoAsmLabel) {
2926 case Builtin::BIacos:
2927 case Builtin::BIacosf:
2928 case Builtin::BIacosl:
2929 case Builtin::BI__builtin_acos:
2930 case Builtin::BI__builtin_acosf:
2931 case Builtin::BI__builtin_acosf16:
2932 case Builtin::BI__builtin_acosl:
2933 case Builtin::BI__builtin_acosf128:
2934 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2935 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2936
2937 case Builtin::BIasin:
2938 case Builtin::BIasinf:
2939 case Builtin::BIasinl:
2940 case Builtin::BI__builtin_asin:
2941 case Builtin::BI__builtin_asinf:
2942 case Builtin::BI__builtin_asinf16:
2943 case Builtin::BI__builtin_asinl:
2944 case Builtin::BI__builtin_asinf128:
2945 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2946 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2947
2948 case Builtin::BIatan:
2949 case Builtin::BIatanf:
2950 case Builtin::BIatanl:
2951 case Builtin::BI__builtin_atan:
2952 case Builtin::BI__builtin_atanf:
2953 case Builtin::BI__builtin_atanf16:
2954 case Builtin::BI__builtin_atanl:
2955 case Builtin::BI__builtin_atanf128:
2956 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2957 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2958
2959 case Builtin::BIatan2:
2960 case Builtin::BIatan2f:
2961 case Builtin::BIatan2l:
2962 case Builtin::BI__builtin_atan2:
2963 case Builtin::BI__builtin_atan2f:
2964 case Builtin::BI__builtin_atan2f16:
2965 case Builtin::BI__builtin_atan2l:
2966 case Builtin::BI__builtin_atan2f128:
2967 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
2968 *this, E, Intrinsic::atan2,
2969 Intrinsic::experimental_constrained_atan2));
2970
2971 case Builtin::BIceil:
2972 case Builtin::BIceilf:
2973 case Builtin::BIceill:
2974 case Builtin::BI__builtin_ceil:
2975 case Builtin::BI__builtin_ceilf:
2976 case Builtin::BI__builtin_ceilf16:
2977 case Builtin::BI__builtin_ceill:
2978 case Builtin::BI__builtin_ceilf128:
2979 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2980 Intrinsic::ceil,
2981 Intrinsic::experimental_constrained_ceil));
2982
2983 case Builtin::BIcopysign:
2984 case Builtin::BIcopysignf:
2985 case Builtin::BIcopysignl:
2986 case Builtin::BI__builtin_copysign:
2987 case Builtin::BI__builtin_copysignf:
2988 case Builtin::BI__builtin_copysignf16:
2989 case Builtin::BI__builtin_copysignl:
2990 case Builtin::BI__builtin_copysignf128:
2991 return RValue::get(
2992 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2993
2994 case Builtin::BIcos:
2995 case Builtin::BIcosf:
2996 case Builtin::BIcosl:
2997 case Builtin::BI__builtin_cos:
2998 case Builtin::BI__builtin_cosf:
2999 case Builtin::BI__builtin_cosf16:
3000 case Builtin::BI__builtin_cosl:
3001 case Builtin::BI__builtin_cosf128:
3002 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3003 Intrinsic::cos,
3004 Intrinsic::experimental_constrained_cos));
3005
3006 case Builtin::BIcosh:
3007 case Builtin::BIcoshf:
3008 case Builtin::BIcoshl:
3009 case Builtin::BI__builtin_cosh:
3010 case Builtin::BI__builtin_coshf:
3011 case Builtin::BI__builtin_coshf16:
3012 case Builtin::BI__builtin_coshl:
3013 case Builtin::BI__builtin_coshf128:
3014 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3015 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
3016
3017 case Builtin::BIexp:
3018 case Builtin::BIexpf:
3019 case Builtin::BIexpl:
3020 case Builtin::BI__builtin_exp:
3021 case Builtin::BI__builtin_expf:
3022 case Builtin::BI__builtin_expf16:
3023 case Builtin::BI__builtin_expl:
3024 case Builtin::BI__builtin_expf128:
3025 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3026 Intrinsic::exp,
3027 Intrinsic::experimental_constrained_exp));
3028
3029 case Builtin::BIexp2:
3030 case Builtin::BIexp2f:
3031 case Builtin::BIexp2l:
3032 case Builtin::BI__builtin_exp2:
3033 case Builtin::BI__builtin_exp2f:
3034 case Builtin::BI__builtin_exp2f16:
3035 case Builtin::BI__builtin_exp2l:
3036 case Builtin::BI__builtin_exp2f128:
3037 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3038 Intrinsic::exp2,
3039 Intrinsic::experimental_constrained_exp2));
3040 case Builtin::BI__builtin_exp10:
3041 case Builtin::BI__builtin_exp10f:
3042 case Builtin::BI__builtin_exp10f16:
3043 case Builtin::BI__builtin_exp10l:
3044 case Builtin::BI__builtin_exp10f128: {
3045 // TODO: strictfp support
3046 if (Builder.getIsFPConstrained())
3047 break;
3048 return RValue::get(
3049 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
3050 }
3051 case Builtin::BIfabs:
3052 case Builtin::BIfabsf:
3053 case Builtin::BIfabsl:
3054 case Builtin::BI__builtin_fabs:
3055 case Builtin::BI__builtin_fabsf:
3056 case Builtin::BI__builtin_fabsf16:
3057 case Builtin::BI__builtin_fabsl:
3058 case Builtin::BI__builtin_fabsf128:
3059 return RValue::get(
3060 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
3061
3062 case Builtin::BIfloor:
3063 case Builtin::BIfloorf:
3064 case Builtin::BIfloorl:
3065 case Builtin::BI__builtin_floor:
3066 case Builtin::BI__builtin_floorf:
3067 case Builtin::BI__builtin_floorf16:
3068 case Builtin::BI__builtin_floorl:
3069 case Builtin::BI__builtin_floorf128:
3070 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3071 Intrinsic::floor,
3072 Intrinsic::experimental_constrained_floor));
3073
3074 case Builtin::BIfma:
3075 case Builtin::BIfmaf:
3076 case Builtin::BIfmal:
3077 case Builtin::BI__builtin_fma:
3078 case Builtin::BI__builtin_fmaf:
3079 case Builtin::BI__builtin_fmaf16:
3080 case Builtin::BI__builtin_fmal:
3081 case Builtin::BI__builtin_fmaf128:
3082 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
3083 Intrinsic::fma,
3084 Intrinsic::experimental_constrained_fma));
3085
3086 case Builtin::BIfmax:
3087 case Builtin::BIfmaxf:
3088 case Builtin::BIfmaxl:
3089 case Builtin::BI__builtin_fmax:
3090 case Builtin::BI__builtin_fmaxf:
3091 case Builtin::BI__builtin_fmaxf16:
3092 case Builtin::BI__builtin_fmaxl:
3093 case Builtin::BI__builtin_fmaxf128:
3094 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3095 Intrinsic::maxnum,
3096 Intrinsic::experimental_constrained_maxnum));
3097
3098 case Builtin::BIfmin:
3099 case Builtin::BIfminf:
3100 case Builtin::BIfminl:
3101 case Builtin::BI__builtin_fmin:
3102 case Builtin::BI__builtin_fminf:
3103 case Builtin::BI__builtin_fminf16:
3104 case Builtin::BI__builtin_fminl:
3105 case Builtin::BI__builtin_fminf128:
3106 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3107 Intrinsic::minnum,
3108 Intrinsic::experimental_constrained_minnum));
3109
3110 case Builtin::BIfmaximum_num:
3111 case Builtin::BIfmaximum_numf:
3112 case Builtin::BIfmaximum_numl:
3113 case Builtin::BI__builtin_fmaximum_num:
3114 case Builtin::BI__builtin_fmaximum_numf:
3115 case Builtin::BI__builtin_fmaximum_numf16:
3116 case Builtin::BI__builtin_fmaximum_numl:
3117 case Builtin::BI__builtin_fmaximum_numf128:
3118 return RValue::get(
3119 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::maximumnum));
3120
3121 case Builtin::BIfminimum_num:
3122 case Builtin::BIfminimum_numf:
3123 case Builtin::BIfminimum_numl:
3124 case Builtin::BI__builtin_fminimum_num:
3125 case Builtin::BI__builtin_fminimum_numf:
3126 case Builtin::BI__builtin_fminimum_numf16:
3127 case Builtin::BI__builtin_fminimum_numl:
3128 case Builtin::BI__builtin_fminimum_numf128:
3129 return RValue::get(
3130 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::minimumnum));
3131
3132 // fmod() is a special case. It maps to the frem instruction rather than an
3133 // LLVM intrinsic.
3134 case Builtin::BIfmod:
3135 case Builtin::BIfmodf:
3136 case Builtin::BIfmodl:
3137 case Builtin::BI__builtin_fmod:
3138 case Builtin::BI__builtin_fmodf:
3139 case Builtin::BI__builtin_fmodf16:
3140 case Builtin::BI__builtin_fmodl:
3141 case Builtin::BI__builtin_fmodf128:
3142 case Builtin::BI__builtin_elementwise_fmod: {
3143 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3144 Value *Arg1 = EmitScalarExpr(E->getArg(0));
3145 Value *Arg2 = EmitScalarExpr(E->getArg(1));
3146 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
3147 }
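  // e.g. fmod(5.5, 2.0) lowers to 'frem double 5.5, 2.0', which folds to 1.5;
  // frem, like fmod, keeps the sign of the dividend.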
3148
3149 case Builtin::BIlog:
3150 case Builtin::BIlogf:
3151 case Builtin::BIlogl:
3152 case Builtin::BI__builtin_log:
3153 case Builtin::BI__builtin_logf:
3154 case Builtin::BI__builtin_logf16:
3155 case Builtin::BI__builtin_logl:
3156 case Builtin::BI__builtin_logf128:
3157 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3158 Intrinsic::log,
3159 Intrinsic::experimental_constrained_log));
3160
3161 case Builtin::BIlog10:
3162 case Builtin::BIlog10f:
3163 case Builtin::BIlog10l:
3164 case Builtin::BI__builtin_log10:
3165 case Builtin::BI__builtin_log10f:
3166 case Builtin::BI__builtin_log10f16:
3167 case Builtin::BI__builtin_log10l:
3168 case Builtin::BI__builtin_log10f128:
3169 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3170 Intrinsic::log10,
3171 Intrinsic::experimental_constrained_log10));
3172
3173 case Builtin::BIlog2:
3174 case Builtin::BIlog2f:
3175 case Builtin::BIlog2l:
3176 case Builtin::BI__builtin_log2:
3177 case Builtin::BI__builtin_log2f:
3178 case Builtin::BI__builtin_log2f16:
3179 case Builtin::BI__builtin_log2l:
3180 case Builtin::BI__builtin_log2f128:
3181 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3182 Intrinsic::log2,
3183 Intrinsic::experimental_constrained_log2));
3184
3185 case Builtin::BInearbyint:
3186 case Builtin::BInearbyintf:
3187 case Builtin::BInearbyintl:
3188 case Builtin::BI__builtin_nearbyint:
3189 case Builtin::BI__builtin_nearbyintf:
3190 case Builtin::BI__builtin_nearbyintl:
3191 case Builtin::BI__builtin_nearbyintf128:
3193 Intrinsic::nearbyint,
3192 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3193 Intrinsic::nearbyint,
3195
3196 case Builtin::BIpow:
3197 case Builtin::BIpowf:
3198 case Builtin::BIpowl:
3199 case Builtin::BI__builtin_pow:
3200 case Builtin::BI__builtin_powf:
3201 case Builtin::BI__builtin_powf16:
3202 case Builtin::BI__builtin_powl:
3203 case Builtin::BI__builtin_powf128:
3204 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3205 Intrinsic::pow,
3206 Intrinsic::experimental_constrained_pow));
3207
3208 case Builtin::BIrint:
3209 case Builtin::BIrintf:
3210 case Builtin::BIrintl:
3211 case Builtin::BI__builtin_rint:
3212 case Builtin::BI__builtin_rintf:
3213 case Builtin::BI__builtin_rintf16:
3214 case Builtin::BI__builtin_rintl:
3215 case Builtin::BI__builtin_rintf128:
3216 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3217 Intrinsic::rint,
3218 Intrinsic::experimental_constrained_rint));
3219
3220 case Builtin::BIround:
3221 case Builtin::BIroundf:
3222 case Builtin::BIroundl:
3223 case Builtin::BI__builtin_round:
3224 case Builtin::BI__builtin_roundf:
3225 case Builtin::BI__builtin_roundf16:
3226 case Builtin::BI__builtin_roundl:
3227 case Builtin::BI__builtin_roundf128:
3228 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3229 Intrinsic::round,
3230 Intrinsic::experimental_constrained_round));
3231
3232 case Builtin::BIroundeven:
3233 case Builtin::BIroundevenf:
3234 case Builtin::BIroundevenl:
3235 case Builtin::BI__builtin_roundeven:
3236 case Builtin::BI__builtin_roundevenf:
3237 case Builtin::BI__builtin_roundevenf16:
3238 case Builtin::BI__builtin_roundevenl:
3239 case Builtin::BI__builtin_roundevenf128:
3240 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3241 Intrinsic::roundeven,
3242 Intrinsic::experimental_constrained_roundeven));
3243
3244 case Builtin::BIsin:
3245 case Builtin::BIsinf:
3246 case Builtin::BIsinl:
3247 case Builtin::BI__builtin_sin:
3248 case Builtin::BI__builtin_sinf:
3249 case Builtin::BI__builtin_sinf16:
3250 case Builtin::BI__builtin_sinl:
3251 case Builtin::BI__builtin_sinf128:
3252 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3253 Intrinsic::sin,
3254 Intrinsic::experimental_constrained_sin));
3255
3256 case Builtin::BIsinh:
3257 case Builtin::BIsinhf:
3258 case Builtin::BIsinhl:
3259 case Builtin::BI__builtin_sinh:
3260 case Builtin::BI__builtin_sinhf:
3261 case Builtin::BI__builtin_sinhf16:
3262 case Builtin::BI__builtin_sinhl:
3263 case Builtin::BI__builtin_sinhf128:
3264 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3265 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
3266
3267 case Builtin::BI__builtin_sincos:
3268 case Builtin::BI__builtin_sincosf:
3269 case Builtin::BI__builtin_sincosf16:
3270 case Builtin::BI__builtin_sincosl:
3271 case Builtin::BI__builtin_sincosf128:
3272 emitSincosBuiltin(*this, E, Intrinsic::sincos);
3273 return RValue::get(nullptr);
3274
3275 case Builtin::BIsqrt:
3276 case Builtin::BIsqrtf:
3277 case Builtin::BIsqrtl:
3278 case Builtin::BI__builtin_sqrt:
3279 case Builtin::BI__builtin_sqrtf:
3280 case Builtin::BI__builtin_sqrtf16:
3281 case Builtin::BI__builtin_sqrtl:
3282 case Builtin::BI__builtin_sqrtf128:
3283 case Builtin::BI__builtin_elementwise_sqrt: {
3284 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
3285 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3286 SetSqrtFPAccuracy(Call);
3287 return RValue::get(Call);
3288 }
3289
3290 case Builtin::BItan:
3291 case Builtin::BItanf:
3292 case Builtin::BItanl:
3293 case Builtin::BI__builtin_tan:
3294 case Builtin::BI__builtin_tanf:
3295 case Builtin::BI__builtin_tanf16:
3296 case Builtin::BI__builtin_tanl:
3297 case Builtin::BI__builtin_tanf128:
3298 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3299 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3300
3301 case Builtin::BItanh:
3302 case Builtin::BItanhf:
3303 case Builtin::BItanhl:
3304 case Builtin::BI__builtin_tanh:
3305 case Builtin::BI__builtin_tanhf:
3306 case Builtin::BI__builtin_tanhf16:
3307 case Builtin::BI__builtin_tanhl:
3308 case Builtin::BI__builtin_tanhf128:
3309 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3310 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3311
3312 case Builtin::BItrunc:
3313 case Builtin::BItruncf:
3314 case Builtin::BItruncl:
3315 case Builtin::BI__builtin_trunc:
3316 case Builtin::BI__builtin_truncf:
3317 case Builtin::BI__builtin_truncf16:
3318 case Builtin::BI__builtin_truncl:
3319 case Builtin::BI__builtin_truncf128:
3320 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3321 Intrinsic::trunc,
3322 Intrinsic::experimental_constrained_trunc));
3323
3324 case Builtin::BIlround:
3325 case Builtin::BIlroundf:
3326 case Builtin::BIlroundl:
3327 case Builtin::BI__builtin_lround:
3328 case Builtin::BI__builtin_lroundf:
3329 case Builtin::BI__builtin_lroundl:
3330 case Builtin::BI__builtin_lroundf128:
3331 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3332 *this, E, Intrinsic::lround,
3333 Intrinsic::experimental_constrained_lround));
3334
3335 case Builtin::BIllround:
3336 case Builtin::BIllroundf:
3337 case Builtin::BIllroundl:
3338 case Builtin::BI__builtin_llround:
3339 case Builtin::BI__builtin_llroundf:
3340 case Builtin::BI__builtin_llroundl:
3341 case Builtin::BI__builtin_llroundf128:
3342 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3343 *this, E, Intrinsic::llround,
3344 Intrinsic::experimental_constrained_llround));
3345
3346 case Builtin::BIlrint:
3347 case Builtin::BIlrintf:
3348 case Builtin::BIlrintl:
3349 case Builtin::BI__builtin_lrint:
3350 case Builtin::BI__builtin_lrintf:
3351 case Builtin::BI__builtin_lrintl:
3352 case Builtin::BI__builtin_lrintf128:
3353 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3354 *this, E, Intrinsic::lrint,
3355 Intrinsic::experimental_constrained_lrint));
3356
3357 case Builtin::BIllrint:
3358 case Builtin::BIllrintf:
3359 case Builtin::BIllrintl:
3360 case Builtin::BI__builtin_llrint:
3361 case Builtin::BI__builtin_llrintf:
3362 case Builtin::BI__builtin_llrintl:
3363 case Builtin::BI__builtin_llrintf128:
3364 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3365 *this, E, Intrinsic::llrint,
3366 Intrinsic::experimental_constrained_llrint));
3367 case Builtin::BI__builtin_ldexp:
3368 case Builtin::BI__builtin_ldexpf:
3369 case Builtin::BI__builtin_ldexpl:
3370 case Builtin::BI__builtin_ldexpf16:
3371 case Builtin::BI__builtin_ldexpf128: {
3372 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3373 *this, E, Intrinsic::ldexp,
3374 Intrinsic::experimental_constrained_ldexp));
3375 }
3376 default:
3377 break;
3378 }
3379 }
3380
3381 // Check NonnullAttribute/NullabilityArg and Alignment.
3382 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3383 unsigned ParmNum) {
3384 Value *Val = A.emitRawPointer(*this);
3385 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3386 ParmNum);
3387
3388 if (SanOpts.has(SanitizerKind::Alignment)) {
3389 SanitizerSet SkippedChecks;
3390 SkippedChecks.set(SanitizerKind::All);
3391 SkippedChecks.clear(SanitizerKind::Alignment);
3392 SourceLocation Loc = Arg->getExprLoc();
3393 // Strip an implicit cast.
3394 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3395 if (CE->getCastKind() == CK_BitCast)
3396 Arg = CE->getSubExpr();
3397 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3398 SkippedChecks);
3399 }
3400 };
3401
3402 switch (BuiltinIDIfNoAsmLabel) {
3403 default: break;
3404 case Builtin::BI__builtin___CFStringMakeConstantString:
3405 case Builtin::BI__builtin___NSStringMakeConstantString:
3406 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3407 case Builtin::BI__builtin_stdarg_start:
3408 case Builtin::BI__builtin_va_start:
3409 case Builtin::BI__va_start:
3410 case Builtin::BI__builtin_va_end:
3411 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3412 ? EmitScalarExpr(E->getArg(0))
3413 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3414 BuiltinID != Builtin::BI__builtin_va_end);
3415 return RValue::get(nullptr);
3416 case Builtin::BI__builtin_va_copy: {
3417 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3418 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3419 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3420 {DstPtr, SrcPtr});
3421 return RValue::get(nullptr);
3422 }
3423 case Builtin::BIabs:
3424 case Builtin::BIlabs:
3425 case Builtin::BIllabs:
3426 case Builtin::BI__builtin_abs:
3427 case Builtin::BI__builtin_labs:
3428 case Builtin::BI__builtin_llabs: {
3429 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3430
3431 Value *Result;
3432 switch (getLangOpts().getSignedOverflowBehavior()) {
3433 case LangOptions::SOB_Defined:
3434 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3435 break;
3436 case LangOptions::SOB_Undefined:
3437 if (!SanitizeOverflow) {
3438 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3439 break;
3440 }
3441 [[fallthrough]];
3442 case LangOptions::SOB_Trapping:
3443 // TODO: Somehow handle the corner case when the address of abs is taken.
3444 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3445 break;
3446 }
3447 return RValue::get(Result);
3448 }
3449 case Builtin::BI__builtin_complex: {
3450 Value *Real = EmitScalarExpr(E->getArg(0));
3451 Value *Imag = EmitScalarExpr(E->getArg(1));
3452 return RValue::getComplex({Real, Imag});
3453 }
3454 case Builtin::BI__builtin_conj:
3455 case Builtin::BI__builtin_conjf:
3456 case Builtin::BI__builtin_conjl:
3457 case Builtin::BIconj:
3458 case Builtin::BIconjf:
3459 case Builtin::BIconjl: {
3460 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3461 Value *Real = ComplexVal.first;
3462 Value *Imag = ComplexVal.second;
3463 Imag = Builder.CreateFNeg(Imag, "neg");
3464 return RValue::getComplex(std::make_pair(Real, Imag));
3465 }
3466 case Builtin::BI__builtin_creal:
3467 case Builtin::BI__builtin_crealf:
3468 case Builtin::BI__builtin_creall:
3469 case Builtin::BIcreal:
3470 case Builtin::BIcrealf:
3471 case Builtin::BIcreall: {
3472 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3473 return RValue::get(ComplexVal.first);
3474 }
3475
3476 case Builtin::BI__builtin_preserve_access_index: {
3477 // Only enable the preserved access index region when debug info
3478 // is available, as debug info is needed to preserve the user-level
3479 // access pattern.
3480 if (!getDebugInfo()) {
3481 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3482 return RValue::get(EmitScalarExpr(E->getArg(0)));
3483 }
3484
3485 // Nested builtin_preserve_access_index() not supported
3486 if (IsInPreservedAIRegion) {
3487 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3488 return RValue::get(EmitScalarExpr(E->getArg(0)));
3489 }
3490
3491 IsInPreservedAIRegion = true;
3492 Value *Res = EmitScalarExpr(E->getArg(0));
3493 IsInPreservedAIRegion = false;
3494 return RValue::get(Res);
3495 }
3496
3497 case Builtin::BI__builtin_cimag:
3498 case Builtin::BI__builtin_cimagf:
3499 case Builtin::BI__builtin_cimagl:
3500 case Builtin::BIcimag:
3501 case Builtin::BIcimagf:
3502 case Builtin::BIcimagl: {
3503 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3504 return RValue::get(ComplexVal.second);
3505 }
3506
3507 case Builtin::BI__builtin_clrsb:
3508 case Builtin::BI__builtin_clrsbl:
3509 case Builtin::BI__builtin_clrsbll: {
3510 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3511 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3512
3513 llvm::Type *ArgType = ArgValue->getType();
3514 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3515
3516 llvm::Type *ResultType = ConvertType(E->getType());
3517 Value *Zero = llvm::Constant::getNullValue(ArgType);
3518 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3519 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3520 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3521 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3522 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3523 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3524 "cast");
3525 return RValue::get(Result);
3526 }
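  // e.g. for a 32-bit int, __builtin_clrsb(0) and __builtin_clrsb(-1) both
  // yield 31: negative inputs are complemented before ctlz, so both signs
  // share one leading-bit count, minus one for the sign bit itself.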
3527 case Builtin::BI__builtin_ctzs:
3528 case Builtin::BI__builtin_ctz:
3529 case Builtin::BI__builtin_ctzl:
3530 case Builtin::BI__builtin_ctzll:
3531 case Builtin::BI__builtin_ctzg: {
3532 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3533 E->getNumArgs() > 1;
3534
3535 Value *ArgValue =
3536 HasFallback ? EmitScalarExpr(E->getArg(0))
3537 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3538
3539 llvm::Type *ArgType = ArgValue->getType();
3540 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3541
3542 llvm::Type *ResultType = ConvertType(E->getType());
3543 Value *ZeroUndef =
3544 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3545 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3546 if (Result->getType() != ResultType)
3547 Result =
3548 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3549 if (!HasFallback)
3550 return RValue::get(Result);
3551
3552 Value *Zero = Constant::getNullValue(ArgType);
3553 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3554 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3555 Value *ResultOrFallback =
3556 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3557 return RValue::get(ResultOrFallback);
3558 }
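  // e.g. __builtin_ctzg(0u, 32) selects the fallback and yields 32; without a
  // fallback, a zero argument goes through the checked-argument path above and
  // may be flagged under -fsanitize=builtin.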
3559 case Builtin::BI__builtin_clzs:
3560 case Builtin::BI__builtin_clz:
3561 case Builtin::BI__builtin_clzl:
3562 case Builtin::BI__builtin_clzll:
3563 case Builtin::BI__builtin_clzg: {
3564 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3565 E->getNumArgs() > 1;
3566
3567 Value *ArgValue =
3568 HasFallback ? EmitScalarExpr(E->getArg(0))
3569 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3570
3571 llvm::Type *ArgType = ArgValue->getType();
3572 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3573
3574 llvm::Type *ResultType = ConvertType(E->getType());
3575 Value *ZeroUndef =
3576 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3577 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3578 if (Result->getType() != ResultType)
3579 Result =
3580 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3581 if (!HasFallback)
3582 return RValue::get(Result);
3583
3584 Value *Zero = Constant::getNullValue(ArgType);
3585 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3586 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3587 Value *ResultOrFallback =
3588 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3589 return RValue::get(ResultOrFallback);
3590 }
3591 case Builtin::BI__builtin_ffs:
3592 case Builtin::BI__builtin_ffsl:
3593 case Builtin::BI__builtin_ffsll: {
3594 // ffs(x) -> x ? cttz(x) + 1 : 0
3595 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3596
3597 llvm::Type *ArgType = ArgValue->getType();
3598 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3599
3600 llvm::Type *ResultType = ConvertType(E->getType());
3601 Value *Tmp =
3602 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3603 llvm::ConstantInt::get(ArgType, 1));
3604 Value *Zero = llvm::Constant::getNullValue(ArgType);
3605 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3606 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3607 if (Result->getType() != ResultType)
3608 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3609 "cast");
3610 return RValue::get(Result);
3611 }
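  // e.g. ffs(8) == 4 (cttz(8) is 3, plus one for the 1-based index), and
  // ffs(0) == 0, handled by the explicit select rather than by cttz.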
3612 case Builtin::BI__builtin_parity:
3613 case Builtin::BI__builtin_parityl:
3614 case Builtin::BI__builtin_parityll: {
3615 // parity(x) -> ctpop(x) & 1
3616 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3617
3618 llvm::Type *ArgType = ArgValue->getType();
3619 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3620
3621 llvm::Type *ResultType = ConvertType(E->getType());
3622 Value *Tmp = Builder.CreateCall(F, ArgValue);
3623 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3624 if (Result->getType() != ResultType)
3625 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3626 "cast");
3627 return RValue::get(Result);
3628 }
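  // e.g. parity(7) == 1, since ctpop(7) == 3 and only the low bit of the
  // population count is kept.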
3629 case Builtin::BI__lzcnt16:
3630 case Builtin::BI__lzcnt:
3631 case Builtin::BI__lzcnt64: {
3632 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3633
3634 llvm::Type *ArgType = ArgValue->getType();
3635 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3636
3637 llvm::Type *ResultType = ConvertType(E->getType());
3638 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3639 if (Result->getType() != ResultType)
3640 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3641 "cast");
3642 return RValue::get(Result);
3643 }
3644 case Builtin::BI__popcnt16:
3645 case Builtin::BI__popcnt:
3646 case Builtin::BI__popcnt64:
3647 case Builtin::BI__builtin_popcount:
3648 case Builtin::BI__builtin_popcountl:
3649 case Builtin::BI__builtin_popcountll:
3650 case Builtin::BI__builtin_popcountg: {
3651 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3652
3653 llvm::Type *ArgType = ArgValue->getType();
3654 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3655
3656 llvm::Type *ResultType = ConvertType(E->getType());
3657 Value *Result = Builder.CreateCall(F, ArgValue);
3658 if (Result->getType() != ResultType)
3659 Result =
3660 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3661 return RValue::get(Result);
3662 }
3663 case Builtin::BI__builtin_unpredictable: {
3664 // Always return the argument of __builtin_unpredictable. LLVM does not
3665 // handle this builtin. Metadata for this builtin should be added directly
3666 // to instructions such as branches or switches that use it.
3667 return RValue::get(EmitScalarExpr(E->getArg(0)));
3668 }
3669 case Builtin::BI__builtin_expect: {
3670 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3671 llvm::Type *ArgType = ArgValue->getType();
3672
3673 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3674 // Don't generate llvm.expect on -O0 as the backend won't use it for
3675 // anything.
3676 // Note, we still IRGen ExpectedValue because it could have side-effects.
3677 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3678 return RValue::get(ArgValue);
3679
3680 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3681 Value *Result =
3682 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3683 return RValue::get(Result);
3684 }
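  // Typical source-level use, for reference:
  //
  //   if (__builtin_expect(err != 0, 0)) handle_error(err);
  //
  // At -O1 and above this becomes an 'llvm.expect' call (e.g. llvm.expect.i64
  // on an LP64 target) feeding the branch condition.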
3685 case Builtin::BI__builtin_expect_with_probability: {
3686 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3687 llvm::Type *ArgType = ArgValue->getType();
3688
3689 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3690 llvm::APFloat Probability(0.0);
3691 const Expr *ProbArg = E->getArg(2);
3692 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3693 assert(EvalSucceed && "probability should be able to evaluate as float");
3694 (void)EvalSucceed;
3695 bool LoseInfo = false;
3696 Probability.convert(llvm::APFloat::IEEEdouble(),
3697 llvm::RoundingMode::Dynamic, &LoseInfo);
3698 llvm::Type *Ty = ConvertType(ProbArg->getType());
3699 Constant *Confidence = ConstantFP::get(Ty, Probability);
3700 // Don't generate llvm.expect.with.probability on -O0 as the backend
3701 // won't use it for anything.
3702 // Note, we still IRGen ExpectedValue because it could have side-effects.
3703 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3704 return RValue::get(ArgValue);
3705
3706 Function *FnExpect =
3707 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3708 Value *Result = Builder.CreateCall(
3709 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3710 return RValue::get(Result);
3711 }
3712 case Builtin::BI__builtin_assume_aligned: {
3713 const Expr *Ptr = E->getArg(0);
3714 Value *PtrValue = EmitScalarExpr(Ptr);
3715 Value *OffsetValue =
3716 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3717
3718 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3719 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3720 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3721 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3722 llvm::Value::MaximumAlignment);
3723
3724 emitAlignmentAssumption(PtrValue, Ptr,
3725 /*The expr loc is sufficient.*/ SourceLocation(),
3726 AlignmentCI, OffsetValue);
3727 return RValue::get(PtrValue);
3728 }
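  // Typical use, for reference:
  //
  //   p = (int *)__builtin_assume_aligned(p, 64);
  //
  // which returns the pointer unchanged and emits an 'llvm.assume' carrying an
  // align(64) assumption on it.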
3729 case Builtin::BI__assume:
3730 case Builtin::BI__builtin_assume: {
3731 if (E->getArg(0)->HasSideEffects(getContext()))
3732 return RValue::get(nullptr);
3733
3734 Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0));
3735 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3736 Builder.CreateCall(FnAssume, ArgValue);
3737 return RValue::get(nullptr);
3738 }
3739 case Builtin::BI__builtin_assume_separate_storage: {
3740 const Expr *Arg0 = E->getArg(0);
3741 const Expr *Arg1 = E->getArg(1);
3742
3743 Value *Value0 = EmitScalarExpr(Arg0);
3744 Value *Value1 = EmitScalarExpr(Arg1);
3745
3746 Value *Values[] = {Value0, Value1};
3747 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3748 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3749 return RValue::get(nullptr);
3750 }
3751 case Builtin::BI__builtin_allow_runtime_check: {
3752 StringRef Kind =
3753 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3754 LLVMContext &Ctx = CGM.getLLVMContext();
3755 llvm::Value *Allow = Builder.CreateCall(
3756 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3757 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3758 return RValue::get(Allow);
3759 }
3760 case Builtin::BI__arithmetic_fence: {
3761 // Create the builtin call if FastMath is selected, and the target
3762 // supports the builtin, otherwise just return the argument.
3763 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3764 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3765 bool isArithmeticFenceEnabled =
3766 FMF.allowReassoc() &&
3767 getContext().getTargetInfo().checkArithmeticFenceSupported();
3768 QualType ArgType = E->getArg(0)->getType();
3769 if (ArgType->isComplexType()) {
3770 if (isArithmeticFenceEnabled) {
3771 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3772 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3773 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3774 ConvertType(ElementType));
3775 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3776 ConvertType(ElementType));
3777 return RValue::getComplex(std::make_pair(Real, Imag));
3778 }
3779 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3780 Value *Real = ComplexVal.first;
3781 Value *Imag = ComplexVal.second;
3782 return RValue::getComplex(std::make_pair(Real, Imag));
3783 }
3784 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3785 if (isArithmeticFenceEnabled)
3786 return RValue::get(
3787 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3788 return RValue::get(ArgValue);
3789 }
3790 case Builtin::BI__builtin_bswap16:
3791 case Builtin::BI__builtin_bswap32:
3792 case Builtin::BI__builtin_bswap64:
3793 case Builtin::BI_byteswap_ushort:
3794 case Builtin::BI_byteswap_ulong:
3795 case Builtin::BI_byteswap_uint64: {
3796 return RValue::get(
3797 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3798 }
3799 case Builtin::BI__builtin_bitreverse8:
3800 case Builtin::BI__builtin_bitreverse16:
3801 case Builtin::BI__builtin_bitreverse32:
3802 case Builtin::BI__builtin_bitreverse64: {
3803 return RValue::get(
3804 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3805 }
3806 case Builtin::BI__builtin_rotateleft8:
3807 case Builtin::BI__builtin_rotateleft16:
3808 case Builtin::BI__builtin_rotateleft32:
3809 case Builtin::BI__builtin_rotateleft64:
3810 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3811 case Builtin::BI_rotl16:
3812 case Builtin::BI_rotl:
3813 case Builtin::BI_lrotl:
3814 case Builtin::BI_rotl64:
3815 return emitRotate(E, false);
3816
3817 case Builtin::BI__builtin_rotateright8:
3818 case Builtin::BI__builtin_rotateright16:
3819 case Builtin::BI__builtin_rotateright32:
3820 case Builtin::BI__builtin_rotateright64:
3821 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3822 case Builtin::BI_rotr16:
3823 case Builtin::BI_rotr:
3824 case Builtin::BI_lrotr:
3825 case Builtin::BI_rotr64:
3826 return emitRotate(E, true);
3827
3828 case Builtin::BI__builtin_constant_p: {
3829 llvm::Type *ResultType = ConvertType(E->getType());
3830
3831 const Expr *Arg = E->getArg(0);
3832 QualType ArgType = Arg->getType();
3833 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3834 // and likely a mistake.
3835 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3836 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3837 // Per the GCC documentation, only numeric constants are recognized after
3838 // inlining.
3839 return RValue::get(ConstantInt::get(ResultType, 0));
3840
3841 if (Arg->HasSideEffects(getContext()))
3842 // The argument is unevaluated, so be conservative if it might have
3843 // side-effects.
3844 return RValue::get(ConstantInt::get(ResultType, 0));
3845
3846 Value *ArgValue = EmitScalarExpr(Arg);
3847 if (ArgType->isObjCObjectPointerType()) {
3848 // Convert Objective-C objects to id because we cannot distinguish between
3849 // LLVM types for Obj-C classes as they are opaque.
3850 ArgType = CGM.getContext().getObjCIdType();
3851 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3852 }
3853 Function *F =
3854 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3855 Value *Result = Builder.CreateCall(F, ArgValue);
3856 if (Result->getType() != ResultType)
3857 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3858 return RValue::get(Result);
3859 }
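  // e.g. __builtin_constant_p(42) folds to 1 once 'llvm.is.constant' is
  // lowered, while a side-effecting or non-numeric argument is conservatively
  // folded to 0 right here in the frontend.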
3860 case Builtin::BI__builtin_dynamic_object_size:
3861 case Builtin::BI__builtin_object_size: {
3862 unsigned Type =
3863 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3864 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3865
3866 // We pass this builtin onto the optimizer so that it can figure out the
3867 // object size in more complex cases.
3868 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3869 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3870 /*EmittedE=*/nullptr, IsDynamic));
3871 }
3872 case Builtin::BI__builtin_counted_by_ref: {
3873 // Default to returning '(void *) 0'.
3874 llvm::Value *Result = llvm::ConstantPointerNull::get(
3875 llvm::PointerType::getUnqual(getLLVMContext()));
3876
3877 const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts();
3878
3879 if (auto *UO = dyn_cast<UnaryOperator>(Arg);
3880 UO && UO->getOpcode() == UO_AddrOf) {
3881 Arg = UO->getSubExpr()->IgnoreParenImpCasts();
3882
3883 if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg))
3884 Arg = ASE->getBase()->IgnoreParenImpCasts();
3885 }
3886
3887 if (const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) {
3888 if (auto *CATy =
3889 ME->getMemberDecl()->getType()->getAs<CountAttributedType>();
3890 CATy && CATy->getKind() == CountAttributedType::CountedBy) {
3891 const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl());
3892 if (const FieldDecl *CountFD = FAMDecl->findCountedByField())
3893 Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD);
3894 else
3895 llvm::report_fatal_error("Cannot find the counted_by 'count' field");
3896 }
3897 }
3898
3899 return RValue::get(Result);
3900 }
3901 case Builtin::BI__builtin_prefetch: {
3902 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3903 // FIXME: Technically these constants should be of type 'int', yes?
3904 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3905 llvm::ConstantInt::get(Int32Ty, 0);
3906 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3907 llvm::ConstantInt::get(Int32Ty, 3);
3908 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3909 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3910 Builder.CreateCall(F, {Address, RW, Locality, Data});
3911 return RValue::get(nullptr);
3912 }
3913 case Builtin::BI__builtin_readcyclecounter: {
3914 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3915 return RValue::get(Builder.CreateCall(F));
3916 }
3917 case Builtin::BI__builtin_readsteadycounter: {
3918 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3919 return RValue::get(Builder.CreateCall(F));
3920 }
3921 case Builtin::BI__builtin___clear_cache: {
3922 Value *Begin = EmitScalarExpr(E->getArg(0));
3923 Value *End = EmitScalarExpr(E->getArg(1));
3924 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3925 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3926 }
3927 case Builtin::BI__builtin_trap:
3928 EmitTrapCall(Intrinsic::trap);
3929 return RValue::get(nullptr);
3930 case Builtin::BI__builtin_verbose_trap: {
3931 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3932 if (getDebugInfo()) {
3933 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3934 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3935 *E->getArg(1)->tryEvaluateString(getContext()));
3936 }
3937 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3938 // Currently no attempt is made to prevent traps from being merged.
3939 EmitTrapCall(Intrinsic::trap);
3940 return RValue::get(nullptr);
3941 }
3942 case Builtin::BI__debugbreak:
3943 EmitTrapCall(Intrinsic::debugtrap);
3944 return RValue::get(nullptr);
3945 case Builtin::BI__builtin_unreachable: {
3946 EmitUnreachable(E->getExprLoc());
3947
3948 // We do need to preserve an insertion point.
3949 EmitBlock(createBasicBlock("unreachable.cont"));
3950
3951 return RValue::get(nullptr);
3952 }
3953
3954 case Builtin::BI__builtin_powi:
3955 case Builtin::BI__builtin_powif:
3956 case Builtin::BI__builtin_powil: {
3957 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3958 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3959
3960 if (Builder.getIsFPConstrained()) {
3961 // FIXME: llvm.powi has 2 mangling types,
3962 // llvm.experimental.constrained.powi has one.
3963 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3964 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3965 Src0->getType());
3966 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3967 }
3968
3969 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3970 { Src0->getType(), Src1->getType() });
3971 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3972 }
3973 case Builtin::BI__builtin_frexpl: {
3974 // Linux PPC will not be adding additional PPCDoubleDouble support.
3975 // Work is in progress to switch the default to IEEE long double; emit a
3976 // libcall for frexpl instead of legalizing this type in the backend.
3977 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3978 break;
3979 [[fallthrough]];
3980 }
3981 case Builtin::BI__builtin_frexp:
3982 case Builtin::BI__builtin_frexpf:
3983 case Builtin::BI__builtin_frexpf128:
3984 case Builtin::BI__builtin_frexpf16:
3985 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3986 case Builtin::BI__builtin_isgreater:
3987 case Builtin::BI__builtin_isgreaterequal:
3988 case Builtin::BI__builtin_isless:
3989 case Builtin::BI__builtin_islessequal:
3990 case Builtin::BI__builtin_islessgreater:
3991 case Builtin::BI__builtin_isunordered: {
3992 // Ordered comparisons: we know the arguments to these are matching scalar
3993 // floating point values.
3994 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3995 Value *LHS = EmitScalarExpr(E->getArg(0));
3996 Value *RHS = EmitScalarExpr(E->getArg(1));
3997
3998 switch (BuiltinID) {
3999 default: llvm_unreachable("Unknown ordered comparison");
4000 case Builtin::BI__builtin_isgreater:
4001 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
4002 break;
4003 case Builtin::BI__builtin_isgreaterequal:
4004 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
4005 break;
4006 case Builtin::BI__builtin_isless:
4007 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
4008 break;
4009 case Builtin::BI__builtin_islessequal:
4010 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
4011 break;
4012 case Builtin::BI__builtin_islessgreater:
4013 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
4014 break;
4015 case Builtin::BI__builtin_isunordered:
4016 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
4017 break;
4018 }
4019 // ZExt bool to int type.
4020 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
4021 }
4022
4023 case Builtin::BI__builtin_isnan: {
4024 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4025 Value *V = EmitScalarExpr(E->getArg(0));
4026 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4027 return RValue::get(Result);
4028 return RValue::get(
4029 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
4030 ConvertType(E->getType())));
4031 }
4032
4033 case Builtin::BI__builtin_issignaling: {
4034 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4035 Value *V = EmitScalarExpr(E->getArg(0));
4036 return RValue::get(
4037 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
4038 ConvertType(E->getType())));
4039 }
4040
4041 case Builtin::BI__builtin_isinf: {
4042 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4043 Value *V = EmitScalarExpr(E->getArg(0));
4044 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4045 return RValue::get(Result);
4046 return RValue::get(
4047 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
4048 ConvertType(E->getType())));
4049 }
4050
4051 case Builtin::BIfinite:
4052 case Builtin::BI__finite:
4053 case Builtin::BIfinitef:
4054 case Builtin::BI__finitef:
4055 case Builtin::BIfinitel:
4056 case Builtin::BI__finitel:
4057 case Builtin::BI__builtin_isfinite: {
4058 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4059 Value *V = EmitScalarExpr(E->getArg(0));
4060 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4061 return RValue::get(Result);
4062 return RValue::get(
4063 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
4064 ConvertType(E->getType())));
4065 }
4066
4067 case Builtin::BI__builtin_isnormal: {
4068 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4069 Value *V = EmitScalarExpr(E->getArg(0));
4070 return RValue::get(
4071 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
4072 ConvertType(E->getType())));
4073 }
4074
4075 case Builtin::BI__builtin_issubnormal: {
4076 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4077 Value *V = EmitScalarExpr(E->getArg(0));
4078 return RValue::get(
4079 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
4080 ConvertType(E->getType())));
4081 }
4082
4083 case Builtin::BI__builtin_iszero: {
4084 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4085 Value *V = EmitScalarExpr(E->getArg(0));
4086 return RValue::get(
4087 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
4088 ConvertType(E->getType())));
4089 }
4090
4091 case Builtin::BI__builtin_isfpclass: {
4092 Expr::EvalResult Result;
4093 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
4094 break;
4095 uint64_t Test = Result.Val.getInt().getLimitedValue();
4096 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4097 Value *V = EmitScalarExpr(E->getArg(0));
4098 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
4099 ConvertType(E->getType())));
4100 }
4101
4102 case Builtin::BI__builtin_nondeterministic_value: {
4103 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
4104
4105 Value *Result = PoisonValue::get(Ty);
4106 Result = Builder.CreateFreeze(Result);
4107
4108 return RValue::get(Result);
4109 }
4110
4111 case Builtin::BI__builtin_elementwise_abs: {
4112 Value *Result;
4113 QualType QT = E->getArg(0)->getType();
4114
4115 if (auto *VecTy = QT->getAs<VectorType>())
4116 QT = VecTy->getElementType();
4117 if (QT->isIntegerType())
4118 Result = Builder.CreateBinaryIntrinsic(
4119 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
4120 Builder.getFalse(), nullptr, "elt.abs");
4121 else
4122 Result = emitBuiltinWithOneOverloadedType<1>(
4123 *this, E, llvm::Intrinsic::fabs, "elt.abs");
4124
4125 return RValue::get(Result);
4126 }
4127 case Builtin::BI__builtin_elementwise_acos:
4128 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4129 *this, E, llvm::Intrinsic::acos, "elt.acos"));
4130 case Builtin::BI__builtin_elementwise_asin:
4131 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4132 *this, E, llvm::Intrinsic::asin, "elt.asin"));
4133 case Builtin::BI__builtin_elementwise_atan:
4134 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4135 *this, E, llvm::Intrinsic::atan, "elt.atan"));
4136 case Builtin::BI__builtin_elementwise_atan2:
4137 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4138 *this, E, llvm::Intrinsic::atan2, "elt.atan2"));
4139 case Builtin::BI__builtin_elementwise_ceil:
4140 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4141 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
4142 case Builtin::BI__builtin_elementwise_exp:
4143 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4144 *this, E, llvm::Intrinsic::exp, "elt.exp"));
4145 case Builtin::BI__builtin_elementwise_exp2:
4146 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4147 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
4148 case Builtin::BI__builtin_elementwise_log:
4149 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4150 *this, E, llvm::Intrinsic::log, "elt.log"));
4151 case Builtin::BI__builtin_elementwise_log2:
4152 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4153 *this, E, llvm::Intrinsic::log2, "elt.log2"));
4154 case Builtin::BI__builtin_elementwise_log10:
4155 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4156 *this, E, llvm::Intrinsic::log10, "elt.log10"));
4157 case Builtin::BI__builtin_elementwise_pow: {
4158 return RValue::get(
4159 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
4160 }
4161 case Builtin::BI__builtin_elementwise_bitreverse:
4162 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4163 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
4164 case Builtin::BI__builtin_elementwise_cos:
4165 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4166 *this, E, llvm::Intrinsic::cos, "elt.cos"));
4167 case Builtin::BI__builtin_elementwise_cosh:
4168 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4169 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
4170 case Builtin::BI__builtin_elementwise_floor:
4171 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4172 *this, E, llvm::Intrinsic::floor, "elt.floor"));
4173 case Builtin::BI__builtin_elementwise_popcount:
4174 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4175 *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
4176 case Builtin::BI__builtin_elementwise_roundeven:
4177 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4178 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
4179 case Builtin::BI__builtin_elementwise_round:
4180 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4181 *this, E, llvm::Intrinsic::round, "elt.round"));
4182 case Builtin::BI__builtin_elementwise_rint:
4183 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4184 *this, E, llvm::Intrinsic::rint, "elt.rint"));
4185 case Builtin::BI__builtin_elementwise_nearbyint:
4186 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4187 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
4188 case Builtin::BI__builtin_elementwise_sin:
4189 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4190 *this, E, llvm::Intrinsic::sin, "elt.sin"));
4191 case Builtin::BI__builtin_elementwise_sinh:
4192 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4193 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
4194 case Builtin::BI__builtin_elementwise_tan:
4195 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4196 *this, E, llvm::Intrinsic::tan, "elt.tan"));
4197 case Builtin::BI__builtin_elementwise_tanh:
4198 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4199 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
4200 case Builtin::BI__builtin_elementwise_trunc:
4201 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4202 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
4203 case Builtin::BI__builtin_elementwise_canonicalize:
4204 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4205 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
4206 case Builtin::BI__builtin_elementwise_copysign:
4207 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4208 *this, E, llvm::Intrinsic::copysign));
4209 case Builtin::BI__builtin_elementwise_fma:
4210 return RValue::get(
4211 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
4212 case Builtin::BI__builtin_elementwise_add_sat:
4213 case Builtin::BI__builtin_elementwise_sub_sat: {
4214 Value *Op0 = EmitScalarExpr(E->getArg(0));
4215 Value *Op1 = EmitScalarExpr(E->getArg(1));
4216 Value *Result;
4217 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
4218 QualType Ty = E->getArg(0)->getType();
4219 if (auto *VecTy = Ty->getAs<VectorType>())
4220 Ty = VecTy->getElementType();
4221 bool IsSigned = Ty->isSignedIntegerType();
4222 unsigned Opc;
4223 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
4224 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
4225 else
4226 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
4227 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
4228 return RValue::get(Result);
4229 }
4230
4231 case Builtin::BI__builtin_elementwise_max: {
4232 Value *Op0 = EmitScalarExpr(E->getArg(0));
4233 Value *Op1 = EmitScalarExpr(E->getArg(1));
4234 Value *Result;
4235 if (Op0->getType()->isIntOrIntVectorTy()) {
4236 QualType Ty = E->getArg(0)->getType();
4237 if (auto *VecTy = Ty->getAs<VectorType>())
4238 Ty = VecTy->getElementType();
4239 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4240 ? llvm::Intrinsic::smax
4241 : llvm::Intrinsic::umax,
4242 Op0, Op1, nullptr, "elt.max");
4243 } else
4244 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
4245 return RValue::get(Result);
4246 }
4247 case Builtin::BI__builtin_elementwise_min: {
4248 Value *Op0 = EmitScalarExpr(E->getArg(0));
4249 Value *Op1 = EmitScalarExpr(E->getArg(1));
4250 Value *Result;
4251 if (Op0->getType()->isIntOrIntVectorTy()) {
4252 QualType Ty = E->getArg(0)->getType();
4253 if (auto *VecTy = Ty->getAs<VectorType>())
4254 Ty = VecTy->getElementType();
4255 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4256 ? llvm::Intrinsic::smin
4257 : llvm::Intrinsic::umin,
4258 Op0, Op1, nullptr, "elt.min");
4259 } else
4260 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
4261 return RValue::get(Result);
4262 }
4263
4264 case Builtin::BI__builtin_elementwise_maximum: {
4265 Value *Op0 = EmitScalarExpr(E->getArg(0));
4266 Value *Op1 = EmitScalarExpr(E->getArg(1));
4267 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
4268 Op1, nullptr, "elt.maximum");
4269 return RValue::get(Result);
4270 }
4271
4272 case Builtin::BI__builtin_elementwise_minimum: {
4273 Value *Op0 = EmitScalarExpr(E->getArg(0));
4274 Value *Op1 = EmitScalarExpr(E->getArg(1));
4275 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
4276 Op1, nullptr, "elt.minimum");
4277 return RValue::get(Result);
4278 }
4279
4280 case Builtin::BI__builtin_reduce_max: {
4281 auto GetIntrinsicID = [this](QualType QT) {
4282 if (auto *VecTy = QT->getAs<VectorType>())
4283 QT = VecTy->getElementType();
4284 else if (QT->isSizelessVectorType())
4285 QT = QT->getSizelessVectorEltType(CGM.getContext());
4286
4287 if (QT->isSignedIntegerType())
4288 return llvm::Intrinsic::vector_reduce_smax;
4289 if (QT->isUnsignedIntegerType())
4290 return llvm::Intrinsic::vector_reduce_umax;
4291 assert(QT->isFloatingType() && "must have a float here");
4292 return llvm::Intrinsic::vector_reduce_fmax;
4293 };
4294 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4295 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4296 }
4297
4298 case Builtin::BI__builtin_reduce_min: {
4299 auto GetIntrinsicID = [this](QualType QT) {
4300 if (auto *VecTy = QT->getAs<VectorType>())
4301 QT = VecTy->getElementType();
4302 else if (QT->isSizelessVectorType())
4303 QT = QT->getSizelessVectorEltType(CGM.getContext());
4304
4305 if (QT->isSignedIntegerType())
4306 return llvm::Intrinsic::vector_reduce_smin;
4307 if (QT->isUnsignedIntegerType())
4308 return llvm::Intrinsic::vector_reduce_umin;
4309 assert(QT->isFloatingType() && "must have a float here");
4310 return llvm::Intrinsic::vector_reduce_fmin;
4311 };
4312
4313 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4314 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4315 }
4316
4317 case Builtin::BI__builtin_reduce_add:
4318 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4319 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
4320 case Builtin::BI__builtin_reduce_mul:
4321 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4322 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
4323 case Builtin::BI__builtin_reduce_xor:
4324 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4325 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
4326 case Builtin::BI__builtin_reduce_or:
4327 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4328 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
4329 case Builtin::BI__builtin_reduce_and:
4330 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4331 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4332 case Builtin::BI__builtin_reduce_maximum:
4333 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4334 *this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4335 case Builtin::BI__builtin_reduce_minimum:
4336 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4337 *this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
4338
4339 case Builtin::BI__builtin_matrix_transpose: {
4340 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
4341 Value *MatValue = EmitScalarExpr(E->getArg(0));
4342 MatrixBuilder MB(Builder);
4343 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4344 MatrixTy->getNumColumns());
4345 return RValue::get(Result);
4346 }
4347
4348 case Builtin::BI__builtin_matrix_column_major_load: {
4349 MatrixBuilder MB(Builder);
4350 // Emit everything that isn't dependent on the first parameter type
4351 Value *Stride = EmitScalarExpr(E->getArg(3));
4352 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
4353 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
4354 assert(PtrTy && "arg0 must be of pointer type");
4355 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4356
4357 Address Src = EmitPointerWithAlignment(E->getArg(0));
4358 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4359 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4360 0);
4361 Value *Result = MB.CreateColumnMajorLoad(
4362 Src.getElementType(), Src.emitRawPointer(*this),
4363 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4364 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4365 return RValue::get(Result);
4366 }
4367
4368 case Builtin::BI__builtin_matrix_column_major_store: {
4369 MatrixBuilder MB(Builder);
4370 Value *Matrix = EmitScalarExpr(E->getArg(0));
4371 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4372 Value *Stride = EmitScalarExpr(E->getArg(2));
4373
4374 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4375 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4376 assert(PtrTy && "arg1 must be of pointer type");
4377 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4378
4379 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4380 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4381 0);
4382 Value *Result = MB.CreateColumnMajorStore(
4383 Matrix, Dst.emitRawPointer(*this),
4384 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4385 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4386 return RValue::get(Result);
4387 }
4388
4389 case Builtin::BI__builtin_isinf_sign: {
4390 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4391 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4392 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4393 Value *Arg = EmitScalarExpr(E->getArg(0));
4394 Value *AbsArg = EmitFAbs(*this, Arg);
4395 Value *IsInf = Builder.CreateFCmpOEQ(
4396 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4397 Value *IsNeg = EmitSignBit(*this, Arg);
4398
4399 llvm::Type *IntTy = ConvertType(E->getType());
4400 Value *Zero = Constant::getNullValue(IntTy);
4401 Value *One = ConstantInt::get(IntTy, 1);
4402 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4403 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4404 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4405 return RValue::get(Result);
4406 }
4407
4408 case Builtin::BI__builtin_flt_rounds: {
4409 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4410
4411 llvm::Type *ResultType = ConvertType(E->getType());
4412 Value *Result = Builder.CreateCall(F);
4413 if (Result->getType() != ResultType)
4414 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4415 "cast");
4416 return RValue::get(Result);
4417 }
4418
4419 case Builtin::BI__builtin_set_flt_rounds: {
4420 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4421
4422 Value *V = EmitScalarExpr(E->getArg(0));
4423 Builder.CreateCall(F, V);
4424 return RValue::get(nullptr);
4425 }
4426
4427 case Builtin::BI__builtin_fpclassify: {
4428 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4429 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4430 Value *V = EmitScalarExpr(E->getArg(5));
4431 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4432
4433 // Create Result
4434 BasicBlock *Begin = Builder.GetInsertBlock();
4435 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4436 Builder.SetInsertPoint(End);
4437 PHINode *Result =
4438 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4439 "fpclassify_result");
4440
4441 // if (V==0) return FP_ZERO
4442 Builder.SetInsertPoint(Begin);
4443 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4444 "iszero");
4445 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4446 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4447 Builder.CreateCondBr(IsZero, End, NotZero);
4448 Result->addIncoming(ZeroLiteral, Begin);
4449
4450 // if (V != V) return FP_NAN
4451 Builder.SetInsertPoint(NotZero);
4452 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4453 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4454 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4455 Builder.CreateCondBr(IsNan, End, NotNan);
4456 Result->addIncoming(NanLiteral, NotZero);
4457
4458 // if (fabs(V) == infinity) return FP_INFINITY
4459 Builder.SetInsertPoint(NotNan);
4460 Value *VAbs = EmitFAbs(*this, V);
4461 Value *IsInf =
4462 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4463 "isinf");
4464 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4465 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4466 Builder.CreateCondBr(IsInf, End, NotInf);
4467 Result->addIncoming(InfLiteral, NotNan);
4468
4469 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4470 Builder.SetInsertPoint(NotInf);
4471 APFloat Smallest = APFloat::getSmallestNormalized(
4472 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4473 Value *IsNormal =
4474 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4475 "isnormal");
4476 Value *NormalResult =
4477 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4478 EmitScalarExpr(E->getArg(3)));
4479 Builder.CreateBr(End);
4480 Result->addIncoming(NormalResult, NotInf);
4481
4482 // return Result
4483 Builder.SetInsertPoint(End);
4484 return RValue::get(Result);
4485 }
4486
4487 // An alloca will always return a pointer to the alloca (stack) address
4488 // space. This address space need not be the same as the AST / Language
4489 // default (e.g. in C / C++ auto vars are in the generic address space). At
4490 // the AST level this is handled within CreateTempAlloca et al., but for the
4491 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4492 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
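// Illustrative note (not in the original source): on AMDGPU, for example, the
// alloca is created in the private address space (addrspace(5)) while the
// C/C++ pointer type uses the generic address space, so the result below is
// address-space cast before being returned.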
4493 case Builtin::BIalloca:
4494 case Builtin::BI_alloca:
4495 case Builtin::BI__builtin_alloca_uninitialized:
4496 case Builtin::BI__builtin_alloca: {
4497 Value *Size = EmitScalarExpr(E->getArg(0));
4498 const TargetInfo &TI = getContext().getTargetInfo();
4499 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4500 const Align SuitableAlignmentInBytes =
4501 CGM.getContext()
4502 .toCharUnitsFromBits(TI.getSuitableAlign())
4503 .getAsAlign();
4504 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4505 AI->setAlignment(SuitableAlignmentInBytes);
4506 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4507 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4508 LangAS AAS = getASTAllocaAddressSpace();
4509 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4510 if (AAS != EAS) {
4511 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4512 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4513 EAS, Ty));
4514 }
4515 return RValue::get(AI);
4516 }
4517
4518 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4519 case Builtin::BI__builtin_alloca_with_align: {
4520 Value *Size = EmitScalarExpr(E->getArg(0));
4521 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4522 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4523 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4524 const Align AlignmentInBytes =
4525 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4526 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4527 AI->setAlignment(AlignmentInBytes);
4528 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4529 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4530 LangAS AAS = getASTAllocaAddressSpace();
4531 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4532 if (AAS != EAS) {
4533 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4534 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4535 EAS, Ty));
4536 }
4537 return RValue::get(AI);
4538 }
4539
4540 case Builtin::BIbzero:
4541 case Builtin::BI__builtin_bzero: {
4542 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4543 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4544 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4545 E->getArg(0)->getExprLoc(), FD, 0);
4546 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4547 return RValue::get(nullptr);
4548 }
4549
4550 case Builtin::BIbcopy:
4551 case Builtin::BI__builtin_bcopy: {
4552 Address Src = EmitPointerWithAlignment(E->getArg(0));
4553 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4554 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4555 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4556 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4557 0);
4558 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4559 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4560 0);
4561 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4562 return RValue::get(nullptr);
4563 }
4564
4565 case Builtin::BImemcpy:
4566 case Builtin::BI__builtin_memcpy:
4567 case Builtin::BImempcpy:
4568 case Builtin::BI__builtin_mempcpy: {
4569 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4570 Address Src = EmitPointerWithAlignment(E->getArg(1));
4571 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4572 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4573 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4574 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4575 if (BuiltinID == Builtin::BImempcpy ||
4576 BuiltinID == Builtin::BI__builtin_mempcpy)
4577 return RValue::get(Builder.CreateInBoundsGEP(
4578 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4579 else
4580 return RValue::get(Dest, *this);
4581 }
4582
4583 case Builtin::BI__builtin_memcpy_inline: {
4584 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4585 Address Src = EmitPointerWithAlignment(E->getArg(1));
4586 uint64_t Size =
4587 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4588 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4589 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4590 Builder.CreateMemCpyInline(Dest, Src, Size);
4591 return RValue::get(nullptr);
4592 }
4593
4594 case Builtin::BI__builtin_char_memchr:
4595 BuiltinID = Builtin::BI__builtin_memchr;
4596 break;
4597
4598 case Builtin::BI__builtin___memcpy_chk: {
4599 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4600 Expr::EvalResult SizeResult, DstSizeResult;
4601 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4602 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4603 break;
4604 llvm::APSInt Size = SizeResult.Val.getInt();
4605 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4606 if (Size.ugt(DstSize))
4607 break;
4608 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4609 Address Src = EmitPointerWithAlignment(E->getArg(1));
4610 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4611 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4612 return RValue::get(Dest, *this);
4613 }
4614
4615 case Builtin::BI__builtin_objc_memmove_collectable: {
4616 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4617 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4618 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4619 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4620 DestAddr, SrcAddr, SizeVal);
4621 return RValue::get(DestAddr, *this);
4622 }
4623
4624 case Builtin::BI__builtin___memmove_chk: {
4625 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4626 Expr::EvalResult SizeResult, DstSizeResult;
4627 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4628 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4629 break;
4630 llvm::APSInt Size = SizeResult.Val.getInt();
4631 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4632 if (Size.ugt(DstSize))
4633 break;
4634 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4635 Address Src = EmitPointerWithAlignment(E->getArg(1));
4636 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4637 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4638 return RValue::get(Dest, *this);
4639 }
4640
4641 case Builtin::BImemmove:
4642 case Builtin::BI__builtin_memmove: {
4643 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4644 Address Src = EmitPointerWithAlignment(E->getArg(1));
4645 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4646 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4647 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4648 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4649 return RValue::get(Dest, *this);
4650 }
4651 case Builtin::BImemset:
4652 case Builtin::BI__builtin_memset: {
4653 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4654 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4655 Builder.getInt8Ty());
4656 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4657 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4658 E->getArg(0)->getExprLoc(), FD, 0);
4659 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4660 return RValue::get(Dest, *this);
4661 }
4662 case Builtin::BI__builtin_memset_inline: {
4663 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4664 Value *ByteVal =
4665 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4666 uint64_t Size =
4667 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4668 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4669 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4670 0);
4671 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4672 return RValue::get(nullptr);
4673 }
4674 case Builtin::BI__builtin___memset_chk: {
4675 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4676 Expr::EvalResult SizeResult, DstSizeResult;
4677 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4678 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4679 break;
4680 llvm::APSInt Size = SizeResult.Val.getInt();
4681 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4682 if (Size.ugt(DstSize))
4683 break;
4684 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4685 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4686 Builder.getInt8Ty());
4687 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4688 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4689 return RValue::get(Dest, *this);
4690 }
4691 case Builtin::BI__builtin_wmemchr: {
4692 // The MSVC runtime library does not provide a definition of wmemchr, so we
4693 // need an inline implementation.
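 // Illustrative note (not in the original source): the blocks below open-code
 // the obvious loop, yielding a pointer to the first element equal to Chr, or
 // a null pointer if Size elements are scanned without a match.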
4694 if (!getTarget().getTriple().isOSMSVCRT())
4695 break;
4696
4697 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4698 Value *Str = EmitScalarExpr(E->getArg(0));
4699 Value *Chr = EmitScalarExpr(E->getArg(1));
4700 Value *Size = EmitScalarExpr(E->getArg(2));
4701
4702 BasicBlock *Entry = Builder.GetInsertBlock();
4703 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4704 BasicBlock *Next = createBasicBlock("wmemchr.next");
4705 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4706 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4707 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4708
4709 EmitBlock(CmpEq);
4710 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4711 StrPhi->addIncoming(Str, Entry);
4712 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4713 SizePhi->addIncoming(Size, Entry);
4714 CharUnits WCharAlign =
4715 getContext().getTypeAlignInChars(getContext().WCharTy);
4716 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4717 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4718 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4719 Builder.CreateCondBr(StrEqChr, Exit, Next);
4720
4721 EmitBlock(Next);
4722 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4723 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4724 Value *NextSizeEq0 =
4725 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4726 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4727 StrPhi->addIncoming(NextStr, Next);
4728 SizePhi->addIncoming(NextSize, Next);
4729
4730 EmitBlock(Exit);
4731 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4732 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4733 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4734 Ret->addIncoming(FoundChr, CmpEq);
4735 return RValue::get(Ret);
4736 }
4737 case Builtin::BI__builtin_wmemcmp: {
4738 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4739 // need an inline implementation.
4740 if (!getTarget().getTriple().isOSMSVCRT())
4741 break;
4742
4743 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4744
4745 Value *Dst = EmitScalarExpr(E->getArg(0));
4746 Value *Src = EmitScalarExpr(E->getArg(1));
4747 Value *Size = EmitScalarExpr(E->getArg(2));
4748
4749 BasicBlock *Entry = Builder.GetInsertBlock();
4750 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4751 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4752 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4753 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4754 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4755 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4756
4757 EmitBlock(CmpGT);
4758 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4759 DstPhi->addIncoming(Dst, Entry);
4760 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4761 SrcPhi->addIncoming(Src, Entry);
4762 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4763 SizePhi->addIncoming(Size, Entry);
4764 CharUnits WCharAlign =
4765 getContext().getTypeAlignInChars(getContext().WCharTy);
4766 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4767 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4768 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4769 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4770
4771 EmitBlock(CmpLT);
4772 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4773 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4774
4775 EmitBlock(Next);
4776 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4777 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4778 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4779 Value *NextSizeEq0 =
4780 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4781 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4782 DstPhi->addIncoming(NextDst, Next);
4783 SrcPhi->addIncoming(NextSrc, Next);
4784 SizePhi->addIncoming(NextSize, Next);
4785
4786 EmitBlock(Exit);
4787 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4788 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4789 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4790 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4791 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4792 return RValue::get(Ret);
4793 }
4794 case Builtin::BI__builtin_dwarf_cfa: {
4795 // The offset in bytes from the first argument to the CFA.
4796 //
4797 // Why on earth is this in the frontend? Is there any reason at
4798 // all that the backend can't reasonably determine this while
4799 // lowering llvm.eh.dwarf.cfa()?
4800 //
4801 // TODO: If there's a satisfactory reason, add a target hook for
4802 // this instead of hard-coding 0, which is correct for most targets.
4803 int32_t Offset = 0;
4804
4805 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4806 return RValue::get(Builder.CreateCall(F,
4807 llvm::ConstantInt::get(Int32Ty, Offset)));
4808 }
4809 case Builtin::BI__builtin_return_address: {
4810 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4811 getContext().UnsignedIntTy);
4812 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4813 return RValue::get(Builder.CreateCall(F, Depth));
4814 }
4815 case Builtin::BI_ReturnAddress: {
4816 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4817 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4818 }
4819 case Builtin::BI__builtin_frame_address: {
4820 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4821 getContext().UnsignedIntTy);
4822 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4823 return RValue::get(Builder.CreateCall(F, Depth));
4824 }
4825 case Builtin::BI__builtin_extract_return_addr: {
4826 Value *Address = EmitScalarExpr(E->getArg(0));
4827 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4828 return RValue::get(Result);
4829 }
4830 case Builtin::BI__builtin_frob_return_addr: {
4831 Value *Address = EmitScalarExpr(E->getArg(0));
4832 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4833 return RValue::get(Result);
4834 }
4835 case Builtin::BI__builtin_dwarf_sp_column: {
4836 llvm::IntegerType *Ty
4837 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4838 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4839 if (Column == -1) {
4840 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4841 return RValue::get(llvm::UndefValue::get(Ty));
4842 }
4843 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4844 }
4845 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4846 Value *Address = EmitScalarExpr(E->getArg(0));
4847 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4848 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4849 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4850 }
4851 case Builtin::BI__builtin_eh_return: {
4852 Value *Int = EmitScalarExpr(E->getArg(0));
4853 Value *Ptr = EmitScalarExpr(E->getArg(1));
4854
4855 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4856 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4857 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4858 Function *F =
4859 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4860 : Intrinsic::eh_return_i64);
4861 Builder.CreateCall(F, {Int, Ptr});
4862 Builder.CreateUnreachable();
4863
4864 // We do need to preserve an insertion point.
4865 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4866
4867 return RValue::get(nullptr);
4868 }
4869 case Builtin::BI__builtin_unwind_init: {
4870 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4871 Builder.CreateCall(F);
4872 return RValue::get(nullptr);
4873 }
4874 case Builtin::BI__builtin_extend_pointer: {
4875 // Extends a pointer to the size of an _Unwind_Word, which is
4876 // uint64_t on all platforms. Generally this gets poked into a
4877 // register and eventually used as an address, so if the
4878 // addressing registers are wider than pointers and the platform
4879 // doesn't implicitly ignore high-order bits when doing
4880 // addressing, we need to make sure we zext / sext based on
4881 // the platform's expectations.
4882 //
4883 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
4884
4885 // Cast the pointer to intptr_t.
4886 Value *Ptr = EmitScalarExpr(E->getArg(0));
4887 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4888
4889 // If that's 64 bits, we're done.
4890 if (IntPtrTy->getBitWidth() == 64)
4891 return RValue::get(Result);
4892
4893 // Otherwise, ask the codegen data what to do.
4894 if (getTargetHooks().extendPointerWithSExt())
4895 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4896 else
4897 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4898 }
4899 case Builtin::BI__builtin_setjmp: {
4900 // Buffer is a void**.
4901 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4902
4903 if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
4904 // On this target, the back end fills in the context buffer completely.
4905 // It doesn't really matter if the frontend stores to the buffer before
4906 // calling setjmp, the back-end is going to overwrite them anyway.
4907 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4908 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4909 }
4910
4911 // Store the frame pointer to the setjmp buffer.
4912 Value *FrameAddr = Builder.CreateCall(
4913 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4914 ConstantInt::get(Int32Ty, 0));
4915 Builder.CreateStore(FrameAddr, Buf);
4916
4917 // Store the stack pointer to the setjmp buffer.
4918 Value *StackAddr = Builder.CreateStackSave();
4919 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4920
4921 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4922 Builder.CreateStore(StackAddr, StackSaveSlot);
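 // Illustrative note (not in the original source): per the LLVM LangRef,
 // llvm.eh.sjlj.setjmp expects the frame pointer in word 0 of the buffer and
 // fills in the longjmp destination in word 1 itself; the remaining words are
 // target-specific, and the front end here uses word 2 for the saved stack
 // pointer stored above.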
4923
4924 // Call LLVM's EH setjmp, which is lightweight.
4925 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4926 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4927 }
4928 case Builtin::BI__builtin_longjmp: {
4929 Value *Buf = EmitScalarExpr(E->getArg(0));
4930
4931 // Call LLVM's EH longjmp, which is lightweight.
4932 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4933
4934 // longjmp doesn't return; mark this as unreachable.
4935 Builder.CreateUnreachable();
4936
4937 // We do need to preserve an insertion point.
4938 EmitBlock(createBasicBlock("longjmp.cont"));
4939
4940 return RValue::get(nullptr);
4941 }
4942 case Builtin::BI__builtin_launder: {
4943 const Expr *Arg = E->getArg(0);
4944 QualType ArgTy = Arg->getType()->getPointeeType();
4945 Value *Ptr = EmitScalarExpr(Arg);
4946 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4947 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4948
4949 return RValue::get(Ptr);
4950 }
4951 case Builtin::BI__sync_fetch_and_add:
4952 case Builtin::BI__sync_fetch_and_sub:
4953 case Builtin::BI__sync_fetch_and_or:
4954 case Builtin::BI__sync_fetch_and_and:
4955 case Builtin::BI__sync_fetch_and_xor:
4956 case Builtin::BI__sync_fetch_and_nand:
4957 case Builtin::BI__sync_add_and_fetch:
4958 case Builtin::BI__sync_sub_and_fetch:
4959 case Builtin::BI__sync_and_and_fetch:
4960 case Builtin::BI__sync_or_and_fetch:
4961 case Builtin::BI__sync_xor_and_fetch:
4962 case Builtin::BI__sync_nand_and_fetch:
4963 case Builtin::BI__sync_val_compare_and_swap:
4964 case Builtin::BI__sync_bool_compare_and_swap:
4965 case Builtin::BI__sync_lock_test_and_set:
4966 case Builtin::BI__sync_lock_release:
4967 case Builtin::BI__sync_swap:
4968 llvm_unreachable("Shouldn't make it through sema");
4969 case Builtin::BI__sync_fetch_and_add_1:
4970 case Builtin::BI__sync_fetch_and_add_2:
4971 case Builtin::BI__sync_fetch_and_add_4:
4972 case Builtin::BI__sync_fetch_and_add_8:
4973 case Builtin::BI__sync_fetch_and_add_16:
4974 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4975 case Builtin::BI__sync_fetch_and_sub_1:
4976 case Builtin::BI__sync_fetch_and_sub_2:
4977 case Builtin::BI__sync_fetch_and_sub_4:
4978 case Builtin::BI__sync_fetch_and_sub_8:
4979 case Builtin::BI__sync_fetch_and_sub_16:
4980 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4981 case Builtin::BI__sync_fetch_and_or_1:
4982 case Builtin::BI__sync_fetch_and_or_2:
4983 case Builtin::BI__sync_fetch_and_or_4:
4984 case Builtin::BI__sync_fetch_and_or_8:
4985 case Builtin::BI__sync_fetch_and_or_16:
4986 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4987 case Builtin::BI__sync_fetch_and_and_1:
4988 case Builtin::BI__sync_fetch_and_and_2:
4989 case Builtin::BI__sync_fetch_and_and_4:
4990 case Builtin::BI__sync_fetch_and_and_8:
4991 case Builtin::BI__sync_fetch_and_and_16:
4992 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4993 case Builtin::BI__sync_fetch_and_xor_1:
4994 case Builtin::BI__sync_fetch_and_xor_2:
4995 case Builtin::BI__sync_fetch_and_xor_4:
4996 case Builtin::BI__sync_fetch_and_xor_8:
4997 case Builtin::BI__sync_fetch_and_xor_16:
4998 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4999 case Builtin::BI__sync_fetch_and_nand_1:
5000 case Builtin::BI__sync_fetch_and_nand_2:
5001 case Builtin::BI__sync_fetch_and_nand_4:
5002 case Builtin::BI__sync_fetch_and_nand_8:
5003 case Builtin::BI__sync_fetch_and_nand_16:
5004 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
5005
5006 // Clang extensions: not overloaded yet.
5007 case Builtin::BI__sync_fetch_and_min:
5008 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
5009 case Builtin::BI__sync_fetch_and_max:
5010 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
5011 case Builtin::BI__sync_fetch_and_umin:
5012 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
5013 case Builtin::BI__sync_fetch_and_umax:
5014 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
5015
5016 case Builtin::BI__sync_add_and_fetch_1:
5017 case Builtin::BI__sync_add_and_fetch_2:
5018 case Builtin::BI__sync_add_and_fetch_4:
5019 case Builtin::BI__sync_add_and_fetch_8:
5020 case Builtin::BI__sync_add_and_fetch_16:
5021 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
5022 llvm::Instruction::Add);
5023 case Builtin::BI__sync_sub_and_fetch_1:
5024 case Builtin::BI__sync_sub_and_fetch_2:
5025 case Builtin::BI__sync_sub_and_fetch_4:
5026 case Builtin::BI__sync_sub_and_fetch_8:
5027 case Builtin::BI__sync_sub_and_fetch_16:
5028 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
5029 llvm::Instruction::Sub);
5030 case Builtin::BI__sync_and_and_fetch_1:
5031 case Builtin::BI__sync_and_and_fetch_2:
5032 case Builtin::BI__sync_and_and_fetch_4:
5033 case Builtin::BI__sync_and_and_fetch_8:
5034 case Builtin::BI__sync_and_and_fetch_16:
5035 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
5036 llvm::Instruction::And);
5037 case Builtin::BI__sync_or_and_fetch_1:
5038 case Builtin::BI__sync_or_and_fetch_2:
5039 case Builtin::BI__sync_or_and_fetch_4:
5040 case Builtin::BI__sync_or_and_fetch_8:
5041 case Builtin::BI__sync_or_and_fetch_16:
5042 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
5043 llvm::Instruction::Or);
5044 case Builtin::BI__sync_xor_and_fetch_1:
5045 case Builtin::BI__sync_xor_and_fetch_2:
5046 case Builtin::BI__sync_xor_and_fetch_4:
5047 case Builtin::BI__sync_xor_and_fetch_8:
5048 case Builtin::BI__sync_xor_and_fetch_16:
5049 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
5050 llvm::Instruction::Xor);
5051 case Builtin::BI__sync_nand_and_fetch_1:
5052 case Builtin::BI__sync_nand_and_fetch_2:
5053 case Builtin::BI__sync_nand_and_fetch_4:
5054 case Builtin::BI__sync_nand_and_fetch_8:
5055 case Builtin::BI__sync_nand_and_fetch_16:
5056 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
5057 llvm::Instruction::And, true);
5058
5059 case Builtin::BI__sync_val_compare_and_swap_1:
5060 case Builtin::BI__sync_val_compare_and_swap_2:
5061 case Builtin::BI__sync_val_compare_and_swap_4:
5062 case Builtin::BI__sync_val_compare_and_swap_8:
5063 case Builtin::BI__sync_val_compare_and_swap_16:
5064 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
5065
5066 case Builtin::BI__sync_bool_compare_and_swap_1:
5067 case Builtin::BI__sync_bool_compare_and_swap_2:
5068 case Builtin::BI__sync_bool_compare_and_swap_4:
5069 case Builtin::BI__sync_bool_compare_and_swap_8:
5070 case Builtin::BI__sync_bool_compare_and_swap_16:
5071 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
5072
5073 case Builtin::BI__sync_swap_1:
5074 case Builtin::BI__sync_swap_2:
5075 case Builtin::BI__sync_swap_4:
5076 case Builtin::BI__sync_swap_8:
5077 case Builtin::BI__sync_swap_16:
5078 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5079
5080 case Builtin::BI__sync_lock_test_and_set_1:
5081 case Builtin::BI__sync_lock_test_and_set_2:
5082 case Builtin::BI__sync_lock_test_and_set_4:
5083 case Builtin::BI__sync_lock_test_and_set_8:
5084 case Builtin::BI__sync_lock_test_and_set_16:
5085 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5086
5087 case Builtin::BI__sync_lock_release_1:
5088 case Builtin::BI__sync_lock_release_2:
5089 case Builtin::BI__sync_lock_release_4:
5090 case Builtin::BI__sync_lock_release_8:
5091 case Builtin::BI__sync_lock_release_16: {
5092 Address Ptr = CheckAtomicAlignment(*this, E);
5093 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5094
5095 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5096 getContext().getTypeSize(ElTy));
5097 llvm::StoreInst *Store =
5098 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
5099 Store->setAtomic(llvm::AtomicOrdering::Release);
5100 return RValue::get(nullptr);
5101 }
5102
5103 case Builtin::BI__sync_synchronize: {
5104 // We assume this is supposed to correspond to a C++0x-style
5105 // sequentially-consistent fence (i.e. this is only usable for
5106 // synchronization, not device I/O or anything like that). This intrinsic
5107 // is really badly designed in the sense that in theory, there isn't
5108 // any way to safely use it... but in practice, it mostly works
5109 // to use it with non-atomic loads and stores to get acquire/release
5110 // semantics.
5111 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
5112 return RValue::get(nullptr);
5113 }
5114
5115 case Builtin::BI__builtin_nontemporal_load:
5116 return RValue::get(EmitNontemporalLoad(*this, E));
5117 case Builtin::BI__builtin_nontemporal_store:
5118 return RValue::get(EmitNontemporalStore(*this, E));
5119 case Builtin::BI__c11_atomic_is_lock_free:
5120 case Builtin::BI__atomic_is_lock_free: {
5121 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
5122 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
5123 // _Atomic(T) is always properly-aligned.
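 // Illustrative note (not in the original source): the GNU form
 // __atomic_is_lock_free(size, ptr) forwards the object pointer so the runtime
 // can consider its actual alignment, whereas the __c11 form only has a size
 // and therefore passes a null pointer below.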
5124 const char *LibCallName = "__atomic_is_lock_free";
5125 CallArgList Args;
5126 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
5127 getContext().getSizeType());
5128 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
5129 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
5130 getContext().VoidPtrTy);
5131 else
5132 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
5133 getContext().VoidPtrTy);
5134 const CGFunctionInfo &FuncInfo =
5135 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
5136 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
5137 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
5138 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
5139 ReturnValueSlot(), Args);
5140 }
5141
5142 case Builtin::BI__atomic_test_and_set: {
5143 // Look at the argument type to determine whether this is a volatile
5144 // operation. The parameter type is always volatile.
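 // Illustrative note (not in the original source): the builtin's parameter is
 // declared 'volatile void *', so every argument is implicitly converted to
 // it; looking through the implicit casts recovers whether the caller's
 // pointer actually refers to volatile memory.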
5145 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5146 bool Volatile =
5147 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5148
5149 Address Ptr =
5150 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
5151
5152 Value *NewVal = Builder.getInt8(1);
5153 Value *Order = EmitScalarExpr(E->getArg(1));
5154 if (isa<llvm::ConstantInt>(Order)) {
5155 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5156 AtomicRMWInst *Result = nullptr;
5157 switch (ord) {
5158 case 0: // memory_order_relaxed
5159 default: // invalid order
5160 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5161 llvm::AtomicOrdering::Monotonic);
5162 break;
5163 case 1: // memory_order_consume
5164 case 2: // memory_order_acquire
5165 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5166 llvm::AtomicOrdering::Acquire);
5167 break;
5168 case 3: // memory_order_release
5169 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5170 llvm::AtomicOrdering::Release);
5171 break;
5172 case 4: // memory_order_acq_rel
5173
5174 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5175 llvm::AtomicOrdering::AcquireRelease);
5176 break;
5177 case 5: // memory_order_seq_cst
5178 Result = Builder.CreateAtomicRMW(
5179 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5180 llvm::AtomicOrdering::SequentiallyConsistent);
5181 break;
5182 }
5183 Result->setVolatile(Volatile);
5184 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5185 }
5186
5187 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5188
5189 llvm::BasicBlock *BBs[5] = {
5190 createBasicBlock("monotonic", CurFn),
5191 createBasicBlock("acquire", CurFn),
5192 createBasicBlock("release", CurFn),
5193 createBasicBlock("acqrel", CurFn),
5194 createBasicBlock("seqcst", CurFn)
5195 };
5196 llvm::AtomicOrdering Orders[5] = {
5197 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
5198 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
5199 llvm::AtomicOrdering::SequentiallyConsistent};
5200
5201 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5202 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5203
5204 Builder.SetInsertPoint(ContBB);
5205 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
5206
5207 for (unsigned i = 0; i < 5; ++i) {
5208 Builder.SetInsertPoint(BBs[i]);
5209 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
5210 Ptr, NewVal, Orders[i]);
5211 RMW->setVolatile(Volatile);
5212 Result->addIncoming(RMW, BBs[i]);
5213 Builder.CreateBr(ContBB);
5214 }
5215
5216 SI->addCase(Builder.getInt32(0), BBs[0]);
5217 SI->addCase(Builder.getInt32(1), BBs[1]);
5218 SI->addCase(Builder.getInt32(2), BBs[1]);
5219 SI->addCase(Builder.getInt32(3), BBs[2]);
5220 SI->addCase(Builder.getInt32(4), BBs[3]);
5221 SI->addCase(Builder.getInt32(5), BBs[4]);
5222
5223 Builder.SetInsertPoint(ContBB);
5224 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5225 }
5226
5227 case Builtin::BI__atomic_clear: {
5228 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5229 bool Volatile =
5230 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5231
5232 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
5233 Ptr = Ptr.withElementType(Int8Ty);
5234 Value *NewVal = Builder.getInt8(0);
5235 Value *Order = EmitScalarExpr(E->getArg(1));
5236 if (isa<llvm::ConstantInt>(Order)) {
5237 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5238 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5239 switch (ord) {
5240 case 0: // memory_order_relaxed
5241 default: // invalid order
5242 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
5243 break;
5244 case 3: // memory_order_release
5245 Store->setOrdering(llvm::AtomicOrdering::Release);
5246 break;
5247 case 5: // memory_order_seq_cst
5248 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
5249 break;
5250 }
5251 return RValue::get(nullptr);
5252 }
5253
5254 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5255
5256 llvm::BasicBlock *BBs[3] = {
5257 createBasicBlock("monotonic", CurFn),
5258 createBasicBlock("release", CurFn),
5259 createBasicBlock("seqcst", CurFn)
5260 };
5261 llvm::AtomicOrdering Orders[3] = {
5262 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
5263 llvm::AtomicOrdering::SequentiallyConsistent};
5264
5265 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5266 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5267
5268 for (unsigned i = 0; i < 3; ++i) {
5269 Builder.SetInsertPoint(BBs[i]);
5270 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5271 Store->setOrdering(Orders[i]);
5272 Builder.CreateBr(ContBB);
5273 }
5274
5275 SI->addCase(Builder.getInt32(0), BBs[0]);
5276 SI->addCase(Builder.getInt32(3), BBs[1]);
5277 SI->addCase(Builder.getInt32(5), BBs[2]);
5278
5279 Builder.SetInsertPoint(ContBB);
5280 return RValue::get(nullptr);
5281 }
5282
5283 case Builtin::BI__atomic_thread_fence:
5284 case Builtin::BI__atomic_signal_fence:
5285 case Builtin::BI__c11_atomic_thread_fence:
5286 case Builtin::BI__c11_atomic_signal_fence: {
5287 llvm::SyncScope::ID SSID;
5288 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
5289 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
5290 SSID = llvm::SyncScope::SingleThread;
5291 else
5292 SSID = llvm::SyncScope::System;
5293 Value *Order = EmitScalarExpr(E->getArg(0));
5294 if (isa<llvm::ConstantInt>(Order)) {
5295 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5296 switch (ord) {
5297 case 0: // memory_order_relaxed
5298 default: // invalid order
5299 break;
5300 case 1: // memory_order_consume
5301 case 2: // memory_order_acquire
5302 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5303 break;
5304 case 3: // memory_order_release
5305 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5306 break;
5307 case 4: // memory_order_acq_rel
5308 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5309 break;
5310 case 5: // memory_order_seq_cst
5311 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5312 break;
5313 }
5314 return RValue::get(nullptr);
5315 }
5316
5317 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
5318 AcquireBB = createBasicBlock("acquire", CurFn);
5319 ReleaseBB = createBasicBlock("release", CurFn);
5320 AcqRelBB = createBasicBlock("acqrel", CurFn);
5321 SeqCstBB = createBasicBlock("seqcst", CurFn);
5322 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5323
5324 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5325 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5326
5327 Builder.SetInsertPoint(AcquireBB);
5328 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5329 Builder.CreateBr(ContBB);
5330 SI->addCase(Builder.getInt32(1), AcquireBB);
5331 SI->addCase(Builder.getInt32(2), AcquireBB);
5332
5333 Builder.SetInsertPoint(ReleaseBB);
5334 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5335 Builder.CreateBr(ContBB);
5336 SI->addCase(Builder.getInt32(3), ReleaseBB);
5337
5338 Builder.SetInsertPoint(AcqRelBB);
5339 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5340 Builder.CreateBr(ContBB);
5341 SI->addCase(Builder.getInt32(4), AcqRelBB);
5342
5343 Builder.SetInsertPoint(SeqCstBB);
5344 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5345 Builder.CreateBr(ContBB);
5346 SI->addCase(Builder.getInt32(5), SeqCstBB);
5347
5348 Builder.SetInsertPoint(ContBB);
5349 return RValue::get(nullptr);
5350 }
5351 case Builtin::BI__scoped_atomic_thread_fence: {
5352 auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
5353
5354 Value *Order = EmitScalarExpr(E->getArg(0));
5355 Value *Scope = EmitScalarExpr(E->getArg(1));
5356 auto Ord = dyn_cast<llvm::ConstantInt>(Order);
5357 auto Scp = dyn_cast<llvm::ConstantInt>(Scope);
5358 if (Ord && Scp) {
5359 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5360 ? ScopeModel->map(Scp->getZExtValue())
5361 : ScopeModel->map(ScopeModel->getFallBackValue());
5362 switch (Ord->getZExtValue()) {
5363 case 0: // memory_order_relaxed
5364 default: // invalid order
5365 break;
5366 case 1: // memory_order_consume
5367 case 2: // memory_order_acquire
5368 Builder.CreateFence(
5369 llvm::AtomicOrdering::Acquire,
5370 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5371 llvm::AtomicOrdering::Acquire,
5372 getLLVMContext()));
5373 break;
5374 case 3: // memory_order_release
5375 Builder.CreateFence(
5376 llvm::AtomicOrdering::Release,
5377 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5378 llvm::AtomicOrdering::Release,
5379 getLLVMContext()));
5380 break;
5381 case 4: // memory_order_acq_rel
5382 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
5383 getTargetHooks().getLLVMSyncScopeID(
5384 getLangOpts(), SS,
5385 llvm::AtomicOrdering::AcquireRelease,
5386 getLLVMContext()));
5387 break;
5388 case 5: // memory_order_seq_cst
5389 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5390 getTargetHooks().getLLVMSyncScopeID(
5391 getLangOpts(), SS,
5392 llvm::AtomicOrdering::SequentiallyConsistent,
5393 getLLVMContext()));
5394 break;
5395 }
5396 return RValue::get(nullptr);
5397 }
5398
5399 llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
5400
5401 llvm::SmallVector<std::pair<llvm::BasicBlock *, llvm::AtomicOrdering>>
5402 OrderBBs;
5403 if (Ord) {
5404 switch (Ord->getZExtValue()) {
5405 case 0: // memory_order_relaxed
5406 default: // invalid order
5407 ContBB->eraseFromParent();
5408 return RValue::get(nullptr);
5409 case 1: // memory_order_consume
5410 case 2: // memory_order_acquire
5411 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5412 llvm::AtomicOrdering::Acquire);
5413 break;
5414 case 3: // memory_order_release
5415 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5416 llvm::AtomicOrdering::Release);
5417 break;
5418 case 4: // memory_order_acq_rel
5419 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5420 llvm::AtomicOrdering::AcquireRelease);
5421 break;
5422 case 5: // memory_order_seq_cst
5423 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5424 llvm::AtomicOrdering::SequentiallyConsistent);
5425 break;
5426 }
5427 } else {
5428 llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
5429 llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
5430 llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
5431 llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
5432
5433 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5434 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5435 SI->addCase(Builder.getInt32(1), AcquireBB);
5436 SI->addCase(Builder.getInt32(2), AcquireBB);
5437 SI->addCase(Builder.getInt32(3), ReleaseBB);
5438 SI->addCase(Builder.getInt32(4), AcqRelBB);
5439 SI->addCase(Builder.getInt32(5), SeqCstBB);
5440
5441 OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
5442 OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
5443 OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
5444 OrderBBs.emplace_back(SeqCstBB,
5445 llvm::AtomicOrdering::SequentiallyConsistent);
5446 }
5447
5448 for (auto &[OrderBB, Ordering] : OrderBBs) {
5449 Builder.SetInsertPoint(OrderBB);
5450 if (Scp) {
5451 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5452 ? ScopeModel->map(Scp->getZExtValue())
5453 : ScopeModel->map(ScopeModel->getFallBackValue());
5454 Builder.CreateFence(Ordering,
5455 getTargetHooks().getLLVMSyncScopeID(
5456 getLangOpts(), SS, Ordering, getLLVMContext()));
5457 Builder.CreateBr(ContBB);
5458 } else {
5459 llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
5460 for (unsigned Scp : ScopeModel->getRuntimeValues())
5461 BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
5462
5463 auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
5464 llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
5465 for (unsigned Scp : ScopeModel->getRuntimeValues()) {
5466 auto *B = BBs[Scp];
5467 SI->addCase(Builder.getInt32(Scp), B);
5468
5469 Builder.SetInsertPoint(B);
5470 Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
5471 getLangOpts(), ScopeModel->map(Scp),
5472 Ordering, getLLVMContext()));
5473 Builder.CreateBr(ContBB);
5474 }
5475 }
5476 }
5477
5478 Builder.SetInsertPoint(ContBB);
5479 return RValue::get(nullptr);
5480 }
5481
5482 case Builtin::BI__builtin_signbit:
5483 case Builtin::BI__builtin_signbitf:
5484 case Builtin::BI__builtin_signbitl: {
5485 return RValue::get(
5486 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
5487 ConvertType(E->getType())));
5488 }
5489 case Builtin::BI__warn_memset_zero_len:
5490 return RValue::getIgnored();
5491 case Builtin::BI__annotation: {
5492 // Re-encode each wide string to UTF8 and make an MDString.
5493 SmallVector<Metadata *, 1> Strings;
5494 for (const Expr *Arg : E->arguments()) {
5495 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5496 assert(Str->getCharByteWidth() == 2);
5497 StringRef WideBytes = Str->getBytes();
5498 std::string StrUtf8;
5499 if (!convertUTF16ToUTF8String(
5500 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5501 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5502 continue;
5503 }
5504 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5505 }
5506
5507 // Build an MDTuple of MDStrings and emit the intrinsic call.
5508 llvm::Function *F =
5509 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5510 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5511 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5512 return RValue::getIgnored();
5513 }
5514 case Builtin::BI__builtin_annotation: {
5515 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5516 llvm::Function *F =
5517 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5518 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5519
5520 // Get the annotation string, go through casts. Sema requires this to be a
5521 // non-wide string literal, potentially cast, so the cast<> is safe.
5522 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5523 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5524 return RValue::get(
5525 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5526 }
5527 case Builtin::BI__builtin_addcb:
5528 case Builtin::BI__builtin_addcs:
5529 case Builtin::BI__builtin_addc:
5530 case Builtin::BI__builtin_addcl:
5531 case Builtin::BI__builtin_addcll:
5532 case Builtin::BI__builtin_subcb:
5533 case Builtin::BI__builtin_subcs:
5534 case Builtin::BI__builtin_subc:
5535 case Builtin::BI__builtin_subcl:
5536 case Builtin::BI__builtin_subcll: {
5537
5538 // We translate all of these builtins from expressions of the form:
5539 // int x = ..., y = ..., carryin = ..., carryout, result;
5540 // result = __builtin_addc(x, y, carryin, &carryout);
5541 //
5542 // to LLVM IR of the form:
5543 //
5544 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5545 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5546 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5547 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5548 // i32 %carryin)
5549 // %result = extractvalue {i32, i1} %tmp2, 0
5550 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5551 // %tmp3 = or i1 %carry1, %carry2
5552 // %tmp4 = zext i1 %tmp3 to i32
5553 // store i32 %tmp4, i32* %carryout
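 // Illustrative source-level use (not in the original comment):
 //   unsigned carryout;
 //   unsigned lo = __builtin_addc(a, b, carryin, &carryout);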
5554
5555 // Scalarize our inputs.
5556 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5557 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5558 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5559 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5560
5561 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5562 llvm::Intrinsic::ID IntrinsicId;
5563 switch (BuiltinID) {
5564 default: llvm_unreachable("Unknown multiprecision builtin id.");
5565 case Builtin::BI__builtin_addcb:
5566 case Builtin::BI__builtin_addcs:
5567 case Builtin::BI__builtin_addc:
5568 case Builtin::BI__builtin_addcl:
5569 case Builtin::BI__builtin_addcll:
5570 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5571 break;
5572 case Builtin::BI__builtin_subcb:
5573 case Builtin::BI__builtin_subcs:
5574 case Builtin::BI__builtin_subc:
5575 case Builtin::BI__builtin_subcl:
5576 case Builtin::BI__builtin_subcll:
5577 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5578 break;
5579 }
5580
5581 // Construct our resulting LLVM IR expression.
5582 llvm::Value *Carry1;
5583 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5584 X, Y, Carry1);
5585 llvm::Value *Carry2;
5586 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5587 Sum1, Carryin, Carry2);
5588 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5589 X->getType());
5590 Builder.CreateStore(CarryOut, CarryOutPtr);
5591 return RValue::get(Sum2);
5592 }
5593
5594 case Builtin::BI__builtin_add_overflow:
5595 case Builtin::BI__builtin_sub_overflow:
5596 case Builtin::BI__builtin_mul_overflow: {
5597 const clang::Expr *LeftArg = E->getArg(0);
5598 const clang::Expr *RightArg = E->getArg(1);
5599 const clang::Expr *ResultArg = E->getArg(2);
5600
5601 clang::QualType ResultQTy =
5602 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5603
5604 WidthAndSignedness LeftInfo =
5605 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5606 WidthAndSignedness RightInfo =
5607 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5608 WidthAndSignedness ResultInfo =
5609 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5610
5611 // Handle mixed-sign multiplication as a special case, because adding
5612 // runtime or backend support for our generic irgen would be too expensive.
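 // Illustrative note (not in the original source): this covers calls such as
 // __builtin_mul_overflow(i, u, &res) where one operand is signed and the
 // other unsigned, for which a single [su]mul.with.overflow on a common type
 // would not be sufficient.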
5613 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5614 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5615 RightInfo, ResultArg, ResultQTy,
5616 ResultInfo);
5617
5618 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5619 ResultInfo))
5620 return EmitCheckedUnsignedMultiplySignedResult(
5621 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5622 ResultInfo);
5623
5624 WidthAndSignedness EncompassingInfo =
5625 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5626
5627 llvm::Type *EncompassingLLVMTy =
5628 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5629
5630 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5631
5632 llvm::Intrinsic::ID IntrinsicId;
5633 switch (BuiltinID) {
5634 default:
5635 llvm_unreachable("Unknown overflow builtin id.");
5636 case Builtin::BI__builtin_add_overflow:
5637 IntrinsicId = EncompassingInfo.Signed
5638 ? llvm::Intrinsic::sadd_with_overflow
5639 : llvm::Intrinsic::uadd_with_overflow;
5640 break;
5641 case Builtin::BI__builtin_sub_overflow:
5642 IntrinsicId = EncompassingInfo.Signed
5643 ? llvm::Intrinsic::ssub_with_overflow
5644 : llvm::Intrinsic::usub_with_overflow;
5645 break;
5646 case Builtin::BI__builtin_mul_overflow:
5647 IntrinsicId = EncompassingInfo.Signed
5648 ? llvm::Intrinsic::smul_with_overflow
5649 : llvm::Intrinsic::umul_with_overflow;
5650 break;
5651 }
5652
5653 llvm::Value *Left = EmitScalarExpr(LeftArg);
5654 llvm::Value *Right = EmitScalarExpr(RightArg);
5655 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5656
5657 // Extend each operand to the encompassing type.
5658 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5659 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5660
5661 // Perform the operation on the extended values.
5662 llvm::Value *Overflow, *Result;
5663 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5664
5665 if (EncompassingInfo.Width > ResultInfo.Width) {
5666 // The encompassing type is wider than the result type, so we need to
5667 // truncate it.
5668 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5669
5670 // To see if the truncation caused an overflow, we will extend
5671 // the result and then compare it to the original result.
5672 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5673 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5674 llvm::Value *TruncationOverflow =
5675 Builder.CreateICmpNE(Result, ResultTruncExt);
5676
5677 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5678 Result = ResultTrunc;
5679 }
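 // Worked example (not in the original source): for
 //   signed char c; __builtin_add_overflow(100, 100, &c);
 // the addition is performed in the 32-bit encompassing type, giving 200;
 // truncating to i8 yields -56, and re-extending gives -56 != 200, so the
 // truncation check reports overflow even though the add itself did not
 // overflow.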
5680
5681 // Finally, store the result using the pointer.
5682 bool isVolatile =
5683 ResultArg->getType()->getPointeeType().isVolatileQualified();
5684 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5685
5686 return RValue::get(Overflow);
5687 }
5688
5689 case Builtin::BI__builtin_uadd_overflow:
5690 case Builtin::BI__builtin_uaddl_overflow:
5691 case Builtin::BI__builtin_uaddll_overflow:
5692 case Builtin::BI__builtin_usub_overflow:
5693 case Builtin::BI__builtin_usubl_overflow:
5694 case Builtin::BI__builtin_usubll_overflow:
5695 case Builtin::BI__builtin_umul_overflow:
5696 case Builtin::BI__builtin_umull_overflow:
5697 case Builtin::BI__builtin_umulll_overflow:
5698 case Builtin::BI__builtin_sadd_overflow:
5699 case Builtin::BI__builtin_saddl_overflow:
5700 case Builtin::BI__builtin_saddll_overflow:
5701 case Builtin::BI__builtin_ssub_overflow:
5702 case Builtin::BI__builtin_ssubl_overflow:
5703 case Builtin::BI__builtin_ssubll_overflow:
5704 case Builtin::BI__builtin_smul_overflow:
5705 case Builtin::BI__builtin_smull_overflow:
5706 case Builtin::BI__builtin_smulll_overflow: {
5707
5708 // We translate all of these builtins directly to the relevant llvm IR node.
5709
5710 // Scalarize our inputs.
5711 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5712 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5713 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5714
5715 // Decide which of the overflow intrinsics we are lowering to:
5716 llvm::Intrinsic::ID IntrinsicId;
5717 switch (BuiltinID) {
5718 default: llvm_unreachable("Unknown overflow builtin id.");
5719 case Builtin::BI__builtin_uadd_overflow:
5720 case Builtin::BI__builtin_uaddl_overflow:
5721 case Builtin::BI__builtin_uaddll_overflow:
5722 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5723 break;
5724 case Builtin::BI__builtin_usub_overflow:
5725 case Builtin::BI__builtin_usubl_overflow:
5726 case Builtin::BI__builtin_usubll_overflow:
5727 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5728 break;
5729 case Builtin::BI__builtin_umul_overflow:
5730 case Builtin::BI__builtin_umull_overflow:
5731 case Builtin::BI__builtin_umulll_overflow:
5732 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5733 break;
5734 case Builtin::BI__builtin_sadd_overflow:
5735 case Builtin::BI__builtin_saddl_overflow:
5736 case Builtin::BI__builtin_saddll_overflow:
5737 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5738 break;
5739 case Builtin::BI__builtin_ssub_overflow:
5740 case Builtin::BI__builtin_ssubl_overflow:
5741 case Builtin::BI__builtin_ssubll_overflow:
5742 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5743 break;
5744 case Builtin::BI__builtin_smul_overflow:
5745 case Builtin::BI__builtin_smull_overflow:
5746 case Builtin::BI__builtin_smulll_overflow:
5747 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5748 break;
5749 }
5750
5751
5752 llvm::Value *Carry;
5753 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5754 Builder.CreateStore(Sum, SumOutPtr);
5755
5756 return RValue::get(Carry);
5757 }
5758 case Builtin::BIaddressof:
5759 case Builtin::BI__addressof:
5760 case Builtin::BI__builtin_addressof:
5761 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5762 case Builtin::BI__builtin_function_start:
5763 return RValue::get(CGM.GetFunctionStart(
5764 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5765 case Builtin::BI__builtin_operator_new:
5766 return EmitBuiltinNewDeleteCall(
5767 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5768 case Builtin::BI__builtin_operator_delete:
5769 EmitBuiltinNewDeleteCall(
5770 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5771 return RValue::get(nullptr);
5772
5773 case Builtin::BI__builtin_is_aligned:
5774 return EmitBuiltinIsAligned(E);
5775 case Builtin::BI__builtin_align_up:
5776 return EmitBuiltinAlignTo(E, true);
5777 case Builtin::BI__builtin_align_down:
5778 return EmitBuiltinAlignTo(E, false);
5779
5780 case Builtin::BI__noop:
5781 // __noop always evaluates to an integer literal zero.
5782 return RValue::get(ConstantInt::get(IntTy, 0));
5783 case Builtin::BI__builtin_call_with_static_chain: {
5784 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5785 const Expr *Chain = E->getArg(1);
5786 return EmitCall(Call->getCallee()->getType(),
5787 EmitCallee(Call->getCallee()), Call, ReturnValue,
5788 EmitScalarExpr(Chain));
5789 }
5790 case Builtin::BI_InterlockedExchange8:
5791 case Builtin::BI_InterlockedExchange16:
5792 case Builtin::BI_InterlockedExchange:
5793 case Builtin::BI_InterlockedExchangePointer:
5794 return RValue::get(
5795 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5796 case Builtin::BI_InterlockedCompareExchangePointer:
5797 return RValue::get(
5798 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5799 case Builtin::BI_InterlockedCompareExchangePointer_nf:
5800 return RValue::get(
5801 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E));
5802 case Builtin::BI_InterlockedCompareExchange8:
5803 case Builtin::BI_InterlockedCompareExchange16:
5804 case Builtin::BI_InterlockedCompareExchange:
5805 case Builtin::BI_InterlockedCompareExchange64:
5806 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5807 case Builtin::BI_InterlockedIncrement16:
5808 case Builtin::BI_InterlockedIncrement:
5809 return RValue::get(
5810 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5811 case Builtin::BI_InterlockedDecrement16:
5812 case Builtin::BI_InterlockedDecrement:
5813 return RValue::get(
5814 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5815 case Builtin::BI_InterlockedAnd8:
5816 case Builtin::BI_InterlockedAnd16:
5817 case Builtin::BI_InterlockedAnd:
5818 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5819 case Builtin::BI_InterlockedExchangeAdd8:
5820 case Builtin::BI_InterlockedExchangeAdd16:
5821 case Builtin::BI_InterlockedExchangeAdd:
5822 return RValue::get(
5823 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5824 case Builtin::BI_InterlockedExchangeSub8:
5825 case Builtin::BI_InterlockedExchangeSub16:
5826 case Builtin::BI_InterlockedExchangeSub:
5827 return RValue::get(
5828 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5829 case Builtin::BI_InterlockedOr8:
5830 case Builtin::BI_InterlockedOr16:
5831 case Builtin::BI_InterlockedOr:
5832 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5833 case Builtin::BI_InterlockedXor8:
5834 case Builtin::BI_InterlockedXor16:
5835 case Builtin::BI_InterlockedXor:
5836 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5837
5838 case Builtin::BI_bittest64:
5839 case Builtin::BI_bittest:
5840 case Builtin::BI_bittestandcomplement64:
5841 case Builtin::BI_bittestandcomplement:
5842 case Builtin::BI_bittestandreset64:
5843 case Builtin::BI_bittestandreset:
5844 case Builtin::BI_bittestandset64:
5845 case Builtin::BI_bittestandset:
5846 case Builtin::BI_interlockedbittestandreset:
5847 case Builtin::BI_interlockedbittestandreset64:
5848 case Builtin::BI_interlockedbittestandset64:
5849 case Builtin::BI_interlockedbittestandset:
5850 case Builtin::BI_interlockedbittestandset_acq:
5851 case Builtin::BI_interlockedbittestandset_rel:
5852 case Builtin::BI_interlockedbittestandset_nf:
5853 case Builtin::BI_interlockedbittestandreset_acq:
5854 case Builtin::BI_interlockedbittestandreset_rel:
5855 case Builtin::BI_interlockedbittestandreset_nf:
5856 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5857
5858 // These builtins exist to emit regular volatile loads and stores not
5859 // affected by the -fms-volatile setting.
5860 case Builtin::BI__iso_volatile_load8:
5861 case Builtin::BI__iso_volatile_load16:
5862 case Builtin::BI__iso_volatile_load32:
5863 case Builtin::BI__iso_volatile_load64:
5864 return RValue::get(EmitISOVolatileLoad(*this, E));
5865 case Builtin::BI__iso_volatile_store8:
5866 case Builtin::BI__iso_volatile_store16:
5867 case Builtin::BI__iso_volatile_store32:
5868 case Builtin::BI__iso_volatile_store64:
5869 return RValue::get(EmitISOVolatileStore(*this, E));
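// Illustrative note: __iso_volatile_load32(p) emits a plain `load volatile i32`
// and __iso_volatile_store32(p, v) a plain `store volatile i32`, so these
// accesses keep ISO volatile semantics even under -fms-volatile
// (/volatile:ms), where ordinary volatile accesses would get atomic semantics.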
5870
5871 case Builtin::BI__builtin_ptrauth_sign_constant:
5872 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5873
5874 case Builtin::BI__builtin_ptrauth_auth:
5875 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5876 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5877 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5878 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5879 case Builtin::BI__builtin_ptrauth_strip: {
5880 // Emit the arguments.
5881 SmallVector<llvm::Value *, 5> Args;
5882 for (auto argExpr : E->arguments())
5883 Args.push_back(EmitScalarExpr(argExpr));
5884
5885 // Cast the value to intptr_t, saving its original type.
5886 llvm::Type *OrigValueType = Args[0]->getType();
5887 if (OrigValueType->isPointerTy())
5888 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5889
5890 switch (BuiltinID) {
5891 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5892 if (Args[4]->getType()->isPointerTy())
5893 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5894 [[fallthrough]];
5895
5896 case Builtin::BI__builtin_ptrauth_auth:
5897 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5898 if (Args[2]->getType()->isPointerTy())
5899 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5900 break;
5901
5902 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5903 if (Args[1]->getType()->isPointerTy())
5904 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5905 break;
5906
5907 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5908 case Builtin::BI__builtin_ptrauth_strip:
5909 break;
5910 }
5911
5912 // Call the intrinsic.
5913 auto IntrinsicID = [&]() -> unsigned {
5914 switch (BuiltinID) {
5915 case Builtin::BI__builtin_ptrauth_auth:
5916 return llvm::Intrinsic::ptrauth_auth;
5917 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5918 return llvm::Intrinsic::ptrauth_resign;
5919 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5920 return llvm::Intrinsic::ptrauth_blend;
5921 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5922 return llvm::Intrinsic::ptrauth_sign_generic;
5923 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5924 return llvm::Intrinsic::ptrauth_sign;
5925 case Builtin::BI__builtin_ptrauth_strip:
5926 return llvm::Intrinsic::ptrauth_strip;
5927 }
5928 llvm_unreachable("bad ptrauth intrinsic");
5929 }();
5930 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5931 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5932
5933 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5934 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5935 OrigValueType->isPointerTy()) {
5936 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5937 }
5938 return RValue::get(Result);
5939 }
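// Illustrative sketch of the lowering above (value names are illustrative):
//   void *q = __builtin_ptrauth_strip(p, ptrauth_key_asda);
// is emitted roughly as
//   %0 = ptrtoint ptr %p to i64
//   %1 = call i64 @llvm.ptrauth.strip(i64 %0, i32 2)
//   %q = inttoptr i64 %1 to ptr
// i.e. pointer operands round-trip through intptr_t around the intrinsic.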
5940
5941 case Builtin::BI__exception_code:
5942 case Builtin::BI_exception_code:
5943 return RValue::get(EmitSEHExceptionCode());
5944 case Builtin::BI__exception_info:
5945 case Builtin::BI_exception_info:
5946 return RValue::get(EmitSEHExceptionInfo());
5947 case Builtin::BI__abnormal_termination:
5948 case Builtin::BI_abnormal_termination:
5949 return RValue::get(EmitSEHAbnormalTermination());
5950 case Builtin::BI_setjmpex:
5951 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5952 E->getArg(0)->getType()->isPointerType())
5953 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5954 break;
5955 case Builtin::BI_setjmp:
5956 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5957 E->getArg(0)->getType()->isPointerType()) {
5958 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5959 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5960 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5961 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5962 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5963 }
5964 break;
5965
5966 // C++ std:: builtins.
5967 case Builtin::BImove:
5968 case Builtin::BImove_if_noexcept:
5969 case Builtin::BIforward:
5970 case Builtin::BIforward_like:
5971 case Builtin::BIas_const:
5972 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
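// Note: std::move, std::forward, std::as_const and friends are transparent at
// codegen time; the call simply re-emits its argument as an lvalue and returns
// that address, so e.g. `std::move(x)` produces the same IR as naming `x`.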
5973 case Builtin::BI__GetExceptionInfo: {
5974 if (llvm::GlobalVariable *GV =
5975 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5976 return RValue::get(GV);
5977 break;
5978 }
5979
5980 case Builtin::BI__fastfail:
5981 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5982
5983 case Builtin::BI__builtin_coro_id:
5984 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5985 case Builtin::BI__builtin_coro_promise:
5986 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5987 case Builtin::BI__builtin_coro_resume:
5988 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5989 return RValue::get(nullptr);
5990 case Builtin::BI__builtin_coro_frame:
5991 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5992 case Builtin::BI__builtin_coro_noop:
5993 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5994 case Builtin::BI__builtin_coro_free:
5995 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5996 case Builtin::BI__builtin_coro_destroy:
5997 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5998 return RValue::get(nullptr);
5999 case Builtin::BI__builtin_coro_done:
6000 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
6001 case Builtin::BI__builtin_coro_alloc:
6002 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
6003 case Builtin::BI__builtin_coro_begin:
6004 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
6005 case Builtin::BI__builtin_coro_end:
6006 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
6007 case Builtin::BI__builtin_coro_suspend:
6008 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
6009 case Builtin::BI__builtin_coro_size:
6010 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
6011 case Builtin::BI__builtin_coro_align:
6012 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
6013
6014 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
6015 case Builtin::BIread_pipe:
6016 case Builtin::BIwrite_pipe: {
6017 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6018 *Arg1 = EmitScalarExpr(E->getArg(1));
6019 CGOpenCLRuntime OpenCLRT(CGM);
6020 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6021 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6022
6023 // Type of the generic packet parameter.
6024 unsigned GenericAS =
6025 getContext().getTargetAddressSpace(LangAS::opencl_generic);
6026 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
6027
6028 // Determine which overloaded version we should generate the call for.
6029 if (2U == E->getNumArgs()) {
6030 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
6031 : "__write_pipe_2";
6032 // Create a generic function type so the call can be made with any builtin
6033 // or user-defined type.
6034 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
6035 llvm::FunctionType *FTy = llvm::FunctionType::get(
6036 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6037 Value *ACast = Builder.CreateAddrSpaceCast(Arg1, I8PTy);
6038 return RValue::get(
6039 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6040 {Arg0, ACast, PacketSize, PacketAlign}));
6041 } else {
6042 assert(4 == E->getNumArgs() &&
6043 "Illegal number of parameters to pipe function");
6044 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
6045 : "__write_pipe_4";
6046
6047 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
6048 Int32Ty, Int32Ty};
6049 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
6050 *Arg3 = EmitScalarExpr(E->getArg(3));
6051 llvm::FunctionType *FTy = llvm::FunctionType::get(
6052 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6053 Value *ACast = Builder.CreateAddrSpaceCast(Arg3, I8PTy);
6054 // We know the third argument is an integer type, but we may need to cast
6055 // it to i32.
6056 if (Arg2->getType() != Int32Ty)
6057 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
6058 return RValue::get(
6059 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6060 {Arg0, Arg1, Arg2, ACast, PacketSize, PacketAlign}));
6061 }
6062 }
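// Illustrative sketch: for `int v; read_pipe(p, &v);` the two-argument
// overload above is selected and the call lowers roughly to
//   call i32 @__read_pipe_2(<pipe> %p, ptr <generic AS> %v.cast, i32 4, i32 4)
// where %v.cast is &v address-space-cast to the generic AS and 4/4 are the
// packet size and alignment computed by CGOpenCLRuntime.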
6063 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
6064 // functions
6065 case Builtin::BIreserve_read_pipe:
6066 case Builtin::BIreserve_write_pipe:
6067 case Builtin::BIwork_group_reserve_read_pipe:
6068 case Builtin::BIwork_group_reserve_write_pipe:
6069 case Builtin::BIsub_group_reserve_read_pipe:
6070 case Builtin::BIsub_group_reserve_write_pipe: {
6071 // Composing the mangled name for the function.
6072 const char *Name;
6073 if (BuiltinID == Builtin::BIreserve_read_pipe)
6074 Name = "__reserve_read_pipe";
6075 else if (BuiltinID == Builtin::BIreserve_write_pipe)
6076 Name = "__reserve_write_pipe";
6077 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
6078 Name = "__work_group_reserve_read_pipe";
6079 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
6080 Name = "__work_group_reserve_write_pipe";
6081 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
6082 Name = "__sub_group_reserve_read_pipe";
6083 else
6084 Name = "__sub_group_reserve_write_pipe";
6085
6086 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6087 *Arg1 = EmitScalarExpr(E->getArg(1));
6088 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
6089 CGOpenCLRuntime OpenCLRT(CGM);
6090 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6091 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6092
6093 // Building the generic function prototype.
6094 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
6095 llvm::FunctionType *FTy = llvm::FunctionType::get(
6096 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6097 // We know the second argument is an integer type, but we may need to cast
6098 // it to i32.
6099 if (Arg1->getType() != Int32Ty)
6100 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
6101 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6102 {Arg0, Arg1, PacketSize, PacketAlign}));
6103 }
6104 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
6105 // functions
6106 case Builtin::BIcommit_read_pipe:
6107 case Builtin::BIcommit_write_pipe:
6108 case Builtin::BIwork_group_commit_read_pipe:
6109 case Builtin::BIwork_group_commit_write_pipe:
6110 case Builtin::BIsub_group_commit_read_pipe:
6111 case Builtin::BIsub_group_commit_write_pipe: {
6112 const char *Name;
6113 if (BuiltinID == Builtin::BIcommit_read_pipe)
6114 Name = "__commit_read_pipe";
6115 else if (BuiltinID == Builtin::BIcommit_write_pipe)
6116 Name = "__commit_write_pipe";
6117 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
6118 Name = "__work_group_commit_read_pipe";
6119 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
6120 Name = "__work_group_commit_write_pipe";
6121 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
6122 Name = "__sub_group_commit_read_pipe";
6123 else
6124 Name = "__sub_group_commit_write_pipe";
6125
6126 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6127 *Arg1 = EmitScalarExpr(E->getArg(1));
6128 CGOpenCLRuntime OpenCLRT(CGM);
6129 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6130 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6131
6132 // Building the generic function prototype.
6133 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
6134 llvm::FunctionType *FTy =
6135 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
6136 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6137
6138 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6139 {Arg0, Arg1, PacketSize, PacketAlign}));
6140 }
6141 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
6142 case Builtin::BIget_pipe_num_packets:
6143 case Builtin::BIget_pipe_max_packets: {
6144 const char *BaseName;
6145 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
6146 if (BuiltinID == Builtin::BIget_pipe_num_packets)
6147 BaseName = "__get_pipe_num_packets";
6148 else
6149 BaseName = "__get_pipe_max_packets";
6150 std::string Name = std::string(BaseName) +
6151 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
6152
6153 // Building the generic function prototype.
6154 Value *Arg0 = EmitScalarExpr(E->getArg(0));
6155 CGOpenCLRuntime OpenCLRT(CGM);
6156 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6157 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6158 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
6159 llvm::FunctionType *FTy = llvm::FunctionType::get(
6160 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6161
6162 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6163 {Arg0, PacketSize, PacketAlign}));
6164 }
6165
6166 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
6167 case Builtin::BIto_global:
6168 case Builtin::BIto_local:
6169 case Builtin::BIto_private: {
6170 auto Arg0 = EmitScalarExpr(E->getArg(0));
6171 auto NewArgT = llvm::PointerType::get(
6172 getLLVMContext(),
6173 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6174 auto NewRetT = llvm::PointerType::get(
6175 getLLVMContext(),
6176 CGM.getContext().getTargetAddressSpace(
6177 E->getType()->getPointeeType().getAddressSpace()));
6178 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
6179 llvm::Value *NewArg;
6180 if (Arg0->getType()->getPointerAddressSpace() !=
6181 NewArgT->getPointerAddressSpace())
6182 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
6183 else
6184 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
6185 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
6186 auto NewCall =
6187 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
6188 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
6189 ConvertType(E->getType())));
6190 }
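// Illustrative note: `to_global(p)` becomes a call to the runtime function
// `__to_global` (the "__" prefix plus the callee name, as composed above),
// with p cast to the generic address space on the way in and the result cast
// back to the builtin's declared return type on the way out.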
6191
6192 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
6193 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
6194 // The code below expands the builtin call to a call to one of the following
6195 // functions that an OpenCL runtime library will have to provide:
6196 // __enqueue_kernel_basic
6197 // __enqueue_kernel_varargs
6198 // __enqueue_kernel_basic_events
6199 // __enqueue_kernel_events_varargs
6200 case Builtin::BIenqueue_kernel: {
6201 StringRef Name; // Generated function call name
6202 unsigned NumArgs = E->getNumArgs();
6203
6204 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
6205 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6206 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6207
6208 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
6209 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
6210 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
6211 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
6212 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
6213
6214 if (NumArgs == 4) {
6215 // The most basic form of the call with parameters:
6216 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
6217 Name = "__enqueue_kernel_basic";
6218 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
6219 GenericVoidPtrTy};
6220 llvm::FunctionType *FTy = llvm::FunctionType::get(
6221 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6222
6223 auto Info =
6224 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6225 llvm::Value *Kernel =
6226 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6227 llvm::Value *Block =
6228 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6229
6230 auto RTCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6231 {Queue, Flags, Range, Kernel, Block});
6232 return RValue::get(RTCall);
6233 }
6234 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
6235
6236 // Create a temporary array to hold the sizes of local pointer arguments
6237 // for the block. \p First is the position of the first size argument.
6238 auto CreateArrayForSizeVar = [=](unsigned First)
6239 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
6240 llvm::APInt ArraySize(32, NumArgs - First);
6241 QualType SizeArrayTy = getContext().getConstantArrayType(
6242 getContext().getSizeType(), ArraySize, nullptr,
6243 ArraySizeModifier::Normal,
6244 /*IndexTypeQuals=*/0);
6245 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
6246 llvm::Value *TmpPtr = Tmp.getPointer();
6247 // The EmitLifetime* pair expects a naked Alloca as its last argument;
6248 // however, when the default AS is not the Alloca AS, Tmp is actually the
6249 // Alloca addrspacecast to the default AS, hence the stripPointerCasts()
6250 // below.
6251 llvm::Value *Alloca = TmpPtr->stripPointerCasts();
6252 llvm::Value *TmpSize = EmitLifetimeStart(
6253 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6254 llvm::Value *ElemPtr;
6255 // Each of the following arguments specifies the size of the corresponding
6256 // argument passed to the enqueued block.
6257 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
6258 for (unsigned I = First; I < NumArgs; ++I) {
6259 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
6260 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
6261 {Zero, Index});
6262 if (I == First)
6263 ElemPtr = GEP;
6264 auto *V =
6265 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
6266 Builder.CreateAlignedStore(
6267 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
6268 }
6269 // Return the Alloca itself rather than a potential addrspacecast of it, as
6270 // this is only used by the paired EmitLifetimeEnd.
6271 return std::tie(ElemPtr, TmpSize, Alloca);
6272 };
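// Note on the lambda above: it materializes the trailing "local size"
// arguments of enqueue_kernel into a stack array of size_t and returns
// (pointer to the first element, lifetime.start marker, underlying alloca) so
// that callers can pass the array to the runtime and later emit the matching
// lifetime.end.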
6273
6274 // Could have events and/or varargs.
6275 if (E->getArg(3)->getType()->isBlockPointerType()) {
6276 // No events passed, but has variadic arguments.
6277 Name = "__enqueue_kernel_varargs";
6278 auto Info =
6279 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6280 llvm::Value *Kernel =
6281 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6282 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6283 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6284 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
6285
6286 // Create a vector of the arguments, as well as a constant value to
6287 // express to the runtime the number of variadic arguments.
6288 llvm::Value *const Args[] = {Queue, Flags,
6289 Range, Kernel,
6290 Block, ConstantInt::get(IntTy, NumArgs - 4),
6291 ElemPtr};
6292 llvm::Type *const ArgTys[] = {
6293 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
6294 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
6295
6296 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
6297 auto Call = RValue::get(
6298 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
6299 if (TmpSize)
6300 EmitLifetimeEnd(TmpSize, TmpPtr);
6301 return Call;
6302 }
6303 // The remaining forms all take event arguments.
6304 if (NumArgs >= 7) {
6305 llvm::PointerType *PtrTy = llvm::PointerType::get(
6306 CGM.getLLVMContext(),
6307 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6308
6309 llvm::Value *NumEvents =
6310 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
6311
6312 // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
6313 // to be null pointer constants (including the `0` literal), we can detect
6314 // that case and emit a null pointer directly.
6315 llvm::Value *EventWaitList = nullptr;
6316 if (E->getArg(4)->isNullPointerConstant(
6317 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6318 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
6319 } else {
6320 EventWaitList =
6321 E->getArg(4)->getType()->isArrayType()
6322 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
6323 : EmitScalarExpr(E->getArg(4));
6324 // Convert to generic address space.
6325 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
6326 }
6327 llvm::Value *EventRet = nullptr;
6328 if (E->getArg(5)->isNullPointerConstant(
6329 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6330 EventRet = llvm::ConstantPointerNull::get(PtrTy);
6331 } else {
6332 EventRet =
6333 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
6334 }
6335
6336 auto Info =
6337 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
6338 llvm::Value *Kernel =
6339 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6340 llvm::Value *Block =
6341 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6342
6343 std::vector<llvm::Type *> ArgTys = {
6344 QueueTy, Int32Ty, RangeTy, Int32Ty,
6345 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
6346
6347 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
6348 NumEvents, EventWaitList, EventRet,
6349 Kernel, Block};
6350
6351 if (NumArgs == 7) {
6352 // Has events but no variadics.
6353 Name = "__enqueue_kernel_basic_events";
6354 llvm::FunctionType *FTy = llvm::FunctionType::get(
6355 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6356 return RValue::get(
6357 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6358 llvm::ArrayRef<llvm::Value *>(Args)));
6359 }
6360 // Has event info and variadics
6361 // Pass the number of variadics to the runtime function too.
6362 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
6363 ArgTys.push_back(Int32Ty);
6364 Name = "__enqueue_kernel_events_varargs";
6365
6366 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6367 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
6368 Args.push_back(ElemPtr);
6369 ArgTys.push_back(ElemPtr->getType());
6370
6371 llvm::FunctionType *FTy = llvm::FunctionType::get(
6372 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6373 auto Call =
6374 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6375 llvm::ArrayRef<llvm::Value *>(Args)));
6376 if (TmpSize)
6377 EmitLifetimeEnd(TmpSize, TmpPtr);
6378 return Call;
6379 }
6380 llvm_unreachable("Unexpected enqueue_kernel signature");
6381 }
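// Illustrative summary of the dispatch above (assuming an OpenCL runtime that
// provides these entry points):
//   enqueue_kernel(q, flags, nd, block)                    -> __enqueue_kernel_basic
//   enqueue_kernel(q, flags, nd, block, sizes...)          -> __enqueue_kernel_varargs
//   enqueue_kernel(q, flags, nd, n, wait, ret, block)      -> __enqueue_kernel_basic_events
//   enqueue_kernel(q, flags, nd, n, wait, ret, block, sizes...)
//                                                          -> __enqueue_kernel_events_varargs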
6382 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
6383 // parameter.
6384 case Builtin::BIget_kernel_work_group_size: {
6385 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6386 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6387 auto Info =
6388 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6389 Value *Kernel =
6390 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6391 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6392 return RValue::get(EmitRuntimeCall(
6393 CGM.CreateRuntimeFunction(
6394 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6395 false),
6396 "__get_kernel_work_group_size_impl"),
6397 {Kernel, Arg}));
6398 }
6399 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
6400 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6401 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6402 auto Info =
6403 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6404 Value *Kernel =
6405 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6406 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6407 return RValue::get(EmitRuntimeCall(
6408 CGM.CreateRuntimeFunction(
6409 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6410 false),
6411 "__get_kernel_preferred_work_group_size_multiple_impl"),
6412 {Kernel, Arg}));
6413 }
6414 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
6415 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
6416 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6417 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6418 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
6419 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
6420 auto Info =
6421 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
6422 Value *Kernel =
6423 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6424 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6425 const char *Name =
6426 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6427 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
6428 : "__get_kernel_sub_group_count_for_ndrange_impl";
6429 return RValue::get(EmitRuntimeCall(
6430 CGM.CreateRuntimeFunction(
6431 llvm::FunctionType::get(
6432 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
6433 false),
6434 Name),
6435 {NDRange, Kernel, Block}));
6436 }
6437 case Builtin::BI__builtin_store_half:
6438 case Builtin::BI__builtin_store_halff: {
6439 Value *Val = EmitScalarExpr(E->getArg(0));
6440 Address Address = EmitPointerWithAlignment(E->getArg(1));
6441 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
6442 Builder.CreateStore(HalfVal, Address);
6443 return RValue::get(nullptr);
6444 }
6445 case Builtin::BI__builtin_load_half: {
6446 Address Address = EmitPointerWithAlignment(E->getArg(0));
6447 Value *HalfVal = Builder.CreateLoad(Address);
6448 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
6449 }
6450 case Builtin::BI__builtin_load_halff: {
6451 Address Address = EmitPointerWithAlignment(E->getArg(0));
6452 Value *HalfVal = Builder.CreateLoad(Address);
6453 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
6454 }
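// Note: __builtin_store_half / __builtin_store_halff truncate a double/float
// to IEEE half before storing; __builtin_load_half / __builtin_load_halff
// load a half and extend it to double/float respectively. For example,
// `__builtin_store_halff(1.0f, p)` emits an fptrunc to half plus a store.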
6455 case Builtin::BI__builtin_printf:
6456 case Builtin::BIprintf:
6457 if (getTarget().getTriple().isNVPTX() ||
6458 getTarget().getTriple().isAMDGCN() ||
6459 (getTarget().getTriple().isSPIRV() &&
6460 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
6461 if (getTarget().getTriple().isNVPTX())
6462 return EmitNVPTXDevicePrintfCallExpr(E);
6463 if ((getTarget().getTriple().isAMDGCN() ||
6464 getTarget().getTriple().isSPIRV()) &&
6465 getLangOpts().HIP)
6466 return EmitAMDGPUDevicePrintfCallExpr(E);
6467 }
6468
6469 break;
6470 case Builtin::BI__builtin_canonicalize:
6471 case Builtin::BI__builtin_canonicalizef:
6472 case Builtin::BI__builtin_canonicalizef16:
6473 case Builtin::BI__builtin_canonicalizel:
6474 return RValue::get(
6475 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6476
6477 case Builtin::BI__builtin_thread_pointer: {
6478 if (!getContext().getTargetInfo().isTLSSupported())
6479 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6480 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6481 break;
6482 }
6483 case Builtin::BI__builtin_os_log_format:
6484 return emitBuiltinOSLogFormat(*E);
6485
6486 case Builtin::BI__xray_customevent: {
6487 if (!ShouldXRayInstrumentFunction())
6488 return RValue::getIgnored();
6489
6490 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6491 XRayInstrKind::Custom))
6492 return RValue::getIgnored();
6493
6494 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6495 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6496 return RValue::getIgnored();
6497
6498 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6499 auto FTy = F->getFunctionType();
6500 auto Arg0 = E->getArg(0);
6501 auto Arg0Val = EmitScalarExpr(Arg0);
6502 auto Arg0Ty = Arg0->getType();
6503 auto PTy0 = FTy->getParamType(0);
6504 if (PTy0 != Arg0Val->getType()) {
6505 if (Arg0Ty->isArrayType())
6506 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6507 else
6508 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6509 }
6510 auto Arg1 = EmitScalarExpr(E->getArg(1));
6511 auto PTy1 = FTy->getParamType(1);
6512 if (PTy1 != Arg1->getType())
6513 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6514 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6515 }
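// Illustrative note: when XRay instrumentation of this function is enabled,
//   __xray_customevent(buf, len);
// becomes a call to the llvm.xray.customevent intrinsic with the buffer
// pointer and length (coerced to the intrinsic's parameter types above); the
// XRay machinery later rewrites that call into a patchable sled.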
6516
6517 case Builtin::BI__xray_typedevent: {
6518 // TODO: There should be a way to always emit events even if the current
6519 // function is not instrumented. Losing events in a stream can cripple
6520 // a trace.
6521 if (!ShouldXRayInstrumentFunction())
6522 return RValue::getIgnored();
6523
6524 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6525 XRayInstrKind::Typed))
6526 return RValue::getIgnored();
6527
6528 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6529 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6530 return RValue::getIgnored();
6531
6532 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6533 auto FTy = F->getFunctionType();
6534 auto Arg0 = EmitScalarExpr(E->getArg(0));
6535 auto PTy0 = FTy->getParamType(0);
6536 if (PTy0 != Arg0->getType())
6537 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6538 auto Arg1 = E->getArg(1);
6539 auto Arg1Val = EmitScalarExpr(Arg1);
6540 auto Arg1Ty = Arg1->getType();
6541 auto PTy1 = FTy->getParamType(1);
6542 if (PTy1 != Arg1Val->getType()) {
6543 if (Arg1Ty->isArrayType())
6544 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6545 else
6546 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6547 }
6548 auto Arg2 = EmitScalarExpr(E->getArg(2));
6549 auto PTy2 = FTy->getParamType(2);
6550 if (PTy2 != Arg2->getType())
6551 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6552 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6553 }
6554
6555 case Builtin::BI__builtin_ms_va_start:
6556 case Builtin::BI__builtin_ms_va_end:
6557 return RValue::get(
6558 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6559 BuiltinID == Builtin::BI__builtin_ms_va_start));
6560
6561 case Builtin::BI__builtin_ms_va_copy: {
6562 // Lower this manually. We can't reliably determine whether or not any
6563 // given va_copy() is for a Win64 va_list from the calling convention
6564 // alone, because it's legal to do this from a System V ABI function.
6565 // With opaque pointer types, we won't have enough information in LLVM
6566 // IR to determine this from the argument types, either. Best to do it
6567 // now, while we have enough information.
6568 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6569 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6570
6571 DestAddr = DestAddr.withElementType(Int8PtrTy);
6572 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6573
6574 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6575 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6576 }
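// Note: on Win64 a va_list is just a char*, which is why __builtin_ms_va_copy
// can be lowered here as "load the source pointer, store it to the
// destination" instead of going through the llvm.va_copy intrinsic.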
6577
6578 case Builtin::BI__builtin_get_device_side_mangled_name: {
6579 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6580 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6581 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6582 return RValue::get(Str.getPointer());
6583 }
6584 }
6585
6586 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6587 // the call using the normal call path, but using the unmangled
6588 // version of the function name.
6589 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6590 return emitLibraryCall(*this, FD, E,
6591 CGM.getBuiltinLibFunction(FD, BuiltinID));
6592
6593 // If this is a predefined lib function (e.g. malloc), emit the call
6594 // using exactly the normal call path.
6595 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6596 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6597
6598 // Check that a call to a target specific builtin has the correct target
6599 // features.
6600 // This check is placed down here so that non-target-specific builtins skip
6601 // it; if generic builtins ever start to require generic target features, it
6602 // can move up to the beginning of the function.
6603 checkTargetFeatures(E, FD);
6604
6605 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6606 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6607
6608 // See if we have a target specific intrinsic.
6609 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6610 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6611 StringRef Prefix =
6612 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6613 if (!Prefix.empty()) {
6614 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6615 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6616 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6617 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6618 // NOTE: we don't need to perform a compatibility-flag check here, since the
6619 // MS builtins are declared in Builtins*.def via LANGBUILTIN with
6620 // ALL_MS_LANGUAGES and have already been filtered out earlier.
6621 if (IntrinsicID == Intrinsic::not_intrinsic)
6622 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6623 }
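// Illustrative note: for a target builtin such as __builtin_arm_isb the arch
// prefix is "arm", so getIntrinsicForClangBuiltin("arm", Name) can resolve it
// to the corresponding llvm.arm.* intrinsic; MS-style builtins fall back to
// getIntrinsicForMSBuiltin.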
6624
6625 if (IntrinsicID != Intrinsic::not_intrinsic) {
6626 SmallVector<Value*, 16> Args;
6627
6628 // Find out if any arguments are required to be integer constant
6629 // expressions.
6630 unsigned ICEArguments = 0;
6631 ASTContext::GetBuiltinTypeError Error;
6632 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6633 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6634
6635 Function *F = CGM.getIntrinsic(IntrinsicID);
6636 llvm::FunctionType *FTy = F->getFunctionType();
6637
6638 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6639 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6640 // If the intrinsic arg type is different from the builtin arg type
6641 // we need to do a bit cast.
6642 llvm::Type *PTy = FTy->getParamType(i);
6643 if (PTy != ArgValue->getType()) {
6644 // XXX - vector of pointers?
6645 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6646 if (PtrTy->getAddressSpace() !=
6647 ArgValue->getType()->getPointerAddressSpace()) {
6648 ArgValue = Builder.CreateAddrSpaceCast(
6649 ArgValue, llvm::PointerType::get(getLLVMContext(),
6650 PtrTy->getAddressSpace()));
6651 }
6652 }
6653
6654 // Cast vector types (e.g., v256i32) to x86_amx; this only happens in
6655 // AMX intrinsics.
6656 if (PTy->isX86_AMXTy())
6657 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6658 {ArgValue->getType()}, {ArgValue});
6659 else
6660 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6661 }
6662
6663 Args.push_back(ArgValue);
6664 }
6665
6666 Value *V = Builder.CreateCall(F, Args);
6667 QualType BuiltinRetType = E->getType();
6668
6669 llvm::Type *RetTy = VoidTy;
6670 if (!BuiltinRetType->isVoidType())
6671 RetTy = ConvertType(BuiltinRetType);
6672
6673 if (RetTy != V->getType()) {
6674 // XXX - vector of pointers?
6675 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6676 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6677 V = Builder.CreateAddrSpaceCast(
6678 V, llvm::PointerType::get(getLLVMContext(),
6679 PtrTy->getAddressSpace()));
6680 }
6681 }
6682
6683 // Cast x86_amx to a vector type (e.g., v256i32); this only happens in
6684 // AMX intrinsics.
6685 if (V->getType()->isX86_AMXTy())
6686 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6687 {V});
6688 else
6689 V = Builder.CreateBitCast(V, RetTy);
6690 }
6691
6692 if (RetTy->isVoidTy())
6693 return RValue::get(nullptr);
6694
6695 return RValue::get(V);
6696 }
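// Note on the block above: the bitcast/addrspacecast fix-ups reconcile the
// builtin's source-level types with the intrinsic's declared signature. The
// x86_amx special case goes through llvm.x86.cast.vector.to.tile /
// llvm.x86.cast.tile.to.vector because x86_amx is not bitcast-compatible with
// ordinary vector types.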
6697
6698 // Some target-specific builtins can have aggregate return values, e.g.
6699 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6700 // ReturnValue to be non-null, so that the target-specific emission code can
6701 // always just emit into it.
6702 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6703 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6704 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6705 ReturnValue = ReturnValueSlot(DestPtr, false);
6706 }
6707
6708 // Now see if we can emit a target-specific builtin.
6709 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6710 switch (EvalKind) {
6711 case TEK_Scalar:
6712 if (V->getType()->isVoidTy())
6713 return RValue::get(nullptr);
6714 return RValue::get(V);
6715 case TEK_Aggregate:
6716 return RValue::getAggregate(ReturnValue.getAddress(),
6717 ReturnValue.isVolatile());
6718 case TEK_Complex:
6719 llvm_unreachable("No current target builtin returns complex");
6720 }
6721 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6722 }
6723
6724 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6725 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E, ReturnValue)) {
6726 switch (EvalKind) {
6727 case TEK_Scalar:
6728 if (V->getType()->isVoidTy())
6729 return RValue::get(nullptr);
6730 return RValue::get(V);
6731 case TEK_Aggregate:
6732 return RValue::getAggregate(ReturnValue.getAddress(),
6733 ReturnValue.isVolatile());
6734 case TEK_Complex:
6735 llvm_unreachable("No current hlsl builtin returns complex");
6736 }
6737 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6738 }
6739
6740 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6741 return EmitHipStdParUnsupportedBuiltin(this, FD);
6742
6743 ErrorUnsupported(E, "builtin function");
6744
6745 // Unknown builtin, for now just dump it out and return undef.
6746 return GetUndefRValue(E->getType());
6747}
6748
6749 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6750 unsigned BuiltinID, const CallExpr *E,
6751 ReturnValueSlot ReturnValue,
6752 llvm::Triple::ArchType Arch) {
6753 // When compiling in HipStdPar mode we have to be conservative about rejecting
6754 // target-specific features in the FE, and defer the possible error to the
6755 // AcceleratorCodeSelection pass: only if an unsupported target builtin is
6756 // referenced by an accelerator-executable function do we emit an error.
6757 // Returning nullptr here leads to the builtin being handled in
6758 // EmitHipStdParUnsupportedBuiltin.
6759 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6760 Arch != CGF->getTarget().getTriple().getArch())
6761 return nullptr;
6762
6763 switch (Arch) {
6764 case llvm::Triple::arm:
6765 case llvm::Triple::armeb:
6766 case llvm::Triple::thumb:
6767 case llvm::Triple::thumbeb:
6768 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6769 case llvm::Triple::aarch64:
6770 case llvm::Triple::aarch64_32:
6771 case llvm::Triple::aarch64_be:
6772 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6773 case llvm::Triple::bpfeb:
6774 case llvm::Triple::bpfel:
6775 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6776 case llvm::Triple::x86:
6777 case llvm::Triple::x86_64:
6778 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6779 case llvm::Triple::ppc:
6780 case llvm::Triple::ppcle:
6781 case llvm::Triple::ppc64:
6782 case llvm::Triple::ppc64le:
6783 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6784 case llvm::Triple::r600:
6785 case llvm::Triple::amdgcn:
6786 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6787 case llvm::Triple::systemz:
6788 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6789 case llvm::Triple::nvptx:
6790 case llvm::Triple::nvptx64:
6791 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6792 case llvm::Triple::wasm32:
6793 case llvm::Triple::wasm64:
6794 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6795 case llvm::Triple::hexagon:
6796 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6797 case llvm::Triple::riscv32:
6798 case llvm::Triple::riscv64:
6799 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6800 case llvm::Triple::spirv:
6801 return CGF->EmitSPIRVBuiltinExpr(BuiltinID, E);
6802 case llvm::Triple::spirv64:
6803 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6804 return nullptr;
6805 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6806 default:
6807 return nullptr;
6808 }
6809}
6810
6811 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6812 const CallExpr *E,
6813 ReturnValueSlot ReturnValue) {
6814 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6815 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6816 return EmitTargetArchBuiltinExpr(
6817 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6818 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6819 }
6820
6821 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6822 getTarget().getTriple().getArch());
6823}
6824
6825static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6826 NeonTypeFlags TypeFlags,
6827 bool HasLegalHalfType = true,
6828 bool V1Ty = false,
6829 bool AllowBFloatArgsAndRet = true) {
6830 int IsQuad = TypeFlags.isQuad();
6831 switch (TypeFlags.getEltType()) {
6832 case NeonTypeFlags::Int8:
6833 case NeonTypeFlags::Poly8:
6834 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6835 case NeonTypeFlags::Int16:
6836 case NeonTypeFlags::Poly16:
6837 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6838 case NeonTypeFlags::BFloat16:
6839 if (AllowBFloatArgsAndRet)
6840 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6841 else
6842 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6843 case NeonTypeFlags::Float16:
6844 if (HasLegalHalfType)
6845 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6846 else
6847 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6848 case NeonTypeFlags::Int32:
6849 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6850 case NeonTypeFlags::Int64:
6851 case NeonTypeFlags::Poly64:
6852 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6853 case NeonTypeFlags::Poly128:
6854 // FIXME: i128 and f128 are not fully supported in Clang and LLVM yet, and
6855 // a lot of the i128/f128 API is missing, so we use v16i8 to represent
6856 // poly128 and rely on pattern matching.
6857 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6858 case NeonTypeFlags::Float32:
6859 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6860 case NeonTypeFlags::Float64:
6861 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6862 }
6863 llvm_unreachable("Unknown vector element type!");
6864}
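// Illustrative examples for GetNeonType (assuming the usual NeonTypeFlags
// encoding): an Int32 element type yields <2 x i32> for 64-bit forms and
// <4 x i32> for quad ("q") forms, since IsQuad doubles the lane count;
// Poly128 is modelled as <16 x i8>, as the FIXME above explains.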
6865
6866static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6867 NeonTypeFlags IntTypeFlags) {
6868 int IsQuad = IntTypeFlags.isQuad();
6869 switch (IntTypeFlags.getEltType()) {
6870 case NeonTypeFlags::Float16:
6871 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6872 case NeonTypeFlags::Float32:
6873 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6874 case NeonTypeFlags::Float64:
6875 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6876 default:
6877 llvm_unreachable("Type can't be converted to floating-point!");
6878 }
6879}
6880
6881 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6882 const ElementCount &Count) {
6883 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6884 return Builder.CreateShuffleVector(V, V, SV, "lane");
6885}
6886
6887 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6888 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6889 return EmitNeonSplat(V, C, EC);
6890}
6891
6892 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6893 const char *name,
6894 unsigned shift, bool rightshift) {
6895 unsigned j = 0;
6896 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6897 ai != ae; ++ai, ++j) {
6898 if (F->isConstrainedFPIntrinsic())
6899 if (ai->getType()->isMetadataTy())
6900 continue;
6901 if (shift > 0 && shift == j)
6902 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6903 else
6904 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6905 }
6906
6907 if (F->isConstrainedFPIntrinsic())
6908 return Builder.CreateConstrainedFPCall(F, Ops, name);
6909 else
6910 return Builder.CreateCall(F, Ops, name);
6911}
6912
6913 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6914 bool neg) {
6915 int SV = cast<ConstantInt>(V)->getSExtValue();
6916 return ConstantInt::get(Ty, neg ? -SV : SV);
6917}
6918
6919// Right-shift a vector by a constant.
6920 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6921 llvm::Type *Ty, bool usgn,
6922 const char *name) {
6923 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6924
6925 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6926 int EltSize = VTy->getScalarSizeInBits();
6927
6928 Vec = Builder.CreateBitCast(Vec, Ty);
6929
6930 // lshr/ashr are undefined when the shift amount is equal to the vector
6931 // element size.
6932 if (ShiftAmt == EltSize) {
6933 if (usgn) {
6934 // Right-shifting an unsigned value by its size yields 0.
6935 return llvm::ConstantAggregateZero::get(VTy);
6936 } else {
6937 // Right-shifting a signed value by its size is equivalent
6938 // to a shift of size-1.
6939 --ShiftAmt;
6940 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6941 }
6942 }
6943
6944 Shift = EmitNeonShiftVector(Shift, Ty, false);
6945 if (usgn)
6946 return Builder.CreateLShr(Vec, Shift, name);
6947 else
6948 return Builder.CreateAShr(Vec, Shift, name);
6949}
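// Illustrative note: a right shift by the full element width (e.g.
// vshrq_n_s32(v, 32)) cannot be emitted as an ashr/lshr by 32, so the code
// above folds the signed case to a shift by 31 and the unsigned case to a
// constant zero vector, matching the NEON semantics.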
6950
6951enum {
6952 AddRetType = (1 << 0),
6953 Add1ArgType = (1 << 1),
6954 Add2ArgTypes = (1 << 2),
6955
6956 VectorizeRetType = (1 << 3),
6957 VectorizeArgTypes = (1 << 4),
6958
6959 InventFloatType = (1 << 5),
6960 UnsignedAlts = (1 << 6),
6961
6962 Use64BitVectors = (1 << 7),
6963 Use128BitVectors = (1 << 8),
6964
6965 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6966 VectorRet = AddRetType | VectorizeRetType,
6967 VectorRetGetArgs01 =
6968 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6969 FpCmpzModifiers =
6970 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6971 };
6972
6973namespace {
6974struct ARMVectorIntrinsicInfo {
6975 const char *NameHint;
6976 unsigned BuiltinID;
6977 unsigned LLVMIntrinsic;
6978 unsigned AltLLVMIntrinsic;
6979 uint64_t TypeModifier;
6980
6981 bool operator<(unsigned RHSBuiltinID) const {
6982 return BuiltinID < RHSBuiltinID;
6983 }
6984 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6985 return BuiltinID < TE.BuiltinID;
6986 }
6987};
6988} // end anonymous namespace
6989
6990#define NEONMAP0(NameBase) \
6991 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6992
6993#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6994 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6995 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6996
6997#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6998 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6999 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
7000 TypeModifier }
7001
7002static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
7003 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
7004 NEONMAP0(splat_lane_v),
7005 NEONMAP0(splat_laneq_v),
7006 NEONMAP0(splatq_lane_v),
7007 NEONMAP0(splatq_laneq_v),
7008 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
7009 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
7010 NEONMAP1(vabs_v, arm_neon_vabs, 0),
7011 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
7012 NEONMAP0(vadd_v),
7013 NEONMAP0(vaddhn_v),
7014 NEONMAP0(vaddq_v),
7015 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
7016 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
7017 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
7018 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
7019 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
7020 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
7021 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
7022 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
7023 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
7024 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
7025 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
7026 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
7027 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
7028 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
7029 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
7030 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
7031 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
7032 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
7033 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
7034 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
7035 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
7036 NEONMAP1(vcage_v, arm_neon_vacge, 0),
7037 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
7038 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
7039 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
7040 NEONMAP1(vcale_v, arm_neon_vacge, 0),
7041 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
7042 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
7043 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
7044 NEONMAP0(vceqz_v),
7045 NEONMAP0(vceqzq_v),
7046 NEONMAP0(vcgez_v),
7047 NEONMAP0(vcgezq_v),
7048 NEONMAP0(vcgtz_v),
7049 NEONMAP0(vcgtzq_v),
7050 NEONMAP0(vclez_v),
7051 NEONMAP0(vclezq_v),
7052 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
7053 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
7054 NEONMAP0(vcltz_v),
7055 NEONMAP0(vcltzq_v),
7056 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7057 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7058 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7059 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7060 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
7061 NEONMAP0(vcvt_f16_s16),
7062 NEONMAP0(vcvt_f16_u16),
7063 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
7064 NEONMAP0(vcvt_f32_v),
7065 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7066 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7067 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7068 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7069 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7070 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7071 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7072 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7073 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7074 NEONMAP0(vcvt_s16_f16),
7075 NEONMAP0(vcvt_s32_v),
7076 NEONMAP0(vcvt_s64_v),
7077 NEONMAP0(vcvt_u16_f16),
7078 NEONMAP0(vcvt_u32_v),
7079 NEONMAP0(vcvt_u64_v),
7080 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
7081 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
7082 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
7083 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
7084 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
7085 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
7086 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
7087 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
7088 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
7089 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
7090 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
7091 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
7092 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
7093 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
7094 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
7095 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
7096 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
7097 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
7098 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
7099 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
7100 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
7101 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
7102 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
7103 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
7104 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
7105 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
7106 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
7107 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
7108 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
7109 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
7110 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
7111 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
7112 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
7113 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
7114 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
7115 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
7116 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
7117 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
7118 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
7119 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
7120 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
7121 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
7122 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
7123 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
7124 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
7125 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
7126 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
7127 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
7128 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
7129 NEONMAP0(vcvtq_f16_s16),
7130 NEONMAP0(vcvtq_f16_u16),
7131 NEONMAP0(vcvtq_f32_v),
7132 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7133 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7134 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7135 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7136 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7137 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7138 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7139 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7140 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7141 NEONMAP0(vcvtq_s16_f16),
7142 NEONMAP0(vcvtq_s32_v),
7143 NEONMAP0(vcvtq_s64_v),
7144 NEONMAP0(vcvtq_u16_f16),
7145 NEONMAP0(vcvtq_u32_v),
7146 NEONMAP0(vcvtq_u64_v),
7147 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
7148 NEONMAP1(vdot_u32, arm_neon_udot, 0),
7149 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
7150 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
7151 NEONMAP0(vext_v),
7152 NEONMAP0(vextq_v),
7153 NEONMAP0(vfma_v),
7154 NEONMAP0(vfmaq_v),
7155 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7156 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7157 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7158 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7159 NEONMAP0(vld1_dup_v),
7160 NEONMAP1(vld1_v, arm_neon_vld1, 0),
7161 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
7162 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
7163 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
7164 NEONMAP0(vld1q_dup_v),
7165 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
7166 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
7167 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
7168 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
7169 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
7170 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
7171 NEONMAP1(vld2_v, arm_neon_vld2, 0),
7172 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
7173 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
7174 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
7175 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
7176 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
7177 NEONMAP1(vld3_v, arm_neon_vld3, 0),
7178 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
7179 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
7180 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
7181 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
7182 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
7183 NEONMAP1(vld4_v, arm_neon_vld4, 0),
7184 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
7185 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
7186 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
7187 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7188 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
7189 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
7190 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7191 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7192 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
7193 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
7194 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7195 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
7196 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
7197 NEONMAP0(vmovl_v),
7198 NEONMAP0(vmovn_v),
7199 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
7200 NEONMAP0(vmull_v),
7201 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
7202 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7203 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7204 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
7205 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7206 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7207 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
7208 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
7209 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
7210 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
7211 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
7212 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7213 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7214 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
7215 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
7216 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
7217 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
7218 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
7219 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
7220 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
7221 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
7222 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
7223 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
7224 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
7225 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
7226 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
7227 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
7228 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
7229 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
7230 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
7231 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
7232 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
7233 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7234 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7235 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7236 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7237 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7238 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7239 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
7240 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
7241 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7242 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7243 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
7244 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7245 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7246 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
7247 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
7248 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7249 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7250 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
7251 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
7252 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
7253 NEONMAP0(vrndi_v),
7254 NEONMAP0(vrndiq_v),
7255 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
7256 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
7257 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
7258 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
7259 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
7260 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
7261 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
7262 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
7263 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
7264 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7265 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7266 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7267 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7268 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7269 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7270 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
7271 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
7272 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
7273 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
7274 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
7275 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
7276 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
7277 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
7278 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
7279 NEONMAP0(vshl_n_v),
7280 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7281 NEONMAP0(vshll_n_v),
7282 NEONMAP0(vshlq_n_v),
7283 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7284 NEONMAP0(vshr_n_v),
7285 NEONMAP0(vshrn_n_v),
7286 NEONMAP0(vshrq_n_v),
7287 NEONMAP1(vst1_v, arm_neon_vst1, 0),
7288 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
7289 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
7290 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
7291 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
7292 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
7293 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
7294 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
7295 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
7296 NEONMAP1(vst2_v, arm_neon_vst2, 0),
7297 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
7298 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
7299 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
7300 NEONMAP1(vst3_v, arm_neon_vst3, 0),
7301 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
7302 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
7303 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
7304 NEONMAP1(vst4_v, arm_neon_vst4, 0),
7305 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
7306 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
7307 NEONMAP0(vsubhn_v),
7308 NEONMAP0(vtrn_v),
7309 NEONMAP0(vtrnq_v),
7310 NEONMAP0(vtst_v),
7311 NEONMAP0(vtstq_v),
7312 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
7313 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
7314 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
7315 NEONMAP0(vuzp_v),
7316 NEONMAP0(vuzpq_v),
7317 NEONMAP0(vzip_v),
7318 NEONMAP0(vzipq_v)
7319};
7320
7321static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7322 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
7323 NEONMAP0(splat_lane_v),
7324 NEONMAP0(splat_laneq_v),
7325 NEONMAP0(splatq_lane_v),
7326 NEONMAP0(splatq_laneq_v),
7327 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
7328 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
7329 NEONMAP0(vadd_v),
7330 NEONMAP0(vaddhn_v),
7331 NEONMAP0(vaddq_p128),
7332 NEONMAP0(vaddq_v),
7333 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
7334 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
7335 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
7336 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
7337 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7338 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7339 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7340 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7341 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7342 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7343 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7344 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7345 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
7346 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
7347 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
7348 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
7349 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
7350 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7351 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7352 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7353 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7354 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7355 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7356 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
7357 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7358 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7359 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
7360 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
7361 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
7362 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
7363 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
7364 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
7365 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
7366 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
7367 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
7368 NEONMAP0(vceqz_v),
7369 NEONMAP0(vceqzq_v),
7370 NEONMAP0(vcgez_v),
7371 NEONMAP0(vcgezq_v),
7372 NEONMAP0(vcgtz_v),
7373 NEONMAP0(vcgtzq_v),
7374 NEONMAP0(vclez_v),
7375 NEONMAP0(vclezq_v),
7376 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
7377 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
7378 NEONMAP0(vcltz_v),
7379 NEONMAP0(vcltzq_v),
7380 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7381 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7382 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7383 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7384 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7385 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7386 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7387 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7388 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7389 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7390 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7391 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7392 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
7393 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7394 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7395 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
7396 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7397 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7398 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
7399 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7400 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7401 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
7402 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7403 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7404 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
7405 NEONMAP0(vcvt_f16_s16),
7406 NEONMAP0(vcvt_f16_u16),
7407 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
7408 NEONMAP0(vcvt_f32_v),
7409 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7410 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7411 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7412 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7413 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7414 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7415 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7416 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7417 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7418 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7419 NEONMAP0(vcvtq_f16_s16),
7420 NEONMAP0(vcvtq_f16_u16),
7421 NEONMAP0(vcvtq_f32_v),
7422 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
7423 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7424 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7425 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7426 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7427 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7428 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7429 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7430 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7431 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7432 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7433 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
7434 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
7435 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
7436 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
7437 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
7438 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7439 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7440 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7441 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7442 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7443 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7444 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7445 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7446 NEONMAP0(vext_v),
7447 NEONMAP0(vextq_v),
7448 NEONMAP0(vfma_v),
7449 NEONMAP0(vfmaq_v),
7450 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
7451 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
7452 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
7453 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
7454 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
7455 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
7456 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
7457 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
7458 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7459 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7460 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7461 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7462 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7463 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7464 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7465 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7466 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7467 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7468 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7469 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7470 NEONMAP0(vmovl_v),
7471 NEONMAP0(vmovn_v),
7472 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
7473 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
7474 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
7475 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7476 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7477 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
7478 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
7479 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
7480 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7481 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7482 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7483 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7484 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7485 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7486 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7487 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7488 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7489 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7490 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7491 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7492 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7493 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7494 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7495 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7496 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7497 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7498 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7499 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7500 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7501 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7502 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7503 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7504 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7505 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7506 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7507 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7508 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7509 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7510 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7511 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7512 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7513 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7514 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7515 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7516 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7517 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7518 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7519 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7520 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7521 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7522 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7523 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7524 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7525 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7526 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7527 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7528 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7529 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7530 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7531 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7532 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7533 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7534 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7535 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7536 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7537 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7538 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7539 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7540 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7541 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7542 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7543 NEONMAP0(vrndi_v),
7544 NEONMAP0(vrndiq_v),
7545 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7546 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7547 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7548 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7549 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7550 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7551 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7552 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7553 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7554 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7555 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7556 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7557 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7558 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7559 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7560 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7561 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7562 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7563 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7564 NEONMAP0(vshl_n_v),
7565 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7566 NEONMAP0(vshll_n_v),
7567 NEONMAP0(vshlq_n_v),
7568 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7569 NEONMAP0(vshr_n_v),
7570 NEONMAP0(vshrn_n_v),
7571 NEONMAP0(vshrq_n_v),
7572 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7573 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7574 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7575 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7576 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7577 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7578 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7579 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7580 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7581 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7582 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7583 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7584 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7585 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7586 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7587 NEONMAP0(vsubhn_v),
7588 NEONMAP0(vtst_v),
7589 NEONMAP0(vtstq_v),
7590 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7591 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7592 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7593 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7594};
7595
7596static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7597 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7598 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7599 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7600 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7601 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7602 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7603 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7604 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7605 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7606 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7607 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7608 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7609 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7610 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7611 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7612 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7613 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7614 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7615 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7616 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7617 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7618 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7619 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7620 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7621 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7622 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7623 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7624 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7625 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7626 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7627 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7628 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7629 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7630 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7631 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7632 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7633 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7634 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7635 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7636 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7637 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7638 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7639 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7640 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7641 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7642 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7643 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7644 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7645 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7646 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7647 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7648 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7649 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7650 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7651 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7652 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7653 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7654 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7655 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7656 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7657 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7658 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7659 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7660 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7661 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7662 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7663 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7664 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7665 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7666 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7667 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7668 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7669 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7670 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7671 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7672 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7673 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7674 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7675 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7676 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7677 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7678 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7679 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7680 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7681 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7682 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7683 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7684 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7685 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7686 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7687 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7688 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7689 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7690 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7691 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7692 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7693 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7694 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7695 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7696 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7697 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7698 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7699 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7700 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7701 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7702 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7703 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7704 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7705 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7706 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7707 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7708 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7709 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7710 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7711 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7712 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7713 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7714 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7715 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7716 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7717 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7718 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7719 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7720 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7721 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7722 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7723 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7724 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7725 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7726 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7727 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7728 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7729 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7730 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7731 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7732 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7733 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7734 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7735 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7736 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7737 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7738 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7739 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7740 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7741 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7742 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7743 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7744 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7745 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7746 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7747 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7748 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7749 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7750 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7751 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7752 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7753 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7754 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7755 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7756 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7757 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7758 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7759 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7760 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7761 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7762 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7763 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7764 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7765 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7766 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7767 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7768 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7769 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7770 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7771 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7772 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7773 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7774 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7775 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7776 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7777 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7778 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7779 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7780 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7781 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7782 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7783 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7784 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7785 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7786 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7787 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7788 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7789 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7790 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7791 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7792 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7793 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7794 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7795 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7796 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7797 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7798 // FP16 scalar intrinsics go here.
7799 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7800 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7801 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7802 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7803 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7804 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7805 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7806 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7807 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7808 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7809 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7810 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7811 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7812 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7813 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7814 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7815 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7816 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7817 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7818 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7819 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7820 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7821 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7822 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7823 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7824 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7825 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7826 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7827 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7828 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7829 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7830 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7831 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7832 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7833};
7834
7835// Some intrinsics are equivalent for codegen.
7836static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7837 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7838 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7839 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7840 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7841 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7842 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7843 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7844 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7845 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7846 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7847 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7848 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7849 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7850 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7851 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7852 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7853 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7854 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7855 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7856 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7857 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7858 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7859 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7860 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7861 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7862 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7863 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7864 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7865 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7866 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7867 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7868 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7869 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7870 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7871 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7872 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7873 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7874 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7875 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7876 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7877 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7878 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7879 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7880 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7881 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7882 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7883 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7884 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7885 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7886 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7887 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7888 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7889 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7890 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7891 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7892 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7893 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7894 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7895 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7896 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7897 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7898 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7899 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7900 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7901 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7902 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7903 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7904 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7905 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7906 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7907 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7908 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7909 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7910 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7911 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7912 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7913 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7914 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7915 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7916 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7917 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7918 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7919 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7920 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7921 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7922 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7923 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7924 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7925 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7926 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7927 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7928 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7929 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7930 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7931 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7932 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7933 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7934 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7935 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7936 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7937 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7938 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7939 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7940 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7941 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7942 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7943 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7944 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7945 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7946 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7947 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7948 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7949 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7950 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7951 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7952 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7953 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7954 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7955 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7956 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7957 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7958 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7959 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7960 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7961 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7962 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7963 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7964 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7965 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7966 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7967 // arbitrary one to be handled as the canonical variation.
7968 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7969 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7970 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7971 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7972 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7973 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7974 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7975 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7976 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7977 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7978 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7979 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7980};
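// For example, the { vabd_f16, vabd_v } pair above means __builtin_neon_vabd_f16
// is emitted by the same code path as the generic __builtin_neon_vabd_v; the
// emitters can therefore remap the first ID to the second before consulting the
// intrinsic tables.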
7981
7982#undef NEONMAP0
7983#undef NEONMAP1
7984#undef NEONMAP2
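// By analogy with the SVEMAP1/SMEMAP1 definitions below, each NEONMAP2 entry in
// the tables above denotes an ARMVectorIntrinsicInfo record of roughly the form
//   { "vqadd_v", NEON::BI__builtin_neon_vqadd_v,
//     Intrinsic::uadd_sat, Intrinsic::sadd_sat, Add1ArgType | UnsignedAlts }
// i.e. a name hint, the builtin ID, a pair of LLVM intrinsics (typically the
// unsigned and signed alternatives), and type-modifier flags. NEONMAP1 supplies
// a single intrinsic, and NEONMAP0 marks builtins that are expanded by hand in
// the emitters below.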
7985
7986#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7987 { \
7988 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7989 TypeModifier \
7990 }
7991
7992#define SVEMAP2(NameBase, TypeModifier) \
7993 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7994static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7995#define GET_SVE_LLVM_INTRINSIC_MAP
7996#include "clang/Basic/arm_sve_builtin_cg.inc"
7997#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7998#undef GET_SVE_LLVM_INTRINSIC_MAP
7999};
8000
8001#undef SVEMAP1
8002#undef SVEMAP2
8003
8004#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
8005 { \
8006 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
8007 TypeModifier \
8008 }
8009
8010#define SMEMAP2(NameBase, TypeModifier) \
8011 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
8012static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
8013#define GET_SME_LLVM_INTRINSIC_MAP
8014#include "clang/Basic/arm_sme_builtin_cg.inc"
8015#undef GET_SME_LLVM_INTRINSIC_MAP
8016};
8017
8018#undef SMEMAP1
8019#undef SMEMAP2
8020
8021static bool NEONSIMDIntrinsicsProvenSorted = false;
8022
8023static bool AArch64SIMDIntrinsicsProvenSorted = false;
8024static bool AArch64SISDIntrinsicsProvenSorted = false;
8025static bool AArch64SVEIntrinsicsProvenSorted = false;
8026static bool AArch64SMEIntrinsicsProvenSorted = false;
8027
8028static const ARMVectorIntrinsicInfo *
8029findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
8030 unsigned BuiltinID, bool &MapProvenSorted) {
8031
8032#ifndef NDEBUG
8033 if (!MapProvenSorted) {
8034 assert(llvm::is_sorted(IntrinsicMap));
8035 MapProvenSorted = true;
8036 }
8037#endif
8038
8039 const ARMVectorIntrinsicInfo *Builtin =
8040 llvm::lower_bound(IntrinsicMap, BuiltinID);
8041
8042 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
8043 return Builtin;
8044
8045 return nullptr;
8046}
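// Typical use (illustrative): look a builtin up in one of the sorted tables
// above, e.g.
//   static bool MapProvenSorted = false; // per-table flag, checked once
//   const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//       AArch64SIMDIntrinsicMap, BuiltinID, MapProvenSorted);
//   if (Info) { /* use Info->LLVMIntrinsic, Info->TypeModifier, ... */ }
// The tables must stay sorted by builtin ID so llvm::lower_bound can binary
// search them; the llvm::is_sorted check runs only in asserts-enabled builds.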
8047
8048Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
8049 unsigned Modifier,
8050 llvm::Type *ArgType,
8051 const CallExpr *E) {
8052 int VectorSize = 0;
8053 if (Modifier & Use64BitVectors)
8054 VectorSize = 64;
8055 else if (Modifier & Use128BitVectors)
8056 VectorSize = 128;
8057
8058 // Return type.
8059 SmallVector<llvm::Type *, 3> Tys;
8060 if (Modifier & AddRetType) {
8061 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
8062 if (Modifier & VectorizeRetType)
8063 Ty = llvm::FixedVectorType::get(
8064 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
8065
8066 Tys.push_back(Ty);
8067 }
8068
8069 // Arguments.
8070 if (Modifier & VectorizeArgTypes) {
8071 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
8072 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
8073 }
8074
8075 if (Modifier & (Add1ArgType | Add2ArgTypes))
8076 Tys.push_back(ArgType);
8077
8078 if (Modifier & Add2ArgTypes)
8079 Tys.push_back(ArgType);
8080
8081 if (Modifier & InventFloatType)
8082 Tys.push_back(FloatTy);
8083
8084 return CGM.getIntrinsic(IntrinsicID, Tys);
8085}
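// Worked example (illustrative): for the scalar builtin vqaddh_s16 the table
// entry above is { aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors },
// so VectorSize is 64 and the i16 argument type is widened to <4 x i16>
// (64 / 16 lanes) before being pushed into Tys, yielding the declaration
// llvm.aarch64.neon.sqadd.v4i16. With AddRetType | Add1ArgType instead (e.g.
// vaddlv_s32), the call's return type and the first argument's type are both
// used to overload the intrinsic.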
8086
8087static Value *EmitCommonNeonSISDBuiltinExpr(
8088 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
8089 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
8090 unsigned BuiltinID = SISDInfo.BuiltinID;
8091 unsigned int Int = SISDInfo.LLVMIntrinsic;
8092 unsigned Modifier = SISDInfo.TypeModifier;
8093 const char *s = SISDInfo.NameHint;
8094
8095 switch (BuiltinID) {
8096 case NEON::BI__builtin_neon_vcled_s64:
8097 case NEON::BI__builtin_neon_vcled_u64:
8098 case NEON::BI__builtin_neon_vcles_f32:
8099 case NEON::BI__builtin_neon_vcled_f64:
8100 case NEON::BI__builtin_neon_vcltd_s64:
8101 case NEON::BI__builtin_neon_vcltd_u64:
8102 case NEON::BI__builtin_neon_vclts_f32:
8103 case NEON::BI__builtin_neon_vcltd_f64:
8104 case NEON::BI__builtin_neon_vcales_f32:
8105 case NEON::BI__builtin_neon_vcaled_f64:
8106 case NEON::BI__builtin_neon_vcalts_f32:
8107 case NEON::BI__builtin_neon_vcaltd_f64:
8108 // Only one direction of these comparisons actually exists; cmle is just a
8109 // cmge with swapped operands. The table gives us the right intrinsic, but we
8110 // still need to do the swap here.
8111 std::swap(Ops[0], Ops[1]);
8112 break;
8113 }
8114
8115 assert(Int && "Generic code assumes a valid intrinsic");
8116
8117 // Determine the type(s) of this overloaded AArch64 intrinsic.
8118 const Expr *Arg = E->getArg(0);
8119 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
8120 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
8121
8122 int j = 0;
8123 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
8124 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
8125 ai != ae; ++ai, ++j) {
8126 llvm::Type *ArgTy = ai->getType();
8127 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
8128 ArgTy->getPrimitiveSizeInBits())
8129 continue;
8130
8131 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
8132 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
8133 // it before inserting.
8134 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
8135 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
8136 Ops[j] =
8137 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
8138 }
8139
8140 Value *Result = CGF.EmitNeonCall(F, Ops, s);
8141 llvm::Type *ResultType = CGF.ConvertType(E->getType());
8142 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
8143 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
8144 return CGF.Builder.CreateExtractElement(Result, C0);
8145
8146 return CGF.Builder.CreateBitCast(Result, ResultType, s);
8147}
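// Illustrative walk-through for a scalar (SISD) builtin such as vqaddh_s16:
// the intrinsic found above is overloaded on <4 x i16>, so each i16 operand
// whose width does not already match is inserted into lane 0 of a poison
// <4 x i16> vector, the call is emitted, and because the expression's scalar
// result type (i16) is narrower than the vector returned by the intrinsic,
// lane 0 is extracted again to produce the final scalar value.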
8148
8149Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
8150 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
8151 const char *NameHint, unsigned Modifier, const CallExpr *E,
8152 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
8153 llvm::Triple::ArchType Arch) {
8154 // Get the last argument, which specifies the vector type.
8155 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8156 std::optional<llvm::APSInt> NeonTypeConst =
8157 Arg->getIntegerConstantExpr(getContext());
8158 if (!NeonTypeConst)
8159 return nullptr;
8160
8161 // Determine the type of this overloaded NEON intrinsic.
8162 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
8163 bool Usgn = Type.isUnsigned();
8164 bool Quad = Type.isQuad();
8165 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
8166 const bool AllowBFloatArgsAndRet =
8167 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
8168
8169 llvm::FixedVectorType *VTy =
8170 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
8171 llvm::Type *Ty = VTy;
8172 if (!Ty)
8173 return nullptr;
8174
8175 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8176 return Builder.getInt32(addr.getAlignment().getQuantity());
8177 };
8178
8179 unsigned Int = LLVMIntrinsic;
8180 if ((Modifier & UnsignedAlts) && !Usgn)
8181 Int = AltLLVMIntrinsic;
8182
8183 switch (BuiltinID) {
8184 default: break;
8185 case NEON::BI__builtin_neon_splat_lane_v:
8186 case NEON::BI__builtin_neon_splat_laneq_v:
8187 case NEON::BI__builtin_neon_splatq_lane_v:
8188 case NEON::BI__builtin_neon_splatq_laneq_v: {
8189 auto NumElements = VTy->getElementCount();
8190 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
8191 NumElements = NumElements * 2;
8192 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
8193 NumElements = NumElements.divideCoefficientBy(2);
8194
8195 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8196 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
8197 }
8198 case NEON::BI__builtin_neon_vpadd_v:
8199 case NEON::BI__builtin_neon_vpaddq_v:
8200 // We don't allow fp/int overloading of intrinsics.
8201 if (VTy->getElementType()->isFloatingPointTy() &&
8202 Int == Intrinsic::aarch64_neon_addp)
8203 Int = Intrinsic::aarch64_neon_faddp;
8204 break;
8205 case NEON::BI__builtin_neon_vabs_v:
8206 case NEON::BI__builtin_neon_vabsq_v:
8207 if (VTy->getElementType()->isFloatingPointTy())
8208 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
8209 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
8210 case NEON::BI__builtin_neon_vadd_v:
8211 case NEON::BI__builtin_neon_vaddq_v: {
8212 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
8213 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8214 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8215 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
8216 return Builder.CreateBitCast(Ops[0], Ty);
8217 }
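// For the polynomial element types handled here, addition is carry-less, so
// reinterpreting the operands as i8 vectors and XOR-ing them implements the
// polynomial add (e.g. vadd_p8(a, b) is a lanewise a ^ b).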
8218 case NEON::BI__builtin_neon_vaddhn_v: {
8219 llvm::FixedVectorType *SrcTy =
8220 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8221
8222 // %sum = add <4 x i32> %lhs, %rhs
8223 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8224 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8225 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
8226
8227 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8228 Constant *ShiftAmt =
8229 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8230 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
8231
8232 // %res = trunc <4 x i32> %high to <4 x i16>
8233 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
8234 }
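// In C terms (illustrative): vaddhn_s32(a, b)[i] == (int16_t)((a[i] + b[i]) >> 16),
// i.e. the high half of each 32-bit lane-wise sum, narrowed to 16 bits; exactly
// the add / lshr-by-half-width / trunc sequence built above.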
8235 case NEON::BI__builtin_neon_vcale_v:
8236 case NEON::BI__builtin_neon_vcaleq_v:
8237 case NEON::BI__builtin_neon_vcalt_v:
8238 case NEON::BI__builtin_neon_vcaltq_v:
8239 std::swap(Ops[0], Ops[1]);
8240 [[fallthrough]];
8241 case NEON::BI__builtin_neon_vcage_v:
8242 case NEON::BI__builtin_neon_vcageq_v:
8243 case NEON::BI__builtin_neon_vcagt_v:
8244 case NEON::BI__builtin_neon_vcagtq_v: {
8245 llvm::Type *Ty;
8246 switch (VTy->getScalarSizeInBits()) {
8247 default: llvm_unreachable("unexpected type");
8248 case 32:
8249 Ty = FloatTy;
8250 break;
8251 case 64:
8252 Ty = DoubleTy;
8253 break;
8254 case 16:
8255 Ty = HalfTy;
8256 break;
8257 }
8258 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
8259 llvm::Type *Tys[] = { VTy, VecFlt };
8260 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8261 return EmitNeonCall(F, Ops, NameHint);
8262 }
8263 case NEON::BI__builtin_neon_vceqz_v:
8264 case NEON::BI__builtin_neon_vceqzq_v:
8265 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
8266 ICmpInst::ICMP_EQ, "vceqz");
8267 case NEON::BI__builtin_neon_vcgez_v:
8268 case NEON::BI__builtin_neon_vcgezq_v:
8269 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
8270 ICmpInst::ICMP_SGE, "vcgez");
8271 case NEON::BI__builtin_neon_vclez_v:
8272 case NEON::BI__builtin_neon_vclezq_v:
8273 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
8274 ICmpInst::ICMP_SLE, "vclez");
8275 case NEON::BI__builtin_neon_vcgtz_v:
8276 case NEON::BI__builtin_neon_vcgtzq_v:
8277 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
8278 ICmpInst::ICMP_SGT, "vcgtz");
8279 case NEON::BI__builtin_neon_vcltz_v:
8280 case NEON::BI__builtin_neon_vcltzq_v:
8281 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
8282 ICmpInst::ICMP_SLT, "vcltz");
8283 case NEON::BI__builtin_neon_vclz_v:
8284 case NEON::BI__builtin_neon_vclzq_v:
8285 // We generate a target-independent intrinsic, which needs a second argument
8286 // specifying whether clz of zero is undefined; on ARM it isn't.
8287 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
8288 break;
8289 case NEON::BI__builtin_neon_vcvt_f32_v:
8290 case NEON::BI__builtin_neon_vcvtq_f32_v:
8291 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8292 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
8293 HasLegalHalfType);
8294 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8295 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8296 case NEON::BI__builtin_neon_vcvt_f16_s16:
8297 case NEON::BI__builtin_neon_vcvt_f16_u16:
8298 case NEON::BI__builtin_neon_vcvtq_f16_s16:
8299 case NEON::BI__builtin_neon_vcvtq_f16_u16:
8300 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8301 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
8302 HasLegalHalfType);
8303 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8304 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8305 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
8306 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
8307 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
8308 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
8309 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8310 Function *F = CGM.getIntrinsic(Int, Tys);
8311 return EmitNeonCall(F, Ops, "vcvt_n");
8312 }
8313 case NEON::BI__builtin_neon_vcvt_n_f32_v:
8314 case NEON::BI__builtin_neon_vcvt_n_f64_v:
8315 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
8316 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
8317 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8318 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
8319 Function *F = CGM.getIntrinsic(Int, Tys);
8320 return EmitNeonCall(F, Ops, "vcvt_n");
8321 }
8322 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
8323 case NEON::BI__builtin_neon_vcvt_n_s32_v:
8324 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
8325 case NEON::BI__builtin_neon_vcvt_n_u32_v:
8326 case NEON::BI__builtin_neon_vcvt_n_s64_v:
8327 case NEON::BI__builtin_neon_vcvt_n_u64_v:
8328 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
8329 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
8330 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
8331 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
8332 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
8333 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
8334 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8335 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8336 return EmitNeonCall(F, Ops, "vcvt_n");
8337 }
8338 case NEON::BI__builtin_neon_vcvt_s32_v:
8339 case NEON::BI__builtin_neon_vcvt_u32_v:
8340 case NEON::BI__builtin_neon_vcvt_s64_v:
8341 case NEON::BI__builtin_neon_vcvt_u64_v:
8342 case NEON::BI__builtin_neon_vcvt_s16_f16:
8343 case NEON::BI__builtin_neon_vcvt_u16_f16:
8344 case NEON::BI__builtin_neon_vcvtq_s32_v:
8345 case NEON::BI__builtin_neon_vcvtq_u32_v:
8346 case NEON::BI__builtin_neon_vcvtq_s64_v:
8347 case NEON::BI__builtin_neon_vcvtq_u64_v:
8348 case NEON::BI__builtin_neon_vcvtq_s16_f16:
8349 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
8350 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8351 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
8352 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
8353 }
8354 case NEON::BI__builtin_neon_vcvta_s16_f16:
8355 case NEON::BI__builtin_neon_vcvta_s32_v:
8356 case NEON::BI__builtin_neon_vcvta_s64_v:
8357 case NEON::BI__builtin_neon_vcvta_u16_f16:
8358 case NEON::BI__builtin_neon_vcvta_u32_v:
8359 case NEON::BI__builtin_neon_vcvta_u64_v:
8360 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
8361 case NEON::BI__builtin_neon_vcvtaq_s32_v:
8362 case NEON::BI__builtin_neon_vcvtaq_s64_v:
8363 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
8364 case NEON::BI__builtin_neon_vcvtaq_u32_v:
8365 case NEON::BI__builtin_neon_vcvtaq_u64_v:
8366 case NEON::BI__builtin_neon_vcvtn_s16_f16:
8367 case NEON::BI__builtin_neon_vcvtn_s32_v:
8368 case NEON::BI__builtin_neon_vcvtn_s64_v:
8369 case NEON::BI__builtin_neon_vcvtn_u16_f16:
8370 case NEON::BI__builtin_neon_vcvtn_u32_v:
8371 case NEON::BI__builtin_neon_vcvtn_u64_v:
8372 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
8373 case NEON::BI__builtin_neon_vcvtnq_s32_v:
8374 case NEON::BI__builtin_neon_vcvtnq_s64_v:
8375 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
8376 case NEON::BI__builtin_neon_vcvtnq_u32_v:
8377 case NEON::BI__builtin_neon_vcvtnq_u64_v:
8378 case NEON::BI__builtin_neon_vcvtp_s16_f16:
8379 case NEON::BI__builtin_neon_vcvtp_s32_v:
8380 case NEON::BI__builtin_neon_vcvtp_s64_v:
8381 case NEON::BI__builtin_neon_vcvtp_u16_f16:
8382 case NEON::BI__builtin_neon_vcvtp_u32_v:
8383 case NEON::BI__builtin_neon_vcvtp_u64_v:
8384 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
8385 case NEON::BI__builtin_neon_vcvtpq_s32_v:
8386 case NEON::BI__builtin_neon_vcvtpq_s64_v:
8387 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
8388 case NEON::BI__builtin_neon_vcvtpq_u32_v:
8389 case NEON::BI__builtin_neon_vcvtpq_u64_v:
8390 case NEON::BI__builtin_neon_vcvtm_s16_f16:
8391 case NEON::BI__builtin_neon_vcvtm_s32_v:
8392 case NEON::BI__builtin_neon_vcvtm_s64_v:
8393 case NEON::BI__builtin_neon_vcvtm_u16_f16:
8394 case NEON::BI__builtin_neon_vcvtm_u32_v:
8395 case NEON::BI__builtin_neon_vcvtm_u64_v:
8396 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
8397 case NEON::BI__builtin_neon_vcvtmq_s32_v:
8398 case NEON::BI__builtin_neon_vcvtmq_s64_v:
8399 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
8400 case NEON::BI__builtin_neon_vcvtmq_u32_v:
8401 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8402 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8403 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8404 }
8405 case NEON::BI__builtin_neon_vcvtx_f32_v: {
8406 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
8407 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8408
8409 }
8410 case NEON::BI__builtin_neon_vext_v:
8411 case NEON::BI__builtin_neon_vextq_v: {
8412 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
8413 SmallVector<int, 16> Indices;
8414 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8415 Indices.push_back(i+CV);
8416
8417 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8418 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8419 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
8420 }
8421 case NEON::BI__builtin_neon_vfma_v:
8422 case NEON::BI__builtin_neon_vfmaq_v: {
8423 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8424 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8425 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8426
8427 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
8428 return emitCallMaybeConstrainedFPBuiltin(
8429 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
8430 {Ops[1], Ops[2], Ops[0]});
8431 }
8432 case NEON::BI__builtin_neon_vld1_v:
8433 case NEON::BI__builtin_neon_vld1q_v: {
8434 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8435 Ops.push_back(getAlignmentValue32(PtrOp0));
8436 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
8437 }
8438 case NEON::BI__builtin_neon_vld1_x2_v:
8439 case NEON::BI__builtin_neon_vld1q_x2_v:
8440 case NEON::BI__builtin_neon_vld1_x3_v:
8441 case NEON::BI__builtin_neon_vld1q_x3_v:
8442 case NEON::BI__builtin_neon_vld1_x4_v:
8443 case NEON::BI__builtin_neon_vld1q_x4_v: {
8444 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8445 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8446 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
8447 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8448 }
8449 case NEON::BI__builtin_neon_vld2_v:
8450 case NEON::BI__builtin_neon_vld2q_v:
8451 case NEON::BI__builtin_neon_vld3_v:
8452 case NEON::BI__builtin_neon_vld3q_v:
8453 case NEON::BI__builtin_neon_vld4_v:
8454 case NEON::BI__builtin_neon_vld4q_v:
8455 case NEON::BI__builtin_neon_vld2_dup_v:
8456 case NEON::BI__builtin_neon_vld2q_dup_v:
8457 case NEON::BI__builtin_neon_vld3_dup_v:
8458 case NEON::BI__builtin_neon_vld3q_dup_v:
8459 case NEON::BI__builtin_neon_vld4_dup_v:
8460 case NEON::BI__builtin_neon_vld4q_dup_v: {
8461 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8462 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8463 Value *Align = getAlignmentValue32(PtrOp1);
8464 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8465 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8466 }
8467 case NEON::BI__builtin_neon_vld1_dup_v:
8468 case NEON::BI__builtin_neon_vld1q_dup_v: {
8469 Value *V = PoisonValue::get(Ty);
8470 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8471 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
8472 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8473 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
8474 return EmitNeonSplat(Ops[0], CI);
8475 }
8476 case NEON::BI__builtin_neon_vld2_lane_v:
8477 case NEON::BI__builtin_neon_vld2q_lane_v:
8478 case NEON::BI__builtin_neon_vld3_lane_v:
8479 case NEON::BI__builtin_neon_vld3q_lane_v:
8480 case NEON::BI__builtin_neon_vld4_lane_v:
8481 case NEON::BI__builtin_neon_vld4q_lane_v: {
8482 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8483 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8484 for (unsigned I = 2; I < Ops.size() - 1; ++I)
8485 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8486 Ops.push_back(getAlignmentValue32(PtrOp1));
8487 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8488 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8489 }
8490 case NEON::BI__builtin_neon_vmovl_v: {
8491 llvm::FixedVectorType *DTy =
8492 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8493 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8494 if (Usgn)
8495 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8496 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8497 }
8498 case NEON::BI__builtin_neon_vmovn_v: {
8499 llvm::FixedVectorType *QTy =
8500 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8501 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8502 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8503 }
8504 case NEON::BI__builtin_neon_vmull_v:
8505 // FIXME: the integer vmull operations could be emitted in terms of pure
8506 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8507 // hoisting the exts outside loops. Until global ISel comes along that can
8508 // see through such movement, this leads to bad CodeGen. So we need an
8509 // intrinsic for now.
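 // (For reference, the pure-IR form of a <4 x i16> -> <4 x i32> vmull would be
 //   %l = sext <4 x i16> %a to <4 x i32>
 //   %r = sext <4 x i16> %b to <4 x i32>
 //   %p = mul <4 x i32> %l, %r
 //  with zext in place of sext for the unsigned variants.)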
8510 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8511 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8512 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8513 case NEON::BI__builtin_neon_vpadal_v:
8514 case NEON::BI__builtin_neon_vpadalq_v: {
8515 // The source operand type has twice as many elements of half the size.
8516 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8517 llvm::Type *EltTy =
8518 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8519 auto *NarrowTy =
8520 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8521 llvm::Type *Tys[2] = { Ty, NarrowTy };
8522 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8523 }
8524 case NEON::BI__builtin_neon_vpaddl_v:
8525 case NEON::BI__builtin_neon_vpaddlq_v: {
8526 // The source operand type has twice as many elements of half the size.
8527 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8528 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8529 auto *NarrowTy =
8530 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8531 llvm::Type *Tys[2] = { Ty, NarrowTy };
8532 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8533 }
8534 case NEON::BI__builtin_neon_vqdmlal_v:
8535 case NEON::BI__builtin_neon_vqdmlsl_v: {
8536 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8537 Ops[1] =
8538 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8539 Ops.resize(2);
8540 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8541 }
8542 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8543 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8544 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8545 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8546 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8547 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8548 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8549 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8550 RTy->getNumElements() * 2);
8551 llvm::Type *Tys[2] = {
8552 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8553 /*isQuad*/ false))};
8554 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8555 }
8556 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8557 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8558 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8559 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8560 llvm::Type *Tys[2] = {
8561 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8562 /*isQuad*/ true))};
8563 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8564 }
8565 case NEON::BI__builtin_neon_vqshl_n_v:
8566 case NEON::BI__builtin_neon_vqshlq_n_v:
8567 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8568 1, false);
8569 case NEON::BI__builtin_neon_vqshlu_n_v:
8570 case NEON::BI__builtin_neon_vqshluq_n_v:
8571 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8572 1, false);
8573 case NEON::BI__builtin_neon_vrecpe_v:
8574 case NEON::BI__builtin_neon_vrecpeq_v:
8575 case NEON::BI__builtin_neon_vrsqrte_v:
8576 case NEON::BI__builtin_neon_vrsqrteq_v:
8577 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8578 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8579 case NEON::BI__builtin_neon_vrndi_v:
8580 case NEON::BI__builtin_neon_vrndiq_v:
8581 Int = Builder.getIsFPConstrained()
8582 ? Intrinsic::experimental_constrained_nearbyint
8583 : Intrinsic::nearbyint;
8584 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8585 case NEON::BI__builtin_neon_vrshr_n_v:
8586 case NEON::BI__builtin_neon_vrshrq_n_v:
8587 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8588 1, true);
8589 case NEON::BI__builtin_neon_vsha512hq_u64:
8590 case NEON::BI__builtin_neon_vsha512h2q_u64:
8591 case NEON::BI__builtin_neon_vsha512su0q_u64:
8592 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8593 Function *F = CGM.getIntrinsic(Int);
8594 return EmitNeonCall(F, Ops, "");
8595 }
8596 case NEON::BI__builtin_neon_vshl_n_v:
8597 case NEON::BI__builtin_neon_vshlq_n_v:
8598 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8599 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8600 "vshl_n");
8601 case NEON::BI__builtin_neon_vshll_n_v: {
8602 llvm::FixedVectorType *SrcTy =
8603 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8604 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8605 if (Usgn)
8606 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8607 else
8608 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8609 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8610 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8611 }
8612 case NEON::BI__builtin_neon_vshrn_n_v: {
8613 llvm::FixedVectorType *SrcTy =
8614 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8615 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8616 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8617 if (Usgn)
8618 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8619 else
8620 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8621 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8622 }
8623 case NEON::BI__builtin_neon_vshr_n_v:
8624 case NEON::BI__builtin_neon_vshrq_n_v:
8625 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8626 case NEON::BI__builtin_neon_vst1_v:
8627 case NEON::BI__builtin_neon_vst1q_v:
8628 case NEON::BI__builtin_neon_vst2_v:
8629 case NEON::BI__builtin_neon_vst2q_v:
8630 case NEON::BI__builtin_neon_vst3_v:
8631 case NEON::BI__builtin_neon_vst3q_v:
8632 case NEON::BI__builtin_neon_vst4_v:
8633 case NEON::BI__builtin_neon_vst4q_v:
8634 case NEON::BI__builtin_neon_vst2_lane_v:
8635 case NEON::BI__builtin_neon_vst2q_lane_v:
8636 case NEON::BI__builtin_neon_vst3_lane_v:
8637 case NEON::BI__builtin_neon_vst3q_lane_v:
8638 case NEON::BI__builtin_neon_vst4_lane_v:
8639 case NEON::BI__builtin_neon_vst4q_lane_v: {
8640 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8641 Ops.push_back(getAlignmentValue32(PtrOp0));
8642 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8643 }
8644 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8645 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8646 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8647 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8648 case NEON::BI__builtin_neon_vsm4eq_u32: {
8649 Function *F = CGM.getIntrinsic(Int);
8650 return EmitNeonCall(F, Ops, "");
8651 }
8652 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8653 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8654 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8655 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8656 Function *F = CGM.getIntrinsic(Int);
8657 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8658 return EmitNeonCall(F, Ops, "");
8659 }
8660 case NEON::BI__builtin_neon_vst1_x2_v:
8661 case NEON::BI__builtin_neon_vst1q_x2_v:
8662 case NEON::BI__builtin_neon_vst1_x3_v:
8663 case NEON::BI__builtin_neon_vst1q_x3_v:
8664 case NEON::BI__builtin_neon_vst1_x4_v:
8665 case NEON::BI__builtin_neon_vst1q_x4_v: {
8666 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8667 // in AArch64 it comes last. We may want to standardize on one or the other.
8668 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8669 Arch == llvm::Triple::aarch64_32) {
8670 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8671 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8672 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8673 }
8674 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8675 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8676 }
8677 case NEON::BI__builtin_neon_vsubhn_v: {
8678 llvm::FixedVectorType *SrcTy =
8679 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8680
8681 // %diff = sub <4 x i32> %lhs, %rhs
8682 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8683 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8684 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8685
8686 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8687 Constant *ShiftAmt =
8688 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8689 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8690
8691 // %res = trunc <4 x i32> %high to <4 x i16>
8692 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8693 }
8694 case NEON::BI__builtin_neon_vtrn_v:
8695 case NEON::BI__builtin_neon_vtrnq_v: {
8696 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8697 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8698 Value *SV = nullptr;
8699
8700 for (unsigned vi = 0; vi != 2; ++vi) {
8701 SmallVector<int, 16> Indices;
8702 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8703 Indices.push_back(i+vi);
8704 Indices.push_back(i+e+vi);
8705 }
8706 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8707 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8708 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8709 }
8710 return SV;
8711 }
8712 case NEON::BI__builtin_neon_vtst_v:
8713 case NEON::BI__builtin_neon_vtstq_v: {
8714 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8715 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8716 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8717 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8718 ConstantAggregateZero::get(Ty));
8719 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8720 }
8721 case NEON::BI__builtin_neon_vuzp_v:
8722 case NEON::BI__builtin_neon_vuzpq_v: {
8723 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8724 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8725 Value *SV = nullptr;
8726
8727 for (unsigned vi = 0; vi != 2; ++vi) {
8728 SmallVector<int, 16> Indices;
8729 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8730 Indices.push_back(2*i+vi);
8731
8732 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8733 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8734 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8735 }
8736 return SV;
8737 }
8738 case NEON::BI__builtin_neon_vxarq_u64: {
8739 Function *F = CGM.getIntrinsic(Int);
8740 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8741 return EmitNeonCall(F, Ops, "");
8742 }
8743 case NEON::BI__builtin_neon_vzip_v:
8744 case NEON::BI__builtin_neon_vzipq_v: {
8745 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8746 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8747 Value *SV = nullptr;
8748
8749 for (unsigned vi = 0; vi != 2; ++vi) {
8750 SmallVector<int, 16> Indices;
8751 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8752 Indices.push_back((i + vi*e) >> 1);
8753 Indices.push_back(((i + vi*e) >> 1)+e);
8754 }
8755 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8756 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8757 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8758 }
8759 return SV;
8760 }
8761 case NEON::BI__builtin_neon_vdot_s32:
8762 case NEON::BI__builtin_neon_vdot_u32:
8763 case NEON::BI__builtin_neon_vdotq_s32:
8764 case NEON::BI__builtin_neon_vdotq_u32: {
8765 auto *InputTy =
8766 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8767 llvm::Type *Tys[2] = { Ty, InputTy };
8768 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8769 }
8770 case NEON::BI__builtin_neon_vfmlal_low_f16:
8771 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8772 auto *InputTy =
8773 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8774 llvm::Type *Tys[2] = { Ty, InputTy };
8775 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8776 }
8777 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8778 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8779 auto *InputTy =
8780 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8781 llvm::Type *Tys[2] = { Ty, InputTy };
8782 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8783 }
8784 case NEON::BI__builtin_neon_vfmlal_high_f16:
8785 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8786 auto *InputTy =
8787 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8788 llvm::Type *Tys[2] = { Ty, InputTy };
8789 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8790 }
8791 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8792 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8793 auto *InputTy =
8794 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8795 llvm::Type *Tys[2] = { Ty, InputTy };
8796 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8797 }
8798 case NEON::BI__builtin_neon_vmmlaq_s32:
8799 case NEON::BI__builtin_neon_vmmlaq_u32: {
8800 auto *InputTy =
8801 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8802 llvm::Type *Tys[2] = { Ty, InputTy };
8803 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8804 }
8805 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8806 auto *InputTy =
8807 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8808 llvm::Type *Tys[2] = { Ty, InputTy };
8809 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8810 }
8811 case NEON::BI__builtin_neon_vusdot_s32:
8812 case NEON::BI__builtin_neon_vusdotq_s32: {
8813 auto *InputTy =
8814 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8815 llvm::Type *Tys[2] = { Ty, InputTy };
8816 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8817 }
8818 case NEON::BI__builtin_neon_vbfdot_f32:
8819 case NEON::BI__builtin_neon_vbfdotq_f32: {
8820 llvm::Type *InputTy =
8821 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8822 llvm::Type *Tys[2] = { Ty, InputTy };
8823 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8824 }
8825 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8826 llvm::Type *Tys[1] = { Ty };
8827 Function *F = CGM.getIntrinsic(Int, Tys);
8828 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8829 }
8830
8831 }
8832
8833 assert(Int && "Expected valid intrinsic number");
8834
8835 // Determine the type(s) of this overloaded AArch64 intrinsic.
8836 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8837
8838 Value *Result = EmitNeonCall(F, Ops, NameHint);
8839 llvm::Type *ResultType = ConvertType(E->getType());
8840 // Cast the one-element vector result of the AArch64 intrinsic back to the
8841 // scalar type expected by the builtin.
8842 return Builder.CreateBitCast(Result, ResultType, NameHint);
8843}
8844
8845Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8846 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8847 const CmpInst::Predicate Ip, const Twine &Name) {
8848 llvm::Type *OTy = Op->getType();
8849
8850 // FIXME: this is utterly horrific. We should not be looking at previous
8851 // codegen context to find out what needs doing. Unfortunately TableGen
8852 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8853 // (etc).
8854 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8855 OTy = BI->getOperand(0)->getType();
8856
8857 Op = Builder.CreateBitCast(Op, OTy);
8858 if (OTy->getScalarType()->isFloatingPointTy()) {
8859 if (Fp == CmpInst::FCMP_OEQ)
8860 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8861 else
8862 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8863 } else {
8864 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8865 }
8866 return Builder.CreateSExt(Op, Ty, Name);
8867}
8868
8869static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8870 Value *ExtOp, Value *IndexOp,
8871 llvm::Type *ResTy, unsigned IntID,
8872 const char *Name) {
8873 SmallVector<Value *, 2> TblOps;
8874 if (ExtOp)
8875 TblOps.push_back(ExtOp);
8876
8877 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
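 // For two <8 x i8> table halves this yields the mask <0, 1, ..., 15>, so the
 // shuffle below simply concatenates each d-register pair into one 128-bit table.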
8878 SmallVector<int, 16> Indices;
8879 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8880 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8881 Indices.push_back(2*i);
8882 Indices.push_back(2*i+1);
8883 }
8884
8885 int PairPos = 0, End = Ops.size() - 1;
8886 while (PairPos < End) {
8887 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8888 Ops[PairPos+1], Indices,
8889 Name));
8890 PairPos += 2;
8891 }
8892
8893 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8894 // of the 128-bit lookup table with zero.
8895 if (PairPos == End) {
8896 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8897 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8898 ZeroTbl, Indices, Name));
8899 }
8900
8901 Function *TblF;
8902 TblOps.push_back(IndexOp);
8903 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8904
8905 return CGF.EmitNeonCall(TblF, TblOps, Name);
8906}
8907
8908Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8909 unsigned Value;
8910 switch (BuiltinID) {
8911 default:
8912 return nullptr;
8913 case clang::ARM::BI__builtin_arm_nop:
8914 Value = 0;
8915 break;
8916 case clang::ARM::BI__builtin_arm_yield:
8917 case clang::ARM::BI__yield:
8918 Value = 1;
8919 break;
8920 case clang::ARM::BI__builtin_arm_wfe:
8921 case clang::ARM::BI__wfe:
8922 Value = 2;
8923 break;
8924 case clang::ARM::BI__builtin_arm_wfi:
8925 case clang::ARM::BI__wfi:
8926 Value = 3;
8927 break;
8928 case clang::ARM::BI__builtin_arm_sev:
8929 case clang::ARM::BI__sev:
8930 Value = 4;
8931 break;
8932 case clang::ARM::BI__builtin_arm_sevl:
8933 case clang::ARM::BI__sevl:
8934 Value = 5;
8935 break;
8936 }
8937
8938 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8939 llvm::ConstantInt::get(Int32Ty, Value));
8940}
8941
8942enum SpecialRegisterAccessKind {
8943 NormalRead,
8944 VolatileRead,
8945 Write,
8946};
8947
8948// Generates the IR for __builtin_read_exec_*.
8949// Lowers the builtin to amdgcn_ballot intrinsic.
8950static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8951 llvm::Type *RegisterType,
8952 llvm::Type *ValueType, bool isExecHi) {
8953 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8954 CodeGen::CodeGenModule &CGM = CGF.CGM;
8955
8956 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8957 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8958
8959 if (isExecHi) {
8960 Value *Rt2 = Builder.CreateLShr(Call, 32);
8961 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8962 return Rt2;
8963 }
8964
8965 return Call;
8966}
8967
8968// Generates the IR for the read/write special register builtin.
8969// ValueType is the type of the value that is to be written or read;
8970// RegisterType is the type of the register being written to or read from.
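// For example, __builtin_arm_rsr("cpsr") is lowered to a call of
// llvm.read_volatile_register.i32 whose metadata operand names the "cpsr" register.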
8971static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8972 const CallExpr *E,
8973 llvm::Type *RegisterType,
8974 llvm::Type *ValueType,
8975 SpecialRegisterAccessKind AccessKind,
8976 StringRef SysReg = "") {
8977 // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
8978 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8979 RegisterType->isIntegerTy(128)) &&
8980 "Unsupported size for register.");
8981
8982 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8983 CodeGen::CodeGenModule &CGM = CGF.CGM;
8984 LLVMContext &Context = CGM.getLLVMContext();
8985
8986 if (SysReg.empty()) {
8987 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8988 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8989 }
8990
8991 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8992 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8993 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8994
8995 llvm::Type *Types[] = { RegisterType };
8996
8997 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8998 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8999 && "Can't fit 64-bit value in 32-bit register");
9000
9001 if (AccessKind != Write) {
9002 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
9003 llvm::Function *F = CGM.getIntrinsic(
9004 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
9005 : llvm::Intrinsic::read_register,
9006 Types);
9007 llvm::Value *Call = Builder.CreateCall(F, Metadata);
9008
9009 if (MixedTypes)
9010 // Read into 64 bit register and then truncate result to 32 bit.
9011 return Builder.CreateTrunc(Call, ValueType);
9012
9013 if (ValueType->isPointerTy())
9014 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
9015 return Builder.CreateIntToPtr(Call, ValueType);
9016
9017 return Call;
9018 }
9019
9020 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
9021 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
9022 if (MixedTypes) {
9023 // Extend 32 bit write value to 64 bit to pass to write.
9024 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
9025 return Builder.CreateCall(F, { Metadata, ArgValue });
9026 }
9027
9028 if (ValueType->isPointerTy()) {
9029 // Have VoidPtrTy ArgValue but want to return an i32/i64.
9030 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
9031 return Builder.CreateCall(F, { Metadata, ArgValue });
9032 }
9033
9034 return Builder.CreateCall(F, { Metadata, ArgValue });
9035}
9036
9037/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
9038/// argument that specifies the vector type.
9039static bool HasExtraNeonArgument(unsigned BuiltinID) {
9040 switch (BuiltinID) {
9041 default: break;
9042 case NEON::BI__builtin_neon_vget_lane_i8:
9043 case NEON::BI__builtin_neon_vget_lane_i16:
9044 case NEON::BI__builtin_neon_vget_lane_bf16:
9045 case NEON::BI__builtin_neon_vget_lane_i32:
9046 case NEON::BI__builtin_neon_vget_lane_i64:
9047 case NEON::BI__builtin_neon_vget_lane_f32:
9048 case NEON::BI__builtin_neon_vgetq_lane_i8:
9049 case NEON::BI__builtin_neon_vgetq_lane_i16:
9050 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9051 case NEON::BI__builtin_neon_vgetq_lane_i32:
9052 case NEON::BI__builtin_neon_vgetq_lane_i64:
9053 case NEON::BI__builtin_neon_vgetq_lane_f32:
9054 case NEON::BI__builtin_neon_vduph_lane_bf16:
9055 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9056 case NEON::BI__builtin_neon_vset_lane_i8:
9057 case NEON::BI__builtin_neon_vset_lane_i16:
9058 case NEON::BI__builtin_neon_vset_lane_bf16:
9059 case NEON::BI__builtin_neon_vset_lane_i32:
9060 case NEON::BI__builtin_neon_vset_lane_i64:
9061 case NEON::BI__builtin_neon_vset_lane_f32:
9062 case NEON::BI__builtin_neon_vsetq_lane_i8:
9063 case NEON::BI__builtin_neon_vsetq_lane_i16:
9064 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9065 case NEON::BI__builtin_neon_vsetq_lane_i32:
9066 case NEON::BI__builtin_neon_vsetq_lane_i64:
9067 case NEON::BI__builtin_neon_vsetq_lane_f32:
9068 case NEON::BI__builtin_neon_vsha1h_u32:
9069 case NEON::BI__builtin_neon_vsha1cq_u32:
9070 case NEON::BI__builtin_neon_vsha1pq_u32:
9071 case NEON::BI__builtin_neon_vsha1mq_u32:
9072 case NEON::BI__builtin_neon_vcvth_bf16_f32:
9073 case clang::ARM::BI_MoveToCoprocessor:
9074 case clang::ARM::BI_MoveToCoprocessor2:
9075 return false;
9076 }
9077 return true;
9078}
9079
9080Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
9081 const CallExpr *E,
9082 ReturnValueSlot ReturnValue,
9083 llvm::Triple::ArchType Arch) {
9084 if (auto Hint = GetValueForARMHint(BuiltinID))
9085 return Hint;
9086
9087 if (BuiltinID == clang::ARM::BI__emit) {
9088 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
9089 llvm::FunctionType *FTy =
9090 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
9091
9092 Expr::EvalResult Result;
9093 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
9094 llvm_unreachable("Sema will ensure that the parameter is constant");
9095
9096 llvm::APSInt Value = Result.Val.getInt();
9097 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
9098
9099 llvm::InlineAsm *Emit =
9100 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
9101 /*hasSideEffects=*/true)
9102 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
9103 /*hasSideEffects=*/true);
9104
9105 return Builder.CreateCall(Emit);
9106 }
9107
9108 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
9109 Value *Option = EmitScalarExpr(E->getArg(0));
9110 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
9111 }
9112
9113 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
9114 Value *Address = EmitScalarExpr(E->getArg(0));
9115 Value *RW = EmitScalarExpr(E->getArg(1));
9116 Value *IsData = EmitScalarExpr(E->getArg(2));
9117
9118 // Locality is not supported on ARM target
9119 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
9120
9121 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
9122 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
9123 }
9124
9125 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
9126 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9127 return Builder.CreateCall(
9128 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
9129 }
9130
9131 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
9132 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
9133 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9134 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
9135 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
9136 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
9137 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
9138 return Res;
9139 }
9140
9141
9142 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
9143 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9144 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
9145 }
9146 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
9147 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9148 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
9149 "cls");
9150 }
9151
9152 if (BuiltinID == clang::ARM::BI__clear_cache) {
9153 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9154 const FunctionDecl *FD = E->getDirectCallee();
9155 Value *Ops[2];
9156 for (unsigned i = 0; i < 2; i++)
9157 Ops[i] = EmitScalarExpr(E->getArg(i));
9158 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
9159 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9160 StringRef Name = FD->getName();
9161 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
9162 }
9163
9164 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
9165 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
9166 Function *F;
9167
9168 switch (BuiltinID) {
9169 default: llvm_unreachable("unexpected builtin");
9170 case clang::ARM::BI__builtin_arm_mcrr:
9171 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
9172 break;
9173 case clang::ARM::BI__builtin_arm_mcrr2:
9174 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
9175 break;
9176 }
9177
9178 // The MCRR{2} instruction has 5 operands, but
9179 // the intrinsic has only 4 because Rt and Rt2
9180 // are represented as a single unsigned 64-bit
9181 // integer in the intrinsic definition; internally,
9182 // however, they are handled as two 32-bit
9183 // integers.
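 // For example, with RtAndRt2 = 0x1122334455667788, Rt becomes 0x55667788 and
 // Rt2 becomes 0x11223344.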
9184
9185 Value *Coproc = EmitScalarExpr(E->getArg(0));
9186 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9187 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
9188 Value *CRm = EmitScalarExpr(E->getArg(3));
9189
9190 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9191 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
9192 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
9193 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
9194
9195 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
9196 }
9197
9198 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
9199 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
9200 Function *F;
9201
9202 switch (BuiltinID) {
9203 default: llvm_unreachable("unexpected builtin");
9204 case clang::ARM::BI__builtin_arm_mrrc:
9205 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
9206 break;
9207 case clang::ARM::BI__builtin_arm_mrrc2:
9208 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
9209 break;
9210 }
9211
9212 Value *Coproc = EmitScalarExpr(E->getArg(0));
9213 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9214 Value *CRm = EmitScalarExpr(E->getArg(2));
9215 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
9216
9217 // Returns an unsigned 64-bit integer, represented
9218 // as two 32-bit integers.
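 // They are recombined below as (Rt << 32) | Rt1, with Rt holding the high word.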
9219
9220 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
9221 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
9222 Rt = Builder.CreateZExt(Rt, Int64Ty);
9223 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
9224
9225 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
9226 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
9227 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
9228
9229 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
9230 }
9231
9232 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
9233 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9234 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
9235 getContext().getTypeSize(E->getType()) == 64) ||
9236 BuiltinID == clang::ARM::BI__ldrexd) {
9237 Function *F;
9238
9239 switch (BuiltinID) {
9240 default: llvm_unreachable("unexpected builtin");
9241 case clang::ARM::BI__builtin_arm_ldaex:
9242 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
9243 break;
9244 case clang::ARM::BI__builtin_arm_ldrexd:
9245 case clang::ARM::BI__builtin_arm_ldrex:
9246 case clang::ARM::BI__ldrexd:
9247 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
9248 break;
9249 }
9250
9251 Value *LdPtr = EmitScalarExpr(E->getArg(0));
9252 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
9253
9254 Value *Val0 = Builder.CreateExtractValue(Val, 1);
9255 Value *Val1 = Builder.CreateExtractValue(Val, 0);
9256 Val0 = Builder.CreateZExt(Val0, Int64Ty);
9257 Val1 = Builder.CreateZExt(Val1, Int64Ty);
9258
9259 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
9260 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
9261 Val = Builder.CreateOr(Val, Val1);
9262 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
9263 }
9264
9265 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9266 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
9267 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
9268
9269 QualType Ty = E->getType();
9270 llvm::Type *RealResTy = ConvertType(Ty);
9271 llvm::Type *IntTy =
9272 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9273
9274 Function *F = CGM.getIntrinsic(
9275 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
9276 : Intrinsic::arm_ldrex,
9277 UnqualPtrTy);
9278 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
9279 Val->addParamAttr(
9280 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
9281
9282 if (RealResTy->isPointerTy())
9283 return Builder.CreateIntToPtr(Val, RealResTy);
9284 else {
9285 llvm::Type *IntResTy = llvm::IntegerType::get(
9286 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
9287 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
9288 RealResTy);
9289 }
9290 }
9291
9292 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
9293 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
9294 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
9295 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
9296 Function *F = CGM.getIntrinsic(
9297 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
9298 : Intrinsic::arm_strexd);
9299 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
9300
9301 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9302 Value *Val = EmitScalarExpr(E->getArg(0));
9303 Builder.CreateStore(Val, Tmp);
9304
9305 Address LdPtr = Tmp.withElementType(STy);
9306 Val = Builder.CreateLoad(LdPtr);
9307
9308 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
9309 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
9310 Value *StPtr = EmitScalarExpr(E->getArg(1));
9311 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
9312 }
9313
9314 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
9315 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
9316 Value *StoreVal = EmitScalarExpr(E->getArg(0));
9317 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
9318
9319 QualType Ty = E->getArg(0)->getType();
9320 llvm::Type *StoreTy =
9321 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9322
9323 if (StoreVal->getType()->isPointerTy())
9324 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
9325 else {
9326 llvm::Type *IntTy = llvm::IntegerType::get(
9327 getLLVMContext(),
9328 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
9329 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
9330 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
9331 }
9332
9333 Function *F = CGM.getIntrinsic(
9334 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
9335 : Intrinsic::arm_strex,
9336 StoreAddr->getType());
9337
9338 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
9339 CI->addParamAttr(
9340 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
9341 return CI;
9342 }
9343
9344 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
9345 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
9346 return Builder.CreateCall(F);
9347 }
9348
9349 // CRC32
9350 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9351 switch (BuiltinID) {
9352 case clang::ARM::BI__builtin_arm_crc32b:
9353 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
9354 case clang::ARM::BI__builtin_arm_crc32cb:
9355 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
9356 case clang::ARM::BI__builtin_arm_crc32h:
9357 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
9358 case clang::ARM::BI__builtin_arm_crc32ch:
9359 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
9360 case clang::ARM::BI__builtin_arm_crc32w:
9361 case clang::ARM::BI__builtin_arm_crc32d:
9362 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
9363 case clang::ARM::BI__builtin_arm_crc32cw:
9364 case clang::ARM::BI__builtin_arm_crc32cd:
9365 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
9366 }
9367
9368 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9369 Value *Arg0 = EmitScalarExpr(E->getArg(0));
9370 Value *Arg1 = EmitScalarExpr(E->getArg(1));
9371
9372 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
9373 // intrinsics, hence we need different codegen for these cases.
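 // That is, crc32d(crc, x) is lowered to crc32w(crc32w(crc, lo32(x)), hi32(x)),
 // and likewise for crc32cd.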
9374 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
9375 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
9376 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9377 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
9378 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
9379 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
9380
9381 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9382 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
9383 return Builder.CreateCall(F, {Res, Arg1b});
9384 } else {
9385 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
9386
9387 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9388 return Builder.CreateCall(F, {Arg0, Arg1});
9389 }
9390 }
9391
9392 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9393 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9394 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9395 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
9396 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
9397 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
9398
9399 SpecialRegisterAccessKind AccessKind = Write;
9400 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9401 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9402 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
9403 AccessKind = VolatileRead;
9404
9405 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9406 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
9407
9408 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9409 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
9410
9411 llvm::Type *ValueType;
9412 llvm::Type *RegisterType;
9413 if (IsPointerBuiltin) {
9414 ValueType = VoidPtrTy;
9415 RegisterType = Int32Ty;
9416 } else if (Is64Bit) {
9417 ValueType = RegisterType = Int64Ty;
9418 } else {
9419 ValueType = RegisterType = Int32Ty;
9420 }
9421
9422 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
9423 AccessKind);
9424 }
9425
9426 if (BuiltinID == ARM::BI__builtin_sponentry) {
9427 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
9428 return Builder.CreateCall(F);
9429 }
9430
9431 // Handle MSVC intrinsics before argument evaluation to prevent double
9432 // evaluation.
9433 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
9434 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
9435
9436 // Deal with MVE builtins
9437 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9438 return Result;
9439 // Handle CDE builtins
9440 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9441 return Result;
9442
9443 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
9444 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
9445 return P.first == BuiltinID;
9446 });
9447 if (It != end(NEONEquivalentIntrinsicMap))
9448 BuiltinID = It->second;
9449
9450 // Find out if any arguments are required to be integer constant
9451 // expressions.
9452 unsigned ICEArguments = 0;
9453 ASTContext::GetBuiltinTypeError Error;
9454 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9455 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9456
9457 auto getAlignmentValue32 = [&](Address addr) -> Value* {
9458 return Builder.getInt32(addr.getAlignment().getQuantity());
9459 };
9460
9461 Address PtrOp0 = Address::invalid();
9462 Address PtrOp1 = Address::invalid();
9463 SmallVector<Value*, 4> Ops;
9464 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
9465 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
9466 for (unsigned i = 0, e = NumArgs; i != e; i++) {
9467 if (i == 0) {
9468 switch (BuiltinID) {
9469 case NEON::BI__builtin_neon_vld1_v:
9470 case NEON::BI__builtin_neon_vld1q_v:
9471 case NEON::BI__builtin_neon_vld1q_lane_v:
9472 case NEON::BI__builtin_neon_vld1_lane_v:
9473 case NEON::BI__builtin_neon_vld1_dup_v:
9474 case NEON::BI__builtin_neon_vld1q_dup_v:
9475 case NEON::BI__builtin_neon_vst1_v:
9476 case NEON::BI__builtin_neon_vst1q_v:
9477 case NEON::BI__builtin_neon_vst1q_lane_v:
9478 case NEON::BI__builtin_neon_vst1_lane_v:
9479 case NEON::BI__builtin_neon_vst2_v:
9480 case NEON::BI__builtin_neon_vst2q_v:
9481 case NEON::BI__builtin_neon_vst2_lane_v:
9482 case NEON::BI__builtin_neon_vst2q_lane_v:
9483 case NEON::BI__builtin_neon_vst3_v:
9484 case NEON::BI__builtin_neon_vst3q_v:
9485 case NEON::BI__builtin_neon_vst3_lane_v:
9486 case NEON::BI__builtin_neon_vst3q_lane_v:
9487 case NEON::BI__builtin_neon_vst4_v:
9488 case NEON::BI__builtin_neon_vst4q_v:
9489 case NEON::BI__builtin_neon_vst4_lane_v:
9490 case NEON::BI__builtin_neon_vst4q_lane_v:
9491 // Get the alignment for the argument in addition to the value;
9492 // we'll use it later.
9493 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9494 Ops.push_back(PtrOp0.emitRawPointer(*this));
9495 continue;
9496 }
9497 }
9498 if (i == 1) {
9499 switch (BuiltinID) {
9500 case NEON::BI__builtin_neon_vld2_v:
9501 case NEON::BI__builtin_neon_vld2q_v:
9502 case NEON::BI__builtin_neon_vld3_v:
9503 case NEON::BI__builtin_neon_vld3q_v:
9504 case NEON::BI__builtin_neon_vld4_v:
9505 case NEON::BI__builtin_neon_vld4q_v:
9506 case NEON::BI__builtin_neon_vld2_lane_v:
9507 case NEON::BI__builtin_neon_vld2q_lane_v:
9508 case NEON::BI__builtin_neon_vld3_lane_v:
9509 case NEON::BI__builtin_neon_vld3q_lane_v:
9510 case NEON::BI__builtin_neon_vld4_lane_v:
9511 case NEON::BI__builtin_neon_vld4q_lane_v:
9512 case NEON::BI__builtin_neon_vld2_dup_v:
9513 case NEON::BI__builtin_neon_vld2q_dup_v:
9514 case NEON::BI__builtin_neon_vld3_dup_v:
9515 case NEON::BI__builtin_neon_vld3q_dup_v:
9516 case NEON::BI__builtin_neon_vld4_dup_v:
9517 case NEON::BI__builtin_neon_vld4q_dup_v:
9518 // Get the alignment for the argument in addition to the value;
9519 // we'll use it later.
9520 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9521 Ops.push_back(PtrOp1.emitRawPointer(*this));
9522 continue;
9523 }
9524 }
9525
9526 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9527 }
9528
9529 switch (BuiltinID) {
9530 default: break;
9531
9532 case NEON::BI__builtin_neon_vget_lane_i8:
9533 case NEON::BI__builtin_neon_vget_lane_i16:
9534 case NEON::BI__builtin_neon_vget_lane_i32:
9535 case NEON::BI__builtin_neon_vget_lane_i64:
9536 case NEON::BI__builtin_neon_vget_lane_bf16:
9537 case NEON::BI__builtin_neon_vget_lane_f32:
9538 case NEON::BI__builtin_neon_vgetq_lane_i8:
9539 case NEON::BI__builtin_neon_vgetq_lane_i16:
9540 case NEON::BI__builtin_neon_vgetq_lane_i32:
9541 case NEON::BI__builtin_neon_vgetq_lane_i64:
9542 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9543 case NEON::BI__builtin_neon_vgetq_lane_f32:
9544 case NEON::BI__builtin_neon_vduph_lane_bf16:
9545 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9546 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9547
9548 case NEON::BI__builtin_neon_vrndns_f32: {
9549 Value *Arg = EmitScalarExpr(E->getArg(0));
9550 llvm::Type *Tys[] = {Arg->getType()};
9551 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9552 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9553
9554 case NEON::BI__builtin_neon_vset_lane_i8:
9555 case NEON::BI__builtin_neon_vset_lane_i16:
9556 case NEON::BI__builtin_neon_vset_lane_i32:
9557 case NEON::BI__builtin_neon_vset_lane_i64:
9558 case NEON::BI__builtin_neon_vset_lane_bf16:
9559 case NEON::BI__builtin_neon_vset_lane_f32:
9560 case NEON::BI__builtin_neon_vsetq_lane_i8:
9561 case NEON::BI__builtin_neon_vsetq_lane_i16:
9562 case NEON::BI__builtin_neon_vsetq_lane_i32:
9563 case NEON::BI__builtin_neon_vsetq_lane_i64:
9564 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9565 case NEON::BI__builtin_neon_vsetq_lane_f32:
9566 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9567
9568 case NEON::BI__builtin_neon_vsha1h_u32:
9569 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9570 "vsha1h");
9571 case NEON::BI__builtin_neon_vsha1cq_u32:
9572 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9573 "vsha1h");
9574 case NEON::BI__builtin_neon_vsha1pq_u32:
9575 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9576 "vsha1h");
9577 case NEON::BI__builtin_neon_vsha1mq_u32:
9578 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9579 "vsha1h");
9580
9581 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9582 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9583 "vcvtbfp2bf");
9584 }
9585
9586 // The ARM _MoveToCoprocessor builtins put the input register value as
9587 // the first argument, but the LLVM intrinsic expects it as the third one.
9588 case clang::ARM::BI_MoveToCoprocessor:
9589 case clang::ARM::BI_MoveToCoprocessor2: {
9590 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9591 ? Intrinsic::arm_mcr
9592 : Intrinsic::arm_mcr2);
9593 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9594 Ops[3], Ops[4], Ops[5]});
9595 }
9596 }
9597
9598 // Get the last argument, which specifies the vector type.
9599 assert(HasExtraArg);
9600 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9601 std::optional<llvm::APSInt> Result =
9602 Arg->getIntegerConstantExpr(getContext());
9603 if (!Result)
9604 return nullptr;
9605
9606 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9607 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9608 // Determine the overloaded type of this builtin.
9609 llvm::Type *Ty;
9610 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9611 Ty = FloatTy;
9612 else
9613 Ty = DoubleTy;
9614
9615 // Determine whether this is an unsigned conversion or not.
9616 bool usgn = Result->getZExtValue() == 1;
9617 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9618
9619 // Call the appropriate intrinsic.
9620 Function *F = CGM.getIntrinsic(Int, Ty);
9621 return Builder.CreateCall(F, Ops, "vcvtr");
9622 }
9623
9624 // Determine the type of this overloaded NEON intrinsic.
9625 NeonTypeFlags Type = Result->getZExtValue();
9626 bool usgn = Type.isUnsigned();
9627 bool rightShift = false;
9628
9629 llvm::FixedVectorType *VTy =
9630 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9631 getTarget().hasBFloat16Type());
9632 llvm::Type *Ty = VTy;
9633 if (!Ty)
9634 return nullptr;
9635
9636 // Many NEON builtins have identical semantics and uses in ARM and
9637 // AArch64. Emit these in a single function.
9638 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9639 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9640 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9641 if (Builtin)
9642 return EmitCommonNeonBuiltinExpr(
9643 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9644 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9645
9646 unsigned Int;
9647 switch (BuiltinID) {
9648 default: return nullptr;
9649 case NEON::BI__builtin_neon_vld1q_lane_v:
9650 // Handle 64-bit integer elements as a special case. Use shuffles of
9651 // one-element vectors to avoid poor code for i64 in the backend.
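 // The untouched lane is extracted with a shuffle, the new value is loaded as a
 // <1 x i64>, and the two one-element vectors are shuffled back together.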
9652 if (VTy->getElementType()->isIntegerTy(64)) {
9653 // Extract the other lane.
9654 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9655 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9656 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9657 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9658 // Load the value as a one-element vector.
9659 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9660 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9661 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9662 Value *Align = getAlignmentValue32(PtrOp0);
9663 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9664 // Combine them.
9665 int Indices[] = {1 - Lane, Lane};
9666 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9667 }
9668 [[fallthrough]];
9669 case NEON::BI__builtin_neon_vld1_lane_v: {
9670 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9671 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9672 Value *Ld = Builder.CreateLoad(PtrOp0);
9673 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9674 }
9675 case NEON::BI__builtin_neon_vqrshrn_n_v:
9676 Int =
9677 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9678 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9679 1, true);
9680 case NEON::BI__builtin_neon_vqrshrun_n_v:
9681 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9682 Ops, "vqrshrun_n", 1, true);
9683 case NEON::BI__builtin_neon_vqshrn_n_v:
9684 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9685 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9686 1, true);
9687 case NEON::BI__builtin_neon_vqshrun_n_v:
9688 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9689 Ops, "vqshrun_n", 1, true);
9690 case NEON::BI__builtin_neon_vrecpe_v:
9691 case NEON::BI__builtin_neon_vrecpeq_v:
9692 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9693 Ops, "vrecpe");
9694 case NEON::BI__builtin_neon_vrshrn_n_v:
9695 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9696 Ops, "vrshrn_n", 1, true);
9697 case NEON::BI__builtin_neon_vrsra_n_v:
9698 case NEON::BI__builtin_neon_vrsraq_n_v:
9699 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9700 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9701 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9702 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9703 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9704 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9705 case NEON::BI__builtin_neon_vsri_n_v:
9706 case NEON::BI__builtin_neon_vsriq_n_v:
9707 rightShift = true;
9708 [[fallthrough]];
9709 case NEON::BI__builtin_neon_vsli_n_v:
9710 case NEON::BI__builtin_neon_vsliq_n_v:
9711 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9712 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9713 Ops, "vsli_n");
9714 case NEON::BI__builtin_neon_vsra_n_v:
9715 case NEON::BI__builtin_neon_vsraq_n_v:
9716 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9717 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9718 return Builder.CreateAdd(Ops[0], Ops[1]);
9719 case NEON::BI__builtin_neon_vst1q_lane_v:
9720 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9721 // a one-element vector and avoid poor code for i64 in the backend.
9722 if (VTy->getElementType()->isIntegerTy(64)) {
9723 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9724 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9725 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9726 Ops[2] = getAlignmentValue32(PtrOp0);
9727 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9728 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9729 Tys), Ops);
9730 }
9731 [[fallthrough]];
9732 case NEON::BI__builtin_neon_vst1_lane_v: {
9733 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9734 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9735 return Builder.CreateStore(Ops[1],
9736 PtrOp0.withElementType(Ops[1]->getType()));
9737 }
9738 case NEON::BI__builtin_neon_vtbl1_v:
9739 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9740 Ops, "vtbl1");
9741 case NEON::BI__builtin_neon_vtbl2_v:
9742 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9743 Ops, "vtbl2");
9744 case NEON::BI__builtin_neon_vtbl3_v:
9745 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9746 Ops, "vtbl3");
9747 case NEON::BI__builtin_neon_vtbl4_v:
9748 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9749 Ops, "vtbl4");
9750 case NEON::BI__builtin_neon_vtbx1_v:
9751 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9752 Ops, "vtbx1");
9753 case NEON::BI__builtin_neon_vtbx2_v:
9754 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9755 Ops, "vtbx2");
9756 case NEON::BI__builtin_neon_vtbx3_v:
9757 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9758 Ops, "vtbx3");
9759 case NEON::BI__builtin_neon_vtbx4_v:
9760 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9761 Ops, "vtbx4");
9762 }
9763}
9764
9765template<typename Integer>
9766 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9767 return E->getIntegerConstantExpr(Context)->getExtValue();
9768}
9769
9770static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9771 llvm::Type *T, bool Unsigned) {
9772 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9773 // which finds it convenient to specify signed/unsigned as a boolean flag.
9774 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9775}
9776
9777static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9778 uint32_t Shift, bool Unsigned) {
9779 // MVE helper function for integer shift right. This must handle signed vs
9780 // unsigned, and also deal specially with the case where the shift count is
9781 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9782 // undefined behavior, but in MVE it's legal, so we must convert it to code
9783 // that is not undefined in IR.
9784 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9785 ->getElementType()
9786 ->getPrimitiveSizeInBits();
9787 if (Shift == LaneBits) {
9788 // An unsigned shift of the full lane size always generates zero, so we can
9789 // simply emit a zero vector. A signed shift of the full lane size does the
9790 // same thing as shifting by one bit fewer.
9791 if (Unsigned)
9792 return llvm::Constant::getNullValue(V->getType());
9793 else
9794 --Shift;
9795 }
9796 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9797}
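// Illustrative note (not in the upstream source): in LLVM IR an lshr/ashr by
// the full lane width is poison, so for 16-bit MVE lanes a shift right by 16
// is assumed to lower as
//   unsigned:  zeroinitializer
//   signed:    ashr by 15, i.e. a splat of the sign bit, matching MVE.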
9798
9799static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9800 // MVE-specific helper function for a vector splat, which infers the element
9801 // count of the output vector by knowing that MVE vectors are all 128 bits
9802 // wide.
9803 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9804 return Builder.CreateVectorSplat(Elements, V);
9805}
9806
9807static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9808 CodeGenFunction *CGF,
9809 llvm::Value *V,
9810 llvm::Type *DestType) {
9811 // Convert one MVE vector type into another by reinterpreting its in-register
9812 // format.
9813 //
9814 // Little-endian, this is identical to a bitcast (which reinterprets the
9815 // memory format). But big-endian, they're not necessarily the same, because
9816 // the register and memory formats map to each other differently depending on
9817 // the lane size.
9818 //
9819 // We generate a bitcast whenever we can (if we're little-endian, or if the
9820 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9821 // that performs the different kind of reinterpretation.
9822 if (CGF->getTarget().isBigEndian() &&
9823 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9824 return Builder.CreateCall(
9825 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9826 {DestType, V->getType()}),
9827 V);
9828 } else {
9829 return Builder.CreateBitCast(V, DestType);
9830 }
9831}
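// Illustrative sketch (assumption, not in the upstream source): reinterpreting
// <8 x i16> as <4 x i32> emits a plain bitcast on little-endian targets, while
// on big-endian targets it is routed through llvm.arm.mve.vreinterpretq
// because the in-register lane order no longer matches the memory order when
// the lane size changes.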
9832
9833static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9834 // Make a shufflevector that extracts every other element of a vector (evens
9835 // or odds, as desired).
9836 SmallVector<int, 16> Indices;
9837 unsigned InputElements =
9838 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9839 for (unsigned i = 0; i < InputElements; i += 2)
9840 Indices.push_back(i + Odd);
9841 return Builder.CreateShuffleVector(V, Indices);
9842}
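// Worked example (not in the upstream source): for an 8-element input,
// VectorUnzip selects indices {0,2,4,6} when Odd is false and {1,3,5,7} when
// Odd is true.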
9843
9844static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9845 llvm::Value *V1) {
9846 // Make a shufflevector that interleaves two vectors element by element.
9847 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9848 SmallVector<int, 16> Indices;
9849 unsigned InputElements =
9850 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9851 for (unsigned i = 0; i < InputElements; i++) {
9852 Indices.push_back(i);
9853 Indices.push_back(i + InputElements);
9854 }
9855 return Builder.CreateShuffleVector(V0, V1, Indices);
9856}
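// Worked example (not in the upstream source): zipping <a0 a1 a2 a3> with
// <b0 b1 b2 b3> uses indices {0,4,1,5,2,6,3,7} and yields
// <a0 b0 a1 b1 a2 b2 a3 b3>.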
9857
9858template<unsigned HighBit, unsigned OtherBits>
9859static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9860 // MVE-specific helper function to make a vector splat of a constant such as
9861 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9862 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9863 unsigned LaneBits = T->getPrimitiveSizeInBits();
9864 uint32_t Value = HighBit << (LaneBits - 1);
9865 if (OtherBits)
9866 Value |= (1UL << (LaneBits - 1)) - 1;
9867 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9868 return ARMMVEVectorSplat(Builder, Lane);
9869}
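// Worked example (not in the upstream source): with 32-bit lanes,
// ARMMVEConstantSplat<1, 0> splats 0x80000000 (INT_MIN),
// ARMMVEConstantSplat<0, 1> splats 0x7fffffff (INT_MAX), and
// ARMMVEConstantSplat<1, 1> splats 0xffffffff (UINT_MAX).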
9870
9871static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9872 llvm::Value *V,
9873 unsigned ReverseWidth) {
9874 // MVE-specific helper function which reverses the elements of a
9875 // vector within every (ReverseWidth)-bit collection of lanes.
9876 SmallVector<int, 16> Indices;
9877 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9878 unsigned Elements = 128 / LaneSize;
9879 unsigned Mask = ReverseWidth / LaneSize - 1;
9880 for (unsigned i = 0; i < Elements; i++)
9881 Indices.push_back(i ^ Mask);
9882 return Builder.CreateShuffleVector(V, Indices);
9883}
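// Worked example (not in the upstream source): for 8-bit lanes with
// ReverseWidth == 32 the mask is 3, giving indices {3,2,1,0, 7,6,5,4, ...},
// i.e. the bytes are reversed within each 32-bit group (the VREV32.8 pattern).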
9884
9885 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9886 const CallExpr *E,
9887 ReturnValueSlot ReturnValue,
9888 llvm::Triple::ArchType Arch) {
9889 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9890 Intrinsic::ID IRIntr;
9891 unsigned NumVectors;
9892
9893 // Code autogenerated by Tablegen will handle all the simple builtins.
9894 switch (BuiltinID) {
9895 #include "clang/Basic/arm_mve_builtin_cg.inc"
9896
9897 // If we didn't match an MVE builtin id at all, go back to the
9898 // main EmitARMBuiltinExpr.
9899 default:
9900 return nullptr;
9901 }
9902
9903 // Anything that breaks from that switch is an MVE builtin that
9904 // needs handwritten code to generate.
9905
9906 switch (CustomCodeGenType) {
9907
9908 case CustomCodeGen::VLD24: {
9909 llvm::SmallVector<Value *, 4> Ops;
9910 llvm::SmallVector<llvm::Type *, 4> Tys;
9911
9912 auto MvecCType = E->getType();
9913 auto MvecLType = ConvertType(MvecCType);
9914 assert(MvecLType->isStructTy() &&
9915 "Return type for vld[24]q should be a struct");
9916 assert(MvecLType->getStructNumElements() == 1 &&
9917 "Return-type struct for vld[24]q should have one element");
9918 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9919 assert(MvecLTypeInner->isArrayTy() &&
9920 "Return-type struct for vld[24]q should contain an array");
9921 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9922 "Array member of return-type struct vld[24]q has wrong length");
9923 auto VecLType = MvecLTypeInner->getArrayElementType();
9924
9925 Tys.push_back(VecLType);
9926
9927 auto Addr = E->getArg(0);
9928 Ops.push_back(EmitScalarExpr(Addr));
9929 Tys.push_back(ConvertType(Addr->getType()));
9930
9931 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9932 Value *LoadResult = Builder.CreateCall(F, Ops);
9933 Value *MvecOut = PoisonValue::get(MvecLType);
9934 for (unsigned i = 0; i < NumVectors; ++i) {
9935 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9936 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9937 }
9938
9939 if (ReturnValue.isNull())
9940 return MvecOut;
9941 else
9942 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9943 }
9944
9945 case CustomCodeGen::VST24: {
9946 llvm::SmallVector<Value *, 4> Ops;
9947 llvm::SmallVector<llvm::Type *, 4> Tys;
9948
9949 auto Addr = E->getArg(0);
9950 Ops.push_back(EmitScalarExpr(Addr));
9951 Tys.push_back(ConvertType(Addr->getType()));
9952
9953 auto MvecCType = E->getArg(1)->getType();
9954 auto MvecLType = ConvertType(MvecCType);
9955 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9956 assert(MvecLType->getStructNumElements() == 1 &&
9957 "Data-type struct for vst2q should have one element");
9958 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9959 assert(MvecLTypeInner->isArrayTy() &&
9960 "Data-type struct for vst2q should contain an array");
9961 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9962 "Array member of data-type struct for vst2q has wrong length");
9963 auto VecLType = MvecLTypeInner->getArrayElementType();
9964
9965 Tys.push_back(VecLType);
9966
9967 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9968 EmitAggExpr(E->getArg(1), MvecSlot);
9969 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9970 for (unsigned i = 0; i < NumVectors; i++)
9971 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9972
9973 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9974 Value *ToReturn = nullptr;
9975 for (unsigned i = 0; i < NumVectors; i++) {
9976 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9977 ToReturn = Builder.CreateCall(F, Ops);
9978 Ops.pop_back();
9979 }
9980 return ToReturn;
9981 }
9982 }
9983 llvm_unreachable("unknown custom codegen type.");
9984}
9985
9986 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9987 const CallExpr *E,
9988 ReturnValueSlot ReturnValue,
9989 llvm::Triple::ArchType Arch) {
9990 switch (BuiltinID) {
9991 default:
9992 return nullptr;
9993#include "clang/Basic/arm_cde_builtin_cg.inc"
9994 }
9995}
9996
9997static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9998 const CallExpr *E,
9999 SmallVectorImpl<Value *> &Ops,
10000 llvm::Triple::ArchType Arch) {
10001 unsigned int Int = 0;
10002 const char *s = nullptr;
10003
10004 switch (BuiltinID) {
10005 default:
10006 return nullptr;
10007 case NEON::BI__builtin_neon_vtbl1_v:
10008 case NEON::BI__builtin_neon_vqtbl1_v:
10009 case NEON::BI__builtin_neon_vqtbl1q_v:
10010 case NEON::BI__builtin_neon_vtbl2_v:
10011 case NEON::BI__builtin_neon_vqtbl2_v:
10012 case NEON::BI__builtin_neon_vqtbl2q_v:
10013 case NEON::BI__builtin_neon_vtbl3_v:
10014 case NEON::BI__builtin_neon_vqtbl3_v:
10015 case NEON::BI__builtin_neon_vqtbl3q_v:
10016 case NEON::BI__builtin_neon_vtbl4_v:
10017 case NEON::BI__builtin_neon_vqtbl4_v:
10018 case NEON::BI__builtin_neon_vqtbl4q_v:
10019 break;
10020 case NEON::BI__builtin_neon_vtbx1_v:
10021 case NEON::BI__builtin_neon_vqtbx1_v:
10022 case NEON::BI__builtin_neon_vqtbx1q_v:
10023 case NEON::BI__builtin_neon_vtbx2_v:
10024 case NEON::BI__builtin_neon_vqtbx2_v:
10025 case NEON::BI__builtin_neon_vqtbx2q_v:
10026 case NEON::BI__builtin_neon_vtbx3_v:
10027 case NEON::BI__builtin_neon_vqtbx3_v:
10028 case NEON::BI__builtin_neon_vqtbx3q_v:
10029 case NEON::BI__builtin_neon_vtbx4_v:
10030 case NEON::BI__builtin_neon_vqtbx4_v:
10031 case NEON::BI__builtin_neon_vqtbx4q_v:
10032 break;
10033 }
10034
10035 assert(E->getNumArgs() >= 3);
10036
10037 // Get the last argument, which specifies the vector type.
10038 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
10039 std::optional<llvm::APSInt> Result =
10040 Arg->getIntegerConstantExpr(CGF.getContext());
10041 if (!Result)
10042 return nullptr;
10043
10044 // Determine the type of this overloaded NEON intrinsic.
10045 NeonTypeFlags Type = Result->getZExtValue();
10046 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
10047 if (!Ty)
10048 return nullptr;
10049
10050 CodeGen::CGBuilderTy &Builder = CGF.Builder;
10051
10052 // AArch64 scalar builtins are not overloaded; they do not have an extra
10053 // argument that specifies the vector type, so we need to handle each case.
10054 switch (BuiltinID) {
10055 case NEON::BI__builtin_neon_vtbl1_v: {
10056 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
10057 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10058 }
10059 case NEON::BI__builtin_neon_vtbl2_v: {
10060 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
10061 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10062 }
10063 case NEON::BI__builtin_neon_vtbl3_v: {
10064 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
10065 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10066 }
10067 case NEON::BI__builtin_neon_vtbl4_v: {
10068 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
10069 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10070 }
10071 case NEON::BI__builtin_neon_vtbx1_v: {
10072 Value *TblRes =
10073 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
10074 Intrinsic::aarch64_neon_tbl1, "vtbl1");
10075
10076 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
10077 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
10078 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10079
10080 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10081 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10082 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10083 }
10084 case NEON::BI__builtin_neon_vtbx2_v: {
10085 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
10086 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
10087 }
10088 case NEON::BI__builtin_neon_vtbx3_v: {
10089 Value *TblRes =
10090 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
10091 Intrinsic::aarch64_neon_tbl2, "vtbl2");
10092
10093 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
10094 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
10095 TwentyFourV);
10096 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10097
10098 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10099 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10100 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10101 }
10102 case NEON::BI__builtin_neon_vtbx4_v: {
10103 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
10104 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
10105 }
10106 case NEON::BI__builtin_neon_vqtbl1_v:
10107 case NEON::BI__builtin_neon_vqtbl1q_v:
10108 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
10109 case NEON::BI__builtin_neon_vqtbl2_v:
10110 case NEON::BI__builtin_neon_vqtbl2q_v: {
10111 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
10112 case NEON::BI__builtin_neon_vqtbl3_v:
10113 case NEON::BI__builtin_neon_vqtbl3q_v:
10114 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
10115 case NEON::BI__builtin_neon_vqtbl4_v:
10116 case NEON::BI__builtin_neon_vqtbl4q_v:
10117 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
10118 case NEON::BI__builtin_neon_vqtbx1_v:
10119 case NEON::BI__builtin_neon_vqtbx1q_v:
10120 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
10121 case NEON::BI__builtin_neon_vqtbx2_v:
10122 case NEON::BI__builtin_neon_vqtbx2q_v:
10123 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
10124 case NEON::BI__builtin_neon_vqtbx3_v:
10125 case NEON::BI__builtin_neon_vqtbx3q_v:
10126 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
10127 case NEON::BI__builtin_neon_vqtbx4_v:
10128 case NEON::BI__builtin_neon_vqtbx4q_v:
10129 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
10130 }
10131 }
10132
10133 if (!Int)
10134 return nullptr;
10135
10136 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
10137 return CGF.EmitNeonCall(F, Ops, s);
10138}
10139
10140 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
10141 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
10142 Op = Builder.CreateBitCast(Op, Int16Ty);
10143 Value *V = PoisonValue::get(VTy);
10144 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
10145 Op = Builder.CreateInsertElement(V, Op, CI);
10146 return Op;
10147}
10148
10149/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
10150/// access builtin. Only required if it can't be inferred from the base pointer
10151/// operand.
10152llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
10153 switch (TypeFlags.getMemEltType()) {
10154 case SVETypeFlags::MemEltTyDefault:
10155 return getEltType(TypeFlags);
10156 case SVETypeFlags::MemEltTyInt8:
10157 return Builder.getInt8Ty();
10158 case SVETypeFlags::MemEltTyInt16:
10159 return Builder.getInt16Ty();
10160 case SVETypeFlags::MemEltTyInt32:
10161 return Builder.getInt32Ty();
10162 case SVETypeFlags::MemEltTyInt64:
10163 return Builder.getInt64Ty();
10164 }
10165 llvm_unreachable("Unknown MemEltType");
10166}
10167
10168llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
10169 switch (TypeFlags.getEltType()) {
10170 default:
10171 llvm_unreachable("Invalid SVETypeFlag!");
10172
10173 case SVETypeFlags::EltTyInt8:
10174 return Builder.getInt8Ty();
10175 case SVETypeFlags::EltTyInt16:
10176 return Builder.getInt16Ty();
10177 case SVETypeFlags::EltTyInt32:
10178 return Builder.getInt32Ty();
10179 case SVETypeFlags::EltTyInt64:
10180 return Builder.getInt64Ty();
10181 case SVETypeFlags::EltTyInt128:
10182 return Builder.getInt128Ty();
10183
10184 case SVETypeFlags::EltTyFloat16:
10185 return Builder.getHalfTy();
10186 case SVETypeFlags::EltTyFloat32:
10187 return Builder.getFloatTy();
10188 case SVETypeFlags::EltTyFloat64:
10189 return Builder.getDoubleTy();
10190
10191 case SVETypeFlags::EltTyBFloat16:
10192 return Builder.getBFloatTy();
10193
10194 case SVETypeFlags::EltTyBool8:
10195 case SVETypeFlags::EltTyBool16:
10196 case SVETypeFlags::EltTyBool32:
10197 case SVETypeFlags::EltTyBool64:
10198 return Builder.getInt1Ty();
10199 }
10200}
10201
10202// Return the llvm predicate vector type corresponding to the specified element
10203// TypeFlags.
10204llvm::ScalableVectorType *
10205 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
10206 switch (TypeFlags.getEltType()) {
10207 default: llvm_unreachable("Unhandled SVETypeFlag!");
10208
10209 case SVETypeFlags::EltTyInt8:
10210 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10211 case SVETypeFlags::EltTyInt16:
10212 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10213 case SVETypeFlags::EltTyInt32:
10214 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10215 case SVETypeFlags::EltTyInt64:
10216 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10217
10218 case SVETypeFlags::EltTyBFloat16:
10219 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10220 case SVETypeFlags::EltTyFloat16:
10221 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10222 case SVETypeFlags::EltTyFloat32:
10223 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10224 case SVETypeFlags::EltTyFloat64:
10225 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10226
10227 case SVETypeFlags::EltTyBool8:
10228 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10229 case SVETypeFlags::EltTyBool16:
10230 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10231 case SVETypeFlags::EltTyBool32:
10232 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10233 case SVETypeFlags::EltTyBool64:
10234 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10235 }
10236}
10237
10238// Return the llvm vector type corresponding to the specified element TypeFlags.
10239llvm::ScalableVectorType *
10240CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
10241 switch (TypeFlags.getEltType()) {
10242 default:
10243 llvm_unreachable("Invalid SVETypeFlag!");
10244
10245 case SVETypeFlags::EltTyInt8:
10246 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10247 case SVETypeFlags::EltTyInt16:
10248 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
10249 case SVETypeFlags::EltTyInt32:
10250 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
10251 case SVETypeFlags::EltTyInt64:
10252 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
10253
10254 case SVETypeFlags::EltTyMFloat8:
10255 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10256 case SVETypeFlags::EltTyFloat16:
10257 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
10258 case SVETypeFlags::EltTyBFloat16:
10259 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
10260 case SVETypeFlags::EltTyFloat32:
10261 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
10262 case SVETypeFlags::EltTyFloat64:
10263 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
10264
10265 case SVETypeFlags::EltTyBool8:
10266 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10267 case SVETypeFlags::EltTyBool16:
10268 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10269 case SVETypeFlags::EltTyBool32:
10270 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10271 case SVETypeFlags::EltTyBool64:
10272 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10273 }
10274}
10275
10276llvm::Value *
10277 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
10278 Function *Ptrue =
10279 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
10280 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
10281}
10282
10283constexpr unsigned SVEBitsPerBlock = 128;
10284
10285static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
10286 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
10287 return llvm::ScalableVectorType::get(EltTy, NumElts);
10288}
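// Worked example (not in the upstream source): with SVEBitsPerBlock == 128 an
// i8 element type maps to <vscale x 16 x i8> and a double element type to
// <vscale x 2 x double>, mirroring the per-128-bit-granule lane counts used by
// getSVEType and getSVEPredType above.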
10289
10290// Reinterpret the input predicate so that it can be used to correctly isolate
10291// the elements of the specified datatype.
10292 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
10293 llvm::ScalableVectorType *VTy) {
10294
10295 if (isa<TargetExtType>(Pred->getType()) &&
10296 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
10297 return Pred;
10298
10299 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
10300 if (Pred->getType() == RTy)
10301 return Pred;
10302
10303 unsigned IntID;
10304 llvm::Type *IntrinsicTy;
10305 switch (VTy->getMinNumElements()) {
10306 default:
10307 llvm_unreachable("unsupported element count!");
10308 case 1:
10309 case 2:
10310 case 4:
10311 case 8:
10312 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
10313 IntrinsicTy = RTy;
10314 break;
10315 case 16:
10316 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
10317 IntrinsicTy = Pred->getType();
10318 break;
10319 }
10320
10321 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
10322 Value *C = Builder.CreateCall(F, Pred);
10323 assert(C->getType() == RTy && "Unexpected return type!");
10324 return C;
10325}
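// Illustrative example (assumption, not in the upstream source): an svbool_t
// predicate arrives as <vscale x 16 x i1>; when the data is 64-bit the cast
// above emits
//   call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(%pg)
// and a predicate that already has the requested type is returned unchanged.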
10326
10327 Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
10328 llvm::StructType *Ty) {
10329 if (PredTuple->getType() == Ty)
10330 return PredTuple;
10331
10332 Value *Ret = llvm::PoisonValue::get(Ty);
10333 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
10334 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
10335 Pred = EmitSVEPredicateCast(
10336 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
10337 Ret = Builder.CreateInsertValue(Ret, Pred, I);
10338 }
10339
10340 return Ret;
10341}
10342
10343 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
10344 SmallVectorImpl<Value *> &Ops,
10345 unsigned IntID) {
10346 auto *ResultTy = getSVEType(TypeFlags);
10347 auto *OverloadedTy =
10348 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
10349
10350 Function *F = nullptr;
10351 if (Ops[1]->getType()->isVectorTy())
10352 // This is the "vector base, scalar offset" case. In order to uniquely
10353 // map this built-in to an LLVM IR intrinsic, we need both the return type
10354 // and the type of the vector base.
10355 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
10356 else
10357 // This is the "scalar base, vector offset case". The type of the offset
10358 // is encoded in the name of the intrinsic. We only need to specify the
10359 // return type in order to uniquely map this built-in to an LLVM IR
10360 // intrinsic.
10361 F = CGM.getIntrinsic(IntID, OverloadedTy);
10362
10363 // At the ACLE level there's only one predicate type, svbool_t, which is
10364 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10365 // actual type being loaded. For example, when loading doubles (i64) the
10366 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10367 // the predicate and the data being loaded must match. Cast to the type
10368 // expected by the intrinsic. The intrinsic itself should be defined in
10369 // a way that enforces relations between parameter types.
10370 Ops[0] = EmitSVEPredicateCast(
10371 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
10372
10373 // Pass 0 when the offset is missing. This can only be applied when using
10374 // the "vector base" addressing mode for which ACLE allows no offset. The
10375 // corresponding LLVM IR always requires an offset.
10376 if (Ops.size() == 2) {
10377 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10378 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10379 }
10380
10381 // For "vector base, scalar index" scale the index so that it becomes a
10382 // scalar offset.
10383 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
10384 unsigned BytesPerElt =
10385 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10386 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10387 }
10388
10389 Value *Call = Builder.CreateCall(F, Ops);
10390
10391 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
10392 // other cases it's folded into a nop.
10393 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
10394 : Builder.CreateSExt(Call, ResultTy);
10395}
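// Illustrative example (assumption, not in the upstream source): a call such
// as svld1_gather_u64base_offset_u64(pg, bases, offset) takes the "vector
// base, scalar offset" path above, so the intrinsic is overloaded on both the
// result type and the base-vector type, and the trailing sext/zext only
// changes the value when the memory element type is narrower than the result.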
10396
10397 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
10398 SmallVectorImpl<Value *> &Ops,
10399 unsigned IntID) {
10400 auto *SrcDataTy = getSVEType(TypeFlags);
10401 auto *OverloadedTy =
10402 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
10403
10404 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
10405 // it's the first argument. Move it accordingly.
10406 Ops.insert(Ops.begin(), Ops.pop_back_val());
10407
10408 Function *F = nullptr;
10409 if (Ops[2]->getType()->isVectorTy())
10410 // This is the "vector base, scalar offset" case. In order to uniquely
10411 // map this built-in to an LLVM IR intrinsic, we need both the return type
10412 // and the type of the vector base.
10413 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
10414 else
10415 // This is the "scalar base, vector offset case". The type of the offset
10416 // is encoded in the name of the intrinsic. We only need to specify the
10417 // return type in order to uniquely map this built-in to an LLVM IR
10418 // intrinsic.
10419 F = CGM.getIntrinsic(IntID, OverloadedTy);
10420
10421 // Pass 0 when the offset is missing. This can only be applied when using
10422 // the "vector base" addressing mode for which ACLE allows no offset. The
10423 // corresponding LLVM IR always requires an offset.
10424 if (Ops.size() == 3) {
10425 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10426 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10427 }
10428
10429 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
10430 // folded into a nop.
10431 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
10432
10433 // At the ACLE level there's only one predicate type, svbool_t, which is
10434 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10435 // actual type being stored. For example, when storing doubles (i64) the
10436 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10437 // the predicate and the data being stored must match. Cast to the type
10438 // expected by the intrinsic. The intrinsic itself should be defined in
10439 // a way that enforces relations between parameter types.
10440 Ops[1] = EmitSVEPredicateCast(
10441 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
10442
10443 // For "vector base, scalar index" scale the index so that it becomes a
10444 // scalar offset.
10445 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
10446 unsigned BytesPerElt =
10447 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10448 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
10449 }
10450
10451 return Builder.CreateCall(F, Ops);
10452}
10453
10454 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
10455 SmallVectorImpl<Value *> &Ops,
10456 unsigned IntID) {
10457 // The gather prefetches are overloaded on the vector input - this can either
10458 // be the vector of base addresses or vector of offsets.
10459 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
10460 if (!OverloadedTy)
10461 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
10462
10463 // Cast the predicate from svbool_t to the right number of elements.
10464 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
10465
10466 // vector + imm addressing modes
10467 if (Ops[1]->getType()->isVectorTy()) {
10468 if (Ops.size() == 3) {
10469 // Pass 0 for 'vector+imm' when the index is omitted.
10470 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10471
10472 // The sv_prfop is the last operand in the builtin and IR intrinsic.
10473 std::swap(Ops[2], Ops[3]);
10474 } else {
10475 // Index needs to be passed as scaled offset.
10476 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10477 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
10478 if (BytesPerElt > 1)
10479 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10480 }
10481 }
10482
10483 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
10484 return Builder.CreateCall(F, Ops);
10485}
10486
10487 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
10488 SmallVectorImpl<Value*> &Ops,
10489 unsigned IntID) {
10490 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10491 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10492 Value *BasePtr = Ops[1];
10493
10494 // Does the load have an offset?
10495 if (Ops.size() > 2)
10496 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10497
10498 Function *F = CGM.getIntrinsic(IntID, {VTy});
10499 return Builder.CreateCall(F, {Predicate, BasePtr});
10500}
10501
10502 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10503 SmallVectorImpl<Value*> &Ops,
10504 unsigned IntID) {
10505 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10506
10507 unsigned N;
10508 switch (IntID) {
10509 case Intrinsic::aarch64_sve_st2:
10510 case Intrinsic::aarch64_sve_st1_pn_x2:
10511 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10512 case Intrinsic::aarch64_sve_st2q:
10513 N = 2;
10514 break;
10515 case Intrinsic::aarch64_sve_st3:
10516 case Intrinsic::aarch64_sve_st3q:
10517 N = 3;
10518 break;
10519 case Intrinsic::aarch64_sve_st4:
10520 case Intrinsic::aarch64_sve_st1_pn_x4:
10521 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10522 case Intrinsic::aarch64_sve_st4q:
10523 N = 4;
10524 break;
10525 default:
10526 llvm_unreachable("unknown intrinsic!");
10527 }
10528
10529 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10530 Value *BasePtr = Ops[1];
10531
10532 // Does the store have an offset?
10533 if (Ops.size() > (2 + N))
10534 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10535
10536 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10537 // need to break up the tuple vector.
10538 SmallVector<llvm::Value*, 5> Operands;
10539 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10540 Operands.push_back(Ops[I]);
10541 Operands.append({Predicate, BasePtr});
10542 Function *F = CGM.getIntrinsic(IntID, { VTy });
10543
10544 return Builder.CreateCall(F, Operands);
10545}
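// Illustrative example (assumption, not in the upstream source):
// svst2_f32(pg, base, data) reaches the N == 2 path above; the two part
// vectors are peeled off the end of Ops and passed to
// llvm.aarch64.sve.st2.nxv4f32 together with the narrowed predicate and the
// (optionally GEP-adjusted) base pointer.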
10546
10547// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10548// svpmullt_pair intrinsics, with the exception that their results are bitcast
10549// to a wider type.
10550 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10551 SmallVectorImpl<Value *> &Ops,
10552 unsigned BuiltinID) {
10553 // Splat scalar operand to vector (intrinsics with _n infix)
10554 if (TypeFlags.hasSplatOperand()) {
10555 unsigned OpNo = TypeFlags.getSplatOperand();
10556 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10557 }
10558
10559 // The pair-wise function has a narrower overloaded type.
10560 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10561 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10562
10563 // Now bitcast to the wider result type.
10564 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10565 return EmitSVEReinterpret(Call, Ty);
10566}
10567
10568 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10569 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10570 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10571 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10572 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10573}
10574
10575 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10576 SmallVectorImpl<Value *> &Ops,
10577 unsigned BuiltinID) {
10578 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10579 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10580 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10581
10582 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10583 Value *BasePtr = Ops[1];
10584
10585 // Implement the index operand if not omitted.
10586 if (Ops.size() > 3)
10587 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10588
10589 Value *PrfOp = Ops.back();
10590
10591 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10592 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10593}
10594
10595 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10596 llvm::Type *ReturnTy,
10597 SmallVectorImpl<Value *> &Ops,
10598 unsigned IntrinsicID,
10599 bool IsZExtReturn) {
10600 QualType LangPTy = E->getArg(1)->getType();
10601 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10602 LangPTy->castAs<PointerType>()->getPointeeType());
10603
10604 // The vector type that is returned may be different from the
10605 // eventual type loaded from memory.
10606 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10607 llvm::ScalableVectorType *MemoryTy = nullptr;
10608 llvm::ScalableVectorType *PredTy = nullptr;
10609 bool IsQuadLoad = false;
10610 switch (IntrinsicID) {
10611 case Intrinsic::aarch64_sve_ld1uwq:
10612 case Intrinsic::aarch64_sve_ld1udq:
10613 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10614 PredTy = llvm::ScalableVectorType::get(
10615 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10616 IsQuadLoad = true;
10617 break;
10618 default:
10619 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10620 PredTy = MemoryTy;
10621 break;
10622 }
10623
10624 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10625 Value *BasePtr = Ops[1];
10626
10627 // Does the load have an offset?
10628 if (Ops.size() > 2)
10629 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10630
10631 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10632 auto *Load =
10633 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10634 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10635 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10636
10637 if (IsQuadLoad)
10638 return Load;
10639
10640 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10641 : Builder.CreateSExt(Load, VectorTy);
10642}
10643
10644 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10645 SmallVectorImpl<Value *> &Ops,
10646 unsigned IntrinsicID) {
10647 QualType LangPTy = E->getArg(1)->getType();
10648 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10649 LangPTy->castAs<PointerType>()->getPointeeType());
10650
10651 // The vector type that is stored may be different from the
10652 // eventual type stored to memory.
10653 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10654 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10655
10656 auto PredTy = MemoryTy;
10657 auto AddrMemoryTy = MemoryTy;
10658 bool IsQuadStore = false;
10659
10660 switch (IntrinsicID) {
10661 case Intrinsic::aarch64_sve_st1wq:
10662 case Intrinsic::aarch64_sve_st1dq:
10663 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10664 PredTy =
10665 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10666 IsQuadStore = true;
10667 break;
10668 default:
10669 break;
10670 }
10671 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10672 Value *BasePtr = Ops[1];
10673
10674 // Does the store have an offset?
10675 if (Ops.size() == 4)
10676 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10677
10678 // Last value is always the data
10679 Value *Val =
10680 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10681
10682 Function *F =
10683 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10684 auto *Store =
10685 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10686 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10687 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10688 return Store;
10689}
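// Illustrative example (assumption, not in the upstream source): a truncating
// store such as svst1b_s32 truncates its <vscale x 4 x i32> data to
// <vscale x 4 x i8> before the call, whereas the st1wq/st1dq quad-store forms
// keep the full-width value and use a <vscale x 1 x i1> predicate (one lane
// per 128-bit quadword) instead.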
10690
10691 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10692 SmallVectorImpl<Value *> &Ops,
10693 unsigned IntID) {
10694 Ops[2] = EmitSVEPredicateCast(
10695 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10696
10697 SmallVector<Value *> NewOps;
10698 NewOps.push_back(Ops[2]);
10699
10700 llvm::Value *BasePtr = Ops[3];
10701 llvm::Value *RealSlice = Ops[1];
10702 // If the intrinsic contains the vnum parameter, multiply it by the vector
10703 // size in bytes.
10704 if (Ops.size() == 5) {
10705 Function *StreamingVectorLength =
10706 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10707 llvm::Value *StreamingVectorLengthCall =
10708 Builder.CreateCall(StreamingVectorLength);
10709 llvm::Value *Mulvl =
10710 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10711 // The type of the ptr parameter is void *, so use Int8Ty here.
10712 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10713 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
10714 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
10715 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
10716 }
10717 NewOps.push_back(BasePtr);
10718 NewOps.push_back(Ops[0]);
10719 NewOps.push_back(RealSlice);
10720 Function *F = CGM.getIntrinsic(IntID);
10721 return Builder.CreateCall(F, NewOps);
10722}
10723
10724 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10725 SmallVectorImpl<Value *> &Ops,
10726 unsigned IntID) {
10727 auto *VecTy = getSVEType(TypeFlags);
10728 Function *F = CGM.getIntrinsic(IntID, VecTy);
10729 if (TypeFlags.isReadZA())
10730 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10731 else if (TypeFlags.isWriteZA())
10732 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10733 return Builder.CreateCall(F, Ops);
10734}
10735
10736 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10737 SmallVectorImpl<Value *> &Ops,
10738 unsigned IntID) {
10739 // svzero_za() intrinsic zeros the entire za tile and has no parameters.
10740 if (Ops.size() == 0)
10741 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10742 Function *F = CGM.getIntrinsic(IntID, {});
10743 return Builder.CreateCall(F, Ops);
10744}
10745
10746 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10747 SmallVectorImpl<Value *> &Ops,
10748 unsigned IntID) {
10749 if (Ops.size() == 2)
10750 Ops.push_back(Builder.getInt32(0));
10751 else
10752 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10753 Function *F = CGM.getIntrinsic(IntID, {});
10754 return Builder.CreateCall(F, Ops);
10755}
10756
10757// Limit the usage of scalable llvm IR generated by the ACLE by using the
10758// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10759Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10760 return Builder.CreateVectorSplat(
10761 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10762}
10763
10764 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10765 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
10766#ifndef NDEBUG
10767 auto *VecTy = cast<llvm::VectorType>(Ty);
10768 ElementCount EC = VecTy->getElementCount();
10769 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
10770 "Only <1 x i8> expected");
10771#endif
10772 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
10773 }
10774 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10775}
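// Worked example (not in the upstream source): splatting an i32 scalar here
// produces a <vscale x 4 x i32> vector (SVEBitsPerBlock / 32 == 4 lanes per
// 128-bit granule); a <1 x i8> operand is first reduced to its scalar element
// and then splatted.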
10776
10777Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10778 // FIXME: For big endian this needs an additional REV, or needs a separate
10779 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10780 // instruction is defined as 'bitwise' equivalent from memory point of
10781 // view (when storing/reloading), whereas the svreinterpret builtin
10782 // implements bitwise equivalent cast from register point of view.
10783 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10784
10785 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10786 Value *Tuple = llvm::PoisonValue::get(Ty);
10787
10788 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10789 Value *In = Builder.CreateExtractValue(Val, I);
10790 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10791 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10792 }
10793
10794 return Tuple;
10795 }
10796
10797 return Builder.CreateBitCast(Val, Ty);
10798}
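// Illustrative example (assumption, not in the upstream source):
// reinterpreting svuint16_t as svfloat16_t is a single bitcast from
// <vscale x 8 x i16> to <vscale x 8 x half>; for tuple (struct) values each
// member vector is bitcast separately and reassembled with insertvalue.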
10799
10800static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10801 SmallVectorImpl<Value *> &Ops) {
10802 auto *SplatZero = Constant::getNullValue(Ty);
10803 Ops.insert(Ops.begin(), SplatZero);
10804}
10805
10806static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10807 SmallVectorImpl<Value *> &Ops) {
10808 auto *SplatUndef = UndefValue::get(Ty);
10809 Ops.insert(Ops.begin(), SplatUndef);
10810}
10811
10812 SmallVector<llvm::Type *, 2>
10813 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10814 llvm::Type *ResultType,
10815 ArrayRef<Value *> Ops) {
10816 if (TypeFlags.isOverloadNone())
10817 return {};
10818
10819 llvm::Type *DefaultType = getSVEType(TypeFlags);
10820
10821 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10822 return {DefaultType, Ops[1]->getType()};
10823
10824 if (TypeFlags.isOverloadWhileRW())
10825 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10826
10827 if (TypeFlags.isOverloadCvt())
10828 return {Ops[0]->getType(), Ops.back()->getType()};
10829
10830 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10831 ResultType->isVectorTy())
10832 return {ResultType, Ops[1]->getType()};
10833
10834 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10835 return {DefaultType};
10836}
10837
10838 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10839 ArrayRef<Value *> Ops) {
10840 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10841 "Expects TypeFlags.isTupleSet() or TypeFlags.isTupleGet()");
10842 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
10843
10844 if (TypeFlags.isTupleSet())
10845 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10846 return Builder.CreateExtractValue(Ops[0], Idx);
10847}
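// Illustrative example (assumption, not in the upstream source): svget2 with
// index 1 is a tuple-get and lowers to extractvalue %tuple, 1, while svset2
// with index 1 lowers to insertvalue %tuple, %vec, 1 on the struct-typed
// tuple value.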
10848
10849 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10850 llvm::Type *Ty,
10851 ArrayRef<Value *> Ops) {
10852 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10853
10854 Value *Tuple = llvm::PoisonValue::get(Ty);
10855 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10856 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10857
10858 return Tuple;
10859}
10860
10861 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10862 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10863 SVETypeFlags TypeFlags) {
10864 // Find out if any arguments are required to be integer constant expressions.
10865 unsigned ICEArguments = 0;
10866 ASTContext::GetBuiltinTypeError Error;
10867 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10868 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10869
10870 // Tuple set/get only requires one insert/extract vector, which is
10871 // created by EmitSVETupleSetOrGet.
10872 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10873
10874 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10875 bool IsICE = ICEArguments & (1 << i);
10876 Value *Arg = EmitScalarExpr(E->getArg(i));
10877
10878 if (IsICE) {
10879 // If this is required to be a constant, constant fold it so that we know
10880 // that the generated intrinsic gets a ConstantInt.
10881 std::optional<llvm::APSInt> Result =
10882 E->getArg(i)->getIntegerConstantExpr(getContext());
10883 assert(Result && "Expected argument to be a constant");
10884
10885 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10886 // truncate because the immediate has been range checked and no valid
10887 // immediate requires more than a handful of bits.
10888 *Result = Result->extOrTrunc(32);
10889 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10890 continue;
10891 }
10892
10893 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10894 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10895 Ops.push_back(Builder.CreateExtractValue(Arg, I));
10896
10897 continue;
10898 }
10899
10900 Ops.push_back(Arg);
10901 }
10902}
10903
10904 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10905 const CallExpr *E) {
10906 llvm::Type *Ty = ConvertType(E->getType());
10907 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10908 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10909 Value *Val = EmitScalarExpr(E->getArg(0));
10910 return EmitSVEReinterpret(Val, Ty);
10911 }
10912
10913 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10914 AArch64SVEIntrinsicsProvenSorted);
10915
10916 llvm::SmallVector<Value *, 4> Ops;
10917 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10918 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10919
10920 if (TypeFlags.isLoad())
10921 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10922 TypeFlags.isZExtReturn());
10923 else if (TypeFlags.isStore())
10924 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10925 else if (TypeFlags.isGatherLoad())
10926 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10927 else if (TypeFlags.isScatterStore())
10928 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10929 else if (TypeFlags.isPrefetch())
10930 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10931 else if (TypeFlags.isGatherPrefetch())
10932 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10933 else if (TypeFlags.isStructLoad())
10934 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10935 else if (TypeFlags.isStructStore())
10936 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10937 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10938 return EmitSVETupleSetOrGet(TypeFlags, Ops);
10939 else if (TypeFlags.isTupleCreate())
10940 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10941 else if (TypeFlags.isUndef())
10942 return UndefValue::get(Ty);
10943 else if (Builtin->LLVMIntrinsic != 0) {
10944 // Emit set FPMR for intrinsics that require it
10945 if (TypeFlags.setsFPMR())
10946 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
10947 Ops.pop_back_val());
10948 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10949 InsertExplicitZeroOperand(Builder, Ty, Ops);
10950
10951 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10952 InsertExplicitUndefOperand(Builder, Ty, Ops);
10953
10954 // Some ACLE builtins leave out the argument to specify the predicate
10955 // pattern, which is expected to be expanded to an SV_ALL pattern.
10956 if (TypeFlags.isAppendSVALL())
10957 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10958 if (TypeFlags.isInsertOp1SVALL())
10959 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10960
10961 // Predicates must match the main datatype.
10962 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10963 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10964 if (PredTy->getElementType()->isIntegerTy(1))
10965 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10966
10967 // Splat scalar operand to vector (intrinsics with _n infix)
10968 if (TypeFlags.hasSplatOperand()) {
10969 unsigned OpNo = TypeFlags.getSplatOperand();
10970 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10971 }
10972
10973 if (TypeFlags.isReverseCompare())
10974 std::swap(Ops[1], Ops[2]);
10975 else if (TypeFlags.isReverseUSDOT())
10976 std::swap(Ops[1], Ops[2]);
10977 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10978 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10979 std::swap(Ops[1], Ops[2]);
10980 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10981 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10982 std::swap(Ops[1], Ops[3]);
10983
10984 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10985 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10986 llvm::Type *OpndTy = Ops[1]->getType();
10987 auto *SplatZero = Constant::getNullValue(OpndTy);
10988 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10989 }
10990
10991 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10992 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10993 Value *Call = Builder.CreateCall(F, Ops);
10994
10995 if (Call->getType() == Ty)
10996 return Call;
10997
10998 // Predicate results must be converted to svbool_t.
10999 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
11000 return EmitSVEPredicateCast(Call, PredTy);
11001 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
11002 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
11003
11004 llvm_unreachable("unsupported element count!");
11005 }
11006
11007 switch (BuiltinID) {
11008 default:
11009 return nullptr;
11010
11011 case SVE::BI__builtin_sve_svreinterpret_b: {
11012 auto SVCountTy =
11013 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
11014 Function *CastFromSVCountF =
11015 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
11016 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
11017 }
11018 case SVE::BI__builtin_sve_svreinterpret_c: {
11019 auto SVCountTy =
11020 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
11021 Function *CastToSVCountF =
11022 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
11023 return Builder.CreateCall(CastToSVCountF, Ops[0]);
11024 }
11025
11026 case SVE::BI__builtin_sve_svpsel_lane_b8:
11027 case SVE::BI__builtin_sve_svpsel_lane_b16:
11028 case SVE::BI__builtin_sve_svpsel_lane_b32:
11029 case SVE::BI__builtin_sve_svpsel_lane_b64:
11030 case SVE::BI__builtin_sve_svpsel_lane_c8:
11031 case SVE::BI__builtin_sve_svpsel_lane_c16:
11032 case SVE::BI__builtin_sve_svpsel_lane_c32:
11033 case SVE::BI__builtin_sve_svpsel_lane_c64: {
11034 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
11035 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
11036 "aarch64.svcount")) &&
11037 "Unexpected TargetExtType");
11038 auto SVCountTy =
11039 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
11040 Function *CastFromSVCountF =
11041 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
11042 Function *CastToSVCountF =
11043 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
11044
11045 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
11046 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
11047 llvm::Value *Ops0 =
11048 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
11049 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
11050 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
11051 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
11052 }
11053 case SVE::BI__builtin_sve_svmov_b_z: {
11054 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
11055 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11056 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11057 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
11058 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
11059 }
11060
11061 case SVE::BI__builtin_sve_svnot_b_z: {
11062 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
11063 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11064 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11065 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
11066 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
11067 }
11068
11069 case SVE::BI__builtin_sve_svmovlb_u16:
11070 case SVE::BI__builtin_sve_svmovlb_u32:
11071 case SVE::BI__builtin_sve_svmovlb_u64:
11072 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
11073
11074 case SVE::BI__builtin_sve_svmovlb_s16:
11075 case SVE::BI__builtin_sve_svmovlb_s32:
11076 case SVE::BI__builtin_sve_svmovlb_s64:
11077 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
11078
11079 case SVE::BI__builtin_sve_svmovlt_u16:
11080 case SVE::BI__builtin_sve_svmovlt_u32:
11081 case SVE::BI__builtin_sve_svmovlt_u64:
11082 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
11083
11084 case SVE::BI__builtin_sve_svmovlt_s16:
11085 case SVE::BI__builtin_sve_svmovlt_s32:
11086 case SVE::BI__builtin_sve_svmovlt_s64:
11087 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
11088
11089 case SVE::BI__builtin_sve_svpmullt_u16:
11090 case SVE::BI__builtin_sve_svpmullt_u64:
11091 case SVE::BI__builtin_sve_svpmullt_n_u16:
11092 case SVE::BI__builtin_sve_svpmullt_n_u64:
11093 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
11094
11095 case SVE::BI__builtin_sve_svpmullb_u16:
11096 case SVE::BI__builtin_sve_svpmullb_u64:
11097 case SVE::BI__builtin_sve_svpmullb_n_u16:
11098 case SVE::BI__builtin_sve_svpmullb_n_u64:
11099 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
11100
11101 case SVE::BI__builtin_sve_svdup_n_b8:
11102 case SVE::BI__builtin_sve_svdup_n_b16:
11103 case SVE::BI__builtin_sve_svdup_n_b32:
11104 case SVE::BI__builtin_sve_svdup_n_b64: {
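  // Note: svdup_n_b* takes a scalar: compare it against zero, splat the i1
  // result across the overloaded SVE type, then cast the result to the
  // predicate width expected by the builtin's return type.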
11105 Value *CmpNE =
11106 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
11107 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
11108 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
11109 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
11110 }
11111
11112 case SVE::BI__builtin_sve_svdupq_n_b8:
11113 case SVE::BI__builtin_sve_svdupq_n_b16:
11114 case SVE::BI__builtin_sve_svdupq_n_b32:
11115 case SVE::BI__builtin_sve_svdupq_n_b64:
11116 case SVE::BI__builtin_sve_svdupq_n_u8:
11117 case SVE::BI__builtin_sve_svdupq_n_s8:
11118 case SVE::BI__builtin_sve_svdupq_n_u64:
11119 case SVE::BI__builtin_sve_svdupq_n_f64:
11120 case SVE::BI__builtin_sve_svdupq_n_s64:
11121 case SVE::BI__builtin_sve_svdupq_n_u16:
11122 case SVE::BI__builtin_sve_svdupq_n_f16:
11123 case SVE::BI__builtin_sve_svdupq_n_bf16:
11124 case SVE::BI__builtin_sve_svdupq_n_s16:
11125 case SVE::BI__builtin_sve_svdupq_n_u32:
11126 case SVE::BI__builtin_sve_svdupq_n_f32:
11127 case SVE::BI__builtin_sve_svdupq_n_s32: {
11128 // These builtins are implemented by building a fixed-length vector from the
11129 // scalar operands and replicating its first 128-bit segment with dupq_lane.
11130 unsigned NumOpnds = Ops.size();
11131
11132 bool IsBoolTy =
11133 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
11134
11135 // For svdupq_n_b* the element type is an integer of width 128/numelts,
11136 // so that the compare can use the width that is natural for the expected
11137 // number of predicate lanes.
11138 llvm::Type *EltTy = Ops[0]->getType();
11139 if (IsBoolTy)
11140 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
11141
11142 SmallVector<llvm::Value *, 16> VecOps;
11143 for (unsigned I = 0; I < NumOpnds; ++I)
11144 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
11145 Value *Vec = BuildVector(VecOps);
11146
11147 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
11148 Value *InsertSubVec = Builder.CreateInsertVector(
11149 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
11150
11151 Function *F =
11152 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
11153 Value *DupQLane =
11154 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
11155
11156 if (!IsBoolTy)
11157 return DupQLane;
11158
11159 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11160 Value *Pred = EmitSVEAllTruePred(TypeFlags);
11161
11162 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
11163 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
11164 : Intrinsic::aarch64_sve_cmpne_wide,
11165 OverloadedTy);
11166 Value *Call = Builder.CreateCall(
11167 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
11168 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
11169 }
11170
11171 case SVE::BI__builtin_sve_svpfalse_b:
11172 return ConstantInt::getFalse(Ty);
11173
11174 case SVE::BI__builtin_sve_svpfalse_c: {
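  // Note: an all-false svcount is materialized by converting an all-false
  // svbool through convert_from_svbool.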
11175 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
11176 Function *CastToSVCountF =
11177 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
11178 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
11179 }
11180
11181 case SVE::BI__builtin_sve_svlen_bf16:
11182 case SVE::BI__builtin_sve_svlen_f16:
11183 case SVE::BI__builtin_sve_svlen_f32:
11184 case SVE::BI__builtin_sve_svlen_f64:
11185 case SVE::BI__builtin_sve_svlen_s8:
11186 case SVE::BI__builtin_sve_svlen_s16:
11187 case SVE::BI__builtin_sve_svlen_s32:
11188 case SVE::BI__builtin_sve_svlen_s64:
11189 case SVE::BI__builtin_sve_svlen_u8:
11190 case SVE::BI__builtin_sve_svlen_u16:
11191 case SVE::BI__builtin_sve_svlen_u32:
11192 case SVE::BI__builtin_sve_svlen_u64: {
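  // Note: svlen is the runtime element count, i.e. the minimum element count
  // of the scalable type multiplied by llvm.vscale.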
11193 SVETypeFlags TF(Builtin->TypeModifier);
11194 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
11195 auto *NumEls =
11196 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
11197
11198 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
11199 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
11200 }
11201
11202 case SVE::BI__builtin_sve_svtbl2_u8:
11203 case SVE::BI__builtin_sve_svtbl2_s8:
11204 case SVE::BI__builtin_sve_svtbl2_u16:
11205 case SVE::BI__builtin_sve_svtbl2_s16:
11206 case SVE::BI__builtin_sve_svtbl2_u32:
11207 case SVE::BI__builtin_sve_svtbl2_s32:
11208 case SVE::BI__builtin_sve_svtbl2_u64:
11209 case SVE::BI__builtin_sve_svtbl2_s64:
11210 case SVE::BI__builtin_sve_svtbl2_f16:
11211 case SVE::BI__builtin_sve_svtbl2_bf16:
11212 case SVE::BI__builtin_sve_svtbl2_f32:
11213 case SVE::BI__builtin_sve_svtbl2_f64: {
11214 SVETypeFlags TF(Builtin->TypeModifier);
11215 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
11216 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
11217 return Builder.CreateCall(F, Ops);
11218 }
11219
11220 case SVE::BI__builtin_sve_svset_neonq_s8:
11221 case SVE::BI__builtin_sve_svset_neonq_s16:
11222 case SVE::BI__builtin_sve_svset_neonq_s32:
11223 case SVE::BI__builtin_sve_svset_neonq_s64:
11224 case SVE::BI__builtin_sve_svset_neonq_u8:
11225 case SVE::BI__builtin_sve_svset_neonq_u16:
11226 case SVE::BI__builtin_sve_svset_neonq_u32:
11227 case SVE::BI__builtin_sve_svset_neonq_u64:
11228 case SVE::BI__builtin_sve_svset_neonq_f16:
11229 case SVE::BI__builtin_sve_svset_neonq_f32:
11230 case SVE::BI__builtin_sve_svset_neonq_f64:
11231 case SVE::BI__builtin_sve_svset_neonq_bf16: {
11232 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
11233 }
11234
11235 case SVE::BI__builtin_sve_svget_neonq_s8:
11236 case SVE::BI__builtin_sve_svget_neonq_s16:
11237 case SVE::BI__builtin_sve_svget_neonq_s32:
11238 case SVE::BI__builtin_sve_svget_neonq_s64:
11239 case SVE::BI__builtin_sve_svget_neonq_u8:
11240 case SVE::BI__builtin_sve_svget_neonq_u16:
11241 case SVE::BI__builtin_sve_svget_neonq_u32:
11242 case SVE::BI__builtin_sve_svget_neonq_u64:
11243 case SVE::BI__builtin_sve_svget_neonq_f16:
11244 case SVE::BI__builtin_sve_svget_neonq_f32:
11245 case SVE::BI__builtin_sve_svget_neonq_f64:
11246 case SVE::BI__builtin_sve_svget_neonq_bf16: {
11247 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
11248 }
11249
11250 case SVE::BI__builtin_sve_svdup_neonq_s8:
11251 case SVE::BI__builtin_sve_svdup_neonq_s16:
11252 case SVE::BI__builtin_sve_svdup_neonq_s32:
11253 case SVE::BI__builtin_sve_svdup_neonq_s64:
11254 case SVE::BI__builtin_sve_svdup_neonq_u8:
11255 case SVE::BI__builtin_sve_svdup_neonq_u16:
11256 case SVE::BI__builtin_sve_svdup_neonq_u32:
11257 case SVE::BI__builtin_sve_svdup_neonq_u64:
11258 case SVE::BI__builtin_sve_svdup_neonq_f16:
11259 case SVE::BI__builtin_sve_svdup_neonq_f32:
11260 case SVE::BI__builtin_sve_svdup_neonq_f64:
11261 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
11262 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
11263 Builder.getInt64(0));
11264 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
11265 {Insert, Builder.getInt64(0)});
11266 }
11267 }
11268
11269 // Should not happen.
11270 return nullptr;
11271}
11272
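// The sudot/sumla builtins below are commutative with their usdot/usmla
// counterparts; swapping the two groups of multi-vector operands (which
// follow the ZA slice index in Ops[0]) lets them share a single lowering.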
11273static void swapCommutativeSMEOperands(unsigned BuiltinID,
11274 SmallVectorImpl<Value *> &Ops) {
11275 unsigned MultiVec;
11276 switch (BuiltinID) {
11277 default:
11278 return;
11279 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
11280 MultiVec = 1;
11281 break;
11282 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
11283 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
11284 MultiVec = 2;
11285 break;
11286 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
11287 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
11288 MultiVec = 4;
11289 break;
11290 }
11291
11292 if (MultiVec > 0)
11293 for (unsigned I = 0; I < MultiVec; ++I)
11294 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
11295}
11296
11297Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
11298 const CallExpr *E) {
11299 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
11300 AArch64SMEIntrinsicsProvenSorted);
11301
11302 llvm::SmallVector<Value *, 4> Ops;
11303 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11304 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
11305
11306 if (TypeFlags.isLoad() || TypeFlags.isStore())
11307 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11308 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
11309 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11310 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
11311 BuiltinID == SME::BI__builtin_sme_svzero_za)
11312 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11313 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
11314 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
11315 BuiltinID == SME::BI__builtin_sme_svldr_za ||
11316 BuiltinID == SME::BI__builtin_sme_svstr_za)
11317 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11318
11319 // Emit a call to set FPMR for intrinsics that require it.
11320 if (TypeFlags.setsFPMR())
11321 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
11322 Ops.pop_back_val());
11323 // Handle builtins which require their multi-vector operands to be swapped
11324 swapCommutativeSMEOperands(BuiltinID, Ops);
11325
11326 // Should not happen!
11327 if (Builtin->LLVMIntrinsic == 0)
11328 return nullptr;
11329
11330 if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
11331 // If we already know the streaming mode, don't bother with the intrinsic
11332 // and emit a constant instead
11333 const auto *FD = cast<FunctionDecl>(CurFuncDecl);
11334 if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
11335 unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
11336 if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
11337 bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
11338 return ConstantInt::getBool(Builder.getContext(), IsStreaming);
11339 }
11340 }
11341 }
11342
11343 // Predicates must match the main datatype.
11344 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
11345 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
11346 if (PredTy->getElementType()->isIntegerTy(1))
11347 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
11348
11349 Function *F =
11350 TypeFlags.isOverloadNone()
11351 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
11352 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
11353
11354 return Builder.CreateCall(F, Ops);
11355}
11356
11357Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
11358 const CallExpr *E,
11359 llvm::Triple::ArchType Arch) {
11360 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
11361 BuiltinID <= clang::AArch64::LastSVEBuiltin)
11362 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
11363
11364 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
11365 BuiltinID <= clang::AArch64::LastSMEBuiltin)
11366 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
11367
11368 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
11369 return EmitAArch64CpuSupports(E);
11370
11371 unsigned HintID = static_cast<unsigned>(-1);
11372 switch (BuiltinID) {
11373 default: break;
11374 case clang::AArch64::BI__builtin_arm_nop:
11375 HintID = 0;
11376 break;
11377 case clang::AArch64::BI__builtin_arm_yield:
11378 case clang::AArch64::BI__yield:
11379 HintID = 1;
11380 break;
11381 case clang::AArch64::BI__builtin_arm_wfe:
11382 case clang::AArch64::BI__wfe:
11383 HintID = 2;
11384 break;
11385 case clang::AArch64::BI__builtin_arm_wfi:
11386 case clang::AArch64::BI__wfi:
11387 HintID = 3;
11388 break;
11389 case clang::AArch64::BI__builtin_arm_sev:
11390 case clang::AArch64::BI__sev:
11391 HintID = 4;
11392 break;
11393 case clang::AArch64::BI__builtin_arm_sevl:
11394 case clang::AArch64::BI__sevl:
11395 HintID = 5;
11396 break;
11397 }
11398
11399 if (HintID != static_cast<unsigned>(-1)) {
11400 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
11401 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
11402 }
11403
11404 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
11405 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
11406 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11407 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
11408 }
11409
11410 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
11411 // Create call to __arm_sme_state and store the results to the two pointers.
11412 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
11413 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
11414 false),
11415 "__arm_sme_state"));
11416 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
11417 "aarch64_pstate_sm_compatible");
11418 CI->setAttributes(Attrs);
11419 CI->setCallingConv(
11420 llvm::CallingConv::
11421 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
11422 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
11423 EmitPointerWithAlignment(E->getArg(0)));
11424 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
11425 EmitPointerWithAlignment(E->getArg(1)));
11426 }
11427
11428 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
11429 assert((getContext().getTypeSize(E->getType()) == 32) &&
11430 "rbit of unusual size!");
11431 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11432 return Builder.CreateCall(
11433 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11434 }
11435 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11436 assert((getContext().getTypeSize(E->getType()) == 64) &&
11437 "rbit of unusual size!");
11438 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11439 return Builder.CreateCall(
11440 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11441 }
11442
11443 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11444 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11445 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11446 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
11447 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11448 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11449 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
11450 return Res;
11451 }
11452
11453 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11454 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11455 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11456 "cls");
11457 }
11458 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11459 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11460 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11461 "cls");
11462 }
11463
11464 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11465 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11466 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11467 llvm::Type *Ty = Arg->getType();
11468 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11469 Arg, "frint32z");
11470 }
11471
11472 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11473 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11474 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11475 llvm::Type *Ty = Arg->getType();
11476 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11477 Arg, "frint64z");
11478 }
11479
11480 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11481 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11482 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11483 llvm::Type *Ty = Arg->getType();
11484 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11485 Arg, "frint32x");
11486 }
11487
11488 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11489 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11490 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11491 llvm::Type *Ty = Arg->getType();
11492 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11493 Arg, "frint64x");
11494 }
11495
11496 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11497 assert((getContext().getTypeSize(E->getType()) == 32) &&
11498 "__jcvt of unusual size!");
11499 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11500 return Builder.CreateCall(
11501 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11502 }
11503
11504 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11505 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11506 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11507 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11508 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11509 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11510
11511 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11512 // Load from the address via an LLVM intrinsic, receiving a
11513 // tuple of 8 i64 words, and store each one to ValPtr.
11514 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11515 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11516 llvm::Value *ToRet;
11517 for (size_t i = 0; i < 8; i++) {
11518 llvm::Value *ValOffsetPtr =
11519 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11520 Address Addr =
11521 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11522 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11523 }
11524 return ToRet;
11525 } else {
11526 // Load 8 i64 words from ValPtr, and store them to the address
11527 // via an LLVM intrinsic.
11528 SmallVector<llvm::Value *, 9> Args;
11529 Args.push_back(MemAddr);
11530 for (size_t i = 0; i < 8; i++) {
11531 llvm::Value *ValOffsetPtr =
11532 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11533 Address Addr =
11534 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11535 Args.push_back(Builder.CreateLoad(Addr));
11536 }
11537
11538 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11539 ? Intrinsic::aarch64_st64b
11540 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11541 ? Intrinsic::aarch64_st64bv
11542 : Intrinsic::aarch64_st64bv0);
11543 Function *F = CGM.getIntrinsic(Intr);
11544 return Builder.CreateCall(F, Args);
11545 }
11546 }
11547
11548 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11549 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11550
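    // Both rndr and rndrrs return a {value, status} pair: store the random
    // value through the pointer argument and return the i1 status flag
    // zero-extended to i32.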
11551 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11552 ? Intrinsic::aarch64_rndr
11553 : Intrinsic::aarch64_rndrrs);
11554 Function *F = CGM.getIntrinsic(Intr);
11555 llvm::Value *Val = Builder.CreateCall(F);
11556 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11557 Value *Status = Builder.CreateExtractValue(Val, 1);
11558
11559 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11560 Builder.CreateStore(RandomValue, MemAddress);
11561 Status = Builder.CreateZExt(Status, Int32Ty);
11562 return Status;
11563 }
11564
11565 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11566 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11567 const FunctionDecl *FD = E->getDirectCallee();
11568 Value *Ops[2];
11569 for (unsigned i = 0; i < 2; i++)
11570 Ops[i] = EmitScalarExpr(E->getArg(i));
11571 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11572 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11573 StringRef Name = FD->getName();
11574 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11575 }
11576
11577 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11578 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11579 getContext().getTypeSize(E->getType()) == 128) {
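    // A 128-bit ldxp/ldaxp yields a pair of i64 values; zero-extend both
    // halves to i128 and recombine them (second struct element in the upper
    // 64 bits) before bitcasting to the requested result type.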
11580 Function *F =
11581 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11582 ? Intrinsic::aarch64_ldaxp
11583 : Intrinsic::aarch64_ldxp);
11584
11585 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11586 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11587
11588 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11589 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11590 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11591 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11592 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11593
11594 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11595 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11596 Val = Builder.CreateOr(Val, Val1);
11597 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11598 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11599 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11600 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11601
11602 QualType Ty = E->getType();
11603 llvm::Type *RealResTy = ConvertType(Ty);
11604 llvm::Type *IntTy =
11605 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11606
11607 Function *F =
11608 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11609 ? Intrinsic::aarch64_ldaxr
11610 : Intrinsic::aarch64_ldxr,
11611 UnqualPtrTy);
11612 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11613 Val->addParamAttr(
11614 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11615
11616 if (RealResTy->isPointerTy())
11617 return Builder.CreateIntToPtr(Val, RealResTy);
11618
11619 llvm::Type *IntResTy = llvm::IntegerType::get(
11620 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11621 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11622 RealResTy);
11623 }
11624
11625 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11626 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11627 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
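    // A 128-bit stxp/stlxp takes the value as two i64 halves: spill the
    // argument to a temporary, reload it as {i64, i64}, and pass both halves
    // together with the destination pointer.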
11628 Function *F =
11629 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11630 ? Intrinsic::aarch64_stlxp
11631 : Intrinsic::aarch64_stxp);
11632 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11633
11634 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11635 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11636
11637 Tmp = Tmp.withElementType(STy);
11638 llvm::Value *Val = Builder.CreateLoad(Tmp);
11639
11640 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11641 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11642 Value *StPtr = EmitScalarExpr(E->getArg(1));
11643 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11644 }
11645
11646 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11647 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11648 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11649 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11650
11651 QualType Ty = E->getArg(0)->getType();
11652 llvm::Type *StoreTy =
11653 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11654
11655 if (StoreVal->getType()->isPointerTy())
11656 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11657 else {
11658 llvm::Type *IntTy = llvm::IntegerType::get(
11659 getLLVMContext(),
11660 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11661 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11662 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11663 }
11664
11665 Function *F =
11666 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11667 ? Intrinsic::aarch64_stlxr
11668 : Intrinsic::aarch64_stxr,
11669 StoreAddr->getType());
11670 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11671 CI->addParamAttr(
11672 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11673 return CI;
11674 }
11675
11676 if (BuiltinID == clang::AArch64::BI__getReg) {
11677 Expr::EvalResult Result;
11678 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11679 llvm_unreachable("Sema will ensure that the parameter is constant");
11680
11681 llvm::APSInt Value = Result.Val.getInt();
11682 LLVMContext &Context = CGM.getLLVMContext();
11683 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11684
11685 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11686 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11687 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11688
11689 llvm::Function *F =
11690 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11691 return Builder.CreateCall(F, Metadata);
11692 }
11693
11694 if (BuiltinID == clang::AArch64::BI__break) {
11695 Expr::EvalResult Result;
11696 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11697 llvm_unreachable("Sema will ensure that the parameter is constant");
11698
11699 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11700 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11701 }
11702
11703 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11704 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11705 return Builder.CreateCall(F);
11706 }
11707
11708 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11709 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11710 llvm::SyncScope::SingleThread);
11711
11712 // CRC32
11713 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11714 switch (BuiltinID) {
11715 case clang::AArch64::BI__builtin_arm_crc32b:
11716 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11717 case clang::AArch64::BI__builtin_arm_crc32cb:
11718 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11719 case clang::AArch64::BI__builtin_arm_crc32h:
11720 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11721 case clang::AArch64::BI__builtin_arm_crc32ch:
11722 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11723 case clang::AArch64::BI__builtin_arm_crc32w:
11724 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11725 case clang::AArch64::BI__builtin_arm_crc32cw:
11726 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11727 case clang::AArch64::BI__builtin_arm_crc32d:
11728 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11729 case clang::AArch64::BI__builtin_arm_crc32cd:
11730 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11731 }
11732
11733 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11734 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11735 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11736 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11737
11738 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11739 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11740
11741 return Builder.CreateCall(F, {Arg0, Arg1});
11742 }
11743
11744 // Memory Operations (MOPS)
11745 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11746 Value *Dst = EmitScalarExpr(E->getArg(0));
11747 Value *Val = EmitScalarExpr(E->getArg(1));
11748 Value *Size = EmitScalarExpr(E->getArg(2));
11749 Val = Builder.CreateTrunc(Val, Int8Ty);
11750 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11751 return Builder.CreateCall(
11752 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11753 }
11754
11755 // Memory Tagging Extensions (MTE) Intrinsics
11756 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11757 switch (BuiltinID) {
11758 case clang::AArch64::BI__builtin_arm_irg:
11759 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11760 case clang::AArch64::BI__builtin_arm_addg:
11761 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11762 case clang::AArch64::BI__builtin_arm_gmi:
11763 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11764 case clang::AArch64::BI__builtin_arm_ldg:
11765 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11766 case clang::AArch64::BI__builtin_arm_stg:
11767 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11768 case clang::AArch64::BI__builtin_arm_subp:
11769 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11770 }
11771
11772 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11773 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11774 Value *Pointer = EmitScalarExpr(E->getArg(0));
11775 Value *Mask = EmitScalarExpr(E->getArg(1));
11776
11777 Mask = Builder.CreateZExt(Mask, Int64Ty);
11778 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11779 {Pointer, Mask});
11780 }
11781 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11782 Value *Pointer = EmitScalarExpr(E->getArg(0));
11783 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11784
11785 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11786 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11787 {Pointer, TagOffset});
11788 }
11789 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11790 Value *Pointer = EmitScalarExpr(E->getArg(0));
11791 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11792
11793 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11794 return Builder.CreateCall(
11795 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11796 }
11797 // Although it is possible to supply a different return address (the first
11798 // argument) to this intrinsic, for now we set the return address to be the
11799 // same as the input address.
11800 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11801 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11802 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11803 {TagAddress, TagAddress});
11804 }
11805 // Although it is possible to supply a different tag (to set) to this
11806 // intrinsic (as the first argument), for now we supply the tag that is in
11807 // the input address argument (the common use case).
11808 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11809 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11810 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11811 {TagAddress, TagAddress});
11812 }
11813 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11814 Value *PointerA = EmitScalarExpr(E->getArg(0));
11815 Value *PointerB = EmitScalarExpr(E->getArg(1));
11816 return Builder.CreateCall(
11817 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11818 }
11819 }
11820
11821 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11822 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11823 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11824 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11825 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11826 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11827 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11828 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11829
11830 SpecialRegisterAccessKind AccessKind = Write;
11831 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11832 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11833 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11834 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11835 AccessKind = VolatileRead;
11836
11837 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11838 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11839
11840 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11841 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11842
11843 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11844 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11845
11846 llvm::Type *ValueType;
11847 llvm::Type *RegisterType = Int64Ty;
11848 if (Is32Bit) {
11849 ValueType = Int32Ty;
11850 } else if (Is128Bit) {
11851 llvm::Type *Int128Ty =
11852 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11853 ValueType = Int128Ty;
11854 RegisterType = Int128Ty;
11855 } else if (IsPointerBuiltin) {
11856 ValueType = VoidPtrTy;
11857 } else {
11858 ValueType = Int64Ty;
11859 }
11860
11861 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11862 AccessKind);
11863 }
11864
11865 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11866 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11867 LLVMContext &Context = CGM.getLLVMContext();
11868
11869 unsigned SysReg =
11870 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11871
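    // Decode the immediate into the "op0:op1:CRn:CRm:op2" string expected by
    // the read/write_register metadata: op0 is 2 or 3 (selected by bit 14),
    // op1 is bits [13:11], CRn is bits [10:7], CRm is bits [6:3] and op2 is
    // bits [2:0].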
11872 std::string SysRegStr;
11873 llvm::raw_string_ostream(SysRegStr) <<
11874 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11875 ((SysReg >> 11) & 7) << ":" <<
11876 ((SysReg >> 7) & 15) << ":" <<
11877 ((SysReg >> 3) & 15) << ":" <<
11878 ( SysReg & 7);
11879
11880 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11881 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11882 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11883
11884 llvm::Type *RegisterType = Int64Ty;
11885 llvm::Type *Types[] = { RegisterType };
11886
11887 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11888 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11889
11890 return Builder.CreateCall(F, Metadata);
11891 }
11892
11893 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11894 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11895
11896 return Builder.CreateCall(F, { Metadata, ArgValue });
11897 }
11898
11899 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11900 llvm::Function *F =
11901 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11902 return Builder.CreateCall(F);
11903 }
11904
11905 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11906 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11907 return Builder.CreateCall(F);
11908 }
11909
11910 if (BuiltinID == clang::AArch64::BI__mulh ||
11911 BuiltinID == clang::AArch64::BI__umulh) {
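    // __mulh/__umulh return the high 64 bits of a 64x64-bit multiplication:
    // widen both operands to i128, multiply, and shift the product right by
    // 64 before truncating to the result type.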
11912 llvm::Type *ResType = ConvertType(E->getType());
11913 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11914
11915 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11916 Value *LHS =
11917 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11918 Value *RHS =
11919 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11920
11921 Value *MulResult, *HigherBits;
11922 if (IsSigned) {
11923 MulResult = Builder.CreateNSWMul(LHS, RHS);
11924 HigherBits = Builder.CreateAShr(MulResult, 64);
11925 } else {
11926 MulResult = Builder.CreateNUWMul(LHS, RHS);
11927 HigherBits = Builder.CreateLShr(MulResult, 64);
11928 }
11929 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11930
11931 return HigherBits;
11932 }
11933
11934 if (BuiltinID == AArch64::BI__writex18byte ||
11935 BuiltinID == AArch64::BI__writex18word ||
11936 BuiltinID == AArch64::BI__writex18dword ||
11937 BuiltinID == AArch64::BI__writex18qword) {
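    // The x18 intrinsics access memory relative to the platform register:
    // read x18 as a pointer, add the zero-extended byte offset, and store the
    // data with single-byte alignment.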
11938 // Process the args first
11939 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11940 Value *DataArg = EmitScalarExpr(E->getArg(1));
11941
11942 // Read x18 as i8*
11943 llvm::Value *X18 = readX18AsPtr(*this);
11944
11945 // Store val at x18 + offset
11946 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11947 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11948 StoreInst *Store =
11949 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
11950 return Store;
11951 }
11952
11953 if (BuiltinID == AArch64::BI__readx18byte ||
11954 BuiltinID == AArch64::BI__readx18word ||
11955 BuiltinID == AArch64::BI__readx18dword ||
11956 BuiltinID == AArch64::BI__readx18qword) {
11957 // Process the args first
11958 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11959
11960 // Read x18 as i8*
11961 llvm::Value *X18 = readX18AsPtr(*this);
11962
11963 // Load x18 + offset
11964 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11965 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11966 llvm::Type *IntTy = ConvertType(E->getType());
11967 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11968 return Load;
11969 }
11970
11971 if (BuiltinID == AArch64::BI__addx18byte ||
11972 BuiltinID == AArch64::BI__addx18word ||
11973 BuiltinID == AArch64::BI__addx18dword ||
11974 BuiltinID == AArch64::BI__addx18qword ||
11975 BuiltinID == AArch64::BI__incx18byte ||
11976 BuiltinID == AArch64::BI__incx18word ||
11977 BuiltinID == AArch64::BI__incx18dword ||
11978 BuiltinID == AArch64::BI__incx18qword) {
11979 llvm::Type *IntTy;
11980 bool isIncrement;
11981 switch (BuiltinID) {
11982 case AArch64::BI__incx18byte:
11983 IntTy = Int8Ty;
11984 isIncrement = true;
11985 break;
11986 case AArch64::BI__incx18word:
11987 IntTy = Int16Ty;
11988 isIncrement = true;
11989 break;
11990 case AArch64::BI__incx18dword:
11991 IntTy = Int32Ty;
11992 isIncrement = true;
11993 break;
11994 case AArch64::BI__incx18qword:
11995 IntTy = Int64Ty;
11996 isIncrement = true;
11997 break;
11998 default:
11999 IntTy = ConvertType(E->getArg(1)->getType());
12000 isIncrement = false;
12001 break;
12002 }
12003 // Process the args first
12004 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
12005 Value *ValToAdd =
12006 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
12007
12008 // Read x18 as i8*
12009 llvm::Value *X18 = readX18AsPtr(*this);
12010
12011 // Load x18 + offset
12012 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
12013 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
12014 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
12015
12016 // Add values
12017 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
12018
12019 // Store val at x18 + offset
12020 StoreInst *Store =
12021 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
12022 return Store;
12023 }
12024
12025 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
12026 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
12027 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
12028 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
12029 Value *Arg = EmitScalarExpr(E->getArg(0));
12030 llvm::Type *RetTy = ConvertType(E->getType());
12031 return Builder.CreateBitCast(Arg, RetTy);
12032 }
12033
12034 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
12035 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
12036 BuiltinID == AArch64::BI_CountLeadingZeros ||
12037 BuiltinID == AArch64::BI_CountLeadingZeros64) {
12038 Value *Arg = EmitScalarExpr(E->getArg(0));
12039 llvm::Type *ArgType = Arg->getType();
12040
12041 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
12042 BuiltinID == AArch64::BI_CountLeadingOnes64)
12043 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
12044
12045 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
12046 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
12047
12048 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
12049 BuiltinID == AArch64::BI_CountLeadingZeros64)
12050 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12051 return Result;
12052 }
12053
12054 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
12055 BuiltinID == AArch64::BI_CountLeadingSigns64) {
12056 Value *Arg = EmitScalarExpr(E->getArg(0));
12057
12058 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
12059 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
12060 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
12061
12062 Value *Result = Builder.CreateCall(F, Arg, "cls");
12063 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
12064 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12065 return Result;
12066 }
12067
12068 if (BuiltinID == AArch64::BI_CountOneBits ||
12069 BuiltinID == AArch64::BI_CountOneBits64) {
12070 Value *ArgValue = EmitScalarExpr(E->getArg(0));
12071 llvm::Type *ArgType = ArgValue->getType();
12072 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
12073
12074 Value *Result = Builder.CreateCall(F, ArgValue);
12075 if (BuiltinID == AArch64::BI_CountOneBits64)
12076 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12077 return Result;
12078 }
12079
12080 if (BuiltinID == AArch64::BI__prefetch) {
12081 Value *Address = EmitScalarExpr(E->getArg(0));
12082 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
12083 Value *Locality = ConstantInt::get(Int32Ty, 3);
12084 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
12085 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
12086 return Builder.CreateCall(F, {Address, RW, Locality, Data});
12087 }
12088
12089 if (BuiltinID == AArch64::BI__hlt) {
12090 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
12091 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
12092
12093 // Return 0 for convenience, even though MSVC returns some other undefined
12094 // value.
12095 return ConstantInt::get(Builder.getInt32Ty(), 0);
12096 }
12097
12098 // Handle MSVC intrinsics before argument evaluation to prevent double
12099 // evaluation.
12100 if (std::optional<MSVCIntrin> MsvcIntId =
12101 translateAarch64ToMsvcIntrin(BuiltinID))
12102 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
12103
12104 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
12105 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
12106 return P.first == BuiltinID;
12107 });
12108 if (It != end(NEONEquivalentIntrinsicMap))
12109 BuiltinID = It->second;
12110
12111 // Find out if any arguments are required to be integer constant
12112 // expressions.
12113 unsigned ICEArguments = 0;
12114 ASTContext::GetBuiltinTypeError Error;
12115 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
12116 assert(Error == ASTContext::GE_None && "Should not codegen an error");
12117
12118 llvm::SmallVector<Value*, 4> Ops;
12119 Address PtrOp0 = Address::invalid();
12120 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
12121 if (i == 0) {
12122 switch (BuiltinID) {
12123 case NEON::BI__builtin_neon_vld1_v:
12124 case NEON::BI__builtin_neon_vld1q_v:
12125 case NEON::BI__builtin_neon_vld1_dup_v:
12126 case NEON::BI__builtin_neon_vld1q_dup_v:
12127 case NEON::BI__builtin_neon_vld1_lane_v:
12128 case NEON::BI__builtin_neon_vld1q_lane_v:
12129 case NEON::BI__builtin_neon_vst1_v:
12130 case NEON::BI__builtin_neon_vst1q_v:
12131 case NEON::BI__builtin_neon_vst1_lane_v:
12132 case NEON::BI__builtin_neon_vst1q_lane_v:
12133 case NEON::BI__builtin_neon_vldap1_lane_s64:
12134 case NEON::BI__builtin_neon_vldap1q_lane_s64:
12135 case NEON::BI__builtin_neon_vstl1_lane_s64:
12136 case NEON::BI__builtin_neon_vstl1q_lane_s64:
12137 // Get the alignment for the argument in addition to the value;
12138 // we'll use it later.
12139 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
12140 Ops.push_back(PtrOp0.emitRawPointer(*this));
12141 continue;
12142 }
12143 }
12144 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
12145 }
12146
12147 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
12148 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
12149 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
12150
12151 if (Builtin) {
12152 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
12153 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
12154 assert(Result && "SISD intrinsic should have been handled");
12155 return Result;
12156 }
12157
12158 const Expr *Arg = E->getArg(E->getNumArgs()-1);
12159 NeonTypeFlags Type(0);
12160 if (std::optional<llvm::APSInt> Result =
12161 Arg->getIntegerConstantExpr(getContext()))
12162 // Determine the type of this overloaded NEON intrinsic.
12163 Type = NeonTypeFlags(Result->getZExtValue());
12164
12165 bool usgn = Type.isUnsigned();
12166 bool quad = Type.isQuad();
12167
12168 // Handle non-overloaded intrinsics first.
12169 switch (BuiltinID) {
12170 default: break;
12171 case NEON::BI__builtin_neon_vabsh_f16:
12172 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12173 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
12174 case NEON::BI__builtin_neon_vaddq_p128: {
12175 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
12176 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12177 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12178 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12179 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
12180 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12181 return Builder.CreateBitCast(Ops[0], Int128Ty);
12182 }
12183 case NEON::BI__builtin_neon_vldrq_p128: {
12184 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12185 Value *Ptr = EmitScalarExpr(E->getArg(0));
12186 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
12187 CharUnits::fromQuantity(16));
12188 }
12189 case NEON::BI__builtin_neon_vstrq_p128: {
12190 Value *Ptr = Ops[0];
12191 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
12192 }
12193 case NEON::BI__builtin_neon_vcvts_f32_u32:
12194 case NEON::BI__builtin_neon_vcvtd_f64_u64:
12195 usgn = true;
12196 [[fallthrough]];
12197 case NEON::BI__builtin_neon_vcvts_f32_s32:
12198 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
12199 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12200 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
12201 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
12202 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
12203 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12204 if (usgn)
12205 return Builder.CreateUIToFP(Ops[0], FTy);
12206 return Builder.CreateSIToFP(Ops[0], FTy);
12207 }
12208 case NEON::BI__builtin_neon_vcvth_f16_u16:
12209 case NEON::BI__builtin_neon_vcvth_f16_u32:
12210 case NEON::BI__builtin_neon_vcvth_f16_u64:
12211 usgn = true;
12212 [[fallthrough]];
12213 case NEON::BI__builtin_neon_vcvth_f16_s16:
12214 case NEON::BI__builtin_neon_vcvth_f16_s32:
12215 case NEON::BI__builtin_neon_vcvth_f16_s64: {
12216 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12217 llvm::Type *FTy = HalfTy;
12218 llvm::Type *InTy;
12219 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
12220 InTy = Int64Ty;
12221 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
12222 InTy = Int32Ty;
12223 else
12224 InTy = Int16Ty;
12225 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12226 if (usgn)
12227 return Builder.CreateUIToFP(Ops[0], FTy);
12228 return Builder.CreateSIToFP(Ops[0], FTy);
12229 }
12230 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12231 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12232 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12233 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12234 case NEON::BI__builtin_neon_vcvth_u16_f16:
12235 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12236 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12237 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12238 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12239 case NEON::BI__builtin_neon_vcvth_s16_f16: {
12240 unsigned Int;
12241 llvm::Type* InTy = Int32Ty;
12242 llvm::Type* FTy = HalfTy;
12243 llvm::Type *Tys[2] = {InTy, FTy};
12244 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12245 switch (BuiltinID) {
12246 default: llvm_unreachable("missing builtin ID in switch!");
12247 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12248 Int = Intrinsic::aarch64_neon_fcvtau; break;
12249 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12250 Int = Intrinsic::aarch64_neon_fcvtmu; break;
12251 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12252 Int = Intrinsic::aarch64_neon_fcvtnu; break;
12253 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12254 Int = Intrinsic::aarch64_neon_fcvtpu; break;
12255 case NEON::BI__builtin_neon_vcvth_u16_f16:
12256 Int = Intrinsic::aarch64_neon_fcvtzu; break;
12257 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12258 Int = Intrinsic::aarch64_neon_fcvtas; break;
12259 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12260 Int = Intrinsic::aarch64_neon_fcvtms; break;
12261 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12262 Int = Intrinsic::aarch64_neon_fcvtns; break;
12263 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12264 Int = Intrinsic::aarch64_neon_fcvtps; break;
12265 case NEON::BI__builtin_neon_vcvth_s16_f16:
12266 Int = Intrinsic::aarch64_neon_fcvtzs; break;
12267 }
12268 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
12269 return Builder.CreateTrunc(Ops[0], Int16Ty);
12270 }
12271 case NEON::BI__builtin_neon_vcaleh_f16:
12272 case NEON::BI__builtin_neon_vcalth_f16:
12273 case NEON::BI__builtin_neon_vcageh_f16:
12274 case NEON::BI__builtin_neon_vcagth_f16: {
12275 unsigned Int;
12276 llvm::Type* InTy = Int32Ty;
12277 llvm::Type* FTy = HalfTy;
12278 llvm::Type *Tys[2] = {InTy, FTy};
12279 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12280 switch (BuiltinID) {
12281 default: llvm_unreachable("missing builtin ID in switch!");
12282 case NEON::BI__builtin_neon_vcageh_f16:
12283 Int = Intrinsic::aarch64_neon_facge; break;
12284 case NEON::BI__builtin_neon_vcagth_f16:
12285 Int = Intrinsic::aarch64_neon_facgt; break;
12286 case NEON::BI__builtin_neon_vcaleh_f16:
12287 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
12288 case NEON::BI__builtin_neon_vcalth_f16:
12289 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
12290 }
12291 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
12292 return Builder.CreateTrunc(Ops[0], Int16Ty);
12293 }
12294 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12295 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
12296 unsigned Int;
12297 llvm::Type* InTy = Int32Ty;
12298 llvm::Type* FTy = HalfTy;
12299 llvm::Type *Tys[2] = {InTy, FTy};
12300 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12301 switch (BuiltinID) {
12302 default: llvm_unreachable("missing builtin ID in switch!");
12303 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12304 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
12305 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
12306 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
12307 }
12308 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12309 return Builder.CreateTrunc(Ops[0], Int16Ty);
12310 }
12311 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12312 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
12313 unsigned Int;
12314 llvm::Type* FTy = HalfTy;
12315 llvm::Type* InTy = Int32Ty;
12316 llvm::Type *Tys[2] = {FTy, InTy};
12317 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12318 switch (BuiltinID) {
12319 default: llvm_unreachable("missing builtin ID in switch!");
12320 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12321 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
12322 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
12323 break;
12324 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
12325 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
12326 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
12327 break;
12328 }
12329 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12330 }
12331 case NEON::BI__builtin_neon_vpaddd_s64: {
12332 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
12333 Value *Vec = EmitScalarExpr(E->getArg(0));
12334 // The vector is v2i64, so make sure it's bitcast to that.
12335 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
12336 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12337 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12338 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12339 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12340 // Pairwise addition of a v2i64 into a scalar i64.
12341 return Builder.CreateAdd(Op0, Op1, "vpaddd");
12342 }
12343 case NEON::BI__builtin_neon_vpaddd_f64: {
12344 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
12345 Value *Vec = EmitScalarExpr(E->getArg(0));
12346 // The vector is v2f64, so make sure it's bitcast to that.
12347 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
12348 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12349 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12350 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12351 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12352 // Pairwise addition of a v2f64 into a scalar f64.
12353 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12354 }
12355 case NEON::BI__builtin_neon_vpadds_f32: {
12356 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
12357 Value *Vec = EmitScalarExpr(E->getArg(0));
12358 // The vector is v2f32, so make sure it's bitcast to that.
12359 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
12360 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12361 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12362 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12363 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12364 // Pairwise addition of a v2f32 into a scalar f32.
12365 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12366 }
12367 case NEON::BI__builtin_neon_vceqzd_s64:
12368 case NEON::BI__builtin_neon_vceqzd_f64:
12369 case NEON::BI__builtin_neon_vceqzs_f32:
12370 case NEON::BI__builtin_neon_vceqzh_f16:
12371 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12372 return EmitAArch64CompareBuiltinExpr(
12373 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12374 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
12375 case NEON::BI__builtin_neon_vcgezd_s64:
12376 case NEON::BI__builtin_neon_vcgezd_f64:
12377 case NEON::BI__builtin_neon_vcgezs_f32:
12378 case NEON::BI__builtin_neon_vcgezh_f16:
12379 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12380 return EmitAArch64CompareBuiltinExpr(
12381 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12382 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
12383 case NEON::BI__builtin_neon_vclezd_s64:
12384 case NEON::BI__builtin_neon_vclezd_f64:
12385 case NEON::BI__builtin_neon_vclezs_f32:
12386 case NEON::BI__builtin_neon_vclezh_f16:
12387 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12388 return EmitAArch64CompareBuiltinExpr(
12389 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12390 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
12391 case NEON::BI__builtin_neon_vcgtzd_s64:
12392 case NEON::BI__builtin_neon_vcgtzd_f64:
12393 case NEON::BI__builtin_neon_vcgtzs_f32:
12394 case NEON::BI__builtin_neon_vcgtzh_f16:
12395 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12396 return EmitAArch64CompareBuiltinExpr(
12397 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12398 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
12399 case NEON::BI__builtin_neon_vcltzd_s64:
12400 case NEON::BI__builtin_neon_vcltzd_f64:
12401 case NEON::BI__builtin_neon_vcltzs_f32:
12402 case NEON::BI__builtin_neon_vcltzh_f16:
12403 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12404 return EmitAArch64CompareBuiltinExpr(
12405 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12406 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
12407
12408 case NEON::BI__builtin_neon_vceqzd_u64: {
12409 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12410 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12411 Ops[0] =
12412 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
12413 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
12414 }
12415 case NEON::BI__builtin_neon_vceqd_f64:
12416 case NEON::BI__builtin_neon_vcled_f64:
12417 case NEON::BI__builtin_neon_vcltd_f64:
12418 case NEON::BI__builtin_neon_vcged_f64:
12419 case NEON::BI__builtin_neon_vcgtd_f64: {
12420 llvm::CmpInst::Predicate P;
12421 switch (BuiltinID) {
12422 default: llvm_unreachable("missing builtin ID in switch!");
12423 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
12424 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
12425 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
12426 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
12427 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
12428 }
12429 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12430 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12431 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12432 if (P == llvm::FCmpInst::FCMP_OEQ)
12433 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12434 else
12435 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12436 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
12437 }
12438 case NEON::BI__builtin_neon_vceqs_f32:
12439 case NEON::BI__builtin_neon_vcles_f32:
12440 case NEON::BI__builtin_neon_vclts_f32:
12441 case NEON::BI__builtin_neon_vcges_f32:
12442 case NEON::BI__builtin_neon_vcgts_f32: {
12443 llvm::CmpInst::Predicate P;
12444 switch (BuiltinID) {
12445 default: llvm_unreachable("missing builtin ID in switch!");
12446 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
12447 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
12448 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
12449 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
12450 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
12451 }
12452 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12453 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
12454 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
12455 if (P == llvm::FCmpInst::FCMP_OEQ)
12456 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12457 else
12458 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12459 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
12460 }
12461 case NEON::BI__builtin_neon_vceqh_f16:
12462 case NEON::BI__builtin_neon_vcleh_f16:
12463 case NEON::BI__builtin_neon_vclth_f16:
12464 case NEON::BI__builtin_neon_vcgeh_f16:
12465 case NEON::BI__builtin_neon_vcgth_f16: {
12466 llvm::CmpInst::Predicate P;
12467 switch (BuiltinID) {
12468 default: llvm_unreachable("missing builtin ID in switch!");
12469 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
12470 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
12471 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
12472 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
12473 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
12474 }
12475 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12476 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
12477 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
12478 if (P == llvm::FCmpInst::FCMP_OEQ)
12479 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12480 else
12481 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12482 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
12483 }
12484 case NEON::BI__builtin_neon_vceqd_s64:
12485 case NEON::BI__builtin_neon_vceqd_u64:
12486 case NEON::BI__builtin_neon_vcgtd_s64:
12487 case NEON::BI__builtin_neon_vcgtd_u64:
12488 case NEON::BI__builtin_neon_vcltd_s64:
12489 case NEON::BI__builtin_neon_vcltd_u64:
12490 case NEON::BI__builtin_neon_vcged_u64:
12491 case NEON::BI__builtin_neon_vcged_s64:
12492 case NEON::BI__builtin_neon_vcled_u64:
12493 case NEON::BI__builtin_neon_vcled_s64: {
12494 llvm::CmpInst::Predicate P;
12495 switch (BuiltinID) {
12496 default: llvm_unreachable("missing builtin ID in switch!");
12497 case NEON::BI__builtin_neon_vceqd_s64:
12498 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12499 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12500 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12501 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12502 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12503 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12504 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12505 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12506 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12507 }
12508 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12509 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12510 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12511 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12512 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12513 }
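// vtstd tests for any common set bits: in essence (a & b) != 0, widened
// back to an i64 mask. The sequence below is roughly:
//   %and = and i64 %a, %b
//   %cmp = icmp ne i64 %and, 0
//   %res = sext i1 %cmp to i64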
12514 case NEON::BI__builtin_neon_vtstd_s64:
12515 case NEON::BI__builtin_neon_vtstd_u64: {
12516 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12517 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12518 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12519 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12520 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12521 llvm::Constant::getNullValue(Int64Ty));
12522 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12523 }
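// The vset_lane/vsetq_lane builtins map directly onto insertelement; the
// f64 variants below differ only in first bitcasting the vector operand to
// the matching v1f64/v2f64 type.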
12524 case NEON::BI__builtin_neon_vset_lane_i8:
12525 case NEON::BI__builtin_neon_vset_lane_i16:
12526 case NEON::BI__builtin_neon_vset_lane_i32:
12527 case NEON::BI__builtin_neon_vset_lane_i64:
12528 case NEON::BI__builtin_neon_vset_lane_bf16:
12529 case NEON::BI__builtin_neon_vset_lane_f32:
12530 case NEON::BI__builtin_neon_vsetq_lane_i8:
12531 case NEON::BI__builtin_neon_vsetq_lane_i16:
12532 case NEON::BI__builtin_neon_vsetq_lane_i32:
12533 case NEON::BI__builtin_neon_vsetq_lane_i64:
12534 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12535 case NEON::BI__builtin_neon_vsetq_lane_f32:
12536 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12537 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12538 case NEON::BI__builtin_neon_vset_lane_f64:
12539 // The vector type needs a cast for the v1f64 variant.
12540 Ops[1] =
12541 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12542 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12543 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12544 case NEON::BI__builtin_neon_vsetq_lane_f64:
12545 // The vector type needs a cast for the v2f64 variant.
12546 Ops[1] =
12547 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12548 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12549 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12550
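// The vget_lane/vdup*_lane builtins bitcast the input to the fixed vector
// type implied by the builtin and then extract the requested element, so
// vgetq_lane_i32(v, 3) is roughly just:
//   %res = extractelement <4 x i32> %v, i32 3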
12551 case NEON::BI__builtin_neon_vget_lane_i8:
12552 case NEON::BI__builtin_neon_vdupb_lane_i8:
12553 Ops[0] =
12554 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12555 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12556 "vget_lane");
12557 case NEON::BI__builtin_neon_vgetq_lane_i8:
12558 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12559 Ops[0] =
12560 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12561 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12562 "vgetq_lane");
12563 case NEON::BI__builtin_neon_vget_lane_i16:
12564 case NEON::BI__builtin_neon_vduph_lane_i16:
12565 Ops[0] =
12566 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12567 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12568 "vget_lane");
12569 case NEON::BI__builtin_neon_vgetq_lane_i16:
12570 case NEON::BI__builtin_neon_vduph_laneq_i16:
12571 Ops[0] =
12572 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12573 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12574 "vgetq_lane");
12575 case NEON::BI__builtin_neon_vget_lane_i32:
12576 case NEON::BI__builtin_neon_vdups_lane_i32:
12577 Ops[0] =
12578 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12579 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12580 "vget_lane");
12581 case NEON::BI__builtin_neon_vdups_lane_f32:
12582 Ops[0] =
12583 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12584 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12585 "vdups_lane");
12586 case NEON::BI__builtin_neon_vgetq_lane_i32:
12587 case NEON::BI__builtin_neon_vdups_laneq_i32:
12588 Ops[0] =
12589 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12590 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12591 "vgetq_lane");
12592 case NEON::BI__builtin_neon_vget_lane_i64:
12593 case NEON::BI__builtin_neon_vdupd_lane_i64:
12594 Ops[0] =
12595 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12596 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12597 "vget_lane");
12598 case NEON::BI__builtin_neon_vdupd_lane_f64:
12599 Ops[0] =
12600 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12601 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12602 "vdupd_lane");
12603 case NEON::BI__builtin_neon_vgetq_lane_i64:
12604 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12605 Ops[0] =
12606 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12607 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12608 "vgetq_lane");
12609 case NEON::BI__builtin_neon_vget_lane_f32:
12610 Ops[0] =
12611 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12612 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12613 "vget_lane");
12614 case NEON::BI__builtin_neon_vget_lane_f64:
12615 Ops[0] =
12616 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12617 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12618 "vget_lane");
12619 case NEON::BI__builtin_neon_vgetq_lane_f32:
12620 case NEON::BI__builtin_neon_vdups_laneq_f32:
12621 Ops[0] =
12622 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12623 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12624 "vgetq_lane");
12625 case NEON::BI__builtin_neon_vgetq_lane_f64:
12626 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12627 Ops[0] =
12628 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12629 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12630 "vgetq_lane");
12631 case NEON::BI__builtin_neon_vaddh_f16:
12632 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12633 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12634 case NEON::BI__builtin_neon_vsubh_f16:
12635 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12636 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12637 case NEON::BI__builtin_neon_vmulh_f16:
12638 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12639 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12640 case NEON::BI__builtin_neon_vdivh_f16:
12641 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12642 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12643 case NEON::BI__builtin_neon_vfmah_f16:
12644 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12645 return emitCallMaybeConstrainedFPBuiltin(
12646 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12647 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12648 case NEON::BI__builtin_neon_vfmsh_f16: {
12649 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12650
12651 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12652 return emitCallMaybeConstrainedFPBuiltin(
12653 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12654 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12655 }
12656 case NEON::BI__builtin_neon_vaddd_s64:
12657 case NEON::BI__builtin_neon_vaddd_u64:
12658 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12659 case NEON::BI__builtin_neon_vsubd_s64:
12660 case NEON::BI__builtin_neon_vsubd_u64:
12661 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
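// The scalar saturating doubling multiply-accumulate builtins that follow
// have no 16-bit scalar sqdmull form, so the i16 operands are wrapped into
// <4 x i16> vectors, lane 0 of the widened product is extracted, and the
// result is combined with the accumulator via sqadd or sqsub.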
12662 case NEON::BI__builtin_neon_vqdmlalh_s16:
12663 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12664 SmallVector<Value *, 2> ProductOps;
12665 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12666 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12667 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12668 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12669 ProductOps, "vqdmlXl");
12670 Constant *CI = ConstantInt::get(SizeTy, 0);
12671 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12672
12673 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12674 ? Intrinsic::aarch64_neon_sqadd
12675 : Intrinsic::aarch64_neon_sqsub;
12676 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12677 }
12678 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12679 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12680 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12681 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12682 Ops, "vqshlu_n");
12683 }
12684 case NEON::BI__builtin_neon_vqshld_n_u64:
12685 case NEON::BI__builtin_neon_vqshld_n_s64: {
12686 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12687 ? Intrinsic::aarch64_neon_uqshl
12688 : Intrinsic::aarch64_neon_sqshl;
12689 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12690 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12691 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12692 }
12693 case NEON::BI__builtin_neon_vrshrd_n_u64:
12694 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12695 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12696 ? Intrinsic::aarch64_neon_urshl
12697 : Intrinsic::aarch64_neon_srshl;
12698 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12699 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12700 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12701 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12702 }
12703 case NEON::BI__builtin_neon_vrsrad_n_u64:
12704 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12705 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12706 ? Intrinsic::aarch64_neon_urshl
12707 : Intrinsic::aarch64_neon_srshl;
12708 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12709 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12710 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12711 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12712 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12713 }
12714 case NEON::BI__builtin_neon_vshld_n_s64:
12715 case NEON::BI__builtin_neon_vshld_n_u64: {
12716 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12717 return Builder.CreateShl(
12718 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12719 }
12720 case NEON::BI__builtin_neon_vshrd_n_s64: {
12721 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12722 return Builder.CreateAShr(
12723 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12724 Amt->getZExtValue())),
12725 "shrd_n");
12726 }
12727 case NEON::BI__builtin_neon_vshrd_n_u64: {
12728 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12729 uint64_t ShiftAmt = Amt->getZExtValue();
12730 // Right-shifting an unsigned value by its size yields 0.
12731 if (ShiftAmt == 64)
12732 return ConstantInt::get(Int64Ty, 0);
12733 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12734 "shrd_n");
12735 }
12736 case NEON::BI__builtin_neon_vsrad_n_s64: {
12737 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12738 Ops[1] = Builder.CreateAShr(
12739 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12740 Amt->getZExtValue())),
12741 "shrd_n");
12742 return Builder.CreateAdd(Ops[0], Ops[1]);
12743 }
12744 case NEON::BI__builtin_neon_vsrad_n_u64: {
12745 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12746 uint64_t ShiftAmt = Amt->getZExtValue();
12747 // Right-shifting an unsigned value by its size yields 0.
12748 // As Op + 0 = Op, return Ops[0] directly.
12749 if (ShiftAmt == 64)
12750 return Ops[0];
12751 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12752 "shrd_n");
12753 return Builder.CreateAdd(Ops[0], Ops[1]);
12754 }
12755 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12756 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12757 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12758 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12759 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12760 "lane");
12761 SmallVector<Value *, 2> ProductOps;
12762 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12763 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12764 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12765 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12766 ProductOps, "vqdmlXl");
12767 Constant *CI = ConstantInt::get(SizeTy, 0);
12768 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12769 Ops.pop_back();
12770
12771 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12772 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12773 ? Intrinsic::aarch64_neon_sqadd
12774 : Intrinsic::aarch64_neon_sqsub;
12775 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12776 }
12777 case NEON::BI__builtin_neon_vqdmlals_s32:
12778 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12779 SmallVector<Value *, 2> ProductOps;
12780 ProductOps.push_back(Ops[1]);
12781 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12782 Ops[1] =
12783 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12784 ProductOps, "vqdmlXl");
12785
12786 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12787 ? Intrinsic::aarch64_neon_sqadd
12788 : Intrinsic::aarch64_neon_sqsub;
12789 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12790 }
12791 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12792 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12793 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12794 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12795 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12796 "lane");
12797 SmallVector<Value *, 2> ProductOps;
12798 ProductOps.push_back(Ops[1]);
12799 ProductOps.push_back(Ops[2]);
12800 Ops[1] =
12801 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12802 ProductOps, "vqdmlXl");
12803 Ops.pop_back();
12804
12805 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12806 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12807 ? Intrinsic::aarch64_neon_sqadd
12808 : Intrinsic::aarch64_neon_sqsub;
12809 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12810 }
12811 case NEON::BI__builtin_neon_vget_lane_bf16:
12812 case NEON::BI__builtin_neon_vduph_lane_bf16:
12813 case NEON::BI__builtin_neon_vduph_lane_f16: {
12814 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12815 "vget_lane");
12816 }
12817 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12818 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12819 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12820 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12821 "vgetq_lane");
12822 }
12823
12824 case clang::AArch64::BI_InterlockedAdd:
12825 case clang::AArch64::BI_InterlockedAdd64: {
12826 Address DestAddr = CheckAtomicAlignment(*this, E);
12827 Value *Val = EmitScalarExpr(E->getArg(1));
12828 AtomicRMWInst *RMWI =
12829 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12830 llvm::AtomicOrdering::SequentiallyConsistent);
12831 return Builder.CreateAdd(RMWI, Val);
12832 }
12833 }
12834
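// Builtins not handled by the scalar switch above fall through to the
// generic vector path: resolve the NEON vector type, try the common NEON
// emission table, then the AArch64-specific cases below.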
12835 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12836 llvm::Type *Ty = VTy;
12837 if (!Ty)
12838 return nullptr;
12839
12840 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12841 // defer to common code if it's been added to our special map.
12842 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12843 AArch64SIMDIntrinsicsProvenSorted);
12844
12845 if (Builtin)
12846 return EmitCommonNeonBuiltinExpr(
12847 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12848 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12849 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12850
12851 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12852 return V;
12853
12854 unsigned Int;
12855 switch (BuiltinID) {
12856 default: return nullptr;
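// vbsl (bitwise select) has no single LLVM intrinsic here; it is open-coded
// as (mask & a) | (~mask & b) on the integer form of the vector type.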
12857 case NEON::BI__builtin_neon_vbsl_v:
12858 case NEON::BI__builtin_neon_vbslq_v: {
12859 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12860 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12861 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12862 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12863
12864 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12865 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12866 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12867 return Builder.CreateBitCast(Ops[0], Ty);
12868 }
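// For the lane variants of fused multiply-add below, the addend is moved
// last to match llvm.fma, and the lane operand is splatted across the
// vector with a shufflevector before the call.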
12869 case NEON::BI__builtin_neon_vfma_lane_v:
12870 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12871 // The ARM builtins (and instructions) have the addend as the first
12872 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12873 Value *Addend = Ops[0];
12874 Value *Multiplicand = Ops[1];
12875 Value *LaneSource = Ops[2];
12876 Ops[0] = Multiplicand;
12877 Ops[1] = LaneSource;
12878 Ops[2] = Addend;
12879
12880 // Now adjust things to handle the lane access.
12881 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12882 ? llvm::FixedVectorType::get(VTy->getElementType(),
12883 VTy->getNumElements() / 2)
12884 : VTy;
12885 llvm::Constant *cst = cast<Constant>(Ops[3]);
12886 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12887 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12888 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12889
12890 Ops.pop_back();
12891 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12892 : Intrinsic::fma;
12893 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12894 }
12895 case NEON::BI__builtin_neon_vfma_laneq_v: {
12896 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12897 // v1f64 fma should be mapped to Neon scalar f64 fma
12898 if (VTy && VTy->getElementType() == DoubleTy) {
12899 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12900 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12901 llvm::FixedVectorType *VTy =
12902 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12903 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12904 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12905 Value *Result;
12906 Result = emitCallMaybeConstrainedFPBuiltin(
12907 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12908 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12909 return Builder.CreateBitCast(Result, Ty);
12910 }
12911 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12912 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12913
12914 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12915 VTy->getNumElements() * 2);
12916 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12917 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12918 cast<ConstantInt>(Ops[3]));
12919 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12920
12921 return emitCallMaybeConstrainedFPBuiltin(
12922 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12923 {Ops[2], Ops[1], Ops[0]});
12924 }
12925 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12926 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12927 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12928
12929 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12930 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12931 return emitCallMaybeConstrainedFPBuiltin(
12932 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12933 {Ops[2], Ops[1], Ops[0]});
12934 }
12935 case NEON::BI__builtin_neon_vfmah_lane_f16:
12936 case NEON::BI__builtin_neon_vfmas_lane_f32:
12937 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12938 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12939 case NEON::BI__builtin_neon_vfmad_lane_f64:
12940 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12941 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12942 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12943 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12944 return emitCallMaybeConstrainedFPBuiltin(
12945 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12946 {Ops[1], Ops[2], Ops[0]});
12947 }
12948 case NEON::BI__builtin_neon_vmull_v:
12949 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12950 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12951 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12952 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12953 case NEON::BI__builtin_neon_vmax_v:
12954 case NEON::BI__builtin_neon_vmaxq_v:
12955 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12956 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12957 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12958 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12959 case NEON::BI__builtin_neon_vmaxh_f16: {
12960 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12961 Int = Intrinsic::aarch64_neon_fmax;
12962 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12963 }
12964 case NEON::BI__builtin_neon_vmin_v:
12965 case NEON::BI__builtin_neon_vminq_v:
12966 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12967 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12968 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12969 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12970 case NEON::BI__builtin_neon_vminh_f16: {
12971 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12972 Int = Intrinsic::aarch64_neon_fmin;
12973 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12974 }
12975 case NEON::BI__builtin_neon_vabd_v:
12976 case NEON::BI__builtin_neon_vabdq_v:
12977 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12978 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12979 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12980 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
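// vpadal (pairwise add and accumulate long) is emitted as the
// aarch64.neon.{u,s}addlp pairwise widening add followed by a plain vector
// add with the accumulator operand.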
12981 case NEON::BI__builtin_neon_vpadal_v:
12982 case NEON::BI__builtin_neon_vpadalq_v: {
12983 unsigned ArgElts = VTy->getNumElements();
12984 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12985 unsigned BitWidth = EltTy->getBitWidth();
12986 auto *ArgTy = llvm::FixedVectorType::get(
12987 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12988 llvm::Type* Tys[2] = { VTy, ArgTy };
12989 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12990 SmallVector<llvm::Value*, 1> TmpOps;
12991 TmpOps.push_back(Ops[1]);
12992 Function *F = CGM.getIntrinsic(Int, Tys);
12993 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12994 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12995 return Builder.CreateAdd(tmp, addend);
12996 }
12997 case NEON::BI__builtin_neon_vpmin_v:
12998 case NEON::BI__builtin_neon_vpminq_v:
12999 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
13000 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
13001 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
13002 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
13003 case NEON::BI__builtin_neon_vpmax_v:
13004 case NEON::BI__builtin_neon_vpmaxq_v:
13005 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
13006 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
13007 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
13008 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
13009 case NEON::BI__builtin_neon_vminnm_v:
13010 case NEON::BI__builtin_neon_vminnmq_v:
13011 Int = Intrinsic::aarch64_neon_fminnm;
13012 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
13013 case NEON::BI__builtin_neon_vminnmh_f16:
13014 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13015 Int = Intrinsic::aarch64_neon_fminnm;
13016 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
13017 case NEON::BI__builtin_neon_vmaxnm_v:
13018 case NEON::BI__builtin_neon_vmaxnmq_v:
13019 Int = Intrinsic::aarch64_neon_fmaxnm;
13020 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
13021 case NEON::BI__builtin_neon_vmaxnmh_f16:
13022 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13023 Int = Intrinsic::aarch64_neon_fmaxnm;
13024 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
13025 case NEON::BI__builtin_neon_vrecpss_f32: {
13026 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13027 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
13028 Ops, "vrecps");
13029 }
13030 case NEON::BI__builtin_neon_vrecpsd_f64:
13031 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13032 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
13033 Ops, "vrecps");
13034 case NEON::BI__builtin_neon_vrecpsh_f16:
13035 Ops.push_back(EmitScalarExpr(E->getArg(1)));
13036 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
13037 Ops, "vrecps");
13038 case NEON::BI__builtin_neon_vqshrun_n_v:
13039 Int = Intrinsic::aarch64_neon_sqshrun;
13040 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
13041 case NEON::BI__builtin_neon_vqrshrun_n_v:
13042 Int = Intrinsic::aarch64_neon_sqrshrun;
13043 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
13044 case NEON::BI__builtin_neon_vqshrn_n_v:
13045 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
13046 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
13047 case NEON::BI__builtin_neon_vrshrn_n_v:
13048 Int = Intrinsic::aarch64_neon_rshrn;
13049 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
13050 case NEON::BI__builtin_neon_vqrshrn_n_v:
13051 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
13052 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
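// The vrnd* rounding builtins map onto the standard LLVM rounding
// intrinsics (round, floor, ceil, rint, trunc, roundeven), switching to the
// experimental constrained variants when strict FP evaluation is in effect,
// while the vrnd32*/vrnd64* forms use dedicated aarch64.neon.frint* intrinsics.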
13053 case NEON::BI__builtin_neon_vrndah_f16: {
13054 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13055 Int = Builder.getIsFPConstrained()
13056 ? Intrinsic::experimental_constrained_round
13057 : Intrinsic::round;
13058 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
13059 }
13060 case NEON::BI__builtin_neon_vrnda_v:
13061 case NEON::BI__builtin_neon_vrndaq_v: {
13062 Int = Builder.getIsFPConstrained()
13063 ? Intrinsic::experimental_constrained_round
13064 : Intrinsic::round;
13065 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
13066 }
13067 case NEON::BI__builtin_neon_vrndih_f16: {
13068 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13069 Int = Builder.getIsFPConstrained()
13070 ? Intrinsic::experimental_constrained_nearbyint
13071 : Intrinsic::nearbyint;
13072 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
13073 }
13074 case NEON::BI__builtin_neon_vrndmh_f16: {
13075 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13076 Int = Builder.getIsFPConstrained()
13077 ? Intrinsic::experimental_constrained_floor
13078 : Intrinsic::floor;
13079 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
13080 }
13081 case NEON::BI__builtin_neon_vrndm_v:
13082 case NEON::BI__builtin_neon_vrndmq_v: {
13083 Int = Builder.getIsFPConstrained()
13084 ? Intrinsic::experimental_constrained_floor
13085 : Intrinsic::floor;
13086 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
13087 }
13088 case NEON::BI__builtin_neon_vrndnh_f16: {
13089 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13090 Int = Builder.getIsFPConstrained()
13091 ? Intrinsic::experimental_constrained_roundeven
13092 : Intrinsic::roundeven;
13093 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
13094 }
13095 case NEON::BI__builtin_neon_vrndn_v:
13096 case NEON::BI__builtin_neon_vrndnq_v: {
13097 Int = Builder.getIsFPConstrained()
13098 ? Intrinsic::experimental_constrained_roundeven
13099 : Intrinsic::roundeven;
13100 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
13101 }
13102 case NEON::BI__builtin_neon_vrndns_f32: {
13103 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13104 Int = Builder.getIsFPConstrained()
13105 ? Intrinsic::experimental_constrained_roundeven
13106 : Intrinsic::roundeven;
13107 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
13108 }
13109 case NEON::BI__builtin_neon_vrndph_f16: {
13110 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13111 Int = Builder.getIsFPConstrained()
13112 ? Intrinsic::experimental_constrained_ceil
13113 : Intrinsic::ceil;
13114 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
13115 }
13116 case NEON::BI__builtin_neon_vrndp_v:
13117 case NEON::BI__builtin_neon_vrndpq_v: {
13118 Int = Builder.getIsFPConstrained()
13119 ? Intrinsic::experimental_constrained_ceil
13120 : Intrinsic::ceil;
13121 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
13122 }
13123 case NEON::BI__builtin_neon_vrndxh_f16: {
13124 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13125 Int = Builder.getIsFPConstrained()
13126 ? Intrinsic::experimental_constrained_rint
13127 : Intrinsic::rint;
13128 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
13129 }
13130 case NEON::BI__builtin_neon_vrndx_v:
13131 case NEON::BI__builtin_neon_vrndxq_v: {
13132 Int = Builder.getIsFPConstrained()
13133 ? Intrinsic::experimental_constrained_rint
13134 : Intrinsic::rint;
13135 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
13136 }
13137 case NEON::BI__builtin_neon_vrndh_f16: {
13138 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13139 Int = Builder.getIsFPConstrained()
13140 ? Intrinsic::experimental_constrained_trunc
13141 : Intrinsic::trunc;
13142 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
13143 }
13144 case NEON::BI__builtin_neon_vrnd32x_f32:
13145 case NEON::BI__builtin_neon_vrnd32xq_f32:
13146 case NEON::BI__builtin_neon_vrnd32x_f64:
13147 case NEON::BI__builtin_neon_vrnd32xq_f64: {
13148 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13149 Int = Intrinsic::aarch64_neon_frint32x;
13150 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
13151 }
13152 case NEON::BI__builtin_neon_vrnd32z_f32:
13153 case NEON::BI__builtin_neon_vrnd32zq_f32:
13154 case NEON::BI__builtin_neon_vrnd32z_f64:
13155 case NEON::BI__builtin_neon_vrnd32zq_f64: {
13156 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13157 Int = Intrinsic::aarch64_neon_frint32z;
13158 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
13159 }
13160 case NEON::BI__builtin_neon_vrnd64x_f32:
13161 case NEON::BI__builtin_neon_vrnd64xq_f32:
13162 case NEON::BI__builtin_neon_vrnd64x_f64:
13163 case NEON::BI__builtin_neon_vrnd64xq_f64: {
13164 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13165 Int = Intrinsic::aarch64_neon_frint64x;
13166 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
13167 }
13168 case NEON::BI__builtin_neon_vrnd64z_f32:
13169 case NEON::BI__builtin_neon_vrnd64zq_f32:
13170 case NEON::BI__builtin_neon_vrnd64z_f64:
13171 case NEON::BI__builtin_neon_vrnd64zq_f64: {
13172 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13173 Int = Intrinsic::aarch64_neon_frint64z;
13174 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
13175 }
13176 case NEON::BI__builtin_neon_vrnd_v:
13177 case NEON::BI__builtin_neon_vrndq_v: {
13178 Int = Builder.getIsFPConstrained()
13179 ? Intrinsic::experimental_constrained_trunc
13180 : Intrinsic::trunc;
13181 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
13182 }
13183 case NEON::BI__builtin_neon_vcvt_f64_v:
13184 case NEON::BI__builtin_neon_vcvtq_f64_v:
13185 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13186 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
13187 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
13188 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
13189 case NEON::BI__builtin_neon_vcvt_f64_f32: {
13190 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
13191 "unexpected vcvt_f64_f32 builtin");
13192 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
13193 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13194
13195 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
13196 }
13197 case NEON::BI__builtin_neon_vcvt_f32_f64: {
13198 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
13199 "unexpected vcvt_f32_f64 builtin");
13200 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
13201 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13202
13203 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
13204 }
13205 case NEON::BI__builtin_neon_vcvt_s32_v:
13206 case NEON::BI__builtin_neon_vcvt_u32_v:
13207 case NEON::BI__builtin_neon_vcvt_s64_v:
13208 case NEON::BI__builtin_neon_vcvt_u64_v:
13209 case NEON::BI__builtin_neon_vcvt_s16_f16:
13210 case NEON::BI__builtin_neon_vcvt_u16_f16:
13211 case NEON::BI__builtin_neon_vcvtq_s32_v:
13212 case NEON::BI__builtin_neon_vcvtq_u32_v:
13213 case NEON::BI__builtin_neon_vcvtq_s64_v:
13214 case NEON::BI__builtin_neon_vcvtq_u64_v:
13215 case NEON::BI__builtin_neon_vcvtq_s16_f16:
13216 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
13217 Int =
13218 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
13219 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
13220 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
13221 }
13222 case NEON::BI__builtin_neon_vcvta_s16_f16:
13223 case NEON::BI__builtin_neon_vcvta_u16_f16:
13224 case NEON::BI__builtin_neon_vcvta_s32_v:
13225 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
13226 case NEON::BI__builtin_neon_vcvtaq_s32_v:
13227 case NEON::BI__builtin_neon_vcvta_u32_v:
13228 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
13229 case NEON::BI__builtin_neon_vcvtaq_u32_v:
13230 case NEON::BI__builtin_neon_vcvta_s64_v:
13231 case NEON::BI__builtin_neon_vcvtaq_s64_v:
13232 case NEON::BI__builtin_neon_vcvta_u64_v:
13233 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
13234 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
13235 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13236 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
13237 }
13238 case NEON::BI__builtin_neon_vcvtm_s16_f16:
13239 case NEON::BI__builtin_neon_vcvtm_s32_v:
13240 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
13241 case NEON::BI__builtin_neon_vcvtmq_s32_v:
13242 case NEON::BI__builtin_neon_vcvtm_u16_f16:
13243 case NEON::BI__builtin_neon_vcvtm_u32_v:
13244 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
13245 case NEON::BI__builtin_neon_vcvtmq_u32_v:
13246 case NEON::BI__builtin_neon_vcvtm_s64_v:
13247 case NEON::BI__builtin_neon_vcvtmq_s64_v:
13248 case NEON::BI__builtin_neon_vcvtm_u64_v:
13249 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
13250 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
13251 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13252 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
13253 }
13254 case NEON::BI__builtin_neon_vcvtn_s16_f16:
13255 case NEON::BI__builtin_neon_vcvtn_s32_v:
13256 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
13257 case NEON::BI__builtin_neon_vcvtnq_s32_v:
13258 case NEON::BI__builtin_neon_vcvtn_u16_f16:
13259 case NEON::BI__builtin_neon_vcvtn_u32_v:
13260 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
13261 case NEON::BI__builtin_neon_vcvtnq_u32_v:
13262 case NEON::BI__builtin_neon_vcvtn_s64_v:
13263 case NEON::BI__builtin_neon_vcvtnq_s64_v:
13264 case NEON::BI__builtin_neon_vcvtn_u64_v:
13265 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
13266 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
13267 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13268 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
13269 }
13270 case NEON::BI__builtin_neon_vcvtp_s16_f16:
13271 case NEON::BI__builtin_neon_vcvtp_s32_v:
13272 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
13273 case NEON::BI__builtin_neon_vcvtpq_s32_v:
13274 case NEON::BI__builtin_neon_vcvtp_u16_f16:
13275 case NEON::BI__builtin_neon_vcvtp_u32_v:
13276 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
13277 case NEON::BI__builtin_neon_vcvtpq_u32_v:
13278 case NEON::BI__builtin_neon_vcvtp_s64_v:
13279 case NEON::BI__builtin_neon_vcvtpq_s64_v:
13280 case NEON::BI__builtin_neon_vcvtp_u64_v:
13281 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
13282 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
13283 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13284 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
13285 }
13286 case NEON::BI__builtin_neon_vmulx_v:
13287 case NEON::BI__builtin_neon_vmulxq_v: {
13288 Int = Intrinsic::aarch64_neon_fmulx;
13289 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
13290 }
13291 case NEON::BI__builtin_neon_vmulxh_lane_f16:
13292 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
13293 // vmulx_lane should be mapped to Neon scalar mulx after
13294 // extracting the scalar element
13295 Ops.push_back(EmitScalarExpr(E->getArg(2)));
13296 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13297 Ops.pop_back();
13298 Int = Intrinsic::aarch64_neon_fmulx;
13299 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
13300 }
13301 case NEON::BI__builtin_neon_vmul_lane_v:
13302 case NEON::BI__builtin_neon_vmul_laneq_v: {
13303 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
13304 bool Quad = false;
13305 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
13306 Quad = true;
13307 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13308 llvm::FixedVectorType *VTy =
13309 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
13310 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13311 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13312 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
13313 return Builder.CreateBitCast(Result, Ty);
13314 }
13315 case NEON::BI__builtin_neon_vnegd_s64:
13316 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
13317 case NEON::BI__builtin_neon_vnegh_f16:
13318 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
13319 case NEON::BI__builtin_neon_vpmaxnm_v:
13320 case NEON::BI__builtin_neon_vpmaxnmq_v: {
13321 Int = Intrinsic::aarch64_neon_fmaxnmp;
13322 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
13323 }
13324 case NEON::BI__builtin_neon_vpminnm_v:
13325 case NEON::BI__builtin_neon_vpminnmq_v: {
13326 Int = Intrinsic::aarch64_neon_fminnmp;
13327 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
13328 }
13329 case NEON::BI__builtin_neon_vsqrth_f16: {
13330 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13331 Int = Builder.getIsFPConstrained()
13332 ? Intrinsic::experimental_constrained_sqrt
13333 : Intrinsic::sqrt;
13334 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
13335 }
13336 case NEON::BI__builtin_neon_vsqrt_v:
13337 case NEON::BI__builtin_neon_vsqrtq_v: {
13338 Int = Builder.getIsFPConstrained()
13339 ? Intrinsic::experimental_constrained_sqrt
13340 : Intrinsic::sqrt;
13341 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13342 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
13343 }
13344 case NEON::BI__builtin_neon_vrbit_v:
13345 case NEON::BI__builtin_neon_vrbitq_v: {
13346 Int = Intrinsic::bitreverse;
13347 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
13348 }
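// The across-vector reductions below call the matching aarch64.neon.*v
// intrinsic, which is declared with an i32 (or half) result, and then
// truncate back to the element width. For example, vaddv_u8 is roughly:
//   %r32 = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %v)
//   %res = trunc i32 %r32 to i8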
13349 case NEON::BI__builtin_neon_vaddv_u8:
13350 // FIXME: These are handled by the AArch64 scalar code.
13351 usgn = true;
13352 [[fallthrough]];
13353 case NEON::BI__builtin_neon_vaddv_s8: {
13354 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13355 Ty = Int32Ty;
13356 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13357 llvm::Type *Tys[2] = { Ty, VTy };
13358 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13359 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13360 return Builder.CreateTrunc(Ops[0], Int8Ty);
13361 }
13362 case NEON::BI__builtin_neon_vaddv_u16:
13363 usgn = true;
13364 [[fallthrough]];
13365 case NEON::BI__builtin_neon_vaddv_s16: {
13366 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13367 Ty = Int32Ty;
13368 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13369 llvm::Type *Tys[2] = { Ty, VTy };
13370 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13371 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13372 return Builder.CreateTrunc(Ops[0], Int16Ty);
13373 }
13374 case NEON::BI__builtin_neon_vaddvq_u8:
13375 usgn = true;
13376 [[fallthrough]];
13377 case NEON::BI__builtin_neon_vaddvq_s8: {
13378 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13379 Ty = Int32Ty;
13380 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13381 llvm::Type *Tys[2] = { Ty, VTy };
13382 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13383 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13384 return Builder.CreateTrunc(Ops[0], Int8Ty);
13385 }
13386 case NEON::BI__builtin_neon_vaddvq_u16:
13387 usgn = true;
13388 [[fallthrough]];
13389 case NEON::BI__builtin_neon_vaddvq_s16: {
13390 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13391 Ty = Int32Ty;
13392 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13393 llvm::Type *Tys[2] = { Ty, VTy };
13394 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13395 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13396 return Builder.CreateTrunc(Ops[0], Int16Ty);
13397 }
13398 case NEON::BI__builtin_neon_vmaxv_u8: {
13399 Int = Intrinsic::aarch64_neon_umaxv;
13400 Ty = Int32Ty;
13401 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13402 llvm::Type *Tys[2] = { Ty, VTy };
13403 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13404 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13405 return Builder.CreateTrunc(Ops[0], Int8Ty);
13406 }
13407 case NEON::BI__builtin_neon_vmaxv_u16: {
13408 Int = Intrinsic::aarch64_neon_umaxv;
13409 Ty = Int32Ty;
13410 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13411 llvm::Type *Tys[2] = { Ty, VTy };
13412 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13413 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13414 return Builder.CreateTrunc(Ops[0], Int16Ty);
13415 }
13416 case NEON::BI__builtin_neon_vmaxvq_u8: {
13417 Int = Intrinsic::aarch64_neon_umaxv;
13418 Ty = Int32Ty;
13419 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13420 llvm::Type *Tys[2] = { Ty, VTy };
13421 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13422 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13423 return Builder.CreateTrunc(Ops[0], Int8Ty);
13424 }
13425 case NEON::BI__builtin_neon_vmaxvq_u16: {
13426 Int = Intrinsic::aarch64_neon_umaxv;
13427 Ty = Int32Ty;
13428 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13429 llvm::Type *Tys[2] = { Ty, VTy };
13430 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13431 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13432 return Builder.CreateTrunc(Ops[0], Int16Ty);
13433 }
13434 case NEON::BI__builtin_neon_vmaxv_s8: {
13435 Int = Intrinsic::aarch64_neon_smaxv;
13436 Ty = Int32Ty;
13437 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13438 llvm::Type *Tys[2] = { Ty, VTy };
13439 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13440 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13441 return Builder.CreateTrunc(Ops[0], Int8Ty);
13442 }
13443 case NEON::BI__builtin_neon_vmaxv_s16: {
13444 Int = Intrinsic::aarch64_neon_smaxv;
13445 Ty = Int32Ty;
13446 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13447 llvm::Type *Tys[2] = { Ty, VTy };
13448 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13449 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13450 return Builder.CreateTrunc(Ops[0], Int16Ty);
13451 }
13452 case NEON::BI__builtin_neon_vmaxvq_s8: {
13453 Int = Intrinsic::aarch64_neon_smaxv;
13454 Ty = Int32Ty;
13455 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13456 llvm::Type *Tys[2] = { Ty, VTy };
13457 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13458 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13459 return Builder.CreateTrunc(Ops[0], Int8Ty);
13460 }
13461 case NEON::BI__builtin_neon_vmaxvq_s16: {
13462 Int = Intrinsic::aarch64_neon_smaxv;
13463 Ty = Int32Ty;
13464 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13465 llvm::Type *Tys[2] = { Ty, VTy };
13466 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13467 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13468 return Builder.CreateTrunc(Ops[0], Int16Ty);
13469 }
13470 case NEON::BI__builtin_neon_vmaxv_f16: {
13471 Int = Intrinsic::aarch64_neon_fmaxv;
13472 Ty = HalfTy;
13473 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13474 llvm::Type *Tys[2] = { Ty, VTy };
13475 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13476 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13477 return Builder.CreateTrunc(Ops[0], HalfTy);
13478 }
13479 case NEON::BI__builtin_neon_vmaxvq_f16: {
13480 Int = Intrinsic::aarch64_neon_fmaxv;
13481 Ty = HalfTy;
13482 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13483 llvm::Type *Tys[2] = { Ty, VTy };
13484 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13485 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13486 return Builder.CreateTrunc(Ops[0], HalfTy);
13487 }
13488 case NEON::BI__builtin_neon_vminv_u8: {
13489 Int = Intrinsic::aarch64_neon_uminv;
13490 Ty = Int32Ty;
13491 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13492 llvm::Type *Tys[2] = { Ty, VTy };
13493 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13494 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13495 return Builder.CreateTrunc(Ops[0], Int8Ty);
13496 }
13497 case NEON::BI__builtin_neon_vminv_u16: {
13498 Int = Intrinsic::aarch64_neon_uminv;
13499 Ty = Int32Ty;
13500 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13501 llvm::Type *Tys[2] = { Ty, VTy };
13502 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13503 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13504 return Builder.CreateTrunc(Ops[0], Int16Ty);
13505 }
13506 case NEON::BI__builtin_neon_vminvq_u8: {
13507 Int = Intrinsic::aarch64_neon_uminv;
13508 Ty = Int32Ty;
13509 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13510 llvm::Type *Tys[2] = { Ty, VTy };
13511 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13512 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13513 return Builder.CreateTrunc(Ops[0], Int8Ty);
13514 }
13515 case NEON::BI__builtin_neon_vminvq_u16: {
13516 Int = Intrinsic::aarch64_neon_uminv;
13517 Ty = Int32Ty;
13518 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13519 llvm::Type *Tys[2] = { Ty, VTy };
13520 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13521 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13522 return Builder.CreateTrunc(Ops[0], Int16Ty);
13523 }
13524 case NEON::BI__builtin_neon_vminv_s8: {
13525 Int = Intrinsic::aarch64_neon_sminv;
13526 Ty = Int32Ty;
13527 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13528 llvm::Type *Tys[2] = { Ty, VTy };
13529 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13530 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13531 return Builder.CreateTrunc(Ops[0], Int8Ty);
13532 }
13533 case NEON::BI__builtin_neon_vminv_s16: {
13534 Int = Intrinsic::aarch64_neon_sminv;
13535 Ty = Int32Ty;
13536 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13537 llvm::Type *Tys[2] = { Ty, VTy };
13538 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13539 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13540 return Builder.CreateTrunc(Ops[0], Int16Ty);
13541 }
13542 case NEON::BI__builtin_neon_vminvq_s8: {
13543 Int = Intrinsic::aarch64_neon_sminv;
13544 Ty = Int32Ty;
13545 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13546 llvm::Type *Tys[2] = { Ty, VTy };
13547 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13548 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13549 return Builder.CreateTrunc(Ops[0], Int8Ty);
13550 }
13551 case NEON::BI__builtin_neon_vminvq_s16: {
13552 Int = Intrinsic::aarch64_neon_sminv;
13553 Ty = Int32Ty;
13554 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13555 llvm::Type *Tys[2] = { Ty, VTy };
13556 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13557 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13558 return Builder.CreateTrunc(Ops[0], Int16Ty);
13559 }
13560 case NEON::BI__builtin_neon_vminv_f16: {
13561 Int = Intrinsic::aarch64_neon_fminv;
13562 Ty = HalfTy;
13563 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13564 llvm::Type *Tys[2] = { Ty, VTy };
13565 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13566 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13567 return Builder.CreateTrunc(Ops[0], HalfTy);
13568 }
13569 case NEON::BI__builtin_neon_vminvq_f16: {
13570 Int = Intrinsic::aarch64_neon_fminv;
13571 Ty = HalfTy;
13572 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13573 llvm::Type *Tys[2] = { Ty, VTy };
13574 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13575 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13576 return Builder.CreateTrunc(Ops[0], HalfTy);
13577 }
13578 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13579 Int = Intrinsic::aarch64_neon_fmaxnmv;
13580 Ty = HalfTy;
13581 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13582 llvm::Type *Tys[2] = { Ty, VTy };
13583 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13584 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13585 return Builder.CreateTrunc(Ops[0], HalfTy);
13586 }
13587 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13588 Int = Intrinsic::aarch64_neon_fmaxnmv;
13589 Ty = HalfTy;
13590 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13591 llvm::Type *Tys[2] = { Ty, VTy };
13592 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13593 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13594 return Builder.CreateTrunc(Ops[0], HalfTy);
13595 }
13596 case NEON::BI__builtin_neon_vminnmv_f16: {
13597 Int = Intrinsic::aarch64_neon_fminnmv;
13598 Ty = HalfTy;
13599 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13600 llvm::Type *Tys[2] = { Ty, VTy };
13601 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13602 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13603 return Builder.CreateTrunc(Ops[0], HalfTy);
13604 }
13605 case NEON::BI__builtin_neon_vminnmvq_f16: {
13606 Int = Intrinsic::aarch64_neon_fminnmv;
13607 Ty = HalfTy;
13608 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13609 llvm::Type *Tys[2] = { Ty, VTy };
13610 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13611 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13612 return Builder.CreateTrunc(Ops[0], HalfTy);
13613 }
13614 case NEON::BI__builtin_neon_vmul_n_f64: {
13615 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13616 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13617 return Builder.CreateFMul(Ops[0], RHS);
13618 }
13619 case NEON::BI__builtin_neon_vaddlv_u8: {
13620 Int = Intrinsic::aarch64_neon_uaddlv;
13621 Ty = Int32Ty;
13622 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13623 llvm::Type *Tys[2] = { Ty, VTy };
13624 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13625 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13626 return Builder.CreateTrunc(Ops[0], Int16Ty);
13627 }
13628 case NEON::BI__builtin_neon_vaddlv_u16: {
13629 Int = Intrinsic::aarch64_neon_uaddlv;
13630 Ty = Int32Ty;
13631 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13632 llvm::Type *Tys[2] = { Ty, VTy };
13633 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13634 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13635 }
13636 case NEON::BI__builtin_neon_vaddlvq_u8: {
13637 Int = Intrinsic::aarch64_neon_uaddlv;
13638 Ty = Int32Ty;
13639 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13640 llvm::Type *Tys[2] = { Ty, VTy };
13641 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13642 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13643 return Builder.CreateTrunc(Ops[0], Int16Ty);
13644 }
13645 case NEON::BI__builtin_neon_vaddlvq_u16: {
13646 Int = Intrinsic::aarch64_neon_uaddlv;
13647 Ty = Int32Ty;
13648 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13649 llvm::Type *Tys[2] = { Ty, VTy };
13650 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13651 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13652 }
13653 case NEON::BI__builtin_neon_vaddlv_s8: {
13654 Int = Intrinsic::aarch64_neon_saddlv;
13655 Ty = Int32Ty;
13656 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13657 llvm::Type *Tys[2] = { Ty, VTy };
13658 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13659 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13660 return Builder.CreateTrunc(Ops[0], Int16Ty);
13661 }
13662 case NEON::BI__builtin_neon_vaddlv_s16: {
13663 Int = Intrinsic::aarch64_neon_saddlv;
13664 Ty = Int32Ty;
13665 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13666 llvm::Type *Tys[2] = { Ty, VTy };
13667 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13668 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13669 }
13670 case NEON::BI__builtin_neon_vaddlvq_s8: {
13671 Int = Intrinsic::aarch64_neon_saddlv;
13672 Ty = Int32Ty;
13673 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13674 llvm::Type *Tys[2] = { Ty, VTy };
13675 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13676 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13677 return Builder.CreateTrunc(Ops[0], Int16Ty);
13678 }
13679 case NEON::BI__builtin_neon_vaddlvq_s16: {
13680 Int = Intrinsic::aarch64_neon_saddlv;
13681 Ty = Int32Ty;
13682 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13683 llvm::Type *Tys[2] = { Ty, VTy };
13684 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13685 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13686 }
13687 case NEON::BI__builtin_neon_vsri_n_v:
13688 case NEON::BI__builtin_neon_vsriq_n_v: {
13689 Int = Intrinsic::aarch64_neon_vsri;
13690 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13691 return EmitNeonCall(Intrin, Ops, "vsri_n");
13692 }
13693 case NEON::BI__builtin_neon_vsli_n_v:
13694 case NEON::BI__builtin_neon_vsliq_n_v: {
13695 Int = Intrinsic::aarch64_neon_vsli;
13696 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13697 return EmitNeonCall(Intrin, Ops, "vsli_n");
13698 }
13699 case NEON::BI__builtin_neon_vsra_n_v:
13700 case NEON::BI__builtin_neon_vsraq_n_v:
13701 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13702 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13703 return Builder.CreateAdd(Ops[0], Ops[1]);
13704 case NEON::BI__builtin_neon_vrsra_n_v:
13705 case NEON::BI__builtin_neon_vrsraq_n_v: {
13706 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13707 SmallVector<llvm::Value*,2> TmpOps;
13708 TmpOps.push_back(Ops[1]);
13709 TmpOps.push_back(Ops[2]);
13710 Function* F = CGM.getIntrinsic(Int, Ty);
13711 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13712 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13713 return Builder.CreateAdd(Ops[0], tmp);
13714 }
13715 case NEON::BI__builtin_neon_vld1_v:
13716 case NEON::BI__builtin_neon_vld1q_v: {
13717 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13718 }
13719 case NEON::BI__builtin_neon_vst1_v:
13720 case NEON::BI__builtin_neon_vst1q_v:
13721 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13722 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13723 case NEON::BI__builtin_neon_vld1_lane_v:
13724 case NEON::BI__builtin_neon_vld1q_lane_v: {
13725 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13726 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13727 PtrOp0.getAlignment());
13728 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13729 }
13730 case NEON::BI__builtin_neon_vldap1_lane_s64:
13731 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13732 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13733 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13734 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13735 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13736 Ops[0] = LI;
13737 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13738 }
13739 case NEON::BI__builtin_neon_vld1_dup_v:
13740 case NEON::BI__builtin_neon_vld1q_dup_v: {
13741 Value *V = PoisonValue::get(Ty);
13742 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13743 PtrOp0.getAlignment());
13744 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13745 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13746 return EmitNeonSplat(Ops[0], CI);
13747 }
13748 case NEON::BI__builtin_neon_vst1_lane_v:
13749 case NEON::BI__builtin_neon_vst1q_lane_v:
13750 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13751 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13752 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13753 case NEON::BI__builtin_neon_vstl1_lane_s64:
13754 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13755 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13756 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13757 llvm::StoreInst *SI =
13758 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13759 SI->setAtomic(llvm::AtomicOrdering::Release);
13760 return SI;
13761 }
13762 case NEON::BI__builtin_neon_vld2_v:
13763 case NEON::BI__builtin_neon_vld2q_v: {
13764 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13765 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13766 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13767 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13768 }
13769 case NEON::BI__builtin_neon_vld3_v:
13770 case NEON::BI__builtin_neon_vld3q_v: {
13771 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13772 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13773 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13774 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13775 }
13776 case NEON::BI__builtin_neon_vld4_v:
13777 case NEON::BI__builtin_neon_vld4q_v: {
13778 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13779 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13780 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13781 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13782 }
13783 case NEON::BI__builtin_neon_vld2_dup_v:
13784 case NEON::BI__builtin_neon_vld2q_dup_v: {
13785 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13786 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13787 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13788 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13789 }
13790 case NEON::BI__builtin_neon_vld3_dup_v:
13791 case NEON::BI__builtin_neon_vld3q_dup_v: {
13792 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13793 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13794 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13795 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13796 }
13797 case NEON::BI__builtin_neon_vld4_dup_v:
13798 case NEON::BI__builtin_neon_vld4q_dup_v: {
13799 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13800 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13801 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13802 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13803 }
13804 case NEON::BI__builtin_neon_vld2_lane_v:
13805 case NEON::BI__builtin_neon_vld2q_lane_v: {
13806 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13807 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13808 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13809 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13810 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13811 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13812 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13813 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13814 }
13815 case NEON::BI__builtin_neon_vld3_lane_v:
13816 case NEON::BI__builtin_neon_vld3q_lane_v: {
13817 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13818 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13819 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13820 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13821 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13822 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13823 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13824 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13825 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13826 }
13827 case NEON::BI__builtin_neon_vld4_lane_v:
13828 case NEON::BI__builtin_neon_vld4q_lane_v: {
13829 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13830 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13831 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13832 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13833 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13834 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13835 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13836 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13837 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13838 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13839 }
13840 case NEON::BI__builtin_neon_vst2_v:
13841 case NEON::BI__builtin_neon_vst2q_v: {
13842 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13843 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13844 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13845 Ops, "");
13846 }
13847 case NEON::BI__builtin_neon_vst2_lane_v:
13848 case NEON::BI__builtin_neon_vst2q_lane_v: {
13849 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13850 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13851 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13852 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13853 Ops, "");
13854 }
13855 case NEON::BI__builtin_neon_vst3_v:
13856 case NEON::BI__builtin_neon_vst3q_v: {
13857 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13858 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13859 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13860 Ops, "");
13861 }
13862 case NEON::BI__builtin_neon_vst3_lane_v:
13863 case NEON::BI__builtin_neon_vst3q_lane_v: {
13864 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13865 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13866 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13867 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13868 Ops, "");
13869 }
13870 case NEON::BI__builtin_neon_vst4_v:
13871 case NEON::BI__builtin_neon_vst4q_v: {
13872 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13873 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13874 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13875 Ops, "");
13876 }
13877 case NEON::BI__builtin_neon_vst4_lane_v:
13878 case NEON::BI__builtin_neon_vst4q_lane_v: {
13879 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13880 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13881 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13882 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13883 Ops, "");
13884 }
13885 case NEON::BI__builtin_neon_vtrn_v:
13886 case NEON::BI__builtin_neon_vtrnq_v: {
13887 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13888 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13889 Value *SV = nullptr;
13890
13891 for (unsigned vi = 0; vi != 2; ++vi) {
13892 SmallVector<int, 16> Indices;
13893 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13894 Indices.push_back(i+vi);
13895 Indices.push_back(i+e+vi);
13896 }
13897 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13898 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13899 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13900 }
13901 return SV;
13902 }
13903 case NEON::BI__builtin_neon_vuzp_v:
13904 case NEON::BI__builtin_neon_vuzpq_v: {
13905 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13906 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13907 Value *SV = nullptr;
13908
13909 for (unsigned vi = 0; vi != 2; ++vi) {
13910 SmallVector<int, 16> Indices;
13911 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13912 Indices.push_back(2*i+vi);
13913
13914 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13915 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13916 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13917 }
13918 return SV;
13919 }
13920 case NEON::BI__builtin_neon_vzip_v:
13921 case NEON::BI__builtin_neon_vzipq_v: {
13922 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13923 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13924 Value *SV = nullptr;
13925
13926 for (unsigned vi = 0; vi != 2; ++vi) {
13927 SmallVector<int, 16> Indices;
13928 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13929 Indices.push_back((i + vi*e) >> 1);
13930 Indices.push_back(((i + vi*e) >> 1)+e);
13931 }
13932 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13933 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13934 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13935 }
13936 return SV;
13937 }
13938 case NEON::BI__builtin_neon_vqtbl1q_v: {
13939 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13940 Ops, "vtbl1");
13941 }
13942 case NEON::BI__builtin_neon_vqtbl2q_v: {
13943 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13944 Ops, "vtbl2");
13945 }
13946 case NEON::BI__builtin_neon_vqtbl3q_v: {
13947 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13948 Ops, "vtbl3");
13949 }
13950 case NEON::BI__builtin_neon_vqtbl4q_v: {
13951 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13952 Ops, "vtbl4");
13953 }
13954 case NEON::BI__builtin_neon_vqtbx1q_v: {
13955 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13956 Ops, "vtbx1");
13957 }
13958 case NEON::BI__builtin_neon_vqtbx2q_v: {
13959 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13960 Ops, "vtbx2");
13961 }
13962 case NEON::BI__builtin_neon_vqtbx3q_v: {
13963 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13964 Ops, "vtbx3");
13965 }
13966 case NEON::BI__builtin_neon_vqtbx4q_v: {
13967 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13968 Ops, "vtbx4");
13969 }
13970 case NEON::BI__builtin_neon_vsqadd_v:
13971 case NEON::BI__builtin_neon_vsqaddq_v: {
13972 Int = Intrinsic::aarch64_neon_usqadd;
13973 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13974 }
13975 case NEON::BI__builtin_neon_vuqadd_v:
13976 case NEON::BI__builtin_neon_vuqaddq_v: {
13977 Int = Intrinsic::aarch64_neon_suqadd;
13978 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13979 }
13980
13981 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
13982 case NEON::BI__builtin_neon_vluti2_laneq_f16:
13983 case NEON::BI__builtin_neon_vluti2_laneq_p16:
13984 case NEON::BI__builtin_neon_vluti2_laneq_p8:
13985 case NEON::BI__builtin_neon_vluti2_laneq_s16:
13986 case NEON::BI__builtin_neon_vluti2_laneq_s8:
13987 case NEON::BI__builtin_neon_vluti2_laneq_u16:
13988 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
13989 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13990 llvm::Type *Tys[2];
13991 Tys[0] = Ty;
13992 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13993 /*isQuad*/ false));
13994 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13995 }
13996 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
13997 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
13998 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
13999 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
14000 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
14001 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
14002 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
14003 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
14004 Int = Intrinsic::aarch64_neon_vluti2_laneq;
14005 llvm::Type *Tys[2];
14006 Tys[0] = Ty;
14007 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14008 /*isQuad*/ true));
14009 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
14010 }
14011 case NEON::BI__builtin_neon_vluti2_lane_bf16:
14012 case NEON::BI__builtin_neon_vluti2_lane_f16:
14013 case NEON::BI__builtin_neon_vluti2_lane_p16:
14014 case NEON::BI__builtin_neon_vluti2_lane_p8:
14015 case NEON::BI__builtin_neon_vluti2_lane_s16:
14016 case NEON::BI__builtin_neon_vluti2_lane_s8:
14017 case NEON::BI__builtin_neon_vluti2_lane_u16:
14018 case NEON::BI__builtin_neon_vluti2_lane_u8: {
14019 Int = Intrinsic::aarch64_neon_vluti2_lane;
14020 llvm::Type *Tys[2];
14021 Tys[0] = Ty;
14022 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14023 /*isQuad*/ false));
14024 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
14025 }
14026 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
14027 case NEON::BI__builtin_neon_vluti2q_lane_f16:
14028 case NEON::BI__builtin_neon_vluti2q_lane_p16:
14029 case NEON::BI__builtin_neon_vluti2q_lane_p8:
14030 case NEON::BI__builtin_neon_vluti2q_lane_s16:
14031 case NEON::BI__builtin_neon_vluti2q_lane_s8:
14032 case NEON::BI__builtin_neon_vluti2q_lane_u16:
14033 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
14034 Int = Intrinsic::aarch64_neon_vluti2_lane;
14035 llvm::Type *Tys[2];
14036 Tys[0] = Ty;
14037 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14038 /*isQuad*/ true));
14039 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
14040 }
14041 case NEON::BI__builtin_neon_vluti4q_lane_p8:
14042 case NEON::BI__builtin_neon_vluti4q_lane_s8:
14043 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
14044 Int = Intrinsic::aarch64_neon_vluti4q_lane;
14045 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
14046 }
14047 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
14048 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
14049 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
14050 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
14051 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
14052 }
14053 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
14054 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
14055 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
14056 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
14057 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
14058 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
14059 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
14060 }
14061 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
14062 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
14063 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
14064 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
14065 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
14066 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
14067 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
14068 }
14069
14070 case NEON::BI__builtin_neon_vamin_f16:
14071 case NEON::BI__builtin_neon_vaminq_f16:
14072 case NEON::BI__builtin_neon_vamin_f32:
14073 case NEON::BI__builtin_neon_vaminq_f32:
14074 case NEON::BI__builtin_neon_vaminq_f64: {
14075 Int = Intrinsic::aarch64_neon_famin;
14076 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
14077 }
14078 case NEON::BI__builtin_neon_vamax_f16:
14079 case NEON::BI__builtin_neon_vamaxq_f16:
14080 case NEON::BI__builtin_neon_vamax_f32:
14081 case NEON::BI__builtin_neon_vamaxq_f32:
14082 case NEON::BI__builtin_neon_vamaxq_f64: {
14083 Int = Intrinsic::aarch64_neon_famax;
14084 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
14085 }
14086 case NEON::BI__builtin_neon_vscale_f16:
14087 case NEON::BI__builtin_neon_vscaleq_f16:
14088 case NEON::BI__builtin_neon_vscale_f32:
14089 case NEON::BI__builtin_neon_vscaleq_f32:
14090 case NEON::BI__builtin_neon_vscaleq_f64: {
14091 Int = Intrinsic::aarch64_neon_fp8_fscale;
14092 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
14093 }
14094 }
14095}
14096
14097Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
14098 const CallExpr *E) {
14099 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
14100 BuiltinID == BPF::BI__builtin_btf_type_id ||
14101 BuiltinID == BPF::BI__builtin_preserve_type_info ||
14102 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
14103 "unexpected BPF builtin");
14104
14105 // A sequence number, injected into IR builtin functions, to
14106 // prevent CSE when the only difference between two otherwise
14107 // identical functions is their debuginfo metadata.
14108 static uint32_t BuiltinSeqNum;
14109
14110 switch (BuiltinID) {
14111 default:
14112 llvm_unreachable("Unexpected BPF builtin");
14113 case BPF::BI__builtin_preserve_field_info: {
14114 const Expr *Arg = E->getArg(0);
14115 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
14116
14117 if (!getDebugInfo()) {
14118 CGM.Error(E->getExprLoc(),
14119 "using __builtin_preserve_field_info() without -g");
14120 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14121 : EmitLValue(Arg).emitRawPointer(*this);
14122 }
14123
14124 // Enable underlying preserve_*_access_index() generation.
14125 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
14126 IsInPreservedAIRegion = true;
14127 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14128 : EmitLValue(Arg).emitRawPointer(*this);
14129 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
14130
14131 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14132 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
14133
14134 // Build the IR for the preserve_field_info intrinsic.
14135 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getOrInsertDeclaration(
14136 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
14137 {FieldAddr->getType()});
14138 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
14139 }
14140 case BPF::BI__builtin_btf_type_id:
14141 case BPF::BI__builtin_preserve_type_info: {
14142 if (!getDebugInfo()) {
14143 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14144 return nullptr;
14145 }
14146
14147 const Expr *Arg0 = E->getArg(0);
14148 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14149 Arg0->getType(), Arg0->getExprLoc());
14150
14151 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14152 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14153 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14154
14155 llvm::Function *FnDecl;
14156 if (BuiltinID == BPF::BI__builtin_btf_type_id)
14157 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14158 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
14159 else
14160 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14161 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
14162 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
14163 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14164 return Fn;
14165 }
14166 case BPF::BI__builtin_preserve_enum_value: {
14167 if (!getDebugInfo()) {
14168 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14169 return nullptr;
14170 }
14171
14172 const Expr *Arg0 = E->getArg(0);
14173 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14174 Arg0->getType(), Arg0->getExprLoc());
14175
14176 // Find enumerator
14177 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
14178 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
14179 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
14180 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
14181
14182 auto InitVal = Enumerator->getInitVal();
14183 std::string InitValStr;
14184 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
14185 InitValStr = std::to_string(InitVal.getSExtValue());
14186 else
14187 InitValStr = std::to_string(InitVal.getZExtValue());
14188 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
14189 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
14190
14191 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14192 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14193 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14194
14195 llvm::Function *IntrinsicFn = llvm::Intrinsic::getOrInsertDeclaration(
14196 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
14197 CallInst *Fn =
14198 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
14199 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14200 return Fn;
14201 }
14202 }
14203}
14204
14205llvm::Value *CodeGenFunction::
14206 BuildVector(ArrayRef<llvm::Value*> Ops) {
14207 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
14208 "Not a power-of-two sized vector!");
14209 bool AllConstants = true;
14210 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
14211 AllConstants &= isa<Constant>(Ops[i]);
14212
14213 // If this is a constant vector, create a ConstantVector.
14214 if (AllConstants) {
14215 SmallVector<llvm::Constant*, 16> CstOps;
14216 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14217 CstOps.push_back(cast<Constant>(Ops[i]));
14218 return llvm::ConstantVector::get(CstOps);
14219 }
14220
14221 // Otherwise, insertelement the values to build the vector.
14222 Value *Result = llvm::PoisonValue::get(
14223 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
14224
14225 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14226 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
14227
14228 return Result;
14229}
14230
14231// Convert the mask from an integer type to a vector of i1.
14232 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
14233 unsigned NumElts) {
14234
14235 auto *MaskTy = llvm::FixedVectorType::get(
14236 CGF.Builder.getInt1Ty(),
14237 cast<IntegerType>(Mask->getType())->getBitWidth());
14238 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
14239
14240 // If we have fewer than 8 elements, then the starting mask was an i8 and
14241 // we need to extract down to the right number of elements.
14242 if (NumElts < 8) {
14243 int Indices[4];
14244 for (unsigned i = 0; i != NumElts; ++i)
14245 Indices[i] = i;
14246 MaskVec = CGF.Builder.CreateShuffleVector(
14247 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
14248 }
14249 return MaskVec;
14250}
14251
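// The next several helpers lower the AVX-512 masked memory builtins onto the
// target-independent llvm.masked.* intrinsics, widening the integer mask
// operand to an <N x i1> vector via getMaskVecValue above. As an illustrative
// sketch (assumed shape, not text produced verbatim by this file), an
// 8 x i32 masked store ends up as roughly:
//   %m = bitcast i8 %mask to <8 x i1>
//   call void @llvm.masked.store.v8i32.p0(<8 x i32> %data, ptr %p,
//                                         i32 4, <8 x i1> %m)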
14252 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14253 Align Alignment) {
14254 Value *Ptr = Ops[0];
14255
14256 Value *MaskVec = getMaskVecValue(
14257 CGF, Ops[2],
14258 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
14259
14260 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
14261}
14262
14263 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14264 Align Alignment) {
14265 llvm::Type *Ty = Ops[1]->getType();
14266 Value *Ptr = Ops[0];
14267
14268 Value *MaskVec = getMaskVecValue(
14269 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
14270
14271 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
14272}
14273
14274 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
14275 ArrayRef<Value *> Ops) {
14276 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
14277 Value *Ptr = Ops[0];
14278
14279 Value *MaskVec = getMaskVecValue(
14280 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
14281
14282 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
14283 ResultTy);
14284 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
14285}
14286
14287 static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
14288 ArrayRef<Value *> Ops,
14289 bool IsCompress) {
14290 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14291
14292 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14293
14294 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
14295 : Intrinsic::x86_avx512_mask_expand;
14296 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
14297 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
14298}
14299
14300 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
14301 ArrayRef<Value *> Ops) {
14302 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14303 Value *Ptr = Ops[0];
14304
14305 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14306
14307 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
14308 ResultTy);
14309 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
14310}
14311
14312static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
14313 ArrayRef<Value *> Ops,
14314 bool InvertLHS = false) {
14315 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14316 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
14317 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
14318
14319 if (InvertLHS)
14320 LHS = CGF.Builder.CreateNot(LHS);
14321
14322 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
14323 Ops[0]->getType());
14324}
14325
14326 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
14327 Value *Amt, bool IsRight) {
14328 llvm::Type *Ty = Op0->getType();
14329
14330 // The amount may be a scalar immediate, in which case create a splat vector.
14331 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
14332 // we only care about the lowest log2 bits anyway.
14333 if (Amt->getType() != Ty) {
14334 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
14335 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
14336 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
14337 }
14338
14339 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
14340 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
14341 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
14342}
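// Illustrative example (assumed typical call pattern, not shown in this file):
// the rotate builtins funnel-shift a value with itself, so a 32-bit
// rotate-left by 5 over an 8 x i32 vector becomes roughly
//   %r = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x,
//                                        <8 x i32> splat (i32 5))
// and IsRight selects @llvm.fshr instead of @llvm.fshl.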
14343
14344 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14345 bool IsSigned) {
14346 Value *Op0 = Ops[0];
14347 Value *Op1 = Ops[1];
14348 llvm::Type *Ty = Op0->getType();
14349 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14350
14351 CmpInst::Predicate Pred;
14352 switch (Imm) {
14353 case 0x0:
14354 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
14355 break;
14356 case 0x1:
14357 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
14358 break;
14359 case 0x2:
14360 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
14361 break;
14362 case 0x3:
14363 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
14364 break;
14365 case 0x4:
14366 Pred = ICmpInst::ICMP_EQ;
14367 break;
14368 case 0x5:
14369 Pred = ICmpInst::ICMP_NE;
14370 break;
14371 case 0x6:
14372 return llvm::Constant::getNullValue(Ty); // FALSE
14373 case 0x7:
14374 return llvm::Constant::getAllOnesValue(Ty); // TRUE
14375 default:
14376 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
14377 }
14378
14379 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
14380 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
14381 return Res;
14382}
14383
14384 static Value *EmitX86Select(CodeGenFunction &CGF,
14385 Value *Mask, Value *Op0, Value *Op1) {
14386
14387 // If the mask is all ones just return first argument.
14388 if (const auto *C = dyn_cast<Constant>(Mask))
14389 if (C->isAllOnesValue())
14390 return Op0;
14391
14392 Mask = getMaskVecValue(
14393 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
14394
14395 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14396}
14397
14398 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
14399 Value *Mask, Value *Op0, Value *Op1) {
14400 // If the mask is all ones just return first argument.
14401 if (const auto *C = dyn_cast<Constant>(Mask))
14402 if (C->isAllOnesValue())
14403 return Op0;
14404
14405 auto *MaskTy = llvm::FixedVectorType::get(
14406 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
14407 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
14408 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
14409 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14410}
14411
14412 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
14413 unsigned NumElts, Value *MaskIn) {
14414 if (MaskIn) {
14415 const auto *C = dyn_cast<Constant>(MaskIn);
14416 if (!C || !C->isAllOnesValue())
14417 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
14418 }
14419
14420 if (NumElts < 8) {
14421 int Indices[8];
14422 for (unsigned i = 0; i != NumElts; ++i)
14423 Indices[i] = i;
14424 for (unsigned i = NumElts; i != 8; ++i)
14425 Indices[i] = i % NumElts + NumElts;
14426 Cmp = CGF.Builder.CreateShuffleVector(
14427 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
14428 }
14429
14430 return CGF.Builder.CreateBitCast(Cmp,
14431 IntegerType::get(CGF.getLLVMContext(),
14432 std::max(NumElts, 8U)));
14433}
14434
14435 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
14436 bool Signed, ArrayRef<Value *> Ops) {
14437 assert((Ops.size() == 2 || Ops.size() == 4) &&
14438 "Unexpected number of arguments");
14439 unsigned NumElts =
14440 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14441 Value *Cmp;
14442
14443 if (CC == 3) {
14444 Cmp = Constant::getNullValue(
14445 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14446 } else if (CC == 7) {
14447 Cmp = Constant::getAllOnesValue(
14448 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14449 } else {
14450 ICmpInst::Predicate Pred;
14451 switch (CC) {
14452 default: llvm_unreachable("Unknown condition code");
14453 case 0: Pred = ICmpInst::ICMP_EQ; break;
14454 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
14455 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
14456 case 4: Pred = ICmpInst::ICMP_NE; break;
14457 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
14458 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
14459 }
14460 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
14461 }
14462
14463 Value *MaskIn = nullptr;
14464 if (Ops.size() == 4)
14465 MaskIn = Ops[3];
14466
14467 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
14468}
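// Note: the CC immediate decoded above follows the AVX-512 integer-compare
// encoding used by the intrinsics headers (_MM_CMPINT_*): 0 = EQ, 1 = LT,
// 2 = LE, 3 = always-false, 4 = NE, 5 = GE (NLT), 6 = GT (NLE),
// 7 = always-true.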
14469
14470 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
14471 Value *Zero = Constant::getNullValue(In->getType());
14472 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
14473}
14474
14475 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
14476 ArrayRef<Value *> Ops, bool IsSigned) {
14477 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
14478 llvm::Type *Ty = Ops[1]->getType();
14479
14480 Value *Res;
14481 if (Rnd != 4) {
14482 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
14483 : Intrinsic::x86_avx512_uitofp_round;
14484 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
14485 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
14486 } else {
14487 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14488 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
14489 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
14490 }
14491
14492 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14493}
14494
14495// Lowers X86 FMA intrinsics to IR.
14496 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14497 ArrayRef<Value *> Ops, unsigned BuiltinID,
14498 bool IsAddSub) {
14499
14500 bool Subtract = false;
14501 Intrinsic::ID IID = Intrinsic::not_intrinsic;
14502 switch (BuiltinID) {
14503 default: break;
14504 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14505 Subtract = true;
14506 [[fallthrough]];
14507 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14508 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14509 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14510 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
14511 break;
14512 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14513 Subtract = true;
14514 [[fallthrough]];
14515 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14516 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14517 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14518 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
14519 break;
14520 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14521 Subtract = true;
14522 [[fallthrough]];
14523 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14524 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14525 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14526 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
14527 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14528 Subtract = true;
14529 [[fallthrough]];
14530 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14531 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14532 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14533 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
14534 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14535 Subtract = true;
14536 [[fallthrough]];
14537 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14538 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14539 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14540 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
14541 break;
14542 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14543 Subtract = true;
14544 [[fallthrough]];
14545 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14546 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14547 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14548 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
14549 break;
14550 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14551 Subtract = true;
14552 LLVM_FALLTHROUGH;
14553 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14554 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14555 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14556 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
14557 break;
14558 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14559 Subtract = true;
14560 LLVM_FALLTHROUGH;
14561 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14562 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14563 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14564 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
14565 break;
14566 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14567 Subtract = true;
14568 LLVM_FALLTHROUGH;
14569 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14570 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14571 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14572 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
14573 break;
14574 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14575 Subtract = true;
14576 LLVM_FALLTHROUGH;
14577 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14578 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14579 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14580 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
14581 break;
14582 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14583 Subtract = true;
14584 LLVM_FALLTHROUGH;
14585 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14586 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14587 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14588 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
14589 break;
14590 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14591 Subtract = true;
14592 LLVM_FALLTHROUGH;
14593 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14594 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14595 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14596 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
14597 break;
14598 }
14599
14600 Value *A = Ops[0];
14601 Value *B = Ops[1];
14602 Value *C = Ops[2];
14603
14604 if (Subtract)
14605 C = CGF.Builder.CreateFNeg(C);
14606
14607 Value *Res;
14608
14609 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
14610 if (IID != Intrinsic::not_intrinsic &&
14611 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14612 IsAddSub)) {
14613 Function *Intr = CGF.CGM.getIntrinsic(IID);
14614 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14615 } else {
14616 llvm::Type *Ty = A->getType();
14617 Function *FMA;
14618 if (CGF.Builder.getIsFPConstrained()) {
14619 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14620 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14621 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14622 } else {
14623 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14624 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14625 }
14626 }
14627
14628 // Handle any required masking.
14629 Value *MaskFalseVal = nullptr;
14630 switch (BuiltinID) {
14631 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14632 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14633 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14634 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14635 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14636 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14637 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14638 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14639 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14640 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14641 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14642 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14643 MaskFalseVal = Ops[0];
14644 break;
14645 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14646 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14647 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14648 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14649 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14650 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14651 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14652 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14653 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14654 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14655 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14656 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14657 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14658 break;
14659 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14660 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14661 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14662 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14663 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14664 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14665 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14666 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14667 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14668 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14669 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14670 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14671 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14672 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14673 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14674 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14675 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14676 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14677 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14678 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14679 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14680 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14681 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14682 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14683 MaskFalseVal = Ops[2];
14684 break;
14685 }
14686
14687 if (MaskFalseVal)
14688 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14689
14690 return Res;
14691}
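// Summary of the masking above: the *_mask forms keep the first source
// operand (Ops[0]) in masked-off lanes, the *_maskz forms zero those lanes,
// and the *_mask3 forms keep the accumulator operand (Ops[2]); in all cases
// the final blend is done by EmitX86Select on the mask in Ops[3].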
14692
14693 static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14694 MutableArrayRef<Value *> Ops, Value *Upper,
14695 bool ZeroMask = false, unsigned PTIdx = 0,
14696 bool NegAcc = false) {
14697 unsigned Rnd = 4;
14698 if (Ops.size() > 4)
14699 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14700
14701 if (NegAcc)
14702 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14703
14704 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14705 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14706 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14707 Value *Res;
14708 if (Rnd != 4) {
14709 Intrinsic::ID IID;
14710
14711 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14712 case 16:
14713 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14714 break;
14715 case 32:
14716 IID = Intrinsic::x86_avx512_vfmadd_f32;
14717 break;
14718 case 64:
14719 IID = Intrinsic::x86_avx512_vfmadd_f64;
14720 break;
14721 default:
14722 llvm_unreachable("Unexpected size");
14723 }
14724 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14725 {Ops[0], Ops[1], Ops[2], Ops[4]});
14726 } else if (CGF.Builder.getIsFPConstrained()) {
14727 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14728 Function *FMA = CGF.CGM.getIntrinsic(
14729 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14730 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14731 } else {
14732 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14733 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14734 }
14735 // If we have more than 3 arguments, we need to do masking.
14736 if (Ops.size() > 3) {
14737 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14738 : Ops[PTIdx];
14739
14740 // If we negated the accumulator and it is the PassThru value, we need to
14741 // bypass the negate. Conveniently, Upper should be the same thing in this
14742 // case.
14743 if (NegAcc && PTIdx == 2)
14744 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14745
14746 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14747 }
14748 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14749}
14750
14751static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14752 ArrayRef<Value *> Ops) {
14753 llvm::Type *Ty = Ops[0]->getType();
14754 // Arguments have a vXi32 type so cast to vXi64.
14755 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14756 Ty->getPrimitiveSizeInBits() / 64);
14757 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14758 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14759
14760 if (IsSigned) {
14761 // Shift left then arithmetic shift right.
14762 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14763 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14764 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14765 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14766 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14767 } else {
14768 // Clear the upper bits.
14769 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14770 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14771 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14772 }
14773
14774 return CGF.Builder.CreateMul(LHS, RHS);
14775}
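// Worked example (illustrative): _mm_mul_epu32 multiplies the even-numbered
// 32-bit elements of each operand as unsigned values into 64-bit results.
// After the bitcast to a vXi64 type, the 0xffffffff mask keeps only the low
// 32 bits of every 64-bit lane, while the shl/ashr pair performs the
// corresponding sign extension for the signed pmuldq-style builtins.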
14776
14777// Emit a masked pternlog intrinsic. This only exists because the header has to
14778// use a macro and we aren't able to pass the input argument to a pternlog
14779// builtin and a select builtin without evaluating it twice.
14780static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14781 ArrayRef<Value *> Ops) {
14782 llvm::Type *Ty = Ops[0]->getType();
14783
14784 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14785 unsigned EltWidth = Ty->getScalarSizeInBits();
14786 Intrinsic::ID IID;
14787 if (VecWidth == 128 && EltWidth == 32)
14788 IID = Intrinsic::x86_avx512_pternlog_d_128;
14789 else if (VecWidth == 256 && EltWidth == 32)
14790 IID = Intrinsic::x86_avx512_pternlog_d_256;
14791 else if (VecWidth == 512 && EltWidth == 32)
14792 IID = Intrinsic::x86_avx512_pternlog_d_512;
14793 else if (VecWidth == 128 && EltWidth == 64)
14794 IID = Intrinsic::x86_avx512_pternlog_q_128;
14795 else if (VecWidth == 256 && EltWidth == 64)
14796 IID = Intrinsic::x86_avx512_pternlog_q_256;
14797 else if (VecWidth == 512 && EltWidth == 64)
14798 IID = Intrinsic::x86_avx512_pternlog_q_512;
14799 else
14800 llvm_unreachable("Unexpected intrinsic");
14801
14802 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14803 Ops.drop_back());
14804 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14805 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14806}
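// Note: Ops here are (A, B, C, imm8, mask); drop_back() strips only the mask,
// so the pternlog intrinsic still receives the 8-bit immediate that encodes
// the three-input truth table, and the mask is applied afterwards through
// EmitX86Select.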
14807
14808 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14809 llvm::Type *DstTy) {
14810 unsigned NumberOfElements =
14811 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14812 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14813 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14814}
14815
14816Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14817 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14818 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14819 return EmitX86CpuIs(CPUStr);
14820}
14821
14822 // Convert F16 halves to floats.
14823 static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14824 ArrayRef<Value *> Ops,
14825 llvm::Type *DstTy) {
14826 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14827 "Unknown cvtph2ps intrinsic");
14828
14829 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14830 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14831 Function *F =
14832 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14833 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14834 }
14835
14836 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14837 Value *Src = Ops[0];
14838
14839 // Extract the subvector.
14840 if (NumDstElts !=
14841 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14842 assert(NumDstElts == 4 && "Unexpected vector size");
14843 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14844 }
14845
14846 // Bitcast from vXi16 to vXf16.
14847 auto *HalfTy = llvm::FixedVectorType::get(
14848 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14849 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14850
14851 // Perform the fp-extension.
14852 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14853
14854 if (Ops.size() >= 3)
14855 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14856 return Res;
14857}
14858
14859Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14860
14861 llvm::Type *Int32Ty = Builder.getInt32Ty();
14862
14863 // Matching the struct layout from the compiler-rt/libgcc structure that is
14864 // filled in:
14865 // unsigned int __cpu_vendor;
14866 // unsigned int __cpu_type;
14867 // unsigned int __cpu_subtype;
14868 // unsigned int __cpu_features[1];
14869 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14870 llvm::ArrayType::get(Int32Ty, 1));
14871
14872 // Grab the global __cpu_model.
14873 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14874 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14875
14876 // Calculate the index needed to access the correct field based on the
14877 // range. Also adjust the expected value.
14878 unsigned Index;
14879 unsigned Value;
14880 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14881#define X86_VENDOR(ENUM, STRING) \
14882 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14883#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14884 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14885#define X86_CPU_TYPE(ENUM, STR) \
14886 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14887#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14888 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14889#define X86_CPU_SUBTYPE(ENUM, STR) \
14890 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14891#include "llvm/TargetParser/X86TargetParser.def"
14892 .Default({0, 0});
14893 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14894
14895 // Grab the appropriate field from __cpu_model.
14896 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14897 ConstantInt::get(Int32Ty, Index)};
14898 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14899 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14900 CharUnits::fromQuantity(4));
14901
14902 // Check the value of the field against the requested value.
14903 return Builder.CreateICmpEQ(CpuValue,
14904 llvm::ConstantInt::get(Int32Ty, Value));
14905}
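// Illustrative result (assuming the compiler-rt/libgcc layout described
// above): __builtin_cpu_is("amd") lowers to a load of __cpu_model's
// __cpu_vendor field followed by an integer compare against the
// llvm::X86::VENDOR_* constant chosen by the StringSwitch, roughly:
//   %v = load i32, ptr @__cpu_model
//   %c = icmp eq i32 %v, <vendor constant>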
14906
14907Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14908 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14909 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14910 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14911 return Builder.getFalse();
14912 return EmitX86CpuSupports(FeatureStr);
14913}
14914
14915Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14916 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14917}
14918
14919llvm::Value *
14920CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14921 Value *Result = Builder.getTrue();
14922 if (FeatureMask[0] != 0) {
14923 // Matching the struct layout from the compiler-rt/libgcc structure that is
14924 // filled in:
14925 // unsigned int __cpu_vendor;
14926 // unsigned int __cpu_type;
14927 // unsigned int __cpu_subtype;
14928 // unsigned int __cpu_features[1];
14929 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14930 llvm::ArrayType::get(Int32Ty, 1));
14931
14932 // Grab the global __cpu_model.
14933 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14934 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14935
14936 // Grab the first (0th) element from the field __cpu_features off of the
14937 // global in the struct STy.
14938 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14939 Builder.getInt32(0)};
14940 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14941 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14942 CharUnits::fromQuantity(4));
14943
14944 // Check the value of the bit corresponding to the feature requested.
14945 Value *Mask = Builder.getInt32(FeatureMask[0]);
14946 Value *Bitset = Builder.CreateAnd(Features, Mask);
14947 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14948 Result = Builder.CreateAnd(Result, Cmp);
14949 }
14950
14951 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14952 llvm::Constant *CpuFeatures2 =
14953 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14954 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14955 for (int i = 1; i != 4; ++i) {
14956 const uint32_t M = FeatureMask[i];
14957 if (!M)
14958 continue;
14959 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14960 Value *Features = Builder.CreateAlignedLoad(
14961 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14962 CharUnits::fromQuantity(4));
14963 // Check the value of the bit corresponding to the feature requested.
14964 Value *Mask = Builder.getInt32(M);
14965 Value *Bitset = Builder.CreateAnd(Features, Mask);
14966 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14967 Result = Builder.CreateAnd(Result, Cmp);
14968 }
14969
14970 return Result;
14971}
14972
14973Value *CodeGenFunction::EmitAArch64CpuInit() {
14974 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14975 llvm::FunctionCallee Func =
14976 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14977 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14978 cast<llvm::GlobalValue>(Func.getCallee())
14979 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14980 return Builder.CreateCall(Func);
14981}
14982
14983 Value *CodeGenFunction::EmitRISCVCpuInit() {
14984 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
14985 llvm::FunctionCallee Func =
14986 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
14987 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
14988 CalleeGV->setDSOLocal(true);
14989 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14990 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
14991}
14992
14993Value *CodeGenFunction::EmitX86CpuInit() {
14994 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14995 /*Variadic*/ false);
14996 llvm::FunctionCallee Func =
14997 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14998 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14999 cast<llvm::GlobalValue>(Func.getCallee())
15000 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
15001 return Builder.CreateCall(Func);
15002}
15003
15004Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
15005 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
15006 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
15007 llvm::SmallVector<StringRef, 8> Features;
15008 ArgStr.split(Features, "+");
15009 for (auto &Feature : Features) {
15010 Feature = Feature.trim();
15011 if (!llvm::AArch64::parseFMVExtension(Feature))
15012 return Builder.getFalse();
15013 if (Feature != "default")
15014 Features.push_back(Feature);
15015 }
15016 return EmitAArch64CpuSupports(Features);
15017}
15018
15019llvm::Value *
15020CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15021 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
15022 Value *Result = Builder.getTrue();
15023 if (FeaturesMask != 0) {
15024 // Get features from structure in runtime library
15025 // struct {
15026 // unsigned long long features;
15027 // } __aarch64_cpu_features;
15028 llvm::Type *STy = llvm::StructType::get(Int64Ty);
15029 llvm::Constant *AArch64CPUFeatures =
15030 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
15031 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
15032 llvm::Value *CpuFeatures = Builder.CreateGEP(
15033 STy, AArch64CPUFeatures,
15034 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
15035 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
15036 CharUnits::fromQuantity(8));
15037 Value *Mask = Builder.getInt64(FeaturesMask);
15038 Value *Bitset = Builder.CreateAnd(Features, Mask);
15039 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
15040 Result = Builder.CreateAnd(Result, Cmp);
15041 }
15042 return Result;
15043}
15044
15045 Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
15046
15047 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
15048 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
15049 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
15050 return Builder.getFalse();
15051
15052 return EmitRISCVCpuSupports(ArrayRef<StringRef>(FeatureStr));
15053}
15054
15055static Value *loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder,
15056 CodeGenModule &CGM) {
15057 llvm::Type *Int32Ty = Builder.getInt32Ty();
15058 llvm::Type *Int64Ty = Builder.getInt64Ty();
15059 llvm::ArrayType *ArrayOfInt64Ty =
15060 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
15061 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
15062 llvm::Constant *RISCVFeaturesBits =
15063 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
15064 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(true);
15065 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
15066 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
15067 IndexVal};
15068 Value *Ptr =
15069 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
15070 Value *FeaturesBit =
15071 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
15072 return FeaturesBit;
15073}
15074
15075 Value *CodeGenFunction::EmitRISCVCpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15076 const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
15077 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
15078
15079 for (auto Feat : FeaturesStrs) {
15080 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
15081
15082 // If there is no BitPos for this feature, skip this version.
15083 // A warning is also reported to the user during compilation.
15084 if (BitPos == -1)
15085 return Builder.getFalse();
15086
15087 RequireBitMasks[GroupID] |= (1ULL << BitPos);
15088 }
15089
15090 Value *Result = nullptr;
15091 for (unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
15092 if (RequireBitMasks[Idx] == 0)
15093 continue;
15094
15095 Value *Mask = Builder.getInt64(RequireBitMasks[Idx]);
15096 Value *Bitset =
15097 Builder.CreateAnd(loadRISCVFeatureBits(Idx, Builder, CGM), Mask);
15098 Value *CmpV = Builder.CreateICmpEQ(Bitset, Mask);
15099 Result = (!Result) ? CmpV : Builder.CreateAnd(Result, CmpV);
15100 }
15101
15102 assert(Result && "Should have value here.");
15103
15104 return Result;
15105}
15106
15107Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
15108 const CallExpr *E) {
15109 if (BuiltinID == Builtin::BI__builtin_cpu_is)
15110 return EmitX86CpuIs(E);
15111 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
15112 return EmitX86CpuSupports(E);
15113 if (BuiltinID == Builtin::BI__builtin_cpu_init)
15114 return EmitX86CpuInit();
15115
15116 // Handle MSVC intrinsics before argument evaluation to prevent double
15117 // evaluation.
15118 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
15119 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
15120
15121 SmallVector<Value*, 4> Ops;
15122 bool IsMaskFCmp = false;
15123 bool IsConjFMA = false;
15124
15125 // Find out if any arguments are required to be integer constant expressions.
15126 unsigned ICEArguments = 0;
15127 ASTContext::GetBuiltinTypeError Error;
15128 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
15129 assert(Error == ASTContext::GE_None && "Should not codegen an error");
15130
15131 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
15132 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
15133 }
15134
15135 // These exist so that the builtin that takes an immediate can be bounds
15136 // checked by clang to avoid passing bad immediates to the backend. Since
15137 // AVX has a larger immediate than SSE we would need separate builtins to
15138 // do the different bounds checking. Rather than create a clang-specific
15139 // SSE-only builtin, this implements eight separate builtins to match the
15140 // gcc implementation.
15141 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
15142 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
15143 llvm::Function *F = CGM.getIntrinsic(ID);
15144 return Builder.CreateCall(F, Ops);
15145 };
15146
15147 // For the vector forms of FP comparisons, translate the builtins directly to
15148 // IR.
15149 // TODO: The builtins could be removed if the SSE header files used vector
15150 // extension comparisons directly (vector ordered/unordered may need
15151 // additional support via __builtin_isnan()).
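// For example (a sketch): an ordered-equal packed-float compare routed
// through the lambda below (Pred = FCMP_OEQ, not signaling) lowers roughly to
//   %cmp = fcmp oeq <4 x float> %a, %b
//   %ext = sext <4 x i1> %cmp to <4 x i32>
//   %res = bitcast <4 x i32> %ext to <4 x float>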
15152 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
15153 bool IsSignaling) {
15154 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15155 Value *Cmp;
15156 if (IsSignaling)
15157 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15158 else
15159 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15160 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
15161 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
15162 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
15163 return Builder.CreateBitCast(Sext, FPVecTy);
15164 };
15165
15166 switch (BuiltinID) {
15167 default: return nullptr;
15168 case X86::BI_mm_prefetch: {
15169 Value *Address = Ops[0];
15170 ConstantInt *C = cast<ConstantInt>(Ops[1]);
15171 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
15172 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
15173 Value *Data = ConstantInt::get(Int32Ty, 1);
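// For example (a sketch): _mm_prefetch(p, _MM_HINT_T0) arrives here with the
// immediate 3, which decodes to RW = 0 (read) and Locality = 3, giving
// roughly:
//   call void @llvm.prefetch.p0(ptr %p, i32 0, i32 3, i32 1)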
15174 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
15175 return Builder.CreateCall(F, {Address, RW, Locality, Data});
15176 }
15177 case X86::BI_mm_clflush: {
15178 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
15179 Ops[0]);
15180 }
15181 case X86::BI_mm_lfence: {
15182 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
15183 }
15184 case X86::BI_mm_mfence: {
15185 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
15186 }
15187 case X86::BI_mm_sfence: {
15188 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
15189 }
15190 case X86::BI_mm_pause: {
15191 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
15192 }
15193 case X86::BI__rdtsc: {
15194 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
15195 }
15196 case X86::BI__builtin_ia32_rdtscp: {
15197 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
15198 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15199 Ops[0]);
15200 return Builder.CreateExtractValue(Call, 0);
15201 }
15202 case X86::BI__builtin_ia32_lzcnt_u16:
15203 case X86::BI__builtin_ia32_lzcnt_u32:
15204 case X86::BI__builtin_ia32_lzcnt_u64: {
15205 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15206 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15207 }
15208 case X86::BI__builtin_ia32_tzcnt_u16:
15209 case X86::BI__builtin_ia32_tzcnt_u32:
15210 case X86::BI__builtin_ia32_tzcnt_u64: {
15211 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
15212 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15213 }
15214 case X86::BI__builtin_ia32_undef128:
15215 case X86::BI__builtin_ia32_undef256:
15216 case X86::BI__builtin_ia32_undef512:
15217 // The x86 definition of "undef" is not the same as the LLVM definition
15218 // (PR32176). We leave optimizing away an unnecessary zero constant to the
15219 // IR optimizer and backend.
15220 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
15221 // value, we should use that here instead of a zero.
15222 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15223 case X86::BI__builtin_ia32_vec_ext_v4hi:
15224 case X86::BI__builtin_ia32_vec_ext_v16qi:
15225 case X86::BI__builtin_ia32_vec_ext_v8hi:
15226 case X86::BI__builtin_ia32_vec_ext_v4si:
15227 case X86::BI__builtin_ia32_vec_ext_v4sf:
15228 case X86::BI__builtin_ia32_vec_ext_v2di:
15229 case X86::BI__builtin_ia32_vec_ext_v32qi:
15230 case X86::BI__builtin_ia32_vec_ext_v16hi:
15231 case X86::BI__builtin_ia32_vec_ext_v8si:
15232 case X86::BI__builtin_ia32_vec_ext_v4di: {
15233 unsigned NumElts =
15234 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15235 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15236 Index &= NumElts - 1;
15237 // These builtins exist so we can ensure the index is an ICE and in range.
15238 // Otherwise we could just do this in the header file.
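// For example (a sketch): __builtin_ia32_vec_ext_v8hi(v, 3) masks the index
// to 3 and becomes
//   extractelement <8 x i16> %v, i64 3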
15239 return Builder.CreateExtractElement(Ops[0], Index);
15240 }
15241 case X86::BI__builtin_ia32_vec_set_v4hi:
15242 case X86::BI__builtin_ia32_vec_set_v16qi:
15243 case X86::BI__builtin_ia32_vec_set_v8hi:
15244 case X86::BI__builtin_ia32_vec_set_v4si:
15245 case X86::BI__builtin_ia32_vec_set_v2di:
15246 case X86::BI__builtin_ia32_vec_set_v32qi:
15247 case X86::BI__builtin_ia32_vec_set_v16hi:
15248 case X86::BI__builtin_ia32_vec_set_v8si:
15249 case X86::BI__builtin_ia32_vec_set_v4di: {
15250 unsigned NumElts =
15251 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15252 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15253 Index &= NumElts - 1;
15254 // These builtins exist so we can ensure the index is an ICE and in range.
15255 // Otherwise we could just do this in the header file.
15256 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
15257 }
15258 case X86::BI_mm_setcsr:
15259 case X86::BI__builtin_ia32_ldmxcsr: {
15260 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
15261 Builder.CreateStore(Ops[0], Tmp);
15262 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
15263 Tmp.getPointer());
15264 }
15265 case X86::BI_mm_getcsr:
15266 case X86::BI__builtin_ia32_stmxcsr: {
15267 RawAddress Tmp = CreateMemTemp(E->getType());
15268 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
15269 Tmp.getPointer());
15270 return Builder.CreateLoad(Tmp, "stmxcsr");
15271 }
15272 case X86::BI__builtin_ia32_xsave:
15273 case X86::BI__builtin_ia32_xsave64:
15274 case X86::BI__builtin_ia32_xrstor:
15275 case X86::BI__builtin_ia32_xrstor64:
15276 case X86::BI__builtin_ia32_xsaveopt:
15277 case X86::BI__builtin_ia32_xsaveopt64:
15278 case X86::BI__builtin_ia32_xrstors:
15279 case X86::BI__builtin_ia32_xrstors64:
15280 case X86::BI__builtin_ia32_xsavec:
15281 case X86::BI__builtin_ia32_xsavec64:
15282 case X86::BI__builtin_ia32_xsaves:
15283 case X86::BI__builtin_ia32_xsaves64:
15284 case X86::BI__builtin_ia32_xsetbv:
15285 case X86::BI_xsetbv: {
15286 Intrinsic::ID ID;
15287#define INTRINSIC_X86_XSAVE_ID(NAME) \
15288 case X86::BI__builtin_ia32_##NAME: \
15289 ID = Intrinsic::x86_##NAME; \
15290 break
15291 switch (BuiltinID) {
15292 default: llvm_unreachable("Unsupported intrinsic!");
15293 INTRINSIC_X86_XSAVE_ID(xsave);
15294 INTRINSIC_X86_XSAVE_ID(xsave64);
15295 INTRINSIC_X86_XSAVE_ID(xrstor);
15296 INTRINSIC_X86_XSAVE_ID(xrstor64);
15297 INTRINSIC_X86_XSAVE_ID(xsaveopt);
15298 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
15299 INTRINSIC_X86_XSAVE_ID(xrstors);
15300 INTRINSIC_X86_XSAVE_ID(xrstors64);
15301 INTRINSIC_X86_XSAVE_ID(xsavec);
15302 INTRINSIC_X86_XSAVE_ID(xsavec64);
15303 INTRINSIC_X86_XSAVE_ID(xsaves);
15304 INTRINSIC_X86_XSAVE_ID(xsaves64);
15305 INTRINSIC_X86_XSAVE_ID(xsetbv);
15306 case X86::BI_xsetbv:
15307 ID = Intrinsic::x86_xsetbv;
15308 break;
15309 }
15310#undef INTRINSIC_X86_XSAVE_ID
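// The 64-bit mask operand is split into the EDX:EAX pair that the intrinsics
// expect. For example (a sketch), a mask of 0x0000000300000007 is passed as
// hi = 0x00000003 and lo = 0x00000007.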
15311 Value *Mhi = Builder.CreateTrunc(
15312 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
15313 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
15314 Ops[1] = Mhi;
15315 Ops.push_back(Mlo);
15316 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15317 }
15318 case X86::BI__builtin_ia32_xgetbv:
15319 case X86::BI_xgetbv:
15320 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
15321 case X86::BI__builtin_ia32_storedqudi128_mask:
15322 case X86::BI__builtin_ia32_storedqusi128_mask:
15323 case X86::BI__builtin_ia32_storedquhi128_mask:
15324 case X86::BI__builtin_ia32_storedquqi128_mask:
15325 case X86::BI__builtin_ia32_storeupd128_mask:
15326 case X86::BI__builtin_ia32_storeups128_mask:
15327 case X86::BI__builtin_ia32_storedqudi256_mask:
15328 case X86::BI__builtin_ia32_storedqusi256_mask:
15329 case X86::BI__builtin_ia32_storedquhi256_mask:
15330 case X86::BI__builtin_ia32_storedquqi256_mask:
15331 case X86::BI__builtin_ia32_storeupd256_mask:
15332 case X86::BI__builtin_ia32_storeups256_mask:
15333 case X86::BI__builtin_ia32_storedqudi512_mask:
15334 case X86::BI__builtin_ia32_storedqusi512_mask:
15335 case X86::BI__builtin_ia32_storedquhi512_mask:
15336 case X86::BI__builtin_ia32_storedquqi512_mask:
15337 case X86::BI__builtin_ia32_storeupd512_mask:
15338 case X86::BI__builtin_ia32_storeups512_mask:
15339 return EmitX86MaskedStore(*this, Ops, Align(1));
15340
15341 case X86::BI__builtin_ia32_storesbf16128_mask:
15342 case X86::BI__builtin_ia32_storesh128_mask:
15343 case X86::BI__builtin_ia32_storess128_mask:
15344 case X86::BI__builtin_ia32_storesd128_mask:
15345 return EmitX86MaskedStore(*this, Ops, Align(1));
15346
15347 case X86::BI__builtin_ia32_cvtmask2b128:
15348 case X86::BI__builtin_ia32_cvtmask2b256:
15349 case X86::BI__builtin_ia32_cvtmask2b512:
15350 case X86::BI__builtin_ia32_cvtmask2w128:
15351 case X86::BI__builtin_ia32_cvtmask2w256:
15352 case X86::BI__builtin_ia32_cvtmask2w512:
15353 case X86::BI__builtin_ia32_cvtmask2d128:
15354 case X86::BI__builtin_ia32_cvtmask2d256:
15355 case X86::BI__builtin_ia32_cvtmask2d512:
15356 case X86::BI__builtin_ia32_cvtmask2q128:
15357 case X86::BI__builtin_ia32_cvtmask2q256:
15358 case X86::BI__builtin_ia32_cvtmask2q512:
15359 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
15360
15361 case X86::BI__builtin_ia32_cvtb2mask128:
15362 case X86::BI__builtin_ia32_cvtb2mask256:
15363 case X86::BI__builtin_ia32_cvtb2mask512:
15364 case X86::BI__builtin_ia32_cvtw2mask128:
15365 case X86::BI__builtin_ia32_cvtw2mask256:
15366 case X86::BI__builtin_ia32_cvtw2mask512:
15367 case X86::BI__builtin_ia32_cvtd2mask128:
15368 case X86::BI__builtin_ia32_cvtd2mask256:
15369 case X86::BI__builtin_ia32_cvtd2mask512:
15370 case X86::BI__builtin_ia32_cvtq2mask128:
15371 case X86::BI__builtin_ia32_cvtq2mask256:
15372 case X86::BI__builtin_ia32_cvtq2mask512:
15373 return EmitX86ConvertToMask(*this, Ops[0]);
15374
15375 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
15376 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
15377 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
15378 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
15379 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
15380 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
15381 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
15382 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
15383 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
15384 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
15385 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
15386 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
15387 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
15388 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
15389 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
15390 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
15391 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
15392 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
15393 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
15394 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
15395 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
15396 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
15397 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
15398 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
15399 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
15400 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
15401
15402 case X86::BI__builtin_ia32_vfmaddss3:
15403 case X86::BI__builtin_ia32_vfmaddsd3:
15404 case X86::BI__builtin_ia32_vfmaddsh3_mask:
15405 case X86::BI__builtin_ia32_vfmaddss3_mask:
15406 case X86::BI__builtin_ia32_vfmaddsd3_mask:
15407 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
15408 case X86::BI__builtin_ia32_vfmaddss:
15409 case X86::BI__builtin_ia32_vfmaddsd:
15410 return EmitScalarFMAExpr(*this, E, Ops,
15411 Constant::getNullValue(Ops[0]->getType()));
15412 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
15413 case X86::BI__builtin_ia32_vfmaddss3_maskz:
15414 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
15415 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
15416 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
15417 case X86::BI__builtin_ia32_vfmaddss3_mask3:
15418 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
15419 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
15420 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
15421 case X86::BI__builtin_ia32_vfmsubss3_mask3:
15422 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
15423 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
15424 /*NegAcc*/ true);
15425 case X86::BI__builtin_ia32_vfmaddph:
15426 case X86::BI__builtin_ia32_vfmaddps:
15427 case X86::BI__builtin_ia32_vfmaddpd:
15428 case X86::BI__builtin_ia32_vfmaddph256:
15429 case X86::BI__builtin_ia32_vfmaddps256:
15430 case X86::BI__builtin_ia32_vfmaddpd256:
15431 case X86::BI__builtin_ia32_vfmaddph512_mask:
15432 case X86::BI__builtin_ia32_vfmaddph512_maskz:
15433 case X86::BI__builtin_ia32_vfmaddph512_mask3:
15434 case X86::BI__builtin_ia32_vfmaddnepbh128:
15435 case X86::BI__builtin_ia32_vfmaddnepbh256:
15436 case X86::BI__builtin_ia32_vfmaddnepbh512:
15437 case X86::BI__builtin_ia32_vfmaddps512_mask:
15438 case X86::BI__builtin_ia32_vfmaddps512_maskz:
15439 case X86::BI__builtin_ia32_vfmaddps512_mask3:
15440 case X86::BI__builtin_ia32_vfmsubps512_mask3:
15441 case X86::BI__builtin_ia32_vfmaddpd512_mask:
15442 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
15443 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
15444 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
15445 case X86::BI__builtin_ia32_vfmsubph512_mask3:
15446 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
15447 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
15448 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
15449 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
15450 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
15451 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
15452 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
15453 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
15454 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
15455 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
15456 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
15457 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
15458 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
15459 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
15460 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
15461 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
15462 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
15463 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
15464 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
15465 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
15466 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
15467 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
15468 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
15469 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
15470 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
15471 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
15472 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
15473 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
15474 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
15475 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
15476 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
15477 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
15478 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
15479 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
15480 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
15481 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
15482 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
15483 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
15484
15485 case X86::BI__builtin_ia32_movdqa32store128_mask:
15486 case X86::BI__builtin_ia32_movdqa64store128_mask:
15487 case X86::BI__builtin_ia32_storeaps128_mask:
15488 case X86::BI__builtin_ia32_storeapd128_mask:
15489 case X86::BI__builtin_ia32_movdqa32store256_mask:
15490 case X86::BI__builtin_ia32_movdqa64store256_mask:
15491 case X86::BI__builtin_ia32_storeaps256_mask:
15492 case X86::BI__builtin_ia32_storeapd256_mask:
15493 case X86::BI__builtin_ia32_movdqa32store512_mask:
15494 case X86::BI__builtin_ia32_movdqa64store512_mask:
15495 case X86::BI__builtin_ia32_storeaps512_mask:
15496 case X86::BI__builtin_ia32_storeapd512_mask:
15497 return EmitX86MaskedStore(
15498 *this, Ops,
15499 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15500
15501 case X86::BI__builtin_ia32_loadups128_mask:
15502 case X86::BI__builtin_ia32_loadups256_mask:
15503 case X86::BI__builtin_ia32_loadups512_mask:
15504 case X86::BI__builtin_ia32_loadupd128_mask:
15505 case X86::BI__builtin_ia32_loadupd256_mask:
15506 case X86::BI__builtin_ia32_loadupd512_mask:
15507 case X86::BI__builtin_ia32_loaddquqi128_mask:
15508 case X86::BI__builtin_ia32_loaddquqi256_mask:
15509 case X86::BI__builtin_ia32_loaddquqi512_mask:
15510 case X86::BI__builtin_ia32_loaddquhi128_mask:
15511 case X86::BI__builtin_ia32_loaddquhi256_mask:
15512 case X86::BI__builtin_ia32_loaddquhi512_mask:
15513 case X86::BI__builtin_ia32_loaddqusi128_mask:
15514 case X86::BI__builtin_ia32_loaddqusi256_mask:
15515 case X86::BI__builtin_ia32_loaddqusi512_mask:
15516 case X86::BI__builtin_ia32_loaddqudi128_mask:
15517 case X86::BI__builtin_ia32_loaddqudi256_mask:
15518 case X86::BI__builtin_ia32_loaddqudi512_mask:
15519 return EmitX86MaskedLoad(*this, Ops, Align(1));
15520
15521 case X86::BI__builtin_ia32_loadsbf16128_mask:
15522 case X86::BI__builtin_ia32_loadsh128_mask:
15523 case X86::BI__builtin_ia32_loadss128_mask:
15524 case X86::BI__builtin_ia32_loadsd128_mask:
15525 return EmitX86MaskedLoad(*this, Ops, Align(1));
15526
15527 case X86::BI__builtin_ia32_loadaps128_mask:
15528 case X86::BI__builtin_ia32_loadaps256_mask:
15529 case X86::BI__builtin_ia32_loadaps512_mask:
15530 case X86::BI__builtin_ia32_loadapd128_mask:
15531 case X86::BI__builtin_ia32_loadapd256_mask:
15532 case X86::BI__builtin_ia32_loadapd512_mask:
15533 case X86::BI__builtin_ia32_movdqa32load128_mask:
15534 case X86::BI__builtin_ia32_movdqa32load256_mask:
15535 case X86::BI__builtin_ia32_movdqa32load512_mask:
15536 case X86::BI__builtin_ia32_movdqa64load128_mask:
15537 case X86::BI__builtin_ia32_movdqa64load256_mask:
15538 case X86::BI__builtin_ia32_movdqa64load512_mask:
15539 return EmitX86MaskedLoad(
15540 *this, Ops,
15541 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15542
15543 case X86::BI__builtin_ia32_expandloaddf128_mask:
15544 case X86::BI__builtin_ia32_expandloaddf256_mask:
15545 case X86::BI__builtin_ia32_expandloaddf512_mask:
15546 case X86::BI__builtin_ia32_expandloadsf128_mask:
15547 case X86::BI__builtin_ia32_expandloadsf256_mask:
15548 case X86::BI__builtin_ia32_expandloadsf512_mask:
15549 case X86::BI__builtin_ia32_expandloaddi128_mask:
15550 case X86::BI__builtin_ia32_expandloaddi256_mask:
15551 case X86::BI__builtin_ia32_expandloaddi512_mask:
15552 case X86::BI__builtin_ia32_expandloadsi128_mask:
15553 case X86::BI__builtin_ia32_expandloadsi256_mask:
15554 case X86::BI__builtin_ia32_expandloadsi512_mask:
15555 case X86::BI__builtin_ia32_expandloadhi128_mask:
15556 case X86::BI__builtin_ia32_expandloadhi256_mask:
15557 case X86::BI__builtin_ia32_expandloadhi512_mask:
15558 case X86::BI__builtin_ia32_expandloadqi128_mask:
15559 case X86::BI__builtin_ia32_expandloadqi256_mask:
15560 case X86::BI__builtin_ia32_expandloadqi512_mask:
15561 return EmitX86ExpandLoad(*this, Ops);
15562
15563 case X86::BI__builtin_ia32_compressstoredf128_mask:
15564 case X86::BI__builtin_ia32_compressstoredf256_mask:
15565 case X86::BI__builtin_ia32_compressstoredf512_mask:
15566 case X86::BI__builtin_ia32_compressstoresf128_mask:
15567 case X86::BI__builtin_ia32_compressstoresf256_mask:
15568 case X86::BI__builtin_ia32_compressstoresf512_mask:
15569 case X86::BI__builtin_ia32_compressstoredi128_mask:
15570 case X86::BI__builtin_ia32_compressstoredi256_mask:
15571 case X86::BI__builtin_ia32_compressstoredi512_mask:
15572 case X86::BI__builtin_ia32_compressstoresi128_mask:
15573 case X86::BI__builtin_ia32_compressstoresi256_mask:
15574 case X86::BI__builtin_ia32_compressstoresi512_mask:
15575 case X86::BI__builtin_ia32_compressstorehi128_mask:
15576 case X86::BI__builtin_ia32_compressstorehi256_mask:
15577 case X86::BI__builtin_ia32_compressstorehi512_mask:
15578 case X86::BI__builtin_ia32_compressstoreqi128_mask:
15579 case X86::BI__builtin_ia32_compressstoreqi256_mask:
15580 case X86::BI__builtin_ia32_compressstoreqi512_mask:
15581 return EmitX86CompressStore(*this, Ops);
15582
15583 case X86::BI__builtin_ia32_expanddf128_mask:
15584 case X86::BI__builtin_ia32_expanddf256_mask:
15585 case X86::BI__builtin_ia32_expanddf512_mask:
15586 case X86::BI__builtin_ia32_expandsf128_mask:
15587 case X86::BI__builtin_ia32_expandsf256_mask:
15588 case X86::BI__builtin_ia32_expandsf512_mask:
15589 case X86::BI__builtin_ia32_expanddi128_mask:
15590 case X86::BI__builtin_ia32_expanddi256_mask:
15591 case X86::BI__builtin_ia32_expanddi512_mask:
15592 case X86::BI__builtin_ia32_expandsi128_mask:
15593 case X86::BI__builtin_ia32_expandsi256_mask:
15594 case X86::BI__builtin_ia32_expandsi512_mask:
15595 case X86::BI__builtin_ia32_expandhi128_mask:
15596 case X86::BI__builtin_ia32_expandhi256_mask:
15597 case X86::BI__builtin_ia32_expandhi512_mask:
15598 case X86::BI__builtin_ia32_expandqi128_mask:
15599 case X86::BI__builtin_ia32_expandqi256_mask:
15600 case X86::BI__builtin_ia32_expandqi512_mask:
15601 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
15602
15603 case X86::BI__builtin_ia32_compressdf128_mask:
15604 case X86::BI__builtin_ia32_compressdf256_mask:
15605 case X86::BI__builtin_ia32_compressdf512_mask:
15606 case X86::BI__builtin_ia32_compresssf128_mask:
15607 case X86::BI__builtin_ia32_compresssf256_mask:
15608 case X86::BI__builtin_ia32_compresssf512_mask:
15609 case X86::BI__builtin_ia32_compressdi128_mask:
15610 case X86::BI__builtin_ia32_compressdi256_mask:
15611 case X86::BI__builtin_ia32_compressdi512_mask:
15612 case X86::BI__builtin_ia32_compresssi128_mask:
15613 case X86::BI__builtin_ia32_compresssi256_mask:
15614 case X86::BI__builtin_ia32_compresssi512_mask:
15615 case X86::BI__builtin_ia32_compresshi128_mask:
15616 case X86::BI__builtin_ia32_compresshi256_mask:
15617 case X86::BI__builtin_ia32_compresshi512_mask:
15618 case X86::BI__builtin_ia32_compressqi128_mask:
15619 case X86::BI__builtin_ia32_compressqi256_mask:
15620 case X86::BI__builtin_ia32_compressqi512_mask:
15621 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
15622
15623 case X86::BI__builtin_ia32_gather3div2df:
15624 case X86::BI__builtin_ia32_gather3div2di:
15625 case X86::BI__builtin_ia32_gather3div4df:
15626 case X86::BI__builtin_ia32_gather3div4di:
15627 case X86::BI__builtin_ia32_gather3div4sf:
15628 case X86::BI__builtin_ia32_gather3div4si:
15629 case X86::BI__builtin_ia32_gather3div8sf:
15630 case X86::BI__builtin_ia32_gather3div8si:
15631 case X86::BI__builtin_ia32_gather3siv2df:
15632 case X86::BI__builtin_ia32_gather3siv2di:
15633 case X86::BI__builtin_ia32_gather3siv4df:
15634 case X86::BI__builtin_ia32_gather3siv4di:
15635 case X86::BI__builtin_ia32_gather3siv4sf:
15636 case X86::BI__builtin_ia32_gather3siv4si:
15637 case X86::BI__builtin_ia32_gather3siv8sf:
15638 case X86::BI__builtin_ia32_gather3siv8si:
15639 case X86::BI__builtin_ia32_gathersiv8df:
15640 case X86::BI__builtin_ia32_gathersiv16sf:
15641 case X86::BI__builtin_ia32_gatherdiv8df:
15642 case X86::BI__builtin_ia32_gatherdiv16sf:
15643 case X86::BI__builtin_ia32_gathersiv8di:
15644 case X86::BI__builtin_ia32_gathersiv16si:
15645 case X86::BI__builtin_ia32_gatherdiv8di:
15646 case X86::BI__builtin_ia32_gatherdiv16si: {
15647 Intrinsic::ID IID;
15648 switch (BuiltinID) {
15649 default: llvm_unreachable("Unexpected builtin");
15650 case X86::BI__builtin_ia32_gather3div2df:
15651 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15652 break;
15653 case X86::BI__builtin_ia32_gather3div2di:
15654 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15655 break;
15656 case X86::BI__builtin_ia32_gather3div4df:
15657 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15658 break;
15659 case X86::BI__builtin_ia32_gather3div4di:
15660 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15661 break;
15662 case X86::BI__builtin_ia32_gather3div4sf:
15663 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15664 break;
15665 case X86::BI__builtin_ia32_gather3div4si:
15666 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15667 break;
15668 case X86::BI__builtin_ia32_gather3div8sf:
15669 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15670 break;
15671 case X86::BI__builtin_ia32_gather3div8si:
15672 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15673 break;
15674 case X86::BI__builtin_ia32_gather3siv2df:
15675 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15676 break;
15677 case X86::BI__builtin_ia32_gather3siv2di:
15678 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15679 break;
15680 case X86::BI__builtin_ia32_gather3siv4df:
15681 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15682 break;
15683 case X86::BI__builtin_ia32_gather3siv4di:
15684 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15685 break;
15686 case X86::BI__builtin_ia32_gather3siv4sf:
15687 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15688 break;
15689 case X86::BI__builtin_ia32_gather3siv4si:
15690 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15691 break;
15692 case X86::BI__builtin_ia32_gather3siv8sf:
15693 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15694 break;
15695 case X86::BI__builtin_ia32_gather3siv8si:
15696 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15697 break;
15698 case X86::BI__builtin_ia32_gathersiv8df:
15699 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15700 break;
15701 case X86::BI__builtin_ia32_gathersiv16sf:
15702 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15703 break;
15704 case X86::BI__builtin_ia32_gatherdiv8df:
15705 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15706 break;
15707 case X86::BI__builtin_ia32_gatherdiv16sf:
15708 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15709 break;
15710 case X86::BI__builtin_ia32_gathersiv8di:
15711 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15712 break;
15713 case X86::BI__builtin_ia32_gathersiv16si:
15714 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15715 break;
15716 case X86::BI__builtin_ia32_gatherdiv8di:
15717 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15718 break;
15719 case X86::BI__builtin_ia32_gatherdiv16si:
15720 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15721 break;
15722 }
15723
15724 unsigned MinElts = std::min(
15725 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15726 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15727 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15728 Function *Intr = CGM.getIntrinsic(IID);
15729 return Builder.CreateCall(Intr, Ops);
15730 }
15731
15732 case X86::BI__builtin_ia32_scattersiv8df:
15733 case X86::BI__builtin_ia32_scattersiv16sf:
15734 case X86::BI__builtin_ia32_scatterdiv8df:
15735 case X86::BI__builtin_ia32_scatterdiv16sf:
15736 case X86::BI__builtin_ia32_scattersiv8di:
15737 case X86::BI__builtin_ia32_scattersiv16si:
15738 case X86::BI__builtin_ia32_scatterdiv8di:
15739 case X86::BI__builtin_ia32_scatterdiv16si:
15740 case X86::BI__builtin_ia32_scatterdiv2df:
15741 case X86::BI__builtin_ia32_scatterdiv2di:
15742 case X86::BI__builtin_ia32_scatterdiv4df:
15743 case X86::BI__builtin_ia32_scatterdiv4di:
15744 case X86::BI__builtin_ia32_scatterdiv4sf:
15745 case X86::BI__builtin_ia32_scatterdiv4si:
15746 case X86::BI__builtin_ia32_scatterdiv8sf:
15747 case X86::BI__builtin_ia32_scatterdiv8si:
15748 case X86::BI__builtin_ia32_scattersiv2df:
15749 case X86::BI__builtin_ia32_scattersiv2di:
15750 case X86::BI__builtin_ia32_scattersiv4df:
15751 case X86::BI__builtin_ia32_scattersiv4di:
15752 case X86::BI__builtin_ia32_scattersiv4sf:
15753 case X86::BI__builtin_ia32_scattersiv4si:
15754 case X86::BI__builtin_ia32_scattersiv8sf:
15755 case X86::BI__builtin_ia32_scattersiv8si: {
15756 Intrinsic::ID IID;
15757 switch (BuiltinID) {
15758 default: llvm_unreachable("Unexpected builtin");
15759 case X86::BI__builtin_ia32_scattersiv8df:
15760 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15761 break;
15762 case X86::BI__builtin_ia32_scattersiv16sf:
15763 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15764 break;
15765 case X86::BI__builtin_ia32_scatterdiv8df:
15766 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15767 break;
15768 case X86::BI__builtin_ia32_scatterdiv16sf:
15769 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15770 break;
15771 case X86::BI__builtin_ia32_scattersiv8di:
15772 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15773 break;
15774 case X86::BI__builtin_ia32_scattersiv16si:
15775 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15776 break;
15777 case X86::BI__builtin_ia32_scatterdiv8di:
15778 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15779 break;
15780 case X86::BI__builtin_ia32_scatterdiv16si:
15781 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15782 break;
15783 case X86::BI__builtin_ia32_scatterdiv2df:
15784 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15785 break;
15786 case X86::BI__builtin_ia32_scatterdiv2di:
15787 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15788 break;
15789 case X86::BI__builtin_ia32_scatterdiv4df:
15790 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15791 break;
15792 case X86::BI__builtin_ia32_scatterdiv4di:
15793 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15794 break;
15795 case X86::BI__builtin_ia32_scatterdiv4sf:
15796 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15797 break;
15798 case X86::BI__builtin_ia32_scatterdiv4si:
15799 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15800 break;
15801 case X86::BI__builtin_ia32_scatterdiv8sf:
15802 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15803 break;
15804 case X86::BI__builtin_ia32_scatterdiv8si:
15805 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15806 break;
15807 case X86::BI__builtin_ia32_scattersiv2df:
15808 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15809 break;
15810 case X86::BI__builtin_ia32_scattersiv2di:
15811 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15812 break;
15813 case X86::BI__builtin_ia32_scattersiv4df:
15814 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15815 break;
15816 case X86::BI__builtin_ia32_scattersiv4di:
15817 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15818 break;
15819 case X86::BI__builtin_ia32_scattersiv4sf:
15820 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15821 break;
15822 case X86::BI__builtin_ia32_scattersiv4si:
15823 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15824 break;
15825 case X86::BI__builtin_ia32_scattersiv8sf:
15826 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15827 break;
15828 case X86::BI__builtin_ia32_scattersiv8si:
15829 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15830 break;
15831 }
15832
15833 unsigned MinElts = std::min(
15834 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15835 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15836 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15837 Function *Intr = CGM.getIntrinsic(IID);
15838 return Builder.CreateCall(Intr, Ops);
15839 }
15840
15841 case X86::BI__builtin_ia32_vextractf128_pd256:
15842 case X86::BI__builtin_ia32_vextractf128_ps256:
15843 case X86::BI__builtin_ia32_vextractf128_si256:
15844 case X86::BI__builtin_ia32_extract128i256:
15845 case X86::BI__builtin_ia32_extractf64x4_mask:
15846 case X86::BI__builtin_ia32_extractf32x4_mask:
15847 case X86::BI__builtin_ia32_extracti64x4_mask:
15848 case X86::BI__builtin_ia32_extracti32x4_mask:
15849 case X86::BI__builtin_ia32_extractf32x8_mask:
15850 case X86::BI__builtin_ia32_extracti32x8_mask:
15851 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15852 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15853 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15854 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15855 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15856 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15857 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15858 unsigned NumElts = DstTy->getNumElements();
15859 unsigned SrcNumElts =
15860 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15861 unsigned SubVectors = SrcNumElts / NumElts;
15862 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15863 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15864 Index &= SubVectors - 1; // Remove any extra bits.
15865 Index *= NumElts;
15866
15867 int Indices[16];
15868 for (unsigned i = 0; i != NumElts; ++i)
15869 Indices[i] = i + Index;
15870
15871 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15872 "extract");
15873
15874 if (Ops.size() == 4)
15875 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15876
15877 return Res;
15878 }
15879 case X86::BI__builtin_ia32_vinsertf128_pd256:
15880 case X86::BI__builtin_ia32_vinsertf128_ps256:
15881 case X86::BI__builtin_ia32_vinsertf128_si256:
15882 case X86::BI__builtin_ia32_insert128i256:
15883 case X86::BI__builtin_ia32_insertf64x4:
15884 case X86::BI__builtin_ia32_insertf32x4:
15885 case X86::BI__builtin_ia32_inserti64x4:
15886 case X86::BI__builtin_ia32_inserti32x4:
15887 case X86::BI__builtin_ia32_insertf32x8:
15888 case X86::BI__builtin_ia32_inserti32x8:
15889 case X86::BI__builtin_ia32_insertf32x4_256:
15890 case X86::BI__builtin_ia32_inserti32x4_256:
15891 case X86::BI__builtin_ia32_insertf64x2_256:
15892 case X86::BI__builtin_ia32_inserti64x2_256:
15893 case X86::BI__builtin_ia32_insertf64x2_512:
15894 case X86::BI__builtin_ia32_inserti64x2_512: {
15895 unsigned DstNumElts =
15896 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15897 unsigned SrcNumElts =
15898 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15899 unsigned SubVectors = DstNumElts / SrcNumElts;
15900 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15901 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15902 Index &= SubVectors - 1; // Remove any extra bits.
15903 Index *= SrcNumElts;
15904
15905 int Indices[16];
15906 for (unsigned i = 0; i != DstNumElts; ++i)
15907 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15908
15909 Value *Op1 = Builder.CreateShuffleVector(
15910 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15911
15912 for (unsigned i = 0; i != DstNumElts; ++i) {
15913 if (i >= Index && i < (Index + SrcNumElts))
15914 Indices[i] = (i - Index) + DstNumElts;
15915 else
15916 Indices[i] = i;
15917 }
15918
15919 return Builder.CreateShuffleVector(Ops[0], Op1,
15920 ArrayRef(Indices, DstNumElts), "insert");
15921 }
15922 case X86::BI__builtin_ia32_pmovqd512_mask:
15923 case X86::BI__builtin_ia32_pmovwb512_mask: {
15924 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15925 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15926 }
15927 case X86::BI__builtin_ia32_pmovdb512_mask:
15928 case X86::BI__builtin_ia32_pmovdw512_mask:
15929 case X86::BI__builtin_ia32_pmovqw512_mask: {
15930 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15931 if (C->isAllOnesValue())
15932 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15933
15934 Intrinsic::ID IID;
15935 switch (BuiltinID) {
15936 default: llvm_unreachable("Unsupported intrinsic!");
15937 case X86::BI__builtin_ia32_pmovdb512_mask:
15938 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15939 break;
15940 case X86::BI__builtin_ia32_pmovdw512_mask:
15941 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15942 break;
15943 case X86::BI__builtin_ia32_pmovqw512_mask:
15944 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15945 break;
15946 }
15947
15948 Function *Intr = CGM.getIntrinsic(IID);
15949 return Builder.CreateCall(Intr, Ops);
15950 }
15951 case X86::BI__builtin_ia32_pblendw128:
15952 case X86::BI__builtin_ia32_blendpd:
15953 case X86::BI__builtin_ia32_blendps:
15954 case X86::BI__builtin_ia32_blendpd256:
15955 case X86::BI__builtin_ia32_blendps256:
15956 case X86::BI__builtin_ia32_pblendw256:
15957 case X86::BI__builtin_ia32_pblendd128:
15958 case X86::BI__builtin_ia32_pblendd256: {
15959 unsigned NumElts =
15960 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15961 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15962
15963 int Indices[16];
15964 // If there are more than 8 elements, the immediate is used twice so make
15965 // sure we handle that.
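// For example (a sketch): a 4-element blend with Imm = 0b0101 yields the
// shuffle mask <4, 1, 6, 3>, taking elements 0 and 2 from Ops[1] and
// elements 1 and 3 from Ops[0].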
15966 for (unsigned i = 0; i != NumElts; ++i)
15967 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15968
15969 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15970 ArrayRef(Indices, NumElts), "blend");
15971 }
15972 case X86::BI__builtin_ia32_pshuflw:
15973 case X86::BI__builtin_ia32_pshuflw256:
15974 case X86::BI__builtin_ia32_pshuflw512: {
15975 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15976 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15977 unsigned NumElts = Ty->getNumElements();
15978
15979 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15980 Imm = (Imm & 0xff) * 0x01010101;
15981
15982 int Indices[32];
15983 for (unsigned l = 0; l != NumElts; l += 8) {
15984 for (unsigned i = 0; i != 4; ++i) {
15985 Indices[l + i] = l + (Imm & 3);
15986 Imm >>= 2;
15987 }
15988 for (unsigned i = 4; i != 8; ++i)
15989 Indices[l + i] = l + i;
15990 }
15991
15992 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15993 "pshuflw");
15994 }
15995 case X86::BI__builtin_ia32_pshufhw:
15996 case X86::BI__builtin_ia32_pshufhw256:
15997 case X86::BI__builtin_ia32_pshufhw512: {
15998 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15999 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16000 unsigned NumElts = Ty->getNumElements();
16001
16002 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16003 Imm = (Imm & 0xff) * 0x01010101;
16004
16005 int Indices[32];
16006 for (unsigned l = 0; l != NumElts; l += 8) {
16007 for (unsigned i = 0; i != 4; ++i)
16008 Indices[l + i] = l + i;
16009 for (unsigned i = 4; i != 8; ++i) {
16010 Indices[l + i] = l + 4 + (Imm & 3);
16011 Imm >>= 2;
16012 }
16013 }
16014
16015 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16016 "pshufhw");
16017 }
16018 case X86::BI__builtin_ia32_pshufd:
16019 case X86::BI__builtin_ia32_pshufd256:
16020 case X86::BI__builtin_ia32_pshufd512:
16021 case X86::BI__builtin_ia32_vpermilpd:
16022 case X86::BI__builtin_ia32_vpermilps:
16023 case X86::BI__builtin_ia32_vpermilpd256:
16024 case X86::BI__builtin_ia32_vpermilps256:
16025 case X86::BI__builtin_ia32_vpermilpd512:
16026 case X86::BI__builtin_ia32_vpermilps512: {
16027 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16028 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16029 unsigned NumElts = Ty->getNumElements();
16030 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
16031 unsigned NumLaneElts = NumElts / NumLanes;
16032
16033 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16034 Imm = (Imm & 0xff) * 0x01010101;
16035
16036 int Indices[16];
16037 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16038 for (unsigned i = 0; i != NumLaneElts; ++i) {
16039 Indices[i + l] = (Imm % NumLaneElts) + l;
16040 Imm /= NumLaneElts;
16041 }
16042 }
16043
16044 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16045 "permil");
16046 }
16047 case X86::BI__builtin_ia32_shufpd:
16048 case X86::BI__builtin_ia32_shufpd256:
16049 case X86::BI__builtin_ia32_shufpd512:
16050 case X86::BI__builtin_ia32_shufps:
16051 case X86::BI__builtin_ia32_shufps256:
16052 case X86::BI__builtin_ia32_shufps512: {
16053 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16054 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16055 unsigned NumElts = Ty->getNumElements();
16056 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
16057 unsigned NumLaneElts = NumElts / NumLanes;
16058
16059 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16060 Imm = (Imm & 0xff) * 0x01010101;
16061
16062 int Indices[16];
16063 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16064 for (unsigned i = 0; i != NumLaneElts; ++i) {
16065 unsigned Index = Imm % NumLaneElts;
16066 Imm /= NumLaneElts;
16067 if (i >= (NumLaneElts / 2))
16068 Index += NumElts;
16069 Indices[l + i] = l + Index;
16070 }
16071 }
16072
16073 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16074 ArrayRef(Indices, NumElts), "shufp");
16075 }
16076 case X86::BI__builtin_ia32_permdi256:
16077 case X86::BI__builtin_ia32_permdf256:
16078 case X86::BI__builtin_ia32_permdi512:
16079 case X86::BI__builtin_ia32_permdf512: {
16080 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16081 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16082 unsigned NumElts = Ty->getNumElements();
16083
16084 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
16085 int Indices[8];
16086 for (unsigned l = 0; l != NumElts; l += 4)
16087 for (unsigned i = 0; i != 4; ++i)
16088 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
16089
16090 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16091 "perm");
16092 }
16093 case X86::BI__builtin_ia32_palignr128:
16094 case X86::BI__builtin_ia32_palignr256:
16095 case X86::BI__builtin_ia32_palignr512: {
16096 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16097
16098 unsigned NumElts =
16099 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16100 assert(NumElts % 16 == 0);
16101
16102 // If palignr is shifting the pair of vectors more than the size of two
16103 // lanes, emit zero.
16104 if (ShiftVal >= 32)
16105 return llvm::Constant::getNullValue(ConvertType(E->getType()));
16106
16107 // If palignr is shifting the pair of input vectors more than one lane,
16108 // but less than two lanes, convert to shifting in zeroes.
16109 if (ShiftVal > 16) {
16110 ShiftVal -= 16;
16111 Ops[1] = Ops[0];
16112 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
16113 }
16114
16115 int Indices[64];
16116 // 256-bit palignr operates on 128-bit lanes so we need to handle that
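// For example (a sketch): a 128-bit palignr with ShiftVal = 4 produces the
// indices <4..15, 16..19>, i.e. bytes 4-15 of Ops[1] followed by bytes 0-3
// of Ops[0].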
16117 for (unsigned l = 0; l != NumElts; l += 16) {
16118 for (unsigned i = 0; i != 16; ++i) {
16119 unsigned Idx = ShiftVal + i;
16120 if (Idx >= 16)
16121 Idx += NumElts - 16; // End of lane, switch operand.
16122 Indices[l + i] = Idx + l;
16123 }
16124 }
16125
16126 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16127 ArrayRef(Indices, NumElts), "palignr");
16128 }
16129 case X86::BI__builtin_ia32_alignd128:
16130 case X86::BI__builtin_ia32_alignd256:
16131 case X86::BI__builtin_ia32_alignd512:
16132 case X86::BI__builtin_ia32_alignq128:
16133 case X86::BI__builtin_ia32_alignq256:
16134 case X86::BI__builtin_ia32_alignq512: {
16135 unsigned NumElts =
16136 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16137 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16138
16139 // Mask the shift amount to width of a vector.
16140 ShiftVal &= NumElts - 1;
16141
16142 int Indices[16];
16143 for (unsigned i = 0; i != NumElts; ++i)
16144 Indices[i] = i + ShiftVal;
16145
16146 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16147 ArrayRef(Indices, NumElts), "valign");
16148 }
16149 case X86::BI__builtin_ia32_shuf_f32x4_256:
16150 case X86::BI__builtin_ia32_shuf_f64x2_256:
16151 case X86::BI__builtin_ia32_shuf_i32x4_256:
16152 case X86::BI__builtin_ia32_shuf_i64x2_256:
16153 case X86::BI__builtin_ia32_shuf_f32x4:
16154 case X86::BI__builtin_ia32_shuf_f64x2:
16155 case X86::BI__builtin_ia32_shuf_i32x4:
16156 case X86::BI__builtin_ia32_shuf_i64x2: {
16157 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16158 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16159 unsigned NumElts = Ty->getNumElements();
16160 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16161 unsigned NumLaneElts = NumElts / NumLanes;
16162
16163 int Indices[16];
16164 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16165 unsigned Index = (Imm % NumLanes) * NumLaneElts;
16166 Imm /= NumLanes; // Discard the bits we just used.
16167 if (l >= (NumElts / 2))
16168 Index += NumElts; // Switch to other source.
16169 for (unsigned i = 0; i != NumLaneElts; ++i) {
16170 Indices[l + i] = Index + i;
16171 }
16172 }
16173
16174 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16175 ArrayRef(Indices, NumElts), "shuf");
16176 }
16177
16178 case X86::BI__builtin_ia32_vperm2f128_pd256:
16179 case X86::BI__builtin_ia32_vperm2f128_ps256:
16180 case X86::BI__builtin_ia32_vperm2f128_si256:
16181 case X86::BI__builtin_ia32_permti256: {
16182 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16183 unsigned NumElts =
16184 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16185
16186 // This takes a very simple approach since there are two lanes and a
16187 // shuffle can have 2 inputs. So we reserve the first input for the first
16188 // lane and the second input for the second lane. This may result in
16189 // duplicate sources, but this can be dealt with in the backend.
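// For example (a sketch): a 4 x double permute with Imm = 0x31 selects
// OutOps = {Ops[0], Ops[1]} and builds the indices <2, 3, 6, 7>, i.e. the
// high half of each source.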
16190
16191 Value *OutOps[2];
16192 int Indices[8];
16193 for (unsigned l = 0; l != 2; ++l) {
16194 // Determine the source for this lane.
16195 if (Imm & (1 << ((l * 4) + 3)))
16196 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
16197 else if (Imm & (1 << ((l * 4) + 1)))
16198 OutOps[l] = Ops[1];
16199 else
16200 OutOps[l] = Ops[0];
16201
16202 for (unsigned i = 0; i != NumElts/2; ++i) {
16203 // Start with ith element of the source for this lane.
16204 unsigned Idx = (l * NumElts) + i;
16205 // If bit 0 of the immediate half is set, switch to the high half of
16206 // the source.
16207 if (Imm & (1 << (l * 4)))
16208 Idx += NumElts/2;
16209 Indices[(l * (NumElts/2)) + i] = Idx;
16210 }
16211 }
16212
16213 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
16214 ArrayRef(Indices, NumElts), "vperm");
16215 }
16216
16217 case X86::BI__builtin_ia32_pslldqi128_byteshift:
16218 case X86::BI__builtin_ia32_pslldqi256_byteshift:
16219 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
16220 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16221 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16222 // Builtin type is vXi64 so multiply by 8 to get bytes.
16223 unsigned NumElts = ResultType->getNumElements() * 8;
16224
16225 // If pslldq is shifting the vector more than 15 bytes, emit zero.
16226 if (ShiftVal >= 16)
16227 return llvm::Constant::getNullValue(ResultType);
16228
16229 int Indices[64];
16230 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
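// For example (a sketch): a 128-bit pslldq with ShiftVal = 4 produces the
// indices <12..15, 16..27>: the first four positions read zeros and the rest
// read bytes 0-11 of the source, i.e. the vector shifted left by four bytes.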
16231 for (unsigned l = 0; l != NumElts; l += 16) {
16232 for (unsigned i = 0; i != 16; ++i) {
16233 unsigned Idx = NumElts + i - ShiftVal;
16234 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
16235 Indices[l + i] = Idx + l;
16236 }
16237 }
16238
16239 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16240 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16241 Value *Zero = llvm::Constant::getNullValue(VecTy);
16242 Value *SV = Builder.CreateShuffleVector(
16243 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
16244 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
16245 }
16246 case X86::BI__builtin_ia32_psrldqi128_byteshift:
16247 case X86::BI__builtin_ia32_psrldqi256_byteshift:
16248 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
16249 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16250 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16251 // Builtin type is vXi64 so multiply by 8 to get bytes.
16252 unsigned NumElts = ResultType->getNumElements() * 8;
16253
16254 // If psrldq is shifting the vector more than 15 bytes, emit zero.
16255 if (ShiftVal >= 16)
16256 return llvm::Constant::getNullValue(ResultType);
16257
16258 int Indices[64];
16259 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
16260 for (unsigned l = 0; l != NumElts; l += 16) {
16261 for (unsigned i = 0; i != 16; ++i) {
16262 unsigned Idx = i + ShiftVal;
16263 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
16264 Indices[l + i] = Idx + l;
16265 }
16266 }
16267
16268 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16269 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16270 Value *Zero = llvm::Constant::getNullValue(VecTy);
16271 Value *SV = Builder.CreateShuffleVector(
16272 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
16273 return Builder.CreateBitCast(SV, ResultType, "cast");
16274 }
16275 case X86::BI__builtin_ia32_kshiftliqi:
16276 case X86::BI__builtin_ia32_kshiftlihi:
16277 case X86::BI__builtin_ia32_kshiftlisi:
16278 case X86::BI__builtin_ia32_kshiftlidi: {
16279 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16280 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16281
16282 if (ShiftVal >= NumElts)
16283 return llvm::Constant::getNullValue(Ops[0]->getType());
16284
16285 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16286
16287 int Indices[64];
16288 for (unsigned i = 0; i != NumElts; ++i)
16289 Indices[i] = NumElts + i - ShiftVal;
16290
16291 Value *Zero = llvm::Constant::getNullValue(In->getType());
16292 Value *SV = Builder.CreateShuffleVector(
16293 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
16294 return Builder.CreateBitCast(SV, Ops[0]->getType());
16295 }
16296 case X86::BI__builtin_ia32_kshiftriqi:
16297 case X86::BI__builtin_ia32_kshiftrihi:
16298 case X86::BI__builtin_ia32_kshiftrisi:
16299 case X86::BI__builtin_ia32_kshiftridi: {
16300 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16301 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16302
16303 if (ShiftVal >= NumElts)
16304 return llvm::Constant::getNullValue(Ops[0]->getType());
16305
16306 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16307
16308 int Indices[64];
16309 for (unsigned i = 0; i != NumElts; ++i)
16310 Indices[i] = i + ShiftVal;
16311
16312 Value *Zero = llvm::Constant::getNullValue(In->getType());
16313 Value *SV = Builder.CreateShuffleVector(
16314 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
16315 return Builder.CreateBitCast(SV, Ops[0]->getType());
16316 }
16317 case X86::BI__builtin_ia32_movnti:
16318 case X86::BI__builtin_ia32_movnti64:
16319 case X86::BI__builtin_ia32_movntsd:
16320 case X86::BI__builtin_ia32_movntss: {
16321 llvm::MDNode *Node = llvm::MDNode::get(
16322 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
16323
16324 Value *Ptr = Ops[0];
16325 Value *Src = Ops[1];
16326
16327 // Extract the 0'th element of the source vector.
16328 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
16329 BuiltinID == X86::BI__builtin_ia32_movntss)
16330 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
16331
16332 // Unaligned nontemporal store of the scalar value.
16333 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
16334 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
16335 SI->setAlignment(llvm::Align(1));
16336 return SI;
16337 }
16338 // Rotate is a special case of funnel shift - 1st 2 args are the same.
16339 case X86::BI__builtin_ia32_vprotb:
16340 case X86::BI__builtin_ia32_vprotw:
16341 case X86::BI__builtin_ia32_vprotd:
16342 case X86::BI__builtin_ia32_vprotq:
16343 case X86::BI__builtin_ia32_vprotbi:
16344 case X86::BI__builtin_ia32_vprotwi:
16345 case X86::BI__builtin_ia32_vprotdi:
16346 case X86::BI__builtin_ia32_vprotqi:
16347 case X86::BI__builtin_ia32_prold128:
16348 case X86::BI__builtin_ia32_prold256:
16349 case X86::BI__builtin_ia32_prold512:
16350 case X86::BI__builtin_ia32_prolq128:
16351 case X86::BI__builtin_ia32_prolq256:
16352 case X86::BI__builtin_ia32_prolq512:
16353 case X86::BI__builtin_ia32_prolvd128:
16354 case X86::BI__builtin_ia32_prolvd256:
16355 case X86::BI__builtin_ia32_prolvd512:
16356 case X86::BI__builtin_ia32_prolvq128:
16357 case X86::BI__builtin_ia32_prolvq256:
16358 case X86::BI__builtin_ia32_prolvq512:
16359 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
16360 case X86::BI__builtin_ia32_prord128:
16361 case X86::BI__builtin_ia32_prord256:
16362 case X86::BI__builtin_ia32_prord512:
16363 case X86::BI__builtin_ia32_prorq128:
16364 case X86::BI__builtin_ia32_prorq256:
16365 case X86::BI__builtin_ia32_prorq512:
16366 case X86::BI__builtin_ia32_prorvd128:
16367 case X86::BI__builtin_ia32_prorvd256:
16368 case X86::BI__builtin_ia32_prorvd512:
16369 case X86::BI__builtin_ia32_prorvq128:
16370 case X86::BI__builtin_ia32_prorvq256:
16371 case X86::BI__builtin_ia32_prorvq512:
16372 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
16373 case X86::BI__builtin_ia32_selectb_128:
16374 case X86::BI__builtin_ia32_selectb_256:
16375 case X86::BI__builtin_ia32_selectb_512:
16376 case X86::BI__builtin_ia32_selectw_128:
16377 case X86::BI__builtin_ia32_selectw_256:
16378 case X86::BI__builtin_ia32_selectw_512:
16379 case X86::BI__builtin_ia32_selectd_128:
16380 case X86::BI__builtin_ia32_selectd_256:
16381 case X86::BI__builtin_ia32_selectd_512:
16382 case X86::BI__builtin_ia32_selectq_128:
16383 case X86::BI__builtin_ia32_selectq_256:
16384 case X86::BI__builtin_ia32_selectq_512:
16385 case X86::BI__builtin_ia32_selectph_128:
16386 case X86::BI__builtin_ia32_selectph_256:
16387 case X86::BI__builtin_ia32_selectph_512:
16388 case X86::BI__builtin_ia32_selectpbf_128:
16389 case X86::BI__builtin_ia32_selectpbf_256:
16390 case X86::BI__builtin_ia32_selectpbf_512:
16391 case X86::BI__builtin_ia32_selectps_128:
16392 case X86::BI__builtin_ia32_selectps_256:
16393 case X86::BI__builtin_ia32_selectps_512:
16394 case X86::BI__builtin_ia32_selectpd_128:
16395 case X86::BI__builtin_ia32_selectpd_256:
16396 case X86::BI__builtin_ia32_selectpd_512:
16397 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
16398 case X86::BI__builtin_ia32_selectsh_128:
16399 case X86::BI__builtin_ia32_selectsbf_128:
16400 case X86::BI__builtin_ia32_selectss_128:
16401 case X86::BI__builtin_ia32_selectsd_128: {
16402 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16403 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16404 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
16405 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
16406 }
16407 case X86::BI__builtin_ia32_cmpb128_mask:
16408 case X86::BI__builtin_ia32_cmpb256_mask:
16409 case X86::BI__builtin_ia32_cmpb512_mask:
16410 case X86::BI__builtin_ia32_cmpw128_mask:
16411 case X86::BI__builtin_ia32_cmpw256_mask:
16412 case X86::BI__builtin_ia32_cmpw512_mask:
16413 case X86::BI__builtin_ia32_cmpd128_mask:
16414 case X86::BI__builtin_ia32_cmpd256_mask:
16415 case X86::BI__builtin_ia32_cmpd512_mask:
16416 case X86::BI__builtin_ia32_cmpq128_mask:
16417 case X86::BI__builtin_ia32_cmpq256_mask:
16418 case X86::BI__builtin_ia32_cmpq512_mask: {
16419 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16420 return EmitX86MaskedCompare(*this, CC, true, Ops);
16421 }
16422 case X86::BI__builtin_ia32_ucmpb128_mask:
16423 case X86::BI__builtin_ia32_ucmpb256_mask:
16424 case X86::BI__builtin_ia32_ucmpb512_mask:
16425 case X86::BI__builtin_ia32_ucmpw128_mask:
16426 case X86::BI__builtin_ia32_ucmpw256_mask:
16427 case X86::BI__builtin_ia32_ucmpw512_mask:
16428 case X86::BI__builtin_ia32_ucmpd128_mask:
16429 case X86::BI__builtin_ia32_ucmpd256_mask:
16430 case X86::BI__builtin_ia32_ucmpd512_mask:
16431 case X86::BI__builtin_ia32_ucmpq128_mask:
16432 case X86::BI__builtin_ia32_ucmpq256_mask:
16433 case X86::BI__builtin_ia32_ucmpq512_mask: {
16434 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16435 return EmitX86MaskedCompare(*this, CC, false, Ops);
16436 }
16437 case X86::BI__builtin_ia32_vpcomb:
16438 case X86::BI__builtin_ia32_vpcomw:
16439 case X86::BI__builtin_ia32_vpcomd:
16440 case X86::BI__builtin_ia32_vpcomq:
16441 return EmitX86vpcom(*this, Ops, true);
16442 case X86::BI__builtin_ia32_vpcomub:
16443 case X86::BI__builtin_ia32_vpcomuw:
16444 case X86::BI__builtin_ia32_vpcomud:
16445 case X86::BI__builtin_ia32_vpcomuq:
16446 return EmitX86vpcom(*this, Ops, false);
16447
16448 case X86::BI__builtin_ia32_kortestcqi:
16449 case X86::BI__builtin_ia32_kortestchi:
16450 case X86::BI__builtin_ia32_kortestcsi:
16451 case X86::BI__builtin_ia32_kortestcdi: {
16452 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16453 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
16454 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16455 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16456 }
16457 case X86::BI__builtin_ia32_kortestzqi:
16458 case X86::BI__builtin_ia32_kortestzhi:
16459 case X86::BI__builtin_ia32_kortestzsi:
16460 case X86::BI__builtin_ia32_kortestzdi: {
16461 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16462 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
16463 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16464 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16465 }
16466
16467 case X86::BI__builtin_ia32_ktestcqi:
16468 case X86::BI__builtin_ia32_ktestzqi:
16469 case X86::BI__builtin_ia32_ktestchi:
16470 case X86::BI__builtin_ia32_ktestzhi:
16471 case X86::BI__builtin_ia32_ktestcsi:
16472 case X86::BI__builtin_ia32_ktestzsi:
16473 case X86::BI__builtin_ia32_ktestcdi:
16474 case X86::BI__builtin_ia32_ktestzdi: {
16475 Intrinsic::ID IID;
16476 switch (BuiltinID) {
16477 default: llvm_unreachable("Unsupported intrinsic!");
16478 case X86::BI__builtin_ia32_ktestcqi:
16479 IID = Intrinsic::x86_avx512_ktestc_b;
16480 break;
16481 case X86::BI__builtin_ia32_ktestzqi:
16482 IID = Intrinsic::x86_avx512_ktestz_b;
16483 break;
16484 case X86::BI__builtin_ia32_ktestchi:
16485 IID = Intrinsic::x86_avx512_ktestc_w;
16486 break;
16487 case X86::BI__builtin_ia32_ktestzhi:
16488 IID = Intrinsic::x86_avx512_ktestz_w;
16489 break;
16490 case X86::BI__builtin_ia32_ktestcsi:
16491 IID = Intrinsic::x86_avx512_ktestc_d;
16492 break;
16493 case X86::BI__builtin_ia32_ktestzsi:
16494 IID = Intrinsic::x86_avx512_ktestz_d;
16495 break;
16496 case X86::BI__builtin_ia32_ktestcdi:
16497 IID = Intrinsic::x86_avx512_ktestc_q;
16498 break;
16499 case X86::BI__builtin_ia32_ktestzdi:
16500 IID = Intrinsic::x86_avx512_ktestz_q;
16501 break;
16502 }
16503
16504 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16505 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16506 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16507 Function *Intr = CGM.getIntrinsic(IID);
16508 return Builder.CreateCall(Intr, {LHS, RHS});
16509 }
16510
16511 case X86::BI__builtin_ia32_kaddqi:
16512 case X86::BI__builtin_ia32_kaddhi:
16513 case X86::BI__builtin_ia32_kaddsi:
16514 case X86::BI__builtin_ia32_kadddi: {
16515 Intrinsic::ID IID;
16516 switch (BuiltinID) {
16517 default: llvm_unreachable("Unsupported intrinsic!");
16518 case X86::BI__builtin_ia32_kaddqi:
16519 IID = Intrinsic::x86_avx512_kadd_b;
16520 break;
16521 case X86::BI__builtin_ia32_kaddhi:
16522 IID = Intrinsic::x86_avx512_kadd_w;
16523 break;
16524 case X86::BI__builtin_ia32_kaddsi:
16525 IID = Intrinsic::x86_avx512_kadd_d;
16526 break;
16527 case X86::BI__builtin_ia32_kadddi:
16528 IID = Intrinsic::x86_avx512_kadd_q;
16529 break;
16530 }
16531
16532 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16533 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16534 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16535 Function *Intr = CGM.getIntrinsic(IID);
16536 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
16537 return Builder.CreateBitCast(Res, Ops[0]->getType());
16538 }
16539 case X86::BI__builtin_ia32_kandqi:
16540 case X86::BI__builtin_ia32_kandhi:
16541 case X86::BI__builtin_ia32_kandsi:
16542 case X86::BI__builtin_ia32_kanddi:
16543 return EmitX86MaskLogic(*this, Instruction::And, Ops);
16544 case X86::BI__builtin_ia32_kandnqi:
16545 case X86::BI__builtin_ia32_kandnhi:
16546 case X86::BI__builtin_ia32_kandnsi:
16547 case X86::BI__builtin_ia32_kandndi:
16548 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
16549 case X86::BI__builtin_ia32_korqi:
16550 case X86::BI__builtin_ia32_korhi:
16551 case X86::BI__builtin_ia32_korsi:
16552 case X86::BI__builtin_ia32_kordi:
16553 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
16554 case X86::BI__builtin_ia32_kxnorqi:
16555 case X86::BI__builtin_ia32_kxnorhi:
16556 case X86::BI__builtin_ia32_kxnorsi:
16557 case X86::BI__builtin_ia32_kxnordi:
16558 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
16559 case X86::BI__builtin_ia32_kxorqi:
16560 case X86::BI__builtin_ia32_kxorhi:
16561 case X86::BI__builtin_ia32_kxorsi:
16562 case X86::BI__builtin_ia32_kxordi:
16563 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
16564 case X86::BI__builtin_ia32_knotqi:
16565 case X86::BI__builtin_ia32_knothi:
16566 case X86::BI__builtin_ia32_knotsi:
16567 case X86::BI__builtin_ia32_knotdi: {
16568 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16569 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16570 return Builder.CreateBitCast(Builder.CreateNot(Res),
16571 Ops[0]->getType());
16572 }
16573 case X86::BI__builtin_ia32_kmovb:
16574 case X86::BI__builtin_ia32_kmovw:
16575 case X86::BI__builtin_ia32_kmovd:
16576 case X86::BI__builtin_ia32_kmovq: {
16577 // Bitcast to vXi1 type and then back to integer. This gets the mask
16578 // register type into the IR, but might be optimized out depending on
16579 // what's around it.
16580 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16581 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16582 return Builder.CreateBitCast(Res, Ops[0]->getType());
16583 }
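  // Illustrative sketch (rough, value names invented): for
  // __builtin_ia32_kmovw the emitted IR is approximately
  //   %v = bitcast i16 %mask to <16 x i1>
  //   %r = bitcast <16 x i1> %v to i16
  // which later passes may fold away or select into a k-register move.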
16584
16585 case X86::BI__builtin_ia32_kunpckdi:
16586 case X86::BI__builtin_ia32_kunpcksi:
16587 case X86::BI__builtin_ia32_kunpckhi: {
16588 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16589 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16590 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16591 int Indices[64];
16592 for (unsigned i = 0; i != NumElts; ++i)
16593 Indices[i] = i;
16594
16595 // First extract half of each vector. This gives better codegen than
16596 // doing it in a single shuffle.
16597 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
16598 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
16599 // Concat the vectors.
16600 // NOTE: Operands are swapped to match the intrinsic definition.
16601 Value *Res =
16602 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
16603 return Builder.CreateBitCast(Res, Ops[0]->getType());
16604 }
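  // Rough sketch for __builtin_ia32_kunpckhi (16-bit masks), with invented
  // value names: each operand is first narrowed to its low 8 elements,
  //   %lo_a = shufflevector <16 x i1> %a, <16 x i1> %a, <8 x i32> <0 ... 7>
  //   %lo_b = shufflevector <16 x i1> %b, <16 x i1> %b, <8 x i32> <0 ... 7>
  // then the halves are concatenated with the second operand in the low half,
  //   %res = shufflevector <8 x i1> %lo_b, <8 x i1> %lo_a, <16 x i32> <0 ... 15>
  // before the result is bitcast back to i16.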
16605
16606 case X86::BI__builtin_ia32_vplzcntd_128:
16607 case X86::BI__builtin_ia32_vplzcntd_256:
16608 case X86::BI__builtin_ia32_vplzcntd_512:
16609 case X86::BI__builtin_ia32_vplzcntq_128:
16610 case X86::BI__builtin_ia32_vplzcntq_256:
16611 case X86::BI__builtin_ia32_vplzcntq_512: {
16612 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
16613 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
16614 }
16615 case X86::BI__builtin_ia32_sqrtss:
16616 case X86::BI__builtin_ia32_sqrtsd: {
16617 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16618 Function *F;
16619 if (Builder.getIsFPConstrained()) {
16620 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16621 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16622 A->getType());
16623 A = Builder.CreateConstrainedFPCall(F, {A});
16624 } else {
16625 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16626 A = Builder.CreateCall(F, {A});
16627 }
16628 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16629 }
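  // Rough sketch of the default (non-constrained) lowering for
  // __builtin_ia32_sqrtss, with invented value names:
  //   %a0 = extractelement <4 x float> %a, i64 0
  //   %s  = call float @llvm.sqrt.f32(float %a0)
  //   %r  = insertelement <4 x float> %a, float %s, i64 0
  // Under strict FP the constrained sqrt intrinsic is used instead.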
16630 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16631 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16632 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16633 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16634 // Lower to generic IR only if the rounding mode is 4 (AKA CUR_DIRECTION);
16635 // otherwise keep the target-specific intrinsic.
16636 if (CC != 4) {
16637 Intrinsic::ID IID;
16638
16639 switch (BuiltinID) {
16640 default:
16641 llvm_unreachable("Unsupported intrinsic!");
16642 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16643 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16644 break;
16645 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16646 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16647 break;
16648 case X86::BI__builtin_ia32_sqrtss_round_mask:
16649 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16650 break;
16651 }
16652 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16653 }
16654 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16655 Function *F;
16656 if (Builder.getIsFPConstrained()) {
16657 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16658 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16659 A->getType());
16660 A = Builder.CreateConstrainedFPCall(F, A);
16661 } else {
16662 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16663 A = Builder.CreateCall(F, A);
16664 }
16665 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16666 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16667 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16668 }
16669 case X86::BI__builtin_ia32_sqrtpd256:
16670 case X86::BI__builtin_ia32_sqrtpd:
16671 case X86::BI__builtin_ia32_sqrtps256:
16672 case X86::BI__builtin_ia32_sqrtps:
16673 case X86::BI__builtin_ia32_sqrtph256:
16674 case X86::BI__builtin_ia32_sqrtph:
16675 case X86::BI__builtin_ia32_sqrtph512:
16676 case X86::BI__builtin_ia32_vsqrtnepbf16256:
16677 case X86::BI__builtin_ia32_vsqrtnepbf16:
16678 case X86::BI__builtin_ia32_vsqrtnepbf16512:
16679 case X86::BI__builtin_ia32_sqrtps512:
16680 case X86::BI__builtin_ia32_sqrtpd512: {
16681 if (Ops.size() == 2) {
16682 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16683 // Lower to generic IR only if the rounding mode is 4 (AKA CUR_DIRECTION);
16684 // otherwise keep the target-specific intrinsic.
16685 if (CC != 4) {
16686 Intrinsic::ID IID;
16687
16688 switch (BuiltinID) {
16689 default:
16690 llvm_unreachable("Unsupported intrinsic!");
16691 case X86::BI__builtin_ia32_sqrtph512:
16692 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16693 break;
16694 case X86::BI__builtin_ia32_sqrtps512:
16695 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16696 break;
16697 case X86::BI__builtin_ia32_sqrtpd512:
16698 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16699 break;
16700 }
16701 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16702 }
16703 }
16704 if (Builder.getIsFPConstrained()) {
16705 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16706 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16707 Ops[0]->getType());
16708 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16709 } else {
16710 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16711 return Builder.CreateCall(F, Ops[0]);
16712 }
16713 }
16714
16715 case X86::BI__builtin_ia32_pmuludq128:
16716 case X86::BI__builtin_ia32_pmuludq256:
16717 case X86::BI__builtin_ia32_pmuludq512:
16718 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16719
16720 case X86::BI__builtin_ia32_pmuldq128:
16721 case X86::BI__builtin_ia32_pmuldq256:
16722 case X86::BI__builtin_ia32_pmuldq512:
16723 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16724
16725 case X86::BI__builtin_ia32_pternlogd512_mask:
16726 case X86::BI__builtin_ia32_pternlogq512_mask:
16727 case X86::BI__builtin_ia32_pternlogd128_mask:
16728 case X86::BI__builtin_ia32_pternlogd256_mask:
16729 case X86::BI__builtin_ia32_pternlogq128_mask:
16730 case X86::BI__builtin_ia32_pternlogq256_mask:
16731 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16732
16733 case X86::BI__builtin_ia32_pternlogd512_maskz:
16734 case X86::BI__builtin_ia32_pternlogq512_maskz:
16735 case X86::BI__builtin_ia32_pternlogd128_maskz:
16736 case X86::BI__builtin_ia32_pternlogd256_maskz:
16737 case X86::BI__builtin_ia32_pternlogq128_maskz:
16738 case X86::BI__builtin_ia32_pternlogq256_maskz:
16739 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16740
16741 case X86::BI__builtin_ia32_vpshldd128:
16742 case X86::BI__builtin_ia32_vpshldd256:
16743 case X86::BI__builtin_ia32_vpshldd512:
16744 case X86::BI__builtin_ia32_vpshldq128:
16745 case X86::BI__builtin_ia32_vpshldq256:
16746 case X86::BI__builtin_ia32_vpshldq512:
16747 case X86::BI__builtin_ia32_vpshldw128:
16748 case X86::BI__builtin_ia32_vpshldw256:
16749 case X86::BI__builtin_ia32_vpshldw512:
16750 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16751
16752 case X86::BI__builtin_ia32_vpshrdd128:
16753 case X86::BI__builtin_ia32_vpshrdd256:
16754 case X86::BI__builtin_ia32_vpshrdd512:
16755 case X86::BI__builtin_ia32_vpshrdq128:
16756 case X86::BI__builtin_ia32_vpshrdq256:
16757 case X86::BI__builtin_ia32_vpshrdq512:
16758 case X86::BI__builtin_ia32_vpshrdw128:
16759 case X86::BI__builtin_ia32_vpshrdw256:
16760 case X86::BI__builtin_ia32_vpshrdw512:
16761 // Ops 0 and 1 are swapped.
16762 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
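  // Rough sketch: because the concatenation puts the second vector operand in
  // the high half of each lane, vpshrdw128(A, B, I) becomes approximately
  //   %r = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %b, <8 x i16> %a,
  //                                        <8 x i16> splat(i16 I))
  // with the immediate splatted across the lanes.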
16763
16764 case X86::BI__builtin_ia32_vpshldvd128:
16765 case X86::BI__builtin_ia32_vpshldvd256:
16766 case X86::BI__builtin_ia32_vpshldvd512:
16767 case X86::BI__builtin_ia32_vpshldvq128:
16768 case X86::BI__builtin_ia32_vpshldvq256:
16769 case X86::BI__builtin_ia32_vpshldvq512:
16770 case X86::BI__builtin_ia32_vpshldvw128:
16771 case X86::BI__builtin_ia32_vpshldvw256:
16772 case X86::BI__builtin_ia32_vpshldvw512:
16773 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16774
16775 case X86::BI__builtin_ia32_vpshrdvd128:
16776 case X86::BI__builtin_ia32_vpshrdvd256:
16777 case X86::BI__builtin_ia32_vpshrdvd512:
16778 case X86::BI__builtin_ia32_vpshrdvq128:
16779 case X86::BI__builtin_ia32_vpshrdvq256:
16780 case X86::BI__builtin_ia32_vpshrdvq512:
16781 case X86::BI__builtin_ia32_vpshrdvw128:
16782 case X86::BI__builtin_ia32_vpshrdvw256:
16783 case X86::BI__builtin_ia32_vpshrdvw512:
16784 // Ops 0 and 1 are swapped.
16785 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16786
16787 // Reductions
16788 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16789 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16790 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16791 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16792 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16793 Function *F =
16794 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16795 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16796 Builder.getFastMathFlags().setAllowReassoc();
16797 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16798 }
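  // Rough sketch, with an invented start value: the 512-bit float reduction
  // becomes approximately
  //   %r = call reassoc float @llvm.vector.reduce.fadd.v16f32(
  //            float %init, <16 x float> %v)
  // where only the reassoc flag is set, allowing a tree-shaped reduction.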
16799 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16800 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16801 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16802 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16803 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16804 Function *F =
16805 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16806 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16807 Builder.getFastMathFlags().setAllowReassoc();
16808 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16809 }
16810 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16811 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16812 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16813 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16814 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16815 Function *F =
16816 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16817 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16818 Builder.getFastMathFlags().setNoNaNs();
16819 return Builder.CreateCall(F, {Ops[0]});
16820 }
16821 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16822 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16823 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16824 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16825 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16826 Function *F =
16827 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16828 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16829 Builder.getFastMathFlags().setNoNaNs();
16830 return Builder.CreateCall(F, {Ops[0]});
16831 }
16832
16833 case X86::BI__builtin_ia32_rdrand16_step:
16834 case X86::BI__builtin_ia32_rdrand32_step:
16835 case X86::BI__builtin_ia32_rdrand64_step:
16836 case X86::BI__builtin_ia32_rdseed16_step:
16837 case X86::BI__builtin_ia32_rdseed32_step:
16838 case X86::BI__builtin_ia32_rdseed64_step: {
16839 Intrinsic::ID ID;
16840 switch (BuiltinID) {
16841 default: llvm_unreachable("Unsupported intrinsic!");
16842 case X86::BI__builtin_ia32_rdrand16_step:
16843 ID = Intrinsic::x86_rdrand_16;
16844 break;
16845 case X86::BI__builtin_ia32_rdrand32_step:
16846 ID = Intrinsic::x86_rdrand_32;
16847 break;
16848 case X86::BI__builtin_ia32_rdrand64_step:
16849 ID = Intrinsic::x86_rdrand_64;
16850 break;
16851 case X86::BI__builtin_ia32_rdseed16_step:
16852 ID = Intrinsic::x86_rdseed_16;
16853 break;
16854 case X86::BI__builtin_ia32_rdseed32_step:
16855 ID = Intrinsic::x86_rdseed_32;
16856 break;
16857 case X86::BI__builtin_ia32_rdseed64_step:
16858 ID = Intrinsic::x86_rdseed_64;
16859 break;
16860 }
16861
16862 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16863 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16864 Ops[0]);
16865 return Builder.CreateExtractValue(Call, 1);
16866 }
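  // Rough sketch for _rdrand32_step(&out), with invented names:
  //   %call = call { i32, i32 } @llvm.x86.rdrand.32()
  //   %val  = extractvalue { i32, i32 } %call, 0
  //   store i32 %val, ptr %out
  //   %ok   = extractvalue { i32, i32 } %call, 1
  // The success flag %ok is the builtin's return value.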
16867 case X86::BI__builtin_ia32_addcarryx_u32:
16868 case X86::BI__builtin_ia32_addcarryx_u64:
16869 case X86::BI__builtin_ia32_subborrow_u32:
16870 case X86::BI__builtin_ia32_subborrow_u64: {
16871 Intrinsic::ID IID;
16872 switch (BuiltinID) {
16873 default: llvm_unreachable("Unsupported intrinsic!");
16874 case X86::BI__builtin_ia32_addcarryx_u32:
16875 IID = Intrinsic::x86_addcarry_32;
16876 break;
16877 case X86::BI__builtin_ia32_addcarryx_u64:
16878 IID = Intrinsic::x86_addcarry_64;
16879 break;
16880 case X86::BI__builtin_ia32_subborrow_u32:
16881 IID = Intrinsic::x86_subborrow_32;
16882 break;
16883 case X86::BI__builtin_ia32_subborrow_u64:
16884 IID = Intrinsic::x86_subborrow_64;
16885 break;
16886 }
16887
16888 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16889 { Ops[0], Ops[1], Ops[2] });
16890 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16891 Ops[3]);
16892 return Builder.CreateExtractValue(Call, 0);
16893 }
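  // Rough sketch for _addcarryx_u32(c, a, b, &out), with invented names:
  //   %call = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
  //   %sum  = extractvalue { i8, i32 } %call, 1
  //   store i32 %sum, ptr %out
  //   %cout = extractvalue { i8, i32 } %call, 0
  // The carry-out %cout is returned.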
16894
16895 case X86::BI__builtin_ia32_fpclassps128_mask:
16896 case X86::BI__builtin_ia32_fpclassps256_mask:
16897 case X86::BI__builtin_ia32_fpclassps512_mask:
16898 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16899 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16900 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16901 case X86::BI__builtin_ia32_fpclassph128_mask:
16902 case X86::BI__builtin_ia32_fpclassph256_mask:
16903 case X86::BI__builtin_ia32_fpclassph512_mask:
16904 case X86::BI__builtin_ia32_fpclasspd128_mask:
16905 case X86::BI__builtin_ia32_fpclasspd256_mask:
16906 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16907 unsigned NumElts =
16908 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16909 Value *MaskIn = Ops[2];
16910 Ops.erase(&Ops[2]);
16911
16912 Intrinsic::ID ID;
16913 switch (BuiltinID) {
16914 default: llvm_unreachable("Unsupported intrinsic!");
16915 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16916 ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16917 break;
16918 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16919 ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16920 break;
16921 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16922 ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16923 break;
16924 case X86::BI__builtin_ia32_fpclassph128_mask:
16925 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16926 break;
16927 case X86::BI__builtin_ia32_fpclassph256_mask:
16928 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16929 break;
16930 case X86::BI__builtin_ia32_fpclassph512_mask:
16931 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16932 break;
16933 case X86::BI__builtin_ia32_fpclassps128_mask:
16934 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16935 break;
16936 case X86::BI__builtin_ia32_fpclassps256_mask:
16937 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16938 break;
16939 case X86::BI__builtin_ia32_fpclassps512_mask:
16940 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16941 break;
16942 case X86::BI__builtin_ia32_fpclasspd128_mask:
16943 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16944 break;
16945 case X86::BI__builtin_ia32_fpclasspd256_mask:
16946 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16947 break;
16948 case X86::BI__builtin_ia32_fpclasspd512_mask:
16949 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16950 break;
16951 }
16952
16953 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16954 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16955 }
16956
16957 case X86::BI__builtin_ia32_vp2intersect_q_512:
16958 case X86::BI__builtin_ia32_vp2intersect_q_256:
16959 case X86::BI__builtin_ia32_vp2intersect_q_128:
16960 case X86::BI__builtin_ia32_vp2intersect_d_512:
16961 case X86::BI__builtin_ia32_vp2intersect_d_256:
16962 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16963 unsigned NumElts =
16964 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16965 Intrinsic::ID ID;
16966
16967 switch (BuiltinID) {
16968 default: llvm_unreachable("Unsupported intrinsic!");
16969 case X86::BI__builtin_ia32_vp2intersect_q_512:
16970 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16971 break;
16972 case X86::BI__builtin_ia32_vp2intersect_q_256:
16973 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16974 break;
16975 case X86::BI__builtin_ia32_vp2intersect_q_128:
16976 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16977 break;
16978 case X86::BI__builtin_ia32_vp2intersect_d_512:
16979 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16980 break;
16981 case X86::BI__builtin_ia32_vp2intersect_d_256:
16982 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16983 break;
16984 case X86::BI__builtin_ia32_vp2intersect_d_128:
16985 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16986 break;
16987 }
16988
16989 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16990 Value *Result = Builder.CreateExtractValue(Call, 0);
16991 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16992 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16993
16994 Result = Builder.CreateExtractValue(Call, 1);
16995 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16996 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16997 }
16998
16999 case X86::BI__builtin_ia32_vpmultishiftqb128:
17000 case X86::BI__builtin_ia32_vpmultishiftqb256:
17001 case X86::BI__builtin_ia32_vpmultishiftqb512: {
17002 Intrinsic::ID ID;
17003 switch (BuiltinID) {
17004 default: llvm_unreachable("Unsupported intrinsic!");
17005 case X86::BI__builtin_ia32_vpmultishiftqb128:
17006 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
17007 break;
17008 case X86::BI__builtin_ia32_vpmultishiftqb256:
17009 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
17010 break;
17011 case X86::BI__builtin_ia32_vpmultishiftqb512:
17012 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
17013 break;
17014 }
17015
17016 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
17017 }
17018
17019 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
17020 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
17021 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
17022 unsigned NumElts =
17023 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17024 Value *MaskIn = Ops[2];
17025 Ops.erase(&Ops[2]);
17026
17027 Intrinsic::ID ID;
17028 switch (BuiltinID) {
17029 default: llvm_unreachable("Unsupported intrinsic!");
17030 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
17031 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
17032 break;
17033 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
17034 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
17035 break;
17036 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
17037 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
17038 break;
17039 }
17040
17041 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
17042 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
17043 }
17044
17045 // packed comparison intrinsics
17046 case X86::BI__builtin_ia32_cmpeqps:
17047 case X86::BI__builtin_ia32_cmpeqpd:
17048 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
17049 case X86::BI__builtin_ia32_cmpltps:
17050 case X86::BI__builtin_ia32_cmpltpd:
17051 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
17052 case X86::BI__builtin_ia32_cmpleps:
17053 case X86::BI__builtin_ia32_cmplepd:
17054 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
17055 case X86::BI__builtin_ia32_cmpunordps:
17056 case X86::BI__builtin_ia32_cmpunordpd:
17057 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
17058 case X86::BI__builtin_ia32_cmpneqps:
17059 case X86::BI__builtin_ia32_cmpneqpd:
17060 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
17061 case X86::BI__builtin_ia32_cmpnltps:
17062 case X86::BI__builtin_ia32_cmpnltpd:
17063 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
17064 case X86::BI__builtin_ia32_cmpnleps:
17065 case X86::BI__builtin_ia32_cmpnlepd:
17066 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
17067 case X86::BI__builtin_ia32_cmpordps:
17068 case X86::BI__builtin_ia32_cmpordpd:
17069 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
17070 case X86::BI__builtin_ia32_cmpph128_mask:
17071 case X86::BI__builtin_ia32_cmpph256_mask:
17072 case X86::BI__builtin_ia32_cmpph512_mask:
17073 case X86::BI__builtin_ia32_cmpps128_mask:
17074 case X86::BI__builtin_ia32_cmpps256_mask:
17075 case X86::BI__builtin_ia32_cmpps512_mask:
17076 case X86::BI__builtin_ia32_cmppd128_mask:
17077 case X86::BI__builtin_ia32_cmppd256_mask:
17078 case X86::BI__builtin_ia32_cmppd512_mask:
17079 case X86::BI__builtin_ia32_vcmppd256_round_mask:
17080 case X86::BI__builtin_ia32_vcmpps256_round_mask:
17081 case X86::BI__builtin_ia32_vcmpph256_round_mask:
17082 case X86::BI__builtin_ia32_vcmppbf16512_mask:
17083 case X86::BI__builtin_ia32_vcmppbf16256_mask:
17084 case X86::BI__builtin_ia32_vcmppbf16128_mask:
17085 IsMaskFCmp = true;
17086 [[fallthrough]];
17087 case X86::BI__builtin_ia32_cmpps:
17088 case X86::BI__builtin_ia32_cmpps256:
17089 case X86::BI__builtin_ia32_cmppd:
17090 case X86::BI__builtin_ia32_cmppd256: {
17091 // Lower vector comparisons to fcmp instructions, ignoring both the
17092 // requested signalling behaviour and the requested rounding mode.
17093 // This is only possible if the FP model is not strict and FENV_ACCESS is
17094 // off.
17095
17096 // The third argument is the comparison condition, an integer in the
17097 // range [0, 31].
17098 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
17099
17100 // Lower to an IR fcmp instruction, ignoring the requested signaling
17101 // behaviour: e.g. both _CMP_GT_OS and _CMP_GT_OQ are translated to
17102 // FCMP_OGT.
17103 FCmpInst::Predicate Pred;
17104 bool IsSignaling;
17105 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
17106 // behavior is inverted. We'll handle that after the switch.
17107 switch (CC & 0xf) {
17108 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
17109 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
17110 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
17111 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
17112 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
17113 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
17114 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
17115 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
17116 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
17117 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
17118 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
17119 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
17120 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
17121 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
17122 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
17123 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
17124 default: llvm_unreachable("Unhandled CC");
17125 }
17126
17127 // Invert the signalling behavior for 16-31.
17128 if (CC & 0x10)
17129 IsSignaling = !IsSignaling;
17130
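  // For example, _CMP_GT_OS (0x0e) and _CMP_GT_OQ (0x1e) both map to
  // FCMP_OGT; the 0x10 bit only flips IsSignaling between them.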
17131 // If the predicate is true or false and we're using constrained intrinsics,
17132 // we don't have a compare intrinsic we can use. Just use the legacy X86
17133 // specific intrinsic.
17134 // If the intrinsic is mask enabled and we're using constrained intrinsics,
17135 // use the legacy X86 specific intrinsic.
17136 if (Builder.getIsFPConstrained() &&
17137 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
17138 IsMaskFCmp)) {
17139
17140 Intrinsic::ID IID;
17141 switch (BuiltinID) {
17142 default: llvm_unreachable("Unexpected builtin");
17143 case X86::BI__builtin_ia32_cmpps:
17144 IID = Intrinsic::x86_sse_cmp_ps;
17145 break;
17146 case X86::BI__builtin_ia32_cmpps256:
17147 IID = Intrinsic::x86_avx_cmp_ps_256;
17148 break;
17149 case X86::BI__builtin_ia32_cmppd:
17150 IID = Intrinsic::x86_sse2_cmp_pd;
17151 break;
17152 case X86::BI__builtin_ia32_cmppd256:
17153 IID = Intrinsic::x86_avx_cmp_pd_256;
17154 break;
17155 case X86::BI__builtin_ia32_cmpph128_mask:
17156 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
17157 break;
17158 case X86::BI__builtin_ia32_cmpph256_mask:
17159 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
17160 break;
17161 case X86::BI__builtin_ia32_cmpph512_mask:
17162 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
17163 break;
17164 case X86::BI__builtin_ia32_cmpps512_mask:
17165 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
17166 break;
17167 case X86::BI__builtin_ia32_cmppd512_mask:
17168 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
17169 break;
17170 case X86::BI__builtin_ia32_cmpps128_mask:
17171 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
17172 break;
17173 case X86::BI__builtin_ia32_cmpps256_mask:
17174 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
17175 break;
17176 case X86::BI__builtin_ia32_cmppd128_mask:
17177 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
17178 break;
17179 case X86::BI__builtin_ia32_cmppd256_mask:
17180 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
17181 break;
17182 }
17183
17184 Function *Intr = CGM.getIntrinsic(IID);
17185 if (IsMaskFCmp) {
17186 unsigned NumElts =
17187 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17188 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
17189 Value *Cmp = Builder.CreateCall(Intr, Ops);
17190 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
17191 }
17192
17193 return Builder.CreateCall(Intr, Ops);
17194 }
17195
17196 // Builtins without the _mask suffix return a vector of integers
17197 // of the same width as the input vectors
17198 if (IsMaskFCmp) {
17199 // We ignore SAE if strict FP is disabled. We only keep precise
17200 // exception behavior under strict FP.
17201 // NOTE: If strict FP ever goes through here, a CGFPOptionsRAII
17202 // object will be required.
17203 unsigned NumElts =
17204 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17205 Value *Cmp;
17206 if (IsSignaling)
17207 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
17208 else
17209 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
17210 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
17211 }
17212
17213 return getVectorFCmpIR(Pred, IsSignaling);
17214 }
17215
17216 // SSE scalar comparison intrinsics
17217 case X86::BI__builtin_ia32_cmpeqss:
17218 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
17219 case X86::BI__builtin_ia32_cmpltss:
17220 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
17221 case X86::BI__builtin_ia32_cmpless:
17222 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
17223 case X86::BI__builtin_ia32_cmpunordss:
17224 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
17225 case X86::BI__builtin_ia32_cmpneqss:
17226 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
17227 case X86::BI__builtin_ia32_cmpnltss:
17228 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
17229 case X86::BI__builtin_ia32_cmpnless:
17230 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
17231 case X86::BI__builtin_ia32_cmpordss:
17232 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
17233 case X86::BI__builtin_ia32_cmpeqsd:
17234 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
17235 case X86::BI__builtin_ia32_cmpltsd:
17236 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
17237 case X86::BI__builtin_ia32_cmplesd:
17238 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
17239 case X86::BI__builtin_ia32_cmpunordsd:
17240 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
17241 case X86::BI__builtin_ia32_cmpneqsd:
17242 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
17243 case X86::BI__builtin_ia32_cmpnltsd:
17244 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
17245 case X86::BI__builtin_ia32_cmpnlesd:
17246 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
17247 case X86::BI__builtin_ia32_cmpordsd:
17248 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
17249
17250 // f16c half2float intrinsics
17251 case X86::BI__builtin_ia32_vcvtph2ps:
17252 case X86::BI__builtin_ia32_vcvtph2ps256:
17253 case X86::BI__builtin_ia32_vcvtph2ps_mask:
17254 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
17255 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
17256 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
17257 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
17258 }
17259
17260 // AVX512 bf16 intrinsics
17261 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
17262 Ops[2] = getMaskVecValue(
17263 *this, Ops[2],
17264 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
17265 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
17266 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17267 }
17268 case X86::BI__builtin_ia32_cvtsbf162ss_32:
17269 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
17270
17271 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17272 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
17273 Intrinsic::ID IID;
17274 switch (BuiltinID) {
17275 default: llvm_unreachable("Unsupported intrinsic!");
17276 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17277 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
17278 break;
17279 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
17280 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
17281 break;
17282 }
17283 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
17284 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
17285 }
17286
17287 case X86::BI__cpuid:
17288 case X86::BI__cpuidex: {
17289 Value *FuncId = EmitScalarExpr(E->getArg(1));
17290 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
17291 ? EmitScalarExpr(E->getArg(2))
17292 : llvm::ConstantInt::get(Int32Ty, 0);
17293
17294 llvm::StructType *CpuidRetTy =
17295 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
17296 llvm::FunctionType *FTy =
17297 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
17298
17299 StringRef Asm, Constraints;
17300 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
17301 Asm = "cpuid";
17302 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
17303 } else {
17304 // x86-64 uses %rbx as the base register, so preserve it.
17305 Asm = "xchgq %rbx, ${1:q}\n"
17306 "cpuid\n"
17307 "xchgq %rbx, ${1:q}";
17308 Constraints = "={ax},=r,={cx},={dx},0,2";
17309 }
17310
17311 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
17312 /*hasSideEffects=*/false);
17313 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
17314 Value *BasePtr = EmitScalarExpr(E->getArg(0));
17315 Value *Store = nullptr;
17316 for (unsigned i = 0; i < 4; i++) {
17317 Value *Extracted = Builder.CreateExtractValue(IACall, i);
17318 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
17319 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
17320 }
17321
17322 // Return the last store instruction to signal that we have emitted the
17323 // intrinsic.
17324 return Store;
17325 }
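  // Rough usage sketch: int Info[4]; __cpuidex(Info, 7, 0); runs the inline
  // asm above and stores EAX/EBX/ECX/EDX for leaf 7, subleaf 0 into
  // Info[0..3].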
17326
17327 case X86::BI__emul:
17328 case X86::BI__emulu: {
17329 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
17330 bool isSigned = (BuiltinID == X86::BI__emul);
17331 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
17332 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
17333 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
17334 }
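  // Rough sketch for __emul(a, b), with invented names:
  //   %l = sext i32 %a to i64
  //   %r = sext i32 %b to i64
  //   %m = mul nsw i64 %l, %r
  // __emulu zero-extends instead and marks the multiply nuw.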
17335 case X86::BI__mulh:
17336 case X86::BI__umulh:
17337 case X86::BI_mul128:
17338 case X86::BI_umul128: {
17339 llvm::Type *ResType = ConvertType(E->getType());
17340 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17341
17342 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
17343 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
17344 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
17345
17346 Value *MulResult, *HigherBits;
17347 if (IsSigned) {
17348 MulResult = Builder.CreateNSWMul(LHS, RHS);
17349 HigherBits = Builder.CreateAShr(MulResult, 64);
17350 } else {
17351 MulResult = Builder.CreateNUWMul(LHS, RHS);
17352 HigherBits = Builder.CreateLShr(MulResult, 64);
17353 }
17354 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
17355
17356 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
17357 return HigherBits;
17358
17359 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
17360 Builder.CreateStore(HigherBits, HighBitsAddress);
17361 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
17362 }
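  // Rough sketch for __umulh(a, b), with invented names:
  //   %l = zext i64 %a to i128
  //   %r = zext i64 %b to i128
  //   %m = mul nuw i128 %l, %r
  //   %h = lshr i128 %m, 64
  //   %t = trunc i128 %h to i64
  // _umul128 additionally stores %t through its third argument and returns
  // the truncated low half of %m.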
17363
17364 case X86::BI__faststorefence: {
17365 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17366 llvm::SyncScope::System);
17367 }
17368 case X86::BI__shiftleft128:
17369 case X86::BI__shiftright128: {
17370 llvm::Function *F = CGM.getIntrinsic(
17371 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
17372 Int64Ty);
17373 // Flip low/high ops and zero-extend amount to matching type.
17374 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
17375 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
17376 std::swap(Ops[0], Ops[1]);
17377 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
17378 return Builder.CreateCall(F, Ops);
17379 }
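  // Rough sketch: __shiftleft128(Low, High, Amt) becomes approximately
  //   %r = call i64 @llvm.fshl.i64(i64 %high, i64 %low, i64 %amt)
  // i.e. the funnel shift sees High:Low and yields the shifted-out high word;
  // __shiftright128 uses @llvm.fshr the same way.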
17380 case X86::BI_ReadWriteBarrier:
17381 case X86::BI_ReadBarrier:
17382 case X86::BI_WriteBarrier: {
17383 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17384 llvm::SyncScope::SingleThread);
17385 }
17386
17387 case X86::BI_AddressOfReturnAddress: {
17388 Function *F =
17389 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
17390 return Builder.CreateCall(F);
17391 }
17392 case X86::BI__stosb: {
17393 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
17394 // instruction, but it will create a memset that won't be optimized away.
17395 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
17396 }
17397 // Corresponds to intrinsics that return 2 tiles (tile0_tile1).
17398 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17399 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17400 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17401 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17402 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17403 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17404 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17405 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
17406 Intrinsic::ID IID;
17407 switch (BuiltinID) {
17408 default:
17409 llvm_unreachable("Unsupported intrinsic!");
17410 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17411 IID = Intrinsic::x86_t2rpntlvwz0_internal;
17412 break;
17413 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17414 IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17415 break;
17416 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17417 IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17418 break;
17419 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17420 IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17421 break;
17422 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17423 IID = Intrinsic::x86_t2rpntlvwz1_internal;
17424 break;
17425 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17426 IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17427 break;
17428 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17429 IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17430 break;
17431 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17432 IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17433 break;
17434 }
17435
17436 // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17437 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
17438 {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17439
17440 auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
17441 assert(PtrTy && "arg3 must be of pointer type");
17442 QualType PtreeTy = PtrTy->getPointeeType();
17443 llvm::Type *TyPtee = ConvertType(PtreeTy);
17444
17445 // Bitcast amx type (x86_amx) to vector type (256 x i32)
17446 // Then store tile0 into DstPtr0
17447 Value *T0 = Builder.CreateExtractValue(Call, 0);
17448 Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17449 {TyPtee}, {T0});
17450 Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
17451
17452 // Then store tile1 into DstPtr1
17453 Value *T1 = Builder.CreateExtractValue(Call, 1);
17454 Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17455 {TyPtee}, {T1});
17456 Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
17457
17458 // Note: We deliberately avoid using x86_tilestored64_internal to store
17459 // the results here, because it cannot guarantee the scope of the memory
17460 // it writes. That could force shape reloads after the first AMX intrinsic,
17461 // which the current AMX register allocation cannot handle.
17462
17463 return Store;
17464 }
17465 case X86::BI__ud2:
17466 // llvm.trap makes a ud2a instruction on x86.
17467 return EmitTrapCall(Intrinsic::trap);
17468 case X86::BI__int2c: {
17469 // This syscall signals a driver assertion failure in x86 NT kernels.
17470 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
17471 llvm::InlineAsm *IA =
17472 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
17473 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
17474 getLLVMContext(), llvm::AttributeList::FunctionIndex,
17475 llvm::Attribute::NoReturn);
17476 llvm::CallInst *CI = Builder.CreateCall(IA);
17477 CI->setAttributes(NoReturnAttr);
17478 return CI;
17479 }
17480 case X86::BI__readfsbyte:
17481 case X86::BI__readfsword:
17482 case X86::BI__readfsdword:
17483 case X86::BI__readfsqword: {
17484 llvm::Type *IntTy = ConvertType(E->getType());
17485 Value *Ptr = Builder.CreateIntToPtr(
17486 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
17487 LoadInst *Load = Builder.CreateAlignedLoad(
17488 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17489 Load->setVolatile(true);
17490 return Load;
17491 }
17492 case X86::BI__readgsbyte:
17493 case X86::BI__readgsword:
17494 case X86::BI__readgsdword:
17495 case X86::BI__readgsqword: {
17496 llvm::Type *IntTy = ConvertType(E->getType());
17497 Value *Ptr = Builder.CreateIntToPtr(
17498 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
17499 LoadInst *Load = Builder.CreateAlignedLoad(
17500 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17501 Load->setVolatile(true);
17502 return Load;
17503 }
17504 case X86::BI__builtin_ia32_encodekey128_u32: {
17505 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
17506
17507 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
17508
17509 for (int i = 0; i < 3; ++i) {
17510 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17511 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
17512 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17513 }
17514
17515 return Builder.CreateExtractValue(Call, 0);
17516 }
17517 case X86::BI__builtin_ia32_encodekey256_u32: {
17518 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
17519
17520 Value *Call =
17521 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
17522
17523 for (int i = 0; i < 4; ++i) {
17524 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17525 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
17526 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17527 }
17528
17529 return Builder.CreateExtractValue(Call, 0);
17530 }
17531 case X86::BI__builtin_ia32_aesenc128kl_u8:
17532 case X86::BI__builtin_ia32_aesdec128kl_u8:
17533 case X86::BI__builtin_ia32_aesenc256kl_u8:
17534 case X86::BI__builtin_ia32_aesdec256kl_u8: {
17535 Intrinsic::ID IID;
17536 StringRef BlockName;
17537 switch (BuiltinID) {
17538 default:
17539 llvm_unreachable("Unexpected builtin");
17540 case X86::BI__builtin_ia32_aesenc128kl_u8:
17541 IID = Intrinsic::x86_aesenc128kl;
17542 BlockName = "aesenc128kl";
17543 break;
17544 case X86::BI__builtin_ia32_aesdec128kl_u8:
17545 IID = Intrinsic::x86_aesdec128kl;
17546 BlockName = "aesdec128kl";
17547 break;
17548 case X86::BI__builtin_ia32_aesenc256kl_u8:
17549 IID = Intrinsic::x86_aesenc256kl;
17550 BlockName = "aesenc256kl";
17551 break;
17552 case X86::BI__builtin_ia32_aesdec256kl_u8:
17553 IID = Intrinsic::x86_aesdec256kl;
17554 BlockName = "aesdec256kl";
17555 break;
17556 }
17557
17558 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
17559
17560 BasicBlock *NoError =
17561 createBasicBlock(BlockName + "_no_error", this->CurFn);
17562 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17563 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17564
17565 Value *Ret = Builder.CreateExtractValue(Call, 0);
17566 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17567 Value *Out = Builder.CreateExtractValue(Call, 1);
17568 Builder.CreateCondBr(Succ, NoError, Error);
17569
17570 Builder.SetInsertPoint(NoError);
17571 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
17572 Builder.CreateBr(End);
17573
17574 Builder.SetInsertPoint(Error);
17575 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17576 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
17577 Builder.CreateBr(End);
17578
17579 Builder.SetInsertPoint(End);
17580 return Builder.CreateExtractValue(Call, 0);
17581 }
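  // Rough sketch of the emitted control flow, with invented names:
  //   %call   = call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %idata,
  //                                                          ptr %handle)
  //   %status = extractvalue { i8, <2 x i64> } %call, 0
  //   %ok     = trunc i8 %status to i1
  //   br i1 %ok, label %aesenc128kl_no_error, label %aesenc128kl_error
  // On success the ciphertext is stored to the output pointer; on failure a
  // zero vector is stored instead. The status byte is returned either way.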
17582 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17583 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17584 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17585 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
17586 Intrinsic::ID IID;
17587 StringRef BlockName;
17588 switch (BuiltinID) {
17589 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17590 IID = Intrinsic::x86_aesencwide128kl;
17591 BlockName = "aesencwide128kl";
17592 break;
17593 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17594 IID = Intrinsic::x86_aesdecwide128kl;
17595 BlockName = "aesdecwide128kl";
17596 break;
17597 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17598 IID = Intrinsic::x86_aesencwide256kl;
17599 BlockName = "aesencwide256kl";
17600 break;
17601 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
17602 IID = Intrinsic::x86_aesdecwide256kl;
17603 BlockName = "aesdecwide256kl";
17604 break;
17605 }
17606
17607 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
17608 Value *InOps[9];
17609 InOps[0] = Ops[2];
17610 for (int i = 0; i != 8; ++i) {
17611 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
17612 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
17613 }
17614
17615 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
17616
17617 BasicBlock *NoError =
17618 createBasicBlock(BlockName + "_no_error", this->CurFn);
17619 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17620 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17621
17622 Value *Ret = Builder.CreateExtractValue(Call, 0);
17623 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17624 Builder.CreateCondBr(Succ, NoError, Error);
17625
17626 Builder.SetInsertPoint(NoError);
17627 for (int i = 0; i != 8; ++i) {
17628 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17629 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
17630 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
17631 }
17632 Builder.CreateBr(End);
17633
17634 Builder.SetInsertPoint(Error);
17635 for (int i = 0; i != 8; ++i) {
17636 Value *Out = Builder.CreateExtractValue(Call, i + 1);
17637 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17638 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
17639 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
17640 }
17641 Builder.CreateBr(End);
17642
17643 Builder.SetInsertPoint(End);
17644 return Builder.CreateExtractValue(Call, 0);
17645 }
17646 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
17647 IsConjFMA = true;
17648 [[fallthrough]];
17649 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
17650 Intrinsic::ID IID = IsConjFMA
17651 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17652 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
17653 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17654 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17655 }
17656 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
17657 IsConjFMA = true;
17658 [[fallthrough]];
17659 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
17660 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
17661 : Intrinsic::x86_avx10_mask_vfmaddcph256;
17662 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17663 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17664 }
17665 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
17666 IsConjFMA = true;
17667 [[fallthrough]];
17668 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
17669 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17670 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17671 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17672 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
17673 return EmitX86Select(*this, And, Call, Ops[0]);
17674 }
17675 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
17676 IsConjFMA = true;
17677 [[fallthrough]];
17678 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
17679 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17680 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17681 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17682 static constexpr int Mask[] = {0, 5, 6, 7};
17683 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
17684 }
17685 case X86::BI__builtin_ia32_prefetchi:
17686 return Builder.CreateCall(
17687 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
17688 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17689 llvm::ConstantInt::get(Int32Ty, 0)});
17690 }
17691}
17692
17693Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
17694 const CallExpr *E) {
17695 // Do not evaluate the builtin's arguments inside the argument list of a
17696 // function call, because the evaluation order of function arguments is not
17697 // specified in C++. This matters for testing, which relies on the arguments
17698 // being emitted in the same order every time. Eg:
17699 // Instead of:
17700 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17701 // EmitScalarExpr(E->getArg(1)), "swdiv");
17702 // Use:
17703 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17704 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17705 // return Builder.CreateFDiv(Op0, Op1, "swdiv");
17706
17707 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17708
17709#include "llvm/TargetParser/PPCTargetParser.def"
17710 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17711 unsigned Mask, CmpInst::Predicate CompOp,
17712 unsigned OpValue) -> Value * {
17713 if (SupportMethod == BUILTIN_PPC_FALSE)
17714 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17715
17716 if (SupportMethod == BUILTIN_PPC_TRUE)
17717 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
17718
17719 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17720
17721 llvm::Value *FieldValue = nullptr;
17722 if (SupportMethod == USE_SYS_CONF) {
17723 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17724 llvm::Constant *SysConf =
17725 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17726
17727 // Grab the appropriate field from _system_configuration.
17728 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17729 ConstantInt::get(Int32Ty, FieldIdx)};
17730
17731 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17732 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17733 CharUnits::fromQuantity(4));
17734 } else if (SupportMethod == SYS_CALL) {
17735 llvm::FunctionType *FTy =
17736 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17737 llvm::FunctionCallee Func =
17738 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17739
17740 FieldValue =
17741 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17742 }
17743 assert(FieldValue &&
17744 "SupportMethod value is not defined in PPCTargetParser.def.");
17745
17746 if (Mask)
17747 FieldValue = Builder.CreateAnd(FieldValue, Mask);
17748
17749 llvm::Type *ValueType = FieldValue->getType();
17750 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17751 assert(
17752 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17753 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17754
17755 return Builder.CreateICmp(
17756 CompOp, FieldValue,
17757 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17758 };
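  // Rough sketch of the USE_SYS_CONF path on AIX, with invented names:
  //   %f = getelementptr inbounds %struct._system_configuration,
  //        ptr @_system_configuration, i32 0, i32 <FieldIdx>
  //   %v = load i32, ptr %f
  // The loaded word is optionally masked and then compared against OpValue
  // using CompOp; the SYS_CALL path calls getsystemcfg(FieldIdx) instead.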
17759
17760 switch (BuiltinID) {
17761 default: return nullptr;
17762
17763 case Builtin::BI__builtin_cpu_is: {
17764 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17765 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17766 llvm::Triple Triple = getTarget().getTriple();
17767
17768 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17769 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17770
17771 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17772 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17773#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17774 AIXID) \
17775 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17776#include "llvm/TargetParser/PPCTargetParser.def"
17777 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17778 BUILTIN_PPC_UNSUPPORTED, 0}));
17779
17780 if (Triple.isOSAIX()) {
17781 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17782 "Invalid CPU name. Missed by SemaChecking?");
17783 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17784 ICmpInst::ICMP_EQ, AIXIDValue);
17785 }
17786
17787 assert(Triple.isOSLinux() &&
17788 "__builtin_cpu_is() is only supported for AIX and Linux.");
17789
17790 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17791 "Invalid CPU name. Missed by SemaChecking?");
17792
17793 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17794 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17795
17796 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17797 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17798 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17799 return Builder.CreateICmpEQ(TheCall,
17800 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17801 }
17802 case Builtin::BI__builtin_cpu_supports: {
17803 llvm::Triple Triple = getTarget().getTriple();
17804 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17805 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17806 if (Triple.isOSAIX()) {
17807 unsigned SupportMethod, FieldIdx, Mask, Value;
17808 CmpInst::Predicate CompOp;
17809 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17810 unsigned>
17811 CPUSupportType;
17812 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17813 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17814#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17815 VALUE) \
17816 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17817#include "llvm/TargetParser/PPCTargetParser.def"
17818 .Default({BUILTIN_PPC_FALSE, 0, 0,
17819 CmpInst::Predicate(), 0}));
17820 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17821 Value);
17822 }
17823
17824 assert(Triple.isOSLinux() &&
17825 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17826 unsigned FeatureWord;
17827 unsigned BitMask;
17828 std::tie(FeatureWord, BitMask) =
17829 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17830#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17831 .Case(Name, {FA_WORD, Bitmask})
17832#include "llvm/TargetParser/PPCTargetParser.def"
17833 .Default({0, 0});
17834 if (!BitMask)
17835 return Builder.getFalse();
17836 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17837 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17838 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17839 Value *Mask =
17840 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17841 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17842#undef PPC_FAWORD_HWCAP
17843#undef PPC_FAWORD_HWCAP2
17844#undef PPC_FAWORD_CPUID
17845 }
17846
17847 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17848 // call __builtin_readcyclecounter.
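  // For example, a call to __builtin_ppc_get_timebase() in user code simply
  // lowers to a single @llvm.readcyclecounter() call below.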
17849 case PPC::BI__builtin_ppc_get_timebase:
17850 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17851
17852 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17853 case PPC::BI__builtin_altivec_lvx:
17854 case PPC::BI__builtin_altivec_lvxl:
17855 case PPC::BI__builtin_altivec_lvebx:
17856 case PPC::BI__builtin_altivec_lvehx:
17857 case PPC::BI__builtin_altivec_lvewx:
17858 case PPC::BI__builtin_altivec_lvsl:
17859 case PPC::BI__builtin_altivec_lvsr:
17860 case PPC::BI__builtin_vsx_lxvd2x:
17861 case PPC::BI__builtin_vsx_lxvw4x:
17862 case PPC::BI__builtin_vsx_lxvd2x_be:
17863 case PPC::BI__builtin_vsx_lxvw4x_be:
17864 case PPC::BI__builtin_vsx_lxvl:
17865 case PPC::BI__builtin_vsx_lxvll:
17866 {
17867 SmallVector<Value *, 2> Ops;
17868 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17869 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17870 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17871 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17872 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17873 Ops.pop_back();
17874 }
17875
17876 switch (BuiltinID) {
17877 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17878 case PPC::BI__builtin_altivec_lvx:
17879 ID = Intrinsic::ppc_altivec_lvx;
17880 break;
17881 case PPC::BI__builtin_altivec_lvxl:
17882 ID = Intrinsic::ppc_altivec_lvxl;
17883 break;
17884 case PPC::BI__builtin_altivec_lvebx:
17885 ID = Intrinsic::ppc_altivec_lvebx;
17886 break;
17887 case PPC::BI__builtin_altivec_lvehx:
17888 ID = Intrinsic::ppc_altivec_lvehx;
17889 break;
17890 case PPC::BI__builtin_altivec_lvewx:
17891 ID = Intrinsic::ppc_altivec_lvewx;
17892 break;
17893 case PPC::BI__builtin_altivec_lvsl:
17894 ID = Intrinsic::ppc_altivec_lvsl;
17895 break;
17896 case PPC::BI__builtin_altivec_lvsr:
17897 ID = Intrinsic::ppc_altivec_lvsr;
17898 break;
17899 case PPC::BI__builtin_vsx_lxvd2x:
17900 ID = Intrinsic::ppc_vsx_lxvd2x;
17901 break;
17902 case PPC::BI__builtin_vsx_lxvw4x:
17903 ID = Intrinsic::ppc_vsx_lxvw4x;
17904 break;
17905 case PPC::BI__builtin_vsx_lxvd2x_be:
17906 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17907 break;
17908 case PPC::BI__builtin_vsx_lxvw4x_be:
17909 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17910 break;
17911 case PPC::BI__builtin_vsx_lxvl:
17912 ID = Intrinsic::ppc_vsx_lxvl;
17913 break;
17914 case PPC::BI__builtin_vsx_lxvll:
17915 ID = Intrinsic::ppc_vsx_lxvll;
17916 break;
17917 }
17918 llvm::Function *F = CGM.getIntrinsic(ID);
17919 return Builder.CreateCall(F, Ops, "");
17920 }
17921
17922 // vec_st, vec_xst_be
17923 case PPC::BI__builtin_altivec_stvx:
17924 case PPC::BI__builtin_altivec_stvxl:
17925 case PPC::BI__builtin_altivec_stvebx:
17926 case PPC::BI__builtin_altivec_stvehx:
17927 case PPC::BI__builtin_altivec_stvewx:
17928 case PPC::BI__builtin_vsx_stxvd2x:
17929 case PPC::BI__builtin_vsx_stxvw4x:
17930 case PPC::BI__builtin_vsx_stxvd2x_be:
17931 case PPC::BI__builtin_vsx_stxvw4x_be:
17932 case PPC::BI__builtin_vsx_stxvl:
17933 case PPC::BI__builtin_vsx_stxvll:
17934 {
17935 SmallVector<Value *, 3> Ops;
17936 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17937 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17938 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17939 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17940 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17941 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17942 Ops.pop_back();
17943 }
17944
17945 switch (BuiltinID) {
17946 default: llvm_unreachable("Unsupported st intrinsic!");
17947 case PPC::BI__builtin_altivec_stvx:
17948 ID = Intrinsic::ppc_altivec_stvx;
17949 break;
17950 case PPC::BI__builtin_altivec_stvxl:
17951 ID = Intrinsic::ppc_altivec_stvxl;
17952 break;
17953 case PPC::BI__builtin_altivec_stvebx:
17954 ID = Intrinsic::ppc_altivec_stvebx;
17955 break;
17956 case PPC::BI__builtin_altivec_stvehx:
17957 ID = Intrinsic::ppc_altivec_stvehx;
17958 break;
17959 case PPC::BI__builtin_altivec_stvewx:
17960 ID = Intrinsic::ppc_altivec_stvewx;
17961 break;
17962 case PPC::BI__builtin_vsx_stxvd2x:
17963 ID = Intrinsic::ppc_vsx_stxvd2x;
17964 break;
17965 case PPC::BI__builtin_vsx_stxvw4x:
17966 ID = Intrinsic::ppc_vsx_stxvw4x;
17967 break;
17968 case PPC::BI__builtin_vsx_stxvd2x_be:
17969 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17970 break;
17971 case PPC::BI__builtin_vsx_stxvw4x_be:
17972 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17973 break;
17974 case PPC::BI__builtin_vsx_stxvl:
17975 ID = Intrinsic::ppc_vsx_stxvl;
17976 break;
17977 case PPC::BI__builtin_vsx_stxvll:
17978 ID = Intrinsic::ppc_vsx_stxvll;
17979 break;
17980 }
17981 llvm::Function *F = CGM.getIntrinsic(ID);
17982 return Builder.CreateCall(F, Ops, "");
17983 }
17984 case PPC::BI__builtin_vsx_ldrmb: {
17985 // Essentially boils down to performing an unaligned VMX load sequence so
17986 // as to avoid crossing a page boundary and then shuffling the elements
17987 // into the right side of the vector register.
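    // Sketch of the sequence emitted below for NumBytes < 16: lvx loads the
    // 16-byte blocks containing the first and last requested bytes, lvsl or
    // lvsr (depending on endianness) builds the permute control, and vperm
    // gathers the requested bytes without reading past the last 16-byte
    // block, so no page beyond the requested bytes is touched.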
17988 Value *Op0 = EmitScalarExpr(E->getArg(0));
17989 Value *Op1 = EmitScalarExpr(E->getArg(1));
17990 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17991 llvm::Type *ResTy = ConvertType(E->getType());
17992 bool IsLE = getTarget().isLittleEndian();
17993
17994 // If the user wants the entire vector, just load the entire vector.
17995 if (NumBytes == 16) {
17996 Value *LD =
17997 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17998 if (!IsLE)
17999 return LD;
18000
18001 // Reverse the bytes on LE.
18002 SmallVector<int, 16> RevMask;
18003 for (int Idx = 0; Idx < 16; Idx++)
18004 RevMask.push_back(15 - Idx);
18005 return Builder.CreateShuffleVector(LD, LD, RevMask);
18006 }
18007
18008 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
18009 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
18010 : Intrinsic::ppc_altivec_lvsl);
18011 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
18012 Value *HiMem = Builder.CreateGEP(
18013 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
18014 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
18015 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
18016 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
18017
18018 Op0 = IsLE ? HiLd : LoLd;
18019 Op1 = IsLE ? LoLd : HiLd;
18020 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
18021 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
18022
18023 if (IsLE) {
18024 SmallVector<int, 16> Consts;
18025 for (int Idx = 0; Idx < 16; Idx++) {
18026 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
18027 : 16 - (NumBytes - Idx);
18028 Consts.push_back(Val);
18029 }
18030 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
18031 Zero, Consts);
18032 }
18033 SmallVector<Constant *, 16> Consts;
18034 for (int Idx = 0; Idx < 16; Idx++)
18035 Consts.push_back(Builder.getInt8(NumBytes + Idx));
18036 Value *Mask2 = ConstantVector::get(Consts);
18037 return Builder.CreateBitCast(
18038 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
18039 }
18040 case PPC::BI__builtin_vsx_strmb: {
18041 Value *Op0 = EmitScalarExpr(E->getArg(0));
18042 Value *Op1 = EmitScalarExpr(E->getArg(1));
18043 Value *Op2 = EmitScalarExpr(E->getArg(2));
18044 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
18045 bool IsLE = getTarget().isLittleEndian();
18046 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
18047 // When storing the whole vector, simply store it on BE; on LE, reverse
18048 // the bytes first and then store.
18049 if (Width == 16) {
18050 Value *StVec = Op2;
18051 if (IsLE) {
18052 SmallVector<int, 16> RevMask;
18053 for (int Idx = 0; Idx < 16; Idx++)
18054 RevMask.push_back(15 - Idx);
18055 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
18056 }
18057 return Builder.CreateStore(
18058 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
18059 }
18060 auto *ConvTy = Int64Ty;
18061 unsigned NumElts = 0;
18062 switch (Width) {
18063 default:
18064 llvm_unreachable("width for stores must be a power of 2");
18065 case 8:
18066 ConvTy = Int64Ty;
18067 NumElts = 2;
18068 break;
18069 case 4:
18070 ConvTy = Int32Ty;
18071 NumElts = 4;
18072 break;
18073 case 2:
18074 ConvTy = Int16Ty;
18075 NumElts = 8;
18076 break;
18077 case 1:
18078 ConvTy = Int8Ty;
18079 NumElts = 16;
18080 break;
18081 }
18082 Value *Vec = Builder.CreateBitCast(
18083 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
18084 Value *Ptr =
18085 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
18086 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
18087 if (IsLE && Width > 1) {
18088 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
18089 Elt = Builder.CreateCall(F, Elt);
18090 }
18091 return Builder.CreateStore(
18092 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
18093 };
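    // Example decomposition (illustrative): NumBytes == 11 becomes an 8-byte
    // store, then a 2-byte store, then a 1-byte store, with offsets and
    // element numbers chosen by the endianness-aware logic below.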
18094 unsigned Stored = 0;
18095 unsigned RemainingBytes = NumBytes;
18096 Value *Result;
18097 if (NumBytes == 16)
18098 return StoreSubVec(16, 0, 0);
18099 if (NumBytes >= 8) {
18100 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
18101 RemainingBytes -= 8;
18102 Stored += 8;
18103 }
18104 if (RemainingBytes >= 4) {
18105 Result = StoreSubVec(4, NumBytes - Stored - 4,
18106 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
18107 RemainingBytes -= 4;
18108 Stored += 4;
18109 }
18110 if (RemainingBytes >= 2) {
18111 Result = StoreSubVec(2, NumBytes - Stored - 2,
18112 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
18113 RemainingBytes -= 2;
18114 Stored += 2;
18115 }
18116 if (RemainingBytes)
18117 Result =
18118 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
18119 return Result;
18120 }
18121 // Square root
18122 case PPC::BI__builtin_vsx_xvsqrtsp:
18123 case PPC::BI__builtin_vsx_xvsqrtdp: {
18124 llvm::Type *ResultType = ConvertType(E->getType());
18125 Value *X = EmitScalarExpr(E->getArg(0));
18126 if (Builder.getIsFPConstrained()) {
18127 llvm::Function *F = CGM.getIntrinsic(
18128 Intrinsic::experimental_constrained_sqrt, ResultType);
18129 return Builder.CreateConstrainedFPCall(F, X);
18130 } else {
18131 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18132 return Builder.CreateCall(F, X);
18133 }
18134 }
18135 // Count leading zeros
18136 case PPC::BI__builtin_altivec_vclzb:
18137 case PPC::BI__builtin_altivec_vclzh:
18138 case PPC::BI__builtin_altivec_vclzw:
18139 case PPC::BI__builtin_altivec_vclzd: {
18140 llvm::Type *ResultType = ConvertType(E->getType());
18141 Value *X = EmitScalarExpr(E->getArg(0));
18142 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18143 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18144 return Builder.CreateCall(F, {X, Undef});
18145 }
18146 case PPC::BI__builtin_altivec_vctzb:
18147 case PPC::BI__builtin_altivec_vctzh:
18148 case PPC::BI__builtin_altivec_vctzw:
18149 case PPC::BI__builtin_altivec_vctzd: {
18150 llvm::Type *ResultType = ConvertType(E->getType());
18151 Value *X = EmitScalarExpr(E->getArg(0));
18152 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18153 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18154 return Builder.CreateCall(F, {X, Undef});
18155 }
18156 case PPC::BI__builtin_altivec_vinsd:
18157 case PPC::BI__builtin_altivec_vinsw:
18158 case PPC::BI__builtin_altivec_vinsd_elt:
18159 case PPC::BI__builtin_altivec_vinsw_elt: {
18160 llvm::Type *ResultType = ConvertType(E->getType());
18161 Value *Op0 = EmitScalarExpr(E->getArg(0));
18162 Value *Op1 = EmitScalarExpr(E->getArg(1));
18163 Value *Op2 = EmitScalarExpr(E->getArg(2));
18164
18165 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18166 BuiltinID == PPC::BI__builtin_altivec_vinsd);
18167
18168 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18169 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
18170
18171 // The third argument must be a compile time constant.
18172 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18173 assert(ArgCI &&
18174 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18175
18176 // Valid value for the third argument is dependent on the input type and
18177 // builtin called.
18178 int ValidMaxValue = 0;
18179 if (IsUnaligned)
18180 ValidMaxValue = (Is32bit) ? 12 : 8;
18181 else
18182 ValidMaxValue = (Is32bit) ? 3 : 1;
18183
18184 // Get value of third argument.
18185 int64_t ConstArg = ArgCI->getSExtValue();
18186
18187 // Compose range checking error message.
18188 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
18189 RangeErrMsg += " number " + llvm::to_string(ConstArg);
18190 RangeErrMsg += " is outside of the valid range [0, ";
18191 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
18192
18193 // Issue error if third argument is not within the valid range.
18194 if (ConstArg < 0 || ConstArg > ValidMaxValue)
18195 CGM.Error(E->getExprLoc(), RangeErrMsg);
18196
18197 // Input to vec_replace_elt is an element index, convert to byte index.
18198 if (!IsUnaligned) {
18199 ConstArg *= Is32bit ? 4 : 8;
18200 // Fix the constant according to endianness.
18201 if (getTarget().isLittleEndian())
18202 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
18203 }
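    // Worked example (illustrative): vec_replace_elt on a vector of four
    // 32-bit elements with element index 1 becomes byte index 4, which a
    // little-endian target remaps to 12 - 4 = 8 before emitting vinsw.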
18204
18205 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
18206 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
18207 // Casting input to vector int as per intrinsic definition.
18208 Op0 =
18209 Is32bit
18210 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
18211 : Builder.CreateBitCast(Op0,
18212 llvm::FixedVectorType::get(Int64Ty, 2));
18213 return Builder.CreateBitCast(
18214 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
18215 }
18216 case PPC::BI__builtin_altivec_vadduqm:
18217 case PPC::BI__builtin_altivec_vsubuqm: {
18218 Value *Op0 = EmitScalarExpr(E->getArg(0));
18219 Value *Op1 = EmitScalarExpr(E->getArg(1));
18220 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
18221 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
18222 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
18223 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
18224 return Builder.CreateAdd(Op0, Op1, "vadduqm");
18225 else
18226 return Builder.CreateSub(Op0, Op1, "vsubuqm");
18227 }
18228 case PPC::BI__builtin_altivec_vaddcuq_c:
18229 case PPC::BI__builtin_altivec_vsubcuq_c: {
18230 SmallVector<Value *, 2> Ops;
18231 Value *Op0 = EmitScalarExpr(E->getArg(0));
18232 Value *Op1 = EmitScalarExpr(E->getArg(1));
18233 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18234 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18235 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18236 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18237 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
18238 ? Intrinsic::ppc_altivec_vaddcuq
18239 : Intrinsic::ppc_altivec_vsubcuq;
18240 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18241 }
18242 case PPC::BI__builtin_altivec_vaddeuqm_c:
18243 case PPC::BI__builtin_altivec_vaddecuq_c:
18244 case PPC::BI__builtin_altivec_vsubeuqm_c:
18245 case PPC::BI__builtin_altivec_vsubecuq_c: {
18246 SmallVector<Value *, 3> Ops;
18247 Value *Op0 = EmitScalarExpr(E->getArg(0));
18248 Value *Op1 = EmitScalarExpr(E->getArg(1));
18249 Value *Op2 = EmitScalarExpr(E->getArg(2));
18250 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18251 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18252 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18253 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18254 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
18255 switch (BuiltinID) {
18256 default:
18257 llvm_unreachable("Unsupported intrinsic!");
18258 case PPC::BI__builtin_altivec_vaddeuqm_c:
18259 ID = Intrinsic::ppc_altivec_vaddeuqm;
18260 break;
18261 case PPC::BI__builtin_altivec_vaddecuq_c:
18262 ID = Intrinsic::ppc_altivec_vaddecuq;
18263 break;
18264 case PPC::BI__builtin_altivec_vsubeuqm_c:
18265 ID = Intrinsic::ppc_altivec_vsubeuqm;
18266 break;
18267 case PPC::BI__builtin_altivec_vsubecuq_c:
18268 ID = Intrinsic::ppc_altivec_vsubecuq;
18269 break;
18270 }
18271 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18272 }
18273 case PPC::BI__builtin_ppc_rldimi:
18274 case PPC::BI__builtin_ppc_rlwimi: {
18275 Value *Op0 = EmitScalarExpr(E->getArg(0));
18276 Value *Op1 = EmitScalarExpr(E->getArg(1));
18277 Value *Op2 = EmitScalarExpr(E->getArg(2));
18278 Value *Op3 = EmitScalarExpr(E->getArg(3));
18279 // rldimi is 64-bit instruction, expand the intrinsic before isel to
18280 // leverage peephole and avoid legalization efforts.
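    // On 32-bit targets, the expansion below computes, in generic IR:
    //   rotated = fshl(arg0, arg0, shift)
    //   result  = (rotated & mask) | (arg1 & ~mask)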
18281 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
18282 !getTarget().getTriple().isPPC64()) {
18283 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
18284 Op2 = Builder.CreateZExt(Op2, Int64Ty);
18285 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
18286 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
18287 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
18288 }
18289 return Builder.CreateCall(
18290 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
18291 ? Intrinsic::ppc_rldimi
18292 : Intrinsic::ppc_rlwimi),
18293 {Op0, Op1, Op2, Op3});
18294 }
18295 case PPC::BI__builtin_ppc_rlwnm: {
18296 Value *Op0 = EmitScalarExpr(E->getArg(0));
18297 Value *Op1 = EmitScalarExpr(E->getArg(1));
18298 Value *Op2 = EmitScalarExpr(E->getArg(2));
18299 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
18300 {Op0, Op1, Op2});
18301 }
18302 case PPC::BI__builtin_ppc_poppar4:
18303 case PPC::BI__builtin_ppc_poppar8: {
18304 Value *Op0 = EmitScalarExpr(E->getArg(0));
18305 llvm::Type *ArgType = Op0->getType();
18306 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
18307 Value *Tmp = Builder.CreateCall(F, Op0);
18308
18309 llvm::Type *ResultType = ConvertType(E->getType());
18310 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
18311 if (Result->getType() != ResultType)
18312 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
18313 "cast");
18314 return Result;
18315 }
18316 case PPC::BI__builtin_ppc_cmpb: {
18317 Value *Op0 = EmitScalarExpr(E->getArg(0));
18318 Value *Op1 = EmitScalarExpr(E->getArg(1));
18319 if (getTarget().getTriple().isPPC64()) {
18320 Function *F =
18321 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
18322 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
18323 }
18324 // For 32-bit targets, emit the code below:
18325 // %conv = trunc i64 %a to i32
18326 // %conv1 = trunc i64 %b to i32
18327 // %shr = lshr i64 %a, 32
18328 // %conv2 = trunc i64 %shr to i32
18329 // %shr3 = lshr i64 %b, 32
18330 // %conv4 = trunc i64 %shr3 to i32
18331 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
18332 // %conv5 = zext i32 %0 to i64
18333 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
18334 // %conv614 = zext i32 %1 to i64
18335 // %shl = shl nuw i64 %conv614, 32
18336 // %or = or i64 %shl, %conv5
18337 // ret i64 %or
18338 Function *F =
18339 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
18340 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
18341 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
18342 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
18343 Value *ArgOneHi =
18344 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
18345 Value *ArgTwoHi =
18346 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
18347 Value *ResLo = Builder.CreateZExt(
18348 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
18349 Value *ResHiShift = Builder.CreateZExt(
18350 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
18351 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
18352 return Builder.CreateOr(ResLo, ResHi);
18353 }
18354 // Copy sign
18355 case PPC::BI__builtin_vsx_xvcpsgnsp:
18356 case PPC::BI__builtin_vsx_xvcpsgndp: {
18357 llvm::Type *ResultType = ConvertType(E->getType());
18358 Value *X = EmitScalarExpr(E->getArg(0));
18359 Value *Y = EmitScalarExpr(E->getArg(1));
18360 ID = Intrinsic::copysign;
18361 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18362 return Builder.CreateCall(F, {X, Y});
18363 }
18364 // Rounding/truncation
18365 case PPC::BI__builtin_vsx_xvrspip:
18366 case PPC::BI__builtin_vsx_xvrdpip:
18367 case PPC::BI__builtin_vsx_xvrdpim:
18368 case PPC::BI__builtin_vsx_xvrspim:
18369 case PPC::BI__builtin_vsx_xvrdpi:
18370 case PPC::BI__builtin_vsx_xvrspi:
18371 case PPC::BI__builtin_vsx_xvrdpic:
18372 case PPC::BI__builtin_vsx_xvrspic:
18373 case PPC::BI__builtin_vsx_xvrdpiz:
18374 case PPC::BI__builtin_vsx_xvrspiz: {
18375 llvm::Type *ResultType = ConvertType(E->getType());
18376 Value *X = EmitScalarExpr(E->getArg(0));
18377 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
18378 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
18379 ID = Builder.getIsFPConstrained()
18380 ? Intrinsic::experimental_constrained_floor
18381 : Intrinsic::floor;
18382 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
18383 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
18384 ID = Builder.getIsFPConstrained()
18385 ? Intrinsic::experimental_constrained_round
18386 : Intrinsic::round;
18387 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
18388 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
18389 ID = Builder.getIsFPConstrained()
18390 ? Intrinsic::experimental_constrained_rint
18391 : Intrinsic::rint;
18392 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
18393 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
18394 ID = Builder.getIsFPConstrained()
18395 ? Intrinsic::experimental_constrained_ceil
18396 : Intrinsic::ceil;
18397 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
18398 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
18399 ID = Builder.getIsFPConstrained()
18400 ? Intrinsic::experimental_constrained_trunc
18401 : Intrinsic::trunc;
18402 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18403 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
18404 : Builder.CreateCall(F, X);
18405 }
18406
18407 // Absolute value
18408 case PPC::BI__builtin_vsx_xvabsdp:
18409 case PPC::BI__builtin_vsx_xvabssp: {
18410 llvm::Type *ResultType = ConvertType(E->getType());
18411 Value *X = EmitScalarExpr(E->getArg(0));
18412 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18413 return Builder.CreateCall(F, X);
18414 }
18415
18416 // Fastmath by default
18417 case PPC::BI__builtin_ppc_recipdivf:
18418 case PPC::BI__builtin_ppc_recipdivd:
18419 case PPC::BI__builtin_ppc_rsqrtf:
18420 case PPC::BI__builtin_ppc_rsqrtd: {
18421 FastMathFlags FMF = Builder.getFastMathFlags();
18422 Builder.getFastMathFlags().setFast();
18423 llvm::Type *ResultType = ConvertType(E->getType());
18424 Value *X = EmitScalarExpr(E->getArg(0));
18425
18426 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
18427 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
18428 Value *Y = EmitScalarExpr(E->getArg(1));
18429 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
18430 Builder.getFastMathFlags() &= (FMF);
18431 return FDiv;
18432 }
18433 auto *One = ConstantFP::get(ResultType, 1.0);
18434 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18435 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
18436 Builder.getFastMathFlags() &= (FMF);
18437 return FDiv;
18438 }
18439 case PPC::BI__builtin_ppc_alignx: {
18440 Value *Op0 = EmitScalarExpr(E->getArg(0));
18441 Value *Op1 = EmitScalarExpr(E->getArg(1));
18442 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
18443 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
18444 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
18445 llvm::Value::MaximumAlignment);
18446
18447 emitAlignmentAssumption(Op1, E->getArg(1),
18448 /*The expr loc is sufficient.*/ SourceLocation(),
18449 AlignmentCI, nullptr);
18450 return Op1;
18451 }
18452 case PPC::BI__builtin_ppc_rdlam: {
18453 Value *Op0 = EmitScalarExpr(E->getArg(0));
18454 Value *Op1 = EmitScalarExpr(E->getArg(1));
18455 Value *Op2 = EmitScalarExpr(E->getArg(2));
18456 llvm::Type *Ty = Op0->getType();
18457 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
18458 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
18459 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
18460 return Builder.CreateAnd(Rotate, Op2);
18461 }
18462 case PPC::BI__builtin_ppc_load2r: {
18463 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
18464 Value *Op0 = EmitScalarExpr(E->getArg(0));
18465 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
18466 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
18467 }
18468 // FMA variations
18469 case PPC::BI__builtin_ppc_fnmsub:
18470 case PPC::BI__builtin_ppc_fnmsubs:
18471 case PPC::BI__builtin_vsx_xvmaddadp:
18472 case PPC::BI__builtin_vsx_xvmaddasp:
18473 case PPC::BI__builtin_vsx_xvnmaddadp:
18474 case PPC::BI__builtin_vsx_xvnmaddasp:
18475 case PPC::BI__builtin_vsx_xvmsubadp:
18476 case PPC::BI__builtin_vsx_xvmsubasp:
18477 case PPC::BI__builtin_vsx_xvnmsubadp:
18478 case PPC::BI__builtin_vsx_xvnmsubasp: {
18479 llvm::Type *ResultType = ConvertType(E->getType());
18480 Value *X = EmitScalarExpr(E->getArg(0));
18481 Value *Y = EmitScalarExpr(E->getArg(1));
18482 Value *Z = EmitScalarExpr(E->getArg(2));
18483 llvm::Function *F;
18484 if (Builder.getIsFPConstrained())
18485 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18486 else
18487 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18488 switch (BuiltinID) {
18489 case PPC::BI__builtin_vsx_xvmaddadp:
18490 case PPC::BI__builtin_vsx_xvmaddasp:
18491 if (Builder.getIsFPConstrained())
18492 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18493 else
18494 return Builder.CreateCall(F, {X, Y, Z});
18495 case PPC::BI__builtin_vsx_xvnmaddadp:
18496 case PPC::BI__builtin_vsx_xvnmaddasp:
18497 if (Builder.getIsFPConstrained())
18498 return Builder.CreateFNeg(
18499 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
18500 else
18501 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18502 case PPC::BI__builtin_vsx_xvmsubadp:
18503 case PPC::BI__builtin_vsx_xvmsubasp:
18504 if (Builder.getIsFPConstrained())
18505 return Builder.CreateConstrainedFPCall(
18506 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18507 else
18508 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18509 case PPC::BI__builtin_ppc_fnmsub:
18510 case PPC::BI__builtin_ppc_fnmsubs:
18511 case PPC::BI__builtin_vsx_xvnmsubadp:
18512 case PPC::BI__builtin_vsx_xvnmsubasp:
18513 if (Builder.getIsFPConstrained())
18514 return Builder.CreateFNeg(
18515 Builder.CreateConstrainedFPCall(
18516 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
18517 "neg");
18518 else
18519 return Builder.CreateCall(
18520 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
18521 }
18522 llvm_unreachable("Unknown FMA operation");
18523 return nullptr; // Suppress no-return warning
18524 }
18525
18526 case PPC::BI__builtin_vsx_insertword: {
18527 Value *Op0 = EmitScalarExpr(E->getArg(0));
18528 Value *Op1 = EmitScalarExpr(E->getArg(1));
18529 Value *Op2 = EmitScalarExpr(E->getArg(2));
18530 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
18531
18532 // The third argument is a compile-time constant int. It must be clamped
18533 // to the range [0, 12].
18534 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18535 assert(ArgCI &&
18536 "Third arg to xxinsertw intrinsic must be constant integer");
18537 const int64_t MaxIndex = 12;
18538 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18539
18540 // The builtin semantics don't exactly match the xxinsertw instruction's
18541 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
18542 // word from the first argument, and inserts it in the second argument. The
18543 // instruction extracts the word from its second input register and inserts
18544 // it into its first input register, so swap the first and second arguments.
18545 std::swap(Op0, Op1);
18546
18547 // Need to cast the second argument from a vector of unsigned int to a
18548 // vector of long long.
18549 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18550
18551 if (getTarget().isLittleEndian()) {
18552 // Reverse the double words in the vector we will extract from.
18553 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18554 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
18555
18556 // Reverse the index.
18557 Index = MaxIndex - Index;
18558 }
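    // Worked example (illustrative): a constant index of 4 on a little-endian
    // target reverses the doublewords of the vector being extracted from and
    // becomes index 12 - 4 = 8 before the xxinsertw intrinsic call below.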
18559
18560 // Intrinsic expects the first arg to be a vector of int.
18561 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18562 Op2 = ConstantInt::getSigned(Int32Ty, Index);
18563 return Builder.CreateCall(F, {Op0, Op1, Op2});
18564 }
18565
18566 case PPC::BI__builtin_vsx_extractuword: {
18567 Value *Op0 = EmitScalarExpr(E->getArg(0));
18568 Value *Op1 = EmitScalarExpr(E->getArg(1));
18569 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
18570
18571 // Intrinsic expects the first argument to be a vector of doublewords.
18572 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18573
18574 // The second argument is a compile time constant int that needs to
18575 // be clamped to the range [0, 12].
18576 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
18577 assert(ArgCI &&
18578 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18579 const int64_t MaxIndex = 12;
18580 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18581
18582 if (getTarget().isLittleEndian()) {
18583 // Reverse the index.
18584 Index = MaxIndex - Index;
18585 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18586
18587 // Emit the call, then reverse the double words of the results vector.
18588 Value *Call = Builder.CreateCall(F, {Op0, Op1});
18589
18590 Value *ShuffleCall =
18591 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
18592 return ShuffleCall;
18593 } else {
18594 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18595 return Builder.CreateCall(F, {Op0, Op1});
18596 }
18597 }
18598
18599 case PPC::BI__builtin_vsx_xxpermdi: {
18600 Value *Op0 = EmitScalarExpr(E->getArg(0));
18601 Value *Op1 = EmitScalarExpr(E->getArg(1));
18602 Value *Op2 = EmitScalarExpr(E->getArg(2));
18603 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18604 assert(ArgCI && "Third arg must be constant integer!");
18605
18606 unsigned Index = ArgCI->getZExtValue();
18607 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18608 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18609
18610 // Account for endianness by treating this as just a shuffle. So we use the
18611 // same indices for both LE and BE in order to produce expected results in
18612 // both cases.
18613 int ElemIdx0 = (Index & 2) >> 1;
18614 int ElemIdx1 = 2 + (Index & 1);
18615
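    // Example (illustrative): Index == 3 selects ElemIdx0 == 1 and
    // ElemIdx1 == 3, i.e. doubleword 1 of Op0 followed by doubleword 1 of Op1
    // in the concatenated shuffle.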
18616 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
18617 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18618 QualType BIRetType = E->getType();
18619 auto RetTy = ConvertType(BIRetType);
18620 return Builder.CreateBitCast(ShuffleCall, RetTy);
18621 }
18622
18623 case PPC::BI__builtin_vsx_xxsldwi: {
18624 Value *Op0 = EmitScalarExpr(E->getArg(0));
18625 Value *Op1 = EmitScalarExpr(E->getArg(1));
18626 Value *Op2 = EmitScalarExpr(E->getArg(2));
18627 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18628 assert(ArgCI && "Third argument must be a compile time constant");
18629 unsigned Index = ArgCI->getZExtValue() & 0x3;
18630 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18631 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
18632
18633 // Create a shuffle mask
18634 int ElemIdx0;
18635 int ElemIdx1;
18636 int ElemIdx2;
18637 int ElemIdx3;
18638 if (getTarget().isLittleEndian()) {
18639 // Little endian element N comes from element 8+N-Index of the
18640 // concatenated wide vector (of course, using modulo arithmetic on
18641 // the total number of elements).
18642 ElemIdx0 = (8 - Index) % 8;
18643 ElemIdx1 = (9 - Index) % 8;
18644 ElemIdx2 = (10 - Index) % 8;
18645 ElemIdx3 = (11 - Index) % 8;
18646 } else {
18647 // Big endian ElemIdx<N> = Index + N
18648 ElemIdx0 = Index;
18649 ElemIdx1 = Index + 1;
18650 ElemIdx2 = Index + 2;
18651 ElemIdx3 = Index + 3;
18652 }
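    // Example (illustrative): Index == 1 yields the shuffle mask {7, 0, 1, 2}
    // on little endian and {1, 2, 3, 4} on big endian.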
18653
18654 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
18655 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18656 QualType BIRetType = E->getType();
18657 auto RetTy = ConvertType(BIRetType);
18658 return Builder.CreateBitCast(ShuffleCall, RetTy);
18659 }
18660
18661 case PPC::BI__builtin_pack_vector_int128: {
18662 Value *Op0 = EmitScalarExpr(E->getArg(0));
18663 Value *Op1 = EmitScalarExpr(E->getArg(1));
18664 bool isLittleEndian = getTarget().isLittleEndian();
18665 Value *PoisonValue =
18666 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
18667 Value *Res = Builder.CreateInsertElement(
18668 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
18669 Res = Builder.CreateInsertElement(Res, Op1,
18670 (uint64_t)(isLittleEndian ? 0 : 1));
18671 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
18672 }
18673
18674 case PPC::BI__builtin_unpack_vector_int128: {
18675 Value *Op0 = EmitScalarExpr(E->getArg(0));
18676 Value *Op1 = EmitScalarExpr(E->getArg(1));
18677 ConstantInt *Index = cast<ConstantInt>(Op1);
18678 Value *Unpacked = Builder.CreateBitCast(
18679 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
18680
18681 if (getTarget().isLittleEndian())
18682 Index =
18683 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18684
18685 return Builder.CreateExtractElement(Unpacked, Index);
18686 }
18687
18688 case PPC::BI__builtin_ppc_sthcx: {
18689 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
18690 Value *Op0 = EmitScalarExpr(E->getArg(0));
18691 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
18692 return Builder.CreateCall(F, {Op0, Op1});
18693 }
18694
18695 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18696 // Some of the MMA instructions accumulate their result into an existing
18697 // accumulator whereas the others generate a new accumulator. So we need to
18698 // use custom code generation to expand a builtin call with a pointer to a
18699 // load (if the corresponding instruction accumulates its result) followed by
18700 // the call to the intrinsic and a store of the result.
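  // For example (illustrative): an accumulating MMA builtin such as
  // __builtin_mma_xvf32gerpp(&acc, a, b) is lowered to a load of the
  // accumulator through its pointer argument, a call to the corresponding
  // ppc_mma intrinsic, and a store of the returned accumulator back through
  // that pointer; non-accumulating forms skip the initial load.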
18701#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18702 case PPC::BI__builtin_##Name:
18703#include "clang/Basic/BuiltinsPPC.def"
18704 {
18705 SmallVector<Value *, 4> Ops;
18706 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18707 if (E->getArg(i)->getType()->isArrayType())
18708 Ops.push_back(
18709 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18710 else
18711 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18712 // The first argument of these builtins is a pointer used to store their
18713 // result. However, the llvm intrinsics return their result in multiple
18714 // return values. So, here we emit code extracting these values from the
18715 // intrinsic results and storing them using that pointer.
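      // For example (illustrative): __builtin_mma_disassemble_acc(ptr, &acc)
      // loads the accumulator, calls ppc_mma_disassemble_acc, and stores the
      // four returned 16-byte vectors to consecutive offsets from ptr.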
18716 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18717 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18718 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18719 unsigned NumVecs = 2;
18720 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18721 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18722 NumVecs = 4;
18723 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18724 }
18725 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18726 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18727 Value *Vec = Builder.CreateLoad(Addr);
18728 Value *Call = Builder.CreateCall(F, {Vec});
18729 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18730 Value *Ptr = Ops[0];
18731 for (unsigned i=0; i<NumVecs; i++) {
18732 Value *Vec = Builder.CreateExtractValue(Call, i);
18733 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18734 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18735 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18736 }
18737 return Call;
18738 }
18739 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18740 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18741 // Reverse the order of the operands for LE, so the
18742 // same builtin call can be used on both LE and BE
18743 // without the need for the programmer to swap operands.
18744 // The operands are reversed starting from the second argument,
18745 // the first operand is the pointer to the pair/accumulator
18746 // that is being built.
18747 if (getTarget().isLittleEndian())
18748 std::reverse(Ops.begin() + 1, Ops.end());
18749 }
18750 bool Accumulate;
18751 switch (BuiltinID) {
18752 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18753 case PPC::BI__builtin_##Name: \
18754 ID = Intrinsic::ppc_##Intr; \
18755 Accumulate = Acc; \
18756 break;
18757 #include "clang/Basic/BuiltinsPPC.def"
18758 }
18759 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18760 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18761 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18762 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18763 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18764 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18765 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18766 } else {
18767 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18768 }
18769 Ops.pop_back();
18770 llvm::Function *F = CGM.getIntrinsic(ID);
18771 return Builder.CreateCall(F, Ops, "");
18772 }
18773 SmallVector<Value*, 4> CallOps;
18774 if (Accumulate) {
18775 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18776 Value *Acc = Builder.CreateLoad(Addr);
18777 CallOps.push_back(Acc);
18778 }
18779 for (unsigned i=1; i<Ops.size(); i++)
18780 CallOps.push_back(Ops[i]);
18781 llvm::Function *F = CGM.getIntrinsic(ID);
18782 Value *Call = Builder.CreateCall(F, CallOps);
18783 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
18784 }
18785
18786 case PPC::BI__builtin_ppc_compare_and_swap:
18787 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18788 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18789 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18790 Value *OldVal = Builder.CreateLoad(OldValAddr);
18791 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18792 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18793 Value *Op2 = EmitScalarExpr(E->getArg(2));
18794 auto Pair = EmitAtomicCompareExchange(
18795 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18796 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
18797 // Unlike C11's atomic_compare_exchange, according to
18798 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18799 // > In either case, the contents of the memory location specified by addr
18800 // > are copied into the memory location specified by old_val_addr.
18801 // But it hasn't specified storing to OldValAddr is atomic or not and
18802 // which order to use. Now following XL's codegen, treat it as a normal
18803 // store.
18804 Value *LoadedVal = Pair.first.getScalarVal();
18805 Builder.CreateStore(LoadedVal, OldValAddr);
18806 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18807 }
18808 case PPC::BI__builtin_ppc_fetch_and_add:
18809 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18810 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18811 llvm::AtomicOrdering::Monotonic);
18812 }
18813 case PPC::BI__builtin_ppc_fetch_and_and:
18814 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18815 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18816 llvm::AtomicOrdering::Monotonic);
18817 }
18818
18819 case PPC::BI__builtin_ppc_fetch_and_or:
18820 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18821 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18822 llvm::AtomicOrdering::Monotonic);
18823 }
18824 case PPC::BI__builtin_ppc_fetch_and_swap:
18825 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18826 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18827 llvm::AtomicOrdering::Monotonic);
18828 }
18829 case PPC::BI__builtin_ppc_ldarx:
18830 case PPC::BI__builtin_ppc_lwarx:
18831 case PPC::BI__builtin_ppc_lharx:
18832 case PPC::BI__builtin_ppc_lbarx:
18833 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18834 case PPC::BI__builtin_ppc_mfspr: {
18835 Value *Op0 = EmitScalarExpr(E->getArg(0));
18836 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18837 ? Int32Ty
18838 : Int64Ty;
18839 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18840 return Builder.CreateCall(F, {Op0});
18841 }
18842 case PPC::BI__builtin_ppc_mtspr: {
18843 Value *Op0 = EmitScalarExpr(E->getArg(0));
18844 Value *Op1 = EmitScalarExpr(E->getArg(1));
18845 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18846 ? Int32Ty
18847 : Int64Ty;
18848 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18849 return Builder.CreateCall(F, {Op0, Op1});
18850 }
18851 case PPC::BI__builtin_ppc_popcntb: {
18852 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18853 llvm::Type *ArgType = ArgValue->getType();
18854 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18855 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18856 }
18857 case PPC::BI__builtin_ppc_mtfsf: {
18858 // The builtin takes a uint32 that needs to be cast to an
18859 // f64 to be passed to the intrinsic.
18860 Value *Op0 = EmitScalarExpr(E->getArg(0));
18861 Value *Op1 = EmitScalarExpr(E->getArg(1));
18862 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18863 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18864 return Builder.CreateCall(F, {Op0, Cast}, "");
18865 }
18866
18867 case PPC::BI__builtin_ppc_swdiv_nochk:
18868 case PPC::BI__builtin_ppc_swdivs_nochk: {
18869 Value *Op0 = EmitScalarExpr(E->getArg(0));
18870 Value *Op1 = EmitScalarExpr(E->getArg(1));
18871 FastMathFlags FMF = Builder.getFastMathFlags();
18872 Builder.getFastMathFlags().setFast();
18873 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18874 Builder.getFastMathFlags() &= (FMF);
18875 return FDiv;
18876 }
18877 case PPC::BI__builtin_ppc_fric:
18878 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18879 *this, E, Intrinsic::rint,
18880 Intrinsic::experimental_constrained_rint))
18881 .getScalarVal();
18882 case PPC::BI__builtin_ppc_frim:
18883 case PPC::BI__builtin_ppc_frims:
18884 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18885 *this, E, Intrinsic::floor,
18886 Intrinsic::experimental_constrained_floor))
18887 .getScalarVal();
18888 case PPC::BI__builtin_ppc_frin:
18889 case PPC::BI__builtin_ppc_frins:
18890 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18891 *this, E, Intrinsic::round,
18892 Intrinsic::experimental_constrained_round))
18893 .getScalarVal();
18894 case PPC::BI__builtin_ppc_frip:
18895 case PPC::BI__builtin_ppc_frips:
18896 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18897 *this, E, Intrinsic::ceil,
18898 Intrinsic::experimental_constrained_ceil))
18899 .getScalarVal();
18900 case PPC::BI__builtin_ppc_friz:
18901 case PPC::BI__builtin_ppc_frizs:
18902 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18903 *this, E, Intrinsic::trunc,
18904 Intrinsic::experimental_constrained_trunc))
18905 .getScalarVal();
18906 case PPC::BI__builtin_ppc_fsqrt:
18907 case PPC::BI__builtin_ppc_fsqrts:
18908 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18909 *this, E, Intrinsic::sqrt,
18910 Intrinsic::experimental_constrained_sqrt))
18911 .getScalarVal();
18912 case PPC::BI__builtin_ppc_test_data_class: {
18913 Value *Op0 = EmitScalarExpr(E->getArg(0));
18914 Value *Op1 = EmitScalarExpr(E->getArg(1));
18915 return Builder.CreateCall(
18916 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18917 {Op0, Op1}, "test_data_class");
18918 }
18919 case PPC::BI__builtin_ppc_maxfe: {
18920 Value *Op0 = EmitScalarExpr(E->getArg(0));
18921 Value *Op1 = EmitScalarExpr(E->getArg(1));
18922 Value *Op2 = EmitScalarExpr(E->getArg(2));
18923 Value *Op3 = EmitScalarExpr(E->getArg(3));
18924 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18925 {Op0, Op1, Op2, Op3});
18926 }
18927 case PPC::BI__builtin_ppc_maxfl: {
18928 Value *Op0 = EmitScalarExpr(E->getArg(0));
18929 Value *Op1 = EmitScalarExpr(E->getArg(1));
18930 Value *Op2 = EmitScalarExpr(E->getArg(2));
18931 Value *Op3 = EmitScalarExpr(E->getArg(3));
18932 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18933 {Op0, Op1, Op2, Op3});
18934 }
18935 case PPC::BI__builtin_ppc_maxfs: {
18936 Value *Op0 = EmitScalarExpr(E->getArg(0));
18937 Value *Op1 = EmitScalarExpr(E->getArg(1));
18938 Value *Op2 = EmitScalarExpr(E->getArg(2));
18939 Value *Op3 = EmitScalarExpr(E->getArg(3));
18940 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18941 {Op0, Op1, Op2, Op3});
18942 }
18943 case PPC::BI__builtin_ppc_minfe: {
18944 Value *Op0 = EmitScalarExpr(E->getArg(0));
18945 Value *Op1 = EmitScalarExpr(E->getArg(1));
18946 Value *Op2 = EmitScalarExpr(E->getArg(2));
18947 Value *Op3 = EmitScalarExpr(E->getArg(3));
18948 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18949 {Op0, Op1, Op2, Op3});
18950 }
18951 case PPC::BI__builtin_ppc_minfl: {
18952 Value *Op0 = EmitScalarExpr(E->getArg(0));
18953 Value *Op1 = EmitScalarExpr(E->getArg(1));
18954 Value *Op2 = EmitScalarExpr(E->getArg(2));
18955 Value *Op3 = EmitScalarExpr(E->getArg(3));
18956 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18957 {Op0, Op1, Op2, Op3});
18958 }
18959 case PPC::BI__builtin_ppc_minfs: {
18960 Value *Op0 = EmitScalarExpr(E->getArg(0));
18961 Value *Op1 = EmitScalarExpr(E->getArg(1));
18962 Value *Op2 = EmitScalarExpr(E->getArg(2));
18963 Value *Op3 = EmitScalarExpr(E->getArg(3));
18964 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18965 {Op0, Op1, Op2, Op3});
18966 }
18967 case PPC::BI__builtin_ppc_swdiv:
18968 case PPC::BI__builtin_ppc_swdivs: {
18969 Value *Op0 = EmitScalarExpr(E->getArg(0));
18970 Value *Op1 = EmitScalarExpr(E->getArg(1));
18971 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18972 }
18973 case PPC::BI__builtin_ppc_set_fpscr_rn:
18974 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18975 {EmitScalarExpr(E->getArg(0))});
18976 case PPC::BI__builtin_ppc_mffs:
18977 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18978 }
18979}
18980
18981namespace {
18982// If \p E is not a null pointer, insert an address space cast to match the
18983// return type of \p E if necessary.
18984Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18985 const CallExpr *E = nullptr) {
18986 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18987 auto *Call = CGF.Builder.CreateCall(F);
18988 Call->addRetAttr(
18989 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18990 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18991 if (!E)
18992 return Call;
18993 QualType BuiltinRetType = E->getType();
18994 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18995 if (RetTy == Call->getType())
18996 return Call;
18997 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18998}
18999
19000Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
19001 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
19002 auto *Call = CGF.Builder.CreateCall(F);
19003 Call->addRetAttr(
19004 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
19005 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
19006 return Call;
19007}
19008
19009// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
19010/// Emit code based on Code Object ABI version.
19011/// COV_4 : Emit code to use dispatch ptr
19012/// COV_5+ : Emit code to use implicitarg ptr
19013/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
19014/// and use its value for COV_4 or COV_5+ approach. It is used for
19015/// compiling device libraries in an ABI-agnostic way.
19016///
19017/// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
19018/// clang during compilation of user code.
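// For example (illustrative): for Index == 1 (the y dimension), the COV_5+
// path loads a 16-bit value at byte offset 12 + 1 * 2 == 14 of the implicit
// kernarg segment, while the COV_4 path loads workgroup_size_y at byte
// offset 4 + 1 * 2 == 6 of the HSA kernel_dispatch_packet.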
19019Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
19020 llvm::LoadInst *LD;
19021
19022 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
19023
19024 if (Cov == CodeObjectVersionKind::COV_None) {
19025 StringRef Name = "__oclc_ABI_version";
19026 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
19027 if (!ABIVersionC)
19028 ABIVersionC = new llvm::GlobalVariable(
19029 CGF.CGM.getModule(), CGF.Int32Ty, false,
19030 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
19031 llvm::GlobalVariable::NotThreadLocal,
19032 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
19033
19034 // This load will be eliminated by the IPSCCP because it is constant
19035 // weak_odr without externally_initialized. Either changing it to weak or
19036 // adding externally_initialized will keep the load.
19037 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
19038 CGF.CGM.getIntAlign());
19039
19040 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
19041 ABIVersion,
19042 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
19043
19044 // Indexing the implicit kernarg segment.
19045 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
19046 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
19047
19048 // Indexing the HSA kernel_dispatch_packet struct.
19049 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
19050 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19051
19052 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
19053 LD = CGF.Builder.CreateLoad(
19054 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19055 } else {
19056 Value *GEP = nullptr;
19057 if (Cov >= CodeObjectVersionKind::COV_5) {
19058 // Indexing the implicit kernarg segment.
19059 GEP = CGF.Builder.CreateConstGEP1_32(
19060 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
19061 } else {
19062 // Indexing the HSA kernel_dispatch_packet struct.
19063 GEP = CGF.Builder.CreateConstGEP1_32(
19064 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19065 }
19066 LD = CGF.Builder.CreateLoad(
19067 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19068 }
19069
19070 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
19071 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
19072 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
19073 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
19074 LD->setMetadata(llvm::LLVMContext::MD_noundef,
19075 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19076 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19077 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19078 return LD;
19079}
19080
19081// \p Index is 0, 1, and 2 for the x, y, and z dimensions, respectively.
19082Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
19083 const unsigned XOffset = 12;
19084 auto *DP = EmitAMDGPUDispatchPtr(CGF);
19085 // Indexing the HSA kernel_dispatch_packet struct.
19086 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
19087 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
19088 auto *LD = CGF.Builder.CreateLoad(
19089 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
19090
19091 llvm::MDBuilder MDB(CGF.getLLVMContext());
19092
19093 // Known non-zero.
19094 LD->setMetadata(llvm::LLVMContext::MD_range,
19095 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
19096 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19097 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19098 return LD;
19099}
19100} // namespace
19101
19102// For processing memory ordering and memory scope arguments of various
19103// amdgcn builtins.
19104// \p Order takes a C++11 compatible memory-ordering specifier and converts
19105// it into LLVM's memory ordering specifier using atomic C ABI, and writes
19106// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
19107// specific SyncScopeID and writes it to \p SSID.
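// For example (illustrative): an order argument of __ATOMIC_RELAXED together
// with the scope string "workgroup" yields AO == AtomicOrdering::Monotonic
// and the "workgroup" synchronization scope ID.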
19108void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
19109 llvm::AtomicOrdering &AO,
19110 llvm::SyncScope::ID &SSID) {
19111 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
19112
19113 // Map C11/C++11 memory ordering to LLVM memory ordering
19114 assert(llvm::isValidAtomicOrderingCABI(ord));
19115 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
19116 case llvm::AtomicOrderingCABI::acquire:
19117 case llvm::AtomicOrderingCABI::consume:
19118 AO = llvm::AtomicOrdering::Acquire;
19119 break;
19120 case llvm::AtomicOrderingCABI::release:
19121 AO = llvm::AtomicOrdering::Release;
19122 break;
19123 case llvm::AtomicOrderingCABI::acq_rel:
19124 AO = llvm::AtomicOrdering::AcquireRelease;
19125 break;
19126 case llvm::AtomicOrderingCABI::seq_cst:
19127 AO = llvm::AtomicOrdering::SequentiallyConsistent;
19128 break;
19129 case llvm::AtomicOrderingCABI::relaxed:
19130 AO = llvm::AtomicOrdering::Monotonic;
19131 break;
19132 }
19133
19134 // Some of the atomic builtins take the scope as a string name.
19135 StringRef scp;
19136 if (llvm::getConstantStringInfo(Scope, scp)) {
19137 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
19138 return;
19139 }
19140
19141 // Older builtins had an enum argument for the memory scope.
19142 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
19143 switch (scope) {
19144 case 0: // __MEMORY_SCOPE_SYSTEM
19145 SSID = llvm::SyncScope::System;
19146 break;
19147 case 1: // __MEMORY_SCOPE_DEVICE
19148 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19149 break;
19150 case 2: // __MEMORY_SCOPE_WRKGRP
19151 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
19152 break;
19153 case 3: // __MEMORY_SCOPE_WVFRNT
19154 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
19155 break;
19156 case 4: // __MEMORY_SCOPE_SINGLE
19157 SSID = llvm::SyncScope::SingleThread;
19158 break;
19159 default:
19160 SSID = llvm::SyncScope::System;
19161 break;
19162 }
19163}
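// Illustrative examples of the mapping above (a sketch; the fence/atomic that
// consumes \p AO and \p SSID is emitted by the callers further down):
//   __ATOMIC_RELAXED                 -> AtomicOrdering::Monotonic
//   __ATOMIC_CONSUME / __ATOMIC_ACQUIRE -> AtomicOrdering::Acquire
//   __ATOMIC_SEQ_CST                 -> AtomicOrdering::SequentiallyConsistent
//   scope string "workgroup"         -> syncscope("workgroup")
//   __MEMORY_SCOPE_DEVICE (1)        -> syncscope("agent")
// e.g. __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent") is expected to lower
// to:  fence syncscope("agent") acq_rel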
19164
19165llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
19166 unsigned Idx,
19167 const CallExpr *E) {
19168 llvm::Value *Arg = nullptr;
19169 if ((ICEArguments & (1 << Idx)) == 0) {
19170 Arg = EmitScalarExpr(E->getArg(Idx));
19171 } else {
19172 // If this is required to be a constant, constant fold it so that we
19173 // know that the generated intrinsic gets a ConstantInt.
19174 std::optional<llvm::APSInt> Result =
19175 E->getArg(Idx)->getIntegerConstantExpr(getContext());
19176 assert(Result && "Expected argument to be a constant");
19177 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
19178 }
19179 return Arg;
19180}
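// Illustrative sketch: for a builtin whose operand Idx must be an integer
// constant expression (bit Idx set in ICEArguments), a call such as the
// hypothetical __builtin_foo(v, 2 + 3) produces a ConstantInt (i32 5) for that
// operand rather than runtime IR, so the generated intrinsic call receives the
// immediate it requires.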
19181
19182// Return dot product intrinsic that corresponds to the QT scalar type
19183static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19184 if (QT->isFloatingType())
19185 return RT.getFDotIntrinsic();
19186 if (QT->isSignedIntegerType())
19187 return RT.getSDotIntrinsic();
19188 assert(QT->isUnsignedIntegerType());
19189 return RT.getUDotIntrinsic();
19190}
19191
19192static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19193 if (QT->hasSignedIntegerRepresentation()) {
19194 return RT.getFirstBitSHighIntrinsic();
19195 }
19196
19197 assert(QT->hasUnsignedIntegerRepresentation());
19198 return RT.getFirstBitUHighIntrinsic();
19199}
19200
19201 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
19202 const CallExpr *E,
19203 ReturnValueSlot ReturnValue) {
19204 if (!getLangOpts().HLSL)
19205 return nullptr;
19206
19207 switch (BuiltinID) {
19208 case Builtin::BI__builtin_hlsl_resource_getpointer: {
19209 Value *HandleOp = EmitScalarExpr(E->getArg(0));
19210 Value *IndexOp = EmitScalarExpr(E->getArg(1));
19211
19212 // TODO: Map to an hlsl_device address space.
19213 llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext());
19214
19215 return Builder.CreateIntrinsic(RetTy, Intrinsic::dx_resource_getpointer,
19216 ArrayRef<Value *>{HandleOp, IndexOp});
19217 }
19218 case Builtin::BI__builtin_hlsl_all: {
19219 Value *Op0 = EmitScalarExpr(E->getArg(0));
19220 return Builder.CreateIntrinsic(
19221 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19222 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19223 "hlsl.all");
19224 }
19225 case Builtin::BI__builtin_hlsl_any: {
19226 Value *Op0 = EmitScalarExpr(E->getArg(0));
19227 return Builder.CreateIntrinsic(
19228 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19229 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19230 "hlsl.any");
19231 }
19232 case Builtin::BI__builtin_hlsl_asdouble:
19233 return handleAsDoubleBuiltin(*this, E);
19234 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
19235 Value *OpX = EmitScalarExpr(E->getArg(0));
19236 Value *OpMin = EmitScalarExpr(E->getArg(1));
19237 Value *OpMax = EmitScalarExpr(E->getArg(2));
19238
19239 QualType Ty = E->getArg(0)->getType();
19240 if (auto *VecTy = Ty->getAs<VectorType>())
19241 Ty = VecTy->getElementType();
19242
19243 Intrinsic::ID Intr;
19244 if (Ty->isFloatingType()) {
19245 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
19246 } else if (Ty->isUnsignedIntegerType()) {
19247 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
19248 } else {
19249 assert(Ty->isSignedIntegerType());
19250 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
19251 }
19252 return Builder.CreateIntrinsic(
19253 /*ReturnType=*/OpX->getType(), Intr,
19254 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
19255 }
19256 case Builtin::BI__builtin_hlsl_cross: {
19257 Value *Op0 = EmitScalarExpr(E->getArg(0));
19258 Value *Op1 = EmitScalarExpr(E->getArg(1));
19259 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19260 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19261 "cross operands must have a float representation");
19262 // make sure each vector has exactly 3 elements
19263 assert(
19264 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19265 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19266 "input vectors must have 3 elements each");
19267 return Builder.CreateIntrinsic(
19268 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
19269 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
19270 }
19271 case Builtin::BI__builtin_hlsl_dot: {
19272 Value *Op0 = EmitScalarExpr(E->getArg(0));
19273 Value *Op1 = EmitScalarExpr(E->getArg(1));
19274 llvm::Type *T0 = Op0->getType();
19275 llvm::Type *T1 = Op1->getType();
19276
19277 // If the arguments are scalars, just emit a multiply
19278 if (!T0->isVectorTy() && !T1->isVectorTy()) {
19279 if (T0->isFloatingPointTy())
19280 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
19281
19282 if (T0->isIntegerTy())
19283 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
19284
19285 llvm_unreachable(
19286 "Scalar dot product is only supported on ints and floats.");
19287 }
19288 // For vectors, validate types and emit the appropriate intrinsic
19289
19290 // A VectorSplat should have happened
19291 assert(T0->isVectorTy() && T1->isVectorTy() &&
19292 "Dot product of vector and scalar is not supported.");
19293
19294 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
19295 [[maybe_unused]] auto *VecTy1 =
19296 E->getArg(1)->getType()->getAs<VectorType>();
19297
19298 assert(VecTy0->getElementType() == VecTy1->getElementType() &&
19299 "Dot product of vectors need the same element types.");
19300
19301 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
19302 "Dot product requires vectors to be of the same size.");
19303
19304 return Builder.CreateIntrinsic(
19305 /*ReturnType=*/T0->getScalarType(),
19306 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
19307 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
19308 }
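// Illustrative sketch of the dot lowering above (intrinsic spellings are
// target-dependent; the DirectX flavour is shown here as an example):
//   dot(float a, float b)    ->  fmul float %a, %b
//   dot(int a, int b)        ->  mul i32 %a, %b
//   dot(float3 a, float3 b)  ->  call float @llvm.dx.fdot.v3f32(<3 x float> %a,
//                                                               <3 x float> %b)
// i.e. scalars degenerate to a plain multiply, vectors pick the fdot/sdot/udot
// intrinsic based on the element type.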
19309 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
19310 Value *A = EmitScalarExpr(E->getArg(0));
19311 Value *B = EmitScalarExpr(E->getArg(1));
19312 Value *C = EmitScalarExpr(E->getArg(2));
19313
19314 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
19315 return Builder.CreateIntrinsic(
19316 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19317 "hlsl.dot4add.i8packed");
19318 }
19319 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
19320 Value *A = EmitScalarExpr(E->getArg(0));
19321 Value *B = EmitScalarExpr(E->getArg(1));
19322 Value *C = EmitScalarExpr(E->getArg(2));
19323
19324 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
19325 return Builder.CreateIntrinsic(
19326 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19327 "hlsl.dot4add.u8packed");
19328 }
19329 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19330
19331 Value *X = EmitScalarExpr(E->getArg(0));
19332
19333 return Builder.CreateIntrinsic(
19334 /*ReturnType=*/ConvertType(E->getType()),
19335 getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
19336 ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
19337 }
19338 case Builtin::BI__builtin_hlsl_lerp: {
19339 Value *X = EmitScalarExpr(E->getArg(0));
19340 Value *Y = EmitScalarExpr(E->getArg(1));
19341 Value *S = EmitScalarExpr(E->getArg(2));
19342 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19343 llvm_unreachable("lerp operand must have a float representation");
19344 return Builder.CreateIntrinsic(
19345 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
19346 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
19347 }
19348 case Builtin::BI__builtin_hlsl_length: {
19349 Value *X = EmitScalarExpr(E->getArg(0));
19350
19351 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19352 "length operand must have a float representation");
19353 // if the operand is a scalar, we can use the fabs llvm intrinsic directly
19354 if (!E->getArg(0)->getType()->isVectorType())
19355 return EmitFAbs(*this, X);
19356
19357 return Builder.CreateIntrinsic(
19358 /*ReturnType=*/X->getType()->getScalarType(),
19359 CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
19360 nullptr, "hlsl.length");
19361 }
19362 case Builtin::BI__builtin_hlsl_normalize: {
19363 Value *X = EmitScalarExpr(E->getArg(0));
19364
19365 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19366 "normalize operand must have a float representation");
19367
19368 return Builder.CreateIntrinsic(
19369 /*ReturnType=*/X->getType(),
19370 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
19371 nullptr, "hlsl.normalize");
19372 }
19373 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
19374 Value *X = EmitScalarExpr(E->getArg(0));
19375
19376 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19377 "degree operand must have a float representation");
19378
19379 return Builder.CreateIntrinsic(
19380 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
19381 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
19382 }
19383 case Builtin::BI__builtin_hlsl_elementwise_frac: {
19384 Value *Op0 = EmitScalarExpr(E->getArg(0));
19385 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19386 llvm_unreachable("frac operand must have a float representation");
19387 return Builder.CreateIntrinsic(
19388 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
19389 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
19390 }
19391 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
19392 Value *Op0 = EmitScalarExpr(E->getArg(0));
19393 llvm::Type *Xty = Op0->getType();
19394 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
19395 if (Xty->isVectorTy()) {
19396 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
19397 retType = llvm::VectorType::get(
19398 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19399 }
19400 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19401 llvm_unreachable("isinf operand must have a float representation");
19402 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
19403 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
19404 }
19405 case Builtin::BI__builtin_hlsl_mad: {
19406 Value *M = EmitScalarExpr(E->getArg(0));
19407 Value *A = EmitScalarExpr(E->getArg(1));
19408 Value *B = EmitScalarExpr(E->getArg(2));
19409 if (E->getArg(0)->getType()->hasFloatingRepresentation())
19410 return Builder.CreateIntrinsic(
19411 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
19412 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
19413
19414 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
19415 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19416 return Builder.CreateIntrinsic(
19417 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
19418 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
19419
19420 Value *Mul = Builder.CreateNSWMul(M, A);
19421 return Builder.CreateNSWAdd(Mul, B);
19422 }
19423 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
19424 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19425 return Builder.CreateIntrinsic(
19426 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
19427 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
19428
19429 Value *Mul = Builder.CreateNUWMul(M, A);
19430 return Builder.CreateNUWAdd(Mul, B);
19431 }
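// Illustrative sketch of the mad lowering above:
//   float operands:    call float @llvm.fmuladd.f32(float %m, float %a, float %b)
//   signed integers:   @llvm.dx.imad on DXIL, otherwise mul nsw + add nsw
//   unsigned integers: @llvm.dx.umad on DXIL, otherwise mul nuw + add nuw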
19432 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
19433 Value *Op0 = EmitScalarExpr(E->getArg(0));
19434 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19435 llvm_unreachable("rcp operand must have a float representation");
19436 llvm::Type *Ty = Op0->getType();
19437 llvm::Type *EltTy = Ty->getScalarType();
19438 Constant *One = Ty->isVectorTy()
19439 ? ConstantVector::getSplat(
19440 ElementCount::getFixed(
19441 cast<FixedVectorType>(Ty)->getNumElements()),
19442 ConstantFP::get(EltTy, 1.0))
19443 : ConstantFP::get(EltTy, 1.0);
19444 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
19445 }
19446 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
19447 Value *Op0 = EmitScalarExpr(E->getArg(0));
19448 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19449 llvm_unreachable("rsqrt operand must have a float representation");
19450 return Builder.CreateIntrinsic(
19451 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
19452 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
19453 }
19454 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
19455 Value *Op0 = EmitScalarExpr(E->getArg(0));
19456 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19457 "saturate operand must have a float representation");
19458 return Builder.CreateIntrinsic(
19459 /*ReturnType=*/Op0->getType(),
19460 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
19461 nullptr, "hlsl.saturate");
19462 }
19463 case Builtin::BI__builtin_hlsl_select: {
19464 Value *OpCond = EmitScalarExpr(E->getArg(0));
19465 RValue RValTrue = EmitAnyExpr(E->getArg(1));
19466 Value *OpTrue =
19467 RValTrue.isScalar()
19468 ? RValTrue.getScalarVal()
19469 : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this);
19470 RValue RValFalse = EmitAnyExpr(E->getArg(2));
19471 Value *OpFalse =
19472 RValFalse.isScalar()
19473 ? RValFalse.getScalarVal()
19474 : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this);
19475
19476 Value *SelectVal =
19477 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
19478 if (!RValTrue.isScalar())
19479 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
19480 ReturnValue.isVolatile());
19481
19482 return SelectVal;
19483 }
19484 case Builtin::BI__builtin_hlsl_step: {
19485 Value *Op0 = EmitScalarExpr(E->getArg(0));
19486 Value *Op1 = EmitScalarExpr(E->getArg(1));
19487 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19488 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19489 "step operands must have a float representation");
19490 return Builder.CreateIntrinsic(
19491 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
19492 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
19493 }
19494 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
19495 Value *Op = EmitScalarExpr(E->getArg(0));
19496 assert(Op->getType()->isIntegerTy(1) &&
19497 "Intrinsic WaveActiveAllTrue operand must be a bool");
19498
19499 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
19500 return EmitRuntimeCall(
19501 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19502 }
19503 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
19504 Value *Op = EmitScalarExpr(E->getArg(0));
19505 assert(Op->getType()->isIntegerTy(1) &&
19506 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19507
19508 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
19509 return EmitRuntimeCall(
19510 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19511 }
19512 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
19513 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19514 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
19515 return EmitRuntimeCall(
19516 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
19517 ArrayRef{OpExpr});
19518 }
19519 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
19520 // We don't define a SPIR-V intrinsic; instead this is a SPIR-V built-in
19521 // defined in SPIRVBuiltins.td. So we manually emit either the DirectX
19522 // intrinsic or a call to the demangled SPIR-V builtin name.
19523 switch (CGM.getTarget().getTriple().getArch()) {
19524 case llvm::Triple::dxil:
19525 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
19526 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
19527 case llvm::Triple::spirv:
19528 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
19529 llvm::FunctionType::get(IntTy, {}, false),
19530 "__hlsl_wave_get_lane_index", {}, false, true));
19531 default:
19532 llvm_unreachable(
19533 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19534 }
19535 }
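// Illustrative sketch: on DXIL the builtin becomes a call to
// @llvm.dx.wave.getlaneindex(), while on SPIR-V it becomes a convergent call
// to the external function __hlsl_wave_get_lane_index, which the SPIR-V
// backend recognizes via SPIRVBuiltins.td, roughly:
//   %idx = call i32 @__hlsl_wave_get_lane_index()   ; marked convergent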
19536 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
19537 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
19538 return EmitRuntimeCall(
19539 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19540 }
19541 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
19542 // Due to the use of variadic arguments, we must explicitly retrieve them and
19543 // create our function type.
19544 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19545 Value *OpIndex = EmitScalarExpr(E->getArg(1));
19546 llvm::FunctionType *FT = llvm::FunctionType::get(
19547 OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
19548 false);
19549
19550 // Get overloaded name
19551 std::string Name =
19552 Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
19553 ArrayRef{OpExpr->getType()}, &CGM.getModule());
19554 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19555 /*Local=*/false,
19556 /*AssumeConvergent=*/true),
19557 ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
19558 }
19559 case Builtin::BI__builtin_hlsl_elementwise_sign: {
19560 auto *Arg0 = E->getArg(0);
19561 Value *Op0 = EmitScalarExpr(Arg0);
19562 llvm::Type *Xty = Op0->getType();
19563 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
19564 if (Xty->isVectorTy()) {
19565 auto *XVecTy = Arg0->getType()->getAs<VectorType>();
19566 retType = llvm::VectorType::get(
19567 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19568 }
19569 assert((Arg0->getType()->hasFloatingRepresentation() ||
19570 Arg0->getType()->hasIntegerRepresentation()) &&
19571 "sign operand must have a float or int representation");
19572
19573 if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
19574 Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
19575 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
19576 ConstantInt::get(retType, 1), "hlsl.sign");
19577 }
19578
19579 return Builder.CreateIntrinsic(
19580 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
19581 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
19582 }
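// Illustrative sketch: for an unsigned operand, sign(x) is simply (x != 0):
//   %cmp = icmp eq i32 %x, 0
//   %r   = select i1 %cmp, i32 0, i32 1
// For float and signed-integer operands the target's sign intrinsic is called
// instead, returning i32 (or a vector of i32 for vector operands).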
19583 case Builtin::BI__builtin_hlsl_elementwise_radians: {
19584 Value *Op0 = EmitScalarExpr(E->getArg(0));
19585 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19586 "radians operand must have a float representation");
19587 return Builder.CreateIntrinsic(
19588 /*ReturnType=*/Op0->getType(),
19589 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
19590 nullptr, "hlsl.radians");
19591 }
19592 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
19593 Value *ResHandle = EmitScalarExpr(E->getArg(0));
19594 Value *Offset = EmitScalarExpr(E->getArg(1));
19595 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
19596 return Builder.CreateIntrinsic(
19597 /*ReturnType=*/Offset->getType(),
19598 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
19599 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
19600 }
19601 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
19602
19603 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
19604 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
19605 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
19606 "asuint operands types mismatch");
19607 return handleHlslSplitdouble(E, this);
19608 }
19609 case Builtin::BI__builtin_hlsl_elementwise_clip:
19610 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19611 "clip operands types mismatch");
19612 return handleHlslClip(E, this);
19613 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
19614 Intrinsic::ID ID =
19615 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
19616 return EmitRuntimeCall(
19617 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19618 }
19619 }
19620 return nullptr;
19621}
19622
19623void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
19624 const CallExpr *E) {
19625 constexpr const char *Tag = "amdgpu-as";
19626
19627 LLVMContext &Ctx = Inst->getContext();
19628 SmallVector<MMRAMetadata::TagT, 3> MMRAs;
19629 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
19630 llvm::Value *V = EmitScalarExpr(E->getArg(K));
19631 StringRef AS;
19632 if (llvm::getConstantStringInfo(V, AS)) {
19633 MMRAs.push_back({Tag, AS});
19634 // TODO: Delete the resulting unused constant?
19635 continue;
19636 }
19637 CGM.Error(E->getExprLoc(),
19638 "expected an address space name as a string literal");
19639 }
19640
19641 llvm::sort(MMRAs);
19642 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
19643 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
19644}
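// Illustrative sketch: a call such as
//   __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent", "local", "global");
// pairs the "amdgpu-as" tag with each named address space and attaches the
// result as MMRA metadata on the fence, roughly:
//   fence syncscope("agent") release, !mmra !0
//   !0 = !{!1, !2}
//   !1 = !{!"amdgpu-as", !"local"}
//   !2 = !{!"amdgpu-as", !"global"}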
19645
19646 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19647 const CallExpr *E) {
19648 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
19649 llvm::SyncScope::ID SSID;
19650 switch (BuiltinID) {
19651 case AMDGPU::BI__builtin_amdgcn_div_scale:
19652 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
19653 // Translate from the intrinsics's struct return to the builtin's out
19654 // argument.
19655
19656 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
19657
19658 llvm::Value *X = EmitScalarExpr(E->getArg(0));
19659 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
19660 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
19661
19662 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
19663 X->getType());
19664
19665 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
19666
19667 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
19668 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
19669
19670 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
19671
19672 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
19673 Builder.CreateStore(FlagExt, FlagOutPtr);
19674 return Result;
19675 }
19676 case AMDGPU::BI__builtin_amdgcn_div_fmas:
19677 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
19678 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19679 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19680 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19681 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
19682
19683 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
19684 Src0->getType());
19685 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
19686 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
19687 }
19688
19689 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
19690 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19691 Intrinsic::amdgcn_ds_swizzle);
19692 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
19693 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
19694 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
19695 llvm::SmallVector<llvm::Value *, 6> Args;
19696 // Find out if any arguments are required to be integer constant
19697 // expressions.
19698 unsigned ICEArguments = 0;
19699 ASTContext::GetBuiltinTypeError Error;
19700 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
19701 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19702 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19703 unsigned Size = DataTy->getPrimitiveSizeInBits();
19704 llvm::Type *IntTy =
19705 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
19706 Function *F =
19707 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
19708 ? Intrinsic::amdgcn_mov_dpp8
19709 : Intrinsic::amdgcn_update_dpp,
19710 IntTy);
19711 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
19712 E->getNumArgs() == 2);
19713 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
19714 if (InsertOld)
19715 Args.push_back(llvm::PoisonValue::get(IntTy));
19716 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
19717 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19718 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
19719 Size < 32) {
19720 if (!DataTy->isIntegerTy())
19721 V = Builder.CreateBitCast(
19722 V, llvm::IntegerType::get(Builder.getContext(), Size));
19723 V = Builder.CreateZExtOrBitCast(V, IntTy);
19724 }
19725 llvm::Type *ExpTy =
19726 F->getFunctionType()->getFunctionParamType(I + InsertOld);
19727 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
19728 }
19729 Value *V = Builder.CreateCall(F, Args);
19730 if (Size < 32 && !DataTy->isIntegerTy())
19731 V = Builder.CreateTrunc(
19732 V, llvm::IntegerType::get(Builder.getContext(), Size));
19733 return Builder.CreateTruncOrBitCast(V, DataTy);
19734 }
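// Illustrative sketch of the marshalling above for a 16-bit payload, e.g.
// __builtin_amdgcn_update_dpp on a half value: the operand is bitcast to i16,
// zero-extended to i32, the i32-overloaded intrinsic is called, and the result
// is truncated back to i16 and bitcast to half. __builtin_amdgcn_mov_dpp is
// emitted as llvm.amdgcn.update.dpp with a poison "old" operand prepended.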
19735 case AMDGPU::BI__builtin_amdgcn_permlane16:
19736 case AMDGPU::BI__builtin_amdgcn_permlanex16:
19737 return emitBuiltinWithOneOverloadedType<6>(
19738 *this, E,
19739 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
19740 ? Intrinsic::amdgcn_permlane16
19741 : Intrinsic::amdgcn_permlanex16);
19742 case AMDGPU::BI__builtin_amdgcn_permlane64:
19743 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19744 Intrinsic::amdgcn_permlane64);
19745 case AMDGPU::BI__builtin_amdgcn_readlane:
19746 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19747 Intrinsic::amdgcn_readlane);
19748 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
19749 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19750 Intrinsic::amdgcn_readfirstlane);
19751 case AMDGPU::BI__builtin_amdgcn_div_fixup:
19752 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
19753 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
19754 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19755 Intrinsic::amdgcn_div_fixup);
19756 case AMDGPU::BI__builtin_amdgcn_trig_preop:
19757 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
19758 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
19759 case AMDGPU::BI__builtin_amdgcn_rcp:
19760 case AMDGPU::BI__builtin_amdgcn_rcpf:
19761 case AMDGPU::BI__builtin_amdgcn_rcph:
19762 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
19763 case AMDGPU::BI__builtin_amdgcn_sqrt:
19764 case AMDGPU::BI__builtin_amdgcn_sqrtf:
19765 case AMDGPU::BI__builtin_amdgcn_sqrth:
19766 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19767 Intrinsic::amdgcn_sqrt);
19768 case AMDGPU::BI__builtin_amdgcn_rsq:
19769 case AMDGPU::BI__builtin_amdgcn_rsqf:
19770 case AMDGPU::BI__builtin_amdgcn_rsqh:
19771 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
19772 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
19773 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
19774 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19775 Intrinsic::amdgcn_rsq_clamp);
19776 case AMDGPU::BI__builtin_amdgcn_sinf:
19777 case AMDGPU::BI__builtin_amdgcn_sinh:
19778 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
19779 case AMDGPU::BI__builtin_amdgcn_cosf:
19780 case AMDGPU::BI__builtin_amdgcn_cosh:
19781 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
19782 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
19783 return EmitAMDGPUDispatchPtr(*this, E);
19784 case AMDGPU::BI__builtin_amdgcn_logf:
19785 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
19786 case AMDGPU::BI__builtin_amdgcn_exp2f:
19787 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19788 Intrinsic::amdgcn_exp2);
19789 case AMDGPU::BI__builtin_amdgcn_log_clampf:
19790 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19791 Intrinsic::amdgcn_log_clamp);
19792 case AMDGPU::BI__builtin_amdgcn_ldexp:
19793 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
19794 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19795 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19796 llvm::Function *F =
19797 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
19798 return Builder.CreateCall(F, {Src0, Src1});
19799 }
19800 case AMDGPU::BI__builtin_amdgcn_ldexph: {
19801 // The raw instruction has a different behavior for out of bounds exponent
19802 // values (implicit truncation instead of saturate to short_min/short_max).
19803 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19804 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19805 llvm::Function *F =
19806 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
19807 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
19808 }
19809 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
19810 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
19811 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
19812 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19813 Intrinsic::amdgcn_frexp_mant);
19814 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
19815 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
19816 Value *Src0 = EmitScalarExpr(E->getArg(0));
19817 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19818 { Builder.getInt32Ty(), Src0->getType() });
19819 return Builder.CreateCall(F, Src0);
19820 }
19821 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
19822 Value *Src0 = EmitScalarExpr(E->getArg(0));
19823 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19824 { Builder.getInt16Ty(), Src0->getType() });
19825 return Builder.CreateCall(F, Src0);
19826 }
19827 case AMDGPU::BI__builtin_amdgcn_fract:
19828 case AMDGPU::BI__builtin_amdgcn_fractf:
19829 case AMDGPU::BI__builtin_amdgcn_fracth:
19830 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19831 Intrinsic::amdgcn_fract);
19832 case AMDGPU::BI__builtin_amdgcn_lerp:
19833 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19834 Intrinsic::amdgcn_lerp);
19835 case AMDGPU::BI__builtin_amdgcn_ubfe:
19836 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19837 Intrinsic::amdgcn_ubfe);
19838 case AMDGPU::BI__builtin_amdgcn_sbfe:
19839 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19840 Intrinsic::amdgcn_sbfe);
19841 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
19842 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
19843 llvm::Type *ResultType = ConvertType(E->getType());
19844 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19845 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
19846 return Builder.CreateCall(F, { Src });
19847 }
19848 case AMDGPU::BI__builtin_amdgcn_uicmp:
19849 case AMDGPU::BI__builtin_amdgcn_uicmpl:
19850 case AMDGPU::BI__builtin_amdgcn_sicmp:
19851 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
19852 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19853 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19854 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19855
19856 // FIXME-GFX10: How should 32 bit mask be handled?
19857 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
19858 { Builder.getInt64Ty(), Src0->getType() });
19859 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19860 }
19861 case AMDGPU::BI__builtin_amdgcn_fcmp:
19862 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
19863 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19864 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19865 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19866
19867 // FIXME-GFX10: How should 32 bit mask be handled?
19868 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
19869 { Builder.getInt64Ty(), Src0->getType() });
19870 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19871 }
19872 case AMDGPU::BI__builtin_amdgcn_class:
19873 case AMDGPU::BI__builtin_amdgcn_classf:
19874 case AMDGPU::BI__builtin_amdgcn_classh:
19875 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
19876 case AMDGPU::BI__builtin_amdgcn_fmed3f:
19877 case AMDGPU::BI__builtin_amdgcn_fmed3h:
19878 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19879 Intrinsic::amdgcn_fmed3);
19880 case AMDGPU::BI__builtin_amdgcn_ds_append:
19881 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
19882 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
19883 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
19884 Value *Src0 = EmitScalarExpr(E->getArg(0));
19885 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
19886 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
19887 }
19888 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19889 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19890 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19891 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19892 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19893 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19894 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19895 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19896 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19897 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19898 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19899 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19900 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19901 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
19902 Intrinsic::ID IID;
19903 switch (BuiltinID) {
19904 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19905 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19906 IID = Intrinsic::amdgcn_global_load_tr_b64;
19907 break;
19908 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19909 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19910 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19911 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19912 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19913 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19914 IID = Intrinsic::amdgcn_global_load_tr_b128;
19915 break;
19916 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19917 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
19918 break;
19919 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19920 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
19921 break;
19922 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19923 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
19924 break;
19925 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
19926 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19927 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19928 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
19929 break;
19930 }
19931 llvm::Type *LoadTy = ConvertType(E->getType());
19932 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
19933 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
19934 return Builder.CreateCall(F, {Addr});
19935 }
19936 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
19937 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
19938 {llvm::Type::getInt64Ty(getLLVMContext())});
19939 return Builder.CreateCall(F);
19940 }
19941 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
19942 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
19943 {llvm::Type::getInt64Ty(getLLVMContext())});
19944 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
19945 return Builder.CreateCall(F, {Env});
19946 }
19947 case AMDGPU::BI__builtin_amdgcn_read_exec:
19948 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
19949 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
19950 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
19951 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
19952 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
19953 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
19954 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
19955 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
19956 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
19957 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
19958 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
19959 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
19960 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
19961 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
19962 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
19963
19964 // The builtins take these arguments as vec4 where the last element is
19965 // ignored. The intrinsic takes them as vec3.
19966 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
19967 ArrayRef<int>{0, 1, 2});
19968 RayDir =
19969 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
19970 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
19971 ArrayRef<int>{0, 1, 2});
19972
19973 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
19974 {NodePtr->getType(), RayDir->getType()});
19975 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
19976 RayInverseDir, TextureDescr});
19977 }
19978
19979 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
19980 SmallVector<Value *, 4> Args;
19981 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19982 Args.push_back(EmitScalarExpr(E->getArg(i)));
19983
19984 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
19985 Value *Call = Builder.CreateCall(F, Args);
19986 Value *Rtn = Builder.CreateExtractValue(Call, 0);
19987 Value *A = Builder.CreateExtractValue(Call, 1);
19988 llvm::Type *RetTy = ConvertType(E->getType());
19989 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
19990 (uint64_t)0);
19991 return Builder.CreateInsertElement(I0, A, 1);
19992 }
19993 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
19994 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
19995 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
19996 Function *F = CGM.getIntrinsic(
19997 BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19998 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19999 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
20000 {VT, VT});
20001
20002 SmallVector<Value *, 9> Args;
20003 for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
20004 Args.push_back(EmitScalarExpr(E->getArg(I)));
20005 return Builder.CreateCall(F, Args);
20006 }
20007 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20008 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20009 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20010 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20011 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20012 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20013 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20014 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20015 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20016 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20017 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20018 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20019 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20020 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20021 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20022 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20023 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20024 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20025 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20026 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20027 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20028 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20029 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20030 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20031 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20032 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20033 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20034 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20035 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20036 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20037 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20038 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20039 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20040 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20041 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20042 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20043 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20044 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20045 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20046 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20047 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20048 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20049 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20050 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20051 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20052 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20053 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20054 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20055 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20056 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20057 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20058 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20059 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20060 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20061 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20062 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20063 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20064 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20065 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20066 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
20067
20068 // These operations perform a matrix multiplication and accumulation of
20069 // the form:
20070 // D = A * B + C
20071 // We need to specify one type for matrices AB and one for matrices CD.
20072 // Sparse matrix operations can have different types for A and B as well as
20073 // an additional type for sparsity index.
20074 // Destination type should be put before types used for source operands.
20075 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
20076 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
20077 // There is no need for the variable opsel argument, so always set it to
20078 // "false".
20079 bool AppendFalseForOpselArg = false;
20080 unsigned BuiltinWMMAOp;
20081
20082 switch (BuiltinID) {
20083 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20084 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20085 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20086 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20087 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20088 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
20089 break;
20090 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20091 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20092 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20093 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20094 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20095 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
20096 break;
20097 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20098 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20099 AppendFalseForOpselArg = true;
20100 [[fallthrough]];
20101 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20102 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20103 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20104 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
20105 break;
20106 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20107 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20108 AppendFalseForOpselArg = true;
20109 [[fallthrough]];
20110 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20111 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20112 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20113 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
20114 break;
20115 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20116 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20117 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20118 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
20119 break;
20120 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20121 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20122 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20123 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
20124 break;
20125 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20126 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20127 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20128 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20129 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20130 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
20131 break;
20132 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20133 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20134 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20135 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20136 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20137 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
20138 break;
20139 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20140 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20141 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20142 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
20143 break;
20144 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20145 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20146 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20147 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
20148 break;
20149 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20150 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20151 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20152 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
20153 break;
20154 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20155 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20156 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20157 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
20158 break;
20159 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20160 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20161 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20162 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
20163 break;
20164 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20165 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20166 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20167 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
20168 break;
20169 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20170 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20171 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20172 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
20173 break;
20174 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20175 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20176 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20177 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
20178 break;
20179 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20180 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20181 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20182 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
20183 break;
20184 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20185 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20186 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20187 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
20188 break;
20189 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20190 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20191 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20192 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
20193 break;
20194 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20195 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20196 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20197 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
20198 break;
20199 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20200 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20201 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20202 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
20203 break;
20204 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20205 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20206 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20207 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
20208 break;
20209 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20210 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20211 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20212 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
20213 break;
20214 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20215 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
20216 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20217 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
20218 break;
20219 }
20220
20221 SmallVector<Value *, 6> Args;
20222 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20223 Args.push_back(EmitScalarExpr(E->getArg(i)));
20224 if (AppendFalseForOpselArg)
20225 Args.push_back(Builder.getFalse());
20226
20227 SmallVector<llvm::Type *, 6> ArgTypes;
20228 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
20229 ArgTypes.push_back(Args[ArgIdx]->getType());
20230
20231 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
20232 return Builder.CreateCall(F, Args);
20233 }
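// Illustrative sketch: for __builtin_amdgcn_wmma_f32_16x16x16_f16_w32 the
// table above selects ArgsForMatchingMatrixTypes = {2, 0}, i.e. the types of
// call operand 2 (the C/D accumulator vector) and operand 0 (the A/B source
// vector) instantiate the overloaded llvm.amdgcn.wmma.* intrinsic; the
// concrete vector widths depend on the wave size and target, so they are not
// spelled out here.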
20234
20235 // amdgcn workitem
20236 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
20237 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
20238 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
20239 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
20240 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
20241 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
20242
20243 // amdgcn workgroup size
20244 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
20245 return EmitAMDGPUWorkGroupSize(*this, 0);
20246 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
20247 return EmitAMDGPUWorkGroupSize(*this, 1);
20248 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
20249 return EmitAMDGPUWorkGroupSize(*this, 2);
20250
20251 // amdgcn grid size
20252 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
20253 return EmitAMDGPUGridSize(*this, 0);
20254 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
20255 return EmitAMDGPUGridSize(*this, 1);
20256 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
20257 return EmitAMDGPUGridSize(*this, 2);
20258
20259 // r600 intrinsics
20260 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
20261 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
20262 return emitBuiltinWithOneOverloadedType<1>(*this, E,
20263 Intrinsic::r600_recipsqrt_ieee);
20264 case AMDGPU::BI__builtin_r600_read_tidig_x:
20265 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
20266 case AMDGPU::BI__builtin_r600_read_tidig_y:
20267 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
20268 case AMDGPU::BI__builtin_r600_read_tidig_z:
20269 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
20270 case AMDGPU::BI__builtin_amdgcn_alignbit: {
20271 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
20272 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
20273 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
20274 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
20275 return Builder.CreateCall(F, { Src0, Src1, Src2 });
20276 }
20277 case AMDGPU::BI__builtin_amdgcn_fence: {
20278 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
20279 EmitScalarExpr(E->getArg(1)), AO, SSID);
20280 FenceInst *Fence = Builder.CreateFence(AO, SSID);
20281 if (E->getNumArgs() > 2)
20282 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
20283 return Fence;
20284 }
20285 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20286 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20287 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20288 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20289 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20290 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20291 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20292 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20293 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20294 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20295 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20296 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20297 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20298 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20299 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20300 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20301 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20302 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20303 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20304 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20305 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20306 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20307 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
20308 llvm::AtomicRMWInst::BinOp BinOp;
20309 switch (BuiltinID) {
20310 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20311 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20312 BinOp = llvm::AtomicRMWInst::UIncWrap;
20313 break;
20314 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20315 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20316 BinOp = llvm::AtomicRMWInst::UDecWrap;
20317 break;
20318 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20319 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20320 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20321 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20322 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20323 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20324 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20325 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20326 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20327 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20328 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20329 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20330 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20331 BinOp = llvm::AtomicRMWInst::FAdd;
20332 break;
20333 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20334 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20335 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20336 BinOp = llvm::AtomicRMWInst::FMin;
20337 break;
20338 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20339 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
20340 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20341 BinOp = llvm::AtomicRMWInst::FMax;
20342 break;
20343 }
20344
20345 Address Ptr = CheckAtomicAlignment(*this, E);
20346 Value *Val = EmitScalarExpr(E->getArg(1));
20347 llvm::Type *OrigTy = Val->getType();
20348 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
20349
20350 bool Volatile;
20351
20352 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
20353 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
20354 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
20355 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
20356 Volatile =
20357 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
20358 } else {
20359 // Infer volatile from the passed type.
20360 Volatile =
20361 PtrTy.castAs<PointerType>()->getPointeeType().isVolatileQualified();
20362 }
20363
20364 if (E->getNumArgs() >= 4) {
20365 // Some of the builtins have explicit ordering and scope arguments.
20366 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
20367 EmitScalarExpr(E->getArg(3)), AO, SSID);
20368 } else {
20369 // Most of the builtins do not have syncscope/order arguments. For DS
20370 // atomics the scope doesn't really matter, as they implicitly operate at
20371 // workgroup scope.
20372 //
20373 // The global/flat cases need to use agent scope to consistently produce
20374 // the native instruction instead of a cmpxchg expansion.
20375 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
20376 AO = AtomicOrdering::Monotonic;
20377
20378 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
20379 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
20380 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
20381 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
20382 llvm::Type *V2BF16Ty = FixedVectorType::get(
20383 llvm::Type::getBFloatTy(Builder.getContext()), 2);
20384 Val = Builder.CreateBitCast(Val, V2BF16Ty);
20385 }
20386 }
20387
20388 llvm::AtomicRMWInst *RMW =
20389 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
20390 if (Volatile)
20391 RMW->setVolatile(true);
20392
20393 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
20394 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
20395 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
20396 // instruction for flat and global operations.
20397 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
20398 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
20399
20400 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
20401 // instruction, but this only matters for float fadd.
20402 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
20403 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
20404 }
20405
20406 return Builder.CreateBitCast(RMW, OrigTy);
20407 }
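// As a rough illustration of the path above (not a verbatim dump): with
// default options, __builtin_amdgcn_global_atomic_fadd_f32(p, v) should come
// out as an atomicrmw along the lines of
//   %old = atomicrmw fadd ptr addrspace(1) %p, float %v
//          syncscope("agent") monotonic,
//          !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
//   (with !0 = !{}, the empty tuple attached above)
// i.e. a plain atomicrmw plus the metadata that lets the backend select the
// native instruction instead of a cmpxchg expansion; exact IR varies by
// target and options.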
20408 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
20409 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
20410 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
20411 llvm::Type *ResultType = ConvertType(E->getType());
20412 // s_sendmsg_rtn is mangled using return type only.
20413 Function *F =
20414 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
20415 return Builder.CreateCall(F, {Arg});
20416 }
20417 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
20418 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
20419 // Because builtin types are limited, and the intrinsic uses a struct/pair
20420 // output, marshal the pair-of-i32 to <2 x i32>.
20421 Value *VDstOld = EmitScalarExpr(E->getArg(0));
20422 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
20423 Value *FI = EmitScalarExpr(E->getArg(2));
20424 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
20425 Function *F =
20426 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
20427 ? Intrinsic::amdgcn_permlane16_swap
20428 : Intrinsic::amdgcn_permlane32_swap);
20429 llvm::CallInst *Call =
20430 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
20431
20432 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
20433 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
20434
20435 llvm::Type *ResultType = ConvertType(E->getType());
20436
20437 llvm::Value *Insert0 = Builder.CreateInsertElement(
20438 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
20439 llvm::Value *AsVector =
20440 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
20441 return AsVector;
20442 }
20443 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
20444 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
20445 return emitQuaternaryBuiltin(*this, E, Intrinsic::amdgcn_bitop3);
20446 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20447 return emitBuiltinWithOneOverloadedType<4>(
20448 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
20449 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
20450 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
20451 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
20452 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
20453 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
20454 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
20455 return emitBuiltinWithOneOverloadedType<5>(
20456 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
20457 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20458 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20459 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20460 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20461 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20462 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
20463 llvm::Type *RetTy = nullptr;
20464 switch (BuiltinID) {
20465 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20466 RetTy = Int8Ty;
20467 break;
20468 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20469 RetTy = Int16Ty;
20470 break;
20471 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20472 RetTy = Int32Ty;
20473 break;
20474 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20475 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
20476 break;
20477 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20478 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
20479 break;
20480 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
20481 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
20482 break;
20483 }
20484 Function *F =
20485 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
20486 return Builder.CreateCall(
20487 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
20488 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
20489 }
20490 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
20491 return emitBuiltinWithOneOverloadedType<2>(
20492 *this, E, Intrinsic::amdgcn_s_prefetch_data);
20493 default:
20494 return nullptr;
20495 }
20496}
20497
20498Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID,
20499 const CallExpr *E) {
20500 switch (BuiltinID) {
20501 case SPIRV::BI__builtin_spirv_distance: {
20502 Value *X = EmitScalarExpr(E->getArg(0));
20503 Value *Y = EmitScalarExpr(E->getArg(1));
20504 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
20505 E->getArg(1)->getType()->hasFloatingRepresentation() &&
20506 "Distance operands must have a float representation");
20507 assert(E->getArg(0)->getType()->isVectorType() &&
20508 E->getArg(1)->getType()->isVectorType() &&
20509 "Distance operands must be a vector");
20510 return Builder.CreateIntrinsic(
20511 /*ReturnType=*/X->getType()->getScalarType(), Intrinsic::spv_distance,
20512 ArrayRef<Value *>{X, Y}, nullptr, "spv.distance");
20513 }
20514 }
20515 return nullptr;
20516}
20517
20518/// Handle a SystemZ function in which the final argument is a pointer
20519/// to an int that receives the post-instruction CC value. At the LLVM level
20520/// this is represented as a function that returns a {result, cc} pair.
20521static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
20522 unsigned IntrinsicID,
20523 const CallExpr *E) {
20523 const CallExpr *E) {
20524 unsigned NumArgs = E->getNumArgs() - 1;
20525 SmallVector<Value *, 8> Args(NumArgs);
20526 for (unsigned I = 0; I < NumArgs; ++I)
20527 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
20528 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
20529 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
20530 Value *Call = CGF.Builder.CreateCall(F, Args);
20531 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
20532 CGF.Builder.CreateStore(CC, CCPtr);
20533 return CGF.Builder.CreateExtractValue(Call, 0);
20534}
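// For example, a builtin such as __builtin_s390_vceqbs(a, b, &cc) (see the
// INTRINSIC_WITH_CC list below) maps onto an intrinsic that returns a
// {result, i32 cc} pair at the LLVM level; the helper above stores the
// extracted cc through the final pointer argument and returns the first
// element as the builtin's value.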
20535
20536Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
20537 const CallExpr *E) {
20538 switch (BuiltinID) {
20539 case SystemZ::BI__builtin_tbegin: {
20540 Value *TDB = EmitScalarExpr(E->getArg(0));
20541 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20542 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
20543 return Builder.CreateCall(F, {TDB, Control});
20544 }
20545 case SystemZ::BI__builtin_tbegin_nofloat: {
20546 Value *TDB = EmitScalarExpr(E->getArg(0));
20547 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20548 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
20549 return Builder.CreateCall(F, {TDB, Control});
20550 }
20551 case SystemZ::BI__builtin_tbeginc: {
20552 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
20553 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
20554 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
20555 return Builder.CreateCall(F, {TDB, Control});
20556 }
20557 case SystemZ::BI__builtin_tabort: {
20558 Value *Data = EmitScalarExpr(E->getArg(0));
20559 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
20560 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
20561 }
20562 case SystemZ::BI__builtin_non_tx_store: {
20563 Value *Address = EmitScalarExpr(E->getArg(0));
20564 Value *Data = EmitScalarExpr(E->getArg(1));
20565 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
20566 return Builder.CreateCall(F, {Data, Address});
20567 }
20568
20569 // Vector builtins. Note that most vector builtins are mapped automatically
20570 // to target-specific LLVM intrinsics. The ones handled specially here can
20571 // be represented via standard LLVM IR, which is preferable because it enables
20572 // common LLVM optimizations.
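// For instance, the vclz*/vctz* builtins below become generic llvm.ctlz /
// llvm.cttz calls (e.g. __builtin_s390_vclzg(x) should turn into roughly
// call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x, i1 false)), and the verll*
// rotates become llvm.fshl, so ordinary IR-level optimizations apply to them.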
20573
20574 case SystemZ::BI__builtin_s390_vclzb:
20575 case SystemZ::BI__builtin_s390_vclzh:
20576 case SystemZ::BI__builtin_s390_vclzf:
20577 case SystemZ::BI__builtin_s390_vclzg: {
20578 llvm::Type *ResultType = ConvertType(E->getType());
20579 Value *X = EmitScalarExpr(E->getArg(0));
20580 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20581 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
20582 return Builder.CreateCall(F, {X, Undef});
20583 }
20584
20585 case SystemZ::BI__builtin_s390_vctzb:
20586 case SystemZ::BI__builtin_s390_vctzh:
20587 case SystemZ::BI__builtin_s390_vctzf:
20588 case SystemZ::BI__builtin_s390_vctzg: {
20589 llvm::Type *ResultType = ConvertType(E->getType());
20590 Value *X = EmitScalarExpr(E->getArg(0));
20591 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20592 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
20593 return Builder.CreateCall(F, {X, Undef});
20594 }
20595
20596 case SystemZ::BI__builtin_s390_verllb:
20597 case SystemZ::BI__builtin_s390_verllh:
20598 case SystemZ::BI__builtin_s390_verllf:
20599 case SystemZ::BI__builtin_s390_verllg: {
20600 llvm::Type *ResultType = ConvertType(E->getType());
20601 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20602 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20603 // Splat scalar rotate amount to vector type.
20604 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
20605 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
20606 Amt = Builder.CreateVectorSplat(NumElts, Amt);
20607 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20608 return Builder.CreateCall(F, { Src, Src, Amt });
20609 }
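// The funnel-shift form above encodes "rotate left by Amt" as
// fshl(Src, Src, splat(Amt)); e.g. __builtin_s390_verllf(v, 3) should come
// out roughly as
//   call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %v, <4 x i32> %v,
//                                   <4 x i32> <splat of i32 3>)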
20610
20611 case SystemZ::BI__builtin_s390_verllvb:
20612 case SystemZ::BI__builtin_s390_verllvh:
20613 case SystemZ::BI__builtin_s390_verllvf:
20614 case SystemZ::BI__builtin_s390_verllvg: {
20615 llvm::Type *ResultType = ConvertType(E->getType());
20616 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20617 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20618 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20619 return Builder.CreateCall(F, { Src, Src, Amt });
20620 }
20621
20622 case SystemZ::BI__builtin_s390_vfsqsb:
20623 case SystemZ::BI__builtin_s390_vfsqdb: {
20624 llvm::Type *ResultType = ConvertType(E->getType());
20625 Value *X = EmitScalarExpr(E->getArg(0));
20626 if (Builder.getIsFPConstrained()) {
20627 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
20628 return Builder.CreateConstrainedFPCall(F, { X });
20629 } else {
20630 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
20631 return Builder.CreateCall(F, X);
20632 }
20633 }
20634 case SystemZ::BI__builtin_s390_vfmasb:
20635 case SystemZ::BI__builtin_s390_vfmadb: {
20636 llvm::Type *ResultType = ConvertType(E->getType());
20637 Value *X = EmitScalarExpr(E->getArg(0));
20638 Value *Y = EmitScalarExpr(E->getArg(1));
20639 Value *Z = EmitScalarExpr(E->getArg(2));
20640 if (Builder.getIsFPConstrained()) {
20641 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20642 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
20643 } else {
20644 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20645 return Builder.CreateCall(F, {X, Y, Z});
20646 }
20647 }
20648 case SystemZ::BI__builtin_s390_vfmssb:
20649 case SystemZ::BI__builtin_s390_vfmsdb: {
20650 llvm::Type *ResultType = ConvertType(E->getType());
20651 Value *X = EmitScalarExpr(E->getArg(0));
20652 Value *Y = EmitScalarExpr(E->getArg(1));
20653 Value *Z = EmitScalarExpr(E->getArg(2));
20654 if (Builder.getIsFPConstrained()) {
20655 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20656 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20657 } else {
20658 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20659 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20660 }
20661 }
20662 case SystemZ::BI__builtin_s390_vfnmasb:
20663 case SystemZ::BI__builtin_s390_vfnmadb: {
20664 llvm::Type *ResultType = ConvertType(E->getType());
20665 Value *X = EmitScalarExpr(E->getArg(0));
20666 Value *Y = EmitScalarExpr(E->getArg(1));
20667 Value *Z = EmitScalarExpr(E->getArg(2));
20668 if (Builder.getIsFPConstrained()) {
20669 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20670 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
20671 } else {
20672 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20673 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
20674 }
20675 }
20676 case SystemZ::BI__builtin_s390_vfnmssb:
20677 case SystemZ::BI__builtin_s390_vfnmsdb: {
20678 llvm::Type *ResultType = ConvertType(E->getType());
20679 Value *X = EmitScalarExpr(E->getArg(0));
20680 Value *Y = EmitScalarExpr(E->getArg(1));
20681 Value *Z = EmitScalarExpr(E->getArg(2));
20682 if (Builder.getIsFPConstrained()) {
20683 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20684 Value *NegZ = Builder.CreateFNeg(Z, "sub");
20685 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
20686 } else {
20687 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20688 Value *NegZ = Builder.CreateFNeg(Z, "neg");
20689 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
20690 }
20691 }
20692 case SystemZ::BI__builtin_s390_vflpsb:
20693 case SystemZ::BI__builtin_s390_vflpdb: {
20694 llvm::Type *ResultType = ConvertType(E->getType());
20695 Value *X = EmitScalarExpr(E->getArg(0));
20696 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20697 return Builder.CreateCall(F, X);
20698 }
20699 case SystemZ::BI__builtin_s390_vflnsb:
20700 case SystemZ::BI__builtin_s390_vflndb: {
20701 llvm::Type *ResultType = ConvertType(E->getType());
20702 Value *X = EmitScalarExpr(E->getArg(0));
20703 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20704 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
20705 }
20706 case SystemZ::BI__builtin_s390_vfisb:
20707 case SystemZ::BI__builtin_s390_vfidb: {
20708 llvm::Type *ResultType = ConvertType(E->getType());
20709 Value *X = EmitScalarExpr(E->getArg(0));
20710 // Constant-fold the M4 and M5 mask arguments.
20711 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
20712 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20713 // Check whether this instance can be represented via an LLVM standard
20714 // intrinsic. We only support some combinations of M4 and M5.
20715 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20716 Intrinsic::ID CI;
20717 switch (M4.getZExtValue()) {
20718 default: break;
20719 case 0: // IEEE-inexact exception allowed
20720 switch (M5.getZExtValue()) {
20721 default: break;
20722 case 0: ID = Intrinsic::rint;
20723 CI = Intrinsic::experimental_constrained_rint; break;
20724 }
20725 break;
20726 case 4: // IEEE-inexact exception suppressed
20727 switch (M5.getZExtValue()) {
20728 default: break;
20729 case 0: ID = Intrinsic::nearbyint;
20730 CI = Intrinsic::experimental_constrained_nearbyint; break;
20731 case 1: ID = Intrinsic::round;
20732 CI = Intrinsic::experimental_constrained_round; break;
20733 case 5: ID = Intrinsic::trunc;
20734 CI = Intrinsic::experimental_constrained_trunc; break;
20735 case 6: ID = Intrinsic::ceil;
20736 CI = Intrinsic::experimental_constrained_ceil; break;
20737 case 7: ID = Intrinsic::floor;
20738 CI = Intrinsic::experimental_constrained_floor; break;
20739 }
20740 break;
20741 }
20742 if (ID != Intrinsic::not_intrinsic) {
20743 if (Builder.getIsFPConstrained()) {
20744 Function *F = CGM.getIntrinsic(CI, ResultType);
20745 return Builder.CreateConstrainedFPCall(F, X);
20746 } else {
20747 Function *F = CGM.getIntrinsic(ID, ResultType);
20748 return Builder.CreateCall(F, X);
20749 }
20750 }
20751 switch (BuiltinID) { // FIXME: constrained version?
20752 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
20753 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
20754 default: llvm_unreachable("Unknown BuiltinID");
20755 }
20756 Function *F = CGM.getIntrinsic(ID);
20757 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20758 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
20759 return Builder.CreateCall(F, {X, M4Value, M5Value});
20760 }
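// Worked example of the M4/M5 mapping above: __builtin_s390_vfidb(v, 4, 7)
// (inexact suppressed, round toward minus infinity) should select llvm.floor
// (or llvm.experimental.constrained.floor under strict FP), while unsupported
// mask combinations fall through to the target-specific s390.vfidb intrinsic.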
20761 case SystemZ::BI__builtin_s390_vfmaxsb:
20762 case SystemZ::BI__builtin_s390_vfmaxdb: {
20763 llvm::Type *ResultType = ConvertType(E->getType());
20764 Value *X = EmitScalarExpr(E->getArg(0));
20765 Value *Y = EmitScalarExpr(E->getArg(1));
20766 // Constant-fold the M4 mask argument.
20767 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20768 // Check whether this instance can be represented via an LLVM standard
20769 // intrinsic. We only support some values of M4.
20770 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20771 Intrinsic::ID CI;
20772 switch (M4.getZExtValue()) {
20773 default: break;
20774 case 4: ID = Intrinsic::maxnum;
20775 CI = Intrinsic::experimental_constrained_maxnum; break;
20776 }
20777 if (ID != Intrinsic::not_intrinsic) {
20778 if (Builder.getIsFPConstrained()) {
20779 Function *F = CGM.getIntrinsic(CI, ResultType);
20780 return Builder.CreateConstrainedFPCall(F, {X, Y});
20781 } else {
20782 Function *F = CGM.getIntrinsic(ID, ResultType);
20783 return Builder.CreateCall(F, {X, Y});
20784 }
20785 }
20786 switch (BuiltinID) {
20787 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
20788 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
20789 default: llvm_unreachable("Unknown BuiltinID");
20790 }
20791 Function *F = CGM.getIntrinsic(ID);
20792 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20793 return Builder.CreateCall(F, {X, Y, M4Value});
20794 }
20795 case SystemZ::BI__builtin_s390_vfminsb:
20796 case SystemZ::BI__builtin_s390_vfmindb: {
20797 llvm::Type *ResultType = ConvertType(E->getType());
20798 Value *X = EmitScalarExpr(E->getArg(0));
20799 Value *Y = EmitScalarExpr(E->getArg(1));
20800 // Constant-fold the M4 mask argument.
20801 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20802 // Check whether this instance can be represented via an LLVM standard
20803 // intrinsic. We only support some values of M4.
20804 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20805 Intrinsic::ID CI;
20806 switch (M4.getZExtValue()) {
20807 default: break;
20808 case 4: ID = Intrinsic::minnum;
20809 CI = Intrinsic::experimental_constrained_minnum; break;
20810 }
20811 if (ID != Intrinsic::not_intrinsic) {
20812 if (Builder.getIsFPConstrained()) {
20813 Function *F = CGM.getIntrinsic(CI, ResultType);
20814 return Builder.CreateConstrainedFPCall(F, {X, Y});
20815 } else {
20816 Function *F = CGM.getIntrinsic(ID, ResultType);
20817 return Builder.CreateCall(F, {X, Y});
20818 }
20819 }
20820 switch (BuiltinID) {
20821 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
20822 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
20823 default: llvm_unreachable("Unknown BuiltinID");
20824 }
20825 Function *F = CGM.getIntrinsic(ID);
20826 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20827 return Builder.CreateCall(F, {X, Y, M4Value});
20828 }
20829
20830 case SystemZ::BI__builtin_s390_vlbrh:
20831 case SystemZ::BI__builtin_s390_vlbrf:
20832 case SystemZ::BI__builtin_s390_vlbrg: {
20833 llvm::Type *ResultType = ConvertType(E->getType());
20834 Value *X = EmitScalarExpr(E->getArg(0));
20835 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
20836 return Builder.CreateCall(F, X);
20837 }
20838
20839 // Vector intrinsics that output the post-instruction CC value.
20840
20841#define INTRINSIC_WITH_CC(NAME) \
20842 case SystemZ::BI__builtin_##NAME: \
20843 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
20844
20845 INTRINSIC_WITH_CC(s390_vpkshs);
20846 INTRINSIC_WITH_CC(s390_vpksfs);
20847 INTRINSIC_WITH_CC(s390_vpksgs);
20848
20849 INTRINSIC_WITH_CC(s390_vpklshs);
20850 INTRINSIC_WITH_CC(s390_vpklsfs);
20851 INTRINSIC_WITH_CC(s390_vpklsgs);
20852
20853 INTRINSIC_WITH_CC(s390_vceqbs);
20854 INTRINSIC_WITH_CC(s390_vceqhs);
20855 INTRINSIC_WITH_CC(s390_vceqfs);
20856 INTRINSIC_WITH_CC(s390_vceqgs);
20857
20858 INTRINSIC_WITH_CC(s390_vchbs);
20859 INTRINSIC_WITH_CC(s390_vchhs);
20860 INTRINSIC_WITH_CC(s390_vchfs);
20861 INTRINSIC_WITH_CC(s390_vchgs);
20862
20863 INTRINSIC_WITH_CC(s390_vchlbs);
20864 INTRINSIC_WITH_CC(s390_vchlhs);
20865 INTRINSIC_WITH_CC(s390_vchlfs);
20866 INTRINSIC_WITH_CC(s390_vchlgs);
20867
20868 INTRINSIC_WITH_CC(s390_vfaebs);
20869 INTRINSIC_WITH_CC(s390_vfaehs);
20870 INTRINSIC_WITH_CC(s390_vfaefs);
20871
20872 INTRINSIC_WITH_CC(s390_vfaezbs);
20873 INTRINSIC_WITH_CC(s390_vfaezhs);
20874 INTRINSIC_WITH_CC(s390_vfaezfs);
20875
20876 INTRINSIC_WITH_CC(s390_vfeebs);
20877 INTRINSIC_WITH_CC(s390_vfeehs);
20878 INTRINSIC_WITH_CC(s390_vfeefs);
20879
20880 INTRINSIC_WITH_CC(s390_vfeezbs);
20881 INTRINSIC_WITH_CC(s390_vfeezhs);
20882 INTRINSIC_WITH_CC(s390_vfeezfs);
20883
20884 INTRINSIC_WITH_CC(s390_vfenebs);
20885 INTRINSIC_WITH_CC(s390_vfenehs);
20886 INTRINSIC_WITH_CC(s390_vfenefs);
20887
20888 INTRINSIC_WITH_CC(s390_vfenezbs);
20889 INTRINSIC_WITH_CC(s390_vfenezhs);
20890 INTRINSIC_WITH_CC(s390_vfenezfs);
20891
20892 INTRINSIC_WITH_CC(s390_vistrbs);
20893 INTRINSIC_WITH_CC(s390_vistrhs);
20894 INTRINSIC_WITH_CC(s390_vistrfs);
20895
20896 INTRINSIC_WITH_CC(s390_vstrcbs);
20897 INTRINSIC_WITH_CC(s390_vstrchs);
20898 INTRINSIC_WITH_CC(s390_vstrcfs);
20899
20900 INTRINSIC_WITH_CC(s390_vstrczbs);
20901 INTRINSIC_WITH_CC(s390_vstrczhs);
20902 INTRINSIC_WITH_CC(s390_vstrczfs);
20903
20904 INTRINSIC_WITH_CC(s390_vfcesbs);
20905 INTRINSIC_WITH_CC(s390_vfcedbs);
20906 INTRINSIC_WITH_CC(s390_vfchsbs);
20907 INTRINSIC_WITH_CC(s390_vfchdbs);
20908 INTRINSIC_WITH_CC(s390_vfchesbs);
20909 INTRINSIC_WITH_CC(s390_vfchedbs);
20910
20911 INTRINSIC_WITH_CC(s390_vftcisb);
20912 INTRINSIC_WITH_CC(s390_vftcidb);
20913
20914 INTRINSIC_WITH_CC(s390_vstrsb);
20915 INTRINSIC_WITH_CC(s390_vstrsh);
20916 INTRINSIC_WITH_CC(s390_vstrsf);
20917
20918 INTRINSIC_WITH_CC(s390_vstrszb);
20919 INTRINSIC_WITH_CC(s390_vstrszh);
20920 INTRINSIC_WITH_CC(s390_vstrszf);
20921
20922#undef INTRINSIC_WITH_CC
20923
20924 default:
20925 return nullptr;
20926 }
20927}
20928
20929namespace {
20930// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
20931struct NVPTXMmaLdstInfo {
20932 unsigned NumResults; // Number of elements to load/store
20933 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
20934 unsigned IID_col;
20935 unsigned IID_row;
20936};
20937
20938#define MMA_INTR(geom_op_type, layout) \
20939 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
20940#define MMA_LDST(n, geom_op_type) \
20941 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
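// For reference, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   {8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//       Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride}
// i.e. the element count plus the column- and row-major intrinsic IDs.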
20942
20943static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
20944 switch (BuiltinID) {
20945 // FP MMA loads
20946 case NVPTX::BI__hmma_m16n16k16_ld_a:
20947 return MMA_LDST(8, m16n16k16_load_a_f16);
20948 case NVPTX::BI__hmma_m16n16k16_ld_b:
20949 return MMA_LDST(8, m16n16k16_load_b_f16);
20950 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20951 return MMA_LDST(4, m16n16k16_load_c_f16);
20952 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20953 return MMA_LDST(8, m16n16k16_load_c_f32);
20954 case NVPTX::BI__hmma_m32n8k16_ld_a:
20955 return MMA_LDST(8, m32n8k16_load_a_f16);
20956 case NVPTX::BI__hmma_m32n8k16_ld_b:
20957 return MMA_LDST(8, m32n8k16_load_b_f16);
20958 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20959 return MMA_LDST(4, m32n8k16_load_c_f16);
20960 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20961 return MMA_LDST(8, m32n8k16_load_c_f32);
20962 case NVPTX::BI__hmma_m8n32k16_ld_a:
20963 return MMA_LDST(8, m8n32k16_load_a_f16);
20964 case NVPTX::BI__hmma_m8n32k16_ld_b:
20965 return MMA_LDST(8, m8n32k16_load_b_f16);
20966 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20967 return MMA_LDST(4, m8n32k16_load_c_f16);
20968 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20969 return MMA_LDST(8, m8n32k16_load_c_f32);
20970
20971 // Integer MMA loads
20972 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20973 return MMA_LDST(2, m16n16k16_load_a_s8);
20974 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20975 return MMA_LDST(2, m16n16k16_load_a_u8);
20976 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20977 return MMA_LDST(2, m16n16k16_load_b_s8);
20978 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20979 return MMA_LDST(2, m16n16k16_load_b_u8);
20980 case NVPTX::BI__imma_m16n16k16_ld_c:
20981 return MMA_LDST(8, m16n16k16_load_c_s32);
20982 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20983 return MMA_LDST(4, m32n8k16_load_a_s8);
20984 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20985 return MMA_LDST(4, m32n8k16_load_a_u8);
20986 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20987 return MMA_LDST(1, m32n8k16_load_b_s8);
20988 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20989 return MMA_LDST(1, m32n8k16_load_b_u8);
20990 case NVPTX::BI__imma_m32n8k16_ld_c:
20991 return MMA_LDST(8, m32n8k16_load_c_s32);
20992 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20993 return MMA_LDST(1, m8n32k16_load_a_s8);
20994 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20995 return MMA_LDST(1, m8n32k16_load_a_u8);
20996 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20997 return MMA_LDST(4, m8n32k16_load_b_s8);
20998 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20999 return MMA_LDST(4, m8n32k16_load_b_u8);
21000 case NVPTX::BI__imma_m8n32k16_ld_c:
21001 return MMA_LDST(8, m8n32k16_load_c_s32);
21002
21003 // Sub-integer MMA loads.
21004 // Only row/col layout is supported by A/B fragments.
21005 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21006 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
21007 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21008 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
21009 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21010 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
21011 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21012 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
21013 case NVPTX::BI__imma_m8n8k32_ld_c:
21014 return MMA_LDST(2, m8n8k32_load_c_s32);
21015 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21016 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
21017 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21018 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
21019 case NVPTX::BI__bmma_m8n8k128_ld_c:
21020 return MMA_LDST(2, m8n8k128_load_c_s32);
21021
21022 // Double MMA loads
21023 case NVPTX::BI__dmma_m8n8k4_ld_a:
21024 return MMA_LDST(1, m8n8k4_load_a_f64);
21025 case NVPTX::BI__dmma_m8n8k4_ld_b:
21026 return MMA_LDST(1, m8n8k4_load_b_f64);
21027 case NVPTX::BI__dmma_m8n8k4_ld_c:
21028 return MMA_LDST(2, m8n8k4_load_c_f64);
21029
21030 // Alternate float MMA loads
21031 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21032 return MMA_LDST(4, m16n16k16_load_a_bf16);
21033 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21034 return MMA_LDST(4, m16n16k16_load_b_bf16);
21035 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21036 return MMA_LDST(2, m8n32k16_load_a_bf16);
21037 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21038 return MMA_LDST(8, m8n32k16_load_b_bf16);
21039 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21040 return MMA_LDST(8, m32n8k16_load_a_bf16);
21041 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21042 return MMA_LDST(2, m32n8k16_load_b_bf16);
21043 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21044 return MMA_LDST(4, m16n16k8_load_a_tf32);
21045 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21046 return MMA_LDST(4, m16n16k8_load_b_tf32);
21047 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
21048 return MMA_LDST(8, m16n16k8_load_c_f32);
21049
21050 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
21051 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
21052 // use fragment C for both loads and stores.
21053 // FP MMA stores.
21054 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21055 return MMA_LDST(4, m16n16k16_store_d_f16);
21056 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21057 return MMA_LDST(8, m16n16k16_store_d_f32);
21058 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21059 return MMA_LDST(4, m32n8k16_store_d_f16);
21060 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21061 return MMA_LDST(8, m32n8k16_store_d_f32);
21062 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21063 return MMA_LDST(4, m8n32k16_store_d_f16);
21064 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21065 return MMA_LDST(8, m8n32k16_store_d_f32);
21066
21067 // Integer and sub-integer MMA stores.
21068 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
21069 // name, integer loads/stores use LLVM's i32.
21070 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21071 return MMA_LDST(8, m16n16k16_store_d_s32);
21072 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21073 return MMA_LDST(8, m32n8k16_store_d_s32);
21074 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21075 return MMA_LDST(8, m8n32k16_store_d_s32);
21076 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21077 return MMA_LDST(2, m8n8k32_store_d_s32);
21078 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21079 return MMA_LDST(2, m8n8k128_store_d_s32);
21080
21081 // Double MMA store
21082 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21083 return MMA_LDST(2, m8n8k4_store_d_f64);
21084
21085 // Alternate float MMA store
21086 case NVPTX::BI__mma_m16n16k8_st_c_f32:
21087 return MMA_LDST(8, m16n16k8_store_d_f32);
21088
21089 default:
21090 llvm_unreachable("Unknown MMA builtin");
21091 }
21092}
21093#undef MMA_LDST
21094#undef MMA_INTR
21095
21096
21097struct NVPTXMmaInfo {
21098 unsigned NumEltsA;
21099 unsigned NumEltsB;
21100 unsigned NumEltsC;
21101 unsigned NumEltsD;
21102
21103 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
21104 // over 'col' for layout. The index of non-satf variants is expected to match
21105 // the undocumented layout constants used by CUDA's mma.hpp.
21106 std::array<unsigned, 8> Variants;
21107
21108 unsigned getMMAIntrinsic(int Layout, bool Satf) {
21109 unsigned Index = Layout + 4 * Satf;
21110 if (Index >= Variants.size())
21111 return 0;
21112 return Variants[Index];
21113 }
21114};
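// Index example: layouts are numbered row_row=0, row_col=1, col_row=2,
// col_col=3, so getMMAIntrinsic(/*Layout=*/1, /*Satf=*/true) reads
// Variants[1 + 4 * 1] == Variants[5], the row_col ".satfinite" variant (or 0
// where that combination is unsupported).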
21115
21116 // Returns an intrinsic that matches Layout and Satf for valid combinations of
21117 // Layout and Satf, 0 otherwise.
21118static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
21119 // clang-format off
21120#define MMA_VARIANTS(geom, type) \
21121 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21122 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21123 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21124 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
21125#define MMA_SATF_VARIANTS(geom, type) \
21126 MMA_VARIANTS(geom, type), \
21127 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21128 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21129 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21130 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
21131// Sub-integer MMA only supports row.col layout.
21132#define MMA_VARIANTS_I4(geom, type) \
21133 0, \
21134 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21135 0, \
21136 0, \
21137 0, \
21138 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21139 0, \
21140 0
21141// b1 MMA does not support .satfinite.
21142#define MMA_VARIANTS_B1_XOR(geom, type) \
21143 0, \
21144 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21145 0, \
21146 0, \
21147 0, \
21148 0, \
21149 0, \
21150 0
21151#define MMA_VARIANTS_B1_AND(geom, type) \
21152 0, \
21153 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21154 0, \
21155 0, \
21156 0, \
21157 0, \
21158 0, \
21159 0
21160 // clang-format on
21161 switch (BuiltinID) {
21162 // FP MMA
21163 // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
21164 // NumEltsN of return value are ordered as A,B,C,D.
21165 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21166 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
21167 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21168 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
21169 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21170 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
21171 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21172 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
21173 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21174 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
21175 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21176 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
21177 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21178 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
21179 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21180 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
21181 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21182 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
21183 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21184 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
21185 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21186 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
21187 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21188 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
21189
21190 // Integer MMA
21191 case NVPTX::BI__imma_m16n16k16_mma_s8:
21192 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
21193 case NVPTX::BI__imma_m16n16k16_mma_u8:
21194 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
21195 case NVPTX::BI__imma_m32n8k16_mma_s8:
21196 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
21197 case NVPTX::BI__imma_m32n8k16_mma_u8:
21198 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
21199 case NVPTX::BI__imma_m8n32k16_mma_s8:
21200 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
21201 case NVPTX::BI__imma_m8n32k16_mma_u8:
21202 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
21203
21204 // Sub-integer MMA
21205 case NVPTX::BI__imma_m8n8k32_mma_s4:
21206 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
21207 case NVPTX::BI__imma_m8n8k32_mma_u4:
21208 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
21209 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21210 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
21211 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21212 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
21213
21214 // Double MMA
21215 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21216 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
21217
21218 // Alternate FP MMA
21219 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21220 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
21221 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21222 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
21223 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21224 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
21225 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
21226 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
21227 default:
21228 llvm_unreachable("Unexpected builtin ID.");
21229 }
21230#undef MMA_VARIANTS
21231#undef MMA_SATF_VARIANTS
21232#undef MMA_VARIANTS_I4
21233#undef MMA_VARIANTS_B1_AND
21234#undef MMA_VARIANTS_B1_XOR
21235}
21236
21237static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
21238 const CallExpr *E) {
21239 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21240 QualType ArgType = E->getArg(0)->getType();
21241 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21242 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21243 return CGF.Builder.CreateCall(
21244 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21245 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
21246}
21247
21248static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
21249 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21250 QualType ArgType = E->getArg(0)->getType();
21251 clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21252 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21253
21254 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
21255 auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
21256 auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
21257 MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
21258 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
21259
21260 return LD;
21261}
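// E.g. __nvvm_ldg_f4 goes through this path: the generic pointer is
// addrspacecast to addrspace(1) (global memory) and the load is tagged
// !invariant.load, telling LLVM the loaded location does not change, so the
// load can be freely hoisted and CSE'd.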
21262
21263static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
21264 const CallExpr *E) {
21265 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21266 llvm::Type *ElemTy =
21267 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21268 return CGF.Builder.CreateCall(
21269 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21270 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
21271}
21272
21273static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
21274 CodeGenFunction &CGF, const CallExpr *E,
21275 int SrcSize) {
21276 return E->getNumArgs() == 3
21277 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
21278 {CGF.EmitScalarExpr(E->getArg(0)),
21279 CGF.EmitScalarExpr(E->getArg(1)),
21280 CGF.EmitScalarExpr(E->getArg(2))})
21281 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
21282 {CGF.EmitScalarExpr(E->getArg(0)),
21283 CGF.EmitScalarExpr(E->getArg(1))});
21284}
21285
21286static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
21287 const CallExpr *E, CodeGenFunction &CGF) {
21288 auto &C = CGF.CGM.getContext();
21289 if (!(C.getLangOpts().NativeHalfType ||
21290 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
21291 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
21292 " requires native half type support.");
21293 return nullptr;
21294 }
21295
21296 if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
21297 return MakeLdg(CGF, E);
21298
21299 if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
21300 return MakeLdu(IntrinsicID, CGF, E);
21301
21302 SmallVector<Value *, 16> Args;
21303 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
21304 auto *FTy = F->getFunctionType();
21305 unsigned ICEArguments = 0;
21306 ASTContext::GetBuiltinTypeError Error;
21307 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
21308 assert(Error == ASTContext::GE_None && "Should not codegen an error");
21309 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
21310 assert((ICEArguments & (1 << i)) == 0);
21311 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
21312 auto *PTy = FTy->getParamType(i);
21313 if (PTy != ArgValue->getType())
21314 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
21315 Args.push_back(ArgValue);
21316 }
21317
21318 return CGF.Builder.CreateCall(F, Args);
21319}
21320} // namespace
21321
21322Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
21323 const CallExpr *E) {
21324 switch (BuiltinID) {
21325 case NVPTX::BI__nvvm_atom_add_gen_i:
21326 case NVPTX::BI__nvvm_atom_add_gen_l:
21327 case NVPTX::BI__nvvm_atom_add_gen_ll:
21328 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
21329
21330 case NVPTX::BI__nvvm_atom_sub_gen_i:
21331 case NVPTX::BI__nvvm_atom_sub_gen_l:
21332 case NVPTX::BI__nvvm_atom_sub_gen_ll:
21333 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
21334
21335 case NVPTX::BI__nvvm_atom_and_gen_i:
21336 case NVPTX::BI__nvvm_atom_and_gen_l:
21337 case NVPTX::BI__nvvm_atom_and_gen_ll:
21338 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
21339
21340 case NVPTX::BI__nvvm_atom_or_gen_i:
21341 case NVPTX::BI__nvvm_atom_or_gen_l:
21342 case NVPTX::BI__nvvm_atom_or_gen_ll:
21343 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
21344
21345 case NVPTX::BI__nvvm_atom_xor_gen_i:
21346 case NVPTX::BI__nvvm_atom_xor_gen_l:
21347 case NVPTX::BI__nvvm_atom_xor_gen_ll:
21348 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
21349
21350 case NVPTX::BI__nvvm_atom_xchg_gen_i:
21351 case NVPTX::BI__nvvm_atom_xchg_gen_l:
21352 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
21353 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
21354
21355 case NVPTX::BI__nvvm_atom_max_gen_i:
21356 case NVPTX::BI__nvvm_atom_max_gen_l:
21357 case NVPTX::BI__nvvm_atom_max_gen_ll:
21358 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
21359
21360 case NVPTX::BI__nvvm_atom_max_gen_ui:
21361 case NVPTX::BI__nvvm_atom_max_gen_ul:
21362 case NVPTX::BI__nvvm_atom_max_gen_ull:
21363 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
21364
21365 case NVPTX::BI__nvvm_atom_min_gen_i:
21366 case NVPTX::BI__nvvm_atom_min_gen_l:
21367 case NVPTX::BI__nvvm_atom_min_gen_ll:
21368 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
21369
21370 case NVPTX::BI__nvvm_atom_min_gen_ui:
21371 case NVPTX::BI__nvvm_atom_min_gen_ul:
21372 case NVPTX::BI__nvvm_atom_min_gen_ull:
21373 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
21374
21375 case NVPTX::BI__nvvm_atom_cas_gen_us:
21376 case NVPTX::BI__nvvm_atom_cas_gen_i:
21377 case NVPTX::BI__nvvm_atom_cas_gen_l:
21378 case NVPTX::BI__nvvm_atom_cas_gen_ll:
21379 // __nvvm_atom_cas_gen_* should return the old value rather than the
21380 // success flag.
21381 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
21382
21383 case NVPTX::BI__nvvm_atom_add_gen_f:
21384 case NVPTX::BI__nvvm_atom_add_gen_d: {
21385 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
21386 Value *Val = EmitScalarExpr(E->getArg(1));
21387
21388 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
21389 AtomicOrdering::SequentiallyConsistent);
21390 }
21391
21392 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
21393 Value *Ptr = EmitScalarExpr(E->getArg(0));
21394 Value *Val = EmitScalarExpr(E->getArg(1));
21395 Function *FnALI32 =
21396 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
21397 return Builder.CreateCall(FnALI32, {Ptr, Val});
21398 }
21399
21400 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
21401 Value *Ptr = EmitScalarExpr(E->getArg(0));
21402 Value *Val = EmitScalarExpr(E->getArg(1));
21403 Function *FnALD32 =
21404 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
21405 return Builder.CreateCall(FnALD32, {Ptr, Val});
21406 }
21407
21408 case NVPTX::BI__nvvm_ldg_c:
21409 case NVPTX::BI__nvvm_ldg_sc:
21410 case NVPTX::BI__nvvm_ldg_c2:
21411 case NVPTX::BI__nvvm_ldg_sc2:
21412 case NVPTX::BI__nvvm_ldg_c4:
21413 case NVPTX::BI__nvvm_ldg_sc4:
21414 case NVPTX::BI__nvvm_ldg_s:
21415 case NVPTX::BI__nvvm_ldg_s2:
21416 case NVPTX::BI__nvvm_ldg_s4:
21417 case NVPTX::BI__nvvm_ldg_i:
21418 case NVPTX::BI__nvvm_ldg_i2:
21419 case NVPTX::BI__nvvm_ldg_i4:
21420 case NVPTX::BI__nvvm_ldg_l:
21421 case NVPTX::BI__nvvm_ldg_l2:
21422 case NVPTX::BI__nvvm_ldg_ll:
21423 case NVPTX::BI__nvvm_ldg_ll2:
21424 case NVPTX::BI__nvvm_ldg_uc:
21425 case NVPTX::BI__nvvm_ldg_uc2:
21426 case NVPTX::BI__nvvm_ldg_uc4:
21427 case NVPTX::BI__nvvm_ldg_us:
21428 case NVPTX::BI__nvvm_ldg_us2:
21429 case NVPTX::BI__nvvm_ldg_us4:
21430 case NVPTX::BI__nvvm_ldg_ui:
21431 case NVPTX::BI__nvvm_ldg_ui2:
21432 case NVPTX::BI__nvvm_ldg_ui4:
21433 case NVPTX::BI__nvvm_ldg_ul:
21434 case NVPTX::BI__nvvm_ldg_ul2:
21435 case NVPTX::BI__nvvm_ldg_ull:
21436 case NVPTX::BI__nvvm_ldg_ull2:
21437 case NVPTX::BI__nvvm_ldg_f:
21438 case NVPTX::BI__nvvm_ldg_f2:
21439 case NVPTX::BI__nvvm_ldg_f4:
21440 case NVPTX::BI__nvvm_ldg_d:
21441 case NVPTX::BI__nvvm_ldg_d2:
21442 // PTX Interoperability section 2.2: "For a vector with an even number of
21443 // elements, its alignment is set to number of elements times the alignment
21444 // of its member: n*alignof(t)."
21445 return MakeLdg(*this, E);
21446
21447 case NVPTX::BI__nvvm_ldu_c:
21448 case NVPTX::BI__nvvm_ldu_sc:
21449 case NVPTX::BI__nvvm_ldu_c2:
21450 case NVPTX::BI__nvvm_ldu_sc2:
21451 case NVPTX::BI__nvvm_ldu_c4:
21452 case NVPTX::BI__nvvm_ldu_sc4:
21453 case NVPTX::BI__nvvm_ldu_s:
21454 case NVPTX::BI__nvvm_ldu_s2:
21455 case NVPTX::BI__nvvm_ldu_s4:
21456 case NVPTX::BI__nvvm_ldu_i:
21457 case NVPTX::BI__nvvm_ldu_i2:
21458 case NVPTX::BI__nvvm_ldu_i4:
21459 case NVPTX::BI__nvvm_ldu_l:
21460 case NVPTX::BI__nvvm_ldu_l2:
21461 case NVPTX::BI__nvvm_ldu_ll:
21462 case NVPTX::BI__nvvm_ldu_ll2:
21463 case NVPTX::BI__nvvm_ldu_uc:
21464 case NVPTX::BI__nvvm_ldu_uc2:
21465 case NVPTX::BI__nvvm_ldu_uc4:
21466 case NVPTX::BI__nvvm_ldu_us:
21467 case NVPTX::BI__nvvm_ldu_us2:
21468 case NVPTX::BI__nvvm_ldu_us4:
21469 case NVPTX::BI__nvvm_ldu_ui:
21470 case NVPTX::BI__nvvm_ldu_ui2:
21471 case NVPTX::BI__nvvm_ldu_ui4:
21472 case NVPTX::BI__nvvm_ldu_ul:
21473 case NVPTX::BI__nvvm_ldu_ul2:
21474 case NVPTX::BI__nvvm_ldu_ull:
21475 case NVPTX::BI__nvvm_ldu_ull2:
21476 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
21477 case NVPTX::BI__nvvm_ldu_f:
21478 case NVPTX::BI__nvvm_ldu_f2:
21479 case NVPTX::BI__nvvm_ldu_f4:
21480 case NVPTX::BI__nvvm_ldu_d:
21481 case NVPTX::BI__nvvm_ldu_d2:
21482 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
21483
21484 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
21485 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
21486 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
21487 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
21488 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
21489 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
21490 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
21491 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
21492 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
21493 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
21494 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
21495 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
21496 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
21497 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
21498 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
21499 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
21500 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
21501 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
21502 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
21503 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
21504 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
21505 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
21506 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
21507 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
21508 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
21509 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
21510 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
21511 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
21512 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
21513 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
21514 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
21515 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
21516 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
21517 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
21518 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
21519 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
21520 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
21521 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
21522 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
21523 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
21524 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
21525 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
21526 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
21527 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
21528 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
21529 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
21530 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
21531 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
21532 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
21533 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
21534 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
21535 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
21536 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
21537 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
21538 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
21539 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
21540 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
21541 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
21542 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
21543 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
21544 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
21545 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
21546 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
21547 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
21548 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
21549 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
21550 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
21551 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
21552 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
21553 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
21554 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
21555 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
21556 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
21557 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
21558 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
21559 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
21560 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
21561 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
21562 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
21563 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
21564 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
21565 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
21566 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
21567 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
21568 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
21569 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
21570 Value *Ptr = EmitScalarExpr(E->getArg(0));
21571 llvm::Type *ElemTy =
21572 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21573 return Builder.CreateCall(
21574 CGM.getIntrinsic(
21575 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
21576 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21577 }
21578 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
21579 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
21580 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
21581 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
21582 Value *Ptr = EmitScalarExpr(E->getArg(0));
21583 llvm::Type *ElemTy =
21584 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21585 return Builder.CreateCall(
21586 CGM.getIntrinsic(
21587 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
21588 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21589 }
21590 case NVPTX::BI__nvvm_match_all_sync_i32p:
21591 case NVPTX::BI__nvvm_match_all_sync_i64p: {
21592 Value *Mask = EmitScalarExpr(E->getArg(0));
21593 Value *Val = EmitScalarExpr(E->getArg(1));
21594 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
21595 Value *ResultPair = Builder.CreateCall(
21596 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
21597 ? Intrinsic::nvvm_match_all_sync_i32p
21598 : Intrinsic::nvvm_match_all_sync_i64p),
21599 {Mask, Val});
21600 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
21601 PredOutPtr.getElementType());
21602 Builder.CreateStore(Pred, PredOutPtr);
21603 return Builder.CreateExtractValue(ResultPair, 0);
21604 }
21605
21606 // FP MMA loads
21607 case NVPTX::BI__hmma_m16n16k16_ld_a:
21608 case NVPTX::BI__hmma_m16n16k16_ld_b:
21609 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21610 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21611 case NVPTX::BI__hmma_m32n8k16_ld_a:
21612 case NVPTX::BI__hmma_m32n8k16_ld_b:
21613 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21614 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21615 case NVPTX::BI__hmma_m8n32k16_ld_a:
21616 case NVPTX::BI__hmma_m8n32k16_ld_b:
21617 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21618 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21619 // Integer MMA loads.
21620 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21621 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21622 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21623 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21624 case NVPTX::BI__imma_m16n16k16_ld_c:
21625 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21626 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21627 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21628 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21629 case NVPTX::BI__imma_m32n8k16_ld_c:
21630 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21631 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21632 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21633 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21634 case NVPTX::BI__imma_m8n32k16_ld_c:
21635 // Sub-integer MMA loads.
21636 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21637 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21638 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21639 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21640 case NVPTX::BI__imma_m8n8k32_ld_c:
21641 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21642 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21643 case NVPTX::BI__bmma_m8n8k128_ld_c:
21644 // Double MMA loads.
21645 case NVPTX::BI__dmma_m8n8k4_ld_a:
21646 case NVPTX::BI__dmma_m8n8k4_ld_b:
21647 case NVPTX::BI__dmma_m8n8k4_ld_c:
21648 // Alternate float MMA loads.
21649 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21650 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21651 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21652 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21653 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21654 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21655 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21656 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21657 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
21658 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21659 Value *Src = EmitScalarExpr(E->getArg(1));
21660 Value *Ldm = EmitScalarExpr(E->getArg(2));
21661 std::optional<llvm::APSInt> isColMajorArg =
21662 E->getArg(3)->getIntegerConstantExpr(getContext());
21663 if (!isColMajorArg)
21664 return nullptr;
21665 bool isColMajor = isColMajorArg->getSExtValue();
21666 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21667 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21668 if (IID == 0)
21669 return nullptr;
21670
21671 Value *Result =
21672 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
21673
21674 // Save returned values.
21675 assert(II.NumResults);
21676 if (II.NumResults == 1) {
21677 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
21678 CharUnits::fromQuantity(4));
21679 } else {
21680 for (unsigned i = 0; i < II.NumResults; ++i) {
21681 Builder.CreateAlignedStore(
21682 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
21683 Dst.getElementType()),
21684 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21685 llvm::ConstantInt::get(IntTy, i)),
21686 CharUnits::fromQuantity(4));
21687 }
21688 }
21689 return Result;
21690 }
21691
21692 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21693 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21694 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21695 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21696 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21697 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21698 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21699 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21700 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21701 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21702 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21703 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21704 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
21705 Value *Dst = EmitScalarExpr(E->getArg(0));
21706 Address Src = EmitPointerWithAlignment(E->getArg(1));
21707 Value *Ldm = EmitScalarExpr(E->getArg(2));
21708 std::optional<llvm::APSInt> isColMajorArg =
21709 E->getArg(3)->getIntegerConstantExpr(getContext());
21710 if (!isColMajorArg)
21711 return nullptr;
21712 bool isColMajor = isColMajorArg->getSExtValue();
21713 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21714 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21715 if (IID == 0)
21716 return nullptr;
21717 Function *Intrinsic =
21718 CGM.getIntrinsic(IID, Dst->getType());
21719 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
21720 SmallVector<Value *, 10> Values = {Dst};
21721 for (unsigned i = 0; i < II.NumResults; ++i) {
21722 Value *V = Builder.CreateAlignedLoad(
21723 Src.getElementType(),
21724 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
21725 llvm::ConstantInt::get(IntTy, i)),
21726 CharUnits::fromQuantity(4));
21727 Values.push_back(Builder.CreateBitCast(V, ParamType));
21728 }
21729 Values.push_back(Ldm);
21730 Value *Result = Builder.CreateCall(Intrinsic, Values);
21731 return Result;
21732 }
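// For the st_c builtins the flow above mirrors the loads: II.NumResults
// fragment elements are loaded from Src, bitcast to the intrinsic's expected
// parameter type, and passed together with the leading-dimension operand Ldm
// to the corresponding wmma store intrinsic.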
21733
21734 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
21735 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
21736 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21737 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21738 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21739 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21740 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21741 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21742 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21743 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21744 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21745 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21746 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21747 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21748 case NVPTX::BI__imma_m16n16k16_mma_s8:
21749 case NVPTX::BI__imma_m16n16k16_mma_u8:
21750 case NVPTX::BI__imma_m32n8k16_mma_s8:
21751 case NVPTX::BI__imma_m32n8k16_mma_u8:
21752 case NVPTX::BI__imma_m8n32k16_mma_s8:
21753 case NVPTX::BI__imma_m8n32k16_mma_u8:
21754 case NVPTX::BI__imma_m8n8k32_mma_s4:
21755 case NVPTX::BI__imma_m8n8k32_mma_u4:
21756 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21757 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21758 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21759 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21760 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21761 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21762 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
21763 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21764 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
21765 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
21766 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
21767 std::optional<llvm::APSInt> LayoutArg =
21768 E->getArg(4)->getIntegerConstantExpr(getContext());
21769 if (!LayoutArg)
21770 return nullptr;
21771 int Layout = LayoutArg->getSExtValue();
21772 if (Layout < 0 || Layout > 3)
21773 return nullptr;
21774 llvm::APSInt SatfArg;
21775 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
21776 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
21777 SatfArg = 0; // .b1 does not have satf argument.
21778 else if (std::optional<llvm::APSInt> OptSatfArg =
21779 E->getArg(5)->getIntegerConstantExpr(getContext()))
21780 SatfArg = *OptSatfArg;
21781 else
21782 return nullptr;
21783 bool Satf = SatfArg.getSExtValue();
21784 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
21785 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
21786 if (IID == 0) // Unsupported combination of Layout/Satf.
21787 return nullptr;
21788 
21789 SmallVector<Value *, 24> Values;
21790 Function *Intrinsic = CGM.getIntrinsic(IID);
21791 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
21792 // Load A
21793 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
21794 Value *V = Builder.CreateAlignedLoad(
21795 SrcA.getElementType(),
21796 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
21797 llvm::ConstantInt::get(IntTy, i)),
21798 CharUnits::fromQuantity(4));
21799 Values.push_back(Builder.CreateBitCast(V, AType));
21800 }
21801 // Load B
21802 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
21803 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
21804 Value *V = Builder.CreateAlignedLoad(
21805 SrcB.getElementType(),
21806 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
21807 llvm::ConstantInt::get(IntTy, i)),
21808 CharUnits::fromQuantity(4));
21809 Values.push_back(Builder.CreateBitCast(V, BType));
21810 }
21811 // Load C
21812 llvm::Type *CType =
21813 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
21814 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
21815 Value *V = Builder.CreateAlignedLoad(
21816 SrcC.getElementType(),
21817 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
21818 llvm::ConstantInt::get(IntTy, i)),
21819 CharUnits::fromQuantity(4));
21820 Values.push_back(Builder.CreateBitCast(V, CType));
21821 }
21822 Value *Result = Builder.CreateCall(Intrinsic, Values);
21823 llvm::Type *DType = Dst.getElementType();
21824 for (unsigned i = 0; i < MI.NumEltsD; ++i)
21825 Builder.CreateAlignedStore(
21826 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
21827 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21828 llvm::ConstantInt::get(IntTy, i)),
21829 CharUnits::fromQuantity(4));
21830 return Result;
21831 }
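// Note: the single Layout operand (0-3) encodes the row/column-major choice
// for both A and B, and getMMAIntrinsic(Layout, Satf) maps that pair onto one
// of up to eight intrinsic variants; unsupported combinations yield IID == 0
// and fall back to returning nullptr above.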
21832 // The following builtins require half type support
21833 case NVPTX::BI__nvvm_ex2_approx_f16:
21834 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
21835 case NVPTX::BI__nvvm_ex2_approx_f16x2:
21836 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
21837 case NVPTX::BI__nvvm_ff2f16x2_rn:
21838 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
21839 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
21840 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
21841 case NVPTX::BI__nvvm_ff2f16x2_rz:
21842 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
21843 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
21844 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
21845 case NVPTX::BI__nvvm_fma_rn_f16:
21846 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
21847 case NVPTX::BI__nvvm_fma_rn_f16x2:
21848 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
21849 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
21850 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
21851 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
21852 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
21853 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
21854 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
21855 *this);
21856 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
21857 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
21858 *this);
21859 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
21860 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
21861 *this);
21862 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
21863 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
21864 *this);
21865 case NVPTX::BI__nvvm_fma_rn_relu_f16:
21866 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
21867 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
21868 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
21869 case NVPTX::BI__nvvm_fma_rn_sat_f16:
21870 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
21871 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
21872 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
21873 case NVPTX::BI__nvvm_fmax_f16:
21874 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
21875 case NVPTX::BI__nvvm_fmax_f16x2:
21876 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
21877 case NVPTX::BI__nvvm_fmax_ftz_f16:
21878 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
21879 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
21880 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
21881 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
21882 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
21883 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
21884 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
21885 *this);
21886 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
21887 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
21888 E, *this);
21889 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
21890 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
21891 BuiltinID, E, *this);
21892 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
21893 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
21894 *this);
21895 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
21896 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
21897 E, *this);
21898 case NVPTX::BI__nvvm_fmax_nan_f16:
21899 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
21900 case NVPTX::BI__nvvm_fmax_nan_f16x2:
21901 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
21902 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
21903 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
21904 *this);
21905 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
21906 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
21907 E, *this);
21908 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
21909 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
21910 *this);
21911 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
21912 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
21913 *this);
21914 case NVPTX::BI__nvvm_fmin_f16:
21915 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
21916 case NVPTX::BI__nvvm_fmin_f16x2:
21917 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
21918 case NVPTX::BI__nvvm_fmin_ftz_f16:
21919 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
21920 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
21921 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
21922 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
21923 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
21924 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
21925 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
21926 *this);
21927 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
21928 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
21929 E, *this);
21930 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
21931 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
21932 BuiltinID, E, *this);
21933 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
21934 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
21935 *this);
21936 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
21937 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
21938 E, *this);
21939 case NVPTX::BI__nvvm_fmin_nan_f16:
21940 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
21941 case NVPTX::BI__nvvm_fmin_nan_f16x2:
21942 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
21943 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
21944 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
21945 *this);
21946 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
21947 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
21948 E, *this);
21949 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
21950 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
21951 *this);
21952 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
21953 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
21954 *this);
21955 case NVPTX::BI__nvvm_ldg_h:
21956 case NVPTX::BI__nvvm_ldg_h2:
21957 return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
21958 case NVPTX::BI__nvvm_ldu_h:
21959 case NVPTX::BI__nvvm_ldu_h2:
21960 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21961 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
21962 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
21963 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
21964 4);
21965 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
21966 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
21967 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
21968 8);
21969 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
21970 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
21971 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
21972 16);
21973 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
21974 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
21975 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
21976 16);
21977 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
21978 return Builder.CreateCall(
21979 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
21980 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
21981 return Builder.CreateCall(
21982 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
21983 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
21984 return Builder.CreateCall(
21985 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
21986 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
21987 return Builder.CreateCall(
21988 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
21989 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
21990 return Builder.CreateCall(
21991 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
21992 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
21993 return Builder.CreateCall(
21994 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
21995 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
21996 return Builder.CreateCall(
21997 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
21998 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
21999 return Builder.CreateCall(
22000 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
22001 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
22002 return Builder.CreateCall(
22003 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
22004 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
22005 return Builder.CreateCall(
22006 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
22007 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
22008 return Builder.CreateCall(
22009 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
22010 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
22011 return Builder.CreateCall(
22012 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
22013 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
22014 return Builder.CreateCall(
22015 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
22016 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
22017 return Builder.CreateCall(
22018 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
22019 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
22020 return Builder.CreateCall(
22021 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
22022 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
22023 return Builder.CreateCall(
22024 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
22025 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
22026 return Builder.CreateCall(
22027 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
22028 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
22029 return Builder.CreateCall(
22030 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
22031 case NVPTX::BI__nvvm_is_explicit_cluster:
22032 return Builder.CreateCall(
22033 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
22034 case NVPTX::BI__nvvm_isspacep_shared_cluster:
22035 return Builder.CreateCall(
22036 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
22037 EmitScalarExpr(E->getArg(0)));
22038 case NVPTX::BI__nvvm_mapa:
22039 return Builder.CreateCall(
22040 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
22041 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22042 case NVPTX::BI__nvvm_mapa_shared_cluster:
22043 return Builder.CreateCall(
22044 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
22045 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22046 case NVPTX::BI__nvvm_getctarank:
22047 return Builder.CreateCall(
22048 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
22049 EmitScalarExpr(E->getArg(0)));
22050 case NVPTX::BI__nvvm_getctarank_shared_cluster:
22051 return Builder.CreateCall(
22052 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
22053 EmitScalarExpr(E->getArg(0)));
22054 case NVPTX::BI__nvvm_barrier_cluster_arrive:
22055 return Builder.CreateCall(
22056 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
22057 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
22058 return Builder.CreateCall(
22059 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
22060 case NVPTX::BI__nvvm_barrier_cluster_wait:
22061 return Builder.CreateCall(
22062 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
22063 case NVPTX::BI__nvvm_fence_sc_cluster:
22064 return Builder.CreateCall(
22065 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
22066 default:
22067 return nullptr;
22068 }
22069}
22070
22071namespace {
22072struct BuiltinAlignArgs {
22073 llvm::Value *Src = nullptr;
22074 llvm::Type *SrcType = nullptr;
22075 llvm::Value *Alignment = nullptr;
22076 llvm::Value *Mask = nullptr;
22077 llvm::IntegerType *IntType = nullptr;
22078
22079 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
22080 QualType AstType = E->getArg(0)->getType();
22081 if (AstType->isArrayType())
22082 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
22083 else
22084 Src = CGF.EmitScalarExpr(E->getArg(0));
22085 SrcType = Src->getType();
22086 if (SrcType->isPointerTy()) {
22087 IntType = IntegerType::get(
22088 CGF.getLLVMContext(),
22089 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
22090 } else {
22091 assert(SrcType->isIntegerTy());
22092 IntType = cast<llvm::IntegerType>(SrcType);
22093 }
22094 Alignment = CGF.EmitScalarExpr(E->getArg(1));
22095 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
22096 auto *One = llvm::ConstantInt::get(IntType, 1);
22097 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
22098 }
22099};
22100} // namespace
22101
22102/// Generate (x & (y-1)) == 0.
22104 BuiltinAlignArgs Args(E, *this);
22105 llvm::Value *SrcAddress = Args.Src;
22106 if (Args.SrcType->isPointerTy())
22107 SrcAddress =
22108 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
22109 return RValue::get(Builder.CreateICmpEQ(
22110 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
22111 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
22112}
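// Worked example (illustrative, assuming a 64-bit pointer index type): for
// __builtin_is_aligned(p, 8) the emitted IR is approximately
//   %src_addr = ptrtoint ptr %p to i64
//   %set_bits = and i64 %src_addr, 7        ; Mask = Alignment - 1
//   %is_aligned = icmp eq i64 %set_bits, 0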
22113
22114/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22115/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22116/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
22118 BuiltinAlignArgs Args(E, *this);
22119 llvm::Value *SrcForMask = Args.Src;
22120 if (AlignUp) {
22121 // When aligning up we have to first add the mask to ensure we go over the
22122 // next alignment value and then align down to the next valid multiple.
22123 // By adding the mask, we ensure that align_up on an already aligned
22124 // value will not change the value.
22125 if (Args.Src->getType()->isPointerTy()) {
22126 if (getLangOpts().isSignedOverflowDefined())
22127 SrcForMask =
22128 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
22129 else
22130 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
22131 /*SignedIndices=*/true,
22132 /*isSubtraction=*/false,
22133 E->getExprLoc(), "over_boundary");
22134 } else {
22135 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
22136 }
22137 }
22138 // Invert the mask to only clear the lower bits.
22139 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
22140 llvm::Value *Result = nullptr;
22141 if (Args.Src->getType()->isPointerTy()) {
22142 Result = Builder.CreateIntrinsic(
22143 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22144 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
22145 } else {
22146 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
22147 }
22148 assert(Result->getType() == Args.SrcType);
22149 return RValue::get(Result);
22150}
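// Worked example (illustrative, assuming a 64-bit pointer index type): for
// __builtin_align_up(p, 64) the emitted IR is approximately
//   %over_boundary = getelementptr inbounds i8, ptr %p, i64 63
//   %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr %over_boundary, i64 -64)
// while the integer form __builtin_align_down(x, 64) reduces to
//   %aligned_result = and i64 %x, -64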
22151 
22152 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
22153 const CallExpr *E) {
22154 switch (BuiltinID) {
22155 case WebAssembly::BI__builtin_wasm_memory_size: {
22156 llvm::Type *ResultType = ConvertType(E->getType());
22157 Value *I = EmitScalarExpr(E->getArg(0));
22158 Function *Callee =
22159 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
22160 return Builder.CreateCall(Callee, I);
22161 }
22162 case WebAssembly::BI__builtin_wasm_memory_grow: {
22163 llvm::Type *ResultType = ConvertType(E->getType());
22164 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
22165 EmitScalarExpr(E->getArg(1))};
22166 Function *Callee =
22167 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
22168 return Builder.CreateCall(Callee, Args);
22169 }
22170 case WebAssembly::BI__builtin_wasm_tls_size: {
22171 llvm::Type *ResultType = ConvertType(E->getType());
22172 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
22173 return Builder.CreateCall(Callee);
22174 }
22175 case WebAssembly::BI__builtin_wasm_tls_align: {
22176 llvm::Type *ResultType = ConvertType(E->getType());
22177 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
22178 return Builder.CreateCall(Callee);
22179 }
22180 case WebAssembly::BI__builtin_wasm_tls_base: {
22181 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
22182 return Builder.CreateCall(Callee);
22183 }
22184 case WebAssembly::BI__builtin_wasm_throw: {
22185 Value *Tag = EmitScalarExpr(E->getArg(0));
22186 Value *Obj = EmitScalarExpr(E->getArg(1));
22187 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
22188 return Builder.CreateCall(Callee, {Tag, Obj});
22189 }
22190 case WebAssembly::BI__builtin_wasm_rethrow: {
22191 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
22192 return Builder.CreateCall(Callee);
22193 }
22194 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22195 Value *Addr = EmitScalarExpr(E->getArg(0));
22196 Value *Expected = EmitScalarExpr(E->getArg(1));
22197 Value *Timeout = EmitScalarExpr(E->getArg(2));
22198 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
22199 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22200 }
22201 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22202 Value *Addr = EmitScalarExpr(E->getArg(0));
22203 Value *Expected = EmitScalarExpr(E->getArg(1));
22204 Value *Timeout = EmitScalarExpr(E->getArg(2));
22205 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
22206 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22207 }
22208 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22209 Value *Addr = EmitScalarExpr(E->getArg(0));
22210 Value *Count = EmitScalarExpr(E->getArg(1));
22211 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
22212 return Builder.CreateCall(Callee, {Addr, Count});
22213 }
22214 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22215 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22216 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22217 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22218 Value *Src = EmitScalarExpr(E->getArg(0));
22219 llvm::Type *ResT = ConvertType(E->getType());
22220 Function *Callee =
22221 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
22222 return Builder.CreateCall(Callee, {Src});
22223 }
22224 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22225 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22226 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22227 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22228 Value *Src = EmitScalarExpr(E->getArg(0));
22229 llvm::Type *ResT = ConvertType(E->getType());
22230 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
22231 {ResT, Src->getType()});
22232 return Builder.CreateCall(Callee, {Src});
22233 }
22234 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22235 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22236 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22237 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22238 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
22239 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22240 Value *Src = EmitScalarExpr(E->getArg(0));
22241 llvm::Type *ResT = ConvertType(E->getType());
22242 Function *Callee =
22243 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
22244 return Builder.CreateCall(Callee, {Src});
22245 }
22246 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22247 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22248 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22249 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22250 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
22251 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22252 Value *Src = EmitScalarExpr(E->getArg(0));
22253 llvm::Type *ResT = ConvertType(E->getType());
22254 Function *Callee =
22255 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
22256 return Builder.CreateCall(Callee, {Src});
22257 }
22258 case WebAssembly::BI__builtin_wasm_min_f32:
22259 case WebAssembly::BI__builtin_wasm_min_f64:
22260 case WebAssembly::BI__builtin_wasm_min_f16x8:
22261 case WebAssembly::BI__builtin_wasm_min_f32x4:
22262 case WebAssembly::BI__builtin_wasm_min_f64x2: {
22263 Value *LHS = EmitScalarExpr(E->getArg(0));
22264 Value *RHS = EmitScalarExpr(E->getArg(1));
22265 Function *Callee =
22266 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
22267 return Builder.CreateCall(Callee, {LHS, RHS});
22268 }
22269 case WebAssembly::BI__builtin_wasm_max_f32:
22270 case WebAssembly::BI__builtin_wasm_max_f64:
22271 case WebAssembly::BI__builtin_wasm_max_f16x8:
22272 case WebAssembly::BI__builtin_wasm_max_f32x4:
22273 case WebAssembly::BI__builtin_wasm_max_f64x2: {
22274 Value *LHS = EmitScalarExpr(E->getArg(0));
22275 Value *RHS = EmitScalarExpr(E->getArg(1));
22276 Function *Callee =
22277 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
22278 return Builder.CreateCall(Callee, {LHS, RHS});
22279 }
22280 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
22281 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22282 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22283 Value *LHS = EmitScalarExpr(E->getArg(0));
22284 Value *RHS = EmitScalarExpr(E->getArg(1));
22285 Function *Callee =
22286 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
22287 return Builder.CreateCall(Callee, {LHS, RHS});
22288 }
22289 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
22290 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22291 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22292 Value *LHS = EmitScalarExpr(E->getArg(0));
22293 Value *RHS = EmitScalarExpr(E->getArg(1));
22294 Function *Callee =
22295 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
22296 return Builder.CreateCall(Callee, {LHS, RHS});
22297 }
22298 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22299 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22300 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22301 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22302 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22303 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22304 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22305 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22306 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22307 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22308 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22309 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22310 unsigned IntNo;
22311 switch (BuiltinID) {
22312 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22313 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22314 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22315 IntNo = Intrinsic::ceil;
22316 break;
22317 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22318 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22319 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22320 IntNo = Intrinsic::floor;
22321 break;
22322 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22323 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22324 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22325 IntNo = Intrinsic::trunc;
22326 break;
22327 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22328 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22329 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22330 IntNo = Intrinsic::nearbyint;
22331 break;
22332 default:
22333 llvm_unreachable("unexpected builtin ID");
22334 }
22335 Value *Value = EmitScalarExpr(E->getArg(0));
22336 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22337 return Builder.CreateCall(Callee, Value);
22338 }
22339 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22340 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
22341 return Builder.CreateCall(Callee);
22342 }
22343 case WebAssembly::BI__builtin_wasm_ref_null_func: {
22344 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
22345 return Builder.CreateCall(Callee);
22346 }
22347 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22348 Value *Src = EmitScalarExpr(E->getArg(0));
22349 Value *Indices = EmitScalarExpr(E->getArg(1));
22350 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
22351 return Builder.CreateCall(Callee, {Src, Indices});
22352 }
22353 case WebAssembly::BI__builtin_wasm_abs_i8x16:
22354 case WebAssembly::BI__builtin_wasm_abs_i16x8:
22355 case WebAssembly::BI__builtin_wasm_abs_i32x4:
22356 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
22357 Value *Vec = EmitScalarExpr(E->getArg(0));
22358 Value *Neg = Builder.CreateNeg(Vec, "neg");
22359 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
22360 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
22361 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
22362 }
22363 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
22364 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
22365 Value *LHS = EmitScalarExpr(E->getArg(0));
22366 Value *RHS = EmitScalarExpr(E->getArg(1));
22367 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
22368 ConvertType(E->getType()));
22369 return Builder.CreateCall(Callee, {LHS, RHS});
22370 }
22371 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
22372 Value *LHS = EmitScalarExpr(E->getArg(0));
22373 Value *RHS = EmitScalarExpr(E->getArg(1));
22374 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
22375 return Builder.CreateCall(Callee, {LHS, RHS});
22376 }
22377 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22378 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22379 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22380 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
22381 Value *Vec = EmitScalarExpr(E->getArg(0));
22382 unsigned IntNo;
22383 switch (BuiltinID) {
22384 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22385 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22386 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
22387 break;
22388 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22389 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
22390 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
22391 break;
22392 default:
22393 llvm_unreachable("unexpected builtin ID");
22394 }
22395 
22396 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22397 return Builder.CreateCall(Callee, Vec);
22398 }
22399 case WebAssembly::BI__builtin_wasm_bitselect: {
22400 Value *V1 = EmitScalarExpr(E->getArg(0));
22401 Value *V2 = EmitScalarExpr(E->getArg(1));
22402 Value *C = EmitScalarExpr(E->getArg(2));
22403 Function *Callee =
22404 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
22405 return Builder.CreateCall(Callee, {V1, V2, C});
22406 }
22407 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
22408 Value *LHS = EmitScalarExpr(E->getArg(0));
22409 Value *RHS = EmitScalarExpr(E->getArg(1));
22410 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
22411 return Builder.CreateCall(Callee, {LHS, RHS});
22412 }
22413 case WebAssembly::BI__builtin_wasm_any_true_v128:
22414 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22415 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22416 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22417 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
22418 unsigned IntNo;
22419 switch (BuiltinID) {
22420 case WebAssembly::BI__builtin_wasm_any_true_v128:
22421 IntNo = Intrinsic::wasm_anytrue;
22422 break;
22423 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22424 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22425 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22426 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
22427 IntNo = Intrinsic::wasm_alltrue;
22428 break;
22429 default:
22430 llvm_unreachable("unexpected builtin ID");
22431 }
22432 Value *Vec = EmitScalarExpr(E->getArg(0));
22433 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
22434 return Builder.CreateCall(Callee, {Vec});
22435 }
22436 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
22437 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
22438 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
22439 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
22440 Value *Vec = EmitScalarExpr(E->getArg(0));
22441 Function *Callee =
22442 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
22443 return Builder.CreateCall(Callee, {Vec});
22444 }
22445 case WebAssembly::BI__builtin_wasm_abs_f16x8:
22446 case WebAssembly::BI__builtin_wasm_abs_f32x4:
22447 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
22448 Value *Vec = EmitScalarExpr(E->getArg(0));
22449 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
22450 return Builder.CreateCall(Callee, {Vec});
22451 }
22452 case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
22453 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
22454 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
22455 Value *Vec = EmitScalarExpr(E->getArg(0));
22456 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
22457 return Builder.CreateCall(Callee, {Vec});
22458 }
22459 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22460 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22461 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22462 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
22463 Value *Low = EmitScalarExpr(E->getArg(0));
22464 Value *High = EmitScalarExpr(E->getArg(1));
22465 unsigned IntNo;
22466 switch (BuiltinID) {
22467 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22468 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22469 IntNo = Intrinsic::wasm_narrow_signed;
22470 break;
22471 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22472 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
22473 IntNo = Intrinsic::wasm_narrow_unsigned;
22474 break;
22475 default:
22476 llvm_unreachable("unexpected builtin ID");
22477 }
22478 Function *Callee =
22479 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
22480 return Builder.CreateCall(Callee, {Low, High});
22481 }
22482 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22483 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
22484 Value *Vec = EmitScalarExpr(E->getArg(0));
22485 unsigned IntNo;
22486 switch (BuiltinID) {
22487 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22488 IntNo = Intrinsic::fptosi_sat;
22489 break;
22490 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
22491 IntNo = Intrinsic::fptoui_sat;
22492 break;
22493 default:
22494 llvm_unreachable("unexpected builtin ID");
22495 }
22496 llvm::Type *SrcT = Vec->getType();
22497 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
22498 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
22499 Value *Trunc = Builder.CreateCall(Callee, Vec);
22500 Value *Splat = Constant::getNullValue(TruncT);
22501 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
22502 }
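// The f64x2 -> i32x4 "zero" variants only produce two meaningful lanes, so
// the shuffle above concatenates the truncated <2 x i32> result with a zero
// vector to widen it back to <4 x i32>, matching the
// i32x4.trunc_sat_f64x2_*_zero semantics.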
22503 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
22504 Value *Ops[18];
22505 size_t OpIdx = 0;
22506 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
22507 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
22508 while (OpIdx < 18) {
22509 std::optional<llvm::APSInt> LaneConst =
22510 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
22511 assert(LaneConst && "Constant arg isn't actually constant?");
22512 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
22513 }
22514 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
22515 return Builder.CreateCall(Callee, Ops);
22516 }
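// Example (illustrative): a call like
//   __builtin_wasm_shuffle_i8x16(a, b, 0, 1, 2, 3, 4, 5, 6, 7,
//                                8, 9, 10, 11, 12, 13, 14, 15);
// requires all 16 lane indices to be integer constant expressions; they are
// forwarded as constant operands to the wasm shuffle intrinsic along with the
// two input vectors, as the loop above enforces via the assert.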
22517 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22518 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22519 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22520 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22521 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22522 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
22523 Value *A = EmitScalarExpr(E->getArg(0));
22524 Value *B = EmitScalarExpr(E->getArg(1));
22525 Value *C = EmitScalarExpr(E->getArg(2));
22526 unsigned IntNo;
22527 switch (BuiltinID) {
22528 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22529 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22530 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22531 IntNo = Intrinsic::wasm_relaxed_madd;
22532 break;
22533 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22534 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22535 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
22536 IntNo = Intrinsic::wasm_relaxed_nmadd;
22537 break;
22538 default:
22539 llvm_unreachable("unexpected builtin ID");
22540 }
22541 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
22542 return Builder.CreateCall(Callee, {A, B, C});
22543 }
22544 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
22545 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
22546 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
22547 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
22548 Value *A = EmitScalarExpr(E->getArg(0));
22549 Value *B = EmitScalarExpr(E->getArg(1));
22550 Value *C = EmitScalarExpr(E->getArg(2));
22551 Function *Callee =
22552 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
22553 return Builder.CreateCall(Callee, {A, B, C});
22554 }
22555 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
22556 Value *Src = EmitScalarExpr(E->getArg(0));
22557 Value *Indices = EmitScalarExpr(E->getArg(1));
22558 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
22559 return Builder.CreateCall(Callee, {Src, Indices});
22560 }
22561 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22562 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22563 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22564 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
22565 Value *LHS = EmitScalarExpr(E->getArg(0));
22566 Value *RHS = EmitScalarExpr(E->getArg(1));
22567 unsigned IntNo;
22568 switch (BuiltinID) {
22569 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22570 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22571 IntNo = Intrinsic::wasm_relaxed_min;
22572 break;
22573 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22574 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
22575 IntNo = Intrinsic::wasm_relaxed_max;
22576 break;
22577 default:
22578 llvm_unreachable("unexpected builtin ID");
22579 }
22580 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
22581 return Builder.CreateCall(Callee, {LHS, RHS});
22582 }
22583 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22584 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22585 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22586 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
22587 Value *Vec = EmitScalarExpr(E->getArg(0));
22588 unsigned IntNo;
22589 switch (BuiltinID) {
22590 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22591 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
22592 break;
22593 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22594 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
22595 break;
22596 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22597 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
22598 break;
22599 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
22600 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
22601 break;
22602 default:
22603 llvm_unreachable("unexpected builtin ID");
22604 }
22605 Function *Callee = CGM.getIntrinsic(IntNo);
22606 return Builder.CreateCall(Callee, {Vec});
22607 }
22608 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
22609 Value *LHS = EmitScalarExpr(E->getArg(0));
22610 Value *RHS = EmitScalarExpr(E->getArg(1));
22611 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
22612 return Builder.CreateCall(Callee, {LHS, RHS});
22613 }
22614 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
22615 Value *LHS = EmitScalarExpr(E->getArg(0));
22616 Value *RHS = EmitScalarExpr(E->getArg(1));
22617 Function *Callee =
22618 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
22619 return Builder.CreateCall(Callee, {LHS, RHS});
22620 }
22621 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
22622 Value *LHS = EmitScalarExpr(E->getArg(0));
22623 Value *RHS = EmitScalarExpr(E->getArg(1));
22624 Value *Acc = EmitScalarExpr(E->getArg(2));
22625 Function *Callee =
22626 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
22627 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22628 }
22629 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
22630 Value *LHS = EmitScalarExpr(E->getArg(0));
22631 Value *RHS = EmitScalarExpr(E->getArg(1));
22632 Value *Acc = EmitScalarExpr(E->getArg(2));
22633 Function *Callee =
22634 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
22635 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22636 }
22637 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
22638 Value *Addr = EmitScalarExpr(E->getArg(0));
22639 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
22640 return Builder.CreateCall(Callee, {Addr});
22641 }
22642 case WebAssembly::BI__builtin_wasm_storef16_f32: {
22643 Value *Val = EmitScalarExpr(E->getArg(0));
22644 Value *Addr = EmitScalarExpr(E->getArg(1));
22645 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
22646 return Builder.CreateCall(Callee, {Val, Addr});
22647 }
22648 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
22649 Value *Val = EmitScalarExpr(E->getArg(0));
22650 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
22651 return Builder.CreateCall(Callee, {Val});
22652 }
22653 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
22654 Value *Vector = EmitScalarExpr(E->getArg(0));
22655 Value *Index = EmitScalarExpr(E->getArg(1));
22656 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
22657 return Builder.CreateCall(Callee, {Vector, Index});
22658 }
22659 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
22660 Value *Vector = EmitScalarExpr(E->getArg(0));
22661 Value *Index = EmitScalarExpr(E->getArg(1));
22662 Value *Val = EmitScalarExpr(E->getArg(2));
22663 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
22664 return Builder.CreateCall(Callee, {Vector, Index, Val});
22665 }
22666 case WebAssembly::BI__builtin_wasm_table_get: {
22667 assert(E->getArg(0)->getType()->isArrayType());
22668 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22669 Value *Index = EmitScalarExpr(E->getArg(1));
22670 Function *Callee;
22671 if (E->getType().isWebAssemblyExternrefType())
22672 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
22673 else if (E->getType().isWebAssemblyFuncrefType())
22674 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
22675 else
22676 llvm_unreachable(
22677 "Unexpected reference type for __builtin_wasm_table_get");
22678 return Builder.CreateCall(Callee, {Table, Index});
22679 }
22680 case WebAssembly::BI__builtin_wasm_table_set: {
22681 assert(E->getArg(0)->getType()->isArrayType());
22682 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22683 Value *Index = EmitScalarExpr(E->getArg(1));
22684 Value *Val = EmitScalarExpr(E->getArg(2));
22685 Function *Callee;
22686 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22687 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
22688 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22689 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
22690 else
22691 llvm_unreachable(
22692 "Unexpected reference type for __builtin_wasm_table_set");
22693 return Builder.CreateCall(Callee, {Table, Index, Val});
22694 }
22695 case WebAssembly::BI__builtin_wasm_table_size: {
22696 assert(E->getArg(0)->getType()->isArrayType());
22697 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22698 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
22699 return Builder.CreateCall(Callee, Value);
22700 }
22701 case WebAssembly::BI__builtin_wasm_table_grow: {
22702 assert(E->getArg(0)->getType()->isArrayType());
22703 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22704 Value *Val = EmitScalarExpr(E->getArg(1));
22705 Value *NElems = EmitScalarExpr(E->getArg(2));
22706 
22707 Function *Callee;
22708 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
22709 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
22710 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22711 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22712 else
22713 llvm_unreachable(
22714 "Unexpected reference type for __builtin_wasm_table_grow");
22715
22716 return Builder.CreateCall(Callee, {Table, Val, NElems});
22717 }
22718 case WebAssembly::BI__builtin_wasm_table_fill: {
22719 assert(E->getArg(0)->getType()->isArrayType());
22720 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22721 Value *Index = EmitScalarExpr(E->getArg(1));
22722 Value *Val = EmitScalarExpr(E->getArg(2));
22723 Value *NElems = EmitScalarExpr(E->getArg(3));
22724 
22725 Function *Callee;
22726 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22727 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
22728 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22729 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22730 else
22731 llvm_unreachable(
22732 "Unexpected reference type for __builtin_wasm_table_fill");
22733
22734 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
22735 }
22736 case WebAssembly::BI__builtin_wasm_table_copy: {
22737 assert(E->getArg(0)->getType()->isArrayType());
22738 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22739 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
22740 Value *DstIdx = EmitScalarExpr(E->getArg(2));
22741 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
22742 Value *NElems = EmitScalarExpr(E->getArg(4));
22743
22744 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
22745
22746 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
22747 }
22748 default:
22749 return nullptr;
22750 }
22751}
22752
22753 static std::pair<Intrinsic::ID, unsigned>
22754 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
22755 struct Info {
22756 unsigned BuiltinID;
22757 Intrinsic::ID IntrinsicID;
22758 unsigned VecLen;
22759 };
22760 static Info Infos[] = {
22761#define CUSTOM_BUILTIN_MAPPING(x,s) \
22762 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22763 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
22764 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
22765 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
22766 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
22767 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
22768 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
22769 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
22770 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
22771 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
22772 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
22773 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
22774 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
22775 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
22776 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
22777 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
22778 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
22779 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
22780 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
22781 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
22782 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
22783 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
22784 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
22785 // Legacy builtins that take a vector in place of a vector predicate.
22786 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
22787 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
22788 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
22789 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
22790 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
22791 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
22792 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
22793 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
22794#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22795#undef CUSTOM_BUILTIN_MAPPING
22796 };
22797
22798 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
22799 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
22800 (void)SortOnce;
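// The comma expression in the static initializer above sorts the table by
// BuiltinID exactly once (on first entry into this function), so the
// lower_bound call below can binary-search it.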
22801
22802 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
22803 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
22804 return {Intrinsic::not_intrinsic, 0};
22805
22806 return {F->IntrinsicID, F->VecLen};
22807}
22808 
22809 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
22810 const CallExpr *E) {
22811 Intrinsic::ID ID;
22812 unsigned VecLen;
22813 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
22814
22815 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
22816 // The base pointer is passed by address, so it needs to be loaded.
22817 Address A = EmitPointerWithAlignment(E->getArg(0));
22818 Address BP = Address(A.emitRawPointer(*this), Int8Ty, A.getAlignment());
22819 llvm::Value *Base = Builder.CreateLoad(BP);
22820 // The treatment of both loads and stores is the same: the arguments for
22821 // the builtin are the same as the arguments for the intrinsic.
22822 // Load:
22823 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
22824 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
22825 // Store:
22826 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
22827 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
22828 SmallVector<llvm::Value*,5> Ops = { Base };
22829 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
22830 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22831
22832 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
22833 // The load intrinsics generate two results (Value, NewBase), stores
22834 // generate one (NewBase). The new base address needs to be stored.
22835 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
22836 : Result;
22837 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
22838 Address Dest = EmitPointerWithAlignment(E->getArg(0));
22839 llvm::Value *RetVal =
22840 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
22841 if (IsLoad)
22842 RetVal = Builder.CreateExtractValue(Result, 0);
22843 return RetVal;
22844 };
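// Example (illustrative): for a circular load such as
//   v = __builtin_HEXAGON_L2_loadri_pci(&base, inc, mod, start);
// MakeCircOp loads the base pointer, calls the matching
// llvm.hexagon.L2.loadri.pci intrinsic, stores the returned new base back
// through the first argument, and returns the loaded value; for the store
// builtins the new base is the only result.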
22845
22846 // Handle the conversion of bit-reverse load intrinsics to bit code.
22847 // The intrinsic call after this function only reads from memory and the
22848 // write to memory is handled by the store instruction.
22849 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
22850 // The intrinsic generates one result, which is the new value for the base
22851 // pointer. It needs to be returned. The result of the load instruction is
22852 // passed to the intrinsic by address, so the value needs to be stored.
22853 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
22854
22855 // Expressions like &(*pt++) will be incremented per evaluation.
22856 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
22857 // once per call.
22858 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
22859 DestAddr = DestAddr.withElementType(Int8Ty);
22860 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
22861
22862 // Operands are Base, Dest, Modifier.
22863 // The intrinsic format in LLVM IR is defined as
22864 // { ValueType, i8* } (i8*, i32).
22865 llvm::Value *Result = Builder.CreateCall(
22866 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
22867
22868 // The value needs to be stored as the variable is passed by reference.
22869 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
22870
22871 // The store needs to be truncated to fit the destination type.
22872 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
22873 // to be handled with stores of the respective destination type.
22874 DestVal = Builder.CreateTrunc(DestVal, DestTy);
22875
22876 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
22877 // The updated value of the base pointer is returned.
22878 return Builder.CreateExtractValue(Result, 1);
22879 };
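// Example (illustrative): a call roughly like __builtin_brev_ldh(base, &dst, mod)
// becomes a call to llvm.hexagon.L2.loadrh.pbr returning { value, new base };
// element 0 is truncated to i16 and stored through the second argument, and
// element 1 (the updated base pointer) is returned, as coded above.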
22880
22881 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
22882 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
22883 : Intrinsic::hexagon_V6_vandvrt;
22884 return Builder.CreateCall(CGM.getIntrinsic(ID),
22885 {Vec, Builder.getInt32(-1)});
22886 };
22887 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
22888 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
22889 : Intrinsic::hexagon_V6_vandqrt;
22890 return Builder.CreateCall(CGM.getIntrinsic(ID),
22891 {Pred, Builder.getInt32(-1)});
22892 };
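// V2Q and Q2V bridge between the HVX vector values used by the C-level
// builtins and the vector-predicate type expected by the carry intrinsics:
// vandvrt converts a vector to a predicate and vandqrt converts a predicate
// back to a vector, in both cases and-ing against an all-ones scalar (-1).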
22893
22894 switch (BuiltinID) {
22895 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
22896 // and the corresponding C/C++ builtins use loads/stores to update
22897 // the predicate.
22898 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
22899 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
22900 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
22901 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
22902 // Get the type from the 0-th argument.
22903 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22904 Address PredAddr =
22905 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22906 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
22907 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22908 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
22909
22910 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22911 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22912 PredAddr.getAlignment());
22913 return Builder.CreateExtractValue(Result, 0);
22914 }
22915 // These are identical to the builtins above, except they don't consume
22916 // input carry, only generate carry-out. Since they still produce two
22917 // outputs, generate the store of the predicate, but no load.
22918 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
22919 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
22920 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
22921 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
22922 // Get the type from the 0-th argument.
22923 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22924 Address PredAddr =
22925 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22926 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22927 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22928
22929 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22930 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22931 PredAddr.getAlignment());
22932 return Builder.CreateExtractValue(Result, 0);
22933 }
22934
22935 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
22936 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
22937 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
22938 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
22939 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
22940 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
22941 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
22942 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
22943 SmallVector<llvm::Value*,4> Ops;
22944 const Expr *PredOp = E->getArg(0);
22945 // There will be an implicit cast to a boolean vector. Strip it.
22946 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
22947 if (Cast->getCastKind() == CK_BitCast)
22948 PredOp = Cast->getSubExpr();
22949 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
22950 }
22951 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
22952 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22953 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
22954 }
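  // Note: the predicate operand reaches these builtins as a boolean-vector
  // bitcast of an HVX vector; stripping that implicit cast above lets V2Q
  // turn the original vector value into the Q register the intrinsic expects.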
22955
22956 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
22957 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
22958 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
22959 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
22960 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
22961 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
22962 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
22963 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
22964 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
22965 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
22966 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
22967 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
22968 return MakeCircOp(ID, /*IsLoad=*/true);
22969 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
22970 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
22971 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
22972 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
22973 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
22974 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
22975 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
22976 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
22977 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
22978 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
22979 return MakeCircOp(ID, /*IsLoad=*/false);
22980 case Hexagon::BI__builtin_brev_ldub:
22981 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
22982 case Hexagon::BI__builtin_brev_ldb:
22983 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
22984 case Hexagon::BI__builtin_brev_lduh:
22985 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
22986 case Hexagon::BI__builtin_brev_ldh:
22987 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
22988 case Hexagon::BI__builtin_brev_ldw:
22989 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
22990 case Hexagon::BI__builtin_brev_ldd:
22991 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
22992 } // switch
22993
22994 return nullptr;
22995}
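For reference, a minimal usage sketch of the carry builtins handled in the switch above, assuming the HVX_Vector and HVX_VectorPred typedefs from hexagon_types.h; the third argument is a pointer through which the predicate is read (carry-in) and written back (carry-out), matching the load/V2Q and Q2V/store pairs emitted above:

#include <hexagon_types.h>

HVX_Vector add_with_carry(HVX_Vector A, HVX_Vector B, HVX_VectorPred *Carry) {
  // Carry-in is loaded from *Carry before the intrinsic call; the carry-out
  // produced by the instruction is stored back through the same pointer.
  return __builtin_HEXAGON_V6_vaddcarry(A, B, Carry);
}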
22996
22997Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) {
22998 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
22999 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
23000 return EmitRISCVCpuIs(CPUStr);
23001}
23002
23003Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) {
23004 llvm::Type *Int32Ty = Builder.getInt32Ty();
23005 llvm::Type *Int64Ty = Builder.getInt64Ty();
23006 llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty);
23007 llvm::Constant *RISCVCPUModel =
23008 CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model");
23009 cast<llvm::GlobalValue>(RISCVCPUModel)->setDSOLocal(true);
23010
23011 auto loadRISCVCPUID = [&](unsigned Index) {
23012 Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index);
23013 Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index),
23014 Ptr, llvm::MaybeAlign());
23015 return CPUID;
23016 };
23017
23018 const llvm::RISCV::CPUModel Model = llvm::RISCV::getCPUModel(CPUStr);
23019
23020 // Compare mvendorid.
23021 Value *VendorID = loadRISCVCPUID(0);
23022 Value *Result =
23023 Builder.CreateICmpEQ(VendorID, Builder.getInt32(Model.MVendorID));
23024
23025 // Compare marchid.
23026 Value *ArchID = loadRISCVCPUID(1);
23027 Result = Builder.CreateAnd(
23028 Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(Model.MArchID)));
23029
23030 // Compare mimpid.
23031 Value *ImpID = loadRISCVCPUID(2);
23032 Result = Builder.CreateAnd(
23033 Result, Builder.CreateICmpEQ(ImpID, Builder.getInt64(Model.MImpID)));
23034
23035 return Result;
23036}
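A hedged usage sketch of the builtin lowered here: the result is the conjunction of the three comparisons above, i.e. the mvendorid, marchid, and mimpid values read from __riscv_cpu_model must all match the values LLVM records for the named CPU (the CPU name below is only illustrative):

int running_on_u74(void) {
  __builtin_cpu_init();                  // fills in __riscv_cpu_model
  return __builtin_cpu_is("sifive-u74"); // illustrative CPU name
}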
23037
23038Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
23039 const CallExpr *E,
23040 ReturnValueSlot ReturnValue) {
23041
23042 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
23043 return EmitRISCVCpuSupports(E);
23044 if (BuiltinID == Builtin::BI__builtin_cpu_init)
23045 return EmitRISCVCpuInit();
23046 if (BuiltinID == Builtin::BI__builtin_cpu_is)
23047 return EmitRISCVCpuIs(E);
23048
23049 SmallVector<Value *, 4> Ops;
23050 llvm::Type *ResultType = ConvertType(E->getType());
23051
23052 // Find out if any arguments are required to be integer constant expressions.
23053 unsigned ICEArguments = 0;
23054 ASTContext::GetBuiltinTypeError Error;
23055 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
23056 if (Error == ASTContext::GE_Missing_type) {
23057 // Vector intrinsics don't have a type string.
23058 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
23059 BuiltinID <= clang::RISCV::LastRVVBuiltin);
23060 ICEArguments = 0;
23061 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
23062 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
23063 ICEArguments = 1 << 1;
23064 } else {
23065 assert(Error == ASTContext::GE_None && "Unexpected error");
23066 }
23067
23068 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
23069 ICEArguments |= (1 << 1);
23070 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
23071 ICEArguments |= (1 << 2);
23072
23073 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
23074 // Handle aggregate argument, namely RVV tuple types in segment load/store
23075 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
23076 LValue L = EmitAggExprToLValue(E->getArg(i));
23077 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
23078 Ops.push_back(AggValue);
23079 continue;
23080 }
23081 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
23082 }
23083
23084 Intrinsic::ID ID = Intrinsic::not_intrinsic;
23085 // The 0th bit simulates the `vta` of RVV
23086 // The 1st bit simulates the `vma` of RVV
23087 constexpr unsigned RVV_VTA = 0x1;
23088 constexpr unsigned RVV_VMA = 0x2;
23089 int PolicyAttrs = 0;
23090 bool IsMasked = false;
23091 // This is used by segment load/store to determine its LLVM type.
23092 unsigned SegInstSEW = 8;
23093
23094 // Required for overloaded intrinsics.
23095 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
23096 switch (BuiltinID) {
23097 default: llvm_unreachable("unexpected builtin ID");
23098 case RISCV::BI__builtin_riscv_orc_b_32:
23099 case RISCV::BI__builtin_riscv_orc_b_64:
23100 case RISCV::BI__builtin_riscv_clmul_32:
23101 case RISCV::BI__builtin_riscv_clmul_64:
23102 case RISCV::BI__builtin_riscv_clmulh_32:
23103 case RISCV::BI__builtin_riscv_clmulh_64:
23104 case RISCV::BI__builtin_riscv_clmulr_32:
23105 case RISCV::BI__builtin_riscv_clmulr_64:
23106 case RISCV::BI__builtin_riscv_xperm4_32:
23107 case RISCV::BI__builtin_riscv_xperm4_64:
23108 case RISCV::BI__builtin_riscv_xperm8_32:
23109 case RISCV::BI__builtin_riscv_xperm8_64:
23110 case RISCV::BI__builtin_riscv_brev8_32:
23111 case RISCV::BI__builtin_riscv_brev8_64:
23112 case RISCV::BI__builtin_riscv_zip_32:
23113 case RISCV::BI__builtin_riscv_unzip_32: {
23114 switch (BuiltinID) {
23115 default: llvm_unreachable("unexpected builtin ID");
23116 // Zbb
23117 case RISCV::BI__builtin_riscv_orc_b_32:
23118 case RISCV::BI__builtin_riscv_orc_b_64:
23119 ID = Intrinsic::riscv_orc_b;
23120 break;
23121
23122 // Zbc
23123 case RISCV::BI__builtin_riscv_clmul_32:
23124 case RISCV::BI__builtin_riscv_clmul_64:
23125 ID = Intrinsic::riscv_clmul;
23126 break;
23127 case RISCV::BI__builtin_riscv_clmulh_32:
23128 case RISCV::BI__builtin_riscv_clmulh_64:
23129 ID = Intrinsic::riscv_clmulh;
23130 break;
23131 case RISCV::BI__builtin_riscv_clmulr_32:
23132 case RISCV::BI__builtin_riscv_clmulr_64:
23133 ID = Intrinsic::riscv_clmulr;
23134 break;
23135
23136 // Zbkx
23137 case RISCV::BI__builtin_riscv_xperm8_32:
23138 case RISCV::BI__builtin_riscv_xperm8_64:
23139 ID = Intrinsic::riscv_xperm8;
23140 break;
23141 case RISCV::BI__builtin_riscv_xperm4_32:
23142 case RISCV::BI__builtin_riscv_xperm4_64:
23143 ID = Intrinsic::riscv_xperm4;
23144 break;
23145
23146 // Zbkb
23147 case RISCV::BI__builtin_riscv_brev8_32:
23148 case RISCV::BI__builtin_riscv_brev8_64:
23149 ID = Intrinsic::riscv_brev8;
23150 break;
23151 case RISCV::BI__builtin_riscv_zip_32:
23152 ID = Intrinsic::riscv_zip;
23153 break;
23154 case RISCV::BI__builtin_riscv_unzip_32:
23155 ID = Intrinsic::riscv_unzip;
23156 break;
23157 }
23158
23159 IntrinsicTypes = {ResultType};
23160 break;
23161 }
23162
23163 // Zk builtins
23164
23165 // Zknh
23166 case RISCV::BI__builtin_riscv_sha256sig0:
23167 ID = Intrinsic::riscv_sha256sig0;
23168 break;
23169 case RISCV::BI__builtin_riscv_sha256sig1:
23170 ID = Intrinsic::riscv_sha256sig1;
23171 break;
23172 case RISCV::BI__builtin_riscv_sha256sum0:
23173 ID = Intrinsic::riscv_sha256sum0;
23174 break;
23175 case RISCV::BI__builtin_riscv_sha256sum1:
23176 ID = Intrinsic::riscv_sha256sum1;
23177 break;
23178
23179 // Zksed
23180 case RISCV::BI__builtin_riscv_sm4ks:
23181 ID = Intrinsic::riscv_sm4ks;
23182 break;
23183 case RISCV::BI__builtin_riscv_sm4ed:
23184 ID = Intrinsic::riscv_sm4ed;
23185 break;
23186
23187 // Zksh
23188 case RISCV::BI__builtin_riscv_sm3p0:
23189 ID = Intrinsic::riscv_sm3p0;
23190 break;
23191 case RISCV::BI__builtin_riscv_sm3p1:
23192 ID = Intrinsic::riscv_sm3p1;
23193 break;
23194
23195 case RISCV::BI__builtin_riscv_clz_32:
23196 case RISCV::BI__builtin_riscv_clz_64: {
23197 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
23198 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23199 if (Result->getType() != ResultType)
23200 Result =
23201 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23202 return Result;
23203 }
23204 case RISCV::BI__builtin_riscv_ctz_32:
23205 case RISCV::BI__builtin_riscv_ctz_64: {
23206 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
23207 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23208 if (Result->getType() != ResultType)
23209 Result =
23210 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23211 return Result;
23212 }
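  // Passing false as the second operand of ctlz/cttz keeps a zero input well
  // defined (the result is the operand width), so __builtin_riscv_clz/ctz,
  // unlike the generic __builtin_clz/__builtin_ctz, may be called with zero.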
23213
23214 // Zihintntl
23215 case RISCV::BI__builtin_riscv_ntl_load: {
23216 llvm::Type *ResTy = ConvertType(E->getType());
23217 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23218 if (Ops.size() == 2)
23219 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
23220
23221 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23222 getLLVMContext(),
23223 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23224 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23225 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23226
23227 int Width;
23228 if (ResTy->isScalableTy()) {
23229 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
23230 llvm::Type *ScalarTy = ResTy->getScalarType();
23231 Width = ScalarTy->getPrimitiveSizeInBits() *
23232 SVTy->getElementCount().getKnownMinValue();
23233 } else
23234 Width = ResTy->getPrimitiveSizeInBits();
23235 LoadInst *Load = Builder.CreateLoad(
23236 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
23237
23238 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23239 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23240 RISCVDomainNode);
23241
23242 return Load;
23243 }
23244 case RISCV::BI__builtin_riscv_ntl_store: {
23245 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23246 if (Ops.size() == 3)
23247 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
23248
23249 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23250 getLLVMContext(),
23251 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23252 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23253 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23254
23255 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
23256 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23257 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23258 RISCVDomainNode);
23259
23260 return Store;
23261 }
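  // Hedged usage sketch for the two Zihintntl builtins handled above; the
  // optional trailing argument selects the nontemporal domain (5, the
  // default, corresponds to __RISCV_NTLH_ALL) and becomes the
  // "riscv-nontemporal-domain" metadata attached here:
  //   int v = __builtin_riscv_ntl_load(p, 5);
  //   __builtin_riscv_ntl_store(p, v, 5);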
23262 // XCValu
23263 case RISCV::BI__builtin_riscv_cv_alu_addN:
23264 ID = Intrinsic::riscv_cv_alu_addN;
23265 break;
23266 case RISCV::BI__builtin_riscv_cv_alu_addRN:
23267 ID = Intrinsic::riscv_cv_alu_addRN;
23268 break;
23269 case RISCV::BI__builtin_riscv_cv_alu_adduN:
23270 ID = Intrinsic::riscv_cv_alu_adduN;
23271 break;
23272 case RISCV::BI__builtin_riscv_cv_alu_adduRN:
23273 ID = Intrinsic::riscv_cv_alu_adduRN;
23274 break;
23275 case RISCV::BI__builtin_riscv_cv_alu_clip:
23276 ID = Intrinsic::riscv_cv_alu_clip;
23277 break;
23278 case RISCV::BI__builtin_riscv_cv_alu_clipu:
23279 ID = Intrinsic::riscv_cv_alu_clipu;
23280 break;
23281 case RISCV::BI__builtin_riscv_cv_alu_extbs:
23282 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23283 "extbs");
23284 case RISCV::BI__builtin_riscv_cv_alu_extbz:
23285 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23286 "extbz");
23287 case RISCV::BI__builtin_riscv_cv_alu_exths:
23288 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23289 "exths");
23290 case RISCV::BI__builtin_riscv_cv_alu_exthz:
23291 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23292 "exthz");
23293 case RISCV::BI__builtin_riscv_cv_alu_slet:
23294 return Builder.CreateZExt(Builder.CreateICmpSLE(Ops[0], Ops[1]), Int32Ty,
23295 "sle");
23296 case RISCV::BI__builtin_riscv_cv_alu_sletu:
23297 return Builder.CreateZExt(Builder.CreateICmpULE(Ops[0], Ops[1]), Int32Ty,
23298 "sleu");
23299 case RISCV::BI__builtin_riscv_cv_alu_subN:
23300 ID = Intrinsic::riscv_cv_alu_subN;
23301 break;
23302 case RISCV::BI__builtin_riscv_cv_alu_subRN:
23303 ID = Intrinsic::riscv_cv_alu_subRN;
23304 break;
23305 case RISCV::BI__builtin_riscv_cv_alu_subuN:
23306 ID = Intrinsic::riscv_cv_alu_subuN;
23307 break;
23308 case RISCV::BI__builtin_riscv_cv_alu_subuRN:
23309 ID = Intrinsic::riscv_cv_alu_subuRN;
23310 break;
23311
23312 // Vector builtins are handled from here.
23313#include "clang/Basic/riscv_vector_builtin_cg.inc"
23314
23315 // SiFive Vector builtins are handled from here.
23316#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
23317 }
23318
23319 assert(ID != Intrinsic::not_intrinsic);
23320
23321 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
23322 return Builder.CreateCall(F, Ops, "");
23323}
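To illustrate the common path through the switch above: for a builtin such as __builtin_riscv_orc_b_32, ID becomes Intrinsic::riscv_orc_b, IntrinsicTypes is {ResultType}, and the final CreateCall emits a single intrinsic call overloaded on the 32-bit result type. A hedged source-level sketch (requires the Zbb extension to be enabled):

unsigned or_combine_bytes(unsigned x) {
  // Each byte of the result is 0xff if the corresponding input byte is
  // nonzero and 0x00 otherwise; lowered via the riscv_orc_b intrinsic above.
  return __builtin_riscv_orc_b_32(x);
}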
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3443
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
static constexpr SparcCPUInfo CPUInfo[]
Definition: Sparc.cpp:67
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8971
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9807
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:376
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1425
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6997
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:568
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2318
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2284
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6866
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2775
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9777
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:1040
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9770
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:8012
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9997
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8024
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7994
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:9039
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2653
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:621
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:978
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:671
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6993
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8025
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1622
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:8029
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
@ UnsignedAlts
Definition: CGBuiltin.cpp:6960
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6965
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6969
@ Use64BitVectors
Definition: CGBuiltin.cpp:6962
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6957
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6967
@ InventFloatType
Definition: CGBuiltin.cpp:6959
@ AddRetType
Definition: CGBuiltin.cpp:6952
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6954
@ VectorizeRetType
Definition: CGBuiltin.cpp:6956
@ VectorRet
Definition: CGBuiltin.cpp:6966
@ Add1ArgType
Definition: CGBuiltin.cpp:6953
@ Use128BitVectors
Definition: CGBuiltin.cpp:6963
static Value * loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder, CodeGenModule &CGM)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:1100
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:802
static Value * emitQuaternaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:778
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2681
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:645
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:1008
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9766
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8026
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9844
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6749
constexpr unsigned SVEBitsPerBlock
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2018
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7836
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6990
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:142
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:688
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:428
static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo)
Checks no arguments or results are passed indirectly in the ABI (i.e.
Definition: CGBuiltin.cpp:914
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition: CGBuiltin.cpp:249
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9871
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1859
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1708
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1486
static Value * emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:765
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:7002
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:880
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:871
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2511
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:1048
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1561
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9833
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:725
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:996
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2565
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2704
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6825
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:419
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9859
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:347
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8950
static void emitSincosBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:838
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:78
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8942
@ VolatileRead
Definition: CGBuiltin.cpp:8944
@ NormalRead
Definition: CGBuiltin.cpp:8943
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:513
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:358
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2553
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:473
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:705
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:336
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9799
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8021
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:7321
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:400
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:963
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:8087
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:790
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:820
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:657
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2786
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1435
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2519
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:744
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:925
static Value * handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:212
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:411
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1471
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8869
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2278
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:632
static Value * handleHlslClip(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:101
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:8023
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7596
CodeGenFunction::ComplexPairTy ComplexPairTy
const Decl * D
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:147
unsigned Iter
Definition: HTMLLogger.cpp:153
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:144
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition: SemaHLSL.cpp:43
SourceRange Range
Definition: SemaObjC.cpp:758
SourceLocation Loc
Definition: SemaObjC.cpp:759
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
C Language Family Type Representation.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:465
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1187
IdentifierTable & Idents
Definition: ASTContext.h:680
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:682
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType IntTy
Definition: ASTContext.h:1169
QualType getObjCIdType() const
Represents the Objective-CC id type.
Definition: ASTContext.h:2196
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2763
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2482
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1160
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:799
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2384
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2387
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3577
QualType getElementType() const
Definition: Type.h:3589
static std::unique_ptr< AtomicScopeModel > create(AtomicScopeModelKind K)
Create an atomic scope model by AtomicScopeModelKind.
Definition: SyncScope.h:273
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:150
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:248
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:252
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:123
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2874
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to ...
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
An aggregate value slot.
Definition: CGValue.h:504
Address getAddress() const
Definition: CGValue.h:644
A scoped helper to set the current debug location to the specified location or preferred location of ...
Definition: CGDebugInfo.h:858
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:898
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:915
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:143
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:389
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:381
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:180
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:405
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:151
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:398
Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:219
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:169
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:315
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:437
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:365
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:128
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:189
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:261
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:346
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:63
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:137
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
llvm::DILocation * CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg)
Create a debug location from TrapLocation that adds an artificial inline frame where the frame name i...
CGFunctionInfo - Class to encapsulate the information about a function definition.
MutableArrayRef< ArgInfo > arguments()
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:274
void add(RValue rvalue, QualType type)
Definition: CGCall.h:305
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID, bool NoMerge=false)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, QualType Ty)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitRISCVCpuSupports(const CallExpr *E)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
llvm::Value * EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
llvm::Value * EmitCheckedArgForAssume(const Expr *E)
Emits an argument for a call to a __builtin_assume.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
@ Default
! No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
RValue EmitAnyExpr(const Expr *E, AggValueSlot aggSlot=AggValueSlot::ignored(), bool ignoreResult=false)
EmitAnyExpr - Emit code to compute the specified expression which can have any type.
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
llvm::Value * EmitRISCVCpuIs(const CallExpr *E)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside presereved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
llvm::Type * ConvertType(QualType T)
void EmitWritebacks(const CallArgList &Args)
EmitWritebacks - Emit the writebacks collected for the call's arguments.
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior when the pointer overflows.
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitSPIRVBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getRawFunctionPointer(GlobalDecl GD, llvm::Type *Ty=nullptr)
Return a function pointer for a reference to the given function.
Definition: CGExpr.cpp:2909
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:262
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
const LangOptions & getLangOpts() const
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1630
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:679
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:667
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
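
The CodeGenModule helpers above are the usual way builtin emitters reach library and runtime entry points. A minimal sketch, assuming a CodeGenFunction &CGF and an already-emitted integer llvm::Value *V are in scope; the symbol name "__example_hook" is purely illustrative, not a real runtime entry point:

// Declare (or reuse) "void __example_hook(int)" and emit a call, or an invoke
// if the current insertion point sits inside an active EH scope.
llvm::FunctionType *FTy =
    llvm::FunctionType::get(CGF.VoidTy, {CGF.IntTy}, /*isVarArg=*/false);
llvm::FunctionCallee Hook = CGF.CGM.CreateRuntimeFunction(FTy, "__example_hook");
CGF.EmitRuntimeCallOrInvoke(Hook, {V});

EmitNounwindRuntimeCall is the variant to prefer when the callee is known not to unwind.
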
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:382
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:419
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition: CGValue.h:361
void setNontemporal(bool Value)
Definition: CGValue.h:319
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
llvm::Value * getAggregatePointer(QualType PointeeType, CodeGenFunction &CGF) const
Definition: CGValue.h:88
bool isScalar() const
Definition: CGValue.h:64
static RValue getIgnored()
Definition: CGValue.h:93
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:125
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
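
The RValue wrappers above are how individual builtin emitters hand results back to their callers. A minimal sketch of the common cases, assuming hypothetical locals V (a scalar llvm::Value*), Re/Im (a complex value pair), and Slot (an aggregate Address):

RValue Scalar = RValue::get(V);              // plain scalar result
RValue Cplx   = RValue::getComplex(Re, Im);  // _Complex result as a value pair
RValue Agg    = RValue::getAggregate(Slot);  // aggregate result living in memory
RValue None   = RValue::getIgnored();        // result intentionally discarded

if (Scalar.isScalar()) {
  llvm::Value *Raw = Scalar.getScalarVal();  // unwrap back to the raw llvm::Value
  (void)Raw;
}
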
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:386
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:170
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:160
const T & getABIInfo() const
Definition: TargetInfo.h:57
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:142
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:179
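
The testFPKind hook above is the escape hatch for targets that want custom lowering of the floating-point classification builtins (__builtin_isnan and friends). A hedged sketch of consulting it, assuming CGF, an operand V, and the BuiltinID are in scope:

// A non-null result means the target supplied its own instruction sequence;
// a null result means "no target-specific lowering, use the generic path".
if (llvm::Value *Custom =
        CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
  return RValue::get(Custom);
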
Complex values, per C99 6.2.5p11.
Definition: Type.h:3145
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4232
Represents a sugar type with __counted_by or __sized_by annotations, including their _or_null variants.
Definition: Type.h:3306
DynamicCountPointerKind getKind() const
Definition: Type.h:3336
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:2016
T * getAttr() const
Definition: DeclBase.h:576
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:596
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:246
DeclContext * getDeclContext()
Definition: DeclBase.h:451
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:432
bool hasAttr() const
Definition: DeclBase.h:580
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1493
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3095
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3090
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3086
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:830
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy technique that we want to, even if the expression has side-effects.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3587
std::optional< std::string > tryEvaluateString(ASTContext &Ctx) const
If the current Expr can be evaluated to a pointer to a null-terminated constant string,...
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3070
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3963
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic expression.
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:226
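
The Expr evaluation helpers above are what let a builtin emitter fold arguments that must be compile-time constants. A minimal sketch, assuming CGF and an argument expression Arg are in scope:

// Strict path: the operand must be an integer constant expression.
if (std::optional<llvm::APSInt> C =
        Arg->getIntegerConstantExpr(CGF.getContext())) {
  uint64_t Imm = C->getZExtValue();  // immediate operand for the lowered call
  (void)Imm;
}

// Looser path: fold anything the constant evaluator can handle.
Expr::EvalResult Result;
if (Arg->EvaluateAsInt(Result, CGF.getContext())) {
  llvm::APSInt Folded = Result.Val.getInt();
  (void)Folded;                      // a real emitter would materialize this value
}
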
Represents difference between two FPOptions values.
Definition: LangOptions.h:978
Represents a member of a struct/union/class.
Definition: Decl.h:3033
const FieldDecl * findCountedByField() const
Find the FieldDecl specified in a FAM's "counted_by" attribute.
Definition: Decl.cpp:4705
Represents a function declaration or definition.
Definition: Decl.h:1935
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2672
unsigned getBuiltinID(bool ConsiderWrapperFunctions=false) const
Returns a value indicating whether this function corresponds to a builtin function.
Definition: Decl.cpp:3638
Represents a prototype with parameter type info, e.g.
Definition: Type.h:5102
@ SME_PStateSMEnabledMask
Definition: Type.h:4587
@ SME_PStateSMCompatibleMask
Definition: Type.h:4588
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5402
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3236
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:280
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:296
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - Represents the OpenCL 2.0 pipe type.
Definition: Type.h:7780
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3198
QualType getPointeeType() const
Definition: Type.h:3208
A (possibly-)qualified type.
Definition: Type.h:929
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:8015
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2893
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:8057
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2889
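
A small sketch of the QualType queries above as they typically appear when lowering a pointer-taking builtin; E is an assumed CallExpr*, and only the accessors listed here are used:

QualType ArgTy    = E->getArg(0)->getType();
QualType PointeeT = ArgTy->getPointeeType();
bool IsVolatile   = PointeeT.isVolatileQualified();  // affects load/store emission
LangAS AS         = PointeeT.getAddressSpace();      // relevant for pointer casts
(void)IsVolatile; (void)AS;
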
The collection of all-type qualifiers we support.
Definition: Type.h:324
Represents a struct/union/class.
Definition: Decl.h:4148
field_range fields() const
Definition: Decl.h:4354
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool setsFPMR() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:345
bool isUnion() const
Definition: Decl.h:3770
bool areArgsDestroyedLeftToRightInCallee() const
Are arguments to a call destroyed left to right in the callee? This is a fundamental language change,...
Definition: TargetCXXABI.h:188
Exposes information about the current target.
Definition: TargetInfo.h:220
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:311
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:697
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1262
bool isLittleEndian() const
Definition: TargetInfo.h:1673
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:861
bool isBigEndian() const
Definition: TargetInfo.h:1672
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
Definition: TargetInfo.h:1333
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1679
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the target machine.
Definition: TargetInfo.h:733
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1828
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1916
bool isBlockPointerType() const
Definition: Type.h:8200
bool isVoidType() const
Definition: Type.h:8510
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2180
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:710
bool hasIntegerRepresentation() const
Determine whether this type has an integer representation of some sort, e.g., it is an integer type or a vector.
Definition: Type.cpp:2055
bool isArrayType() const
Definition: Type.h:8258
bool isCountAttributedType() const
Definition: Type.cpp:727
bool isPointerType() const
Definition: Type.h:8186
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8550
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8800
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:738
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8625
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e.g., it is an unsigned integer type or a vector.
Definition: Type.cpp:2270
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g., it is a signed integer type or a vector.
Definition: Type.cpp:2220
bool isObjCObjectPointerType() const
Definition: Type.h:8328
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g., it is a floating-point type or a vector.
Definition: Type.cpp:2292
bool isVectorType() const
Definition: Type.h:8298
bool isFloatingType() const
Definition: Type.cpp:2283
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2230
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8731
bool isRecordType() const
Definition: Type.h:8286
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2513
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2581
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1920
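
The Type predicates above drive the usual floating-point / signed / unsigned dispatch when a builtin maps to differently-named intrinsics. A minimal sketch, assuming CGF and the argument type Ty; the three intrinsic IDs below are placeholders, not what any particular builtin selects:

unsigned IID;
if (Ty->hasFloatingRepresentation())
  IID = llvm::Intrinsic::maxnum;   // placeholder FP intrinsic
else if (Ty->hasSignedIntegerRepresentation())
  IID = llvm::Intrinsic::smax;     // placeholder signed-integer intrinsic
else
  IID = llvm::Intrinsic::umax;     // placeholder unsigned-integer intrinsic
llvm::Function *F = CGF.CGM.getIntrinsic(IID, {CGF.ConvertType(Ty)});
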
Represent the declaration of a variable (in which case it is an lvalue) or a function (in which case it ...
Definition: Decl.h:671
QualType getType() const
Definition: Decl.h:682
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:4034
unsigned getNumElements() const
Definition: Type.h:4049
QualType getElementType() const
Definition: Type.h:4048
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw statement).
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:180
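
computeOSLogBufferLayout is the analysis behind the os_log helper generation referenced earlier; a minimal sketch of querying it, assuming E is the os_log CallExpr:

analyze_os_log::OSLogBufferLayout Layout;
if (analyze_os_log::computeOSLogBufferLayout(CGF.getContext(), E, Layout)) {
  unsigned char Summary = Layout.getSummaryByte();  // header byte: buffer summary flags
  unsigned char NumArgs = Layout.getNumArgsByte();  // header byte: number of encoded items
  (void)Summary; (void)NumArgs;
}
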
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: FixedPoint.h:19
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1212
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2355
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:447
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:726
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1695
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2128
bool Ret(InterpState &S, CodePtr &PC)
Definition: Interp.h:318
The JSON file list parser is used to communicate input to InstallAPI.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:154
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
SyncScope
Defines synch scope values used internally by clang.
Definition: SyncScope.h:42
llvm::StringRef getAsString(SyncScope S)
Definition: SyncScope.h:60
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
int int32_t
unsigned long uint64_t
long int64_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:159
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
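
SanitizerSet is how call sites tell EmitTypeCheck (listed above) which checks have already been satisfied and can be skipped. A minimal sketch; the particular sanitizer kinds are illustrative:

SanitizerSet SkippedChecks;
SkippedChecks.set(SanitizerKind::Null, true);       // pointer already proven non-null
SkippedChecks.set(SanitizerKind::Alignment, true);  // alignment already established
bool WantsNullCheck = !SkippedChecks.has(SanitizerKind::Null);  // false here
SkippedChecks.clear();                              // back to "skip nothing"
(void)WantsNullCheck;

The resulting set would then be passed as the SkippedChecks argument of EmitTypeCheck.
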
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742