CGBuiltin.cpp
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CGValue.h"
21#include "CodeGenFunction.h"
22#include "CodeGenModule.h"
23#include "ConstantEmitter.h"
24#include "PatternInit.h"
25#include "TargetInfo.h"
27#include "clang/AST/Attr.h"
28#include "clang/AST/Decl.h"
29#include "clang/AST/Expr.h"
30#include "clang/AST/OSLog.h"
32#include "clang/AST/Type.h"
38#include "llvm/ADT/APFloat.h"
39#include "llvm/ADT/APInt.h"
40#include "llvm/ADT/FloatingPointMode.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/StringExtras.h"
43#include "llvm/Analysis/ValueTracking.h"
44#include "llvm/IR/DataLayout.h"
45#include "llvm/IR/InlineAsm.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsBPF.h"
51#include "llvm/IR/IntrinsicsDirectX.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/IntrinsicsNVPTX.h"
54#include "llvm/IR/IntrinsicsPowerPC.h"
55#include "llvm/IR/IntrinsicsR600.h"
56#include "llvm/IR/IntrinsicsRISCV.h"
57#include "llvm/IR/IntrinsicsS390.h"
58#include "llvm/IR/IntrinsicsWebAssembly.h"
59#include "llvm/IR/IntrinsicsX86.h"
60#include "llvm/IR/MDBuilder.h"
61#include "llvm/IR/MatrixBuilder.h"
62#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
63#include "llvm/Support/AMDGPUAddrSpace.h"
64#include "llvm/Support/ConvertUTF.h"
65#include "llvm/Support/MathExtras.h"
66#include "llvm/Support/ScopedPrinter.h"
67#include "llvm/TargetParser/AArch64TargetParser.h"
68#include "llvm/TargetParser/RISCVISAInfo.h"
69#include "llvm/TargetParser/RISCVTargetParser.h"
70#include "llvm/TargetParser/X86TargetParser.h"
71#include <optional>
72#include <utility>
73
74using namespace clang;
75using namespace CodeGen;
76using namespace llvm;
77
78static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
79 Align AlignmentInBytes) {
80 ConstantInt *Byte;
81 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
82 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
83 // Nothing to initialize.
84 return;
85 case LangOptions::TrivialAutoVarInitKind::Zero:
86 Byte = CGF.Builder.getInt8(0x00);
87 break;
88 case LangOptions::TrivialAutoVarInitKind::Pattern: {
89 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
90 Byte = llvm::dyn_cast<llvm::ConstantInt>(
91 initializationPatternFor(CGF.CGM, Int8));
92 break;
93 }
94 }
95 if (CGF.CGM.stopAutoInit())
96 return;
97 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
98 I->addAnnotationMetadata("auto-init");
99}
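// Illustration (not from the original file): with -ftrivial-auto-var-init=zero,
// a call such as
//   void *buf = __builtin_alloca(n);
// is followed by a memset of 0x00 over the allocation, roughly
//   %buf = alloca i8, i64 %n, align 16
//   call void @llvm.memset.p0.i64(ptr align 16 %buf, i8 0, i64 %n, i1 false)
// with the memset tagged with "auto-init" annotation metadata; =pattern uses
// the repeated byte from initializationPatternFor instead.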
100
102 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
103
104 Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
105 Value *CMP;
106 Value *LastInstr;
107
108 if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
109 FZeroConst = ConstantVector::getSplat(
110 ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
111 auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
112 CMP = CGF->Builder.CreateIntrinsic(
113 CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
114 {FCompInst}, nullptr);
115 } else
116 CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
117
118 if (CGF->CGM.getTarget().getTriple().isDXIL())
119 LastInstr = CGF->Builder.CreateIntrinsic(
120 CGF->VoidTy, llvm::Intrinsic::dx_discard, {CMP}, nullptr);
121 else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
122 BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
123 BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
124
125 CGF->Builder.CreateCondBr(CMP, LT0, End);
126
127 CGF->Builder.SetInsertPoint(LT0);
128
129 CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_discard, {},
130 nullptr);
131
132 LastInstr = CGF->Builder.CreateBr(End);
133
134 CGF->Builder.SetInsertPoint(End);
135 } else {
136 llvm_unreachable("Backend Codegen not supported.");
137 }
138
139 return LastInstr;
140}
141
143 Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
144 const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
145 const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
146
147 CallArgList Args;
148 LValue Op1TmpLValue =
149 CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
150 LValue Op2TmpLValue =
151 CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
152
154 Args.reverseWritebacks();
155
156 Value *LowBits = nullptr;
157 Value *HighBits = nullptr;
158
159 if (CGF->CGM.getTarget().getTriple().isDXIL()) {
160
161 llvm::Type *RetElementTy = CGF->Int32Ty;
162 if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
163 RetElementTy = llvm::VectorType::get(
164 CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
165 auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
166
167 CallInst *CI = CGF->Builder.CreateIntrinsic(
168 RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
169
170 LowBits = CGF->Builder.CreateExtractValue(CI, 0);
171 HighBits = CGF->Builder.CreateExtractValue(CI, 1);
172
173 } else {
174    // For non-DXIL targets, we generate the instructions directly.
175
176 if (!Op0->getType()->isVectorTy()) {
177 FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
178 Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
179
180 LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
181 HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
182 } else {
183 int NumElements = 1;
184 if (const auto *VecTy =
185 E->getArg(0)->getType()->getAs<clang::VectorType>())
186 NumElements = VecTy->getNumElements();
187
188 FixedVectorType *Uint32VecTy =
189 FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
190 Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
191 if (NumElements == 1) {
192 LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
193 HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
194 } else {
195 SmallVector<int> EvenMask, OddMask;
196 for (int I = 0, E = NumElements; I != E; ++I) {
197 EvenMask.push_back(I * 2);
198 OddMask.push_back(I * 2 + 1);
199 }
200 LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
201 HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
202 }
203 }
204 }
205 CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
206 auto *LastInst =
207 CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
208 CGF->EmitWritebacks(Args);
209 return LastInst;
210}
211
213 assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
214 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
215 "asdouble operands types mismatch");
216 Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
217 Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));
218
219 llvm::Type *ResultType = CGF.DoubleTy;
220 int N = 1;
221 if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
222 N = VTy->getNumElements();
223 ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
224 }
225
226 if (CGF.CGM.getTarget().getTriple().isDXIL())
227 return CGF.Builder.CreateIntrinsic(
228 /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
229 ArrayRef<Value *>{OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");
230
231 if (!E->getArg(0)->getType()->isVectorType()) {
232 OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
233 OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
234 }
235
236  SmallVector<int> Mask;
237  for (int i = 0; i < N; i++) {
238 Mask.push_back(i);
239 Mask.push_back(i + N);
240 }
241
242 Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);
243
244 return CGF.Builder.CreateBitCast(BitVec, ResultType);
245}
246
247/// Helper for the read/write/add/inc X18 builtins: read the X18 register and
248/// return it as an i8 pointer.
250 LLVMContext &Context = CGF.CGM.getLLVMContext();
251 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
252 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
253 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
254 llvm::Function *F =
255 CGF.CGM.getIntrinsic(llvm::Intrinsic::read_register, {CGF.Int64Ty});
256 llvm::Value *X18 = CGF.Builder.CreateCall(F, Metadata);
257 return CGF.Builder.CreateIntToPtr(X18, CGF.Int8PtrTy);
258}
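// Illustration (assumption about the callers, not from the original file): the
// MSVC-style AArch64 __readx18*/__writex18* builtins are built on top of this
// helper, which emits roughly
//   %x18 = call i64 @llvm.read_register.i64(metadata !{!"x18"})
//   %ptr = inttoptr i64 %x18 to ptr
// and the individual builtins then offset, load from, or store to that pointer.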
259
260/// getBuiltinLibFunction - Given a builtin id for a function like
261/// "__builtin_fabsf", return a Function* for "fabsf".
262llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
263                                                     unsigned BuiltinID) {
264 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
265
266 // Get the name, skip over the __builtin_ prefix (if necessary).
267 StringRef Name;
268 GlobalDecl D(FD);
269
270 // TODO: This list should be expanded or refactored after all GCC-compatible
271 // std libcall builtins are implemented.
272 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
273 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
274 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
275 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
276 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
277 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
278 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
279 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
280 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
281 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
282 {Builtin::BI__builtin_printf, "__printfieee128"},
283 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
284 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
285 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
286 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
287 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
288 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
289 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
290 {Builtin::BI__builtin_scanf, "__scanfieee128"},
291 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
292 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
293 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
294 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
295 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
296 };
297
298 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
299  // IBM 'long double' (i.e. __ibm128). Map them to the 'double' versions
300  // when the target uses 64-bit 'long double'.
301 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
302 {Builtin::BI__builtin_frexpl, "frexp"},
303 {Builtin::BI__builtin_ldexpl, "ldexp"},
304 {Builtin::BI__builtin_modfl, "modf"},
305 };
306
307 // If the builtin has been declared explicitly with an assembler label,
308 // use the mangled name. This differs from the plain label on platforms
309 // that prefix labels.
310 if (FD->hasAttr<AsmLabelAttr>())
311 Name = getMangledName(D);
312 else {
313    // TODO: This mutation should also be applied to targets other than PPC,
314    // once the backend supports IEEE 128-bit style libcalls.
315 if (getTriple().isPPC64() &&
316 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
317 F128Builtins.contains(BuiltinID))
318 Name = F128Builtins[BuiltinID];
319 else if (getTriple().isOSAIX() &&
320 &getTarget().getLongDoubleFormat() ==
321 &llvm::APFloat::IEEEdouble() &&
322 AIXLongDouble64Builtins.contains(BuiltinID))
323 Name = AIXLongDouble64Builtins[BuiltinID];
324 else
325 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
326 }
327
328 llvm::FunctionType *Ty =
329 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
330
331 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
332}
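// Illustration (not from the original file): on a powerpc64le target where
// 'long double' is IEEE quad (e.g. -mabi=ieeelongdouble), a call to
// __builtin_printf is resolved to the "__printfieee128" libcall instead of
// "printf"; on other targets the "__builtin_" prefix is simply dropped, so
// __builtin_fabsf resolves to "fabsf".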
333
334/// Emit the conversions required to turn the given value into an
335/// integer of the given size.
336static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
337 QualType T, llvm::IntegerType *IntType) {
338 V = CGF.EmitToMemory(V, T);
339
340 if (V->getType()->isPointerTy())
341 return CGF.Builder.CreatePtrToInt(V, IntType);
342
343 assert(V->getType() == IntType);
344 return V;
345}
346
347static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
348 QualType T, llvm::Type *ResultType) {
349 V = CGF.EmitFromMemory(V, T);
350
351 if (ResultType->isPointerTy())
352 return CGF.Builder.CreateIntToPtr(V, ResultType);
353
354 assert(V->getType() == ResultType);
355 return V;
356}
357
358static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
359  ASTContext &Ctx = CGF.getContext();
360 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
361  unsigned Bytes = Ptr.getElementType()->isPointerTy()
362                       ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
363                       : Ptr.getElementType()->getScalarSizeInBits() / 8;
364 unsigned Align = Ptr.getAlignment().getQuantity();
365 if (Align % Bytes != 0) {
366 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
367 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
368 // Force address to be at least naturally-aligned.
369 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
370 }
371 return Ptr;
372}
373
374/// Utility to insert an atomic instruction based on Intrinsic::ID
375/// and the expression node.
376static Value *MakeBinaryAtomicValue(
377    CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
378 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
379
380 QualType T = E->getType();
381  assert(E->getArg(0)->getType()->isPointerType());
382  assert(CGF.getContext().hasSameUnqualifiedType(T,
383      E->getArg(0)->getType()->getPointeeType()));
384 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
385
386 Address DestAddr = CheckAtomicAlignment(CGF, E);
387
388 llvm::IntegerType *IntType = llvm::IntegerType::get(
389 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
390
391 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
392 llvm::Type *ValueType = Val->getType();
393 Val = EmitToInt(CGF, Val, T, IntType);
394
395 llvm::Value *Result =
396 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
397 return EmitFromInt(CGF, Result, T, ValueType);
398}
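// Illustration (not from the original file): for
//   __sync_fetch_and_add(&i, 1);
// this helper emits roughly
//   %old = atomicrmw add ptr %i, i32 1 seq_cst
// and returns the old value, converted back to the source type via
// EmitFromInt (including inttoptr for pointer-typed operands).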
399
401 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
402 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
403
404 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
405 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
406 LV.setNontemporal(true);
407 CGF.EmitStoreOfScalar(Val, LV, false);
408 return nullptr;
409}
410
412 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
413
414 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
415 LV.setNontemporal(true);
416 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
417}
418
420 llvm::AtomicRMWInst::BinOp Kind,
421 const CallExpr *E) {
422 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
423}
424
425/// Utility to insert an atomic instruction based Intrinsic::ID and
426/// the expression node, where the return value is the result of the
427/// operation.
429 llvm::AtomicRMWInst::BinOp Kind,
430 const CallExpr *E,
431 Instruction::BinaryOps Op,
432 bool Invert = false) {
433 QualType T = E->getType();
434  assert(E->getArg(0)->getType()->isPointerType());
435  assert(CGF.getContext().hasSameUnqualifiedType(T,
436      E->getArg(0)->getType()->getPointeeType()));
437 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
438
439 Address DestAddr = CheckAtomicAlignment(CGF, E);
440
441 llvm::IntegerType *IntType = llvm::IntegerType::get(
442 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
443
444 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
445 llvm::Type *ValueType = Val->getType();
446 Val = EmitToInt(CGF, Val, T, IntType);
447
448 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
449 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
450 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
451 if (Invert)
452 Result =
453 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
454 llvm::ConstantInt::getAllOnesValue(IntType));
455 Result = EmitFromInt(CGF, Result, T, ValueType);
456 return RValue::get(Result);
457}
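// Illustration (not from the original file): __sync_add_and_fetch(&i, 1) must
// return the *new* value, so this helper emits roughly
//   %old = atomicrmw add ptr %i, i32 1 seq_cst
//   %new = add i32 %old, 1
// For __sync_nand_and_fetch the Invert path additionally XORs with -1, since
// the result is defined as ~(old & val).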
458
459/// Utility to insert an atomic cmpxchg instruction.
460///
461/// @param CGF The current codegen function.
462/// @param E Builtin call expression to convert to cmpxchg.
463/// arg0 - address to operate on
464/// arg1 - value to compare with
465/// arg2 - new value
466/// @param ReturnBool Specifies whether to return success flag of
467/// cmpxchg result or the old value.
468///
469/// @returns result of cmpxchg, according to ReturnBool
470///
471/// Note: To lower Microsoft's _InterlockedCompareExchange* intrinsics, invoke
472/// EmitAtomicCmpXchgForMSIntrin instead.
473static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
474                                     bool ReturnBool) {
475 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
476 Address DestAddr = CheckAtomicAlignment(CGF, E);
477
478 llvm::IntegerType *IntType = llvm::IntegerType::get(
479 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
480
481 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
482 llvm::Type *ValueType = Cmp->getType();
483 Cmp = EmitToInt(CGF, Cmp, T, IntType);
484 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
485
486  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
487      DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
488      llvm::AtomicOrdering::SequentiallyConsistent);
489 if (ReturnBool)
490 // Extract boolean success flag and zext it to int.
491 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
492 CGF.ConvertType(E->getType()));
493 else
494 // Extract old value and emit it using the same type as compare value.
495 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
496 ValueType);
497}
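// Illustration (not from the original file):
//   __sync_bool_compare_and_swap(&i, c, n)  -> cmpxchg, extract the i1 success
//                                              flag, zext to the result type
//   __sync_val_compare_and_swap(&i, c, n)   -> cmpxchg, extract the loaded
//                                              (old) value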
498
499/// This function should be invoked to emit atomic cmpxchg for Microsoft's
500/// _InterlockedCompareExchange* intrinsics which have the following signature:
501/// T _InterlockedCompareExchange(T volatile *Destination,
502/// T Exchange,
503/// T Comparand);
504///
505/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
506/// cmpxchg *Destination, Comparand, Exchange.
507/// So we need to swap Comparand and Exchange when invoking
508/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
509/// function MakeAtomicCmpXchgValue since it expects the arguments to be
510/// already swapped.
511
512static
513Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
514    AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
515  assert(E->getArg(0)->getType()->isPointerType());
516  assert(CGF.getContext().hasSameUnqualifiedType(
517      E->getType(), E->getArg(0)->getType()->getPointeeType()));
518  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
519                                                 E->getArg(1)->getType()));
520  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
521                                                 E->getArg(2)->getType()));
522
523 Address DestAddr = CheckAtomicAlignment(CGF, E);
524
525 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
526 auto *RTy = Exchange->getType();
527
528 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
529
530 if (RTy->isPointerTy()) {
531 Exchange = CGF.Builder.CreatePtrToInt(Exchange, CGF.IntPtrTy);
532 Comparand = CGF.Builder.CreatePtrToInt(Comparand, CGF.IntPtrTy);
533 }
534
535 // For Release ordering, the failure ordering should be Monotonic.
536 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
537 AtomicOrdering::Monotonic :
538 SuccessOrdering;
539
540 // The atomic instruction is marked volatile for consistency with MSVC. This
541 // blocks the few atomics optimizations that LLVM has. If we want to optimize
542 // _Interlocked* operations in the future, we will have to remove the volatile
543 // marker.
544 auto *CmpXchg = CGF.Builder.CreateAtomicCmpXchg(
545 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
546 CmpXchg->setVolatile(true);
547
548 auto *Result = CGF.Builder.CreateExtractValue(CmpXchg, 0);
549 if (RTy->isPointerTy()) {
550 Result = CGF.Builder.CreateIntToPtr(Result, RTy);
551 }
552
553 return Result;
554}
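// Illustration (not from the original file): for
//   _InterlockedCompareExchange(&Dest, Exchange, Comparand)
// the last two operands are swapped to match the cmpxchg operand order:
//   %pair = cmpxchg volatile ptr %Dest, i32 %Comparand, i32 %Exchange seq_cst seq_cst
//   %old  = extractvalue { i32, i1 } %pair, 0
// and %old is returned, matching MSVC's "return the original value of
// *Destination" semantics.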
555
556// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
557// prototyped like this:
558//
559// unsigned char _InterlockedCompareExchange128...(
560// __int64 volatile * _Destination,
561// __int64 _ExchangeHigh,
562// __int64 _ExchangeLow,
563// __int64 * _ComparandResult);
564//
565// Note that Destination is assumed to be at least 16-byte aligned, despite
566// being typed int64.
567
569 const CallExpr *E,
570 AtomicOrdering SuccessOrdering) {
571 assert(E->getNumArgs() == 4);
572 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
573 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
574 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
575 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
576
577 assert(DestPtr->getType()->isPointerTy());
578 assert(!ExchangeHigh->getType()->isPointerTy());
579 assert(!ExchangeLow->getType()->isPointerTy());
580
581 // For Release ordering, the failure ordering should be Monotonic.
582 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
583 ? AtomicOrdering::Monotonic
584 : SuccessOrdering;
585
586 // Convert to i128 pointers and values. Alignment is also overridden for
587 // destination pointer.
588 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
589  Address DestAddr(DestPtr, Int128Ty,
590                   CGF.getContext().toCharUnitsFromBits(128));
591  ComparandAddr = ComparandAddr.withElementType(Int128Ty);
592
593 // (((i128)hi) << 64) | ((i128)lo)
594 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
595 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
596 ExchangeHigh =
597 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
598 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
599
600 // Load the comparand for the instruction.
601 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
602
603 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
604 SuccessOrdering, FailureOrdering);
605
606 // The atomic instruction is marked volatile for consistency with MSVC. This
607 // blocks the few atomics optimizations that LLVM has. If we want to optimize
608 // _Interlocked* operations in the future, we will have to remove the volatile
609 // marker.
610 CXI->setVolatile(true);
611
612 // Store the result as an outparameter.
613 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
614 ComparandAddr);
615
616 // Get the success boolean and zero extend it to i8.
617 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
618 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
619}
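// Illustration (not from the original file): the exchange value for
// _InterlockedCompareExchange128 is assembled from its two i64 halves, roughly
//   %hi  = zext i64 %ExchangeHigh to i128
//   %hi2 = shl i128 %hi, 64
//   %lo  = zext i64 %ExchangeLow to i128
//   %exc = or i128 %hi2, %lo
// the comparand is loaded through _ComparandResult, the old value from the
// i128 cmpxchg is stored back through that pointer, and the i1 success flag
// is zero-extended to the i8 return value.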
620
622 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
623 assert(E->getArg(0)->getType()->isPointerType());
624
625 auto *IntTy = CGF.ConvertType(E->getType());
626 Address DestAddr = CheckAtomicAlignment(CGF, E);
627 auto *Result = CGF.Builder.CreateAtomicRMW(
628 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
629 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
630}
631
633 CodeGenFunction &CGF, const CallExpr *E,
634 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
635 assert(E->getArg(0)->getType()->isPointerType());
636
637 auto *IntTy = CGF.ConvertType(E->getType());
638 Address DestAddr = CheckAtomicAlignment(CGF, E);
639 auto *Result = CGF.Builder.CreateAtomicRMW(
640 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
641 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
642}
643
644// Build a plain volatile load.
646 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
647 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
648 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
649 llvm::Type *ITy =
650 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
651 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
652 Load->setVolatile(true);
653 return Load;
654}
655
656// Build a plain volatile store.
658 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
659 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
660 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
661 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
662 llvm::StoreInst *Store =
663 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
664 Store->setVolatile(true);
665 return Store;
666}
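// Illustration (assumption about the callers, not from the original file):
// the MSVC __iso_volatile_load32 / __iso_volatile_store32 family is lowered
// through these helpers into plain volatile accesses at the pointee width,
// e.g. "load volatile i32" / "store volatile i32", with no fences attached.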
667
668// Emit a simple mangled intrinsic that has 1 argument and a return type
669// matching the argument type. Depending on mode, this may be a constrained
670// floating-point intrinsic.
672 const CallExpr *E, unsigned IntrinsicID,
673 unsigned ConstrainedIntrinsicID) {
674 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
675
676 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
677 if (CGF.Builder.getIsFPConstrained()) {
678 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
679 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
680 } else {
681 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
682 return CGF.Builder.CreateCall(F, Src0);
683 }
684}
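// Illustration (not from the original file): __builtin_sqrt(x) normally
// lowers to
//   call double @llvm.sqrt.f64(double %x)
// but when strict floating point is in effect (e.g. under
// -ffp-exception-behavior=strict) the constrained variant is emitted instead:
//   call double @llvm.experimental.constrained.sqrt.f64(double %x,
//        metadata !"round.dynamic", metadata !"fpexcept.strict")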
685
686// Emit an intrinsic that has 2 operands of the same type as its result.
687// Depending on mode, this may be a constrained floating-point intrinsic.
689 const CallExpr *E, unsigned IntrinsicID,
690 unsigned ConstrainedIntrinsicID) {
691 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
692 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
693
694 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
695 if (CGF.Builder.getIsFPConstrained()) {
696 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
697 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
698 } else {
699 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
700 return CGF.Builder.CreateCall(F, { Src0, Src1 });
701 }
702}
703
704// Has second type mangled argument.
706 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
707 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
708 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
709 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
710
711 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
712 if (CGF.Builder.getIsFPConstrained()) {
713 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
714 {Src0->getType(), Src1->getType()});
715 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
716 }
717
718 Function *F =
719 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
720 return CGF.Builder.CreateCall(F, {Src0, Src1});
721}
722
723// Emit an intrinsic that has 3 operands of the same type as its result.
724// Depending on mode, this may be a constrained floating-point intrinsic.
726 const CallExpr *E, unsigned IntrinsicID,
727 unsigned ConstrainedIntrinsicID) {
728 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
729 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
730 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
731
732 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
733 if (CGF.Builder.getIsFPConstrained()) {
734 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
735 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
736 } else {
737 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
738 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
739 }
740}
741
742// Emit an intrinsic where all operands are of the same type as the result.
743// Depending on mode, this may be a constrained floating-point intrinsic.
745 unsigned IntrinsicID,
746 unsigned ConstrainedIntrinsicID,
747 llvm::Type *Ty,
748 ArrayRef<Value *> Args) {
749 Function *F;
750 if (CGF.Builder.getIsFPConstrained())
751 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
752 else
753 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
754
755 if (CGF.Builder.getIsFPConstrained())
756 return CGF.Builder.CreateConstrainedFPCall(F, Args);
757 else
758 return CGF.Builder.CreateCall(F, Args);
759}
760
761// Emit a simple intrinsic that has N scalar arguments and a return type
762// matching the argument type. It is assumed that only the first argument is
763// overloaded.
764template <unsigned N>
766 const CallExpr *E,
767 unsigned IntrinsicID,
768 llvm::StringRef Name = "") {
769 static_assert(N, "expect non-empty argument");
770  SmallVector<Value *, N> Args;
771  for (unsigned I = 0; I < N; ++I)
772 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
773 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
774 return CGF.Builder.CreateCall(F, Args, Name);
775}
776
777// Emit an intrinsic that has 4 operands of the same type as its result.
779 unsigned IntrinsicID) {
780 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
781 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
782 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
783 llvm::Value *Src3 = CGF.EmitScalarExpr(E->getArg(3));
784
785 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
786 return CGF.Builder.CreateCall(F, {Src0, Src1, Src2, Src3});
787}
788
789// Emit an intrinsic that has 1 float or double operand, and 1 integer.
791 const CallExpr *E,
792 unsigned IntrinsicID) {
793 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
794 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
795
796 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
797 return CGF.Builder.CreateCall(F, {Src0, Src1});
798}
799
800// Emit an intrinsic that has overloaded integer result and fp operand.
801static Value *
803 unsigned IntrinsicID,
804 unsigned ConstrainedIntrinsicID) {
805 llvm::Type *ResultType = CGF.ConvertType(E->getType());
806 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
807
808 if (CGF.Builder.getIsFPConstrained()) {
809 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
810 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
811 {ResultType, Src0->getType()});
812 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
813 } else {
814 Function *F =
815 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
816 return CGF.Builder.CreateCall(F, Src0);
817 }
818}
819
821 llvm::Intrinsic::ID IntrinsicID) {
822 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
823 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
824
825 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
826 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
827 llvm::Function *F =
828 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
829 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
830
831 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
832 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
833 CGF.EmitStoreOfScalar(Exp, LV);
834
835 return CGF.Builder.CreateExtractValue(Call, 0);
836}
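// Illustration (not from the original file): __builtin_frexp(x, &e) maps onto
// the two-result intrinsic
//   %r = call { double, i32 } @llvm.frexp.f64.i32(double %x)
// where element 0 (the mantissa) becomes the call's return value and element 1
// (the exponent) is stored through the second argument.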
837
838/// EmitFAbs - Emit a call to @llvm.fabs().
839static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
840  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
841 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
842 Call->setDoesNotAccessMemory();
843 return Call;
844}
845
846/// Emit the computation of the sign bit for a floating point value. Returns
847/// the i1 sign bit value.
849 LLVMContext &C = CGF.CGM.getLLVMContext();
850
851 llvm::Type *Ty = V->getType();
852 int Width = Ty->getPrimitiveSizeInBits();
853 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
854 V = CGF.Builder.CreateBitCast(V, IntTy);
855 if (Ty->isPPC_FP128Ty()) {
856 // We want the sign bit of the higher-order double. The bitcast we just
857 // did works as if the double-double was stored to memory and then
858 // read as an i128. The "store" will put the higher-order double in the
859 // lower address in both little- and big-Endian modes, but the "load"
860 // will treat those bits as a different part of the i128: the low bits in
861 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
862 // we need to shift the high bits down to the low before truncating.
863 Width >>= 1;
864 if (CGF.getTarget().isBigEndian()) {
865 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
866 V = CGF.Builder.CreateLShr(V, ShiftCst);
867 }
868 // We are truncating value in order to extract the higher-order
869 // double, which we will be using to extract the sign from.
870 IntTy = llvm::IntegerType::get(C, Width);
871 V = CGF.Builder.CreateTrunc(V, IntTy);
872 }
873 Value *Zero = llvm::Constant::getNullValue(IntTy);
874 return CGF.Builder.CreateICmpSLT(V, Zero);
875}
876
877/// Checks no arguments or results are passed indirectly in the ABI (i.e. via a
878/// hidden pointer). This is used to check that annotating FP libcalls (which
879/// could set `errno`) with "int" TBAA metadata is safe. If any floating-point
880/// arguments are passed indirectly, setup for the call could be incorrectly
881/// optimized out.
883 auto IsIndirect = [&](ABIArgInfo const &info) {
884 return info.isIndirect() || info.isIndirectAliased() || info.isInAlloca();
885 };
886 return !IsIndirect(FnInfo.getReturnInfo()) &&
887 llvm::none_of(FnInfo.arguments(),
888 [&](CGFunctionInfoArgInfo const &ArgInfo) {
889 return IsIndirect(ArgInfo.info);
890 });
891}
892
894 const CallExpr *E, llvm::Constant *calleeValue) {
895 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
896 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
897 llvm::CallBase *callOrInvoke = nullptr;
898 CGFunctionInfo const *FnInfo = nullptr;
899 RValue Call =
900 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot(),
901 /*Chain=*/nullptr, &callOrInvoke, &FnInfo);
902
903 if (unsigned BuiltinID = FD->getBuiltinID()) {
904 // Check whether a FP math builtin function, such as BI__builtin_expf
905 ASTContext &Context = CGF.getContext();
906    bool ConstWithoutErrnoAndExceptions =
907        Context.BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
908    // Restrict this to targets with errno; for example, macOS doesn't set errno.
909    // TODO: Support builtin functions with a complex return type, e.g. cacosh.
910 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
911 !CGF.Builder.getIsFPConstrained() && Call.isScalar() &&
913 // Emit "int" TBAA metadata on FP math libcalls.
914 clang::QualType IntTy = Context.IntTy;
915 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
916 CGF.CGM.DecorateInstructionWithTBAA(callOrInvoke, TBAAInfo);
917 }
918 }
919 return Call;
920}
921
922/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
923/// depending on IntrinsicID.
924///
925/// \arg CGF The current codegen function.
926/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
927/// \arg X The first argument to the llvm.*.with.overflow.*.
928/// \arg Y The second argument to the llvm.*.with.overflow.*.
929/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
930/// \returns The result (i.e. sum/product) returned by the intrinsic.
931static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
932 const llvm::Intrinsic::ID IntrinsicID,
933 llvm::Value *X, llvm::Value *Y,
934 llvm::Value *&Carry) {
935 // Make sure we have integers of the same width.
936 assert(X->getType() == Y->getType() &&
937 "Arguments must be the same type. (Did you forget to make sure both "
938 "arguments have the same integer width?)");
939
940 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
941 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
942 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
943 return CGF.Builder.CreateExtractValue(Tmp, 0);
944}
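// Illustration (not from the original file): for a 32-bit signed addition this
// expands to roughly
//   %res   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
//   %sum   = extractvalue { i32, i1 } %res, 0
//   %carry = extractvalue { i32, i1 } %res, 1
// callers such as the __builtin_add_overflow lowering then store %sum and use
// %carry as the overflow indicator.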
945
946static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
947 int low, int high) {
948 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
949 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
950 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
951 Call->addRangeRetAttr(CR);
952 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
953 return Call;
954}
955
956namespace {
957 struct WidthAndSignedness {
958 unsigned Width;
959 bool Signed;
960 };
961}
962
963static WidthAndSignedness
965 const clang::QualType Type) {
966 assert(Type->isIntegerType() && "Given type is not an integer.");
967  unsigned Width = context.getIntWidth(Type);
968  bool Signed = Type->isSignedIntegerType();
969  return {Width, Signed};
970}
971
972// Given one or more integer types, this function produces an integer type that
973// encompasses them: any value in one of the given types could be expressed in
974// the encompassing type.
975static struct WidthAndSignedness
976EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
977 assert(Types.size() > 0 && "Empty list of types.");
978
979 // If any of the given types is signed, we must return a signed type.
980 bool Signed = false;
981 for (const auto &Type : Types) {
982 Signed |= Type.Signed;
983 }
984
985 // The encompassing type must have a width greater than or equal to the width
986 // of the specified types. Additionally, if the encompassing type is signed,
987 // its width must be strictly greater than the width of any unsigned types
988 // given.
989 unsigned Width = 0;
990 for (const auto &Type : Types) {
991 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
992 if (Width < MinWidth) {
993 Width = MinWidth;
994 }
995 }
996
997 return {Width, Signed};
998}
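// Worked example (not from the original file): encompassing
// {unsigned, 32 bits} and {signed, 16 bits} must be signed, and the unsigned
// 32-bit member then requires at least 33 bits, so the result is
// {Width = 33, Signed = true}. The __builtin_*_overflow lowering uses this to
// pick a common arithmetic type when the operand types disagree.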
999
1000Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
1001 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
1002 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
1003 ArgValue);
1004}
1005
1006/// Checks if using the result of __builtin_object_size(p, @p From) in place of
1007/// __builtin_object_size(p, @p To) is correct
1008static bool areBOSTypesCompatible(int From, int To) {
1009 // Note: Our __builtin_object_size implementation currently treats Type=0 and
1010 // Type=2 identically. Encoding this implementation detail here may make
1011 // improving __builtin_object_size difficult in the future, so it's omitted.
1012 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
1013}
1014
1015static llvm::Value *
1016getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
1017 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
1018}
1019
1020llvm::Value *
1021CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
1022 llvm::IntegerType *ResType,
1023 llvm::Value *EmittedE,
1024 bool IsDynamic) {
1025 uint64_t ObjectSize;
1026 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
1027 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
1028 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
1029}
1030
1031const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
1032    ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
1033 uint64_t &Offset) {
1034 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
1035 getLangOpts().getStrictFlexArraysLevel();
1036 uint32_t FieldNo = 0;
1037
1038 if (RD->isImplicit())
1039 return nullptr;
1040
1041 for (const FieldDecl *FD : RD->fields()) {
1042    if ((!FAMDecl || FD == FAMDecl) &&
1043        Decl::isFlexibleArrayMemberLike(
1044            Ctx, FD, FD->getType(), StrictFlexArraysLevel,
1045 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
1046 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1047 Offset += Layout.getFieldOffset(FieldNo);
1048 return FD;
1049 }
1050
1051 QualType Ty = FD->getType();
1052    if (Ty->isRecordType()) {
1053      if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
1054              Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
1055 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
1056 Offset += Layout.getFieldOffset(FieldNo);
1057 return Field;
1058 }
1059 }
1060
1061 if (!RD->isUnion())
1062 ++FieldNo;
1063 }
1064
1065 return nullptr;
1066}
1067
1068static unsigned CountCountedByAttrs(const RecordDecl *RD) {
1069 unsigned Num = 0;
1070
1071 for (const FieldDecl *FD : RD->fields()) {
1072 if (FD->getType()->isCountAttributedType())
1073 return ++Num;
1074
1075 QualType Ty = FD->getType();
1076    if (Ty->isRecordType())
1077      Num += CountCountedByAttrs(Ty->getAsRecordDecl());
1078  }
1079
1080 return Num;
1081}
1082
1083llvm::Value *
1084CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
1085 llvm::IntegerType *ResType) {
1086 // The code generated here calculates the size of a struct with a flexible
1087 // array member that uses the counted_by attribute. There are two instances
1088 // we handle:
1089 //
1090 // struct s {
1091 // unsigned long flags;
1092 // int count;
1093 // int array[] __attribute__((counted_by(count)));
1094 // }
1095 //
1096 // 1) bdos of the flexible array itself:
1097 //
1098 // __builtin_dynamic_object_size(p->array, 1) ==
1099 // p->count * sizeof(*p->array)
1100 //
1101 // 2) bdos of a pointer into the flexible array:
1102 //
1103 // __builtin_dynamic_object_size(&p->array[42], 1) ==
1104 // (p->count - 42) * sizeof(*p->array)
1105 //
1106  //   3) bdos of the whole struct, including the flexible array:
1107 //
1108 // __builtin_dynamic_object_size(p, 1) ==
1109 // max(sizeof(struct s),
1110 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1111 //
1112 ASTContext &Ctx = getContext();
1113 const Expr *Base = E->IgnoreParenImpCasts();
1114 const Expr *Idx = nullptr;
1115
1116 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
1117 UO && UO->getOpcode() == UO_AddrOf) {
1118 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
1119 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
1120 Base = ASE->getBase()->IgnoreParenImpCasts();
1121 Idx = ASE->getIdx()->IgnoreParenImpCasts();
1122
1123 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
1124 int64_t Val = IL->getValue().getSExtValue();
1125 if (Val < 0)
1126 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1127
1128 if (Val == 0)
1129 // The index is 0, so we don't need to take it into account.
1130 Idx = nullptr;
1131 }
1132 } else {
1133 // Potential pointer to another element in the struct.
1134 Base = SubExpr;
1135 }
1136 }
1137
1138 // Get the flexible array member Decl.
1139 const RecordDecl *OuterRD = nullptr;
1140 const FieldDecl *FAMDecl = nullptr;
1141 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
1142 // Check if \p Base is referencing the FAM itself.
1143 const ValueDecl *VD = ME->getMemberDecl();
1145 FAMDecl = dyn_cast<FieldDecl>(VD);
1146 if (!FAMDecl)
1147 return nullptr;
1148 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
1149 // Check if we're pointing to the whole struct.
1150 QualType Ty = DRE->getDecl()->getType();
1151 if (Ty->isPointerType())
1152 Ty = Ty->getPointeeType();
1153 OuterRD = Ty->getAsRecordDecl();
1154
1155 // If we have a situation like this:
1156 //
1157 // struct union_of_fams {
1158 // int flags;
1159 // union {
1160 // signed char normal_field;
1161 // struct {
1162 // int count1;
1163 // int arr1[] __counted_by(count1);
1164 // };
1165 // struct {
1166 // signed char count2;
1167 // int arr2[] __counted_by(count2);
1168 // };
1169 // };
1170 // };
1171 //
1172 // We don't know which 'count' to use in this scenario:
1173 //
1174 // size_t get_size(struct union_of_fams *p) {
1175 // return __builtin_dynamic_object_size(p, 1);
1176 // }
1177 //
1178 // Instead of calculating a wrong number, we give up.
1179 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
1180 return nullptr;
1181 }
1182
1183 if (!OuterRD)
1184 return nullptr;
1185
1186 // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to
1187 // get its offset.
1188 uint64_t Offset = 0;
1189 FAMDecl =
1190 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
1191 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
1192
1193 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
1194 // No flexible array member found or it doesn't have the "counted_by"
1195 // attribute.
1196 return nullptr;
1197
1198 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
1199 if (!CountedByFD)
1200 // Can't find the field referenced by the "counted_by" attribute.
1201 return nullptr;
1202
1203 if (isa<DeclRefExpr>(Base))
1204    // The whole struct is specified in the __bdos. The calculation of the
1205 // whole size of the structure can be done in two ways:
1206 //
1207 // 1) sizeof(struct S) + count * sizeof(typeof(fam))
1208 // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))
1209 //
1210    // The first adds additional padding after the end of the array allocation,
1211    // while the second is more precise but not quite what programmers expect.
1212    // See
1213 // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a
1214 // discussion of the topic.
1215 //
1216 // GCC isn't (currently) able to calculate __bdos on a pointer to the whole
1217 // structure. Therefore, because of the above issue, we'll choose to match
1218 // what GCC does for consistency's sake.
1219 return nullptr;
1220
1221 // Build a load of the counted_by field.
1222 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
1223 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
1224 if (!CountedByInst)
1225 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1226
1227 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1228
1229 // Build a load of the index and subtract it from the count.
1230 Value *IdxInst = nullptr;
1231 if (Idx) {
1232 if (Idx->HasSideEffects(getContext()))
1233 // We can't have side-effects.
1234 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1235
1236 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1237 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1238 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1239
1240 // We go ahead with the calculation here. If the index turns out to be
1241 // negative, we'll catch it at the end.
1242 CountedByInst =
1243 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1244 }
1245
1246 // Calculate how large the flexible array member is in bytes.
1247  const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1248  CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1249  llvm::Constant *ElemSize =
1250 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1251 Value *Res =
1252 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1253 Res = Builder.CreateIntCast(Res, ResType, IsSigned);
1254
1255 // A negative \p IdxInst or \p CountedByInst means that the index lands
1256 // outside of the flexible array member. If that's the case, we want to
1257 // return 0.
1258 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1259 if (IdxInst)
1260 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1261
1262 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1263}
1264
1265/// Returns a Value corresponding to the size of the given expression.
1266/// This Value may be either of the following:
1267/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1268/// it)
1269/// - A call to the @llvm.objectsize intrinsic
1270///
1271/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1272/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1273/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1274llvm::Value *
1275CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1276 llvm::IntegerType *ResType,
1277 llvm::Value *EmittedE, bool IsDynamic) {
1278 // We need to reference an argument if the pointer is a parameter with the
1279 // pass_object_size attribute.
1280 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1281 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1282 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1283 if (Param != nullptr && PS != nullptr &&
1284 areBOSTypesCompatible(PS->getType(), Type)) {
1285 auto Iter = SizeArguments.find(Param);
1286 assert(Iter != SizeArguments.end());
1287
1288 const ImplicitParamDecl *D = Iter->second;
1289 auto DIter = LocalDeclMap.find(D);
1290 assert(DIter != LocalDeclMap.end());
1291
1292 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1293 getContext().getSizeType(), E->getBeginLoc());
1294 }
1295 }
1296
1297 if (IsDynamic) {
1298 // Emit special code for a flexible array member with the "counted_by"
1299 // attribute.
1300 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1301 return V;
1302 }
1303
1304 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1305 // evaluate E for side-effects. In either case, we shouldn't lower to
1306 // @llvm.objectsize.
1307 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1308 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1309
1310 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1311 assert(Ptr->getType()->isPointerTy() &&
1312 "Non-pointer passed to __builtin_object_size?");
1313
1314 Function *F =
1315 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1316
1317 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1318 Value *Min = Builder.getInt1((Type & 2) != 0);
1319  // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1320 Value *NullIsUnknown = Builder.getTrue();
1321 Value *Dynamic = Builder.getInt1(IsDynamic);
1322 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1323}
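// Illustration (not from the original file): a __builtin_object_size(p, 2)
// that cannot be constant-folded lowers to roughly
//   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 true, i1 true, i1 false)
// where the flags are: min (Type & 2), null-is-unknown, and dynamic;
// __builtin_dynamic_object_size passes true for the last flag.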
1324
1325namespace {
1326/// A struct to generically describe a bit test intrinsic.
1327struct BitTest {
1328 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1329 enum InterlockingKind : uint8_t {
1330 Unlocked,
1331 Sequential,
1332 Acquire,
1333 Release,
1334 NoFence
1335 };
1336
1337 ActionKind Action;
1338 InterlockingKind Interlocking;
1339 bool Is64Bit;
1340
1341 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1342};
1343
1344} // namespace
1345
1346BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1347 switch (BuiltinID) {
1348 // Main portable variants.
1349 case Builtin::BI_bittest:
1350 return {TestOnly, Unlocked, false};
1351 case Builtin::BI_bittestandcomplement:
1352 return {Complement, Unlocked, false};
1353 case Builtin::BI_bittestandreset:
1354 return {Reset, Unlocked, false};
1355 case Builtin::BI_bittestandset:
1356 return {Set, Unlocked, false};
1357 case Builtin::BI_interlockedbittestandreset:
1358 return {Reset, Sequential, false};
1359 case Builtin::BI_interlockedbittestandset:
1360 return {Set, Sequential, false};
1361
1362 // X86-specific 64-bit variants.
1363 case Builtin::BI_bittest64:
1364 return {TestOnly, Unlocked, true};
1365 case Builtin::BI_bittestandcomplement64:
1366 return {Complement, Unlocked, true};
1367 case Builtin::BI_bittestandreset64:
1368 return {Reset, Unlocked, true};
1369 case Builtin::BI_bittestandset64:
1370 return {Set, Unlocked, true};
1371 case Builtin::BI_interlockedbittestandreset64:
1372 return {Reset, Sequential, true};
1373 case Builtin::BI_interlockedbittestandset64:
1374 return {Set, Sequential, true};
1375
1376 // ARM/AArch64-specific ordering variants.
1377 case Builtin::BI_interlockedbittestandset_acq:
1378 return {Set, Acquire, false};
1379 case Builtin::BI_interlockedbittestandset_rel:
1380 return {Set, Release, false};
1381 case Builtin::BI_interlockedbittestandset_nf:
1382 return {Set, NoFence, false};
1383 case Builtin::BI_interlockedbittestandreset_acq:
1384 return {Reset, Acquire, false};
1385 case Builtin::BI_interlockedbittestandreset_rel:
1386 return {Reset, Release, false};
1387 case Builtin::BI_interlockedbittestandreset_nf:
1388 return {Reset, NoFence, false};
1389 }
1390 llvm_unreachable("expected only bittest intrinsics");
1391}
1392
1393static char bitActionToX86BTCode(BitTest::ActionKind A) {
1394 switch (A) {
1395 case BitTest::TestOnly: return '\0';
1396 case BitTest::Complement: return 'c';
1397 case BitTest::Reset: return 'r';
1398 case BitTest::Set: return 's';
1399 }
1400 llvm_unreachable("invalid action");
1401}
1402
1403static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1404                                            BitTest BT,
1405 const CallExpr *E, Value *BitBase,
1406 Value *BitPos) {
1407 char Action = bitActionToX86BTCode(BT.Action);
1408 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1409
1410 // Build the assembly.
1412 raw_svector_ostream AsmOS(Asm);
1413 if (BT.Interlocking != BitTest::Unlocked)
1414 AsmOS << "lock ";
1415 AsmOS << "bt";
1416 if (Action)
1417 AsmOS << Action;
1418 AsmOS << SizeSuffix << " $2, ($1)";
1419
1420 // Build the constraints. FIXME: We should support immediates when possible.
1421 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1422 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1423 if (!MachineClobbers.empty()) {
1424 Constraints += ',';
1425 Constraints += MachineClobbers;
1426 }
1427 llvm::IntegerType *IntType = llvm::IntegerType::get(
1428 CGF.getLLVMContext(),
1429 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1430 llvm::FunctionType *FTy =
1431 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1432
1433 llvm::InlineAsm *IA =
1434 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1435 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1436}
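// Illustration (not from the original file): _interlockedbittestandset on
// 32-bit x86 is emitted as inline assembly roughly equivalent to
//   call i8 asm sideeffect "lock btsl $2, ($1)",
//        "={@ccc},r,r,~{cc},~{memory}"(ptr %base, i32 %pos)
// where the "@ccc" output constraint captures the carry flag, i.e. the
// original value of the tested bit.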
1437
1438static llvm::AtomicOrdering
1439getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1440 switch (I) {
1441 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1442 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1443 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1444 case BitTest::Release: return llvm::AtomicOrdering::Release;
1445 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1446 }
1447 llvm_unreachable("invalid interlocking");
1448}
1449
1450/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1451/// bits and a bit position and read and optionally modify the bit at that
1452/// position. The position index can be arbitrarily large, i.e. it can be larger
1453/// than 31 or 63, so we need an indexed load in the general case.
1454static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1455 unsigned BuiltinID,
1456 const CallExpr *E) {
1457 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1458 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1459
1460 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1461
1462 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1463 // indexing operation internally. Use them if possible.
1464 if (CGF.getTarget().getTriple().isX86())
1465 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1466
1467 // Otherwise, use generic code to load one byte and test the bit. Use all but
1468 // the bottom three bits as the array index, and the bottom three bits to form
1469 // a mask.
1470 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1471 Value *ByteIndex = CGF.Builder.CreateAShr(
1472 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1473 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBase, ByteIndex,
1474 "bittest.byteaddr"),
1475 CGF.Int8Ty, CharUnits::One());
1476 Value *PosLow =
1477 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1478 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1479
1480 // The updating instructions will need a mask.
1481 Value *Mask = nullptr;
1482 if (BT.Action != BitTest::TestOnly) {
1483 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1484 "bittest.mask");
1485 }
1486
1487 // Check the action and ordering of the interlocked intrinsics.
1488 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1489
1490 Value *OldByte = nullptr;
1491 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1492 // Emit a combined atomicrmw load/store operation for the interlocked
1493 // intrinsics.
1494 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1495 if (BT.Action == BitTest::Reset) {
1496 Mask = CGF.Builder.CreateNot(Mask);
1497 RMWOp = llvm::AtomicRMWInst::And;
1498 }
1499 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1500 } else {
1501 // Emit a plain load for the non-interlocked intrinsics.
1502 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1503 Value *NewByte = nullptr;
1504 switch (BT.Action) {
1505 case BitTest::TestOnly:
1506 // Don't store anything.
1507 break;
1508 case BitTest::Complement:
1509 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1510 break;
1511 case BitTest::Reset:
1512 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1513 break;
1514 case BitTest::Set:
1515 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1516 break;
1517 }
1518 if (NewByte)
1519 CGF.Builder.CreateStore(NewByte, ByteAddr);
1520 }
1521
1522 // However we loaded the old byte, either by plain load or atomicrmw, shift
1523 // the bit into the low position and mask it to 0 or 1.
1524 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1525 return CGF.Builder.CreateAnd(
1526 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1527}
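// Illustrative sketch, not part of the original file: on a non-x86 target the
// generic path above turns a call such as _bittestandset(Base, Pos) into
// roughly the following C, where Base and Pos name the two builtin arguments:
//
//   unsigned char *Byte = (unsigned char *)Base + (Pos >> 3);
//   unsigned char Mask = 1u << (Pos & 0x7);
//   unsigned char Old = *Byte;             // plain load (non-interlocked form)
//   *Byte = Old | Mask;                    // the BitTest::Set action
//   return (Old >> (Pos & 0x7)) & 1;       // previous value of the tested bit
//
// The _interlockedbittest* variants instead fold the load and the or/and into
// a single atomicrmw with the ordering chosen by getBitTestAtomicOrdering().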
1528
1529static Value *EmitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1530 unsigned BuiltinID,
1531 const CallExpr *E) {
1532 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1533
1534 SmallString<64> Asm;
1535 raw_svector_ostream AsmOS(Asm);
1536 llvm::IntegerType *RetType = CGF.Int32Ty;
1537
1538 switch (BuiltinID) {
1539 case clang::PPC::BI__builtin_ppc_ldarx:
1540 AsmOS << "ldarx ";
1541 RetType = CGF.Int64Ty;
1542 break;
1543 case clang::PPC::BI__builtin_ppc_lwarx:
1544 AsmOS << "lwarx ";
1545 RetType = CGF.Int32Ty;
1546 break;
1547 case clang::PPC::BI__builtin_ppc_lharx:
1548 AsmOS << "lharx ";
1549 RetType = CGF.Int16Ty;
1550 break;
1551 case clang::PPC::BI__builtin_ppc_lbarx:
1552 AsmOS << "lbarx ";
1553 RetType = CGF.Int8Ty;
1554 break;
1555 default:
1556 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1557 }
1558
1559 AsmOS << "$0, ${1:y}";
1560
1561 std::string Constraints = "=r,*Z,~{memory}";
1562 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1563 if (!MachineClobbers.empty()) {
1564 Constraints += ',';
1565 Constraints += MachineClobbers;
1566 }
1567
1568 llvm::Type *PtrType = CGF.UnqualPtrTy;
1569 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1570
1571 llvm::InlineAsm *IA =
1572 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1573 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1574 CI->addParamAttr(
1575 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1576 return CI;
1577}
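// Illustrative sketch, not part of the original file: for
// __builtin_ppc_lwarx(p) the inline asm built above corresponds roughly to
// writing, by hand,
//
//   int Old;
//   __asm__ volatile("lwarx %0, %y1"
//                    : "=r"(Old)
//                    : "Z"(*(volatile int *)p)
//                    : "memory");
//
// The ElementType parameter attribute attached to the pointer operand tells
// the backend the width of the reservation; ldarx/lwarx/lharx/lbarx select
// Int64/Int32/Int16/Int8 return types accordingly.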
1578
1579namespace {
1580enum class MSVCSetJmpKind {
1581 _setjmpex,
1582 _setjmp3,
1583 _setjmp
1584};
1585}
1586
1587/// MSVC handles setjmp a bit differently on different platforms. On every
1588/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1589/// parameters can be passed as variadic arguments, but we always pass none.
1590static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1591 const CallExpr *E) {
1592 llvm::Value *Arg1 = nullptr;
1593 llvm::Type *Arg1Ty = nullptr;
1594 StringRef Name;
1595 bool IsVarArg = false;
1596 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1597 Name = "_setjmp3";
1598 Arg1Ty = CGF.Int32Ty;
1599 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1600 IsVarArg = true;
1601 } else {
1602 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1603 Arg1Ty = CGF.Int8PtrTy;
1604 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1605 Arg1 = CGF.Builder.CreateCall(
1606 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1607 } else
1608 Arg1 = CGF.Builder.CreateCall(
1609 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1610 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1611 }
1612
1613 // Mark the call site and declaration with ReturnsTwice.
1614 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1615 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1616 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1617 llvm::Attribute::ReturnsTwice);
1618 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1619 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1620 ReturnsTwiceAttr, /*Local=*/true);
1621
1622 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1623 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1624 llvm::Value *Args[] = {Buf, Arg1};
1625 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1626 CB->setAttributes(ReturnsTwiceAttr);
1627 return RValue::get(CB);
1628}
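// Illustrative sketch, not part of the original file: for the non-_setjmp3
// kinds this is morally equivalent to emitting
//
//   result = _setjmp((void *)buf, __builtin_frame_address(0));   // or _setjmpex
//
// with @llvm.sponentry used instead of the frame address on AArch64, while the
// 32-bit x86 kind becomes the variadic call _setjmp3((void *)buf, 0). Both the
// call site and the runtime declaration carry 'returns_twice' so the optimizer
// treats them like setjmp proper.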
1629
1630// Many of the MSVC builtins are available on x64, ARM and AArch64; to avoid
1631// repeating code, we handle them here.
1672 __fastfail,
1673};
1674
1675static std::optional<CodeGenFunction::MSVCIntrin>
1676translateArmToMsvcIntrin(unsigned BuiltinID) {
1677 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1678 switch (BuiltinID) {
1679 default:
1680 return std::nullopt;
1681 case clang::ARM::BI_BitScanForward:
1682 case clang::ARM::BI_BitScanForward64:
1683 return MSVCIntrin::_BitScanForward;
1684 case clang::ARM::BI_BitScanReverse:
1685 case clang::ARM::BI_BitScanReverse64:
1686 return MSVCIntrin::_BitScanReverse;
1687 case clang::ARM::BI_InterlockedAnd64:
1688 return MSVCIntrin::_InterlockedAnd;
1689 case clang::ARM::BI_InterlockedExchange64:
1690 return MSVCIntrin::_InterlockedExchange;
1691 case clang::ARM::BI_InterlockedExchangeAdd64:
1692 return MSVCIntrin::_InterlockedExchangeAdd;
1693 case clang::ARM::BI_InterlockedExchangeSub64:
1694 return MSVCIntrin::_InterlockedExchangeSub;
1695 case clang::ARM::BI_InterlockedOr64:
1696 return MSVCIntrin::_InterlockedOr;
1697 case clang::ARM::BI_InterlockedXor64:
1698 return MSVCIntrin::_InterlockedXor;
1699 case clang::ARM::BI_InterlockedDecrement64:
1700 return MSVCIntrin::_InterlockedDecrement;
1701 case clang::ARM::BI_InterlockedIncrement64:
1702 return MSVCIntrin::_InterlockedIncrement;
1703 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1704 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1705 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1706 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1707 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1708 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1709 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1710 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1711 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1712 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1713 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1714 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1715 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1716 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1717 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1718 case clang::ARM::BI_InterlockedExchange8_acq:
1719 case clang::ARM::BI_InterlockedExchange16_acq:
1720 case clang::ARM::BI_InterlockedExchange_acq:
1721 case clang::ARM::BI_InterlockedExchange64_acq:
1722 case clang::ARM::BI_InterlockedExchangePointer_acq:
1723 return MSVCIntrin::_InterlockedExchange_acq;
1724 case clang::ARM::BI_InterlockedExchange8_rel:
1725 case clang::ARM::BI_InterlockedExchange16_rel:
1726 case clang::ARM::BI_InterlockedExchange_rel:
1727 case clang::ARM::BI_InterlockedExchange64_rel:
1728 case clang::ARM::BI_InterlockedExchangePointer_rel:
1729 return MSVCIntrin::_InterlockedExchange_rel;
1730 case clang::ARM::BI_InterlockedExchange8_nf:
1731 case clang::ARM::BI_InterlockedExchange16_nf:
1732 case clang::ARM::BI_InterlockedExchange_nf:
1733 case clang::ARM::BI_InterlockedExchange64_nf:
1734 case clang::ARM::BI_InterlockedExchangePointer_nf:
1735 return MSVCIntrin::_InterlockedExchange_nf;
1736 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1737 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1738 case clang::ARM::BI_InterlockedCompareExchange_acq:
1739 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1740 case clang::ARM::BI_InterlockedCompareExchangePointer_acq:
1741 return MSVCIntrin::_InterlockedCompareExchange_acq;
1742 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1743 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1744 case clang::ARM::BI_InterlockedCompareExchange_rel:
1745 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1746 case clang::ARM::BI_InterlockedCompareExchangePointer_rel:
1747 return MSVCIntrin::_InterlockedCompareExchange_rel;
1748 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1749 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1750 case clang::ARM::BI_InterlockedCompareExchange_nf:
1751 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1752 return MSVCIntrin::_InterlockedCompareExchange_nf;
1753 case clang::ARM::BI_InterlockedOr8_acq:
1754 case clang::ARM::BI_InterlockedOr16_acq:
1755 case clang::ARM::BI_InterlockedOr_acq:
1756 case clang::ARM::BI_InterlockedOr64_acq:
1757 return MSVCIntrin::_InterlockedOr_acq;
1758 case clang::ARM::BI_InterlockedOr8_rel:
1759 case clang::ARM::BI_InterlockedOr16_rel:
1760 case clang::ARM::BI_InterlockedOr_rel:
1761 case clang::ARM::BI_InterlockedOr64_rel:
1762 return MSVCIntrin::_InterlockedOr_rel;
1763 case clang::ARM::BI_InterlockedOr8_nf:
1764 case clang::ARM::BI_InterlockedOr16_nf:
1765 case clang::ARM::BI_InterlockedOr_nf:
1766 case clang::ARM::BI_InterlockedOr64_nf:
1767 return MSVCIntrin::_InterlockedOr_nf;
1768 case clang::ARM::BI_InterlockedXor8_acq:
1769 case clang::ARM::BI_InterlockedXor16_acq:
1770 case clang::ARM::BI_InterlockedXor_acq:
1771 case clang::ARM::BI_InterlockedXor64_acq:
1772 return MSVCIntrin::_InterlockedXor_acq;
1773 case clang::ARM::BI_InterlockedXor8_rel:
1774 case clang::ARM::BI_InterlockedXor16_rel:
1775 case clang::ARM::BI_InterlockedXor_rel:
1776 case clang::ARM::BI_InterlockedXor64_rel:
1777 return MSVCIntrin::_InterlockedXor_rel;
1778 case clang::ARM::BI_InterlockedXor8_nf:
1779 case clang::ARM::BI_InterlockedXor16_nf:
1780 case clang::ARM::BI_InterlockedXor_nf:
1781 case clang::ARM::BI_InterlockedXor64_nf:
1782 return MSVCIntrin::_InterlockedXor_nf;
1783 case clang::ARM::BI_InterlockedAnd8_acq:
1784 case clang::ARM::BI_InterlockedAnd16_acq:
1785 case clang::ARM::BI_InterlockedAnd_acq:
1786 case clang::ARM::BI_InterlockedAnd64_acq:
1787 return MSVCIntrin::_InterlockedAnd_acq;
1788 case clang::ARM::BI_InterlockedAnd8_rel:
1789 case clang::ARM::BI_InterlockedAnd16_rel:
1790 case clang::ARM::BI_InterlockedAnd_rel:
1791 case clang::ARM::BI_InterlockedAnd64_rel:
1792 return MSVCIntrin::_InterlockedAnd_rel;
1793 case clang::ARM::BI_InterlockedAnd8_nf:
1794 case clang::ARM::BI_InterlockedAnd16_nf:
1795 case clang::ARM::BI_InterlockedAnd_nf:
1796 case clang::ARM::BI_InterlockedAnd64_nf:
1797 return MSVCIntrin::_InterlockedAnd_nf;
1798 case clang::ARM::BI_InterlockedIncrement16_acq:
1799 case clang::ARM::BI_InterlockedIncrement_acq:
1800 case clang::ARM::BI_InterlockedIncrement64_acq:
1801 return MSVCIntrin::_InterlockedIncrement_acq;
1802 case clang::ARM::BI_InterlockedIncrement16_rel:
1803 case clang::ARM::BI_InterlockedIncrement_rel:
1804 case clang::ARM::BI_InterlockedIncrement64_rel:
1805 return MSVCIntrin::_InterlockedIncrement_rel;
1806 case clang::ARM::BI_InterlockedIncrement16_nf:
1807 case clang::ARM::BI_InterlockedIncrement_nf:
1808 case clang::ARM::BI_InterlockedIncrement64_nf:
1809 return MSVCIntrin::_InterlockedIncrement_nf;
1810 case clang::ARM::BI_InterlockedDecrement16_acq:
1811 case clang::ARM::BI_InterlockedDecrement_acq:
1812 case clang::ARM::BI_InterlockedDecrement64_acq:
1813 return MSVCIntrin::_InterlockedDecrement_acq;
1814 case clang::ARM::BI_InterlockedDecrement16_rel:
1815 case clang::ARM::BI_InterlockedDecrement_rel:
1816 case clang::ARM::BI_InterlockedDecrement64_rel:
1817 return MSVCIntrin::_InterlockedDecrement_rel;
1818 case clang::ARM::BI_InterlockedDecrement16_nf:
1819 case clang::ARM::BI_InterlockedDecrement_nf:
1820 case clang::ARM::BI_InterlockedDecrement64_nf:
1821 return MSVCIntrin::_InterlockedDecrement_nf;
1822 }
1823 llvm_unreachable("must return from switch");
1824}
1825
1826static std::optional<CodeGenFunction::MSVCIntrin>
1827translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1828 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1829 switch (BuiltinID) {
1830 default:
1831 return std::nullopt;
1832 case clang::AArch64::BI_BitScanForward:
1833 case clang::AArch64::BI_BitScanForward64:
1834 return MSVCIntrin::_BitScanForward;
1835 case clang::AArch64::BI_BitScanReverse:
1836 case clang::AArch64::BI_BitScanReverse64:
1837 return MSVCIntrin::_BitScanReverse;
1838 case clang::AArch64::BI_InterlockedAnd64:
1839 return MSVCIntrin::_InterlockedAnd;
1840 case clang::AArch64::BI_InterlockedExchange64:
1841 return MSVCIntrin::_InterlockedExchange;
1842 case clang::AArch64::BI_InterlockedExchangeAdd64:
1843 return MSVCIntrin::_InterlockedExchangeAdd;
1844 case clang::AArch64::BI_InterlockedExchangeSub64:
1845 return MSVCIntrin::_InterlockedExchangeSub;
1846 case clang::AArch64::BI_InterlockedOr64:
1847 return MSVCIntrin::_InterlockedOr;
1848 case clang::AArch64::BI_InterlockedXor64:
1849 return MSVCIntrin::_InterlockedXor;
1850 case clang::AArch64::BI_InterlockedDecrement64:
1851 return MSVCIntrin::_InterlockedDecrement;
1852 case clang::AArch64::BI_InterlockedIncrement64:
1853 return MSVCIntrin::_InterlockedIncrement;
1854 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1855 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1856 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1857 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1858 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1859 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1860 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1861 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1862 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1863 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1864 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1865 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1866 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1867 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1868 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1869 case clang::AArch64::BI_InterlockedExchange8_acq:
1870 case clang::AArch64::BI_InterlockedExchange16_acq:
1871 case clang::AArch64::BI_InterlockedExchange_acq:
1872 case clang::AArch64::BI_InterlockedExchange64_acq:
1873 case clang::AArch64::BI_InterlockedExchangePointer_acq:
1874 return MSVCIntrin::_InterlockedExchange_acq;
1875 case clang::AArch64::BI_InterlockedExchange8_rel:
1876 case clang::AArch64::BI_InterlockedExchange16_rel:
1877 case clang::AArch64::BI_InterlockedExchange_rel:
1878 case clang::AArch64::BI_InterlockedExchange64_rel:
1879 case clang::AArch64::BI_InterlockedExchangePointer_rel:
1880 return MSVCIntrin::_InterlockedExchange_rel;
1881 case clang::AArch64::BI_InterlockedExchange8_nf:
1882 case clang::AArch64::BI_InterlockedExchange16_nf:
1883 case clang::AArch64::BI_InterlockedExchange_nf:
1884 case clang::AArch64::BI_InterlockedExchange64_nf:
1885 case clang::AArch64::BI_InterlockedExchangePointer_nf:
1886 return MSVCIntrin::_InterlockedExchange_nf;
1887 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1888 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1889 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1890 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1891 case clang::AArch64::BI_InterlockedCompareExchangePointer_acq:
1892 return MSVCIntrin::_InterlockedCompareExchange_acq;
1893 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1894 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1895 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1896 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1897 case clang::AArch64::BI_InterlockedCompareExchangePointer_rel:
1898 return MSVCIntrin::_InterlockedCompareExchange_rel;
1899 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1900 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1901 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1902 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1903 return MSVCIntrin::_InterlockedCompareExchange_nf;
1904 case clang::AArch64::BI_InterlockedCompareExchange128:
1905 return MSVCIntrin::_InterlockedCompareExchange128;
1906 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1907 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1908 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1909 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1910 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1911 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1912 case clang::AArch64::BI_InterlockedOr8_acq:
1913 case clang::AArch64::BI_InterlockedOr16_acq:
1914 case clang::AArch64::BI_InterlockedOr_acq:
1915 case clang::AArch64::BI_InterlockedOr64_acq:
1916 return MSVCIntrin::_InterlockedOr_acq;
1917 case clang::AArch64::BI_InterlockedOr8_rel:
1918 case clang::AArch64::BI_InterlockedOr16_rel:
1919 case clang::AArch64::BI_InterlockedOr_rel:
1920 case clang::AArch64::BI_InterlockedOr64_rel:
1921 return MSVCIntrin::_InterlockedOr_rel;
1922 case clang::AArch64::BI_InterlockedOr8_nf:
1923 case clang::AArch64::BI_InterlockedOr16_nf:
1924 case clang::AArch64::BI_InterlockedOr_nf:
1925 case clang::AArch64::BI_InterlockedOr64_nf:
1926 return MSVCIntrin::_InterlockedOr_nf;
1927 case clang::AArch64::BI_InterlockedXor8_acq:
1928 case clang::AArch64::BI_InterlockedXor16_acq:
1929 case clang::AArch64::BI_InterlockedXor_acq:
1930 case clang::AArch64::BI_InterlockedXor64_acq:
1931 return MSVCIntrin::_InterlockedXor_acq;
1932 case clang::AArch64::BI_InterlockedXor8_rel:
1933 case clang::AArch64::BI_InterlockedXor16_rel:
1934 case clang::AArch64::BI_InterlockedXor_rel:
1935 case clang::AArch64::BI_InterlockedXor64_rel:
1936 return MSVCIntrin::_InterlockedXor_rel;
1937 case clang::AArch64::BI_InterlockedXor8_nf:
1938 case clang::AArch64::BI_InterlockedXor16_nf:
1939 case clang::AArch64::BI_InterlockedXor_nf:
1940 case clang::AArch64::BI_InterlockedXor64_nf:
1941 return MSVCIntrin::_InterlockedXor_nf;
1942 case clang::AArch64::BI_InterlockedAnd8_acq:
1943 case clang::AArch64::BI_InterlockedAnd16_acq:
1944 case clang::AArch64::BI_InterlockedAnd_acq:
1945 case clang::AArch64::BI_InterlockedAnd64_acq:
1946 return MSVCIntrin::_InterlockedAnd_acq;
1947 case clang::AArch64::BI_InterlockedAnd8_rel:
1948 case clang::AArch64::BI_InterlockedAnd16_rel:
1949 case clang::AArch64::BI_InterlockedAnd_rel:
1950 case clang::AArch64::BI_InterlockedAnd64_rel:
1951 return MSVCIntrin::_InterlockedAnd_rel;
1952 case clang::AArch64::BI_InterlockedAnd8_nf:
1953 case clang::AArch64::BI_InterlockedAnd16_nf:
1954 case clang::AArch64::BI_InterlockedAnd_nf:
1955 case clang::AArch64::BI_InterlockedAnd64_nf:
1956 return MSVCIntrin::_InterlockedAnd_nf;
1957 case clang::AArch64::BI_InterlockedIncrement16_acq:
1958 case clang::AArch64::BI_InterlockedIncrement_acq:
1959 case clang::AArch64::BI_InterlockedIncrement64_acq:
1960 return MSVCIntrin::_InterlockedIncrement_acq;
1961 case clang::AArch64::BI_InterlockedIncrement16_rel:
1962 case clang::AArch64::BI_InterlockedIncrement_rel:
1963 case clang::AArch64::BI_InterlockedIncrement64_rel:
1964 return MSVCIntrin::_InterlockedIncrement_rel;
1965 case clang::AArch64::BI_InterlockedIncrement16_nf:
1966 case clang::AArch64::BI_InterlockedIncrement_nf:
1967 case clang::AArch64::BI_InterlockedIncrement64_nf:
1968 return MSVCIntrin::_InterlockedIncrement_nf;
1969 case clang::AArch64::BI_InterlockedDecrement16_acq:
1970 case clang::AArch64::BI_InterlockedDecrement_acq:
1971 case clang::AArch64::BI_InterlockedDecrement64_acq:
1972 return MSVCIntrin::_InterlockedDecrement_acq;
1973 case clang::AArch64::BI_InterlockedDecrement16_rel:
1974 case clang::AArch64::BI_InterlockedDecrement_rel:
1975 case clang::AArch64::BI_InterlockedDecrement64_rel:
1976 return MSVCIntrin::_InterlockedDecrement_rel;
1977 case clang::AArch64::BI_InterlockedDecrement16_nf:
1978 case clang::AArch64::BI_InterlockedDecrement_nf:
1979 case clang::AArch64::BI_InterlockedDecrement64_nf:
1980 return MSVCIntrin::_InterlockedDecrement_nf;
1981 }
1982 llvm_unreachable("must return from switch");
1983}
1984
1985static std::optional<CodeGenFunction::MSVCIntrin>
1986translateX86ToMsvcIntrin(unsigned BuiltinID) {
1987 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1988 switch (BuiltinID) {
1989 default:
1990 return std::nullopt;
1991 case clang::X86::BI_BitScanForward:
1992 case clang::X86::BI_BitScanForward64:
1993 return MSVCIntrin::_BitScanForward;
1994 case clang::X86::BI_BitScanReverse:
1995 case clang::X86::BI_BitScanReverse64:
1996 return MSVCIntrin::_BitScanReverse;
1997 case clang::X86::BI_InterlockedAnd64:
1998 return MSVCIntrin::_InterlockedAnd;
1999 case clang::X86::BI_InterlockedCompareExchange128:
2000 return MSVCIntrin::_InterlockedCompareExchange128;
2001 case clang::X86::BI_InterlockedExchange64:
2002 return MSVCIntrin::_InterlockedExchange;
2003 case clang::X86::BI_InterlockedExchangeAdd64:
2004 return MSVCIntrin::_InterlockedExchangeAdd;
2005 case clang::X86::BI_InterlockedExchangeSub64:
2006 return MSVCIntrin::_InterlockedExchangeSub;
2007 case clang::X86::BI_InterlockedOr64:
2008 return MSVCIntrin::_InterlockedOr;
2009 case clang::X86::BI_InterlockedXor64:
2010 return MSVCIntrin::_InterlockedXor;
2011 case clang::X86::BI_InterlockedDecrement64:
2012 return MSVCIntrin::_InterlockedDecrement;
2013 case clang::X86::BI_InterlockedIncrement64:
2014 return MSVCIntrin::_InterlockedIncrement;
2015 }
2016 llvm_unreachable("must return from switch");
2017}
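// Illustrative sketch, not part of the original file: the three translation
// tables above let the per-target emitters funnel the shared MSVC builtins
// into EmitMSVCBuiltinExpr with a single pattern, roughly
//
//   if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
//     return EmitMSVCBuiltinExpr(*MsvcIntId, E);
//   // ... otherwise fall through to the target-specific handling.
//
// and likewise for the AArch64 and X86 variants.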
2018
2019// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
2020Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
2021 const CallExpr *E) {
2022 switch (BuiltinID) {
2023 case MSVCIntrin::_BitScanForward:
2024 case MSVCIntrin::_BitScanReverse: {
2025 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
2026 Value *ArgValue = EmitScalarExpr(E->getArg(1));
2027
2028 llvm::Type *ArgType = ArgValue->getType();
2029 llvm::Type *IndexType = IndexAddress.getElementType();
2030 llvm::Type *ResultType = ConvertType(E->getType());
2031
2032 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
2033 Value *ResZero = llvm::Constant::getNullValue(ResultType);
2034 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
2035
2036 BasicBlock *Begin = Builder.GetInsertBlock();
2037 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
2038 Builder.SetInsertPoint(End);
2039 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
2040
2041 Builder.SetInsertPoint(Begin);
2042 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
2043 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
2044 Builder.CreateCondBr(IsZero, End, NotZero);
2045 Result->addIncoming(ResZero, Begin);
2046
2047 Builder.SetInsertPoint(NotZero);
2048
2049 if (BuiltinID == MSVCIntrin::_BitScanForward) {
2050 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
2051 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2052 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2053 Builder.CreateStore(ZeroCount, IndexAddress, false);
2054 } else {
2055 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
2056 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
2057
2058 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
2059 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
2060 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
2061 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
2062 Builder.CreateStore(Index, IndexAddress, false);
2063 }
2064 Builder.CreateBr(End);
2065 Result->addIncoming(ResOne, NotZero);
2066
2067 Builder.SetInsertPoint(End);
2068 return Result;
2069 }
2070 case MSVCIntrin::_InterlockedAnd:
2071 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
2072 case MSVCIntrin::_InterlockedExchange:
2073 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
2074 case MSVCIntrin::_InterlockedExchangeAdd:
2075 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
2076 case MSVCIntrin::_InterlockedExchangeSub:
2077 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
2078 case MSVCIntrin::_InterlockedOr:
2079 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
2080 case MSVCIntrin::_InterlockedXor:
2081 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
2082 case MSVCIntrin::_InterlockedExchangeAdd_acq:
2083 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2084 AtomicOrdering::Acquire);
2085 case MSVCIntrin::_InterlockedExchangeAdd_rel:
2086 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2087 AtomicOrdering::Release);
2088 case MSVCIntrin::_InterlockedExchangeAdd_nf:
2089 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
2090 AtomicOrdering::Monotonic);
2091 case MSVCIntrin::_InterlockedExchange_acq:
2092 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2093 AtomicOrdering::Acquire);
2094 case MSVCIntrin::_InterlockedExchange_rel:
2095 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2096 AtomicOrdering::Release);
2097 case MSVCIntrin::_InterlockedExchange_nf:
2098 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
2099 AtomicOrdering::Monotonic);
2100 case MSVCIntrin::_InterlockedCompareExchange:
2101 return EmitAtomicCmpXchgForMSIntrin(*this, E);
2102 case MSVCIntrin::_InterlockedCompareExchange_acq:
2103 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
2104 case MSVCIntrin::_InterlockedCompareExchange_rel:
2105 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
2106 case MSVCIntrin::_InterlockedCompareExchange_nf:
2107 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2108 case MSVCIntrin::_InterlockedCompareExchange128:
2109 return EmitAtomicCmpXchg128ForMSIntrin(
2110 *this, E, AtomicOrdering::SequentiallyConsistent);
2111 case MSVCIntrin::_InterlockedCompareExchange128_acq:
2112 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
2113 case MSVCIntrin::_InterlockedCompareExchange128_rel:
2114 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
2115 case MSVCIntrin::_InterlockedCompareExchange128_nf:
2116 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
2117 case MSVCIntrin::_InterlockedOr_acq:
2118 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2119 AtomicOrdering::Acquire);
2120 case MSVCIntrin::_InterlockedOr_rel:
2121 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2122 AtomicOrdering::Release);
2123 case MSVCIntrin::_InterlockedOr_nf:
2124 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
2125 AtomicOrdering::Monotonic);
2126 case MSVCIntrin::_InterlockedXor_acq:
2127 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2128 AtomicOrdering::Acquire);
2129 case MSVCIntrin::_InterlockedXor_rel:
2130 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2131 AtomicOrdering::Release);
2132 case MSVCIntrin::_InterlockedXor_nf:
2133 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
2134 AtomicOrdering::Monotonic);
2135 case MSVCIntrin::_InterlockedAnd_acq:
2136 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2137 AtomicOrdering::Acquire);
2138 case MSVCIntrin::_InterlockedAnd_rel:
2139 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2140 AtomicOrdering::Release);
2141 case MSVCIntrin::_InterlockedAnd_nf:
2142 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
2143 AtomicOrdering::Monotonic);
2144 case MSVCIntrin::_InterlockedIncrement_acq:
2145 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
2146 case MSVCIntrin::_InterlockedIncrement_rel:
2147 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
2148 case MSVCIntrin::_InterlockedIncrement_nf:
2149 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
2150 case MSVCIntrin::_InterlockedDecrement_acq:
2151 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
2152 case MSVCIntrin::_InterlockedDecrement_rel:
2153 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
2154 case MSVCIntrin::_InterlockedDecrement_nf:
2155 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
2156
2157 case MSVCIntrin::_InterlockedDecrement:
2158 return EmitAtomicDecrementValue(*this, E);
2159 case MSVCIntrin::_InterlockedIncrement:
2160 return EmitAtomicIncrementValue(*this, E);
2161
2162 case MSVCIntrin::__fastfail: {
2163 // Request immediate process termination from the kernel. The instruction
2164 // sequences to do this are documented on MSDN:
2165 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
2166 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
2167 StringRef Asm, Constraints;
2168 switch (ISA) {
2169 default:
2170 ErrorUnsupported(E, "__fastfail call for this architecture");
2171 break;
2172 case llvm::Triple::x86:
2173 case llvm::Triple::x86_64:
2174 Asm = "int $$0x29";
2175 Constraints = "{cx}";
2176 break;
2177 case llvm::Triple::thumb:
2178 Asm = "udf #251";
2179 Constraints = "{r0}";
2180 break;
2181 case llvm::Triple::aarch64:
2182 Asm = "brk #0xF003";
2183 Constraints = "{w0}";
2184 }
2185 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
2186 llvm::InlineAsm *IA =
2187 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
2188 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
2189 getLLVMContext(), llvm::AttributeList::FunctionIndex,
2190 llvm::Attribute::NoReturn);
2191 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
2192 CI->setAttributes(NoReturnAttr);
2193 return CI;
2194 }
2195 }
2196 llvm_unreachable("Incorrect MSVC intrinsic!");
2197}
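// Illustrative sketch, not part of the original file: the _BitScanForward
// lowering above behaves like the following C, with the early return feeding
// the "bitscan_result" PHI:
//
//   unsigned char bitscan_forward(unsigned *Index, unsigned Mask) {
//     if (Mask == 0)
//       return 0;                     // *Index is left unmodified
//     *Index = __builtin_ctz(Mask);   // cttz with zero treated as poison
//     return 1;
//   }
//
// _BitScanReverse has the same shape but stores (BitWidth - 1) - ctlz(Mask).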
2198
2199namespace {
2200// ARC cleanup for __builtin_os_log_format
2201struct CallObjCArcUse final : EHScopeStack::Cleanup {
2202 CallObjCArcUse(llvm::Value *object) : object(object) {}
2203 llvm::Value *object;
2204
2205 void Emit(CodeGenFunction &CGF, Flags flags) override {
2206 CGF.EmitARCIntrinsicUse(object);
2207 }
2208};
2209}
2210
2211Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
2212 BuiltinCheckKind Kind) {
2213 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
2214 "Unsupported builtin check kind");
2215
2216 Value *ArgValue = EmitScalarExpr(E);
2217 if (!SanOpts.has(SanitizerKind::Builtin))
2218 return ArgValue;
2219
2220 SanitizerScope SanScope(this);
2221 Value *Cond = Builder.CreateICmpNE(
2222 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2223 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2224 SanitizerHandler::InvalidBuiltin,
2225 {EmitCheckSourceLocation(E->getExprLoc()),
2226 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2227 {});
2228 return ArgValue;
2229}
2230
2231Value *CodeGenFunction::EmitCheckedArgForAssume(const Expr *E) {
2232 Value *ArgValue = EvaluateExprAsBool(E);
2233 if (!SanOpts.has(SanitizerKind::Builtin))
2234 return ArgValue;
2235
2236 SanitizerScope SanScope(this);
2237 EmitCheck(
2238 std::make_pair(ArgValue, SanitizerKind::Builtin),
2239 SanitizerHandler::InvalidBuiltin,
2240 {EmitCheckSourceLocation(E->getExprLoc()),
2241 llvm::ConstantInt::get(Builder.getInt8Ty(), BCK_AssumePassedFalse)},
2242 std::nullopt);
2243 return ArgValue;
2244}
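// Illustrative sketch, not part of the original file: with -fsanitize=builtin
// the helpers above guard the argument before the intrinsic is emitted,
// conceptually rewriting 'n = __builtin_clz(x);' as
//
//   if (x == 0)
//     __ubsan_handle_invalid_builtin(&Data);   // or a trap, depending on flags
//   n = __builtin_clz(x);
//
// The handler name and the Data descriptor here follow the usual UBSan
// conventions and are not spelled out in this file.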
2245
2246static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2247 return CGF.Builder.CreateBinaryIntrinsic(
2248 Intrinsic::abs, ArgValue,
2249 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2250}
2251
2252static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
2253 bool SanitizeOverflow) {
2254 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2255
2256 // Try to eliminate overflow check.
2257 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2258 if (!VCI->isMinSignedValue())
2259 return EmitAbs(CGF, ArgValue, true);
2260 }
2261
2262 CodeGenFunction::SanitizerScope SanScope(&CGF);
2263
2264 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2265 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2266 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2267 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2268 Value *NotOverflow = CGF.Builder.CreateNot(
2269 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2270
2271 // TODO: support -ftrapv-handler.
2272 if (SanitizeOverflow) {
2273 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2274 SanitizerHandler::NegateOverflow,
2275 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
2276 CGF.EmitCheckTypeDescriptor(E->getType())},
2277 {ArgValue});
2278 } else
2279 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2280
2281 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2282 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2283}
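// Illustrative sketch, not part of the original file: with signed-overflow
// checking enabled, __builtin_abs(x) is emitted as if it were
//
//   int abs_checked(int x) {
//     int r;
//     if (__builtin_ssub_overflow(0, x, &r))   // only overflows for INT_MIN
//       handle_overflow();                     // diagnostic or trap, see above
//     return x < 0 ? r : x;                    // the "abscond" select
//   }
//
// where handle_overflow() stands in for the NegateOverflow / SubOverflow paths
// chosen by SanitizeOverflow; it is a placeholder, not a name from this file.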
2284
2285/// Get the argument type for arguments to os_log_helper.
2286static CanQualType getOSLogArgType(ASTContext &C, int Size) {
2287 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2288 return C.getCanonicalType(UnsignedTy);
2289}
2290
2291llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
2292 const analyze_os_log::OSLogBufferLayout &Layout,
2293 CharUnits BufferAlignment) {
2294 ASTContext &Ctx = getContext();
2295
2296 llvm::SmallString<64> Name;
2297 {
2298 raw_svector_ostream OS(Name);
2299 OS << "__os_log_helper";
2300 OS << "_" << BufferAlignment.getQuantity();
2301 OS << "_" << int(Layout.getSummaryByte());
2302 OS << "_" << int(Layout.getNumArgsByte());
2303 for (const auto &Item : Layout.Items)
2304 OS << "_" << int(Item.getSizeByte()) << "_"
2305 << int(Item.getDescriptorByte());
2306 }
2307
2308 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2309 return F;
2310
2312 FunctionArgList Args;
2313 Args.push_back(ImplicitParamDecl::Create(
2314 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
2315 ImplicitParamKind::Other));
2316 ArgTys.emplace_back(Ctx.VoidPtrTy);
2317
2318 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2319 char Size = Layout.Items[I].getSizeByte();
2320 if (!Size)
2321 continue;
2322
2323 QualType ArgTy = getOSLogArgType(Ctx, Size);
2324 Args.push_back(ImplicitParamDecl::Create(
2325 Ctx, nullptr, SourceLocation(),
2326 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
2327 ImplicitParamKind::Other));
2328 ArgTys.emplace_back(ArgTy);
2329 }
2330
2331 QualType ReturnTy = Ctx.VoidTy;
2332
2333 // The helper function has linkonce_odr linkage to enable the linker to merge
2334 // identical functions. To ensure the merging always happens, 'noinline' is
2335 // attached to the function when compiling with -Oz.
2336 const CGFunctionInfo &FI =
2337 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
2338 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2339 llvm::Function *Fn = llvm::Function::Create(
2340 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2341 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2342 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
2344 Fn->setDoesNotThrow();
2345
2346 // Attach 'noinline' at -Oz.
2347 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2348 Fn->addFnAttr(llvm::Attribute::NoInline);
2349
2350 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2351 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2352
2353 // Create a scope with an artificial location for the body of this function.
2354 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2355
2356 CharUnits Offset;
2357 Address BufAddr = makeNaturalAddressForPointer(
2358 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2359 BufferAlignment);
2360 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2361 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2362 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2363 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2364
2365 unsigned I = 1;
2366 for (const auto &Item : Layout.Items) {
2367 Builder.CreateStore(
2368 Builder.getInt8(Item.getDescriptorByte()),
2369 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2370 Builder.CreateStore(
2371 Builder.getInt8(Item.getSizeByte()),
2372 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2373
2374 CharUnits Size = Item.size();
2375 if (!Size.getQuantity())
2376 continue;
2377
2378 Address Arg = GetAddrOfLocalVar(Args[I]);
2379 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2380 Addr = Addr.withElementType(Arg.getElementType());
2381 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
2382 Offset += Size;
2383 ++I;
2384 }
2385
2386 FinishFunction();
2387
2388 return Fn;
2389}
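// Illustrative sketch, not part of the original file: for a layout with one
// 4-byte scalar argument, the generated helper is morally equivalent to
//
//   void __os_log_helper_NN(void *buffer, unsigned arg0) {
//     char *buf = (char *)buffer;
//     buf[0] = summaryByte;                  // Layout.getSummaryByte()
//     buf[1] = 1;                            // Layout.getNumArgsByte()
//     buf[2] = descriptorByte;               // per-item descriptor byte
//     buf[3] = 4;                            // per-item size byte
//     __builtin_memcpy(buf + 4, &arg0, 4);   // argument data
//   }
//
// Here __os_log_helper_NN, summaryByte and descriptorByte are placeholders;
// the real name and byte values are derived from the layout as shown above.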
2390
2391RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
2392 assert(E.getNumArgs() >= 2 &&
2393 "__builtin_os_log_format takes at least 2 arguments");
2394 ASTContext &Ctx = getContext();
2395 analyze_os_log::OSLogBufferLayout Layout;
2396 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
2397 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2398 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2399
2400 // Ignore argument 1, the format string. It is not currently used.
2401 CallArgList Args;
2402 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2403
2404 for (const auto &Item : Layout.Items) {
2405 int Size = Item.getSizeByte();
2406 if (!Size)
2407 continue;
2408
2409 llvm::Value *ArgVal;
2410
2411 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2412 uint64_t Val = 0;
2413 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2414 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2415 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2416 } else if (const Expr *TheExpr = Item.getExpr()) {
2417 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2418
2419 // If a temporary object that requires destruction after the full
2420 // expression is passed, push a lifetime-extended cleanup to extend its
2421 // lifetime to the end of the enclosing block scope.
2422 auto LifetimeExtendObject = [&](const Expr *E) {
2423 E = E->IgnoreParenCasts();
2424 // Extend lifetimes of objects returned by function calls and message
2425 // sends.
2426
2427 // FIXME: We should do this in other cases in which temporaries are
2428 // created including arguments of non-ARC types (e.g., C++
2429 // temporaries).
2430 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2431 return true;
2432 return false;
2433 };
2434
2435 if (TheExpr->getType()->isObjCRetainableType() &&
2436 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2437 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2438 "Only scalar can be a ObjC retainable type");
2439 if (!isa<Constant>(ArgVal)) {
2440 CleanupKind Cleanup = getARCCleanupKind();
2441 QualType Ty = TheExpr->getType();
2442 RawAddress Alloca = RawAddress::invalid();
2443 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2444 ArgVal = EmitARCRetain(Ty, ArgVal);
2445 Builder.CreateStore(ArgVal, Addr);
2446 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
2447 CodeGenFunction::destroyARCStrongPrecise,
2448 Cleanup & EHCleanup);
2449
2450 // Push a clang.arc.use call to ensure ARC optimizer knows that the
2451 // argument has to be alive.
2452 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2453 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2454 }
2455 }
2456 } else {
2457 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2458 }
2459
2460 unsigned ArgValSize =
2461 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2462 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2463 ArgValSize);
2464 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2465 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2466 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2467 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2468 Args.add(RValue::get(ArgVal), ArgTy);
2469 }
2470
2471 const CGFunctionInfo &FI =
2472 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
2473 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
2474 Layout, BufAddr.getAlignment());
2475 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
2476 return RValue::get(BufAddr, *this);
2477}
2478
2479static bool isSpecialUnsignedMultiplySignedResult(
2480 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2481 WidthAndSignedness ResultInfo) {
2482 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2483 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2484 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2485}
2486
2487static RValue EmitCheckedUnsignedMultiplySignedResult(
2488 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2489 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2490 const clang::Expr *ResultArg, QualType ResultQTy,
2491 WidthAndSignedness ResultInfo) {
2492 assert(isSpecialUnsignedMultiplySignedResult(
2493 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2494 "Cannot specialize this multiply");
2495
2496 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2497 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2498
2499 llvm::Value *HasOverflow;
2500 llvm::Value *Result = EmitOverflowIntrinsic(
2501 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2502
2503 // The intrinsic call will detect overflow when the value is > UINT_MAX;
2504 // however, since the original builtin had a signed result, we also need to
2505 // report an overflow when the result is greater than INT_MAX.
2506 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2507 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2508
2509 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2510 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2511
2512 bool isVolatile =
2513 ResultArg->getType()->getPointeeType().isVolatileQualified();
2514 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2515 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2516 isVolatile);
2517 return RValue::get(HasOverflow);
2518}
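// Illustrative sketch, not part of the original file: for equal-width
// operands, __builtin_mul_overflow(unsigned a, unsigned b, int *r) is
// specialized above into the equivalent of
//
//   bool mul_u_u_to_s(unsigned a, unsigned b, int *r) {
//     unsigned p;
//     bool ovf = __builtin_umul_overflow(a, b, &p);
//     ovf |= p > (unsigned)INT_MAX;   // must also fit in the signed result
//     *r = (int)p;
//     return ovf;
//   }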
2519
2520/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2521static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2522 WidthAndSignedness Op1Info,
2523 WidthAndSignedness Op2Info,
2524 WidthAndSignedness ResultInfo) {
2525 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2526 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2527 Op1Info.Signed != Op2Info.Signed;
2528}
2529
2530/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2531/// the generic checked-binop irgen.
2532static RValue
2533EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
2534 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2535 WidthAndSignedness Op2Info,
2536 const clang::Expr *ResultArg, QualType ResultQTy,
2537 WidthAndSignedness ResultInfo) {
2538 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2539 Op2Info, ResultInfo) &&
2540 "Not a mixed-sign multiplication we can specialize");
2541
2542 // Emit the signed and unsigned operands.
2543 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2544 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2545 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2546 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2547 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2548 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2549
2550 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2551 if (SignedOpWidth < UnsignedOpWidth)
2552 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2553 if (UnsignedOpWidth < SignedOpWidth)
2554 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2555
2556 llvm::Type *OpTy = Signed->getType();
2557 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2558 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2559 llvm::Type *ResTy = ResultPtr.getElementType();
2560 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2561
2562 // Take the absolute value of the signed operand.
2563 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2564 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2565 llvm::Value *AbsSigned =
2566 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2567
2568 // Perform a checked unsigned multiplication.
2569 llvm::Value *UnsignedOverflow;
2570 llvm::Value *UnsignedResult =
2571 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2572 Unsigned, UnsignedOverflow);
2573
2574 llvm::Value *Overflow, *Result;
2575 if (ResultInfo.Signed) {
2576 // Signed overflow occurs if the result is greater than INT_MAX or less
2577 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2578 auto IntMax =
2579 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2580 llvm::Value *MaxResult =
2581 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2582 CGF.Builder.CreateZExt(IsNegative, OpTy));
2583 llvm::Value *SignedOverflow =
2584 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2585 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2586
2587 // Prepare the signed result (possibly by negating it).
2588 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2589 llvm::Value *SignedResult =
2590 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2591 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2592 } else {
2593 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2594 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2595 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2596 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2597 if (ResultInfo.Width < OpWidth) {
2598 auto IntMax =
2599 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2600 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2601 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2602 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2603 }
2604
2605 // Negate the product if it would be negative in infinite precision.
2606 Result = CGF.Builder.CreateSelect(
2607 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2608
2609 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2610 }
2611 assert(Overflow && Result && "Missing overflow or result");
2612
2613 bool isVolatile =
2614 ResultArg->getType()->getPointeeType().isVolatileQualified();
2615 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2616 isVolatile);
2617 return RValue::get(Overflow);
2618}
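// Illustrative sketch, not part of the original file: for equal widths and a
// signed result, __builtin_mul_overflow(int a, unsigned b, int *r) follows the
// path above, which is equivalent to
//
//   bool mul_s_u_to_s(int a, unsigned b, int *r) {
//     bool neg = a < 0;
//     unsigned abs_a = neg ? 0u - (unsigned)a : (unsigned)a;
//     unsigned p;
//     bool ovf = __builtin_umul_overflow(abs_a, b, &p);
//     ovf |= p > (unsigned)INT_MAX + neg; // |result| may be INT_MAX + 1 if negative
//     *r = (int)(neg ? 0u - p : p);       // two's-complement negate, then convert
//     return ovf;
//   }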
2619
2620static bool
2621TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
2622 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2623 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2624 Ty = Ctx.getBaseElementType(Arr);
2625
2626 const auto *Record = Ty->getAsCXXRecordDecl();
2627 if (!Record)
2628 return false;
2629
2630 // We've already checked this type, or are in the process of checking it.
2631 if (!Seen.insert(Record).second)
2632 return false;
2633
2634 assert(Record->hasDefinition() &&
2635 "Incomplete types should already be diagnosed");
2636
2637 if (Record->isDynamicClass())
2638 return true;
2639
2640 for (FieldDecl *F : Record->fields()) {
2641 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2642 return true;
2643 }
2644 return false;
2645}
2646
2647/// Determine if the specified type requires laundering by checking if it is a
2648/// dynamic class type or contains a subobject which is a dynamic class type.
2649static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
2650 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2651 return false;
2652 llvm::SmallPtrSet<const Decl *, 8> Seen;
2653 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2654}
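// Illustrative sketch, not part of the original file: under
// -fstrict-vtable-pointers, __builtin_launder(p) is lowered to
// @llvm.launder.invariant.group only when the pointee type needs it:
//
//   struct Dyn { virtual void f(); };  // dynamic class: laundered
//   struct Agg { Dyn d[4]; int i; };   // contains a dynamic subobject: laundered
//   struct Pod { int i; float f; };    // no dynamic subobject: p returned as-is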
2655
2656RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2657 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2658 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2659
2660 // The builtin's shift arg may have a different type than the source arg and
2661 // result, but the LLVM intrinsic uses the same type for all values.
2662 llvm::Type *Ty = Src->getType();
2663 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2664
2665 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2666 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2667 Function *F = CGM.getIntrinsic(IID, Ty);
2668 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2669}
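// Illustrative sketch, not part of the original file: because a rotate is a
// funnel shift whose two data operands are equal, __builtin_rotateleft32(x, n)
// lowers to @llvm.fshl.i32(x, x, n), which behaves like
//
//   uint32_t rotl32(uint32_t x, uint32_t n) {
//     n &= 31;
//     return (x << n) | (x >> ((32 - n) & 31));
//   }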
2670
2671// Map math builtins for long-double to f128 version.
2672static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2673 switch (BuiltinID) {
2674#define MUTATE_LDBL(func) \
2675 case Builtin::BI__builtin_##func##l: \
2676 return Builtin::BI__builtin_##func##f128;
2707 MUTATE_LDBL(nans)
2708 MUTATE_LDBL(inf)
2727 MUTATE_LDBL(huge_val)
2737#undef MUTATE_LDBL
2738 default:
2739 return BuiltinID;
2740 }
2741}
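// Illustrative sketch, not part of the original file: on PowerPC64 with
// IEEE-quad long double (-mabi=ieeelongdouble), a call such as
//
//   long double h = __builtin_huge_vall();
//
// is re-tagged here as __builtin_huge_valf128 before any further lowering, so
// it takes the f128 path instead of the ppc_fp128 one.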
2742
2743static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2744 Value *V) {
2745 if (CGF.Builder.getIsFPConstrained() &&
2746 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2747 if (Value *Result =
2748 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2749 return Result;
2750 }
2751 return nullptr;
2752}
2753
2754static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
2755 const FunctionDecl *FD) {
2756 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2757 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2758 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2759
2760 SmallVector<Value *, 16> Args;
2761 for (auto &&FormalTy : FnTy->params())
2762 Args.push_back(llvm::PoisonValue::get(FormalTy));
2763
2764 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2765}
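// Illustrative sketch, not part of the original file: in HIP stdpar mode, a
// builtin 'foo' that cannot be supported on the device is forwarded to a
// marker function so the failure can be diagnosed later, roughly
//
//   foo__hipstdpar_unsupported(/*poison*/ ...);
//
// with one poison argument per formal parameter of 'foo'. The name 'foo' is
// illustrative; the suffix is appended to the declaration's real name.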
2766
2767RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2768 const CallExpr *E,
2769 ReturnValueSlot ReturnValue) {
2770 assert(!getContext().BuiltinInfo.isImmediate(BuiltinID) &&
2771 "Should not codegen for consteval builtins");
2772
2773 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2774 // See if we can constant fold this builtin. If so, don't emit it at all.
2775 // TODO: Extend this handling to all builtin calls that we can constant-fold.
2776 Expr::EvalResult Result;
2777 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
2778 !Result.hasSideEffects()) {
2779 if (Result.Val.isInt())
2780 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2781 Result.Val.getInt()));
2782 if (Result.Val.isFloat())
2783 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2784 Result.Val.getFloat()));
2785 }
2786
2787 // If current long-double semantics is IEEE 128-bit, replace math builtins
2788 // of long-double with f128 equivalent.
2789 // TODO: This mutation should also be applied to targets other than PPC,
2790 // once the backend supports IEEE 128-bit style libcalls.
2791 if (getTarget().getTriple().isPPC64() &&
2792 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2793 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2794
2795 // If the builtin has been declared explicitly with an assembler label,
2796 // disable the specialized emitting below. Ideally we should communicate the
2797 // rename in IR, or at least avoid generating the intrinsic calls that are
2798 // likely to get lowered to the renamed library functions.
2799 const unsigned BuiltinIDIfNoAsmLabel =
2800 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2801
2802 std::optional<bool> ErrnoOverriden;
2803 // ErrnoOverriden is true if math-errno is overridden via the
2804 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2805 // which implies math-errno.
2806 if (E->hasStoredFPFeatures()) {
2807 FPOptionsOverride OP = E->getFPFeatures();
2808 if (OP.hasMathErrnoOverride())
2809 ErrnoOverriden = OP.getMathErrnoOverride();
2810 }
2811 // True if '__attribute__((optnone))' is used. This attribute overrides
2812 // fast-math, which implies math-errno.
2813 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2814
2815 // True if we are compiling with optimization and errno has been disabled
2816 // via '#pragma float_control(precise, off)', and
2817 // '__attribute__((optnone))' hasn't been used.
2818 bool ErrnoOverridenToFalseWithOpt =
2819 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2820 CGM.getCodeGenOpts().OptimizationLevel != 0;
2821
2822 // There are LLVM math intrinsics/instructions corresponding to math library
2823 // functions, except that the LLVM op never sets errno while the math library
2824 // might. Also, math builtins have the same semantics as their math library
2825 // twins. Thus, we can transform math library and builtin calls to their
2826 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2827 // In case FP exceptions are enabled, the experimental versions of the
2828 // intrinsics model those.
2829 bool ConstAlways =
2830 getContext().BuiltinInfo.isConst(BuiltinID);
2831
2832 // There's a special case with the fma builtins where they are always const
2833 // if the target environment is GNU or the target OS is Windows and we're
2834 // targeting the MSVCRT.dll environment.
2835 // FIXME: This list can become outdated. Need to find a way to get it some
2836 // other way.
2837 switch (BuiltinID) {
2838 case Builtin::BI__builtin_fma:
2839 case Builtin::BI__builtin_fmaf:
2840 case Builtin::BI__builtin_fmal:
2841 case Builtin::BI__builtin_fmaf16:
2842 case Builtin::BIfma:
2843 case Builtin::BIfmaf:
2844 case Builtin::BIfmal: {
2845 auto &Trip = CGM.getTriple();
2846 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2847 ConstAlways = true;
2848 break;
2849 }
2850 default:
2851 break;
2852 }
2853
2854 bool ConstWithoutErrnoAndExceptions =
2855 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
2856 bool ConstWithoutExceptions =
2857 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2858
2859 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2860 // disabled.
2861 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2862 // or attributes that affect math-errno should prevent or allow math
2863 // intrinsics to be generated. Intrinsics are generated:
2864 // 1- In fast-math mode, unless math-errno is overridden
2865 // via '#pragma float_control(precise, on)', or via
2866 // '__attribute__((optnone))'.
2867 // 2- If math-errno was enabled on the command line but overridden
2868 // to false via '#pragma float_control(precise, off)' and
2869 // '__attribute__((optnone))' hasn't been used.
2870 // 3- If we are compiling with optimization and errno has been disabled
2871 // via '#pragma float_control(precise, off)', and
2872 // '__attribute__((optnone))' hasn't been used.
2873
2874 bool ConstWithoutErrnoOrExceptions =
2875 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2876 bool GenerateIntrinsics =
2877 (ConstAlways && !OptNone) ||
2878 (!getLangOpts().MathErrno &&
2879 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2880 if (!GenerateIntrinsics) {
2881 GenerateIntrinsics =
2882 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2883 if (!GenerateIntrinsics)
2884 GenerateIntrinsics =
2885 ConstWithoutErrnoOrExceptions &&
2886 (!getLangOpts().MathErrno &&
2887 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2888 if (!GenerateIntrinsics)
2889 GenerateIntrinsics =
2890 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2891 }
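// Illustrative sketch, not part of the original file: under -fno-math-errno
// (or with errno disabled by '#pragma float_control(precise, off)' at -O1 and
// above), a call such as
//
//   double r = cos(x);
//
// takes the intrinsic cases below and becomes @llvm.cos.f64 (or its
// experimental constrained variant when FP exceptions are modelled); with
// math-errno in effect none of these cases fire and cos() stays a libcall.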
2892 if (GenerateIntrinsics) {
2893 switch (BuiltinIDIfNoAsmLabel) {
2894 case Builtin::BIacos:
2895 case Builtin::BIacosf:
2896 case Builtin::BIacosl:
2897 case Builtin::BI__builtin_acos:
2898 case Builtin::BI__builtin_acosf:
2899 case Builtin::BI__builtin_acosf16:
2900 case Builtin::BI__builtin_acosl:
2901 case Builtin::BI__builtin_acosf128:
2902 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2903 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2904
2905 case Builtin::BIasin:
2906 case Builtin::BIasinf:
2907 case Builtin::BIasinl:
2908 case Builtin::BI__builtin_asin:
2909 case Builtin::BI__builtin_asinf:
2910 case Builtin::BI__builtin_asinf16:
2911 case Builtin::BI__builtin_asinl:
2912 case Builtin::BI__builtin_asinf128:
2913 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2914 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2915
2916 case Builtin::BIatan:
2917 case Builtin::BIatanf:
2918 case Builtin::BIatanl:
2919 case Builtin::BI__builtin_atan:
2920 case Builtin::BI__builtin_atanf:
2921 case Builtin::BI__builtin_atanf16:
2922 case Builtin::BI__builtin_atanl:
2923 case Builtin::BI__builtin_atanf128:
2924 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2925 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2926
2927 case Builtin::BIatan2:
2928 case Builtin::BIatan2f:
2929 case Builtin::BIatan2l:
2930 case Builtin::BI__builtin_atan2:
2931 case Builtin::BI__builtin_atan2f:
2932 case Builtin::BI__builtin_atan2f16:
2933 case Builtin::BI__builtin_atan2l:
2934 case Builtin::BI__builtin_atan2f128:
2935 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(
2936 *this, E, Intrinsic::atan2,
2937 Intrinsic::experimental_constrained_atan2));
2938
2939 case Builtin::BIceil:
2940 case Builtin::BIceilf:
2941 case Builtin::BIceill:
2942 case Builtin::BI__builtin_ceil:
2943 case Builtin::BI__builtin_ceilf:
2944 case Builtin::BI__builtin_ceilf16:
2945 case Builtin::BI__builtin_ceill:
2946 case Builtin::BI__builtin_ceilf128:
2947 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2948 Intrinsic::ceil,
2949 Intrinsic::experimental_constrained_ceil));
2950
2951 case Builtin::BIcopysign:
2952 case Builtin::BIcopysignf:
2953 case Builtin::BIcopysignl:
2954 case Builtin::BI__builtin_copysign:
2955 case Builtin::BI__builtin_copysignf:
2956 case Builtin::BI__builtin_copysignf16:
2957 case Builtin::BI__builtin_copysignl:
2958 case Builtin::BI__builtin_copysignf128:
2959 return RValue::get(
2960 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2961
2962 case Builtin::BIcos:
2963 case Builtin::BIcosf:
2964 case Builtin::BIcosl:
2965 case Builtin::BI__builtin_cos:
2966 case Builtin::BI__builtin_cosf:
2967 case Builtin::BI__builtin_cosf16:
2968 case Builtin::BI__builtin_cosl:
2969 case Builtin::BI__builtin_cosf128:
2970 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2971 Intrinsic::cos,
2972 Intrinsic::experimental_constrained_cos));
2973
2974 case Builtin::BIcosh:
2975 case Builtin::BIcoshf:
2976 case Builtin::BIcoshl:
2977 case Builtin::BI__builtin_cosh:
2978 case Builtin::BI__builtin_coshf:
2979 case Builtin::BI__builtin_coshf16:
2980 case Builtin::BI__builtin_coshl:
2981 case Builtin::BI__builtin_coshf128:
2982 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
2983 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
2984
2985 case Builtin::BIexp:
2986 case Builtin::BIexpf:
2987 case Builtin::BIexpl:
2988 case Builtin::BI__builtin_exp:
2989 case Builtin::BI__builtin_expf:
2990 case Builtin::BI__builtin_expf16:
2991 case Builtin::BI__builtin_expl:
2992 case Builtin::BI__builtin_expf128:
2993 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
2994 Intrinsic::exp,
2995 Intrinsic::experimental_constrained_exp));
2996
2997 case Builtin::BIexp2:
2998 case Builtin::BIexp2f:
2999 case Builtin::BIexp2l:
3000 case Builtin::BI__builtin_exp2:
3001 case Builtin::BI__builtin_exp2f:
3002 case Builtin::BI__builtin_exp2f16:
3003 case Builtin::BI__builtin_exp2l:
3004 case Builtin::BI__builtin_exp2f128:
3005 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3006 Intrinsic::exp2,
3007 Intrinsic::experimental_constrained_exp2));
3008 case Builtin::BI__builtin_exp10:
3009 case Builtin::BI__builtin_exp10f:
3010 case Builtin::BI__builtin_exp10f16:
3011 case Builtin::BI__builtin_exp10l:
3012 case Builtin::BI__builtin_exp10f128: {
3013 // TODO: strictfp support
3014 if (Builder.getIsFPConstrained())
3015 break;
3016 return RValue::get(
3017 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
3018 }
3019 case Builtin::BIfabs:
3020 case Builtin::BIfabsf:
3021 case Builtin::BIfabsl:
3022 case Builtin::BI__builtin_fabs:
3023 case Builtin::BI__builtin_fabsf:
3024 case Builtin::BI__builtin_fabsf16:
3025 case Builtin::BI__builtin_fabsl:
3026 case Builtin::BI__builtin_fabsf128:
3027 return RValue::get(
3028 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
3029
3030 case Builtin::BIfloor:
3031 case Builtin::BIfloorf:
3032 case Builtin::BIfloorl:
3033 case Builtin::BI__builtin_floor:
3034 case Builtin::BI__builtin_floorf:
3035 case Builtin::BI__builtin_floorf16:
3036 case Builtin::BI__builtin_floorl:
3037 case Builtin::BI__builtin_floorf128:
3038 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3039 Intrinsic::floor,
3040 Intrinsic::experimental_constrained_floor));
3041
3042 case Builtin::BIfma:
3043 case Builtin::BIfmaf:
3044 case Builtin::BIfmal:
3045 case Builtin::BI__builtin_fma:
3046 case Builtin::BI__builtin_fmaf:
3047 case Builtin::BI__builtin_fmaf16:
3048 case Builtin::BI__builtin_fmal:
3049 case Builtin::BI__builtin_fmaf128:
3050 return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,
3051 Intrinsic::fma,
3052 Intrinsic::experimental_constrained_fma));
3053
3054 case Builtin::BIfmax:
3055 case Builtin::BIfmaxf:
3056 case Builtin::BIfmaxl:
3057 case Builtin::BI__builtin_fmax:
3058 case Builtin::BI__builtin_fmaxf:
3059 case Builtin::BI__builtin_fmaxf16:
3060 case Builtin::BI__builtin_fmaxl:
3061 case Builtin::BI__builtin_fmaxf128:
3062 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3063 Intrinsic::maxnum,
3064 Intrinsic::experimental_constrained_maxnum));
3065
3066 case Builtin::BIfmin:
3067 case Builtin::BIfminf:
3068 case Builtin::BIfminl:
3069 case Builtin::BI__builtin_fmin:
3070 case Builtin::BI__builtin_fminf:
3071 case Builtin::BI__builtin_fminf16:
3072 case Builtin::BI__builtin_fminl:
3073 case Builtin::BI__builtin_fminf128:
3074 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3075 Intrinsic::minnum,
3076 Intrinsic::experimental_constrained_minnum));
3077
3078 case Builtin::BIfmaximum_num:
3079 case Builtin::BIfmaximum_numf:
3080 case Builtin::BIfmaximum_numl:
3081 case Builtin::BI__builtin_fmaximum_num:
3082 case Builtin::BI__builtin_fmaximum_numf:
3083 case Builtin::BI__builtin_fmaximum_numf16:
3084 case Builtin::BI__builtin_fmaximum_numl:
3085 case Builtin::BI__builtin_fmaximum_numf128:
3086 return RValue::get(
3087 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::maximumnum));
3088
3089 case Builtin::BIfminimum_num:
3090 case Builtin::BIfminimum_numf:
3091 case Builtin::BIfminimum_numl:
3092 case Builtin::BI__builtin_fminimum_num:
3093 case Builtin::BI__builtin_fminimum_numf:
3094 case Builtin::BI__builtin_fminimum_numf16:
3095 case Builtin::BI__builtin_fminimum_numl:
3096 case Builtin::BI__builtin_fminimum_numf128:
3097 return RValue::get(
3098 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::minimumnum));
3099
3100 // fmod() is a special-case. It maps to the frem instruction rather than an
3101 // LLVM intrinsic.
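// As an illustration, fmod(x, y) on doubles is emitted roughly as
//   %fmod = frem double %x, %y
// (value names depend on the surrounding IR); the same path also serves
// __builtin_elementwise_fmod for vector operands.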
3102 case Builtin::BIfmod:
3103 case Builtin::BIfmodf:
3104 case Builtin::BIfmodl:
3105 case Builtin::BI__builtin_fmod:
3106 case Builtin::BI__builtin_fmodf:
3107 case Builtin::BI__builtin_fmodf16:
3108 case Builtin::BI__builtin_fmodl:
3109 case Builtin::BI__builtin_fmodf128:
3110 case Builtin::BI__builtin_elementwise_fmod: {
3111 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3112 Value *Arg1 = EmitScalarExpr(E->getArg(0));
3113 Value *Arg2 = EmitScalarExpr(E->getArg(1));
3114 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
3115 }
3116
3117 case Builtin::BIlog:
3118 case Builtin::BIlogf:
3119 case Builtin::BIlogl:
3120 case Builtin::BI__builtin_log:
3121 case Builtin::BI__builtin_logf:
3122 case Builtin::BI__builtin_logf16:
3123 case Builtin::BI__builtin_logl:
3124 case Builtin::BI__builtin_logf128:
3125 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3126 Intrinsic::log,
3127 Intrinsic::experimental_constrained_log));
3128
3129 case Builtin::BIlog10:
3130 case Builtin::BIlog10f:
3131 case Builtin::BIlog10l:
3132 case Builtin::BI__builtin_log10:
3133 case Builtin::BI__builtin_log10f:
3134 case Builtin::BI__builtin_log10f16:
3135 case Builtin::BI__builtin_log10l:
3136 case Builtin::BI__builtin_log10f128:
3137 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3138 Intrinsic::log10,
3139 Intrinsic::experimental_constrained_log10));
3140
3141 case Builtin::BIlog2:
3142 case Builtin::BIlog2f:
3143 case Builtin::BIlog2l:
3144 case Builtin::BI__builtin_log2:
3145 case Builtin::BI__builtin_log2f:
3146 case Builtin::BI__builtin_log2f16:
3147 case Builtin::BI__builtin_log2l:
3148 case Builtin::BI__builtin_log2f128:
3149 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3150 Intrinsic::log2,
3151 Intrinsic::experimental_constrained_log2));
3152
3153 case Builtin::BInearbyint:
3154 case Builtin::BInearbyintf:
3155 case Builtin::BInearbyintl:
3156 case Builtin::BI__builtin_nearbyint:
3157 case Builtin::BI__builtin_nearbyintf:
3158 case Builtin::BI__builtin_nearbyintl:
3159 case Builtin::BI__builtin_nearbyintf128:
3160 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3161 Intrinsic::nearbyint,
3162 Intrinsic::experimental_constrained_nearbyint));
3163
3164 case Builtin::BIpow:
3165 case Builtin::BIpowf:
3166 case Builtin::BIpowl:
3167 case Builtin::BI__builtin_pow:
3168 case Builtin::BI__builtin_powf:
3169 case Builtin::BI__builtin_powf16:
3170 case Builtin::BI__builtin_powl:
3171 case Builtin::BI__builtin_powf128:
3172 return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,
3173 Intrinsic::pow,
3174 Intrinsic::experimental_constrained_pow));
3175
3176 case Builtin::BIrint:
3177 case Builtin::BIrintf:
3178 case Builtin::BIrintl:
3179 case Builtin::BI__builtin_rint:
3180 case Builtin::BI__builtin_rintf:
3181 case Builtin::BI__builtin_rintf16:
3182 case Builtin::BI__builtin_rintl:
3183 case Builtin::BI__builtin_rintf128:
3184 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3185 Intrinsic::rint,
3186 Intrinsic::experimental_constrained_rint));
3187
3188 case Builtin::BIround:
3189 case Builtin::BIroundf:
3190 case Builtin::BIroundl:
3191 case Builtin::BI__builtin_round:
3192 case Builtin::BI__builtin_roundf:
3193 case Builtin::BI__builtin_roundf16:
3194 case Builtin::BI__builtin_roundl:
3195 case Builtin::BI__builtin_roundf128:
3196 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3197 Intrinsic::round,
3198 Intrinsic::experimental_constrained_round));
3199
3200 case Builtin::BIroundeven:
3201 case Builtin::BIroundevenf:
3202 case Builtin::BIroundevenl:
3203 case Builtin::BI__builtin_roundeven:
3204 case Builtin::BI__builtin_roundevenf:
3205 case Builtin::BI__builtin_roundevenf16:
3206 case Builtin::BI__builtin_roundevenl:
3207 case Builtin::BI__builtin_roundevenf128:
3208 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3209 Intrinsic::roundeven,
3210 Intrinsic::experimental_constrained_roundeven));
3211
3212 case Builtin::BIsin:
3213 case Builtin::BIsinf:
3214 case Builtin::BIsinl:
3215 case Builtin::BI__builtin_sin:
3216 case Builtin::BI__builtin_sinf:
3217 case Builtin::BI__builtin_sinf16:
3218 case Builtin::BI__builtin_sinl:
3219 case Builtin::BI__builtin_sinf128:
3220 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3221 Intrinsic::sin,
3222 Intrinsic::experimental_constrained_sin));
3223
3224 case Builtin::BIsinh:
3225 case Builtin::BIsinhf:
3226 case Builtin::BIsinhl:
3227 case Builtin::BI__builtin_sinh:
3228 case Builtin::BI__builtin_sinhf:
3229 case Builtin::BI__builtin_sinhf16:
3230 case Builtin::BI__builtin_sinhl:
3231 case Builtin::BI__builtin_sinhf128:
3232 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3233 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
3234
3235 case Builtin::BIsqrt:
3236 case Builtin::BIsqrtf:
3237 case Builtin::BIsqrtl:
3238 case Builtin::BI__builtin_sqrt:
3239 case Builtin::BI__builtin_sqrtf:
3240 case Builtin::BI__builtin_sqrtf16:
3241 case Builtin::BI__builtin_sqrtl:
3242 case Builtin::BI__builtin_sqrtf128:
3243 case Builtin::BI__builtin_elementwise_sqrt: {
3244 llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
3245 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
3246 SetSqrtFPAccuracy(Call);
3247 return RValue::get(Call);
3248 }
3249
3250 case Builtin::BItan:
3251 case Builtin::BItanf:
3252 case Builtin::BItanl:
3253 case Builtin::BI__builtin_tan:
3254 case Builtin::BI__builtin_tanf:
3255 case Builtin::BI__builtin_tanf16:
3256 case Builtin::BI__builtin_tanl:
3257 case Builtin::BI__builtin_tanf128:
3258 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3259 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
3260
3261 case Builtin::BItanh:
3262 case Builtin::BItanhf:
3263 case Builtin::BItanhl:
3264 case Builtin::BI__builtin_tanh:
3265 case Builtin::BI__builtin_tanhf:
3266 case Builtin::BI__builtin_tanhf16:
3267 case Builtin::BI__builtin_tanhl:
3268 case Builtin::BI__builtin_tanhf128:
3269 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
3270 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3271
3272 case Builtin::BItrunc:
3273 case Builtin::BItruncf:
3274 case Builtin::BItruncl:
3275 case Builtin::BI__builtin_trunc:
3276 case Builtin::BI__builtin_truncf:
3277 case Builtin::BI__builtin_truncf16:
3278 case Builtin::BI__builtin_truncl:
3279 case Builtin::BI__builtin_truncf128:
3280 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
3281 Intrinsic::trunc,
3282 Intrinsic::experimental_constrained_trunc));
3283
3284 case Builtin::BIlround:
3285 case Builtin::BIlroundf:
3286 case Builtin::BIlroundl:
3287 case Builtin::BI__builtin_lround:
3288 case Builtin::BI__builtin_lroundf:
3289 case Builtin::BI__builtin_lroundl:
3290 case Builtin::BI__builtin_lroundf128:
3291 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3292 *this, E, Intrinsic::lround,
3293 Intrinsic::experimental_constrained_lround));
3294
3295 case Builtin::BIllround:
3296 case Builtin::BIllroundf:
3297 case Builtin::BIllroundl:
3298 case Builtin::BI__builtin_llround:
3299 case Builtin::BI__builtin_llroundf:
3300 case Builtin::BI__builtin_llroundl:
3301 case Builtin::BI__builtin_llroundf128:
3302 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3303 *this, E, Intrinsic::llround,
3304 Intrinsic::experimental_constrained_llround));
3305
3306 case Builtin::BIlrint:
3307 case Builtin::BIlrintf:
3308 case Builtin::BIlrintl:
3309 case Builtin::BI__builtin_lrint:
3310 case Builtin::BI__builtin_lrintf:
3311 case Builtin::BI__builtin_lrintl:
3312 case Builtin::BI__builtin_lrintf128:
3313 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3314 *this, E, Intrinsic::lrint,
3315 Intrinsic::experimental_constrained_lrint));
3316
3317 case Builtin::BIllrint:
3318 case Builtin::BIllrintf:
3319 case Builtin::BIllrintl:
3320 case Builtin::BI__builtin_llrint:
3321 case Builtin::BI__builtin_llrintf:
3322 case Builtin::BI__builtin_llrintl:
3323 case Builtin::BI__builtin_llrintf128:
3324 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3325 *this, E, Intrinsic::llrint,
3326 Intrinsic::experimental_constrained_llrint));
3327 case Builtin::BI__builtin_ldexp:
3328 case Builtin::BI__builtin_ldexpf:
3329 case Builtin::BI__builtin_ldexpl:
3330 case Builtin::BI__builtin_ldexpf16:
3331 case Builtin::BI__builtin_ldexpf128: {
3332 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3333 *this, E, Intrinsic::ldexp,
3334 Intrinsic::experimental_constrained_ldexp));
3335 }
3336 default:
3337 break;
3338 }
3339 }
3340
3341 // Check NonnullAttribute/NullabilityArg and Alignment.
3342 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3343 unsigned ParmNum) {
3344 Value *Val = A.emitRawPointer(*this);
3345 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3346 ParmNum);
3347
3348 if (SanOpts.has(SanitizerKind::Alignment)) {
3349 SanitizerSet SkippedChecks;
3350 SkippedChecks.set(SanitizerKind::All);
3351 SkippedChecks.clear(SanitizerKind::Alignment);
3352 SourceLocation Loc = Arg->getExprLoc();
3353 // Strip an implicit cast.
3354 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3355 if (CE->getCastKind() == CK_BitCast)
3356 Arg = CE->getSubExpr();
3357 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3358 SkippedChecks);
3359 }
3360 };
3361
3362 switch (BuiltinIDIfNoAsmLabel) {
3363 default: break;
3364 case Builtin::BI__builtin___CFStringMakeConstantString:
3365 case Builtin::BI__builtin___NSStringMakeConstantString:
3366 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3367 case Builtin::BI__builtin_stdarg_start:
3368 case Builtin::BI__builtin_va_start:
3369 case Builtin::BI__va_start:
3370 case Builtin::BI__builtin_va_end:
3371 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3372 ? EmitScalarExpr(E->getArg(0))
3373 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3374 BuiltinID != Builtin::BI__builtin_va_end);
3375 return RValue::get(nullptr);
3376 case Builtin::BI__builtin_va_copy: {
3377 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3378 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3379 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3380 {DstPtr, SrcPtr});
3381 return RValue::get(nullptr);
3382 }
3383 case Builtin::BIabs:
3384 case Builtin::BIlabs:
3385 case Builtin::BIllabs:
3386 case Builtin::BI__builtin_abs:
3387 case Builtin::BI__builtin_labs:
3388 case Builtin::BI__builtin_llabs: {
3389 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3390
3391 Value *Result;
3392 switch (getLangOpts().getSignedOverflowBehavior()) {
3393 case LangOptions::SOB_Defined:
3394 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3395 break;
3396 case LangOptions::SOB_Undefined:
3397 if (!SanitizeOverflow) {
3398 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3399 break;
3400 }
3401 [[fallthrough]];
3402 case LangOptions::SOB_Trapping:
3403 // TODO: Somehow handle the corner case when the address of abs is taken.
3404 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3405 break;
3406 }
3407 return RValue::get(Result);
3408 }
3409 case Builtin::BI__builtin_complex: {
3410 Value *Real = EmitScalarExpr(E->getArg(0));
3411 Value *Imag = EmitScalarExpr(E->getArg(1));
3412 return RValue::getComplex({Real, Imag});
3413 }
3414 case Builtin::BI__builtin_conj:
3415 case Builtin::BI__builtin_conjf:
3416 case Builtin::BI__builtin_conjl:
3417 case Builtin::BIconj:
3418 case Builtin::BIconjf:
3419 case Builtin::BIconjl: {
3420 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3421 Value *Real = ComplexVal.first;
3422 Value *Imag = ComplexVal.second;
3423 Imag = Builder.CreateFNeg(Imag, "neg");
3424 return RValue::getComplex(std::make_pair(Real, Imag));
3425 }
3426 case Builtin::BI__builtin_creal:
3427 case Builtin::BI__builtin_crealf:
3428 case Builtin::BI__builtin_creall:
3429 case Builtin::BIcreal:
3430 case Builtin::BIcrealf:
3431 case Builtin::BIcreall: {
3432 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3433 return RValue::get(ComplexVal.first);
3434 }
3435
3436 case Builtin::BI__builtin_preserve_access_index: {
3437 // Only enable the preserved access index region when debug info
3438 // is available, as debug info is needed to preserve the user-level
3439 // access pattern.
3440 if (!getDebugInfo()) {
3441 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3442 return RValue::get(EmitScalarExpr(E->getArg(0)));
3443 }
3444
3445 // Nested builtin_preserve_access_index() not supported
3446 if (IsInPreservedAIRegion) {
3447 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3448 return RValue::get(EmitScalarExpr(E->getArg(0)));
3449 }
3450
3451 IsInPreservedAIRegion = true;
3452 Value *Res = EmitScalarExpr(E->getArg(0));
3453 IsInPreservedAIRegion = false;
3454 return RValue::get(Res);
3455 }
3456
3457 case Builtin::BI__builtin_cimag:
3458 case Builtin::BI__builtin_cimagf:
3459 case Builtin::BI__builtin_cimagl:
3460 case Builtin::BIcimag:
3461 case Builtin::BIcimagf:
3462 case Builtin::BIcimagl: {
3463 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3464 return RValue::get(ComplexVal.second);
3465 }
3466
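// __builtin_clrsb returns the number of leading redundant sign bits. For
// example, with a 32-bit int both __builtin_clrsb(0) and __builtin_clrsb(-1)
// evaluate to 31.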
3467 case Builtin::BI__builtin_clrsb:
3468 case Builtin::BI__builtin_clrsbl:
3469 case Builtin::BI__builtin_clrsbll: {
3470 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3471 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3472
3473 llvm::Type *ArgType = ArgValue->getType();
3474 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3475
3476 llvm::Type *ResultType = ConvertType(E->getType());
3477 Value *Zero = llvm::Constant::getNullValue(ArgType);
3478 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3479 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3480 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3481 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3482 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3483 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3484 "cast");
3485 return RValue::get(Result);
3486 }
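// For __builtin_ctzg/__builtin_clzg, a second argument (if present) supplies
// the result for a zero input; the count itself is still emitted through
// llvm.cttz/llvm.ctlz and the zero case is then handled with a select below.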
3487 case Builtin::BI__builtin_ctzs:
3488 case Builtin::BI__builtin_ctz:
3489 case Builtin::BI__builtin_ctzl:
3490 case Builtin::BI__builtin_ctzll:
3491 case Builtin::BI__builtin_ctzg: {
3492 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3493 E->getNumArgs() > 1;
3494
3495 Value *ArgValue =
3496 HasFallback ? EmitScalarExpr(E->getArg(0))
3497 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3498
3499 llvm::Type *ArgType = ArgValue->getType();
3500 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3501
3502 llvm::Type *ResultType = ConvertType(E->getType());
3503 Value *ZeroUndef =
3504 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3505 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3506 if (Result->getType() != ResultType)
3507 Result =
3508 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3509 if (!HasFallback)
3510 return RValue::get(Result);
3511
3512 Value *Zero = Constant::getNullValue(ArgType);
3513 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3514 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3515 Value *ResultOrFallback =
3516 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3517 return RValue::get(ResultOrFallback);
3518 }
3519 case Builtin::BI__builtin_clzs:
3520 case Builtin::BI__builtin_clz:
3521 case Builtin::BI__builtin_clzl:
3522 case Builtin::BI__builtin_clzll:
3523 case Builtin::BI__builtin_clzg: {
3524 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3525 E->getNumArgs() > 1;
3526
3527 Value *ArgValue =
3528 HasFallback ? EmitScalarExpr(E->getArg(0))
3529 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3530
3531 llvm::Type *ArgType = ArgValue->getType();
3532 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3533
3534 llvm::Type *ResultType = ConvertType(E->getType());
3535 Value *ZeroUndef =
3536 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3537 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3538 if (Result->getType() != ResultType)
3539 Result =
3540 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3541 if (!HasFallback)
3542 return RValue::get(Result);
3543
3544 Value *Zero = Constant::getNullValue(ArgType);
3545 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3546 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3547 Value *ResultOrFallback =
3548 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3549 return RValue::get(ResultOrFallback);
3550 }
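// Worked example for the expansion below: __builtin_ffs(8) is cttz(8) + 1 = 4,
// while __builtin_ffs(0) takes the select's zero path and yields 0.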
3551 case Builtin::BI__builtin_ffs:
3552 case Builtin::BI__builtin_ffsl:
3553 case Builtin::BI__builtin_ffsll: {
3554 // ffs(x) -> x ? cttz(x) + 1 : 0
3555 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3556
3557 llvm::Type *ArgType = ArgValue->getType();
3558 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3559
3560 llvm::Type *ResultType = ConvertType(E->getType());
3561 Value *Tmp =
3562 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3563 llvm::ConstantInt::get(ArgType, 1));
3564 Value *Zero = llvm::Constant::getNullValue(ArgType);
3565 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3566 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3567 if (Result->getType() != ResultType)
3568 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3569 "cast");
3570 return RValue::get(Result);
3571 }
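// For example, __builtin_parity(7) is 1 (three bits set) and
// __builtin_parity(6) is 0; both reduce to a ctpop followed by a mask with 1.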
3572 case Builtin::BI__builtin_parity:
3573 case Builtin::BI__builtin_parityl:
3574 case Builtin::BI__builtin_parityll: {
3575 // parity(x) -> ctpop(x) & 1
3576 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3577
3578 llvm::Type *ArgType = ArgValue->getType();
3579 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3580
3581 llvm::Type *ResultType = ConvertType(E->getType());
3582 Value *Tmp = Builder.CreateCall(F, ArgValue);
3583 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3584 if (Result->getType() != ResultType)
3585 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3586 "cast");
3587 return RValue::get(Result);
3588 }
3589 case Builtin::BI__lzcnt16:
3590 case Builtin::BI__lzcnt:
3591 case Builtin::BI__lzcnt64: {
3592 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3593
3594 llvm::Type *ArgType = ArgValue->getType();
3595 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3596
3597 llvm::Type *ResultType = ConvertType(E->getType());
3598 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3599 if (Result->getType() != ResultType)
3600 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3601 "cast");
3602 return RValue::get(Result);
3603 }
3604 case Builtin::BI__popcnt16:
3605 case Builtin::BI__popcnt:
3606 case Builtin::BI__popcnt64:
3607 case Builtin::BI__builtin_popcount:
3608 case Builtin::BI__builtin_popcountl:
3609 case Builtin::BI__builtin_popcountll:
3610 case Builtin::BI__builtin_popcountg: {
3611 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3612
3613 llvm::Type *ArgType = ArgValue->getType();
3614 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3615
3616 llvm::Type *ResultType = ConvertType(E->getType());
3617 Value *Result = Builder.CreateCall(F, ArgValue);
3618 if (Result->getType() != ResultType)
3619 Result =
3620 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3621 return RValue::get(Result);
3622 }
3623 case Builtin::BI__builtin_unpredictable: {
3624 // Always return the argument of __builtin_unpredictable. LLVM does not
3625 // handle this builtin. Metadata for this builtin should be added directly
3626 // to instructions such as branches or switches that use it.
3627 return RValue::get(EmitScalarExpr(E->getArg(0)));
3628 }
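// Typical usage is branch annotation, e.g. if (__builtin_expect(err, 0)) {...};
// at -O1 and above this lowers to a call to llvm.expect (overloaded on the
// argument type), which is later turned into branch-weight metadata.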
3629 case Builtin::BI__builtin_expect: {
3630 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3631 llvm::Type *ArgType = ArgValue->getType();
3632
3633 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3634 // Don't generate llvm.expect on -O0 as the backend won't use it for
3635 // anything.
3636 // Note, we still IRGen ExpectedValue because it could have side-effects.
3637 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3638 return RValue::get(ArgValue);
3639
3640 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3641 Value *Result =
3642 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3643 return RValue::get(Result);
3644 }
3645 case Builtin::BI__builtin_expect_with_probability: {
3646 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3647 llvm::Type *ArgType = ArgValue->getType();
3648
3649 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3650 llvm::APFloat Probability(0.0);
3651 const Expr *ProbArg = E->getArg(2);
3652 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3653 assert(EvalSucceed && "probability should be able to evaluate as float");
3654 (void)EvalSucceed;
3655 bool LoseInfo = false;
3656 Probability.convert(llvm::APFloat::IEEEdouble(),
3657 llvm::RoundingMode::Dynamic, &LoseInfo);
3658 llvm::Type *Ty = ConvertType(ProbArg->getType());
3659 Constant *Confidence = ConstantFP::get(Ty, Probability);
3660 // Don't generate llvm.expect.with.probability on -O0 as the backend
3661 // won't use it for anything.
3662 // Note, we still IRGen ExpectedValue because it could have side-effects.
3663 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3664 return RValue::get(ArgValue);
3665
3666 Function *FnExpect =
3667 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3668 Value *Result = Builder.CreateCall(
3669 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3670 return RValue::get(Result);
3671 }
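// A sketch of the usual pattern: p = __builtin_assume_aligned(p, 64) returns p
// unchanged and emits an alignment assumption (an llvm.assume carrying an
// "align" operand bundle) that later optimizations may exploit.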
3672 case Builtin::BI__builtin_assume_aligned: {
3673 const Expr *Ptr = E->getArg(0);
3674 Value *PtrValue = EmitScalarExpr(Ptr);
3675 Value *OffsetValue =
3676 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3677
3678 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3679 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3680 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3681 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3682 llvm::Value::MaximumAlignment);
3683
3684 emitAlignmentAssumption(PtrValue, Ptr,
3685 /*The expr loc is sufficient.*/ SourceLocation(),
3686 AlignmentCI, OffsetValue);
3687 return RValue::get(PtrValue);
3688 }
3689 case Builtin::BI__assume:
3690 case Builtin::BI__builtin_assume: {
3691 if (E->getArg(0)->HasSideEffects(getContext()))
3692 return RValue::get(nullptr);
3693
3694 Value *ArgValue = EmitCheckedArgForAssume(E->getArg(0));
3695 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3696 Builder.CreateCall(FnAssume, ArgValue);
3697 return RValue::get(nullptr);
3698 }
3699 case Builtin::BI__builtin_assume_separate_storage: {
3700 const Expr *Arg0 = E->getArg(0);
3701 const Expr *Arg1 = E->getArg(1);
3702
3703 Value *Value0 = EmitScalarExpr(Arg0);
3704 Value *Value1 = EmitScalarExpr(Arg1);
3705
3706 Value *Values[] = {Value0, Value1};
3707 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3708 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3709 return RValue::get(nullptr);
3710 }
3711 case Builtin::BI__builtin_allow_runtime_check: {
3712 StringRef Kind =
3713 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3714 LLVMContext &Ctx = CGM.getLLVMContext();
3715 llvm::Value *Allow = Builder.CreateCall(
3716 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3717 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3718 return RValue::get(Allow);
3719 }
3720 case Builtin::BI__arithmetic_fence: {
3721 // Create the builtin call if FastMath is selected, and the target
3722 // supports the builtin, otherwise just return the argument.
3723 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3724 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3725 bool isArithmeticFenceEnabled =
3726 FMF.allowReassoc() &&
3727 getContext().getTargetInfo().checkArithmeticFenceSupported();
3728 QualType ArgType = E->getArg(0)->getType();
3729 if (ArgType->isComplexType()) {
3730 if (isArithmeticFenceEnabled) {
3731 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3732 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3733 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3734 ConvertType(ElementType));
3735 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3736 ConvertType(ElementType));
3737 return RValue::getComplex(std::make_pair(Real, Imag));
3738 }
3739 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3740 Value *Real = ComplexVal.first;
3741 Value *Imag = ComplexVal.second;
3742 return RValue::getComplex(std::make_pair(Real, Imag));
3743 }
3744 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3745 if (isArithmeticFenceEnabled)
3746 return RValue::get(
3747 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3748 return RValue::get(ArgValue);
3749 }
3750 case Builtin::BI__builtin_bswap16:
3751 case Builtin::BI__builtin_bswap32:
3752 case Builtin::BI__builtin_bswap64:
3753 case Builtin::BI_byteswap_ushort:
3754 case Builtin::BI_byteswap_ulong:
3755 case Builtin::BI_byteswap_uint64: {
3756 return RValue::get(
3757 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3758 }
3759 case Builtin::BI__builtin_bitreverse8:
3760 case Builtin::BI__builtin_bitreverse16:
3761 case Builtin::BI__builtin_bitreverse32:
3762 case Builtin::BI__builtin_bitreverse64: {
3763 return RValue::get(
3764 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3765 }
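// The rotate builtins are emitted through emitRotate, which uses the
// funnel-shift intrinsics: rotate-left becomes llvm.fshl and rotate-right
// becomes llvm.fshr, with the rotated value passed as both data operands.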
3766 case Builtin::BI__builtin_rotateleft8:
3767 case Builtin::BI__builtin_rotateleft16:
3768 case Builtin::BI__builtin_rotateleft32:
3769 case Builtin::BI__builtin_rotateleft64:
3770 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3771 case Builtin::BI_rotl16:
3772 case Builtin::BI_rotl:
3773 case Builtin::BI_lrotl:
3774 case Builtin::BI_rotl64:
3775 return emitRotate(E, false);
3776
3777 case Builtin::BI__builtin_rotateright8:
3778 case Builtin::BI__builtin_rotateright16:
3779 case Builtin::BI__builtin_rotateright32:
3780 case Builtin::BI__builtin_rotateright64:
3781 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3782 case Builtin::BI_rotr16:
3783 case Builtin::BI_rotr:
3784 case Builtin::BI_lrotr:
3785 case Builtin::BI_rotr64:
3786 return emitRotate(E, true);
3787
3788 case Builtin::BI__builtin_constant_p: {
3789 llvm::Type *ResultType = ConvertType(E->getType());
3790
3791 const Expr *Arg = E->getArg(0);
3792 QualType ArgType = Arg->getType();
3793 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3794 // and likely a mistake.
3795 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3796 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3797 // Per the GCC documentation, only numeric constants are recognized after
3798 // inlining.
3799 return RValue::get(ConstantInt::get(ResultType, 0));
3800
3801 if (Arg->HasSideEffects(getContext()))
3802 // The argument is unevaluated, so be conservative if it might have
3803 // side-effects.
3804 return RValue::get(ConstantInt::get(ResultType, 0));
3805
3806 Value *ArgValue = EmitScalarExpr(Arg);
3807 if (ArgType->isObjCObjectPointerType()) {
3808 // Convert Objective-C objects to id because we cannot distinguish between
3809 // LLVM types for Obj-C classes as they are opaque.
3810 ArgType = CGM.getContext().getObjCIdType();
3811 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3812 }
3813 Function *F =
3814 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3815 Value *Result = Builder.CreateCall(F, ArgValue);
3816 if (Result->getType() != ResultType)
3817 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3818 return RValue::get(Result);
3819 }
3820 case Builtin::BI__builtin_dynamic_object_size:
3821 case Builtin::BI__builtin_object_size: {
3822 unsigned Type =
3823 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3824 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3825
3826 // We pass this builtin onto the optimizer so that it can figure out the
3827 // object size in more complex cases.
3828 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3829 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3830 /*EmittedE=*/nullptr, IsDynamic));
3831 }
3832 case Builtin::BI__builtin_counted_by_ref: {
3833 // Default to returning '(void *) 0'.
3834 llvm::Value *Result = llvm::ConstantPointerNull::get(
3835 llvm::PointerType::getUnqual(getLLVMContext()));
3836
3837 const Expr *Arg = E->getArg(0)->IgnoreParenImpCasts();
3838
3839 if (auto *UO = dyn_cast<UnaryOperator>(Arg);
3840 UO && UO->getOpcode() == UO_AddrOf) {
3841 Arg = UO->getSubExpr()->IgnoreParenImpCasts();
3842
3843 if (auto *ASE = dyn_cast<ArraySubscriptExpr>(Arg))
3844 Arg = ASE->getBase()->IgnoreParenImpCasts();
3845 }
3846
3847 if (const MemberExpr *ME = dyn_cast_if_present<MemberExpr>(Arg)) {
3848 if (auto *CATy =
3849 ME->getMemberDecl()->getType()->getAs<CountAttributedType>();
3850 CATy && CATy->getKind() == CountAttributedType::CountedBy) {
3851 const auto *FAMDecl = cast<FieldDecl>(ME->getMemberDecl());
3852 if (const FieldDecl *CountFD = FAMDecl->findCountedByField())
3853 Result = GetCountedByFieldExprGEP(Arg, FAMDecl, CountFD);
3854 else
3855 llvm::report_fatal_error("Cannot find the counted_by 'count' field");
3856 }
3857 }
3858
3859 return RValue::get(Result);
3860 }
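// __builtin_prefetch(addr [, rw [, locality]]): missing arguments default to
// rw = 0 (read) and locality = 3 (high temporal locality), and the trailing
// constant 1 marks this as a data rather than an instruction prefetch.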
3861 case Builtin::BI__builtin_prefetch: {
3862 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3863 // FIXME: Technically these constants should be of type 'int', yes?
3864 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3865 llvm::ConstantInt::get(Int32Ty, 0);
3866 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3867 llvm::ConstantInt::get(Int32Ty, 3);
3868 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3869 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3870 Builder.CreateCall(F, {Address, RW, Locality, Data});
3871 return RValue::get(nullptr);
3872 }
3873 case Builtin::BI__builtin_readcyclecounter: {
3874 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3875 return RValue::get(Builder.CreateCall(F));
3876 }
3877 case Builtin::BI__builtin_readsteadycounter: {
3878 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3879 return RValue::get(Builder.CreateCall(F));
3880 }
3881 case Builtin::BI__builtin___clear_cache: {
3882 Value *Begin = EmitScalarExpr(E->getArg(0));
3883 Value *End = EmitScalarExpr(E->getArg(1));
3884 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3885 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3886 }
3887 case Builtin::BI__builtin_trap:
3888 EmitTrapCall(Intrinsic::trap);
3889 return RValue::get(nullptr);
3890 case Builtin::BI__builtin_verbose_trap: {
3891 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3892 if (getDebugInfo()) {
3893 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3894 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3895 *E->getArg(1)->tryEvaluateString(getContext()));
3896 }
3897 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3898 // Currently no attempt is made to prevent traps from being merged.
3899 EmitTrapCall(Intrinsic::trap);
3900 return RValue::get(nullptr);
3901 }
3902 case Builtin::BI__debugbreak:
3903 EmitTrapCall(Intrinsic::debugtrap);
3904 return RValue::get(nullptr);
3905 case Builtin::BI__builtin_unreachable: {
3906 EmitUnreachable(E->getExprLoc());
3907
3908 // We do need to preserve an insertion point.
3909 EmitBlock(createBasicBlock("unreachable.cont"));
3910
3911 return RValue::get(nullptr);
3912 }
3913
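// powi takes an integer exponent. Roughly, __builtin_powif(x, 3) becomes a call
// to llvm.powi.f32.i32(x, 3); under strict FP the constrained variant is used
// instead, which (as the FIXME below notes) only mangles the floating-point type.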
3914 case Builtin::BI__builtin_powi:
3915 case Builtin::BI__builtin_powif:
3916 case Builtin::BI__builtin_powil: {
3917 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3918 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3919
3920 if (Builder.getIsFPConstrained()) {
3921 // FIXME: llvm.powi has 2 mangling types,
3922 // llvm.experimental.constrained.powi has one.
3923 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3924 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3925 Src0->getType());
3926 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3927 }
3928
3929 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3930 { Src0->getType(), Src1->getType() });
3931 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3932 }
3933 case Builtin::BI__builtin_frexpl: {
3934 // Linux PPC will not be adding additional PPCDoubleDouble support.
3935 // WIP to switch default to IEEE long double. Will emit libcall for
3936 // frexpl instead of legalizing this type in the BE.
3937 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3938 break;
3939 [[fallthrough]];
3940 }
3941 case Builtin::BI__builtin_frexp:
3942 case Builtin::BI__builtin_frexpf:
3943 case Builtin::BI__builtin_frexpf128:
3944 case Builtin::BI__builtin_frexpf16:
3945 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3946 case Builtin::BI__builtin_isgreater:
3947 case Builtin::BI__builtin_isgreaterequal:
3948 case Builtin::BI__builtin_isless:
3949 case Builtin::BI__builtin_islessequal:
3950 case Builtin::BI__builtin_islessgreater:
3951 case Builtin::BI__builtin_isunordered: {
3952 // Ordered comparisons: we know the arguments to these are matching scalar
3953 // floating point values.
3954 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3955 Value *LHS = EmitScalarExpr(E->getArg(0));
3956 Value *RHS = EmitScalarExpr(E->getArg(1));
3957
3958 switch (BuiltinID) {
3959 default: llvm_unreachable("Unknown ordered comparison");
3960 case Builtin::BI__builtin_isgreater:
3961 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3962 break;
3963 case Builtin::BI__builtin_isgreaterequal:
3964 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3965 break;
3966 case Builtin::BI__builtin_isless:
3967 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3968 break;
3969 case Builtin::BI__builtin_islessequal:
3970 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3971 break;
3972 case Builtin::BI__builtin_islessgreater:
3973 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3974 break;
3975 case Builtin::BI__builtin_isunordered:
3976 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3977 break;
3978 }
3979 // ZExt bool to int type.
3980 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3981 }
3982
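// The classification builtins below (isnan, issignaling, isinf, isfinite,
// isnormal, issubnormal, iszero, isfpclass) are all emitted as llvm.is.fpclass
// with the corresponding FPClassTest mask; tryUseTestFPKind first gives the
// target a chance to emit a native test instruction instead.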
3983 case Builtin::BI__builtin_isnan: {
3984 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3985 Value *V = EmitScalarExpr(E->getArg(0));
3986 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3987 return RValue::get(Result);
3988 return RValue::get(
3989 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3990 ConvertType(E->getType())));
3991 }
3992
3993 case Builtin::BI__builtin_issignaling: {
3994 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3995 Value *V = EmitScalarExpr(E->getArg(0));
3996 return RValue::get(
3997 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3998 ConvertType(E->getType())));
3999 }
4000
4001 case Builtin::BI__builtin_isinf: {
4002 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4003 Value *V = EmitScalarExpr(E->getArg(0));
4004 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4005 return RValue::get(Result);
4006 return RValue::get(
4007 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
4008 ConvertType(E->getType())));
4009 }
4010
4011 case Builtin::BIfinite:
4012 case Builtin::BI__finite:
4013 case Builtin::BIfinitef:
4014 case Builtin::BI__finitef:
4015 case Builtin::BIfinitel:
4016 case Builtin::BI__finitel:
4017 case Builtin::BI__builtin_isfinite: {
4018 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4019 Value *V = EmitScalarExpr(E->getArg(0));
4020 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
4021 return RValue::get(Result);
4022 return RValue::get(
4023 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
4024 ConvertType(E->getType())));
4025 }
4026
4027 case Builtin::BI__builtin_isnormal: {
4028 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4029 Value *V = EmitScalarExpr(E->getArg(0));
4030 return RValue::get(
4031 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
4032 ConvertType(E->getType())));
4033 }
4034
4035 case Builtin::BI__builtin_issubnormal: {
4036 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4037 Value *V = EmitScalarExpr(E->getArg(0));
4038 return RValue::get(
4039 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
4040 ConvertType(E->getType())));
4041 }
4042
4043 case Builtin::BI__builtin_iszero: {
4044 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4045 Value *V = EmitScalarExpr(E->getArg(0));
4046 return RValue::get(
4047 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
4048 ConvertType(E->getType())));
4049 }
4050
4051 case Builtin::BI__builtin_isfpclass: {
4052 Expr::EvalResult Result;
4053 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
4054 break;
4055 uint64_t Test = Result.Val.getInt().getLimitedValue();
4056 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4057 Value *V = EmitScalarExpr(E->getArg(0));
4058 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
4059 ConvertType(E->getType())));
4060 }
4061
4062 case Builtin::BI__builtin_nondeterministic_value: {
4063 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
4064
4065 Value *Result = PoisonValue::get(Ty);
4066 Result = Builder.CreateFreeze(Result);
4067
4068 return RValue::get(Result);
4069 }
4070
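// The __builtin_elementwise_* builtins accept scalars or vectors and simply
// overload the underlying intrinsic on the operand type. For the integer abs
// below, the int-min-is-poison flag is passed as false, so
// __builtin_elementwise_abs(INT_MIN) wraps to INT_MIN rather than being poison.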
4071 case Builtin::BI__builtin_elementwise_abs: {
4072 Value *Result;
4073 QualType QT = E->getArg(0)->getType();
4074
4075 if (auto *VecTy = QT->getAs<VectorType>())
4076 QT = VecTy->getElementType();
4077 if (QT->isIntegerType())
4078 Result = Builder.CreateBinaryIntrinsic(
4079 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
4080 Builder.getFalse(), nullptr, "elt.abs");
4081 else
4082 Result = emitBuiltinWithOneOverloadedType<1>(
4083 *this, E, llvm::Intrinsic::fabs, "elt.abs");
4084
4085 return RValue::get(Result);
4086 }
4087 case Builtin::BI__builtin_elementwise_acos:
4088 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4089 *this, E, llvm::Intrinsic::acos, "elt.acos"));
4090 case Builtin::BI__builtin_elementwise_asin:
4091 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4092 *this, E, llvm::Intrinsic::asin, "elt.asin"));
4093 case Builtin::BI__builtin_elementwise_atan:
4094 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4095 *this, E, llvm::Intrinsic::atan, "elt.atan"));
4096 case Builtin::BI__builtin_elementwise_atan2:
4097 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4098 *this, E, llvm::Intrinsic::atan2, "elt.atan2"));
4099 case Builtin::BI__builtin_elementwise_ceil:
4100 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4101 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
4102 case Builtin::BI__builtin_elementwise_exp:
4103 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4104 *this, E, llvm::Intrinsic::exp, "elt.exp"));
4105 case Builtin::BI__builtin_elementwise_exp2:
4106 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4107 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
4108 case Builtin::BI__builtin_elementwise_log:
4109 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4110 *this, E, llvm::Intrinsic::log, "elt.log"));
4111 case Builtin::BI__builtin_elementwise_log2:
4112 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4113 *this, E, llvm::Intrinsic::log2, "elt.log2"));
4114 case Builtin::BI__builtin_elementwise_log10:
4115 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4116 *this, E, llvm::Intrinsic::log10, "elt.log10"));
4117 case Builtin::BI__builtin_elementwise_pow: {
4118 return RValue::get(
4119 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
4120 }
4121 case Builtin::BI__builtin_elementwise_bitreverse:
4122 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4123 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
4124 case Builtin::BI__builtin_elementwise_cos:
4125 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4126 *this, E, llvm::Intrinsic::cos, "elt.cos"));
4127 case Builtin::BI__builtin_elementwise_cosh:
4128 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4129 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
4130 case Builtin::BI__builtin_elementwise_floor:
4131 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4132 *this, E, llvm::Intrinsic::floor, "elt.floor"));
4133 case Builtin::BI__builtin_elementwise_popcount:
4134 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4135 *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
4136 case Builtin::BI__builtin_elementwise_roundeven:
4137 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4138 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
4139 case Builtin::BI__builtin_elementwise_round:
4140 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4141 *this, E, llvm::Intrinsic::round, "elt.round"));
4142 case Builtin::BI__builtin_elementwise_rint:
4143 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4144 *this, E, llvm::Intrinsic::rint, "elt.rint"));
4145 case Builtin::BI__builtin_elementwise_nearbyint:
4146 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4147 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
4148 case Builtin::BI__builtin_elementwise_sin:
4149 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4150 *this, E, llvm::Intrinsic::sin, "elt.sin"));
4151 case Builtin::BI__builtin_elementwise_sinh:
4152 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4153 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
4154 case Builtin::BI__builtin_elementwise_tan:
4155 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4156 *this, E, llvm::Intrinsic::tan, "elt.tan"));
4157 case Builtin::BI__builtin_elementwise_tanh:
4158 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4159 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
4160 case Builtin::BI__builtin_elementwise_trunc:
4161 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4162 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
4163 case Builtin::BI__builtin_elementwise_canonicalize:
4164 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4165 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
4166 case Builtin::BI__builtin_elementwise_copysign:
4167 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
4168 *this, E, llvm::Intrinsic::copysign));
4169 case Builtin::BI__builtin_elementwise_fma:
4170 return RValue::get(
4171 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
4172 case Builtin::BI__builtin_elementwise_add_sat:
4173 case Builtin::BI__builtin_elementwise_sub_sat: {
4174 Value *Op0 = EmitScalarExpr(E->getArg(0));
4175 Value *Op1 = EmitScalarExpr(E->getArg(1));
4176 Value *Result;
4177 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
4178 QualType Ty = E->getArg(0)->getType();
4179 if (auto *VecTy = Ty->getAs<VectorType>())
4180 Ty = VecTy->getElementType();
4181 bool IsSigned = Ty->isSignedIntegerType();
4182 unsigned Opc;
4183 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
4184 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
4185 else
4186 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
4187 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
4188 return RValue::get(Result);
4189 }
4190
4191 case Builtin::BI__builtin_elementwise_max: {
4192 Value *Op0 = EmitScalarExpr(E->getArg(0));
4193 Value *Op1 = EmitScalarExpr(E->getArg(1));
4194 Value *Result;
4195 if (Op0->getType()->isIntOrIntVectorTy()) {
4196 QualType Ty = E->getArg(0)->getType();
4197 if (auto *VecTy = Ty->getAs<VectorType>())
4198 Ty = VecTy->getElementType();
4199 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4200 ? llvm::Intrinsic::smax
4201 : llvm::Intrinsic::umax,
4202 Op0, Op1, nullptr, "elt.max");
4203 } else
4204 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
4205 return RValue::get(Result);
4206 }
4207 case Builtin::BI__builtin_elementwise_min: {
4208 Value *Op0 = EmitScalarExpr(E->getArg(0));
4209 Value *Op1 = EmitScalarExpr(E->getArg(1));
4210 Value *Result;
4211 if (Op0->getType()->isIntOrIntVectorTy()) {
4212 QualType Ty = E->getArg(0)->getType();
4213 if (auto *VecTy = Ty->getAs<VectorType>())
4214 Ty = VecTy->getElementType();
4215 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
4216 ? llvm::Intrinsic::smin
4217 : llvm::Intrinsic::umin,
4218 Op0, Op1, nullptr, "elt.min");
4219 } else
4220 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
4221 return RValue::get(Result);
4222 }
4223
4224 case Builtin::BI__builtin_elementwise_maximum: {
4225 Value *Op0 = EmitScalarExpr(E->getArg(0));
4226 Value *Op1 = EmitScalarExpr(E->getArg(1));
4227 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::maximum, Op0,
4228 Op1, nullptr, "elt.maximum");
4229 return RValue::get(Result);
4230 }
4231
4232 case Builtin::BI__builtin_elementwise_minimum: {
4233 Value *Op0 = EmitScalarExpr(E->getArg(0));
4234 Value *Op1 = EmitScalarExpr(E->getArg(1));
4235 Value *Result = Builder.CreateBinaryIntrinsic(llvm::Intrinsic::minimum, Op0,
4236 Op1, nullptr, "elt.minimum");
4237 return RValue::get(Result);
4238 }
4239
4240 case Builtin::BI__builtin_reduce_max: {
4241 auto GetIntrinsicID = [this](QualType QT) {
4242 if (auto *VecTy = QT->getAs<VectorType>())
4243 QT = VecTy->getElementType();
4244 else if (QT->isSizelessVectorType())
4245 QT = QT->getSizelessVectorEltType(CGM.getContext());
4246
4247 if (QT->isSignedIntegerType())
4248 return llvm::Intrinsic::vector_reduce_smax;
4249 if (QT->isUnsignedIntegerType())
4250 return llvm::Intrinsic::vector_reduce_umax;
4251 assert(QT->isFloatingType() && "must have a float here");
4252 return llvm::Intrinsic::vector_reduce_fmax;
4253 };
4254 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4255 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4256 }
4257
4258 case Builtin::BI__builtin_reduce_min: {
4259 auto GetIntrinsicID = [this](QualType QT) {
4260 if (auto *VecTy = QT->getAs<VectorType>())
4261 QT = VecTy->getElementType();
4262 else if (QT->isSizelessVectorType())
4263 QT = QT->getSizelessVectorEltType(CGM.getContext());
4264
4265 if (QT->isSignedIntegerType())
4266 return llvm::Intrinsic::vector_reduce_smin;
4267 if (QT->isUnsignedIntegerType())
4268 return llvm::Intrinsic::vector_reduce_umin;
4269 assert(QT->isFloatingType() && "must have a float here");
4270 return llvm::Intrinsic::vector_reduce_fmin;
4271 };
4272
4273 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4274 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
4275 }
4276
4277 case Builtin::BI__builtin_reduce_add:
4278 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4279 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
4280 case Builtin::BI__builtin_reduce_mul:
4281 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4282 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
4283 case Builtin::BI__builtin_reduce_xor:
4284 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4285 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
4286 case Builtin::BI__builtin_reduce_or:
4287 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4288 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
4289 case Builtin::BI__builtin_reduce_and:
4290 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4291 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
4292 case Builtin::BI__builtin_reduce_maximum:
4293 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4294 *this, E, llvm::Intrinsic::vector_reduce_fmaximum, "rdx.maximum"));
4295 case Builtin::BI__builtin_reduce_minimum:
4296 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
4297 *this, E, llvm::Intrinsic::vector_reduce_fminimum, "rdx.minimum"));
4298
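// The matrix builtins are emitted via llvm::MatrixBuilder, which produces the
// llvm.matrix.transpose / llvm.matrix.column.major.load / ...store intrinsics
// with the row and column counts as immediate operands.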
4299 case Builtin::BI__builtin_matrix_transpose: {
4300 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
4301 Value *MatValue = EmitScalarExpr(E->getArg(0));
4302 MatrixBuilder MB(Builder);
4303 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
4304 MatrixTy->getNumColumns());
4305 return RValue::get(Result);
4306 }
4307
4308 case Builtin::BI__builtin_matrix_column_major_load: {
4309 MatrixBuilder MB(Builder);
4310 // Emit everything that isn't dependent on the first parameter type
4311 Value *Stride = EmitScalarExpr(E->getArg(3));
4312 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
4313 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
4314 assert(PtrTy && "arg0 must be of pointer type");
4315 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4316
4317 Address Src = EmitPointerWithAlignment(E->getArg(0));
4318 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4319 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4320 0);
4321 Value *Result = MB.CreateColumnMajorLoad(
4322 Src.getElementType(), Src.emitRawPointer(*this),
4323 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4324 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4325 return RValue::get(Result);
4326 }
4327
4328 case Builtin::BI__builtin_matrix_column_major_store: {
4329 MatrixBuilder MB(Builder);
4330 Value *Matrix = EmitScalarExpr(E->getArg(0));
4331 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4332 Value *Stride = EmitScalarExpr(E->getArg(2));
4333
4334 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4335 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4336 assert(PtrTy && "arg1 must be of pointer type");
4337 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4338
4339 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4340 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4341 0);
4342 Value *Result = MB.CreateColumnMajorStore(
4343 Matrix, Dst.emitRawPointer(*this),
4344 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4345 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4346 return RValue::get(Result);
4347 }
4348
4349 case Builtin::BI__builtin_isinf_sign: {
4350 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4351 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4352 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4353 Value *Arg = EmitScalarExpr(E->getArg(0));
4354 Value *AbsArg = EmitFAbs(*this, Arg);
4355 Value *IsInf = Builder.CreateFCmpOEQ(
4356 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4357 Value *IsNeg = EmitSignBit(*this, Arg);
4358
4359 llvm::Type *IntTy = ConvertType(E->getType());
4360 Value *Zero = Constant::getNullValue(IntTy);
4361 Value *One = ConstantInt::get(IntTy, 1);
4362 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4363 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4364 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4365 return RValue::get(Result);
4366 }
4367
4368 case Builtin::BI__builtin_flt_rounds: {
4369 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4370
4371 llvm::Type *ResultType = ConvertType(E->getType());
4372 Value *Result = Builder.CreateCall(F);
4373 if (Result->getType() != ResultType)
4374 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4375 "cast");
4376 return RValue::get(Result);
4377 }
4378
4379 case Builtin::BI__builtin_set_flt_rounds: {
4380 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4381
4382 Value *V = EmitScalarExpr(E->getArg(0));
4383 Builder.CreateCall(F, V);
4384 return RValue::get(nullptr);
4385 }
4386
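// __builtin_fpclassify(nan_result, inf_result, normal_result, subnormal_result,
// zero_result, x) is expanded inline below as a chain of compares and branches
// feeding a single PHI, so no libcall is needed.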
4387 case Builtin::BI__builtin_fpclassify: {
4388 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4389 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4390 Value *V = EmitScalarExpr(E->getArg(5));
4391 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4392
4393 // Create Result
4394 BasicBlock *Begin = Builder.GetInsertBlock();
4395 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4396 Builder.SetInsertPoint(End);
4397 PHINode *Result =
4398 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4399 "fpclassify_result");
4400
4401 // if (V==0) return FP_ZERO
4402 Builder.SetInsertPoint(Begin);
4403 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4404 "iszero");
4405 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4406 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4407 Builder.CreateCondBr(IsZero, End, NotZero);
4408 Result->addIncoming(ZeroLiteral, Begin);
4409
4410 // if (V != V) return FP_NAN
4411 Builder.SetInsertPoint(NotZero);
4412 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4413 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4414 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4415 Builder.CreateCondBr(IsNan, End, NotNan);
4416 Result->addIncoming(NanLiteral, NotZero);
4417
4418 // if (fabs(V) == infinity) return FP_INFINITY
4419 Builder.SetInsertPoint(NotNan);
4420 Value *VAbs = EmitFAbs(*this, V);
4421 Value *IsInf =
4422 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4423 "isinf");
4424 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4425 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4426 Builder.CreateCondBr(IsInf, End, NotInf);
4427 Result->addIncoming(InfLiteral, NotNan);
4428
4429 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4430 Builder.SetInsertPoint(NotInf);
4431 APFloat Smallest = APFloat::getSmallestNormalized(
4432 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4433 Value *IsNormal =
4434 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4435 "isnormal");
4436 Value *NormalResult =
4437 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4438 EmitScalarExpr(E->getArg(3)));
4439 Builder.CreateBr(End);
4440 Result->addIncoming(NormalResult, NotInf);
4441
4442 // return Result
4443 Builder.SetInsertPoint(End);
4444 return RValue::get(Result);
4445 }
4446
4447 // An alloca will always return a pointer to the alloca (stack) address
4448 // space. This address space need not be the same as the AST / Language
4449 // default (e.g. in C / C++ auto vars are in the generic address space). At
4450 // the AST level this is handled within CreateTempAlloca et al., but for the
4451 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4452 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4453 case Builtin::BIalloca:
4454 case Builtin::BI_alloca:
4455 case Builtin::BI__builtin_alloca_uninitialized:
4456 case Builtin::BI__builtin_alloca: {
4457 Value *Size = EmitScalarExpr(E->getArg(0));
4458 const TargetInfo &TI = getContext().getTargetInfo();
4459 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4460 const Align SuitableAlignmentInBytes =
4461 CGM.getContext()
4462 .toCharUnitsFromBits(TI.getSuitableAlign())
4463 .getAsAlign();
4464 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4465 AI->setAlignment(SuitableAlignmentInBytes);
4466 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4467 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4468 LangAS AAS = getASTAllocaAddressSpace();
4469 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4470 if (AAS != EAS) {
4471 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4472 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4473 EAS, Ty));
4474 }
4475 return RValue::get(AI);
4476 }
4477
4478 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4479 case Builtin::BI__builtin_alloca_with_align: {
4480 Value *Size = EmitScalarExpr(E->getArg(0));
4481 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4482 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4483 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4484 const Align AlignmentInBytes =
4485 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4486 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4487 AI->setAlignment(AlignmentInBytes);
4488 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4489 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4490 LangAS AAS = getASTAllocaAddressSpace();
4491 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4492 if (AAS != EAS) {
4493 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4494 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4495 EAS, Ty));
4496 }
4497 return RValue::get(AI);
4498 }
4499
4500 case Builtin::BIbzero:
4501 case Builtin::BI__builtin_bzero: {
4502 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4503 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4504 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4505 E->getArg(0)->getExprLoc(), FD, 0);
4506 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4507 return RValue::get(nullptr);
4508 }
4509
4510 case Builtin::BIbcopy:
4511 case Builtin::BI__builtin_bcopy: {
4512 Address Src = EmitPointerWithAlignment(E->getArg(0));
4513 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4514 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4515 EmitNonNullArgCheck(RValue::get(Src, *this),
4516 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4517 0);
4518 EmitNonNullArgCheck(RValue::get(Dest, *this),
4519 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4520 0);
4521 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4522 return RValue::get(nullptr);
4523 }
4524
4525 case Builtin::BImemcpy:
4526 case Builtin::BI__builtin_memcpy:
4527 case Builtin::BImempcpy:
4528 case Builtin::BI__builtin_mempcpy: {
4529 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4530 Address Src = EmitPointerWithAlignment(E->getArg(1));
4531 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4532 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4533 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4534 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4535 if (BuiltinID == Builtin::BImempcpy ||
4536 BuiltinID == Builtin::BI__builtin_mempcpy)
4537 return RValue::get(Builder.CreateInBoundsGEP(
4538 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4539 else
4540 return RValue::get(Dest, *this);
4541 }
4542
4543 case Builtin::BI__builtin_memcpy_inline: {
4544 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4545 Address Src = EmitPointerWithAlignment(E->getArg(1));
4546 uint64_t Size =
4547 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4548 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4549 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4550 Builder.CreateMemCpyInline(Dest, Src, Size);
4551 return RValue::get(nullptr);
4552 }
4553
4554 case Builtin::BI__builtin_char_memchr:
4555 BuiltinID = Builtin::BI__builtin_memchr;
4556 break;
4557
4558 case Builtin::BI__builtin___memcpy_chk: {
4559 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4560 Expr::EvalResult SizeResult, DstSizeResult;
4561 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4562 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4563 break;
4564 llvm::APSInt Size = SizeResult.Val.getInt();
4565 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4566 if (Size.ugt(DstSize))
4567 break;
4568 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4569 Address Src = EmitPointerWithAlignment(E->getArg(1));
4570 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4571 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4572 return RValue::get(Dest, *this);
4573 }
4574
4575 case Builtin::BI__builtin_objc_memmove_collectable: {
4576 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4577 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4578 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4579 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4580 DestAddr, SrcAddr, SizeVal);
4581 return RValue::get(DestAddr, *this);
4582 }
4583
4584 case Builtin::BI__builtin___memmove_chk: {
4585 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4586 Expr::EvalResult SizeResult, DstSizeResult;
4587 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4588 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4589 break;
4590 llvm::APSInt Size = SizeResult.Val.getInt();
4591 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4592 if (Size.ugt(DstSize))
4593 break;
4594 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4595 Address Src = EmitPointerWithAlignment(E->getArg(1));
4596 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4597 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4598 return RValue::get(Dest, *this);
4599 }
4600
4601 case Builtin::BImemmove:
4602 case Builtin::BI__builtin_memmove: {
4603 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4604 Address Src = EmitPointerWithAlignment(E->getArg(1));
4605 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4606 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4607 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4608 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4609 return RValue::get(Dest, *this);
4610 }
4611 case Builtin::BImemset:
4612 case Builtin::BI__builtin_memset: {
4613 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4614 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4615 Builder.getInt8Ty());
4616 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4617 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4618 E->getArg(0)->getExprLoc(), FD, 0);
4619 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4620 return RValue::get(Dest, *this);
4621 }
4622 case Builtin::BI__builtin_memset_inline: {
4623 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4624 Value *ByteVal =
4625 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4626 uint64_t Size =
4627 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4628 EmitNonNullArgCheck(RValue::get(Dest, *this),
4629 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4630 0);
4631 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4632 return RValue::get(nullptr);
4633 }
4634 case Builtin::BI__builtin___memset_chk: {
4635 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4636 Expr::EvalResult SizeResult, DstSizeResult;
4637 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4638 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4639 break;
4640 llvm::APSInt Size = SizeResult.Val.getInt();
4641 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4642 if (Size.ugt(DstSize))
4643 break;
4644 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4645 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4646 Builder.getInt8Ty());
4647 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4648 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4649 return RValue::get(Dest, *this);
4650 }
4651 case Builtin::BI__builtin_wmemchr: {
4652 // The MSVC runtime library does not provide a definition of wmemchr, so we
4653 // need an inline implementation.
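// The expansion below is a straightforward element-by-element loop: load each
// wchar_t, compare it against the target character, and advance the pointer
// while decrementing the count until a match is found or the count hits zero.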
4654 if (!getTarget().getTriple().isOSMSVCRT())
4655 break;
4656
4657 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4658 Value *Str = EmitScalarExpr(E->getArg(0));
4659 Value *Chr = EmitScalarExpr(E->getArg(1));
4660 Value *Size = EmitScalarExpr(E->getArg(2));
4661
4662 BasicBlock *Entry = Builder.GetInsertBlock();
4663 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4664 BasicBlock *Next = createBasicBlock("wmemchr.next");
4665 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4666 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4667 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4668
4669 EmitBlock(CmpEq);
4670 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4671 StrPhi->addIncoming(Str, Entry);
4672 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4673 SizePhi->addIncoming(Size, Entry);
4674 CharUnits WCharAlign =
4675 getContext().getTypeAlignInChars(getContext().WCharTy);
4676 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4677 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4678 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4679 Builder.CreateCondBr(StrEqChr, Exit, Next);
4680
4681 EmitBlock(Next);
4682 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4683 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4684 Value *NextSizeEq0 =
4685 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4686 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4687 StrPhi->addIncoming(NextStr, Next);
4688 SizePhi->addIncoming(NextSize, Next);
4689
4690 EmitBlock(Exit);
4691 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4692 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4693 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4694 Ret->addIncoming(FoundChr, CmpEq);
4695 return RValue::get(Ret);
4696 }
4697 case Builtin::BI__builtin_wmemcmp: {
4698 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4699 // need an inline implementation.
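// The loop below compares one wchar_t at a time using unsigned comparisons
// and returns 1, -1, or 0 for greater-than, less-than, or equal buffers.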
4700 if (!getTarget().getTriple().isOSMSVCRT())
4701 break;
4702
4703 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4704
4705 Value *Dst = EmitScalarExpr(E->getArg(0));
4706 Value *Src = EmitScalarExpr(E->getArg(1));
4707 Value *Size = EmitScalarExpr(E->getArg(2));
4708
4709 BasicBlock *Entry = Builder.GetInsertBlock();
4710 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4711 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4712 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4713 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4714 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4715 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4716
4717 EmitBlock(CmpGT);
4718 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4719 DstPhi->addIncoming(Dst, Entry);
4720 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4721 SrcPhi->addIncoming(Src, Entry);
4722 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4723 SizePhi->addIncoming(Size, Entry);
4724 CharUnits WCharAlign =
4725 getContext().getTypeAlignInChars(getContext().WCharTy);
4726 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4727 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4728 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4729 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4730
4731 EmitBlock(CmpLT);
4732 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4733 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4734
4735 EmitBlock(Next);
4736 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4737 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4738 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4739 Value *NextSizeEq0 =
4740 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4741 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4742 DstPhi->addIncoming(NextDst, Next);
4743 SrcPhi->addIncoming(NextSrc, Next);
4744 SizePhi->addIncoming(NextSize, Next);
4745
4746 EmitBlock(Exit);
4747 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4748 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4749 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4750 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4751 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4752 return RValue::get(Ret);
4753 }
4754 case Builtin::BI__builtin_dwarf_cfa: {
4755 // The offset in bytes from the first argument to the CFA.
4756 //
4757 // Why on earth is this in the frontend? Is there any reason at
4758 // all that the backend can't reasonably determine this while
4759 // lowering llvm.eh.dwarf.cfa()?
4760 //
4761 // TODO: If there's a satisfactory reason, add a target hook for
4762 // this instead of hard-coding 0, which is correct for most targets.
4763 int32_t Offset = 0;
4764
4765 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4766 return RValue::get(Builder.CreateCall(F,
4767 llvm::ConstantInt::get(Int32Ty, Offset)));
4768 }
4769 case Builtin::BI__builtin_return_address: {
4770 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4771 getContext().UnsignedIntTy);
4772 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4773 return RValue::get(Builder.CreateCall(F, Depth));
4774 }
4775 case Builtin::BI_ReturnAddress: {
4776 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4777 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4778 }
4779 case Builtin::BI__builtin_frame_address: {
4780 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4781 getContext().UnsignedIntTy);
4782 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4783 return RValue::get(Builder.CreateCall(F, Depth));
4784 }
4785 case Builtin::BI__builtin_extract_return_addr: {
4786 Value *Address = EmitScalarExpr(E->getArg(0));
4787 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4788 return RValue::get(Result);
4789 }
4790 case Builtin::BI__builtin_frob_return_addr: {
4791 Value *Address = EmitScalarExpr(E->getArg(0));
4792 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4793 return RValue::get(Result);
4794 }
4795 case Builtin::BI__builtin_dwarf_sp_column: {
4796 llvm::IntegerType *Ty
4797 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4798 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4799 if (Column == -1) {
4800 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4801 return RValue::get(llvm::UndefValue::get(Ty));
4802 }
4803 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4804 }
4805 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4806 Value *Address = EmitScalarExpr(E->getArg(0));
4807 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4808 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4809 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4810 }
4811 case Builtin::BI__builtin_eh_return: {
4812 Value *Int = EmitScalarExpr(E->getArg(0));
4813 Value *Ptr = EmitScalarExpr(E->getArg(1));
4814
4815 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4816 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4817 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4818 Function *F =
4819 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4820 : Intrinsic::eh_return_i64);
4821 Builder.CreateCall(F, {Int, Ptr});
4822 Builder.CreateUnreachable();
4823
4824 // We do need to preserve an insertion point.
4825 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4826
4827 return RValue::get(nullptr);
4828 }
4829 case Builtin::BI__builtin_unwind_init: {
4830 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4831 Builder.CreateCall(F);
4832 return RValue::get(nullptr);
4833 }
4834 case Builtin::BI__builtin_extend_pointer: {
4835 // Extends a pointer to the size of an _Unwind_Word, which is
4836 // uint64_t on all platforms. Generally this gets poked into a
4837 // register and eventually used as an address, so if the
4838 // addressing registers are wider than pointers and the platform
4839 // doesn't implicitly ignore high-order bits when doing
4840 // addressing, we need to make sure we zext / sext based on
4841 // the platform's expectations.
4842 //
4843 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
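// For example, on a 32-bit target the pointer is widened to a 64-bit
// _Unwind_Word; whether that widening is a zext or a sext is answered by the
// extendPointerWithSExt() target hook below.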
4844
4845 // Cast the pointer to intptr_t.
4846 Value *Ptr = EmitScalarExpr(E->getArg(0));
4847 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4848
4849 // If that's 64 bits, we're done.
4850 if (IntPtrTy->getBitWidth() == 64)
4851 return RValue::get(Result);
4852
4853 // Otherwise, ask the codegen data what to do.
4854 if (getTargetHooks().extendPointerWithSExt())
4855 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4856 else
4857 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4858 }
4859 case Builtin::BI__builtin_setjmp: {
4860 // Buffer is a void**.
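// In the generic lowering below, slot 0 of the buffer receives the frame
// address and slot 2 receives the saved stack pointer; the remaining slot is
// not written by the frontend.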
4861 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4862
4863 if (getTarget().getTriple().getArch() == llvm::Triple::systemz) {
4864 // On this target, the back end fills in the context buffer completely.
4865 // It doesn't really matter if the frontend stores to the buffer before
4866 // calling setjmp, the back-end is going to overwrite them anyway.
4867 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4868 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4869 }
4870
4871 // Store the frame pointer to the setjmp buffer.
4872 Value *FrameAddr = Builder.CreateCall(
4873 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4874 ConstantInt::get(Int32Ty, 0));
4875 Builder.CreateStore(FrameAddr, Buf);
4876
4877 // Store the stack pointer to the setjmp buffer.
4878 Value *StackAddr = Builder.CreateStackSave();
4879 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4880
4881 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4882 Builder.CreateStore(StackAddr, StackSaveSlot);
4883
4884 // Call LLVM's EH setjmp, which is lightweight.
4885 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4886 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4887 }
4888 case Builtin::BI__builtin_longjmp: {
4889 Value *Buf = EmitScalarExpr(E->getArg(0));
4890
4891 // Call LLVM's EH longjmp, which is lightweight.
4892 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4893
4894 // longjmp doesn't return; mark this as unreachable.
4895 Builder.CreateUnreachable();
4896
4897 // We do need to preserve an insertion point.
4898 EmitBlock(createBasicBlock("longjmp.cont"));
4899
4900 return RValue::get(nullptr);
4901 }
4902 case Builtin::BI__builtin_launder: {
4903 const Expr *Arg = E->getArg(0);
4904 QualType ArgTy = Arg->getType()->getPointeeType();
4905 Value *Ptr = EmitScalarExpr(Arg);
4906 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4907 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4908
4909 return RValue::get(Ptr);
4910 }
4911 case Builtin::BI__sync_fetch_and_add:
4912 case Builtin::BI__sync_fetch_and_sub:
4913 case Builtin::BI__sync_fetch_and_or:
4914 case Builtin::BI__sync_fetch_and_and:
4915 case Builtin::BI__sync_fetch_and_xor:
4916 case Builtin::BI__sync_fetch_and_nand:
4917 case Builtin::BI__sync_add_and_fetch:
4918 case Builtin::BI__sync_sub_and_fetch:
4919 case Builtin::BI__sync_and_and_fetch:
4920 case Builtin::BI__sync_or_and_fetch:
4921 case Builtin::BI__sync_xor_and_fetch:
4922 case Builtin::BI__sync_nand_and_fetch:
4923 case Builtin::BI__sync_val_compare_and_swap:
4924 case Builtin::BI__sync_bool_compare_and_swap:
4925 case Builtin::BI__sync_lock_test_and_set:
4926 case Builtin::BI__sync_lock_release:
4927 case Builtin::BI__sync_swap:
4928 llvm_unreachable("Shouldn't make it through sema");
4929 case Builtin::BI__sync_fetch_and_add_1:
4930 case Builtin::BI__sync_fetch_and_add_2:
4931 case Builtin::BI__sync_fetch_and_add_4:
4932 case Builtin::BI__sync_fetch_and_add_8:
4933 case Builtin::BI__sync_fetch_and_add_16:
4934 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4935 case Builtin::BI__sync_fetch_and_sub_1:
4936 case Builtin::BI__sync_fetch_and_sub_2:
4937 case Builtin::BI__sync_fetch_and_sub_4:
4938 case Builtin::BI__sync_fetch_and_sub_8:
4939 case Builtin::BI__sync_fetch_and_sub_16:
4940 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4941 case Builtin::BI__sync_fetch_and_or_1:
4942 case Builtin::BI__sync_fetch_and_or_2:
4943 case Builtin::BI__sync_fetch_and_or_4:
4944 case Builtin::BI__sync_fetch_and_or_8:
4945 case Builtin::BI__sync_fetch_and_or_16:
4946 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4947 case Builtin::BI__sync_fetch_and_and_1:
4948 case Builtin::BI__sync_fetch_and_and_2:
4949 case Builtin::BI__sync_fetch_and_and_4:
4950 case Builtin::BI__sync_fetch_and_and_8:
4951 case Builtin::BI__sync_fetch_and_and_16:
4952 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4953 case Builtin::BI__sync_fetch_and_xor_1:
4954 case Builtin::BI__sync_fetch_and_xor_2:
4955 case Builtin::BI__sync_fetch_and_xor_4:
4956 case Builtin::BI__sync_fetch_and_xor_8:
4957 case Builtin::BI__sync_fetch_and_xor_16:
4958 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4959 case Builtin::BI__sync_fetch_and_nand_1:
4960 case Builtin::BI__sync_fetch_and_nand_2:
4961 case Builtin::BI__sync_fetch_and_nand_4:
4962 case Builtin::BI__sync_fetch_and_nand_8:
4963 case Builtin::BI__sync_fetch_and_nand_16:
4964 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4965
4966 // Clang extensions: not overloaded yet.
4967 case Builtin::BI__sync_fetch_and_min:
4968 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4969 case Builtin::BI__sync_fetch_and_max:
4970 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4971 case Builtin::BI__sync_fetch_and_umin:
4972 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4973 case Builtin::BI__sync_fetch_and_umax:
4974 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4975
4976 case Builtin::BI__sync_add_and_fetch_1:
4977 case Builtin::BI__sync_add_and_fetch_2:
4978 case Builtin::BI__sync_add_and_fetch_4:
4979 case Builtin::BI__sync_add_and_fetch_8:
4980 case Builtin::BI__sync_add_and_fetch_16:
4981 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4982 llvm::Instruction::Add);
4983 case Builtin::BI__sync_sub_and_fetch_1:
4984 case Builtin::BI__sync_sub_and_fetch_2:
4985 case Builtin::BI__sync_sub_and_fetch_4:
4986 case Builtin::BI__sync_sub_and_fetch_8:
4987 case Builtin::BI__sync_sub_and_fetch_16:
4988 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4989 llvm::Instruction::Sub);
4990 case Builtin::BI__sync_and_and_fetch_1:
4991 case Builtin::BI__sync_and_and_fetch_2:
4992 case Builtin::BI__sync_and_and_fetch_4:
4993 case Builtin::BI__sync_and_and_fetch_8:
4994 case Builtin::BI__sync_and_and_fetch_16:
4995 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4996 llvm::Instruction::And);
4997 case Builtin::BI__sync_or_and_fetch_1:
4998 case Builtin::BI__sync_or_and_fetch_2:
4999 case Builtin::BI__sync_or_and_fetch_4:
5000 case Builtin::BI__sync_or_and_fetch_8:
5001 case Builtin::BI__sync_or_and_fetch_16:
5002 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
5003 llvm::Instruction::Or);
5004 case Builtin::BI__sync_xor_and_fetch_1:
5005 case Builtin::BI__sync_xor_and_fetch_2:
5006 case Builtin::BI__sync_xor_and_fetch_4:
5007 case Builtin::BI__sync_xor_and_fetch_8:
5008 case Builtin::BI__sync_xor_and_fetch_16:
5009 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
5010 llvm::Instruction::Xor);
5011 case Builtin::BI__sync_nand_and_fetch_1:
5012 case Builtin::BI__sync_nand_and_fetch_2:
5013 case Builtin::BI__sync_nand_and_fetch_4:
5014 case Builtin::BI__sync_nand_and_fetch_8:
5015 case Builtin::BI__sync_nand_and_fetch_16:
5016 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
5017 llvm::Instruction::And, true);
5018
5019 case Builtin::BI__sync_val_compare_and_swap_1:
5020 case Builtin::BI__sync_val_compare_and_swap_2:
5021 case Builtin::BI__sync_val_compare_and_swap_4:
5022 case Builtin::BI__sync_val_compare_and_swap_8:
5023 case Builtin::BI__sync_val_compare_and_swap_16:
5024 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
5025
5026 case Builtin::BI__sync_bool_compare_and_swap_1:
5027 case Builtin::BI__sync_bool_compare_and_swap_2:
5028 case Builtin::BI__sync_bool_compare_and_swap_4:
5029 case Builtin::BI__sync_bool_compare_and_swap_8:
5030 case Builtin::BI__sync_bool_compare_and_swap_16:
5031 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
5032
5033 case Builtin::BI__sync_swap_1:
5034 case Builtin::BI__sync_swap_2:
5035 case Builtin::BI__sync_swap_4:
5036 case Builtin::BI__sync_swap_8:
5037 case Builtin::BI__sync_swap_16:
5038 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5039
5040 case Builtin::BI__sync_lock_test_and_set_1:
5041 case Builtin::BI__sync_lock_test_and_set_2:
5042 case Builtin::BI__sync_lock_test_and_set_4:
5043 case Builtin::BI__sync_lock_test_and_set_8:
5044 case Builtin::BI__sync_lock_test_and_set_16:
5045 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
5046
5047 case Builtin::BI__sync_lock_release_1:
5048 case Builtin::BI__sync_lock_release_2:
5049 case Builtin::BI__sync_lock_release_4:
5050 case Builtin::BI__sync_lock_release_8:
5051 case Builtin::BI__sync_lock_release_16: {
5052 Address Ptr = CheckAtomicAlignment(*this, E);
5053 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
5054
5055 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
5056 getContext().getTypeSize(ElTy));
5057 llvm::StoreInst *Store =
5058 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
5059 Store->setAtomic(llvm::AtomicOrdering::Release);
5060 return RValue::get(nullptr);
5061 }
5062
5063 case Builtin::BI__sync_synchronize: {
5064 // We assume this is supposed to correspond to a C++0x-style
5065 // sequentially-consistent fence (i.e. this is only usable for
5066 // synchronization, not device I/O or anything like that). This intrinsic
5067 // is really badly designed in the sense that in theory, there isn't
5068 // any way to safely use it... but in practice, it mostly works
5069 // to use it with non-atomic loads and stores to get acquire/release
5070 // semantics.
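// The net effect is a single 'fence seq_cst' instruction.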
5071 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
5072 return RValue::get(nullptr);
5073 }
5074
5075 case Builtin::BI__builtin_nontemporal_load:
5076 return RValue::get(EmitNontemporalLoad(*this, E));
5077 case Builtin::BI__builtin_nontemporal_store:
5078 return RValue::get(EmitNontemporalStore(*this, E));
5079 case Builtin::BI__c11_atomic_is_lock_free:
5080 case Builtin::BI__atomic_is_lock_free: {
5081 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
5082 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
5083 // _Atomic(T) is always properly-aligned.
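// For example, __atomic_is_lock_free(sizeof(T), p) becomes a call to the
// runtime's __atomic_is_lock_free(size_t, void *), while the __c11 form
// passes a null pointer as the second argument.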
5084 const char *LibCallName = "__atomic_is_lock_free";
5085 CallArgList Args;
5086 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
5087 getContext().getSizeType());
5088 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
5089 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
5090 getContext().VoidPtrTy);
5091 else
5092 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
5093 getContext().VoidPtrTy);
5094 const CGFunctionInfo &FuncInfo =
5095 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
5096 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
5097 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
5098 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
5099 ReturnValueSlot(), Args);
5100 }
5101
5102 case Builtin::BI__atomic_test_and_set: {
5103 // Look at the argument type to determine whether this is a volatile
5104 // operation. The parameter type is always volatile.
5105 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5106 bool Volatile =
5107 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5108
5109 Address Ptr =
5110 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
5111
5112 Value *NewVal = Builder.getInt8(1);
5113 Value *Order = EmitScalarExpr(E->getArg(1));
5114 if (isa<llvm::ConstantInt>(Order)) {
5115 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5116 AtomicRMWInst *Result = nullptr;
5117 switch (ord) {
5118 case 0: // memory_order_relaxed
5119 default: // invalid order
5120 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5121 llvm::AtomicOrdering::Monotonic);
5122 break;
5123 case 1: // memory_order_consume
5124 case 2: // memory_order_acquire
5125 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5126 llvm::AtomicOrdering::Acquire);
5127 break;
5128 case 3: // memory_order_release
5129 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5130 llvm::AtomicOrdering::Release);
5131 break;
5132 case 4: // memory_order_acq_rel
5133
5134 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5135 llvm::AtomicOrdering::AcquireRelease);
5136 break;
5137 case 5: // memory_order_seq_cst
5138 Result = Builder.CreateAtomicRMW(
5139 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
5140 llvm::AtomicOrdering::SequentiallyConsistent);
5141 break;
5142 }
5143 Result->setVolatile(Volatile);
5144 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5145 }
5146
5147 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5148
5149 llvm::BasicBlock *BBs[5] = {
5150 createBasicBlock("monotonic", CurFn),
5151 createBasicBlock("acquire", CurFn),
5152 createBasicBlock("release", CurFn),
5153 createBasicBlock("acqrel", CurFn),
5154 createBasicBlock("seqcst", CurFn)
5155 };
5156 llvm::AtomicOrdering Orders[5] = {
5157 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
5158 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
5159 llvm::AtomicOrdering::SequentiallyConsistent};
5160
5161 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5162 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5163
5164 Builder.SetInsertPoint(ContBB);
5165 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
5166
5167 for (unsigned i = 0; i < 5; ++i) {
5168 Builder.SetInsertPoint(BBs[i]);
5169 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
5170 Ptr, NewVal, Orders[i]);
5171 RMW->setVolatile(Volatile);
5172 Result->addIncoming(RMW, BBs[i]);
5173 Builder.CreateBr(ContBB);
5174 }
5175
5176 SI->addCase(Builder.getInt32(0), BBs[0]);
5177 SI->addCase(Builder.getInt32(1), BBs[1]);
5178 SI->addCase(Builder.getInt32(2), BBs[1]);
5179 SI->addCase(Builder.getInt32(3), BBs[2]);
5180 SI->addCase(Builder.getInt32(4), BBs[3]);
5181 SI->addCase(Builder.getInt32(5), BBs[4]);
5182
5183 Builder.SetInsertPoint(ContBB);
5184 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
5185 }
5186
5187 case Builtin::BI__atomic_clear: {
5188 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
5189 bool Volatile =
5190 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
5191
5192 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
5193 Ptr = Ptr.withElementType(Int8Ty);
5194 Value *NewVal = Builder.getInt8(0);
5195 Value *Order = EmitScalarExpr(E->getArg(1));
5196 if (isa<llvm::ConstantInt>(Order)) {
5197 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5198 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5199 switch (ord) {
5200 case 0: // memory_order_relaxed
5201 default: // invalid order
5202 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
5203 break;
5204 case 3: // memory_order_release
5205 Store->setOrdering(llvm::AtomicOrdering::Release);
5206 break;
5207 case 5: // memory_order_seq_cst
5208 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
5209 break;
5210 }
5211 return RValue::get(nullptr);
5212 }
5213
5214 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5215
5216 llvm::BasicBlock *BBs[3] = {
5217 createBasicBlock("monotonic", CurFn),
5218 createBasicBlock("release", CurFn),
5219 createBasicBlock("seqcst", CurFn)
5220 };
5221 llvm::AtomicOrdering Orders[3] = {
5222 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
5223 llvm::AtomicOrdering::SequentiallyConsistent};
5224
5225 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5226 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
5227
5228 for (unsigned i = 0; i < 3; ++i) {
5229 Builder.SetInsertPoint(BBs[i]);
5230 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
5231 Store->setOrdering(Orders[i]);
5232 Builder.CreateBr(ContBB);
5233 }
5234
5235 SI->addCase(Builder.getInt32(0), BBs[0]);
5236 SI->addCase(Builder.getInt32(3), BBs[1]);
5237 SI->addCase(Builder.getInt32(5), BBs[2]);
5238
5239 Builder.SetInsertPoint(ContBB);
5240 return RValue::get(nullptr);
5241 }
5242
5243 case Builtin::BI__atomic_thread_fence:
5244 case Builtin::BI__atomic_signal_fence:
5245 case Builtin::BI__c11_atomic_thread_fence:
5246 case Builtin::BI__c11_atomic_signal_fence: {
5247 llvm::SyncScope::ID SSID;
5248 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
5249 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
5250 SSID = llvm::SyncScope::SingleThread;
5251 else
5252 SSID = llvm::SyncScope::System;
5253 Value *Order = EmitScalarExpr(E->getArg(0));
5254 if (isa<llvm::ConstantInt>(Order)) {
5255 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
5256 switch (ord) {
5257 case 0: // memory_order_relaxed
5258 default: // invalid order
5259 break;
5260 case 1: // memory_order_consume
5261 case 2: // memory_order_acquire
5262 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5263 break;
5264 case 3: // memory_order_release
5265 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5266 break;
5267 case 4: // memory_order_acq_rel
5268 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5269 break;
5270 case 5: // memory_order_seq_cst
5271 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5272 break;
5273 }
5274 return RValue::get(nullptr);
5275 }
5276
5277 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
5278 AcquireBB = createBasicBlock("acquire", CurFn);
5279 ReleaseBB = createBasicBlock("release", CurFn);
5280 AcqRelBB = createBasicBlock("acqrel", CurFn);
5281 SeqCstBB = createBasicBlock("seqcst", CurFn);
5282 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
5283
5284 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5285 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5286
5287 Builder.SetInsertPoint(AcquireBB);
5288 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
5289 Builder.CreateBr(ContBB);
5290 SI->addCase(Builder.getInt32(1), AcquireBB);
5291 SI->addCase(Builder.getInt32(2), AcquireBB);
5292
5293 Builder.SetInsertPoint(ReleaseBB);
5294 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
5295 Builder.CreateBr(ContBB);
5296 SI->addCase(Builder.getInt32(3), ReleaseBB);
5297
5298 Builder.SetInsertPoint(AcqRelBB);
5299 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
5300 Builder.CreateBr(ContBB);
5301 SI->addCase(Builder.getInt32(4), AcqRelBB);
5302
5303 Builder.SetInsertPoint(SeqCstBB);
5304 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
5305 Builder.CreateBr(ContBB);
5306 SI->addCase(Builder.getInt32(5), SeqCstBB);
5307
5308 Builder.SetInsertPoint(ContBB);
5309 return RValue::get(nullptr);
5310 }
5311 case Builtin::BI__scoped_atomic_thread_fence: {
5312 auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
5313
5314 Value *Order = EmitScalarExpr(E->getArg(0));
5315 Value *Scope = EmitScalarExpr(E->getArg(1));
5316 auto Ord = dyn_cast<llvm::ConstantInt>(Order);
5317 auto Scp = dyn_cast<llvm::ConstantInt>(Scope);
5318 if (Ord && Scp) {
5319 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5320 ? ScopeModel->map(Scp->getZExtValue())
5321 : ScopeModel->map(ScopeModel->getFallBackValue());
5322 switch (Ord->getZExtValue()) {
5323 case 0: // memory_order_relaxed
5324 default: // invalid order
5325 break;
5326 case 1: // memory_order_consume
5327 case 2: // memory_order_acquire
5328 Builder.CreateFence(
5329 llvm::AtomicOrdering::Acquire,
5330 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5331 llvm::AtomicOrdering::Acquire,
5332 getLLVMContext()));
5333 break;
5334 case 3: // memory_order_release
5335 Builder.CreateFence(
5336 llvm::AtomicOrdering::Release,
5337 getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
5338 llvm::AtomicOrdering::Release,
5339 getLLVMContext()));
5340 break;
5341 case 4: // memory_order_acq_rel
5342 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
5343 getTargetHooks().getLLVMSyncScopeID(
5344 getLangOpts(), SS,
5345 llvm::AtomicOrdering::AcquireRelease,
5346 getLLVMContext()));
5347 break;
5348 case 5: // memory_order_seq_cst
5349 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
5350 getTargetHooks().getLLVMSyncScopeID(
5351 getLangOpts(), SS,
5352 llvm::AtomicOrdering::SequentiallyConsistent,
5353 getLLVMContext()));
5354 break;
5355 }
5356 return RValue::get(nullptr);
5357 }
5358
5359 llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
5360
5361 llvm::SmallVector<std::pair<llvm::BasicBlock *, llvm::AtomicOrdering>>
5362 OrderBBs;
5363 if (Ord) {
5364 switch (Ord->getZExtValue()) {
5365 case 0: // memory_order_relaxed
5366 default: // invalid order
5367 ContBB->eraseFromParent();
5368 return RValue::get(nullptr);
5369 case 1: // memory_order_consume
5370 case 2: // memory_order_acquire
5371 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5372 llvm::AtomicOrdering::Acquire);
5373 break;
5374 case 3: // memory_order_release
5375 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5376 llvm::AtomicOrdering::Release);
5377 break;
5378 case 4: // memory_order_acq_rel
5379 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5380 llvm::AtomicOrdering::AcquireRelease);
5381 break;
5382 case 5: // memory_order_seq_cst
5383 OrderBBs.emplace_back(Builder.GetInsertBlock(),
5384 llvm::AtomicOrdering::SequentiallyConsistent);
5385 break;
5386 }
5387 } else {
5388 llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
5389 llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
5390 llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
5391 llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
5392
5393 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
5394 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
5395 SI->addCase(Builder.getInt32(1), AcquireBB);
5396 SI->addCase(Builder.getInt32(2), AcquireBB);
5397 SI->addCase(Builder.getInt32(3), ReleaseBB);
5398 SI->addCase(Builder.getInt32(4), AcqRelBB);
5399 SI->addCase(Builder.getInt32(5), SeqCstBB);
5400
5401 OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
5402 OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
5403 OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
5404 OrderBBs.emplace_back(SeqCstBB,
5405 llvm::AtomicOrdering::SequentiallyConsistent);
5406 }
5407
5408 for (auto &[OrderBB, Ordering] : OrderBBs) {
5409 Builder.SetInsertPoint(OrderBB);
5410 if (Scp) {
5411 SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
5412 ? ScopeModel->map(Scp->getZExtValue())
5413 : ScopeModel->map(ScopeModel->getFallBackValue());
5414 Builder.CreateFence(Ordering,
5415 getTargetHooks().getLLVMSyncScopeID(
5416 getLangOpts(), SS, Ordering, getLLVMContext()));
5417 Builder.CreateBr(ContBB);
5418 } else {
5419 llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
5420 for (unsigned Scp : ScopeModel->getRuntimeValues())
5421 BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
5422
5423 auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
5424 llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
5425 for (unsigned Scp : ScopeModel->getRuntimeValues()) {
5426 auto *B = BBs[Scp];
5427 SI->addCase(Builder.getInt32(Scp), B);
5428
5429 Builder.SetInsertPoint(B);
5430 Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
5431 getLangOpts(), ScopeModel->map(Scp),
5432 Ordering, getLLVMContext()));
5433 Builder.CreateBr(ContBB);
5434 }
5435 }
5436 }
5437
5438 Builder.SetInsertPoint(ContBB);
5439 return RValue::get(nullptr);
5440 }
5441
5442 case Builtin::BI__builtin_signbit:
5443 case Builtin::BI__builtin_signbitf:
5444 case Builtin::BI__builtin_signbitl: {
5445 return RValue::get(
5446 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
5447 ConvertType(E->getType())));
5448 }
5449 case Builtin::BI__warn_memset_zero_len:
5450 return RValue::getIgnored();
5451 case Builtin::BI__annotation: {
5452 // Re-encode each wide string to UTF8 and make an MDString.
5453 SmallVector<Metadata *, 1> Strings;
5454 for (const Expr *Arg : E->arguments()) {
5455 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
5456 assert(Str->getCharByteWidth() == 2);
5457 StringRef WideBytes = Str->getBytes();
5458 std::string StrUtf8;
5459 if (!convertUTF16ToUTF8String(
5460 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
5461 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5462 continue;
5463 }
5464 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5465 }
5466
5467 // Build an MDTuple of MDStrings and emit the intrinsic call.
5468 llvm::Function *F =
5469 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5470 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5471 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5472 return RValue::getIgnored();
5473 }
5474 case Builtin::BI__builtin_annotation: {
5475 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5476 llvm::Function *F =
5477 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5478 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5479
5480 // Get the annotation string, go through casts. Sema requires this to be a
5481 // non-wide string literal, potentially casted, so the cast<> is safe.
5482 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5483 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5484 return RValue::get(
5485 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5486 }
5487 case Builtin::BI__builtin_addcb:
5488 case Builtin::BI__builtin_addcs:
5489 case Builtin::BI__builtin_addc:
5490 case Builtin::BI__builtin_addcl:
5491 case Builtin::BI__builtin_addcll:
5492 case Builtin::BI__builtin_subcb:
5493 case Builtin::BI__builtin_subcs:
5494 case Builtin::BI__builtin_subc:
5495 case Builtin::BI__builtin_subcl:
5496 case Builtin::BI__builtin_subcll: {
5497
5498 // We translate all of these builtins from expressions of the form:
5499 // int x = ..., y = ..., carryin = ..., carryout, result;
5500 // result = __builtin_addc(x, y, carryin, &carryout);
5501 //
5502 // to LLVM IR of the form:
5503 //
5504 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5505 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5506 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5507 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5508 // i32 %carryin)
5509 // %result = extractvalue {i32, i1} %tmp2, 0
5510 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5511 // %tmp3 = or i1 %carry1, %carry2
5512 // %tmp4 = zext i1 %tmp3 to i32
5513 // store i32 %tmp4, i32* %carryout
5514
5515 // Scalarize our inputs.
5516 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5517 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5518 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5519 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5520
5521 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5522 llvm::Intrinsic::ID IntrinsicId;
5523 switch (BuiltinID) {
5524 default: llvm_unreachable("Unknown multiprecision builtin id.");
5525 case Builtin::BI__builtin_addcb:
5526 case Builtin::BI__builtin_addcs:
5527 case Builtin::BI__builtin_addc:
5528 case Builtin::BI__builtin_addcl:
5529 case Builtin::BI__builtin_addcll:
5530 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5531 break;
5532 case Builtin::BI__builtin_subcb:
5533 case Builtin::BI__builtin_subcs:
5534 case Builtin::BI__builtin_subc:
5535 case Builtin::BI__builtin_subcl:
5536 case Builtin::BI__builtin_subcll:
5537 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5538 break;
5539 }
5540
5541 // Construct our resulting LLVM IR expression.
5542 llvm::Value *Carry1;
5543 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5544 X, Y, Carry1);
5545 llvm::Value *Carry2;
5546 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5547 Sum1, Carryin, Carry2);
5548 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5549 X->getType());
5550 Builder.CreateStore(CarryOut, CarryOutPtr);
5551 return RValue::get(Sum2);
5552 }
5553
5554 case Builtin::BI__builtin_add_overflow:
5555 case Builtin::BI__builtin_sub_overflow:
5556 case Builtin::BI__builtin_mul_overflow: {
5557 const clang::Expr *LeftArg = E->getArg(0);
5558 const clang::Expr *RightArg = E->getArg(1);
5559 const clang::Expr *ResultArg = E->getArg(2);
5560
5561 clang::QualType ResultQTy =
5562 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5563
5564 WidthAndSignedness LeftInfo =
5565 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5566 WidthAndSignedness RightInfo =
5567 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5568 WidthAndSignedness ResultInfo =
5569 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5570
5571 // Handle mixed-sign multiplication as a special case, because adding
5572 // runtime or backend support for our generic irgen would be too expensive.
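// (Mixed-sign here means one multiplicand is signed and the other unsigned,
// e.g. __builtin_mul_overflow(some_int64, some_uint64, &result).)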
5573 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5574 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5575 RightInfo, ResultArg, ResultQTy,
5576 ResultInfo);
5577
5578 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5579 ResultInfo))
5580 return EmitCheckedUnsignedMultiplySignedResult(
5581 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5582 ResultInfo);
5583
5584 WidthAndSignedness EncompassingInfo =
5585 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5586
5587 llvm::Type *EncompassingLLVMTy =
5588 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5589
5590 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5591
5592 llvm::Intrinsic::ID IntrinsicId;
5593 switch (BuiltinID) {
5594 default:
5595 llvm_unreachable("Unknown overflow builtin id.");
5596 case Builtin::BI__builtin_add_overflow:
5597 IntrinsicId = EncompassingInfo.Signed
5598 ? llvm::Intrinsic::sadd_with_overflow
5599 : llvm::Intrinsic::uadd_with_overflow;
5600 break;
5601 case Builtin::BI__builtin_sub_overflow:
5602 IntrinsicId = EncompassingInfo.Signed
5603 ? llvm::Intrinsic::ssub_with_overflow
5604 : llvm::Intrinsic::usub_with_overflow;
5605 break;
5606 case Builtin::BI__builtin_mul_overflow:
5607 IntrinsicId = EncompassingInfo.Signed
5608 ? llvm::Intrinsic::smul_with_overflow
5609 : llvm::Intrinsic::umul_with_overflow;
5610 break;
5611 }
5612
5613 llvm::Value *Left = EmitScalarExpr(LeftArg);
5614 llvm::Value *Right = EmitScalarExpr(RightArg);
5615 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5616
5617 // Extend each operand to the encompassing type.
5618 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5619 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5620
5621 // Perform the operation on the extended values.
5622 llvm::Value *Overflow, *Result;
5623 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5624
5625 if (EncompassingInfo.Width > ResultInfo.Width) {
5626 // The encompassing type is wider than the result type, so we need to
5627 // truncate it.
5628 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5629
5630 // To see if the truncation caused an overflow, we will extend
5631 // the result and then compare it to the original result.
5632 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5633 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5634 llvm::Value *TruncationOverflow =
5635 Builder.CreateICmpNE(Result, ResultTruncExt);
5636
5637 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5638 Result = ResultTrunc;
5639 }
5640
5641 // Finally, store the result using the pointer.
5642 bool isVolatile =
5643 ResultArg->getType()->getPointeeType().isVolatileQualified();
5644 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5645
5646 return RValue::get(Overflow);
5647 }
5648
5649 case Builtin::BI__builtin_uadd_overflow:
5650 case Builtin::BI__builtin_uaddl_overflow:
5651 case Builtin::BI__builtin_uaddll_overflow:
5652 case Builtin::BI__builtin_usub_overflow:
5653 case Builtin::BI__builtin_usubl_overflow:
5654 case Builtin::BI__builtin_usubll_overflow:
5655 case Builtin::BI__builtin_umul_overflow:
5656 case Builtin::BI__builtin_umull_overflow:
5657 case Builtin::BI__builtin_umulll_overflow:
5658 case Builtin::BI__builtin_sadd_overflow:
5659 case Builtin::BI__builtin_saddl_overflow:
5660 case Builtin::BI__builtin_saddll_overflow:
5661 case Builtin::BI__builtin_ssub_overflow:
5662 case Builtin::BI__builtin_ssubl_overflow:
5663 case Builtin::BI__builtin_ssubll_overflow:
5664 case Builtin::BI__builtin_smul_overflow:
5665 case Builtin::BI__builtin_smull_overflow:
5666 case Builtin::BI__builtin_smulll_overflow: {
5667
5668 // We translate all of these builtins directly to the relevant llvm IR node.
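// For example, __builtin_sadd_overflow(x, y, &r) becomes a call to
// llvm.sadd.with.overflow.i32: the first result field is stored through the
// pointer and the overflow bit is returned.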
5669
5670 // Scalarize our inputs.
5671 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5672 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5673 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5674
5675 // Decide which of the overflow intrinsics we are lowering to:
5676 llvm::Intrinsic::ID IntrinsicId;
5677 switch (BuiltinID) {
5678 default: llvm_unreachable("Unknown overflow builtin id.");
5679 case Builtin::BI__builtin_uadd_overflow:
5680 case Builtin::BI__builtin_uaddl_overflow:
5681 case Builtin::BI__builtin_uaddll_overflow:
5682 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5683 break;
5684 case Builtin::BI__builtin_usub_overflow:
5685 case Builtin::BI__builtin_usubl_overflow:
5686 case Builtin::BI__builtin_usubll_overflow:
5687 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5688 break;
5689 case Builtin::BI__builtin_umul_overflow:
5690 case Builtin::BI__builtin_umull_overflow:
5691 case Builtin::BI__builtin_umulll_overflow:
5692 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5693 break;
5694 case Builtin::BI__builtin_sadd_overflow:
5695 case Builtin::BI__builtin_saddl_overflow:
5696 case Builtin::BI__builtin_saddll_overflow:
5697 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5698 break;
5699 case Builtin::BI__builtin_ssub_overflow:
5700 case Builtin::BI__builtin_ssubl_overflow:
5701 case Builtin::BI__builtin_ssubll_overflow:
5702 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5703 break;
5704 case Builtin::BI__builtin_smul_overflow:
5705 case Builtin::BI__builtin_smull_overflow:
5706 case Builtin::BI__builtin_smulll_overflow:
5707 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5708 break;
5709 }
5710
5711
5712 llvm::Value *Carry;
5713 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5714 Builder.CreateStore(Sum, SumOutPtr);
5715
5716 return RValue::get(Carry);
5717 }
5718 case Builtin::BIaddressof:
5719 case Builtin::BI__addressof:
5720 case Builtin::BI__builtin_addressof:
5721 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5722 case Builtin::BI__builtin_function_start:
5723 return RValue::get(CGM.GetFunctionStart(
5724 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5725 case Builtin::BI__builtin_operator_new:
5726 return EmitBuiltinNewDeleteCall(
5727 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5728 case Builtin::BI__builtin_operator_delete:
5729 EmitBuiltinNewDeleteCall(
5730 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5731 return RValue::get(nullptr);
5732
5733 case Builtin::BI__builtin_is_aligned:
5734 return EmitBuiltinIsAligned(E);
5735 case Builtin::BI__builtin_align_up:
5736 return EmitBuiltinAlignTo(E, true);
5737 case Builtin::BI__builtin_align_down:
5738 return EmitBuiltinAlignTo(E, false);
5739
5740 case Builtin::BI__noop:
5741 // __noop always evaluates to an integer literal zero.
5742 return RValue::get(ConstantInt::get(IntTy, 0));
5743 case Builtin::BI__builtin_call_with_static_chain: {
5744 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5745 const Expr *Chain = E->getArg(1);
5746 return EmitCall(Call->getCallee()->getType(),
5747 EmitCallee(Call->getCallee()), Call, ReturnValue,
5748 EmitScalarExpr(Chain));
5749 }
5750 case Builtin::BI_InterlockedExchange8:
5751 case Builtin::BI_InterlockedExchange16:
5752 case Builtin::BI_InterlockedExchange:
5753 case Builtin::BI_InterlockedExchangePointer:
5754 return RValue::get(
5755 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5756 case Builtin::BI_InterlockedCompareExchangePointer:
5757 return RValue::get(
5758 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5759 case Builtin::BI_InterlockedCompareExchangePointer_nf:
5760 return RValue::get(
5761 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E));
5762 case Builtin::BI_InterlockedCompareExchange8:
5763 case Builtin::BI_InterlockedCompareExchange16:
5764 case Builtin::BI_InterlockedCompareExchange:
5765 case Builtin::BI_InterlockedCompareExchange64:
5766 return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
5767 case Builtin::BI_InterlockedIncrement16:
5768 case Builtin::BI_InterlockedIncrement:
5769 return RValue::get(
5770 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5771 case Builtin::BI_InterlockedDecrement16:
5772 case Builtin::BI_InterlockedDecrement:
5773 return RValue::get(
5774 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5775 case Builtin::BI_InterlockedAnd8:
5776 case Builtin::BI_InterlockedAnd16:
5777 case Builtin::BI_InterlockedAnd:
5778 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5779 case Builtin::BI_InterlockedExchangeAdd8:
5780 case Builtin::BI_InterlockedExchangeAdd16:
5781 case Builtin::BI_InterlockedExchangeAdd:
5782 return RValue::get(
5783 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5784 case Builtin::BI_InterlockedExchangeSub8:
5785 case Builtin::BI_InterlockedExchangeSub16:
5786 case Builtin::BI_InterlockedExchangeSub:
5787 return RValue::get(
5788 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5789 case Builtin::BI_InterlockedOr8:
5790 case Builtin::BI_InterlockedOr16:
5791 case Builtin::BI_InterlockedOr:
5792 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5793 case Builtin::BI_InterlockedXor8:
5794 case Builtin::BI_InterlockedXor16:
5795 case Builtin::BI_InterlockedXor:
5796 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5797
5798 case Builtin::BI_bittest64:
5799 case Builtin::BI_bittest:
5800 case Builtin::BI_bittestandcomplement64:
5801 case Builtin::BI_bittestandcomplement:
5802 case Builtin::BI_bittestandreset64:
5803 case Builtin::BI_bittestandreset:
5804 case Builtin::BI_bittestandset64:
5805 case Builtin::BI_bittestandset:
5806 case Builtin::BI_interlockedbittestandreset:
5807 case Builtin::BI_interlockedbittestandreset64:
5808 case Builtin::BI_interlockedbittestandset64:
5809 case Builtin::BI_interlockedbittestandset:
5810 case Builtin::BI_interlockedbittestandset_acq:
5811 case Builtin::BI_interlockedbittestandset_rel:
5812 case Builtin::BI_interlockedbittestandset_nf:
5813 case Builtin::BI_interlockedbittestandreset_acq:
5814 case Builtin::BI_interlockedbittestandreset_rel:
5815 case Builtin::BI_interlockedbittestandreset_nf:
5816 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5817
5818 // These builtins exist to emit regular volatile loads and stores not
5819 // affected by the -fms-volatile setting.
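// For example, __iso_volatile_load32(p) always lowers to a plain volatile
// i32 load, even when /volatile:ms would otherwise make volatile accesses
// atomic.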
5820 case Builtin::BI__iso_volatile_load8:
5821 case Builtin::BI__iso_volatile_load16:
5822 case Builtin::BI__iso_volatile_load32:
5823 case Builtin::BI__iso_volatile_load64:
5824 return RValue::get(EmitISOVolatileLoad(*this, E));
5825 case Builtin::BI__iso_volatile_store8:
5826 case Builtin::BI__iso_volatile_store16:
5827 case Builtin::BI__iso_volatile_store32:
5828 case Builtin::BI__iso_volatile_store64:
5829 return RValue::get(EmitISOVolatileStore(*this, E));
5830
5831 case Builtin::BI__builtin_ptrauth_sign_constant:
5832 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5833
5834 case Builtin::BI__builtin_ptrauth_auth:
5835 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5836 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5837 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5838 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5839 case Builtin::BI__builtin_ptrauth_strip: {
5840 // Emit the arguments.
5841 SmallVector<llvm::Value *, 5> Args;
5842 for (auto argExpr : E->arguments())
5843 Args.push_back(EmitScalarExpr(argExpr));
5844
5845 // Cast the value to intptr_t, saving its original type.
5846 llvm::Type *OrigValueType = Args[0]->getType();
5847 if (OrigValueType->isPointerTy())
5848 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5849
5850 switch (BuiltinID) {
5851 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5852 if (Args[4]->getType()->isPointerTy())
5853 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5854 [[fallthrough]];
5855
5856 case Builtin::BI__builtin_ptrauth_auth:
5857 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5858 if (Args[2]->getType()->isPointerTy())
5859 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5860 break;
5861
5862 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5863 if (Args[1]->getType()->isPointerTy())
5864 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5865 break;
5866
5867 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5868 case Builtin::BI__builtin_ptrauth_strip:
5869 break;
5870 }
5871
5872 // Call the intrinsic.
5873 auto IntrinsicID = [&]() -> unsigned {
5874 switch (BuiltinID) {
5875 case Builtin::BI__builtin_ptrauth_auth:
5876 return llvm::Intrinsic::ptrauth_auth;
5877 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5878 return llvm::Intrinsic::ptrauth_resign;
5879 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5880 return llvm::Intrinsic::ptrauth_blend;
5881 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5882 return llvm::Intrinsic::ptrauth_sign_generic;
5883 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5884 return llvm::Intrinsic::ptrauth_sign;
5885 case Builtin::BI__builtin_ptrauth_strip:
5886 return llvm::Intrinsic::ptrauth_strip;
5887 }
5888 llvm_unreachable("bad ptrauth intrinsic");
5889 }();
5890 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5891 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5892
5893 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5894 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5895 OrigValueType->isPointerTy()) {
5896 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5897 }
5898 return RValue::get(Result);
5899 }
5900
5901 case Builtin::BI__exception_code:
5902 case Builtin::BI_exception_code:
5903 return RValue::get(EmitSEHExceptionCode());
5904 case Builtin::BI__exception_info:
5905 case Builtin::BI_exception_info:
5906 return RValue::get(EmitSEHExceptionInfo());
5907 case Builtin::BI__abnormal_termination:
5908 case Builtin::BI_abnormal_termination:
5909 return RValue::get(EmitSEHAbnormalTermination());
5910 case Builtin::BI_setjmpex:
5911 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5912 E->getArg(0)->getType()->isPointerType())
5913 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5914 break;
5915 case Builtin::BI_setjmp:
5916 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5917 E->getArg(0)->getType()->isPointerType()) {
5918 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5919 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5920 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5921 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5922 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5923 }
5924 break;
5925
5926 // C++ std:: builtins.
5927 case Builtin::BImove:
5928 case Builtin::BImove_if_noexcept:
5929 case Builtin::BIforward:
5930 case Builtin::BIforward_like:
5931 case Builtin::BIas_const:
5932 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5933 case Builtin::BI__GetExceptionInfo: {
5934 if (llvm::GlobalVariable *GV =
5935 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5936 return RValue::get(GV);
5937 break;
5938 }
5939
5940 case Builtin::BI__fastfail:
5941 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5942
5943 case Builtin::BI__builtin_coro_id:
5944 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5945 case Builtin::BI__builtin_coro_promise:
5946 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5947 case Builtin::BI__builtin_coro_resume:
5948 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5949 return RValue::get(nullptr);
5950 case Builtin::BI__builtin_coro_frame:
5951 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5952 case Builtin::BI__builtin_coro_noop:
5953 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5954 case Builtin::BI__builtin_coro_free:
5955 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5956 case Builtin::BI__builtin_coro_destroy:
5957 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5958 return RValue::get(nullptr);
5959 case Builtin::BI__builtin_coro_done:
5960 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5961 case Builtin::BI__builtin_coro_alloc:
5962 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5963 case Builtin::BI__builtin_coro_begin:
5964 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5965 case Builtin::BI__builtin_coro_end:
5966 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5967 case Builtin::BI__builtin_coro_suspend:
5968 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5969 case Builtin::BI__builtin_coro_size:
5970 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5971 case Builtin::BI__builtin_coro_align:
5972 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5973
5974 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
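// Illustrative mapping (sketch): a two-argument read_pipe(p, &x) call lowers
// to __read_pipe_2(p, x_cast, packet_size, packet_align); the four-argument
// form (with a reserve_id_t and an index) lowers to __read_pipe_4 instead.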
5975 case Builtin::BIread_pipe:
5976 case Builtin::BIwrite_pipe: {
5977 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5978 *Arg1 = EmitScalarExpr(E->getArg(1));
5979 CGOpenCLRuntime OpenCLRT(CGM);
5980 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5981 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5982
5983 // Type of the generic packet parameter.
5984 unsigned GenericAS =
5985 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5986 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5987
5988 // Testing which overloaded version we should generate the call for.
5989 if (2U == E->getNumArgs()) {
5990 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5991 : "__write_pipe_2";
5992 // Creating a generic function type to be able to call with any builtin or
5993 // user defined type.
5994 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5995 llvm::FunctionType *FTy = llvm::FunctionType::get(
5996 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5997 Value *ACast = Builder.CreateAddrSpaceCast(Arg1, I8PTy);
5998 return RValue::get(
5999 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6000 {Arg0, ACast, PacketSize, PacketAlign}));
6001 } else {
6002 assert(4 == E->getNumArgs() &&
6003 "Illegal number of parameters to pipe function");
6004 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
6005 : "__write_pipe_4";
6006
6007 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
6008 Int32Ty, Int32Ty};
6009 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
6010 *Arg3 = EmitScalarExpr(E->getArg(3));
6011 llvm::FunctionType *FTy = llvm::FunctionType::get(
6012 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6013 Value *ACast = Builder.CreateAddrSpaceCast(Arg3, I8PTy);
6014 // We know the third argument is an integer type, but we may need to cast
6015 // it to i32.
6016 if (Arg2->getType() != Int32Ty)
6017 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
6018 return RValue::get(
6019 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6020 {Arg0, Arg1, Arg2, ACast, PacketSize, PacketAlign}));
6021 }
6022 }
6023 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
6024 // functions
6025 case Builtin::BIreserve_read_pipe:
6026 case Builtin::BIreserve_write_pipe:
6027 case Builtin::BIwork_group_reserve_read_pipe:
6028 case Builtin::BIwork_group_reserve_write_pipe:
6029 case Builtin::BIsub_group_reserve_read_pipe:
6030 case Builtin::BIsub_group_reserve_write_pipe: {
6031 // Composing the mangled name for the function.
6032 const char *Name;
6033 if (BuiltinID == Builtin::BIreserve_read_pipe)
6034 Name = "__reserve_read_pipe";
6035 else if (BuiltinID == Builtin::BIreserve_write_pipe)
6036 Name = "__reserve_write_pipe";
6037 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
6038 Name = "__work_group_reserve_read_pipe";
6039 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
6040 Name = "__work_group_reserve_write_pipe";
6041 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
6042 Name = "__sub_group_reserve_read_pipe";
6043 else
6044 Name = "__sub_group_reserve_write_pipe";
6045
6046 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6047 *Arg1 = EmitScalarExpr(E->getArg(1));
6048 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
6049 CGOpenCLRuntime OpenCLRT(CGM);
6050 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6051 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6052
6053 // Building the generic function prototype.
6054 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
6055 llvm::FunctionType *FTy = llvm::FunctionType::get(
6056 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6057 // We know the second argument is an integer type, but we may need to cast
6058 // it to i32.
6059 if (Arg1->getType() != Int32Ty)
6060 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
6061 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6062 {Arg0, Arg1, PacketSize, PacketAlign}));
6063 }
6064 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
6065 // functions
6066 case Builtin::BIcommit_read_pipe:
6067 case Builtin::BIcommit_write_pipe:
6068 case Builtin::BIwork_group_commit_read_pipe:
6069 case Builtin::BIwork_group_commit_write_pipe:
6070 case Builtin::BIsub_group_commit_read_pipe:
6071 case Builtin::BIsub_group_commit_write_pipe: {
6072 const char *Name;
6073 if (BuiltinID == Builtin::BIcommit_read_pipe)
6074 Name = "__commit_read_pipe";
6075 else if (BuiltinID == Builtin::BIcommit_write_pipe)
6076 Name = "__commit_write_pipe";
6077 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
6078 Name = "__work_group_commit_read_pipe";
6079 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
6080 Name = "__work_group_commit_write_pipe";
6081 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
6082 Name = "__sub_group_commit_read_pipe";
6083 else
6084 Name = "__sub_group_commit_write_pipe";
6085
6086 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
6087 *Arg1 = EmitScalarExpr(E->getArg(1));
6088 CGOpenCLRuntime OpenCLRT(CGM);
6089 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6090 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6091
6092 // Building the generic function prototype.
6093 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
6094 llvm::FunctionType *FTy =
6095 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
6096 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6097
6098 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6099 {Arg0, Arg1, PacketSize, PacketAlign}));
6100 }
6101 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
6102 case Builtin::BIget_pipe_num_packets:
6103 case Builtin::BIget_pipe_max_packets: {
6104 const char *BaseName;
6105 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
6106 if (BuiltinID == Builtin::BIget_pipe_num_packets)
6107 BaseName = "__get_pipe_num_packets";
6108 else
6109 BaseName = "__get_pipe_max_packets";
6110 std::string Name = std::string(BaseName) +
6111 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
6112
6113 // Building the generic function prototype.
6114 Value *Arg0 = EmitScalarExpr(E->getArg(0));
6115 CGOpenCLRuntime OpenCLRT(CGM);
6116 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
6117 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
6118 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
6119 llvm::FunctionType *FTy = llvm::FunctionType::get(
6120 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6121
6122 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6123 {Arg0, PacketSize, PacketAlign}));
6124 }
6125
6126 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
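// Illustrative sketch: to_global(p) becomes a call to the runtime function
// "__to_global" (the callee name prefixed with "__") with p cast to the
// generic address space, and the result is cast back to the builtin's
// declared return type.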
6127 case Builtin::BIto_global:
6128 case Builtin::BIto_local:
6129 case Builtin::BIto_private: {
6130 auto Arg0 = EmitScalarExpr(E->getArg(0));
6131 auto NewArgT = llvm::PointerType::get(
6132 getLLVMContext(),
6133 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6134 auto NewRetT = llvm::PointerType::get(
6135 getLLVMContext(),
6136 CGM.getContext().getTargetAddressSpace(
6137 E->getType()->getPointeeType().getAddressSpace()));
6138 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
6139 llvm::Value *NewArg;
6140 if (Arg0->getType()->getPointerAddressSpace() !=
6141 NewArgT->getPointerAddressSpace())
6142 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
6143 else
6144 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
6145 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
6146 auto NewCall =
6147 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
6148 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
6149 ConvertType(E->getType())));
6150 }
6151
6152 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
6153 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
6154 // The code below expands the builtin call to a call to one of the following
6155 // functions that an OpenCL runtime library will have to provide:
6156 // __enqueue_kernel_basic
6157 // __enqueue_kernel_varargs
6158 // __enqueue_kernel_basic_events
6159 // __enqueue_kernel_events_varargs
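// Illustrative mapping (sketch): enqueue_kernel(q, flags, nd, block) selects
// __enqueue_kernel_basic; adding trailing local-size arguments selects the
// *_varargs form; adding a clk_event_t wait list and return event selects the
// *_events forms.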
6160 case Builtin::BIenqueue_kernel: {
6161 StringRef Name; // Generated function call name
6162 unsigned NumArgs = E->getNumArgs();
6163
6164 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
6165 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6166 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6167
6168 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
6169 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
6170 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
6171 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
6172 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
6173
6174 if (NumArgs == 4) {
6175 // The most basic form of the call with parameters:
6176 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
6177 Name = "__enqueue_kernel_basic";
6178 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
6179 GenericVoidPtrTy};
6180 llvm::FunctionType *FTy = llvm::FunctionType::get(
6181 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6182
6183 auto Info =
6184 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6185 llvm::Value *Kernel =
6186 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6187 llvm::Value *Block =
6188 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6189
6190 auto RTCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6191 {Queue, Flags, Range, Kernel, Block});
6192 return RValue::get(RTCall);
6193 }
6194 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
6195
6196 // Create a temporary array to hold the sizes of local pointer arguments
6197 // for the block. \p First is the position of the first size argument.
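// For example (illustrative sketch): for enqueue_kernel(..., ^(local int *a,
// local float4 *b){ ... }, 64, 128) the temporary array holds the trailing
// size arguments {64, 128}, one element per local pointer parameter of the
// block.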
6198 auto CreateArrayForSizeVar = [=](unsigned First)
6199 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
6200 llvm::APInt ArraySize(32, NumArgs - First);
6201 QualType SizeArrayTy = getContext().getConstantArrayType(
6202 getContext().getSizeType(), ArraySize, nullptr,
6203 ArraySizeModifier::Normal,
6204 /*IndexTypeQuals=*/0);
6205 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
6206 llvm::Value *TmpPtr = Tmp.getPointer();
6207 // The EmitLifetime* pair expects a naked Alloca as its last argument;
6208 // however, when the default AS is not the Alloca AS, Tmp is actually the
6209 // Alloca addrspacecast to the default AS, hence the
6210 // stripPointerCasts().
6211 llvm::Value *Alloca = TmpPtr->stripPointerCasts();
6212 llvm::Value *TmpSize = EmitLifetimeStart(
6213 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6214 llvm::Value *ElemPtr;
6215 // Each of the following arguments specifies the size of the corresponding
6216 // argument passed to the enqueued block.
6217 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
6218 for (unsigned I = First; I < NumArgs; ++I) {
6219 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
6220 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
6221 {Zero, Index});
6222 if (I == First)
6223 ElemPtr = GEP;
6224 auto *V =
6225 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
6226 Builder.CreateAlignedStore(
6227 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
6228 }
6229 // Return the Alloca itself rather than a potential ascast as this is only
6230 // used by the paired EmitLifetimeEnd.
6231 return std::tie(ElemPtr, TmpSize, Alloca);
6232 };
6233
6234 // Could have events and/or varargs.
6235 if (E->getArg(3)->getType()->isBlockPointerType()) {
6236 // No events passed, but has variadic arguments.
6237 Name = "__enqueue_kernel_varargs";
6238 auto Info =
6239 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
6240 llvm::Value *Kernel =
6241 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6242 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6243 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6244 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
6245
6246 // Create a vector of the arguments, as well as a constant value to
6247 // express to the runtime the number of variadic arguments.
6248 llvm::Value *const Args[] = {Queue, Flags,
6249 Range, Kernel,
6250 Block, ConstantInt::get(IntTy, NumArgs - 4),
6251 ElemPtr};
6252 llvm::Type *const ArgTys[] = {
6253 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
6254 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
6255
6256 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
6257 auto Call = RValue::get(
6258 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
6259 if (TmpSize)
6260 EmitLifetimeEnd(TmpSize, TmpPtr);
6261 return Call;
6262 }
6263 // Any calls now have event arguments passed.
6264 if (NumArgs >= 7) {
6265 llvm::PointerType *PtrTy = llvm::PointerType::get(
6266 getLLVMContext(),
6267 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
6268
6269 llvm::Value *NumEvents =
6270 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
6271
6272 // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
6273 // to be a null pointer constant (including `0` literal), we can take it
6274 // into account and emit null pointer directly.
6275 llvm::Value *EventWaitList = nullptr;
6276 if (E->getArg(4)->isNullPointerConstant(
6277 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6278 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
6279 } else {
6280 EventWaitList =
6281 E->getArg(4)->getType()->isArrayType()
6282 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
6283 : EmitScalarExpr(E->getArg(4));
6284 // Convert to generic address space.
6285 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
6286 }
6287 llvm::Value *EventRet = nullptr;
6288 if (E->getArg(5)->isNullPointerConstant(
6289 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
6290 EventRet = llvm::ConstantPointerNull::get(PtrTy);
6291 } else {
6292 EventRet =
6293 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
6294 }
6295
6296 auto Info =
6297 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
6298 llvm::Value *Kernel =
6299 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6300 llvm::Value *Block =
6301 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6302
6303 std::vector<llvm::Type *> ArgTys = {
6304 QueueTy, Int32Ty, RangeTy, Int32Ty,
6305 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
6306
6307 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
6308 NumEvents, EventWaitList, EventRet,
6309 Kernel, Block};
6310
6311 if (NumArgs == 7) {
6312 // Has events but no variadics.
6313 Name = "__enqueue_kernel_basic_events";
6314 llvm::FunctionType *FTy = llvm::FunctionType::get(
6315 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6316 return RValue::get(
6317 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6318 Args));
6319 }
6320 // Has event info and variadics
6321 // Pass the number of variadics to the runtime function too.
6322 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
6323 ArgTys.push_back(Int32Ty);
6324 Name = "__enqueue_kernel_events_varargs";
6325
6326 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
6327 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
6328 Args.push_back(ElemPtr);
6329 ArgTys.push_back(ElemPtr->getType());
6330
6331 llvm::FunctionType *FTy = llvm::FunctionType::get(
6332 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
6333 auto Call =
6334 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
6335 Args));
6336 if (TmpSize)
6337 EmitLifetimeEnd(TmpSize, TmpPtr);
6338 return Call;
6339 }
6340 llvm_unreachable("Unexpected enqueue_kernel signature");
6341 }
6342 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
6343 // parameter.
6344 case Builtin::BIget_kernel_work_group_size: {
6345 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6346 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6347 auto Info =
6348 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6349 Value *Kernel =
6350 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6351 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6352 return RValue::get(EmitRuntimeCall(
6353 CGM.CreateRuntimeFunction(
6354 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6355 false),
6356 "__get_kernel_work_group_size_impl"),
6357 {Kernel, Arg}));
6358 }
6359 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
6360 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6361 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6362 auto Info =
6363 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
6364 Value *Kernel =
6365 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6366 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6367 return RValue::get(EmitRuntimeCall(
6368 CGM.CreateRuntimeFunction(
6369 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
6370 false),
6371 "__get_kernel_preferred_work_group_size_multiple_impl"),
6372 {Kernel, Arg}));
6373 }
6374 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
6375 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
6376 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
6377 getContext().getTargetAddressSpace(LangAS::opencl_generic));
6378 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
6379 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
6380 auto Info =
6381 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
6382 Value *Kernel =
6383 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
6384 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
6385 const char *Name =
6386 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
6387 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
6388 : "__get_kernel_sub_group_count_for_ndrange_impl";
6389 return RValue::get(EmitRuntimeCall(
6390 CGM.CreateRuntimeFunction(
6391 llvm::FunctionType::get(
6392 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
6393 false),
6394 Name),
6395 {NDRange, Kernel, Block}));
6396 }
6397 case Builtin::BI__builtin_store_half:
6398 case Builtin::BI__builtin_store_halff: {
6399 Value *Val = EmitScalarExpr(E->getArg(0));
6400 Address Address = EmitPointerWithAlignment(E->getArg(1));
6401 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
6402 Builder.CreateStore(HalfVal, Address);
6403 return RValue::get(nullptr);
6404 }
6405 case Builtin::BI__builtin_load_half: {
6406 Address Address = EmitPointerWithAlignment(E->getArg(0));
6407 Value *HalfVal = Builder.CreateLoad(Address);
6408 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
6409 }
6410 case Builtin::BI__builtin_load_halff: {
6411 Address Address = EmitPointerWithAlignment(E->getArg(0));
6412 Value *HalfVal = Builder.CreateLoad(Address);
6413 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
6414 }
6415 case Builtin::BI__builtin_printf:
6416 case Builtin::BIprintf:
6417 if (getTarget().getTriple().isNVPTX() ||
6418 getTarget().getTriple().isAMDGCN() ||
6419 (getTarget().getTriple().isSPIRV() &&
6420 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
6421 if (getTarget().getTriple().isNVPTX())
6422 return EmitNVPTXDevicePrintfCallExpr(E);
6423 if ((getTarget().getTriple().isAMDGCN() ||
6424 getTarget().getTriple().isSPIRV()) &&
6425 getLangOpts().HIP)
6426 return EmitAMDGPUDevicePrintfCallExpr(E);
6427 }
6428
6429 break;
6430 case Builtin::BI__builtin_canonicalize:
6431 case Builtin::BI__builtin_canonicalizef:
6432 case Builtin::BI__builtin_canonicalizef16:
6433 case Builtin::BI__builtin_canonicalizel:
6434 return RValue::get(
6435 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
6436
6437 case Builtin::BI__builtin_thread_pointer: {
6438 if (!getContext().getTargetInfo().isTLSSupported())
6439 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
6440 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6441 break;
6442 }
6443 case Builtin::BI__builtin_os_log_format:
6444 return emitBuiltinOSLogFormat(*E);
6445
6446 case Builtin::BI__xray_customevent: {
6447 if (!ShouldXRayInstrumentFunction())
6448 return RValue::getIgnored();
6449
6450 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6451 XRayInstrKind::Custom))
6452 return RValue::getIgnored();
6453
6454 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6455 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6456 return RValue::getIgnored();
6457
6458 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6459 auto FTy = F->getFunctionType();
6460 auto Arg0 = E->getArg(0);
6461 auto Arg0Val = EmitScalarExpr(Arg0);
6462 auto Arg0Ty = Arg0->getType();
6463 auto PTy0 = FTy->getParamType(0);
6464 if (PTy0 != Arg0Val->getType()) {
6465 if (Arg0Ty->isArrayType())
6466 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6467 else
6468 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6469 }
6470 auto Arg1 = EmitScalarExpr(E->getArg(1));
6471 auto PTy1 = FTy->getParamType(1);
6472 if (PTy1 != Arg1->getType())
6473 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6474 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6475 }
6476
6477 case Builtin::BI__xray_typedevent: {
6478 // TODO: There should be a way to always emit events even if the current
6479 // function is not instrumented. Losing events in a stream can cripple
6480 // a trace.
6481 if (!ShouldXRayInstrumentFunction())
6482 return RValue::getIgnored();
6483
6484 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6485 XRayInstrKind::Typed))
6486 return RValue::getIgnored();
6487
6488 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6489 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6490 return RValue::getIgnored();
6491
6492 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6493 auto FTy = F->getFunctionType();
6494 auto Arg0 = EmitScalarExpr(E->getArg(0));
6495 auto PTy0 = FTy->getParamType(0);
6496 if (PTy0 != Arg0->getType())
6497 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6498 auto Arg1 = E->getArg(1);
6499 auto Arg1Val = EmitScalarExpr(Arg1);
6500 auto Arg1Ty = Arg1->getType();
6501 auto PTy1 = FTy->getParamType(1);
6502 if (PTy1 != Arg1Val->getType()) {
6503 if (Arg1Ty->isArrayType())
6504 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6505 else
6506 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6507 }
6508 auto Arg2 = EmitScalarExpr(E->getArg(2));
6509 auto PTy2 = FTy->getParamType(2);
6510 if (PTy2 != Arg2->getType())
6511 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6512 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6513 }
6514
6515 case Builtin::BI__builtin_ms_va_start:
6516 case Builtin::BI__builtin_ms_va_end:
6517 return RValue::get(
6518 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6519 BuiltinID == Builtin::BI__builtin_ms_va_start));
6520
6521 case Builtin::BI__builtin_ms_va_copy: {
6522 // Lower this manually. We can't reliably determine whether or not any
6523 // given va_copy() is for a Win64 va_list from the calling convention
6524 // alone, because it's legal to do this from a System V ABI function.
6525 // With opaque pointer types, we won't have enough information in LLVM
6526 // IR to determine this from the argument types, either. Best to do it
6527 // now, while we have enough information.
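// Illustrative result (sketch): __builtin_ms_va_copy(dst, src) is emitted as
// a load of the source va_list pointer followed by a store of that pointer
// into the destination va_list slot.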
6528 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6529 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6530
6531 DestAddr = DestAddr.withElementType(Int8PtrTy);
6532 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6533
6534 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6535 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6536 }
6537
6538 case Builtin::BI__builtin_get_device_side_mangled_name: {
6539 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6540 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6541 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6542 return RValue::get(Str.getPointer());
6543 }
6544 }
6545
6546 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6547 // the call using the normal call path, but using the unmangled
6548 // version of the function name.
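// For example (sketch): __builtin_sin(x) is emitted here as a normal call to
// the unmangled library function "sin" rather than as an intrinsic.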
6549 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6550 return emitLibraryCall(*this, FD, E,
6551 CGM.getBuiltinLibFunction(FD, BuiltinID));
6552
6553 // If this is a predefined lib function (e.g. malloc), emit the call
6554 // using exactly the normal call path.
6555 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6556 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6557
6558 // Check that a call to a target specific builtin has the correct target
6559 // features.
6560 // This is down here so that non-target-specific builtins skip the check;
6561 // if generic builtins ever start to require generic target features, this
6562 // can move up to the beginning of the function.
6563 checkTargetFeatures(E, FD);
6564
6565 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6566 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6567
6568 // See if we have a target specific intrinsic.
6569 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6570 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6571 StringRef Prefix =
6572 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6573 if (!Prefix.empty()) {
6574 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6575 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6576 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6577 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6578 // NOTE we don't need to perform a compatibility flag check here since the
6579 // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
6580 // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
6581 if (IntrinsicID == Intrinsic::not_intrinsic)
6582 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6583 }
6584
6585 if (IntrinsicID != Intrinsic::not_intrinsic) {
6586 SmallVector<Value *, 16> Args;
6587
6588 // Find out if any arguments are required to be integer constant
6589 // expressions.
6590 unsigned ICEArguments = 0;
6591 ASTContext::GetBuiltinTypeError Error;
6592 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6593 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6594
6595 Function *F = CGM.getIntrinsic(IntrinsicID);
6596 llvm::FunctionType *FTy = F->getFunctionType();
6597
6598 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6599 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6600 // If the intrinsic arg type is different from the builtin arg type
6601 // we need to do a bit cast.
6602 llvm::Type *PTy = FTy->getParamType(i);
6603 if (PTy != ArgValue->getType()) {
6604 // XXX - vector of pointers?
6605 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6606 if (PtrTy->getAddressSpace() !=
6607 ArgValue->getType()->getPointerAddressSpace()) {
6608 ArgValue = Builder.CreateAddrSpaceCast(
6609 ArgValue, llvm::PointerType::get(getLLVMContext(),
6610 PtrTy->getAddressSpace()));
6611 }
6612 }
6613
6614 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
6615 // in AMX intrinsics.
6616 if (PTy->isX86_AMXTy())
6617 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6618 {ArgValue->getType()}, {ArgValue});
6619 else
6620 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6621 }
6622
6623 Args.push_back(ArgValue);
6624 }
6625
6626 Value *V = Builder.CreateCall(F, Args);
6627 QualType BuiltinRetType = E->getType();
6628
6629 llvm::Type *RetTy = VoidTy;
6630 if (!BuiltinRetType->isVoidType())
6631 RetTy = ConvertType(BuiltinRetType);
6632
6633 if (RetTy != V->getType()) {
6634 // XXX - vector of pointers?
6635 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6636 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6637 V = Builder.CreateAddrSpaceCast(
6638 V, llvm::PointerType::get(getLLVMContext(),
6639 PtrTy->getAddressSpace()));
6640 }
6641 }
6642
6643 // Cast x86_amx to vector type (e.g., v256i32); this only happens
6644 // in AMX intrinsics.
6645 if (V->getType()->isX86_AMXTy())
6646 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6647 {V});
6648 else
6649 V = Builder.CreateBitCast(V, RetTy);
6650 }
6651
6652 if (RetTy->isVoidTy())
6653 return RValue::get(nullptr);
6654
6655 return RValue::get(V);
6656 }
6657
6658 // Some target-specific builtins can have aggregate return values, e.g.
6659 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6660 // ReturnValue to be non-null, so that the target-specific emission code can
6661 // always just emit into it.
6662 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6663 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6664 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6665 ReturnValue = ReturnValueSlot(DestPtr, false);
6666 }
6667
6668 // Now see if we can emit a target-specific builtin.
6669 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6670 switch (EvalKind) {
6671 case TEK_Scalar:
6672 if (V->getType()->isVoidTy())
6673 return RValue::get(nullptr);
6674 return RValue::get(V);
6675 case TEK_Aggregate:
6676 return RValue::getAggregate(ReturnValue.getAddress(),
6677 ReturnValue.isVolatile());
6678 case TEK_Complex:
6679 llvm_unreachable("No current target builtin returns complex");
6680 }
6681 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6682 }
6683
6684 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6685 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E, ReturnValue)) {
6686 switch (EvalKind) {
6687 case TEK_Scalar:
6688 if (V->getType()->isVoidTy())
6689 return RValue::get(nullptr);
6690 return RValue::get(V);
6691 case TEK_Aggregate:
6692 return RValue::getAggregate(ReturnValue.getAddress(),
6693 ReturnValue.isVolatile());
6694 case TEK_Complex:
6695 llvm_unreachable("No current hlsl builtin returns complex");
6696 }
6697 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6698 }
6699
6700 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6701 return EmitHipStdParUnsupportedBuiltin(this, FD);
6702
6703 ErrorUnsupported(E, "builtin function");
6704
6705 // Unknown builtin, for now just dump it out and return undef.
6706 return GetUndefRValue(E->getType());
6707}
6708
6709static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6710 unsigned BuiltinID, const CallExpr *E,
6711 ReturnValueSlot ReturnValue,
6712 llvm::Triple::ArchType Arch) {
6713 // When compiling in HipStdPar mode we have to be conservative in rejecting
6714 // target specific features in the FE, and defer the possible error to the
6715 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6716 // referenced by an accelerator executable function, we emit an error.
6717 // Returning nullptr here leads to the builtin being handled in
6718 // EmitHipStdParUnsupportedBuiltin.
6719 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6720 Arch != CGF->getTarget().getTriple().getArch())
6721 return nullptr;
6722
6723 switch (Arch) {
6724 case llvm::Triple::arm:
6725 case llvm::Triple::armeb:
6726 case llvm::Triple::thumb:
6727 case llvm::Triple::thumbeb:
6728 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6729 case llvm::Triple::aarch64:
6730 case llvm::Triple::aarch64_32:
6731 case llvm::Triple::aarch64_be:
6732 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6733 case llvm::Triple::bpfeb:
6734 case llvm::Triple::bpfel:
6735 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6736 case llvm::Triple::x86:
6737 case llvm::Triple::x86_64:
6738 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6739 case llvm::Triple::ppc:
6740 case llvm::Triple::ppcle:
6741 case llvm::Triple::ppc64:
6742 case llvm::Triple::ppc64le:
6743 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6744 case llvm::Triple::r600:
6745 case llvm::Triple::amdgcn:
6746 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6747 case llvm::Triple::systemz:
6748 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6749 case llvm::Triple::nvptx:
6750 case llvm::Triple::nvptx64:
6751 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6752 case llvm::Triple::wasm32:
6753 case llvm::Triple::wasm64:
6754 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6755 case llvm::Triple::hexagon:
6756 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6757 case llvm::Triple::riscv32:
6758 case llvm::Triple::riscv64:
6759 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6760 case llvm::Triple::spirv64:
6761 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6762 return nullptr;
6763 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6764 default:
6765 return nullptr;
6766 }
6767}
6768
6769Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6770 const CallExpr *E,
6771 ReturnValueSlot ReturnValue) {
6772 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6773 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6774 return EmitTargetArchBuiltinExpr(
6775 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6776 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6777 }
6778
6779 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6780 getTarget().getTriple().getArch());
6781}
6782
6783static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6784 NeonTypeFlags TypeFlags,
6785 bool HasLegalHalfType = true,
6786 bool V1Ty = false,
6787 bool AllowBFloatArgsAndRet = true) {
6788 int IsQuad = TypeFlags.isQuad();
6789 switch (TypeFlags.getEltType()) {
6790 case NeonTypeFlags::Int8:
6791 case NeonTypeFlags::Poly8:
6792 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6793 case NeonTypeFlags::Int16:
6794 case NeonTypeFlags::Poly16:
6795 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6796 case NeonTypeFlags::BFloat16:
6797 if (AllowBFloatArgsAndRet)
6798 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6799 else
6800 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6801 case NeonTypeFlags::Float16:
6802 if (HasLegalHalfType)
6803 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6804 else
6805 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6806 case NeonTypeFlags::Int32:
6807 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6808 case NeonTypeFlags::Int64:
6809 case NeonTypeFlags::Poly64:
6810 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6811 case NeonTypeFlags::Poly128:
6812 // FIXME: i128 and f128 don't get full support in Clang and LLVM.
6813 // A lot of the i128 and f128 API is missing,
6814 // so we use v16i8 to represent poly128 and get it pattern matched.
6815 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6816 case NeonTypeFlags::Float32:
6817 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6818 case NeonTypeFlags::Float64:
6819 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6820 }
6821 llvm_unreachable("Unknown vector element type!");
6822}
6823
6824static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6825 NeonTypeFlags IntTypeFlags) {
6826 int IsQuad = IntTypeFlags.isQuad();
6827 switch (IntTypeFlags.getEltType()) {
6828 case NeonTypeFlags::Int16:
6829 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6830 case NeonTypeFlags::Int32:
6831 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6832 case NeonTypeFlags::Int64:
6833 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6834 default:
6835 llvm_unreachable("Type can't be converted to floating-point!");
6836 }
6837}
6838
6839Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6840 const ElementCount &Count) {
6841 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6842 return Builder.CreateShuffleVector(V, V, SV, "lane");
6843}
6844
6845Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6846 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6847 return EmitNeonSplat(V, C, EC);
6848}
6849
6850Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
6851 const char *name,
6852 unsigned shift, bool rightshift) {
6853 unsigned j = 0;
6854 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6855 ai != ae; ++ai, ++j) {
6856 if (F->isConstrainedFPIntrinsic())
6857 if (ai->getType()->isMetadataTy())
6858 continue;
6859 if (shift > 0 && shift == j)
6860 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6861 else
6862 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6863 }
6864
6865 if (F->isConstrainedFPIntrinsic())
6866 return Builder.CreateConstrainedFPCall(F, Ops, name);
6867 else
6868 return Builder.CreateCall(F, Ops, name);
6869}
6870
6871Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6872 bool neg) {
6873 int SV = cast<ConstantInt>(V)->getSExtValue();
6874 return ConstantInt::get(Ty, neg ? -SV : SV);
6875}
6876
6877// Right-shift a vector by a constant.
6878Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6879 llvm::Type *Ty, bool usgn,
6880 const char *name) {
6881 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6882
6883 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6884 int EltSize = VTy->getScalarSizeInBits();
6885
6886 Vec = Builder.CreateBitCast(Vec, Ty);
6887
6888 // lshr/ashr are undefined when the shift amount is equal to the vector
6889 // element size.
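// For example (sketch): a right shift by 32 on a <4 x i32> vector (e.g.
// vshrq_n_s32(v, 32)) is emitted as an ashr by 31 in the signed case, and
// folds to a zero vector in the unsigned case.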
6890 if (ShiftAmt == EltSize) {
6891 if (usgn) {
6892 // Right-shifting an unsigned value by its size yields 0.
6893 return llvm::ConstantAggregateZero::get(VTy);
6894 } else {
6895 // Right-shifting a signed value by its size is equivalent
6896 // to a shift of size-1.
6897 --ShiftAmt;
6898 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6899 }
6900 }
6901
6902 Shift = EmitNeonShiftVector(Shift, Ty, false);
6903 if (usgn)
6904 return Builder.CreateLShr(Vec, Shift, name);
6905 else
6906 return Builder.CreateAShr(Vec, Shift, name);
6907}
6908
6909enum {
6910 AddRetType = (1 << 0),
6911 Add1ArgType = (1 << 1),
6912 Add2ArgTypes = (1 << 2),
6913
6914 VectorizeRetType = (1 << 3),
6915 VectorizeArgTypes = (1 << 4),
6916
6917 InventFloatType = (1 << 5),
6918 UnsignedAlts = (1 << 6),
6919
6920 Use64BitVectors = (1 << 7),
6921 Use128BitVectors = (1 << 8),
6922
6923 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6924 VectorRet = AddRetType | VectorizeRetType,
6925 VectorRetGetArgs01 =
6926 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6927 FpCmpzModifiers =
6928 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6929};
6930
6931namespace {
6932struct ARMVectorIntrinsicInfo {
6933 const char *NameHint;
6934 unsigned BuiltinID;
6935 unsigned LLVMIntrinsic;
6936 unsigned AltLLVMIntrinsic;
6937 uint64_t TypeModifier;
6938
6939 bool operator<(unsigned RHSBuiltinID) const {
6940 return BuiltinID < RHSBuiltinID;
6941 }
6942 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6943 return BuiltinID < TE.BuiltinID;
6944 }
6945};
6946} // end anonymous namespace
6947
6948#define NEONMAP0(NameBase) \
6949 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6950
6951#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6952 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6953 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6954
6955#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6956 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6957 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6958 TypeModifier }
6959
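// For example (sketch), the table entry
//   NEONMAP1(vabs_v, arm_neon_vabs, 0)
// expands to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }
// i.e. a name hint, the builtin ID, the LLVM intrinsic, no alternate
// intrinsic, and no type-modifier flags.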
6960static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6961 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6962 NEONMAP0(splat_lane_v),
6963 NEONMAP0(splat_laneq_v),
6964 NEONMAP0(splatq_lane_v),
6965 NEONMAP0(splatq_laneq_v),
6966 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6967 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6968 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6969 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6970 NEONMAP0(vadd_v),
6971 NEONMAP0(vaddhn_v),
6972 NEONMAP0(vaddq_v),
6973 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6974 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6975 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6976 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6977 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6978 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6979 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6980 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6981 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6982 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6983 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6984 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6985 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6986 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6987 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6988 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6989 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6990 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6991 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6992 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6993 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6994 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6995 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6996 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6997 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6998 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6999 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
7000 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
7001 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
7002 NEONMAP0(vceqz_v),
7003 NEONMAP0(vceqzq_v),
7004 NEONMAP0(vcgez_v),
7005 NEONMAP0(vcgezq_v),
7006 NEONMAP0(vcgtz_v),
7007 NEONMAP0(vcgtzq_v),
7008 NEONMAP0(vclez_v),
7009 NEONMAP0(vclezq_v),
7010 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
7011 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
7012 NEONMAP0(vcltz_v),
7013 NEONMAP0(vcltzq_v),
7014 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7015 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7016 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7017 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7018 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
7019 NEONMAP0(vcvt_f16_s16),
7020 NEONMAP0(vcvt_f16_u16),
7021 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
7022 NEONMAP0(vcvt_f32_v),
7023 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7024 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7025 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7026 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7027 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7028 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7029 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7030 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7031 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7032 NEONMAP0(vcvt_s16_f16),
7033 NEONMAP0(vcvt_s32_v),
7034 NEONMAP0(vcvt_s64_v),
7035 NEONMAP0(vcvt_u16_f16),
7036 NEONMAP0(vcvt_u32_v),
7037 NEONMAP0(vcvt_u64_v),
7038 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
7039 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
7040 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
7041 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
7042 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
7043 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
7044 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
7045 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
7046 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
7047 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
7048 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
7049 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
7050 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
7051 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
7052 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
7053 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
7054 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
7055 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
7056 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
7057 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
7058 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
7059 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
7060 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
7061 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
7062 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
7063 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
7064 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
7065 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
7066 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
7067 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
7068 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
7069 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
7070 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
7071 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
7072 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
7073 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
7074 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
7075 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
7076 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
7077 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
7078 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
7079 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
7080 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
7081 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
7082 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
7083 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
7084 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
7085 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
7086 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
7087 NEONMAP0(vcvtq_f16_s16),
7088 NEONMAP0(vcvtq_f16_u16),
7089 NEONMAP0(vcvtq_f32_v),
7090 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
7091 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
7092 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
7093 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
7094 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
7095 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
7096 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
7097 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
7098 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
7099 NEONMAP0(vcvtq_s16_f16),
7100 NEONMAP0(vcvtq_s32_v),
7101 NEONMAP0(vcvtq_s64_v),
7102 NEONMAP0(vcvtq_u16_f16),
7103 NEONMAP0(vcvtq_u32_v),
7104 NEONMAP0(vcvtq_u64_v),
7105 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
7106 NEONMAP1(vdot_u32, arm_neon_udot, 0),
7107 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
7108 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
7109 NEONMAP0(vext_v),
7110 NEONMAP0(vextq_v),
7111 NEONMAP0(vfma_v),
7112 NEONMAP0(vfmaq_v),
7113 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7114 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
7115 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7116 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
7117 NEONMAP0(vld1_dup_v),
7118 NEONMAP1(vld1_v, arm_neon_vld1, 0),
7119 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
7120 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
7121 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
7122 NEONMAP0(vld1q_dup_v),
7123 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
7124 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
7125 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
7126 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
7127 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
7128 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
7129 NEONMAP1(vld2_v, arm_neon_vld2, 0),
7130 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
7131 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
7132 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
7133 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
7134 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
7135 NEONMAP1(vld3_v, arm_neon_vld3, 0),
7136 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
7137 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
7138 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
7139 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
7140 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
7141 NEONMAP1(vld4_v, arm_neon_vld4, 0),
7142 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
7143 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
7144 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
7145 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7146 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
7147 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
7148 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
7149 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7150 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
7151 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
7152 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
7153 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
7154 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
7155 NEONMAP0(vmovl_v),
7156 NEONMAP0(vmovn_v),
7157 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
7158 NEONMAP0(vmull_v),
7159 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
7160 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7161 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
7162 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
7163 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7164 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
7165 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
7166 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
7167 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
7168 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
7169 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
7170 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7171 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
7172 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
7173 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
7174 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
7175 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
7176 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
7177 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
7178 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
7179 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
7180 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
7181 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
7182 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
7183 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
7184 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
7185 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
7186 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
7187 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
7188 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
7189 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
7190 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
7191 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7192 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
7193 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7194 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7195 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
7196 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
7197 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
7198 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
7199 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7200 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
7201 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
7202 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7203 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
7204 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
7205 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
7206 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7207 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
7208 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
7209 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
7210 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
7211 NEONMAP0(vrndi_v),
7212 NEONMAP0(vrndiq_v),
7213 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
7214 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
7215 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
7216 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
7217 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
7218 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
7219 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
7220 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
7221 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
7222 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7223 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
7224 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7225 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
7226 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7227 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
7228 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
7229 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
7230 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
7231 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
7232 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
7233 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
7234 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
7235 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
7236 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
7237 NEONMAP0(vshl_n_v),
7238 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7239 NEONMAP0(vshll_n_v),
7240 NEONMAP0(vshlq_n_v),
7241 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
7242 NEONMAP0(vshr_n_v),
7243 NEONMAP0(vshrn_n_v),
7244 NEONMAP0(vshrq_n_v),
7245 NEONMAP1(vst1_v, arm_neon_vst1, 0),
7246 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
7247 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
7248 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
7249 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
7250 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
7251 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
7252 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
7253 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
7254 NEONMAP1(vst2_v, arm_neon_vst2, 0),
7255 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
7256 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
7257 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
7258 NEONMAP1(vst3_v, arm_neon_vst3, 0),
7259 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
7260 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
7261 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
7262 NEONMAP1(vst4_v, arm_neon_vst4, 0),
7263 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
7264 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
7265 NEONMAP0(vsubhn_v),
7266 NEONMAP0(vtrn_v),
7267 NEONMAP0(vtrnq_v),
7268 NEONMAP0(vtst_v),
7269 NEONMAP0(vtstq_v),
7270 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
7271 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
7272 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
7273 NEONMAP0(vuzp_v),
7274 NEONMAP0(vuzpq_v),
7275 NEONMAP0(vzip_v),
7276 NEONMAP0(vzipq_v)
7277};
7278
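// Editorial note (not part of the original source): a key to reading the
// NEONMAP rows above, inferred from how they are consumed later in this file.
// NEONMAP0 entries get fully custom lowering, NEONMAP1 entries name a single
// LLVM intrinsic, and NEONMAP2 entries pair an unsigned intrinsic with a
// signed alternative; the signed one is chosen by the UnsignedAlts check
// ("if ((Modifier & UnsignedAlts) && !Usgn) Int = AltLLVMIntrinsic;") in
// EmitCommonNeonBuiltinExpr. A hedged example for the vqsub_v row, with
// hypothetical user code:
//
//   #include <arm_neon.h>
//   uint8x8_t qsub_u(uint8x8_t a, uint8x8_t b) { return vqsub_u8(a, b); }
//   // expected to lower to @llvm.usub.sat.v8i8
//   int8x8_t  qsub_s(int8x8_t a, int8x8_t b)  { return vqsub_s8(a, b); }
//   // expected to lower to @llvm.ssub.sat.v8i8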
7279static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
7280 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
7281 NEONMAP0(splat_lane_v),
7282 NEONMAP0(splat_laneq_v),
7283 NEONMAP0(splatq_lane_v),
7284 NEONMAP0(splatq_laneq_v),
7285 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
7286 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
7287 NEONMAP0(vadd_v),
7288 NEONMAP0(vaddhn_v),
7289 NEONMAP0(vaddq_p128),
7290 NEONMAP0(vaddq_v),
7291 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
7292 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
7293 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
7294 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
7295 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7296 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7297 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7298 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7299 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7300 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7301 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7302 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
7303 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
7304 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
7305 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
7306 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
7307 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
7308 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7309 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7310 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7311 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7312 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
7313 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
7314 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
7315 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
7316 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
7317 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
7318 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
7319 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
7320 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
7321 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
7322 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
7323 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
7324 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
7325 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
7326 NEONMAP0(vceqz_v),
7327 NEONMAP0(vceqzq_v),
7328 NEONMAP0(vcgez_v),
7329 NEONMAP0(vcgezq_v),
7330 NEONMAP0(vcgtz_v),
7331 NEONMAP0(vcgtzq_v),
7332 NEONMAP0(vclez_v),
7333 NEONMAP0(vclezq_v),
7334 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
7335 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
7336 NEONMAP0(vcltz_v),
7337 NEONMAP0(vcltzq_v),
7338 NEONMAP1(vclz_v, ctlz, Add1ArgType),
7339 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
7340 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7341 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7342 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7343 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7344 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7345 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7346 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7347 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7348 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
7349 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
7350 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
7351 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
7352 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
7353 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
7354 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
7355 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
7356 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
7357 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
7358 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
7359 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
7360 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
7361 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
7362 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
7363 NEONMAP0(vcvt_f16_s16),
7364 NEONMAP0(vcvt_f16_u16),
7365 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
7366 NEONMAP0(vcvt_f32_v),
7367 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7368 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7369 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7370 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7371 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7372 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7373 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7374 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7375 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7376 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7377 NEONMAP0(vcvtq_f16_s16),
7378 NEONMAP0(vcvtq_f16_u16),
7379 NEONMAP0(vcvtq_f32_v),
7380 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
7381 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
7382 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
7383 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7384 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
7385 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
7386 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
7387 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
7388 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
7389 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
7390 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
7391 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
7392 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
7393 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
7394 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
7395 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
7396 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7397 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7398 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7399 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7400 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7401 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7402 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7403 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
7404 NEONMAP0(vext_v),
7405 NEONMAP0(vextq_v),
7406 NEONMAP0(vfma_v),
7407 NEONMAP0(vfmaq_v),
7408 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
7409 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
7410 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
7411 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
7412 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
7413 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
7414 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
7415 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
7416 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7417 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
7418 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7419 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
7420 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
7421 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
7422 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
7423 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
7424 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
7425 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
7426 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
7427 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
7428 NEONMAP0(vmovl_v),
7429 NEONMAP0(vmovn_v),
7430 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
7431 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
7432 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
7433 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7434 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
7435 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
7436 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
7437 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
7438 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7439 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
7440 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
7441 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
7442 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
7443 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7444 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
7445 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
7446 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
7447 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
7448 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
7449 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
7450 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
7451 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
7452 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7453 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7454 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7455 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7456 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7457 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7458 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7459 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7460 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7461 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7462 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7463 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7464 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7465 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7466 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7467 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7468 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7469 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7470 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7471 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7472 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7473 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7474 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7475 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7476 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7477 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7478 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7479 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7480 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7481 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7482 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7483 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7484 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7485 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7486 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7487 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7488 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7489 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7490 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7491 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7492 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7493 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7494 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7495 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7496 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7497 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7498 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7499 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7500 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7501 NEONMAP0(vrndi_v),
7502 NEONMAP0(vrndiq_v),
7503 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7504 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7505 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7506 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7507 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7508 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7509 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7510 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7511 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7512 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7513 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7514 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7515 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7516 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7517 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7518 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7519 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7520 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7521 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7522 NEONMAP0(vshl_n_v),
7523 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7524 NEONMAP0(vshll_n_v),
7525 NEONMAP0(vshlq_n_v),
7526 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7527 NEONMAP0(vshr_n_v),
7528 NEONMAP0(vshrn_n_v),
7529 NEONMAP0(vshrq_n_v),
7530 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7531 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7532 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7533 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7534 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7535 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7536 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7537 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7538 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7539 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7540 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7541 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7542 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7543 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7544 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7545 NEONMAP0(vsubhn_v),
7546 NEONMAP0(vtst_v),
7547 NEONMAP0(vtstq_v),
7548 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7549 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7550 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7551 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7552};
7553
7554static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7555 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7556 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7557 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7558 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7559 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7560 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7561 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7562 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7563 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7564 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7565 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7566 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7567 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7568 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7569 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7570 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7571 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7572 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7573 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7574 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7575 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7576 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7577 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7578 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7579 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7580 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7581 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7582 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7583 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7584 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7585 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7586 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7587 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7588 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7589 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7590 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7591 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7592 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7593 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7594 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7595 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7596 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7597 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7598 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7599 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7600 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7601 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7602 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7603 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7604 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7605 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7606 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7607 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7608 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7609 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7610 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7611 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7612 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7613 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7614 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7615 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7616 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7617 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7618 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7619 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7620 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7621 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7622 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7623 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7624 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7625 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7626 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7627 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7628 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7629 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7630 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7631 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7632 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7633 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7634 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7635 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7636 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7637 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7638 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7639 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7640 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7641 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7642 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7643 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7644 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7645 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7646 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7647 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7648 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7649 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7650 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7651 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7652 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7653 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7654 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7655 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7656 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7657 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7658 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7659 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7660 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7661 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7662 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7663 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7664 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7665 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7666 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7667 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7668 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7669 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7670 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7671 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7672 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7673 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7674 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7675 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7676 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7677 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7678 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7679 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7680 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7681 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7682 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7683 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7684 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7685 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7686 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7687 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7688 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7689 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7690 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7691 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7692 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7693 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7694 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7695 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7696 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7697 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7698 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7699 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7700 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7701 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7702 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7703 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7704 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7705 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7706 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7707 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7708 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7709 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7710 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7711 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7712 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7713 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7714 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7715 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7716 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7717 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7718 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7719 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7720 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7721 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7722 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7723 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7724 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7725 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7726 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7727 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7728 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7729 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7730 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7731 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7732 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7733 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7734 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7735 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7736 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7737 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7738 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7739 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7740 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7741 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7742 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7743 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7744 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7745 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7746 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7747 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7748 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7749 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7750 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7751 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7752 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7753 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7754 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7755 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7756 // FP16 scalar intrinsics go here.
7757 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7758 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7759 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7760 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7761 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7762 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7763 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7764 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7765 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7766 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7767 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7768 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7769 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7770 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7771 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7772 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7773 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7774 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7775 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7776 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7777 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7778 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7779 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7780 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7781 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7782 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7783 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7784 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7785 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7786 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7787 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7788 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7789 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7790 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7791};
7792
7793// Some intrinsics are equivalent for codegen.
7794static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7795 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7796 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7797 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7798 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7799 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7800 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7801 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7802 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7803 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7804 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7805 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7806 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7807 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7808 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7809 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7810 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7811 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7812 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7813 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7814 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7815 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7816 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7817 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7818 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7819 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7820 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7821 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7822 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7823 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7824 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7825 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7826 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7827 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7828 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7829 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7830 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7831 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7832 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7833 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7834 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7835 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7836 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7837 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7838 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7839 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7840 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7841 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7842 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7843 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7844 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7845 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7846 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7847 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7848 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7849 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7850 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7851 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7852 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7853 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7854 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7855 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7856 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7857 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7858 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7859 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7860 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7861 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7862 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7863 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7864 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7865 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7866 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7867 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7868 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7869 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7870 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7871 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7872 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7873 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7874 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7875 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7876 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7877 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7878 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7879 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7880 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7881 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7882 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7883 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7884 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7885 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7886 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7887 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7888 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7889 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7890 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7891 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7892 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7893 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7894 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7895 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7896 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7897 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7898 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7899 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7900 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7901 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7902 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7903 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7904 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7905 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7906 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7907 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7908 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7909 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7910 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7911 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7912 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7913 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7914 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7915 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7916 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7917 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7918 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7919 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7920 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7921 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7922 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7923 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7924 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7925 // arbitrary one to be handled as the canonical variation.
7926 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7927 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7928 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7929 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7930 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7931 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7932 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7933 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7934 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7935 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7936 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7937 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7938};
7939
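// Editorial sketch (not part of the original source): the pair table above
// remaps each "equivalent" builtin ID onto a canonical ID before the regular
// intrinsic-map lookup, so only the canonical variant needs a codegen path.
// A minimal standalone analogue with made-up IDs; the real lookup code in
// this file may differ in detail.
#if 0
#include <algorithm>
#include <iterator>
#include <utility>

static const std::pair<unsigned, unsigned> EquivMap[] = {{101, 7}, {102, 7}};

static unsigned canonicalizeBuiltinID(unsigned BuiltinID) {
  // Linear scan is fine for a table this small; fall back to the input ID.
  const auto *It =
      std::find_if(std::begin(EquivMap), std::end(EquivMap),
                   [&](const std::pair<unsigned, unsigned> &P) {
                     return P.first == BuiltinID;
                   });
  return It != std::end(EquivMap) ? It->second : BuiltinID;
}
#endif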
7940#undef NEONMAP0
7941#undef NEONMAP1
7942#undef NEONMAP2
7943
7944#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7945 { \
7946 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7947 TypeModifier \
7948 }
7949
7950#define SVEMAP2(NameBase, TypeModifier) \
7951 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7952static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7953#define GET_SVE_LLVM_INTRINSIC_MAP
7954#include "clang/Basic/arm_sve_builtin_cg.inc"
7955#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7956#undef GET_SVE_LLVM_INTRINSIC_MAP
7957};
7958
7959#undef SVEMAP1
7960#undef SVEMAP2
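// Editorial note (not part of the original source): per the SVEMAP1/SVEMAP2
// definitions above, SVEMAP1(NameBase, LLVMIntrinsic, Mod) expands to an
// ARMVectorIntrinsicInfo initializer of the form
//   { "NameBase", SVE::BI__builtin_sve_NameBase, Intrinsic::LLVMIntrinsic, 0, Mod }
// while SVEMAP2(NameBase, Mod) leaves the LLVM intrinsic slot as 0.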
7961
7962#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7963 { \
7964 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7965 TypeModifier \
7966 }
7967
7968#define SMEMAP2(NameBase, TypeModifier) \
7969 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7970static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7971#define GET_SME_LLVM_INTRINSIC_MAP
7972#include "clang/Basic/arm_sme_builtin_cg.inc"
7973#undef GET_SME_LLVM_INTRINSIC_MAP
7974};
7975
7976#undef SMEMAP1
7977#undef SMEMAP2
7978
7979static bool NEONSIMDIntrinsicsProvenSorted = false;
7980
7981static bool AArch64SIMDIntrinsicsProvenSorted = false;
7982static bool AArch64SISDIntrinsicsProvenSorted = false;
7983static bool AArch64SVEIntrinsicsProvenSorted = false;
7984static bool AArch64SMEIntrinsicsProvenSorted = false;
7985
7986static const ARMVectorIntrinsicInfo *
7987findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7988 unsigned BuiltinID, bool &MapProvenSorted) {
7989
7990#ifndef NDEBUG
7991 if (!MapProvenSorted) {
7992 assert(llvm::is_sorted(IntrinsicMap));
7993 MapProvenSorted = true;
7994 }
7995#endif
7996
7997 const ARMVectorIntrinsicInfo *Builtin =
7998 llvm::lower_bound(IntrinsicMap, BuiltinID);
7999
8000 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
8001 return Builtin;
8002
8003 return nullptr;
8004}
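// Editorial sketch (not part of the original source): the lookup above relies
// on the intrinsic map being sorted by BuiltinID, which the one-time is_sorted
// assertion guards; llvm::lower_bound then performs a binary search. A
// self-contained analogue using std::lower_bound with made-up entries:
#if 0
#include <algorithm>
#include <cstdio>
#include <iterator>

struct MapEntry {
  unsigned BuiltinID;
  const char *Name;
};

// Must stay sorted by BuiltinID, mirroring assert(llvm::is_sorted(IntrinsicMap)).
static const MapEntry Map[] = {{3, "vabs"}, {8, "vadd"}, {21, "vqsub"}};

static const MapEntry *findInMap(unsigned BuiltinID) {
  const MapEntry *It = std::lower_bound(
      std::begin(Map), std::end(Map), BuiltinID,
      [](const MapEntry &E, unsigned ID) { return E.BuiltinID < ID; });
  return (It != std::end(Map) && It->BuiltinID == BuiltinID) ? It : nullptr;
}

int main() {
  if (const MapEntry *E = findInMap(8))
    std::printf("found %s\n", E->Name); // prints "found vadd"
}
#endif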
8005
8006Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
8007 unsigned Modifier,
8008 llvm::Type *ArgType,
8009 const CallExpr *E) {
8010 int VectorSize = 0;
8011 if (Modifier & Use64BitVectors)
8012 VectorSize = 64;
8013 else if (Modifier & Use128BitVectors)
8014 VectorSize = 128;
8015
8016 // Return type.
8017 SmallVector<llvm::Type *, 3> Tys;
8018 if (Modifier & AddRetType) {
8019 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
8020 if (Modifier & VectorizeRetType)
8021 Ty = llvm::FixedVectorType::get(
8022 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
8023
8024 Tys.push_back(Ty);
8025 }
8026
8027 // Arguments.
8028 if (Modifier & VectorizeArgTypes) {
8029 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
8030 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
8031 }
8032
8033 if (Modifier & (Add1ArgType | Add2ArgTypes))
8034 Tys.push_back(ArgType);
8035
8036 if (Modifier & Add2ArgTypes)
8037 Tys.push_back(ArgType);
8038
8039 if (Modifier & InventFloatType)
8040 Tys.push_back(FloatTy);
8041
8042 return CGM.getIntrinsic(IntrinsicID, Tys);
8043}
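// Editorial sketch (not part of the original source): LookupNeonLLVMIntrinsic
// above assembles the overload-type list that CGM.getIntrinsic needs from the
// TypeModifier bits. A simplified standalone model of that flag handling; the
// flag names mirror the ones used above but the numeric values are invented.
#if 0
#include <string>
#include <vector>

enum : unsigned {
  AddRetTypeBit = 1u << 0,
  Add1ArgTypeBit = 1u << 1,
  Add2ArgTypesBit = 1u << 2,
  InventFloatTypeBit = 1u << 3,
};

static std::vector<std::string>
overloadTypes(unsigned Modifier, const std::string &RetTy,
              const std::string &ArgTy) {
  std::vector<std::string> Tys;
  if (Modifier & AddRetTypeBit)
    Tys.push_back(RetTy); // the call's return type participates in the overload
  if (Modifier & (Add1ArgTypeBit | Add2ArgTypesBit))
    Tys.push_back(ArgTy); // first argument type
  if (Modifier & Add2ArgTypesBit)
    Tys.push_back(ArgTy); // second argument type, same as the first here
  if (Modifier & InventFloatTypeBit)
    Tys.push_back("float"); // mirrors the Tys.push_back(FloatTy) case above
  return Tys;
}
#endif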
8044
8045static Value *EmitCommonNeonSISDBuiltinExpr(
8046 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
8047 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
8048 unsigned BuiltinID = SISDInfo.BuiltinID;
8049 unsigned int Int = SISDInfo.LLVMIntrinsic;
8050 unsigned Modifier = SISDInfo.TypeModifier;
8051 const char *s = SISDInfo.NameHint;
8052
8053 switch (BuiltinID) {
8054 case NEON::BI__builtin_neon_vcled_s64:
8055 case NEON::BI__builtin_neon_vcled_u64:
8056 case NEON::BI__builtin_neon_vcles_f32:
8057 case NEON::BI__builtin_neon_vcled_f64:
8058 case NEON::BI__builtin_neon_vcltd_s64:
8059 case NEON::BI__builtin_neon_vcltd_u64:
8060 case NEON::BI__builtin_neon_vclts_f32:
8061 case NEON::BI__builtin_neon_vcltd_f64:
8062 case NEON::BI__builtin_neon_vcales_f32:
8063 case NEON::BI__builtin_neon_vcaled_f64:
8064 case NEON::BI__builtin_neon_vcalts_f32:
8065 case NEON::BI__builtin_neon_vcaltd_f64:
8066 // Only one direction of comparisons actually exists; cmle is actually a cmge
8067 // with swapped operands. The table gives us the right intrinsic but we
8068 // still need to do the swap.
8069 std::swap(Ops[0], Ops[1]);
8070 break;
8071 }
8072
8073 assert(Int && "Generic code assumes a valid intrinsic");
8074
8075 // Determine the type(s) of this overloaded AArch64 intrinsic.
8076 const Expr *Arg = E->getArg(0);
8077 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
8078 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
8079
8080 int j = 0;
8081 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
8082 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
8083 ai != ae; ++ai, ++j) {
8084 llvm::Type *ArgTy = ai->getType();
8085 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
8086 ArgTy->getPrimitiveSizeInBits())
8087 continue;
8088
8089 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
8090 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
8091 // it before inserting.
8092 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
8093 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
8094 Ops[j] =
8095 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
8096 }
8097
8098 Value *Result = CGF.EmitNeonCall(F, Ops, s);
8099 llvm::Type *ResultType = CGF.ConvertType(E->getType());
8100 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
8101 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
8102 return CGF.Builder.CreateExtractElement(Result, C0);
8103
8104 return CGF.Builder.CreateBitCast(Result, ResultType, s);
8105}
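// Editorial illustration (not part of the original source): for SISD builtins
// whose map entry carries Vectorize1ArgType | Use64BitVectors (for example
// vqaddh_s16 in the table above), the loop above widens each scalar operand by
// inserting it into lane 0 of a poison vector, the vector intrinsic is called,
// and the tail extracts lane 0 of the result. Roughly, in IR:
//
//   %va = insertelement <4 x i16> poison, i16 %a, i64 0
//   %vb = insertelement <4 x i16> poison, i16 %b, i64 0
//   %vr = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %va,
//                                                       <4 x i16> %vb)
//   %r  = extractelement <4 x i16> %vr, i64 0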
8106
8107Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
8108 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
8109 const char *NameHint, unsigned Modifier, const CallExpr *E,
8110 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
8111 llvm::Triple::ArchType Arch) {
8112 // Get the last argument, which specifies the vector type.
8113 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
8114 std::optional<llvm::APSInt> NeonTypeConst =
8115 Arg->getIntegerConstantExpr(getContext());
8116 if (!NeonTypeConst)
8117 return nullptr;
8118
8119 // Determine the type of this overloaded NEON intrinsic.
8120 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
8121 bool Usgn = Type.isUnsigned();
8122 bool Quad = Type.isQuad();
8123 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
8124 const bool AllowBFloatArgsAndRet =
8125 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
8126
8127 llvm::FixedVectorType *VTy =
8128 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
8129 llvm::Type *Ty = VTy;
8130 if (!Ty)
8131 return nullptr;
8132
8133 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8134 return Builder.getInt32(addr.getAlignment().getQuantity());
8135 };
8136
8137 unsigned Int = LLVMIntrinsic;
8138 if ((Modifier & UnsignedAlts) && !Usgn)
8139 Int = AltLLVMIntrinsic;
8140
8141 switch (BuiltinID) {
8142 default: break;
8143 case NEON::BI__builtin_neon_splat_lane_v:
8144 case NEON::BI__builtin_neon_splat_laneq_v:
8145 case NEON::BI__builtin_neon_splatq_lane_v:
8146 case NEON::BI__builtin_neon_splatq_laneq_v: {
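    // Editorial note (not in the original source): splatq_lane_v reads a lane
    // from a 64-bit input vector but produces a 128-bit result, hence the
    // doubled element count below; splat_laneq_v is the reverse, hence the
    // halving.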
8147 auto NumElements = VTy->getElementCount();
8148 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
8149 NumElements = NumElements * 2;
8150 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
8151 NumElements = NumElements.divideCoefficientBy(2);
8152
8153 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8154 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
8155 }
8156 case NEON::BI__builtin_neon_vpadd_v:
8157 case NEON::BI__builtin_neon_vpaddq_v:
8158 // We don't allow fp/int overloading of intrinsics.
8159 if (VTy->getElementType()->isFloatingPointTy() &&
8160 Int == Intrinsic::aarch64_neon_addp)
8161 Int = Intrinsic::aarch64_neon_faddp;
8162 break;
8163 case NEON::BI__builtin_neon_vabs_v:
8164 case NEON::BI__builtin_neon_vabsq_v:
8165 if (VTy->getElementType()->isFloatingPointTy())
8166 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
8167 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
8168 case NEON::BI__builtin_neon_vadd_v:
8169 case NEON::BI__builtin_neon_vaddq_v: {
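    // Editorial note (not in the original source): the bitcast-to-i8 and XOR
    // below implement carry-less (polynomial) addition, which matches how
    // vadd is defined for the poly element types that reach this path.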
8170 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
8171 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8172 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8173 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
8174 return Builder.CreateBitCast(Ops[0], Ty);
8175 }
8176 case NEON::BI__builtin_neon_vaddhn_v: {
8177 llvm::FixedVectorType *SrcTy =
8178 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8179
8180 // %sum = add <4 x i32> %lhs, %rhs
8181 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8182 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8183 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
8184
8185 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
8186 Constant *ShiftAmt =
8187 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8188 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
8189
8190 // %res = trunc <4 x i32> %high to <4 x i16>
8191 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
8192 }
8193 case NEON::BI__builtin_neon_vcale_v:
8194 case NEON::BI__builtin_neon_vcaleq_v:
8195 case NEON::BI__builtin_neon_vcalt_v:
8196 case NEON::BI__builtin_neon_vcaltq_v:
8197 std::swap(Ops[0], Ops[1]);
8198 [[fallthrough]];
8199 case NEON::BI__builtin_neon_vcage_v:
8200 case NEON::BI__builtin_neon_vcageq_v:
8201 case NEON::BI__builtin_neon_vcagt_v:
8202 case NEON::BI__builtin_neon_vcagtq_v: {
8203 llvm::Type *Ty;
8204 switch (VTy->getScalarSizeInBits()) {
8205 default: llvm_unreachable("unexpected type");
8206 case 32:
8207 Ty = FloatTy;
8208 break;
8209 case 64:
8210 Ty = DoubleTy;
8211 break;
8212 case 16:
8213 Ty = HalfTy;
8214 break;
8215 }
8216 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
8217 llvm::Type *Tys[] = { VTy, VecFlt };
8218 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8219 return EmitNeonCall(F, Ops, NameHint);
8220 }
8221 case NEON::BI__builtin_neon_vceqz_v:
8222 case NEON::BI__builtin_neon_vceqzq_v:
8223 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
8224 ICmpInst::ICMP_EQ, "vceqz");
8225 case NEON::BI__builtin_neon_vcgez_v:
8226 case NEON::BI__builtin_neon_vcgezq_v:
8227 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
8228 ICmpInst::ICMP_SGE, "vcgez");
8229 case NEON::BI__builtin_neon_vclez_v:
8230 case NEON::BI__builtin_neon_vclezq_v:
8231 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
8232 ICmpInst::ICMP_SLE, "vclez");
8233 case NEON::BI__builtin_neon_vcgtz_v:
8234 case NEON::BI__builtin_neon_vcgtzq_v:
8235 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
8236 ICmpInst::ICMP_SGT, "vcgtz");
8237 case NEON::BI__builtin_neon_vcltz_v:
8238 case NEON::BI__builtin_neon_vcltzq_v:
8239 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
8240 ICmpInst::ICMP_SLT, "vcltz");
8241 case NEON::BI__builtin_neon_vclz_v:
8242 case NEON::BI__builtin_neon_vclzq_v:
8243 // We generate a target-independent intrinsic, which needs a second argument
8244 // for whether or not clz of zero is undefined; on ARM it isn't.
8245 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
8246 break;
8247 case NEON::BI__builtin_neon_vcvt_f32_v:
8248 case NEON::BI__builtin_neon_vcvtq_f32_v:
8249 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8250 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
8251 HasLegalHalfType);
8252 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8253 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8254 case NEON::BI__builtin_neon_vcvt_f16_s16:
8255 case NEON::BI__builtin_neon_vcvt_f16_u16:
8256 case NEON::BI__builtin_neon_vcvtq_f16_s16:
8257 case NEON::BI__builtin_neon_vcvtq_f16_u16:
8258 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8259 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
8260 HasLegalHalfType);
8261 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8262 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8263 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
8264 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
8265 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
8266 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
8267 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8268 Function *F = CGM.getIntrinsic(Int, Tys);
8269 return EmitNeonCall(F, Ops, "vcvt_n");
8270 }
8271 case NEON::BI__builtin_neon_vcvt_n_f32_v:
8272 case NEON::BI__builtin_neon_vcvt_n_f64_v:
8273 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
8274 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
8275 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
8276 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
8277 Function *F = CGM.getIntrinsic(Int, Tys);
8278 return EmitNeonCall(F, Ops, "vcvt_n");
8279 }
8280 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
8281 case NEON::BI__builtin_neon_vcvt_n_s32_v:
8282 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
8283 case NEON::BI__builtin_neon_vcvt_n_u32_v:
8284 case NEON::BI__builtin_neon_vcvt_n_s64_v:
8285 case NEON::BI__builtin_neon_vcvt_n_u64_v:
8286 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
8287 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
8288 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
8289 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
8290 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
8291 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
8292 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8293 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8294 return EmitNeonCall(F, Ops, "vcvt_n");
8295 }
8296 case NEON::BI__builtin_neon_vcvt_s32_v:
8297 case NEON::BI__builtin_neon_vcvt_u32_v:
8298 case NEON::BI__builtin_neon_vcvt_s64_v:
8299 case NEON::BI__builtin_neon_vcvt_u64_v:
8300 case NEON::BI__builtin_neon_vcvt_s16_f16:
8301 case NEON::BI__builtin_neon_vcvt_u16_f16:
8302 case NEON::BI__builtin_neon_vcvtq_s32_v:
8303 case NEON::BI__builtin_neon_vcvtq_u32_v:
8304 case NEON::BI__builtin_neon_vcvtq_s64_v:
8305 case NEON::BI__builtin_neon_vcvtq_u64_v:
8306 case NEON::BI__builtin_neon_vcvtq_s16_f16:
8307 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
8308 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8309 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
8310 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
8311 }
8312 case NEON::BI__builtin_neon_vcvta_s16_f16:
8313 case NEON::BI__builtin_neon_vcvta_s32_v:
8314 case NEON::BI__builtin_neon_vcvta_s64_v:
8315 case NEON::BI__builtin_neon_vcvta_u16_f16:
8316 case NEON::BI__builtin_neon_vcvta_u32_v:
8317 case NEON::BI__builtin_neon_vcvta_u64_v:
8318 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
8319 case NEON::BI__builtin_neon_vcvtaq_s32_v:
8320 case NEON::BI__builtin_neon_vcvtaq_s64_v:
8321 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
8322 case NEON::BI__builtin_neon_vcvtaq_u32_v:
8323 case NEON::BI__builtin_neon_vcvtaq_u64_v:
8324 case NEON::BI__builtin_neon_vcvtn_s16_f16:
8325 case NEON::BI__builtin_neon_vcvtn_s32_v:
8326 case NEON::BI__builtin_neon_vcvtn_s64_v:
8327 case NEON::BI__builtin_neon_vcvtn_u16_f16:
8328 case NEON::BI__builtin_neon_vcvtn_u32_v:
8329 case NEON::BI__builtin_neon_vcvtn_u64_v:
8330 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
8331 case NEON::BI__builtin_neon_vcvtnq_s32_v:
8332 case NEON::BI__builtin_neon_vcvtnq_s64_v:
8333 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
8334 case NEON::BI__builtin_neon_vcvtnq_u32_v:
8335 case NEON::BI__builtin_neon_vcvtnq_u64_v:
8336 case NEON::BI__builtin_neon_vcvtp_s16_f16:
8337 case NEON::BI__builtin_neon_vcvtp_s32_v:
8338 case NEON::BI__builtin_neon_vcvtp_s64_v:
8339 case NEON::BI__builtin_neon_vcvtp_u16_f16:
8340 case NEON::BI__builtin_neon_vcvtp_u32_v:
8341 case NEON::BI__builtin_neon_vcvtp_u64_v:
8342 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
8343 case NEON::BI__builtin_neon_vcvtpq_s32_v:
8344 case NEON::BI__builtin_neon_vcvtpq_s64_v:
8345 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
8346 case NEON::BI__builtin_neon_vcvtpq_u32_v:
8347 case NEON::BI__builtin_neon_vcvtpq_u64_v:
8348 case NEON::BI__builtin_neon_vcvtm_s16_f16:
8349 case NEON::BI__builtin_neon_vcvtm_s32_v:
8350 case NEON::BI__builtin_neon_vcvtm_s64_v:
8351 case NEON::BI__builtin_neon_vcvtm_u16_f16:
8352 case NEON::BI__builtin_neon_vcvtm_u32_v:
8353 case NEON::BI__builtin_neon_vcvtm_u64_v:
8354 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
8355 case NEON::BI__builtin_neon_vcvtmq_s32_v:
8356 case NEON::BI__builtin_neon_vcvtmq_s64_v:
8357 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
8358 case NEON::BI__builtin_neon_vcvtmq_u32_v:
8359 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8360 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8361 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8362 }
8363 case NEON::BI__builtin_neon_vcvtx_f32_v: {
8364 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
8365 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
8366
8367 }
8368 case NEON::BI__builtin_neon_vext_v:
8369 case NEON::BI__builtin_neon_vextq_v: {
8370 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
8371 SmallVector<int, 16> Indices;
8372 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8373 Indices.push_back(i+CV);
8374
8375 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8376 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8377 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
8378 }
8379 case NEON::BI__builtin_neon_vfma_v:
8380 case NEON::BI__builtin_neon_vfmaq_v: {
8381 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8382 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8383 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8384
8385    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
8386    return emitCallMaybeConstrainedFPBuiltin(
8387 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
8388 {Ops[1], Ops[2], Ops[0]});
8389 }
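  // In other words, vfma(a, b, c) computes a + b * c, so the accumulator
  // Ops[0] is passed as the final operand of the (possibly constrained) fma.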
8390 case NEON::BI__builtin_neon_vld1_v:
8391 case NEON::BI__builtin_neon_vld1q_v: {
8392 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8393 Ops.push_back(getAlignmentValue32(PtrOp0));
8394 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
8395 }
8396 case NEON::BI__builtin_neon_vld1_x2_v:
8397 case NEON::BI__builtin_neon_vld1q_x2_v:
8398 case NEON::BI__builtin_neon_vld1_x3_v:
8399 case NEON::BI__builtin_neon_vld1q_x3_v:
8400 case NEON::BI__builtin_neon_vld1_x4_v:
8401 case NEON::BI__builtin_neon_vld1q_x4_v: {
8402 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8403 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8404 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
8405 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8406 }
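  // The vld1xN intrinsics return an aggregate of N vectors; Ops[0] is the
  // address of the caller's result object, so the returned aggregate is
  // simply stored back to it.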
8407 case NEON::BI__builtin_neon_vld2_v:
8408 case NEON::BI__builtin_neon_vld2q_v:
8409 case NEON::BI__builtin_neon_vld3_v:
8410 case NEON::BI__builtin_neon_vld3q_v:
8411 case NEON::BI__builtin_neon_vld4_v:
8412 case NEON::BI__builtin_neon_vld4q_v:
8413 case NEON::BI__builtin_neon_vld2_dup_v:
8414 case NEON::BI__builtin_neon_vld2q_dup_v:
8415 case NEON::BI__builtin_neon_vld3_dup_v:
8416 case NEON::BI__builtin_neon_vld3q_dup_v:
8417 case NEON::BI__builtin_neon_vld4_dup_v:
8418 case NEON::BI__builtin_neon_vld4q_dup_v: {
8419 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8420 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8421 Value *Align = getAlignmentValue32(PtrOp1);
8422 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
8423 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8424 }
8425 case NEON::BI__builtin_neon_vld1_dup_v:
8426 case NEON::BI__builtin_neon_vld1q_dup_v: {
8427 Value *V = PoisonValue::get(Ty);
8428 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
8429 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
8430 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
8431 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
8432 return EmitNeonSplat(Ops[0], CI);
8433 }
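  // vld1_dup: load a single scalar, insert it into lane 0 of a poison vector,
  // then splat it across all lanes via EmitNeonSplat.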
8434 case NEON::BI__builtin_neon_vld2_lane_v:
8435 case NEON::BI__builtin_neon_vld2q_lane_v:
8436 case NEON::BI__builtin_neon_vld3_lane_v:
8437 case NEON::BI__builtin_neon_vld3q_lane_v:
8438 case NEON::BI__builtin_neon_vld4_lane_v:
8439 case NEON::BI__builtin_neon_vld4q_lane_v: {
8440 llvm::Type *Tys[] = {Ty, Int8PtrTy};
8441 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
8442 for (unsigned I = 2; I < Ops.size() - 1; ++I)
8443 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
8444 Ops.push_back(getAlignmentValue32(PtrOp1));
8445 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
8446 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8447 }
8448 case NEON::BI__builtin_neon_vmovl_v: {
8449 llvm::FixedVectorType *DTy =
8450 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8451 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
8452 if (Usgn)
8453 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8454 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8455 }
8456 case NEON::BI__builtin_neon_vmovn_v: {
8457 llvm::FixedVectorType *QTy =
8458 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8459 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8460 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8461 }
8462 case NEON::BI__builtin_neon_vmull_v:
8463  // FIXME: the integer vmull operations could be emitted in terms of pure
8464  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8465  // hoisting the exts outside loops. Until GlobalISel comes along and can
8466  // see through such movement, this leads to bad CodeGen. So we need an
8467  // intrinsic for now.
8468 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8469 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8470 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
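  // So a signed vmull, for example, becomes a call to @llvm.arm.neon.vmulls
  // rather than a sext/sext/mul sequence.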
8471 case NEON::BI__builtin_neon_vpadal_v:
8472 case NEON::BI__builtin_neon_vpadalq_v: {
8473 // The source operand type has twice as many elements of half the size.
8474 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8475 llvm::Type *EltTy =
8476 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8477 auto *NarrowTy =
8478 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8479 llvm::Type *Tys[2] = { Ty, NarrowTy };
8480 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8481 }
8482 case NEON::BI__builtin_neon_vpaddl_v:
8483 case NEON::BI__builtin_neon_vpaddlq_v: {
8484 // The source operand type has twice as many elements of half the size.
8485 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8486 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8487 auto *NarrowTy =
8488 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8489 llvm::Type *Tys[2] = { Ty, NarrowTy };
8490 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8491 }
8492 case NEON::BI__builtin_neon_vqdmlal_v:
8493 case NEON::BI__builtin_neon_vqdmlsl_v: {
8494 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8495 Ops[1] =
8496 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8497 Ops.resize(2);
8498 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8499 }
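  // vqdmlal/vqdmlsl are emitted as two intrinsic calls: a saturating doubling
  // multiply-long (LLVMIntrinsic) on the last two operands, followed by a
  // saturating accumulate (AltLLVMIntrinsic) with the first operand.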
8500 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8501 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8502 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8503 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8504 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8505 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8506 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8507 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8508 RTy->getNumElements() * 2);
8509 llvm::Type *Tys[2] = {
8510 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8511 /*isQuad*/ false))};
8512 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8513 }
8514 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8515 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8516 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8517 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8518 llvm::Type *Tys[2] = {
8519 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8520 /*isQuad*/ true))};
8521 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8522 }
8523 case NEON::BI__builtin_neon_vqshl_n_v:
8524 case NEON::BI__builtin_neon_vqshlq_n_v:
8525 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8526 1, false);
8527 case NEON::BI__builtin_neon_vqshlu_n_v:
8528 case NEON::BI__builtin_neon_vqshluq_n_v:
8529 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8530 1, false);
8531 case NEON::BI__builtin_neon_vrecpe_v:
8532 case NEON::BI__builtin_neon_vrecpeq_v:
8533 case NEON::BI__builtin_neon_vrsqrte_v:
8534 case NEON::BI__builtin_neon_vrsqrteq_v:
8535 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8536 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8537 case NEON::BI__builtin_neon_vrndi_v:
8538 case NEON::BI__builtin_neon_vrndiq_v:
8539 Int = Builder.getIsFPConstrained()
8540 ? Intrinsic::experimental_constrained_nearbyint
8541 : Intrinsic::nearbyint;
8542 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8543 case NEON::BI__builtin_neon_vrshr_n_v:
8544 case NEON::BI__builtin_neon_vrshrq_n_v:
8545 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8546 1, true);
8547 case NEON::BI__builtin_neon_vsha512hq_u64:
8548 case NEON::BI__builtin_neon_vsha512h2q_u64:
8549 case NEON::BI__builtin_neon_vsha512su0q_u64:
8550 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8551 Function *F = CGM.getIntrinsic(Int);
8552 return EmitNeonCall(F, Ops, "");
8553 }
8554 case NEON::BI__builtin_neon_vshl_n_v:
8555 case NEON::BI__builtin_neon_vshlq_n_v:
8556 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8557 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8558 "vshl_n");
8559 case NEON::BI__builtin_neon_vshll_n_v: {
8560 llvm::FixedVectorType *SrcTy =
8561 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8562 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8563 if (Usgn)
8564 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8565 else
8566 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8567 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8568 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8569 }
8570 case NEON::BI__builtin_neon_vshrn_n_v: {
8571 llvm::FixedVectorType *SrcTy =
8572 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8573 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8574 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8575 if (Usgn)
8576 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8577 else
8578 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8579 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8580 }
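  // vshrn_n: shift the widened source right by the immediate (logical shift
  // for unsigned, arithmetic for signed), then truncate to the narrow type.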
8581 case NEON::BI__builtin_neon_vshr_n_v:
8582 case NEON::BI__builtin_neon_vshrq_n_v:
8583 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8584 case NEON::BI__builtin_neon_vst1_v:
8585 case NEON::BI__builtin_neon_vst1q_v:
8586 case NEON::BI__builtin_neon_vst2_v:
8587 case NEON::BI__builtin_neon_vst2q_v:
8588 case NEON::BI__builtin_neon_vst3_v:
8589 case NEON::BI__builtin_neon_vst3q_v:
8590 case NEON::BI__builtin_neon_vst4_v:
8591 case NEON::BI__builtin_neon_vst4q_v:
8592 case NEON::BI__builtin_neon_vst2_lane_v:
8593 case NEON::BI__builtin_neon_vst2q_lane_v:
8594 case NEON::BI__builtin_neon_vst3_lane_v:
8595 case NEON::BI__builtin_neon_vst3q_lane_v:
8596 case NEON::BI__builtin_neon_vst4_lane_v:
8597 case NEON::BI__builtin_neon_vst4q_lane_v: {
8598 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8599 Ops.push_back(getAlignmentValue32(PtrOp0));
8600 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8601 }
8602 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8603 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8604 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8605 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8606 case NEON::BI__builtin_neon_vsm4eq_u32: {
8607 Function *F = CGM.getIntrinsic(Int);
8608 return EmitNeonCall(F, Ops, "");
8609 }
8610 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8611 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8612 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8613 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8614 Function *F = CGM.getIntrinsic(Int);
8615 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8616 return EmitNeonCall(F, Ops, "");
8617 }
8618 case NEON::BI__builtin_neon_vst1_x2_v:
8619 case NEON::BI__builtin_neon_vst1q_x2_v:
8620 case NEON::BI__builtin_neon_vst1_x3_v:
8621 case NEON::BI__builtin_neon_vst1q_x3_v:
8622 case NEON::BI__builtin_neon_vst1_x4_v:
8623 case NEON::BI__builtin_neon_vst1q_x4_v: {
8624    // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8625    // in AArch64 it comes last. We may want to standardize on one or the other.
8626 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8627 Arch == llvm::Triple::aarch64_32) {
8628 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8629 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8630 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8631 }
8632 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8633 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8634 }
8635 case NEON::BI__builtin_neon_vsubhn_v: {
8636 llvm::FixedVectorType *SrcTy =
8637 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8638
8639    // %diff = sub <4 x i32> %lhs, %rhs
8640 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8641 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8642 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8643
8644    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8645 Constant *ShiftAmt =
8646 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8647 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8648
8649 // %res = trunc <4 x i32> %high to <4 x i16>
8650 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8651 }
8652 case NEON::BI__builtin_neon_vtrn_v:
8653 case NEON::BI__builtin_neon_vtrnq_v: {
8654 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8655 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8656 Value *SV = nullptr;
8657
8658 for (unsigned vi = 0; vi != 2; ++vi) {
8659 SmallVector<int, 16> Indices;
8660 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8661 Indices.push_back(i+vi);
8662 Indices.push_back(i+e+vi);
8663 }
8664 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8665 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8666 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8667 }
8668 return SV;
8669 }
8670 case NEON::BI__builtin_neon_vtst_v:
8671 case NEON::BI__builtin_neon_vtstq_v: {
8672 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8673 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8674 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8675 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8676 ConstantAggregateZero::get(Ty));
8677 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8678 }
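  // vtst: compute (a & b) != 0 per lane and sign-extend the i1 result back to
  // the element width, giving an all-ones/all-zeros mask.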
8679 case NEON::BI__builtin_neon_vuzp_v:
8680 case NEON::BI__builtin_neon_vuzpq_v: {
8681 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8682 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8683 Value *SV = nullptr;
8684
8685 for (unsigned vi = 0; vi != 2; ++vi) {
8686 SmallVector<int, 16> Indices;
8687 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8688 Indices.push_back(2*i+vi);
8689
8690 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8691 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8692 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8693 }
8694 return SV;
8695 }
8696 case NEON::BI__builtin_neon_vxarq_u64: {
8697 Function *F = CGM.getIntrinsic(Int);
8698 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8699 return EmitNeonCall(F, Ops, "");
8700 }
8701 case NEON::BI__builtin_neon_vzip_v:
8702 case NEON::BI__builtin_neon_vzipq_v: {
8703 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8704 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8705 Value *SV = nullptr;
8706
8707 for (unsigned vi = 0; vi != 2; ++vi) {
8708 SmallVector<int, 16> Indices;
8709 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8710 Indices.push_back((i + vi*e) >> 1);
8711 Indices.push_back(((i + vi*e) >> 1)+e);
8712 }
8713 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8714 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8715 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8716 }
8717 return SV;
8718 }
8719 case NEON::BI__builtin_neon_vdot_s32:
8720 case NEON::BI__builtin_neon_vdot_u32:
8721 case NEON::BI__builtin_neon_vdotq_s32:
8722 case NEON::BI__builtin_neon_vdotq_u32: {
8723 auto *InputTy =
8724 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8725 llvm::Type *Tys[2] = { Ty, InputTy };
8726 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8727 }
8728 case NEON::BI__builtin_neon_vfmlal_low_f16:
8729 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8730 auto *InputTy =
8731 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8732 llvm::Type *Tys[2] = { Ty, InputTy };
8733 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8734 }
8735 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8736 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8737 auto *InputTy =
8738 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8739 llvm::Type *Tys[2] = { Ty, InputTy };
8740 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8741 }
8742 case NEON::BI__builtin_neon_vfmlal_high_f16:
8743 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8744 auto *InputTy =
8745 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8746 llvm::Type *Tys[2] = { Ty, InputTy };
8747 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8748 }
8749 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8750 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8751 auto *InputTy =
8752 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8753 llvm::Type *Tys[2] = { Ty, InputTy };
8754 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8755 }
8756 case NEON::BI__builtin_neon_vmmlaq_s32:
8757 case NEON::BI__builtin_neon_vmmlaq_u32: {
8758 auto *InputTy =
8759 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8760 llvm::Type *Tys[2] = { Ty, InputTy };
8761 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8762 }
8763 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8764 auto *InputTy =
8765 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8766 llvm::Type *Tys[2] = { Ty, InputTy };
8767 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8768 }
8769 case NEON::BI__builtin_neon_vusdot_s32:
8770 case NEON::BI__builtin_neon_vusdotq_s32: {
8771 auto *InputTy =
8772 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8773 llvm::Type *Tys[2] = { Ty, InputTy };
8774 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8775 }
8776 case NEON::BI__builtin_neon_vbfdot_f32:
8777 case NEON::BI__builtin_neon_vbfdotq_f32: {
8778 llvm::Type *InputTy =
8779 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8780 llvm::Type *Tys[2] = { Ty, InputTy };
8781 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8782 }
8783 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8784 llvm::Type *Tys[1] = { Ty };
8785 Function *F = CGM.getIntrinsic(Int, Tys);
8786 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8787 }
8788
8789 }
8790
8791 assert(Int && "Expected valid intrinsic number");
8792
8793 // Determine the type(s) of this overloaded AArch64 intrinsic.
8794 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8795
8796 Value *Result = EmitNeonCall(F, Ops, NameHint);
8797 llvm::Type *ResultType = ConvertType(E->getType());
8798  // Cast a one-element vector result from the AArch64 intrinsic to the
8799  // scalar type expected by the builtin.
8800 return Builder.CreateBitCast(Result, ResultType, NameHint);
8801}
8802
8803Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8804    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8805 const CmpInst::Predicate Ip, const Twine &Name) {
8806 llvm::Type *OTy = Op->getType();
8807
8808 // FIXME: this is utterly horrific. We should not be looking at previous
8809 // codegen context to find out what needs doing. Unfortunately TableGen
8810 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8811 // (etc).
8812 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8813 OTy = BI->getOperand(0)->getType();
8814
8815 Op = Builder.CreateBitCast(Op, OTy);
8816 if (OTy->getScalarType()->isFloatingPointTy()) {
8817 if (Fp == CmpInst::FCMP_OEQ)
8818 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8819 else
8820 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8821 } else {
8822 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8823 }
8824 return Builder.CreateSExt(Op, Ty, Name);
8825}
8826
8827static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8828                                        Value *ExtOp, Value *IndexOp,
8829 llvm::Type *ResTy, unsigned IntID,
8830 const char *Name) {
8831  SmallVector<Value *, 2> TblOps;
8832  if (ExtOp)
8833 TblOps.push_back(ExtOp);
8834
8835  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8836 SmallVector<int, 16> Indices;
8837 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8838 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8839 Indices.push_back(2*i);
8840 Indices.push_back(2*i+1);
8841 }
8842
8843 int PairPos = 0, End = Ops.size() - 1;
8844 while (PairPos < End) {
8845 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8846 Ops[PairPos+1], Indices,
8847 Name));
8848 PairPos += 2;
8849 }
8850
8851  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8852  // of the last 128-bit lookup table with zeros.
8853 if (PairPos == End) {
8854 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8855 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8856 ZeroTbl, Indices, Name));
8857 }
8858
8859 Function *TblF;
8860 TblOps.push_back(IndexOp);
8861 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8862
8863 return CGF.EmitNeonCall(TblF, TblOps, Name);
8864}
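// The 64-bit table operands are therefore packed pairwise into 128-bit tables
// (zero-padding the last one when the count is odd) before the TBL/TBX
// intrinsic is called with the index vector appended as the final operand.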
8865
8866Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8867 unsigned Value;
8868 switch (BuiltinID) {
8869 default:
8870 return nullptr;
8871 case clang::ARM::BI__builtin_arm_nop:
8872 Value = 0;
8873 break;
8874 case clang::ARM::BI__builtin_arm_yield:
8875 case clang::ARM::BI__yield:
8876 Value = 1;
8877 break;
8878 case clang::ARM::BI__builtin_arm_wfe:
8879 case clang::ARM::BI__wfe:
8880 Value = 2;
8881 break;
8882 case clang::ARM::BI__builtin_arm_wfi:
8883 case clang::ARM::BI__wfi:
8884 Value = 3;
8885 break;
8886 case clang::ARM::BI__builtin_arm_sev:
8887 case clang::ARM::BI__sev:
8888 Value = 4;
8889 break;
8890 case clang::ARM::BI__builtin_arm_sevl:
8891 case clang::ARM::BI__sevl:
8892 Value = 5;
8893 break;
8894 }
8895
8896 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8897 llvm::ConstantInt::get(Int32Ty, Value));
8898}
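// For instance, __builtin_arm_wfi() lowers to @llvm.arm.hint(i32 3).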
8899
8900enum SpecialRegisterAccessKind {
8901  NormalRead,
8902  VolatileRead,
8903  Write,
8904};
8905
8906// Generates the IR for __builtin_read_exec_*.
8907// Lowers the builtin to amdgcn_ballot intrinsic.
8908static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8909                                         llvm::Type *RegisterType,
8910 llvm::Type *ValueType, bool isExecHi) {
8911 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8912 CodeGen::CodeGenModule &CGM = CGF.CGM;
8913
8914 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8915 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8916
8917 if (isExecHi) {
8918 Value *Rt2 = Builder.CreateLShr(Call, 32);
8919 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8920 return Rt2;
8921 }
8922
8923 return Call;
8924}
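// For the *_hi variant the 64-bit ballot result is shifted right by 32 and
// truncated to i32; otherwise the ballot value is returned unchanged.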
8925
8926// Generates the IR for the read/write special register builtin.
8927// ValueType is the type of the value that is to be written or read;
8928// RegisterType is the type of the register being written to or read from.
8929static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8930                                         const CallExpr *E,
8931 llvm::Type *RegisterType,
8932 llvm::Type *ValueType,
8933 SpecialRegisterAccessKind AccessKind,
8934 StringRef SysReg = "") {
8935  // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
8936 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8937 RegisterType->isIntegerTy(128)) &&
8938 "Unsupported size for register.");
8939
8940 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8941 CodeGen::CodeGenModule &CGM = CGF.CGM;
8942 LLVMContext &Context = CGM.getLLVMContext();
8943
8944 if (SysReg.empty()) {
8945 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8946 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8947 }
8948
8949 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8950 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8951 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8952
8953 llvm::Type *Types[] = { RegisterType };
8954
8955 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8956 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8957 && "Can't fit 64-bit value in 32-bit register");
8958
8959 if (AccessKind != Write) {
8960 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8961 llvm::Function *F = CGM.getIntrinsic(
8962 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8963 : llvm::Intrinsic::read_register,
8964 Types);
8965 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8966
8967 if (MixedTypes)
8968 // Read into 64 bit register and then truncate result to 32 bit.
8969 return Builder.CreateTrunc(Call, ValueType);
8970
8971 if (ValueType->isPointerTy())
8972 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8973 return Builder.CreateIntToPtr(Call, ValueType);
8974
8975 return Call;
8976 }
8977
8978 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8979 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8980 if (MixedTypes) {
8981 // Extend 32 bit write value to 64 bit to pass to write.
8982 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8983 return Builder.CreateCall(F, { Metadata, ArgValue });
8984 }
8985
8986 if (ValueType->isPointerTy()) {
8987    // Have a VoidPtrTy ArgValue but need an i32/i64 to pass to write_register.
8988 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8989 return Builder.CreateCall(F, { Metadata, ArgValue });
8990 }
8991
8992 return Builder.CreateCall(F, { Metadata, ArgValue });
8993}
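// Reads lower to @llvm.read_register / @llvm.read_volatile_register and writes
// to @llvm.write_register, with the register named via MDString metadata,
// roughly: call i64 @llvm.read_register.i64(metadata !{!"regname"}), where
// "regname" stands for whatever string the builtin was given.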
8994
8995/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8996/// argument that specifies the vector type.
8997static bool HasExtraNeonArgument(unsigned BuiltinID) {
8998 switch (BuiltinID) {
8999 default: break;
9000 case NEON::BI__builtin_neon_vget_lane_i8:
9001 case NEON::BI__builtin_neon_vget_lane_i16:
9002 case NEON::BI__builtin_neon_vget_lane_bf16:
9003 case NEON::BI__builtin_neon_vget_lane_i32:
9004 case NEON::BI__builtin_neon_vget_lane_i64:
9005 case NEON::BI__builtin_neon_vget_lane_f32:
9006 case NEON::BI__builtin_neon_vgetq_lane_i8:
9007 case NEON::BI__builtin_neon_vgetq_lane_i16:
9008 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9009 case NEON::BI__builtin_neon_vgetq_lane_i32:
9010 case NEON::BI__builtin_neon_vgetq_lane_i64:
9011 case NEON::BI__builtin_neon_vgetq_lane_f32:
9012 case NEON::BI__builtin_neon_vduph_lane_bf16:
9013 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9014 case NEON::BI__builtin_neon_vset_lane_i8:
9015 case NEON::BI__builtin_neon_vset_lane_i16:
9016 case NEON::BI__builtin_neon_vset_lane_bf16:
9017 case NEON::BI__builtin_neon_vset_lane_i32:
9018 case NEON::BI__builtin_neon_vset_lane_i64:
9019 case NEON::BI__builtin_neon_vset_lane_f32:
9020 case NEON::BI__builtin_neon_vsetq_lane_i8:
9021 case NEON::BI__builtin_neon_vsetq_lane_i16:
9022 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9023 case NEON::BI__builtin_neon_vsetq_lane_i32:
9024 case NEON::BI__builtin_neon_vsetq_lane_i64:
9025 case NEON::BI__builtin_neon_vsetq_lane_f32:
9026 case NEON::BI__builtin_neon_vsha1h_u32:
9027 case NEON::BI__builtin_neon_vsha1cq_u32:
9028 case NEON::BI__builtin_neon_vsha1pq_u32:
9029 case NEON::BI__builtin_neon_vsha1mq_u32:
9030 case NEON::BI__builtin_neon_vcvth_bf16_f32:
9031 case clang::ARM::BI_MoveToCoprocessor:
9032 case clang::ARM::BI_MoveToCoprocessor2:
9033 return false;
9034 }
9035 return true;
9036}
9037
9038Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
9039 const CallExpr *E,
9040 ReturnValueSlot ReturnValue,
9041 llvm::Triple::ArchType Arch) {
9042 if (auto Hint = GetValueForARMHint(BuiltinID))
9043 return Hint;
9044
9045 if (BuiltinID == clang::ARM::BI__emit) {
9046 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
9047 llvm::FunctionType *FTy =
9048 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
9049
9050    Expr::EvalResult Result;
9051    if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
9052 llvm_unreachable("Sema will ensure that the parameter is constant");
9053
9054 llvm::APSInt Value = Result.Val.getInt();
9055 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
9056
9057 llvm::InlineAsm *Emit =
9058 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
9059 /*hasSideEffects=*/true)
9060 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
9061 /*hasSideEffects=*/true);
9062
9063 return Builder.CreateCall(Emit);
9064 }
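  // For example, in Thumb mode __emit(0xbf00) (a NOP encoding) is emitted as
  // the side-effecting inline asm ".inst.n 0xBF00".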
9065
9066 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
9067 Value *Option = EmitScalarExpr(E->getArg(0));
9068 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
9069 }
9070
9071 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
9072 Value *Address = EmitScalarExpr(E->getArg(0));
9073 Value *RW = EmitScalarExpr(E->getArg(1));
9074 Value *IsData = EmitScalarExpr(E->getArg(2));
9075
9076    // Locality is not supported on the ARM target, so use the maximum value (3).
9077 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
9078
9079 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
9080 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
9081 }
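  // e.g. __builtin_arm_prefetch(p, /*rw=*/0, /*data=*/1) becomes
  // @llvm.prefetch(p, 0, /*locality=*/3, 1).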
9082
9083 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
9084 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9085 return Builder.CreateCall(
9086 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
9087 }
9088
9089 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
9090 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
9091 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9092 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
9093 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
9094 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
9095 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
9096 return Res;
9097 }
9098
9099
9100 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
9101 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9102 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
9103 }
9104 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
9105 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
9106 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
9107 "cls");
9108 }
9109
9110 if (BuiltinID == clang::ARM::BI__clear_cache) {
9111 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
9112 const FunctionDecl *FD = E->getDirectCallee();
9113 Value *Ops[2];
9114 for (unsigned i = 0; i < 2; i++)
9115 Ops[i] = EmitScalarExpr(E->getArg(i));
9116 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
9117 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
9118 StringRef Name = FD->getName();
9119 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
9120 }
9121
9122 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
9123 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
9124 Function *F;
9125
9126 switch (BuiltinID) {
9127 default: llvm_unreachable("unexpected builtin");
9128 case clang::ARM::BI__builtin_arm_mcrr:
9129 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
9130 break;
9131 case clang::ARM::BI__builtin_arm_mcrr2:
9132 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
9133 break;
9134 }
9135
9136    // The MCRR{2} instruction has 5 operands, but
9137    // the intrinsic has only 4 because Rt and Rt2
9138    // are represented as a single unsigned 64-bit
9139    // integer in the intrinsic definition, even
9140    // though internally they are two separate
9141    // 32-bit integers.
9142
9143 Value *Coproc = EmitScalarExpr(E->getArg(0));
9144 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9145 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
9146 Value *CRm = EmitScalarExpr(E->getArg(3));
9147
9148 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9149 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
9150 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
9151 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
9152
9153 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
9154 }
9155
9156 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
9157 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
9158 Function *F;
9159
9160 switch (BuiltinID) {
9161 default: llvm_unreachable("unexpected builtin");
9162 case clang::ARM::BI__builtin_arm_mrrc:
9163 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
9164 break;
9165 case clang::ARM::BI__builtin_arm_mrrc2:
9166 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
9167 break;
9168 }
9169
9170 Value *Coproc = EmitScalarExpr(E->getArg(0));
9171 Value *Opc1 = EmitScalarExpr(E->getArg(1));
9172 Value *CRm = EmitScalarExpr(E->getArg(2));
9173 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
9174
9175 // Returns an unsigned 64 bit integer, represented
9176 // as two 32 bit integers.
9177
9178 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
9179 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
9180 Rt = Builder.CreateZExt(Rt, Int64Ty);
9181 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
9182
9183 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
9184 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
9185 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
9186
9187 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
9188 }
9189
9190 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
9191 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9192 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
9193 getContext().getTypeSize(E->getType()) == 64) ||
9194 BuiltinID == clang::ARM::BI__ldrexd) {
9195 Function *F;
9196
9197 switch (BuiltinID) {
9198 default: llvm_unreachable("unexpected builtin");
9199 case clang::ARM::BI__builtin_arm_ldaex:
9200 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
9201 break;
9202 case clang::ARM::BI__builtin_arm_ldrexd:
9203 case clang::ARM::BI__builtin_arm_ldrex:
9204 case clang::ARM::BI__ldrexd:
9205 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
9206 break;
9207 }
9208
9209 Value *LdPtr = EmitScalarExpr(E->getArg(0));
9210 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
9211
9212 Value *Val0 = Builder.CreateExtractValue(Val, 1);
9213 Value *Val1 = Builder.CreateExtractValue(Val, 0);
9214 Val0 = Builder.CreateZExt(Val0, Int64Ty);
9215 Val1 = Builder.CreateZExt(Val1, Int64Ty);
9216
9217 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
9218 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
9219 Val = Builder.CreateOr(Val, Val1);
9220 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
9221 }
9222
9223 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
9224 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
9225 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
9226
9227 QualType Ty = E->getType();
9228 llvm::Type *RealResTy = ConvertType(Ty);
9229 llvm::Type *IntTy =
9230 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9231
9232    Function *F = CGM.getIntrinsic(
9233        BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
9234 : Intrinsic::arm_ldrex,
9235 UnqualPtrTy);
9236 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
9237 Val->addParamAttr(
9238 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
9239
9240 if (RealResTy->isPointerTy())
9241 return Builder.CreateIntToPtr(Val, RealResTy);
9242 else {
9243 llvm::Type *IntResTy = llvm::IntegerType::get(
9244 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
9245 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
9246 RealResTy);
9247 }
9248 }
9249
9250 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
9251 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
9252 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
9253 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
9254    Function *F = CGM.getIntrinsic(
9255        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
9256 : Intrinsic::arm_strexd);
9257 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
9258
9259 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
9260 Value *Val = EmitScalarExpr(E->getArg(0));
9261 Builder.CreateStore(Val, Tmp);
9262
9263 Address LdPtr = Tmp.withElementType(STy);
9264 Val = Builder.CreateLoad(LdPtr);
9265
9266 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
9267 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
9268 Value *StPtr = EmitScalarExpr(E->getArg(1));
9269 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
9270 }
9271
9272 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
9273 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
9274 Value *StoreVal = EmitScalarExpr(E->getArg(0));
9275 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
9276
9277 QualType Ty = E->getArg(0)->getType();
9278 llvm::Type *StoreTy =
9279 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
9280
9281 if (StoreVal->getType()->isPointerTy())
9282 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
9283 else {
9284 llvm::Type *IntTy = llvm::IntegerType::get(
9285          getLLVMContext(),
9286          CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
9287 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
9288 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
9289 }
9290
9291    Function *F = CGM.getIntrinsic(
9292        BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
9293 : Intrinsic::arm_strex,
9294 StoreAddr->getType());
9295
9296 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
9297 CI->addParamAttr(
9298 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
9299 return CI;
9300 }
9301
9302 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
9303 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
9304 return Builder.CreateCall(F);
9305 }
9306
9307 // CRC32
9308 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
9309 switch (BuiltinID) {
9310 case clang::ARM::BI__builtin_arm_crc32b:
9311 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
9312 case clang::ARM::BI__builtin_arm_crc32cb:
9313 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
9314 case clang::ARM::BI__builtin_arm_crc32h:
9315 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
9316 case clang::ARM::BI__builtin_arm_crc32ch:
9317 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
9318 case clang::ARM::BI__builtin_arm_crc32w:
9319 case clang::ARM::BI__builtin_arm_crc32d:
9320 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
9321 case clang::ARM::BI__builtin_arm_crc32cw:
9322 case clang::ARM::BI__builtin_arm_crc32cd:
9323 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
9324 }
9325
9326 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
9327 Value *Arg0 = EmitScalarExpr(E->getArg(0));
9328 Value *Arg1 = EmitScalarExpr(E->getArg(1));
9329
9330 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
9331 // intrinsics, hence we need different codegen for these cases.
9332 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
9333 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
9334 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
9335 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
9336 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
9337 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
9338
9339 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9340 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
9341 return Builder.CreateCall(F, {Res, Arg1b});
9342 } else {
9343 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
9344
9345 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
9346 return Builder.CreateCall(F, {Arg0, Arg1});
9347 }
9348 }
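  // e.g. __builtin_arm_crc32d(a, b) is emitted as
  // crc32w(crc32w(a, lo32(b)), hi32(b)).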
9349
9350 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9351 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9352 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9353 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
9354 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
9355 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
9356
9357 SpecialRegisterAccessKind AccessKind = Write;
9358 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
9359 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9360 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
9361 AccessKind = VolatileRead;
9362
9363 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
9364 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
9365
9366 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
9367 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
9368
9369 llvm::Type *ValueType;
9370 llvm::Type *RegisterType;
9371 if (IsPointerBuiltin) {
9372 ValueType = VoidPtrTy;
9373      RegisterType = Int32Ty;
9374    } else if (Is64Bit) {
9375 ValueType = RegisterType = Int64Ty;
9376 } else {
9377 ValueType = RegisterType = Int32Ty;
9378 }
9379
9380 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
9381 AccessKind);
9382 }
9383
9384 if (BuiltinID == ARM::BI__builtin_sponentry) {
9385 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
9386 return Builder.CreateCall(F);
9387 }
9388
9389 // Handle MSVC intrinsics before argument evaluation to prevent double
9390 // evaluation.
9391 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
9392 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
9393
9394 // Deal with MVE builtins
9395 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9396 return Result;
9397 // Handle CDE builtins
9398 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
9399 return Result;
9400
9401  // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
9402 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
9403 return P.first == BuiltinID;
9404 });
9405 if (It != end(NEONEquivalentIntrinsicMap))
9406 BuiltinID = It->second;
9407
9408 // Find out if any arguments are required to be integer constant
9409 // expressions.
9410 unsigned ICEArguments = 0;
9411  ASTContext::GetBuiltinTypeError Error;
9412  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9413 assert(Error == ASTContext::GE_None && "Should not codegen an error");
9414
9415 auto getAlignmentValue32 = [&](Address addr) -> Value* {
9416 return Builder.getInt32(addr.getAlignment().getQuantity());
9417 };
9418
9419 Address PtrOp0 = Address::invalid();
9420 Address PtrOp1 = Address::invalid();
9421  SmallVector<Value*, 4> Ops;
9422  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
9423 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
9424 for (unsigned i = 0, e = NumArgs; i != e; i++) {
9425 if (i == 0) {
9426 switch (BuiltinID) {
9427 case NEON::BI__builtin_neon_vld1_v:
9428 case NEON::BI__builtin_neon_vld1q_v:
9429 case NEON::BI__builtin_neon_vld1q_lane_v:
9430 case NEON::BI__builtin_neon_vld1_lane_v:
9431 case NEON::BI__builtin_neon_vld1_dup_v:
9432 case NEON::BI__builtin_neon_vld1q_dup_v:
9433 case NEON::BI__builtin_neon_vst1_v:
9434 case NEON::BI__builtin_neon_vst1q_v:
9435 case NEON::BI__builtin_neon_vst1q_lane_v:
9436 case NEON::BI__builtin_neon_vst1_lane_v:
9437 case NEON::BI__builtin_neon_vst2_v:
9438 case NEON::BI__builtin_neon_vst2q_v:
9439 case NEON::BI__builtin_neon_vst2_lane_v:
9440 case NEON::BI__builtin_neon_vst2q_lane_v:
9441 case NEON::BI__builtin_neon_vst3_v:
9442 case NEON::BI__builtin_neon_vst3q_v:
9443 case NEON::BI__builtin_neon_vst3_lane_v:
9444 case NEON::BI__builtin_neon_vst3q_lane_v:
9445 case NEON::BI__builtin_neon_vst4_v:
9446 case NEON::BI__builtin_neon_vst4q_v:
9447 case NEON::BI__builtin_neon_vst4_lane_v:
9448 case NEON::BI__builtin_neon_vst4q_lane_v:
9449 // Get the alignment for the argument in addition to the value;
9450 // we'll use it later.
9451 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
9452 Ops.push_back(PtrOp0.emitRawPointer(*this));
9453 continue;
9454 }
9455 }
9456 if (i == 1) {
9457 switch (BuiltinID) {
9458 case NEON::BI__builtin_neon_vld2_v:
9459 case NEON::BI__builtin_neon_vld2q_v:
9460 case NEON::BI__builtin_neon_vld3_v:
9461 case NEON::BI__builtin_neon_vld3q_v:
9462 case NEON::BI__builtin_neon_vld4_v:
9463 case NEON::BI__builtin_neon_vld4q_v:
9464 case NEON::BI__builtin_neon_vld2_lane_v:
9465 case NEON::BI__builtin_neon_vld2q_lane_v:
9466 case NEON::BI__builtin_neon_vld3_lane_v:
9467 case NEON::BI__builtin_neon_vld3q_lane_v:
9468 case NEON::BI__builtin_neon_vld4_lane_v:
9469 case NEON::BI__builtin_neon_vld4q_lane_v:
9470 case NEON::BI__builtin_neon_vld2_dup_v:
9471 case NEON::BI__builtin_neon_vld2q_dup_v:
9472 case NEON::BI__builtin_neon_vld3_dup_v:
9473 case NEON::BI__builtin_neon_vld3q_dup_v:
9474 case NEON::BI__builtin_neon_vld4_dup_v:
9475 case NEON::BI__builtin_neon_vld4q_dup_v:
9476 // Get the alignment for the argument in addition to the value;
9477 // we'll use it later.
9478 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9479 Ops.push_back(PtrOp1.emitRawPointer(*this));
9480 continue;
9481 }
9482 }
9483
9484 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9485 }
9486
9487 switch (BuiltinID) {
9488 default: break;
9489
9490 case NEON::BI__builtin_neon_vget_lane_i8:
9491 case NEON::BI__builtin_neon_vget_lane_i16:
9492 case NEON::BI__builtin_neon_vget_lane_i32:
9493 case NEON::BI__builtin_neon_vget_lane_i64:
9494 case NEON::BI__builtin_neon_vget_lane_bf16:
9495 case NEON::BI__builtin_neon_vget_lane_f32:
9496 case NEON::BI__builtin_neon_vgetq_lane_i8:
9497 case NEON::BI__builtin_neon_vgetq_lane_i16:
9498 case NEON::BI__builtin_neon_vgetq_lane_i32:
9499 case NEON::BI__builtin_neon_vgetq_lane_i64:
9500 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9501 case NEON::BI__builtin_neon_vgetq_lane_f32:
9502 case NEON::BI__builtin_neon_vduph_lane_bf16:
9503 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9504 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9505
9506 case NEON::BI__builtin_neon_vrndns_f32: {
9507 Value *Arg = EmitScalarExpr(E->getArg(0));
9508 llvm::Type *Tys[] = {Arg->getType()};
9509 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9510 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9511
9512 case NEON::BI__builtin_neon_vset_lane_i8:
9513 case NEON::BI__builtin_neon_vset_lane_i16:
9514 case NEON::BI__builtin_neon_vset_lane_i32:
9515 case NEON::BI__builtin_neon_vset_lane_i64:
9516 case NEON::BI__builtin_neon_vset_lane_bf16:
9517 case NEON::BI__builtin_neon_vset_lane_f32:
9518 case NEON::BI__builtin_neon_vsetq_lane_i8:
9519 case NEON::BI__builtin_neon_vsetq_lane_i16:
9520 case NEON::BI__builtin_neon_vsetq_lane_i32:
9521 case NEON::BI__builtin_neon_vsetq_lane_i64:
9522 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9523 case NEON::BI__builtin_neon_vsetq_lane_f32:
9524 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9525
9526 case NEON::BI__builtin_neon_vsha1h_u32:
9527 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9528 "vsha1h");
9529 case NEON::BI__builtin_neon_vsha1cq_u32:
9530 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9531                        "vsha1c");
9532 case NEON::BI__builtin_neon_vsha1pq_u32:
9533 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9534                        "vsha1p");
9535 case NEON::BI__builtin_neon_vsha1mq_u32:
9536 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9537                        "vsha1m");
9538
9539 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9540 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9541 "vcvtbfp2bf");
9542 }
9543
9544 // The ARM _MoveToCoprocessor builtins put the input register value as
9545 // the first argument, but the LLVM intrinsic expects it as the third one.
9546 case clang::ARM::BI_MoveToCoprocessor:
9547 case clang::ARM::BI_MoveToCoprocessor2: {
9548 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9549 ? Intrinsic::arm_mcr
9550 : Intrinsic::arm_mcr2);
9551 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9552 Ops[3], Ops[4], Ops[5]});
9553 }
9554 }
9555
9556 // Get the last argument, which specifies the vector type.
9557 assert(HasExtraArg);
9558 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9559 std::optional<llvm::APSInt> Result =
9560      Arg->getIntegerConstantExpr(getContext());
9561  if (!Result)
9562 return nullptr;
9563
9564 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9565 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9566 // Determine the overloaded type of this builtin.
9567 llvm::Type *Ty;
9568 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9569 Ty = FloatTy;
9570 else
9571 Ty = DoubleTy;
9572
9573 // Determine whether this is an unsigned conversion or not.
9574 bool usgn = Result->getZExtValue() == 1;
9575 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9576
9577 // Call the appropriate intrinsic.
9578 Function *F = CGM.getIntrinsic(Int, Ty);
9579 return Builder.CreateCall(F, Ops, "vcvtr");
9580 }
9581
9582 // Determine the type of this overloaded NEON intrinsic.
9583 NeonTypeFlags Type = Result->getZExtValue();
9584 bool usgn = Type.isUnsigned();
9585 bool rightShift = false;
9586
9587 llvm::FixedVectorType *VTy =
9588 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9589 getTarget().hasBFloat16Type());
9590 llvm::Type *Ty = VTy;
9591 if (!Ty)
9592 return nullptr;
9593
9594 // Many NEON builtins have identical semantics and uses in ARM and
9595 // AArch64. Emit these in a single function.
9596 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9597 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9598 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9599 if (Builtin)
9600    return EmitCommonNeonBuiltinExpr(
9601        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9602 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9603
9604 unsigned Int;
9605 switch (BuiltinID) {
9606 default: return nullptr;
9607 case NEON::BI__builtin_neon_vld1q_lane_v:
9608 // Handle 64-bit integer elements as a special case. Use shuffles of
9609 // one-element vectors to avoid poor code for i64 in the backend.
9610 if (VTy->getElementType()->isIntegerTy(64)) {
9611 // Extract the other lane.
9612 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9613 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9614 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9615 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9616 // Load the value as a one-element vector.
9617 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9618 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9619 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9620 Value *Align = getAlignmentValue32(PtrOp0);
9621 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9622 // Combine them.
9623 int Indices[] = {1 - Lane, Lane};
9624 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9625 }
9626 [[fallthrough]];
9627 case NEON::BI__builtin_neon_vld1_lane_v: {
9628 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9629 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9630 Value *Ld = Builder.CreateLoad(PtrOp0);
9631 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9632 }
9633 case NEON::BI__builtin_neon_vqrshrn_n_v:
9634 Int =
9635 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9636 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9637 1, true);
9638 case NEON::BI__builtin_neon_vqrshrun_n_v:
9639 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9640 Ops, "vqrshrun_n", 1, true);
9641 case NEON::BI__builtin_neon_vqshrn_n_v:
9642 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9643 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9644 1, true);
9645 case NEON::BI__builtin_neon_vqshrun_n_v:
9646 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9647 Ops, "vqshrun_n", 1, true);
9648 case NEON::BI__builtin_neon_vrecpe_v:
9649 case NEON::BI__builtin_neon_vrecpeq_v:
9650 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9651 Ops, "vrecpe");
9652 case NEON::BI__builtin_neon_vrshrn_n_v:
9653 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9654 Ops, "vrshrn_n", 1, true);
9655 case NEON::BI__builtin_neon_vrsra_n_v:
9656 case NEON::BI__builtin_neon_vrsraq_n_v:
9657 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9658 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9659 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9660 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9661 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9662 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9663 case NEON::BI__builtin_neon_vsri_n_v:
9664 case NEON::BI__builtin_neon_vsriq_n_v:
9665 rightShift = true;
9666 [[fallthrough]];
9667 case NEON::BI__builtin_neon_vsli_n_v:
9668 case NEON::BI__builtin_neon_vsliq_n_v:
9669 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9670 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9671 Ops, "vsli_n");
9672 case NEON::BI__builtin_neon_vsra_n_v:
9673 case NEON::BI__builtin_neon_vsraq_n_v:
9674 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9675 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9676 return Builder.CreateAdd(Ops[0], Ops[1]);
9677 case NEON::BI__builtin_neon_vst1q_lane_v:
9678 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9679 // a one-element vector and avoid poor code for i64 in the backend.
9680 if (VTy->getElementType()->isIntegerTy(64)) {
9681 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9682 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9683 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9684 Ops[2] = getAlignmentValue32(PtrOp0);
9685 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9686 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9687 Tys), Ops);
9688 }
9689 [[fallthrough]];
9690 case NEON::BI__builtin_neon_vst1_lane_v: {
9691 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9692 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9693 return Builder.CreateStore(Ops[1],
9694 PtrOp0.withElementType(Ops[1]->getType()));
9695 }
9696 case NEON::BI__builtin_neon_vtbl1_v:
9697 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9698 Ops, "vtbl1");
9699 case NEON::BI__builtin_neon_vtbl2_v:
9700 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9701 Ops, "vtbl2");
9702 case NEON::BI__builtin_neon_vtbl3_v:
9703 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9704 Ops, "vtbl3");
9705 case NEON::BI__builtin_neon_vtbl4_v:
9706 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9707 Ops, "vtbl4");
9708 case NEON::BI__builtin_neon_vtbx1_v:
9709 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9710 Ops, "vtbx1");
9711 case NEON::BI__builtin_neon_vtbx2_v:
9712 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9713 Ops, "vtbx2");
9714 case NEON::BI__builtin_neon_vtbx3_v:
9715 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9716 Ops, "vtbx3");
9717 case NEON::BI__builtin_neon_vtbx4_v:
9718 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9719 Ops, "vtbx4");
9720 }
9721}
9722
9723 template<typename Integer>
9724 static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9725 return E->getIntegerConstantExpr(Context)->getExtValue();
9726}
9727
9728static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9729 llvm::Type *T, bool Unsigned) {
9730 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9731 // which finds it convenient to specify signed/unsigned as a boolean flag.
9732 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9733}
9734
9735static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9736 uint32_t Shift, bool Unsigned) {
9737 // MVE helper function for integer shift right. This must handle signed vs
9738 // unsigned, and also deal specially with the case where the shift count is
9739 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9740 // undefined behavior, but in MVE it's legal, so we must convert it to code
9741 // that is not undefined in IR.
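// For example, with 32-bit lanes a shift count of 32 becomes either a zero
// vector (unsigned) or an arithmetic shift by 31 (signed), as handled below.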
9742 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9743 ->getElementType()
9744 ->getPrimitiveSizeInBits();
9745 if (Shift == LaneBits) {
9746 // An unsigned shift of the full lane size always generates zero, so we can
9747 // simply emit a zero vector. A signed shift of the full lane size does the
9748 // same thing as shifting by one bit fewer.
9749 if (Unsigned)
9750 return llvm::Constant::getNullValue(V->getType());
9751 else
9752 --Shift;
9753 }
9754 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9755}
9756
9757static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9758 // MVE-specific helper function for a vector splat, which infers the element
9759 // count of the output vector by knowing that MVE vectors are all 128 bits
9760 // wide.
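// For example, an i16 scalar is splatted to an 8-element vector and an i32
// scalar to a 4-element vector.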
9761 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
9762 return Builder.CreateVectorSplat(Elements, V);
9763}
9764
9765static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9766 CodeGenFunction *CGF,
9767 llvm::Value *V,
9768 llvm::Type *DestType) {
9769 // Convert one MVE vector type into another by reinterpreting its in-register
9770 // format.
9771 //
9772 // On a little-endian target, this is identical to a bitcast (which reinterprets
9773 // the memory format). On a big-endian target, they're not necessarily the same, because
9774 // the register and memory formats map to each other differently depending on
9775 // the lane size.
9776 //
9777 // We generate a bitcast whenever we can (if we're little-endian, or if the
9778 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9779 // that performs the different kind of reinterpretation.
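// For example, on a big-endian target a reinterpret from <16 x i8> to
// <4 x i32> goes through the arm.mve.vreinterpretq intrinsic because the lane
// sizes differ, whereas a same-lane-size reinterpret is still a plain bitcast.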
9780 if (CGF->getTarget().isBigEndian() &&
9781 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9782 return Builder.CreateCall(
9783 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9784 {DestType, V->getType()}),
9785 V);
9786 } else {
9787 return Builder.CreateBitCast(V, DestType);
9788 }
9789}
9790
9791static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9792 // Make a shufflevector that extracts every other element of a vector (evens
9793 // or odds, as desired).
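// For an 8-element input this builds the mask {0,2,4,6} when Odd is false and
// {1,3,5,7} when Odd is true.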
9794 SmallVector<int, 16> Indices;
9795 unsigned InputElements =
9796 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9797 for (unsigned i = 0; i < InputElements; i += 2)
9798 Indices.push_back(i + Odd);
9799 return Builder.CreateShuffleVector(V, Indices);
9800}
9801
9802static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9803 llvm::Value *V1) {
9804 // Make a shufflevector that interleaves two vectors element by element.
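// For two 4-element inputs this builds the mask {0,4,1,5,2,6,3,7}.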
9805 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9806 SmallVector<int, 16> Indices;
9807 unsigned InputElements =
9808 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9809 for (unsigned i = 0; i < InputElements; i++) {
9810 Indices.push_back(i);
9811 Indices.push_back(i + InputElements);
9812 }
9813 return Builder.CreateShuffleVector(V0, V1, Indices);
9814}
9815
9816template<unsigned HighBit, unsigned OtherBits>
9817static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9818 // MVE-specific helper function to make a vector splat of a constant such as
9819 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
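// For example, with 32-bit lanes <1,0> yields INT_MIN (0x80000000), <0,1>
// yields INT_MAX (0x7fffffff) and <1,1> yields UINT_MAX (0xffffffff).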
9820 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9821 unsigned LaneBits = T->getPrimitiveSizeInBits();
9822 uint32_t Value = HighBit << (LaneBits - 1);
9823 if (OtherBits)
9824 Value |= (1UL << (LaneBits - 1)) - 1;
9825 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9826 return ARMMVEVectorSplat(Builder, Lane);
9827}
9828
9829static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9830 llvm::Value *V,
9831 unsigned ReverseWidth) {
9832 // MVE-specific helper function which reverses the elements of a
9833 // vector within every (ReverseWidth)-bit collection of lanes.
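// For example, reversing 8-bit lanes within 32-bit groups uses Mask == 3,
// giving the index pattern {3,2,1,0, 7,6,5,4, ...}.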
9834 SmallVector<int, 16> Indices;
9835 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9836 unsigned Elements = 128 / LaneSize;
9837 unsigned Mask = ReverseWidth / LaneSize - 1;
9838 for (unsigned i = 0; i < Elements; i++)
9839 Indices.push_back(i ^ Mask);
9840 return Builder.CreateShuffleVector(V, Indices);
9841}
9842
9843 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9844 const CallExpr *E,
9845 ReturnValueSlot ReturnValue,
9846 llvm::Triple::ArchType Arch) {
9847 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9848 Intrinsic::ID IRIntr;
9849 unsigned NumVectors;
9850
9851 // Code autogenerated by Tablegen will handle all the simple builtins.
9852 switch (BuiltinID) {
9853 #include "clang/Basic/arm_mve_builtin_cg.inc"
9854
9855 // If we didn't match an MVE builtin id at all, go back to the
9856 // main EmitARMBuiltinExpr.
9857 default:
9858 return nullptr;
9859 }
9860
9861 // Anything that breaks from that switch is an MVE builtin that
9862 // needs handwritten code to generate.
9863
9864 switch (CustomCodeGenType) {
9865
9866 case CustomCodeGen::VLD24: {
9867 llvm::SmallVector<Value *, 4> Ops;
9868 llvm::SmallVector<llvm::Type *, 4> Tys;
9869
9870 auto MvecCType = E->getType();
9871 auto MvecLType = ConvertType(MvecCType);
9872 assert(MvecLType->isStructTy() &&
9873 "Return type for vld[24]q should be a struct");
9874 assert(MvecLType->getStructNumElements() == 1 &&
9875 "Return-type struct for vld[24]q should have one element");
9876 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9877 assert(MvecLTypeInner->isArrayTy() &&
9878 "Return-type struct for vld[24]q should contain an array");
9879 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9880 "Array member of return-type struct vld[24]q has wrong length");
9881 auto VecLType = MvecLTypeInner->getArrayElementType();
9882
9883 Tys.push_back(VecLType);
9884
9885 auto Addr = E->getArg(0);
9886 Ops.push_back(EmitScalarExpr(Addr));
9887 Tys.push_back(ConvertType(Addr->getType()));
9888
9889 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9890 Value *LoadResult = Builder.CreateCall(F, Ops);
9891 Value *MvecOut = PoisonValue::get(MvecLType);
9892 for (unsigned i = 0; i < NumVectors; ++i) {
9893 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9894 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9895 }
9896
9897 if (ReturnValue.isNull())
9898 return MvecOut;
9899 else
9900 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9901 }
9902
9903 case CustomCodeGen::VST24: {
9904 llvm::SmallVector<Value *, 4> Ops;
9905 llvm::SmallVector<llvm::Type *, 4> Tys;
9906
9907 auto Addr = E->getArg(0);
9908 Ops.push_back(EmitScalarExpr(Addr));
9909 Tys.push_back(ConvertType(Addr->getType()));
9910
9911 auto MvecCType = E->getArg(1)->getType();
9912 auto MvecLType = ConvertType(MvecCType);
9913 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9914 assert(MvecLType->getStructNumElements() == 1 &&
9915 "Data-type struct for vst2q should have one element");
9916 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9917 assert(MvecLTypeInner->isArrayTy() &&
9918 "Data-type struct for vst2q should contain an array");
9919 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9920 "Array member of return-type struct vld[24]q has wrong length");
9921 auto VecLType = MvecLTypeInner->getArrayElementType();
9922
9923 Tys.push_back(VecLType);
9924
9925 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9926 EmitAggExpr(E->getArg(1), MvecSlot);
9927 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9928 for (unsigned i = 0; i < NumVectors; i++)
9929 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9930
9931 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9932 Value *ToReturn = nullptr;
9933 for (unsigned i = 0; i < NumVectors; i++) {
9934 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9935 ToReturn = Builder.CreateCall(F, Ops);
9936 Ops.pop_back();
9937 }
9938 return ToReturn;
9939 }
9940 }
9941 llvm_unreachable("unknown custom codegen type.");
9942}
9943
9944 Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9945 const CallExpr *E,
9946 ReturnValueSlot ReturnValue,
9947 llvm::Triple::ArchType Arch) {
9948 switch (BuiltinID) {
9949 default:
9950 return nullptr;
9951#include "clang/Basic/arm_cde_builtin_cg.inc"
9952 }
9953}
9954
9955static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9956 const CallExpr *E,
9957 SmallVectorImpl<Value *> &Ops,
9958 llvm::Triple::ArchType Arch) {
9959 unsigned int Int = 0;
9960 const char *s = nullptr;
9961
9962 switch (BuiltinID) {
9963 default:
9964 return nullptr;
9965 case NEON::BI__builtin_neon_vtbl1_v:
9966 case NEON::BI__builtin_neon_vqtbl1_v:
9967 case NEON::BI__builtin_neon_vqtbl1q_v:
9968 case NEON::BI__builtin_neon_vtbl2_v:
9969 case NEON::BI__builtin_neon_vqtbl2_v:
9970 case NEON::BI__builtin_neon_vqtbl2q_v:
9971 case NEON::BI__builtin_neon_vtbl3_v:
9972 case NEON::BI__builtin_neon_vqtbl3_v:
9973 case NEON::BI__builtin_neon_vqtbl3q_v:
9974 case NEON::BI__builtin_neon_vtbl4_v:
9975 case NEON::BI__builtin_neon_vqtbl4_v:
9976 case NEON::BI__builtin_neon_vqtbl4q_v:
9977 break;
9978 case NEON::BI__builtin_neon_vtbx1_v:
9979 case NEON::BI__builtin_neon_vqtbx1_v:
9980 case NEON::BI__builtin_neon_vqtbx1q_v:
9981 case NEON::BI__builtin_neon_vtbx2_v:
9982 case NEON::BI__builtin_neon_vqtbx2_v:
9983 case NEON::BI__builtin_neon_vqtbx2q_v:
9984 case NEON::BI__builtin_neon_vtbx3_v:
9985 case NEON::BI__builtin_neon_vqtbx3_v:
9986 case NEON::BI__builtin_neon_vqtbx3q_v:
9987 case NEON::BI__builtin_neon_vtbx4_v:
9988 case NEON::BI__builtin_neon_vqtbx4_v:
9989 case NEON::BI__builtin_neon_vqtbx4q_v:
9990 break;
9991 }
9992
9993 assert(E->getNumArgs() >= 3);
9994
9995 // Get the last argument, which specifies the vector type.
9996 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9997 std::optional<llvm::APSInt> Result =
9998 Arg->getIntegerConstantExpr(CGF.getContext());
9999 if (!Result)
10000 return nullptr;
10001
10002 // Determine the type of this overloaded NEON intrinsic.
10003 NeonTypeFlags Type = Result->getZExtValue();
10004 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
10005 if (!Ty)
10006 return nullptr;
10007
10008 CodeGen::CGBuilderTy &Builder = CGF.Builder;
10009
10010 // AArch64 scalar builtins are not overloaded; they do not have an extra
10011 // argument that specifies the vector type, so we need to handle each case.
10012 switch (BuiltinID) {
10013 case NEON::BI__builtin_neon_vtbl1_v: {
10014 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
10015 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10016 }
10017 case NEON::BI__builtin_neon_vtbl2_v: {
10018 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
10019 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
10020 }
10021 case NEON::BI__builtin_neon_vtbl3_v: {
10022 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
10023 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10024 }
10025 case NEON::BI__builtin_neon_vtbl4_v: {
10026 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
10027 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
10028 }
10029 case NEON::BI__builtin_neon_vtbx1_v: {
10030 Value *TblRes =
10031 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
10032 Intrinsic::aarch64_neon_tbl1, "vtbl1");
10033
10034 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
10035 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
10036 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10037
10038 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10039 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10040 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10041 }
10042 case NEON::BI__builtin_neon_vtbx2_v: {
10043 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
10044 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
10045 }
10046 case NEON::BI__builtin_neon_vtbx3_v: {
10047 Value *TblRes =
10048 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
10049 Intrinsic::aarch64_neon_tbl2, "vtbl2");
10050
10051 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
10052 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
10053 TwentyFourV);
10054 CmpRes = Builder.CreateSExt(CmpRes, Ty);
10055
10056 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
10057 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
10058 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
10059 }
10060 case NEON::BI__builtin_neon_vtbx4_v: {
10061 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
10062 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
10063 }
10064 case NEON::BI__builtin_neon_vqtbl1_v:
10065 case NEON::BI__builtin_neon_vqtbl1q_v:
10066 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
10067 case NEON::BI__builtin_neon_vqtbl2_v:
10068 case NEON::BI__builtin_neon_vqtbl2q_v: {
10069 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
10070 case NEON::BI__builtin_neon_vqtbl3_v:
10071 case NEON::BI__builtin_neon_vqtbl3q_v:
10072 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
10073 case NEON::BI__builtin_neon_vqtbl4_v:
10074 case NEON::BI__builtin_neon_vqtbl4q_v:
10075 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
10076 case NEON::BI__builtin_neon_vqtbx1_v:
10077 case NEON::BI__builtin_neon_vqtbx1q_v:
10078 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
10079 case NEON::BI__builtin_neon_vqtbx2_v:
10080 case NEON::BI__builtin_neon_vqtbx2q_v:
10081 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
10082 case NEON::BI__builtin_neon_vqtbx3_v:
10083 case NEON::BI__builtin_neon_vqtbx3q_v:
10084 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
10085 case NEON::BI__builtin_neon_vqtbx4_v:
10086 case NEON::BI__builtin_neon_vqtbx4q_v:
10087 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
10088 }
10089 }
10090
10091 if (!Int)
10092 return nullptr;
10093
10094 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
10095 return CGF.EmitNeonCall(F, Ops, s);
10096}
10097
10098 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
10099 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
10100 Op = Builder.CreateBitCast(Op, Int16Ty);
10101 Value *V = PoisonValue::get(VTy);
10102 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
10103 Op = Builder.CreateInsertElement(V, Op, CI);
10104 return Op;
10105}
10106
10107/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
10108/// access builtin. Only required if it can't be inferred from the base pointer
10109/// operand.
10110llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
10111 switch (TypeFlags.getMemEltType()) {
10112 case SVETypeFlags::MemEltTyDefault:
10113 return getEltType(TypeFlags);
10114 case SVETypeFlags::MemEltTyInt8:
10115 return Builder.getInt8Ty();
10116 case SVETypeFlags::MemEltTyInt16:
10117 return Builder.getInt16Ty();
10118 case SVETypeFlags::MemEltTyInt32:
10119 return Builder.getInt32Ty();
10120 case SVETypeFlags::MemEltTyInt64:
10121 return Builder.getInt64Ty();
10122 }
10123 llvm_unreachable("Unknown MemEltType");
10124}
10125
10126llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
10127 switch (TypeFlags.getEltType()) {
10128 default:
10129 llvm_unreachable("Invalid SVETypeFlag!");
10130
10131 case SVETypeFlags::EltTyInt8:
10132 return Builder.getInt8Ty();
10133 case SVETypeFlags::EltTyInt16:
10134 return Builder.getInt16Ty();
10135 case SVETypeFlags::EltTyInt32:
10136 return Builder.getInt32Ty();
10137 case SVETypeFlags::EltTyInt64:
10138 return Builder.getInt64Ty();
10139 case SVETypeFlags::EltTyInt128:
10140 return Builder.getInt128Ty();
10141
10142 case SVETypeFlags::EltTyFloat16:
10143 return Builder.getHalfTy();
10144 case SVETypeFlags::EltTyFloat32:
10145 return Builder.getFloatTy();
10146 case SVETypeFlags::EltTyFloat64:
10147 return Builder.getDoubleTy();
10148
10149 case SVETypeFlags::EltTyBFloat16:
10150 return Builder.getBFloatTy();
10151
10152 case SVETypeFlags::EltTyBool8:
10153 case SVETypeFlags::EltTyBool16:
10154 case SVETypeFlags::EltTyBool32:
10155 case SVETypeFlags::EltTyBool64:
10156 return Builder.getInt1Ty();
10157 }
10158}
10159
10160// Return the llvm predicate vector type corresponding to the specified element
10161// TypeFlags.
10162llvm::ScalableVectorType *
10163 CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
10164 switch (TypeFlags.getEltType()) {
10165 default: llvm_unreachable("Unhandled SVETypeFlag!");
10166
10167 case SVETypeFlags::EltTyInt8:
10168 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10169 case SVETypeFlags::EltTyInt16:
10170 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10171 case SVETypeFlags::EltTyInt32:
10172 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10173 case SVETypeFlags::EltTyInt64:
10174 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10175
10176 case SVETypeFlags::EltTyBFloat16:
10177 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10178 case SVETypeFlags::EltTyFloat16:
10179 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10180 case SVETypeFlags::EltTyFloat32:
10181 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10182 case SVETypeFlags::EltTyFloat64:
10183 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10184
10185 case SVETypeFlags::EltTyBool8:
10186 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10187 case SVETypeFlags::EltTyBool16:
10188 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10189 case SVETypeFlags::EltTyBool32:
10190 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10191 case SVETypeFlags::EltTyBool64:
10192 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10193 }
10194}
10195
10196// Return the llvm vector type corresponding to the specified element TypeFlags.
10197llvm::ScalableVectorType *
10198CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
10199 switch (TypeFlags.getEltType()) {
10200 default:
10201 llvm_unreachable("Invalid SVETypeFlag!");
10202
10203 case SVETypeFlags::EltTyInt8:
10204 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10205 case SVETypeFlags::EltTyInt16:
10206 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
10207 case SVETypeFlags::EltTyInt32:
10208 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
10209 case SVETypeFlags::EltTyInt64:
10210 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
10211
10212 case SVETypeFlags::EltTyMFloat8:
10213 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
10214 case SVETypeFlags::EltTyFloat16:
10215 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
10216 case SVETypeFlags::EltTyBFloat16:
10217 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
10218 case SVETypeFlags::EltTyFloat32:
10219 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
10220 case SVETypeFlags::EltTyFloat64:
10221 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
10222
10223 case SVETypeFlags::EltTyBool8:
10224 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
10225 case SVETypeFlags::EltTyBool16:
10226 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
10227 case SVETypeFlags::EltTyBool32:
10228 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
10229 case SVETypeFlags::EltTyBool64:
10230 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
10231 }
10232}
10233
10234llvm::Value *
10235 CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
10236 Function *Ptrue =
10237 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
10238 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
10239}
10240
10241constexpr unsigned SVEBitsPerBlock = 128;
10242
10243static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
10244 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
10245 return llvm::ScalableVectorType::get(EltTy, NumElts);
10246}
10247
10248// Reinterpret the input predicate so that it can be used to correctly isolate
10249// the elements of the specified datatype.
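// For example, a <vscale x 16 x i1> svbool_t predicate guarding 64-bit data
// is narrowed to <vscale x 2 x i1> via aarch64.sve.convert.from.svbool.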
10250 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
10251 llvm::ScalableVectorType *VTy) {
10252
10253 if (isa<TargetExtType>(Pred->getType()) &&
10254 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
10255 return Pred;
10256
10257 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
10258 if (Pred->getType() == RTy)
10259 return Pred;
10260
10261 unsigned IntID;
10262 llvm::Type *IntrinsicTy;
10263 switch (VTy->getMinNumElements()) {
10264 default:
10265 llvm_unreachable("unsupported element count!");
10266 case 1:
10267 case 2:
10268 case 4:
10269 case 8:
10270 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
10271 IntrinsicTy = RTy;
10272 break;
10273 case 16:
10274 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
10275 IntrinsicTy = Pred->getType();
10276 break;
10277 }
10278
10279 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
10280 Value *C = Builder.CreateCall(F, Pred);
10281 assert(C->getType() == RTy && "Unexpected return type!");
10282 return C;
10283}
10284
10285 Value *CodeGenFunction::EmitSVEPredicateTupleCast(Value *PredTuple,
10286 llvm::StructType *Ty) {
10287 if (PredTuple->getType() == Ty)
10288 return PredTuple;
10289
10290 Value *Ret = llvm::PoisonValue::get(Ty);
10291 for (unsigned I = 0; I < Ty->getNumElements(); ++I) {
10292 Value *Pred = Builder.CreateExtractValue(PredTuple, I);
10293 Pred = EmitSVEPredicateCast(
10294 Pred, cast<llvm::ScalableVectorType>(Ty->getTypeAtIndex(I)));
10295 Ret = Builder.CreateInsertValue(Ret, Pred, I);
10296 }
10297
10298 return Ret;
10299}
10300
10301 Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
10302 SmallVectorImpl<Value *> &Ops,
10303 unsigned IntID) {
10304 auto *ResultTy = getSVEType(TypeFlags);
10305 auto *OverloadedTy =
10306 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
10307
10308 Function *F = nullptr;
10309 if (Ops[1]->getType()->isVectorTy())
10310 // This is the "vector base, scalar offset" case. In order to uniquely
10311 // map this built-in to an LLVM IR intrinsic, we need both the return type
10312 // and the type of the vector base.
10313 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
10314 else
10315 // This is the "scalar base, vector offset case". The type of the offset
10316 // is encoded in the name of the intrinsic. We only need to specify the
10317 // return type in order to uniquely map this built-in to an LLVM IR
10318 // intrinsic.
10319 F = CGM.getIntrinsic(IntID, OverloadedTy);
10320
10321 // At the ACLE level there's only one predicate type, svbool_t, which is
10322 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10323 // actual type being loaded. For example, when loading doubles (i64) the
10324 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10325 // the predicate and the data being loaded must match. Cast to the type
10326 // expected by the intrinsic. The intrinsic itself should be defined in
10327 // a way that enforces relations between parameter types.
10328 Ops[0] = EmitSVEPredicateCast(
10329 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
10330
10331 // Pass 0 when the offset is missing. This can only be applied when using
10332 // the "vector base" addressing mode for which ACLE allows no offset. The
10333 // corresponding LLVM IR always requires an offset.
10334 if (Ops.size() == 2) {
10335 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10336 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10337 }
10338
10339 // For "vector base, scalar index" scale the index so that it becomes a
10340 // scalar offset.
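// For example, with 64-bit elements the index is shifted left by 3 so that it
// becomes a byte offset.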
10341 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
10342 unsigned BytesPerElt =
10343 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10344 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10345 }
10346
10347 Value *Call = Builder.CreateCall(F, Ops);
10348
10349 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
10350 // other cases it's folded into a nop.
10351 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
10352 : Builder.CreateSExt(Call, ResultTy);
10353}
10354
10355 Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
10356 SmallVectorImpl<Value *> &Ops,
10357 unsigned IntID) {
10358 auto *SrcDataTy = getSVEType(TypeFlags);
10359 auto *OverloadedTy =
10360 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
10361
10362 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
10363 // it's the first argument. Move it accordingly.
10364 Ops.insert(Ops.begin(), Ops.pop_back_val());
10365
10366 Function *F = nullptr;
10367 if (Ops[2]->getType()->isVectorTy())
10368 // This is the "vector base, scalar offset" case. In order to uniquely
10369 // map this built-in to an LLVM IR intrinsic, we need both the return type
10370 // and the type of the vector base.
10371 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
10372 else
10373 // This is the "scalar base, vector offset case". The type of the offset
10374 // is encoded in the name of the intrinsic. We only need to specify the
10375 // return type in order to uniquely map this built-in to an LLVM IR
10376 // intrinsic.
10377 F = CGM.getIntrinsic(IntID, OverloadedTy);
10378
10379 // Pass 0 when the offset is missing. This can only be applied when using
10380 // the "vector base" addressing mode for which ACLE allows no offset. The
10381 // corresponding LLVM IR always requires an offset.
10382 if (Ops.size() == 3) {
10383 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
10384 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10385 }
10386
10387 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
10388 // folded into a nop.
10389 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
10390
10391 // At the ACLE level there's only one predicate type, svbool_t, which is
10392 // mapped to <n x 16 x i1>. However, this might be incompatible with the
10393 // actual type being stored. For example, when storing doubles (i64) the
10394 // predicate should be <n x 2 x i1> instead. At the IR level the type of
10395 // the predicate and the data being stored must match. Cast to the type
10396 // expected by the intrinsic. The intrinsic itself should be defined in
10397 // a way that enforces relations between parameter types.
10398 Ops[1] = EmitSVEPredicateCast(
10399 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
10400
10401 // For "vector base, scalar index" scale the index so that it becomes a
10402 // scalar offset.
10403 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
10404 unsigned BytesPerElt =
10405 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
10406 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
10407 }
10408
10409 return Builder.CreateCall(F, Ops);
10410}
10411
10412 Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
10413 SmallVectorImpl<Value *> &Ops,
10414 unsigned IntID) {
10415 // The gather prefetches are overloaded on the vector input - this can either
10416 // be the vector of base addresses or vector of offsets.
10417 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
10418 if (!OverloadedTy)
10419 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
10420
10421 // Cast the predicate from svbool_t to the right number of elements.
10422 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
10423
10424 // vector + imm addressing modes
10425 if (Ops[1]->getType()->isVectorTy()) {
10426 if (Ops.size() == 3) {
10427 // Pass 0 for 'vector+imm' when the index is omitted.
10428 Ops.push_back(ConstantInt::get(Int64Ty, 0));
10429
10430 // The sv_prfop is the last operand in the builtin and IR intrinsic.
10431 std::swap(Ops[2], Ops[3]);
10432 } else {
10433 // Index needs to be passed as scaled offset.
10434 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10435 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
10436 if (BytesPerElt > 1)
10437 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
10438 }
10439 }
10440
10441 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
10442 return Builder.CreateCall(F, Ops);
10443}
10444
10445 Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
10446 SmallVectorImpl<Value *> &Ops,
10447 unsigned IntID) {
10448 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10449 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10450 Value *BasePtr = Ops[1];
10451
10452 // Does the load have an offset?
10453 if (Ops.size() > 2)
10454 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10455
10456 Function *F = CGM.getIntrinsic(IntID, {VTy});
10457 return Builder.CreateCall(F, {Predicate, BasePtr});
10458}
10459
10460 Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10461 SmallVectorImpl<Value *> &Ops,
10462 unsigned IntID) {
10463 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10464
10465 unsigned N;
10466 switch (IntID) {
10467 case Intrinsic::aarch64_sve_st2:
10468 case Intrinsic::aarch64_sve_st1_pn_x2:
10469 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10470 case Intrinsic::aarch64_sve_st2q:
10471 N = 2;
10472 break;
10473 case Intrinsic::aarch64_sve_st3:
10474 case Intrinsic::aarch64_sve_st3q:
10475 N = 3;
10476 break;
10477 case Intrinsic::aarch64_sve_st4:
10478 case Intrinsic::aarch64_sve_st1_pn_x4:
10479 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10480 case Intrinsic::aarch64_sve_st4q:
10481 N = 4;
10482 break;
10483 default:
10484 llvm_unreachable("unknown intrinsic!");
10485 }
10486
10487 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10488 Value *BasePtr = Ops[1];
10489
10490 // Does the store have an offset?
10491 if (Ops.size() > (2 + N))
10492 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10493
10494 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10495 // need to break up the tuple vector.
10496 SmallVector<llvm::Value*, 5> Operands;
10497 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10498 Operands.push_back(Ops[I]);
10499 Operands.append({Predicate, BasePtr});
10500 Function *F = CGM.getIntrinsic(IntID, { VTy });
10501
10502 return Builder.CreateCall(F, Operands);
10503}
10504
10505// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10506// svpmullt_pair intrinsics, with the exception that their results are bitcast
10507// to a wider type.
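// For example, svpmullb_u16 is emitted as the pmullb.pair intrinsic on its
// <vscale x 16 x i8> operands and the result is then reinterpreted as
// <vscale x 8 x i16>.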
10508 Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10509 SmallVectorImpl<Value *> &Ops,
10510 unsigned BuiltinID) {
10511 // Splat scalar operand to vector (intrinsics with _n infix)
10512 if (TypeFlags.hasSplatOperand()) {
10513 unsigned OpNo = TypeFlags.getSplatOperand();
10514 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10515 }
10516
10517 // The pair-wise function has a narrower overloaded type.
10518 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10519 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10520
10521 // Now bitcast to the wider result type.
10522 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10523 return EmitSVEReinterpret(Call, Ty);
10524}
10525
10526 Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10527 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10528 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10529 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10530 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10531}
10532
10533 Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10534 SmallVectorImpl<Value *> &Ops,
10535 unsigned BuiltinID) {
10536 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10537 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10538 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10539
10540 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10541 Value *BasePtr = Ops[1];
10542
10543 // Implement the index operand if not omitted.
10544 if (Ops.size() > 3)
10545 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10546
10547 Value *PrfOp = Ops.back();
10548
10549 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10550 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10551}
10552
10553 Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10554 llvm::Type *ReturnTy,
10555 SmallVectorImpl<Value *> &Ops,
10556 unsigned IntrinsicID,
10557 bool IsZExtReturn) {
10558 QualType LangPTy = E->getArg(1)->getType();
10559 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10560 LangPTy->castAs<PointerType>()->getPointeeType());
10561
10562 // The vector type that is returned may be different from the
10563 // eventual type loaded from memory.
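// For example, an extending load such as svld1ub_u16 reads i8 elements from
// memory and zero-extends them into a <vscale x 8 x i16> result.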
10564 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10565 llvm::ScalableVectorType *MemoryTy = nullptr;
10566 llvm::ScalableVectorType *PredTy = nullptr;
10567 bool IsQuadLoad = false;
10568 switch (IntrinsicID) {
10569 case Intrinsic::aarch64_sve_ld1uwq:
10570 case Intrinsic::aarch64_sve_ld1udq:
10571 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10572 PredTy = llvm::ScalableVectorType::get(
10573 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10574 IsQuadLoad = true;
10575 break;
10576 default:
10577 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10578 PredTy = MemoryTy;
10579 break;
10580 }
10581
10582 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10583 Value *BasePtr = Ops[1];
10584
10585 // Does the load have an offset?
10586 if (Ops.size() > 2)
10587 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10588
10589 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10590 auto *Load =
10591 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10592 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10593 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10594
10595 if (IsQuadLoad)
10596 return Load;
10597
10598 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10599 : Builder.CreateSExt(Load, VectorTy);
10600}
10601
10602 Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10603 SmallVectorImpl<Value *> &Ops,
10604 unsigned IntrinsicID) {
10605 QualType LangPTy = E->getArg(1)->getType();
10606 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10607 LangPTy->castAs<PointerType>()->getPointeeType());
10608
10609 // The vector type that is stored may be different from the
10610 // eventual type stored to memory.
10611 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10612 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10613
10614 auto PredTy = MemoryTy;
10615 auto AddrMemoryTy = MemoryTy;
10616 bool IsQuadStore = false;
10617
10618 switch (IntrinsicID) {
10619 case Intrinsic::aarch64_sve_st1wq:
10620 case Intrinsic::aarch64_sve_st1dq:
10621 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10622 PredTy =
10623 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10624 IsQuadStore = true;
10625 break;
10626 default:
10627 break;
10628 }
10629 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10630 Value *BasePtr = Ops[1];
10631
10632 // Does the store have an offset?
10633 if (Ops.size() == 4)
10634 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10635
10636 // Last value is always the data
10637 Value *Val =
10638 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10639
10640 Function *F =
10641 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10642 auto *Store =
10643 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10644 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10645 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10646 return Store;
10647}
10648
10649 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10650 SmallVectorImpl<Value *> &Ops,
10651 unsigned IntID) {
10652 Ops[2] = EmitSVEPredicateCast(
10653 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10654
10655 SmallVector<Value *> NewOps;
10656 NewOps.push_back(Ops[2]);
10657
10658 llvm::Value *BasePtr = Ops[3];
10659 llvm::Value *RealSlice = Ops[1];
10660 // If the intrinsic contains the vnum parameter, multiply it with the vector
10661 // size in bytes.
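// For example, with vnum == 2 the base pointer is advanced by 2 * cntsb()
// bytes and the slice index is incremented by 2.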
10662 if (Ops.size() == 5) {
10663 Function *StreamingVectorLength =
10664 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10665 llvm::Value *StreamingVectorLengthCall =
10666 Builder.CreateCall(StreamingVectorLength);
10667 llvm::Value *Mulvl =
10668 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10669 // The type of the ptr parameter is void *, so use Int8Ty here.
10670 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10671 RealSlice = Builder.CreateZExt(RealSlice, Int64Ty);
10672 RealSlice = Builder.CreateAdd(RealSlice, Ops[4]);
10673 RealSlice = Builder.CreateTrunc(RealSlice, Int32Ty);
10674 }
10675 NewOps.push_back(BasePtr);
10676 NewOps.push_back(Ops[0]);
10677 NewOps.push_back(RealSlice);
10678 Function *F = CGM.getIntrinsic(IntID);
10679 return Builder.CreateCall(F, NewOps);
10680}
10681
10682 Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10683 SmallVectorImpl<Value *> &Ops,
10684 unsigned IntID) {
10685 auto *VecTy = getSVEType(TypeFlags);
10686 Function *F = CGM.getIntrinsic(IntID, VecTy);
10687 if (TypeFlags.isReadZA())
10688 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10689 else if (TypeFlags.isWriteZA())
10690 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10691 return Builder.CreateCall(F, Ops);
10692}
10693
10694 Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10695 SmallVectorImpl<Value *> &Ops,
10696 unsigned IntID) {
10697 // The svzero_za() intrinsic zeros the entire za tile and has no parameters.
10698 if (Ops.size() == 0)
10699 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10700 Function *F = CGM.getIntrinsic(IntID, {});
10701 return Builder.CreateCall(F, Ops);
10702}
10703
10704 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10705 SmallVectorImpl<Value *> &Ops,
10706 unsigned IntID) {
10707 if (Ops.size() == 2)
10708 Ops.push_back(Builder.getInt32(0));
10709 else
10710 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10711 Function *F = CGM.getIntrinsic(IntID, {});
10712 return Builder.CreateCall(F, Ops);
10713}
10714
10715// Limit the usage of scalable llvm IR generated by the ACLE by using the
10716// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
10717Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10718 return Builder.CreateVectorSplat(
10719 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10720}
10721
10722 Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10723 if (auto *Ty = Scalar->getType(); Ty->isVectorTy()) {
10724#ifndef NDEBUG
10725 auto *VecTy = cast<llvm::VectorType>(Ty);
10726 ElementCount EC = VecTy->getElementCount();
10727 assert(EC.isScalar() && VecTy->getElementType() == Int8Ty &&
10728 "Only <1 x i8> expected");
10729#endif
10730 Scalar = Builder.CreateExtractElement(Scalar, uint64_t(0));
10731 }
10732 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10733}
10734
10735Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10736 // FIXME: For big endian this needs an additional REV, or needs a separate
10737 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10738 // instruction is defined as 'bitwise' equivalent from memory point of
10739 // view (when storing/reloading), whereas the svreinterpret builtin
10740 // implements bitwise equivalent cast from register point of view.
10741 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10742
10743 if (auto *StructTy = dyn_cast<StructType>(Ty)) {
10744 Value *Tuple = llvm::PoisonValue::get(Ty);
10745
10746 for (unsigned I = 0; I < StructTy->getNumElements(); ++I) {
10747 Value *In = Builder.CreateExtractValue(Val, I);
10748 Value *Out = Builder.CreateBitCast(In, StructTy->getTypeAtIndex(I));
10749 Tuple = Builder.CreateInsertValue(Tuple, Out, I);
10750 }
10751
10752 return Tuple;
10753 }
10754
10755 return Builder.CreateBitCast(Val, Ty);
10756}
10757
10758static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10759 SmallVectorImpl<Value *> &Ops) {
10760 auto *SplatZero = Constant::getNullValue(Ty);
10761 Ops.insert(Ops.begin(), SplatZero);
10762}
10763
10764static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10765 SmallVectorImpl<Value *> &Ops) {
10766 auto *SplatUndef = UndefValue::get(Ty);
10767 Ops.insert(Ops.begin(), SplatUndef);
10768}
10769
10770 SmallVector<llvm::Type *, 2>
10771 CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10772 llvm::Type *ResultType,
10773 ArrayRef<Value *> Ops) {
10774 if (TypeFlags.isOverloadNone())
10775 return {};
10776
10777 llvm::Type *DefaultType = getSVEType(TypeFlags);
10778
10779 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10780 return {DefaultType, Ops[1]->getType()};
10781
10782 if (TypeFlags.isOverloadWhileRW())
10783 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10784
10785 if (TypeFlags.isOverloadCvt())
10786 return {Ops[0]->getType(), Ops.back()->getType()};
10787
10788 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10789 ResultType->isVectorTy())
10790 return {ResultType, Ops[1]->getType()};
10791
10792 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10793 return {DefaultType};
10794}
10795
10796 Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10797 ArrayRef<Value *> Ops) {
10798 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10799 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10800 unsigned Idx = cast<ConstantInt>(Ops[1])->getZExtValue();
10801
10802 if (TypeFlags.isTupleSet())
10803 return Builder.CreateInsertValue(Ops[0], Ops[2], Idx);
10804 return Builder.CreateExtractValue(Ops[0], Idx);
10805}
10806
10807 Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10808 llvm::Type *Ty,
10809 ArrayRef<Value *> Ops) {
10810 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10811
10812 Value *Tuple = llvm::PoisonValue::get(Ty);
10813 for (unsigned Idx = 0; Idx < Ops.size(); Idx++)
10814 Tuple = Builder.CreateInsertValue(Tuple, Ops[Idx], Idx);
10815
10816 return Tuple;
10817}
10818
10819 void CodeGenFunction::GetAArch64SVEProcessedOperands(
10820 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10821 SVETypeFlags TypeFlags) {
10822 // Find out if any arguments are required to be integer constant expressions.
10823 unsigned ICEArguments = 0;
10824 ASTContext::GetBuiltinTypeError Error;
10825 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10826 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10827
10828 // Tuple set/get only requires one insert/extract vector, which is
10829 // created by EmitSVETupleSetOrGet.
10830 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10831
10832 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10833 bool IsICE = ICEArguments & (1 << i);
10834 Value *Arg = EmitScalarExpr(E->getArg(i));
10835
10836 if (IsICE) {
10837 // If this is required to be a constant, constant fold it so that we know
10838 // that the generated intrinsic gets a ConstantInt.
10839 std::optional<llvm::APSInt> Result =
10840 E->getArg(i)->getIntegerConstantExpr(getContext());
10841 assert(Result && "Expected argument to be a constant");
10842
10843 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10844 // truncate because the immediate has been range checked and no valid
10845 // immediate requires more than a handful of bits.
10846 *Result = Result->extOrTrunc(32);
10847 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10848 continue;
10849 }
10850
10851 if (isa<StructType>(Arg->getType()) && !IsTupleGetOrSet) {
10852 for (unsigned I = 0; I < Arg->getType()->getStructNumElements(); ++I)
10853 Ops.push_back(Builder.CreateExtractValue(Arg, I));
10854
10855 continue;
10856 }
10857
10858 Ops.push_back(Arg);
10859 }
10860}
10861
10862 Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10863 const CallExpr *E) {
10864 llvm::Type *Ty = ConvertType(E->getType());
10865 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10866 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10867 Value *Val = EmitScalarExpr(E->getArg(0));
10868 return EmitSVEReinterpret(Val, Ty);
10869 }
10870
10871 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10872 AArch64SVEIntrinsicsProvenSorted);
10873
10874 llvm::SmallVector<Value *, 4> Ops;
10875 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10876 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10877
10878 if (TypeFlags.isLoad())
10879 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10880 TypeFlags.isZExtReturn());
10881 else if (TypeFlags.isStore())
10882 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10883 else if (TypeFlags.isGatherLoad())
10884 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10885 else if (TypeFlags.isScatterStore())
10886 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10887 else if (TypeFlags.isPrefetch())
10888 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10889 else if (TypeFlags.isGatherPrefetch())
10890 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10891 else if (TypeFlags.isStructLoad())
10892 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10893 else if (TypeFlags.isStructStore())
10894 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10895 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10896 return EmitSVETupleSetOrGet(TypeFlags, Ops);
10897 else if (TypeFlags.isTupleCreate())
10898 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10899 else if (TypeFlags.isUndef())
10900 return UndefValue::get(Ty);
10901 else if (Builtin->LLVMIntrinsic != 0) {
10902 // Emit set FPMR for intrinsics that require it
10903 if (TypeFlags.setsFPMR())
10904 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
10905 Ops.pop_back_val());
10906 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10907 InsertExplicitZeroOperand(Builder, Ty, Ops);
10908
10909 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10910 InsertExplicitUndefOperand(Builder, Ty, Ops);
10911
10912 // Some ACLE builtins leave out the argument to specify the predicate
10913 // pattern, which is expected to be expanded to an SV_ALL pattern.
10914 if (TypeFlags.isAppendSVALL())
10915 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10916 if (TypeFlags.isInsertOp1SVALL())
10917 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10918
10919 // Predicates must match the main datatype.
10920 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10921 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10922 if (PredTy->getElementType()->isIntegerTy(1))
10923 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10924
10925 // Splat scalar operand to vector (intrinsics with _n infix)
10926 if (TypeFlags.hasSplatOperand()) {
10927 unsigned OpNo = TypeFlags.getSplatOperand();
10928 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10929 }
10930
10931 if (TypeFlags.isReverseCompare())
10932 std::swap(Ops[1], Ops[2]);
10933 else if (TypeFlags.isReverseUSDOT())
10934 std::swap(Ops[1], Ops[2]);
10935 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10936 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10937 std::swap(Ops[1], Ops[2]);
10938 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10939 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10940 std::swap(Ops[1], Ops[3]);
10941
10942 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10943 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10944 llvm::Type *OpndTy = Ops[1]->getType();
10945 auto *SplatZero = Constant::getNullValue(OpndTy);
10946 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10947 }
10948
10949 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10950 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10951 Value *Call = Builder.CreateCall(F, Ops);
10952
10953 if (Call->getType() == Ty)
10954 return Call;
10955
10956 // Predicate results must be converted to svbool_t.
10957 if (auto PredTy = dyn_cast<llvm::ScalableVectorType>(Ty))
10958 return EmitSVEPredicateCast(Call, PredTy);
10959 if (auto PredTupleTy = dyn_cast<llvm::StructType>(Ty))
10960 return EmitSVEPredicateTupleCast(Call, PredTupleTy);
10961
10962 llvm_unreachable("unsupported element count!");
10963 }
10964
10965 switch (BuiltinID) {
10966 default:
10967 return nullptr;
10968
10969 case SVE::BI__builtin_sve_svreinterpret_b: {
10970 auto SVCountTy =
10971 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10972 Function *CastFromSVCountF =
10973 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10974 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10975 }
10976 case SVE::BI__builtin_sve_svreinterpret_c: {
10977 auto SVCountTy =
10978 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10979 Function *CastToSVCountF =
10980 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10981 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10982 }
10983
10984 case SVE::BI__builtin_sve_svpsel_lane_b8:
10985 case SVE::BI__builtin_sve_svpsel_lane_b16:
10986 case SVE::BI__builtin_sve_svpsel_lane_b32:
10987 case SVE::BI__builtin_sve_svpsel_lane_b64:
10988 case SVE::BI__builtin_sve_svpsel_lane_c8:
10989 case SVE::BI__builtin_sve_svpsel_lane_c16:
10990 case SVE::BI__builtin_sve_svpsel_lane_c32:
10991 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10992 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10993 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10994 "aarch64.svcount")) &&
10995 "Unexpected TargetExtType");
10996 auto SVCountTy =
10997 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10998 Function *CastFromSVCountF =
10999 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
11000 Function *CastToSVCountF =
11001 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
11002
11003 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
11004 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
11005 llvm::Value *Ops0 =
11006 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
11007 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
11008 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
11009 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
11010 }
11011 case SVE::BI__builtin_sve_svmov_b_z: {
11012 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
11013 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11014 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11015 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
11016 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
11017 }
11018
11019 case SVE::BI__builtin_sve_svnot_b_z: {
11020 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
11021 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11022 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
11023 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
11024 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
11025 }
11026
11027 case SVE::BI__builtin_sve_svmovlb_u16:
11028 case SVE::BI__builtin_sve_svmovlb_u32:
11029 case SVE::BI__builtin_sve_svmovlb_u64:
11030 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
11031
11032 case SVE::BI__builtin_sve_svmovlb_s16:
11033 case SVE::BI__builtin_sve_svmovlb_s32:
11034 case SVE::BI__builtin_sve_svmovlb_s64:
11035 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
11036
11037 case SVE::BI__builtin_sve_svmovlt_u16:
11038 case SVE::BI__builtin_sve_svmovlt_u32:
11039 case SVE::BI__builtin_sve_svmovlt_u64:
11040 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
11041
11042 case SVE::BI__builtin_sve_svmovlt_s16:
11043 case SVE::BI__builtin_sve_svmovlt_s32:
11044 case SVE::BI__builtin_sve_svmovlt_s64:
11045 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
11046
11047 case SVE::BI__builtin_sve_svpmullt_u16:
11048 case SVE::BI__builtin_sve_svpmullt_u64:
11049 case SVE::BI__builtin_sve_svpmullt_n_u16:
11050 case SVE::BI__builtin_sve_svpmullt_n_u64:
11051 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
11052
11053 case SVE::BI__builtin_sve_svpmullb_u16:
11054 case SVE::BI__builtin_sve_svpmullb_u64:
11055 case SVE::BI__builtin_sve_svpmullb_n_u16:
11056 case SVE::BI__builtin_sve_svpmullb_n_u64:
11057 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
11058
11059 case SVE::BI__builtin_sve_svdup_n_b8:
11060 case SVE::BI__builtin_sve_svdup_n_b16:
11061 case SVE::BI__builtin_sve_svdup_n_b32:
11062 case SVE::BI__builtin_sve_svdup_n_b64: {
11063 Value *CmpNE =
11064 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
11065 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
11066 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
11067 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
11068 }
11069
11070 case SVE::BI__builtin_sve_svdupq_n_b8:
11071 case SVE::BI__builtin_sve_svdupq_n_b16:
11072 case SVE::BI__builtin_sve_svdupq_n_b32:
11073 case SVE::BI__builtin_sve_svdupq_n_b64:
11074 case SVE::BI__builtin_sve_svdupq_n_u8:
11075 case SVE::BI__builtin_sve_svdupq_n_s8:
11076 case SVE::BI__builtin_sve_svdupq_n_u64:
11077 case SVE::BI__builtin_sve_svdupq_n_f64:
11078 case SVE::BI__builtin_sve_svdupq_n_s64:
11079 case SVE::BI__builtin_sve_svdupq_n_u16:
11080 case SVE::BI__builtin_sve_svdupq_n_f16:
11081 case SVE::BI__builtin_sve_svdupq_n_bf16:
11082 case SVE::BI__builtin_sve_svdupq_n_s16:
11083 case SVE::BI__builtin_sve_svdupq_n_u32:
11084 case SVE::BI__builtin_sve_svdupq_n_f32:
11085 case SVE::BI__builtin_sve_svdupq_n_s32: {
11086 // These builtins are implemented by building a fixed-length vector from the
11087 // operands and broadcasting it across the register with the dupq_lane intrinsic.
11088 unsigned NumOpnds = Ops.size();
11089
11090 bool IsBoolTy =
11091 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
11092
11093 // For svdupq_n_b* the element type is an integer of width 128/numelts,
11094 // so that the compare can use the width that is natural for the expected
11095 // number of predicate lanes.
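// For example, svdupq_n_b8 takes 16 boolean operands, so EltTy becomes i8 and
// the compare below operates on 8-bit lanes.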
11096 llvm::Type *EltTy = Ops[0]->getType();
11097 if (IsBoolTy)
11098 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
11099
11100 SmallVector<llvm::Value *, 16> VecOps;
11101 for (unsigned I = 0; I < NumOpnds; ++I)
11102 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
11103 Value *Vec = BuildVector(VecOps);
11104
11105 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
11106 Value *InsertSubVec = Builder.CreateInsertVector(
11107 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
11108
11109 Function *F =
11110 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
11111 Value *DupQLane =
11112 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
11113
11114 if (!IsBoolTy)
11115 return DupQLane;
11116
11117 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11118 Value *Pred = EmitSVEAllTruePred(TypeFlags);
11119
11120 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
11121 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
11122 : Intrinsic::aarch64_sve_cmpne_wide,
11123 OverloadedTy);
11124 Value *Call = Builder.CreateCall(
11125 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
11126 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
11127 }
11128
11129 case SVE::BI__builtin_sve_svpfalse_b:
11130 return ConstantInt::getFalse(Ty);
11131
11132 case SVE::BI__builtin_sve_svpfalse_c: {
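// svpfalse_c yields an all-false svcount_t: materialize an all-false
// 16-lane svbool constant and convert it to the target type through
// aarch64.sve.convert.from.svbool.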
11133 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
11134 Function *CastToSVCountF =
11135 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
11136 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
11137 }
11138
11139 case SVE::BI__builtin_sve_svlen_bf16:
11140 case SVE::BI__builtin_sve_svlen_f16:
11141 case SVE::BI__builtin_sve_svlen_f32:
11142 case SVE::BI__builtin_sve_svlen_f64:
11143 case SVE::BI__builtin_sve_svlen_s8:
11144 case SVE::BI__builtin_sve_svlen_s16:
11145 case SVE::BI__builtin_sve_svlen_s32:
11146 case SVE::BI__builtin_sve_svlen_s64:
11147 case SVE::BI__builtin_sve_svlen_u8:
11148 case SVE::BI__builtin_sve_svlen_u16:
11149 case SVE::BI__builtin_sve_svlen_u32:
11150 case SVE::BI__builtin_sve_svlen_u64: {
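// svlen returns the number of elements in a full vector of the given type,
// computed as the type's known-minimum element count multiplied by
// llvm.vscale (e.g. roughly 4 * vscale for 32-bit elements).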
11151 SVETypeFlags TF(Builtin->TypeModifier);
11152 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
11153 auto *NumEls =
11154 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
11155
11156 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
11157 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
11158 }
11159
11160 case SVE::BI__builtin_sve_svtbl2_u8:
11161 case SVE::BI__builtin_sve_svtbl2_s8:
11162 case SVE::BI__builtin_sve_svtbl2_u16:
11163 case SVE::BI__builtin_sve_svtbl2_s16:
11164 case SVE::BI__builtin_sve_svtbl2_u32:
11165 case SVE::BI__builtin_sve_svtbl2_s32:
11166 case SVE::BI__builtin_sve_svtbl2_u64:
11167 case SVE::BI__builtin_sve_svtbl2_s64:
11168 case SVE::BI__builtin_sve_svtbl2_f16:
11169 case SVE::BI__builtin_sve_svtbl2_bf16:
11170 case SVE::BI__builtin_sve_svtbl2_f32:
11171 case SVE::BI__builtin_sve_svtbl2_f64: {
11172 SVETypeFlags TF(Builtin->TypeModifier);
11173 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
11174 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
11175 return Builder.CreateCall(F, Ops);
11176 }
11177
11178 case SVE::BI__builtin_sve_svset_neonq_s8:
11179 case SVE::BI__builtin_sve_svset_neonq_s16:
11180 case SVE::BI__builtin_sve_svset_neonq_s32:
11181 case SVE::BI__builtin_sve_svset_neonq_s64:
11182 case SVE::BI__builtin_sve_svset_neonq_u8:
11183 case SVE::BI__builtin_sve_svset_neonq_u16:
11184 case SVE::BI__builtin_sve_svset_neonq_u32:
11185 case SVE::BI__builtin_sve_svset_neonq_u64:
11186 case SVE::BI__builtin_sve_svset_neonq_f16:
11187 case SVE::BI__builtin_sve_svset_neonq_f32:
11188 case SVE::BI__builtin_sve_svset_neonq_f64:
11189 case SVE::BI__builtin_sve_svset_neonq_bf16: {
11190 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
11191 }
11192
11193 case SVE::BI__builtin_sve_svget_neonq_s8:
11194 case SVE::BI__builtin_sve_svget_neonq_s16:
11195 case SVE::BI__builtin_sve_svget_neonq_s32:
11196 case SVE::BI__builtin_sve_svget_neonq_s64:
11197 case SVE::BI__builtin_sve_svget_neonq_u8:
11198 case SVE::BI__builtin_sve_svget_neonq_u16:
11199 case SVE::BI__builtin_sve_svget_neonq_u32:
11200 case SVE::BI__builtin_sve_svget_neonq_u64:
11201 case SVE::BI__builtin_sve_svget_neonq_f16:
11202 case SVE::BI__builtin_sve_svget_neonq_f32:
11203 case SVE::BI__builtin_sve_svget_neonq_f64:
11204 case SVE::BI__builtin_sve_svget_neonq_bf16: {
11205 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
11206 }
11207
11208 case SVE::BI__builtin_sve_svdup_neonq_s8:
11209 case SVE::BI__builtin_sve_svdup_neonq_s16:
11210 case SVE::BI__builtin_sve_svdup_neonq_s32:
11211 case SVE::BI__builtin_sve_svdup_neonq_s64:
11212 case SVE::BI__builtin_sve_svdup_neonq_u8:
11213 case SVE::BI__builtin_sve_svdup_neonq_u16:
11214 case SVE::BI__builtin_sve_svdup_neonq_u32:
11215 case SVE::BI__builtin_sve_svdup_neonq_u64:
11216 case SVE::BI__builtin_sve_svdup_neonq_f16:
11217 case SVE::BI__builtin_sve_svdup_neonq_f32:
11218 case SVE::BI__builtin_sve_svdup_neonq_f64:
11219 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
11220 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
11221 Builder.getInt64(0));
11222 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
11223 {Insert, Builder.getInt64(0)});
11224 }
11225 }
11226
11227 /// Should not happen
11228 return nullptr;
11229}
11230
11231static void swapCommutativeSMEOperands(unsigned BuiltinID,
11232 SmallVectorImpl<Value *> &Ops) {
11233 unsigned MultiVec;
11234 switch (BuiltinID) {
11235 default:
11236 return;
11237 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
11238 MultiVec = 1;
11239 break;
11240 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
11241 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
11242 MultiVec = 2;
11243 break;
11244 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
11245 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
11246 MultiVec = 4;
11247 break;
11248 }
11249
11250 if (MultiVec > 0)
11251 for (unsigned I = 0; I < MultiVec; ++I)
11252 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
11253}
11254
11255 Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
11256 const CallExpr *E) {
11257 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
11258 AArch64SMEIntrinsicsProvenSorted);
11259
11260 llvm::SmallVector<Value *, 4> Ops;
11261 SVETypeFlags TypeFlags(Builtin->TypeModifier);
11262 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
11263
11264 if (TypeFlags.isLoad() || TypeFlags.isStore())
11265 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11266 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
11267 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11268 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
11269 BuiltinID == SME::BI__builtin_sme_svzero_za)
11270 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11271 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
11272 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
11273 BuiltinID == SME::BI__builtin_sme_svldr_za ||
11274 BuiltinID == SME::BI__builtin_sme_svstr_za)
11275 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
11276
11277 // Emit set FPMR for intrinsics that require it
11278 if (TypeFlags.setsFPMR())
11279 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
11280 Ops.pop_back_val());
11281 // Handle builtins which require their multi-vector operands to be swapped
11282 swapCommutativeSMEOperands(BuiltinID, Ops);
11283
11284 // Should not happen!
11285 if (Builtin->LLVMIntrinsic == 0)
11286 return nullptr;
11287
11288 // Predicates must match the main datatype.
11289 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
11290 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
11291 if (PredTy->getElementType()->isIntegerTy(1))
11292 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
11293
11294 Function *F =
11295 TypeFlags.isOverloadNone()
11296 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
11297 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
11298
11299 return Builder.CreateCall(F, Ops);
11300}
11301
11302 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
11303 const CallExpr *E,
11304 llvm::Triple::ArchType Arch) {
11305 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
11306 BuiltinID <= clang::AArch64::LastSVEBuiltin)
11307 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
11308
11309 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
11310 BuiltinID <= clang::AArch64::LastSMEBuiltin)
11311 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
11312
11313 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
11314 return EmitAArch64CpuSupports(E);
11315
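// The __builtin_arm_nop/yield/wfe/wfi/sev/sevl builtins (and their __yield
// style aliases) all lower to the aarch64.hint intrinsic; HintID is the hint
// immediate (nop=0, yield=1, wfe=2, wfi=3, sev=4, sevl=5).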
11316 unsigned HintID = static_cast<unsigned>(-1);
11317 switch (BuiltinID) {
11318 default: break;
11319 case clang::AArch64::BI__builtin_arm_nop:
11320 HintID = 0;
11321 break;
11322 case clang::AArch64::BI__builtin_arm_yield:
11323 case clang::AArch64::BI__yield:
11324 HintID = 1;
11325 break;
11326 case clang::AArch64::BI__builtin_arm_wfe:
11327 case clang::AArch64::BI__wfe:
11328 HintID = 2;
11329 break;
11330 case clang::AArch64::BI__builtin_arm_wfi:
11331 case clang::AArch64::BI__wfi:
11332 HintID = 3;
11333 break;
11334 case clang::AArch64::BI__builtin_arm_sev:
11335 case clang::AArch64::BI__sev:
11336 HintID = 4;
11337 break;
11338 case clang::AArch64::BI__builtin_arm_sevl:
11339 case clang::AArch64::BI__sevl:
11340 HintID = 5;
11341 break;
11342 }
11343
11344 if (HintID != static_cast<unsigned>(-1)) {
11345 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
11346 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
11347 }
11348
11349 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
11350 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
11351 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11352 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
11353 }
11354
11355 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
11356 // Create call to __arm_sme_state and store the results to the two pointers.
11357 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
11358 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
11359 false),
11360 "__arm_sme_state"));
11361 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
11362 "aarch64_pstate_sm_compatible");
11363 CI->setAttributes(Attrs);
11364 CI->setCallingConv(
11365 llvm::CallingConv::
11366 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
11367 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
11368 EmitPointerWithAlignment(E->getArg(0)));
11369 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
11370 EmitPointerWithAlignment(E->getArg(1)));
11371 }
11372
11373 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
11374 assert((getContext().getTypeSize(E->getType()) == 32) &&
11375 "rbit of unusual size!");
11376 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11377 return Builder.CreateCall(
11378 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11379 }
11380 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
11381 assert((getContext().getTypeSize(E->getType()) == 64) &&
11382 "rbit of unusual size!");
11383 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11384 return Builder.CreateCall(
11385 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
11386 }
11387
11388 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
11389 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
11390 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11391 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
11392 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11393 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
11394 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
11395 return Res;
11396 }
11397
11398 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
11399 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11400 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
11401 "cls");
11402 }
11403 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
11404 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11405 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
11406 "cls");
11407 }
11408
11409 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11410 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11411 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11412 llvm::Type *Ty = Arg->getType();
11413 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11414 Arg, "frint32z");
11415 }
11416
11417 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11418 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11419 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11420 llvm::Type *Ty = Arg->getType();
11421 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11422 Arg, "frint64z");
11423 }
11424
11425 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11426 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11427 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11428 llvm::Type *Ty = Arg->getType();
11429 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11430 Arg, "frint32x");
11431 }
11432
11433 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11434 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11435 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11436 llvm::Type *Ty = Arg->getType();
11437 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11438 Arg, "frint64x");
11439 }
11440
11441 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11442 assert((getContext().getTypeSize(E->getType()) == 32) &&
11443 "__jcvt of unusual size!");
11444 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11445 return Builder.CreateCall(
11446 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11447 }
11448
11449 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11450 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11451 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11452 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11453 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11454 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11455
11456 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11457 // Load from the address via an LLVM intrinsic, receiving a
11458 // tuple of 8 i64 words, and store each one to ValPtr.
11459 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11460 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11461 llvm::Value *ToRet;
11462 for (size_t i = 0; i < 8; i++) {
11463 llvm::Value *ValOffsetPtr =
11464 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11465 Address Addr =
11466 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11467 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11468 }
11469 return ToRet;
11470 } else {
11471 // Load 8 i64 words from ValPtr, and store them to the address
11472 // via an LLVM intrinsic.
11473 SmallVector<llvm::Value *, 9> Args;
11474 Args.push_back(MemAddr);
11475 for (size_t i = 0; i < 8; i++) {
11476 llvm::Value *ValOffsetPtr =
11477 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11478 Address Addr =
11479 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11480 Args.push_back(Builder.CreateLoad(Addr));
11481 }
11482
11483 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11484 ? Intrinsic::aarch64_st64b
11485 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11486 ? Intrinsic::aarch64_st64bv
11487 : Intrinsic::aarch64_st64bv0);
11488 Function *F = CGM.getIntrinsic(Intr);
11489 return Builder.CreateCall(F, Args);
11490 }
11491 }
11492
11493 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11494 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11495
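// __builtin_arm_rndr/rndrrs return a {random value, status} pair: the
// random 64-bit value is stored through the pointer argument and the
// zero-extended status flag becomes the call's result.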
11496 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11497 ? Intrinsic::aarch64_rndr
11498 : Intrinsic::aarch64_rndrrs);
11499 Function *F = CGM.getIntrinsic(Intr);
11500 llvm::Value *Val = Builder.CreateCall(F);
11501 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11502 Value *Status = Builder.CreateExtractValue(Val, 1);
11503
11504 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11505 Builder.CreateStore(RandomValue, MemAddress);
11506 Status = Builder.CreateZExt(Status, Int32Ty);
11507 return Status;
11508 }
11509
11510 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11511 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11512 const FunctionDecl *FD = E->getDirectCallee();
11513 Value *Ops[2];
11514 for (unsigned i = 0; i < 2; i++)
11515 Ops[i] = EmitScalarExpr(E->getArg(i));
11516 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11517 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11518 StringRef Name = FD->getName();
11519 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11520 }
11521
11522 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11523 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11524 getContext().getTypeSize(E->getType()) == 128) {
11525 Function *F =
11526 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11527 ? Intrinsic::aarch64_ldaxp
11528 : Intrinsic::aarch64_ldxp);
11529
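// For 128-bit ldrex/ldaex, use ldxp/ldaxp, which returns two i64 halves;
// the halves are zero-extended to i128 and recombined (high half shifted
// left by 64 and OR-ed with the low half) before being bitcast to the
// result type.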
11530 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11531 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11532
11533 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11534 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11535 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11536 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11537 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11538
11539 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11540 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11541 Val = Builder.CreateOr(Val, Val1);
11542 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11543 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11544 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11545 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11546
11547 QualType Ty = E->getType();
11548 llvm::Type *RealResTy = ConvertType(Ty);
11549 llvm::Type *IntTy =
11550 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11551
11552 Function *F =
11553 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11554 ? Intrinsic::aarch64_ldaxr
11555 : Intrinsic::aarch64_ldxr,
11556 UnqualPtrTy);
11557 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11558 Val->addParamAttr(
11559 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11560
11561 if (RealResTy->isPointerTy())
11562 return Builder.CreateIntToPtr(Val, RealResTy);
11563
11564 llvm::Type *IntResTy = llvm::IntegerType::get(
11565 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11566 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11567 RealResTy);
11568 }
11569
11570 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11571 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11572 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11573 Function *F =
11574 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11575 ? Intrinsic::aarch64_stlxp
11576 : Intrinsic::aarch64_stxp);
11577 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11578
11579 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11580 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11581
11582 Tmp = Tmp.withElementType(STy);
11583 llvm::Value *Val = Builder.CreateLoad(Tmp);
11584
11585 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11586 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11587 Value *StPtr = EmitScalarExpr(E->getArg(1));
11588 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11589 }
11590
11591 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11592 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11593 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11594 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11595
11596 QualType Ty = E->getArg(0)->getType();
11597 llvm::Type *StoreTy =
11598 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11599
11600 if (StoreVal->getType()->isPointerTy())
11601 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11602 else {
11603 llvm::Type *IntTy = llvm::IntegerType::get(
11604 getLLVMContext(),
11605 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11606 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11607 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11608 }
11609
11610 Function *F =
11611 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11612 ? Intrinsic::aarch64_stlxr
11613 : Intrinsic::aarch64_stxr,
11614 StoreAddr->getType());
11615 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11616 CI->addParamAttr(
11617 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11618 return CI;
11619 }
11620
11621 if (BuiltinID == clang::AArch64::BI__getReg) {
11622 Expr::EvalResult Result;
11623 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11624 llvm_unreachable("Sema will ensure that the parameter is constant");
11625
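// __getReg(n) reads general-purpose register xn (sp when n == 31) via the
// llvm.read_register intrinsic, passing the register name as metadata.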
11626 llvm::APSInt Value = Result.Val.getInt();
11627 LLVMContext &Context = CGM.getLLVMContext();
11628 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11629
11630 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11631 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11632 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11633
11634 llvm::Function *F =
11635 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11636 return Builder.CreateCall(F, Metadata);
11637 }
11638
11639 if (BuiltinID == clang::AArch64::BI__break) {
11640 Expr::EvalResult Result;
11641 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11642 llvm_unreachable("Sema will ensure that the parameter is constant");
11643
11644 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11645 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11646 }
11647
11648 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11649 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11650 return Builder.CreateCall(F);
11651 }
11652
11653 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11654 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11655 llvm::SyncScope::SingleThread);
11656
11657 // CRC32
11658 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11659 switch (BuiltinID) {
11660 case clang::AArch64::BI__builtin_arm_crc32b:
11661 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11662 case clang::AArch64::BI__builtin_arm_crc32cb:
11663 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11664 case clang::AArch64::BI__builtin_arm_crc32h:
11665 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11666 case clang::AArch64::BI__builtin_arm_crc32ch:
11667 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11668 case clang::AArch64::BI__builtin_arm_crc32w:
11669 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11670 case clang::AArch64::BI__builtin_arm_crc32cw:
11671 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11672 case clang::AArch64::BI__builtin_arm_crc32d:
11673 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11674 case clang::AArch64::BI__builtin_arm_crc32cd:
11675 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11676 }
11677
11678 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11679 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11680 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11681 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11682
11683 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11684 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11685
11686 return Builder.CreateCall(F, {Arg0, Arg1});
11687 }
11688
11689 // Memory Operations (MOPS)
11690 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11691 Value *Dst = EmitScalarExpr(E->getArg(0));
11692 Value *Val = EmitScalarExpr(E->getArg(1));
11693 Value *Size = EmitScalarExpr(E->getArg(2));
11694 Val = Builder.CreateTrunc(Val, Int8Ty);
11695 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11696 return Builder.CreateCall(
11697 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11698 }
11699
11700 // Memory Tagging Extensions (MTE) Intrinsics
11701 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11702 switch (BuiltinID) {
11703 case clang::AArch64::BI__builtin_arm_irg:
11704 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11705 case clang::AArch64::BI__builtin_arm_addg:
11706 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11707 case clang::AArch64::BI__builtin_arm_gmi:
11708 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11709 case clang::AArch64::BI__builtin_arm_ldg:
11710 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11711 case clang::AArch64::BI__builtin_arm_stg:
11712 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11713 case clang::AArch64::BI__builtin_arm_subp:
11714 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11715 }
11716
11717 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11718 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11719 Value *Pointer = EmitScalarExpr(E->getArg(0));
11720 Value *Mask = EmitScalarExpr(E->getArg(1));
11721
11722 Mask = Builder.CreateZExt(Mask, Int64Ty);
11723 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11724 {Pointer, Mask});
11725 }
11726 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11727 Value *Pointer = EmitScalarExpr(E->getArg(0));
11728 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11729
11730 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11731 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11732 {Pointer, TagOffset});
11733 }
11734 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11735 Value *Pointer = EmitScalarExpr(E->getArg(0));
11736 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11737
11738 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11739 return Builder.CreateCall(
11740 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11741 }
11742 // Although it is possible to supply a different return
12743 // address (first arg) to this intrinsic, for now we set the
12744 // return address to the same value as the input address.
11745 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11746 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11747 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11748 {TagAddress, TagAddress});
11749 }
11750 // Although it is possible to supply a different tag (to set)
11751 // to this intrinsic (as first arg), for now we supply
12752 // the tag that is in the input address argument (the common use case).
11753 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11754 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11755 return Builder.CreateCall(CGM.getIntrinsic(MTEIntrinsicID),
11756 {TagAddress, TagAddress});
11757 }
11758 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11759 Value *PointerA = EmitScalarExpr(E->getArg(0));
11760 Value *PointerB = EmitScalarExpr(E->getArg(1));
11761 return Builder.CreateCall(
11762 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11763 }
11764 }
11765
11766 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11767 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11768 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11769 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11770 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11771 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11772 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11773 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11774
11775 SpecialRegisterAccessKind AccessKind = Write;
11776 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11777 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11778 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11779 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11780 AccessKind = VolatileRead;
11781
11782 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11783 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11784
11785 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11786 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11787
11788 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11789 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11790
11791 llvm::Type *ValueType;
11792 llvm::Type *RegisterType = Int64Ty;
11793 if (Is32Bit) {
11794 ValueType = Int32Ty;
11795 } else if (Is128Bit) {
11796 llvm::Type *Int128Ty =
11797 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11798 ValueType = Int128Ty;
11799 RegisterType = Int128Ty;
11800 } else if (IsPointerBuiltin) {
11801 ValueType = VoidPtrTy;
11802 } else {
11803 ValueType = Int64Ty;
11804 }
11805
11806 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11807 AccessKind);
11808 }
11809
11810 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11811 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11812 LLVMContext &Context = CGM.getLLVMContext();
11813
11814 unsigned SysReg =
11815 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11816
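// _ReadStatusReg/_WriteStatusReg take the MSVC-style packed system-register
// operand and unpack it into the "op0:op1:CRn:CRm:op2" string form that
// llvm.read_register/llvm.write_register expect as metadata.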
11817 std::string SysRegStr;
11818 llvm::raw_string_ostream(SysRegStr) <<
11819 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11820 ((SysReg >> 11) & 7) << ":" <<
11821 ((SysReg >> 7) & 15) << ":" <<
11822 ((SysReg >> 3) & 15) << ":" <<
11823 ( SysReg & 7);
11824
11825 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11826 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11827 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11828
11829 llvm::Type *RegisterType = Int64Ty;
11830 llvm::Type *Types[] = { RegisterType };
11831
11832 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11833 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11834
11835 return Builder.CreateCall(F, Metadata);
11836 }
11837
11838 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11839 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11840
11841 return Builder.CreateCall(F, { Metadata, ArgValue });
11842 }
11843
11844 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11845 llvm::Function *F =
11846 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11847 return Builder.CreateCall(F);
11848 }
11849
11850 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11851 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11852 return Builder.CreateCall(F);
11853 }
11854
11855 if (BuiltinID == clang::AArch64::BI__mulh ||
11856 BuiltinID == clang::AArch64::BI__umulh) {
11857 llvm::Type *ResType = ConvertType(E->getType());
11858 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11859
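// __mulh/__umulh return the high 64 bits of a 64x64->128-bit multiply. A
// rough scalar model of the lowering (illustrative only):
//   __int128 Wide = (__int128)a * (__int128)b; // NSW mul (NUW for __umulh)
//   return (long long)(Wide >> 64);            // ashr for __mulh, lshr for __umulh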
11860 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11861 Value *LHS =
11862 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11863 Value *RHS =
11864 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11865
11866 Value *MulResult, *HigherBits;
11867 if (IsSigned) {
11868 MulResult = Builder.CreateNSWMul(LHS, RHS);
11869 HigherBits = Builder.CreateAShr(MulResult, 64);
11870 } else {
11871 MulResult = Builder.CreateNUWMul(LHS, RHS);
11872 HigherBits = Builder.CreateLShr(MulResult, 64);
11873 }
11874 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11875
11876 return HigherBits;
11877 }
11878
11879 if (BuiltinID == AArch64::BI__writex18byte ||
11880 BuiltinID == AArch64::BI__writex18word ||
11881 BuiltinID == AArch64::BI__writex18dword ||
11882 BuiltinID == AArch64::BI__writex18qword) {
11883 // Process the args first
11884 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11885 Value *DataArg = EmitScalarExpr(E->getArg(1));
11886
11887 // Read x18 as i8*
11888 llvm::Value *X18 = readX18AsPtr(*this);
11889
11890 // Store val at x18 + offset
11891 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11892 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11893 StoreInst *Store =
11894 Builder.CreateAlignedStore(DataArg, Ptr, CharUnits::One());
11895 return Store;
11896 }
11897
11898 if (BuiltinID == AArch64::BI__readx18byte ||
11899 BuiltinID == AArch64::BI__readx18word ||
11900 BuiltinID == AArch64::BI__readx18dword ||
11901 BuiltinID == AArch64::BI__readx18qword) {
11902 // Process the args first
11903 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11904
11905 // Read x18 as i8*
11906 llvm::Value *X18 = readX18AsPtr(*this);
11907
11908 // Load x18 + offset
11909 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11910 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11911 llvm::Type *IntTy = ConvertType(E->getType());
11912 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11913 return Load;
11914 }
11915
11916 if (BuiltinID == AArch64::BI__addx18byte ||
11917 BuiltinID == AArch64::BI__addx18word ||
11918 BuiltinID == AArch64::BI__addx18dword ||
11919 BuiltinID == AArch64::BI__addx18qword ||
11920 BuiltinID == AArch64::BI__incx18byte ||
11921 BuiltinID == AArch64::BI__incx18word ||
11922 BuiltinID == AArch64::BI__incx18dword ||
11923 BuiltinID == AArch64::BI__incx18qword) {
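// __addx18* adds the supplied value, and __incx18* adds 1, to the integer of
// the corresponding width (byte=i8, word=i16, dword=i32, qword=i64) stored
// at x18 + offset, using byte-aligned loads and stores.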
11924 llvm::Type *IntTy;
11925 bool isIncrement;
11926 switch (BuiltinID) {
11927 case AArch64::BI__incx18byte:
11928 IntTy = Int8Ty;
11929 isIncrement = true;
11930 break;
11931 case AArch64::BI__incx18word:
11932 IntTy = Int16Ty;
11933 isIncrement = true;
11934 break;
11935 case AArch64::BI__incx18dword:
11936 IntTy = Int32Ty;
11937 isIncrement = true;
11938 break;
11939 case AArch64::BI__incx18qword:
11940 IntTy = Int64Ty;
11941 isIncrement = true;
11942 break;
11943 default:
11944 IntTy = ConvertType(E->getArg(1)->getType());
11945 isIncrement = false;
11946 break;
11947 }
11948 // Process the args first
11949 Value *OffsetArg = EmitScalarExpr(E->getArg(0));
11950 Value *ValToAdd =
11951 isIncrement ? ConstantInt::get(IntTy, 1) : EmitScalarExpr(E->getArg(1));
11952
11953 // Read x18 as i8*
11954 llvm::Value *X18 = readX18AsPtr(*this);
11955
11956 // Load x18 + offset
11957 Value *Offset = Builder.CreateZExt(OffsetArg, Int64Ty);
11958 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11959 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11960
11961 // Add values
11962 Value *AddResult = Builder.CreateAdd(Load, ValToAdd);
11963
11964 // Store val at x18 + offset
11965 StoreInst *Store =
11966 Builder.CreateAlignedStore(AddResult, Ptr, CharUnits::One());
11967 return Store;
11968 }
11969
11970 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11971 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11972 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11973 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11974 Value *Arg = EmitScalarExpr(E->getArg(0));
11975 llvm::Type *RetTy = ConvertType(E->getType());
11976 return Builder.CreateBitCast(Arg, RetTy);
11977 }
11978
11979 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11980 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11981 BuiltinID == AArch64::BI_CountLeadingZeros ||
11982 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11983 Value *Arg = EmitScalarExpr(E->getArg(0));
11984 llvm::Type *ArgType = Arg->getType();
11985
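// _CountLeadingOnes is computed as ctlz of the bitwise NOT of the argument;
// _CountLeadingZeros maps directly to ctlz. The 64-bit variants truncate the
// i64 result to the i32 return type.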
11986 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11987 BuiltinID == AArch64::BI_CountLeadingOnes64)
11988 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11989
11990 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11991 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11992
11993 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11994 BuiltinID == AArch64::BI_CountLeadingZeros64)
11995 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11996 return Result;
11997 }
11998
11999 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
12000 BuiltinID == AArch64::BI_CountLeadingSigns64) {
12001 Value *Arg = EmitScalarExpr(E->getArg(0));
12002
12003 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
12004 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
12005 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
12006
12007 Value *Result = Builder.CreateCall(F, Arg, "cls");
12008 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
12009 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12010 return Result;
12011 }
12012
12013 if (BuiltinID == AArch64::BI_CountOneBits ||
12014 BuiltinID == AArch64::BI_CountOneBits64) {
12015 Value *ArgValue = EmitScalarExpr(E->getArg(0));
12016 llvm::Type *ArgType = ArgValue->getType();
12017 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
12018
12019 Value *Result = Builder.CreateCall(F, ArgValue);
12020 if (BuiltinID == AArch64::BI_CountOneBits64)
12021 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
12022 return Result;
12023 }
12024
12025 if (BuiltinID == AArch64::BI__prefetch) {
12026 Value *Address = EmitScalarExpr(E->getArg(0));
12027 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
12028 Value *Locality = ConstantInt::get(Int32Ty, 3);
12029 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
12030 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
12031 return Builder.CreateCall(F, {Address, RW, Locality, Data});
12032 }
12033
12034 if (BuiltinID == AArch64::BI__hlt) {
12035 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
12036 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
12037
12038 // Return 0 for convenience, even though MSVC returns some other undefined
12039 // value.
12040 return ConstantInt::get(Builder.getInt32Ty(), 0);
12041 }
12042
12043 // Handle MSVC intrinsics before argument evaluation to prevent double
12044 // evaluation.
12045 if (std::optional<MSVCIntrin> MsvcIntId =
12046 translateAarch64ToMsvcIntrin(BuiltinID))
12047 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
12048
12049 // Some intrinsics are equivalent; if so, use the base intrinsic ID.
12050 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
12051 return P.first == BuiltinID;
12052 });
12053 if (It != end(NEONEquivalentIntrinsicMap))
12054 BuiltinID = It->second;
12055
12056 // Find out if any arguments are required to be integer constant
12057 // expressions.
12058 unsigned ICEArguments = 0;
12059 ASTContext::GetBuiltinTypeError Error;
12060 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
12061 assert(Error == ASTContext::GE_None && "Should not codegen an error");
12062
12063 llvm::SmallVector<Value *, 4> Ops;
12064 Address PtrOp0 = Address::invalid();
12065 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
12066 if (i == 0) {
12067 switch (BuiltinID) {
12068 case NEON::BI__builtin_neon_vld1_v:
12069 case NEON::BI__builtin_neon_vld1q_v:
12070 case NEON::BI__builtin_neon_vld1_dup_v:
12071 case NEON::BI__builtin_neon_vld1q_dup_v:
12072 case NEON::BI__builtin_neon_vld1_lane_v:
12073 case NEON::BI__builtin_neon_vld1q_lane_v:
12074 case NEON::BI__builtin_neon_vst1_v:
12075 case NEON::BI__builtin_neon_vst1q_v:
12076 case NEON::BI__builtin_neon_vst1_lane_v:
12077 case NEON::BI__builtin_neon_vst1q_lane_v:
12078 case NEON::BI__builtin_neon_vldap1_lane_s64:
12079 case NEON::BI__builtin_neon_vldap1q_lane_s64:
12080 case NEON::BI__builtin_neon_vstl1_lane_s64:
12081 case NEON::BI__builtin_neon_vstl1q_lane_s64:
12082 // Get the alignment for the argument in addition to the value;
12083 // we'll use it later.
12084 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
12085 Ops.push_back(PtrOp0.emitRawPointer(*this));
12086 continue;
12087 }
12088 }
12089 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
12090 }
12091
12092 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
12093 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
12094 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
12095
12096 if (Builtin) {
12097 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
12098 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
12099 assert(Result && "SISD intrinsic should have been handled");
12100 return Result;
12101 }
12102
12103 const Expr *Arg = E->getArg(E->getNumArgs()-1);
12104 NeonTypeFlags Type(0);
12105 if (std::optional<llvm::APSInt> Result =
12106 Arg->getIntegerConstantExpr(getContext()))
12107 // Determine the type of this overloaded NEON intrinsic.
12108 Type = NeonTypeFlags(Result->getZExtValue());
12109
12110 bool usgn = Type.isUnsigned();
12111 bool quad = Type.isQuad();
12112
12113 // Handle non-overloaded intrinsics first.
12114 switch (BuiltinID) {
12115 default: break;
12116 case NEON::BI__builtin_neon_vabsh_f16:
12117 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12118 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
12119 case NEON::BI__builtin_neon_vaddq_p128: {
12120 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
12121 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12122 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12123 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12124 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
12125 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12126 return Builder.CreateBitCast(Ops[0], Int128Ty);
12127 }
12128 case NEON::BI__builtin_neon_vldrq_p128: {
12129 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
12130 Value *Ptr = EmitScalarExpr(E->getArg(0));
12131 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
12132 CharUnits::fromQuantity(16));
12133 }
12134 case NEON::BI__builtin_neon_vstrq_p128: {
12135 Value *Ptr = Ops[0];
12136 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
12137 }
12138 case NEON::BI__builtin_neon_vcvts_f32_u32:
12139 case NEON::BI__builtin_neon_vcvtd_f64_u64:
12140 usgn = true;
12141 [[fallthrough]];
12142 case NEON::BI__builtin_neon_vcvts_f32_s32:
12143 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
12144 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12145 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
12146 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
12147 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
12148 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12149 if (usgn)
12150 return Builder.CreateUIToFP(Ops[0], FTy);
12151 return Builder.CreateSIToFP(Ops[0], FTy);
12152 }
12153 case NEON::BI__builtin_neon_vcvth_f16_u16:
12154 case NEON::BI__builtin_neon_vcvth_f16_u32:
12155 case NEON::BI__builtin_neon_vcvth_f16_u64:
12156 usgn = true;
12157 [[fallthrough]];
12158 case NEON::BI__builtin_neon_vcvth_f16_s16:
12159 case NEON::BI__builtin_neon_vcvth_f16_s32:
12160 case NEON::BI__builtin_neon_vcvth_f16_s64: {
12161 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12162 llvm::Type *FTy = HalfTy;
12163 llvm::Type *InTy;
12164 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
12165 InTy = Int64Ty;
12166 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
12167 InTy = Int32Ty;
12168 else
12169 InTy = Int16Ty;
12170 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
12171 if (usgn)
12172 return Builder.CreateUIToFP(Ops[0], FTy);
12173 return Builder.CreateSIToFP(Ops[0], FTy);
12174 }
12175 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12176 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12177 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12178 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12179 case NEON::BI__builtin_neon_vcvth_u16_f16:
12180 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12181 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12182 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12183 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12184 case NEON::BI__builtin_neon_vcvth_s16_f16: {
12185 unsigned Int;
12186 llvm::Type* InTy = Int32Ty;
12187 llvm::Type* FTy = HalfTy;
12188 llvm::Type *Tys[2] = {InTy, FTy};
12189 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12190 switch (BuiltinID) {
12191 default: llvm_unreachable("missing builtin ID in switch!");
12192 case NEON::BI__builtin_neon_vcvtah_u16_f16:
12193 Int = Intrinsic::aarch64_neon_fcvtau; break;
12194 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
12195 Int = Intrinsic::aarch64_neon_fcvtmu; break;
12196 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
12197 Int = Intrinsic::aarch64_neon_fcvtnu; break;
12198 case NEON::BI__builtin_neon_vcvtph_u16_f16:
12199 Int = Intrinsic::aarch64_neon_fcvtpu; break;
12200 case NEON::BI__builtin_neon_vcvth_u16_f16:
12201 Int = Intrinsic::aarch64_neon_fcvtzu; break;
12202 case NEON::BI__builtin_neon_vcvtah_s16_f16:
12203 Int = Intrinsic::aarch64_neon_fcvtas; break;
12204 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
12205 Int = Intrinsic::aarch64_neon_fcvtms; break;
12206 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
12207 Int = Intrinsic::aarch64_neon_fcvtns; break;
12208 case NEON::BI__builtin_neon_vcvtph_s16_f16:
12209 Int = Intrinsic::aarch64_neon_fcvtps; break;
12210 case NEON::BI__builtin_neon_vcvth_s16_f16:
12211 Int = Intrinsic::aarch64_neon_fcvtzs; break;
12212 }
12213 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
12214 return Builder.CreateTrunc(Ops[0], Int16Ty);
12215 }
12216 case NEON::BI__builtin_neon_vcaleh_f16:
12217 case NEON::BI__builtin_neon_vcalth_f16:
12218 case NEON::BI__builtin_neon_vcageh_f16:
12219 case NEON::BI__builtin_neon_vcagth_f16: {
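// vcale/vcalt reuse the facge/facgt absolute-compare intrinsics with the
// operands swapped, since |a| <= |b| is equivalent to |b| >= |a|.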
12220 unsigned Int;
12221 llvm::Type* InTy = Int32Ty;
12222 llvm::Type* FTy = HalfTy;
12223 llvm::Type *Tys[2] = {InTy, FTy};
12224 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12225 switch (BuiltinID) {
12226 default: llvm_unreachable("missing builtin ID in switch!");
12227 case NEON::BI__builtin_neon_vcageh_f16:
12228 Int = Intrinsic::aarch64_neon_facge; break;
12229 case NEON::BI__builtin_neon_vcagth_f16:
12230 Int = Intrinsic::aarch64_neon_facgt; break;
12231 case NEON::BI__builtin_neon_vcaleh_f16:
12232 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
12233 case NEON::BI__builtin_neon_vcalth_f16:
12234 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
12235 }
12236 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
12237 return Builder.CreateTrunc(Ops[0], Int16Ty);
12238 }
12239 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12240 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
12241 unsigned Int;
12242 llvm::Type* InTy = Int32Ty;
12243 llvm::Type* FTy = HalfTy;
12244 llvm::Type *Tys[2] = {InTy, FTy};
12245 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12246 switch (BuiltinID) {
12247 default: llvm_unreachable("missing builtin ID in switch!");
12248 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
12249 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
12250 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
12251 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
12252 }
12253 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12254 return Builder.CreateTrunc(Ops[0], Int16Ty);
12255 }
12256 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12257 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
12258 unsigned Int;
12259 llvm::Type* FTy = HalfTy;
12260 llvm::Type* InTy = Int32Ty;
12261 llvm::Type *Tys[2] = {FTy, InTy};
12262 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12263 switch (BuiltinID) {
12264 default: llvm_unreachable("missing builtin ID in switch!");
12265 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
12266 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
12267 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
12268 break;
12269 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
12270 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
12271 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
12272 break;
12273 }
12274 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
12275 }
12276 case NEON::BI__builtin_neon_vpaddd_s64: {
12277 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
12278 Value *Vec = EmitScalarExpr(E->getArg(0));
12279 // The vector is v2i64, so make sure it's bitcast to that.
12280 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
12281 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12282 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12283 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12284 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12285 // Pairwise addition of a v2i64 into a scalar i64.
12286 return Builder.CreateAdd(Op0, Op1, "vpaddd");
12287 }
12288 case NEON::BI__builtin_neon_vpaddd_f64: {
12289 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
12290 Value *Vec = EmitScalarExpr(E->getArg(0));
12291 // The vector is v2f64, so make sure it's bitcast to that.
12292 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
12293 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12294 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12295 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12296 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12297 // Pairwise addition of a v2f64 into a scalar f64.
12298 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12299 }
12300 case NEON::BI__builtin_neon_vpadds_f32: {
12301 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
12302 Value *Vec = EmitScalarExpr(E->getArg(0));
12303 // The vector is v2f32, so make sure it's bitcast to that.
12304 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
12305 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
12306 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
12307 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
12308 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
12309 // Pairwise addition of a v2f32 into a scalar f32.
12310 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
12311 }
12312 case NEON::BI__builtin_neon_vceqzd_s64:
12313 case NEON::BI__builtin_neon_vceqzd_f64:
12314 case NEON::BI__builtin_neon_vceqzs_f32:
12315 case NEON::BI__builtin_neon_vceqzh_f16:
12316 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12317 return EmitAArch64CompareBuiltinExpr(
12318 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12319 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
12320 case NEON::BI__builtin_neon_vcgezd_s64:
12321 case NEON::BI__builtin_neon_vcgezd_f64:
12322 case NEON::BI__builtin_neon_vcgezs_f32:
12323 case NEON::BI__builtin_neon_vcgezh_f16:
12324 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12325 return EmitAArch64CompareBuiltinExpr(
12326 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12327 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
12328 case NEON::BI__builtin_neon_vclezd_s64:
12329 case NEON::BI__builtin_neon_vclezd_f64:
12330 case NEON::BI__builtin_neon_vclezs_f32:
12331 case NEON::BI__builtin_neon_vclezh_f16:
12332 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12333 return EmitAArch64CompareBuiltinExpr(
12334 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12335 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
12336 case NEON::BI__builtin_neon_vcgtzd_s64:
12337 case NEON::BI__builtin_neon_vcgtzd_f64:
12338 case NEON::BI__builtin_neon_vcgtzs_f32:
12339 case NEON::BI__builtin_neon_vcgtzh_f16:
12340 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12341 return EmitAArch64CompareBuiltinExpr(
12342 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12343 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
12344 case NEON::BI__builtin_neon_vcltzd_s64:
12345 case NEON::BI__builtin_neon_vcltzd_f64:
12346 case NEON::BI__builtin_neon_vcltzs_f32:
12347 case NEON::BI__builtin_neon_vcltzh_f16:
12348 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12349 return EmitAArch64CompareBuiltinExpr(
12350 Ops[0], ConvertType(E->getCallReturnType(getContext())),
12351 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
12352
12353 case NEON::BI__builtin_neon_vceqzd_u64: {
12354 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12355 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12356 Ops[0] =
12357 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
12358 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
12359 }
12360 case NEON::BI__builtin_neon_vceqd_f64:
12361 case NEON::BI__builtin_neon_vcled_f64:
12362 case NEON::BI__builtin_neon_vcltd_f64:
12363 case NEON::BI__builtin_neon_vcged_f64:
12364 case NEON::BI__builtin_neon_vcgtd_f64: {
12365 llvm::CmpInst::Predicate P;
12366 switch (BuiltinID) {
12367 default: llvm_unreachable("missing builtin ID in switch!");
12368 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
12369 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
12370 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
12371 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
12372 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
12373 }
12374 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12375 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12376 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12377 if (P == llvm::FCmpInst::FCMP_OEQ)
12378 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12379 else
12380 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12381 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
12382 }
12383 case NEON::BI__builtin_neon_vceqs_f32:
12384 case NEON::BI__builtin_neon_vcles_f32:
12385 case NEON::BI__builtin_neon_vclts_f32:
12386 case NEON::BI__builtin_neon_vcges_f32:
12387 case NEON::BI__builtin_neon_vcgts_f32: {
12388 llvm::CmpInst::Predicate P;
12389 switch (BuiltinID) {
12390 default: llvm_unreachable("missing builtin ID in switch!");
12391 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
12392 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
12393 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
12394 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
12395 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
12396 }
12397 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12398 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
12399 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
12400 if (P == llvm::FCmpInst::FCMP_OEQ)
12401 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12402 else
12403 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12404 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
12405 }
12406 case NEON::BI__builtin_neon_vceqh_f16:
12407 case NEON::BI__builtin_neon_vcleh_f16:
12408 case NEON::BI__builtin_neon_vclth_f16:
12409 case NEON::BI__builtin_neon_vcgeh_f16:
12410 case NEON::BI__builtin_neon_vcgth_f16: {
12411 llvm::CmpInst::Predicate P;
12412 switch (BuiltinID) {
12413 default: llvm_unreachable("missing builtin ID in switch!");
12414 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
12415 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
12416 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
12417 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
12418 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
12419 }
12420 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12421 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
12422 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
12423 if (P == llvm::FCmpInst::FCMP_OEQ)
12424 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
12425 else
12426 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
12427 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
12428 }
12429 case NEON::BI__builtin_neon_vceqd_s64:
12430 case NEON::BI__builtin_neon_vceqd_u64:
12431 case NEON::BI__builtin_neon_vcgtd_s64:
12432 case NEON::BI__builtin_neon_vcgtd_u64:
12433 case NEON::BI__builtin_neon_vcltd_s64:
12434 case NEON::BI__builtin_neon_vcltd_u64:
12435 case NEON::BI__builtin_neon_vcged_u64:
12436 case NEON::BI__builtin_neon_vcged_s64:
12437 case NEON::BI__builtin_neon_vcled_u64:
12438 case NEON::BI__builtin_neon_vcled_s64: {
12439 llvm::CmpInst::Predicate P;
12440 switch (BuiltinID) {
12441 default: llvm_unreachable("missing builtin ID in switch!");
12442 case NEON::BI__builtin_neon_vceqd_s64:
12443 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12444 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12445 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12446 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12447 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12448 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12449 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12450 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12451 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12452 }
12453 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12454 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12455 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12456 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12457 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12458 }
12459 case NEON::BI__builtin_neon_vtstd_s64:
12460 case NEON::BI__builtin_neon_vtstd_u64: {
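// vtstd: AND the two operands, compare the result against zero, and
// sign-extend the i1 so the scalar result is all-ones when any common bit is
// set and zero otherwise.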
12461 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12462 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12463 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12464 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12465 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12466 llvm::Constant::getNullValue(Int64Ty));
12467 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12468 }
12469 case NEON::BI__builtin_neon_vset_lane_i8:
12470 case NEON::BI__builtin_neon_vset_lane_i16:
12471 case NEON::BI__builtin_neon_vset_lane_i32:
12472 case NEON::BI__builtin_neon_vset_lane_i64:
12473 case NEON::BI__builtin_neon_vset_lane_bf16:
12474 case NEON::BI__builtin_neon_vset_lane_f32:
12475 case NEON::BI__builtin_neon_vsetq_lane_i8:
12476 case NEON::BI__builtin_neon_vsetq_lane_i16:
12477 case NEON::BI__builtin_neon_vsetq_lane_i32:
12478 case NEON::BI__builtin_neon_vsetq_lane_i64:
12479 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12480 case NEON::BI__builtin_neon_vsetq_lane_f32:
12481 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12482 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12483 case NEON::BI__builtin_neon_vset_lane_f64:
12484 // The vector type needs a cast for the v1f64 variant.
12485 Ops[1] =
12486 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12487 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12488 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12489 case NEON::BI__builtin_neon_vsetq_lane_f64:
12490 // The vector type needs a cast for the v2f64 variant.
12491 Ops[1] =
12492 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12493 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12494 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12495
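  // The vget_lane/vdup*_lane* scalar builtins below all follow the same
  // recipe: bitcast the vector operand to the matching fixed vector type,
  // then extract the requested lane. Roughly (illustrative IR for
  // vget_lane_i8):
  //   %r = extractelement <8 x i8> %v, i64 %lane   ; after bitcasting to <8 x i8>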
12496 case NEON::BI__builtin_neon_vget_lane_i8:
12497 case NEON::BI__builtin_neon_vdupb_lane_i8:
12498 Ops[0] =
12499 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12500 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12501 "vget_lane");
12502 case NEON::BI__builtin_neon_vgetq_lane_i8:
12503 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12504 Ops[0] =
12505 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12506 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12507 "vgetq_lane");
12508 case NEON::BI__builtin_neon_vget_lane_i16:
12509 case NEON::BI__builtin_neon_vduph_lane_i16:
12510 Ops[0] =
12511 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12512 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12513 "vget_lane");
12514 case NEON::BI__builtin_neon_vgetq_lane_i16:
12515 case NEON::BI__builtin_neon_vduph_laneq_i16:
12516 Ops[0] =
12517 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12518 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12519 "vgetq_lane");
12520 case NEON::BI__builtin_neon_vget_lane_i32:
12521 case NEON::BI__builtin_neon_vdups_lane_i32:
12522 Ops[0] =
12523 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12524 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12525 "vget_lane");
12526 case NEON::BI__builtin_neon_vdups_lane_f32:
12527 Ops[0] =
12528 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12529 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12530 "vdups_lane");
12531 case NEON::BI__builtin_neon_vgetq_lane_i32:
12532 case NEON::BI__builtin_neon_vdups_laneq_i32:
12533 Ops[0] =
12534 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12535 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12536 "vgetq_lane");
12537 case NEON::BI__builtin_neon_vget_lane_i64:
12538 case NEON::BI__builtin_neon_vdupd_lane_i64:
12539 Ops[0] =
12540 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12541 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12542 "vget_lane");
12543 case NEON::BI__builtin_neon_vdupd_lane_f64:
12544 Ops[0] =
12545 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12546 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12547 "vdupd_lane");
12548 case NEON::BI__builtin_neon_vgetq_lane_i64:
12549 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12550 Ops[0] =
12551 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12552 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12553 "vgetq_lane");
12554 case NEON::BI__builtin_neon_vget_lane_f32:
12555 Ops[0] =
12556 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12557 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12558 "vget_lane");
12559 case NEON::BI__builtin_neon_vget_lane_f64:
12560 Ops[0] =
12561 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12562 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12563 "vget_lane");
12564 case NEON::BI__builtin_neon_vgetq_lane_f32:
12565 case NEON::BI__builtin_neon_vdups_laneq_f32:
12566 Ops[0] =
12567 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12568 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12569 "vgetq_lane");
12570 case NEON::BI__builtin_neon_vgetq_lane_f64:
12571 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12572 Ops[0] =
12573 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12574 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12575 "vgetq_lane");
12576 case NEON::BI__builtin_neon_vaddh_f16:
12577 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12578 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12579 case NEON::BI__builtin_neon_vsubh_f16:
12580 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12581 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12582 case NEON::BI__builtin_neon_vmulh_f16:
12583 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12584 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12585 case NEON::BI__builtin_neon_vdivh_f16:
12586 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12587 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12588 case NEON::BI__builtin_neon_vfmah_f16:
12589 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12590 return emitCallMaybeConstrainedFPBuiltin(
12591 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12592 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12593 case NEON::BI__builtin_neon_vfmsh_f16: {
12594 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12595
12596 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12597 return emitCallMaybeConstrainedFPBuiltin(
12598 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12599 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12600 }
12601 case NEON::BI__builtin_neon_vaddd_s64:
12602 case NEON::BI__builtin_neon_vaddd_u64:
12603 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12604 case NEON::BI__builtin_neon_vsubd_s64:
12605 case NEON::BI__builtin_neon_vsubd_u64:
12606 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12607 case NEON::BI__builtin_neon_vqdmlalh_s16:
12608 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12609 SmallVector<Value *, 2> ProductOps;
12610 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12611 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12612 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12613 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12614 ProductOps, "vqdmlXl");
12615 Constant *CI = ConstantInt::get(SizeTy, 0);
12616 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12617
12618 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12619 ? Intrinsic::aarch64_neon_sqadd
12620 : Intrinsic::aarch64_neon_sqsub;
12621 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12622 }
12623 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12624 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12625 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12626 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12627 Ops, "vqshlu_n");
12628 }
12629 case NEON::BI__builtin_neon_vqshld_n_u64:
12630 case NEON::BI__builtin_neon_vqshld_n_s64: {
12631 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12632 ? Intrinsic::aarch64_neon_uqshl
12633 : Intrinsic::aarch64_neon_sqshl;
12634 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12635 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12636 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12637 }
12638 case NEON::BI__builtin_neon_vrshrd_n_u64:
12639 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12640 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12641 ? Intrinsic::aarch64_neon_urshl
12642 : Intrinsic::aarch64_neon_srshl;
12643 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12644 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12645 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12646 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12647 }
12648 case NEON::BI__builtin_neon_vrsrad_n_u64:
12649 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12650 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12651 ? Intrinsic::aarch64_neon_urshl
12652 : Intrinsic::aarch64_neon_srshl;
12653 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12654 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12655 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12656 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12657 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12658 }
12659 case NEON::BI__builtin_neon_vshld_n_s64:
12660 case NEON::BI__builtin_neon_vshld_n_u64: {
12661 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12662 return Builder.CreateShl(
12663 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12664 }
12665 case NEON::BI__builtin_neon_vshrd_n_s64: {
12666 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12667 return Builder.CreateAShr(
12668 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12669 Amt->getZExtValue())),
12670 "shrd_n");
12671 }
12672 case NEON::BI__builtin_neon_vshrd_n_u64: {
12673 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12674 uint64_t ShiftAmt = Amt->getZExtValue();
12675 // Right-shifting an unsigned value by its size yields 0.
12676 if (ShiftAmt == 64)
12677 return ConstantInt::get(Int64Ty, 0);
12678 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12679 "shrd_n");
12680 }
12681 case NEON::BI__builtin_neon_vsrad_n_s64: {
12682 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12683 Ops[1] = Builder.CreateAShr(
12684 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12685 Amt->getZExtValue())),
12686 "shrd_n");
12687 return Builder.CreateAdd(Ops[0], Ops[1]);
12688 }
12689 case NEON::BI__builtin_neon_vsrad_n_u64: {
12690 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12691 uint64_t ShiftAmt = Amt->getZExtValue();
12692 // Right-shifting an unsigned value by its size yields 0.
12693 // As Op + 0 = Op, return Ops[0] directly.
12694 if (ShiftAmt == 64)
12695 return Ops[0];
12696 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12697 "shrd_n");
12698 return Builder.CreateAdd(Ops[0], Ops[1]);
12699 }
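  // Note on the vshrd_n/vsrad_n cases above: LLVM shifts by an amount >= the
  // bit width are poison, so the signed right shifts clamp the immediate to 63
  // and the unsigned right shifts by exactly 64 fold to 0 (or to the untouched
  // accumulator for vsrad_n_u64) before any IR is emitted.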
12700 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12701 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12702 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12703 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12704 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12705 "lane");
12706 SmallVector<Value *, 2> ProductOps;
12707 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12708 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12709 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12710 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12711 ProductOps, "vqdmlXl");
12712 Constant *CI = ConstantInt::get(SizeTy, 0);
12713 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12714 Ops.pop_back();
12715
12716 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12717 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12718 ? Intrinsic::aarch64_neon_sqadd
12719 : Intrinsic::aarch64_neon_sqsub;
12720 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12721 }
12722 case NEON::BI__builtin_neon_vqdmlals_s32:
12723 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12724 SmallVector<Value *, 2> ProductOps;
12725 ProductOps.push_back(Ops[1]);
12726 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12727 Ops[1] =
12728 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12729 ProductOps, "vqdmlXl");
12730
12731 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12732 ? Intrinsic::aarch64_neon_sqadd
12733 : Intrinsic::aarch64_neon_sqsub;
12734 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12735 }
12736 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12737 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12738 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12739 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12740 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12741 "lane");
12742 SmallVector<Value *, 2> ProductOps;
12743 ProductOps.push_back(Ops[1]);
12744 ProductOps.push_back(Ops[2]);
12745 Ops[1] =
12746 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12747 ProductOps, "vqdmlXl");
12748 Ops.pop_back();
12749
12750 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12751 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12752 ? Intrinsic::aarch64_neon_sqadd
12753 : Intrinsic::aarch64_neon_sqsub;
12754 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12755 }
12756 case NEON::BI__builtin_neon_vget_lane_bf16:
12757 case NEON::BI__builtin_neon_vduph_lane_bf16:
12758 case NEON::BI__builtin_neon_vduph_lane_f16: {
12759 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12760 "vget_lane");
12761 }
12762 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12763 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12764 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12765 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12766 "vgetq_lane");
12767 }
12768
12769 case clang::AArch64::BI_InterlockedAdd:
12770 case clang::AArch64::BI_InterlockedAdd64: {
12771 Address DestAddr = CheckAtomicAlignment(*this, E);
12772 Value *Val = EmitScalarExpr(E->getArg(1));
12773 AtomicRMWInst *RMWI =
12774 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12775 llvm::AtomicOrdering::SequentiallyConsistent);
12776 return Builder.CreateAdd(RMWI, Val);
12777 }
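  // _InterlockedAdd and _InterlockedAdd64 return the *updated* value, while
  // atomicrmw add yields the previous value, hence the extra CreateAdd above.
  // Roughly (illustrative IR):
  //   %old = atomicrmw add ptr %dst, i32 %val seq_cst
  //   %ret = add i32 %old, %val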
12778 }
12779
12780 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12781 llvm::Type *Ty = VTy;
12782 if (!Ty)
12783 return nullptr;
12784
12785 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12786 // defer to common code if it's been added to our special map.
12787 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12788 AArch64SIMDIntrinsicsProvenSorted);
12789
12790 if (Builtin)
12791 return EmitCommonNeonBuiltinExpr(
12792 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12793 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12794 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12795
12796 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12797 return V;
12798
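  // Everything that reaches this point is a vector builtin: the switch below
  // picks the appropriate overloaded intrinsic ID into 'Int' (often keyed off
  // 'usgn' and the element type) and funnels the operands through EmitNeonCall.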
12799 unsigned Int;
12800 switch (BuiltinID) {
12801 default: return nullptr;
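  // vbsl (bitwise select) has no dedicated intrinsic here; it is open-coded on
  // the integer form of the vector type as (mask & a) | (~mask & b). Roughly
  // (illustrative IR):
  //   %not = xor <n x iM> %mask, <all ones>
  //   %t1  = and <n x iM> %mask, %a
  //   %t2  = and <n x iM> %not, %b
  //   %r   = or  <n x iM> %t1, %t2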
12802 case NEON::BI__builtin_neon_vbsl_v:
12803 case NEON::BI__builtin_neon_vbslq_v: {
12804 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12805 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12806 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12807 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12808
12809 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12810 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12811 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12812 return Builder.CreateBitCast(Ops[0], Ty);
12813 }
12814 case NEON::BI__builtin_neon_vfma_lane_v:
12815 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12816 // The ARM builtins (and instructions) have the addend as the first
12817 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12818 Value *Addend = Ops[0];
12819 Value *Multiplicand = Ops[1];
12820 Value *LaneSource = Ops[2];
12821 Ops[0] = Multiplicand;
12822 Ops[1] = LaneSource;
12823 Ops[2] = Addend;
12824
12825 // Now adjust things to handle the lane access.
12826 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12827 ? llvm::FixedVectorType::get(VTy->getElementType(),
12828 VTy->getNumElements() / 2)
12829 : VTy;
12830 llvm::Constant *cst = cast<Constant>(Ops[3]);
12831 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12832 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12833 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12834
12835 Ops.pop_back();
12836 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12837 : Intrinsic::fma;
12838 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12839 }
12840 case NEON::BI__builtin_neon_vfma_laneq_v: {
12841 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12842 // v1f64 fma should be mapped to Neon scalar f64 fma
12843 if (VTy && VTy->getElementType() == DoubleTy) {
12844 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12845 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12846 llvm::FixedVectorType *VTy =
12847 llvm::FixedVectorType::get(DoubleTy, 2);
12848 Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12849 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12850 Value *Result;
12851 Result = emitCallMaybeConstrainedFPBuiltin(
12852 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12853 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12854 return Builder.CreateBitCast(Result, Ty);
12855 }
12856 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12857 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12858
12859 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12860 VTy->getNumElements() * 2);
12861 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12862 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12863 cast<ConstantInt>(Ops[3]));
12864 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12865
12866 return emitCallMaybeConstrainedFPBuiltin(
12867 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12868 {Ops[2], Ops[1], Ops[0]});
12869 }
12870 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12871 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12872 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12873
12874 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12875 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12876 return emitCallMaybeConstrainedFPBuiltin(
12877 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12878 {Ops[2], Ops[1], Ops[0]});
12879 }
12880 case NEON::BI__builtin_neon_vfmah_lane_f16:
12881 case NEON::BI__builtin_neon_vfmas_lane_f32:
12882 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12883 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12884 case NEON::BI__builtin_neon_vfmad_lane_f64:
12885 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12886 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12887 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12888 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12889 return emitCallMaybeConstrainedFPBuiltin(
12890 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12891 {Ops[1], Ops[2], Ops[0]});
12892 }
12893 case NEON::BI__builtin_neon_vmull_v:
12894 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12895 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12896 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12897 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
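  // The vmax/vmin/vabd family below picks between three intrinsics per
  // builtin: the unsigned integer form when 'usgn' is set, the signed integer
  // form otherwise, and the floating-point form when the NEON type is FP.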
12898 case NEON::BI__builtin_neon_vmax_v:
12899 case NEON::BI__builtin_neon_vmaxq_v:
12900 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12901 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12902 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12903 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12904 case NEON::BI__builtin_neon_vmaxh_f16: {
12905 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12906 Int = Intrinsic::aarch64_neon_fmax;
12907 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12908 }
12909 case NEON::BI__builtin_neon_vmin_v:
12910 case NEON::BI__builtin_neon_vminq_v:
12911 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12912 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12913 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12914 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12915 case NEON::BI__builtin_neon_vminh_f16: {
12916 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12917 Int = Intrinsic::aarch64_neon_fmin;
12918 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12919 }
12920 case NEON::BI__builtin_neon_vabd_v:
12921 case NEON::BI__builtin_neon_vabdq_v:
12922 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12923 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12924 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12925 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12926 case NEON::BI__builtin_neon_vpadal_v:
12927 case NEON::BI__builtin_neon_vpadalq_v: {
12928 unsigned ArgElts = VTy->getNumElements();
12929 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12930 unsigned BitWidth = EltTy->getBitWidth();
12931 auto *ArgTy = llvm::FixedVectorType::get(
12932 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12933 llvm::Type* Tys[2] = { VTy, ArgTy };
12934 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12935 SmallVector<llvm::Value*, 1> TmpOps;
12936 TmpOps.push_back(Ops[1]);
12937 Function *F = CGM.getIntrinsic(Int, Tys);
12938 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12939 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12940 return Builder.CreateAdd(tmp, addend);
12941 }
12942 case NEON::BI__builtin_neon_vpmin_v:
12943 case NEON::BI__builtin_neon_vpminq_v:
12944 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12945 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12946 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12947 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12948 case NEON::BI__builtin_neon_vpmax_v:
12949 case NEON::BI__builtin_neon_vpmaxq_v:
12950 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12951 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12952 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12953 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12954 case NEON::BI__builtin_neon_vminnm_v:
12955 case NEON::BI__builtin_neon_vminnmq_v:
12956 Int = Intrinsic::aarch64_neon_fminnm;
12957 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12958 case NEON::BI__builtin_neon_vminnmh_f16:
12959 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12960 Int = Intrinsic::aarch64_neon_fminnm;
12961 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12962 case NEON::BI__builtin_neon_vmaxnm_v:
12963 case NEON::BI__builtin_neon_vmaxnmq_v:
12964 Int = Intrinsic::aarch64_neon_fmaxnm;
12965 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12966 case NEON::BI__builtin_neon_vmaxnmh_f16:
12967 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12968 Int = Intrinsic::aarch64_neon_fmaxnm;
12969 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12970 case NEON::BI__builtin_neon_vrecpss_f32: {
12971 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12972 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12973 Ops, "vrecps");
12974 }
12975 case NEON::BI__builtin_neon_vrecpsd_f64:
12976 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12977 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12978 Ops, "vrecps");
12979 case NEON::BI__builtin_neon_vrecpsh_f16:
12980 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12981 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12982 Ops, "vrecps");
12983 case NEON::BI__builtin_neon_vqshrun_n_v:
12984 Int = Intrinsic::aarch64_neon_sqshrun;
12985 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12986 case NEON::BI__builtin_neon_vqrshrun_n_v:
12987 Int = Intrinsic::aarch64_neon_sqrshrun;
12988 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12989 case NEON::BI__builtin_neon_vqshrn_n_v:
12990 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12991 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12992 case NEON::BI__builtin_neon_vrshrn_n_v:
12993 Int = Intrinsic::aarch64_neon_rshrn;
12994 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12995 case NEON::BI__builtin_neon_vqrshrn_n_v:
12996 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12997 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
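  // The vrnd* rounding builtins below map onto the generic LLVM rounding
  // intrinsics (round, floor, ceil, rint, trunc, roundeven), switching to the
  // experimental constrained variants when strict FP semantics are in effect;
  // only the frint32/frint64 forms use AArch64-specific intrinsics.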
12998 case NEON::BI__builtin_neon_vrndah_f16: {
12999 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13000 Int = Builder.getIsFPConstrained()
13001 ? Intrinsic::experimental_constrained_round
13002 : Intrinsic::round;
13003 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
13004 }
13005 case NEON::BI__builtin_neon_vrnda_v:
13006 case NEON::BI__builtin_neon_vrndaq_v: {
13007 Int = Builder.getIsFPConstrained()
13008 ? Intrinsic::experimental_constrained_round
13009 : Intrinsic::round;
13010 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
13011 }
13012 case NEON::BI__builtin_neon_vrndih_f16: {
13013 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13014 Int = Builder.getIsFPConstrained()
13015 ? Intrinsic::experimental_constrained_nearbyint
13016 : Intrinsic::nearbyint;
13017 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
13018 }
13019 case NEON::BI__builtin_neon_vrndmh_f16: {
13020 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13021 Int = Builder.getIsFPConstrained()
13022 ? Intrinsic::experimental_constrained_floor
13023 : Intrinsic::floor;
13024 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
13025 }
13026 case NEON::BI__builtin_neon_vrndm_v:
13027 case NEON::BI__builtin_neon_vrndmq_v: {
13028 Int = Builder.getIsFPConstrained()
13029 ? Intrinsic::experimental_constrained_floor
13030 : Intrinsic::floor;
13031 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
13032 }
13033 case NEON::BI__builtin_neon_vrndnh_f16: {
13034 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13035 Int = Builder.getIsFPConstrained()
13036 ? Intrinsic::experimental_constrained_roundeven
13037 : Intrinsic::roundeven;
13038 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
13039 }
13040 case NEON::BI__builtin_neon_vrndn_v:
13041 case NEON::BI__builtin_neon_vrndnq_v: {
13042 Int = Builder.getIsFPConstrained()
13043 ? Intrinsic::experimental_constrained_roundeven
13044 : Intrinsic::roundeven;
13045 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
13046 }
13047 case NEON::BI__builtin_neon_vrndns_f32: {
13048 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13049 Int = Builder.getIsFPConstrained()
13050 ? Intrinsic::experimental_constrained_roundeven
13051 : Intrinsic::roundeven;
13052 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
13053 }
13054 case NEON::BI__builtin_neon_vrndph_f16: {
13055 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13056 Int = Builder.getIsFPConstrained()
13057 ? Intrinsic::experimental_constrained_ceil
13058 : Intrinsic::ceil;
13059 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
13060 }
13061 case NEON::BI__builtin_neon_vrndp_v:
13062 case NEON::BI__builtin_neon_vrndpq_v: {
13063 Int = Builder.getIsFPConstrained()
13064 ? Intrinsic::experimental_constrained_ceil
13065 : Intrinsic::ceil;
13066 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
13067 }
13068 case NEON::BI__builtin_neon_vrndxh_f16: {
13069 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13070 Int = Builder.getIsFPConstrained()
13071 ? Intrinsic::experimental_constrained_rint
13072 : Intrinsic::rint;
13073 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
13074 }
13075 case NEON::BI__builtin_neon_vrndx_v:
13076 case NEON::BI__builtin_neon_vrndxq_v: {
13077 Int = Builder.getIsFPConstrained()
13078 ? Intrinsic::experimental_constrained_rint
13079 : Intrinsic::rint;
13080 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
13081 }
13082 case NEON::BI__builtin_neon_vrndh_f16: {
13083 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13084 Int = Builder.getIsFPConstrained()
13085 ? Intrinsic::experimental_constrained_trunc
13086 : Intrinsic::trunc;
13087 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
13088 }
13089 case NEON::BI__builtin_neon_vrnd32x_f32:
13090 case NEON::BI__builtin_neon_vrnd32xq_f32:
13091 case NEON::BI__builtin_neon_vrnd32x_f64:
13092 case NEON::BI__builtin_neon_vrnd32xq_f64: {
13093 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13094 Int = Intrinsic::aarch64_neon_frint32x;
13095 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
13096 }
13097 case NEON::BI__builtin_neon_vrnd32z_f32:
13098 case NEON::BI__builtin_neon_vrnd32zq_f32:
13099 case NEON::BI__builtin_neon_vrnd32z_f64:
13100 case NEON::BI__builtin_neon_vrnd32zq_f64: {
13101 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13102 Int = Intrinsic::aarch64_neon_frint32z;
13103 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
13104 }
13105 case NEON::BI__builtin_neon_vrnd64x_f32:
13106 case NEON::BI__builtin_neon_vrnd64xq_f32:
13107 case NEON::BI__builtin_neon_vrnd64x_f64:
13108 case NEON::BI__builtin_neon_vrnd64xq_f64: {
13109 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13110 Int = Intrinsic::aarch64_neon_frint64x;
13111 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
13112 }
13113 case NEON::BI__builtin_neon_vrnd64z_f32:
13114 case NEON::BI__builtin_neon_vrnd64zq_f32:
13115 case NEON::BI__builtin_neon_vrnd64z_f64:
13116 case NEON::BI__builtin_neon_vrnd64zq_f64: {
13117 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13118 Int = Intrinsic::aarch64_neon_frint64z;
13119 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
13120 }
13121 case NEON::BI__builtin_neon_vrnd_v:
13122 case NEON::BI__builtin_neon_vrndq_v: {
13123 Int = Builder.getIsFPConstrained()
13124 ? Intrinsic::experimental_constrained_trunc
13125 : Intrinsic::trunc;
13126 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
13127 }
13128 case NEON::BI__builtin_neon_vcvt_f64_v:
13129 case NEON::BI__builtin_neon_vcvtq_f64_v:
13130 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13131 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
13132 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
13133 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
13134 case NEON::BI__builtin_neon_vcvt_f64_f32: {
13135 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
13136 "unexpected vcvt_f64_f32 builtin");
13137 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
13138 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13139
13140 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
13141 }
13142 case NEON::BI__builtin_neon_vcvt_f32_f64: {
13143 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
13144 "unexpected vcvt_f32_f64 builtin");
13145 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
13146 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
13147
13148 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
13149 }
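  // FP<->integer conversions: the plain vcvt_{s,u}*_v cases use the AArch64
  // fcvtz{s,u} intrinsics (round toward zero), while vcvta/vcvtm/vcvtn/vcvtp
  // select fcvta*/fcvtm*/fcvtn*/fcvtp* for round-to-nearest-away,
  // toward minus infinity, to-nearest-even and toward plus infinity.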
13150 case NEON::BI__builtin_neon_vcvt_s32_v:
13151 case NEON::BI__builtin_neon_vcvt_u32_v:
13152 case NEON::BI__builtin_neon_vcvt_s64_v:
13153 case NEON::BI__builtin_neon_vcvt_u64_v:
13154 case NEON::BI__builtin_neon_vcvt_s16_f16:
13155 case NEON::BI__builtin_neon_vcvt_u16_f16:
13156 case NEON::BI__builtin_neon_vcvtq_s32_v:
13157 case NEON::BI__builtin_neon_vcvtq_u32_v:
13158 case NEON::BI__builtin_neon_vcvtq_s64_v:
13159 case NEON::BI__builtin_neon_vcvtq_u64_v:
13160 case NEON::BI__builtin_neon_vcvtq_s16_f16:
13161 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
13162 Int =
13163 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
13164 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
13165 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
13166 }
13167 case NEON::BI__builtin_neon_vcvta_s16_f16:
13168 case NEON::BI__builtin_neon_vcvta_u16_f16:
13169 case NEON::BI__builtin_neon_vcvta_s32_v:
13170 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
13171 case NEON::BI__builtin_neon_vcvtaq_s32_v:
13172 case NEON::BI__builtin_neon_vcvta_u32_v:
13173 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
13174 case NEON::BI__builtin_neon_vcvtaq_u32_v:
13175 case NEON::BI__builtin_neon_vcvta_s64_v:
13176 case NEON::BI__builtin_neon_vcvtaq_s64_v:
13177 case NEON::BI__builtin_neon_vcvta_u64_v:
13178 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
13179 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
13180 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13181 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
13182 }
13183 case NEON::BI__builtin_neon_vcvtm_s16_f16:
13184 case NEON::BI__builtin_neon_vcvtm_s32_v:
13185 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
13186 case NEON::BI__builtin_neon_vcvtmq_s32_v:
13187 case NEON::BI__builtin_neon_vcvtm_u16_f16:
13188 case NEON::BI__builtin_neon_vcvtm_u32_v:
13189 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
13190 case NEON::BI__builtin_neon_vcvtmq_u32_v:
13191 case NEON::BI__builtin_neon_vcvtm_s64_v:
13192 case NEON::BI__builtin_neon_vcvtmq_s64_v:
13193 case NEON::BI__builtin_neon_vcvtm_u64_v:
13194 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
13195 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
13196 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13197 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
13198 }
13199 case NEON::BI__builtin_neon_vcvtn_s16_f16:
13200 case NEON::BI__builtin_neon_vcvtn_s32_v:
13201 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
13202 case NEON::BI__builtin_neon_vcvtnq_s32_v:
13203 case NEON::BI__builtin_neon_vcvtn_u16_f16:
13204 case NEON::BI__builtin_neon_vcvtn_u32_v:
13205 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
13206 case NEON::BI__builtin_neon_vcvtnq_u32_v:
13207 case NEON::BI__builtin_neon_vcvtn_s64_v:
13208 case NEON::BI__builtin_neon_vcvtnq_s64_v:
13209 case NEON::BI__builtin_neon_vcvtn_u64_v:
13210 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
13211 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
13212 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13213 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
13214 }
13215 case NEON::BI__builtin_neon_vcvtp_s16_f16:
13216 case NEON::BI__builtin_neon_vcvtp_s32_v:
13217 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
13218 case NEON::BI__builtin_neon_vcvtpq_s32_v:
13219 case NEON::BI__builtin_neon_vcvtp_u16_f16:
13220 case NEON::BI__builtin_neon_vcvtp_u32_v:
13221 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
13222 case NEON::BI__builtin_neon_vcvtpq_u32_v:
13223 case NEON::BI__builtin_neon_vcvtp_s64_v:
13224 case NEON::BI__builtin_neon_vcvtpq_s64_v:
13225 case NEON::BI__builtin_neon_vcvtp_u64_v:
13226 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
13227 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
13228 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
13229 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
13230 }
13231 case NEON::BI__builtin_neon_vmulx_v:
13232 case NEON::BI__builtin_neon_vmulxq_v: {
13233 Int = Intrinsic::aarch64_neon_fmulx;
13234 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
13235 }
13236 case NEON::BI__builtin_neon_vmulxh_lane_f16:
13237 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
13238 // vmulx_lane should be mapped to Neon scalar mulx after
13239 // extracting the scalar element
13240 Ops.push_back(EmitScalarExpr(E->getArg(2)));
13241 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13242 Ops.pop_back();
13243 Int = Intrinsic::aarch64_neon_fmulx;
13244 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
13245 }
13246 case NEON::BI__builtin_neon_vmul_lane_v:
13247 case NEON::BI__builtin_neon_vmul_laneq_v: {
13248 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
13249 bool Quad = false;
13250 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
13251 Quad = true;
13252 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13253 llvm::FixedVectorType *VTy =
13254 GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
13255 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13256 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
13257 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
13258 return Builder.CreateBitCast(Result, Ty);
13259 }
13260 case NEON::BI__builtin_neon_vnegd_s64:
13261 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
13262 case NEON::BI__builtin_neon_vnegh_f16:
13263 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
13264 case NEON::BI__builtin_neon_vpmaxnm_v:
13265 case NEON::BI__builtin_neon_vpmaxnmq_v: {
13266 Int = Intrinsic::aarch64_neon_fmaxnmp;
13267 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
13268 }
13269 case NEON::BI__builtin_neon_vpminnm_v:
13270 case NEON::BI__builtin_neon_vpminnmq_v: {
13271 Int = Intrinsic::aarch64_neon_fminnmp;
13272 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
13273 }
13274 case NEON::BI__builtin_neon_vsqrth_f16: {
13275 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13276 Int = Builder.getIsFPConstrained()
13277 ? Intrinsic::experimental_constrained_sqrt
13278 : Intrinsic::sqrt;
13279 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
13280 }
13281 case NEON::BI__builtin_neon_vsqrt_v:
13282 case NEON::BI__builtin_neon_vsqrtq_v: {
13283 Int = Builder.getIsFPConstrained()
13284 ? Intrinsic::experimental_constrained_sqrt
13285 : Intrinsic::sqrt;
13286 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13287 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
13288 }
13289 case NEON::BI__builtin_neon_vrbit_v:
13290 case NEON::BI__builtin_neon_vrbitq_v: {
13291 Int = Intrinsic::bitreverse;
13292 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
13293 }
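  // Across-lane reductions (vaddv/vmaxv/vminv/vaddlv and friends): the
  // aarch64.neon.*v intrinsics are overloaded on an i32 (or FP) result type,
  // so narrow element types have the result truncated back down afterwards.
  // Roughly (illustrative IR for vaddv_u8):
  //   %r32 = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %v)
  //   %r   = trunc i32 %r32 to i8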
13294 case NEON::BI__builtin_neon_vaddv_u8:
13295 // FIXME: These are handled by the AArch64 scalar code.
13296 usgn = true;
13297 [[fallthrough]];
13298 case NEON::BI__builtin_neon_vaddv_s8: {
13299 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13300 Ty = Int32Ty;
13301 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13302 llvm::Type *Tys[2] = { Ty, VTy };
13303 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13304 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13305 return Builder.CreateTrunc(Ops[0], Int8Ty);
13306 }
13307 case NEON::BI__builtin_neon_vaddv_u16:
13308 usgn = true;
13309 [[fallthrough]];
13310 case NEON::BI__builtin_neon_vaddv_s16: {
13311 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13312 Ty = Int32Ty;
13313 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13314 llvm::Type *Tys[2] = { Ty, VTy };
13315 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13316 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13317 return Builder.CreateTrunc(Ops[0], Int16Ty);
13318 }
13319 case NEON::BI__builtin_neon_vaddvq_u8:
13320 usgn = true;
13321 [[fallthrough]];
13322 case NEON::BI__builtin_neon_vaddvq_s8: {
13323 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13324 Ty = Int32Ty;
13325 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13326 llvm::Type *Tys[2] = { Ty, VTy };
13327 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13328 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13329 return Builder.CreateTrunc(Ops[0], Int8Ty);
13330 }
13331 case NEON::BI__builtin_neon_vaddvq_u16:
13332 usgn = true;
13333 [[fallthrough]];
13334 case NEON::BI__builtin_neon_vaddvq_s16: {
13335 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
13336 Ty = Int32Ty;
13337 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13338 llvm::Type *Tys[2] = { Ty, VTy };
13339 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13340 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
13341 return Builder.CreateTrunc(Ops[0], Int16Ty);
13342 }
13343 case NEON::BI__builtin_neon_vmaxv_u8: {
13344 Int = Intrinsic::aarch64_neon_umaxv;
13345 Ty = Int32Ty;
13346 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13347 llvm::Type *Tys[2] = { Ty, VTy };
13348 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13349 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13350 return Builder.CreateTrunc(Ops[0], Int8Ty);
13351 }
13352 case NEON::BI__builtin_neon_vmaxv_u16: {
13353 Int = Intrinsic::aarch64_neon_umaxv;
13354 Ty = Int32Ty;
13355 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13356 llvm::Type *Tys[2] = { Ty, VTy };
13357 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13358 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13359 return Builder.CreateTrunc(Ops[0], Int16Ty);
13360 }
13361 case NEON::BI__builtin_neon_vmaxvq_u8: {
13362 Int = Intrinsic::aarch64_neon_umaxv;
13363 Ty = Int32Ty;
13364 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13365 llvm::Type *Tys[2] = { Ty, VTy };
13366 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13367 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13368 return Builder.CreateTrunc(Ops[0], Int8Ty);
13369 }
13370 case NEON::BI__builtin_neon_vmaxvq_u16: {
13371 Int = Intrinsic::aarch64_neon_umaxv;
13372 Ty = Int32Ty;
13373 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13374 llvm::Type *Tys[2] = { Ty, VTy };
13375 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13376 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13377 return Builder.CreateTrunc(Ops[0], Int16Ty);
13378 }
13379 case NEON::BI__builtin_neon_vmaxv_s8: {
13380 Int = Intrinsic::aarch64_neon_smaxv;
13381 Ty = Int32Ty;
13382 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13383 llvm::Type *Tys[2] = { Ty, VTy };
13384 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13385 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13386 return Builder.CreateTrunc(Ops[0], Int8Ty);
13387 }
13388 case NEON::BI__builtin_neon_vmaxv_s16: {
13389 Int = Intrinsic::aarch64_neon_smaxv;
13390 Ty = Int32Ty;
13391 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13392 llvm::Type *Tys[2] = { Ty, VTy };
13393 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13394 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13395 return Builder.CreateTrunc(Ops[0], Int16Ty);
13396 }
13397 case NEON::BI__builtin_neon_vmaxvq_s8: {
13398 Int = Intrinsic::aarch64_neon_smaxv;
13399 Ty = Int32Ty;
13400 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13401 llvm::Type *Tys[2] = { Ty, VTy };
13402 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13403 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13404 return Builder.CreateTrunc(Ops[0], Int8Ty);
13405 }
13406 case NEON::BI__builtin_neon_vmaxvq_s16: {
13407 Int = Intrinsic::aarch64_neon_smaxv;
13408 Ty = Int32Ty;
13409 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13410 llvm::Type *Tys[2] = { Ty, VTy };
13411 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13412 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13413 return Builder.CreateTrunc(Ops[0], Int16Ty);
13414 }
13415 case NEON::BI__builtin_neon_vmaxv_f16: {
13416 Int = Intrinsic::aarch64_neon_fmaxv;
13417 Ty = HalfTy;
13418 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13419 llvm::Type *Tys[2] = { Ty, VTy };
13420 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13421 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13422 return Builder.CreateTrunc(Ops[0], HalfTy);
13423 }
13424 case NEON::BI__builtin_neon_vmaxvq_f16: {
13425 Int = Intrinsic::aarch64_neon_fmaxv;
13426 Ty = HalfTy;
13427 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13428 llvm::Type *Tys[2] = { Ty, VTy };
13429 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13430 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
13431 return Builder.CreateTrunc(Ops[0], HalfTy);
13432 }
13433 case NEON::BI__builtin_neon_vminv_u8: {
13434 Int = Intrinsic::aarch64_neon_uminv;
13435 Ty = Int32Ty;
13436 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13437 llvm::Type *Tys[2] = { Ty, VTy };
13438 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13439 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13440 return Builder.CreateTrunc(Ops[0], Int8Ty);
13441 }
13442 case NEON::BI__builtin_neon_vminv_u16: {
13443 Int = Intrinsic::aarch64_neon_uminv;
13444 Ty = Int32Ty;
13445 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13446 llvm::Type *Tys[2] = { Ty, VTy };
13447 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13448 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13449 return Builder.CreateTrunc(Ops[0], Int16Ty);
13450 }
13451 case NEON::BI__builtin_neon_vminvq_u8: {
13452 Int = Intrinsic::aarch64_neon_uminv;
13453 Ty = Int32Ty;
13454 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13455 llvm::Type *Tys[2] = { Ty, VTy };
13456 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13457 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13458 return Builder.CreateTrunc(Ops[0], Int8Ty);
13459 }
13460 case NEON::BI__builtin_neon_vminvq_u16: {
13461 Int = Intrinsic::aarch64_neon_uminv;
13462 Ty = Int32Ty;
13463 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13464 llvm::Type *Tys[2] = { Ty, VTy };
13465 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13466 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13467 return Builder.CreateTrunc(Ops[0], Int16Ty);
13468 }
13469 case NEON::BI__builtin_neon_vminv_s8: {
13470 Int = Intrinsic::aarch64_neon_sminv;
13471 Ty = Int32Ty;
13472 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13473 llvm::Type *Tys[2] = { Ty, VTy };
13474 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13475 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13476 return Builder.CreateTrunc(Ops[0], Int8Ty);
13477 }
13478 case NEON::BI__builtin_neon_vminv_s16: {
13479 Int = Intrinsic::aarch64_neon_sminv;
13480 Ty = Int32Ty;
13481 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13482 llvm::Type *Tys[2] = { Ty, VTy };
13483 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13484 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13485 return Builder.CreateTrunc(Ops[0], Int16Ty);
13486 }
13487 case NEON::BI__builtin_neon_vminvq_s8: {
13488 Int = Intrinsic::aarch64_neon_sminv;
13489 Ty = Int32Ty;
13490 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13491 llvm::Type *Tys[2] = { Ty, VTy };
13492 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13493 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13494 return Builder.CreateTrunc(Ops[0], Int8Ty);
13495 }
13496 case NEON::BI__builtin_neon_vminvq_s16: {
13497 Int = Intrinsic::aarch64_neon_sminv;
13498 Ty = Int32Ty;
13499 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13500 llvm::Type *Tys[2] = { Ty, VTy };
13501 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13502 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13503 return Builder.CreateTrunc(Ops[0], Int16Ty);
13504 }
13505 case NEON::BI__builtin_neon_vminv_f16: {
13506 Int = Intrinsic::aarch64_neon_fminv;
13507 Ty = HalfTy;
13508 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13509 llvm::Type *Tys[2] = { Ty, VTy };
13510 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13511 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13512 return Builder.CreateTrunc(Ops[0], HalfTy);
13513 }
13514 case NEON::BI__builtin_neon_vminvq_f16: {
13515 Int = Intrinsic::aarch64_neon_fminv;
13516 Ty = HalfTy;
13517 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13518 llvm::Type *Tys[2] = { Ty, VTy };
13519 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13520 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13521 return Builder.CreateTrunc(Ops[0], HalfTy);
13522 }
13523 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13524 Int = Intrinsic::aarch64_neon_fmaxnmv;
13525 Ty = HalfTy;
13526 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13527 llvm::Type *Tys[2] = { Ty, VTy };
13528 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13529 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13530 return Builder.CreateTrunc(Ops[0], HalfTy);
13531 }
13532 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13533 Int = Intrinsic::aarch64_neon_fmaxnmv;
13534 Ty = HalfTy;
13535 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13536 llvm::Type *Tys[2] = { Ty, VTy };
13537 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13538 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13539 return Builder.CreateTrunc(Ops[0], HalfTy);
13540 }
13541 case NEON::BI__builtin_neon_vminnmv_f16: {
13542 Int = Intrinsic::aarch64_neon_fminnmv;
13543 Ty = HalfTy;
13544 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13545 llvm::Type *Tys[2] = { Ty, VTy };
13546 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13547 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13548 return Builder.CreateTrunc(Ops[0], HalfTy);
13549 }
13550 case NEON::BI__builtin_neon_vminnmvq_f16: {
13551 Int = Intrinsic::aarch64_neon_fminnmv;
13552 Ty = HalfTy;
13553 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13554 llvm::Type *Tys[2] = { Ty, VTy };
13555 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13556 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13557 return Builder.CreateTrunc(Ops[0], HalfTy);
13558 }
13559 case NEON::BI__builtin_neon_vmul_n_f64: {
13560 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13561 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13562 return Builder.CreateFMul(Ops[0], RHS);
13563 }
13564 case NEON::BI__builtin_neon_vaddlv_u8: {
13565 Int = Intrinsic::aarch64_neon_uaddlv;
13566 Ty = Int32Ty;
13567 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13568 llvm::Type *Tys[2] = { Ty, VTy };
13569 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13570 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13571 return Builder.CreateTrunc(Ops[0], Int16Ty);
13572 }
13573 case NEON::BI__builtin_neon_vaddlv_u16: {
13574 Int = Intrinsic::aarch64_neon_uaddlv;
13575 Ty = Int32Ty;
13576 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13577 llvm::Type *Tys[2] = { Ty, VTy };
13578 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13579 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13580 }
13581 case NEON::BI__builtin_neon_vaddlvq_u8: {
13582 Int = Intrinsic::aarch64_neon_uaddlv;
13583 Ty = Int32Ty;
13584 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13585 llvm::Type *Tys[2] = { Ty, VTy };
13586 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13587 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13588 return Builder.CreateTrunc(Ops[0], Int16Ty);
13589 }
13590 case NEON::BI__builtin_neon_vaddlvq_u16: {
13591 Int = Intrinsic::aarch64_neon_uaddlv;
13592 Ty = Int32Ty;
13593 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13594 llvm::Type *Tys[2] = { Ty, VTy };
13595 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13596 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13597 }
13598 case NEON::BI__builtin_neon_vaddlv_s8: {
13599 Int = Intrinsic::aarch64_neon_saddlv;
13600 Ty = Int32Ty;
13601 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13602 llvm::Type *Tys[2] = { Ty, VTy };
13603 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13604 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13605 return Builder.CreateTrunc(Ops[0], Int16Ty);
13606 }
13607 case NEON::BI__builtin_neon_vaddlv_s16: {
13608 Int = Intrinsic::aarch64_neon_saddlv;
13609 Ty = Int32Ty;
13610 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13611 llvm::Type *Tys[2] = { Ty, VTy };
13612 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13613 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13614 }
13615 case NEON::BI__builtin_neon_vaddlvq_s8: {
13616 Int = Intrinsic::aarch64_neon_saddlv;
13617 Ty = Int32Ty;
13618 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13619 llvm::Type *Tys[2] = { Ty, VTy };
13620 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13621 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13622 return Builder.CreateTrunc(Ops[0], Int16Ty);
13623 }
13624 case NEON::BI__builtin_neon_vaddlvq_s16: {
13625 Int = Intrinsic::aarch64_neon_saddlv;
13626 Ty = Int32Ty;
13627 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13628 llvm::Type *Tys[2] = { Ty, VTy };
13629 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13630 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13631 }
13632 case NEON::BI__builtin_neon_vsri_n_v:
13633 case NEON::BI__builtin_neon_vsriq_n_v: {
13634 Int = Intrinsic::aarch64_neon_vsri;
13635 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13636 return EmitNeonCall(Intrin, Ops, "vsri_n");
13637 }
13638 case NEON::BI__builtin_neon_vsli_n_v:
13639 case NEON::BI__builtin_neon_vsliq_n_v: {
13640 Int = Intrinsic::aarch64_neon_vsli;
13641 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13642 return EmitNeonCall(Intrin, Ops, "vsli_n");
13643 }
13644 case NEON::BI__builtin_neon_vsra_n_v:
13645 case NEON::BI__builtin_neon_vsraq_n_v:
13646 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13647 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13648 return Builder.CreateAdd(Ops[0], Ops[1]);
13649 case NEON::BI__builtin_neon_vrsra_n_v:
13650 case NEON::BI__builtin_neon_vrsraq_n_v: {
13651 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13652 SmallVector<llvm::Value*,2> TmpOps;
13653 TmpOps.push_back(Ops[1]);
13654 TmpOps.push_back(Ops[2]);
13655 Function* F = CGM.getIntrinsic(Int, Ty);
13656 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13657 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13658 return Builder.CreateAdd(Ops[0], tmp);
13659 }
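  // Loads and stores: vld1/vst1 become ordinary aligned load/store of the
  // vector type; vldap1/vstl1 additionally mark the element access as
  // acquire/release atomic; the vld2/vld3/vld4 (and _dup/_lane) forms call the
  // aarch64.neon.ldN* intrinsics and then store the returned aggregate through
  // the result pointer in Ops[0].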
13660 case NEON::BI__builtin_neon_vld1_v:
13661 case NEON::BI__builtin_neon_vld1q_v: {
13662 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13663 }
13664 case NEON::BI__builtin_neon_vst1_v:
13665 case NEON::BI__builtin_neon_vst1q_v:
13666 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13667 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13668 case NEON::BI__builtin_neon_vld1_lane_v:
13669 case NEON::BI__builtin_neon_vld1q_lane_v: {
13670 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13671 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13672 PtrOp0.getAlignment());
13673 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13674 }
13675 case NEON::BI__builtin_neon_vldap1_lane_s64:
13676 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13677 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13678 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13679 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13680 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13681 Ops[0] = LI;
13682 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13683 }
13684 case NEON::BI__builtin_neon_vld1_dup_v:
13685 case NEON::BI__builtin_neon_vld1q_dup_v: {
13686 Value *V = PoisonValue::get(Ty);
13687 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13688 PtrOp0.getAlignment());
13689 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13690 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13691 return EmitNeonSplat(Ops[0], CI);
13692 }
13693 case NEON::BI__builtin_neon_vst1_lane_v:
13694 case NEON::BI__builtin_neon_vst1q_lane_v:
13695 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13696 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13697 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13698 case NEON::BI__builtin_neon_vstl1_lane_s64:
13699 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13700 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13701 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13702 llvm::StoreInst *SI =
13703 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13704 SI->setAtomic(llvm::AtomicOrdering::Release);
13705 return SI;
13706 }
13707 case NEON::BI__builtin_neon_vld2_v:
13708 case NEON::BI__builtin_neon_vld2q_v: {
13709 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13710 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13711 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13712 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13713 }
13714 case NEON::BI__builtin_neon_vld3_v:
13715 case NEON::BI__builtin_neon_vld3q_v: {
13716 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13717 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13718 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13719 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13720 }
13721 case NEON::BI__builtin_neon_vld4_v:
13722 case NEON::BI__builtin_neon_vld4q_v: {
13723 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13724 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13725 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13726 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13727 }
13728 case NEON::BI__builtin_neon_vld2_dup_v:
13729 case NEON::BI__builtin_neon_vld2q_dup_v: {
13730 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13731 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13732 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13733 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13734 }
13735 case NEON::BI__builtin_neon_vld3_dup_v:
13736 case NEON::BI__builtin_neon_vld3q_dup_v: {
13737 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13738 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13739 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13740 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13741 }
13742 case NEON::BI__builtin_neon_vld4_dup_v:
13743 case NEON::BI__builtin_neon_vld4q_dup_v: {
13744 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13745 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13746 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13747 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13748 }
13749 case NEON::BI__builtin_neon_vld2_lane_v:
13750 case NEON::BI__builtin_neon_vld2q_lane_v: {
13751 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13752 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13753 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13754 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13755 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13756 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13757 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13758 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13759 }
13760 case NEON::BI__builtin_neon_vld3_lane_v:
13761 case NEON::BI__builtin_neon_vld3q_lane_v: {
13762 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13763 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13764 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13765 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13766 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13767 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13768 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13769 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13770 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13771 }
13772 case NEON::BI__builtin_neon_vld4_lane_v:
13773 case NEON::BI__builtin_neon_vld4q_lane_v: {
13774 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13775 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13776 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13777 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13778 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13779 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13780 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13781 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13782 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13783 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13784 }
13785 case NEON::BI__builtin_neon_vst2_v:
13786 case NEON::BI__builtin_neon_vst2q_v: {
13787 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13788 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13789 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13790 Ops, "");
13791 }
13792 case NEON::BI__builtin_neon_vst2_lane_v:
13793 case NEON::BI__builtin_neon_vst2q_lane_v: {
13794 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13795 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13796 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13797 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13798 Ops, "");
13799 }
13800 case NEON::BI__builtin_neon_vst3_v:
13801 case NEON::BI__builtin_neon_vst3q_v: {
13802 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13803 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13804 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13805 Ops, "");
13806 }
13807 case NEON::BI__builtin_neon_vst3_lane_v:
13808 case NEON::BI__builtin_neon_vst3q_lane_v: {
13809 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13810 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13811 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13812 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13813 Ops, "");
13814 }
13815 case NEON::BI__builtin_neon_vst4_v:
13816 case NEON::BI__builtin_neon_vst4q_v: {
13817 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13818 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13819 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13820 Ops, "");
13821 }
13822 case NEON::BI__builtin_neon_vst4_lane_v:
13823 case NEON::BI__builtin_neon_vst4q_lane_v: {
13824 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13825 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13826 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13827 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13828 Ops, "");
13829 }
13830 case NEON::BI__builtin_neon_vtrn_v:
13831 case NEON::BI__builtin_neon_vtrnq_v: {
13832 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13833 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13834 Value *SV = nullptr;
13835
13836 for (unsigned vi = 0; vi != 2; ++vi) {
13837 SmallVector<int, 16> Indices;
13838 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13839 Indices.push_back(i+vi);
13840 Indices.push_back(i+e+vi);
13841 }
13842 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13843 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13844 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13845 }
13846 return SV;
13847 }
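 // For example, with a 4-element vector the two iterations above build the
 // shuffle masks {0,4,2,6} and {1,5,3,7} (the even- and odd-lane transpose of
 // Ops[1] and Ops[2]); each result is stored into the vi-th vector-sized slot
 // of the sret pointer held in Ops[0]. The vuzp and vzip cases below follow
 // the same store pattern with their own index formulas.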
13848 case NEON::BI__builtin_neon_vuzp_v:
13849 case NEON::BI__builtin_neon_vuzpq_v: {
13850 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13851 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13852 Value *SV = nullptr;
13853
13854 for (unsigned vi = 0; vi != 2; ++vi) {
13855 SmallVector<int, 16> Indices;
13856 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13857 Indices.push_back(2*i+vi);
13858
13859 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13860 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13861 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13862 }
13863 return SV;
13864 }
13865 case NEON::BI__builtin_neon_vzip_v:
13866 case NEON::BI__builtin_neon_vzipq_v: {
13867 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13868 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13869 Value *SV = nullptr;
13870
13871 for (unsigned vi = 0; vi != 2; ++vi) {
13872 SmallVector<int, 16> Indices;
13873 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13874 Indices.push_back((i + vi*e) >> 1);
13875 Indices.push_back(((i + vi*e) >> 1)+e);
13876 }
13877 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13878 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13879 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13880 }
13881 return SV;
13882 }
13883 case NEON::BI__builtin_neon_vqtbl1q_v: {
13884 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13885 Ops, "vtbl1");
13886 }
13887 case NEON::BI__builtin_neon_vqtbl2q_v: {
13888 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13889 Ops, "vtbl2");
13890 }
13891 case NEON::BI__builtin_neon_vqtbl3q_v: {
13892 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13893 Ops, "vtbl3");
13894 }
13895 case NEON::BI__builtin_neon_vqtbl4q_v: {
13896 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13897 Ops, "vtbl4");
13898 }
13899 case NEON::BI__builtin_neon_vqtbx1q_v: {
13900 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13901 Ops, "vtbx1");
13902 }
13903 case NEON::BI__builtin_neon_vqtbx2q_v: {
13904 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13905 Ops, "vtbx2");
13906 }
13907 case NEON::BI__builtin_neon_vqtbx3q_v: {
13908 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13909 Ops, "vtbx3");
13910 }
13911 case NEON::BI__builtin_neon_vqtbx4q_v: {
13912 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13913 Ops, "vtbx4");
13914 }
13915 case NEON::BI__builtin_neon_vsqadd_v:
13916 case NEON::BI__builtin_neon_vsqaddq_v: {
13917 Int = Intrinsic::aarch64_neon_usqadd;
13918 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13919 }
13920 case NEON::BI__builtin_neon_vuqadd_v:
13921 case NEON::BI__builtin_neon_vuqaddq_v: {
13922 Int = Intrinsic::aarch64_neon_suqadd;
13923 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13924 }
13925
13926 case NEON::BI__builtin_neon_vluti2_laneq_bf16:
13927 case NEON::BI__builtin_neon_vluti2_laneq_f16:
13928 case NEON::BI__builtin_neon_vluti2_laneq_p16:
13929 case NEON::BI__builtin_neon_vluti2_laneq_p8:
13930 case NEON::BI__builtin_neon_vluti2_laneq_s16:
13931 case NEON::BI__builtin_neon_vluti2_laneq_s8:
13932 case NEON::BI__builtin_neon_vluti2_laneq_u16:
13933 case NEON::BI__builtin_neon_vluti2_laneq_u8: {
13934 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13935 llvm::Type *Tys[2];
13936 Tys[0] = Ty;
13937 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13938 /*isQuad*/ false));
13939 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13940 }
13941 case NEON::BI__builtin_neon_vluti2q_laneq_bf16:
13942 case NEON::BI__builtin_neon_vluti2q_laneq_f16:
13943 case NEON::BI__builtin_neon_vluti2q_laneq_p16:
13944 case NEON::BI__builtin_neon_vluti2q_laneq_p8:
13945 case NEON::BI__builtin_neon_vluti2q_laneq_s16:
13946 case NEON::BI__builtin_neon_vluti2q_laneq_s8:
13947 case NEON::BI__builtin_neon_vluti2q_laneq_u16:
13948 case NEON::BI__builtin_neon_vluti2q_laneq_u8: {
13949 Int = Intrinsic::aarch64_neon_vluti2_laneq;
13950 llvm::Type *Tys[2];
13951 Tys[0] = Ty;
13952 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13953 /*isQuad*/ true));
13954 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq");
13955 }
13956 case NEON::BI__builtin_neon_vluti2_lane_bf16:
13957 case NEON::BI__builtin_neon_vluti2_lane_f16:
13958 case NEON::BI__builtin_neon_vluti2_lane_p16:
13959 case NEON::BI__builtin_neon_vluti2_lane_p8:
13960 case NEON::BI__builtin_neon_vluti2_lane_s16:
13961 case NEON::BI__builtin_neon_vluti2_lane_s8:
13962 case NEON::BI__builtin_neon_vluti2_lane_u16:
13963 case NEON::BI__builtin_neon_vluti2_lane_u8: {
13964 Int = Intrinsic::aarch64_neon_vluti2_lane;
13965 llvm::Type *Tys[2];
13966 Tys[0] = Ty;
13967 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13968 /*isQuad*/ false));
13969 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13970 }
13971 case NEON::BI__builtin_neon_vluti2q_lane_bf16:
13972 case NEON::BI__builtin_neon_vluti2q_lane_f16:
13973 case NEON::BI__builtin_neon_vluti2q_lane_p16:
13974 case NEON::BI__builtin_neon_vluti2q_lane_p8:
13975 case NEON::BI__builtin_neon_vluti2q_lane_s16:
13976 case NEON::BI__builtin_neon_vluti2q_lane_s8:
13977 case NEON::BI__builtin_neon_vluti2q_lane_u16:
13978 case NEON::BI__builtin_neon_vluti2q_lane_u8: {
13979 Int = Intrinsic::aarch64_neon_vluti2_lane;
13980 llvm::Type *Tys[2];
13981 Tys[0] = Ty;
13982 Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
13983 /*isQuad*/ true));
13984 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane");
13985 }
13986 case NEON::BI__builtin_neon_vluti4q_lane_p8:
13987 case NEON::BI__builtin_neon_vluti4q_lane_s8:
13988 case NEON::BI__builtin_neon_vluti4q_lane_u8: {
13989 Int = Intrinsic::aarch64_neon_vluti4q_lane;
13990 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane");
13991 }
13992 case NEON::BI__builtin_neon_vluti4q_laneq_p8:
13993 case NEON::BI__builtin_neon_vluti4q_laneq_s8:
13994 case NEON::BI__builtin_neon_vluti4q_laneq_u8: {
13995 Int = Intrinsic::aarch64_neon_vluti4q_laneq;
13996 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq");
13997 }
13998 case NEON::BI__builtin_neon_vluti4q_lane_bf16_x2:
13999 case NEON::BI__builtin_neon_vluti4q_lane_f16_x2:
14000 case NEON::BI__builtin_neon_vluti4q_lane_p16_x2:
14001 case NEON::BI__builtin_neon_vluti4q_lane_s16_x2:
14002 case NEON::BI__builtin_neon_vluti4q_lane_u16_x2: {
14003 Int = Intrinsic::aarch64_neon_vluti4q_lane_x2;
14004 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane_x2");
14005 }
14006 case NEON::BI__builtin_neon_vluti4q_laneq_bf16_x2:
14007 case NEON::BI__builtin_neon_vluti4q_laneq_f16_x2:
14008 case NEON::BI__builtin_neon_vluti4q_laneq_p16_x2:
14009 case NEON::BI__builtin_neon_vluti4q_laneq_s16_x2:
14010 case NEON::BI__builtin_neon_vluti4q_laneq_u16_x2: {
14011 Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
14012 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
14013 }
14014
14015 case NEON::BI__builtin_neon_vamin_f16:
14016 case NEON::BI__builtin_neon_vaminq_f16:
14017 case NEON::BI__builtin_neon_vamin_f32:
14018 case NEON::BI__builtin_neon_vaminq_f32:
14019 case NEON::BI__builtin_neon_vaminq_f64: {
14020 Int = Intrinsic::aarch64_neon_famin;
14021 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
14022 }
14023 case NEON::BI__builtin_neon_vamax_f16:
14024 case NEON::BI__builtin_neon_vamaxq_f16:
14025 case NEON::BI__builtin_neon_vamax_f32:
14026 case NEON::BI__builtin_neon_vamaxq_f32:
14027 case NEON::BI__builtin_neon_vamaxq_f64: {
14028 Int = Intrinsic::aarch64_neon_famax;
14029 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
14030 }
14031 case NEON::BI__builtin_neon_vscale_f16:
14032 case NEON::BI__builtin_neon_vscaleq_f16:
14033 case NEON::BI__builtin_neon_vscale_f32:
14034 case NEON::BI__builtin_neon_vscaleq_f32:
14035 case NEON::BI__builtin_neon_vscaleq_f64: {
14036 Int = Intrinsic::aarch64_neon_fp8_fscale;
14037 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fscale");
14038 }
14039 }
14040}
14041
14042Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
14043 const CallExpr *E) {
14044 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
14045 BuiltinID == BPF::BI__builtin_btf_type_id ||
14046 BuiltinID == BPF::BI__builtin_preserve_type_info ||
14047 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
14048 "unexpected BPF builtin");
14049
14050 // A sequence number, injected into IR builtin functions, to
14051 // prevent CSE, given that the only difference between the generated
14052 // calls may just be the debuginfo metadata.
14053 static uint32_t BuiltinSeqNum;
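 // For example, two textually identical __builtin_btf_type_id() calls receive
 // distinct sequence-number operands (0 and 1), so CSE cannot merge them even
 // though they otherwise differ only in their preserve.access.index metadata.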
14054
14055 switch (BuiltinID) {
14056 default:
14057 llvm_unreachable("Unexpected BPF builtin");
14058 case BPF::BI__builtin_preserve_field_info: {
14059 const Expr *Arg = E->getArg(0);
14060 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
14061
14062 if (!getDebugInfo()) {
14063 CGM.Error(E->getExprLoc(),
14064 "using __builtin_preserve_field_info() without -g");
14065 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14066 : EmitLValue(Arg).emitRawPointer(*this);
14067 }
14068
14069 // Enable underlying preserve_*_access_index() generation.
14070 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
14071 IsInPreservedAIRegion = true;
14072 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
14073 : EmitLValue(Arg).emitRawPointer(*this);
14074 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
14075
14076 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14077 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
14078
14079 // Build the IR for the preserve_field_info intrinsic.
14080 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getOrInsertDeclaration(
14081 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
14082 {FieldAddr->getType()});
14083 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
14084 }
14085 case BPF::BI__builtin_btf_type_id:
14086 case BPF::BI__builtin_preserve_type_info: {
14087 if (!getDebugInfo()) {
14088 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14089 return nullptr;
14090 }
14091
14092 const Expr *Arg0 = E->getArg(0);
14093 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14094 Arg0->getType(), Arg0->getExprLoc());
14095
14096 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14097 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14098 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14099
14100 llvm::Function *FnDecl;
14101 if (BuiltinID == BPF::BI__builtin_btf_type_id)
14102 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14103 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
14104 else
14105 FnDecl = llvm::Intrinsic::getOrInsertDeclaration(
14106 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
14107 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
14108 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14109 return Fn;
14110 }
14111 case BPF::BI__builtin_preserve_enum_value: {
14112 if (!getDebugInfo()) {
14113 CGM.Error(E->getExprLoc(), "using builtin function without -g");
14114 return nullptr;
14115 }
14116
14117 const Expr *Arg0 = E->getArg(0);
14118 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
14119 Arg0->getType(), Arg0->getExprLoc());
14120
14121 // Find enumerator
14122 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
14123 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
14124 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
14125 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
14126
14127 auto InitVal = Enumerator->getInitVal();
14128 std::string InitValStr;
14129 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
14130 InitValStr = std::to_string(InitVal.getSExtValue());
14131 else
14132 InitValStr = std::to_string(InitVal.getZExtValue());
14133 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
14134 Value *EnumStrVal = Builder.CreateGlobalString(EnumStr);
14135
14136 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
14137 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
14138 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
14139
14140 llvm::Function *IntrinsicFn = llvm::Intrinsic::getOrInsertDeclaration(
14141 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
14142 CallInst *Fn =
14143 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
14144 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
14145 return Fn;
14146 }
14147 }
14148}
14149
14150llvm::Value *CodeGenFunction::
14151BuildVector(ArrayRef<llvm::Value*> Ops) {
14152 assert((Ops.size() & (Ops.size() - 1)) == 0 &&
14153 "Not a power-of-two sized vector!");
14154 bool AllConstants = true;
14155 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
14156 AllConstants &= isa<Constant>(Ops[i]);
14157
14158 // If this is a constant vector, create a ConstantVector.
14159 if (AllConstants) {
14160 SmallVector<llvm::Constant*, 16> CstOps;
14161 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14162 CstOps.push_back(cast<Constant>(Ops[i]));
14163 return llvm::ConstantVector::get(CstOps);
14164 }
14165
14166 // Otherwise, insertelement the values to build the vector.
14167 Value *Result = llvm::PoisonValue::get(
14168 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
14169
14170 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
14171 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
14172
14173 return Result;
14174}
14175
14176// Convert the mask from an integer type to a vector of i1.
14177static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
14178 unsigned NumElts) {
14179
14180 auto *MaskTy = llvm::FixedVectorType::get(
14181 CGF.Builder.getInt1Ty(),
14182 cast<IntegerType>(Mask->getType())->getBitWidth());
14183 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
14184
14185 // If we have fewer than 8 elements, then the starting mask was an i8 and
14186 // we need to extract down to the right number of elements.
14187 if (NumElts < 8) {
14188 int Indices[4];
14189 for (unsigned i = 0; i != NumElts; ++i)
14190 Indices[i] = i;
14191 MaskVec = CGF.Builder.CreateShuffleVector(
14192 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
14193 }
14194 return MaskVec;
14195}
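// For example, with an i8 mask of 0x0B and NumElts == 4 the bitcast yields
// <8 x i1> <1,1,0,1,0,0,0,0> and the shuffle keeps lanes 0..3, producing
// <4 x i1> <1,1,0,1>, i.e. lanes 0, 1 and 3 are active.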
14196
14197static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14198 Align Alignment) {
14199 Value *Ptr = Ops[0];
14200
14201 Value *MaskVec = getMaskVecValue(
14202 CGF, Ops[2],
14203 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
14204
14205 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
14206}
14207
14208static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14209 Align Alignment) {
14210 llvm::Type *Ty = Ops[1]->getType();
14211 Value *Ptr = Ops[0];
14212
14213 Value *MaskVec = getMaskVecValue(
14214 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
14215
14216 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
14217}
14218
14219static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
14220 ArrayRef<Value *> Ops) {
14221 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
14222 Value *Ptr = Ops[0];
14223
14224 Value *MaskVec = getMaskVecValue(
14225 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
14226
14227 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
14228 ResultTy);
14229 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
14230}
14231
14232static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
14233 ArrayRef<Value *> Ops,
14234 bool IsCompress) {
14235 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14236
14237 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14238
14239 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
14240 : Intrinsic::x86_avx512_mask_expand;
14241 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
14242 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
14243}
14244
14245static Value *EmitX86CompressStore(CodeGenFunction &CGF,
14246 ArrayRef<Value *> Ops) {
14247 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
14248 Value *Ptr = Ops[0];
14249
14250 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
14251
14252 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
14253 ResultTy);
14254 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
14255}
14256
14257static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
14258 ArrayRef<Value *> Ops,
14259 bool InvertLHS = false) {
14260 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
14261 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
14262 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
14263
14264 if (InvertLHS)
14265 LHS = CGF.Builder.CreateNot(LHS);
14266
14267 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
14268 Ops[0]->getType());
14269}
14270
14271static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
14272 Value *Amt, bool IsRight) {
14273 llvm::Type *Ty = Op0->getType();
14274
14275 // The amount may be a scalar immediate, in which case create a splat vector.
14276 // Funnel shift amounts are treated as modulo and the types are all power-of-2,
14277 // so we only care about the lowest log2 bits anyway.
14278 if (Amt->getType() != Ty) {
14279 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
14280 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
14281 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
14282 }
14283
14284 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
14285 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
14286 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
14287}
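// For example, a funnel shift left of <4 x i32> operands by an immediate 8
// computes (Op0 << 8) | (Op1 >> 24) per lane; passing the same value for Op0
// and Op1 turns it into a rotate left by 8.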
14288
14289static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
14290 bool IsSigned) {
14291 Value *Op0 = Ops[0];
14292 Value *Op1 = Ops[1];
14293 llvm::Type *Ty = Op0->getType();
14294 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
14295
14296 CmpInst::Predicate Pred;
14297 switch (Imm) {
14298 case 0x0:
14299 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
14300 break;
14301 case 0x1:
14302 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
14303 break;
14304 case 0x2:
14305 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
14306 break;
14307 case 0x3:
14308 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
14309 break;
14310 case 0x4:
14311 Pred = ICmpInst::ICMP_EQ;
14312 break;
14313 case 0x5:
14314 Pred = ICmpInst::ICMP_NE;
14315 break;
14316 case 0x6:
14317 return llvm::Constant::getNullValue(Ty); // FALSE
14318 case 0x7:
14319 return llvm::Constant::getAllOnesValue(Ty); // TRUE
14320 default:
14321 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
14322 }
14323
14324 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
14325 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
14326 return Res;
14327}
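// For example, an XOP vpcomu builtin called with the immediate 0x2 produces an
// unsigned ICMP_UGT whose i1 result is sign-extended back to the element type,
// giving the usual all-ones / all-zeros per-lane mask.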
14328
14329static Value *EmitX86Select(CodeGenFunction &CGF,
14330 Value *Mask, Value *Op0, Value *Op1) {
14331
14332 // If the mask is all ones, just return the first argument.
14333 if (const auto *C = dyn_cast<Constant>(Mask))
14334 if (C->isAllOnesValue())
14335 return Op0;
14336
14337 Mask = getMaskVecValue(
14338 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
14339
14340 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14341}
14342
14343static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
14344 Value *Mask, Value *Op0, Value *Op1) {
14345 // If the mask is all ones, just return the first argument.
14346 if (const auto *C = dyn_cast<Constant>(Mask))
14347 if (C->isAllOnesValue())
14348 return Op0;
14349
14350 auto *MaskTy = llvm::FixedVectorType::get(
14351 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
14352 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
14353 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
14354 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
14355}
14356
14357static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
14358 unsigned NumElts, Value *MaskIn) {
14359 if (MaskIn) {
14360 const auto *C = dyn_cast<Constant>(MaskIn);
14361 if (!C || !C->isAllOnesValue())
14362 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
14363 }
14364
14365 if (NumElts < 8) {
14366 int Indices[8];
14367 for (unsigned i = 0; i != NumElts; ++i)
14368 Indices[i] = i;
14369 for (unsigned i = NumElts; i != 8; ++i)
14370 Indices[i] = i % NumElts + NumElts;
14371 Cmp = CGF.Builder.CreateShuffleVector(
14372 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
14373 }
14374
14375 return CGF.Builder.CreateBitCast(Cmp,
14376 IntegerType::get(CGF.getLLVMContext(),
14377 std::max(NumElts, 8U)));
14378}
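// For example, with NumElts == 4 the <4 x i1> compare result is widened to
// <8 x i1> by appending four lanes taken from the null vector (indices 4..7),
// so the final bitcast yields an i8 whose upper four bits are zero.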
14379
14380static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
14381 bool Signed, ArrayRef<Value *> Ops) {
14382 assert((Ops.size() == 2 || Ops.size() == 4) &&
14383 "Unexpected number of arguments");
14384 unsigned NumElts =
14385 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14386 Value *Cmp;
14387
14388 if (CC == 3) {
14389 Cmp = Constant::getNullValue(
14390 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14391 } else if (CC == 7) {
14392 Cmp = Constant::getAllOnesValue(
14393 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
14394 } else {
14395 ICmpInst::Predicate Pred;
14396 switch (CC) {
14397 default: llvm_unreachable("Unknown condition code");
14398 case 0: Pred = ICmpInst::ICMP_EQ; break;
14399 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
14400 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
14401 case 4: Pred = ICmpInst::ICMP_NE; break;
14402 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
14403 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
14404 }
14405 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
14406 }
14407
14408 Value *MaskIn = nullptr;
14409 if (Ops.size() == 4)
14410 MaskIn = Ops[3];
14411
14412 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
14413}
14414
14415static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
14416 Value *Zero = Constant::getNullValue(In->getType());
14417 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
14418}
14419
14420static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
14421 ArrayRef<Value *> Ops, bool IsSigned) {
14422 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
14423 llvm::Type *Ty = Ops[1]->getType();
14424
14425 Value *Res;
14426 if (Rnd != 4) {
14427 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
14428 : Intrinsic::x86_avx512_uitofp_round;
14429 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
14430 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
14431 } else {
14432 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14433 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
14434 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
14435 }
14436
14437 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14438}
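// Note: Rnd == 4 is _MM_FROUND_CUR_DIRECTION, the only case that can be
// expressed as a plain sitofp/uitofp; any other rounding immediate keeps the
// AVX-512 intrinsic so the requested rounding mode survives into the backend.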
14439
14440// Lowers X86 FMA intrinsics to IR.
14441static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14442 ArrayRef<Value *> Ops, unsigned BuiltinID,
14443 bool IsAddSub) {
14444
14445 bool Subtract = false;
14446 Intrinsic::ID IID = Intrinsic::not_intrinsic;
14447 switch (BuiltinID) {
14448 default: break;
14449 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14450 Subtract = true;
14451 [[fallthrough]];
14452 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14453 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14454 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14455 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
14456 break;
14457 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14458 Subtract = true;
14459 [[fallthrough]];
14460 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14461 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14462 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14463 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
14464 break;
14465 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14466 Subtract = true;
14467 [[fallthrough]];
14468 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14469 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14470 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14471 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
14472 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14473 Subtract = true;
14474 [[fallthrough]];
14475 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14476 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14477 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14478 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
14479 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14480 Subtract = true;
14481 [[fallthrough]];
14482 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14483 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14484 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14485 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
14486 break;
14487 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14488 Subtract = true;
14489 [[fallthrough]];
14490 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14491 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14492 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14493 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
14494 break;
14495 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14496 Subtract = true;
14497 LLVM_FALLTHROUGH;
14498 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14499 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14500 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14501 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
14502 break;
14503 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14504 Subtract = true;
14505 LLVM_FALLTHROUGH;
14506 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14507 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14508 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14509 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
14510 break;
14511 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14512 Subtract = true;
14513 LLVM_FALLTHROUGH;
14514 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14515 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14516 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14517 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
14518 break;
14519 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14520 Subtract = true;
14521 LLVM_FALLTHROUGH;
14522 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14523 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14524 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14525 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
14526 break;
14527 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14528 Subtract = true;
14529 LLVM_FALLTHROUGH;
14530 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14531 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14532 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14533 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
14534 break;
14535 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14536 Subtract = true;
14537 LLVM_FALLTHROUGH;
14538 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14539 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14540 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14541 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
14542 break;
14543 }
14544
14545 Value *A = Ops[0];
14546 Value *B = Ops[1];
14547 Value *C = Ops[2];
14548
14549 if (Subtract)
14550 C = CGF.Builder.CreateFNeg(C);
14551
14552 Value *Res;
14553
14554 // Use the intrinsic when a rounding mode other than _MM_FROUND_CUR_DIRECTION/4 is requested, or for the addsub forms; otherwise lower to a plain fma.
14555 if (IID != Intrinsic::not_intrinsic &&
14556 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14557 IsAddSub)) {
14558 Function *Intr = CGF.CGM.getIntrinsic(IID);
14559 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14560 } else {
14561 llvm::Type *Ty = A->getType();
14562 Function *FMA;
14563 if (CGF.Builder.getIsFPConstrained()) {
14564 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14565 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14566 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14567 } else {
14568 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14569 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14570 }
14571 }
14572
14573 // Handle any required masking.
14574 Value *MaskFalseVal = nullptr;
14575 switch (BuiltinID) {
14576 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14577 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14578 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14579 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14580 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14581 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14582 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14583 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14584 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14585 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14586 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14587 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14588 MaskFalseVal = Ops[0];
14589 break;
14590 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14591 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14592 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14593 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14594 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14595 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14596 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14597 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14598 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14599 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14600 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14601 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14602 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14603 break;
14604 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14605 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14606 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14607 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14608 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14609 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14610 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14611 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14612 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14613 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14614 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14615 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14616 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14617 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14618 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14619 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14620 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14621 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14622 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14623 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14624 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14625 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14626 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14627 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14628 MaskFalseVal = Ops[2];
14629 break;
14630 }
14631
14632 if (MaskFalseVal)
14633 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14634
14635 return Res;
14636}
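// In the unmasked, default-rounding case this amounts to a single llvm.fma
// call; the _mask, _maskz and _mask3 builtin variants then select, per lane,
// between that result and the first source, zero, or the accumulator
// respectively, using the mask in Ops[3].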
14637
14638static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14639 MutableArrayRef<Value *> Ops, Value *Upper,
14640 bool ZeroMask = false, unsigned PTIdx = 0,
14641 bool NegAcc = false) {
14642 unsigned Rnd = 4;
14643 if (Ops.size() > 4)
14644 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14645
14646 if (NegAcc)
14647 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14648
14649 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14650 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14651 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14652 Value *Res;
14653 if (Rnd != 4) {
14654 Intrinsic::ID IID;
14655
14656 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14657 case 16:
14658 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14659 break;
14660 case 32:
14661 IID = Intrinsic::x86_avx512_vfmadd_f32;
14662 break;
14663 case 64:
14664 IID = Intrinsic::x86_avx512_vfmadd_f64;
14665 break;
14666 default:
14667 llvm_unreachable("Unexpected size");
14668 }
14669 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14670 {Ops[0], Ops[1], Ops[2], Ops[4]});
14671 } else if (CGF.Builder.getIsFPConstrained()) {
14672 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14673 Function *FMA = CGF.CGM.getIntrinsic(
14674 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14675 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14676 } else {
14677 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14678 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14679 }
14680 // If we have more than 3 arguments, we need to do masking.
14681 if (Ops.size() > 3) {
14682 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14683 : Ops[PTIdx];
14684
14685 // If we negated the accumulator and it's the PassThru value, we need to
14686 // bypass the negate. Conveniently, Upper should be the same thing in this
14687 // case.
14688 if (NegAcc && PTIdx == 2)
14689 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14690
14691 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14692 }
14693 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14694}
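// For example, a masked scalar fmadd extracts element 0 of each source,
// performs the fused multiply-add (or the rounding intrinsic when Rnd != 4),
// applies the scalar mask select, and re-inserts the result into lane 0 of
// Upper, leaving the remaining lanes of Upper untouched.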
14695
14696static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14697 ArrayRef<Value *> Ops) {
14698 llvm::Type *Ty = Ops[0]->getType();
14699 // Arguments have a vXi32 type so cast to vXi64.
14700 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14701 Ty->getPrimitiveSizeInBits() / 64);
14702 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14703 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14704
14705 if (IsSigned) {
14706 // Shift left then arithmetic shift right.
14707 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14708 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14709 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14710 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14711 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14712 } else {
14713 // Clear the upper bits.
14714 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14715 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14716 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14717 }
14718
14719 return CGF.Builder.CreateMul(LHS, RHS);
14720}
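// For example, for pmuldq-style builtins the <4 x i32> sources are
// reinterpreted as <2 x i64>; the shl/ashr (or and) pair sign- or zero-extends
// the low 32 bits of each 64-bit lane, so the 64-bit multiply reproduces the
// hardware 32x32->64 product of the even-numbered 32-bit elements.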
14721
14722// Emit a masked pternlog intrinsic. This only exists because the header has to
14723// use a macro and we aren't able to pass the input argument to a pternlog
14724// builtin and a select builtin without evaluating it twice.
14725static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14726 ArrayRef<Value *> Ops) {
14727 llvm::Type *Ty = Ops[0]->getType();
14728
14729 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14730 unsigned EltWidth = Ty->getScalarSizeInBits();
14731 Intrinsic::ID IID;
14732 if (VecWidth == 128 && EltWidth == 32)
14733 IID = Intrinsic::x86_avx512_pternlog_d_128;
14734 else if (VecWidth == 256 && EltWidth == 32)
14735 IID = Intrinsic::x86_avx512_pternlog_d_256;
14736 else if (VecWidth == 512 && EltWidth == 32)
14737 IID = Intrinsic::x86_avx512_pternlog_d_512;
14738 else if (VecWidth == 128 && EltWidth == 64)
14739 IID = Intrinsic::x86_avx512_pternlog_q_128;
14740 else if (VecWidth == 256 && EltWidth == 64)
14741 IID = Intrinsic::x86_avx512_pternlog_q_256;
14742 else if (VecWidth == 512 && EltWidth == 64)
14743 IID = Intrinsic::x86_avx512_pternlog_q_512;
14744 else
14745 llvm_unreachable("Unexpected intrinsic");
14746
14747 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14748 Ops.drop_back());
14749 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14750 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14751}
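// Here Ops is {a, b, c, imm8, mask}: the first four operands feed the pternlog
// intrinsic (the immediate is its truth table), and the trailing mask selects
// per lane between the pternlog result and either zero or the first source.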
14752
14753static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14754 llvm::Type *DstTy) {
14755 unsigned NumberOfElements =
14756 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14757 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14758 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14759}
14760
14761Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14762 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14763 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14764 return EmitX86CpuIs(CPUStr);
14765}
14766
14767// Convert F16 halves to floats.
14768static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14769 ArrayRef<Value *> Ops,
14770 llvm::Type *DstTy) {
14771 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14772 "Unknown cvtph2ps intrinsic");
14773
14774 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14775 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14776 Function *F =
14777 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14778 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14779 }
14780
14781 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14782 Value *Src = Ops[0];
14783
14784 // Extract the subvector.
14785 if (NumDstElts !=
14786 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14787 assert(NumDstElts == 4 && "Unexpected vector size");
14788 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14789 }
14790
14791 // Bitcast from vXi16 to vXf16.
14792 auto *HalfTy = llvm::FixedVectorType::get(
14793 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14794 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14795
14796 // Perform the fp-extension.
14797 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14798
14799 if (Ops.size() >= 3)
14800 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14801 return Res;
14802}
14803
14804Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14805
14806 llvm::Type *Int32Ty = Builder.getInt32Ty();
14807
14808 // Matching the struct layout from the compiler-rt/libgcc structure that is
14809 // filled in:
14810 // unsigned int __cpu_vendor;
14811 // unsigned int __cpu_type;
14812 // unsigned int __cpu_subtype;
14813 // unsigned int __cpu_features[1];
14814 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14815 llvm::ArrayType::get(Int32Ty, 1));
14816
14817 // Grab the global __cpu_model.
14818 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14819 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14820
14821 // Calculate the index needed to access the correct field based on the
14822 // range. Also adjust the expected value.
14823 unsigned Index;
14824 unsigned Value;
14825 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14826#define X86_VENDOR(ENUM, STRING) \
14827 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14828#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14829 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14830#define X86_CPU_TYPE(ENUM, STR) \
14831 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14832#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14833 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14834#define X86_CPU_SUBTYPE(ENUM, STR) \
14835 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14836#include "llvm/TargetParser/X86TargetParser.def"
14837 .Default({0, 0});
14838 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14839
14840 // Grab the appropriate field from __cpu_model.
14841 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14842 ConstantInt::get(Int32Ty, Index)};
14843 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14844 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14845 CharUnits::fromQuantity(4));
14846
14847 // Check the value of the field against the requested value.
14848 return Builder.CreateICmpEQ(CpuValue,
14849 llvm::ConstantInt::get(Int32Ty, Value));
14850}
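// For example, __builtin_cpu_is("intel") loads __cpu_model.__cpu_vendor
// (Index 0) and compares it against the matching llvm::X86 vendor enum value,
// while CPU type and subtype strings select Index 1 or 2 instead.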
14851
14852Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14853 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14854 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14855 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14856 return Builder.getFalse();
14857 return EmitX86CpuSupports(FeatureStr);
14858}
14859
14860Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14861 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14862}
14863
14864llvm::Value *
14865CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14866 Value *Result = Builder.getTrue();
14867 if (FeatureMask[0] != 0) {
14868 // Matching the struct layout from the compiler-rt/libgcc structure that is
14869 // filled in:
14870 // unsigned int __cpu_vendor;
14871 // unsigned int __cpu_type;
14872 // unsigned int __cpu_subtype;
14873 // unsigned int __cpu_features[1];
14874 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14875 llvm::ArrayType::get(Int32Ty, 1));
14876
14877 // Grab the global __cpu_model.
14878 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14879 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14880
14881 // Grab the first (0th) element of the __cpu_features field from the
14882 // global, using the struct type STy.
14883 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14884 Builder.getInt32(0)};
14885 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14886 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14887 CharUnits::fromQuantity(4));
14888
14889 // Check the value of the bit corresponding to the feature requested.
14890 Value *Mask = Builder.getInt32(FeatureMask[0]);
14891 Value *Bitset = Builder.CreateAnd(Features, Mask);
14892 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14893 Result = Builder.CreateAnd(Result, Cmp);
14894 }
14895
14896 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14897 llvm::Constant *CpuFeatures2 =
14898 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14899 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14900 for (int i = 1; i != 4; ++i) {
14901 const uint32_t M = FeatureMask[i];
14902 if (!M)
14903 continue;
14904 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14905 Value *Features = Builder.CreateAlignedLoad(
14906 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14907 CharUnits::fromQuantity(4));
14908 // Check the value of the bit corresponding to the feature requested.
14909 Value *Mask = Builder.getInt32(M);
14910 Value *Bitset = Builder.CreateAnd(Features, Mask);
14911 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14912 Result = Builder.CreateAnd(Result, Cmp);
14913 }
14914
14915 return Result;
14916}
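// The low 32 feature bits are tested against __cpu_model.__cpu_features[0];
// FeatureMask[1..3] are tested against __cpu_features2[0..2]. The result is
// true only if every requested bit is set in the corresponding word.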
14917
14918Value *CodeGenFunction::EmitAArch64CpuInit() {
14919 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14920 llvm::FunctionCallee Func =
14921 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14922 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14923 cast<llvm::GlobalValue>(Func.getCallee())
14924 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14925 return Builder.CreateCall(Func);
14926}
14927
14928Value *CodeGenFunction::EmitRISCVCpuInit() {
14929 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
14930 llvm::FunctionCallee Func =
14931 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
14932 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
14933 CalleeGV->setDSOLocal(true);
14934 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14935 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
14936}
14937
14938Value *CodeGenFunction::EmitX86CpuInit() {
14939 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14940 /*Variadic*/ false);
14941 llvm::FunctionCallee Func =
14942 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14943 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14944 cast<llvm::GlobalValue>(Func.getCallee())
14945 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14946 return Builder.CreateCall(Func);
14947}
14948
14949Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14950 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14951 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14952 llvm::SmallVector<StringRef, 8> Features;
14953 ArgStr.split(Features, "+");
14954 for (auto &Feature : Features) {
14955 Feature = Feature.trim();
14956 if (!llvm::AArch64::parseFMVExtension(Feature))
14957 return Builder.getFalse();
14958 if (Feature != "default")
14959 Features.push_back(Feature);
14960 }
14961 return EmitAArch64CpuSupports(Features);
14962}
14963
14964llvm::Value *
14965CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14966 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14967 Value *Result = Builder.getTrue();
14968 if (FeaturesMask != 0) {
14969 // Get features from structure in runtime library
14970 // struct {
14971 // unsigned long long features;
14972 // } __aarch64_cpu_features;
14973 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14974 llvm::Constant *AArch64CPUFeatures =
14975 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14976 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14977 llvm::Value *CpuFeatures = Builder.CreateGEP(
14978 STy, AArch64CPUFeatures,
14979 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14980 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14981 CharUnits::fromQuantity(8));
14982 Value *Mask = Builder.getInt64(FeaturesMask);
14983 Value *Bitset = Builder.CreateAnd(Features, Mask);
14984 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14985 Result = Builder.CreateAnd(Result, Cmp);
14986 }
14987 return Result;
14988}
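// For example, a query such as __builtin_cpu_supports("sve2") on AArch64 maps
// the feature name to a bit in __aarch64_cpu_features.features; the generated
// IR loads that 64-bit word, ANDs it with the mask, and compares for equality
// so that every requested feature must be present.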
14989
14990Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
14991
14992 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14993 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14994 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14995 return Builder.getFalse();
14996
14997 return EmitRISCVCpuSupports(ArrayRef<StringRef>(FeatureStr));
14998}
14999
15000static Value *loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder,
15001 CodeGenModule &CGM) {
15002 llvm::Type *Int32Ty = Builder.getInt32Ty();
15003 llvm::Type *Int64Ty = Builder.getInt64Ty();
15004 llvm::ArrayType *ArrayOfInt64Ty =
15005 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
15006 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
15007 llvm::Constant *RISCVFeaturesBits =
15008 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
15009 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(true);
15010 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
15011 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
15012 IndexVal};
15013 Value *Ptr =
15014 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
15015 Value *FeaturesBit =
15016 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
15017 return FeaturesBit;
15018}
15019
15020Value *CodeGenFunction::EmitRISCVCpuSupports(ArrayRef<StringRef> FeaturesStrs) {
15021 const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
15022 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
15023
15024 for (auto Feat : FeaturesStrs) {
15025 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
15026
15027 // If there is no BitPos for this feature, skip this version.
15028 // A warning is also reported to the user during compilation.
15029 if (BitPos == -1)
15030 return Builder.getFalse();
15031
15032 RequireBitMasks[GroupID] |= (1ULL << BitPos);
15033 }
15034
15035 Value *Result = nullptr;
15036 for (unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
15037 if (RequireBitMasks[Idx] == 0)
15038 continue;
15039
15040 Value *Mask = Builder.getInt64(RequireBitMasks[Idx]);
15041 Value *Bitset =
15042 Builder.CreateAnd(loadRISCVFeatureBits(Idx, Builder, CGM), Mask);
15043 Value *CmpV = Builder.CreateICmpEQ(Bitset, Mask);
15044 Result = (!Result) ? CmpV : Builder.CreateAnd(Result, CmpV);
15045 }
15046
15047 assert(Result && "Should have value here.");
15048
15049 return Result;
15050}
15051
15052Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
15053 const CallExpr *E) {
15054 if (BuiltinID == Builtin::BI__builtin_cpu_is)
15055 return EmitX86CpuIs(E);
15056 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
15057 return EmitX86CpuSupports(E);
15058 if (BuiltinID == Builtin::BI__builtin_cpu_init)
15059 return EmitX86CpuInit();
15060
15061 // Handle MSVC intrinsics before argument evaluation to prevent double
15062 // evaluation.
15063 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
15064 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
15065
15066 SmallVector<Value*, 4> Ops;
15067 bool IsMaskFCmp = false;
15068 bool IsConjFMA = false;
15069
15070 // Find out if any arguments are required to be integer constant expressions.
15071 unsigned ICEArguments = 0;
15072 ASTContext::GetBuiltinTypeError Error;
15073 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
15074 assert(Error == ASTContext::GE_None && "Should not codegen an error");
15075
15076 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
15077 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
15078 }
15079
15080 // These exist so that the builtin that takes an immediate can be bounds
15081 // checked by clang to avoid passing bad immediates to the backend. Since
15082 // AVX has a larger immediate than SSE we would need separate builtins to
15083 // do the different bounds checking. Rather than create a clang-specific,
15084 // SSE-only builtin, this implements eight separate builtins to match the
15085 // gcc implementation.
15086 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
15087 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
15088 llvm::Function *F = CGM.getIntrinsic(ID);
15089 return Builder.CreateCall(F, Ops);
15090 };
15091
15092 // For the vector forms of FP comparisons, translate the builtins directly to
15093 // IR.
15094 // TODO: The builtins could be removed if the SSE header files used vector
15095 // extension comparisons directly (vector ordered/unordered may need
15096 // additional support via __builtin_isnan()).
15097 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
15098 bool IsSignaling) {
15099 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
15100 Value *Cmp;
15101 if (IsSignaling)
15102 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
15103 else
15104 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
15105 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
15106 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
15107 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
15108 return Builder.CreateBitCast(Sext, FPVecTy);
15109 };
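  // The compare yields <N x i1>; sign-extending and bitcasting turns it into
  // the usual all-ones/all-zeros FP vector mask, e.g. a <4 x float> compare
  // becomes <4 x i32> lanes of 0/-1 reinterpreted as floats.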
15110
15111 switch (BuiltinID) {
15112 default: return nullptr;
15113 case X86::BI_mm_prefetch: {
15114 Value *Address = Ops[0];
15115 ConstantInt *C = cast<ConstantInt>(Ops[1]);
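  // Decode the SSE hint: bit 2 selects a write prefetch (RW) and the low two
  // bits give the locality level; e.g. a hint of 3 (_MM_HINT_T0) yields
  // RW = 0, Locality = 3.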
15116 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
15117 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
15118 Value *Data = ConstantInt::get(Int32Ty, 1);
15119 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
15120 return Builder.CreateCall(F, {Address, RW, Locality, Data});
15121 }
15122 case X86::BI_mm_clflush: {
15123 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
15124 Ops[0]);
15125 }
15126 case X86::BI_mm_lfence: {
15127 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
15128 }
15129 case X86::BI_mm_mfence: {
15130 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
15131 }
15132 case X86::BI_mm_sfence: {
15133 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
15134 }
15135 case X86::BI_mm_pause: {
15136 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
15137 }
15138 case X86::BI__rdtsc: {
15139 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
15140 }
15141 case X86::BI__builtin_ia32_rdtscp: {
15142 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
15143 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
15144 Ops[0]);
15145 return Builder.CreateExtractValue(Call, 0);
15146 }
15147 case X86::BI__builtin_ia32_lzcnt_u16:
15148 case X86::BI__builtin_ia32_lzcnt_u32:
15149 case X86::BI__builtin_ia32_lzcnt_u64: {
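  // Lower directly to llvm.ctlz; the 'false' flag makes a zero input defined
  // (the result is the operand bit width), matching lzcnt semantics.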
15150 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
15151 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15152 }
15153 case X86::BI__builtin_ia32_tzcnt_u16:
15154 case X86::BI__builtin_ia32_tzcnt_u32:
15155 case X86::BI__builtin_ia32_tzcnt_u64: {
15156 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
15157 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
15158 }
15159 case X86::BI__builtin_ia32_undef128:
15160 case X86::BI__builtin_ia32_undef256:
15161 case X86::BI__builtin_ia32_undef512:
15162 // The x86 definition of "undef" is not the same as the LLVM definition
15163 // (PR32176). We leave optimizing away an unnecessary zero constant to the
15164 // IR optimizer and backend.
15165 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
15166 // value, we should use that here instead of a zero.
15167 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15168 case X86::BI__builtin_ia32_vec_ext_v4hi:
15169 case X86::BI__builtin_ia32_vec_ext_v16qi:
15170 case X86::BI__builtin_ia32_vec_ext_v8hi:
15171 case X86::BI__builtin_ia32_vec_ext_v4si:
15172 case X86::BI__builtin_ia32_vec_ext_v4sf:
15173 case X86::BI__builtin_ia32_vec_ext_v2di:
15174 case X86::BI__builtin_ia32_vec_ext_v32qi:
15175 case X86::BI__builtin_ia32_vec_ext_v16hi:
15176 case X86::BI__builtin_ia32_vec_ext_v8si:
15177 case X86::BI__builtin_ia32_vec_ext_v4di: {
15178 unsigned NumElts =
15179 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15180 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15181 Index &= NumElts - 1;
15182 // These builtins exist so we can ensure the index is an ICE and in range.
15183 // Otherwise we could just do this in the header file.
15184 return Builder.CreateExtractElement(Ops[0], Index);
15185 }
15186 case X86::BI__builtin_ia32_vec_set_v4hi:
15187 case X86::BI__builtin_ia32_vec_set_v16qi:
15188 case X86::BI__builtin_ia32_vec_set_v8hi:
15189 case X86::BI__builtin_ia32_vec_set_v4si:
15190 case X86::BI__builtin_ia32_vec_set_v2di:
15191 case X86::BI__builtin_ia32_vec_set_v32qi:
15192 case X86::BI__builtin_ia32_vec_set_v16hi:
15193 case X86::BI__builtin_ia32_vec_set_v8si:
15194 case X86::BI__builtin_ia32_vec_set_v4di: {
15195 unsigned NumElts =
15196 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15197 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15198 Index &= NumElts - 1;
15199 // These builtins exist so we can ensure the index is an ICE and in range.
15200 // Otherwise we could just do this in the header file.
15201 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
15202 }
15203 case X86::BI_mm_setcsr:
15204 case X86::BI__builtin_ia32_ldmxcsr: {
15205 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
15206 Builder.CreateStore(Ops[0], Tmp);
15207 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
15208 Tmp.getPointer());
15209 }
15210 case X86::BI_mm_getcsr:
15211 case X86::BI__builtin_ia32_stmxcsr: {
15212 RawAddress Tmp = CreateMemTemp(E->getType());
15213 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
15214 Tmp.getPointer());
15215 return Builder.CreateLoad(Tmp, "stmxcsr");
15216 }
15217 case X86::BI__builtin_ia32_xsave:
15218 case X86::BI__builtin_ia32_xsave64:
15219 case X86::BI__builtin_ia32_xrstor:
15220 case X86::BI__builtin_ia32_xrstor64:
15221 case X86::BI__builtin_ia32_xsaveopt:
15222 case X86::BI__builtin_ia32_xsaveopt64:
15223 case X86::BI__builtin_ia32_xrstors:
15224 case X86::BI__builtin_ia32_xrstors64:
15225 case X86::BI__builtin_ia32_xsavec:
15226 case X86::BI__builtin_ia32_xsavec64:
15227 case X86::BI__builtin_ia32_xsaves:
15228 case X86::BI__builtin_ia32_xsaves64:
15229 case X86::BI__builtin_ia32_xsetbv:
15230 case X86::BI_xsetbv: {
15231 Intrinsic::ID ID;
15232#define INTRINSIC_X86_XSAVE_ID(NAME) \
15233 case X86::BI__builtin_ia32_##NAME: \
15234 ID = Intrinsic::x86_##NAME; \
15235 break
15236 switch (BuiltinID) {
15237 default: llvm_unreachable("Unsupported intrinsic!");
15238 INTRINSIC_X86_XSAVE_ID(xsave);
15239 INTRINSIC_X86_XSAVE_ID(xsave64);
15240 INTRINSIC_X86_XSAVE_ID(xrstor);
15241 INTRINSIC_X86_XSAVE_ID(xrstor64);
15242 INTRINSIC_X86_XSAVE_ID(xsaveopt);
15243 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
15244 INTRINSIC_X86_XSAVE_ID(xrstors);
15245 INTRINSIC_X86_XSAVE_ID(xrstors64);
15246 INTRINSIC_X86_XSAVE_ID(xsavec);
15247 INTRINSIC_X86_XSAVE_ID(xsavec64);
15248 INTRINSIC_X86_XSAVE_ID(xsaves);
15249 INTRINSIC_X86_XSAVE_ID(xsaves64);
15250 INTRINSIC_X86_XSAVE_ID(xsetbv);
15251 case X86::BI_xsetbv:
15252 ID = Intrinsic::x86_xsetbv;
15253 break;
15254 }
15255#undef INTRINSIC_X86_XSAVE_ID
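  // The 64-bit operand in Ops[1] is split into 32-bit halves: the high half
  // replaces Ops[1] and the low half is appended, so the intrinsic takes two
  // i32 operands (hi, lo). E.g. a mask of 0x7 becomes (0, 7).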
15256 Value *Mhi = Builder.CreateTrunc(
15257 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
15258 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
15259 Ops[1] = Mhi;
15260 Ops.push_back(Mlo);
15261 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
15262 }
15263 case X86::BI__builtin_ia32_xgetbv:
15264 case X86::BI_xgetbv:
15265 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
15266 case X86::BI__builtin_ia32_storedqudi128_mask:
15267 case X86::BI__builtin_ia32_storedqusi128_mask:
15268 case X86::BI__builtin_ia32_storedquhi128_mask:
15269 case X86::BI__builtin_ia32_storedquqi128_mask:
15270 case X86::BI__builtin_ia32_storeupd128_mask:
15271 case X86::BI__builtin_ia32_storeups128_mask:
15272 case X86::BI__builtin_ia32_storedqudi256_mask:
15273 case X86::BI__builtin_ia32_storedqusi256_mask:
15274 case X86::BI__builtin_ia32_storedquhi256_mask:
15275 case X86::BI__builtin_ia32_storedquqi256_mask:
15276 case X86::BI__builtin_ia32_storeupd256_mask:
15277 case X86::BI__builtin_ia32_storeups256_mask:
15278 case X86::BI__builtin_ia32_storedqudi512_mask:
15279 case X86::BI__builtin_ia32_storedqusi512_mask:
15280 case X86::BI__builtin_ia32_storedquhi512_mask:
15281 case X86::BI__builtin_ia32_storedquqi512_mask:
15282 case X86::BI__builtin_ia32_storeupd512_mask:
15283 case X86::BI__builtin_ia32_storeups512_mask:
15284 return EmitX86MaskedStore(*this, Ops, Align(1));
15285
15286 case X86::BI__builtin_ia32_storesbf16128_mask:
15287 case X86::BI__builtin_ia32_storesh128_mask:
15288 case X86::BI__builtin_ia32_storess128_mask:
15289 case X86::BI__builtin_ia32_storesd128_mask:
15290 return EmitX86MaskedStore(*this, Ops, Align(1));
15291
15292 case X86::BI__builtin_ia32_cvtmask2b128:
15293 case X86::BI__builtin_ia32_cvtmask2b256:
15294 case X86::BI__builtin_ia32_cvtmask2b512:
15295 case X86::BI__builtin_ia32_cvtmask2w128:
15296 case X86::BI__builtin_ia32_cvtmask2w256:
15297 case X86::BI__builtin_ia32_cvtmask2w512:
15298 case X86::BI__builtin_ia32_cvtmask2d128:
15299 case X86::BI__builtin_ia32_cvtmask2d256:
15300 case X86::BI__builtin_ia32_cvtmask2d512:
15301 case X86::BI__builtin_ia32_cvtmask2q128:
15302 case X86::BI__builtin_ia32_cvtmask2q256:
15303 case X86::BI__builtin_ia32_cvtmask2q512:
15304 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
15305
15306 case X86::BI__builtin_ia32_cvtb2mask128:
15307 case X86::BI__builtin_ia32_cvtb2mask256:
15308 case X86::BI__builtin_ia32_cvtb2mask512:
15309 case X86::BI__builtin_ia32_cvtw2mask128:
15310 case X86::BI__builtin_ia32_cvtw2mask256:
15311 case X86::BI__builtin_ia32_cvtw2mask512:
15312 case X86::BI__builtin_ia32_cvtd2mask128:
15313 case X86::BI__builtin_ia32_cvtd2mask256:
15314 case X86::BI__builtin_ia32_cvtd2mask512:
15315 case X86::BI__builtin_ia32_cvtq2mask128:
15316 case X86::BI__builtin_ia32_cvtq2mask256:
15317 case X86::BI__builtin_ia32_cvtq2mask512:
15318 return EmitX86ConvertToMask(*this, Ops[0]);
15319
15320 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
15321 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
15322 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
15323 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
15324 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
15325 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
15326 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
15327 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
15328 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
15329 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
15330 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
15331 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
15332 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
15333 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
15334 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
15335 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
15336 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
15337 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
15338 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
15339 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
15340 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
15341 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
15342 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
15343 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
15344 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
15345 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
15346
15347 case X86::BI__builtin_ia32_vfmaddss3:
15348 case X86::BI__builtin_ia32_vfmaddsd3:
15349 case X86::BI__builtin_ia32_vfmaddsh3_mask:
15350 case X86::BI__builtin_ia32_vfmaddss3_mask:
15351 case X86::BI__builtin_ia32_vfmaddsd3_mask:
15352 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
15353 case X86::BI__builtin_ia32_vfmaddss:
15354 case X86::BI__builtin_ia32_vfmaddsd:
15355 return EmitScalarFMAExpr(*this, E, Ops,
15356 Constant::getNullValue(Ops[0]->getType()));
15357 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
15358 case X86::BI__builtin_ia32_vfmaddss3_maskz:
15359 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
15360 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
15361 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
15362 case X86::BI__builtin_ia32_vfmaddss3_mask3:
15363 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
15364 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
15365 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
15366 case X86::BI__builtin_ia32_vfmsubss3_mask3:
15367 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
15368 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
15369 /*NegAcc*/ true);
15370 case X86::BI__builtin_ia32_vfmaddph:
15371 case X86::BI__builtin_ia32_vfmaddps:
15372 case X86::BI__builtin_ia32_vfmaddpd:
15373 case X86::BI__builtin_ia32_vfmaddph256:
15374 case X86::BI__builtin_ia32_vfmaddps256:
15375 case X86::BI__builtin_ia32_vfmaddpd256:
15376 case X86::BI__builtin_ia32_vfmaddph512_mask:
15377 case X86::BI__builtin_ia32_vfmaddph512_maskz:
15378 case X86::BI__builtin_ia32_vfmaddph512_mask3:
15379 case X86::BI__builtin_ia32_vfmaddnepbh128:
15380 case X86::BI__builtin_ia32_vfmaddnepbh256:
15381 case X86::BI__builtin_ia32_vfmaddnepbh512:
15382 case X86::BI__builtin_ia32_vfmaddps512_mask:
15383 case X86::BI__builtin_ia32_vfmaddps512_maskz:
15384 case X86::BI__builtin_ia32_vfmaddps512_mask3:
15385 case X86::BI__builtin_ia32_vfmsubps512_mask3:
15386 case X86::BI__builtin_ia32_vfmaddpd512_mask:
15387 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
15388 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
15389 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
15390 case X86::BI__builtin_ia32_vfmsubph512_mask3:
15391 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
15392 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
15393 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
15394 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
15395 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
15396 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
15397 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
15398 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
15399 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
15400 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
15401 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
15402 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
15403 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
15404 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
15405 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
15406 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
15407 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
15408 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
15409 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
15410 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
15411 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
15412 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
15413 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
15414 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
15415 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
15416 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
15417 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
15418 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
15419 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
15420 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
15421 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
15422 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
15423 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
15424 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
15425 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
15426 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
15427 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
15428 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
15429
15430 case X86::BI__builtin_ia32_movdqa32store128_mask:
15431 case X86::BI__builtin_ia32_movdqa64store128_mask:
15432 case X86::BI__builtin_ia32_storeaps128_mask:
15433 case X86::BI__builtin_ia32_storeapd128_mask:
15434 case X86::BI__builtin_ia32_movdqa32store256_mask:
15435 case X86::BI__builtin_ia32_movdqa64store256_mask:
15436 case X86::BI__builtin_ia32_storeaps256_mask:
15437 case X86::BI__builtin_ia32_storeapd256_mask:
15438 case X86::BI__builtin_ia32_movdqa32store512_mask:
15439 case X86::BI__builtin_ia32_movdqa64store512_mask:
15440 case X86::BI__builtin_ia32_storeaps512_mask:
15441 case X86::BI__builtin_ia32_storeapd512_mask:
15442 return EmitX86MaskedStore(
15443 *this, Ops,
15444 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15445
15446 case X86::BI__builtin_ia32_loadups128_mask:
15447 case X86::BI__builtin_ia32_loadups256_mask:
15448 case X86::BI__builtin_ia32_loadups512_mask:
15449 case X86::BI__builtin_ia32_loadupd128_mask:
15450 case X86::BI__builtin_ia32_loadupd256_mask:
15451 case X86::BI__builtin_ia32_loadupd512_mask:
15452 case X86::BI__builtin_ia32_loaddquqi128_mask:
15453 case X86::BI__builtin_ia32_loaddquqi256_mask:
15454 case X86::BI__builtin_ia32_loaddquqi512_mask:
15455 case X86::BI__builtin_ia32_loaddquhi128_mask:
15456 case X86::BI__builtin_ia32_loaddquhi256_mask:
15457 case X86::BI__builtin_ia32_loaddquhi512_mask:
15458 case X86::BI__builtin_ia32_loaddqusi128_mask:
15459 case X86::BI__builtin_ia32_loaddqusi256_mask:
15460 case X86::BI__builtin_ia32_loaddqusi512_mask:
15461 case X86::BI__builtin_ia32_loaddqudi128_mask:
15462 case X86::BI__builtin_ia32_loaddqudi256_mask:
15463 case X86::BI__builtin_ia32_loaddqudi512_mask:
15464 return EmitX86MaskedLoad(*this, Ops, Align(1));
15465
15466 case X86::BI__builtin_ia32_loadsbf16128_mask:
15467 case X86::BI__builtin_ia32_loadsh128_mask:
15468 case X86::BI__builtin_ia32_loadss128_mask:
15469 case X86::BI__builtin_ia32_loadsd128_mask:
15470 return EmitX86MaskedLoad(*this, Ops, Align(1));
15471
15472 case X86::BI__builtin_ia32_loadaps128_mask:
15473 case X86::BI__builtin_ia32_loadaps256_mask:
15474 case X86::BI__builtin_ia32_loadaps512_mask:
15475 case X86::BI__builtin_ia32_loadapd128_mask:
15476 case X86::BI__builtin_ia32_loadapd256_mask:
15477 case X86::BI__builtin_ia32_loadapd512_mask:
15478 case X86::BI__builtin_ia32_movdqa32load128_mask:
15479 case X86::BI__builtin_ia32_movdqa32load256_mask:
15480 case X86::BI__builtin_ia32_movdqa32load512_mask:
15481 case X86::BI__builtin_ia32_movdqa64load128_mask:
15482 case X86::BI__builtin_ia32_movdqa64load256_mask:
15483 case X86::BI__builtin_ia32_movdqa64load512_mask:
15484 return EmitX86MaskedLoad(
15485 *this, Ops,
15486 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
15487
15488 case X86::BI__builtin_ia32_expandloaddf128_mask:
15489 case X86::BI__builtin_ia32_expandloaddf256_mask:
15490 case X86::BI__builtin_ia32_expandloaddf512_mask:
15491 case X86::BI__builtin_ia32_expandloadsf128_mask:
15492 case X86::BI__builtin_ia32_expandloadsf256_mask:
15493 case X86::BI__builtin_ia32_expandloadsf512_mask:
15494 case X86::BI__builtin_ia32_expandloaddi128_mask:
15495 case X86::BI__builtin_ia32_expandloaddi256_mask:
15496 case X86::BI__builtin_ia32_expandloaddi512_mask:
15497 case X86::BI__builtin_ia32_expandloadsi128_mask:
15498 case X86::BI__builtin_ia32_expandloadsi256_mask:
15499 case X86::BI__builtin_ia32_expandloadsi512_mask:
15500 case X86::BI__builtin_ia32_expandloadhi128_mask:
15501 case X86::BI__builtin_ia32_expandloadhi256_mask:
15502 case X86::BI__builtin_ia32_expandloadhi512_mask:
15503 case X86::BI__builtin_ia32_expandloadqi128_mask:
15504 case X86::BI__builtin_ia32_expandloadqi256_mask:
15505 case X86::BI__builtin_ia32_expandloadqi512_mask:
15506 return EmitX86ExpandLoad(*this, Ops);
15507
15508 case X86::BI__builtin_ia32_compressstoredf128_mask:
15509 case X86::BI__builtin_ia32_compressstoredf256_mask:
15510 case X86::BI__builtin_ia32_compressstoredf512_mask:
15511 case X86::BI__builtin_ia32_compressstoresf128_mask:
15512 case X86::BI__builtin_ia32_compressstoresf256_mask:
15513 case X86::BI__builtin_ia32_compressstoresf512_mask:
15514 case X86::BI__builtin_ia32_compressstoredi128_mask:
15515 case X86::BI__builtin_ia32_compressstoredi256_mask:
15516 case X86::BI__builtin_ia32_compressstoredi512_mask:
15517 case X86::BI__builtin_ia32_compressstoresi128_mask:
15518 case X86::BI__builtin_ia32_compressstoresi256_mask:
15519 case X86::BI__builtin_ia32_compressstoresi512_mask:
15520 case X86::BI__builtin_ia32_compressstorehi128_mask:
15521 case X86::BI__builtin_ia32_compressstorehi256_mask:
15522 case X86::BI__builtin_ia32_compressstorehi512_mask:
15523 case X86::BI__builtin_ia32_compressstoreqi128_mask:
15524 case X86::BI__builtin_ia32_compressstoreqi256_mask:
15525 case X86::BI__builtin_ia32_compressstoreqi512_mask:
15526 return EmitX86CompressStore(*this, Ops);
15527
15528 case X86::BI__builtin_ia32_expanddf128_mask:
15529 case X86::BI__builtin_ia32_expanddf256_mask:
15530 case X86::BI__builtin_ia32_expanddf512_mask:
15531 case X86::BI__builtin_ia32_expandsf128_mask:
15532 case X86::BI__builtin_ia32_expandsf256_mask:
15533 case X86::BI__builtin_ia32_expandsf512_mask:
15534 case X86::BI__builtin_ia32_expanddi128_mask:
15535 case X86::BI__builtin_ia32_expanddi256_mask:
15536 case X86::BI__builtin_ia32_expanddi512_mask:
15537 case X86::BI__builtin_ia32_expandsi128_mask:
15538 case X86::BI__builtin_ia32_expandsi256_mask:
15539 case X86::BI__builtin_ia32_expandsi512_mask:
15540 case X86::BI__builtin_ia32_expandhi128_mask:
15541 case X86::BI__builtin_ia32_expandhi256_mask:
15542 case X86::BI__builtin_ia32_expandhi512_mask:
15543 case X86::BI__builtin_ia32_expandqi128_mask:
15544 case X86::BI__builtin_ia32_expandqi256_mask:
15545 case X86::BI__builtin_ia32_expandqi512_mask:
15546 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
15547
15548 case X86::BI__builtin_ia32_compressdf128_mask:
15549 case X86::BI__builtin_ia32_compressdf256_mask:
15550 case X86::BI__builtin_ia32_compressdf512_mask:
15551 case X86::BI__builtin_ia32_compresssf128_mask:
15552 case X86::BI__builtin_ia32_compresssf256_mask:
15553 case X86::BI__builtin_ia32_compresssf512_mask:
15554 case X86::BI__builtin_ia32_compressdi128_mask:
15555 case X86::BI__builtin_ia32_compressdi256_mask:
15556 case X86::BI__builtin_ia32_compressdi512_mask:
15557 case X86::BI__builtin_ia32_compresssi128_mask:
15558 case X86::BI__builtin_ia32_compresssi256_mask:
15559 case X86::BI__builtin_ia32_compresssi512_mask:
15560 case X86::BI__builtin_ia32_compresshi128_mask:
15561 case X86::BI__builtin_ia32_compresshi256_mask:
15562 case X86::BI__builtin_ia32_compresshi512_mask:
15563 case X86::BI__builtin_ia32_compressqi128_mask:
15564 case X86::BI__builtin_ia32_compressqi256_mask:
15565 case X86::BI__builtin_ia32_compressqi512_mask:
15566 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
15567
15568 case X86::BI__builtin_ia32_gather3div2df:
15569 case X86::BI__builtin_ia32_gather3div2di:
15570 case X86::BI__builtin_ia32_gather3div4df:
15571 case X86::BI__builtin_ia32_gather3div4di:
15572 case X86::BI__builtin_ia32_gather3div4sf:
15573 case X86::BI__builtin_ia32_gather3div4si:
15574 case X86::BI__builtin_ia32_gather3div8sf:
15575 case X86::BI__builtin_ia32_gather3div8si:
15576 case X86::BI__builtin_ia32_gather3siv2df:
15577 case X86::BI__builtin_ia32_gather3siv2di:
15578 case X86::BI__builtin_ia32_gather3siv4df:
15579 case X86::BI__builtin_ia32_gather3siv4di:
15580 case X86::BI__builtin_ia32_gather3siv4sf:
15581 case X86::BI__builtin_ia32_gather3siv4si:
15582 case X86::BI__builtin_ia32_gather3siv8sf:
15583 case X86::BI__builtin_ia32_gather3siv8si:
15584 case X86::BI__builtin_ia32_gathersiv8df:
15585 case X86::BI__builtin_ia32_gathersiv16sf:
15586 case X86::BI__builtin_ia32_gatherdiv8df:
15587 case X86::BI__builtin_ia32_gatherdiv16sf:
15588 case X86::BI__builtin_ia32_gathersiv8di:
15589 case X86::BI__builtin_ia32_gathersiv16si:
15590 case X86::BI__builtin_ia32_gatherdiv8di:
15591 case X86::BI__builtin_ia32_gatherdiv16si: {
15592 Intrinsic::ID IID;
15593 switch (BuiltinID) {
15594 default: llvm_unreachable("Unexpected builtin");
15595 case X86::BI__builtin_ia32_gather3div2df:
15596 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15597 break;
15598 case X86::BI__builtin_ia32_gather3div2di:
15599 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15600 break;
15601 case X86::BI__builtin_ia32_gather3div4df:
15602 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15603 break;
15604 case X86::BI__builtin_ia32_gather3div4di:
15605 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15606 break;
15607 case X86::BI__builtin_ia32_gather3div4sf:
15608 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15609 break;
15610 case X86::BI__builtin_ia32_gather3div4si:
15611 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15612 break;
15613 case X86::BI__builtin_ia32_gather3div8sf:
15614 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15615 break;
15616 case X86::BI__builtin_ia32_gather3div8si:
15617 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15618 break;
15619 case X86::BI__builtin_ia32_gather3siv2df:
15620 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15621 break;
15622 case X86::BI__builtin_ia32_gather3siv2di:
15623 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15624 break;
15625 case X86::BI__builtin_ia32_gather3siv4df:
15626 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15627 break;
15628 case X86::BI__builtin_ia32_gather3siv4di:
15629 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15630 break;
15631 case X86::BI__builtin_ia32_gather3siv4sf:
15632 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15633 break;
15634 case X86::BI__builtin_ia32_gather3siv4si:
15635 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15636 break;
15637 case X86::BI__builtin_ia32_gather3siv8sf:
15638 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15639 break;
15640 case X86::BI__builtin_ia32_gather3siv8si:
15641 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15642 break;
15643 case X86::BI__builtin_ia32_gathersiv8df:
15644 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15645 break;
15646 case X86::BI__builtin_ia32_gathersiv16sf:
15647 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15648 break;
15649 case X86::BI__builtin_ia32_gatherdiv8df:
15650 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15651 break;
15652 case X86::BI__builtin_ia32_gatherdiv16sf:
15653 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15654 break;
15655 case X86::BI__builtin_ia32_gathersiv8di:
15656 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15657 break;
15658 case X86::BI__builtin_ia32_gathersiv16si:
15659 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15660 break;
15661 case X86::BI__builtin_ia32_gatherdiv8di:
15662 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15663 break;
15664 case X86::BI__builtin_ia32_gatherdiv16si:
15665 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15666 break;
15667 }
15668
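    // The integer mask in Ops[3] is converted to a <MinElts x i1> vector,
    // where MinElts is the smaller of the passthru vector and index vector
    // element counts.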
15669 unsigned MinElts = std::min(
15670 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15671 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15672 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15673 Function *Intr = CGM.getIntrinsic(IID);
15674 return Builder.CreateCall(Intr, Ops);
15675 }
15676
15677 case X86::BI__builtin_ia32_scattersiv8df:
15678 case X86::BI__builtin_ia32_scattersiv16sf:
15679 case X86::BI__builtin_ia32_scatterdiv8df:
15680 case X86::BI__builtin_ia32_scatterdiv16sf:
15681 case X86::BI__builtin_ia32_scattersiv8di:
15682 case X86::BI__builtin_ia32_scattersiv16si:
15683 case X86::BI__builtin_ia32_scatterdiv8di:
15684 case X86::BI__builtin_ia32_scatterdiv16si:
15685 case X86::BI__builtin_ia32_scatterdiv2df:
15686 case X86::BI__builtin_ia32_scatterdiv2di:
15687 case X86::BI__builtin_ia32_scatterdiv4df:
15688 case X86::BI__builtin_ia32_scatterdiv4di:
15689 case X86::BI__builtin_ia32_scatterdiv4sf:
15690 case X86::BI__builtin_ia32_scatterdiv4si:
15691 case X86::BI__builtin_ia32_scatterdiv8sf:
15692 case X86::BI__builtin_ia32_scatterdiv8si:
15693 case X86::BI__builtin_ia32_scattersiv2df:
15694 case X86::BI__builtin_ia32_scattersiv2di:
15695 case X86::BI__builtin_ia32_scattersiv4df:
15696 case X86::BI__builtin_ia32_scattersiv4di:
15697 case X86::BI__builtin_ia32_scattersiv4sf:
15698 case X86::BI__builtin_ia32_scattersiv4si:
15699 case X86::BI__builtin_ia32_scattersiv8sf:
15700 case X86::BI__builtin_ia32_scattersiv8si: {
15701 Intrinsic::ID IID;
15702 switch (BuiltinID) {
15703 default: llvm_unreachable("Unexpected builtin");
15704 case X86::BI__builtin_ia32_scattersiv8df:
15705 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15706 break;
15707 case X86::BI__builtin_ia32_scattersiv16sf:
15708 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15709 break;
15710 case X86::BI__builtin_ia32_scatterdiv8df:
15711 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15712 break;
15713 case X86::BI__builtin_ia32_scatterdiv16sf:
15714 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15715 break;
15716 case X86::BI__builtin_ia32_scattersiv8di:
15717 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15718 break;
15719 case X86::BI__builtin_ia32_scattersiv16si:
15720 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15721 break;
15722 case X86::BI__builtin_ia32_scatterdiv8di:
15723 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15724 break;
15725 case X86::BI__builtin_ia32_scatterdiv16si:
15726 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15727 break;
15728 case X86::BI__builtin_ia32_scatterdiv2df:
15729 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15730 break;
15731 case X86::BI__builtin_ia32_scatterdiv2di:
15732 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15733 break;
15734 case X86::BI__builtin_ia32_scatterdiv4df:
15735 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15736 break;
15737 case X86::BI__builtin_ia32_scatterdiv4di:
15738 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15739 break;
15740 case X86::BI__builtin_ia32_scatterdiv4sf:
15741 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15742 break;
15743 case X86::BI__builtin_ia32_scatterdiv4si:
15744 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15745 break;
15746 case X86::BI__builtin_ia32_scatterdiv8sf:
15747 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15748 break;
15749 case X86::BI__builtin_ia32_scatterdiv8si:
15750 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15751 break;
15752 case X86::BI__builtin_ia32_scattersiv2df:
15753 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15754 break;
15755 case X86::BI__builtin_ia32_scattersiv2di:
15756 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15757 break;
15758 case X86::BI__builtin_ia32_scattersiv4df:
15759 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15760 break;
15761 case X86::BI__builtin_ia32_scattersiv4di:
15762 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15763 break;
15764 case X86::BI__builtin_ia32_scattersiv4sf:
15765 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15766 break;
15767 case X86::BI__builtin_ia32_scattersiv4si:
15768 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15769 break;
15770 case X86::BI__builtin_ia32_scattersiv8sf:
15771 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15772 break;
15773 case X86::BI__builtin_ia32_scattersiv8si:
15774 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15775 break;
15776 }
15777
15778 unsigned MinElts = std::min(
15779 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15780 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15781 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15782 Function *Intr = CGM.getIntrinsic(IID);
15783 return Builder.CreateCall(Intr, Ops);
15784 }
15785
15786 case X86::BI__builtin_ia32_vextractf128_pd256:
15787 case X86::BI__builtin_ia32_vextractf128_ps256:
15788 case X86::BI__builtin_ia32_vextractf128_si256:
15789 case X86::BI__builtin_ia32_extract128i256:
15790 case X86::BI__builtin_ia32_extractf64x4_mask:
15791 case X86::BI__builtin_ia32_extractf32x4_mask:
15792 case X86::BI__builtin_ia32_extracti64x4_mask:
15793 case X86::BI__builtin_ia32_extracti32x4_mask:
15794 case X86::BI__builtin_ia32_extractf32x8_mask:
15795 case X86::BI__builtin_ia32_extracti32x8_mask:
15796 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15797 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15798 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15799 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15800 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15801 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15802 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15803 unsigned NumElts = DstTy->getNumElements();
15804 unsigned SrcNumElts =
15805 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15806 unsigned SubVectors = SrcNumElts / NumElts;
15807 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15808 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15809 Index &= SubVectors - 1; // Remove any extra bits.
15810 Index *= NumElts;
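    // E.g. extracting a 128-bit subvector from a 512-bit source (NumElts = 4,
    // SrcNumElts = 16, SubVectors = 4) with an immediate of 2 selects
    // elements 8..11.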
15811
15812 int Indices[16];
15813 for (unsigned i = 0; i != NumElts; ++i)
15814 Indices[i] = i + Index;
15815
15816 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15817 "extract");
15818
15819 if (Ops.size() == 4)
15820 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15821
15822 return Res;
15823 }
15824 case X86::BI__builtin_ia32_vinsertf128_pd256:
15825 case X86::BI__builtin_ia32_vinsertf128_ps256:
15826 case X86::BI__builtin_ia32_vinsertf128_si256:
15827 case X86::BI__builtin_ia32_insert128i256:
15828 case X86::BI__builtin_ia32_insertf64x4:
15829 case X86::BI__builtin_ia32_insertf32x4:
15830 case X86::BI__builtin_ia32_inserti64x4:
15831 case X86::BI__builtin_ia32_inserti32x4:
15832 case X86::BI__builtin_ia32_insertf32x8:
15833 case X86::BI__builtin_ia32_inserti32x8:
15834 case X86::BI__builtin_ia32_insertf32x4_256:
15835 case X86::BI__builtin_ia32_inserti32x4_256:
15836 case X86::BI__builtin_ia32_insertf64x2_256:
15837 case X86::BI__builtin_ia32_inserti64x2_256:
15838 case X86::BI__builtin_ia32_insertf64x2_512:
15839 case X86::BI__builtin_ia32_inserti64x2_512: {
15840 unsigned DstNumElts =
15841 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15842 unsigned SrcNumElts =
15843 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15844 unsigned SubVectors = DstNumElts / SrcNumElts;
15845 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15846 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15847 Index &= SubVectors - 1; // Remove any extra bits.
15848 Index *= SrcNumElts;
15849
15850 int Indices[16];
15851 for (unsigned i = 0; i != DstNumElts; ++i)
15852 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15853
15854 Value *Op1 = Builder.CreateShuffleVector(
15855 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15856
15857 for (unsigned i = 0; i != DstNumElts; ++i) {
15858 if (i >= Index && i < (Index + SrcNumElts))
15859 Indices[i] = (i - Index) + DstNumElts;
15860 else
15861 Indices[i] = i;
15862 }
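    // E.g. inserting a 128-bit vector into a 512-bit one (SrcNumElts = 4,
    // DstNumElts = 16) at index 1: positions 4..7 take the widened Ops[1]
    // elements (shuffle indices 16..19) and every other position keeps Ops[0].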
15863
15864 return Builder.CreateShuffleVector(Ops[0], Op1,
15865 ArrayRef(Indices, DstNumElts), "insert");
15866 }
15867 case X86::BI__builtin_ia32_pmovqd512_mask:
15868 case X86::BI__builtin_ia32_pmovwb512_mask: {
15869 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15870 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15871 }
15872 case X86::BI__builtin_ia32_pmovdb512_mask:
15873 case X86::BI__builtin_ia32_pmovdw512_mask:
15874 case X86::BI__builtin_ia32_pmovqw512_mask: {
15875 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15876 if (C->isAllOnesValue())
15877 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15878
15879 Intrinsic::ID IID;
15880 switch (BuiltinID) {
15881 default: llvm_unreachable("Unsupported intrinsic!");
15882 case X86::BI__builtin_ia32_pmovdb512_mask:
15883 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15884 break;
15885 case X86::BI__builtin_ia32_pmovdw512_mask:
15886 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15887 break;
15888 case X86::BI__builtin_ia32_pmovqw512_mask:
15889 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15890 break;
15891 }
15892
15893 Function *Intr = CGM.getIntrinsic(IID);
15894 return Builder.CreateCall(Intr, Ops);
15895 }
15896 case X86::BI__builtin_ia32_pblendw128:
15897 case X86::BI__builtin_ia32_blendpd:
15898 case X86::BI__builtin_ia32_blendps:
15899 case X86::BI__builtin_ia32_blendpd256:
15900 case X86::BI__builtin_ia32_blendps256:
15901 case X86::BI__builtin_ia32_pblendw256:
15902 case X86::BI__builtin_ia32_pblendd128:
15903 case X86::BI__builtin_ia32_pblendd256: {
15904 unsigned NumElts =
15905 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15906 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15907
15908 int Indices[16];
15909 // If there are more than 8 elements, the immediate is used twice so make
15910 // sure we handle that.
15911 for (unsigned i = 0; i != NumElts; ++i)
15912 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
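    // E.g. a 4-element blend with Imm = 0b0101 yields indices {4, 1, 6, 3}:
    // elements 0 and 2 come from Ops[1], elements 1 and 3 from Ops[0].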
15913
15914 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15915 ArrayRef(Indices, NumElts), "blend");
15916 }
15917 case X86::BI__builtin_ia32_pshuflw:
15918 case X86::BI__builtin_ia32_pshuflw256:
15919 case X86::BI__builtin_ia32_pshuflw512: {
15920 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15921 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15922 unsigned NumElts = Ty->getNumElements();
15923
15924 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15925 Imm = (Imm & 0xff) * 0x01010101;
15926
15927 int Indices[32];
15928 for (unsigned l = 0; l != NumElts; l += 8) {
15929 for (unsigned i = 0; i != 4; ++i) {
15930 Indices[l + i] = l + (Imm & 3);
15931 Imm >>= 2;
15932 }
15933 for (unsigned i = 4; i != 8; ++i)
15934 Indices[l + i] = l + i;
15935 }
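    // E.g. Imm = 0xB1 (0b10110001) swaps adjacent pairs in the low half of
    // each lane: per-lane indices {1, 0, 3, 2, 4, 5, 6, 7}.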
15936
15937 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15938 "pshuflw");
15939 }
15940 case X86::BI__builtin_ia32_pshufhw:
15941 case X86::BI__builtin_ia32_pshufhw256:
15942 case X86::BI__builtin_ia32_pshufhw512: {
15943 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15944 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15945 unsigned NumElts = Ty->getNumElements();
15946
15947 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15948 Imm = (Imm & 0xff) * 0x01010101;
15949
15950 int Indices[32];
15951 for (unsigned l = 0; l != NumElts; l += 8) {
15952 for (unsigned i = 0; i != 4; ++i)
15953 Indices[l + i] = l + i;
15954 for (unsigned i = 4; i != 8; ++i) {
15955 Indices[l + i] = l + 4 + (Imm & 3);
15956 Imm >>= 2;
15957 }
15958 }
15959
15960 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15961 "pshufhw");
15962 }
15963 case X86::BI__builtin_ia32_pshufd:
15964 case X86::BI__builtin_ia32_pshufd256:
15965 case X86::BI__builtin_ia32_pshufd512:
15966 case X86::BI__builtin_ia32_vpermilpd:
15967 case X86::BI__builtin_ia32_vpermilps:
15968 case X86::BI__builtin_ia32_vpermilpd256:
15969 case X86::BI__builtin_ia32_vpermilps256:
15970 case X86::BI__builtin_ia32_vpermilpd512:
15971 case X86::BI__builtin_ia32_vpermilps512: {
15972 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15973 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15974 unsigned NumElts = Ty->getNumElements();
15975 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15976 unsigned NumLaneElts = NumElts / NumLanes;
15977
15978 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
15979 Imm = (Imm & 0xff) * 0x01010101;
15980
15981 int Indices[16];
15982 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15983 for (unsigned i = 0; i != NumLaneElts; ++i) {
15984 Indices[i + l] = (Imm % NumLaneElts) + l;
15985 Imm /= NumLaneElts;
15986 }
15987 }
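    // E.g. pshufd with Imm = 0x1B reverses each 128-bit lane: per-lane
    // indices {3, 2, 1, 0}.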
15988
15989 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15990 "permil");
15991 }
15992 case X86::BI__builtin_ia32_shufpd:
15993 case X86::BI__builtin_ia32_shufpd256:
15994 case X86::BI__builtin_ia32_shufpd512:
15995 case X86::BI__builtin_ia32_shufps:
15996 case X86::BI__builtin_ia32_shufps256:
15997 case X86::BI__builtin_ia32_shufps512: {
15998 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15999 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16000 unsigned NumElts = Ty->getNumElements();
16001 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
16002 unsigned NumLaneElts = NumElts / NumLanes;
16003
16004 // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
16005 Imm = (Imm & 0xff) * 0x01010101;
16006
16007 int Indices[16];
16008 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16009 for (unsigned i = 0; i != NumLaneElts; ++i) {
16010 unsigned Index = Imm % NumLaneElts;
16011 Imm /= NumLaneElts;
16012 if (i >= (NumLaneElts / 2))
16013 Index += NumElts;
16014 Indices[l + i] = l + Index;
16015 }
16016 }
16017
16018 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16019 ArrayRef(Indices, NumElts), "shufp");
16020 }
16021 case X86::BI__builtin_ia32_permdi256:
16022 case X86::BI__builtin_ia32_permdf256:
16023 case X86::BI__builtin_ia32_permdi512:
16024 case X86::BI__builtin_ia32_permdf512: {
16025 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16026 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16027 unsigned NumElts = Ty->getNumElements();
16028
16029 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
16030 int Indices[8];
16031 for (unsigned l = 0; l != NumElts; l += 4)
16032 for (unsigned i = 0; i != 4; ++i)
16033 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
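    // E.g. Imm = 0x1B reverses each group of four elements: per-group
    // indices {3, 2, 1, 0}.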
16034
16035 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
16036 "perm");
16037 }
16038 case X86::BI__builtin_ia32_palignr128:
16039 case X86::BI__builtin_ia32_palignr256:
16040 case X86::BI__builtin_ia32_palignr512: {
16041 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16042
16043 unsigned NumElts =
16044 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16045 assert(NumElts % 16 == 0);
16046
16047 // If palignr is shifting the pair of vectors more than the size of two
16048 // lanes, emit zero.
16049 if (ShiftVal >= 32)
16050 return llvm::Constant::getNullValue(ConvertType(E->getType()));
16051
16052 // If palignr is shifting the pair of input vectors more than one lane,
16053 // but less than two lanes, convert to shifting in zeroes.
16054 if (ShiftVal > 16) {
16055 ShiftVal -= 16;
16056 Ops[1] = Ops[0];
16057 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
16058 }
16059
16060 int Indices[64];
16061 // 256-bit palignr operates on 128-bit lanes so we need to handle that
16062 for (unsigned l = 0; l != NumElts; l += 16) {
16063 for (unsigned i = 0; i != 16; ++i) {
16064 unsigned Idx = ShiftVal + i;
16065 if (Idx >= 16)
16066 Idx += NumElts - 16; // End of lane, switch operand.
16067 Indices[l + i] = Idx + l;
16068 }
16069 }
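    // E.g. a 128-bit palignr with ShiftVal = 4 yields indices 4..19: bytes
    // 4..15 of Ops[1] followed by bytes 0..3 of Ops[0], i.e. the concatenation
    // Ops[0]:Ops[1] shifted right by 4 bytes.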
16070
16071 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16072 ArrayRef(Indices, NumElts), "palignr");
16073 }
16074 case X86::BI__builtin_ia32_alignd128:
16075 case X86::BI__builtin_ia32_alignd256:
16076 case X86::BI__builtin_ia32_alignd512:
16077 case X86::BI__builtin_ia32_alignq128:
16078 case X86::BI__builtin_ia32_alignq256:
16079 case X86::BI__builtin_ia32_alignq512: {
16080 unsigned NumElts =
16081 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16082 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
16083
16084 // Mask the shift amount to the width of the vector.
16085 ShiftVal &= NumElts - 1;
16086
16087 int Indices[16];
16088 for (unsigned i = 0; i != NumElts; ++i)
16089 Indices[i] = i + ShiftVal;
16090
16091 return Builder.CreateShuffleVector(Ops[1], Ops[0],
16092 ArrayRef(Indices, NumElts), "valign");
16093 }
16094 case X86::BI__builtin_ia32_shuf_f32x4_256:
16095 case X86::BI__builtin_ia32_shuf_f64x2_256:
16096 case X86::BI__builtin_ia32_shuf_i32x4_256:
16097 case X86::BI__builtin_ia32_shuf_i64x2_256:
16098 case X86::BI__builtin_ia32_shuf_f32x4:
16099 case X86::BI__builtin_ia32_shuf_f64x2:
16100 case X86::BI__builtin_ia32_shuf_i32x4:
16101 case X86::BI__builtin_ia32_shuf_i64x2: {
16102 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16103 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
16104 unsigned NumElts = Ty->getNumElements();
16105 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
16106 unsigned NumLaneElts = NumElts / NumLanes;
16107
16108 int Indices[16];
16109 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
16110 unsigned Index = (Imm % NumLanes) * NumLaneElts;
16111 Imm /= NumLanes; // Discard the bits we just used.
16112 if (l >= (NumElts / 2))
16113 Index += NumElts; // Switch to other source.
16114 for (unsigned i = 0; i != NumLaneElts; ++i) {
16115 Indices[l + i] = Index + i;
16116 }
16117 }
16118
16119 return Builder.CreateShuffleVector(Ops[0], Ops[1],
16120 ArrayRef(Indices, NumElts), "shuf");
16121 }
16122
16123 case X86::BI__builtin_ia32_vperm2f128_pd256:
16124 case X86::BI__builtin_ia32_vperm2f128_ps256:
16125 case X86::BI__builtin_ia32_vperm2f128_si256:
16126 case X86::BI__builtin_ia32_permti256: {
16127 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
16128 unsigned NumElts =
16129 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16130
16131 // This takes a very simple approach since there are two lanes and a
16132 // shuffle can have 2 inputs. So we reserve the first input for the first
16133 // lane and the second input for the second lane. This may result in
16134 // duplicate sources, but this can be dealt with in the backend.
16135
16136 Value *OutOps[2];
16137 int Indices[8];
16138 for (unsigned l = 0; l != 2; ++l) {
16139 // Determine the source for this lane.
16140 if (Imm & (1 << ((l * 4) + 3)))
16141 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
16142 else if (Imm & (1 << ((l * 4) + 1)))
16143 OutOps[l] = Ops[1];
16144 else
16145 OutOps[l] = Ops[0];
16146
16147 for (unsigned i = 0; i != NumElts/2; ++i) {
16148 // Start with ith element of the source for this lane.
16149 unsigned Idx = (l * NumElts) + i;
16150 // If bit 0 of the immediate half is set, switch to the high half of
16151 // the source.
16152 if (Imm & (1 << (l * 4)))
16153 Idx += NumElts/2;
16154 Indices[(l * (NumElts/2)) + i] = Idx;
16155 }
16156 }
16157
16158 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
16159 ArrayRef(Indices, NumElts), "vperm");
16160 }
16161
16162 case X86::BI__builtin_ia32_pslldqi128_byteshift:
16163 case X86::BI__builtin_ia32_pslldqi256_byteshift:
16164 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
16165 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16166 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16167 // Builtin type is vXi64 so multiply by 8 to get bytes.
16168 unsigned NumElts = ResultType->getNumElements() * 8;
16169
16170 // If pslldq is shifting the vector more than 15 bytes, emit zero.
16171 if (ShiftVal >= 16)
16172 return llvm::Constant::getNullValue(ResultType);
16173
16174 int Indices[64];
16175 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
16176 for (unsigned l = 0; l != NumElts; l += 16) {
16177 for (unsigned i = 0; i != 16; ++i) {
16178 unsigned Idx = NumElts + i - ShiftVal;
16179 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
16180 Indices[l + i] = Idx + l;
16181 }
16182 }
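    // E.g. a 128-bit shift by 3 yields indices {13, 14, 15, 16..28}: three
    // zero bytes (from the Zero operand) followed by source bytes 0..12,
    // i.e. a byte shift toward higher element indices.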
16183
16184 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16185 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16186 Value *Zero = llvm::Constant::getNullValue(VecTy);
16187 Value *SV = Builder.CreateShuffleVector(
16188 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
16189 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
16190 }
16191 case X86::BI__builtin_ia32_psrldqi128_byteshift:
16192 case X86::BI__builtin_ia32_psrldqi256_byteshift:
16193 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
16194 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16195 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
16196 // Builtin type is vXi64 so multiply by 8 to get bytes.
16197 unsigned NumElts = ResultType->getNumElements() * 8;
16198
16199 // If psrldq is shifting the vector more than 15 bytes, emit zero.
16200 if (ShiftVal >= 16)
16201 return llvm::Constant::getNullValue(ResultType);
16202
16203 int Indices[64];
16204 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
16205 for (unsigned l = 0; l != NumElts; l += 16) {
16206 for (unsigned i = 0; i != 16; ++i) {
16207 unsigned Idx = i + ShiftVal;
16208 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
16209 Indices[l + i] = Idx + l;
16210 }
16211 }
16212
16213 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
16214 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
16215 Value *Zero = llvm::Constant::getNullValue(VecTy);
16216 Value *SV = Builder.CreateShuffleVector(
16217 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
16218 return Builder.CreateBitCast(SV, ResultType, "cast");
16219 }
16220 case X86::BI__builtin_ia32_kshiftliqi:
16221 case X86::BI__builtin_ia32_kshiftlihi:
16222 case X86::BI__builtin_ia32_kshiftlisi:
16223 case X86::BI__builtin_ia32_kshiftlidi: {
16224 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16225 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16226
16227 if (ShiftVal >= NumElts)
16228 return llvm::Constant::getNullValue(Ops[0]->getType());
16229
16230 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16231
16232 int Indices[64];
16233 for (unsigned i = 0; i != NumElts; ++i)
16234 Indices[i] = NumElts + i - ShiftVal;
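    // E.g. an 8-bit mask shifted left by 2 yields indices 6..13: the low two
    // result elements come from Zero and the rest from In[0..5], i.e. the
    // mask value shifted left by 2.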
16235
16236 Value *Zero = llvm::Constant::getNullValue(In->getType());
16237 Value *SV = Builder.CreateShuffleVector(
16238 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
16239 return Builder.CreateBitCast(SV, Ops[0]->getType());
16240 }
16241 case X86::BI__builtin_ia32_kshiftriqi:
16242 case X86::BI__builtin_ia32_kshiftrihi:
16243 case X86::BI__builtin_ia32_kshiftrisi:
16244 case X86::BI__builtin_ia32_kshiftridi: {
16245 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
16246 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16247
16248 if (ShiftVal >= NumElts)
16249 return llvm::Constant::getNullValue(Ops[0]->getType());
16250
16251 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
16252
16253 int Indices[64];
16254 for (unsigned i = 0; i != NumElts; ++i)
16255 Indices[i] = i + ShiftVal;
16256
16257 Value *Zero = llvm::Constant::getNullValue(In->getType());
16258 Value *SV = Builder.CreateShuffleVector(
16259 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
16260 return Builder.CreateBitCast(SV, Ops[0]->getType());
16261 }
16262 case X86::BI__builtin_ia32_movnti:
16263 case X86::BI__builtin_ia32_movnti64:
16264 case X86::BI__builtin_ia32_movntsd:
16265 case X86::BI__builtin_ia32_movntss: {
16266 llvm::MDNode *Node = llvm::MDNode::get(
16267 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
16268
16269 Value *Ptr = Ops[0];
16270 Value *Src = Ops[1];
16271
16272 // Extract the 0th element of the source vector.
16273 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
16274 BuiltinID == X86::BI__builtin_ia32_movntss)
16275 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
16276
16277 // Unaligned nontemporal store of the scalar value.
16278 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
16279 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
16280 SI->setAlignment(llvm::Align(1));
16281 return SI;
16282 }
16283 // Rotate is a special case of funnel shift - 1st 2 args are the same.
16284 case X86::BI__builtin_ia32_vprotb:
16285 case X86::BI__builtin_ia32_vprotw:
16286 case X86::BI__builtin_ia32_vprotd:
16287 case X86::BI__builtin_ia32_vprotq:
16288 case X86::BI__builtin_ia32_vprotbi:
16289 case X86::BI__builtin_ia32_vprotwi:
16290 case X86::BI__builtin_ia32_vprotdi:
16291 case X86::BI__builtin_ia32_vprotqi:
16292 case X86::BI__builtin_ia32_prold128:
16293 case X86::BI__builtin_ia32_prold256:
16294 case X86::BI__builtin_ia32_prold512:
16295 case X86::BI__builtin_ia32_prolq128:
16296 case X86::BI__builtin_ia32_prolq256:
16297 case X86::BI__builtin_ia32_prolq512:
16298 case X86::BI__builtin_ia32_prolvd128:
16299 case X86::BI__builtin_ia32_prolvd256:
16300 case X86::BI__builtin_ia32_prolvd512:
16301 case X86::BI__builtin_ia32_prolvq128:
16302 case X86::BI__builtin_ia32_prolvq256:
16303 case X86::BI__builtin_ia32_prolvq512:
16304 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
16305 case X86::BI__builtin_ia32_prord128:
16306 case X86::BI__builtin_ia32_prord256:
16307 case X86::BI__builtin_ia32_prord512:
16308 case X86::BI__builtin_ia32_prorq128:
16309 case X86::BI__builtin_ia32_prorq256:
16310 case X86::BI__builtin_ia32_prorq512:
16311 case X86::BI__builtin_ia32_prorvd128:
16312 case X86::BI__builtin_ia32_prorvd256:
16313 case X86::BI__builtin_ia32_prorvd512:
16314 case X86::BI__builtin_ia32_prorvq128:
16315 case X86::BI__builtin_ia32_prorvq256:
16316 case X86::BI__builtin_ia32_prorvq512:
16317 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
16318 case X86::BI__builtin_ia32_selectb_128:
16319 case X86::BI__builtin_ia32_selectb_256:
16320 case X86::BI__builtin_ia32_selectb_512:
16321 case X86::BI__builtin_ia32_selectw_128:
16322 case X86::BI__builtin_ia32_selectw_256:
16323 case X86::BI__builtin_ia32_selectw_512:
16324 case X86::BI__builtin_ia32_selectd_128:
16325 case X86::BI__builtin_ia32_selectd_256:
16326 case X86::BI__builtin_ia32_selectd_512:
16327 case X86::BI__builtin_ia32_selectq_128:
16328 case X86::BI__builtin_ia32_selectq_256:
16329 case X86::BI__builtin_ia32_selectq_512:
16330 case X86::BI__builtin_ia32_selectph_128:
16331 case X86::BI__builtin_ia32_selectph_256:
16332 case X86::BI__builtin_ia32_selectph_512:
16333 case X86::BI__builtin_ia32_selectpbf_128:
16334 case X86::BI__builtin_ia32_selectpbf_256:
16335 case X86::BI__builtin_ia32_selectpbf_512:
16336 case X86::BI__builtin_ia32_selectps_128:
16337 case X86::BI__builtin_ia32_selectps_256:
16338 case X86::BI__builtin_ia32_selectps_512:
16339 case X86::BI__builtin_ia32_selectpd_128:
16340 case X86::BI__builtin_ia32_selectpd_256:
16341 case X86::BI__builtin_ia32_selectpd_512:
16342 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
16343 case X86::BI__builtin_ia32_selectsh_128:
16344 case X86::BI__builtin_ia32_selectsbf_128:
16345 case X86::BI__builtin_ia32_selectss_128:
16346 case X86::BI__builtin_ia32_selectsd_128: {
16347 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16348 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16349 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
16350 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
16351 }
16352 case X86::BI__builtin_ia32_cmpb128_mask:
16353 case X86::BI__builtin_ia32_cmpb256_mask:
16354 case X86::BI__builtin_ia32_cmpb512_mask:
16355 case X86::BI__builtin_ia32_cmpw128_mask:
16356 case X86::BI__builtin_ia32_cmpw256_mask:
16357 case X86::BI__builtin_ia32_cmpw512_mask:
16358 case X86::BI__builtin_ia32_cmpd128_mask:
16359 case X86::BI__builtin_ia32_cmpd256_mask:
16360 case X86::BI__builtin_ia32_cmpd512_mask:
16361 case X86::BI__builtin_ia32_cmpq128_mask:
16362 case X86::BI__builtin_ia32_cmpq256_mask:
16363 case X86::BI__builtin_ia32_cmpq512_mask: {
16364 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16365 return EmitX86MaskedCompare(*this, CC, true, Ops);
16366 }
16367 case X86::BI__builtin_ia32_ucmpb128_mask:
16368 case X86::BI__builtin_ia32_ucmpb256_mask:
16369 case X86::BI__builtin_ia32_ucmpb512_mask:
16370 case X86::BI__builtin_ia32_ucmpw128_mask:
16371 case X86::BI__builtin_ia32_ucmpw256_mask:
16372 case X86::BI__builtin_ia32_ucmpw512_mask:
16373 case X86::BI__builtin_ia32_ucmpd128_mask:
16374 case X86::BI__builtin_ia32_ucmpd256_mask:
16375 case X86::BI__builtin_ia32_ucmpd512_mask:
16376 case X86::BI__builtin_ia32_ucmpq128_mask:
16377 case X86::BI__builtin_ia32_ucmpq256_mask:
16378 case X86::BI__builtin_ia32_ucmpq512_mask: {
16379 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
16380 return EmitX86MaskedCompare(*this, CC, false, Ops);
16381 }
16382 case X86::BI__builtin_ia32_vpcomb:
16383 case X86::BI__builtin_ia32_vpcomw:
16384 case X86::BI__builtin_ia32_vpcomd:
16385 case X86::BI__builtin_ia32_vpcomq:
16386 return EmitX86vpcom(*this, Ops, true);
16387 case X86::BI__builtin_ia32_vpcomub:
16388 case X86::BI__builtin_ia32_vpcomuw:
16389 case X86::BI__builtin_ia32_vpcomud:
16390 case X86::BI__builtin_ia32_vpcomuq:
16391 return EmitX86vpcom(*this, Ops, false);
16392
16393 case X86::BI__builtin_ia32_kortestcqi:
16394 case X86::BI__builtin_ia32_kortestchi:
16395 case X86::BI__builtin_ia32_kortestcsi:
16396 case X86::BI__builtin_ia32_kortestcdi: {
16397 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16398 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
16399 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16400 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16401 }
16402 case X86::BI__builtin_ia32_kortestzqi:
16403 case X86::BI__builtin_ia32_kortestzhi:
16404 case X86::BI__builtin_ia32_kortestzsi:
16405 case X86::BI__builtin_ia32_kortestzdi: {
16406 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
16407 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
16408 Value *Cmp = Builder.CreateICmpEQ(Or, C);
16409 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
16410 }
16411
16412 case X86::BI__builtin_ia32_ktestcqi:
16413 case X86::BI__builtin_ia32_ktestzqi:
16414 case X86::BI__builtin_ia32_ktestchi:
16415 case X86::BI__builtin_ia32_ktestzhi:
16416 case X86::BI__builtin_ia32_ktestcsi:
16417 case X86::BI__builtin_ia32_ktestzsi:
16418 case X86::BI__builtin_ia32_ktestcdi:
16419 case X86::BI__builtin_ia32_ktestzdi: {
16420 Intrinsic::ID IID;
16421 switch (BuiltinID) {
16422 default: llvm_unreachable("Unsupported intrinsic!");
16423 case X86::BI__builtin_ia32_ktestcqi:
16424 IID = Intrinsic::x86_avx512_ktestc_b;
16425 break;
16426 case X86::BI__builtin_ia32_ktestzqi:
16427 IID = Intrinsic::x86_avx512_ktestz_b;
16428 break;
16429 case X86::BI__builtin_ia32_ktestchi:
16430 IID = Intrinsic::x86_avx512_ktestc_w;
16431 break;
16432 case X86::BI__builtin_ia32_ktestzhi:
16433 IID = Intrinsic::x86_avx512_ktestz_w;
16434 break;
16435 case X86::BI__builtin_ia32_ktestcsi:
16436 IID = Intrinsic::x86_avx512_ktestc_d;
16437 break;
16438 case X86::BI__builtin_ia32_ktestzsi:
16439 IID = Intrinsic::x86_avx512_ktestz_d;
16440 break;
16441 case X86::BI__builtin_ia32_ktestcdi:
16442 IID = Intrinsic::x86_avx512_ktestc_q;
16443 break;
16444 case X86::BI__builtin_ia32_ktestzdi:
16445 IID = Intrinsic::x86_avx512_ktestz_q;
16446 break;
16447 }
16448
16449 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16450 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16451 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16452 Function *Intr = CGM.getIntrinsic(IID);
16453 return Builder.CreateCall(Intr, {LHS, RHS});
16454 }
16455
16456 case X86::BI__builtin_ia32_kaddqi:
16457 case X86::BI__builtin_ia32_kaddhi:
16458 case X86::BI__builtin_ia32_kaddsi:
16459 case X86::BI__builtin_ia32_kadddi: {
16460 Intrinsic::ID IID;
16461 switch (BuiltinID) {
16462 default: llvm_unreachable("Unsupported intrinsic!");
16463 case X86::BI__builtin_ia32_kaddqi:
16464 IID = Intrinsic::x86_avx512_kadd_b;
16465 break;
16466 case X86::BI__builtin_ia32_kaddhi:
16467 IID = Intrinsic::x86_avx512_kadd_w;
16468 break;
16469 case X86::BI__builtin_ia32_kaddsi:
16470 IID = Intrinsic::x86_avx512_kadd_d;
16471 break;
16472 case X86::BI__builtin_ia32_kadddi:
16473 IID = Intrinsic::x86_avx512_kadd_q;
16474 break;
16475 }
16476
16477 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16478 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16479 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16480 Function *Intr = CGM.getIntrinsic(IID);
16481 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
16482 return Builder.CreateBitCast(Res, Ops[0]->getType());
16483 }
16484 case X86::BI__builtin_ia32_kandqi:
16485 case X86::BI__builtin_ia32_kandhi:
16486 case X86::BI__builtin_ia32_kandsi:
16487 case X86::BI__builtin_ia32_kanddi:
16488 return EmitX86MaskLogic(*this, Instruction::And, Ops);
16489 case X86::BI__builtin_ia32_kandnqi:
16490 case X86::BI__builtin_ia32_kandnhi:
16491 case X86::BI__builtin_ia32_kandnsi:
16492 case X86::BI__builtin_ia32_kandndi:
16493 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
16494 case X86::BI__builtin_ia32_korqi:
16495 case X86::BI__builtin_ia32_korhi:
16496 case X86::BI__builtin_ia32_korsi:
16497 case X86::BI__builtin_ia32_kordi:
16498 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
16499 case X86::BI__builtin_ia32_kxnorqi:
16500 case X86::BI__builtin_ia32_kxnorhi:
16501 case X86::BI__builtin_ia32_kxnorsi:
16502 case X86::BI__builtin_ia32_kxnordi:
16503 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
16504 case X86::BI__builtin_ia32_kxorqi:
16505 case X86::BI__builtin_ia32_kxorhi:
16506 case X86::BI__builtin_ia32_kxorsi:
16507 case X86::BI__builtin_ia32_kxordi:
16508 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
16509 case X86::BI__builtin_ia32_knotqi:
16510 case X86::BI__builtin_ia32_knothi:
16511 case X86::BI__builtin_ia32_knotsi:
16512 case X86::BI__builtin_ia32_knotdi: {
16513 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16514 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16515 return Builder.CreateBitCast(Builder.CreateNot(Res),
16516 Ops[0]->getType());
16517 }
16518 case X86::BI__builtin_ia32_kmovb:
16519 case X86::BI__builtin_ia32_kmovw:
16520 case X86::BI__builtin_ia32_kmovd:
16521 case X86::BI__builtin_ia32_kmovq: {
16522 // Bitcast to vXi1 type and then back to integer. This gets the mask
16523 // register type into the IR, but might be optimized out depending on
16524 // what's around it.
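// Illustrative sketch for __builtin_ia32_kmovw: the i16 operand round-trips
// through the mask register type:
//   %vec = bitcast i16 %mask to <16 x i1>
//   %res = bitcast <16 x i1> %vec to i16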
16525 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16526 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
16527 return Builder.CreateBitCast(Res, Ops[0]->getType());
16528 }
16529
16530 case X86::BI__builtin_ia32_kunpckdi:
16531 case X86::BI__builtin_ia32_kunpcksi:
16532 case X86::BI__builtin_ia32_kunpckhi: {
16533 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
16534 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
16535 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
16536 int Indices[64];
16537 for (unsigned i = 0; i != NumElts; ++i)
16538 Indices[i] = i;
16539
16540 // First extract half of each vector. This gives better codegen than
16541 // doing it in a single shuffle.
16542 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
16543 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
16544 // Concat the vectors.
16545 // NOTE: Operands are swapped to match the intrinsic definition.
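// Illustrative example: for __builtin_ia32_kunpckhi(A, B), the low 8 bits of
// B become bits 0-7 of the result and the low 8 bits of A become bits 8-15,
// i.e. two <8 x i1> halves concatenated (B half first) into a <16 x i1>.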
16546 Value *Res =
16547 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
16548 return Builder.CreateBitCast(Res, Ops[0]->getType());
16549 }
16550
16551 case X86::BI__builtin_ia32_vplzcntd_128:
16552 case X86::BI__builtin_ia32_vplzcntd_256:
16553 case X86::BI__builtin_ia32_vplzcntd_512:
16554 case X86::BI__builtin_ia32_vplzcntq_128:
16555 case X86::BI__builtin_ia32_vplzcntq_256:
16556 case X86::BI__builtin_ia32_vplzcntq_512: {
16557 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
16558 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
16559 }
16560 case X86::BI__builtin_ia32_sqrtss:
16561 case X86::BI__builtin_ia32_sqrtsd: {
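// Illustrative sketch of the non-constrained path for __builtin_ia32_sqrtss:
//   %a = extractelement <4 x float> %op0, i64 0
//   %s = call float @llvm.sqrt.f32(float %a)
//   %r = insertelement <4 x float> %op0, float %s, i64 0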
16562 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16563 Function *F;
16564 if (Builder.getIsFPConstrained()) {
16565 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16566 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16567 A->getType());
16568 A = Builder.CreateConstrainedFPCall(F, {A});
16569 } else {
16570 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16571 A = Builder.CreateCall(F, {A});
16572 }
16573 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16574 }
16575 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16576 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16577 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16578 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16579 // Lower to generic IR only if the rounding mode is 4 (AKA CUR_DIRECTION);
16580 // otherwise keep the target-specific intrinsic.
16581 if (CC != 4) {
16582 Intrinsic::ID IID;
16583
16584 switch (BuiltinID) {
16585 default:
16586 llvm_unreachable("Unsupported intrinsic!");
16587 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16588 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16589 break;
16590 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16591 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16592 break;
16593 case X86::BI__builtin_ia32_sqrtss_round_mask:
16594 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16595 break;
16596 }
16597 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16598 }
16599 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16600 Function *F;
16601 if (Builder.getIsFPConstrained()) {
16602 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16603 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16604 A->getType());
16605 A = Builder.CreateConstrainedFPCall(F, A);
16606 } else {
16607 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16608 A = Builder.CreateCall(F, A);
16609 }
16610 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16611 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16612 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16613 }
16614 case X86::BI__builtin_ia32_sqrtpd256:
16615 case X86::BI__builtin_ia32_sqrtpd:
16616 case X86::BI__builtin_ia32_sqrtps256:
16617 case X86::BI__builtin_ia32_sqrtps:
16618 case X86::BI__builtin_ia32_sqrtph256:
16619 case X86::BI__builtin_ia32_sqrtph:
16620 case X86::BI__builtin_ia32_sqrtph512:
16621 case X86::BI__builtin_ia32_vsqrtnepbf16256:
16622 case X86::BI__builtin_ia32_vsqrtnepbf16:
16623 case X86::BI__builtin_ia32_vsqrtnepbf16512:
16624 case X86::BI__builtin_ia32_sqrtps512:
16625 case X86::BI__builtin_ia32_sqrtpd512: {
16626 if (Ops.size() == 2) {
16627 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16628 // Lower to generic IR only if the rounding mode is 4 (AKA CUR_DIRECTION);
16629 // otherwise keep the target-specific intrinsic.
16630 if (CC != 4) {
16631 Intrinsic::ID IID;
16632
16633 switch (BuiltinID) {
16634 default:
16635 llvm_unreachable("Unsupported intrinsic!");
16636 case X86::BI__builtin_ia32_sqrtph512:
16637 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16638 break;
16639 case X86::BI__builtin_ia32_sqrtps512:
16640 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16641 break;
16642 case X86::BI__builtin_ia32_sqrtpd512:
16643 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16644 break;
16645 }
16646 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16647 }
16648 }
16649 if (Builder.getIsFPConstrained()) {
16650 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16651 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16652 Ops[0]->getType());
16653 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16654 } else {
16655 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16656 return Builder.CreateCall(F, Ops[0]);
16657 }
16658 }
16659
16660 case X86::BI__builtin_ia32_pmuludq128:
16661 case X86::BI__builtin_ia32_pmuludq256:
16662 case X86::BI__builtin_ia32_pmuludq512:
16663 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16664
16665 case X86::BI__builtin_ia32_pmuldq128:
16666 case X86::BI__builtin_ia32_pmuldq256:
16667 case X86::BI__builtin_ia32_pmuldq512:
16668 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16669
16670 case X86::BI__builtin_ia32_pternlogd512_mask:
16671 case X86::BI__builtin_ia32_pternlogq512_mask:
16672 case X86::BI__builtin_ia32_pternlogd128_mask:
16673 case X86::BI__builtin_ia32_pternlogd256_mask:
16674 case X86::BI__builtin_ia32_pternlogq128_mask:
16675 case X86::BI__builtin_ia32_pternlogq256_mask:
16676 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16677
16678 case X86::BI__builtin_ia32_pternlogd512_maskz:
16679 case X86::BI__builtin_ia32_pternlogq512_maskz:
16680 case X86::BI__builtin_ia32_pternlogd128_maskz:
16681 case X86::BI__builtin_ia32_pternlogd256_maskz:
16682 case X86::BI__builtin_ia32_pternlogq128_maskz:
16683 case X86::BI__builtin_ia32_pternlogq256_maskz:
16684 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16685
16686 case X86::BI__builtin_ia32_vpshldd128:
16687 case X86::BI__builtin_ia32_vpshldd256:
16688 case X86::BI__builtin_ia32_vpshldd512:
16689 case X86::BI__builtin_ia32_vpshldq128:
16690 case X86::BI__builtin_ia32_vpshldq256:
16691 case X86::BI__builtin_ia32_vpshldq512:
16692 case X86::BI__builtin_ia32_vpshldw128:
16693 case X86::BI__builtin_ia32_vpshldw256:
16694 case X86::BI__builtin_ia32_vpshldw512:
16695 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16696
16697 case X86::BI__builtin_ia32_vpshrdd128:
16698 case X86::BI__builtin_ia32_vpshrdd256:
16699 case X86::BI__builtin_ia32_vpshrdd512:
16700 case X86::BI__builtin_ia32_vpshrdq128:
16701 case X86::BI__builtin_ia32_vpshrdq256:
16702 case X86::BI__builtin_ia32_vpshrdq512:
16703 case X86::BI__builtin_ia32_vpshrdw128:
16704 case X86::BI__builtin_ia32_vpshrdw256:
16705 case X86::BI__builtin_ia32_vpshrdw512:
16706 // Ops 0 and 1 are swapped.
16707 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16708
16709 case X86::BI__builtin_ia32_vpshldvd128:
16710 case X86::BI__builtin_ia32_vpshldvd256:
16711 case X86::BI__builtin_ia32_vpshldvd512:
16712 case X86::BI__builtin_ia32_vpshldvq128:
16713 case X86::BI__builtin_ia32_vpshldvq256:
16714 case X86::BI__builtin_ia32_vpshldvq512:
16715 case X86::BI__builtin_ia32_vpshldvw128:
16716 case X86::BI__builtin_ia32_vpshldvw256:
16717 case X86::BI__builtin_ia32_vpshldvw512:
16718 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16719
16720 case X86::BI__builtin_ia32_vpshrdvd128:
16721 case X86::BI__builtin_ia32_vpshrdvd256:
16722 case X86::BI__builtin_ia32_vpshrdvd512:
16723 case X86::BI__builtin_ia32_vpshrdvq128:
16724 case X86::BI__builtin_ia32_vpshrdvq256:
16725 case X86::BI__builtin_ia32_vpshrdvq512:
16726 case X86::BI__builtin_ia32_vpshrdvw128:
16727 case X86::BI__builtin_ia32_vpshrdvw256:
16728 case X86::BI__builtin_ia32_vpshrdvw512:
16729 // Ops 0 and 1 are swapped.
16730 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16731
16732 // Reductions
16733 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16734 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16735 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16736 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16737 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16738 Function *F =
16739 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16740 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16741 Builder.getFastMathFlags().setAllowReassoc();
16742 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16743 }
16744 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16745 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16746 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16747 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16748 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16749 Function *F =
16750 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16751 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16752 Builder.getFastMathFlags().setAllowReassoc();
16753 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16754 }
16755 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16756 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16757 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16758 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16759 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16760 Function *F =
16761 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16762 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16763 Builder.getFastMathFlags().setNoNaNs();
16764 return Builder.CreateCall(F, {Ops[0]});
16765 }
16766 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16767 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16768 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16769 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16770 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16771 Function *F =
16772 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16773 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16774 Builder.getFastMathFlags().setNoNaNs();
16775 return Builder.CreateCall(F, {Ops[0]});
16776 }
16777
16778 case X86::BI__builtin_ia32_rdrand16_step:
16779 case X86::BI__builtin_ia32_rdrand32_step:
16780 case X86::BI__builtin_ia32_rdrand64_step:
16781 case X86::BI__builtin_ia32_rdseed16_step:
16782 case X86::BI__builtin_ia32_rdseed32_step:
16783 case X86::BI__builtin_ia32_rdseed64_step: {
16784 Intrinsic::ID ID;
16785 switch (BuiltinID) {
16786 default: llvm_unreachable("Unsupported intrinsic!");
16787 case X86::BI__builtin_ia32_rdrand16_step:
16788 ID = Intrinsic::x86_rdrand_16;
16789 break;
16790 case X86::BI__builtin_ia32_rdrand32_step:
16791 ID = Intrinsic::x86_rdrand_32;
16792 break;
16793 case X86::BI__builtin_ia32_rdrand64_step:
16794 ID = Intrinsic::x86_rdrand_64;
16795 break;
16796 case X86::BI__builtin_ia32_rdseed16_step:
16797 ID = Intrinsic::x86_rdseed_16;
16798 break;
16799 case X86::BI__builtin_ia32_rdseed32_step:
16800 ID = Intrinsic::x86_rdseed_32;
16801 break;
16802 case X86::BI__builtin_ia32_rdseed64_step:
16803 ID = Intrinsic::x86_rdseed_64;
16804 break;
16805 }
16806
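// Illustrative sketch for __builtin_ia32_rdrand32_step(&r): the intrinsic
// returns { i32, i32 }; element 0 (the random value) is stored to *r and
// element 1 (1 on success, 0 on failure) is returned.
//   %call = call { i32, i32 } @llvm.x86.rdrand.32()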
16807 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16808 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16809 Ops[0]);
16810 return Builder.CreateExtractValue(Call, 1);
16811 }
16812 case X86::BI__builtin_ia32_addcarryx_u32:
16813 case X86::BI__builtin_ia32_addcarryx_u64:
16814 case X86::BI__builtin_ia32_subborrow_u32:
16815 case X86::BI__builtin_ia32_subborrow_u64: {
16816 Intrinsic::ID IID;
16817 switch (BuiltinID) {
16818 default: llvm_unreachable("Unsupported intrinsic!");
16819 case X86::BI__builtin_ia32_addcarryx_u32:
16820 IID = Intrinsic::x86_addcarry_32;
16821 break;
16822 case X86::BI__builtin_ia32_addcarryx_u64:
16823 IID = Intrinsic::x86_addcarry_64;
16824 break;
16825 case X86::BI__builtin_ia32_subborrow_u32:
16826 IID = Intrinsic::x86_subborrow_32;
16827 break;
16828 case X86::BI__builtin_ia32_subborrow_u64:
16829 IID = Intrinsic::x86_subborrow_64;
16830 break;
16831 }
16832
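// Illustrative sketch for __builtin_ia32_addcarryx_u32(c, a, b, &out):
//   %call = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
// Element 1 (the sum) is stored to *out and element 0 (the carry-out) is
// returned.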
16833 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16834 { Ops[0], Ops[1], Ops[2] });
16835 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16836 Ops[3]);
16837 return Builder.CreateExtractValue(Call, 0);
16838 }
16839
16840 case X86::BI__builtin_ia32_fpclassps128_mask:
16841 case X86::BI__builtin_ia32_fpclassps256_mask:
16842 case X86::BI__builtin_ia32_fpclassps512_mask:
16843 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16844 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16845 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16846 case X86::BI__builtin_ia32_fpclassph128_mask:
16847 case X86::BI__builtin_ia32_fpclassph256_mask:
16848 case X86::BI__builtin_ia32_fpclassph512_mask:
16849 case X86::BI__builtin_ia32_fpclasspd128_mask:
16850 case X86::BI__builtin_ia32_fpclasspd256_mask:
16851 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16852 unsigned NumElts =
16853 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16854 Value *MaskIn = Ops[2];
16855 Ops.erase(&Ops[2]);
16856
16857 Intrinsic::ID ID;
16858 switch (BuiltinID) {
16859 default: llvm_unreachable("Unsupported intrinsic!");
16860 case X86::BI__builtin_ia32_vfpclasspbf16128_mask:
16861 ID = Intrinsic::x86_avx10_fpclass_nepbf16_128;
16862 break;
16863 case X86::BI__builtin_ia32_vfpclasspbf16256_mask:
16864 ID = Intrinsic::x86_avx10_fpclass_nepbf16_256;
16865 break;
16866 case X86::BI__builtin_ia32_vfpclasspbf16512_mask:
16867 ID = Intrinsic::x86_avx10_fpclass_nepbf16_512;
16868 break;
16869 case X86::BI__builtin_ia32_fpclassph128_mask:
16870 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16871 break;
16872 case X86::BI__builtin_ia32_fpclassph256_mask:
16873 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16874 break;
16875 case X86::BI__builtin_ia32_fpclassph512_mask:
16876 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16877 break;
16878 case X86::BI__builtin_ia32_fpclassps128_mask:
16879 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16880 break;
16881 case X86::BI__builtin_ia32_fpclassps256_mask:
16882 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16883 break;
16884 case X86::BI__builtin_ia32_fpclassps512_mask:
16885 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16886 break;
16887 case X86::BI__builtin_ia32_fpclasspd128_mask:
16888 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16889 break;
16890 case X86::BI__builtin_ia32_fpclasspd256_mask:
16891 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16892 break;
16893 case X86::BI__builtin_ia32_fpclasspd512_mask:
16894 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16895 break;
16896 }
16897
16898 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16899 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16900 }
16901
16902 case X86::BI__builtin_ia32_vp2intersect_q_512:
16903 case X86::BI__builtin_ia32_vp2intersect_q_256:
16904 case X86::BI__builtin_ia32_vp2intersect_q_128:
16905 case X86::BI__builtin_ia32_vp2intersect_d_512:
16906 case X86::BI__builtin_ia32_vp2intersect_d_256:
16907 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16908 unsigned NumElts =
16909 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16910 Intrinsic::ID ID;
16911
16912 switch (BuiltinID) {
16913 default: llvm_unreachable("Unsupported intrinsic!");
16914 case X86::BI__builtin_ia32_vp2intersect_q_512:
16915 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16916 break;
16917 case X86::BI__builtin_ia32_vp2intersect_q_256:
16918 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16919 break;
16920 case X86::BI__builtin_ia32_vp2intersect_q_128:
16921 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16922 break;
16923 case X86::BI__builtin_ia32_vp2intersect_d_512:
16924 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16925 break;
16926 case X86::BI__builtin_ia32_vp2intersect_d_256:
16927 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16928 break;
16929 case X86::BI__builtin_ia32_vp2intersect_d_128:
16930 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16931 break;
16932 }
16933
16934 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16935 Value *Result = Builder.CreateExtractValue(Call, 0);
16936 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16937 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
16938
16939 Result = Builder.CreateExtractValue(Call, 1);
16940 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
16941 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
16942 }
16943
16944 case X86::BI__builtin_ia32_vpmultishiftqb128:
16945 case X86::BI__builtin_ia32_vpmultishiftqb256:
16946 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16947 Intrinsic::ID ID;
16948 switch (BuiltinID) {
16949 default: llvm_unreachable("Unsupported intrinsic!");
16950 case X86::BI__builtin_ia32_vpmultishiftqb128:
16951 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16952 break;
16953 case X86::BI__builtin_ia32_vpmultishiftqb256:
16954 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16955 break;
16956 case X86::BI__builtin_ia32_vpmultishiftqb512:
16957 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16958 break;
16959 }
16960
16961 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16962 }
16963
16964 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16965 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16966 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16967 unsigned NumElts =
16968 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16969 Value *MaskIn = Ops[2];
16970 Ops.erase(&Ops[2]);
16971
16972 Intrinsic::ID ID;
16973 switch (BuiltinID) {
16974 default: llvm_unreachable("Unsupported intrinsic!");
16975 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16976 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16977 break;
16978 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16979 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16980 break;
16981 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16982 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16983 break;
16984 }
16985
16986 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16987 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16988 }
16989
16990 // packed comparison intrinsics
16991 case X86::BI__builtin_ia32_cmpeqps:
16992 case X86::BI__builtin_ia32_cmpeqpd:
16993 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16994 case X86::BI__builtin_ia32_cmpltps:
16995 case X86::BI__builtin_ia32_cmpltpd:
16996 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16997 case X86::BI__builtin_ia32_cmpleps:
16998 case X86::BI__builtin_ia32_cmplepd:
16999 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
17000 case X86::BI__builtin_ia32_cmpunordps:
17001 case X86::BI__builtin_ia32_cmpunordpd:
17002 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
17003 case X86::BI__builtin_ia32_cmpneqps:
17004 case X86::BI__builtin_ia32_cmpneqpd:
17005 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
17006 case X86::BI__builtin_ia32_cmpnltps:
17007 case X86::BI__builtin_ia32_cmpnltpd:
17008 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
17009 case X86::BI__builtin_ia32_cmpnleps:
17010 case X86::BI__builtin_ia32_cmpnlepd:
17011 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
17012 case X86::BI__builtin_ia32_cmpordps:
17013 case X86::BI__builtin_ia32_cmpordpd:
17014 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
17015 case X86::BI__builtin_ia32_cmpph128_mask:
17016 case X86::BI__builtin_ia32_cmpph256_mask:
17017 case X86::BI__builtin_ia32_cmpph512_mask:
17018 case X86::BI__builtin_ia32_cmpps128_mask:
17019 case X86::BI__builtin_ia32_cmpps256_mask:
17020 case X86::BI__builtin_ia32_cmpps512_mask:
17021 case X86::BI__builtin_ia32_cmppd128_mask:
17022 case X86::BI__builtin_ia32_cmppd256_mask:
17023 case X86::BI__builtin_ia32_cmppd512_mask:
17024 case X86::BI__builtin_ia32_vcmppd256_round_mask:
17025 case X86::BI__builtin_ia32_vcmpps256_round_mask:
17026 case X86::BI__builtin_ia32_vcmpph256_round_mask:
17027 case X86::BI__builtin_ia32_vcmppbf16512_mask:
17028 case X86::BI__builtin_ia32_vcmppbf16256_mask:
17029 case X86::BI__builtin_ia32_vcmppbf16128_mask:
17030 IsMaskFCmp = true;
17031 [[fallthrough]];
17032 case X86::BI__builtin_ia32_cmpps:
17033 case X86::BI__builtin_ia32_cmpps256:
17034 case X86::BI__builtin_ia32_cmppd:
17035 case X86::BI__builtin_ia32_cmppd256: {
17036 // Lower vector comparisons to fcmp instructions, ignoring both the
17037 // requested signalling behaviour and
17038 // the requested rounding mode.
17039 // This is only possible if the fp-model is not strict and FENV_ACCESS is off.
17040
17041 // The third argument is the comparison condition, an integer in the
17042 // range [0, 31].
17043 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
17044
17045 // Lowering to IR fcmp instruction.
17046 // Ignoring requested signaling behaviour,
17047 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
17048 FCmpInst::Predicate Pred;
17049 bool IsSignaling;
17050 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
17051 // behavior is inverted. We'll handle that after the switch.
17052 switch (CC & 0xf) {
17053 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
17054 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
17055 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
17056 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
17057 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
17058 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
17059 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
17060 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
17061 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
17062 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
17063 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
17064 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
17065 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
17066 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
17067 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
17068 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
17069 default: llvm_unreachable("Unhandled CC");
17070 }
17071
17072 // Invert the signalling behavior for 16-31.
17073 if (CC & 0x10)
17074 IsSignaling = !IsSignaling;
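// For example (illustrative): CC = 0x0e (_CMP_GT_OS) yields FCMP_OGT with
// IsSignaling = true, while CC = 0x1e (_CMP_GT_OQ) yields FCMP_OGT with
// IsSignaling = false.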
17075
17076 // If the predicate is true or false and we're using constrained intrinsics,
17077 // we don't have a compare intrinsic we can use. Just use the legacy X86
17078 // specific intrinsic.
17079 // If the intrinsic is mask enabled and we're using constrained intrinsics,
17080 // use the legacy X86 specific intrinsic.
17081 if (Builder.getIsFPConstrained() &&
17082 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
17083 IsMaskFCmp)) {
17084
17085 Intrinsic::ID IID;
17086 switch (BuiltinID) {
17087 default: llvm_unreachable("Unexpected builtin");
17088 case X86::BI__builtin_ia32_cmpps:
17089 IID = Intrinsic::x86_sse_cmp_ps;
17090 break;
17091 case X86::BI__builtin_ia32_cmpps256:
17092 IID = Intrinsic::x86_avx_cmp_ps_256;
17093 break;
17094 case X86::BI__builtin_ia32_cmppd:
17095 IID = Intrinsic::x86_sse2_cmp_pd;
17096 break;
17097 case X86::BI__builtin_ia32_cmppd256:
17098 IID = Intrinsic::x86_avx_cmp_pd_256;
17099 break;
17100 case X86::BI__builtin_ia32_cmpph128_mask:
17101 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
17102 break;
17103 case X86::BI__builtin_ia32_cmpph256_mask:
17104 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
17105 break;
17106 case X86::BI__builtin_ia32_cmpph512_mask:
17107 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
17108 break;
17109 case X86::BI__builtin_ia32_cmpps512_mask:
17110 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
17111 break;
17112 case X86::BI__builtin_ia32_cmppd512_mask:
17113 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
17114 break;
17115 case X86::BI__builtin_ia32_cmpps128_mask:
17116 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
17117 break;
17118 case X86::BI__builtin_ia32_cmpps256_mask:
17119 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
17120 break;
17121 case X86::BI__builtin_ia32_cmppd128_mask:
17122 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
17123 break;
17124 case X86::BI__builtin_ia32_cmppd256_mask:
17125 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
17126 break;
17127 }
17128
17129 Function *Intr = CGM.getIntrinsic(IID);
17130 if (IsMaskFCmp) {
17131 unsigned NumElts =
17132 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17133 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
17134 Value *Cmp = Builder.CreateCall(Intr, Ops);
17135 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
17136 }
17137
17138 return Builder.CreateCall(Intr, Ops);
17139 }
17140
17141 // Builtins without the _mask suffix return a vector of integers
17142 // of the same width as the input vectors
17143 if (IsMaskFCmp) {
17144 // We ignore SAE if strict FP is disabled. We only keep precise
17145 // exception behavior under strict FP.
17146 // NOTE: If strict FP ever goes through here, a CGFPOptionsRAII
17147 // object will be required.
17148 unsigned NumElts =
17149 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
17150 Value *Cmp;
17151 if (IsSignaling)
17152 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
17153 else
17154 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
17155 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
17156 }
17157
17158 return getVectorFCmpIR(Pred, IsSignaling);
17159 }
17160
17161 // SSE scalar comparison intrinsics
17162 case X86::BI__builtin_ia32_cmpeqss:
17163 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
17164 case X86::BI__builtin_ia32_cmpltss:
17165 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
17166 case X86::BI__builtin_ia32_cmpless:
17167 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
17168 case X86::BI__builtin_ia32_cmpunordss:
17169 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
17170 case X86::BI__builtin_ia32_cmpneqss:
17171 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
17172 case X86::BI__builtin_ia32_cmpnltss:
17173 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
17174 case X86::BI__builtin_ia32_cmpnless:
17175 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
17176 case X86::BI__builtin_ia32_cmpordss:
17177 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
17178 case X86::BI__builtin_ia32_cmpeqsd:
17179 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
17180 case X86::BI__builtin_ia32_cmpltsd:
17181 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
17182 case X86::BI__builtin_ia32_cmplesd:
17183 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
17184 case X86::BI__builtin_ia32_cmpunordsd:
17185 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
17186 case X86::BI__builtin_ia32_cmpneqsd:
17187 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
17188 case X86::BI__builtin_ia32_cmpnltsd:
17189 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
17190 case X86::BI__builtin_ia32_cmpnlesd:
17191 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
17192 case X86::BI__builtin_ia32_cmpordsd:
17193 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
17194
17195 // f16c half2float intrinsics
17196 case X86::BI__builtin_ia32_vcvtph2ps:
17197 case X86::BI__builtin_ia32_vcvtph2ps256:
17198 case X86::BI__builtin_ia32_vcvtph2ps_mask:
17199 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
17200 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
17201 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
17202 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
17203 }
17204
17205 // AVX512 bf16 intrinsics
17206 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
17207 Ops[2] = getMaskVecValue(
17208 *this, Ops[2],
17209 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
17210 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
17211 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17212 }
17213 case X86::BI__builtin_ia32_cvtsbf162ss_32:
17214 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
17215
17216 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17217 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
17218 Intrinsic::ID IID;
17219 switch (BuiltinID) {
17220 default: llvm_unreachable("Unsupported intrinsic!");
17221 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
17222 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
17223 break;
17224 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
17225 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
17226 break;
17227 }
17228 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
17229 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
17230 }
17231
17232 case X86::BI__cpuid:
17233 case X86::BI__cpuidex: {
17234 Value *FuncId = EmitScalarExpr(E->getArg(1));
17235 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
17236 ? EmitScalarExpr(E->getArg(2))
17237 : llvm::ConstantInt::get(Int32Ty, 0);
17238
17239 llvm::StructType *CpuidRetTy =
17240 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
17241 llvm::FunctionType *FTy =
17242 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
17243
17244 StringRef Asm, Constraints;
17245 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
17246 Asm = "cpuid";
17247 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
17248 } else {
17249 // x86-64 uses %rbx as the base register, so preserve it.
17250 Asm = "xchgq %rbx, ${1:q}\n"
17251 "cpuid\n"
17252 "xchgq %rbx, ${1:q}";
17253 Constraints = "={ax},=r,={cx},={dx},0,2";
17254 }
17255
17256 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
17257 /*hasSideEffects=*/false);
17258 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
17259 Value *BasePtr = EmitScalarExpr(E->getArg(0));
17260 Value *Store = nullptr;
17261 for (unsigned i = 0; i < 4; i++) {
17262 Value *Extracted = Builder.CreateExtractValue(IACall, i);
17263 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
17264 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
17265 }
17266
17267 // Return the last store instruction to signal that we have emitted the
17268 // intrinsic.
17269 return Store;
17270 }
17271
17272 case X86::BI__emul:
17273 case X86::BI__emulu: {
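// Illustrative sketch: __emul(a, b) sign-extends both 32-bit operands to i64
// and multiplies with nsw; __emulu zero-extends and multiplies with nuw:
//   %x = sext i32 %a to i64
//   %y = sext i32 %b to i64
//   %r = mul nsw i64 %x, %y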
17274 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
17275 bool isSigned = (BuiltinID == X86::BI__emul);
17276 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
17277 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
17278 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
17279 }
17280 case X86::BI__mulh:
17281 case X86::BI__umulh:
17282 case X86::BI_mul128:
17283 case X86::BI_umul128: {
17284 llvm::Type *ResType = ConvertType(E->getType());
17285 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17286
17287 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
17288 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
17289 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
17290
17291 Value *MulResult, *HigherBits;
17292 if (IsSigned) {
17293 MulResult = Builder.CreateNSWMul(LHS, RHS);
17294 HigherBits = Builder.CreateAShr(MulResult, 64);
17295 } else {
17296 MulResult = Builder.CreateNUWMul(LHS, RHS);
17297 HigherBits = Builder.CreateLShr(MulResult, 64);
17298 }
17299 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
17300
17301 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
17302 return HigherBits;
17303
17304 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
17305 Builder.CreateStore(HigherBits, HighBitsAddress);
17306 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
17307 }
17308
17309 case X86::BI__faststorefence: {
17310 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17311 llvm::SyncScope::System);
17312 }
17313 case X86::BI__shiftleft128:
17314 case X86::BI__shiftright128: {
17315 llvm::Function *F = CGM.getIntrinsic(
17316 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
17317 Int64Ty);
17318 // Flip low/high ops and zero-extend amount to matching type.
17319 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
17320 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
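// For example (illustrative), __shiftleft128(Low, High, Amt) becomes roughly:
//   %amt = zext i8 %Amt to i64
//   %res = call i64 @llvm.fshl.i64(i64 %High, i64 %Low, i64 %amt)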
17321 std::swap(Ops[0], Ops[1]);
17322 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
17323 return Builder.CreateCall(F, Ops);
17324 }
17325 case X86::BI_ReadWriteBarrier:
17326 case X86::BI_ReadBarrier:
17327 case X86::BI_WriteBarrier: {
17328 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
17329 llvm::SyncScope::SingleThread);
17330 }
17331
17332 case X86::BI_AddressOfReturnAddress: {
17333 Function *F =
17334 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
17335 return Builder.CreateCall(F);
17336 }
17337 case X86::BI__stosb: {
17338 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
17339 // instruction, but it will create a memset that won't be optimized away.
17340 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
17341 }
17342 // These correspond to intrinsics that return two tiles (tile0_tile1).
17343 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17344 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17345 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17346 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17347 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17348 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17349 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17350 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal: {
17351 Intrinsic::ID IID;
17352 switch (BuiltinID) {
17353 default:
17354 llvm_unreachable("Unsupported intrinsic!");
17355 case X86::BI__builtin_ia32_t2rpntlvwz0_internal:
17356 IID = Intrinsic::x86_t2rpntlvwz0_internal;
17357 break;
17358 case X86::BI__builtin_ia32_t2rpntlvwz0rs_internal:
17359 IID = Intrinsic::x86_t2rpntlvwz0rs_internal;
17360 break;
17361 case X86::BI__builtin_ia32_t2rpntlvwz0t1_internal:
17362 IID = Intrinsic::x86_t2rpntlvwz0t1_internal;
17363 break;
17364 case X86::BI__builtin_ia32_t2rpntlvwz0rst1_internal:
17365 IID = Intrinsic::x86_t2rpntlvwz0rst1_internal;
17366 break;
17367 case X86::BI__builtin_ia32_t2rpntlvwz1_internal:
17368 IID = Intrinsic::x86_t2rpntlvwz1_internal;
17369 break;
17370 case X86::BI__builtin_ia32_t2rpntlvwz1rs_internal:
17371 IID = Intrinsic::x86_t2rpntlvwz1rs_internal;
17372 break;
17373 case X86::BI__builtin_ia32_t2rpntlvwz1t1_internal:
17374 IID = Intrinsic::x86_t2rpntlvwz1t1_internal;
17375 break;
17376 case X86::BI__builtin_ia32_t2rpntlvwz1rst1_internal:
17377 IID = Intrinsic::x86_t2rpntlvwz1rst1_internal;
17378 break;
17379 }
17380
17381 // Ops = (Row0, Col0, Col1, DstPtr0, DstPtr1, SrcPtr, Stride)
17382 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
17383 {Ops[0], Ops[1], Ops[2], Ops[5], Ops[6]});
17384
17385 auto *PtrTy = E->getArg(3)->getType()->getAs<PointerType>();
17386 assert(PtrTy && "arg3 must be of pointer type");
17387 QualType PtreeTy = PtrTy->getPointeeType();
17388 llvm::Type *TyPtee = ConvertType(PtreeTy);
17389
17390 // Bitcast amx type (x86_amx) to vector type (256 x i32)
17391 // Then store tile0 into DstPtr0
17392 Value *T0 = Builder.CreateExtractValue(Call, 0);
17393 Value *VecT0 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17394 {TyPtee}, {T0});
17395 Builder.CreateDefaultAlignedStore(VecT0, Ops[3]);
17396
17397 // Then store tile1 into DstPtr1
17398 Value *T1 = Builder.CreateExtractValue(Call, 1);
17399 Value *VecT1 = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector,
17400 {TyPtee}, {T1});
17401 Value *Store = Builder.CreateDefaultAlignedStore(VecT1, Ops[4]);
17402
17403 // Note: we deliberately avoid using x86_tilestored64_internal to store the
17404 // results here, because it cannot guarantee the scope of the memory writes.
17405 // That could force the shapes to be reloaded after the first AMX intrinsic,
17406 // which the current AMX register allocation is unable to handle.
17407
17408 return Store;
17409 }
17410 case X86::BI__ud2:
17411 // llvm.trap emits a ud2a instruction on x86.
17412 return EmitTrapCall(Intrinsic::trap);
17413 case X86::BI__int2c: {
17414 // This syscall signals a driver assertion failure in x86 NT kernels.
17415 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
17416 llvm::InlineAsm *IA =
17417 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
17418 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
17419 getLLVMContext(), llvm::AttributeList::FunctionIndex,
17420 llvm::Attribute::NoReturn);
17421 llvm::CallInst *CI = Builder.CreateCall(IA);
17422 CI->setAttributes(NoReturnAttr);
17423 return CI;
17424 }
17425 case X86::BI__readfsbyte:
17426 case X86::BI__readfsword:
17427 case X86::BI__readfsdword:
17428 case X86::BI__readfsqword: {
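// Illustrative sketch, assuming a 32-bit offset operand: __readfsdword(0x18)
// loads through LLVM address space 257, which models the FS segment (GS,
// handled below, is address space 256):
//   %p = inttoptr i32 24 to ptr addrspace(257)
//   %v = load volatile i32, ptr addrspace(257) %p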
17429 llvm::Type *IntTy = ConvertType(E->getType());
17430 Value *Ptr = Builder.CreateIntToPtr(
17431 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
17432 LoadInst *Load = Builder.CreateAlignedLoad(
17433 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17434 Load->setVolatile(true);
17435 return Load;
17436 }
17437 case X86::BI__readgsbyte:
17438 case X86::BI__readgsword:
17439 case X86::BI__readgsdword:
17440 case X86::BI__readgsqword: {
17441 llvm::Type *IntTy = ConvertType(E->getType());
17442 Value *Ptr = Builder.CreateIntToPtr(
17443 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
17444 LoadInst *Load = Builder.CreateAlignedLoad(
17445 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
17446 Load->setVolatile(true);
17447 return Load;
17448 }
17449 case X86::BI__builtin_ia32_encodekey128_u32: {
17450 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
17451
17452 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
17453
17454 for (int i = 0; i < 3; ++i) {
17455 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17456 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
17457 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17458 }
17459
17460 return Builder.CreateExtractValue(Call, 0);
17461 }
17462 case X86::BI__builtin_ia32_encodekey256_u32: {
17463 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
17464
17465 Value *Call =
17466 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
17467
17468 for (int i = 0; i < 4; ++i) {
17469 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17470 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
17471 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
17472 }
17473
17474 return Builder.CreateExtractValue(Call, 0);
17475 }
17476 case X86::BI__builtin_ia32_aesenc128kl_u8:
17477 case X86::BI__builtin_ia32_aesdec128kl_u8:
17478 case X86::BI__builtin_ia32_aesenc256kl_u8:
17479 case X86::BI__builtin_ia32_aesdec256kl_u8: {
17480 Intrinsic::ID IID;
17481 StringRef BlockName;
17482 switch (BuiltinID) {
17483 default:
17484 llvm_unreachable("Unexpected builtin");
17485 case X86::BI__builtin_ia32_aesenc128kl_u8:
17486 IID = Intrinsic::x86_aesenc128kl;
17487 BlockName = "aesenc128kl";
17488 break;
17489 case X86::BI__builtin_ia32_aesdec128kl_u8:
17490 IID = Intrinsic::x86_aesdec128kl;
17491 BlockName = "aesdec128kl";
17492 break;
17493 case X86::BI__builtin_ia32_aesenc256kl_u8:
17494 IID = Intrinsic::x86_aesenc256kl;
17495 BlockName = "aesenc256kl";
17496 break;
17497 case X86::BI__builtin_ia32_aesdec256kl_u8:
17498 IID = Intrinsic::x86_aesdec256kl;
17499 BlockName = "aesdec256kl";
17500 break;
17501 }
17502
17503 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
17504
17505 BasicBlock *NoError =
17506 createBasicBlock(BlockName + "_no_error", this->CurFn);
17507 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17508 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17509
17510 Value *Ret = Builder.CreateExtractValue(Call, 0);
17511 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17512 Value *Out = Builder.CreateExtractValue(Call, 1);
17513 Builder.CreateCondBr(Succ, NoError, Error);
17514
17515 Builder.SetInsertPoint(NoError);
17516 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
17517 Builder.CreateBr(End);
17518
17519 Builder.SetInsertPoint(Error);
17520 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17521 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
17522 Builder.CreateBr(End);
17523
17524 Builder.SetInsertPoint(End);
17525 return Builder.CreateExtractValue(Call, 0);
17526 }
17527 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17528 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17529 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17530 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
17531 Intrinsic::ID IID;
17532 StringRef BlockName;
17533 switch (BuiltinID) {
17534 case X86::BI__builtin_ia32_aesencwide128kl_u8:
17535 IID = Intrinsic::x86_aesencwide128kl;
17536 BlockName = "aesencwide128kl";
17537 break;
17538 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
17539 IID = Intrinsic::x86_aesdecwide128kl;
17540 BlockName = "aesdecwide128kl";
17541 break;
17542 case X86::BI__builtin_ia32_aesencwide256kl_u8:
17543 IID = Intrinsic::x86_aesencwide256kl;
17544 BlockName = "aesencwide256kl";
17545 break;
17546 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
17547 IID = Intrinsic::x86_aesdecwide256kl;
17548 BlockName = "aesdecwide256kl";
17549 break;
17550 }
17551
17552 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
17553 Value *InOps[9];
17554 InOps[0] = Ops[2];
17555 for (int i = 0; i != 8; ++i) {
17556 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
17557 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
17558 }
17559
17560 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
17561
17562 BasicBlock *NoError =
17563 createBasicBlock(BlockName + "_no_error", this->CurFn);
17564 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
17565 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
17566
17567 Value *Ret = Builder.CreateExtractValue(Call, 0);
17568 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
17569 Builder.CreateCondBr(Succ, NoError, Error);
17570
17571 Builder.SetInsertPoint(NoError);
17572 for (int i = 0; i != 8; ++i) {
17573 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
17574 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
17575 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
17576 }
17577 Builder.CreateBr(End);
17578
17579 Builder.SetInsertPoint(Error);
17580 for (int i = 0; i != 8; ++i) {
17581 Value *Out = Builder.CreateExtractValue(Call, i + 1);
17582 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
17583 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
17584 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
17585 }
17586 Builder.CreateBr(End);
17587
17588 Builder.SetInsertPoint(End);
17589 return Builder.CreateExtractValue(Call, 0);
17590 }
17591 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
17592 IsConjFMA = true;
17593 [[fallthrough]];
17594 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
17595 Intrinsic::ID IID = IsConjFMA
17596 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
17597 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
17598 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17599 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17600 }
17601 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
17602 IsConjFMA = true;
17603 [[fallthrough]];
17604 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
17605 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
17606 : Intrinsic::x86_avx10_mask_vfmaddcph256;
17607 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17608 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
17609 }
17610 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
17611 IsConjFMA = true;
17612 [[fallthrough]];
17613 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
17614 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17615 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17616 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17617 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
17618 return EmitX86Select(*this, And, Call, Ops[0]);
17619 }
17620 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
17621 IsConjFMA = true;
17622 [[fallthrough]];
17623 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
17624 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
17625 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
17626 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
17627 static constexpr int Mask[] = {0, 5, 6, 7};
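// Illustrative note, assuming the usual <4 x float> operands: indices
// {0, 5, 6, 7} take element 0 from Call and elements 1-3 from Ops[2], so only
// the scalar lane of the FMA result is merged in.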
17628 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
17629 }
17630 case X86::BI__builtin_ia32_prefetchi:
17631 return Builder.CreateCall(
17632 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
17633 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17634 llvm::ConstantInt::get(Int32Ty, 0)});
17635 }
17636}
17637
17638Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
17639 const CallExpr *E) {
17640 // Do not evaluate the builtin's arguments inside the argument list of a
17641 // function call, because the evaluation order of function arguments is
17642 // unspecified in C++. This is important when testing to ensure the arguments
17643 // are emitted in the same order every time. Eg:
17644 // Instead of:
17645 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17646 // EmitScalarExpr(E->getArg(1)), "swdiv");
17647 // Use:
17648 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17649 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17650 // return Builder.CreateFDiv(Op0, Op1, "swdiv")
17651
17652 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17653
17654#include "llvm/TargetParser/PPCTargetParser.def"
17655 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17656 unsigned Mask, CmpInst::Predicate CompOp,
17657 unsigned OpValue) -> Value * {
17658 if (SupportMethod == BUILTIN_PPC_FALSE)
17659 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17660
17661 if (SupportMethod == BUILTIN_PPC_TRUE)
17662 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
17663
17664 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17665
17666 llvm::Value *FieldValue = nullptr;
17667 if (SupportMethod == USE_SYS_CONF) {
17668 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17669 llvm::Constant *SysConf =
17670 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17671
17672 // Grab the appropriate field from _system_configuration.
17673 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17674 ConstantInt::get(Int32Ty, FieldIdx)};
17675
17676 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17677 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17678 CharUnits::fromQuantity(4));
17679 } else if (SupportMethod == SYS_CALL) {
17680 llvm::FunctionType *FTy =
17681 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17682 llvm::FunctionCallee Func =
17683 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17684
17685 FieldValue =
17686 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17687 }
17688 assert(FieldValue &&
17689 "SupportMethod value is not defined in PPCTargetParser.def.");
17690
17691 if (Mask)
17692 FieldValue = Builder.CreateAnd(FieldValue, Mask);
17693
17694 llvm::Type *ValueType = FieldValue->getType();
17695 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17696 assert(
17697 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17698 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17699
17700 return Builder.CreateICmp(
17701 CompOp, FieldValue,
17702 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17703 };
17704
17705 switch (BuiltinID) {
17706 default: return nullptr;
17707
17708 case Builtin::BI__builtin_cpu_is: {
17709 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17710 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17711 llvm::Triple Triple = getTarget().getTriple();
17712
17713 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17714 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17715
17716 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17717 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17718#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17719 AIXID) \
17720 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17721#include "llvm/TargetParser/PPCTargetParser.def"
17722 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17723 BUILTIN_PPC_UNSUPPORTED, 0}));
17724
17725 if (Triple.isOSAIX()) {
17726 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17727 "Invalid CPU name. Missed by SemaChecking?");
17728 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17729 ICmpInst::ICMP_EQ, AIXIDValue);
17730 }
17731
17732 assert(Triple.isOSLinux() &&
17733 "__builtin_cpu_is() is only supported for AIX and Linux.");
17734
17735 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17736 "Invalid CPU name. Missed by SemaChecking?");
17737
17738 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17739 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17740
17741 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17742 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17743 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17744 return Builder.CreateICmpEQ(TheCall,
17745 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17746 }
17747 case Builtin::BI__builtin_cpu_supports: {
17748 llvm::Triple Triple = getTarget().getTriple();
17749 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17750 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17751 if (Triple.isOSAIX()) {
17752 unsigned SupportMethod, FieldIdx, Mask, Value;
17753 CmpInst::Predicate CompOp;
17754 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17755 unsigned>
17756 CPUSupportType;
17757 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17758 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17759#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17760 VALUE) \
17761 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17762#include "llvm/TargetParser/PPCTargetParser.def"
17763 .Default({BUILTIN_PPC_FALSE, 0, 0,
17764 CmpInst::Predicate(), 0}));
17765 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17766 Value);
17767 }
17768
17769 assert(Triple.isOSLinux() &&
17770 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17771 unsigned FeatureWord;
17772 unsigned BitMask;
17773 std::tie(FeatureWord, BitMask) =
17774 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17775#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17776 .Case(Name, {FA_WORD, Bitmask})
17777#include "llvm/TargetParser/PPCTargetParser.def"
17778 .Default({0, 0});
17779 if (!BitMask)
17780 return Builder.getFalse();
17781 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17782 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17783 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17784 Value *Mask =
17785 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17786 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17787#undef PPC_FAWORD_HWCAP
17788#undef PPC_FAWORD_HWCAP2
17789#undef PPC_FAWORD_CPUID
17790 }
17791
17792 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17793 // call __builtin_readcyclecounter.
17794 case PPC::BI__builtin_ppc_get_timebase:
17795 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17796
17797 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17798 case PPC::BI__builtin_altivec_lvx:
17799 case PPC::BI__builtin_altivec_lvxl:
17800 case PPC::BI__builtin_altivec_lvebx:
17801 case PPC::BI__builtin_altivec_lvehx:
17802 case PPC::BI__builtin_altivec_lvewx:
17803 case PPC::BI__builtin_altivec_lvsl:
17804 case PPC::BI__builtin_altivec_lvsr:
17805 case PPC::BI__builtin_vsx_lxvd2x:
17806 case PPC::BI__builtin_vsx_lxvw4x:
17807 case PPC::BI__builtin_vsx_lxvd2x_be:
17808 case PPC::BI__builtin_vsx_lxvw4x_be:
17809 case PPC::BI__builtin_vsx_lxvl:
17810 case PPC::BI__builtin_vsx_lxvll:
17811 {
17812 SmallVector<Value *, 2> Ops;
17813 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17814 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17815 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17816 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17817 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17818 Ops.pop_back();
17819 }
17820
17821 switch (BuiltinID) {
17822 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17823 case PPC::BI__builtin_altivec_lvx:
17824 ID = Intrinsic::ppc_altivec_lvx;
17825 break;
17826 case PPC::BI__builtin_altivec_lvxl:
17827 ID = Intrinsic::ppc_altivec_lvxl;
17828 break;
17829 case PPC::BI__builtin_altivec_lvebx:
17830 ID = Intrinsic::ppc_altivec_lvebx;
17831 break;
17832 case PPC::BI__builtin_altivec_lvehx:
17833 ID = Intrinsic::ppc_altivec_lvehx;
17834 break;
17835 case PPC::BI__builtin_altivec_lvewx:
17836 ID = Intrinsic::ppc_altivec_lvewx;
17837 break;
17838 case PPC::BI__builtin_altivec_lvsl:
17839 ID = Intrinsic::ppc_altivec_lvsl;
17840 break;
17841 case PPC::BI__builtin_altivec_lvsr:
17842 ID = Intrinsic::ppc_altivec_lvsr;
17843 break;
17844 case PPC::BI__builtin_vsx_lxvd2x:
17845 ID = Intrinsic::ppc_vsx_lxvd2x;
17846 break;
17847 case PPC::BI__builtin_vsx_lxvw4x:
17848 ID = Intrinsic::ppc_vsx_lxvw4x;
17849 break;
17850 case PPC::BI__builtin_vsx_lxvd2x_be:
17851 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17852 break;
17853 case PPC::BI__builtin_vsx_lxvw4x_be:
17854 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17855 break;
17856 case PPC::BI__builtin_vsx_lxvl:
17857 ID = Intrinsic::ppc_vsx_lxvl;
17858 break;
17859 case PPC::BI__builtin_vsx_lxvll:
17860 ID = Intrinsic::ppc_vsx_lxvll;
17861 break;
17862 }
17863 llvm::Function *F = CGM.getIntrinsic(ID);
17864 return Builder.CreateCall(F, Ops, "");
17865 }
17866
17867 // vec_st, vec_xst_be
17868 case PPC::BI__builtin_altivec_stvx:
17869 case PPC::BI__builtin_altivec_stvxl:
17870 case PPC::BI__builtin_altivec_stvebx:
17871 case PPC::BI__builtin_altivec_stvehx:
17872 case PPC::BI__builtin_altivec_stvewx:
17873 case PPC::BI__builtin_vsx_stxvd2x:
17874 case PPC::BI__builtin_vsx_stxvw4x:
17875 case PPC::BI__builtin_vsx_stxvd2x_be:
17876 case PPC::BI__builtin_vsx_stxvw4x_be:
17877 case PPC::BI__builtin_vsx_stxvl:
17878 case PPC::BI__builtin_vsx_stxvll:
17879 {
17880 SmallVector<Value *, 3> Ops;
17881 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17882 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17883 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17884 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17885 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17886 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17887 Ops.pop_back();
17888 }
17889
17890 switch (BuiltinID) {
17891 default: llvm_unreachable("Unsupported st intrinsic!");
17892 case PPC::BI__builtin_altivec_stvx:
17893 ID = Intrinsic::ppc_altivec_stvx;
17894 break;
17895 case PPC::BI__builtin_altivec_stvxl:
17896 ID = Intrinsic::ppc_altivec_stvxl;
17897 break;
17898 case PPC::BI__builtin_altivec_stvebx:
17899 ID = Intrinsic::ppc_altivec_stvebx;
17900 break;
17901 case PPC::BI__builtin_altivec_stvehx:
17902 ID = Intrinsic::ppc_altivec_stvehx;
17903 break;
17904 case PPC::BI__builtin_altivec_stvewx:
17905 ID = Intrinsic::ppc_altivec_stvewx;
17906 break;
17907 case PPC::BI__builtin_vsx_stxvd2x:
17908 ID = Intrinsic::ppc_vsx_stxvd2x;
17909 break;
17910 case PPC::BI__builtin_vsx_stxvw4x:
17911 ID = Intrinsic::ppc_vsx_stxvw4x;
17912 break;
17913 case PPC::BI__builtin_vsx_stxvd2x_be:
17914 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17915 break;
17916 case PPC::BI__builtin_vsx_stxvw4x_be:
17917 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17918 break;
17919 case PPC::BI__builtin_vsx_stxvl:
17920 ID = Intrinsic::ppc_vsx_stxvl;
17921 break;
17922 case PPC::BI__builtin_vsx_stxvll:
17923 ID = Intrinsic::ppc_vsx_stxvll;
17924 break;
17925 }
17926 llvm::Function *F = CGM.getIntrinsic(ID);
17927 return Builder.CreateCall(F, Ops, "");
17928 }
17929 case PPC::BI__builtin_vsx_ldrmb: {
17930 // This essentially boils down to performing an unaligned VMX load sequence
17931 // that avoids crossing a page boundary, and then shuffling the loaded bytes
17932 // into the right side of the vector register.
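// Editorial sketch (not from the upstream source): for a call such as
// __builtin_vsx_ldrmb(p, 7), the code below emits two lvx loads at p and
// p + 6, builds a permute mask with lvsl/lvsr, and combines them with vperm
// so that the seven bytes land in the correct end of the result without the
// load straddling into another page.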
17933 Value *Op0 = EmitScalarExpr(E->getArg(0));
17934 Value *Op1 = EmitScalarExpr(E->getArg(1));
17935 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17936 llvm::Type *ResTy = ConvertType(E->getType());
17937 bool IsLE = getTarget().isLittleEndian();
17938
17939 // If the user wants the entire vector, just load the entire vector.
17940 if (NumBytes == 16) {
17941 Value *LD =
17942 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17943 if (!IsLE)
17944 return LD;
17945
17946 // Reverse the bytes on LE.
17947 SmallVector<int, 16> RevMask;
17948 for (int Idx = 0; Idx < 16; Idx++)
17949 RevMask.push_back(15 - Idx);
17950 return Builder.CreateShuffleVector(LD, LD, RevMask);
17951 }
17952
17953 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17954 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17955 : Intrinsic::ppc_altivec_lvsl);
17956 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17957 Value *HiMem = Builder.CreateGEP(
17958 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17959 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17960 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17961 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17962
17963 Op0 = IsLE ? HiLd : LoLd;
17964 Op1 = IsLE ? LoLd : HiLd;
17965 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17966 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17967
17968 if (IsLE) {
17969 SmallVector<int, 16> Consts;
17970 for (int Idx = 0; Idx < 16; Idx++) {
17971 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17972 : 16 - (NumBytes - Idx);
17973 Consts.push_back(Val);
17974 }
17975 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17976 Zero, Consts);
17977 }
17978 SmallVector<Constant *, 16> Consts;
17979 for (int Idx = 0; Idx < 16; Idx++)
17980 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17981 Value *Mask2 = ConstantVector::get(Consts);
17982 return Builder.CreateBitCast(
17983 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17984 }
17985 case PPC::BI__builtin_vsx_strmb: {
17986 Value *Op0 = EmitScalarExpr(E->getArg(0));
17987 Value *Op1 = EmitScalarExpr(E->getArg(1));
17988 Value *Op2 = EmitScalarExpr(E->getArg(2));
17989 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17990 bool IsLE = getTarget().isLittleEndian();
17991 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17992 // When storing the whole vector, store it directly on BE; on LE, reverse
17993 // the bytes first and then store.
17994 if (Width == 16) {
17995 Value *StVec = Op2;
17996 if (IsLE) {
17997 SmallVector<int, 16> RevMask;
17998 for (int Idx = 0; Idx < 16; Idx++)
17999 RevMask.push_back(15 - Idx);
18000 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
18001 }
18002 return Builder.CreateStore(
18003 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
18004 }
18005 auto *ConvTy = Int64Ty;
18006 unsigned NumElts = 0;
18007 switch (Width) {
18008 default:
18009 llvm_unreachable("width for stores must be a power of 2");
18010 case 8:
18011 ConvTy = Int64Ty;
18012 NumElts = 2;
18013 break;
18014 case 4:
18015 ConvTy = Int32Ty;
18016 NumElts = 4;
18017 break;
18018 case 2:
18019 ConvTy = Int16Ty;
18020 NumElts = 8;
18021 break;
18022 case 1:
18023 ConvTy = Int8Ty;
18024 NumElts = 16;
18025 break;
18026 }
18027 Value *Vec = Builder.CreateBitCast(
18028 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
18029 Value *Ptr =
18030 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
18031 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
18032 if (IsLE && Width > 1) {
18033 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
18034 Elt = Builder.CreateCall(F, Elt);
18035 }
18036 return Builder.CreateStore(
18037 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
18038 };
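// Editorial note (illustrative): the code below decomposes the byte count
// greedily; e.g. __builtin_vsx_strmb(p, 11, v) is emitted as one 8-byte,
// one 2-byte, and one 1-byte element store through StoreSubVec.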
18039 unsigned Stored = 0;
18040 unsigned RemainingBytes = NumBytes;
18041 Value *Result;
18042 if (NumBytes == 16)
18043 return StoreSubVec(16, 0, 0);
18044 if (NumBytes >= 8) {
18045 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
18046 RemainingBytes -= 8;
18047 Stored += 8;
18048 }
18049 if (RemainingBytes >= 4) {
18050 Result = StoreSubVec(4, NumBytes - Stored - 4,
18051 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
18052 RemainingBytes -= 4;
18053 Stored += 4;
18054 }
18055 if (RemainingBytes >= 2) {
18056 Result = StoreSubVec(2, NumBytes - Stored - 2,
18057 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
18058 RemainingBytes -= 2;
18059 Stored += 2;
18060 }
18061 if (RemainingBytes)
18062 Result =
18063 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
18064 return Result;
18065 }
18066 // Square root
18067 case PPC::BI__builtin_vsx_xvsqrtsp:
18068 case PPC::BI__builtin_vsx_xvsqrtdp: {
18069 llvm::Type *ResultType = ConvertType(E->getType());
18070 Value *X = EmitScalarExpr(E->getArg(0));
18071 if (Builder.getIsFPConstrained()) {
18072 llvm::Function *F = CGM.getIntrinsic(
18073 Intrinsic::experimental_constrained_sqrt, ResultType);
18074 return Builder.CreateConstrainedFPCall(F, X);
18075 } else {
18076 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18077 return Builder.CreateCall(F, X);
18078 }
18079 }
18080 // Count leading zeros
18081 case PPC::BI__builtin_altivec_vclzb:
18082 case PPC::BI__builtin_altivec_vclzh:
18083 case PPC::BI__builtin_altivec_vclzw:
18084 case PPC::BI__builtin_altivec_vclzd: {
18085 llvm::Type *ResultType = ConvertType(E->getType());
18086 Value *X = EmitScalarExpr(E->getArg(0));
18087 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18088 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
18089 return Builder.CreateCall(F, {X, Undef});
18090 }
18091 case PPC::BI__builtin_altivec_vctzb:
18092 case PPC::BI__builtin_altivec_vctzh:
18093 case PPC::BI__builtin_altivec_vctzw:
18094 case PPC::BI__builtin_altivec_vctzd: {
18095 llvm::Type *ResultType = ConvertType(E->getType());
18096 Value *X = EmitScalarExpr(E->getArg(0));
18097 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
18098 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
18099 return Builder.CreateCall(F, {X, Undef});
18100 }
18101 case PPC::BI__builtin_altivec_vinsd:
18102 case PPC::BI__builtin_altivec_vinsw:
18103 case PPC::BI__builtin_altivec_vinsd_elt:
18104 case PPC::BI__builtin_altivec_vinsw_elt: {
18105 llvm::Type *ResultType = ConvertType(E->getType());
18106 Value *Op0 = EmitScalarExpr(E->getArg(0));
18107 Value *Op1 = EmitScalarExpr(E->getArg(1));
18108 Value *Op2 = EmitScalarExpr(E->getArg(2));
18109
18110 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18111 BuiltinID == PPC::BI__builtin_altivec_vinsd);
18112
18113 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
18114 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
18115
18116 // The third argument must be a compile time constant.
18117 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18118 assert(ArgCI &&
18119 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
18120
18121 // The valid range for the third argument depends on the input type and the
18122 // builtin called.
18123 int ValidMaxValue = 0;
18124 if (IsUnaligned)
18125 ValidMaxValue = (Is32bit) ? 12 : 8;
18126 else
18127 ValidMaxValue = (Is32bit) ? 3 : 1;
18128
18129 // Get value of third argument.
18130 int64_t ConstArg = ArgCI->getSExtValue();
18131
18132 // Compose range checking error message.
18133 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
18134 RangeErrMsg += " number " + llvm::to_string(ConstArg);
18135 RangeErrMsg += " is outside of the valid range [0, ";
18136 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
18137
18138 // Issue error if third argument is not within the valid range.
18139 if (ConstArg < 0 || ConstArg > ValidMaxValue)
18140 CGM.Error(E->getExprLoc(), RangeErrMsg);
18141
18142 // The input to vec_replace_elt is an element index; convert it to a byte index.
18143 if (!IsUnaligned) {
18144 ConstArg *= Is32bit ? 4 : 8;
18145 // Fix the constant according to endianness.
18146 if (getTarget().isLittleEndian())
18147 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
18148 }
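// Worked example (editorial, illustrative): vec_replace_elt on a vector of
// 32-bit elements with element index 1 becomes byte index 4, which on
// little-endian targets is remapped to 12 - 4 = 8 before being passed to
// vinsw.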
18149
18150 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
18151 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
18152 // Casting input to vector int as per intrinsic definition.
18153 Op0 =
18154 Is32bit
18155 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
18156 : Builder.CreateBitCast(Op0,
18157 llvm::FixedVectorType::get(Int64Ty, 2));
18158 return Builder.CreateBitCast(
18159 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
18160 }
18161 case PPC::BI__builtin_altivec_vadduqm:
18162 case PPC::BI__builtin_altivec_vsubuqm: {
18163 Value *Op0 = EmitScalarExpr(E->getArg(0));
18164 Value *Op1 = EmitScalarExpr(E->getArg(1));
18165 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
18166 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
18167 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
18168 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
18169 return Builder.CreateAdd(Op0, Op1, "vadduqm");
18170 else
18171 return Builder.CreateSub(Op0, Op1, "vsubuqm");
18172 }
18173 case PPC::BI__builtin_altivec_vaddcuq_c:
18174 case PPC::BI__builtin_altivec_vsubcuq_c: {
18175 SmallVector<Value *, 2> Ops;
18176 Value *Op0 = EmitScalarExpr(E->getArg(0));
18177 Value *Op1 = EmitScalarExpr(E->getArg(1));
18178 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18179 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18180 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18181 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18182 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
18183 ? Intrinsic::ppc_altivec_vaddcuq
18184 : Intrinsic::ppc_altivec_vsubcuq;
18185 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18186 }
18187 case PPC::BI__builtin_altivec_vaddeuqm_c:
18188 case PPC::BI__builtin_altivec_vaddecuq_c:
18189 case PPC::BI__builtin_altivec_vsubeuqm_c:
18190 case PPC::BI__builtin_altivec_vsubecuq_c: {
18191 SmallVector<Value *, 3> Ops;
18192 Value *Op0 = EmitScalarExpr(E->getArg(0));
18193 Value *Op1 = EmitScalarExpr(E->getArg(1));
18194 Value *Op2 = EmitScalarExpr(E->getArg(2));
18195 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
18196 llvm::IntegerType::get(getLLVMContext(), 128), 1);
18197 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
18198 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
18199 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
18200 switch (BuiltinID) {
18201 default:
18202 llvm_unreachable("Unsupported intrinsic!");
18203 case PPC::BI__builtin_altivec_vaddeuqm_c:
18204 ID = Intrinsic::ppc_altivec_vaddeuqm;
18205 break;
18206 case PPC::BI__builtin_altivec_vaddecuq_c:
18207 ID = Intrinsic::ppc_altivec_vaddecuq;
18208 break;
18209 case PPC::BI__builtin_altivec_vsubeuqm_c:
18210 ID = Intrinsic::ppc_altivec_vsubeuqm;
18211 break;
18212 case PPC::BI__builtin_altivec_vsubecuq_c:
18213 ID = Intrinsic::ppc_altivec_vsubecuq;
18214 break;
18215 }
18216 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
18217 }
18218 case PPC::BI__builtin_ppc_rldimi:
18219 case PPC::BI__builtin_ppc_rlwimi: {
18220 Value *Op0 = EmitScalarExpr(E->getArg(0));
18221 Value *Op1 = EmitScalarExpr(E->getArg(1));
18222 Value *Op2 = EmitScalarExpr(E->getArg(2));
18223 Value *Op3 = EmitScalarExpr(E->getArg(3));
18224 // rldimi is a 64-bit instruction; expand the intrinsic before isel to
18225 // leverage peephole and avoid legalization efforts.
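// Editorial sketch (illustrative): when the target is not 64-bit PowerPC,
// __builtin_ppc_rldimi expands to
//   (fshl(Op0, Op0, Op2) & Op3) | (Op1 & ~Op3)
// i.e. a funnel-shift rotate of Op0 masked into Op1.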
18226 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
18227 !getTarget().getTriple().isPPC64()) {
18228 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
18229 Op2 = Builder.CreateZExt(Op2, Int64Ty);
18230 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
18231 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
18232 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
18233 }
18234 return Builder.CreateCall(
18235 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
18236 ? Intrinsic::ppc_rldimi
18237 : Intrinsic::ppc_rlwimi),
18238 {Op0, Op1, Op2, Op3});
18239 }
18240 case PPC::BI__builtin_ppc_rlwnm: {
18241 Value *Op0 = EmitScalarExpr(E->getArg(0));
18242 Value *Op1 = EmitScalarExpr(E->getArg(1));
18243 Value *Op2 = EmitScalarExpr(E->getArg(2));
18244 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
18245 {Op0, Op1, Op2});
18246 }
18247 case PPC::BI__builtin_ppc_poppar4:
18248 case PPC::BI__builtin_ppc_poppar8: {
18249 Value *Op0 = EmitScalarExpr(E->getArg(0));
18250 llvm::Type *ArgType = Op0->getType();
18251 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
18252 Value *Tmp = Builder.CreateCall(F, Op0);
18253
18254 llvm::Type *ResultType = ConvertType(E->getType());
18255 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
18256 if (Result->getType() != ResultType)
18257 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
18258 "cast");
18259 return Result;
18260 }
18261 case PPC::BI__builtin_ppc_cmpb: {
18262 Value *Op0 = EmitScalarExpr(E->getArg(0));
18263 Value *Op1 = EmitScalarExpr(E->getArg(1));
18264 if (getTarget().getTriple().isPPC64()) {
18265 Function *F =
18266 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
18267 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
18268 }
18269 // For 32 bit, emit the code as below:
18270 // %conv = trunc i64 %a to i32
18271 // %conv1 = trunc i64 %b to i32
18272 // %shr = lshr i64 %a, 32
18273 // %conv2 = trunc i64 %shr to i32
18274 // %shr3 = lshr i64 %b, 32
18275 // %conv4 = trunc i64 %shr3 to i32
18276 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
18277 // %conv5 = zext i32 %0 to i64
18278 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
18279 // %conv614 = zext i32 %1 to i64
18280 // %shl = shl nuw i64 %conv614, 32
18281 // %or = or i64 %shl, %conv5
18282 // ret i64 %or
18283 Function *F =
18284 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
18285 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
18286 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
18287 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
18288 Value *ArgOneHi =
18289 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
18290 Value *ArgTwoHi =
18291 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
18292 Value *ResLo = Builder.CreateZExt(
18293 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
18294 Value *ResHiShift = Builder.CreateZExt(
18295 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
18296 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
18297 return Builder.CreateOr(ResLo, ResHi);
18298 }
18299 // Copy sign
18300 case PPC::BI__builtin_vsx_xvcpsgnsp:
18301 case PPC::BI__builtin_vsx_xvcpsgndp: {
18302 llvm::Type *ResultType = ConvertType(E->getType());
18303 Value *X = EmitScalarExpr(E->getArg(0));
18304 Value *Y = EmitScalarExpr(E->getArg(1));
18305 ID = Intrinsic::copysign;
18306 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18307 return Builder.CreateCall(F, {X, Y});
18308 }
18309 // Rounding/truncation
18310 case PPC::BI__builtin_vsx_xvrspip:
18311 case PPC::BI__builtin_vsx_xvrdpip:
18312 case PPC::BI__builtin_vsx_xvrdpim:
18313 case PPC::BI__builtin_vsx_xvrspim:
18314 case PPC::BI__builtin_vsx_xvrdpi:
18315 case PPC::BI__builtin_vsx_xvrspi:
18316 case PPC::BI__builtin_vsx_xvrdpic:
18317 case PPC::BI__builtin_vsx_xvrspic:
18318 case PPC::BI__builtin_vsx_xvrdpiz:
18319 case PPC::BI__builtin_vsx_xvrspiz: {
18320 llvm::Type *ResultType = ConvertType(E->getType());
18321 Value *X = EmitScalarExpr(E->getArg(0));
18322 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
18323 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
18324 ID = Builder.getIsFPConstrained()
18325 ? Intrinsic::experimental_constrained_floor
18326 : Intrinsic::floor;
18327 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
18328 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
18329 ID = Builder.getIsFPConstrained()
18330 ? Intrinsic::experimental_constrained_round
18331 : Intrinsic::round;
18332 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
18333 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
18334 ID = Builder.getIsFPConstrained()
18335 ? Intrinsic::experimental_constrained_rint
18336 : Intrinsic::rint;
18337 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
18338 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
18339 ID = Builder.getIsFPConstrained()
18340 ? Intrinsic::experimental_constrained_ceil
18341 : Intrinsic::ceil;
18342 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
18343 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
18344 ID = Builder.getIsFPConstrained()
18345 ? Intrinsic::experimental_constrained_trunc
18346 : Intrinsic::trunc;
18347 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
18348 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
18349 : Builder.CreateCall(F, X);
18350 }
18351
18352 // Absolute value
18353 case PPC::BI__builtin_vsx_xvabsdp:
18354 case PPC::BI__builtin_vsx_xvabssp: {
18355 llvm::Type *ResultType = ConvertType(E->getType());
18356 Value *X = EmitScalarExpr(E->getArg(0));
18357 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
18358 return Builder.CreateCall(F, X);
18359 }
18360
18361 // Fastmath by default
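// Editorial note (illustrative): e.g. __builtin_ppc_rsqrtf(x) is emitted as a
// fast-math fdiv of 1.0 by llvm.sqrt(x); the saved fast-math flags are
// restored afterwards.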
18362 case PPC::BI__builtin_ppc_recipdivf:
18363 case PPC::BI__builtin_ppc_recipdivd:
18364 case PPC::BI__builtin_ppc_rsqrtf:
18365 case PPC::BI__builtin_ppc_rsqrtd: {
18366 FastMathFlags FMF = Builder.getFastMathFlags();
18367 Builder.getFastMathFlags().setFast();
18368 llvm::Type *ResultType = ConvertType(E->getType());
18369 Value *X = EmitScalarExpr(E->getArg(0));
18370
18371 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
18372 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
18373 Value *Y = EmitScalarExpr(E->getArg(1));
18374 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
18375 Builder.getFastMathFlags() &= (FMF);
18376 return FDiv;
18377 }
18378 auto *One = ConstantFP::get(ResultType, 1.0);
18379 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
18380 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
18381 Builder.getFastMathFlags() &= (FMF);
18382 return FDiv;
18383 }
18384 case PPC::BI__builtin_ppc_alignx: {
18385 Value *Op0 = EmitScalarExpr(E->getArg(0));
18386 Value *Op1 = EmitScalarExpr(E->getArg(1));
18387 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
18388 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
18389 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
18390 llvm::Value::MaximumAlignment);
18391
18392 emitAlignmentAssumption(Op1, E->getArg(1),
18393 /*The expr loc is sufficient.*/ SourceLocation(),
18394 AlignmentCI, nullptr);
18395 return Op1;
18396 }
18397 case PPC::BI__builtin_ppc_rdlam: {
18398 Value *Op0 = EmitScalarExpr(E->getArg(0));
18399 Value *Op1 = EmitScalarExpr(E->getArg(1));
18400 Value *Op2 = EmitScalarExpr(E->getArg(2));
18401 llvm::Type *Ty = Op0->getType();
18402 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
18403 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
18404 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
18405 return Builder.CreateAnd(Rotate, Op2);
18406 }
18407 case PPC::BI__builtin_ppc_load2r: {
18408 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
18409 Value *Op0 = EmitScalarExpr(E->getArg(0));
18410 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
18411 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
18412 }
18413 // FMA variations
18414 case PPC::BI__builtin_ppc_fnmsub:
18415 case PPC::BI__builtin_ppc_fnmsubs:
18416 case PPC::BI__builtin_vsx_xvmaddadp:
18417 case PPC::BI__builtin_vsx_xvmaddasp:
18418 case PPC::BI__builtin_vsx_xvnmaddadp:
18419 case PPC::BI__builtin_vsx_xvnmaddasp:
18420 case PPC::BI__builtin_vsx_xvmsubadp:
18421 case PPC::BI__builtin_vsx_xvmsubasp:
18422 case PPC::BI__builtin_vsx_xvnmsubadp:
18423 case PPC::BI__builtin_vsx_xvnmsubasp: {
18424 llvm::Type *ResultType = ConvertType(E->getType());
18425 Value *X = EmitScalarExpr(E->getArg(0));
18426 Value *Y = EmitScalarExpr(E->getArg(1));
18427 Value *Z = EmitScalarExpr(E->getArg(2));
18428 llvm::Function *F;
18429 if (Builder.getIsFPConstrained())
18430 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
18431 else
18432 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
18433 switch (BuiltinID) {
18434 case PPC::BI__builtin_vsx_xvmaddadp:
18435 case PPC::BI__builtin_vsx_xvmaddasp:
18436 if (Builder.getIsFPConstrained())
18437 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
18438 else
18439 return Builder.CreateCall(F, {X, Y, Z});
18440 case PPC::BI__builtin_vsx_xvnmaddadp:
18441 case PPC::BI__builtin_vsx_xvnmaddasp:
18442 if (Builder.getIsFPConstrained())
18443 return Builder.CreateFNeg(
18444 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
18445 else
18446 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
18447 case PPC::BI__builtin_vsx_xvmsubadp:
18448 case PPC::BI__builtin_vsx_xvmsubasp:
18449 if (Builder.getIsFPConstrained())
18450 return Builder.CreateConstrainedFPCall(
18451 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18452 else
18453 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
18454 case PPC::BI__builtin_ppc_fnmsub:
18455 case PPC::BI__builtin_ppc_fnmsubs:
18456 case PPC::BI__builtin_vsx_xvnmsubadp:
18457 case PPC::BI__builtin_vsx_xvnmsubasp:
18458 if (Builder.getIsFPConstrained())
18459 return Builder.CreateFNeg(
18460 Builder.CreateConstrainedFPCall(
18461 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
18462 "neg");
18463 else
18464 return Builder.CreateCall(
18465 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
18466 }
18467 llvm_unreachable("Unknown FMA operation");
18468 return nullptr; // Suppress no-return warning
18469 }
18470
18471 case PPC::BI__builtin_vsx_insertword: {
18472 Value *Op0 = EmitScalarExpr(E->getArg(0));
18473 Value *Op1 = EmitScalarExpr(E->getArg(1));
18474 Value *Op2 = EmitScalarExpr(E->getArg(2));
18475 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
18476
18477 // The third argument is a compile-time constant int. It must be clamped
18478 // to the range [0, 12].
18479 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18480 assert(ArgCI &&
18481 "Third arg to xxinsertw intrinsic must be constant integer");
18482 const int64_t MaxIndex = 12;
18483 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18484
18485 // The builtin semantics don't exactly match the xxinsertw instruction's
18486 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
18487 // word from the first argument, and inserts it in the second argument. The
18488 // instruction extracts the word from its second input register and inserts
18489 // it into its first input register, so swap the first and second arguments.
18490 std::swap(Op0, Op1);
18491
18492 // Need to cast the second argument from a vector of unsigned int to a
18493 // vector of long long.
18494 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18495
18496 if (getTarget().isLittleEndian()) {
18497 // Reverse the double words in the vector we will extract from.
18498 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18499 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
18500
18501 // Reverse the index.
18502 Index = MaxIndex - Index;
18503 }
18504
18505 // Intrinsic expects the first arg to be a vector of int.
18506 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18507 Op2 = ConstantInt::getSigned(Int32Ty, Index);
18508 return Builder.CreateCall(F, {Op0, Op1, Op2});
18509 }
18510
18511 case PPC::BI__builtin_vsx_extractuword: {
18512 Value *Op0 = EmitScalarExpr(E->getArg(0));
18513 Value *Op1 = EmitScalarExpr(E->getArg(1));
18514 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
18515
18516 // Intrinsic expects the first argument to be a vector of doublewords.
18517 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18518
18519 // The second argument is a compile time constant int that needs to
18520 // be clamped to the range [0, 12].
18521 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
18522 assert(ArgCI &&
18523 "Second Arg to xxextractuw intrinsic must be a constant integer!");
18524 const int64_t MaxIndex = 12;
18525 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
18526
18527 if (getTarget().isLittleEndian()) {
18528 // Reverse the index.
18529 Index = MaxIndex - Index;
18530 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18531
18532 // Emit the call, then reverse the double words of the results vector.
18533 Value *Call = Builder.CreateCall(F, {Op0, Op1});
18534
18535 Value *ShuffleCall =
18536 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
18537 return ShuffleCall;
18538 } else {
18539 Op1 = ConstantInt::getSigned(Int32Ty, Index);
18540 return Builder.CreateCall(F, {Op0, Op1});
18541 }
18542 }
18543
18544 case PPC::BI__builtin_vsx_xxpermdi: {
18545 Value *Op0 = EmitScalarExpr(E->getArg(0));
18546 Value *Op1 = EmitScalarExpr(E->getArg(1));
18547 Value *Op2 = EmitScalarExpr(E->getArg(2));
18548 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18549 assert(ArgCI && "Third arg must be constant integer!");
18550
18551 unsigned Index = ArgCI->getZExtValue();
18552 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
18553 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
18554
18555 // Account for endianness by treating this as just a shuffle. So we use the
18556 // same indices for both LE and BE in order to produce expected results in
18557 // both cases.
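// Example (editorial, illustrative): Index == 3 selects doubleword 1 of Op0
// and doubleword 1 of Op1, i.e. the shuffle mask {1, 3}.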
18558 int ElemIdx0 = (Index & 2) >> 1;
18559 int ElemIdx1 = 2 + (Index & 1);
18560
18561 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
18562 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18563 QualType BIRetType = E->getType();
18564 auto RetTy = ConvertType(BIRetType);
18565 return Builder.CreateBitCast(ShuffleCall, RetTy);
18566 }
18567
18568 case PPC::BI__builtin_vsx_xxsldwi: {
18569 Value *Op0 = EmitScalarExpr(E->getArg(0));
18570 Value *Op1 = EmitScalarExpr(E->getArg(1));
18571 Value *Op2 = EmitScalarExpr(E->getArg(2));
18572 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
18573 assert(ArgCI && "Third argument must be a compile time constant");
18574 unsigned Index = ArgCI->getZExtValue() & 0x3;
18575 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
18576 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
18577
18578 // Create a shuffle mask
18579 int ElemIdx0;
18580 int ElemIdx1;
18581 int ElemIdx2;
18582 int ElemIdx3;
18583 if (getTarget().isLittleEndian()) {
18584 // Little endian element N comes from element 8+N-Index of the
18585 // concatenated wide vector (of course, using modulo arithmetic on
18586 // the total number of elements).
18587 ElemIdx0 = (8 - Index) % 8;
18588 ElemIdx1 = (9 - Index) % 8;
18589 ElemIdx2 = (10 - Index) % 8;
18590 ElemIdx3 = (11 - Index) % 8;
18591 } else {
18592 // Big endian ElemIdx<N> = Index + N
18593 ElemIdx0 = Index;
18594 ElemIdx1 = Index + 1;
18595 ElemIdx2 = Index + 2;
18596 ElemIdx3 = Index + 3;
18597 }
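// Example (editorial, illustrative): Index == 1 yields the shuffle mask
// {7, 0, 1, 2} on little-endian and {1, 2, 3, 4} on big-endian.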
18598
18599 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
18600 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
18601 QualType BIRetType = E->getType();
18602 auto RetTy = ConvertType(BIRetType);
18603 return Builder.CreateBitCast(ShuffleCall, RetTy);
18604 }
18605
18606 case PPC::BI__builtin_pack_vector_int128: {
18607 Value *Op0 = EmitScalarExpr(E->getArg(0));
18608 Value *Op1 = EmitScalarExpr(E->getArg(1));
18609 bool isLittleEndian = getTarget().isLittleEndian();
18610 Value *PoisonValue =
18611 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
18612 Value *Res = Builder.CreateInsertElement(
18613 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
18614 Res = Builder.CreateInsertElement(Res, Op1,
18615 (uint64_t)(isLittleEndian ? 0 : 1));
18616 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
18617 }
18618
18619 case PPC::BI__builtin_unpack_vector_int128: {
18620 Value *Op0 = EmitScalarExpr(E->getArg(0));
18621 Value *Op1 = EmitScalarExpr(E->getArg(1));
18622 ConstantInt *Index = cast<ConstantInt>(Op1);
18623 Value *Unpacked = Builder.CreateBitCast(
18624 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
18625
18626 if (getTarget().isLittleEndian())
18627 Index =
18628 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18629
18630 return Builder.CreateExtractElement(Unpacked, Index);
18631 }
18632
18633 case PPC::BI__builtin_ppc_sthcx: {
18634 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
18635 Value *Op0 = EmitScalarExpr(E->getArg(0));
18636 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
18637 return Builder.CreateCall(F, {Op0, Op1});
18638 }
18639
18640 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18641 // Some of the MMA instructions accumulate their result into an existing
18642 // accumulator whereas the others generate a new accumulator. So we need
18643 // custom code generation that expands a builtin call into a load of the
18644 // accumulator (if the corresponding instruction accumulates its result),
18645 // followed by the call to the intrinsic and a store of the result.
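// Editorial sketch (illustrative): a call such as
//   __builtin_mma_xvf32gerpp(&acc, a, b)
// becomes a load of the accumulator, a call to the matching llvm.ppc.mma.*
// intrinsic with the loaded value and the vector operands, and a store of the
// intrinsic's result back through the pointer.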
18646#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18647 case PPC::BI__builtin_##Name:
18648#include "clang/Basic/BuiltinsPPC.def"
18649 {
18650 SmallVector<Value *, 4> Ops;
18651 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18652 if (E->getArg(i)->getType()->isArrayType())
18653 Ops.push_back(
18654 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18655 else
18656 Ops.push_back(EmitScalarExpr(E->getArg(i)));
18657 // The first argument of these builtins is a pointer used to store their
18658 // result. However, the llvm intrinsics return their result in multiple
18659 // return values. So, here we emit code extracting these values from the
18660 // intrinsic results and storing them using that pointer.
18661 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18662 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18663 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18664 unsigned NumVecs = 2;
18665 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18666 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18667 NumVecs = 4;
18668 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18669 }
18670 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18671 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18672 Value *Vec = Builder.CreateLoad(Addr);
18673 Value *Call = Builder.CreateCall(F, {Vec});
18674 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18675 Value *Ptr = Ops[0];
18676 for (unsigned i=0; i<NumVecs; i++) {
18677 Value *Vec = Builder.CreateExtractValue(Call, i);
18678 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18679 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18680 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18681 }
18682 return Call;
18683 }
18684 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18685 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18686 // Reverse the order of the operands for LE, so the
18687 // same builtin call can be used on both LE and BE
18688 // without the need for the programmer to swap operands.
18689 // The operands are reversed starting from the second argument;
18690 // the first operand is the pointer to the pair/accumulator
18691 // that is being built.
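// Editorial example (illustrative): on little-endian,
// __builtin_vsx_build_pair(&vp, v0, v1) passes {v1, v0} to the intrinsic so
// the resulting register pair matches the big-endian layout.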
18692 if (getTarget().isLittleEndian())
18693 std::reverse(Ops.begin() + 1, Ops.end());
18694 }
18695 bool Accumulate;
18696 switch (BuiltinID) {
18697 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18698 case PPC::BI__builtin_##Name: \
18699 ID = Intrinsic::ppc_##Intr; \
18700 Accumulate = Acc; \
18701 break;
18702 #include "clang/Basic/BuiltinsPPC.def"
18703 }
18704 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18705 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18706 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18707 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18708 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18709 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18710 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18711 } else {
18712 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18713 }
18714 Ops.pop_back();
18715 llvm::Function *F = CGM.getIntrinsic(ID);
18716 return Builder.CreateCall(F, Ops, "");
18717 }
18718 SmallVector<Value*, 4> CallOps;
18719 if (Accumulate) {
18720 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18721 Value *Acc = Builder.CreateLoad(Addr);
18722 CallOps.push_back(Acc);
18723 }
18724 for (unsigned i=1; i<Ops.size(); i++)
18725 CallOps.push_back(Ops[i]);
18726 llvm::Function *F = CGM.getIntrinsic(ID);
18727 Value *Call = Builder.CreateCall(F, CallOps);
18728 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign());
18729 }
18730
18731 case PPC::BI__builtin_ppc_compare_and_swap:
18732 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18733 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18734 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18735 Value *OldVal = Builder.CreateLoad(OldValAddr);
18736 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18737 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18738 Value *Op2 = EmitScalarExpr(E->getArg(2));
18739 auto Pair = EmitAtomicCompareExchange(
18740 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18741 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
18742 // Unlike c11's atomic_compare_exchange, according to
18743 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
18744 // > In either case, the contents of the memory location specified by addr
18745 // > are copied into the memory location specified by old_val_addr.
18746 // But it does not specify whether the store to OldValAddr is atomic or
18747 // which ordering to use. Following XL's codegen, treat it as a normal
18748 // store.
18749 Value *LoadedVal = Pair.first.getScalarVal();
18750 Builder.CreateStore(LoadedVal, OldValAddr);
18751 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18752 }
18753 case PPC::BI__builtin_ppc_fetch_and_add:
18754 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18755 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18756 llvm::AtomicOrdering::Monotonic);
18757 }
18758 case PPC::BI__builtin_ppc_fetch_and_and:
18759 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18760 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18761 llvm::AtomicOrdering::Monotonic);
18762 }
18763
18764 case PPC::BI__builtin_ppc_fetch_and_or:
18765 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18766 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18767 llvm::AtomicOrdering::Monotonic);
18768 }
18769 case PPC::BI__builtin_ppc_fetch_and_swap:
18770 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18771 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18772 llvm::AtomicOrdering::Monotonic);
18773 }
18774 case PPC::BI__builtin_ppc_ldarx:
18775 case PPC::BI__builtin_ppc_lwarx:
18776 case PPC::BI__builtin_ppc_lharx:
18777 case PPC::BI__builtin_ppc_lbarx:
18778 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18779 case PPC::BI__builtin_ppc_mfspr: {
18780 Value *Op0 = EmitScalarExpr(E->getArg(0));
18781 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18782 ? Int32Ty
18783 : Int64Ty;
18784 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18785 return Builder.CreateCall(F, {Op0});
18786 }
18787 case PPC::BI__builtin_ppc_mtspr: {
18788 Value *Op0 = EmitScalarExpr(E->getArg(0));
18789 Value *Op1 = EmitScalarExpr(E->getArg(1));
18790 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18791 ? Int32Ty
18792 : Int64Ty;
18793 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18794 return Builder.CreateCall(F, {Op0, Op1});
18795 }
18796 case PPC::BI__builtin_ppc_popcntb: {
18797 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18798 llvm::Type *ArgType = ArgValue->getType();
18799 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18800 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18801 }
18802 case PPC::BI__builtin_ppc_mtfsf: {
18803 // The builtin takes a uint32 that needs to be cast to an
18804 // f64 to be passed to the intrinsic.
18805 Value *Op0 = EmitScalarExpr(E->getArg(0));
18806 Value *Op1 = EmitScalarExpr(E->getArg(1));
18807 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18808 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18809 return Builder.CreateCall(F, {Op0, Cast}, "");
18810 }
18811
18812 case PPC::BI__builtin_ppc_swdiv_nochk:
18813 case PPC::BI__builtin_ppc_swdivs_nochk: {
18814 Value *Op0 = EmitScalarExpr(E->getArg(0));
18815 Value *Op1 = EmitScalarExpr(E->getArg(1));
18816 FastMathFlags FMF = Builder.getFastMathFlags();
18817 Builder.getFastMathFlags().setFast();
18818 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18819 Builder.getFastMathFlags() &= (FMF);
18820 return FDiv;
18821 }
18822 case PPC::BI__builtin_ppc_fric:
18823 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18824 *this, E, Intrinsic::rint,
18825 Intrinsic::experimental_constrained_rint))
18826 .getScalarVal();
18827 case PPC::BI__builtin_ppc_frim:
18828 case PPC::BI__builtin_ppc_frims:
18829 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18830 *this, E, Intrinsic::floor,
18831 Intrinsic::experimental_constrained_floor))
18832 .getScalarVal();
18833 case PPC::BI__builtin_ppc_frin:
18834 case PPC::BI__builtin_ppc_frins:
18835 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18836 *this, E, Intrinsic::round,
18837 Intrinsic::experimental_constrained_round))
18838 .getScalarVal();
18839 case PPC::BI__builtin_ppc_frip:
18840 case PPC::BI__builtin_ppc_frips:
18841 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18842 *this, E, Intrinsic::ceil,
18843 Intrinsic::experimental_constrained_ceil))
18844 .getScalarVal();
18845 case PPC::BI__builtin_ppc_friz:
18846 case PPC::BI__builtin_ppc_frizs:
18847 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18848 *this, E, Intrinsic::trunc,
18849 Intrinsic::experimental_constrained_trunc))
18850 .getScalarVal();
18851 case PPC::BI__builtin_ppc_fsqrt:
18852 case PPC::BI__builtin_ppc_fsqrts:
18853 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
18854 *this, E, Intrinsic::sqrt,
18855 Intrinsic::experimental_constrained_sqrt))
18856 .getScalarVal();
18857 case PPC::BI__builtin_ppc_test_data_class: {
18858 Value *Op0 = EmitScalarExpr(E->getArg(0));
18859 Value *Op1 = EmitScalarExpr(E->getArg(1));
18860 return Builder.CreateCall(
18861 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18862 {Op0, Op1}, "test_data_class");
18863 }
18864 case PPC::BI__builtin_ppc_maxfe: {
18865 Value *Op0 = EmitScalarExpr(E->getArg(0));
18866 Value *Op1 = EmitScalarExpr(E->getArg(1));
18867 Value *Op2 = EmitScalarExpr(E->getArg(2));
18868 Value *Op3 = EmitScalarExpr(E->getArg(3));
18869 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18870 {Op0, Op1, Op2, Op3});
18871 }
18872 case PPC::BI__builtin_ppc_maxfl: {
18873 Value *Op0 = EmitScalarExpr(E->getArg(0));
18874 Value *Op1 = EmitScalarExpr(E->getArg(1));
18875 Value *Op2 = EmitScalarExpr(E->getArg(2));
18876 Value *Op3 = EmitScalarExpr(E->getArg(3));
18877 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18878 {Op0, Op1, Op2, Op3});
18879 }
18880 case PPC::BI__builtin_ppc_maxfs: {
18881 Value *Op0 = EmitScalarExpr(E->getArg(0));
18882 Value *Op1 = EmitScalarExpr(E->getArg(1));
18883 Value *Op2 = EmitScalarExpr(E->getArg(2));
18884 Value *Op3 = EmitScalarExpr(E->getArg(3));
18885 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18886 {Op0, Op1, Op2, Op3});
18887 }
18888 case PPC::BI__builtin_ppc_minfe: {
18889 Value *Op0 = EmitScalarExpr(E->getArg(0));
18890 Value *Op1 = EmitScalarExpr(E->getArg(1));
18891 Value *Op2 = EmitScalarExpr(E->getArg(2));
18892 Value *Op3 = EmitScalarExpr(E->getArg(3));
18893 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18894 {Op0, Op1, Op2, Op3});
18895 }
18896 case PPC::BI__builtin_ppc_minfl: {
18897 Value *Op0 = EmitScalarExpr(E->getArg(0));
18898 Value *Op1 = EmitScalarExpr(E->getArg(1));
18899 Value *Op2 = EmitScalarExpr(E->getArg(2));
18900 Value *Op3 = EmitScalarExpr(E->getArg(3));
18901 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18902 {Op0, Op1, Op2, Op3});
18903 }
18904 case PPC::BI__builtin_ppc_minfs: {
18905 Value *Op0 = EmitScalarExpr(E->getArg(0));
18906 Value *Op1 = EmitScalarExpr(E->getArg(1));
18907 Value *Op2 = EmitScalarExpr(E->getArg(2));
18908 Value *Op3 = EmitScalarExpr(E->getArg(3));
18909 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18910 {Op0, Op1, Op2, Op3});
18911 }
18912 case PPC::BI__builtin_ppc_swdiv:
18913 case PPC::BI__builtin_ppc_swdivs: {
18914 Value *Op0 = EmitScalarExpr(E->getArg(0));
18915 Value *Op1 = EmitScalarExpr(E->getArg(1));
18916 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18917 }
18918 case PPC::BI__builtin_ppc_set_fpscr_rn:
18919 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18920 {EmitScalarExpr(E->getArg(0))});
18921 case PPC::BI__builtin_ppc_mffs:
18922 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18923 }
18924}
18925
18926namespace {
18927 // If \p E is not a null pointer, insert an address space cast to match the
18928 // return type of \p E if necessary.
18929Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18930 const CallExpr *E = nullptr) {
18931 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18932 auto *Call = CGF.Builder.CreateCall(F);
18933 Call->addRetAttr(
18934 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18935 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18936 if (!E)
18937 return Call;
18938 QualType BuiltinRetType = E->getType();
18939 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18940 if (RetTy == Call->getType())
18941 return Call;
18942 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18943}
18944
18945Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18946 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18947 auto *Call = CGF.Builder.CreateCall(F);
18948 Call->addRetAttr(
18949 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18950 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18951 return Call;
18952}
18953
18954// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18955/// Emit code based on Code Object ABI version.
18956/// COV_4 : Emit code to use dispatch ptr
18957/// COV_5+ : Emit code to use implicitarg ptr
18958/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18959/// and use its value for COV_4 or COV_5+ approach. It is used for
18960/// compiling device libraries in an ABI-agnostic way.
18961///
18962 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18963/// clang during compilation of user code.
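// Editorial note (illustrative): for Index == 1 (the y dimension), the COV_5+
// path loads an i16 at implicitarg_ptr + 14, while the COV_4 path loads an
// i16 at dispatch_ptr + 6 (the workgroup_size_y field of the HSA
// kernel_dispatch_packet).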
18964Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18965 llvm::LoadInst *LD;
18966
18967 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18968
18969 if (Cov == CodeObjectVersionKind::COV_None) {
18970 StringRef Name = "__oclc_ABI_version";
18971 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18972 if (!ABIVersionC)
18973 ABIVersionC = new llvm::GlobalVariable(
18974 CGF.CGM.getModule(), CGF.Int32Ty, false,
18975 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
18976 llvm::GlobalVariable::NotThreadLocal,
18977 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
18978
18979 // This load will be eliminated by the IPSCCP because it is constant
18980 // weak_odr without externally_initialized. Either changing it to weak or
18981 // adding externally_initialized will keep the load.
18982 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18983 CGF.CGM.getIntAlign());
18984
18985 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18986 ABIVersion,
18987 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18988
18989 // Indexing the implicit kernarg segment.
18990 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18991 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18992
18993 // Indexing the HSA kernel_dispatch_packet struct.
18994 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18995 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18996
18997 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
18998 LD = CGF.Builder.CreateLoad(
18999 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19000 } else {
19001 Value *GEP = nullptr;
19002 if (Cov >= CodeObjectVersionKind::COV_5) {
19003 // Indexing the implicit kernarg segment.
19004 GEP = CGF.Builder.CreateConstGEP1_32(
19005 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
19006 } else {
19007 // Indexing the HSA kernel_dispatch_packet struct.
19008 GEP = CGF.Builder.CreateConstGEP1_32(
19009 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
19010 }
19011 LD = CGF.Builder.CreateLoad(
19012 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
19013 }
19014
19015 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
19016 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
19017 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
19018 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
19019 LD->setMetadata(llvm::LLVMContext::MD_noundef,
19020 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19021 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19022 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19023 return LD;
19024}
19025
19026// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
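// Editorial note (illustrative): grid_size_x/y/z are i32 fields at offsets
// 12, 16, and 20 of the HSA kernel_dispatch_packet, hence XOffset + Index * 4
// below.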
19027Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
19028 const unsigned XOffset = 12;
19029 auto *DP = EmitAMDGPUDispatchPtr(CGF);
19030 // Indexing the HSA kernel_dispatch_packet struct.
19031 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
19032 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
19033 auto *LD = CGF.Builder.CreateLoad(
19034 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
19035
19036 llvm::MDBuilder MDB(CGF.getLLVMContext());
19037
19038 // Known non-zero.
19039 LD->setMetadata(llvm::LLVMContext::MD_range,
19040 MDB.createRange(APInt(32, 1), APInt::getZero(32)));
19041 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
19042 llvm::MDNode::get(CGF.getLLVMContext(), {}));
19043 return LD;
19044}
19045} // namespace
19046
19047// For processing memory ordering and memory scope arguments of various
19048// amdgcn builtins.
19049 // \p Order takes a C++11-compatible memory-ordering specifier and converts
19050 // it into LLVM's memory ordering specifier using the atomic C ABI, writing
19051 // it to \p AO. \p Scope takes a const char * and converts it into an AMDGCN
19052// specific SyncScopeID and writes it to \p SSID.
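// Editorial example (illustrative): __builtin_amdgcn_fence(__ATOMIC_SEQ_CST,
// "workgroup") maps \p AO to SequentiallyConsistent and \p SSID to the
// "workgroup" sync scope.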
19053 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
19054 llvm::AtomicOrdering &AO,
19055 llvm::SyncScope::ID &SSID) {
19056 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
19057
19058 // Map C11/C++11 memory ordering to LLVM memory ordering
19059 assert(llvm::isValidAtomicOrderingCABI(ord));
19060 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
19061 case llvm::AtomicOrderingCABI::acquire:
19062 case llvm::AtomicOrderingCABI::consume:
19063 AO = llvm::AtomicOrdering::Acquire;
19064 break;
19065 case llvm::AtomicOrderingCABI::release:
19066 AO = llvm::AtomicOrdering::Release;
19067 break;
19068 case llvm::AtomicOrderingCABI::acq_rel:
19069 AO = llvm::AtomicOrdering::AcquireRelease;
19070 break;
19071 case llvm::AtomicOrderingCABI::seq_cst:
19072 AO = llvm::AtomicOrdering::SequentiallyConsistent;
19073 break;
19074 case llvm::AtomicOrderingCABI::relaxed:
19075 AO = llvm::AtomicOrdering::Monotonic;
19076 break;
19077 }
19078
19079 // Some of the atomic builtins take the scope as a string name.
19080 StringRef scp;
19081 if (llvm::getConstantStringInfo(Scope, scp)) {
19082 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
19083 return;
19084 }
19085
19086 // Older builtins had an enum argument for the memory scope.
19087 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
19088 switch (scope) {
19089 case 0: // __MEMORY_SCOPE_SYSTEM
19090 SSID = llvm::SyncScope::System;
19091 break;
19092 case 1: // __MEMORY_SCOPE_DEVICE
19093 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19094 break;
19095 case 2: // __MEMORY_SCOPE_WRKGRP
19096 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
19097 break;
19098 case 3: // __MEMORY_SCOPE_WVFRNT
19099 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
19100 break;
19101 case 4: // __MEMORY_SCOPE_SINGLE
19102 SSID = llvm::SyncScope::SingleThread;
19103 break;
19104 default:
19105 SSID = llvm::SyncScope::System;
19106 break;
19107 }
19108}
19109
19110llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
19111 unsigned Idx,
19112 const CallExpr *E) {
19113 llvm::Value *Arg = nullptr;
19114 if ((ICEArguments & (1 << Idx)) == 0) {
19115 Arg = EmitScalarExpr(E->getArg(Idx));
19116 } else {
19117 // If this is required to be a constant, constant fold it so that we
19118 // know that the generated intrinsic gets a ConstantInt.
19119 std::optional<llvm::APSInt> Result =
19120 E->getArg(Idx)->getIntegerConstantExpr(getContext());
19121 assert(Result && "Expected argument to be a constant");
19122 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
19123 }
19124 return Arg;
19125}
19126
19127// Return dot product intrinsic that corresponds to the QT scalar type
19128static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19129 if (QT->isFloatingType())
19130 return RT.getFDotIntrinsic();
19131 if (QT->isSignedIntegerType())
19132 return RT.getSDotIntrinsic();
19133 assert(QT->isUnsignedIntegerType());
19134 return RT.getUDotIntrinsic();
19135}
19136
19137static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
19138 if (QT->hasSignedIntegerRepresentation()) {
19139 return RT.getFirstBitSHighIntrinsic();
19140 }
19141
19142 assert(QT->hasUnsignedIntegerRepresentation());
19143 return RT.getFirstBitUHighIntrinsic();
19144}
19145
19146 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
19147 const CallExpr *E,
19148 ReturnValueSlot ReturnValue) {
19149 if (!getLangOpts().HLSL)
19150 return nullptr;
19151
19152 switch (BuiltinID) {
19153 case Builtin::BI__builtin_hlsl_resource_getpointer: {
19154 Value *HandleOp = EmitScalarExpr(E->getArg(0));
19155 Value *IndexOp = EmitScalarExpr(E->getArg(1));
19156
19157 // TODO: Map to an hlsl_device address space.
19158 llvm::Type *RetTy = llvm::PointerType::getUnqual(getLLVMContext());
19159
19160 return Builder.CreateIntrinsic(RetTy, Intrinsic::dx_resource_getpointer,
19161 ArrayRef<Value *>{HandleOp, IndexOp});
19162 }
19163 case Builtin::BI__builtin_hlsl_all: {
19164 Value *Op0 = EmitScalarExpr(E->getArg(0));
19165 return Builder.CreateIntrinsic(
19166 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19167 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19168 "hlsl.all");
19169 }
19170 case Builtin::BI__builtin_hlsl_any: {
19171 Value *Op0 = EmitScalarExpr(E->getArg(0));
19172 return Builder.CreateIntrinsic(
19173 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
19174 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
19175 "hlsl.any");
19176 }
19177 case Builtin::BI__builtin_hlsl_asdouble:
19178 return handleAsDoubleBuiltin(*this, E);
19179 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
19180 Value *OpX = EmitScalarExpr(E->getArg(0));
19181 Value *OpMin = EmitScalarExpr(E->getArg(1));
19182 Value *OpMax = EmitScalarExpr(E->getArg(2));
19183
19184 QualType Ty = E->getArg(0)->getType();
19185 if (auto *VecTy = Ty->getAs<VectorType>())
19186 Ty = VecTy->getElementType();
19187
19188 Intrinsic::ID Intr;
19189 if (Ty->isFloatingType()) {
19190 Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
19191 } else if (Ty->isUnsignedIntegerType()) {
19192 Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
19193 } else {
19194 assert(Ty->isSignedIntegerType());
19195 Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
19196 }
19197 return Builder.CreateIntrinsic(
19198 /*ReturnType=*/OpX->getType(), Intr,
19199 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
19200 }
19201 case Builtin::BI__builtin_hlsl_cross: {
19202 Value *Op0 = EmitScalarExpr(E->getArg(0));
19203 Value *Op1 = EmitScalarExpr(E->getArg(1));
19204 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19205 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19206 "cross operands must have a float representation");
19207 // make sure each vector has exactly 3 elements
19208 assert(
19209 E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19210 E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
19211 "input vectors must have 3 elements each");
19212 return Builder.CreateIntrinsic(
19213 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
19214 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
19215 }
19216 case Builtin::BI__builtin_hlsl_dot: {
19217 Value *Op0 = EmitScalarExpr(E->getArg(0));
19218 Value *Op1 = EmitScalarExpr(E->getArg(1));
19219 llvm::Type *T0 = Op0->getType();
19220 llvm::Type *T1 = Op1->getType();
19221
19222 // If the arguments are scalars, just emit a multiply
19223 if (!T0->isVectorTy() && !T1->isVectorTy()) {
19224 if (T0->isFloatingPointTy())
19225 return Builder.CreateFMul(Op0, Op1, "hlsl.dot");
19226
19227 if (T0->isIntegerTy())
19228 return Builder.CreateMul(Op0, Op1, "hlsl.dot");
19229
19230 llvm_unreachable(
19231 "Scalar dot product is only supported on ints and floats.");
19232 }
19233 // For vectors, validate types and emit the appropriate intrinsic
19234
19235 // A VectorSplat should have happened
19236 assert(T0->isVectorTy() && T1->isVectorTy() &&
19237 "Dot product of vector and scalar is not supported.");
19238
19239 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
19240 [[maybe_unused]] auto *VecTy1 =
19241 E->getArg(1)->getType()->getAs<VectorType>();
19242
19243 assert(VecTy0->getElementType() == VecTy1->getElementType() &&
19244            "Dot product of vectors needs the same element types.");
19245
19246 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
19247 "Dot product requires vectors to be of the same size.");
19248
19249 return Builder.CreateIntrinsic(
19250 /*ReturnType=*/T0->getScalarType(),
19251 getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
19252 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
19253 }
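  // [Illustrative note, not part of the original source] For scalar operands
  // the "dot product" degenerates to a single multiply, e.g. a hypothetical
  // dot(2.0f, 3.0f) becomes an fmul, while vector operands are lowered to the
  // target dot intrinsic chosen by getDotProductIntrinsic() above, which
  // returns the scalar element type.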
19254 case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
19255 Value *A = EmitScalarExpr(E->getArg(0));
19256 Value *B = EmitScalarExpr(E->getArg(1));
19257 Value *C = EmitScalarExpr(E->getArg(2));
19258
19259 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
19260 return Builder.CreateIntrinsic(
19261 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19262 "hlsl.dot4add.i8packed");
19263 }
19264 case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
19265 Value *A = EmitScalarExpr(E->getArg(0));
19266 Value *B = EmitScalarExpr(E->getArg(1));
19267 Value *C = EmitScalarExpr(E->getArg(2));
19268
19269 Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
19270 return Builder.CreateIntrinsic(
19271 /*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
19272 "hlsl.dot4add.u8packed");
19273 }
19274 case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19275
19276 Value *X = EmitScalarExpr(E->getArg(0));
19277
19278 return Builder.CreateIntrinsic(
19279 /*ReturnType=*/ConvertType(E->getType()),
19280         getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
19281         ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
19282 }
19283 case Builtin::BI__builtin_hlsl_lerp: {
19284 Value *X = EmitScalarExpr(E->getArg(0));
19285 Value *Y = EmitScalarExpr(E->getArg(1));
19286 Value *S = EmitScalarExpr(E->getArg(2));
19287 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19288 llvm_unreachable("lerp operand must have a float representation");
19289 return Builder.CreateIntrinsic(
19290 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
19291 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
19292 }
19293 case Builtin::BI__builtin_hlsl_length: {
19294 Value *X = EmitScalarExpr(E->getArg(0));
19295
19296 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19297 "length operand must have a float representation");
19298 // if the operand is a scalar, we can use the fabs llvm intrinsic directly
19299 if (!E->getArg(0)->getType()->isVectorType())
19300 return EmitFAbs(*this, X);
19301
19302 return Builder.CreateIntrinsic(
19303 /*ReturnType=*/X->getType()->getScalarType(),
19304 CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
19305 nullptr, "hlsl.length");
19306 }
19307 case Builtin::BI__builtin_hlsl_normalize: {
19308 Value *X = EmitScalarExpr(E->getArg(0));
19309
19310 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19311 "normalize operand must have a float representation");
19312
19313 return Builder.CreateIntrinsic(
19314 /*ReturnType=*/X->getType(),
19315 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
19316 nullptr, "hlsl.normalize");
19317 }
19318 case Builtin::BI__builtin_hlsl_elementwise_degrees: {
19319 Value *X = EmitScalarExpr(E->getArg(0));
19320
19321 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19322 "degree operand must have a float representation");
19323
19324 return Builder.CreateIntrinsic(
19325 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
19326 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
19327 }
19328 case Builtin::BI__builtin_hlsl_elementwise_frac: {
19329 Value *Op0 = EmitScalarExpr(E->getArg(0));
19330 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19331 llvm_unreachable("frac operand must have a float representation");
19332 return Builder.CreateIntrinsic(
19333 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
19334 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
19335   }
19336   case Builtin::BI__builtin_hlsl_elementwise_isinf: {
19337 Value *Op0 = EmitScalarExpr(E->getArg(0));
19338 llvm::Type *Xty = Op0->getType();
19339 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
19340 if (Xty->isVectorTy()) {
19341 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
19342 retType = llvm::VectorType::get(
19343 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19344 }
19345 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19346 llvm_unreachable("isinf operand must have a float representation");
19347 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
19348 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
19349 }
19350 case Builtin::BI__builtin_hlsl_mad: {
19351 Value *M = EmitScalarExpr(E->getArg(0));
19352 Value *A = EmitScalarExpr(E->getArg(1));
19353 Value *B = EmitScalarExpr(E->getArg(2));
19354 if (E->getArg(0)->getType()->hasFloatingRepresentation())
19355 return Builder.CreateIntrinsic(
19356 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
19357 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
19358
19359 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
19360 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19361 return Builder.CreateIntrinsic(
19362 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
19363 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
19364
19365 Value *Mul = Builder.CreateNSWMul(M, A);
19366 return Builder.CreateNSWAdd(Mul, B);
19367 }
19368 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
19369 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
19370 return Builder.CreateIntrinsic(
19371 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
19372 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
19373
19374 Value *Mul = Builder.CreateNUWMul(M, A);
19375 return Builder.CreateNUWAdd(Mul, B);
19376 }
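  // [Illustrative note, not part of the original source] mad(m, a, b) is thus
  // lowered three ways: llvm.fmuladd for floating-point operands, dx.imad /
  // dx.umad when targeting DXIL, and an explicit mul + add pair (with nsw/nuw
  // flags) for integer operands on other targets.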
19377 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
19378 Value *Op0 = EmitScalarExpr(E->getArg(0));
19379 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19380 llvm_unreachable("rcp operand must have a float representation");
19381 llvm::Type *Ty = Op0->getType();
19382 llvm::Type *EltTy = Ty->getScalarType();
19383 Constant *One = Ty->isVectorTy()
19384 ? ConstantVector::getSplat(
19385 ElementCount::getFixed(
19386 cast<FixedVectorType>(Ty)->getNumElements()),
19387 ConstantFP::get(EltTy, 1.0))
19388 : ConstantFP::get(EltTy, 1.0);
19389 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
19390 }
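  // [Illustrative note, not part of the original source] rcp(x) is emitted as
  // the division 1.0 / x; for vector operands the 1.0 constant is splatted to
  // the operand's element count before the fdiv.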
19391 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
19392 Value *Op0 = EmitScalarExpr(E->getArg(0));
19393 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
19394 llvm_unreachable("rsqrt operand must have a float representation");
19395 return Builder.CreateIntrinsic(
19396 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
19397 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
19398 }
19399 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
19400 Value *Op0 = EmitScalarExpr(E->getArg(0));
19401 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19402 "saturate operand must have a float representation");
19403 return Builder.CreateIntrinsic(
19404 /*ReturnType=*/Op0->getType(),
19405 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
19406 nullptr, "hlsl.saturate");
19407 }
19408 case Builtin::BI__builtin_hlsl_select: {
19409 Value *OpCond = EmitScalarExpr(E->getArg(0));
19410 RValue RValTrue = EmitAnyExpr(E->getArg(1));
19411 Value *OpTrue =
19412 RValTrue.isScalar()
19413 ? RValTrue.getScalarVal()
19414 : RValTrue.getAggregatePointer(E->getArg(1)->getType(), *this);
19415 RValue RValFalse = EmitAnyExpr(E->getArg(2));
19416 Value *OpFalse =
19417 RValFalse.isScalar()
19418 ? RValFalse.getScalarVal()
19419 : RValFalse.getAggregatePointer(E->getArg(2)->getType(), *this);
19420
19421 Value *SelectVal =
19422 Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
19423 if (!RValTrue.isScalar())
19424 Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
19425 ReturnValue.isVolatile());
19426
19427 return SelectVal;
19428 }
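  // [Illustrative note, not part of the original source] Scalar and vector
  // operands produce a plain select instruction; when the true/false operands
  // are aggregates, their addresses are selected and the chosen value is also
  // stored to the caller-provided ReturnValue slot.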
19429 case Builtin::BI__builtin_hlsl_step: {
19430 Value *Op0 = EmitScalarExpr(E->getArg(0));
19431 Value *Op1 = EmitScalarExpr(E->getArg(1));
19432 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19433 E->getArg(1)->getType()->hasFloatingRepresentation() &&
19434 "step operands must have a float representation");
19435 return Builder.CreateIntrinsic(
19436 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
19437 ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
19438 }
19439 case Builtin::BI__builtin_hlsl_wave_active_all_true: {
19440 Value *Op = EmitScalarExpr(E->getArg(0));
19441 assert(Op->getType()->isIntegerTy(1) &&
19442 "Intrinsic WaveActiveAllTrue operand must be a bool");
19443
19444 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
19445 return EmitRuntimeCall(
19446 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19447 }
19448 case Builtin::BI__builtin_hlsl_wave_active_any_true: {
19449 Value *Op = EmitScalarExpr(E->getArg(0));
19450 assert(Op->getType()->isIntegerTy(1) &&
19451 "Intrinsic WaveActiveAnyTrue operand must be a bool");
19452
19453 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
19454 return EmitRuntimeCall(
19455 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
19456 }
19457 case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
19458 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19459 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
19460 return EmitRuntimeCall(
19461 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
19462 ArrayRef{OpExpr});
19463 }
19464 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
19465     // We don't define a SPIR-V intrinsic; instead this maps to a SPIR-V
19466     // built-in declared in SPIRVBuiltins.td. So we manually select either the
19467     // DirectX intrinsic or the demangled SPIR-V builtin name per target.
19468 switch (CGM.getTarget().getTriple().getArch()) {
19469 case llvm::Triple::dxil:
19470 return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
19471 &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
19472 case llvm::Triple::spirv:
19473     return EmitRuntimeCall(CGM.CreateRuntimeFunction(
19474         llvm::FunctionType::get(IntTy, {}, false),
19475 "__hlsl_wave_get_lane_index", {}, false, true));
19476 default:
19477 llvm_unreachable(
19478 "Intrinsic WaveGetLaneIndex not supported by target architecture");
19479 }
19480 }
19481 case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
19482 Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
19483 return EmitRuntimeCall(
19484 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19485 }
19486 case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
19487     // Due to the use of variadic arguments, we must explicitly retrieve them and
19488 // create our function type.
19489 Value *OpExpr = EmitScalarExpr(E->getArg(0));
19490 Value *OpIndex = EmitScalarExpr(E->getArg(1));
19491 llvm::FunctionType *FT = llvm::FunctionType::get(
19492 OpExpr->getType(), ArrayRef{OpExpr->getType(), OpIndex->getType()},
19493 false);
19494
19495 // Get overloaded name
19496 std::string Name =
19497 Intrinsic::getName(CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
19498 ArrayRef{OpExpr->getType()}, &CGM.getModule());
19499 return EmitRuntimeCall(CGM.CreateRuntimeFunction(FT, Name, {},
19500 /*Local=*/false,
19501 /*AssumeConvergent=*/true),
19502 ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
19503 }
19504 case Builtin::BI__builtin_hlsl_elementwise_sign: {
19505 auto *Arg0 = E->getArg(0);
19506 Value *Op0 = EmitScalarExpr(Arg0);
19507 llvm::Type *Xty = Op0->getType();
19508 llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
19509 if (Xty->isVectorTy()) {
19510 auto *XVecTy = Arg0->getType()->getAs<VectorType>();
19511 retType = llvm::VectorType::get(
19512 retType, ElementCount::getFixed(XVecTy->getNumElements()));
19513 }
19514 assert((Arg0->getType()->hasFloatingRepresentation() ||
19515 Arg0->getType()->hasIntegerRepresentation()) &&
19516 "sign operand must have a float or int representation");
19517
19518     if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
19519       Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
19520 return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
19521 ConstantInt::get(retType, 1), "hlsl.sign");
19522 }
19523
19524 return Builder.CreateIntrinsic(
19525 retType, CGM.getHLSLRuntime().getSignIntrinsic(),
19526 ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
19527 }
19528 case Builtin::BI__builtin_hlsl_elementwise_radians: {
19529 Value *Op0 = EmitScalarExpr(E->getArg(0));
19530 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19531 "radians operand must have a float representation");
19532 return Builder.CreateIntrinsic(
19533 /*ReturnType=*/Op0->getType(),
19534 CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
19535 nullptr, "hlsl.radians");
19536 }
19537 case Builtin::BI__builtin_hlsl_buffer_update_counter: {
19538 Value *ResHandle = EmitScalarExpr(E->getArg(0));
19539 Value *Offset = EmitScalarExpr(E->getArg(1));
19540 Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
19541 return Builder.CreateIntrinsic(
19542 /*ReturnType=*/Offset->getType(),
19543 CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
19544 ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
19545 }
19546 case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
19547
19548 assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
19549 E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
19550 E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
19551            "splitdouble operand types mismatch");
19552 return handleHlslSplitdouble(E, this);
19553 }
19554 case Builtin::BI__builtin_hlsl_elementwise_clip:
19555 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
19556 "clip operands types mismatch");
19557 return handleHlslClip(E, this);
19558 case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
19559 Intrinsic::ID ID =
19560 CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
19561 return EmitRuntimeCall(
19562 Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
19563 }
19564 }
19565 return nullptr;
19566}
19567
19568void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
19569 const CallExpr *E) {
19570 constexpr const char *Tag = "amdgpu-as";
19571
19572 LLVMContext &Ctx = Inst->getContext();
19573   SmallVector<MMRAMetadata::TagT, 3> MMRAs;
19574   for (unsigned K = 2; K < E->getNumArgs(); ++K) {
19575 llvm::Value *V = EmitScalarExpr(E->getArg(K));
19576 StringRef AS;
19577 if (llvm::getConstantStringInfo(V, AS)) {
19578 MMRAs.push_back({Tag, AS});
19579 // TODO: Delete the resulting unused constant?
19580 continue;
19581 }
19582 CGM.Error(E->getExprLoc(),
19583 "expected an address space name as a string literal");
19584 }
19585
19586 llvm::sort(MMRAs);
19587 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
19588 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
19589}
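// [Illustrative note, not part of the original source] A hypothetical fence
// with extra address-space arguments, e.g.
//   __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent", "local", "global");
// gets !mmra metadata carrying the "amdgpu-as:local" and "amdgpu-as:global"
// tags, which lets the backend limit which address spaces the fence orders.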
19590
19591 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
19592                                               const CallExpr *E) {
19593 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
19594 llvm::SyncScope::ID SSID;
19595 switch (BuiltinID) {
19596 case AMDGPU::BI__builtin_amdgcn_div_scale:
19597 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
19598     // Translate from the intrinsic's struct return to the builtin's out
19599 // argument.
19600
19601 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
19602
19603 llvm::Value *X = EmitScalarExpr(E->getArg(0));
19604 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
19605 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
19606
19607 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
19608 X->getType());
19609
19610 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
19611
19612 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
19613 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
19614
19615 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
19616
19617 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
19618 Builder.CreateStore(FlagExt, FlagOutPtr);
19619 return Result;
19620 }
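  // [Illustrative note, not part of the original source] The struct-returning
  // amdgcn.div.scale intrinsic is unpacked here: element 0 is the scaled
  // result and element 1 (the scale flag) is zero-extended and stored through
  // the builtin's fourth, pointer-typed out argument.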
19621 case AMDGPU::BI__builtin_amdgcn_div_fmas:
19622 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
19623 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19624 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19625 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19626 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
19627
19628 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
19629 Src0->getType());
19630 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
19631 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
19632 }
19633
19634 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
19635 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19636 Intrinsic::amdgcn_ds_swizzle);
19637 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
19638 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
19639 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
19640     llvm::SmallVector<llvm::Value *, 6> Args;
19641     // Find out if any arguments are required to be integer constant
19642 // expressions.
19643 unsigned ICEArguments = 0;
19644     ASTContext::GetBuiltinTypeError Error;
19645     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
19646 assert(Error == ASTContext::GE_None && "Should not codegen an error");
19647 llvm::Type *DataTy = ConvertType(E->getArg(0)->getType());
19648 unsigned Size = DataTy->getPrimitiveSizeInBits();
19649 llvm::Type *IntTy =
19650 llvm::IntegerType::get(Builder.getContext(), std::max(Size, 32u));
19651 Function *F =
19652 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp8
19653 ? Intrinsic::amdgcn_mov_dpp8
19654 : Intrinsic::amdgcn_update_dpp,
19655 IntTy);
19656 assert(E->getNumArgs() == 5 || E->getNumArgs() == 6 ||
19657 E->getNumArgs() == 2);
19658 bool InsertOld = BuiltinID == AMDGPU::BI__builtin_amdgcn_mov_dpp;
19659 if (InsertOld)
19660 Args.push_back(llvm::PoisonValue::get(IntTy));
19661 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
19662 llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
19663 if (I < (BuiltinID == AMDGPU::BI__builtin_amdgcn_update_dpp ? 2u : 1u) &&
19664 Size < 32) {
19665 if (!DataTy->isIntegerTy())
19666 V = Builder.CreateBitCast(
19667 V, llvm::IntegerType::get(Builder.getContext(), Size));
19668 V = Builder.CreateZExtOrBitCast(V, IntTy);
19669 }
19670 llvm::Type *ExpTy =
19671 F->getFunctionType()->getFunctionParamType(I + InsertOld);
19672 Args.push_back(Builder.CreateTruncOrBitCast(V, ExpTy));
19673 }
19674 Value *V = Builder.CreateCall(F, Args);
19675 if (Size < 32 && !DataTy->isIntegerTy())
19676 V = Builder.CreateTrunc(
19677 V, llvm::IntegerType::get(Builder.getContext(), Size));
19678 return Builder.CreateTruncOrBitCast(V, DataTy);
19679 }
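  // [Illustrative note, not part of the original source] The DPP intrinsics
  // are overloaded only on integer lane types of at least 32 bits, so
  // sub-32-bit or non-integer payloads are bitcast/zero-extended to i32 before
  // the call and truncated/bitcast back to the original type afterwards.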
19680 case AMDGPU::BI__builtin_amdgcn_permlane16:
19681 case AMDGPU::BI__builtin_amdgcn_permlanex16:
19682 return emitBuiltinWithOneOverloadedType<6>(
19683 *this, E,
19684 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
19685 ? Intrinsic::amdgcn_permlane16
19686 : Intrinsic::amdgcn_permlanex16);
19687 case AMDGPU::BI__builtin_amdgcn_permlane64:
19688 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19689 Intrinsic::amdgcn_permlane64);
19690 case AMDGPU::BI__builtin_amdgcn_readlane:
19691 return emitBuiltinWithOneOverloadedType<2>(*this, E,
19692 Intrinsic::amdgcn_readlane);
19693 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
19694 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19695 Intrinsic::amdgcn_readfirstlane);
19696 case AMDGPU::BI__builtin_amdgcn_div_fixup:
19697 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
19698 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
19699 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19700 Intrinsic::amdgcn_div_fixup);
19701 case AMDGPU::BI__builtin_amdgcn_trig_preop:
19702 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
19703 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
19704 case AMDGPU::BI__builtin_amdgcn_rcp:
19705 case AMDGPU::BI__builtin_amdgcn_rcpf:
19706 case AMDGPU::BI__builtin_amdgcn_rcph:
19707 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
19708 case AMDGPU::BI__builtin_amdgcn_sqrt:
19709 case AMDGPU::BI__builtin_amdgcn_sqrtf:
19710 case AMDGPU::BI__builtin_amdgcn_sqrth:
19711 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19712 Intrinsic::amdgcn_sqrt);
19713 case AMDGPU::BI__builtin_amdgcn_rsq:
19714 case AMDGPU::BI__builtin_amdgcn_rsqf:
19715 case AMDGPU::BI__builtin_amdgcn_rsqh:
19716 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
19717 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
19718 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
19719 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19720 Intrinsic::amdgcn_rsq_clamp);
19721 case AMDGPU::BI__builtin_amdgcn_sinf:
19722 case AMDGPU::BI__builtin_amdgcn_sinh:
19723 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
19724 case AMDGPU::BI__builtin_amdgcn_cosf:
19725 case AMDGPU::BI__builtin_amdgcn_cosh:
19726 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
19727 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
19728 return EmitAMDGPUDispatchPtr(*this, E);
19729 case AMDGPU::BI__builtin_amdgcn_logf:
19730 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
19731 case AMDGPU::BI__builtin_amdgcn_exp2f:
19732 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19733 Intrinsic::amdgcn_exp2);
19734 case AMDGPU::BI__builtin_amdgcn_log_clampf:
19735 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19736 Intrinsic::amdgcn_log_clamp);
19737 case AMDGPU::BI__builtin_amdgcn_ldexp:
19738 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
19739 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19740 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19741 llvm::Function *F =
19742 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
19743 return Builder.CreateCall(F, {Src0, Src1});
19744 }
19745 case AMDGPU::BI__builtin_amdgcn_ldexph: {
19746     // The raw instruction behaves differently for out-of-bounds exponent
19747     // values (implicit truncation instead of saturating to short_min/short_max).
19748 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19749 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19750 llvm::Function *F =
19751 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
19752 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
19753 }
19754 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
19755 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
19756 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
19757 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19758 Intrinsic::amdgcn_frexp_mant);
19759 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
19760 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
19761 Value *Src0 = EmitScalarExpr(E->getArg(0));
19762 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19763 { Builder.getInt32Ty(), Src0->getType() });
19764 return Builder.CreateCall(F, Src0);
19765 }
19766 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
19767 Value *Src0 = EmitScalarExpr(E->getArg(0));
19768 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
19769 { Builder.getInt16Ty(), Src0->getType() });
19770 return Builder.CreateCall(F, Src0);
19771 }
19772 case AMDGPU::BI__builtin_amdgcn_fract:
19773 case AMDGPU::BI__builtin_amdgcn_fractf:
19774 case AMDGPU::BI__builtin_amdgcn_fracth:
19775 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19776 Intrinsic::amdgcn_fract);
19777 case AMDGPU::BI__builtin_amdgcn_lerp:
19778 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19779 Intrinsic::amdgcn_lerp);
19780 case AMDGPU::BI__builtin_amdgcn_ubfe:
19781 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19782 Intrinsic::amdgcn_ubfe);
19783 case AMDGPU::BI__builtin_amdgcn_sbfe:
19784 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19785 Intrinsic::amdgcn_sbfe);
19786 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
19787 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
19788 llvm::Type *ResultType = ConvertType(E->getType());
19789 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19790 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
19791 return Builder.CreateCall(F, { Src });
19792 }
19793 case AMDGPU::BI__builtin_amdgcn_uicmp:
19794 case AMDGPU::BI__builtin_amdgcn_uicmpl:
19795 case AMDGPU::BI__builtin_amdgcn_sicmp:
19796 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
19797 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19798 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19799 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19800
19801 // FIXME-GFX10: How should 32 bit mask be handled?
19802 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
19803 { Builder.getInt64Ty(), Src0->getType() });
19804 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19805 }
19806 case AMDGPU::BI__builtin_amdgcn_fcmp:
19807 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
19808 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19809 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19810 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19811
19812 // FIXME-GFX10: How should 32 bit mask be handled?
19813 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
19814 { Builder.getInt64Ty(), Src0->getType() });
19815 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19816 }
19817 case AMDGPU::BI__builtin_amdgcn_class:
19818 case AMDGPU::BI__builtin_amdgcn_classf:
19819 case AMDGPU::BI__builtin_amdgcn_classh:
19820 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
19821 case AMDGPU::BI__builtin_amdgcn_fmed3f:
19822 case AMDGPU::BI__builtin_amdgcn_fmed3h:
19823 return emitBuiltinWithOneOverloadedType<3>(*this, E,
19824 Intrinsic::amdgcn_fmed3);
19825 case AMDGPU::BI__builtin_amdgcn_ds_append:
19826 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
19827 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
19828 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
19829 Value *Src0 = EmitScalarExpr(E->getArg(0));
19830 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
19831 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
19832 }
19833 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19834 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19835 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19836 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19837 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19838 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19839 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19840 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19841 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19842 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19843 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19844 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19845 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19846 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16: {
19847 Intrinsic::ID IID;
19848 switch (BuiltinID) {
19849 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
19850 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
19851 IID = Intrinsic::amdgcn_global_load_tr_b64;
19852 break;
19853 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
19854 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
19855 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
19856 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
19857 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
19858 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
19859 IID = Intrinsic::amdgcn_global_load_tr_b128;
19860 break;
19861 case AMDGPU::BI__builtin_amdgcn_ds_read_tr4_b64_v2i32:
19862 IID = Intrinsic::amdgcn_ds_read_tr4_b64;
19863 break;
19864 case AMDGPU::BI__builtin_amdgcn_ds_read_tr8_b64_v2i32:
19865 IID = Intrinsic::amdgcn_ds_read_tr8_b64;
19866 break;
19867 case AMDGPU::BI__builtin_amdgcn_ds_read_tr6_b96_v3i32:
19868 IID = Intrinsic::amdgcn_ds_read_tr6_b96;
19869 break;
19870 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4i16:
19871 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4f16:
19872 case AMDGPU::BI__builtin_amdgcn_ds_read_tr16_b64_v4bf16:
19873 IID = Intrinsic::amdgcn_ds_read_tr16_b64;
19874 break;
19875 }
19876 llvm::Type *LoadTy = ConvertType(E->getType());
19877 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
19878 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
19879 return Builder.CreateCall(F, {Addr});
19880 }
19881 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
19882 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
19883 {llvm::Type::getInt64Ty(getLLVMContext())});
19884 return Builder.CreateCall(F);
19885 }
19886 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
19887 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
19888 {llvm::Type::getInt64Ty(getLLVMContext())});
19889 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
19890 return Builder.CreateCall(F, {Env});
19891 }
19892 case AMDGPU::BI__builtin_amdgcn_read_exec:
19893 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
19894 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
19895 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
19896 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
19897 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
19898 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
19899 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
19900 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
19901 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
19902 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
19903 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
19904 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
19905 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
19906 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
19907 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
19908
19909 // The builtins take these arguments as vec4 where the last element is
19910 // ignored. The intrinsic takes them as vec3.
19911 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
19912 ArrayRef<int>{0, 1, 2});
19913 RayDir =
19914 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
19915 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
19916 ArrayRef<int>{0, 1, 2});
19917
19918 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
19919 {NodePtr->getType(), RayDir->getType()});
19920 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
19921 RayInverseDir, TextureDescr});
19922 }
19923
19924 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
19925     SmallVector<Value *> Args;
19926     for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19927 Args.push_back(EmitScalarExpr(E->getArg(i)));
19928
19929 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
19930 Value *Call = Builder.CreateCall(F, Args);
19931 Value *Rtn = Builder.CreateExtractValue(Call, 0);
19932 Value *A = Builder.CreateExtractValue(Call, 1);
19933 llvm::Type *RetTy = ConvertType(E->getType());
19934 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
19935 (uint64_t)0);
19936 return Builder.CreateInsertElement(I0, A, 1);
19937 }
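  // [Illustrative note, not part of the original source] The intrinsic returns
  // a two-element struct; its fields are extracted and repacked here into the
  // two-element vector type the builtin is declared to return.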
19938 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
19939 case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
19940 llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
19941     Function *F = CGM.getIntrinsic(
19942         BuiltinID == AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19943 ? Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4
19944 : Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4,
19945 {VT, VT});
19946
19947     SmallVector<Value *> Args;
19948     for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I)
19949 Args.push_back(EmitScalarExpr(E->getArg(I)));
19950 return Builder.CreateCall(F, Args);
19951 }
19952 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19953 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19954 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19955 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19956 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19957 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19958 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19959 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19960 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19961 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19962 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19963 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19964 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19965 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19966 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19967 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19968 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19969 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19970 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19971 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19972 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19973 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19974 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19975 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19976 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19977 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19978 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19979 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19980 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19981 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19982 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19983 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19984 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19985 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19986 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19987 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19988 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19989 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19990 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19991 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19992 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19993 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19994 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19995 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19996 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19997 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19998 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19999 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20000 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20001 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20002 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20003 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20004 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20005 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20006 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20007 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20008 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20009 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20010 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20011 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
20012
20013 // These operations perform a matrix multiplication and accumulation of
20014 // the form:
20015 // D = A * B + C
20016     // We need one type for the A/B matrices and one for the C/D matrices.
20017     // Sparse matrix operations can have different types for A and B, plus an
20018     // additional type for the sparsity index.
20019     // The destination type should come before the types of the source operands.
20020 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
20021 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
20022 // There is no need for the variable opsel argument, so always set it to
20023 // "false".
20024 bool AppendFalseForOpselArg = false;
20025 unsigned BuiltinWMMAOp;
20026
20027 switch (BuiltinID) {
20028 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
20029 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
20030 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
20031 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
20032 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20033 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
20034 break;
20035 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
20036 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
20037 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
20038 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
20039 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20040 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
20041 break;
20042 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
20043 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
20044 AppendFalseForOpselArg = true;
20045 [[fallthrough]];
20046 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
20047 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
20048 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20049 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
20050 break;
20051 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
20052 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
20053 AppendFalseForOpselArg = true;
20054 [[fallthrough]];
20055 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
20056 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
20057 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20058 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
20059 break;
20060 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
20061 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
20062 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20063 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
20064 break;
20065 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
20066 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
20067 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20068 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
20069 break;
20070 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
20071 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
20072 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
20073 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
20074 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20075 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
20076 break;
20077 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
20078 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
20079 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
20080 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
20081 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20082 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
20083 break;
20084 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
20085 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
20086 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20087 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
20088 break;
20089 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
20090 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
20091 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20092 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
20093 break;
20094 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
20095 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
20096 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20097 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
20098 break;
20099 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
20100 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
20101 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
20102 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
20103 break;
20104 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
20105 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
20106 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
20107 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
20108 break;
20109 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
20110 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
20111 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20112 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
20113 break;
20114 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
20115 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
20116 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20117 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
20118 break;
20119 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
20120 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
20121 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20122 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
20123 break;
20124 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
20125 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
20126 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20127 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
20128 break;
20129 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
20130 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
20131 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20132 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
20133 break;
20134 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
20135 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
20136 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20137 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
20138 break;
20139 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
20140 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
20141 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
20142 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
20143 break;
20144 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
20145 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
20146 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20147 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
20148 break;
20149 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
20150 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
20151 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20152 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
20153 break;
20154 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
20155 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
20156 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20157 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
20158 break;
20159 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
20160 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
20161 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
20162 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
20163 break;
20164 }
20165
20166     SmallVector<Value *> Args;
20167     for (int i = 0, e = E->getNumArgs(); i != e; ++i)
20168 Args.push_back(EmitScalarExpr(E->getArg(i)));
20169 if (AppendFalseForOpselArg)
20170 Args.push_back(Builder.getFalse());
20171
20172     SmallVector<llvm::Type *, 6> ArgTypes;
20173     for (auto ArgIdx : ArgsForMatchingMatrixTypes)
20174 ArgTypes.push_back(Args[ArgIdx]->getType());
20175
20176 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
20177 return Builder.CreateCall(F, Args);
20178 }
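  // [Illustrative note, not part of the original source]
  // ArgsForMatchingMatrixTypes lists the argument positions whose IR types
  // overload the selected intrinsic: e.g. {2, 0} picks the C/D accumulator
  // type and then the A/B type, while the sparse swmmac forms also append the
  // B operand and sparsity-index types.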
20179
20180 // amdgcn workitem
20181 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
20182 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
20183 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
20184 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
20185 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
20186 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
20187
20188 // amdgcn workgroup size
20189 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
20190 return EmitAMDGPUWorkGroupSize(*this, 0);
20191 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
20192 return EmitAMDGPUWorkGroupSize(*this, 1);
20193 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
20194 return EmitAMDGPUWorkGroupSize(*this, 2);
20195
20196 // amdgcn grid size
20197 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
20198 return EmitAMDGPUGridSize(*this, 0);
20199 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
20200 return EmitAMDGPUGridSize(*this, 1);
20201 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
20202 return EmitAMDGPUGridSize(*this, 2);
20203
20204 // r600 intrinsics
20205 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
20206 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
20207 return emitBuiltinWithOneOverloadedType<1>(*this, E,
20208 Intrinsic::r600_recipsqrt_ieee);
20209 case AMDGPU::BI__builtin_r600_read_tidig_x:
20210 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
20211 case AMDGPU::BI__builtin_r600_read_tidig_y:
20212 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
20213 case AMDGPU::BI__builtin_r600_read_tidig_z:
20214 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
20215 case AMDGPU::BI__builtin_amdgcn_alignbit: {
20216 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
20217 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
20218 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
20219 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
20220 return Builder.CreateCall(F, { Src0, Src1, Src2 });
20221 }
20222 case AMDGPU::BI__builtin_amdgcn_fence: {
20223     ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
20224                             EmitScalarExpr(E->getArg(1)), AO, SSID);
20225 FenceInst *Fence = Builder.CreateFence(AO, SSID);
20226 if (E->getNumArgs() > 2)
20227       AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
20228     return Fence;
20229 }
20230 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20231 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20232 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20233 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20234 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20235 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20236 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20237 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20238 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20239 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20240 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20241 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20242 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20243 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20244 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20245 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20246 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20247 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20248 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20249 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20250 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20251 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20252 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
20253 llvm::AtomicRMWInst::BinOp BinOp;
20254 switch (BuiltinID) {
20255 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
20256 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
20257 BinOp = llvm::AtomicRMWInst::UIncWrap;
20258 break;
20259 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
20260 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
20261 BinOp = llvm::AtomicRMWInst::UDecWrap;
20262 break;
20263 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
20264 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
20265 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
20266 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
20267 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
20268 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
20269 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
20270 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
20271 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
20272 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
20273 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
20274 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
20275 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
20276 BinOp = llvm::AtomicRMWInst::FAdd;
20277 break;
20278 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
20279 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
20280 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
20281 BinOp = llvm::AtomicRMWInst::FMin;
20282 break;
20283 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
20284 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
20285 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
20286 BinOp = llvm::AtomicRMWInst::FMax;
20287 break;
20288 }
20289
20290 Address Ptr = CheckAtomicAlignment(*this, E);
20291 Value *Val = EmitScalarExpr(E->getArg(1));
20292 llvm::Type *OrigTy = Val->getType();
20293 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
20294
20295 bool Volatile;
20296
20297 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
20298 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
20299 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
20300 // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
20301 Volatile =
20302 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
20303 } else {
20304 // Infer volatile from the passed type.
20305 Volatile =
20306           PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
20307     }
20308
20309 if (E->getNumArgs() >= 4) {
20310 // Some of the builtins have explicit ordering and scope arguments.
20311       ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
20312                               EmitScalarExpr(E->getArg(3)), AO, SSID);
20313 } else {
20314 // Most of the builtins do not have syncscope/order arguments. For DS
20315 // atomics the scope doesn't really matter, as they implicitly operate at
20316 // workgroup scope.
20317 //
20318 // The global/flat cases need to use agent scope to consistently produce
20319 // the native instruction instead of a cmpxchg expansion.
20320 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
20321 AO = AtomicOrdering::Monotonic;
20322
20323 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
20324 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
20325 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
20326 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
20327 llvm::Type *V2BF16Ty = FixedVectorType::get(
20328 llvm::Type::getBFloatTy(Builder.getContext()), 2);
20329 Val = Builder.CreateBitCast(Val, V2BF16Ty);
20330 }
20331 }
20332
20333 llvm::AtomicRMWInst *RMW =
20334 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
20335 if (Volatile)
20336 RMW->setVolatile(true);
20337
20338 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
20339 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
20340 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
20341 // instruction for flat and global operations.
20342 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
20343 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
20344
20345 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
20346 // instruction, but this only matters for float fadd.
20347 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
20348 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
20349 }
20350
20351 return Builder.CreateBitCast(RMW, OrigTy);
20352 }
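  // [Illustrative note, not part of the original source] For example, a
  // hypothetical __builtin_amdgcn_global_atomic_fadd_f32 call becomes an
  // atomicrmw fadd at "agent" scope with monotonic ordering, tagged with
  // !amdgpu.no.fine.grained.memory (and !amdgpu.ignore.denormal.mode in the
  // f32 case) so the backend can emit the native instruction rather than a
  // cmpxchg expansion.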
20353 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
20354 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
20355 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
20356 llvm::Type *ResultType = ConvertType(E->getType());
20357 // s_sendmsg_rtn is mangled using return type only.
20358 Function *F =
20359 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
20360 return Builder.CreateCall(F, {Arg});
20361 }
20362 case AMDGPU::BI__builtin_amdgcn_permlane16_swap:
20363 case AMDGPU::BI__builtin_amdgcn_permlane32_swap: {
20364 // Because builtin types are limited, and the intrinsic uses a struct/pair
20365 // output, marshal the pair-of-i32 to <2 x i32>.
20366 Value *VDstOld = EmitScalarExpr(E->getArg(0));
20367 Value *VSrcOld = EmitScalarExpr(E->getArg(1));
20368 Value *FI = EmitScalarExpr(E->getArg(2));
20369 Value *BoundCtrl = EmitScalarExpr(E->getArg(3));
20370 Function *F =
20371 CGM.getIntrinsic(BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16_swap
20372 ? Intrinsic::amdgcn_permlane16_swap
20373 : Intrinsic::amdgcn_permlane32_swap);
20374 llvm::CallInst *Call =
20375 Builder.CreateCall(F, {VDstOld, VSrcOld, FI, BoundCtrl});
20376
20377 llvm::Value *Elt0 = Builder.CreateExtractValue(Call, 0);
20378 llvm::Value *Elt1 = Builder.CreateExtractValue(Call, 1);
20379
20380 llvm::Type *ResultType = ConvertType(E->getType());
20381
20382 llvm::Value *Insert0 = Builder.CreateInsertElement(
20383 llvm::PoisonValue::get(ResultType), Elt0, UINT64_C(0));
20384 llvm::Value *AsVector =
20385 Builder.CreateInsertElement(Insert0, Elt1, UINT64_C(1));
20386 return AsVector;
20387 }
20388 case AMDGPU::BI__builtin_amdgcn_bitop3_b32:
20389 case AMDGPU::BI__builtin_amdgcn_bitop3_b16:
20390 return emitQuaternaryBuiltin(*this, E, Intrinsic::amdgcn_bitop3);
20391 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
20392 return emitBuiltinWithOneOverloadedType<4>(
20393 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
20394 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
20395 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
20396 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
20397 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
20398 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
20399 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
20400 return emitBuiltinWithOneOverloadedType<5>(
20401 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
20402 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20403 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20404 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20405 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20406 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20407 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
20408 llvm::Type *RetTy = nullptr;
20409 switch (BuiltinID) {
20410 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
20411 RetTy = Int8Ty;
20412 break;
20413 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
20414 RetTy = Int16Ty;
20415 break;
20416 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
20417 RetTy = Int32Ty;
20418 break;
20419 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
20420 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
20421 break;
20422 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
20423 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
20424 break;
20425 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
20426 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
20427 break;
20428 }
20429 Function *F =
20430 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
20431 return Builder.CreateCall(
20432 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
20433 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
20434 }
20435 case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
20436 return emitBuiltinWithOneOverloadedType<2>(
20437 *this, E, Intrinsic::amdgcn_s_prefetch_data);
20438 default:
20439 return nullptr;
20440 }
20441}
20442
20443/// Handle a SystemZ function in which the final argument is a pointer
20444/// to an int that receives the post-instruction CC value. At the LLVM level
20445 /// this is represented as a function that returns a {result, cc} pair.
20446 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
20447 unsigned IntrinsicID,
20448 const CallExpr *E) {
20449 unsigned NumArgs = E->getNumArgs() - 1;
20450 SmallVector<Value *, 8> Args(NumArgs);
20451 for (unsigned I = 0; I < NumArgs; ++I)
20452 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
20453 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
20454 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
20455 Value *Call = CGF.Builder.CreateCall(F, Args);
20456 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
20457 CGF.Builder.CreateStore(CC, CCPtr);
20458 return CGF.Builder.CreateExtractValue(Call, 0);
20459}
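// Illustrative sketch (the concrete intrinsic is an assumption): for a CC-producing
// intrinsic such as llvm.s390.vceqbs, the helper above emits roughly
//   %pair = call { <16 x i8>, i32 } @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
//   %cc   = extractvalue { <16 x i8>, i32 } %pair, 1   ; stored through the CC pointer
// and returns extractvalue %pair, 0 as the builtin's value.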
20460
20461 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
20462 const CallExpr *E) {
20463 switch (BuiltinID) {
20464 case SystemZ::BI__builtin_tbegin: {
20465 Value *TDB = EmitScalarExpr(E->getArg(0));
20466 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20467 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
20468 return Builder.CreateCall(F, {TDB, Control});
20469 }
20470 case SystemZ::BI__builtin_tbegin_nofloat: {
20471 Value *TDB = EmitScalarExpr(E->getArg(0));
20472 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
20473 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
20474 return Builder.CreateCall(F, {TDB, Control});
20475 }
20476 case SystemZ::BI__builtin_tbeginc: {
20477 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
20478 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
20479 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
20480 return Builder.CreateCall(F, {TDB, Control});
20481 }
20482 case SystemZ::BI__builtin_tabort: {
20483 Value *Data = EmitScalarExpr(E->getArg(0));
20484 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
20485 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
20486 }
20487 case SystemZ::BI__builtin_non_tx_store: {
20488 Value *Address = EmitScalarExpr(E->getArg(0));
20489 Value *Data = EmitScalarExpr(E->getArg(1));
20490 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
20491 return Builder.CreateCall(F, {Data, Address});
20492 }
20493
20494 // Vector builtins. Note that most vector builtins are mapped automatically
20495 // to target-specific LLVM intrinsics. The ones handled specially here can
20496 // be represented via standard LLVM IR, which is preferable to enable common
20497 // LLVM optimizations.
20498
20499 case SystemZ::BI__builtin_s390_vclzb:
20500 case SystemZ::BI__builtin_s390_vclzh:
20501 case SystemZ::BI__builtin_s390_vclzf:
20502 case SystemZ::BI__builtin_s390_vclzg: {
20503 llvm::Type *ResultType = ConvertType(E->getType());
20504 Value *X = EmitScalarExpr(E->getArg(0));
20505 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20506 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
20507 return Builder.CreateCall(F, {X, Undef});
20508 }
20509
20510 case SystemZ::BI__builtin_s390_vctzb:
20511 case SystemZ::BI__builtin_s390_vctzh:
20512 case SystemZ::BI__builtin_s390_vctzf:
20513 case SystemZ::BI__builtin_s390_vctzg: {
20514 llvm::Type *ResultType = ConvertType(E->getType());
20515 Value *X = EmitScalarExpr(E->getArg(0));
20516 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
20517 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
20518 return Builder.CreateCall(F, {X, Undef});
20519 }
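// For illustration (assumed vector type): __builtin_s390_vclzf on a vector unsigned int
// lowers to
//   call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)
// where the trailing i1 false requests a defined result for zero inputs; the vctz*
// builtins map onto llvm.cttz in the same way.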
20520
20521 case SystemZ::BI__builtin_s390_verllb:
20522 case SystemZ::BI__builtin_s390_verllh:
20523 case SystemZ::BI__builtin_s390_verllf:
20524 case SystemZ::BI__builtin_s390_verllg: {
20525 llvm::Type *ResultType = ConvertType(E->getType());
20526 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20527 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20528 // Splat scalar rotate amount to vector type.
20529 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
20530 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
20531 Amt = Builder.CreateVectorSplat(NumElts, Amt);
20532 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20533 return Builder.CreateCall(F, { Src, Src, Amt });
20534 }
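// A funnel shift with identical inputs is a rotate, so (illustrative, assumed types)
// __builtin_s390_verllf(x, n) becomes roughly
//   %amt = vector splat of n cast to i32
//   call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %amt)
// i.e. a rotate-left of each element by n.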
20535
20536 case SystemZ::BI__builtin_s390_verllvb:
20537 case SystemZ::BI__builtin_s390_verllvh:
20538 case SystemZ::BI__builtin_s390_verllvf:
20539 case SystemZ::BI__builtin_s390_verllvg: {
20540 llvm::Type *ResultType = ConvertType(E->getType());
20541 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
20542 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
20543 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
20544 return Builder.CreateCall(F, { Src, Src, Amt });
20545 }
20546
20547 case SystemZ::BI__builtin_s390_vfsqsb:
20548 case SystemZ::BI__builtin_s390_vfsqdb: {
20549 llvm::Type *ResultType = ConvertType(E->getType());
20550 Value *X = EmitScalarExpr(E->getArg(0));
20551 if (Builder.getIsFPConstrained()) {
20552 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
20553 return Builder.CreateConstrainedFPCall(F, { X });
20554 } else {
20555 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
20556 return Builder.CreateCall(F, X);
20557 }
20558 }
20559 case SystemZ::BI__builtin_s390_vfmasb:
20560 case SystemZ::BI__builtin_s390_vfmadb: {
20561 llvm::Type *ResultType = ConvertType(E->getType());
20562 Value *X = EmitScalarExpr(E->getArg(0));
20563 Value *Y = EmitScalarExpr(E->getArg(1));
20564 Value *Z = EmitScalarExpr(E->getArg(2));
20565 if (Builder.getIsFPConstrained()) {
20566 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20567 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
20568 } else {
20569 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20570 return Builder.CreateCall(F, {X, Y, Z});
20571 }
20572 }
20573 case SystemZ::BI__builtin_s390_vfmssb:
20574 case SystemZ::BI__builtin_s390_vfmsdb: {
20575 llvm::Type *ResultType = ConvertType(E->getType());
20576 Value *X = EmitScalarExpr(E->getArg(0));
20577 Value *Y = EmitScalarExpr(E->getArg(1));
20578 Value *Z = EmitScalarExpr(E->getArg(2));
20579 if (Builder.getIsFPConstrained()) {
20580 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20581 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20582 } else {
20583 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20584 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
20585 }
20586 }
20587 case SystemZ::BI__builtin_s390_vfnmasb:
20588 case SystemZ::BI__builtin_s390_vfnmadb: {
20589 llvm::Type *ResultType = ConvertType(E->getType());
20590 Value *X = EmitScalarExpr(E->getArg(0));
20591 Value *Y = EmitScalarExpr(E->getArg(1));
20592 Value *Z = EmitScalarExpr(E->getArg(2));
20593 if (Builder.getIsFPConstrained()) {
20594 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20595 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
20596 } else {
20597 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20598 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
20599 }
20600 }
20601 case SystemZ::BI__builtin_s390_vfnmssb:
20602 case SystemZ::BI__builtin_s390_vfnmsdb: {
20603 llvm::Type *ResultType = ConvertType(E->getType());
20604 Value *X = EmitScalarExpr(E->getArg(0));
20605 Value *Y = EmitScalarExpr(E->getArg(1));
20606 Value *Z = EmitScalarExpr(E->getArg(2));
20607 if (Builder.getIsFPConstrained()) {
20608 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
20609 Value *NegZ = Builder.CreateFNeg(Z, "sub");
20610 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
20611 } else {
20612 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
20613 Value *NegZ = Builder.CreateFNeg(Z, "neg");
20614 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
20615 }
20616 }
20617 case SystemZ::BI__builtin_s390_vflpsb:
20618 case SystemZ::BI__builtin_s390_vflpdb: {
20619 llvm::Type *ResultType = ConvertType(E->getType());
20620 Value *X = EmitScalarExpr(E->getArg(0));
20621 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20622 return Builder.CreateCall(F, X);
20623 }
20624 case SystemZ::BI__builtin_s390_vflnsb:
20625 case SystemZ::BI__builtin_s390_vflndb: {
20626 llvm::Type *ResultType = ConvertType(E->getType());
20627 Value *X = EmitScalarExpr(E->getArg(0));
20628 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
20629 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
20630 }
20631 case SystemZ::BI__builtin_s390_vfisb:
20632 case SystemZ::BI__builtin_s390_vfidb: {
20633 llvm::Type *ResultType = ConvertType(E->getType());
20634 Value *X = EmitScalarExpr(E->getArg(0));
20635 // Constant-fold the M4 and M5 mask arguments.
20636 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
20637 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20638 // Check whether this instance can be represented via an LLVM standard
20639 // intrinsic. We only support some combinations of M4 and M5.
20640 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20641 Intrinsic::ID CI;
20642 switch (M4.getZExtValue()) {
20643 default: break;
20644 case 0: // IEEE-inexact exception allowed
20645 switch (M5.getZExtValue()) {
20646 default: break;
20647 case 0: ID = Intrinsic::rint;
20648 CI = Intrinsic::experimental_constrained_rint; break;
20649 }
20650 break;
20651 case 4: // IEEE-inexact exception suppressed
20652 switch (M5.getZExtValue()) {
20653 default: break;
20654 case 0: ID = Intrinsic::nearbyint;
20655 CI = Intrinsic::experimental_constrained_nearbyint; break;
20656 case 1: ID = Intrinsic::round;
20657 CI = Intrinsic::experimental_constrained_round; break;
20658 case 5: ID = Intrinsic::trunc;
20659 CI = Intrinsic::experimental_constrained_trunc; break;
20660 case 6: ID = Intrinsic::ceil;
20661 CI = Intrinsic::experimental_constrained_ceil; break;
20662 case 7: ID = Intrinsic::floor;
20663 CI = Intrinsic::experimental_constrained_floor; break;
20664 }
20665 break;
20666 }
20667 if (ID != Intrinsic::not_intrinsic) {
20668 if (Builder.getIsFPConstrained()) {
20669 Function *F = CGM.getIntrinsic(CI, ResultType);
20670 return Builder.CreateConstrainedFPCall(F, X);
20671 } else {
20672 Function *F = CGM.getIntrinsic(ID, ResultType);
20673 return Builder.CreateCall(F, X);
20674 }
20675 }
20676 switch (BuiltinID) { // FIXME: constrained version?
20677 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
20678 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
20679 default: llvm_unreachable("Unknown BuiltinID");
20680 }
20681 Function *F = CGM.getIntrinsic(ID);
20682 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20683 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
20684 return Builder.CreateCall(F, {X, M4Value, M5Value});
20685 }
20686 case SystemZ::BI__builtin_s390_vfmaxsb:
20687 case SystemZ::BI__builtin_s390_vfmaxdb: {
20688 llvm::Type *ResultType = ConvertType(E->getType());
20689 Value *X = EmitScalarExpr(E->getArg(0));
20690 Value *Y = EmitScalarExpr(E->getArg(1));
20691 // Constant-fold the M4 mask argument.
20692 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20693 // Check whether this instance can be represented via an LLVM standard
20694 // intrinsic. We only support some values of M4.
20695 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20696 Intrinsic::ID CI;
20697 switch (M4.getZExtValue()) {
20698 default: break;
20699 case 4: ID = Intrinsic::maxnum;
20700 CI = Intrinsic::experimental_constrained_maxnum; break;
20701 }
20702 if (ID != Intrinsic::not_intrinsic) {
20703 if (Builder.getIsFPConstrained()) {
20704 Function *F = CGM.getIntrinsic(CI, ResultType);
20705 return Builder.CreateConstrainedFPCall(F, {X, Y});
20706 } else {
20707 Function *F = CGM.getIntrinsic(ID, ResultType);
20708 return Builder.CreateCall(F, {X, Y});
20709 }
20710 }
20711 switch (BuiltinID) {
20712 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
20713 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
20714 default: llvm_unreachable("Unknown BuiltinID");
20715 }
20716 Function *F = CGM.getIntrinsic(ID);
20717 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20718 return Builder.CreateCall(F, {X, Y, M4Value});
20719 }
20720 case SystemZ::BI__builtin_s390_vfminsb:
20721 case SystemZ::BI__builtin_s390_vfmindb: {
20722 llvm::Type *ResultType = ConvertType(E->getType());
20723 Value *X = EmitScalarExpr(E->getArg(0));
20724 Value *Y = EmitScalarExpr(E->getArg(1));
20725 // Constant-fold the M4 mask argument.
20726 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
20727 // Check whether this instance can be represented via an LLVM standard
20728 // intrinsic. We only support some values of M4.
20729 Intrinsic::ID ID = Intrinsic::not_intrinsic;
20730 Intrinsic::ID CI;
20731 switch (M4.getZExtValue()) {
20732 default: break;
20733 case 4: ID = Intrinsic::minnum;
20734 CI = Intrinsic::experimental_constrained_minnum; break;
20735 }
20736 if (ID != Intrinsic::not_intrinsic) {
20737 if (Builder.getIsFPConstrained()) {
20738 Function *F = CGM.getIntrinsic(CI, ResultType);
20739 return Builder.CreateConstrainedFPCall(F, {X, Y});
20740 } else {
20741 Function *F = CGM.getIntrinsic(ID, ResultType);
20742 return Builder.CreateCall(F, {X, Y});
20743 }
20744 }
20745 switch (BuiltinID) {
20746 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
20747 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
20748 default: llvm_unreachable("Unknown BuiltinID");
20749 }
20750 Function *F = CGM.getIntrinsic(ID);
20751 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
20752 return Builder.CreateCall(F, {X, Y, M4Value});
20753 }
20754
20755 case SystemZ::BI__builtin_s390_vlbrh:
20756 case SystemZ::BI__builtin_s390_vlbrf:
20757 case SystemZ::BI__builtin_s390_vlbrg: {
20758 llvm::Type *ResultType = ConvertType(E->getType());
20759 Value *X = EmitScalarExpr(E->getArg(0));
20760 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
20761 return Builder.CreateCall(F, X);
20762 }
20763
20764 // Vector intrinsics that output the post-instruction CC value.
20765
20766#define INTRINSIC_WITH_CC(NAME) \
20767 case SystemZ::BI__builtin_##NAME: \
20768 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
20769
20770 INTRINSIC_WITH_CC(s390_vpkshs);
20771 INTRINSIC_WITH_CC(s390_vpksfs);
20772 INTRINSIC_WITH_CC(s390_vpksgs);
20773
20774 INTRINSIC_WITH_CC(s390_vpklshs);
20775 INTRINSIC_WITH_CC(s390_vpklsfs);
20776 INTRINSIC_WITH_CC(s390_vpklsgs);
20777
20778 INTRINSIC_WITH_CC(s390_vceqbs);
20779 INTRINSIC_WITH_CC(s390_vceqhs);
20780 INTRINSIC_WITH_CC(s390_vceqfs);
20781 INTRINSIC_WITH_CC(s390_vceqgs);
20782
20783 INTRINSIC_WITH_CC(s390_vchbs);
20784 INTRINSIC_WITH_CC(s390_vchhs);
20785 INTRINSIC_WITH_CC(s390_vchfs);
20786 INTRINSIC_WITH_CC(s390_vchgs);
20787
20788 INTRINSIC_WITH_CC(s390_vchlbs);
20789 INTRINSIC_WITH_CC(s390_vchlhs);
20790 INTRINSIC_WITH_CC(s390_vchlfs);
20791 INTRINSIC_WITH_CC(s390_vchlgs);
20792
20793 INTRINSIC_WITH_CC(s390_vfaebs);
20794 INTRINSIC_WITH_CC(s390_vfaehs);
20795 INTRINSIC_WITH_CC(s390_vfaefs);
20796
20797 INTRINSIC_WITH_CC(s390_vfaezbs);
20798 INTRINSIC_WITH_CC(s390_vfaezhs);
20799 INTRINSIC_WITH_CC(s390_vfaezfs);
20800
20801 INTRINSIC_WITH_CC(s390_vfeebs);
20802 INTRINSIC_WITH_CC(s390_vfeehs);
20803 INTRINSIC_WITH_CC(s390_vfeefs);
20804
20805 INTRINSIC_WITH_CC(s390_vfeezbs);
20806 INTRINSIC_WITH_CC(s390_vfeezhs);
20807 INTRINSIC_WITH_CC(s390_vfeezfs);
20808
20809 INTRINSIC_WITH_CC(s390_vfenebs);
20810 INTRINSIC_WITH_CC(s390_vfenehs);
20811 INTRINSIC_WITH_CC(s390_vfenefs);
20812
20813 INTRINSIC_WITH_CC(s390_vfenezbs);
20814 INTRINSIC_WITH_CC(s390_vfenezhs);
20815 INTRINSIC_WITH_CC(s390_vfenezfs);
20816
20817 INTRINSIC_WITH_CC(s390_vistrbs);
20818 INTRINSIC_WITH_CC(s390_vistrhs);
20819 INTRINSIC_WITH_CC(s390_vistrfs);
20820
20821 INTRINSIC_WITH_CC(s390_vstrcbs);
20822 INTRINSIC_WITH_CC(s390_vstrchs);
20823 INTRINSIC_WITH_CC(s390_vstrcfs);
20824
20825 INTRINSIC_WITH_CC(s390_vstrczbs);
20826 INTRINSIC_WITH_CC(s390_vstrczhs);
20827 INTRINSIC_WITH_CC(s390_vstrczfs);
20828
20829 INTRINSIC_WITH_CC(s390_vfcesbs);
20830 INTRINSIC_WITH_CC(s390_vfcedbs);
20831 INTRINSIC_WITH_CC(s390_vfchsbs);
20832 INTRINSIC_WITH_CC(s390_vfchdbs);
20833 INTRINSIC_WITH_CC(s390_vfchesbs);
20834 INTRINSIC_WITH_CC(s390_vfchedbs);
20835
20836 INTRINSIC_WITH_CC(s390_vftcisb);
20837 INTRINSIC_WITH_CC(s390_vftcidb);
20838
20839 INTRINSIC_WITH_CC(s390_vstrsb);
20840 INTRINSIC_WITH_CC(s390_vstrsh);
20841 INTRINSIC_WITH_CC(s390_vstrsf);
20842
20843 INTRINSIC_WITH_CC(s390_vstrszb);
20844 INTRINSIC_WITH_CC(s390_vstrszh);
20845 INTRINSIC_WITH_CC(s390_vstrszf);
20846
20847#undef INTRINSIC_WITH_CC
20848
20849 default:
20850 return nullptr;
20851 }
20852}
20853
20854namespace {
20855 // Helper classes for mapping MMA builtins to a particular LLVM intrinsic variant.
20856struct NVPTXMmaLdstInfo {
20857 unsigned NumResults; // Number of elements to load/store
20858 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
20859 unsigned IID_col;
20860 unsigned IID_row;
20861};
20862
20863#define MMA_INTR(geom_op_type, layout) \
20864 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
20865#define MMA_LDST(n, geom_op_type) \
20866 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
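// For example, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }
// i.e. the element count plus the column- and row-major intrinsic IDs.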
20867
20868static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
20869 switch (BuiltinID) {
20870 // FP MMA loads
20871 case NVPTX::BI__hmma_m16n16k16_ld_a:
20872 return MMA_LDST(8, m16n16k16_load_a_f16);
20873 case NVPTX::BI__hmma_m16n16k16_ld_b:
20874 return MMA_LDST(8, m16n16k16_load_b_f16);
20875 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20876 return MMA_LDST(4, m16n16k16_load_c_f16);
20877 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20878 return MMA_LDST(8, m16n16k16_load_c_f32);
20879 case NVPTX::BI__hmma_m32n8k16_ld_a:
20880 return MMA_LDST(8, m32n8k16_load_a_f16);
20881 case NVPTX::BI__hmma_m32n8k16_ld_b:
20882 return MMA_LDST(8, m32n8k16_load_b_f16);
20883 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20884 return MMA_LDST(4, m32n8k16_load_c_f16);
20885 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20886 return MMA_LDST(8, m32n8k16_load_c_f32);
20887 case NVPTX::BI__hmma_m8n32k16_ld_a:
20888 return MMA_LDST(8, m8n32k16_load_a_f16);
20889 case NVPTX::BI__hmma_m8n32k16_ld_b:
20890 return MMA_LDST(8, m8n32k16_load_b_f16);
20891 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20892 return MMA_LDST(4, m8n32k16_load_c_f16);
20893 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20894 return MMA_LDST(8, m8n32k16_load_c_f32);
20895
20896 // Integer MMA loads
20897 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20898 return MMA_LDST(2, m16n16k16_load_a_s8);
20899 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20900 return MMA_LDST(2, m16n16k16_load_a_u8);
20901 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20902 return MMA_LDST(2, m16n16k16_load_b_s8);
20903 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20904 return MMA_LDST(2, m16n16k16_load_b_u8);
20905 case NVPTX::BI__imma_m16n16k16_ld_c:
20906 return MMA_LDST(8, m16n16k16_load_c_s32);
20907 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20908 return MMA_LDST(4, m32n8k16_load_a_s8);
20909 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20910 return MMA_LDST(4, m32n8k16_load_a_u8);
20911 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20912 return MMA_LDST(1, m32n8k16_load_b_s8);
20913 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20914 return MMA_LDST(1, m32n8k16_load_b_u8);
20915 case NVPTX::BI__imma_m32n8k16_ld_c:
20916 return MMA_LDST(8, m32n8k16_load_c_s32);
20917 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20918 return MMA_LDST(1, m8n32k16_load_a_s8);
20919 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20920 return MMA_LDST(1, m8n32k16_load_a_u8);
20921 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20922 return MMA_LDST(4, m8n32k16_load_b_s8);
20923 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20924 return MMA_LDST(4, m8n32k16_load_b_u8);
20925 case NVPTX::BI__imma_m8n32k16_ld_c:
20926 return MMA_LDST(8, m8n32k16_load_c_s32);
20927
20928 // Sub-integer MMA loads.
20929 // Only row/col layout is supported by A/B fragments.
20930 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20931 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
20932 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20933 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
20934 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20935 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
20936 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20937 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
20938 case NVPTX::BI__imma_m8n8k32_ld_c:
20939 return MMA_LDST(2, m8n8k32_load_c_s32);
20940 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20941 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
20942 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20943 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
20944 case NVPTX::BI__bmma_m8n8k128_ld_c:
20945 return MMA_LDST(2, m8n8k128_load_c_s32);
20946
20947 // Double MMA loads
20948 case NVPTX::BI__dmma_m8n8k4_ld_a:
20949 return MMA_LDST(1, m8n8k4_load_a_f64);
20950 case NVPTX::BI__dmma_m8n8k4_ld_b:
20951 return MMA_LDST(1, m8n8k4_load_b_f64);
20952 case NVPTX::BI__dmma_m8n8k4_ld_c:
20953 return MMA_LDST(2, m8n8k4_load_c_f64);
20954
20955 // Alternate float MMA loads
20956 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20957 return MMA_LDST(4, m16n16k16_load_a_bf16);
20958 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20959 return MMA_LDST(4, m16n16k16_load_b_bf16);
20960 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20961 return MMA_LDST(2, m8n32k16_load_a_bf16);
20962 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20963 return MMA_LDST(8, m8n32k16_load_b_bf16);
20964 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20965 return MMA_LDST(8, m32n8k16_load_a_bf16);
20966 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20967 return MMA_LDST(2, m32n8k16_load_b_bf16);
20968 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20969 return MMA_LDST(4, m16n16k8_load_a_tf32);
20970 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20971 return MMA_LDST(4, m16n16k8_load_b_tf32);
20972 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
20973 return MMA_LDST(8, m16n16k8_load_c_f32);
20974
20975 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
20976 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
20977 // use fragment C for both loads and stores.
20978 // FP MMA stores.
20979 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20980 return MMA_LDST(4, m16n16k16_store_d_f16);
20981 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20982 return MMA_LDST(8, m16n16k16_store_d_f32);
20983 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20984 return MMA_LDST(4, m32n8k16_store_d_f16);
20985 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20986 return MMA_LDST(8, m32n8k16_store_d_f32);
20987 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20988 return MMA_LDST(4, m8n32k16_store_d_f16);
20989 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20990 return MMA_LDST(8, m8n32k16_store_d_f32);
20991
20992 // Integer and sub-integer MMA stores.
20993 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
20994 // name, integer loads/stores use LLVM's i32.
20995 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20996 return MMA_LDST(8, m16n16k16_store_d_s32);
20997 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20998 return MMA_LDST(8, m32n8k16_store_d_s32);
20999 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21000 return MMA_LDST(8, m8n32k16_store_d_s32);
21001 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21002 return MMA_LDST(2, m8n8k32_store_d_s32);
21003 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21004 return MMA_LDST(2, m8n8k128_store_d_s32);
21005
21006 // Double MMA store
21007 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21008 return MMA_LDST(2, m8n8k4_store_d_f64);
21009
21010 // Alternate float MMA store
21011 case NVPTX::BI__mma_m16n16k8_st_c_f32:
21012 return MMA_LDST(8, m16n16k8_store_d_f32);
21013
21014 default:
21015 llvm_unreachable("Unknown MMA builtin");
21016 }
21017}
21018#undef MMA_LDST
21019#undef MMA_INTR
21020
21021
21022struct NVPTXMmaInfo {
21023 unsigned NumEltsA;
21024 unsigned NumEltsB;
21025 unsigned NumEltsC;
21026 unsigned NumEltsD;
21027
21028 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
21029 // over 'col' for layout. The index of non-satf variants is expected to match
21030 // the undocumented layout constants used by CUDA's mma.hpp.
21031 std::array<unsigned, 8> Variants;
21032
21033 unsigned getMMAIntrinsic(int Layout, bool Satf) {
21034 unsigned Index = Layout + 4 * Satf;
21035 if (Index >= Variants.size())
21036 return 0;
21037 return Variants[Index];
21038 }
21039};
21040
21041 // Returns an intrinsic that matches Layout and Satf for valid combinations of
21042 // Layout and Satf, 0 otherwise.
21043static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
21044 // clang-format off
21045#define MMA_VARIANTS(geom, type) \
21046 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
21047 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21048 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
21049 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
21050#define MMA_SATF_VARIANTS(geom, type) \
21051 MMA_VARIANTS(geom, type), \
21052 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
21053 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21054 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
21055 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
21056// Sub-integer MMA only supports row.col layout.
21057#define MMA_VARIANTS_I4(geom, type) \
21058 0, \
21059 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
21060 0, \
21061 0, \
21062 0, \
21063 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
21064 0, \
21065 0
21066// b1 MMA does not support .satfinite.
21067#define MMA_VARIANTS_B1_XOR(geom, type) \
21068 0, \
21069 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
21070 0, \
21071 0, \
21072 0, \
21073 0, \
21074 0, \
21075 0
21076#define MMA_VARIANTS_B1_AND(geom, type) \
21077 0, \
21078 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
21079 0, \
21080 0, \
21081 0, \
21082 0, \
21083 0, \
21084 0
21085 // clang-format on
21086 switch (BuiltinID) {
21087 // FP MMA
21088 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
21089 // the NumEltsN fields of the return value are ordered as A,B,C,D.
21090 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21091 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
21092 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21093 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
21094 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21095 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
21096 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21097 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
21098 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21099 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
21100 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21101 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
21102 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21103 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
21104 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21105 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
21106 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21107 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
21108 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21109 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
21110 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21111 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
21112 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21113 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
21114
21115 // Integer MMA
21116 case NVPTX::BI__imma_m16n16k16_mma_s8:
21117 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
21118 case NVPTX::BI__imma_m16n16k16_mma_u8:
21119 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
21120 case NVPTX::BI__imma_m32n8k16_mma_s8:
21121 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
21122 case NVPTX::BI__imma_m32n8k16_mma_u8:
21123 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
21124 case NVPTX::BI__imma_m8n32k16_mma_s8:
21125 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
21126 case NVPTX::BI__imma_m8n32k16_mma_u8:
21127 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
21128
21129 // Sub-integer MMA
21130 case NVPTX::BI__imma_m8n8k32_mma_s4:
21131 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
21132 case NVPTX::BI__imma_m8n8k32_mma_u4:
21133 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
21134 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21135 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
21136 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21137 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
21138
21139 // Double MMA
21140 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21141 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
21142
21143 // Alternate FP MMA
21144 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21145 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
21146 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21147 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
21148 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21149 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
21150 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
21151 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
21152 default:
21153 llvm_unreachable("Unexpected builtin ID.");
21154 }
21155#undef MMA_VARIANTS
21156#undef MMA_SATF_VARIANTS
21157#undef MMA_VARIANTS_I4
21158#undef MMA_VARIANTS_B1_AND
21159#undef MMA_VARIANTS_B1_XOR
21160}
21161
21162static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
21163 const CallExpr *E) {
21164 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21165 QualType ArgType = E->getArg(0)->getType();
21166 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21167 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21168 return CGF.Builder.CreateCall(
21169 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21170 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
21171}
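// Illustrative sketch (the mangled intrinsic name is an assumption): __nvvm_ldu_i(p)
// on an int * is expected to become roughly
//   call i32 @llvm.nvvm.ldu.global.i.i32.p0(ptr %p, i32 4)
// with the trailing i32 carrying the natural alignment of the pointee type.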
21172
21173static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
21174 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21175 QualType ArgType = E->getArg(0)->getType();
21176 clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
21177 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
21178
21179 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
21180 auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
21181 auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
21182 MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
21183 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
21184
21185 return LD;
21186}
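// Illustrative sketch (assumed element type): __nvvm_ldg_f(p) on a float * becomes an
// ordinary invariant load through the global address space, e.g.
//   %g = addrspacecast ptr %p to ptr addrspace(1)
//   %v = load float, ptr addrspace(1) %g, align 4, !invariant.load !0
// which the NVPTX backend can then select as a non-coherent (ld.global.nc) load.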
21187
21188static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
21189 const CallExpr *E) {
21190 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
21191 llvm::Type *ElemTy =
21192 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21193 return CGF.Builder.CreateCall(
21194 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
21195 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
21196}
21197
21198static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
21199 CodeGenFunction &CGF, const CallExpr *E,
21200 int SrcSize) {
21201 return E->getNumArgs() == 3
21202 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
21203 {CGF.EmitScalarExpr(E->getArg(0)),
21204 CGF.EmitScalarExpr(E->getArg(1)),
21205 CGF.EmitScalarExpr(E->getArg(2))})
21206 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
21207 {CGF.EmitScalarExpr(E->getArg(0)),
21208 CGF.EmitScalarExpr(E->getArg(1))});
21209}
21210
21211static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
21212 const CallExpr *E, CodeGenFunction &CGF) {
21213 auto &C = CGF.CGM.getContext();
21214 if (!(C.getLangOpts().NativeHalfType ||
21215 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
21216 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
21217 " requires native half type support.");
21218 return nullptr;
21219 }
21220
21221 if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
21222 return MakeLdg(CGF, E);
21223
21224 if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
21225 return MakeLdu(IntrinsicID, CGF, E);
21226
21227 SmallVector<Value *, 16> Args;
21228 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
21229 auto *FTy = F->getFunctionType();
21230 unsigned ICEArguments = 0;
21231 ASTContext::GetBuiltinTypeError Error;
21232 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
21233 assert(Error == ASTContext::GE_None && "Should not codegen an error");
21234 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
21235 assert((ICEArguments & (1 << i)) == 0);
21236 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
21237 auto *PTy = FTy->getParamType(i);
21238 if (PTy != ArgValue->getType())
21239 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
21240 Args.push_back(ArgValue);
21241 }
21242
21243 return CGF.Builder.CreateCall(F, Args);
21244}
21245} // namespace
21246
21247 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
21248 const CallExpr *E) {
21249 switch (BuiltinID) {
21250 case NVPTX::BI__nvvm_atom_add_gen_i:
21251 case NVPTX::BI__nvvm_atom_add_gen_l:
21252 case NVPTX::BI__nvvm_atom_add_gen_ll:
21253 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
21254
21255 case NVPTX::BI__nvvm_atom_sub_gen_i:
21256 case NVPTX::BI__nvvm_atom_sub_gen_l:
21257 case NVPTX::BI__nvvm_atom_sub_gen_ll:
21258 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
21259
21260 case NVPTX::BI__nvvm_atom_and_gen_i:
21261 case NVPTX::BI__nvvm_atom_and_gen_l:
21262 case NVPTX::BI__nvvm_atom_and_gen_ll:
21263 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
21264
21265 case NVPTX::BI__nvvm_atom_or_gen_i:
21266 case NVPTX::BI__nvvm_atom_or_gen_l:
21267 case NVPTX::BI__nvvm_atom_or_gen_ll:
21268 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
21269
21270 case NVPTX::BI__nvvm_atom_xor_gen_i:
21271 case NVPTX::BI__nvvm_atom_xor_gen_l:
21272 case NVPTX::BI__nvvm_atom_xor_gen_ll:
21273 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
21274
21275 case NVPTX::BI__nvvm_atom_xchg_gen_i:
21276 case NVPTX::BI__nvvm_atom_xchg_gen_l:
21277 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
21278 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
21279
21280 case NVPTX::BI__nvvm_atom_max_gen_i:
21281 case NVPTX::BI__nvvm_atom_max_gen_l:
21282 case NVPTX::BI__nvvm_atom_max_gen_ll:
21283 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
21284
21285 case NVPTX::BI__nvvm_atom_max_gen_ui:
21286 case NVPTX::BI__nvvm_atom_max_gen_ul:
21287 case NVPTX::BI__nvvm_atom_max_gen_ull:
21288 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
21289
21290 case NVPTX::BI__nvvm_atom_min_gen_i:
21291 case NVPTX::BI__nvvm_atom_min_gen_l:
21292 case NVPTX::BI__nvvm_atom_min_gen_ll:
21293 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
21294
21295 case NVPTX::BI__nvvm_atom_min_gen_ui:
21296 case NVPTX::BI__nvvm_atom_min_gen_ul:
21297 case NVPTX::BI__nvvm_atom_min_gen_ull:
21298 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
21299
21300 case NVPTX::BI__nvvm_atom_cas_gen_us:
21301 case NVPTX::BI__nvvm_atom_cas_gen_i:
21302 case NVPTX::BI__nvvm_atom_cas_gen_l:
21303 case NVPTX::BI__nvvm_atom_cas_gen_ll:
21304 // __nvvm_atom_cas_gen_* should return the old value rather than the
21305 // success flag.
21306 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
21307
21308 case NVPTX::BI__nvvm_atom_add_gen_f:
21309 case NVPTX::BI__nvvm_atom_add_gen_d: {
21310 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
21311 Value *Val = EmitScalarExpr(E->getArg(1));
21312
21313 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
21314 AtomicOrdering::SequentiallyConsistent);
21315 }
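// Illustrative note: the float/double add builtins above lower to plain IR, e.g.
//   atomicrmw fadd ptr %p, float %v seq_cst
// rather than to an NVVM-specific intrinsic, so generic atomic optimizations apply.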
21316
21317 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
21318 Value *Ptr = EmitScalarExpr(E->getArg(0));
21319 Value *Val = EmitScalarExpr(E->getArg(1));
21320 Function *FnALI32 =
21321 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
21322 return Builder.CreateCall(FnALI32, {Ptr, Val});
21323 }
21324
21325 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
21326 Value *Ptr = EmitScalarExpr(E->getArg(0));
21327 Value *Val = EmitScalarExpr(E->getArg(1));
21328 Function *FnALD32 =
21329 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
21330 return Builder.CreateCall(FnALD32, {Ptr, Val});
21331 }
21332
21333 case NVPTX::BI__nvvm_ldg_c:
21334 case NVPTX::BI__nvvm_ldg_sc:
21335 case NVPTX::BI__nvvm_ldg_c2:
21336 case NVPTX::BI__nvvm_ldg_sc2:
21337 case NVPTX::BI__nvvm_ldg_c4:
21338 case NVPTX::BI__nvvm_ldg_sc4:
21339 case NVPTX::BI__nvvm_ldg_s:
21340 case NVPTX::BI__nvvm_ldg_s2:
21341 case NVPTX::BI__nvvm_ldg_s4:
21342 case NVPTX::BI__nvvm_ldg_i:
21343 case NVPTX::BI__nvvm_ldg_i2:
21344 case NVPTX::BI__nvvm_ldg_i4:
21345 case NVPTX::BI__nvvm_ldg_l:
21346 case NVPTX::BI__nvvm_ldg_l2:
21347 case NVPTX::BI__nvvm_ldg_ll:
21348 case NVPTX::BI__nvvm_ldg_ll2:
21349 case NVPTX::BI__nvvm_ldg_uc:
21350 case NVPTX::BI__nvvm_ldg_uc2:
21351 case NVPTX::BI__nvvm_ldg_uc4:
21352 case NVPTX::BI__nvvm_ldg_us:
21353 case NVPTX::BI__nvvm_ldg_us2:
21354 case NVPTX::BI__nvvm_ldg_us4:
21355 case NVPTX::BI__nvvm_ldg_ui:
21356 case NVPTX::BI__nvvm_ldg_ui2:
21357 case NVPTX::BI__nvvm_ldg_ui4:
21358 case NVPTX::BI__nvvm_ldg_ul:
21359 case NVPTX::BI__nvvm_ldg_ul2:
21360 case NVPTX::BI__nvvm_ldg_ull:
21361 case NVPTX::BI__nvvm_ldg_ull2:
21362 case NVPTX::BI__nvvm_ldg_f:
21363 case NVPTX::BI__nvvm_ldg_f2:
21364 case NVPTX::BI__nvvm_ldg_f4:
21365 case NVPTX::BI__nvvm_ldg_d:
21366 case NVPTX::BI__nvvm_ldg_d2:
21367 // PTX Interoperability section 2.2: "For a vector with an even number of
21368 // elements, its alignment is set to number of elements times the alignment
21369 // of its member: n*alignof(t)."
21370 return MakeLdg(*this, E);
21371
21372 case NVPTX::BI__nvvm_ldu_c:
21373 case NVPTX::BI__nvvm_ldu_sc:
21374 case NVPTX::BI__nvvm_ldu_c2:
21375 case NVPTX::BI__nvvm_ldu_sc2:
21376 case NVPTX::BI__nvvm_ldu_c4:
21377 case NVPTX::BI__nvvm_ldu_sc4:
21378 case NVPTX::BI__nvvm_ldu_s:
21379 case NVPTX::BI__nvvm_ldu_s2:
21380 case NVPTX::BI__nvvm_ldu_s4:
21381 case NVPTX::BI__nvvm_ldu_i:
21382 case NVPTX::BI__nvvm_ldu_i2:
21383 case NVPTX::BI__nvvm_ldu_i4:
21384 case NVPTX::BI__nvvm_ldu_l:
21385 case NVPTX::BI__nvvm_ldu_l2:
21386 case NVPTX::BI__nvvm_ldu_ll:
21387 case NVPTX::BI__nvvm_ldu_ll2:
21388 case NVPTX::BI__nvvm_ldu_uc:
21389 case NVPTX::BI__nvvm_ldu_uc2:
21390 case NVPTX::BI__nvvm_ldu_uc4:
21391 case NVPTX::BI__nvvm_ldu_us:
21392 case NVPTX::BI__nvvm_ldu_us2:
21393 case NVPTX::BI__nvvm_ldu_us4:
21394 case NVPTX::BI__nvvm_ldu_ui:
21395 case NVPTX::BI__nvvm_ldu_ui2:
21396 case NVPTX::BI__nvvm_ldu_ui4:
21397 case NVPTX::BI__nvvm_ldu_ul:
21398 case NVPTX::BI__nvvm_ldu_ul2:
21399 case NVPTX::BI__nvvm_ldu_ull:
21400 case NVPTX::BI__nvvm_ldu_ull2:
21401 return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
21402 case NVPTX::BI__nvvm_ldu_f:
21403 case NVPTX::BI__nvvm_ldu_f2:
21404 case NVPTX::BI__nvvm_ldu_f4:
21405 case NVPTX::BI__nvvm_ldu_d:
21406 case NVPTX::BI__nvvm_ldu_d2:
21407 return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
21408
21409 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
21410 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
21411 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
21412 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
21413 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
21414 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
21415 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
21416 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
21417 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
21418 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
21419 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
21420 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
21421 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
21422 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
21423 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
21424 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
21425 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
21426 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
21427 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
21428 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
21429 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
21430 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
21431 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
21432 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
21433 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
21434 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
21435 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
21436 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
21437 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
21438 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
21439 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
21440 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
21441 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
21442 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
21443 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
21444 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
21445 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
21446 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
21447 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
21448 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
21449 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
21450 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
21451 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
21452 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
21453 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
21454 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
21455 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
21456 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
21457 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
21458 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
21459 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
21460 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
21461 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
21462 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
21463 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
21464 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
21465 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
21466 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
21467 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
21468 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
21469 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
21470 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
21471 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
21472 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
21473 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
21474 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
21475 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
21476 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
21477 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
21478 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
21479 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
21480 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
21481 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
21482 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
21483 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
21484 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
21485 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
21486 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
21487 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
21488 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
21489 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
21490 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
21491 case NVPTX::BI__nvvm_atom_cta_cas_gen_us:
21492 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
21493 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
21494 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
21495 Value *Ptr = EmitScalarExpr(E->getArg(0));
21496 llvm::Type *ElemTy =
21497 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21498 return Builder.CreateCall(
21499 CGM.getIntrinsic(
21500 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
21501 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21502 }
21503 case NVPTX::BI__nvvm_atom_sys_cas_gen_us:
21504 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
21505 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
21506 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
21507 Value *Ptr = EmitScalarExpr(E->getArg(0));
21508 llvm::Type *ElemTy =
21509 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
21510 return Builder.CreateCall(
21511 CGM.getIntrinsic(
21512 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
21513 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
21514 }
21515 case NVPTX::BI__nvvm_match_all_sync_i32p:
21516 case NVPTX::BI__nvvm_match_all_sync_i64p: {
21517 Value *Mask = EmitScalarExpr(E->getArg(0));
21518 Value *Val = EmitScalarExpr(E->getArg(1));
21519 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
21520 Value *ResultPair = Builder.CreateCall(
21521 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
21522 ? Intrinsic::nvvm_match_all_sync_i32p
21523 : Intrinsic::nvvm_match_all_sync_i64p),
21524 {Mask, Val});
21525 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
21526 PredOutPtr.getElementType());
21527 Builder.CreateStore(Pred, PredOutPtr);
21528 return Builder.CreateExtractValue(ResultPair, 0);
21529 }
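// Illustrative sketch (the aggregate result type is an assumption):
// __nvvm_match_all_sync_i32p(mask, val, &pred) becomes roughly
//   %pair = call { i32, i1 } @llvm.nvvm.match.all.sync.i32p(i32 %mask, i32 %val)
//   %p    = zext i1 (field 1 of %pair) to i32   ; stored through &pred
// and the builtin returns field 0 of %pair.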
21530
21531 // FP MMA loads
21532 case NVPTX::BI__hmma_m16n16k16_ld_a:
21533 case NVPTX::BI__hmma_m16n16k16_ld_b:
21534 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
21535 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
21536 case NVPTX::BI__hmma_m32n8k16_ld_a:
21537 case NVPTX::BI__hmma_m32n8k16_ld_b:
21538 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
21539 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
21540 case NVPTX::BI__hmma_m8n32k16_ld_a:
21541 case NVPTX::BI__hmma_m8n32k16_ld_b:
21542 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
21543 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
21544 // Integer MMA loads.
21545 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
21546 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
21547 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
21548 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
21549 case NVPTX::BI__imma_m16n16k16_ld_c:
21550 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
21551 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
21552 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
21553 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
21554 case NVPTX::BI__imma_m32n8k16_ld_c:
21555 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
21556 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
21557 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
21558 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
21559 case NVPTX::BI__imma_m8n32k16_ld_c:
21560 // Sub-integer MMA loads.
21561 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
21562 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
21563 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
21564 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
21565 case NVPTX::BI__imma_m8n8k32_ld_c:
21566 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
21567 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
21568 case NVPTX::BI__bmma_m8n8k128_ld_c:
21569 // Double MMA loads.
21570 case NVPTX::BI__dmma_m8n8k4_ld_a:
21571 case NVPTX::BI__dmma_m8n8k4_ld_b:
21572 case NVPTX::BI__dmma_m8n8k4_ld_c:
21573 // Alternate float MMA loads.
21574 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
21575 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
21576 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
21577 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
21578 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
21579 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
21580 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
21581 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
21582 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
21583 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21584 Value *Src = EmitScalarExpr(E->getArg(1));
21585 Value *Ldm = EmitScalarExpr(E->getArg(2));
21586 std::optional<llvm::APSInt> isColMajorArg =
21587 E->getArg(3)->getIntegerConstantExpr(getContext());
21588 if (!isColMajorArg)
21589 return nullptr;
21590 bool isColMajor = isColMajorArg->getSExtValue();
21591 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21592 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21593 if (IID == 0)
21594 return nullptr;
21595
21596 Value *Result =
21597 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
21598
21599 // Save returned values.
21600 assert(II.NumResults);
21601 if (II.NumResults == 1) {
21602 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
21603 CharUnits::fromQuantity(4));
21604 } else {
21605 for (unsigned i = 0; i < II.NumResults; ++i) {
21606 Builder.CreateAlignedStore(
21607 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
21608 Dst.getElementType()),
21609 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21610 llvm::ConstantInt::get(IntTy, i)),
21611 CharUnits::fromQuantity(4));
21612 }
21613 }
21613 }
21614 return Result;
21615 }
21616
21617 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
21618 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
21619 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
21620 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
21621 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
21622 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
21623 case NVPTX::BI__imma_m16n16k16_st_c_i32:
21624 case NVPTX::BI__imma_m32n8k16_st_c_i32:
21625 case NVPTX::BI__imma_m8n32k16_st_c_i32:
21626 case NVPTX::BI__imma_m8n8k32_st_c_i32:
21627 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
21628 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
21629 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
21630 Value *Dst = EmitScalarExpr(E->getArg(0));
21631 Address Src = EmitPointerWithAlignment(E->getArg(1));
21632 Value *Ldm = EmitScalarExpr(E->getArg(2));
21633 std::optional<llvm::APSInt> isColMajorArg =
21634 E->getArg(3)->getIntegerConstantExpr(getContext());
21635 if (!isColMajorArg)
21636 return nullptr;
21637 bool isColMajor = isColMajorArg->getSExtValue();
21638 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
21639 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
21640 if (IID == 0)
21641 return nullptr;
21642 Function *Intrinsic =
21643 CGM.getIntrinsic(IID, Dst->getType());
21644 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
21645 SmallVector<Value *, 10> Values = {Dst};
21646 for (unsigned i = 0; i < II.NumResults; ++i) {
21647 Value *V = Builder.CreateAlignedLoad(
21648 Src.getElementType(),
21649 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
21650 llvm::ConstantInt::get(IntTy, i)),
21651 CharUnits::fromQuantity(4));
21652 Values.push_back(Builder.CreateBitCast(V, ParamType));
21653 }
21654 Values.push_back(Ldm);
21655 Value *Result = Builder.CreateCall(Intrinsic, Values);
21656 return Result;
21657 }
21658
21659 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
21660 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
21661 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
21662 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
21663 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
21664 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
21665 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
21666 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
21667 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
21668 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
21669 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
21670 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
21671 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
21672 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
21673 case NVPTX::BI__imma_m16n16k16_mma_s8:
21674 case NVPTX::BI__imma_m16n16k16_mma_u8:
21675 case NVPTX::BI__imma_m32n8k16_mma_s8:
21676 case NVPTX::BI__imma_m32n8k16_mma_u8:
21677 case NVPTX::BI__imma_m8n32k16_mma_s8:
21678 case NVPTX::BI__imma_m8n32k16_mma_u8:
21679 case NVPTX::BI__imma_m8n8k32_mma_s4:
21680 case NVPTX::BI__imma_m8n8k32_mma_u4:
21681 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
21682 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
21683 case NVPTX::BI__dmma_m8n8k4_mma_f64:
21684 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
21685 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
21686 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
21687 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
21688 Address Dst = EmitPointerWithAlignment(E->getArg(0));
21689 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
21690 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
21691 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
21692 std::optional<llvm::APSInt> LayoutArg =
21693 E->getArg(4)->getIntegerConstantExpr(getContext());
21694 if (!LayoutArg)
21695 return nullptr;
21696 int Layout = LayoutArg->getSExtValue();
21697 if (Layout < 0 || Layout > 3)
21698 return nullptr;
21699 llvm::APSInt SatfArg;
21700 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
21701 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
21702 SatfArg = 0; // .b1 does not have satf argument.
21703 else if (std::optional<llvm::APSInt> OptSatfArg =
21704 E->getArg(5)->getIntegerConstantExpr(getContext()))
21705 SatfArg = *OptSatfArg;
21706 else
21707 return nullptr;
21708 bool Satf = SatfArg.getSExtValue();
21709 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
21710 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
21711 if (IID == 0) // Unsupported combination of Layout/Satf.
21712 return nullptr;
21713
21714 SmallVector<Value *, 24> Values;
21715 Function *Intrinsic = CGM.getIntrinsic(IID);
21716 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
21717 // Load A
21718 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
21719 Value *V = Builder.CreateAlignedLoad(
21720 SrcA.getElementType(),
21721 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
21722 llvm::ConstantInt::get(IntTy, i)),
21723 CharUnits::fromQuantity(4));
21724 Values.push_back(Builder.CreateBitCast(V, AType));
21725 }
21726 // Load B
21727 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
21728 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
21729 Value *V = Builder.CreateAlignedLoad(
21730 SrcB.getElementType(),
21731 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
21732 llvm::ConstantInt::get(IntTy, i)),
21733 CharUnits::fromQuantity(4));
21734 Values.push_back(Builder.CreateBitCast(V, BType));
21735 }
21736 // Load C
21737 llvm::Type *CType =
21738 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
21739 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
21740 Value *V = Builder.CreateAlignedLoad(
21741 SrcC.getElementType(),
21742 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
21743 llvm::ConstantInt::get(IntTy, i)),
21744 CharUnits::fromQuantity(4));
21745 Values.push_back(Builder.CreateBitCast(V, CType));
21746 }
21747 Value *Result = Builder.CreateCall(Intrinsic, Values);
21748 llvm::Type *DType = Dst.getElementType();
21749 for (unsigned i = 0; i < MI.NumEltsD; ++i)
21750 Builder.CreateAlignedStore(
21751 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
21752 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
21753 llvm::ConstantInt::get(IntTy, i)),
21754 CharUnits::fromQuantity(4));
21755 return Result;
21756 }
21757 // The following builtins require half type support
21758 case NVPTX::BI__nvvm_ex2_approx_f16:
21759 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
21760 case NVPTX::BI__nvvm_ex2_approx_f16x2:
21761 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
21762 case NVPTX::BI__nvvm_ff2f16x2_rn:
21763 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
21764 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
21765 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
21766 case NVPTX::BI__nvvm_ff2f16x2_rz:
21767 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
21768 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
21769 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
21770 case NVPTX::BI__nvvm_fma_rn_f16:
21771 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
21772 case NVPTX::BI__nvvm_fma_rn_f16x2:
21773 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
21774 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
21775 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
21776 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
21777 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
21778 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
21779 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
21780 *this);
21781 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
21782 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
21783 *this);
21784 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
21785 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
21786 *this);
21787 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
21788 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
21789 *this);
21790 case NVPTX::BI__nvvm_fma_rn_relu_f16:
21791 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
21792 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
21793 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
21794 case NVPTX::BI__nvvm_fma_rn_sat_f16:
21795 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
21796 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
21797 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
21798 case NVPTX::BI__nvvm_fmax_f16:
21799 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
21800 case NVPTX::BI__nvvm_fmax_f16x2:
21801 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
21802 case NVPTX::BI__nvvm_fmax_ftz_f16:
21803 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
21804 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
21805 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
21806 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
21807 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
21808 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
21809 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
21810 *this);
21811 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
21812 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
21813 E, *this);
21814 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
21815 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
21816 BuiltinID, E, *this);
21817 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
21818 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
21819 *this);
21820 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
21821 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
21822 E, *this);
21823 case NVPTX::BI__nvvm_fmax_nan_f16:
21824 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
21825 case NVPTX::BI__nvvm_fmax_nan_f16x2:
21826 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
21827 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
21828 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
21829 *this);
21830 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
21831 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
21832 E, *this);
21833 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
21834 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
21835 *this);
21836 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
21837 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
21838 *this);
21839 case NVPTX::BI__nvvm_fmin_f16:
21840 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
21841 case NVPTX::BI__nvvm_fmin_f16x2:
21842 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
21843 case NVPTX::BI__nvvm_fmin_ftz_f16:
21844 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
21845 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
21846 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
21847 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
21848 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
21849 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
21850 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
21851 *this);
21852 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
21853 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
21854 E, *this);
21855 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
21856 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
21857 BuiltinID, E, *this);
21858 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
21859 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
21860 *this);
21861 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
21862 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
21863 E, *this);
21864 case NVPTX::BI__nvvm_fmin_nan_f16:
21865 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
21866 case NVPTX::BI__nvvm_fmin_nan_f16x2:
21867 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
21868 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
21869 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
21870 *this);
21871 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
21872 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
21873 E, *this);
21874 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
21875 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
21876 *this);
21877 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
21878 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
21879 *this);
21880 case NVPTX::BI__nvvm_ldg_h:
21881 case NVPTX::BI__nvvm_ldg_h2:
21882 return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
21883 case NVPTX::BI__nvvm_ldu_h:
21884 case NVPTX::BI__nvvm_ldu_h2:
21885 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
21886 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
21887 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
21888 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
21889 4);
21890 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
21891 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
21892 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
21893 8);
21894 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
21895 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
21896 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
21897 16);
21898 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
21899 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
21900 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
21901 16);
21902 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
21903 return Builder.CreateCall(
21904 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
21905 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
21906 return Builder.CreateCall(
21907 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
21908 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
21909 return Builder.CreateCall(
21910 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
21911 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
21912 return Builder.CreateCall(
21913 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
21914 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
21915 return Builder.CreateCall(
21916 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
21917 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
21918 return Builder.CreateCall(
21919 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
21920 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
21921 return Builder.CreateCall(
21922 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
21923 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
21924 return Builder.CreateCall(
21925 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
21926 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
21927 return Builder.CreateCall(
21928 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
21929 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
21930 return Builder.CreateCall(
21931 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
21932 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
21933 return Builder.CreateCall(
21934 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
21935 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
21936 return Builder.CreateCall(
21937 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
21938 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
21939 return Builder.CreateCall(
21940 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
21941 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
21942 return Builder.CreateCall(
21943 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
21944 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
21945 return Builder.CreateCall(
21946 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
21947 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
21948 return Builder.CreateCall(
21949 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
21950 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
21951 return Builder.CreateCall(
21952 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
21953 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
21954 return Builder.CreateCall(
21955 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
21956 case NVPTX::BI__nvvm_is_explicit_cluster:
21957 return Builder.CreateCall(
21958 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
21959 case NVPTX::BI__nvvm_isspacep_shared_cluster:
21960 return Builder.CreateCall(
21961 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
21962 EmitScalarExpr(E->getArg(0)));
21963 case NVPTX::BI__nvvm_mapa:
21964 return Builder.CreateCall(
21965 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
21966 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21967 case NVPTX::BI__nvvm_mapa_shared_cluster:
21968 return Builder.CreateCall(
21969 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
21970 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21971 case NVPTX::BI__nvvm_getctarank:
21972 return Builder.CreateCall(
21973 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
21974 EmitScalarExpr(E->getArg(0)));
21975 case NVPTX::BI__nvvm_getctarank_shared_cluster:
21976 return Builder.CreateCall(
21977 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
21978 EmitScalarExpr(E->getArg(0)));
21979 case NVPTX::BI__nvvm_barrier_cluster_arrive:
21980 return Builder.CreateCall(
21981 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
21982 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
21983 return Builder.CreateCall(
21984 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
21985 case NVPTX::BI__nvvm_barrier_cluster_wait:
21986 return Builder.CreateCall(
21987 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
21988 case NVPTX::BI__nvvm_fence_sc_cluster:
21989 return Builder.CreateCall(
21990 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
21991 default:
21992 return nullptr;
21993 }
21994}
21995
21996namespace {
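// Gathers the operands shared by the __builtin_is_aligned, __builtin_align_up
// and __builtin_align_down lowerings below: the pointer or integer source
// value, the requested alignment widened/truncated to the matching index
// width, and the precomputed (alignment - 1) mask.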
21997struct BuiltinAlignArgs {
21998 llvm::Value *Src = nullptr;
21999 llvm::Type *SrcType = nullptr;
22000 llvm::Value *Alignment = nullptr;
22001 llvm::Value *Mask = nullptr;
22002 llvm::IntegerType *IntType = nullptr;
22003
22004 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
22005 QualType AstType = E->getArg(0)->getType();
22006 if (AstType->isArrayType())
22007 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
22008 else
22009 Src = CGF.EmitScalarExpr(E->getArg(0));
22010 SrcType = Src->getType();
22011 if (SrcType->isPointerTy()) {
22012 IntType = IntegerType::get(
22013 CGF.getLLVMContext(),
22014 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
22015 } else {
22016 assert(SrcType->isIntegerTy());
22017 IntType = cast<llvm::IntegerType>(SrcType);
22018 }
22019 Alignment = CGF.EmitScalarExpr(E->getArg(1));
22020 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
22021 auto *One = llvm::ConstantInt::get(IntType, 1);
22022 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
22023 }
22024};
22025} // namespace
22026
22027/// Generate (x & (y-1)) == 0.
22028RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
22029 BuiltinAlignArgs Args(E, *this);
22030 llvm::Value *SrcAddress = Args.Src;
22031 if (Args.SrcType->isPointerTy())
22032 SrcAddress =
22033 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
22034 return RValue::get(Builder.CreateICmpEQ(
22035 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
22036 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
22037}
22038
22039/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
22040/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
22041/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
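/// For example, with a pointer argument __builtin_align_up(p, 16) lowers to
/// llvm.ptrmask(gep(p, 15), ~15), while an integer x lowers to (x + 15) & ~15;
/// __builtin_align_down simply omits the additive step.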
22042RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
22043 BuiltinAlignArgs Args(E, *this);
22044 llvm::Value *SrcForMask = Args.Src;
22045 if (AlignUp) {
22046 // When aligning up we have to first add the mask to ensure we go over the
22047 // next alignment value and then align down to the next valid multiple.
22048 // By adding the mask, we ensure that align_up on an already aligned
22049 // value will not change the value.
22050 if (Args.Src->getType()->isPointerTy()) {
22051 if (getLangOpts().isSignedOverflowDefined())
22052 SrcForMask =
22053 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
22054 else
22055 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
22056 /*SignedIndices=*/true,
22057 /*isSubtraction=*/false,
22058 E->getExprLoc(), "over_boundary");
22059 } else {
22060 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
22061 }
22062 }
22063 // Invert the mask to only clear the lower bits.
22064 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
22065 llvm::Value *Result = nullptr;
22066 if (Args.Src->getType()->isPointerTy()) {
22067 Result = Builder.CreateIntrinsic(
22068 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
22069 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
22070 } else {
22071 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
22072 }
22073 assert(Result->getType() == Args.SrcType);
22074 return RValue::get(Result);
22075}
22076
22077Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
22078 const CallExpr *E) {
22079 switch (BuiltinID) {
22080 case WebAssembly::BI__builtin_wasm_memory_size: {
22081 llvm::Type *ResultType = ConvertType(E->getType());
22082 Value *I = EmitScalarExpr(E->getArg(0));
22083 Function *Callee =
22084 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
22085 return Builder.CreateCall(Callee, I);
22086 }
22087 case WebAssembly::BI__builtin_wasm_memory_grow: {
22088 llvm::Type *ResultType = ConvertType(E->getType());
22089 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
22090 EmitScalarExpr(E->getArg(1))};
22091 Function *Callee =
22092 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
22093 return Builder.CreateCall(Callee, Args);
22094 }
22095 case WebAssembly::BI__builtin_wasm_tls_size: {
22096 llvm::Type *ResultType = ConvertType(E->getType());
22097 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
22098 return Builder.CreateCall(Callee);
22099 }
22100 case WebAssembly::BI__builtin_wasm_tls_align: {
22101 llvm::Type *ResultType = ConvertType(E->getType());
22102 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
22103 return Builder.CreateCall(Callee);
22104 }
22105 case WebAssembly::BI__builtin_wasm_tls_base: {
22106 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
22107 return Builder.CreateCall(Callee);
22108 }
22109 case WebAssembly::BI__builtin_wasm_throw: {
22110 Value *Tag = EmitScalarExpr(E->getArg(0));
22111 Value *Obj = EmitScalarExpr(E->getArg(1));
22112 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
22113 return Builder.CreateCall(Callee, {Tag, Obj});
22114 }
22115 case WebAssembly::BI__builtin_wasm_rethrow: {
22116 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
22117 return Builder.CreateCall(Callee);
22118 }
22119 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
22120 Value *Addr = EmitScalarExpr(E->getArg(0));
22121 Value *Expected = EmitScalarExpr(E->getArg(1));
22122 Value *Timeout = EmitScalarExpr(E->getArg(2));
22123 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
22124 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22125 }
22126 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
22127 Value *Addr = EmitScalarExpr(E->getArg(0));
22128 Value *Expected = EmitScalarExpr(E->getArg(1));
22129 Value *Timeout = EmitScalarExpr(E->getArg(2));
22130 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
22131 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
22132 }
22133 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
22134 Value *Addr = EmitScalarExpr(E->getArg(0));
22135 Value *Count = EmitScalarExpr(E->getArg(1));
22136 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
22137 return Builder.CreateCall(Callee, {Addr, Count});
22138 }
22139 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
22140 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
22141 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
22142 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
22143 Value *Src = EmitScalarExpr(E->getArg(0));
22144 llvm::Type *ResT = ConvertType(E->getType());
22145 Function *Callee =
22146 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
22147 return Builder.CreateCall(Callee, {Src});
22148 }
22149 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
22150 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
22151 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
22152 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
22153 Value *Src = EmitScalarExpr(E->getArg(0));
22154 llvm::Type *ResT = ConvertType(E->getType());
22155 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
22156 {ResT, Src->getType()});
22157 return Builder.CreateCall(Callee, {Src});
22158 }
22159 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
22160 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
22161 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
22162 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
22163 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
22164 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
22165 Value *Src = EmitScalarExpr(E->getArg(0));
22166 llvm::Type *ResT = ConvertType(E->getType());
22167 Function *Callee =
22168 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
22169 return Builder.CreateCall(Callee, {Src});
22170 }
22171 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
22172 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
22173 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
22174 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
22175 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
22176 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
22177 Value *Src = EmitScalarExpr(E->getArg(0));
22178 llvm::Type *ResT = ConvertType(E->getType());
22179 Function *Callee =
22180 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
22181 return Builder.CreateCall(Callee, {Src});
22182 }
22183 case WebAssembly::BI__builtin_wasm_min_f32:
22184 case WebAssembly::BI__builtin_wasm_min_f64:
22185 case WebAssembly::BI__builtin_wasm_min_f16x8:
22186 case WebAssembly::BI__builtin_wasm_min_f32x4:
22187 case WebAssembly::BI__builtin_wasm_min_f64x2: {
22188 Value *LHS = EmitScalarExpr(E->getArg(0));
22189 Value *RHS = EmitScalarExpr(E->getArg(1));
22190 Function *Callee =
22191 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
22192 return Builder.CreateCall(Callee, {LHS, RHS});
22193 }
22194 case WebAssembly::BI__builtin_wasm_max_f32:
22195 case WebAssembly::BI__builtin_wasm_max_f64:
22196 case WebAssembly::BI__builtin_wasm_max_f16x8:
22197 case WebAssembly::BI__builtin_wasm_max_f32x4:
22198 case WebAssembly::BI__builtin_wasm_max_f64x2: {
22199 Value *LHS = EmitScalarExpr(E->getArg(0));
22200 Value *RHS = EmitScalarExpr(E->getArg(1));
22201 Function *Callee =
22202 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
22203 return Builder.CreateCall(Callee, {LHS, RHS});
22204 }
22205 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
22206 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
22207 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
22208 Value *LHS = EmitScalarExpr(E->getArg(0));
22209 Value *RHS = EmitScalarExpr(E->getArg(1));
22210 Function *Callee =
22211 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
22212 return Builder.CreateCall(Callee, {LHS, RHS});
22213 }
22214 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
22215 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
22216 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
22217 Value *LHS = EmitScalarExpr(E->getArg(0));
22218 Value *RHS = EmitScalarExpr(E->getArg(1));
22219 Function *Callee =
22220 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
22221 return Builder.CreateCall(Callee, {LHS, RHS});
22222 }
22223 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22224 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22225 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22226 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22227 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22228 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22229 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22230 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22231 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22232 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22233 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22234 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
22235 unsigned IntNo;
22236 switch (BuiltinID) {
22237 case WebAssembly::BI__builtin_wasm_ceil_f16x8:
22238 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
22239 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
22240 IntNo = Intrinsic::ceil;
22241 break;
22242 case WebAssembly::BI__builtin_wasm_floor_f16x8:
22243 case WebAssembly::BI__builtin_wasm_floor_f32x4:
22244 case WebAssembly::BI__builtin_wasm_floor_f64x2:
22245 IntNo = Intrinsic::floor;
22246 break;
22247 case WebAssembly::BI__builtin_wasm_trunc_f16x8:
22248 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
22249 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
22250 IntNo = Intrinsic::trunc;
22251 break;
22252 case WebAssembly::BI__builtin_wasm_nearest_f16x8:
22253 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
22254 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
22255 IntNo = Intrinsic::nearbyint;
22256 break;
22257 default:
22258 llvm_unreachable("unexpected builtin ID");
22259 }
22260 Value *Value = EmitScalarExpr(E->getArg(0));
22261 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22262 return Builder.CreateCall(Callee, Value);
22263 }
22264 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
22265 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
22266 return Builder.CreateCall(Callee);
22267 }
22268 case WebAssembly::BI__builtin_wasm_ref_null_func: {
22269 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
22270 return Builder.CreateCall(Callee);
22271 }
22272 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
22273 Value *Src = EmitScalarExpr(E->getArg(0));
22274 Value *Indices = EmitScalarExpr(E->getArg(1));
22275 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
22276 return Builder.CreateCall(Callee, {Src, Indices});
22277 }
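// Integer abs has no dedicated wasm intrinsic here; it is emitted as
// select(v < 0, -v, v), which the backend can match to the SIMD abs
// instructions.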
22278 case WebAssembly::BI__builtin_wasm_abs_i8x16:
22279 case WebAssembly::BI__builtin_wasm_abs_i16x8:
22280 case WebAssembly::BI__builtin_wasm_abs_i32x4:
22281 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
22282 Value *Vec = EmitScalarExpr(E->getArg(0));
22283 Value *Neg = Builder.CreateNeg(Vec, "neg");
22284 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
22285 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
22286 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
22287 }
22288 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
22289 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
22290 Value *LHS = EmitScalarExpr(E->getArg(0));
22291 Value *RHS = EmitScalarExpr(E->getArg(1));
22292 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
22293 ConvertType(E->getType()));
22294 return Builder.CreateCall(Callee, {LHS, RHS});
22295 }
22296 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
22297 Value *LHS = EmitScalarExpr(E->getArg(0));
22298 Value *RHS = EmitScalarExpr(E->getArg(1));
22299 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
22300 return Builder.CreateCall(Callee, {LHS, RHS});
22301 }
22302 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22303 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22304 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22305 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
22306 Value *Vec = EmitScalarExpr(E->getArg(0));
22307 unsigned IntNo;
22308 switch (BuiltinID) {
22309 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
22310 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
22311 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
22312 break;
22313 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
22314 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
22315 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
22316 break;
22317 default:
22318 llvm_unreachable("unexpected builtin ID");
22319 }
22320
22321 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
22322 return Builder.CreateCall(Callee, Vec);
22323 }
22324 case WebAssembly::BI__builtin_wasm_bitselect: {
22325 Value *V1 = EmitScalarExpr(E->getArg(0));
22326 Value *V2 = EmitScalarExpr(E->getArg(1));
22327 Value *C = EmitScalarExpr(E->getArg(2));
22328 Function *Callee =
22329 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
22330 return Builder.CreateCall(Callee, {V1, V2, C});
22331 }
22332 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
22333 Value *LHS = EmitScalarExpr(E->getArg(0));
22334 Value *RHS = EmitScalarExpr(E->getArg(1));
22335 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
22336 return Builder.CreateCall(Callee, {LHS, RHS});
22337 }
22338 case WebAssembly::BI__builtin_wasm_any_true_v128:
22339 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22340 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22341 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22342 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
22343 unsigned IntNo;
22344 switch (BuiltinID) {
22345 case WebAssembly::BI__builtin_wasm_any_true_v128:
22346 IntNo = Intrinsic::wasm_anytrue;
22347 break;
22348 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
22349 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
22350 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
22351 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
22352 IntNo = Intrinsic::wasm_alltrue;
22353 break;
22354 default:
22355 llvm_unreachable("unexpected builtin ID");
22356 }
22357 Value *Vec = EmitScalarExpr(E->getArg(0));
22358 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
22359 return Builder.CreateCall(Callee, {Vec});
22360 }
22361 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
22362 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
22363 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
22364 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
22365 Value *Vec = EmitScalarExpr(E->getArg(0));
22366 Function *Callee =
22367 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
22368 return Builder.CreateCall(Callee, {Vec});
22369 }
22370 case WebAssembly::BI__builtin_wasm_abs_f16x8:
22371 case WebAssembly::BI__builtin_wasm_abs_f32x4:
22372 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
22373 Value *Vec = EmitScalarExpr(E->getArg(0));
22374 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
22375 return Builder.CreateCall(Callee, {Vec});
22376 }
22377 case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
22378 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
22379 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
22380 Value *Vec = EmitScalarExpr(E->getArg(0));
22381 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
22382 return Builder.CreateCall(Callee, {Vec});
22383 }
22384 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22385 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22386 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22387 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
22388 Value *Low = EmitScalarExpr(E->getArg(0));
22389 Value *High = EmitScalarExpr(E->getArg(1));
22390 unsigned IntNo;
22391 switch (BuiltinID) {
22392 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
22393 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
22394 IntNo = Intrinsic::wasm_narrow_signed;
22395 break;
22396 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
22397 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
22398 IntNo = Intrinsic::wasm_narrow_unsigned;
22399 break;
22400 default:
22401 llvm_unreachable("unexpected builtin ID");
22402 }
22403 Function *Callee =
22404 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
22405 return Builder.CreateCall(Callee, {Low, High});
22406 }
22407 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22408 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
22409 Value *Vec = EmitScalarExpr(E->getArg(0));
22410 unsigned IntNo;
22411 switch (BuiltinID) {
22412 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
22413 IntNo = Intrinsic::fptosi_sat;
22414 break;
22415 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
22416 IntNo = Intrinsic::fptoui_sat;
22417 break;
22418 default:
22419 llvm_unreachable("unexpected builtin ID");
22420 }
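// Saturating-convert the two f64 lanes to i32, then shuffle in a zero vector
// so the upper two lanes of the i32x4 result are zero.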
22421 llvm::Type *SrcT = Vec->getType();
22422 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
22423 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
22424 Value *Trunc = Builder.CreateCall(Callee, Vec);
22425 Value *Splat = Constant::getNullValue(TruncT);
22426 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
22427 }
22428 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
22429 Value *Ops[18];
22430 size_t OpIdx = 0;
22431 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
22432 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
22433 while (OpIdx < 18) {
22434 std::optional<llvm::APSInt> LaneConst =
22435 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
22436 assert(LaneConst && "Constant arg isn't actually constant?");
22437 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
22438 }
22439 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
22440 return Builder.CreateCall(Callee, Ops);
22441 }
22442 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22443 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22444 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22445 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22446 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22447 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
22448 Value *A = EmitScalarExpr(E->getArg(0));
22449 Value *B = EmitScalarExpr(E->getArg(1));
22450 Value *C = EmitScalarExpr(E->getArg(2));
22451 unsigned IntNo;
22452 switch (BuiltinID) {
22453 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
22454 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
22455 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
22456 IntNo = Intrinsic::wasm_relaxed_madd;
22457 break;
22458 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
22459 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
22460 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
22461 IntNo = Intrinsic::wasm_relaxed_nmadd;
22462 break;
22463 default:
22464 llvm_unreachable("unexpected builtin ID");
22465 }
22466 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
22467 return Builder.CreateCall(Callee, {A, B, C});
22468 }
22469 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
22470 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
22471 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
22472 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
22473 Value *A = EmitScalarExpr(E->getArg(0));
22474 Value *B = EmitScalarExpr(E->getArg(1));
22475 Value *C = EmitScalarExpr(E->getArg(2));
22476 Function *Callee =
22477 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
22478 return Builder.CreateCall(Callee, {A, B, C});
22479 }
22480 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
22481 Value *Src = EmitScalarExpr(E->getArg(0));
22482 Value *Indices = EmitScalarExpr(E->getArg(1));
22483 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
22484 return Builder.CreateCall(Callee, {Src, Indices});
22485 }
22486 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22487 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22488 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22489 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
22490 Value *LHS = EmitScalarExpr(E->getArg(0));
22491 Value *RHS = EmitScalarExpr(E->getArg(1));
22492 unsigned IntNo;
22493 switch (BuiltinID) {
22494 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
22495 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
22496 IntNo = Intrinsic::wasm_relaxed_min;
22497 break;
22498 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
22499 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
22500 IntNo = Intrinsic::wasm_relaxed_max;
22501 break;
22502 default:
22503 llvm_unreachable("unexpected builtin ID");
22504 }
22505 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
22506 return Builder.CreateCall(Callee, {LHS, RHS});
22507 }
22508 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22509 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22510 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22511 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
22512 Value *Vec = EmitScalarExpr(E->getArg(0));
22513 unsigned IntNo;
22514 switch (BuiltinID) {
22515 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
22516 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
22517 break;
22518 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
22519 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
22520 break;
22521 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
22522 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
22523 break;
22524 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
22525 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
22526 break;
22527 default:
22528 llvm_unreachable("unexpected builtin ID");
22529 }
22530 Function *Callee = CGM.getIntrinsic(IntNo);
22531 return Builder.CreateCall(Callee, {Vec});
22532 }
22533 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
22534 Value *LHS = EmitScalarExpr(E->getArg(0));
22535 Value *RHS = EmitScalarExpr(E->getArg(1));
22536 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
22537 return Builder.CreateCall(Callee, {LHS, RHS});
22538 }
22539 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
22540 Value *LHS = EmitScalarExpr(E->getArg(0));
22541 Value *RHS = EmitScalarExpr(E->getArg(1));
22542 Function *Callee =
22543 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
22544 return Builder.CreateCall(Callee, {LHS, RHS});
22545 }
22546 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
22547 Value *LHS = EmitScalarExpr(E->getArg(0));
22548 Value *RHS = EmitScalarExpr(E->getArg(1));
22549 Value *Acc = EmitScalarExpr(E->getArg(2));
22550 Function *Callee =
22551 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
22552 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22553 }
22554 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
22555 Value *LHS = EmitScalarExpr(E->getArg(0));
22556 Value *RHS = EmitScalarExpr(E->getArg(1));
22557 Value *Acc = EmitScalarExpr(E->getArg(2));
22558 Function *Callee =
22559 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
22560 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
22561 }
22562 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
22563 Value *Addr = EmitScalarExpr(E->getArg(0));
22564 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
22565 return Builder.CreateCall(Callee, {Addr});
22566 }
22567 case WebAssembly::BI__builtin_wasm_storef16_f32: {
22568 Value *Val = EmitScalarExpr(E->getArg(0));
22569 Value *Addr = EmitScalarExpr(E->getArg(1));
22570 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
22571 return Builder.CreateCall(Callee, {Val, Addr});
22572 }
22573 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
22574 Value *Val = EmitScalarExpr(E->getArg(0));
22575 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
22576 return Builder.CreateCall(Callee, {Val});
22577 }
22578 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
22579 Value *Vector = EmitScalarExpr(E->getArg(0));
22580 Value *Index = EmitScalarExpr(E->getArg(1));
22581 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
22582 return Builder.CreateCall(Callee, {Vector, Index});
22583 }
22584 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
22585 Value *Vector = EmitScalarExpr(E->getArg(0));
22586 Value *Index = EmitScalarExpr(E->getArg(1));
22587 Value *Val = EmitScalarExpr(E->getArg(2));
22588 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
22589 return Builder.CreateCall(Callee, {Vector, Index, Val});
22590 }
22591 case WebAssembly::BI__builtin_wasm_table_get: {
22592 assert(E->getArg(0)->getType()->isArrayType());
22593 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22594 Value *Index = EmitScalarExpr(E->getArg(1));
22595 Function *Callee;
22596 if (E->getType().isWebAssemblyExternrefType())
22597 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
22598 else if (E->getType().isWebAssemblyFuncrefType())
22599 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
22600 else
22601 llvm_unreachable(
22602 "Unexpected reference type for __builtin_wasm_table_get");
22603 return Builder.CreateCall(Callee, {Table, Index});
22604 }
22605 case WebAssembly::BI__builtin_wasm_table_set: {
22606 assert(E->getArg(0)->getType()->isArrayType());
22607 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22608 Value *Index = EmitScalarExpr(E->getArg(1));
22609 Value *Val = EmitScalarExpr(E->getArg(2));
22610 Function *Callee;
22611 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22612 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
22613 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22614 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
22615 else
22616 llvm_unreachable(
22617 "Unexpected reference type for __builtin_wasm_table_set");
22618 return Builder.CreateCall(Callee, {Table, Index, Val});
22619 }
22620 case WebAssembly::BI__builtin_wasm_table_size: {
22621 assert(E->getArg(0)->getType()->isArrayType());
22622 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22623 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
22624 return Builder.CreateCall(Callee, Value);
22625 }
22626 case WebAssembly::BI__builtin_wasm_table_grow: {
22627 assert(E->getArg(0)->getType()->isArrayType());
22628 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22629 Value *Val = EmitScalarExpr(E->getArg(1));
22630 Value *NElems = EmitScalarExpr(E->getArg(2));
22631
22632 Function *Callee;
22633 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
22634 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
22635 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22636 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22637 else
22638 llvm_unreachable(
22639 "Unexpected reference type for __builtin_wasm_table_grow");
22640
22641 return Builder.CreateCall(Callee, {Table, Val, NElems});
22642 }
22643 case WebAssembly::BI__builtin_wasm_table_fill: {
22644 assert(E->getArg(0)->getType()->isArrayType());
22645 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22646 Value *Index = EmitScalarExpr(E->getArg(1));
22647 Value *Val = EmitScalarExpr(E->getArg(2));
22648 Value *NElems = EmitScalarExpr(E->getArg(3));
22649
22650 Function *Callee;
22651 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
22652 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
22653 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
22654 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
22655 else
22656 llvm_unreachable(
22657 "Unexpected reference type for __builtin_wasm_table_fill");
22658
22659 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
22660 }
22661 case WebAssembly::BI__builtin_wasm_table_copy: {
22662 assert(E->getArg(0)->getType()->isArrayType());
22663 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
22664 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
22665 Value *DstIdx = EmitScalarExpr(E->getArg(2));
22666 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
22667 Value *NElems = EmitScalarExpr(E->getArg(4));
22668
22669 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
22670
22671 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
22672 }
22673 default:
22674 return nullptr;
22675 }
22676}
22677
22678static std::pair<Intrinsic::ID, unsigned>
22679getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
22680 struct Info {
22681 unsigned BuiltinID;
22682 Intrinsic::ID IntrinsicID;
22683 unsigned VecLen;
22684 };
22685 static Info Infos[] = {
22686#define CUSTOM_BUILTIN_MAPPING(x,s) \
22687 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
22688 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
22689 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
22690 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
22691 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
22692 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
22693 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
22694 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
22695 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
22696 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
22697 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
22698 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
22699 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
22700 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
22701 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
22702 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
22703 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
22704 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
22705 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
22706 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
22707 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
22708 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
22709 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
22710 // Legacy builtins that take a vector in place of a vector predicate.
22711 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
22712 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
22713 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
22714 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
22715 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
22716 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
22717 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
22718 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
22719#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
22720#undef CUSTOM_BUILTIN_MAPPING
22721 };
22722
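// The table is sorted by BuiltinID once, on first use, so that lower_bound
// below can binary-search it.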
22723 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
22724 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
22725 (void)SortOnce;
22726
22727 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
22728 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
22729 return {Intrinsic::not_intrinsic, 0};
22730
22731 return {F->IntrinsicID, F->VecLen};
22732}
22733
22734Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
22735 const CallExpr *E) {
22736 Intrinsic::ID ID;
22737 unsigned VecLen;
22738 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
22739
22740 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
22741 // The base pointer is passed by address, so it needs to be loaded.
22742 Address A = EmitPointerWithAlignment(E->getArg(0));
22743 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
22744 llvm::Value *Base = Builder.CreateLoad(BP);
22745 // The treatment of both loads and stores is the same: the arguments for
22746 // the builtin are the same as the arguments for the intrinsic.
22747 // Load:
22748 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
22749 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
22750 // Store:
22751 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
22752 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
22753 SmallVector<llvm::Value*,5> Ops = { Base };
22754 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
22755 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22756
22757 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
22758 // The load intrinsics generate two results (Value, NewBase), stores
22759 // generate one (NewBase). The new base address needs to be stored.
22760 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
22761 : Result;
22762 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
22763 Address Dest = EmitPointerWithAlignment(E->getArg(0));
22764 llvm::Value *RetVal =
22765 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
22766 if (IsLoad)
22767 RetVal = Builder.CreateExtractValue(Result, 0);
22768 return RetVal;
22769 };
22770
22771 // Handle the conversion of bit-reverse load intrinsics to bit code.
22772 // The intrinsic call after this function only reads from memory and the
22773 // write to memory is handled by the store instruction.
22774 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
22775 // The intrinsic generates one result, which is the new value for the base
22776 // pointer. It needs to be returned. The result of the load instruction is
22777 // passed to the intrinsic by address, so the value needs to be stored.
22778 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
22779
22780 // Expressions like &(*pt++) will be incremented per evaluation.
22781 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
22782 // per call.
22783 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
22784 DestAddr = DestAddr.withElementType(Int8Ty);
22785 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
22786
22787 // Operands are Base, Dest, Modifier.
22788 // The intrinsic format in LLVM IR is defined as
22789 // { ValueType, i8* } (i8*, i32).
22790 llvm::Value *Result = Builder.CreateCall(
22791 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
22792
22793 // The value needs to be stored as the variable is passed by reference.
22794 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
22795
22796 // The store needs to be truncated to fit the destination type.
22797 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
22798 // to be handled with stores of the respective destination type.
22799 DestVal = Builder.CreateTrunc(DestVal, DestTy);
22800
22801 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
22802 // The updated value of the base pointer is returned.
22803 return Builder.CreateExtractValue(Result, 1);
22804 };
22805
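// V2Q and Q2V convert between an HVX vector and the predicate (Q register)
// type via vandvrt/vandqrt with an all-ones scalar, since the legacy builtins
// model predicates as plain vectors.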
22806 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
22807 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
22808 : Intrinsic::hexagon_V6_vandvrt;
22809 return Builder.CreateCall(CGM.getIntrinsic(ID),
22810 {Vec, Builder.getInt32(-1)});
22811 };
22812 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
22813 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
22814 : Intrinsic::hexagon_V6_vandqrt;
22815 return Builder.CreateCall(CGM.getIntrinsic(ID),
22816 {Pred, Builder.getInt32(-1)});
22817 };
22818
22819 switch (BuiltinID) {
22820 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
22821 // and the corresponding C/C++ builtins use loads/stores to update
22822 // the predicate.
22823 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
22824 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
22825 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
22826 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
22827 // Get the type from the 0-th argument.
22828 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22829 Address PredAddr =
22830 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22831 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
22832 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22833 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
22834
22835 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22836 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22837 PredAddr.getAlignment());
22838 return Builder.CreateExtractValue(Result, 0);
22839 }
22840 // These are identical to the builtins above, except they don't consume
22841 // input carry, only generate carry-out. Since they still produce two
22842 // outputs, generate the store of the predicate, but no load.
22843 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
22844 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
22845 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
22846 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
22847 // Get the type from the 0-th argument.
22848 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
22849 Address PredAddr =
22850 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
22851 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
22852 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
22853
22854 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
22855 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
22856 PredAddr.getAlignment());
22857 return Builder.CreateExtractValue(Result, 0);
22858 }
22859
22860 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
22861 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
22862 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
22863 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
22864 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
22865 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
22866 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
22867 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
22868 SmallVector<llvm::Value*,4> Ops;
22869 const Expr *PredOp = E->getArg(0);
22870 // There will be an implicit cast to a boolean vector. Strip it.
22871 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
22872 if (Cast->getCastKind() == CK_BitCast)
22873 PredOp = Cast->getSubExpr();
22874 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
22875 }
22876 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
22877 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22878 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
22879 }
22880
22881 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
22882 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
22883 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
22884 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
22885 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
22886 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
22887 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
22888 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
22889 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
22890 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
22891 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
22892 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
22893 return MakeCircOp(ID, /*IsLoad=*/true);
22894 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
22895 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
22896 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
22897 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
22898 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
22899 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
22900 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
22901 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
22902 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
22903 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
22904 return MakeCircOp(ID, /*IsLoad=*/false);
22905 case Hexagon::BI__builtin_brev_ldub:
22906 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
22907 case Hexagon::BI__builtin_brev_ldb:
22908 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
22909 case Hexagon::BI__builtin_brev_lduh:
22910 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
22911 case Hexagon::BI__builtin_brev_ldh:
22912 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
22913 case Hexagon::BI__builtin_brev_ldw:
22914 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
22915 case Hexagon::BI__builtin_brev_ldd:
22916 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
22917 } // switch
22918
22919 return nullptr;
22920}
22921
22922Value *CodeGenFunction::EmitRISCVCpuIs(const CallExpr *E) {
22923 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
22924 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
22925 return EmitRISCVCpuIs(CPUStr);
22926}
22927
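// __builtin_cpu_is("<name>") on RISC-V compares the {mvendorid, marchid,
// mimpid} tuple published by the runtime in __riscv_cpu_model against the
// values recorded for <name> in the target parser's CPU table.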
22928Value *CodeGenFunction::EmitRISCVCpuIs(StringRef CPUStr) {
22929 llvm::Type *Int32Ty = Builder.getInt32Ty();
22930 llvm::Type *Int64Ty = Builder.getInt64Ty();
22931 llvm::StructType *StructTy = llvm::StructType::get(Int32Ty, Int64Ty, Int64Ty);
22932 llvm::Constant *RISCVCPUModel =
22933 CGM.CreateRuntimeVariable(StructTy, "__riscv_cpu_model");
22934 cast<llvm::GlobalValue>(RISCVCPUModel)->setDSOLocal(true);
22935
22936 auto loadRISCVCPUID = [&](unsigned Index) {
22937 Value *Ptr = Builder.CreateStructGEP(StructTy, RISCVCPUModel, Index);
22938 Value *CPUID = Builder.CreateAlignedLoad(StructTy->getTypeAtIndex(Index),
22939 Ptr, llvm::MaybeAlign());
22940 return CPUID;
22941 };
22942
22943 const llvm::RISCV::CPUModel Model = llvm::RISCV::getCPUModel(CPUStr);
22944
22945 // Compare mvendorid.
22946 Value *VendorID = loadRISCVCPUID(0);
22947 Value *Result =
22948 Builder.CreateICmpEQ(VendorID, Builder.getInt32(Model.MVendorID));
22949
22950 // Compare marchid.
22951 Value *ArchID = loadRISCVCPUID(1);
22952 Result = Builder.CreateAnd(
22953 Result, Builder.CreateICmpEQ(ArchID, Builder.getInt64(Model.MArchID)));
22954
22955 // Compare mimpid.
22956 Value *ImpID = loadRISCVCPUID(2);
22957 Result = Builder.CreateAnd(
22958 Result, Builder.CreateICmpEQ(ImpID, Builder.getInt64(Model.MImpID)));
22959
22960 return Result;
22961}
22962
22963Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
22964                                             const CallExpr *E,
22965                                             ReturnValueSlot ReturnValue) {
22966
22967 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
22968 return EmitRISCVCpuSupports(E);
22969 if (BuiltinID == Builtin::BI__builtin_cpu_init)
22970 return EmitRISCVCpuInit();
22971 if (BuiltinID == Builtin::BI__builtin_cpu_is)
22972 return EmitRISCVCpuIs(E);
22973
22974  SmallVector<Value *, 4> Ops;
22975  llvm::Type *ResultType = ConvertType(E->getType());
22976
22977 // Find out if any arguments are required to be integer constant expressions.
22978 unsigned ICEArguments = 0;
22979  ASTContext::GetBuiltinTypeError Error;
22980  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
22981 if (Error == ASTContext::GE_Missing_type) {
22982 // Vector intrinsics don't have a type string.
22983 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
22984 BuiltinID <= clang::RISCV::LastRVVBuiltin);
22985 ICEArguments = 0;
22986 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
22987 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
22988 ICEArguments = 1 << 1;
22989 } else {
22990 assert(Error == ASTContext::GE_None && "Unexpected error");
22991 }
22992
22993 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
22994 ICEArguments |= (1 << 1);
22995 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
22996 ICEArguments |= (1 << 2);
22997
22998 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
22999 // Handle aggregate argument, namely RVV tuple types in segment load/store
23000 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
23001 LValue L = EmitAggExprToLValue(E->getArg(i));
23002 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
23003 Ops.push_back(AggValue);
23004 continue;
23005 }
23006 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
23007 }
23008
23009 Intrinsic::ID ID = Intrinsic::not_intrinsic;
23010 // The 0th bit simulates the `vta` of RVV
23011 // The 1st bit simulates the `vma` of RVV
23012 constexpr unsigned RVV_VTA = 0x1;
23013 constexpr unsigned RVV_VMA = 0x2;
23014 int PolicyAttrs = 0;
23015 bool IsMasked = false;
23016  // This is used by segment load/store to determine its LLVM type.
23017 unsigned SegInstSEW = 8;
23018
23019 // Required for overloaded intrinsics.
23020  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
23021  switch (BuiltinID) {
23022 default: llvm_unreachable("unexpected builtin ID");
23023 case RISCV::BI__builtin_riscv_orc_b_32:
23024 case RISCV::BI__builtin_riscv_orc_b_64:
23025 case RISCV::BI__builtin_riscv_clmul_32:
23026 case RISCV::BI__builtin_riscv_clmul_64:
23027 case RISCV::BI__builtin_riscv_clmulh_32:
23028 case RISCV::BI__builtin_riscv_clmulh_64:
23029 case RISCV::BI__builtin_riscv_clmulr_32:
23030 case RISCV::BI__builtin_riscv_clmulr_64:
23031 case RISCV::BI__builtin_riscv_xperm4_32:
23032 case RISCV::BI__builtin_riscv_xperm4_64:
23033 case RISCV::BI__builtin_riscv_xperm8_32:
23034 case RISCV::BI__builtin_riscv_xperm8_64:
23035 case RISCV::BI__builtin_riscv_brev8_32:
23036 case RISCV::BI__builtin_riscv_brev8_64:
23037 case RISCV::BI__builtin_riscv_zip_32:
23038 case RISCV::BI__builtin_riscv_unzip_32: {
23039 switch (BuiltinID) {
23040 default: llvm_unreachable("unexpected builtin ID");
23041 // Zbb
23042 case RISCV::BI__builtin_riscv_orc_b_32:
23043 case RISCV::BI__builtin_riscv_orc_b_64:
23044 ID = Intrinsic::riscv_orc_b;
23045 break;
23046
23047 // Zbc
23048 case RISCV::BI__builtin_riscv_clmul_32:
23049 case RISCV::BI__builtin_riscv_clmul_64:
23050 ID = Intrinsic::riscv_clmul;
23051 break;
23052 case RISCV::BI__builtin_riscv_clmulh_32:
23053 case RISCV::BI__builtin_riscv_clmulh_64:
23054 ID = Intrinsic::riscv_clmulh;
23055 break;
23056 case RISCV::BI__builtin_riscv_clmulr_32:
23057 case RISCV::BI__builtin_riscv_clmulr_64:
23058 ID = Intrinsic::riscv_clmulr;
23059 break;
23060
23061 // Zbkx
23062 case RISCV::BI__builtin_riscv_xperm8_32:
23063 case RISCV::BI__builtin_riscv_xperm8_64:
23064 ID = Intrinsic::riscv_xperm8;
23065 break;
23066 case RISCV::BI__builtin_riscv_xperm4_32:
23067 case RISCV::BI__builtin_riscv_xperm4_64:
23068 ID = Intrinsic::riscv_xperm4;
23069 break;
23070
23071 // Zbkb
23072 case RISCV::BI__builtin_riscv_brev8_32:
23073 case RISCV::BI__builtin_riscv_brev8_64:
23074 ID = Intrinsic::riscv_brev8;
23075 break;
23076 case RISCV::BI__builtin_riscv_zip_32:
23077 ID = Intrinsic::riscv_zip;
23078 break;
23079 case RISCV::BI__builtin_riscv_unzip_32:
23080 ID = Intrinsic::riscv_unzip;
23081 break;
23082 }
23083
23084 IntrinsicTypes = {ResultType};
23085 break;
23086 }
23087
23088 // Zk builtins
23089
23090 // Zknh
23091 case RISCV::BI__builtin_riscv_sha256sig0:
23092 ID = Intrinsic::riscv_sha256sig0;
23093 break;
23094 case RISCV::BI__builtin_riscv_sha256sig1:
23095 ID = Intrinsic::riscv_sha256sig1;
23096 break;
23097 case RISCV::BI__builtin_riscv_sha256sum0:
23098 ID = Intrinsic::riscv_sha256sum0;
23099 break;
23100 case RISCV::BI__builtin_riscv_sha256sum1:
23101 ID = Intrinsic::riscv_sha256sum1;
23102 break;
23103
23104 // Zksed
23105 case RISCV::BI__builtin_riscv_sm4ks:
23106 ID = Intrinsic::riscv_sm4ks;
23107 break;
23108 case RISCV::BI__builtin_riscv_sm4ed:
23109 ID = Intrinsic::riscv_sm4ed;
23110 break;
23111
23112 // Zksh
23113 case RISCV::BI__builtin_riscv_sm3p0:
23114 ID = Intrinsic::riscv_sm3p0;
23115 break;
23116 case RISCV::BI__builtin_riscv_sm3p1:
23117 ID = Intrinsic::riscv_sm3p1;
23118 break;
23119
23120 case RISCV::BI__builtin_riscv_clz_32:
23121 case RISCV::BI__builtin_riscv_clz_64: {
23122 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
23123 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23124 if (Result->getType() != ResultType)
23125 Result =
23126 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23127 return Result;
23128 }
23129 case RISCV::BI__builtin_riscv_ctz_32:
23130 case RISCV::BI__builtin_riscv_ctz_64: {
23131 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
23132 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
23133 if (Result->getType() != ResultType)
23134 Result =
23135 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
23136 return Result;
23137 }
23138
23139 // Zihintntl
23140 case RISCV::BI__builtin_riscv_ntl_load: {
23141 llvm::Type *ResTy = ConvertType(E->getType());
23142 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23143 if (Ops.size() == 2)
23144 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
23145
23146 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23147        getLLVMContext(),
23148        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23149 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23150 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23151
23152 int Width;
23153 if(ResTy->isScalableTy()) {
23154 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
23155 llvm::Type *ScalarTy = ResTy->getScalarType();
23156 Width = ScalarTy->getPrimitiveSizeInBits() *
23157 SVTy->getElementCount().getKnownMinValue();
23158 } else
23159 Width = ResTy->getPrimitiveSizeInBits();
23160 LoadInst *Load = Builder.CreateLoad(
23161 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
23162
23163 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23164 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23165 RISCVDomainNode);
23166
23167 return Load;
23168 }
23169 case RISCV::BI__builtin_riscv_ntl_store: {
23170 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
23171 if (Ops.size() == 3)
23172 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
23173
23174 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
23175        getLLVMContext(),
23176        llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
23177 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
23178 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
23179
23180 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
23181 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
23182 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
23183 RISCVDomainNode);
23184
23185 return Store;
23186 }
23187 // XCValu
23188 case RISCV::BI__builtin_riscv_cv_alu_addN:
23189 ID = Intrinsic::riscv_cv_alu_addN;
23190 break;
23191 case RISCV::BI__builtin_riscv_cv_alu_addRN:
23192 ID = Intrinsic::riscv_cv_alu_addRN;
23193 break;
23194 case RISCV::BI__builtin_riscv_cv_alu_adduN:
23195 ID = Intrinsic::riscv_cv_alu_adduN;
23196 break;
23197 case RISCV::BI__builtin_riscv_cv_alu_adduRN:
23198 ID = Intrinsic::riscv_cv_alu_adduRN;
23199 break;
23200 case RISCV::BI__builtin_riscv_cv_alu_clip:
23201 ID = Intrinsic::riscv_cv_alu_clip;
23202 break;
23203 case RISCV::BI__builtin_riscv_cv_alu_clipu:
23204 ID = Intrinsic::riscv_cv_alu_clipu;
23205 break;
23206 case RISCV::BI__builtin_riscv_cv_alu_extbs:
23207 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23208 "extbs");
23209 case RISCV::BI__builtin_riscv_cv_alu_extbz:
23210 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
23211 "extbz");
23212 case RISCV::BI__builtin_riscv_cv_alu_exths:
23213 return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23214 "exths");
23215 case RISCV::BI__builtin_riscv_cv_alu_exthz:
23216 return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
23217 "exthz");
23218 case RISCV::BI__builtin_riscv_cv_alu_slet:
23219 return Builder.CreateZExt(Builder.CreateICmpSLE(Ops[0], Ops[1]), Int32Ty,
23220 "sle");
23221 case RISCV::BI__builtin_riscv_cv_alu_sletu:
23222 return Builder.CreateZExt(Builder.CreateICmpULE(Ops[0], Ops[1]), Int32Ty,
23223 "sleu");
23224 case RISCV::BI__builtin_riscv_cv_alu_subN:
23225 ID = Intrinsic::riscv_cv_alu_subN;
23226 break;
23227 case RISCV::BI__builtin_riscv_cv_alu_subRN:
23228 ID = Intrinsic::riscv_cv_alu_subRN;
23229 break;
23230 case RISCV::BI__builtin_riscv_cv_alu_subuN:
23231 ID = Intrinsic::riscv_cv_alu_subuN;
23232 break;
23233 case RISCV::BI__builtin_riscv_cv_alu_subuRN:
23234 ID = Intrinsic::riscv_cv_alu_subuRN;
23235 break;
23236
23237 // Vector builtins are handled from here.
23238#include "clang/Basic/riscv_vector_builtin_cg.inc"
23239
23240 // SiFive Vector builtins are handled from here.
23241#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
23242 }
23243
23244 assert(ID != Intrinsic::not_intrinsic);
23245
23246 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
23247 return Builder.CreateCall(F, Ops, "");
23248}
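
For context, a minimal usage sketch of the __builtin_cpu_is path handled above. This snippet is not part of CGBuiltin.cpp; the function name select_kernel and the CPU name "sifive-u74" are illustrative, and runtime support for the __riscv_cpu_model variable (e.g. from compiler-rt) is assumed.

// Sketch only: guard a core-specific fast path when compiling for RISC-V.
// EmitRISCVCpuIs lowers the __builtin_cpu_is call below into integer
// compares against the mvendorid/marchid/mimpid fields of __riscv_cpu_model.
void select_kernel(void) {
  __builtin_cpu_init();                  // ensure the CPU model data is populated
  if (__builtin_cpu_is("sifive-u74")) {  // illustrative CPU name
    // All three model fields matched the values from
    // llvm::RISCV::getCPUModel("sifive-u74"); dispatch a tuned path here.
  }
}

The emitted IR for the condition is what EmitRISCVCpuIs builds above: three loads from __riscv_cpu_model, one i32 and two i64 equality compares, combined with two and instructions.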
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3443
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
static constexpr SparcCPUInfo CPUInfo[]
Definition: Sparc.cpp:67
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8929
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9765
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:376
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1393
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6955
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:568
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2286
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2252
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6824
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2743
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9735
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:1008
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9728
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7970
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9955
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7982
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7952
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8997
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2621
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:621
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:946
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:671
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT)
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6951
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7983
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1590
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7987
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
static Value * loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder, CodeGenModule &CGM)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:1068
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:802
static Value * emitQuaternaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:778
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2649
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:645
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:976
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9724
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7984
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9802
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6709
constexpr unsigned SVEBitsPerBlock
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1986
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7794
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6948
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:142
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:688
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:428
static bool HasNoIndirectArgumentsOrResults(CGFunctionInfo const &FnInfo)
Checks no arguments or results are passed indirectly in the ABI (i.e.
Definition: CGBuiltin.cpp:882
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
Value * readX18AsPtr(CodeGenFunction &CGF)
Helper for the read/write/add/inc X18 builtins: read the X18 register and return it as an i8 pointer.
Definition: CGBuiltin.cpp:249
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9829
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1827
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1676
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1454
static Value * emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:765
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6960
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:848
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:839
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2479
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:1016
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1529
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9791
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:725
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:964
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2533
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2672
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6783
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:419
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9817
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:347
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8908
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:78
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8900
@ VolatileRead
Definition: CGBuiltin.cpp:8902
@ NormalRead
Definition: CGBuiltin.cpp:8901
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:513
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:358
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2521
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:473
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:705
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:336
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9757
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7979
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:7279
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:400
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:931
@ UnsignedAlts
Definition: CGBuiltin.cpp:6918
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6923
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6927
@ Use64BitVectors
Definition: CGBuiltin.cpp:6920
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6915
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6925
@ InventFloatType
Definition: CGBuiltin.cpp:6917
@ AddRetType
Definition: CGBuiltin.cpp:6910
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6912
@ VectorizeRetType
Definition: CGBuiltin.cpp:6914
@ VectorRet
Definition: CGBuiltin.cpp:6924
@ Add1ArgType
Definition: CGBuiltin.cpp:6911
@ Use128BitVectors
Definition: CGBuiltin.cpp:6921
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:8045
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:790
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:820
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:657
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2754
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1403
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2487
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:744
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:893
static Value * handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:212
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:411
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1439
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8827
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2246
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:632
static Value * handleHlslClip(const CallExpr *E, CodeGenFunction *CGF)
Definition: CGBuiltin.cpp:101
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7981
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7554
CodeGenFunction::ComplexPairTy ComplexPairTy
const Decl * D
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:147
unsigned Iter
Definition: HTMLLogger.cpp:153
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:144
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
HLSLResourceBindingAttr::RegisterType RegisterType
Definition: SemaHLSL.cpp:43
SourceRange Range
Definition: SemaObjC.cpp:758
SourceLocation Loc
Definition: SemaObjC.cpp:759
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
C Language Family Type Representation.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:465
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:188
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1187
IdentifierTable & Idents
Definition: ASTContext.h:680
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:682
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType IntTy
Definition: ASTContext.h:1169
QualType getObjCIdType() const
Represents the Objective-CC id type.
Definition: ASTContext.h:2196
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2763
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2482
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1160
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:799
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2384
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2387
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3577
QualType getElementType() const
Definition: Type.h:3589
static std::unique_ptr< AtomicScopeModel > create(AtomicScopeModelKind K)
Create an atomic scope model by AtomicScopeModelKind.
Definition: SyncScope.h:273
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:147
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:245
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:249
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:120
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2874
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
ABIArgInfo - Helper class to encapsulate information about how a specific C type should be passed to ...
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
An aggregate value slot.
Definition: CGValue.h:504
Address getAddress() const
Definition: CGValue.h:644
A scoped helper to set the current debug location to the specified location or preferred location of ...
Definition: CGDebugInfo.h:856
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:896
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:913
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:136
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:143
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:292
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:389
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:381
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:180
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:405
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:151
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:398
Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:219
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:169
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:108
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:315
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:437
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:365
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:128
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:189
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:261
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:346
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:63
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:137
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
llvm::DILocation * CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg)
Create a debug location from TrapLocation that adds an artificial inline frame where the frame name i...
CGFunctionInfo - Class to encapsulate the information about a function definition.
MutableArrayRef< ArgInfo > arguments()
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:274
void add(RValue rvalue, QualType type)
Definition: CGCall.h:305
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID, bool NoMerge=false)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, QualType Ty)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitRISCVCpuSupports(const CallExpr *E)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
const LangOptions & getLangOpts() const
llvm::Value * EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
llvm::Value * EmitCheckedArgForAssume(const Expr *E)
Emits an argument for a call to a __builtin_assume.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignmen and cas...
@ Default
! No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
RValue EmitAnyExpr(const Expr *E, AggValueSlot aggSlot=AggValueSlot::ignored(), bool ignoreResult=false)
EmitAnyExpr - Emit code to compute the specified expression which can have any type.
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
llvm::Value * EmitRISCVCpuIs(const CallExpr *E)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrument - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside presereved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **CallOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Value * GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitSVEPredicateTupleCast(llvm::Value *PredTuple, llvm::StructType *Ty)
llvm::Type * ConvertType(QualType T)
void EmitWritebacks(const CallArgList &Args)
EmitWriteback - Emit callbacks for function.
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
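As a hedged illustration of the EmitStoreOfScalar entry above, assuming a CodeGenFunction context where the first argument is a pointer; variable names are invented.
  // Hypothetical: store a freshly computed scalar through the first argument,
  // letting EmitStoreOfScalar handle any representation conversion.
  QualType PointeeTy = E->getArg(0)->getType()->getPointeeType();
  llvm::Value *Ptr   = EmitScalarExpr(E->getArg(0));
  LValue DstLV       = MakeNaturalAlignAddrLValue(Ptr, PointeeTy);
  llvm::Value *Val   = EmitScalarExpr(E->getArg(1));
  EmitStoreOfScalar(Val, DstLV.getAddress(), /*Volatile=*/false, PointeeTy);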
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, ArrayRef< llvm::Value * > Ops)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
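A sketch of the usual pairing of CreateRuntimeFunction with EmitRuntimeCallOrInvoke; the runtime entry point __example_probe and its signature are invented for illustration, and VoidTy/IntTy are the cached LLVM types from CodeGenTypeCache.
  // Hypothetical: declare "void __example_probe(int)" on first use and emit a
  // call (or an invoke, inside an exceptional region) to it.
  llvm::FunctionType *FTy =
      llvm::FunctionType::get(VoidTy, {IntTy}, /*isVarArg=*/false);
  llvm::FunctionCallee Probe =
      CGM.CreateRuntimeFunction(FTy, "__example_probe");
  EmitRuntimeCallOrInvoke(Probe, {EmitScalarExpr(E->getArg(0))});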
llvm::Constant * getRawFunctionPointer(GlobalDecl GD, llvm::Type *Ty=nullptr)
Return a function pointer for a reference to the given function.
Definition: CGExpr.cpp:2909
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:262
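A hedged sketch of how the library fallback above is typically consumed; FD is assumed to be the callee FunctionDecl and BuiltinID its builtin id, and the direct CreateCall is a simplification of what the real emitters do.
  // Hypothetical: map __builtin_fabsf to its library entry point "fabsf" and
  // call it with the already-emitted argument.
  llvm::Constant *LibFn = CGM.getBuiltinLibFunction(FD, BuiltinID);
  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  llvm::Value *Res =
      Builder.CreateCall(cast<llvm::Function>(LibFn), {Arg}, "libcall");
  (void)Res; // e.g. wrapped as RValue::get(Res)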
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
const LangOptions & getLangOpts() const
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys={})
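A minimal sketch of the CGM.getIntrinsic pattern, assuming an integer-typed first argument; the choice of llvm.ctpop is illustrative only.
  // Hypothetical: overload @llvm.ctpop on the argument's LLVM type and call it.
  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
  llvm::Function *Ctpop =
      CGM.getIntrinsic(llvm::Intrinsic::ctpop, Arg->getType());
  llvm::Value *PopCount = Builder.CreateCall(Ctpop, {Arg}, "popcount");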
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1630
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:679
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:667
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:382
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:419
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition: CGValue.h:361
void setNontemporal(bool Value)
Definition: CGValue.h:319
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
llvm::Value * getAggregatePointer(QualType PointeeType, CodeGenFunction &CGF) const
Definition: CGValue.h:88
bool isScalar() const
Definition: CGValue.h:64
static RValue getIgnored()
Definition: CGValue.h:93
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:125
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
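For orientation, a small sketch of how the RValue helpers above pair up in builtin emitters; purely illustrative, inside a CodeGenFunction member.
  // Hypothetical: every emitter hands results back in the same RValue shape.
  llvm::Value *V = EmitScalarExpr(E->getArg(0));     // any computed scalar
  RValue Res = RValue::get(V);                       // wrap it
  llvm::Value *Raw = Res.isScalar() ? Res.getScalarVal() : nullptr;
  (void)Raw;
  // Builtins evaluated only for side effects can return RValue::getIgnored().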
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:386
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:170
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:160
const T & getABIInfo() const
Definition: TargetInfo.h:57
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:142
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:179
Complex values, per C99 6.2.5p11.
Definition: Type.h:3145
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4232
Represents a sugar type with __counted_by or __sized_by annotations, including their _or_null variant...
Definition: Type.h:3306
DynamicCountPointerKind getKind() const
Definition: Type.h:3336
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:2016
T * getAttr() const
Definition: DeclBase.h:576
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:596
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:246
DeclContext * getDeclContext()
Definition: DeclBase.h:451
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:432
bool hasAttr() const
Definition: DeclBase.h:580
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:231
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1493
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3095
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3090
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3086
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:830
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3587
std::optional< std::string > tryEvaluateString(ASTContext &Ctx) const
If the current Expr can be evaluated to a pointer to a null-terminated constant string,...
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3070
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3963
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
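A hedged sketch tying getIntegerConstantExpr to the CodeGenModule::Error entry above, assuming the second argument must be a compile-time constant; the diagnostic wording is invented.
  // Hypothetical: fold an argument that must be an integer constant expression.
  if (std::optional<llvm::APSInt> CI =
          E->getArg(1)->getIntegerConstantExpr(CGM.getContext())) {
    uint64_t Imm = CI->getZExtValue();
    (void)Imm;                 // use the immediate to pick an intrinsic, etc.
  } else {
    CGM.Error(E->getArg(1)->getExprLoc(),
              "argument must be an integer constant expression");
  }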
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:226
Represents difference between two FPOptions values.
Definition: LangOptions.h:978
Represents a member of a struct/union/class.
Definition: Decl.h:3033
const FieldDecl * findCountedByField() const
Find the FieldDecl specified in a FAM's "counted_by" attribute.
Definition: Decl.cpp:4705
Represents a function declaration or definition.
Definition: Decl.h:1935
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2672
unsigned getBuiltinID(bool ConsiderWrapperFunctions=false) const
Returns a value indicating whether this function corresponds to a builtin function.
Definition: Decl.cpp:3638
Represents a function prototype with parameter type info, e.g. 'int foo(int)'.
Definition: Type.h:5102
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5402
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3236
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:280
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:296
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - Represents an OpenCL 2.0 pipe type.
Definition: Type.h:7780
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3198
QualType getPointeeType() const
Definition: Type.h:3208
A (possibly-)qualified type.
Definition: Type.h:929
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:8015
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2893
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:8057
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2889
The collection of all-type qualifiers we support.
Definition: Type.h:324
Represents a struct/union/class.
Definition: Decl.h:4148
field_range fields() const
Definition: Decl.h:4354
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool setsFPMR() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:345
bool isUnion() const
Definition: Decl.h:3770
bool areArgsDestroyedLeftToRightInCallee() const
Are arguments to a call destroyed left to right in the callee? This is a fundamental language change,...
Definition: TargetCXXABI.h:188
Exposes information about the current target.
Definition: TargetInfo.h:220
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:311
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:697
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1262
bool isLittleEndian() const
Definition: TargetInfo.h:1673
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:861
bool isBigEndian() const
Definition: TargetInfo.h:1672
TargetCXXABI getCXXABI() const
Get the C++ ABI currently in use.
Definition: TargetInfo.h:1333
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1679
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition: TargetInfo.h:733
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1828
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1916
bool isBlockPointerType() const
Definition: Type.h:8200
bool isVoidType() const
Definition: Type.h:8510
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2180
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:710
bool hasIntegerRepresentation() const
Determine whether this type has an integer representation of some sort, e.g., it is an integer type o...
Definition: Type.cpp:2055
bool isArrayType() const
Definition: Type.h:8258
bool isCountAttributedType() const
Definition: Type.cpp:727
bool isPointerType() const
Definition: Type.h:8186
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8550
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8800
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:738
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8625
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition: Type.cpp:2270
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition: Type.cpp:2220
bool isObjCObjectPointerType() const
Definition: Type.h:8328
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition: Type.cpp:2292
bool isVectorType() const
Definition: Type.h:8298
bool isFloatingType() const
Definition: Type.cpp:2283
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2230
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8731
bool isRecordType() const
Definition: Type.h:8286
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2513
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2581
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1920
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:671
QualType getType() const
Definition: Decl.h:682
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:4034
unsigned getNumElements() const
Definition: Type.h:4049
QualType getElementType() const
Definition: Type.h:4048
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:180
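A minimal sketch of the os_log layout helpers above, assuming E is a call to one of the __builtin_os_log builtins and the code runs inside a CodeGenFunction member.
  // Hypothetical: compute the buffer layout and read the two header bytes that
  // precede the serialized arguments.
  analyze_os_log::OSLogBufferLayout Layout;
  analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
  unsigned char Summary = Layout.getSummaryByte();
  unsigned char NumArgs = Layout.getNumArgsByte();
  (void)Summary; (void)NumArgs;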
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: FixedPoint.h:19
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1212
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2408
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:447
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:726
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1748
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2181
bool Ret(InterpState &S, CodePtr &PC)
Definition: Interp.h:318
The JSON file list parser is used to communicate input to InstallAPI.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:154
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
SyncScope
Defines synch scope values used internally by clang.
Definition: SyncScope.h:42
llvm::StringRef getAsString(SyncScope S)
Definition: SyncScope.h:60
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
int int32_t
unsigned long uint64_t
long int64_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:159
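A sketch of how a SanitizerSet of skipped checks might be assembled before calling EmitTypeCheck (listed further up); the particular kinds chosen are illustrative.
  // Hypothetical: record which checks can be skipped for this access.
  SanitizerSet SkippedChecks;
  SkippedChecks.set(SanitizerKind::Null, true);       // pointer known non-null
  SkippedChecks.set(SanitizerKind::Alignment, true);  // alignment proven elsewhere
  bool SkipsNull = SkippedChecks.has(SanitizerKind::Null);
  (void)SkipsNull;
  SkippedChecks.clear(SanitizerKind::Alignment);      // keep the alignment check after all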
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742