CGBuiltin.cpp
1//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This contains code to emit Builtin calls as LLVM code.
10//
11//===----------------------------------------------------------------------===//
12
13#include "ABIInfo.h"
14#include "CGCUDARuntime.h"
15#include "CGCXXABI.h"
16#include "CGHLSLRuntime.h"
17#include "CGObjCRuntime.h"
18#include "CGOpenCLRuntime.h"
19#include "CGRecordLayout.h"
20#include "CodeGenFunction.h"
21#include "CodeGenModule.h"
22#include "ConstantEmitter.h"
23#include "PatternInit.h"
24#include "TargetInfo.h"
26#include "clang/AST/Attr.h"
27#include "clang/AST/Decl.h"
28#include "clang/AST/OSLog.h"
35#include "llvm/ADT/APFloat.h"
36#include "llvm/ADT/APInt.h"
37#include "llvm/ADT/FloatingPointMode.h"
38#include "llvm/ADT/SmallPtrSet.h"
39#include "llvm/ADT/StringExtras.h"
40#include "llvm/Analysis/ValueTracking.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/IR/Intrinsics.h"
44#include "llvm/IR/IntrinsicsAArch64.h"
45#include "llvm/IR/IntrinsicsAMDGPU.h"
46#include "llvm/IR/IntrinsicsARM.h"
47#include "llvm/IR/IntrinsicsBPF.h"
48#include "llvm/IR/IntrinsicsDirectX.h"
49#include "llvm/IR/IntrinsicsHexagon.h"
50#include "llvm/IR/IntrinsicsNVPTX.h"
51#include "llvm/IR/IntrinsicsPowerPC.h"
52#include "llvm/IR/IntrinsicsR600.h"
53#include "llvm/IR/IntrinsicsRISCV.h"
54#include "llvm/IR/IntrinsicsS390.h"
55#include "llvm/IR/IntrinsicsVE.h"
56#include "llvm/IR/IntrinsicsWebAssembly.h"
57#include "llvm/IR/IntrinsicsX86.h"
58#include "llvm/IR/MDBuilder.h"
59#include "llvm/IR/MatrixBuilder.h"
60#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
61#include "llvm/Support/AMDGPUAddrSpace.h"
62#include "llvm/Support/ConvertUTF.h"
63#include "llvm/Support/MathExtras.h"
64#include "llvm/Support/ScopedPrinter.h"
65#include "llvm/TargetParser/AArch64TargetParser.h"
66#include "llvm/TargetParser/RISCVISAInfo.h"
67#include "llvm/TargetParser/X86TargetParser.h"
68#include <optional>
69#include <sstream>
70
71using namespace clang;
72using namespace CodeGen;
73using namespace llvm;
74
75static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
76 Align AlignmentInBytes) {
77 ConstantInt *Byte;
78 switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
79 case LangOptions::TrivialAutoVarInitKind::Uninitialized:
80 // Nothing to initialize.
81 return;
82 case LangOptions::TrivialAutoVarInitKind::Zero:
83 Byte = CGF.Builder.getInt8(0x00);
84 break;
85 case LangOptions::TrivialAutoVarInitKind::Pattern: {
86 llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
87 Byte = llvm::dyn_cast<llvm::ConstantInt>(
88 initializationPatternFor(CGF.CGM, Int8));
89 break;
90 }
91 }
92 if (CGF.CGM.stopAutoInit())
93 return;
94 auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
95 I->addAnnotationMetadata("auto-init");
96}
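
// Illustrative sketch (not part of the original file): the user-visible effect of
// initializeAlloca above. Under -ftrivial-auto-var-init=zero (or =pattern), the
// buffer returned by __builtin_alloca is filled by an "auto-init"-annotated memset
// before user code sees it. The function name is hypothetical.
static unsigned char example_alloca_autoinit(unsigned n) {
  unsigned char *p = (unsigned char *)__builtin_alloca(n);
  // With -ftrivial-auto-var-init=zero this reads 0 even though nothing was stored,
  // because CodeGen emitted the memset shown in initializeAlloca.
  return n ? p[0] : 0;
}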
97
98/// getBuiltinLibFunction - Given a builtin id for a function like
99/// "__builtin_fabsf", return a Function* for "fabsf".
100llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
101 unsigned BuiltinID) {
102 assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
103
104 // Get the name, skip over the __builtin_ prefix (if necessary).
105 StringRef Name;
106 GlobalDecl D(FD);
107
108 // TODO: This list should be expanded or refactored after all GCC-compatible
109 // std libcall builtins are implemented.
110 static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{
111 {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},
112 {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"},
113 {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},
114 {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},
115 {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},
116 {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},
117 {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},
118 {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},
119 {Builtin::BI__builtin_fprintf, "__fprintfieee128"},
120 {Builtin::BI__builtin_printf, "__printfieee128"},
121 {Builtin::BI__builtin_snprintf, "__snprintfieee128"},
122 {Builtin::BI__builtin_sprintf, "__sprintfieee128"},
123 {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},
124 {Builtin::BI__builtin_vprintf, "__vprintfieee128"},
125 {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},
126 {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},
127 {Builtin::BI__builtin_fscanf, "__fscanfieee128"},
128 {Builtin::BI__builtin_scanf, "__scanfieee128"},
129 {Builtin::BI__builtin_sscanf, "__sscanfieee128"},
130 {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},
131 {Builtin::BI__builtin_vscanf, "__vscanfieee128"},
132 {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},
133 {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},
134 };
135
136 // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit
137 // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions
138 // if it is 64-bit 'long double' mode.
139 static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{
140 {Builtin::BI__builtin_frexpl, "frexp"},
141 {Builtin::BI__builtin_ldexpl, "ldexp"},
142 {Builtin::BI__builtin_modfl, "modf"},
143 };
144
145 // If the builtin has been declared explicitly with an assembler label,
146 // use the mangled name. This differs from the plain label on platforms
147 // that prefix labels.
148 if (FD->hasAttr<AsmLabelAttr>())
149 Name = getMangledName(D);
150 else {
151 // TODO: This mutation should also be applied to targets other than PPC once
152 // the backend supports IEEE 128-bit style libcalls.
153 if (getTriple().isPPC64() &&
154 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&
155 F128Builtins.contains(BuiltinID))
156 Name = F128Builtins[BuiltinID];
157 else if (getTriple().isOSAIX() &&
158 &getTarget().getLongDoubleFormat() ==
159 &llvm::APFloat::IEEEdouble() &&
160 AIXLongDouble64Builtins.contains(BuiltinID))
161 Name = AIXLongDouble64Builtins[BuiltinID];
162 else
163 Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);
164 }
165
166 llvm::FunctionType *Ty =
167 cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
168
169 return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
170}
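
// Illustrative sketch (not part of the original file): the default name mapping done
// above, in isolation. The substr(10) call strips the 10-character "__builtin_"
// prefix, so __builtin_fabsf resolves to the library function "fabsf"; on PPC64 with
// IEEE-quad long double, the F128Builtins table instead rewrites e.g. __builtin_printf
// to "__printfieee128". The helper name is hypothetical.
static const char *exampleStripBuiltinPrefix(const char *BuiltinName) {
  return BuiltinName + 10; // "__builtin_fabsf" -> "fabsf"
}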
171
172/// Emit the conversions required to turn the given value into an
173/// integer of the given size.
174static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
175 QualType T, llvm::IntegerType *IntType) {
176 V = CGF.EmitToMemory(V, T);
177
178 if (V->getType()->isPointerTy())
179 return CGF.Builder.CreatePtrToInt(V, IntType);
180
181 assert(V->getType() == IntType);
182 return V;
183}
184
185static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
186 QualType T, llvm::Type *ResultType) {
187 V = CGF.EmitFromMemory(V, T);
188
189 if (ResultType->isPointerTy())
190 return CGF.Builder.CreateIntToPtr(V, ResultType);
191
192 assert(V->getType() == ResultType);
193 return V;
194}
195
196static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {
197 ASTContext &Ctx = CGF.getContext();
198 Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
199 unsigned Bytes = Ptr.getElementType()->isPointerTy()
200 ? Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()
201 : Ptr.getElementType()->getScalarSizeInBits() / 8;
202 unsigned Align = Ptr.getAlignment().getQuantity();
203 if (Align % Bytes != 0) {
204 DiagnosticsEngine &Diags = CGF.CGM.getDiags();
205 Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
206 // Force address to be at least naturally-aligned.
207 return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
208 }
209 return Ptr;
210}
211
212/// Utility to insert an atomic instruction based on Intrinsic::ID
213/// and the expression node.
214static Value *MakeBinaryAtomicValue(
215 CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
216 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
217
218 QualType T = E->getType();
219 assert(E->getArg(0)->getType()->isPointerType());
220 assert(CGF.getContext().hasSameUnqualifiedType(T,
221 E->getArg(0)->getType()->getPointeeType()));
222 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
223
224 Address DestAddr = CheckAtomicAlignment(CGF, E);
225
226 llvm::IntegerType *IntType = llvm::IntegerType::get(
227 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
228
229 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
230 llvm::Type *ValueType = Val->getType();
231 Val = EmitToInt(CGF, Val, T, IntType);
232
233 llvm::Value *Result =
234 CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
235 return EmitFromInt(CGF, Result, T, ValueType);
236}
237
238static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
239 Value *Val = CGF.EmitScalarExpr(E->getArg(0));
240 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
241
242 Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
243 LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
244 LV.setNontemporal(true);
245 CGF.EmitStoreOfScalar(Val, LV, false);
246 return nullptr;
247}
248
249static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
250 Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
251
252 LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
253 LV.setNontemporal(true);
254 return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
255}
256
257static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
258 llvm::AtomicRMWInst::BinOp Kind,
259 const CallExpr *E) {
260 return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
261}
262
263/// Utility to insert an atomic instruction based on Intrinsic::ID and
264/// the expression node, where the return value is the result of the
265/// operation.
266static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
267 llvm::AtomicRMWInst::BinOp Kind,
268 const CallExpr *E,
269 Instruction::BinaryOps Op,
270 bool Invert = false) {
271 QualType T = E->getType();
272 assert(E->getArg(0)->getType()->isPointerType());
273 assert(CGF.getContext().hasSameUnqualifiedType(T,
274 E->getArg(0)->getType()->getPointeeType()));
275 assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
276
277 Address DestAddr = CheckAtomicAlignment(CGF, E);
278
279 llvm::IntegerType *IntType = llvm::IntegerType::get(
280 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
281
282 llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
283 llvm::Type *ValueType = Val->getType();
284 Val = EmitToInt(CGF, Val, T, IntType);
285
286 llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
287 Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
288 Result = CGF.Builder.CreateBinOp(Op, Result, Val);
289 if (Invert)
290 Result =
291 CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
292 llvm::ConstantInt::getAllOnesValue(IntType));
293 Result = EmitFromInt(CGF, Result, T, ValueType);
294 return RValue::get(Result);
295}
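
// Illustrative sketch (not part of the original file): the user-level builtins these
// helpers lower. __sync_fetch_and_add returns the old value (MakeBinaryAtomicValue /
// EmitBinaryAtomic), while __sync_add_and_fetch additionally re-applies the operation
// to the loaded value (EmitBinaryAtomicPost). Function names are hypothetical.
static int example_fetch_then_add(int *p, int v) {
  return __sync_fetch_and_add(p, v); // atomicrmw add seq_cst; result is the prior value
}
static int example_add_then_fetch(int *p, int v) {
  return __sync_add_and_fetch(p, v); // same atomicrmw, then an IR 'add' of old + v
}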
296
297/// Utility to insert an atomic cmpxchg instruction.
298///
299/// @param CGF The current codegen function.
300/// @param E Builtin call expression to convert to cmpxchg.
301/// arg0 - address to operate on
302/// arg1 - value to compare with
303/// arg2 - new value
304/// @param ReturnBool Specifies whether to return success flag of
305/// cmpxchg result or the old value.
306///
307/// @returns result of cmpxchg, according to ReturnBool
308///
309/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
310/// invoke the function EmitAtomicCmpXchgForMSIntrin.
311static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
312 bool ReturnBool) {
313 QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
314 Address DestAddr = CheckAtomicAlignment(CGF, E);
315
316 llvm::IntegerType *IntType = llvm::IntegerType::get(
317 CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
318
319 Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
320 llvm::Type *ValueType = Cmp->getType();
321 Cmp = EmitToInt(CGF, Cmp, T, IntType);
322 Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
323
324 Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
325 DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
326 llvm::AtomicOrdering::SequentiallyConsistent);
327 if (ReturnBool)
328 // Extract boolean success flag and zext it to int.
329 return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
330 CGF.ConvertType(E->getType()));
331 else
332 // Extract old value and emit it using the same type as compare value.
333 return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
334 ValueType);
335}
336
337/// This function should be invoked to emit atomic cmpxchg for Microsoft's
338/// _InterlockedCompareExchange* intrinsics which have the following signature:
339/// T _InterlockedCompareExchange(T volatile *Destination,
340/// T Exchange,
341/// T Comparand);
342///
343/// Whereas the llvm 'cmpxchg' instruction has the following syntax:
344/// cmpxchg *Destination, Comparand, Exchange.
345/// So we need to swap Comparand and Exchange when invoking
346/// CreateAtomicCmpXchg. That is the reason we could not use the above utility
347/// function MakeAtomicCmpXchgValue since it expects the arguments to be
348/// already swapped.
349
350static
351Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
352 AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
353 assert(E->getArg(0)->getType()->isPointerType());
354 assert(CGF.getContext().hasSameUnqualifiedType(
355 E->getType(), E->getArg(0)->getType()->getPointeeType()));
356 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
357 E->getArg(1)->getType()));
358 assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
359 E->getArg(2)->getType()));
360
361 Address DestAddr = CheckAtomicAlignment(CGF, E);
362
363 auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
364 auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
365
366 // For Release ordering, the failure ordering should be Monotonic.
367 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
368 AtomicOrdering::Monotonic :
369 SuccessOrdering;
370
371 // The atomic instruction is marked volatile for consistency with MSVC. This
372 // blocks the few atomics optimizations that LLVM has. If we want to optimize
373 // _Interlocked* operations in the future, we will have to remove the volatile
374 // marker.
375 auto *Result = CGF.Builder.CreateAtomicCmpXchg(
376 DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
377 Result->setVolatile(true);
378 return CGF.Builder.CreateExtractValue(Result, 0);
379}
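
// Illustrative sketch (not part of the original file): the operand swap described in
// the comment above, expressed with the GNU __atomic builtin. _InterlockedCompareExchange
// takes (Destination, Exchange, Comparand) while cmpxchg takes (ptr, compare, new), so
// the last two arguments trade places. The helper name is hypothetical.
static long example_interlocked_cas(long volatile *Destination, long Exchange,
                                    long Comparand) {
  long Expected = Comparand;
  __atomic_compare_exchange_n(Destination, &Expected, Exchange, /*weak=*/false,
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return Expected; // like _InterlockedCompareExchange, yields the previous value
}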
380
381// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
382// prototyped like this:
383//
384// unsigned char _InterlockedCompareExchange128...(
385// __int64 volatile * _Destination,
386// __int64 _ExchangeHigh,
387// __int64 _ExchangeLow,
388// __int64 * _ComparandResult);
389//
390// Note that Destination is assumed to be at least 16-byte aligned, despite
391// being typed int64.
392
393static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
394 const CallExpr *E,
395 AtomicOrdering SuccessOrdering) {
396 assert(E->getNumArgs() == 4);
397 llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
398 llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
399 llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
400 Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
401
402 assert(DestPtr->getType()->isPointerTy());
403 assert(!ExchangeHigh->getType()->isPointerTy());
404 assert(!ExchangeLow->getType()->isPointerTy());
405
406 // For Release ordering, the failure ordering should be Monotonic.
407 auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
408 ? AtomicOrdering::Monotonic
409 : SuccessOrdering;
410
411 // Convert to i128 pointers and values. Alignment is also overridden for
412 // destination pointer.
413 llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
414 Address DestAddr(DestPtr, Int128Ty,
415 CGF.getContext().toCharUnitsFromBits(128));
416 ComparandAddr = ComparandAddr.withElementType(Int128Ty);
417
418 // (((i128)hi) << 64) | ((i128)lo)
419 ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
420 ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
421 ExchangeHigh =
422 CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
423 llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
424
425 // Load the comparand for the instruction.
426 llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
427
428 auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
429 SuccessOrdering, FailureOrdering);
430
431 // The atomic instruction is marked volatile for consistency with MSVC. This
432 // blocks the few atomics optimizations that LLVM has. If we want to optimize
433 // _Interlocked* operations in the future, we will have to remove the volatile
434 // marker.
435 CXI->setVolatile(true);
436
437 // Store the result as an outparameter.
438 CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
439 ComparandAddr);
440
441 // Get the success boolean and zero extend it to i8.
442 Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
443 return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
444}
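
// Illustrative sketch (not part of the original file): the (hi << 64) | lo combination
// built with CreateZExt/CreateShl/CreateOr above, written as plain C++ on a target that
// provides __int128. The helper name is hypothetical.
static unsigned __int128 example_combine_exchange_halves(unsigned long long Hi,
                                                         unsigned long long Lo) {
  return ((unsigned __int128)Hi << 64) | (unsigned __int128)Lo;
}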
445
446static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
447 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
448 assert(E->getArg(0)->getType()->isPointerType());
449
450 auto *IntTy = CGF.ConvertType(E->getType());
451 Address DestAddr = CheckAtomicAlignment(CGF, E);
452 auto *Result = CGF.Builder.CreateAtomicRMW(
453 AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
454 return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
455}
456
457static Value *EmitAtomicDecrementValue(
458 CodeGenFunction &CGF, const CallExpr *E,
459 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
460 assert(E->getArg(0)->getType()->isPointerType());
461
462 auto *IntTy = CGF.ConvertType(E->getType());
463 Address DestAddr = CheckAtomicAlignment(CGF, E);
464 auto *Result = CGF.Builder.CreateAtomicRMW(
465 AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
466 return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
467}
468
469// Build a plain volatile load.
470static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
471 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
472 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
473 CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
474 llvm::Type *ITy =
475 llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
476 llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);
477 Load->setVolatile(true);
478 return Load;
479}
480
481// Build a plain volatile store.
482static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
483 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
484 Value *Value = CGF.EmitScalarExpr(E->getArg(1));
485 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
486 CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
487 llvm::StoreInst *Store =
488 CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
489 Store->setVolatile(true);
490 return Store;
491}
492
493// Emit a simple mangled intrinsic that has 1 argument and a return type
494// matching the argument type. Depending on mode, this may be a constrained
495// floating-point intrinsic.
496static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
497 const CallExpr *E, unsigned IntrinsicID,
498 unsigned ConstrainedIntrinsicID) {
499 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
500
501 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
502 if (CGF.Builder.getIsFPConstrained()) {
503 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
504 return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });
505 } else {
506 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
507 return CGF.Builder.CreateCall(F, Src0);
508 }
509}
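
// Illustrative sketch (not part of the original file): when the constrained branch
// above is taken. In a strict floating-point region (for example under
// '#pragma STDC FENV_ACCESS ON' or -ffp-exception-behavior=strict), getIsFPConstrained()
// is true and __builtin_sqrt lowers to llvm.experimental.constrained.sqrt rather than
// llvm.sqrt. The function name is hypothetical.
static double example_sqrt_strict(double x) {
#pragma STDC FENV_ACCESS ON
  return __builtin_sqrt(x);
}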
510
511// Emit an intrinsic that has 2 operands of the same type as its result.
512// Depending on mode, this may be a constrained floating-point intrinsic.
513static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
514 const CallExpr *E, unsigned IntrinsicID,
515 unsigned ConstrainedIntrinsicID) {
516 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
517 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
518
519 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
520 if (CGF.Builder.getIsFPConstrained()) {
521 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
522 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
523 } else {
524 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
525 return CGF.Builder.CreateCall(F, { Src0, Src1 });
526 }
527}
528
529// Has second type mangled argument.
530static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
531 CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,
532 llvm::Intrinsic::ID ConstrainedIntrinsicID) {
533 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
534 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
535
536 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
537 if (CGF.Builder.getIsFPConstrained()) {
538 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
539 {Src0->getType(), Src1->getType()});
540 return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
541 }
542
543 Function *F =
544 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});
545 return CGF.Builder.CreateCall(F, {Src0, Src1});
546}
547
548// Emit an intrinsic that has 3 operands of the same type as its result.
549// Depending on mode, this may be a constrained floating-point intrinsic.
550static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
551 const CallExpr *E, unsigned IntrinsicID,
552 unsigned ConstrainedIntrinsicID) {
553 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
554 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
555 llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
556
557 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
558 if (CGF.Builder.getIsFPConstrained()) {
559 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
560 return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
561 } else {
562 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
563 return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
564 }
565}
566
567// Emit an intrinsic where all operands are of the same type as the result.
568// Depending on mode, this may be a constrained floating-point intrinsic.
569static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
570 unsigned IntrinsicID,
571 unsigned ConstrainedIntrinsicID,
572 llvm::Type *Ty,
573 ArrayRef<Value *> Args) {
574 Function *F;
575 if (CGF.Builder.getIsFPConstrained())
576 F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);
577 else
578 F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);
579
580 if (CGF.Builder.getIsFPConstrained())
581 return CGF.Builder.CreateConstrainedFPCall(F, Args);
582 else
583 return CGF.Builder.CreateCall(F, Args);
584}
585
586// Emit a simple intrinsic that has N scalar arguments and a return type
587// matching the argument type. It is assumed that only the first argument is
588// overloaded.
589template <unsigned N>
590static Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E,
591 unsigned IntrinsicID,
592 llvm::StringRef Name = "") {
593 static_assert(N, "expect non-empty argument");
594 SmallVector<Value *, N> Args;
595 for (unsigned I = 0; I < N; ++I)
596 Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));
597 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());
598 return CGF.Builder.CreateCall(F, Args, Name);
599}
600
601// Emit an intrinsic that has 1 float or double operand, and 1 integer.
602static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
603 const CallExpr *E,
604 unsigned IntrinsicID) {
605 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
606 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
607
608 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
609 return CGF.Builder.CreateCall(F, {Src0, Src1});
610}
611
612// Emit an intrinsic that has overloaded integer result and fp operand.
613static Value *
614emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,
615 unsigned IntrinsicID,
616 unsigned ConstrainedIntrinsicID) {
617 llvm::Type *ResultType = CGF.ConvertType(E->getType());
618 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
619
620 if (CGF.Builder.getIsFPConstrained()) {
621 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
622 Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
623 {ResultType, Src0->getType()});
624 return CGF.Builder.CreateConstrainedFPCall(F, {Src0});
625 } else {
626 Function *F =
627 CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});
628 return CGF.Builder.CreateCall(F, Src0);
629 }
630}
631
632static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,
633 llvm::Intrinsic::ID IntrinsicID) {
634 llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
635 llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
636
637 QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();
638 llvm::Type *IntTy = CGF.ConvertType(IntPtrTy);
639 llvm::Function *F =
640 CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});
641 llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);
642
643 llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);
644 LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);
645 CGF.EmitStoreOfScalar(Exp, LV);
646
647 return CGF.Builder.CreateExtractValue(Call, 0);
648}
649
650/// EmitFAbs - Emit a call to @llvm.fabs().
651static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
652 Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
653 llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
654 Call->setDoesNotAccessMemory();
655 return Call;
656}
657
658/// Emit the computation of the sign bit for a floating point value. Returns
659/// the i1 sign bit value.
660static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
661 LLVMContext &C = CGF.CGM.getLLVMContext();
662
663 llvm::Type *Ty = V->getType();
664 int Width = Ty->getPrimitiveSizeInBits();
665 llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
666 V = CGF.Builder.CreateBitCast(V, IntTy);
667 if (Ty->isPPC_FP128Ty()) {
668 // We want the sign bit of the higher-order double. The bitcast we just
669 // did works as if the double-double was stored to memory and then
670 // read as an i128. The "store" will put the higher-order double in the
671 // lower address in both little- and big-Endian modes, but the "load"
672 // will treat those bits as a different part of the i128: the low bits in
673 // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
674 // we need to shift the high bits down to the low before truncating.
675 Width >>= 1;
676 if (CGF.getTarget().isBigEndian()) {
677 Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
678 V = CGF.Builder.CreateLShr(V, ShiftCst);
679 }
680 // We are truncating value in order to extract the higher-order
681 // double, which we will be using to extract the sign from.
682 IntTy = llvm::IntegerType::get(C, Width);
683 V = CGF.Builder.CreateTrunc(V, IntTy);
684 }
685 Value *Zero = llvm::Constant::getNullValue(IntTy);
686 return CGF.Builder.CreateICmpSLT(V, Zero);
687}
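
// Illustrative sketch (not part of the original file): the ordinary (non-PPC_FP128)
// case of the bitcast-and-compare trick above, assuming 64-bit IEEE double and a
// 64-bit long long. The function name is hypothetical.
static bool example_signbit(double x) {
  long long bits = __builtin_bit_cast(long long, x);
  return bits < 0; // the sign bit is the top bit, i.e. negative when read as signed
}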
688
689static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
690 const CallExpr *E, llvm::Constant *calleeValue) {
691 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
692 CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
693 RValue Call =
694 CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
695
696 if (unsigned BuiltinID = FD->getBuiltinID()) {
697 // Check whether a FP math builtin function, such as BI__builtin_expf
698 ASTContext &Context = CGF.getContext();
699 bool ConstWithoutErrnoAndExceptions =
700 Context.BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
701 // Restrict to targets with errno; for example, macOS doesn't set errno.
702 // TODO: Support builtin functions with complex return types, e.g. cacosh.
703 if (ConstWithoutErrnoAndExceptions && CGF.CGM.getLangOpts().MathErrno &&
704 !CGF.Builder.getIsFPConstrained() && Call.isScalar()) {
705 // Emit "int" TBAA metadata on FP math libcalls.
706 clang::QualType IntTy = Context.IntTy;
707 TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);
708 Instruction *Inst = cast<llvm::Instruction>(Call.getScalarVal());
709 CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);
710 }
711 }
712 return Call;
713}
714
715/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
716/// depending on IntrinsicID.
717///
718/// \arg CGF The current codegen function.
719/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
720/// \arg X The first argument to the llvm.*.with.overflow.*.
721/// \arg Y The second argument to the llvm.*.with.overflow.*.
722/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
723/// \returns The result (i.e. sum/product) returned by the intrinsic.
724static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
725 const llvm::Intrinsic::ID IntrinsicID,
726 llvm::Value *X, llvm::Value *Y,
727 llvm::Value *&Carry) {
728 // Make sure we have integers of the same width.
729 assert(X->getType() == Y->getType() &&
730 "Arguments must be the same type. (Did you forget to make sure both "
731 "arguments have the same integer width?)");
732
733 Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
734 llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
735 Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
736 return CGF.Builder.CreateExtractValue(Tmp, 0);
737}
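
// Illustrative sketch (not part of the original file): the user-level builtin that is
// lowered through EmitOverflowIntrinsic. The pair returned by llvm.sadd.with.overflow
// carries the wrapped result (stored through 'out' here) and the carry flag returned
// below. The function name is hypothetical.
static bool example_checked_add(int a, int b, int *out) {
  return __builtin_add_overflow(a, b, out); // true when the signed addition overflowed
}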
738
739static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,
740 int low, int high) {
741 Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
742 llvm::CallInst *Call = CGF.Builder.CreateCall(F);
743 llvm::ConstantRange CR(APInt(32, low), APInt(32, high));
744 Call->addRangeRetAttr(CR);
745 Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);
746 return Call;
747}
748
749namespace {
750 struct WidthAndSignedness {
751 unsigned Width;
752 bool Signed;
753 };
754}
755
756static WidthAndSignedness
757getIntegerWidthAndSignedness(const clang::ASTContext &context,
758 const clang::QualType Type) {
759 assert(Type->isIntegerType() && "Given type is not an integer.");
760 unsigned Width = context.getIntWidth(Type);
761 bool Signed = Type->isSignedIntegerType();
762 return {Width, Signed};
763}
764
765// Given one or more integer types, this function produces an integer type that
766// encompasses them: any value in one of the given types could be expressed in
767// the encompassing type.
768static struct WidthAndSignedness
769EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
770 assert(Types.size() > 0 && "Empty list of types.");
771
772 // If any of the given types is signed, we must return a signed type.
773 bool Signed = false;
774 for (const auto &Type : Types) {
775 Signed |= Type.Signed;
776 }
777
778 // The encompassing type must have a width greater than or equal to the width
779 // of the specified types. Additionally, if the encompassing type is signed,
780 // its width must be strictly greater than the width of any unsigned types
781 // given.
782 unsigned Width = 0;
783 for (const auto &Type : Types) {
784 unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
785 if (Width < MinWidth) {
786 Width = MinWidth;
787 }
788 }
789
790 return {Width, Signed};
791}
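
// Illustrative sketch (not part of the original file): the width rule above as a
// standalone constexpr helper (requires C++14). Combining an unsigned 32-bit type with
// a signed 16-bit type yields a signed 33-bit encompassing type, since a signed type
// needs one extra bit to represent every unsigned 32-bit value. Names are hypothetical.
struct ExampleWidthAndSignedness {
  unsigned Width;
  bool Signed;
};
static constexpr ExampleWidthAndSignedness
exampleEncompass(ExampleWidthAndSignedness A, ExampleWidthAndSignedness B) {
  const bool Signed = A.Signed || B.Signed;
  const unsigned WA = A.Width + (Signed && !A.Signed);
  const unsigned WB = B.Width + (Signed && !B.Signed);
  return {WA > WB ? WA : WB, Signed};
}
static_assert(exampleEncompass({32, false}, {16, true}).Width == 33,
              "unsigned 32 + signed 16 -> signed 33");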
792
793Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
794 Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
795 return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),
796 ArgValue);
797}
798
799/// Checks if using the result of __builtin_object_size(p, @p From) in place of
800/// __builtin_object_size(p, @p To) is correct
801static bool areBOSTypesCompatible(int From, int To) {
802 // Note: Our __builtin_object_size implementation currently treats Type=0 and
803 // Type=2 identically. Encoding this implementation detail here may make
804 // improving __builtin_object_size difficult in the future, so it's omitted.
805 return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
806}
807
808static llvm::Value *
809getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
810 return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
811}
812
813llvm::Value *
814CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
815 llvm::IntegerType *ResType,
816 llvm::Value *EmittedE,
817 bool IsDynamic) {
818 uint64_t ObjectSize;
819 if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
820 return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
821 return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
822}
823
824const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(
825 ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,
826 uint64_t &Offset) {
827 const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =
828 getLangOpts().getStrictFlexArraysLevel();
829 uint32_t FieldNo = 0;
830
831 if (RD->isImplicit())
832 return nullptr;
833
834 for (const FieldDecl *FD : RD->fields()) {
835 if ((!FAMDecl || FD == FAMDecl) &&
836 Decl::isFlexibleArrayMemberLike(
837 Ctx, FD, FD->getType(), StrictFlexArraysLevel,
838 /*IgnoreTemplateOrMacroSubstitution=*/true)) {
839 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
840 Offset += Layout.getFieldOffset(FieldNo);
841 return FD;
842 }
843
844 QualType Ty = FD->getType();
845 if (Ty->isRecordType()) {
846 if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(
847 Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {
848 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);
849 Offset += Layout.getFieldOffset(FieldNo);
850 return Field;
851 }
852 }
853
854 if (!RD->isUnion())
855 ++FieldNo;
856 }
857
858 return nullptr;
859}
860
861static unsigned CountCountedByAttrs(const RecordDecl *RD) {
862 unsigned Num = 0;
863
864 for (const FieldDecl *FD : RD->fields()) {
865 if (FD->getType()->isCountAttributedType())
866 return ++Num;
867
868 QualType Ty = FD->getType();
869 if (Ty->isRecordType())
870 Num += CountCountedByAttrs(Ty->getAsRecordDecl());
871 }
872
873 return Num;
874}
875
876llvm::Value *
877CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
878 llvm::IntegerType *ResType) {
879 // The code generated here calculates the size of a struct with a flexible
880 // array member that uses the counted_by attribute. There are three instances
881 // we handle:
882 //
883 // struct s {
884 // unsigned long flags;
885 // int count;
886 // int array[] __attribute__((counted_by(count)));
887 // }
888 //
889 // 1) bdos of the flexible array itself:
890 //
891 // __builtin_dynamic_object_size(p->array, 1) ==
892 // p->count * sizeof(*p->array)
893 //
894 // 2) bdos of a pointer into the flexible array:
895 //
896 // __builtin_dynamic_object_size(&p->array[42], 1) ==
897 // (p->count - 42) * sizeof(*p->array)
898 //
899 // 3) bdos of the whole struct, including the flexible array:
900 //
901 // __builtin_dynamic_object_size(p, 1) ==
902 // max(sizeof(struct s),
903 // offsetof(struct s, array) + p->count * sizeof(*p->array))
904 //
905 ASTContext &Ctx = getContext();
906 const Expr *Base = E->IgnoreParenImpCasts();
907 const Expr *Idx = nullptr;
908
909 if (const auto *UO = dyn_cast<UnaryOperator>(Base);
910 UO && UO->getOpcode() == UO_AddrOf) {
911 Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();
912 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {
913 Base = ASE->getBase()->IgnoreParenImpCasts();
914 Idx = ASE->getIdx()->IgnoreParenImpCasts();
915
916 if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {
917 int64_t Val = IL->getValue().getSExtValue();
918 if (Val < 0)
919 return getDefaultBuiltinObjectSizeResult(Type, ResType);
920
921 if (Val == 0)
922 // The index is 0, so we don't need to take it into account.
923 Idx = nullptr;
924 }
925 } else {
926 // Potential pointer to another element in the struct.
927 Base = SubExpr;
928 }
929 }
930
931 // Get the flexible array member Decl.
932 const RecordDecl *OuterRD = nullptr;
933 const FieldDecl *FAMDecl = nullptr;
934 if (const auto *ME = dyn_cast<MemberExpr>(Base)) {
935 // Check if \p Base is referencing the FAM itself.
936 const ValueDecl *VD = ME->getMemberDecl();
937 OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();
938 FAMDecl = dyn_cast<FieldDecl>(VD);
939 if (!FAMDecl)
940 return nullptr;
941 } else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {
942 // Check if we're pointing to the whole struct.
943 QualType Ty = DRE->getDecl()->getType();
944 if (Ty->isPointerType())
945 Ty = Ty->getPointeeType();
946 OuterRD = Ty->getAsRecordDecl();
947
948 // If we have a situation like this:
949 //
950 // struct union_of_fams {
951 // int flags;
952 // union {
953 // signed char normal_field;
954 // struct {
955 // int count1;
956 // int arr1[] __counted_by(count1);
957 // };
958 // struct {
959 // signed char count2;
960 // int arr2[] __counted_by(count2);
961 // };
962 // };
963 // };
964 //
965 // We don't know which 'count' to use in this scenario:
966 //
967 // size_t get_size(struct union_of_fams *p) {
968 // return __builtin_dynamic_object_size(p, 1);
969 // }
970 //
971 // Instead of calculating a wrong number, we give up.
972 if (OuterRD && CountCountedByAttrs(OuterRD) > 1)
973 return nullptr;
974 }
975
976 if (!OuterRD)
977 return nullptr;
978
979 // We call FindFlexibleArrayMemberFieldAndOffset even if FAMDecl is non-null to
980 // get its offset.
981 uint64_t Offset = 0;
982 FAMDecl =
983 FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);
984 Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();
985
986 if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())
987 // No flexible array member found or it doesn't have the "counted_by"
988 // attribute.
989 return nullptr;
990
991 const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
992 if (!CountedByFD)
993 // Can't find the field referenced by the "counted_by" attribute.
994 return nullptr;
995
996 // Build a load of the counted_by field.
997 bool IsSigned = CountedByFD->getType()->isSignedIntegerType();
998 Value *CountedByInst = EmitLoadOfCountedByField(Base, FAMDecl, CountedByFD);
999 if (!CountedByInst)
1000 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1001
1002 CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);
1003
1004 // Build a load of the index and subtract it from the count.
1005 Value *IdxInst = nullptr;
1006 if (Idx) {
1007 if (Idx->HasSideEffects(getContext()))
1008 // We can't have side-effects.
1009 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1010
1011 bool IdxSigned = Idx->getType()->isSignedIntegerType();
1012 IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();
1013 IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);
1014
1015 // We go ahead with the calculation here. If the index turns out to be
1016 // negative, we'll catch it at the end.
1017 CountedByInst =
1018 Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);
1019 }
1020
1021 // Calculate how large the flexible array member is in bytes.
1022 const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());
1023 CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());
1024 llvm::Constant *ElemSize =
1025 llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);
1026 Value *FAMSize =
1027 Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);
1028 FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned);
1029 Value *Res = FAMSize;
1030
1031 if (isa<DeclRefExpr>(Base)) {
1032 // The whole struct is specified in the __bdos.
1033 const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD);
1034
1035 // Get the offset of the FAM.
1036 llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned);
1037 Value *OffsetAndFAMSize =
1038 Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned);
1039
1040 // Get the full size of the struct.
1041 llvm::Constant *SizeofStruct =
1042 ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned);
1043
1044 // max(sizeof(struct s),
1045 // offsetof(struct s, array) + p->count * sizeof(*p->array))
1046 Res = IsSigned
1047 ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax,
1048 OffsetAndFAMSize, SizeofStruct)
1049 : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax,
1050 OffsetAndFAMSize, SizeofStruct);
1051 }
1052
1053 // A negative \p IdxInst or \p CountedByInst means that the index lands
1054 // outside of the flexible array member. If that's the case, we want to
1055 // return 0.
1056 Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);
1057 if (IdxInst)
1058 Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);
1059
1060 return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));
1061}
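
// Illustrative sketch (not part of the original file): the arithmetic above restated
// as plain C for case (2), a pointer &p->array[index] into the counted_by flexible
// array. Negative counts or indices are clamped to 0, mirroring the CreateSelect on
// the IsNotNeg checks. Names are hypothetical.
static unsigned long example_fam_bytes(long count, long index,
                                       unsigned long elem_size) {
  long remaining = count - index;                             // CreateSub
  unsigned long bytes = (unsigned long)remaining * elem_size; // CreateMul
  return (remaining < 0 || index < 0) ? 0 : bytes;            // CreateSelect
}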
1062
1063/// Returns a Value corresponding to the size of the given expression.
1064/// This Value may be either of the following:
1065/// - A llvm::Argument (if E is a param with the pass_object_size attribute on
1066/// it)
1067/// - A call to the @llvm.objectsize intrinsic
1068///
1069/// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
1070/// and we wouldn't otherwise try to reference a pass_object_size parameter,
1071/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
1072llvm::Value *
1073CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
1074 llvm::IntegerType *ResType,
1075 llvm::Value *EmittedE, bool IsDynamic) {
1076 // We need to reference an argument if the pointer is a parameter with the
1077 // pass_object_size attribute.
1078 if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
1079 auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
1080 auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
1081 if (Param != nullptr && PS != nullptr &&
1082 areBOSTypesCompatible(PS->getType(), Type)) {
1083 auto Iter = SizeArguments.find(Param);
1084 assert(Iter != SizeArguments.end());
1085
1086 const ImplicitParamDecl *D = Iter->second;
1087 auto DIter = LocalDeclMap.find(D);
1088 assert(DIter != LocalDeclMap.end());
1089
1090 return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
1091 getContext().getSizeType(), E->getBeginLoc());
1092 }
1093 }
1094
1095 if (IsDynamic) {
1096 // Emit special code for a flexible array member with the "counted_by"
1097 // attribute.
1098 if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))
1099 return V;
1100 }
1101
1102 // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
1103 // evaluate E for side-effects. In either case, we shouldn't lower to
1104 // @llvm.objectsize.
1105 if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
1106 return getDefaultBuiltinObjectSizeResult(Type, ResType);
1107
1108 Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
1109 assert(Ptr->getType()->isPointerTy() &&
1110 "Non-pointer passed to __builtin_object_size?");
1111
1112 Function *F =
1113 CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
1114
1115 // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
1116 Value *Min = Builder.getInt1((Type & 2) != 0);
1117 // For GCC compatibility, __builtin_object_size treats NULL as unknown size.
1118 Value *NullIsUnknown = Builder.getTrue();
1119 Value *Dynamic = Builder.getInt1(IsDynamic);
1120 return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
1121}
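
// Illustrative sketch (not part of the original file): the pass_object_size path that
// the code above checks first. Inside such a function, __builtin_object_size(p, 1)
// reads the implicit size argument supplied by the caller instead of emitting a call
// to @llvm.objectsize. The function name is hypothetical.
static unsigned long example_bos_param(
    void *const p __attribute__((pass_object_size(1)))) {
  return __builtin_object_size(p, 1);
}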
1122
1123namespace {
1124/// A struct to generically describe a bit test intrinsic.
1125struct BitTest {
1126 enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
1127 enum InterlockingKind : uint8_t {
1128 Unlocked,
1129 Sequential,
1130 Acquire,
1131 Release,
1132 NoFence
1133 };
1134
1135 ActionKind Action;
1136 InterlockingKind Interlocking;
1137 bool Is64Bit;
1138
1139 static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
1140};
1141
1142} // namespace
1143
1144BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
1145 switch (BuiltinID) {
1146 // Main portable variants.
1147 case Builtin::BI_bittest:
1148 return {TestOnly, Unlocked, false};
1149 case Builtin::BI_bittestandcomplement:
1150 return {Complement, Unlocked, false};
1151 case Builtin::BI_bittestandreset:
1152 return {Reset, Unlocked, false};
1153 case Builtin::BI_bittestandset:
1154 return {Set, Unlocked, false};
1155 case Builtin::BI_interlockedbittestandreset:
1156 return {Reset, Sequential, false};
1157 case Builtin::BI_interlockedbittestandset:
1158 return {Set, Sequential, false};
1159
1160 // X86-specific 64-bit variants.
1161 case Builtin::BI_bittest64:
1162 return {TestOnly, Unlocked, true};
1163 case Builtin::BI_bittestandcomplement64:
1164 return {Complement, Unlocked, true};
1165 case Builtin::BI_bittestandreset64:
1166 return {Reset, Unlocked, true};
1167 case Builtin::BI_bittestandset64:
1168 return {Set, Unlocked, true};
1169 case Builtin::BI_interlockedbittestandreset64:
1170 return {Reset, Sequential, true};
1171 case Builtin::BI_interlockedbittestandset64:
1172 return {Set, Sequential, true};
1173
1174 // ARM/AArch64-specific ordering variants.
1175 case Builtin::BI_interlockedbittestandset_acq:
1176 return {Set, Acquire, false};
1177 case Builtin::BI_interlockedbittestandset_rel:
1178 return {Set, Release, false};
1179 case Builtin::BI_interlockedbittestandset_nf:
1180 return {Set, NoFence, false};
1181 case Builtin::BI_interlockedbittestandreset_acq:
1182 return {Reset, Acquire, false};
1183 case Builtin::BI_interlockedbittestandreset_rel:
1184 return {Reset, Release, false};
1185 case Builtin::BI_interlockedbittestandreset_nf:
1186 return {Reset, NoFence, false};
1187 }
1188 llvm_unreachable("expected only bittest intrinsics");
1189}
1190
1191static char bitActionToX86BTCode(BitTest::ActionKind A) {
1192 switch (A) {
1193 case BitTest::TestOnly: return '\0';
1194 case BitTest::Complement: return 'c';
1195 case BitTest::Reset: return 'r';
1196 case BitTest::Set: return 's';
1197 }
1198 llvm_unreachable("invalid action");
1199}
1200
1201static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
1202 BitTest BT,
1203 const CallExpr *E, Value *BitBase,
1204 Value *BitPos) {
1205 char Action = bitActionToX86BTCode(BT.Action);
1206 char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
1207
1208 // Build the assembly.
1209 SmallString<64> Asm;
1210 raw_svector_ostream AsmOS(Asm);
1211 if (BT.Interlocking != BitTest::Unlocked)
1212 AsmOS << "lock ";
1213 AsmOS << "bt";
1214 if (Action)
1215 AsmOS << Action;
1216 AsmOS << SizeSuffix << " $2, ($1)";
1217
1218 // Build the constraints. FIXME: We should support immediates when possible.
1219 std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";
1220 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1221 if (!MachineClobbers.empty()) {
1222 Constraints += ',';
1223 Constraints += MachineClobbers;
1224 }
1225 llvm::IntegerType *IntType = llvm::IntegerType::get(
1226 CGF.getLLVMContext(),
1227 CGF.getContext().getTypeSize(E->getArg(1)->getType()));
1228 llvm::FunctionType *FTy =
1229 llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);
1230
1231 llvm::InlineAsm *IA =
1232 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1233 return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
1234}
1235
1236static llvm::AtomicOrdering
1237getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
1238 switch (I) {
1239 case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
1240 case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
1241 case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
1242 case BitTest::Release: return llvm::AtomicOrdering::Release;
1243 case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
1244 }
1245 llvm_unreachable("invalid interlocking");
1246}
1247
1248/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
1249/// bits and a bit position and read and optionally modify the bit at that
1250/// position. The position index can be arbitrarily large, i.e. it can be larger
1251/// than 31 or 63, so we need an indexed load in the general case.
1252static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
1253 unsigned BuiltinID,
1254 const CallExpr *E) {
1255 Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
1256 Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
1257
1258 BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
1259
1260 // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
1261 // indexing operation internally. Use them if possible.
1262 if (CGF.getTarget().getTriple().isX86())
1263 return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
1264
1265 // Otherwise, use generic code to load one byte and test the bit. Use all but
1266 // the bottom three bits as the array index, and the bottom three bits to form
1267 // a mask.
1268 // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
1269 Value *ByteIndex = CGF.Builder.CreateAShr(
1270 BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
1271 Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
1272 Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
1273 ByteIndex, "bittest.byteaddr"),
1274 CGF.Int8Ty, CharUnits::One());
1275 Value *PosLow =
1276 CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
1277 llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
1278
1279 // The updating instructions will need a mask.
1280 Value *Mask = nullptr;
1281 if (BT.Action != BitTest::TestOnly) {
1282 Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
1283 "bittest.mask");
1284 }
1285
1286 // Check the action and ordering of the interlocked intrinsics.
1287 llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
1288
1289 Value *OldByte = nullptr;
1290 if (Ordering != llvm::AtomicOrdering::NotAtomic) {
1291 // Emit a combined atomicrmw load/store operation for the interlocked
1292 // intrinsics.
1293 llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
1294 if (BT.Action == BitTest::Reset) {
1295 Mask = CGF.Builder.CreateNot(Mask);
1296 RMWOp = llvm::AtomicRMWInst::And;
1297 }
1298 OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
1299 } else {
1300 // Emit a plain load for the non-interlocked intrinsics.
1301 OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
1302 Value *NewByte = nullptr;
1303 switch (BT.Action) {
1304 case BitTest::TestOnly:
1305 // Don't store anything.
1306 break;
1307 case BitTest::Complement:
1308 NewByte = CGF.Builder.CreateXor(OldByte, Mask);
1309 break;
1310 case BitTest::Reset:
1311 NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
1312 break;
1313 case BitTest::Set:
1314 NewByte = CGF.Builder.CreateOr(OldByte, Mask);
1315 break;
1316 }
1317 if (NewByte)
1318 CGF.Builder.CreateStore(NewByte, ByteAddr);
1319 }
1320
1321 // However we loaded the old byte, either by plain load or atomicrmw, shift
1322 // the bit into the low position and mask it to 0 or 1.
1323 Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
1324 return CGF.Builder.CreateAnd(
1325 ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
1326}
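
// Illustrative sketch (not part of the original file): the generic (non-x86),
// non-interlocked lowering above restated as plain C byte arithmetic for the
// _bittestandset case. The function name is hypothetical.
static unsigned char example_bittestandset(unsigned char *base, long long pos) {
  unsigned char *byte = base + (pos >> 3);                  // bittest.byteaddr
  unsigned char mask = (unsigned char)(1u << (pos & 0x7));  // bittest.mask
  unsigned char old = *byte;                                // bittest.byte
  *byte = (unsigned char)(old | mask);                      // BitTest::Set
  return (unsigned char)((old >> (pos & 0x7)) & 1);         // bittest.res
}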
1327
1328static Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,
1329 unsigned BuiltinID,
1330 const CallExpr *E) {
1331 Value *Addr = CGF.EmitScalarExpr(E->getArg(0));
1332
1333 SmallString<64> Asm;
1334 raw_svector_ostream AsmOS(Asm);
1335 llvm::IntegerType *RetType = CGF.Int32Ty;
1336
1337 switch (BuiltinID) {
1338 case clang::PPC::BI__builtin_ppc_ldarx:
1339 AsmOS << "ldarx ";
1340 RetType = CGF.Int64Ty;
1341 break;
1342 case clang::PPC::BI__builtin_ppc_lwarx:
1343 AsmOS << "lwarx ";
1344 RetType = CGF.Int32Ty;
1345 break;
1346 case clang::PPC::BI__builtin_ppc_lharx:
1347 AsmOS << "lharx ";
1348 RetType = CGF.Int16Ty;
1349 break;
1350 case clang::PPC::BI__builtin_ppc_lbarx:
1351 AsmOS << "lbarx ";
1352 RetType = CGF.Int8Ty;
1353 break;
1354 default:
1355 llvm_unreachable("Expected only PowerPC load reserve intrinsics");
1356 }
1357
1358 AsmOS << "$0, ${1:y}";
1359
1360 std::string Constraints = "=r,*Z,~{memory}";
1361 std::string_view MachineClobbers = CGF.getTarget().getClobbers();
1362 if (!MachineClobbers.empty()) {
1363 Constraints += ',';
1364 Constraints += MachineClobbers;
1365 }
1366
1367 llvm::Type *PtrType = CGF.UnqualPtrTy;
1368 llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);
1369
1370 llvm::InlineAsm *IA =
1371 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1372 llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});
1373 CI->addParamAttr(
1374 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));
1375 return CI;
1376}
1377
1378namespace {
1379enum class MSVCSetJmpKind {
1380 _setjmpex,
1381 _setjmp3,
1382 _setjmp
1383};
1384}
1385
1386/// MSVC handles setjmp a bit differently on different platforms. On every
1387/// architecture except 32-bit x86, the frame address is passed. On x86, extra
1388/// parameters can be passed as variadic arguments, but we always pass none.
1389static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
1390 const CallExpr *E) {
1391 llvm::Value *Arg1 = nullptr;
1392 llvm::Type *Arg1Ty = nullptr;
1393 StringRef Name;
1394 bool IsVarArg = false;
1395 if (SJKind == MSVCSetJmpKind::_setjmp3) {
1396 Name = "_setjmp3";
1397 Arg1Ty = CGF.Int32Ty;
1398 Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
1399 IsVarArg = true;
1400 } else {
1401 Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
1402 Arg1Ty = CGF.Int8PtrTy;
1403 if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
1404 Arg1 = CGF.Builder.CreateCall(
1405 CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));
1406 } else
1407 Arg1 = CGF.Builder.CreateCall(
1408 CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),
1409 llvm::ConstantInt::get(CGF.Int32Ty, 0));
1410 }
1411
1412 // Mark the call site and declaration with ReturnsTwice.
1413 llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
1414 llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
1415 CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
1416 llvm::Attribute::ReturnsTwice);
1417 llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
1418 llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
1419 ReturnsTwiceAttr, /*Local=*/true);
1420
1421 llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
1422 CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
1423 llvm::Value *Args[] = {Buf, Arg1};
1424 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
1425 CB->setAttributes(ReturnsTwiceAttr);
1426 return RValue::get(CB);
1427}
1428
1429// Many of the MSVC builtins are available on x64, ARM and AArch64; to avoid
1430// repeating code, we handle them here.
1470 __fastfail,
1471};
1472
1473static std::optional<CodeGenFunction::MSVCIntrin>
1474translateArmToMsvcIntrin(unsigned BuiltinID) {
1475 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1476 switch (BuiltinID) {
1477 default:
1478 return std::nullopt;
1479 case clang::ARM::BI_BitScanForward:
1480 case clang::ARM::BI_BitScanForward64:
1481 return MSVCIntrin::_BitScanForward;
1482 case clang::ARM::BI_BitScanReverse:
1483 case clang::ARM::BI_BitScanReverse64:
1484 return MSVCIntrin::_BitScanReverse;
1485 case clang::ARM::BI_InterlockedAnd64:
1486 return MSVCIntrin::_InterlockedAnd;
1487 case clang::ARM::BI_InterlockedExchange64:
1488 return MSVCIntrin::_InterlockedExchange;
1489 case clang::ARM::BI_InterlockedExchangeAdd64:
1490 return MSVCIntrin::_InterlockedExchangeAdd;
1491 case clang::ARM::BI_InterlockedExchangeSub64:
1492 return MSVCIntrin::_InterlockedExchangeSub;
1493 case clang::ARM::BI_InterlockedOr64:
1494 return MSVCIntrin::_InterlockedOr;
1495 case clang::ARM::BI_InterlockedXor64:
1496 return MSVCIntrin::_InterlockedXor;
1497 case clang::ARM::BI_InterlockedDecrement64:
1498 return MSVCIntrin::_InterlockedDecrement;
1499 case clang::ARM::BI_InterlockedIncrement64:
1500 return MSVCIntrin::_InterlockedIncrement;
1501 case clang::ARM::BI_InterlockedExchangeAdd8_acq:
1502 case clang::ARM::BI_InterlockedExchangeAdd16_acq:
1503 case clang::ARM::BI_InterlockedExchangeAdd_acq:
1504 case clang::ARM::BI_InterlockedExchangeAdd64_acq:
1505 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1506 case clang::ARM::BI_InterlockedExchangeAdd8_rel:
1507 case clang::ARM::BI_InterlockedExchangeAdd16_rel:
1508 case clang::ARM::BI_InterlockedExchangeAdd_rel:
1509 case clang::ARM::BI_InterlockedExchangeAdd64_rel:
1510 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1511 case clang::ARM::BI_InterlockedExchangeAdd8_nf:
1512 case clang::ARM::BI_InterlockedExchangeAdd16_nf:
1513 case clang::ARM::BI_InterlockedExchangeAdd_nf:
1514 case clang::ARM::BI_InterlockedExchangeAdd64_nf:
1515 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1516 case clang::ARM::BI_InterlockedExchange8_acq:
1517 case clang::ARM::BI_InterlockedExchange16_acq:
1518 case clang::ARM::BI_InterlockedExchange_acq:
1519 case clang::ARM::BI_InterlockedExchange64_acq:
1520 return MSVCIntrin::_InterlockedExchange_acq;
1521 case clang::ARM::BI_InterlockedExchange8_rel:
1522 case clang::ARM::BI_InterlockedExchange16_rel:
1523 case clang::ARM::BI_InterlockedExchange_rel:
1524 case clang::ARM::BI_InterlockedExchange64_rel:
1525 return MSVCIntrin::_InterlockedExchange_rel;
1526 case clang::ARM::BI_InterlockedExchange8_nf:
1527 case clang::ARM::BI_InterlockedExchange16_nf:
1528 case clang::ARM::BI_InterlockedExchange_nf:
1529 case clang::ARM::BI_InterlockedExchange64_nf:
1530 return MSVCIntrin::_InterlockedExchange_nf;
1531 case clang::ARM::BI_InterlockedCompareExchange8_acq:
1532 case clang::ARM::BI_InterlockedCompareExchange16_acq:
1533 case clang::ARM::BI_InterlockedCompareExchange_acq:
1534 case clang::ARM::BI_InterlockedCompareExchange64_acq:
1535 return MSVCIntrin::_InterlockedCompareExchange_acq;
1536 case clang::ARM::BI_InterlockedCompareExchange8_rel:
1537 case clang::ARM::BI_InterlockedCompareExchange16_rel:
1538 case clang::ARM::BI_InterlockedCompareExchange_rel:
1539 case clang::ARM::BI_InterlockedCompareExchange64_rel:
1540 return MSVCIntrin::_InterlockedCompareExchange_rel;
1541 case clang::ARM::BI_InterlockedCompareExchange8_nf:
1542 case clang::ARM::BI_InterlockedCompareExchange16_nf:
1543 case clang::ARM::BI_InterlockedCompareExchange_nf:
1544 case clang::ARM::BI_InterlockedCompareExchange64_nf:
1545 return MSVCIntrin::_InterlockedCompareExchange_nf;
1546 case clang::ARM::BI_InterlockedOr8_acq:
1547 case clang::ARM::BI_InterlockedOr16_acq:
1548 case clang::ARM::BI_InterlockedOr_acq:
1549 case clang::ARM::BI_InterlockedOr64_acq:
1550 return MSVCIntrin::_InterlockedOr_acq;
1551 case clang::ARM::BI_InterlockedOr8_rel:
1552 case clang::ARM::BI_InterlockedOr16_rel:
1553 case clang::ARM::BI_InterlockedOr_rel:
1554 case clang::ARM::BI_InterlockedOr64_rel:
1555 return MSVCIntrin::_InterlockedOr_rel;
1556 case clang::ARM::BI_InterlockedOr8_nf:
1557 case clang::ARM::BI_InterlockedOr16_nf:
1558 case clang::ARM::BI_InterlockedOr_nf:
1559 case clang::ARM::BI_InterlockedOr64_nf:
1560 return MSVCIntrin::_InterlockedOr_nf;
1561 case clang::ARM::BI_InterlockedXor8_acq:
1562 case clang::ARM::BI_InterlockedXor16_acq:
1563 case clang::ARM::BI_InterlockedXor_acq:
1564 case clang::ARM::BI_InterlockedXor64_acq:
1565 return MSVCIntrin::_InterlockedXor_acq;
1566 case clang::ARM::BI_InterlockedXor8_rel:
1567 case clang::ARM::BI_InterlockedXor16_rel:
1568 case clang::ARM::BI_InterlockedXor_rel:
1569 case clang::ARM::BI_InterlockedXor64_rel:
1570 return MSVCIntrin::_InterlockedXor_rel;
1571 case clang::ARM::BI_InterlockedXor8_nf:
1572 case clang::ARM::BI_InterlockedXor16_nf:
1573 case clang::ARM::BI_InterlockedXor_nf:
1574 case clang::ARM::BI_InterlockedXor64_nf:
1575 return MSVCIntrin::_InterlockedXor_nf;
1576 case clang::ARM::BI_InterlockedAnd8_acq:
1577 case clang::ARM::BI_InterlockedAnd16_acq:
1578 case clang::ARM::BI_InterlockedAnd_acq:
1579 case clang::ARM::BI_InterlockedAnd64_acq:
1580 return MSVCIntrin::_InterlockedAnd_acq;
1581 case clang::ARM::BI_InterlockedAnd8_rel:
1582 case clang::ARM::BI_InterlockedAnd16_rel:
1583 case clang::ARM::BI_InterlockedAnd_rel:
1584 case clang::ARM::BI_InterlockedAnd64_rel:
1585 return MSVCIntrin::_InterlockedAnd_rel;
1586 case clang::ARM::BI_InterlockedAnd8_nf:
1587 case clang::ARM::BI_InterlockedAnd16_nf:
1588 case clang::ARM::BI_InterlockedAnd_nf:
1589 case clang::ARM::BI_InterlockedAnd64_nf:
1590 return MSVCIntrin::_InterlockedAnd_nf;
1591 case clang::ARM::BI_InterlockedIncrement16_acq:
1592 case clang::ARM::BI_InterlockedIncrement_acq:
1593 case clang::ARM::BI_InterlockedIncrement64_acq:
1594 return MSVCIntrin::_InterlockedIncrement_acq;
1595 case clang::ARM::BI_InterlockedIncrement16_rel:
1596 case clang::ARM::BI_InterlockedIncrement_rel:
1597 case clang::ARM::BI_InterlockedIncrement64_rel:
1598 return MSVCIntrin::_InterlockedIncrement_rel;
1599 case clang::ARM::BI_InterlockedIncrement16_nf:
1600 case clang::ARM::BI_InterlockedIncrement_nf:
1601 case clang::ARM::BI_InterlockedIncrement64_nf:
1602 return MSVCIntrin::_InterlockedIncrement_nf;
1603 case clang::ARM::BI_InterlockedDecrement16_acq:
1604 case clang::ARM::BI_InterlockedDecrement_acq:
1605 case clang::ARM::BI_InterlockedDecrement64_acq:
1606 return MSVCIntrin::_InterlockedDecrement_acq;
1607 case clang::ARM::BI_InterlockedDecrement16_rel:
1608 case clang::ARM::BI_InterlockedDecrement_rel:
1609 case clang::ARM::BI_InterlockedDecrement64_rel:
1610 return MSVCIntrin::_InterlockedDecrement_rel;
1611 case clang::ARM::BI_InterlockedDecrement16_nf:
1612 case clang::ARM::BI_InterlockedDecrement_nf:
1613 case clang::ARM::BI_InterlockedDecrement64_nf:
1614 return MSVCIntrin::_InterlockedDecrement_nf;
1615 }
1616 llvm_unreachable("must return from switch");
1617}
1618
1619static std::optional<CodeGenFunction::MSVCIntrin>
1620translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
1621 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1622 switch (BuiltinID) {
1623 default:
1624 return std::nullopt;
1625 case clang::AArch64::BI_BitScanForward:
1626 case clang::AArch64::BI_BitScanForward64:
1627 return MSVCIntrin::_BitScanForward;
1628 case clang::AArch64::BI_BitScanReverse:
1629 case clang::AArch64::BI_BitScanReverse64:
1630 return MSVCIntrin::_BitScanReverse;
1631 case clang::AArch64::BI_InterlockedAnd64:
1632 return MSVCIntrin::_InterlockedAnd;
1633 case clang::AArch64::BI_InterlockedExchange64:
1634 return MSVCIntrin::_InterlockedExchange;
1635 case clang::AArch64::BI_InterlockedExchangeAdd64:
1636 return MSVCIntrin::_InterlockedExchangeAdd;
1637 case clang::AArch64::BI_InterlockedExchangeSub64:
1638 return MSVCIntrin::_InterlockedExchangeSub;
1639 case clang::AArch64::BI_InterlockedOr64:
1640 return MSVCIntrin::_InterlockedOr;
1641 case clang::AArch64::BI_InterlockedXor64:
1642 return MSVCIntrin::_InterlockedXor;
1643 case clang::AArch64::BI_InterlockedDecrement64:
1644 return MSVCIntrin::_InterlockedDecrement;
1645 case clang::AArch64::BI_InterlockedIncrement64:
1646 return MSVCIntrin::_InterlockedIncrement;
1647 case clang::AArch64::BI_InterlockedExchangeAdd8_acq:
1648 case clang::AArch64::BI_InterlockedExchangeAdd16_acq:
1649 case clang::AArch64::BI_InterlockedExchangeAdd_acq:
1650 case clang::AArch64::BI_InterlockedExchangeAdd64_acq:
1651 return MSVCIntrin::_InterlockedExchangeAdd_acq;
1652 case clang::AArch64::BI_InterlockedExchangeAdd8_rel:
1653 case clang::AArch64::BI_InterlockedExchangeAdd16_rel:
1654 case clang::AArch64::BI_InterlockedExchangeAdd_rel:
1655 case clang::AArch64::BI_InterlockedExchangeAdd64_rel:
1656 return MSVCIntrin::_InterlockedExchangeAdd_rel;
1657 case clang::AArch64::BI_InterlockedExchangeAdd8_nf:
1658 case clang::AArch64::BI_InterlockedExchangeAdd16_nf:
1659 case clang::AArch64::BI_InterlockedExchangeAdd_nf:
1660 case clang::AArch64::BI_InterlockedExchangeAdd64_nf:
1661 return MSVCIntrin::_InterlockedExchangeAdd_nf;
1662 case clang::AArch64::BI_InterlockedExchange8_acq:
1663 case clang::AArch64::BI_InterlockedExchange16_acq:
1664 case clang::AArch64::BI_InterlockedExchange_acq:
1665 case clang::AArch64::BI_InterlockedExchange64_acq:
1666 return MSVCIntrin::_InterlockedExchange_acq;
1667 case clang::AArch64::BI_InterlockedExchange8_rel:
1668 case clang::AArch64::BI_InterlockedExchange16_rel:
1669 case clang::AArch64::BI_InterlockedExchange_rel:
1670 case clang::AArch64::BI_InterlockedExchange64_rel:
1671 return MSVCIntrin::_InterlockedExchange_rel;
1672 case clang::AArch64::BI_InterlockedExchange8_nf:
1673 case clang::AArch64::BI_InterlockedExchange16_nf:
1674 case clang::AArch64::BI_InterlockedExchange_nf:
1675 case clang::AArch64::BI_InterlockedExchange64_nf:
1676 return MSVCIntrin::_InterlockedExchange_nf;
1677 case clang::AArch64::BI_InterlockedCompareExchange8_acq:
1678 case clang::AArch64::BI_InterlockedCompareExchange16_acq:
1679 case clang::AArch64::BI_InterlockedCompareExchange_acq:
1680 case clang::AArch64::BI_InterlockedCompareExchange64_acq:
1681 return MSVCIntrin::_InterlockedCompareExchange_acq;
1682 case clang::AArch64::BI_InterlockedCompareExchange8_rel:
1683 case clang::AArch64::BI_InterlockedCompareExchange16_rel:
1684 case clang::AArch64::BI_InterlockedCompareExchange_rel:
1685 case clang::AArch64::BI_InterlockedCompareExchange64_rel:
1686 return MSVCIntrin::_InterlockedCompareExchange_rel;
1687 case clang::AArch64::BI_InterlockedCompareExchange8_nf:
1688 case clang::AArch64::BI_InterlockedCompareExchange16_nf:
1689 case clang::AArch64::BI_InterlockedCompareExchange_nf:
1690 case clang::AArch64::BI_InterlockedCompareExchange64_nf:
1691 return MSVCIntrin::_InterlockedCompareExchange_nf;
1692 case clang::AArch64::BI_InterlockedCompareExchange128:
1693 return MSVCIntrin::_InterlockedCompareExchange128;
1694 case clang::AArch64::BI_InterlockedCompareExchange128_acq:
1695 return MSVCIntrin::_InterlockedCompareExchange128_acq;
1696 case clang::AArch64::BI_InterlockedCompareExchange128_nf:
1697 return MSVCIntrin::_InterlockedCompareExchange128_nf;
1698 case clang::AArch64::BI_InterlockedCompareExchange128_rel:
1699 return MSVCIntrin::_InterlockedCompareExchange128_rel;
1700 case clang::AArch64::BI_InterlockedOr8_acq:
1701 case clang::AArch64::BI_InterlockedOr16_acq:
1702 case clang::AArch64::BI_InterlockedOr_acq:
1703 case clang::AArch64::BI_InterlockedOr64_acq:
1704 return MSVCIntrin::_InterlockedOr_acq;
1705 case clang::AArch64::BI_InterlockedOr8_rel:
1706 case clang::AArch64::BI_InterlockedOr16_rel:
1707 case clang::AArch64::BI_InterlockedOr_rel:
1708 case clang::AArch64::BI_InterlockedOr64_rel:
1709 return MSVCIntrin::_InterlockedOr_rel;
1710 case clang::AArch64::BI_InterlockedOr8_nf:
1711 case clang::AArch64::BI_InterlockedOr16_nf:
1712 case clang::AArch64::BI_InterlockedOr_nf:
1713 case clang::AArch64::BI_InterlockedOr64_nf:
1714 return MSVCIntrin::_InterlockedOr_nf;
1715 case clang::AArch64::BI_InterlockedXor8_acq:
1716 case clang::AArch64::BI_InterlockedXor16_acq:
1717 case clang::AArch64::BI_InterlockedXor_acq:
1718 case clang::AArch64::BI_InterlockedXor64_acq:
1719 return MSVCIntrin::_InterlockedXor_acq;
1720 case clang::AArch64::BI_InterlockedXor8_rel:
1721 case clang::AArch64::BI_InterlockedXor16_rel:
1722 case clang::AArch64::BI_InterlockedXor_rel:
1723 case clang::AArch64::BI_InterlockedXor64_rel:
1724 return MSVCIntrin::_InterlockedXor_rel;
1725 case clang::AArch64::BI_InterlockedXor8_nf:
1726 case clang::AArch64::BI_InterlockedXor16_nf:
1727 case clang::AArch64::BI_InterlockedXor_nf:
1728 case clang::AArch64::BI_InterlockedXor64_nf:
1729 return MSVCIntrin::_InterlockedXor_nf;
1730 case clang::AArch64::BI_InterlockedAnd8_acq:
1731 case clang::AArch64::BI_InterlockedAnd16_acq:
1732 case clang::AArch64::BI_InterlockedAnd_acq:
1733 case clang::AArch64::BI_InterlockedAnd64_acq:
1734 return MSVCIntrin::_InterlockedAnd_acq;
1735 case clang::AArch64::BI_InterlockedAnd8_rel:
1736 case clang::AArch64::BI_InterlockedAnd16_rel:
1737 case clang::AArch64::BI_InterlockedAnd_rel:
1738 case clang::AArch64::BI_InterlockedAnd64_rel:
1739 return MSVCIntrin::_InterlockedAnd_rel;
1740 case clang::AArch64::BI_InterlockedAnd8_nf:
1741 case clang::AArch64::BI_InterlockedAnd16_nf:
1742 case clang::AArch64::BI_InterlockedAnd_nf:
1743 case clang::AArch64::BI_InterlockedAnd64_nf:
1744 return MSVCIntrin::_InterlockedAnd_nf;
1745 case clang::AArch64::BI_InterlockedIncrement16_acq:
1746 case clang::AArch64::BI_InterlockedIncrement_acq:
1747 case clang::AArch64::BI_InterlockedIncrement64_acq:
1748 return MSVCIntrin::_InterlockedIncrement_acq;
1749 case clang::AArch64::BI_InterlockedIncrement16_rel:
1750 case clang::AArch64::BI_InterlockedIncrement_rel:
1751 case clang::AArch64::BI_InterlockedIncrement64_rel:
1752 return MSVCIntrin::_InterlockedIncrement_rel;
1753 case clang::AArch64::BI_InterlockedIncrement16_nf:
1754 case clang::AArch64::BI_InterlockedIncrement_nf:
1755 case clang::AArch64::BI_InterlockedIncrement64_nf:
1756 return MSVCIntrin::_InterlockedIncrement_nf;
1757 case clang::AArch64::BI_InterlockedDecrement16_acq:
1758 case clang::AArch64::BI_InterlockedDecrement_acq:
1759 case clang::AArch64::BI_InterlockedDecrement64_acq:
1760 return MSVCIntrin::_InterlockedDecrement_acq;
1761 case clang::AArch64::BI_InterlockedDecrement16_rel:
1762 case clang::AArch64::BI_InterlockedDecrement_rel:
1763 case clang::AArch64::BI_InterlockedDecrement64_rel:
1764 return MSVCIntrin::_InterlockedDecrement_rel;
1765 case clang::AArch64::BI_InterlockedDecrement16_nf:
1766 case clang::AArch64::BI_InterlockedDecrement_nf:
1767 case clang::AArch64::BI_InterlockedDecrement64_nf:
1768 return MSVCIntrin::_InterlockedDecrement_nf;
1769 }
1770 llvm_unreachable("must return from switch");
1771}
1772
1773static std::optional<CodeGenFunction::MSVCIntrin>
1774translateX86ToMsvcIntrin(unsigned BuiltinID) {
1775 using MSVCIntrin = CodeGenFunction::MSVCIntrin;
1776 switch (BuiltinID) {
1777 default:
1778 return std::nullopt;
1779 case clang::X86::BI_BitScanForward:
1780 case clang::X86::BI_BitScanForward64:
1781 return MSVCIntrin::_BitScanForward;
1782 case clang::X86::BI_BitScanReverse:
1783 case clang::X86::BI_BitScanReverse64:
1784 return MSVCIntrin::_BitScanReverse;
1785 case clang::X86::BI_InterlockedAnd64:
1786 return MSVCIntrin::_InterlockedAnd;
1787 case clang::X86::BI_InterlockedCompareExchange128:
1788 return MSVCIntrin::_InterlockedCompareExchange128;
1789 case clang::X86::BI_InterlockedExchange64:
1790 return MSVCIntrin::_InterlockedExchange;
1791 case clang::X86::BI_InterlockedExchangeAdd64:
1792 return MSVCIntrin::_InterlockedExchangeAdd;
1793 case clang::X86::BI_InterlockedExchangeSub64:
1794 return MSVCIntrin::_InterlockedExchangeSub;
1795 case clang::X86::BI_InterlockedOr64:
1796 return MSVCIntrin::_InterlockedOr;
1797 case clang::X86::BI_InterlockedXor64:
1798 return MSVCIntrin::_InterlockedXor;
1799 case clang::X86::BI_InterlockedDecrement64:
1800 return MSVCIntrin::_InterlockedDecrement;
1801 case clang::X86::BI_InterlockedIncrement64:
1802 return MSVCIntrin::_InterlockedIncrement;
1803 }
1804 llvm_unreachable("must return from switch");
1805}
1806
1807// Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated.
1808Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
1809 const CallExpr *E) {
1810 switch (BuiltinID) {
1811 case MSVCIntrin::_BitScanForward:
1812 case MSVCIntrin::_BitScanReverse: {
1813 Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));
1814 Value *ArgValue = EmitScalarExpr(E->getArg(1));
1815
1816 llvm::Type *ArgType = ArgValue->getType();
1817 llvm::Type *IndexType = IndexAddress.getElementType();
1818 llvm::Type *ResultType = ConvertType(E->getType());
1819
1820 Value *ArgZero = llvm::Constant::getNullValue(ArgType);
1821 Value *ResZero = llvm::Constant::getNullValue(ResultType);
1822 Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
1823
1824 BasicBlock *Begin = Builder.GetInsertBlock();
1825 BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
1826 Builder.SetInsertPoint(End);
1827 PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
1828
1829 Builder.SetInsertPoint(Begin);
1830 Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
1831 BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
1832 Builder.CreateCondBr(IsZero, End, NotZero);
1833 Result->addIncoming(ResZero, Begin);
1834
1835 Builder.SetInsertPoint(NotZero);
1836
1837 if (BuiltinID == MSVCIntrin::_BitScanForward) {
1838 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1839 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1840 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1841 Builder.CreateStore(ZeroCount, IndexAddress, false);
1842 } else {
1843 unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
1844 Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
1845
1846 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1847 Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
1848 ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
1849 Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
1850 Builder.CreateStore(Index, IndexAddress, false);
1851 }
1852 Builder.CreateBr(End);
1853 Result->addIncoming(ResOne, NotZero);
1854
1855 Builder.SetInsertPoint(End);
1856 return Result;
1857 }
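// For illustration: assuming 32-bit operands, the contract implemented above
// for _BitScanReverse is roughly the following C sketch (the explicit zero
// check exists because ctlz/cttz are emitted with is_zero_poison = true):
//
//   unsigned char BitScanReverse32(unsigned int *Index, unsigned int Mask) {
//     if (Mask == 0)
//       return 0;                                    // "bitscan_end", ResZero
//     *Index = 31u - (unsigned)__builtin_clz(Mask);  // ArgWidth - 1 - ctlz
//     return 1;                                      // "bitscan_not_zero" path
//   }
//
// _BitScanForward has the same shape with cttz and *Index = ctz(Mask).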
1858 case MSVCIntrin::_InterlockedAnd:
1859 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
1860 case MSVCIntrin::_InterlockedExchange:
1861 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
1862 case MSVCIntrin::_InterlockedExchangeAdd:
1863 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
1864 case MSVCIntrin::_InterlockedExchangeSub:
1865 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
1866 case MSVCIntrin::_InterlockedOr:
1867 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
1868 case MSVCIntrin::_InterlockedXor:
1869 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
1870 case MSVCIntrin::_InterlockedExchangeAdd_acq:
1871 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1872 AtomicOrdering::Acquire);
1873 case MSVCIntrin::_InterlockedExchangeAdd_rel:
1874 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1875 AtomicOrdering::Release);
1876 case MSVCIntrin::_InterlockedExchangeAdd_nf:
1877 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
1878 AtomicOrdering::Monotonic);
1879 case MSVCIntrin::_InterlockedExchange_acq:
1880 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1881 AtomicOrdering::Acquire);
1882 case MSVCIntrin::_InterlockedExchange_rel:
1883 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1884 AtomicOrdering::Release);
1885 case MSVCIntrin::_InterlockedExchange_nf:
1886 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
1887 AtomicOrdering::Monotonic);
1888 case MSVCIntrin::_InterlockedCompareExchange_acq:
1889 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
1890 case MSVCIntrin::_InterlockedCompareExchange_rel:
1891 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
1892 case MSVCIntrin::_InterlockedCompareExchange_nf:
1893 return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1894 case MSVCIntrin::_InterlockedCompareExchange128:
 1895 return EmitAtomicCmpXchg128ForMSIntrin(
 1896 *this, E, AtomicOrdering::SequentiallyConsistent);
1897 case MSVCIntrin::_InterlockedCompareExchange128_acq:
1898 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
1899 case MSVCIntrin::_InterlockedCompareExchange128_rel:
1900 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
1901 case MSVCIntrin::_InterlockedCompareExchange128_nf:
1902 return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
1903 case MSVCIntrin::_InterlockedOr_acq:
1904 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1905 AtomicOrdering::Acquire);
1906 case MSVCIntrin::_InterlockedOr_rel:
1907 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1908 AtomicOrdering::Release);
1909 case MSVCIntrin::_InterlockedOr_nf:
1910 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1911 AtomicOrdering::Monotonic);
1912 case MSVCIntrin::_InterlockedXor_acq:
1913 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1914 AtomicOrdering::Acquire);
1915 case MSVCIntrin::_InterlockedXor_rel:
1916 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1917 AtomicOrdering::Release);
1918 case MSVCIntrin::_InterlockedXor_nf:
1919 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1920 AtomicOrdering::Monotonic);
1921 case MSVCIntrin::_InterlockedAnd_acq:
1922 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1923 AtomicOrdering::Acquire);
1924 case MSVCIntrin::_InterlockedAnd_rel:
1925 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1926 AtomicOrdering::Release);
1927 case MSVCIntrin::_InterlockedAnd_nf:
1928 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1929 AtomicOrdering::Monotonic);
1930 case MSVCIntrin::_InterlockedIncrement_acq:
1931 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1932 case MSVCIntrin::_InterlockedIncrement_rel:
1933 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1934 case MSVCIntrin::_InterlockedIncrement_nf:
1935 return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1936 case MSVCIntrin::_InterlockedDecrement_acq:
1937 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1938 case MSVCIntrin::_InterlockedDecrement_rel:
1939 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1940 case MSVCIntrin::_InterlockedDecrement_nf:
1941 return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1942
1943 case MSVCIntrin::_InterlockedDecrement:
1944 return EmitAtomicDecrementValue(*this, E);
1945 case MSVCIntrin::_InterlockedIncrement:
1946 return EmitAtomicIncrementValue(*this, E);
1947
1948 case MSVCIntrin::__fastfail: {
1949 // Request immediate process termination from the kernel. The instruction
1950 // sequences to do this are documented on MSDN:
1951 // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1952 llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1953 StringRef Asm, Constraints;
1954 switch (ISA) {
1955 default:
1956 ErrorUnsupported(E, "__fastfail call for this architecture");
1957 break;
1958 case llvm::Triple::x86:
1959 case llvm::Triple::x86_64:
1960 Asm = "int $$0x29";
1961 Constraints = "{cx}";
1962 break;
1963 case llvm::Triple::thumb:
1964 Asm = "udf #251";
1965 Constraints = "{r0}";
1966 break;
1967 case llvm::Triple::aarch64:
1968 Asm = "brk #0xF003";
1969 Constraints = "{w0}";
1970 }
1971 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1972 llvm::InlineAsm *IA =
1973 llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1974 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1975 getLLVMContext(), llvm::AttributeList::FunctionIndex,
1976 llvm::Attribute::NoReturn);
1977 llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1978 CI->setAttributes(NoReturnAttr);
1979 return CI;
1980 }
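// For illustration: on x86-64 the __fastfail lowering above is roughly
// equivalent to this freestanding sketch (the failure code is whatever the
// caller passes; any documented fast-fail code works):
//
//   __attribute__((noreturn)) static void fastfail_sketch(unsigned Code) {
//     __asm__ volatile("int $0x29" : : "c"(Code)); // code in (e)cx, "{cx}"
//     __builtin_unreachable();
//   }
//
// The thumb and aarch64 variants use "udf #251" / "brk #0xF003" with the code
// in r0 / w0 instead.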
1981 }
1982 llvm_unreachable("Incorrect MSVC intrinsic!");
1983}
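// For illustration: most of the _Interlocked* cases above reduce to a single
// atomicrmw whose memory ordering is taken from the name suffix. A call such as
//
//   long Old = _InterlockedExchangeAdd_acq(&Counter, 1);  // ARM/ARM64 suffix form
//
// is emitted, modulo exact types, as
//
//   %Old = atomicrmw add ptr %Counter, i32 1 acquire
//
// while the unsuffixed form uses seq_cst, _rel uses release, and _nf ("no
// fence") uses monotonic.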
1984
1985namespace {
1986// ARC cleanup for __builtin_os_log_format
1987struct CallObjCArcUse final : EHScopeStack::Cleanup {
1988 CallObjCArcUse(llvm::Value *object) : object(object) {}
1989 llvm::Value *object;
1990
1991 void Emit(CodeGenFunction &CGF, Flags flags) override {
1992 CGF.EmitARCIntrinsicUse(object);
1993 }
1994};
1995}
1996
1997Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
 1998 BuiltinCheckKind Kind) {
1999 assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
2000 && "Unsupported builtin check kind");
2001
2002 Value *ArgValue = EmitScalarExpr(E);
2003 if (!SanOpts.has(SanitizerKind::Builtin))
2004 return ArgValue;
2005
2006 SanitizerScope SanScope(this);
2007 Value *Cond = Builder.CreateICmpNE(
2008 ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
2009 EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
2010 SanitizerHandler::InvalidBuiltin,
 2011 {EmitCheckSourceLocation(E->getExprLoc()),
 2012 llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
2013 std::nullopt);
2014 return ArgValue;
2015}
2016
2017static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {
2018 return CGF.Builder.CreateBinaryIntrinsic(
2019 Intrinsic::abs, ArgValue,
2020 ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));
2021}
2022
2023static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,
 2024 bool SanitizeOverflow) {
2025 Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));
2026
2027 // Try to eliminate overflow check.
2028 if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {
2029 if (!VCI->isMinSignedValue())
2030 return EmitAbs(CGF, ArgValue, true);
2031 }
2032
2033 CodeGenFunction::SanitizerScope SanScope(&CGF);
2034
2035 Constant *Zero = Constant::getNullValue(ArgValue->getType());
2036 Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(
2037 Intrinsic::ssub_with_overflow, Zero, ArgValue);
2038 Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);
2039 Value *NotOverflow = CGF.Builder.CreateNot(
2040 CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));
2041
2042 // TODO: support -ftrapv-handler.
2043 if (SanitizeOverflow) {
2044 CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},
2045 SanitizerHandler::NegateOverflow,
2046 {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),
 2047 CGF.EmitCheckTypeDescriptor(E->getType())},
 2048 {ArgValue});
2049 } else
2050 CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);
2051
2052 Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
2053 return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");
2054}
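// For illustration: the negation-based check above can only trip for the most
// negative value. Assuming 32-bit int, an equivalent C sketch is:
//
//   int CheckedAbs(int X, _Bool *Overflowed) {
//     int Neg;
//     *Overflowed = __builtin_sub_overflow(0, X, &Neg); // true only for INT_MIN
//     return X < 0 ? Neg : X;                           // the "abscond" select
//   }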
2055
2056/// Get the argument type for arguments to os_log_helper.
2057static CanQualType getOSLogArgType(ASTContext &C, int Size) {
 2058 QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
2059 return C.getCanonicalType(UnsignedTy);
2060}
2061
2062llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
 2063 const analyze_os_log::OSLogBufferLayout &Layout,
 2064 CharUnits BufferAlignment) {
2065 ASTContext &Ctx = getContext();
2066
 2067 SmallString<64> Name;
 2068 {
2069 raw_svector_ostream OS(Name);
2070 OS << "__os_log_helper";
2071 OS << "_" << BufferAlignment.getQuantity();
2072 OS << "_" << int(Layout.getSummaryByte());
2073 OS << "_" << int(Layout.getNumArgsByte());
2074 for (const auto &Item : Layout.Items)
2075 OS << "_" << int(Item.getSizeByte()) << "_"
2076 << int(Item.getDescriptorByte());
2077 }
2078
2079 if (llvm::Function *F = CGM.getModule().getFunction(Name))
2080 return F;
2081
 2082 llvm::SmallVector<QualType, 4> ArgTys;
 2083 FunctionArgList Args;
2084 Args.push_back(ImplicitParamDecl::Create(
2085 Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
 2086 ImplicitParamKind::Other));
 2087 ArgTys.emplace_back(Ctx.VoidPtrTy);
2088
2089 for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
2090 char Size = Layout.Items[I].getSizeByte();
2091 if (!Size)
2092 continue;
2093
2094 QualType ArgTy = getOSLogArgType(Ctx, Size);
2095 Args.push_back(ImplicitParamDecl::Create(
2096 Ctx, nullptr, SourceLocation(),
2097 &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
 2098 ImplicitParamKind::Other));
 2099 ArgTys.emplace_back(ArgTy);
2100 }
2101
2102 QualType ReturnTy = Ctx.VoidTy;
2103
2104 // The helper function has linkonce_odr linkage to enable the linker to merge
2105 // identical functions. To ensure the merging always happens, 'noinline' is
2106 // attached to the function when compiling with -Oz.
2107 const CGFunctionInfo &FI =
 2108 CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
 2109 llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
2110 llvm::Function *Fn = llvm::Function::Create(
2111 FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
2112 Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
2113 CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);
 2114 CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
 2115 Fn->setDoesNotThrow();
2116
2117 // Attach 'noinline' at -Oz.
2118 if (CGM.getCodeGenOpts().OptimizeSize == 2)
2119 Fn->addFnAttr(llvm::Attribute::NoInline);
2120
2121 auto NL = ApplyDebugLocation::CreateEmpty(*this);
2122 StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);
2123
2124 // Create a scope with an artificial location for the body of this function.
2125 auto AL = ApplyDebugLocation::CreateArtificial(*this);
2126
2127 CharUnits Offset;
 2128 Address BufAddr = makeNaturalAddressForPointer(
 2129 Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,
2130 BufferAlignment);
2131 Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
2132 Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2133 Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
2134 Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2135
2136 unsigned I = 1;
2137 for (const auto &Item : Layout.Items) {
 2138 Builder.CreateStore(
 2139 Builder.getInt8(Item.getDescriptorByte()),
2140 Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
 2141 Builder.CreateStore(
 2142 Builder.getInt8(Item.getSizeByte()),
2143 Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2144
2145 CharUnits Size = Item.size();
2146 if (!Size.getQuantity())
2147 continue;
2148
2149 Address Arg = GetAddrOfLocalVar(Args[I]);
2150 Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
2151 Addr = Addr.withElementType(Arg.getElementType());
 2152 Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
 2153 Offset += Size;
2154 ++I;
2155 }
2156
 2157 FinishFunction();
 2158
2159 return Fn;
2160}
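// For illustration: the helper name built at the top of this function encodes
// the buffer alignment, the two header bytes, and each item's size/descriptor
// pair, i.e. it has the shape
//
//   __os_log_helper_<align>_<summary>_<numArgs>(_<size>_<descriptor>)*
//
// so two translation units that format the same layout produce the same
// linkonce_odr helper and the linker keeps a single copy.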
2161
2162RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
 2163 assert(E.getNumArgs() >= 2 &&
2164 "__builtin_os_log_format takes at least 2 arguments");
2165 ASTContext &Ctx = getContext();
 2166 analyze_os_log::OSLogBufferLayout Layout;
 2167 analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
 2168 Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
2169 llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2170
2171 // Ignore argument 1, the format string. It is not currently used.
2172 CallArgList Args;
2173 Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);
2174
2175 for (const auto &Item : Layout.Items) {
2176 int Size = Item.getSizeByte();
2177 if (!Size)
2178 continue;
2179
2180 llvm::Value *ArgVal;
2181
2182 if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
2183 uint64_t Val = 0;
2184 for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
2185 Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
2186 ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
2187 } else if (const Expr *TheExpr = Item.getExpr()) {
2188 ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2189
2190 // If a temporary object that requires destruction after the full
2191 // expression is passed, push a lifetime-extended cleanup to extend its
2192 // lifetime to the end of the enclosing block scope.
2193 auto LifetimeExtendObject = [&](const Expr *E) {
2194 E = E->IgnoreParenCasts();
2195 // Extend lifetimes of objects returned by function calls and message
2196 // sends.
2197
2198 // FIXME: We should do this in other cases in which temporaries are
2199 // created including arguments of non-ARC types (e.g., C++
2200 // temporaries).
2201 if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))
2202 return true;
2203 return false;
2204 };
2205
2206 if (TheExpr->getType()->isObjCRetainableType() &&
2207 getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {
2208 assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2209 "Only scalar can be a ObjC retainable type");
2210 if (!isa<Constant>(ArgVal)) {
2211 CleanupKind Cleanup = getARCCleanupKind();
2212 QualType Ty = TheExpr->getType();
 2213 RawAddress Alloca = RawAddress::invalid();
 2214 RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);
2215 ArgVal = EmitARCRetain(Ty, ArgVal);
2216 Builder.CreateStore(ArgVal, Addr);
2217 pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,
 2218 CodeGenFunction::destroyARCStrongPrecise,
 2219 Cleanup & EHCleanup);
2220
 2221 // Push a clang.arc.use call to ensure the ARC optimizer knows that the
 2222 // argument has to be kept alive.
2223 if (CGM.getCodeGenOpts().OptimizationLevel != 0)
2224 pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);
2225 }
2226 }
2227 } else {
2228 ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
2229 }
2230
2231 unsigned ArgValSize =
2232 CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
2233 llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
2234 ArgValSize);
2235 ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
2236 CanQualType ArgTy = getOSLogArgType(Ctx, Size);
2237 // If ArgVal has type x86_fp80, zero-extend ArgVal.
2238 ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
2239 Args.add(RValue::get(ArgVal), ArgTy);
2240 }
2241
2242 const CGFunctionInfo &FI =
 2243 CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
 2244 llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
 2245 Layout, BufAddr.getAlignment());
 2246 EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
 2247 return RValue::get(BufAddr, *this);
2248}
2249
2250static bool isSpecialUnsignedMultiplySignedResult(
 2251 unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,
2252 WidthAndSignedness ResultInfo) {
2253 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2254 Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&
2255 !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;
2256}
2257
2258static RValue EmitCheckedUnsignedMultiplySignedResult(
 2259 CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,
2260 const clang::Expr *Op2, WidthAndSignedness Op2Info,
2261 const clang::Expr *ResultArg, QualType ResultQTy,
2262 WidthAndSignedness ResultInfo) {
 2263 assert(isSpecialUnsignedMultiplySignedResult(
 2264 Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&
2265 "Cannot specialize this multiply");
2266
2267 llvm::Value *V1 = CGF.EmitScalarExpr(Op1);
2268 llvm::Value *V2 = CGF.EmitScalarExpr(Op2);
2269
2270 llvm::Value *HasOverflow;
2271 llvm::Value *Result = EmitOverflowIntrinsic(
2272 CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);
2273
 2274 // The intrinsic call will detect overflow when the value is > UINT_MAX;
 2275 // however, since the original builtin had a signed result, we need to report
 2276 // an overflow when the result is greater than INT_MAX.
2277 auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);
2278 llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);
2279
2280 llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);
2281 HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);
2282
2283 bool isVolatile =
2284 ResultArg->getType()->getPointeeType().isVolatileQualified();
2285 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2286 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2287 isVolatile);
2288 return RValue::get(HasOverflow);
2289}
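// Worked example for the unsigned*unsigned -> signed specialization above,
// assuming 32-bit operands:
//
//   unsigned A = 3000000000u, B = 1u;
//   int R;
//   _Bool Ovf = __builtin_mul_overflow(A, B, &R);   // Ovf == 1
//
// The product fits in 32 unsigned bits, so llvm.umul.with.overflow reports no
// overflow, but 3000000000 > INT_MAX (2147483647), so the extra IntMaxOverflow
// compare is what makes the builtin return true.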
2290
2291/// Determine if a binop is a checked mixed-sign multiply we can specialize.
2292static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
2293 WidthAndSignedness Op1Info,
2294 WidthAndSignedness Op2Info,
2295 WidthAndSignedness ResultInfo) {
2296 return BuiltinID == Builtin::BI__builtin_mul_overflow &&
2297 std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
2298 Op1Info.Signed != Op2Info.Signed;
2299}
2300
2301/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
2302/// the generic checked-binop irgen.
2303static RValue
2304EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
 2305 WidthAndSignedness Op1Info, const clang::Expr *Op2,
2306 WidthAndSignedness Op2Info,
2307 const clang::Expr *ResultArg, QualType ResultQTy,
2308 WidthAndSignedness ResultInfo) {
2309 assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
2310 Op2Info, ResultInfo) &&
2311 "Not a mixed-sign multipliction we can specialize");
2312
2313 // Emit the signed and unsigned operands.
2314 const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
2315 const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
2316 llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
2317 llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
2318 unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
2319 unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
2320
2321 // One of the operands may be smaller than the other. If so, [s|z]ext it.
2322 if (SignedOpWidth < UnsignedOpWidth)
2323 Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
2324 if (UnsignedOpWidth < SignedOpWidth)
2325 Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
2326
2327 llvm::Type *OpTy = Signed->getType();
2328 llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
2329 Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
2330 llvm::Type *ResTy = ResultPtr.getElementType();
2331 unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
2332
2333 // Take the absolute value of the signed operand.
2334 llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
2335 llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
2336 llvm::Value *AbsSigned =
2337 CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
2338
2339 // Perform a checked unsigned multiplication.
2340 llvm::Value *UnsignedOverflow;
2341 llvm::Value *UnsignedResult =
2342 EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
2343 Unsigned, UnsignedOverflow);
2344
2345 llvm::Value *Overflow, *Result;
2346 if (ResultInfo.Signed) {
 2347 // Signed overflow occurs if the result is greater than INT_MAX or less
 2348 // than INT_MIN, i.e. when |Result| > (INT_MAX + IsNegative).
2349 auto IntMax =
2350 llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);
2351 llvm::Value *MaxResult =
2352 CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
2353 CGF.Builder.CreateZExt(IsNegative, OpTy));
2354 llvm::Value *SignedOverflow =
2355 CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
2356 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
2357
2358 // Prepare the signed result (possibly by negating it).
2359 llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
2360 llvm::Value *SignedResult =
2361 CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
2362 Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
2363 } else {
2364 // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
2365 llvm::Value *Underflow = CGF.Builder.CreateAnd(
2366 IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
2367 Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
2368 if (ResultInfo.Width < OpWidth) {
2369 auto IntMax =
2370 llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
2371 llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
2372 UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
2373 Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
2374 }
2375
2376 // Negate the product if it would be negative in infinite precision.
2377 Result = CGF.Builder.CreateSelect(
2378 IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
2379
2380 Result = CGF.Builder.CreateTrunc(Result, ResTy);
2381 }
2382 assert(Overflow && Result && "Missing overflow or result");
2383
2384 bool isVolatile =
2385 ResultArg->getType()->getPointeeType().isVolatileQualified();
2386 CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
2387 isVolatile);
2388 return RValue::get(Overflow);
2389}
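// Worked example for the mixed-sign path above, assuming 32-bit operands and a
// signed result:
//
//   int S = -3; unsigned U = 5u; int R;
//   _Bool Ovf = __builtin_mul_overflow(S, U, &R);   // R == -15, Ovf == 0
//
// The signed operand is replaced by |S| = 3, the unsigned multiply gives 15
// with no overflow, the product is negated because IsNegative is true, and
// 15 <= INT_MAX + 1 (MaxResult for a negative result), so no overflow is
// reported.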
2390
2391static bool
2392TypeRequiresBuiltinLaunderImp(ASTContext &Ctx, QualType Ty,
 2393 llvm::SmallPtrSetImpl<const Decl *> &Seen) {
2394 if (const auto *Arr = Ctx.getAsArrayType(Ty))
2395 Ty = Ctx.getBaseElementType(Arr);
2396
2397 const auto *Record = Ty->getAsCXXRecordDecl();
2398 if (!Record)
2399 return false;
2400
2401 // We've already checked this type, or are in the process of checking it.
2402 if (!Seen.insert(Record).second)
2403 return false;
2404
2405 assert(Record->hasDefinition() &&
2406 "Incomplete types should already be diagnosed");
2407
2408 if (Record->isDynamicClass())
2409 return true;
2410
2411 for (FieldDecl *F : Record->fields()) {
2412 if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
2413 return true;
2414 }
2415 return false;
2416}
2417
2418/// Determine if the specified type requires laundering by checking if it is a
2419/// dynamic class type or contains a subobject which is a dynamic class type.
2420static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
 2421 if (!CGM.getCodeGenOpts().StrictVTablePointers)
2422 return false;
 2423 llvm::SmallPtrSet<const Decl *, 10> Seen;
 2424 return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
2425}
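// For illustration, under -fstrict-vtable-pointers:
//
//   struct Plain { int X; };           // no dynamic class anywhere -> false
//   struct Dyn { virtual ~Dyn(); };    // dynamic class -> true
//   struct Holder { Dyn D[2]; };       // contains one via an array -> true
//
// Only the 'true' cases force __builtin_launder to be more than a no-op.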
2426
2427RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
2428 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
2429 llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
2430
2431 // The builtin's shift arg may have a different type than the source arg and
2432 // result, but the LLVM intrinsic uses the same type for all values.
2433 llvm::Type *Ty = Src->getType();
2434 ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
2435
2436 // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
2437 unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2438 Function *F = CGM.getIntrinsic(IID, Ty);
2439 return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
2440}
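// For illustration: a rotate is a funnel shift whose two data operands are the
// same value, so
//
//   unsigned RotL(unsigned X, unsigned N) { return __builtin_rotateleft32(X, N); }
//
// is emitted roughly as
//
//   %r = call i32 @llvm.fshl.i32(i32 %X, i32 %X, i32 %N)
//
// and the rotate-right builtins use @llvm.fshr the same way.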
2441
2442// Map math builtins for long-double to f128 version.
2443static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {
2444 switch (BuiltinID) {
2445#define MUTATE_LDBL(func) \
2446 case Builtin::BI__builtin_##func##l: \
2447 return Builtin::BI__builtin_##func##f128;
2478 MUTATE_LDBL(nans)
2479 MUTATE_LDBL(inf)
2498 MUTATE_LDBL(huge_val)
2508#undef MUTATE_LDBL
2509 default:
2510 return BuiltinID;
2511 }
2512}
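// For illustration: this remapping only fires when long double is IEEE
// binary128 (see the PPC64/IEEEquad check in the caller below). In that mode a
// builtin such as __builtin_infl() is handled exactly like __builtin_inff128(),
// so the f128 lowering applies instead of the ppc_fp128 one.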
2513
2514static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,
2515 Value *V) {
2516 if (CGF.Builder.getIsFPConstrained() &&
2517 CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {
2518 if (Value *Result =
2519 CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))
2520 return Result;
2521 }
2522 return nullptr;
2523}
2524
2525static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
 2526 const FunctionDecl *FD) {
2527 auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";
2528 auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);
2529 auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);
2530
 2531 SmallVector<llvm::Value *, 16> Args;
 2532 for (auto &&FormalTy : FnTy->params())
2533 Args.push_back(llvm::PoisonValue::get(FormalTy));
2534
2535 return RValue::get(CGF->Builder.CreateCall(UBF, Args));
2536}
2537
2538RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
2539 const CallExpr *E,
2540 ReturnValueSlot ReturnValue) {
2541 const FunctionDecl *FD = GD.getDecl()->getAsFunction();
2542 // See if we can constant fold this builtin. If so, don't emit it at all.
2543 // TODO: Extend this handling to all builtin calls that we can constant-fold.
 2544 Expr::EvalResult Result;
 2545 if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&
 2546 !Result.hasSideEffects()) {
2547 if (Result.Val.isInt())
2548 return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
2549 Result.Val.getInt()));
2550 if (Result.Val.isFloat())
2551 return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
2552 Result.Val.getFloat()));
2553 }
2554
2555 // If current long-double semantics is IEEE 128-bit, replace math builtins
2556 // of long-double with f128 equivalent.
 2557 // TODO: This mutation should also be applied to targets other than PPC,
 2558 // once the backend supports IEEE 128-bit style libcalls.
2559 if (getTarget().getTriple().isPPC64() &&
2560 &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())
2561 BuiltinID = mutateLongDoubleBuiltin(BuiltinID);
2562
2563 // If the builtin has been declared explicitly with an assembler label,
2564 // disable the specialized emitting below. Ideally we should communicate the
2565 // rename in IR, or at least avoid generating the intrinsic calls that are
2566 // likely to get lowered to the renamed library functions.
2567 const unsigned BuiltinIDIfNoAsmLabel =
2568 FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;
2569
2570 std::optional<bool> ErrnoOverriden;
 2571 // ErrnoOverriden is true if math-errno is overridden via the
2572 // '#pragma float_control(precise, on)'. This pragma disables fast-math,
2573 // which implies math-errno.
2574 if (E->hasStoredFPFeatures()) {
2575 FPOptionsOverride OP = E->getFPFeatures();
2576 if (OP.hasMathErrnoOverride())
2577 ErrnoOverriden = OP.getMathErrnoOverride();
2578 }
 2579 // True if '__attribute__((optnone))' is used. This attribute overrides
 2580 // fast-math, which implies math-errno.
2581 bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
2582
 2583 // True if we are compiling with optimization and errno has been disabled
 2584 // using the '#pragma float_control(precise, off)', and
 2585 // '__attribute__((optnone))' hasn't been used.
2586 bool ErrnoOverridenToFalseWithOpt =
2587 ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&
2588 CGM.getCodeGenOpts().OptimizationLevel != 0;
2589
2590 // There are LLVM math intrinsics/instructions corresponding to math library
 2591 // functions, except that the LLVM op will never set errno while the math library
2592 // might. Also, math builtins have the same semantics as their math library
2593 // twins. Thus, we can transform math library and builtin calls to their
2594 // LLVM counterparts if the call is marked 'const' (known to never set errno).
2595 // In case FP exceptions are enabled, the experimental versions of the
2596 // intrinsics model those.
2597 bool ConstAlways =
2598 getContext().BuiltinInfo.isConst(BuiltinID);
2599
2600 // There's a special case with the fma builtins where they are always const
 2601 // if the target environment is GNU or the target OS is Windows and we're
 2602 // targeting the MSVCRT.dll environment.
 2603 // FIXME: This list can become outdated. Need to find a better way to keep
 2604 // it up to date.
2605 switch (BuiltinID) {
2606 case Builtin::BI__builtin_fma:
2607 case Builtin::BI__builtin_fmaf:
2608 case Builtin::BI__builtin_fmal:
2609 case Builtin::BI__builtin_fmaf16:
2610 case Builtin::BIfma:
2611 case Builtin::BIfmaf:
2612 case Builtin::BIfmal: {
2613 auto &Trip = CGM.getTriple();
2614 if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())
2615 ConstAlways = true;
2616 break;
2617 }
2618 default:
2619 break;
2620 }
2621
2622 bool ConstWithoutErrnoAndExceptions =
 2623 getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);
 2624 bool ConstWithoutExceptions =
 2625 getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);
2626
2627 // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is
2628 // disabled.
2629 // Math intrinsics are generated only when math-errno is disabled. Any pragmas
2630 // or attributes that affect math-errno should prevent or allow math
 2631 // intrinsics to be generated. Intrinsics are generated:
 2632 // 1- In fast math mode, unless math-errno is overridden
 2633 // via '#pragma float_control(precise, on)', or via an
 2634 // '__attribute__((optnone))'.
 2635 // 2- If math-errno was enabled on the command line but overridden
 2636 // to false via '#pragma float_control(precise, off)' and
 2637 // '__attribute__((optnone))' hasn't been used.
 2638 // 3- If we are compiling with optimization and errno has been disabled
 2639 // via '#pragma float_control(precise, off)', and
 2640 // '__attribute__((optnone))' hasn't been used.
2641
2642 bool ConstWithoutErrnoOrExceptions =
2643 ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;
2644 bool GenerateIntrinsics =
2645 (ConstAlways && !OptNone) ||
2646 (!getLangOpts().MathErrno &&
2647 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2648 if (!GenerateIntrinsics) {
2649 GenerateIntrinsics =
2650 ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;
2651 if (!GenerateIntrinsics)
2652 GenerateIntrinsics =
2653 ConstWithoutErrnoOrExceptions &&
2654 (!getLangOpts().MathErrno &&
2655 !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);
2656 if (!GenerateIntrinsics)
2657 GenerateIntrinsics =
2658 ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
2659 }
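// For illustration of the rules above: compiling
//
//   double F(double X) { return __builtin_sqrt(X); }
//
// with -fno-math-errno (or in fast-math mode without an overriding pragma or
// __attribute__((optnone))) selects the intrinsic path below and yields a call
// to @llvm.sqrt.f64, while with math-errno enabled the builtin instead falls
// through to the library-call handling later in this function.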
2660 if (GenerateIntrinsics) {
2661 switch (BuiltinIDIfNoAsmLabel) {
2662 case Builtin::BIacos:
2663 case Builtin::BIacosf:
2664 case Builtin::BIacosl:
2665 case Builtin::BI__builtin_acos:
2666 case Builtin::BI__builtin_acosf:
2667 case Builtin::BI__builtin_acosf16:
2668 case Builtin::BI__builtin_acosl:
2669 case Builtin::BI__builtin_acosf128:
2671 *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));
2672
2673 case Builtin::BIasin:
2674 case Builtin::BIasinf:
2675 case Builtin::BIasinl:
2676 case Builtin::BI__builtin_asin:
2677 case Builtin::BI__builtin_asinf:
2678 case Builtin::BI__builtin_asinf16:
2679 case Builtin::BI__builtin_asinl:
2680 case Builtin::BI__builtin_asinf128:
2682 *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));
2683
2684 case Builtin::BIatan:
2685 case Builtin::BIatanf:
2686 case Builtin::BIatanl:
2687 case Builtin::BI__builtin_atan:
2688 case Builtin::BI__builtin_atanf:
2689 case Builtin::BI__builtin_atanf16:
2690 case Builtin::BI__builtin_atanl:
2691 case Builtin::BI__builtin_atanf128:
2693 *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));
2694
2695 case Builtin::BIceil:
2696 case Builtin::BIceilf:
2697 case Builtin::BIceill:
2698 case Builtin::BI__builtin_ceil:
2699 case Builtin::BI__builtin_ceilf:
2700 case Builtin::BI__builtin_ceilf16:
2701 case Builtin::BI__builtin_ceill:
2702 case Builtin::BI__builtin_ceilf128:
2704 Intrinsic::ceil,
2705 Intrinsic::experimental_constrained_ceil));
2706
2707 case Builtin::BIcopysign:
2708 case Builtin::BIcopysignf:
2709 case Builtin::BIcopysignl:
2710 case Builtin::BI__builtin_copysign:
2711 case Builtin::BI__builtin_copysignf:
2712 case Builtin::BI__builtin_copysignf16:
2713 case Builtin::BI__builtin_copysignl:
2714 case Builtin::BI__builtin_copysignf128:
2715 return RValue::get(
2716 emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));
2717
2718 case Builtin::BIcos:
2719 case Builtin::BIcosf:
2720 case Builtin::BIcosl:
2721 case Builtin::BI__builtin_cos:
2722 case Builtin::BI__builtin_cosf:
2723 case Builtin::BI__builtin_cosf16:
2724 case Builtin::BI__builtin_cosl:
2725 case Builtin::BI__builtin_cosf128:
2727 Intrinsic::cos,
2728 Intrinsic::experimental_constrained_cos));
2729
2730 case Builtin::BIcosh:
2731 case Builtin::BIcoshf:
2732 case Builtin::BIcoshl:
2733 case Builtin::BI__builtin_cosh:
2734 case Builtin::BI__builtin_coshf:
2735 case Builtin::BI__builtin_coshf16:
2736 case Builtin::BI__builtin_coshl:
2737 case Builtin::BI__builtin_coshf128:
2739 *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));
2740
2741 case Builtin::BIexp:
2742 case Builtin::BIexpf:
2743 case Builtin::BIexpl:
2744 case Builtin::BI__builtin_exp:
2745 case Builtin::BI__builtin_expf:
2746 case Builtin::BI__builtin_expf16:
2747 case Builtin::BI__builtin_expl:
2748 case Builtin::BI__builtin_expf128:
2750 Intrinsic::exp,
2751 Intrinsic::experimental_constrained_exp));
2752
2753 case Builtin::BIexp2:
2754 case Builtin::BIexp2f:
2755 case Builtin::BIexp2l:
2756 case Builtin::BI__builtin_exp2:
2757 case Builtin::BI__builtin_exp2f:
2758 case Builtin::BI__builtin_exp2f16:
2759 case Builtin::BI__builtin_exp2l:
2760 case Builtin::BI__builtin_exp2f128:
2762 Intrinsic::exp2,
2763 Intrinsic::experimental_constrained_exp2));
2764 case Builtin::BI__builtin_exp10:
2765 case Builtin::BI__builtin_exp10f:
2766 case Builtin::BI__builtin_exp10f16:
2767 case Builtin::BI__builtin_exp10l:
2768 case Builtin::BI__builtin_exp10f128: {
2769 // TODO: strictfp support
2770 if (Builder.getIsFPConstrained())
2771 break;
2772 return RValue::get(
2773 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));
2774 }
2775 case Builtin::BIfabs:
2776 case Builtin::BIfabsf:
2777 case Builtin::BIfabsl:
2778 case Builtin::BI__builtin_fabs:
2779 case Builtin::BI__builtin_fabsf:
2780 case Builtin::BI__builtin_fabsf16:
2781 case Builtin::BI__builtin_fabsl:
2782 case Builtin::BI__builtin_fabsf128:
2783 return RValue::get(
2784 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));
2785
2786 case Builtin::BIfloor:
2787 case Builtin::BIfloorf:
2788 case Builtin::BIfloorl:
2789 case Builtin::BI__builtin_floor:
2790 case Builtin::BI__builtin_floorf:
2791 case Builtin::BI__builtin_floorf16:
2792 case Builtin::BI__builtin_floorl:
2793 case Builtin::BI__builtin_floorf128:
2795 Intrinsic::floor,
2796 Intrinsic::experimental_constrained_floor));
2797
2798 case Builtin::BIfma:
2799 case Builtin::BIfmaf:
2800 case Builtin::BIfmal:
2801 case Builtin::BI__builtin_fma:
2802 case Builtin::BI__builtin_fmaf:
2803 case Builtin::BI__builtin_fmaf16:
2804 case Builtin::BI__builtin_fmal:
2805 case Builtin::BI__builtin_fmaf128:
2807 Intrinsic::fma,
2808 Intrinsic::experimental_constrained_fma));
2809
2810 case Builtin::BIfmax:
2811 case Builtin::BIfmaxf:
2812 case Builtin::BIfmaxl:
2813 case Builtin::BI__builtin_fmax:
2814 case Builtin::BI__builtin_fmaxf:
2815 case Builtin::BI__builtin_fmaxf16:
2816 case Builtin::BI__builtin_fmaxl:
2817 case Builtin::BI__builtin_fmaxf128:
2819 Intrinsic::maxnum,
2820 Intrinsic::experimental_constrained_maxnum));
2821
2822 case Builtin::BIfmin:
2823 case Builtin::BIfminf:
2824 case Builtin::BIfminl:
2825 case Builtin::BI__builtin_fmin:
2826 case Builtin::BI__builtin_fminf:
2827 case Builtin::BI__builtin_fminf16:
2828 case Builtin::BI__builtin_fminl:
2829 case Builtin::BI__builtin_fminf128:
2831 Intrinsic::minnum,
2832 Intrinsic::experimental_constrained_minnum));
2833
2834 // fmod() is a special-case. It maps to the frem instruction rather than an
2835 // LLVM intrinsic.
2836 case Builtin::BIfmod:
2837 case Builtin::BIfmodf:
2838 case Builtin::BIfmodl:
2839 case Builtin::BI__builtin_fmod:
2840 case Builtin::BI__builtin_fmodf:
2841 case Builtin::BI__builtin_fmodf16:
2842 case Builtin::BI__builtin_fmodl:
2843 case Builtin::BI__builtin_fmodf128: {
2844 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
2845 Value *Arg1 = EmitScalarExpr(E->getArg(0));
2846 Value *Arg2 = EmitScalarExpr(E->getArg(1));
2847 return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
2848 }
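// For illustration: unlike its neighbors, fmod never becomes an intrinsic; a
// call such as
//
//   double R = __builtin_fmod(A, B);
//
// is emitted directly as the IR instruction
//
//   %R = frem double %A, %B
//
// under the FP options captured by the CGFPOptionsRAII scope in the case body.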
2849
2850 case Builtin::BIlog:
2851 case Builtin::BIlogf:
2852 case Builtin::BIlogl:
2853 case Builtin::BI__builtin_log:
2854 case Builtin::BI__builtin_logf:
2855 case Builtin::BI__builtin_logf16:
2856 case Builtin::BI__builtin_logl:
2857 case Builtin::BI__builtin_logf128:
2859 Intrinsic::log,
2860 Intrinsic::experimental_constrained_log));
2861
2862 case Builtin::BIlog10:
2863 case Builtin::BIlog10f:
2864 case Builtin::BIlog10l:
2865 case Builtin::BI__builtin_log10:
2866 case Builtin::BI__builtin_log10f:
2867 case Builtin::BI__builtin_log10f16:
2868 case Builtin::BI__builtin_log10l:
2869 case Builtin::BI__builtin_log10f128:
2871 Intrinsic::log10,
2872 Intrinsic::experimental_constrained_log10));
2873
2874 case Builtin::BIlog2:
2875 case Builtin::BIlog2f:
2876 case Builtin::BIlog2l:
2877 case Builtin::BI__builtin_log2:
2878 case Builtin::BI__builtin_log2f:
2879 case Builtin::BI__builtin_log2f16:
2880 case Builtin::BI__builtin_log2l:
2881 case Builtin::BI__builtin_log2f128:
2883 Intrinsic::log2,
2884 Intrinsic::experimental_constrained_log2));
2885
2886 case Builtin::BInearbyint:
2887 case Builtin::BInearbyintf:
2888 case Builtin::BInearbyintl:
2889 case Builtin::BI__builtin_nearbyint:
2890 case Builtin::BI__builtin_nearbyintf:
2891 case Builtin::BI__builtin_nearbyintl:
2892 case Builtin::BI__builtin_nearbyintf128:
2894 Intrinsic::nearbyint,
2895 Intrinsic::experimental_constrained_nearbyint));
2896
2897 case Builtin::BIpow:
2898 case Builtin::BIpowf:
2899 case Builtin::BIpowl:
2900 case Builtin::BI__builtin_pow:
2901 case Builtin::BI__builtin_powf:
2902 case Builtin::BI__builtin_powf16:
2903 case Builtin::BI__builtin_powl:
2904 case Builtin::BI__builtin_powf128:
2906 Intrinsic::pow,
2907 Intrinsic::experimental_constrained_pow));
2908
2909 case Builtin::BIrint:
2910 case Builtin::BIrintf:
2911 case Builtin::BIrintl:
2912 case Builtin::BI__builtin_rint:
2913 case Builtin::BI__builtin_rintf:
2914 case Builtin::BI__builtin_rintf16:
2915 case Builtin::BI__builtin_rintl:
2916 case Builtin::BI__builtin_rintf128:
2918 Intrinsic::rint,
2919 Intrinsic::experimental_constrained_rint));
2920
2921 case Builtin::BIround:
2922 case Builtin::BIroundf:
2923 case Builtin::BIroundl:
2924 case Builtin::BI__builtin_round:
2925 case Builtin::BI__builtin_roundf:
2926 case Builtin::BI__builtin_roundf16:
2927 case Builtin::BI__builtin_roundl:
2928 case Builtin::BI__builtin_roundf128:
2930 Intrinsic::round,
2931 Intrinsic::experimental_constrained_round));
2932
2933 case Builtin::BIroundeven:
2934 case Builtin::BIroundevenf:
2935 case Builtin::BIroundevenl:
2936 case Builtin::BI__builtin_roundeven:
2937 case Builtin::BI__builtin_roundevenf:
2938 case Builtin::BI__builtin_roundevenf16:
2939 case Builtin::BI__builtin_roundevenl:
2940 case Builtin::BI__builtin_roundevenf128:
2942 Intrinsic::roundeven,
2943 Intrinsic::experimental_constrained_roundeven));
2944
2945 case Builtin::BIsin:
2946 case Builtin::BIsinf:
2947 case Builtin::BIsinl:
2948 case Builtin::BI__builtin_sin:
2949 case Builtin::BI__builtin_sinf:
2950 case Builtin::BI__builtin_sinf16:
2951 case Builtin::BI__builtin_sinl:
2952 case Builtin::BI__builtin_sinf128:
2954 Intrinsic::sin,
2955 Intrinsic::experimental_constrained_sin));
2956
2957 case Builtin::BIsinh:
2958 case Builtin::BIsinhf:
2959 case Builtin::BIsinhl:
2960 case Builtin::BI__builtin_sinh:
2961 case Builtin::BI__builtin_sinhf:
2962 case Builtin::BI__builtin_sinhf16:
2963 case Builtin::BI__builtin_sinhl:
2964 case Builtin::BI__builtin_sinhf128:
2966 *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));
2967
2968 case Builtin::BIsqrt:
2969 case Builtin::BIsqrtf:
2970 case Builtin::BIsqrtl:
2971 case Builtin::BI__builtin_sqrt:
2972 case Builtin::BI__builtin_sqrtf:
2973 case Builtin::BI__builtin_sqrtf16:
2974 case Builtin::BI__builtin_sqrtl:
2975 case Builtin::BI__builtin_sqrtf128:
2976 case Builtin::BI__builtin_elementwise_sqrt: {
2978 *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
2980 return RValue::get(Call);
2981 }
2982
2983 case Builtin::BItan:
2984 case Builtin::BItanf:
2985 case Builtin::BItanl:
2986 case Builtin::BI__builtin_tan:
2987 case Builtin::BI__builtin_tanf:
2988 case Builtin::BI__builtin_tanf16:
2989 case Builtin::BI__builtin_tanl:
2990 case Builtin::BI__builtin_tanf128:
2992 *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));
2993
2994 case Builtin::BItanh:
2995 case Builtin::BItanhf:
2996 case Builtin::BItanhl:
2997 case Builtin::BI__builtin_tanh:
2998 case Builtin::BI__builtin_tanhf:
2999 case Builtin::BI__builtin_tanhf16:
3000 case Builtin::BI__builtin_tanhl:
3001 case Builtin::BI__builtin_tanhf128:
3003 *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));
3004
3005 case Builtin::BItrunc:
3006 case Builtin::BItruncf:
3007 case Builtin::BItruncl:
3008 case Builtin::BI__builtin_trunc:
3009 case Builtin::BI__builtin_truncf:
3010 case Builtin::BI__builtin_truncf16:
3011 case Builtin::BI__builtin_truncl:
3012 case Builtin::BI__builtin_truncf128:
3014 Intrinsic::trunc,
3015 Intrinsic::experimental_constrained_trunc));
3016
3017 case Builtin::BIlround:
3018 case Builtin::BIlroundf:
3019 case Builtin::BIlroundl:
3020 case Builtin::BI__builtin_lround:
3021 case Builtin::BI__builtin_lroundf:
3022 case Builtin::BI__builtin_lroundl:
3023 case Builtin::BI__builtin_lroundf128:
3025 *this, E, Intrinsic::lround,
3026 Intrinsic::experimental_constrained_lround));
3027
3028 case Builtin::BIllround:
3029 case Builtin::BIllroundf:
3030 case Builtin::BIllroundl:
3031 case Builtin::BI__builtin_llround:
3032 case Builtin::BI__builtin_llroundf:
3033 case Builtin::BI__builtin_llroundl:
3034 case Builtin::BI__builtin_llroundf128:
3036 *this, E, Intrinsic::llround,
3037 Intrinsic::experimental_constrained_llround));
3038
3039 case Builtin::BIlrint:
3040 case Builtin::BIlrintf:
3041 case Builtin::BIlrintl:
3042 case Builtin::BI__builtin_lrint:
3043 case Builtin::BI__builtin_lrintf:
3044 case Builtin::BI__builtin_lrintl:
3045 case Builtin::BI__builtin_lrintf128:
3047 *this, E, Intrinsic::lrint,
3048 Intrinsic::experimental_constrained_lrint));
3049
3050 case Builtin::BIllrint:
3051 case Builtin::BIllrintf:
3052 case Builtin::BIllrintl:
3053 case Builtin::BI__builtin_llrint:
3054 case Builtin::BI__builtin_llrintf:
3055 case Builtin::BI__builtin_llrintl:
3056 case Builtin::BI__builtin_llrintf128:
3057 return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(
3058 *this, E, Intrinsic::llrint,
3059 Intrinsic::experimental_constrained_llrint));
3060 case Builtin::BI__builtin_ldexp:
3061 case Builtin::BI__builtin_ldexpf:
3062 case Builtin::BI__builtin_ldexpl:
3063 case Builtin::BI__builtin_ldexpf16:
3064 case Builtin::BI__builtin_ldexpf128: {
3065 return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(
3066 *this, E, Intrinsic::ldexp,
3067 Intrinsic::experimental_constrained_ldexp));
3068 }
3069 default:
3070 break;
3071 }
3072 }
3073
3074 // Check NonnullAttribute/NullabilityArg and Alignment.
3075 auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,
3076 unsigned ParmNum) {
3077 Value *Val = A.emitRawPointer(*this);
3078 EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,
3079 ParmNum);
3080
3081 if (SanOpts.has(SanitizerKind::Alignment)) {
3082 SanitizerSet SkippedChecks;
3083 SkippedChecks.set(SanitizerKind::All);
3084 SkippedChecks.clear(SanitizerKind::Alignment);
3085 SourceLocation Loc = Arg->getExprLoc();
3086 // Strip an implicit cast.
3087 if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))
3088 if (CE->getCastKind() == CK_BitCast)
3089 Arg = CE->getSubExpr();
3090 EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),
3091 SkippedChecks);
3092 }
3093 };
3094
3095 switch (BuiltinIDIfNoAsmLabel) {
3096 default: break;
3097 case Builtin::BI__builtin___CFStringMakeConstantString:
3098 case Builtin::BI__builtin___NSStringMakeConstantString:
3099 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
3100 case Builtin::BI__builtin_stdarg_start:
3101 case Builtin::BI__builtin_va_start:
3102 case Builtin::BI__va_start:
3103 case Builtin::BI__builtin_va_end:
3104 EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
3105 ? EmitScalarExpr(E->getArg(0))
3106 : EmitVAListRef(E->getArg(0)).emitRawPointer(*this),
3107 BuiltinID != Builtin::BI__builtin_va_end);
3108 return RValue::get(nullptr);
3109 case Builtin::BI__builtin_va_copy: {
3110 Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);
3111 Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);
3112 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),
3113 {DstPtr, SrcPtr});
3114 return RValue::get(nullptr);
3115 }
3116 case Builtin::BIabs:
3117 case Builtin::BIlabs:
3118 case Builtin::BIllabs:
3119 case Builtin::BI__builtin_abs:
3120 case Builtin::BI__builtin_labs:
3121 case Builtin::BI__builtin_llabs: {
3122 bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);
3123
3124 Value *Result;
3125 switch (getLangOpts().getSignedOverflowBehavior()) {
3126 case LangOptions::SOB_Defined:
3127 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);
3128 break;
3129 case LangOptions::SOB_Undefined:
3130 if (!SanitizeOverflow) {
3131 Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);
3132 break;
3133 }
3134 [[fallthrough]];
3135 case LangOptions::SOB_Trapping:
3136 // TODO: Somehow handle the corner case when the address of abs is taken.
3137 Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);
3138 break;
3139 }
3140 return RValue::get(Result);
3141 }
3142 case Builtin::BI__builtin_complex: {
3143 Value *Real = EmitScalarExpr(E->getArg(0));
3144 Value *Imag = EmitScalarExpr(E->getArg(1));
3145 return RValue::getComplex({Real, Imag});
3146 }
3147 case Builtin::BI__builtin_conj:
3148 case Builtin::BI__builtin_conjf:
3149 case Builtin::BI__builtin_conjl:
3150 case Builtin::BIconj:
3151 case Builtin::BIconjf:
3152 case Builtin::BIconjl: {
3153 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3154 Value *Real = ComplexVal.first;
3155 Value *Imag = ComplexVal.second;
3156 Imag = Builder.CreateFNeg(Imag, "neg");
3157 return RValue::getComplex(std::make_pair(Real, Imag));
3158 }
3159 case Builtin::BI__builtin_creal:
3160 case Builtin::BI__builtin_crealf:
3161 case Builtin::BI__builtin_creall:
3162 case Builtin::BIcreal:
3163 case Builtin::BIcrealf:
3164 case Builtin::BIcreall: {
3165 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3166 return RValue::get(ComplexVal.first);
3167 }
3168
3169 case Builtin::BI__builtin_preserve_access_index: {
3170 // Only enable the preserved access index region when debug info
3171 // is available, as debug info is needed to preserve the user-level
3172 // access pattern.
3173 if (!getDebugInfo()) {
3174 CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
3175 return RValue::get(EmitScalarExpr(E->getArg(0)));
3176 }
3177
3178 // Nested builtin_preserve_access_index() not supported
3179 if (IsInPreservedAIRegion) {
3180 CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
3181 return RValue::get(EmitScalarExpr(E->getArg(0)));
3182 }
3183
3184 IsInPreservedAIRegion = true;
3185 Value *Res = EmitScalarExpr(E->getArg(0));
3186 IsInPreservedAIRegion = false;
3187 return RValue::get(Res);
3188 }
3189
3190 case Builtin::BI__builtin_cimag:
3191 case Builtin::BI__builtin_cimagf:
3192 case Builtin::BI__builtin_cimagl:
3193 case Builtin::BIcimag:
3194 case Builtin::BIcimagf:
3195 case Builtin::BIcimagl: {
3196 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3197 return RValue::get(ComplexVal.second);
3198 }
3199
3200 case Builtin::BI__builtin_clrsb:
3201 case Builtin::BI__builtin_clrsbl:
3202 case Builtin::BI__builtin_clrsbll: {
3203 // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
3204 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3205
3206 llvm::Type *ArgType = ArgValue->getType();
3207 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3208
3209 llvm::Type *ResultType = ConvertType(E->getType());
3210 Value *Zero = llvm::Constant::getNullValue(ArgType);
3211 Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
3212 Value *Inverse = Builder.CreateNot(ArgValue, "not");
3213 Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
3214 Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
3215 Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
3216 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3217 "cast");
3218 return RValue::get(Result);
3219 }
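// Illustrative sketch, not part of CGBuiltin.cpp: it restates the scalar
// lowering above (clrsb(x) -> ctlz(x < 0 ? ~x : x) - 1) in portable C++20;
// countRedundantSignBits is a name invented for this example.
#include <bit>
#include <cstdint>

int countRedundantSignBits(int32_t X) {
  // Negative inputs are complemented so countl_zero counts copies of the sign
  // bit; subtracting one drops the sign bit itself, matching __builtin_clrsb.
  uint32_t Tmp = static_cast<uint32_t>(X < 0 ? ~X : X);
  return std::countl_zero(Tmp) - 1;
}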
3220 case Builtin::BI__builtin_ctzs:
3221 case Builtin::BI__builtin_ctz:
3222 case Builtin::BI__builtin_ctzl:
3223 case Builtin::BI__builtin_ctzll:
3224 case Builtin::BI__builtin_ctzg: {
3225 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&
3226 E->getNumArgs() > 1;
3227
3228 Value *ArgValue =
3229 HasFallback ? EmitScalarExpr(E->getArg(0))
3230 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
3231
3232 llvm::Type *ArgType = ArgValue->getType();
3233 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3234
3235 llvm::Type *ResultType = ConvertType(E->getType());
3236 Value *ZeroUndef =
3237 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3238 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3239 if (Result->getType() != ResultType)
3240 Result =
3241 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3242 if (!HasFallback)
3243 return RValue::get(Result);
3244
3245 Value *Zero = Constant::getNullValue(ArgType);
3246 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3247 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3248 Value *ResultOrFallback =
3249 Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");
3250 return RValue::get(ResultOrFallback);
3251 }
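// Illustrative sketch, not part of CGBuiltin.cpp: it shows the observable
// effect of the two-argument __builtin_ctzg lowering emitted above, i.e. cttz
// of the value with a select of the fallback when the input is zero.
#include <bit>
#include <cstdint>

unsigned ctzgWithFallback(uint32_t V, unsigned Fallback) {
  // Mirrors: IsZero = icmp eq V, 0; select IsZero, Fallback, cttz(V).
  return V == 0 ? Fallback : static_cast<unsigned>(std::countr_zero(V));
}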
3252 case Builtin::BI__builtin_clzs:
3253 case Builtin::BI__builtin_clz:
3254 case Builtin::BI__builtin_clzl:
3255 case Builtin::BI__builtin_clzll:
3256 case Builtin::BI__builtin_clzg: {
3257 bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&
3258 E->getNumArgs() > 1;
3259
3260 Value *ArgValue =
3261 HasFallback ? EmitScalarExpr(E->getArg(0))
3262 : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
3263
3264 llvm::Type *ArgType = ArgValue->getType();
3265 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3266
3267 llvm::Type *ResultType = ConvertType(E->getType());
3268 Value *ZeroUndef =
3269 Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());
3270 Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
3271 if (Result->getType() != ResultType)
3272 Result =
3273 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3274 if (!HasFallback)
3275 return RValue::get(Result);
3276
3277 Value *Zero = Constant::getNullValue(ArgType);
3278 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3279 Value *FallbackValue = EmitScalarExpr(E->getArg(1));
3280 Value *ResultOrFallback =
3281 Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");
3282 return RValue::get(ResultOrFallback);
3283 }
3284 case Builtin::BI__builtin_ffs:
3285 case Builtin::BI__builtin_ffsl:
3286 case Builtin::BI__builtin_ffsll: {
3287 // ffs(x) -> x ? cttz(x) + 1 : 0
3288 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3289
3290 llvm::Type *ArgType = ArgValue->getType();
3291 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
3292
3293 llvm::Type *ResultType = ConvertType(E->getType());
3294 Value *Tmp =
3295 Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
3296 llvm::ConstantInt::get(ArgType, 1));
3297 Value *Zero = llvm::Constant::getNullValue(ArgType);
3298 Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
3299 Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
3300 if (Result->getType() != ResultType)
3301 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3302 "cast");
3303 return RValue::get(Result);
3304 }
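// Illustrative sketch, not part of CGBuiltin.cpp: it restates the lowering
// above, ffs(x) -> x ? cttz(x) + 1 : 0, in portable C++20.
#include <bit>
#include <cstdint>

int ffsPortable(uint32_t X) {
  // 1-based index of the least significant set bit; 0 when no bit is set.
  return X == 0 ? 0 : std::countr_zero(X) + 1;
}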
3305 case Builtin::BI__builtin_parity:
3306 case Builtin::BI__builtin_parityl:
3307 case Builtin::BI__builtin_parityll: {
3308 // parity(x) -> ctpop(x) & 1
3309 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3310
3311 llvm::Type *ArgType = ArgValue->getType();
3312 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3313
3314 llvm::Type *ResultType = ConvertType(E->getType());
3315 Value *Tmp = Builder.CreateCall(F, ArgValue);
3316 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
3317 if (Result->getType() != ResultType)
3318 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3319 "cast");
3320 return RValue::get(Result);
3321 }
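// Illustrative sketch, not part of CGBuiltin.cpp: it restates the lowering
// above, parity(x) -> ctpop(x) & 1, in portable C++20.
#include <bit>
#include <cstdint>

int parityPortable(uint32_t X) {
  // 1 for an odd number of set bits, 0 for an even number.
  return std::popcount(X) & 1;
}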
3322 case Builtin::BI__lzcnt16:
3323 case Builtin::BI__lzcnt:
3324 case Builtin::BI__lzcnt64: {
3325 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3326
3327 llvm::Type *ArgType = ArgValue->getType();
3328 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
3329
3330 llvm::Type *ResultType = ConvertType(E->getType());
3331 Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
3332 if (Result->getType() != ResultType)
3333 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
3334 "cast");
3335 return RValue::get(Result);
3336 }
3337 case Builtin::BI__popcnt16:
3338 case Builtin::BI__popcnt:
3339 case Builtin::BI__popcnt64:
3340 case Builtin::BI__builtin_popcount:
3341 case Builtin::BI__builtin_popcountl:
3342 case Builtin::BI__builtin_popcountll:
3343 case Builtin::BI__builtin_popcountg: {
3344 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3345
3346 llvm::Type *ArgType = ArgValue->getType();
3347 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
3348
3349 llvm::Type *ResultType = ConvertType(E->getType());
3350 Value *Result = Builder.CreateCall(F, ArgValue);
3351 if (Result->getType() != ResultType)
3352 Result =
3353 Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");
3354 return RValue::get(Result);
3355 }
3356 case Builtin::BI__builtin_unpredictable: {
3357 // Always return the argument of __builtin_unpredictable. LLVM does not
3358 // handle this builtin. Metadata for this builtin should be added directly
3359 // to instructions such as branches or switches that use it.
3360 return RValue::get(EmitScalarExpr(E->getArg(0)));
3361 }
3362 case Builtin::BI__builtin_expect: {
3363 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3364 llvm::Type *ArgType = ArgValue->getType();
3365
3366 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3367 // Don't generate llvm.expect on -O0 as the backend won't use it for
3368 // anything.
3369 // Note, we still IRGen ExpectedValue because it could have side-effects.
3370 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3371 return RValue::get(ArgValue);
3372
3373 Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
3374 Value *Result =
3375 Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
3376 return RValue::get(Result);
3377 }
3378 case Builtin::BI__builtin_expect_with_probability: {
3379 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3380 llvm::Type *ArgType = ArgValue->getType();
3381
3382 Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
3383 llvm::APFloat Probability(0.0);
3384 const Expr *ProbArg = E->getArg(2);
3385 bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());
3386 assert(EvalSucceed && "probability should be able to evaluate as float");
3387 (void)EvalSucceed;
3388 bool LoseInfo = false;
3389 Probability.convert(llvm::APFloat::IEEEdouble(),
3390 llvm::RoundingMode::Dynamic, &LoseInfo);
3391 llvm::Type *Ty = ConvertType(ProbArg->getType());
3392 Constant *Confidence = ConstantFP::get(Ty, Probability);
3393 // Don't generate llvm.expect.with.probability on -O0 as the backend
3394 // won't use it for anything.
3395 // Note, we still IRGen ExpectedValue because it could have side-effects.
3396 if (CGM.getCodeGenOpts().OptimizationLevel == 0)
3397 return RValue::get(ArgValue);
3398
3399 Function *FnExpect =
3400 CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);
3401 Value *Result = Builder.CreateCall(
3402 FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");
3403 return RValue::get(Result);
3404 }
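// Illustrative usage sketch, not part of CGBuiltin.cpp: the third argument must
// fold to a floating-point constant, which is why the code above asserts that
// EvaluateAsFloat succeeds before converting the value to IEEEdouble.
long mostlyNonZero(int X) {
  // At -O1 and above this emits llvm.expect.with.probability with a constant
  // double confidence operand; at -O0 only the compared value is emitted.
  return __builtin_expect_with_probability(X != 0, 1, 0.9);
}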
3405 case Builtin::BI__builtin_assume_aligned: {
3406 const Expr *Ptr = E->getArg(0);
3407 Value *PtrValue = EmitScalarExpr(Ptr);
3408 Value *OffsetValue =
3409 (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
3410
3411 Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
3412 ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
3413 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
3414 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
3415 llvm::Value::MaximumAlignment);
3416
3417 emitAlignmentAssumption(PtrValue, Ptr,
3418 /*The expr loc is sufficient.*/ SourceLocation(),
3419 AlignmentCI, OffsetValue);
3420 return RValue::get(PtrValue);
3421 }
3422 case Builtin::BI__assume:
3423 case Builtin::BI__builtin_assume: {
3424 if (E->getArg(0)->HasSideEffects(getContext()))
3425 return RValue::get(nullptr);
3426
3427 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3428 Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
3429 Builder.CreateCall(FnAssume, ArgValue);
3430 return RValue::get(nullptr);
3431 }
3432 case Builtin::BI__builtin_assume_separate_storage: {
3433 const Expr *Arg0 = E->getArg(0);
3434 const Expr *Arg1 = E->getArg(1);
3435
3436 Value *Value0 = EmitScalarExpr(Arg0);
3437 Value *Value1 = EmitScalarExpr(Arg1);
3438
3439 Value *Values[] = {Value0, Value1};
3440 OperandBundleDefT<Value *> OBD("separate_storage", Values);
3441 Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
3442 return RValue::get(nullptr);
3443 }
3444 case Builtin::BI__builtin_allow_runtime_check: {
3445 StringRef Kind =
3446 cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();
3447 LLVMContext &Ctx = CGM.getLLVMContext();
3448 llvm::Value *Allow = Builder.CreateCall(
3449 CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),
3450 llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));
3451 return RValue::get(Allow);
3452 }
3453 case Builtin::BI__arithmetic_fence: {
3454 // Create the builtin call if FastMath is selected, and the target
3455 // supports the builtin, otherwise just return the argument.
3456 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3457 llvm::FastMathFlags FMF = Builder.getFastMathFlags();
3458 bool isArithmeticFenceEnabled =
3459 FMF.allowReassoc() &&
3460 getContext().getTargetInfo().checkArithmeticFenceSupported();
3461 QualType ArgType = E->getArg(0)->getType();
3462 if (ArgType->isComplexType()) {
3463 if (isArithmeticFenceEnabled) {
3464 QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();
3465 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3466 Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,
3467 ConvertType(ElementType));
3468 Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,
3469 ConvertType(ElementType));
3470 return RValue::getComplex(std::make_pair(Real, Imag));
3471 }
3472 ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
3473 Value *Real = ComplexVal.first;
3474 Value *Imag = ComplexVal.second;
3475 return RValue::getComplex(std::make_pair(Real, Imag));
3476 }
3477 Value *ArgValue = EmitScalarExpr(E->getArg(0));
3478 if (isArithmeticFenceEnabled)
3479 return RValue::get(
3480 Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));
3481 return RValue::get(ArgValue);
3482 }
3483 case Builtin::BI__builtin_bswap16:
3484 case Builtin::BI__builtin_bswap32:
3485 case Builtin::BI__builtin_bswap64:
3486 case Builtin::BI_byteswap_ushort:
3487 case Builtin::BI_byteswap_ulong:
3488 case Builtin::BI_byteswap_uint64: {
3489 return RValue::get(
3490 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));
3491 }
3492 case Builtin::BI__builtin_bitreverse8:
3493 case Builtin::BI__builtin_bitreverse16:
3494 case Builtin::BI__builtin_bitreverse32:
3495 case Builtin::BI__builtin_bitreverse64: {
3496 return RValue::get(
3497 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));
3498 }
3499 case Builtin::BI__builtin_rotateleft8:
3500 case Builtin::BI__builtin_rotateleft16:
3501 case Builtin::BI__builtin_rotateleft32:
3502 case Builtin::BI__builtin_rotateleft64:
3503 case Builtin::BI_rotl8: // Microsoft variants of rotate left
3504 case Builtin::BI_rotl16:
3505 case Builtin::BI_rotl:
3506 case Builtin::BI_lrotl:
3507 case Builtin::BI_rotl64:
3508 return emitRotate(E, false);
3509
3510 case Builtin::BI__builtin_rotateright8:
3511 case Builtin::BI__builtin_rotateright16:
3512 case Builtin::BI__builtin_rotateright32:
3513 case Builtin::BI__builtin_rotateright64:
3514 case Builtin::BI_rotr8: // Microsoft variants of rotate right
3515 case Builtin::BI_rotr16:
3516 case Builtin::BI_rotr:
3517 case Builtin::BI_lrotr:
3518 case Builtin::BI_rotr64:
3519 return emitRotate(E, true);
3520
3521 case Builtin::BI__builtin_constant_p: {
3522 llvm::Type *ResultType = ConvertType(E->getType());
3523
3524 const Expr *Arg = E->getArg(0);
3525 QualType ArgType = Arg->getType();
3526 // FIXME: The allowance for Obj-C pointers and block pointers is historical
3527 // and likely a mistake.
3528 if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
3529 !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
3530 // Per the GCC documentation, only numeric constants are recognized after
3531 // inlining.
3532 return RValue::get(ConstantInt::get(ResultType, 0));
3533
3534 if (Arg->HasSideEffects(getContext()))
3535 // The argument is unevaluated, so be conservative if it might have
3536 // side-effects.
3537 return RValue::get(ConstantInt::get(ResultType, 0));
3538
3539 Value *ArgValue = EmitScalarExpr(Arg);
3540 if (ArgType->isObjCObjectPointerType()) {
3541 // Convert Objective-C objects to id because we cannot distinguish between
3542 // LLVM types for Obj-C classes as they are opaque.
3543 ArgType = CGM.getContext().getObjCIdType();
3544 ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
3545 }
3546 Function *F =
3547 CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
3548 Value *Result = Builder.CreateCall(F, ArgValue);
3549 if (Result->getType() != ResultType)
3550 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
3551 return RValue::get(Result);
3552 }
3553 case Builtin::BI__builtin_dynamic_object_size:
3554 case Builtin::BI__builtin_object_size: {
3555 unsigned Type =
3556 E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
3557 auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
3558
3559 // We pass this builtin onto the optimizer so that it can figure out the
3560 // object size in more complex cases.
3561 bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
3562 return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
3563 /*EmittedE=*/nullptr, IsDynamic));
3564 }
3565 case Builtin::BI__builtin_prefetch: {
3566 Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
3567 // FIXME: Technically these constants should be of type 'int', yes?
3568 RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
3569 llvm::ConstantInt::get(Int32Ty, 0);
3570 Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
3571 llvm::ConstantInt::get(Int32Ty, 3);
3572 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
3573 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
3574 Builder.CreateCall(F, {Address, RW, Locality, Data});
3575 return RValue::get(nullptr);
3576 }
3577 case Builtin::BI__builtin_readcyclecounter: {
3578 Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
3579 return RValue::get(Builder.CreateCall(F));
3580 }
3581 case Builtin::BI__builtin_readsteadycounter: {
3582 Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3583 return RValue::get(Builder.CreateCall(F));
3584 }
3585 case Builtin::BI__builtin___clear_cache: {
3586 Value *Begin = EmitScalarExpr(E->getArg(0));
3587 Value *End = EmitScalarExpr(E->getArg(1));
3588 Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
3589 return RValue::get(Builder.CreateCall(F, {Begin, End}));
3590 }
3591 case Builtin::BI__builtin_trap:
3592 EmitTrapCall(Intrinsic::trap);
3593 return RValue::get(nullptr);
3594 case Builtin::BI__builtin_verbose_trap: {
3595 llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
3596 if (getDebugInfo()) {
3597 TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
3598 TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
3599 *E->getArg(1)->tryEvaluateString(getContext()));
3600 }
3601 ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
3602 // Currently no attempt is made to prevent traps from being merged.
3603 EmitTrapCall(Intrinsic::trap);
3604 return RValue::get(nullptr);
3605 }
3606 case Builtin::BI__debugbreak:
3607 EmitTrapCall(Intrinsic::debugtrap);
3608 return RValue::get(nullptr);
3609 case Builtin::BI__builtin_unreachable: {
3610 EmitUnreachable(E->getExprLoc());
3611
3612 // We do need to preserve an insertion point.
3613 EmitBlock(createBasicBlock("unreachable.cont"));
3614
3615 return RValue::get(nullptr);
3616 }
3617
3618 case Builtin::BI__builtin_powi:
3619 case Builtin::BI__builtin_powif:
3620 case Builtin::BI__builtin_powil: {
3621 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
3622 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
3623
3624 if (Builder.getIsFPConstrained()) {
3625 // FIXME: llvm.powi has 2 mangling types,
3626 // llvm.experimental.constrained.powi has one.
3627 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3628 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,
3629 Src0->getType());
3630 return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));
3631 }
3632
3633 Function *F = CGM.getIntrinsic(Intrinsic::powi,
3634 { Src0->getType(), Src1->getType() });
3635 return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));
3636 }
3637 case Builtin::BI__builtin_frexpl: {
3638 // Linux PPC will not be adding additional PPCDoubleDouble support.
3639 // WIP to switch default to IEEE long double. Will emit libcall for
3640 // frexpl instead of legalizing this type in the BE.
3641 if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())
3642 break;
3643 [[fallthrough]];
3644 }
3645 case Builtin::BI__builtin_frexp:
3646 case Builtin::BI__builtin_frexpf:
3647 case Builtin::BI__builtin_frexpf128:
3648 case Builtin::BI__builtin_frexpf16:
3649 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
3650 case Builtin::BI__builtin_isgreater:
3651 case Builtin::BI__builtin_isgreaterequal:
3652 case Builtin::BI__builtin_isless:
3653 case Builtin::BI__builtin_islessequal:
3654 case Builtin::BI__builtin_islessgreater:
3655 case Builtin::BI__builtin_isunordered: {
3656 // Ordered comparisons: we know the arguments to these are matching scalar
3657 // floating point values.
3658 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3659 Value *LHS = EmitScalarExpr(E->getArg(0));
3660 Value *RHS = EmitScalarExpr(E->getArg(1));
3661
3662 switch (BuiltinID) {
3663 default: llvm_unreachable("Unknown ordered comparison");
3664 case Builtin::BI__builtin_isgreater:
3665 LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
3666 break;
3667 case Builtin::BI__builtin_isgreaterequal:
3668 LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
3669 break;
3670 case Builtin::BI__builtin_isless:
3671 LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
3672 break;
3673 case Builtin::BI__builtin_islessequal:
3674 LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
3675 break;
3676 case Builtin::BI__builtin_islessgreater:
3677 LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
3678 break;
3679 case Builtin::BI__builtin_isunordered:
3680 LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
3681 break;
3682 }
3683 // ZExt bool to int type.
3684 return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
3685 }
3686
3687 case Builtin::BI__builtin_isnan: {
3688 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3689 Value *V = EmitScalarExpr(E->getArg(0));
3690 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3691 return RValue::get(Result);
3692 return RValue::get(
3693 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),
3694 ConvertType(E->getType())));
3695 }
3696
3697 case Builtin::BI__builtin_issignaling: {
3698 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3699 Value *V = EmitScalarExpr(E->getArg(0));
3700 return RValue::get(
3701 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),
3702 ConvertType(E->getType())));
3703 }
3704
3705 case Builtin::BI__builtin_isinf: {
3706 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3707 Value *V = EmitScalarExpr(E->getArg(0));
3708 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3709 return RValue::get(Result);
3710 return RValue::get(
3711 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),
3712 ConvertType(E->getType())));
3713 }
3714
3715 case Builtin::BIfinite:
3716 case Builtin::BI__finite:
3717 case Builtin::BIfinitef:
3718 case Builtin::BI__finitef:
3719 case Builtin::BIfinitel:
3720 case Builtin::BI__finitel:
3721 case Builtin::BI__builtin_isfinite: {
3722 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3723 Value *V = EmitScalarExpr(E->getArg(0));
3724 if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))
3725 return RValue::get(Result);
3726 return RValue::get(
3727 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),
3728 ConvertType(E->getType())));
3729 }
3730
3731 case Builtin::BI__builtin_isnormal: {
3732 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3733 Value *V = EmitScalarExpr(E->getArg(0));
3734 return RValue::get(
3735 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),
3736 ConvertType(E->getType())));
3737 }
3738
3739 case Builtin::BI__builtin_issubnormal: {
3740 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3741 Value *V = EmitScalarExpr(E->getArg(0));
3742 return RValue::get(
3743 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),
3744 ConvertType(E->getType())));
3745 }
3746
3747 case Builtin::BI__builtin_iszero: {
3748 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3749 Value *V = EmitScalarExpr(E->getArg(0));
3750 return RValue::get(
3751 Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),
3752 ConvertType(E->getType())));
3753 }
3754
3755 case Builtin::BI__builtin_isfpclass: {
3756 Expr::EvalResult Result;
3757 if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))
3758 break;
3759 uint64_t Test = Result.Val.getInt().getLimitedValue();
3760 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
3761 Value *V = EmitScalarExpr(E->getArg(0));
3762 return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),
3763 ConvertType(E->getType())));
3764 }
3765
3766 case Builtin::BI__builtin_nondeterministic_value: {
3767 llvm::Type *Ty = ConvertType(E->getArg(0)->getType());
3768
3769 Value *Result = PoisonValue::get(Ty);
3770 Result = Builder.CreateFreeze(Result);
3771
3772 return RValue::get(Result);
3773 }
3774
3775 case Builtin::BI__builtin_elementwise_abs: {
3776 Value *Result;
3777 QualType QT = E->getArg(0)->getType();
3778
3779 if (auto *VecTy = QT->getAs<VectorType>())
3780 QT = VecTy->getElementType();
3781 if (QT->isIntegerType())
3782 Result = Builder.CreateBinaryIntrinsic(
3783 llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),
3784 Builder.getFalse(), nullptr, "elt.abs");
3785 else
3786 Result = emitBuiltinWithOneOverloadedType<1>(
3787 *this, E, llvm::Intrinsic::fabs, "elt.abs");
3788
3789 return RValue::get(Result);
3790 }
3791 case Builtin::BI__builtin_elementwise_acos:
3792 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3793 *this, E, llvm::Intrinsic::acos, "elt.acos"));
3794 case Builtin::BI__builtin_elementwise_asin:
3795 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3796 *this, E, llvm::Intrinsic::asin, "elt.asin"));
3797 case Builtin::BI__builtin_elementwise_atan:
3798 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3799 *this, E, llvm::Intrinsic::atan, "elt.atan"));
3800 case Builtin::BI__builtin_elementwise_ceil:
3801 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3802 *this, E, llvm::Intrinsic::ceil, "elt.ceil"));
3803 case Builtin::BI__builtin_elementwise_exp:
3804 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3805 *this, E, llvm::Intrinsic::exp, "elt.exp"));
3806 case Builtin::BI__builtin_elementwise_exp2:
3807 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3808 *this, E, llvm::Intrinsic::exp2, "elt.exp2"));
3809 case Builtin::BI__builtin_elementwise_log:
3810 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3811 *this, E, llvm::Intrinsic::log, "elt.log"));
3812 case Builtin::BI__builtin_elementwise_log2:
3813 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3814 *this, E, llvm::Intrinsic::log2, "elt.log2"));
3815 case Builtin::BI__builtin_elementwise_log10:
3816 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3817 *this, E, llvm::Intrinsic::log10, "elt.log10"));
3818 case Builtin::BI__builtin_elementwise_pow: {
3819 return RValue::get(
3820 emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));
3821 }
3822 case Builtin::BI__builtin_elementwise_bitreverse:
3823 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3824 *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));
3825 case Builtin::BI__builtin_elementwise_cos:
3826 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3827 *this, E, llvm::Intrinsic::cos, "elt.cos"));
3828 case Builtin::BI__builtin_elementwise_cosh:
3829 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3830 *this, E, llvm::Intrinsic::cosh, "elt.cosh"));
3831 case Builtin::BI__builtin_elementwise_floor:
3832 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3833 *this, E, llvm::Intrinsic::floor, "elt.floor"));
3834 case Builtin::BI__builtin_elementwise_roundeven:
3835 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3836 *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
3837 case Builtin::BI__builtin_elementwise_round:
3838 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3839 *this, E, llvm::Intrinsic::round, "elt.round"));
3840 case Builtin::BI__builtin_elementwise_rint:
3841 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3842 *this, E, llvm::Intrinsic::rint, "elt.rint"));
3843 case Builtin::BI__builtin_elementwise_nearbyint:
3844 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3845 *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));
3846 case Builtin::BI__builtin_elementwise_sin:
3847 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3848 *this, E, llvm::Intrinsic::sin, "elt.sin"));
3849 case Builtin::BI__builtin_elementwise_sinh:
3850 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3851 *this, E, llvm::Intrinsic::sinh, "elt.sinh"));
3852 case Builtin::BI__builtin_elementwise_tan:
3853 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3854 *this, E, llvm::Intrinsic::tan, "elt.tan"));
3855 case Builtin::BI__builtin_elementwise_tanh:
3856 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3857 *this, E, llvm::Intrinsic::tanh, "elt.tanh"));
3858 case Builtin::BI__builtin_elementwise_trunc:
3859 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3860 *this, E, llvm::Intrinsic::trunc, "elt.trunc"));
3861 case Builtin::BI__builtin_elementwise_canonicalize:
3862 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3863 *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));
3864 case Builtin::BI__builtin_elementwise_copysign:
3865 return RValue::get(emitBuiltinWithOneOverloadedType<2>(
3866 *this, E, llvm::Intrinsic::copysign));
3867 case Builtin::BI__builtin_elementwise_fma:
3868 return RValue::get(
3869 emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));
3870 case Builtin::BI__builtin_elementwise_add_sat:
3871 case Builtin::BI__builtin_elementwise_sub_sat: {
3872 Value *Op0 = EmitScalarExpr(E->getArg(0));
3873 Value *Op1 = EmitScalarExpr(E->getArg(1));
3874 Value *Result;
3875 assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");
3876 QualType Ty = E->getArg(0)->getType();
3877 if (auto *VecTy = Ty->getAs<VectorType>())
3878 Ty = VecTy->getElementType();
3879 bool IsSigned = Ty->isSignedIntegerType();
3880 unsigned Opc;
3881 if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)
3882 Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;
3883 else
3884 Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;
3885 Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");
3886 return RValue::get(Result);
3887 }
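// Illustrative sketch, not part of CGBuiltin.cpp: it spells out the
// saturating-add semantics selected above (sadd_sat for signed element types,
// uadd_sat for unsigned ones) for plain 8-bit scalars.
#include <cstdint>
#include <limits>

int8_t addSatSigned(int8_t A, int8_t B) {
  // Widen so the sum cannot overflow, then clamp to the int8_t range.
  int Wide = int{A} + int{B};
  if (Wide > std::numeric_limits<int8_t>::max())
    return std::numeric_limits<int8_t>::max();
  if (Wide < std::numeric_limits<int8_t>::min())
    return std::numeric_limits<int8_t>::min();
  return static_cast<int8_t>(Wide);
}

uint8_t addSatUnsigned(uint8_t A, uint8_t B) {
  // Widen, then clamp to 255 on overflow.
  unsigned Wide = unsigned{A} + unsigned{B};
  return Wide > 0xFFu ? uint8_t{0xFF} : static_cast<uint8_t>(Wide);
}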
3888
3889 case Builtin::BI__builtin_elementwise_max: {
3890 Value *Op0 = EmitScalarExpr(E->getArg(0));
3891 Value *Op1 = EmitScalarExpr(E->getArg(1));
3892 Value *Result;
3893 if (Op0->getType()->isIntOrIntVectorTy()) {
3894 QualType Ty = E->getArg(0)->getType();
3895 if (auto *VecTy = Ty->getAs<VectorType>())
3896 Ty = VecTy->getElementType();
3897 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3898 ? llvm::Intrinsic::smax
3899 : llvm::Intrinsic::umax,
3900 Op0, Op1, nullptr, "elt.max");
3901 } else
3902 Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");
3903 return RValue::get(Result);
3904 }
3905 case Builtin::BI__builtin_elementwise_min: {
3906 Value *Op0 = EmitScalarExpr(E->getArg(0));
3907 Value *Op1 = EmitScalarExpr(E->getArg(1));
3908 Value *Result;
3909 if (Op0->getType()->isIntOrIntVectorTy()) {
3910 QualType Ty = E->getArg(0)->getType();
3911 if (auto *VecTy = Ty->getAs<VectorType>())
3912 Ty = VecTy->getElementType();
3913 Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()
3914 ? llvm::Intrinsic::smin
3915 : llvm::Intrinsic::umin,
3916 Op0, Op1, nullptr, "elt.min");
3917 } else
3918 Result = Builder.CreateMinNum(Op0, Op1, "elt.min");
3919 return RValue::get(Result);
3920 }
3921
3922 case Builtin::BI__builtin_reduce_max: {
3923 auto GetIntrinsicID = [this](QualType QT) {
3924 if (auto *VecTy = QT->getAs<VectorType>())
3925 QT = VecTy->getElementType();
3926 else if (QT->isSizelessVectorType())
3927 QT = QT->getSizelessVectorEltType(CGM.getContext());
3928
3929 if (QT->isSignedIntegerType())
3930 return llvm::Intrinsic::vector_reduce_smax;
3931 if (QT->isUnsignedIntegerType())
3932 return llvm::Intrinsic::vector_reduce_umax;
3933 assert(QT->isFloatingType() && "must have a float here");
3934 return llvm::Intrinsic::vector_reduce_fmax;
3935 };
3936 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3937 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3938 }
3939
3940 case Builtin::BI__builtin_reduce_min: {
3941 auto GetIntrinsicID = [this](QualType QT) {
3942 if (auto *VecTy = QT->getAs<VectorType>())
3943 QT = VecTy->getElementType();
3944 else if (QT->isSizelessVectorType())
3945 QT = QT->getSizelessVectorEltType(CGM.getContext());
3946
3947 if (QT->isSignedIntegerType())
3948 return llvm::Intrinsic::vector_reduce_smin;
3949 if (QT->isUnsignedIntegerType())
3950 return llvm::Intrinsic::vector_reduce_umin;
3951 assert(QT->isFloatingType() && "must have a float here");
3952 return llvm::Intrinsic::vector_reduce_fmin;
3953 };
3954
3955 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3956 *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));
3957 }
3958
3959 case Builtin::BI__builtin_reduce_add:
3960 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3961 *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));
3962 case Builtin::BI__builtin_reduce_mul:
3963 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3964 *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));
3965 case Builtin::BI__builtin_reduce_xor:
3966 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3967 *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
3968 case Builtin::BI__builtin_reduce_or:
3969 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3970 *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
3971 case Builtin::BI__builtin_reduce_and:
3972 return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3973 *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
3974
3975 case Builtin::BI__builtin_matrix_transpose: {
3976 auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
3977 Value *MatValue = EmitScalarExpr(E->getArg(0));
3978 MatrixBuilder MB(Builder);
3979 Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),
3980 MatrixTy->getNumColumns());
3981 return RValue::get(Result);
3982 }
3983
3984 case Builtin::BI__builtin_matrix_column_major_load: {
3985 MatrixBuilder MB(Builder);
3986 // Emit everything that isn't dependent on the first parameter type
3987 Value *Stride = EmitScalarExpr(E->getArg(3));
3988 const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();
3989 auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();
3990 assert(PtrTy && "arg0 must be of pointer type");
3991 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
3992
3993 Address Src = EmitPointerWithAlignment(E->getArg(0));
3994 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
3995 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
3996 0);
3997 Value *Result = MB.CreateColumnMajorLoad(
3998 Src.getElementType(), Src.emitRawPointer(*this),
3999 Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,
4000 ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");
4001 return RValue::get(Result);
4002 }
4003
4004 case Builtin::BI__builtin_matrix_column_major_store: {
4005 MatrixBuilder MB(Builder);
4006 Value *Matrix = EmitScalarExpr(E->getArg(0));
4007 Address Dst = EmitPointerWithAlignment(E->getArg(1));
4008 Value *Stride = EmitScalarExpr(E->getArg(2));
4009
4010 const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();
4011 auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();
4012 assert(PtrTy && "arg1 must be of pointer type");
4013 bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();
4014
4015 EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),
4016 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4017 0);
4018 Value *Result = MB.CreateColumnMajorStore(
4019 Matrix, Dst.emitRawPointer(*this),
4020 Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,
4021 MatrixTy->getNumRows(), MatrixTy->getNumColumns());
4022 return RValue::get(Result);
4023 }
4024
4025 case Builtin::BI__builtin_isinf_sign: {
4026 // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
4027 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4028 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4029 Value *Arg = EmitScalarExpr(E->getArg(0));
4030 Value *AbsArg = EmitFAbs(*this, Arg);
4031 Value *IsInf = Builder.CreateFCmpOEQ(
4032 AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
4033 Value *IsNeg = EmitSignBit(*this, Arg);
4034
4035 llvm::Type *IntTy = ConvertType(E->getType());
4036 Value *Zero = Constant::getNullValue(IntTy);
4037 Value *One = ConstantInt::get(IntTy, 1);
4038 Value *NegativeOne = ConstantInt::get(IntTy, -1);
4039 Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
4040 Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
4041 return RValue::get(Result);
4042 }
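// Illustrative sketch, not part of CGBuiltin.cpp: it mirrors the selects built
// above, isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0.
#include <cmath>

int isinfSignPortable(double X) {
  if (!std::isinf(X))
    return 0;
  return std::signbit(X) ? -1 : 1;
}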
4043
4044 case Builtin::BI__builtin_flt_rounds: {
4045 Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);
4046
4047 llvm::Type *ResultType = ConvertType(E->getType());
4048 Value *Result = Builder.CreateCall(F);
4049 if (Result->getType() != ResultType)
4050 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
4051 "cast");
4052 return RValue::get(Result);
4053 }
4054
4055 case Builtin::BI__builtin_set_flt_rounds: {
4056 Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);
4057
4058 Value *V = EmitScalarExpr(E->getArg(0));
4059 Builder.CreateCall(F, V);
4060 return RValue::get(nullptr);
4061 }
4062
4063 case Builtin::BI__builtin_fpclassify: {
4064 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
4065 // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.
4066 Value *V = EmitScalarExpr(E->getArg(5));
4067 llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
4068
4069 // Create Result
4070 BasicBlock *Begin = Builder.GetInsertBlock();
4071 BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
4072 Builder.SetInsertPoint(End);
4073 PHINode *Result =
4074 Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
4075 "fpclassify_result");
4076
4077 // if (V==0) return FP_ZERO
4078 Builder.SetInsertPoint(Begin);
4079 Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
4080 "iszero");
4081 Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
4082 BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
4083 Builder.CreateCondBr(IsZero, End, NotZero);
4084 Result->addIncoming(ZeroLiteral, Begin);
4085
4086 // if (V != V) return FP_NAN
4087 Builder.SetInsertPoint(NotZero);
4088 Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
4089 Value *NanLiteral = EmitScalarExpr(E->getArg(0));
4090 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
4091 Builder.CreateCondBr(IsNan, End, NotNan);
4092 Result->addIncoming(NanLiteral, NotZero);
4093
4094 // if (fabs(V) == infinity) return FP_INFINITY
4095 Builder.SetInsertPoint(NotNan);
4096 Value *VAbs = EmitFAbs(*this, V);
4097 Value *IsInf =
4098 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
4099 "isinf");
4100 Value *InfLiteral = EmitScalarExpr(E->getArg(1));
4101 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
4102 Builder.CreateCondBr(IsInf, End, NotInf);
4103 Result->addIncoming(InfLiteral, NotNan);
4104
4105 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
4106 Builder.SetInsertPoint(NotInf);
4107 APFloat Smallest = APFloat::getSmallestNormalized(
4108 getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
4109 Value *IsNormal =
4110 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
4111 "isnormal");
4112 Value *NormalResult =
4113 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
4114 EmitScalarExpr(E->getArg(3)));
4115 Builder.CreateBr(End);
4116 Result->addIncoming(NormalResult, NotInf);
4117
4118 // return Result
4119 Builder.SetInsertPoint(End);
4120 return RValue::get(Result);
4121 }
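// Illustrative sketch, not part of CGBuiltin.cpp: it follows the same test
// order as the branch chain built above, i.e. zero first, then NaN, then
// infinity, then normal versus subnormal.
#include <cmath>
#include <limits>

int fpclassifyPortable(double V) {
  if (V == 0.0)
    return FP_ZERO;
  if (V != V) // the unordered self-compare used for the NaN test above
    return FP_NAN;
  double Abs = std::fabs(V);
  if (Abs == std::numeric_limits<double>::infinity())
    return FP_INFINITE;
  return Abs >= std::numeric_limits<double>::min() ? FP_NORMAL : FP_SUBNORMAL;
}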
4122
4123 // An alloca will always return a pointer to the alloca (stack) address
4124 // space. This address space need not be the same as the AST / Language
4125 // default (e.g. in C / C++ auto vars are in the generic address space). At
4126 // the AST level this is handled within CreateTempAlloca et al., but for the
4127 // builtin / dynamic alloca we have to handle it here. We use an explicit cast
4128 // instead of passing an AS to CreateAlloca so as to not inhibit optimisation.
4129 case Builtin::BIalloca:
4130 case Builtin::BI_alloca:
4131 case Builtin::BI__builtin_alloca_uninitialized:
4132 case Builtin::BI__builtin_alloca: {
4133 Value *Size = EmitScalarExpr(E->getArg(0));
4134 const TargetInfo &TI = getContext().getTargetInfo();
4135 // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
4136 const Align SuitableAlignmentInBytes =
4137 CGM.getContext()
4138 .toCharUnitsFromBits(TI.getSuitableAlign())
4139 .getAsAlign();
4140 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4141 AI->setAlignment(SuitableAlignmentInBytes);
4142 if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)
4143 initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
4144 LangAS AAS = getASTAllocaAddressSpace();
4145 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4146 if (AAS != EAS) {
4147 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4148 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4149 EAS, Ty));
4150 }
4151 return RValue::get(AI);
4152 }
4153
4154 case Builtin::BI__builtin_alloca_with_align_uninitialized:
4155 case Builtin::BI__builtin_alloca_with_align: {
4156 Value *Size = EmitScalarExpr(E->getArg(0));
4157 Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
4158 auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
4159 unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
4160 const Align AlignmentInBytes =
4161 CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();
4162 AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
4163 AI->setAlignment(AlignmentInBytes);
4164 if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)
4165 initializeAlloca(*this, AI, Size, AlignmentInBytes);
4166 LangAS AAS = getASTAllocaAddressSpace();
4167 LangAS EAS = E->getType()->getPointeeType().getAddressSpace();
4168 if (AAS != EAS) {
4169 llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());
4170 return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,
4171 EAS, Ty));
4172 }
4173 return RValue::get(AI);
4174 }
4175
4176 case Builtin::BIbzero:
4177 case Builtin::BI__builtin_bzero: {
4178 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4179 Value *SizeVal = EmitScalarExpr(E->getArg(1));
4180 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4181 E->getArg(0)->getExprLoc(), FD, 0);
4182 Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
4183 return RValue::get(nullptr);
4184 }
4185
4186 case Builtin::BIbcopy:
4187 case Builtin::BI__builtin_bcopy: {
4188 Address Src = EmitPointerWithAlignment(E->getArg(0));
4189 Address Dest = EmitPointerWithAlignment(E->getArg(1));
4190 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4191 EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),
4192 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4193 0);
4194 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4195 E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,
4196 0);
4197 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4198 return RValue::get(nullptr);
4199 }
4200
4201 case Builtin::BImemcpy:
4202 case Builtin::BI__builtin_memcpy:
4203 case Builtin::BImempcpy:
4204 case Builtin::BI__builtin_mempcpy: {
4205 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4206 Address Src = EmitPointerWithAlignment(E->getArg(1));
4207 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4208 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4209 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4210 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4211 if (BuiltinID == Builtin::BImempcpy ||
4212 BuiltinID == Builtin::BI__builtin_mempcpy)
4213 return RValue::get(Builder.CreateInBoundsGEP(
4214 Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));
4215 else
4216 return RValue::get(Dest, *this);
4217 }
4218
4219 case Builtin::BI__builtin_memcpy_inline: {
4220 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4221 Address Src = EmitPointerWithAlignment(E->getArg(1));
4222 uint64_t Size =
4223 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4224 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4225 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4226 Builder.CreateMemCpyInline(Dest, Src, Size);
4227 return RValue::get(nullptr);
4228 }
4229
4230 case Builtin::BI__builtin_char_memchr:
4231 BuiltinID = Builtin::BI__builtin_memchr;
4232 break;
4233
4234 case Builtin::BI__builtin___memcpy_chk: {
4235 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
4236 Expr::EvalResult SizeResult, DstSizeResult;
4237 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4238 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4239 break;
4240 llvm::APSInt Size = SizeResult.Val.getInt();
4241 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4242 if (Size.ugt(DstSize))
4243 break;
4244 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4245 Address Src = EmitPointerWithAlignment(E->getArg(1));
4246 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4247 Builder.CreateMemCpy(Dest, Src, SizeVal, false);
4248 return RValue::get(Dest, *this);
4249 }
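// Illustrative usage sketch, not part of CGBuiltin.cpp: the fold above fires
// only when both size arguments are integer constants and the copy size does
// not exceed the destination size (an unsigned comparison).
void copyFixed(char (&Dst)[16], const char *Src) {
  // Both sizes should evaluate to the constant 16 here, so the checked variant
  // is expected to be emitted as a plain 16-byte memcpy.
  __builtin___memcpy_chk(Dst, Src, 16, __builtin_object_size(Dst, 0));
}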
4250
4251 case Builtin::BI__builtin_objc_memmove_collectable: {
4252 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
4253 Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
4254 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4255 CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
4256 DestAddr, SrcAddr, SizeVal);
4257 return RValue::get(DestAddr, *this);
4258 }
4259
4260 case Builtin::BI__builtin___memmove_chk: {
4261 // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
4262 Expr::EvalResult SizeResult, DstSizeResult;
4263 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4264 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4265 break;
4266 llvm::APSInt Size = SizeResult.Val.getInt();
4267 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4268 if (Size.ugt(DstSize))
4269 break;
4270 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4271 Address Src = EmitPointerWithAlignment(E->getArg(1));
4272 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4273 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4274 return RValue::get(Dest, *this);
4275 }
4276
4277 case Builtin::BImemmove:
4278 case Builtin::BI__builtin_memmove: {
4279 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4280 Address Src = EmitPointerWithAlignment(E->getArg(1));
4281 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4282 EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);
4283 EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);
4284 Builder.CreateMemMove(Dest, Src, SizeVal, false);
4285 return RValue::get(Dest, *this);
4286 }
4287 case Builtin::BImemset:
4288 case Builtin::BI__builtin_memset: {
4289 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4290 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4291 Builder.getInt8Ty());
4292 Value *SizeVal = EmitScalarExpr(E->getArg(2));
4293 EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),
4294 E->getArg(0)->getExprLoc(), FD, 0);
4295 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4296 return RValue::get(Dest, *this);
4297 }
4298 case Builtin::BI__builtin_memset_inline: {
4299 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4300 Value *ByteVal =
4301 Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());
4302 uint64_t Size =
4303 E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
4304 EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),
4305 E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,
4306 0);
4307 Builder.CreateMemSetInline(Dest, ByteVal, Size);
4308 return RValue::get(nullptr);
4309 }
4310 case Builtin::BI__builtin___memset_chk: {
4311 // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
4312 Expr::EvalResult SizeResult, DstSizeResult;
4313 if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
4314 !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
4315 break;
4316 llvm::APSInt Size = SizeResult.Val.getInt();
4317 llvm::APSInt DstSize = DstSizeResult.Val.getInt();
4318 if (Size.ugt(DstSize))
4319 break;
4320 Address Dest = EmitPointerWithAlignment(E->getArg(0));
4321 Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
4322 Builder.getInt8Ty());
4323 Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
4324 Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
4325 return RValue::get(Dest, *this);
4326 }
4327 case Builtin::BI__builtin_wmemchr: {
4328 // The MSVC runtime library does not provide a definition of wmemchr, so we
4329 // need an inline implementation.
4330 if (!getTarget().getTriple().isOSMSVCRT())
4331 break;
4332
4333 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4334 Value *Str = EmitScalarExpr(E->getArg(0));
4335 Value *Chr = EmitScalarExpr(E->getArg(1));
4336 Value *Size = EmitScalarExpr(E->getArg(2));
4337
4338 BasicBlock *Entry = Builder.GetInsertBlock();
4339 BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");
4340 BasicBlock *Next = createBasicBlock("wmemchr.next");
4341 BasicBlock *Exit = createBasicBlock("wmemchr.exit");
4342 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4343 Builder.CreateCondBr(SizeEq0, Exit, CmpEq);
4344
4345 EmitBlock(CmpEq);
4346 PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);
4347 StrPhi->addIncoming(Str, Entry);
4348 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4349 SizePhi->addIncoming(Size, Entry);
4350 CharUnits WCharAlign =
4351 getContext().getTypeAlignInChars(getContext().WCharTy);
4352 Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);
4353 Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);
4354 Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);
4355 Builder.CreateCondBr(StrEqChr, Exit, Next);
4356
4357 EmitBlock(Next);
4358 Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1);
4359 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4360 Value *NextSizeEq0 =
4361 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4362 Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);
4363 StrPhi->addIncoming(NextStr, Next);
4364 SizePhi->addIncoming(NextSize, Next);
4365
4366 EmitBlock(Exit);
4367 PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);
4368 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);
4369 Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);
4370 Ret->addIncoming(FoundChr, CmpEq);
4371 return RValue::get(Ret);
4372 }
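// Illustrative sketch, not part of CGBuiltin.cpp: a plain C++ rendering of the
// inline wmemchr loop emitted above for MSVC targets, whose runtime does not
// provide the function.
#include <cstddef>

const wchar_t *wmemchrInline(const wchar_t *Str, wchar_t Chr, std::size_t Size) {
  for (; Size != 0; --Size, ++Str)
    if (*Str == Chr)
      return Str;  // the "wmemchr.eq" exit edge above
  return nullptr;  // size exhausted: the null incoming values of the result PHI
}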
4373 case Builtin::BI__builtin_wmemcmp: {
4374 // The MSVC runtime library does not provide a definition of wmemcmp, so we
4375 // need an inline implementation.
4376 if (!getTarget().getTriple().isOSMSVCRT())
4377 break;
4378
4379 llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
4380
4381 Value *Dst = EmitScalarExpr(E->getArg(0));
4382 Value *Src = EmitScalarExpr(E->getArg(1));
4383 Value *Size = EmitScalarExpr(E->getArg(2));
4384
4385 BasicBlock *Entry = Builder.GetInsertBlock();
4386 BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
4387 BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
4388 BasicBlock *Next = createBasicBlock("wmemcmp.next");
4389 BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
4390 Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
4391 Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
4392
4393 EmitBlock(CmpGT);
4394 PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
4395 DstPhi->addIncoming(Dst, Entry);
4396 PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
4397 SrcPhi->addIncoming(Src, Entry);
4398 PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
4399 SizePhi->addIncoming(Size, Entry);
4400 CharUnits WCharAlign =
4401 getContext().getTypeAlignInChars(getContext().WCharTy);
4402 Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
4403 Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
4404 Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
4405 Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
4406
4407 EmitBlock(CmpLT);
4408 Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
4409 Builder.CreateCondBr(DstLtSrc, Exit, Next);
4410
4411 EmitBlock(Next);
4412 Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
4413 Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
4414 Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
4415 Value *NextSizeEq0 =
4416 Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
4417 Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
4418 DstPhi->addIncoming(NextDst, Next);
4419 SrcPhi->addIncoming(NextSrc, Next);
4420 SizePhi->addIncoming(NextSize, Next);
4421
4422 EmitBlock(Exit);
4423 PHINode *Ret = Builder.CreatePHI(IntTy, 4);
4424 Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
4425 Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
4426 Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
4427 Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
4428 return RValue::get(Ret);
4429 }
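// Illustrative sketch, not part of CGBuiltin.cpp: it restates the inline
// wmemcmp expansion above; the element compare is unsigned, matching the
// ICmpUGT/ICmpULT instructions in the emitted loop.
#include <cstddef>
#include <cstdint>

int wmemcmpInline(const wchar_t *Dst, const wchar_t *Src, std::size_t Size) {
  for (; Size != 0; --Size, ++Dst, ++Src) {
    auto D = static_cast<std::uint32_t>(*Dst);
    auto S = static_cast<std::uint32_t>(*Src);
    if (D > S)
      return 1;
    if (D < S)
      return -1;
  }
  return 0;
}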
4430 case Builtin::BI__builtin_dwarf_cfa: {
4431 // The offset in bytes from the first argument to the CFA.
4432 //
4433 // Why on earth is this in the frontend? Is there any reason at
4434 // all that the backend can't reasonably determine this while
4435 // lowering llvm.eh.dwarf.cfa()?
4436 //
4437 // TODO: If there's a satisfactory reason, add a target hook for
4438 // this instead of hard-coding 0, which is correct for most targets.
4439 int32_t Offset = 0;
4440
4441 Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
4442 return RValue::get(Builder.CreateCall(F,
4443 llvm::ConstantInt::get(Int32Ty, Offset)));
4444 }
4445 case Builtin::BI__builtin_return_address: {
4446 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4447 getContext().UnsignedIntTy);
4448 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4449 return RValue::get(Builder.CreateCall(F, Depth));
4450 }
4451 case Builtin::BI_ReturnAddress: {
4452 Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
4453 return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
4454 }
4455 case Builtin::BI__builtin_frame_address: {
4456 Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
4457 getContext().UnsignedIntTy);
4458 Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);
4459 return RValue::get(Builder.CreateCall(F, Depth));
4460 }
4461 case Builtin::BI__builtin_extract_return_addr: {
4462 Value *Address = EmitScalarExpr(E->getArg(0));
4463 Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
4464 return RValue::get(Result);
4465 }
4466 case Builtin::BI__builtin_frob_return_addr: {
4467 Value *Address = EmitScalarExpr(E->getArg(0));
4468 Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
4469 return RValue::get(Result);
4470 }
4471 case Builtin::BI__builtin_dwarf_sp_column: {
4472 llvm::IntegerType *Ty
4473 = cast<llvm::IntegerType>(ConvertType(E->getType()));
4474 int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
4475 if (Column == -1) {
4476 CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
4477 return RValue::get(llvm::UndefValue::get(Ty));
4478 }
4479 return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
4480 }
4481 case Builtin::BI__builtin_init_dwarf_reg_size_table: {
4482 Value *Address = EmitScalarExpr(E->getArg(0));
4483 if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
4484 CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
4485 return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
4486 }
4487 case Builtin::BI__builtin_eh_return: {
4488 Value *Int = EmitScalarExpr(E->getArg(0));
4489 Value *Ptr = EmitScalarExpr(E->getArg(1));
4490
4491 llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
4492 assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
4493 "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
4494 Function *F =
4495 CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
4496 : Intrinsic::eh_return_i64);
4497 Builder.CreateCall(F, {Int, Ptr});
4498 Builder.CreateUnreachable();
4499
4500 // We do need to preserve an insertion point.
4501 EmitBlock(createBasicBlock("builtin_eh_return.cont"));
4502
4503 return RValue::get(nullptr);
4504 }
4505 case Builtin::BI__builtin_unwind_init: {
4506 Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
4507 Builder.CreateCall(F);
4508 return RValue::get(nullptr);
4509 }
4510 case Builtin::BI__builtin_extend_pointer: {
4511 // Extends a pointer to the size of an _Unwind_Word, which is
4512 // uint64_t on all platforms. Generally this gets poked into a
4513 // register and eventually used as an address, so if the
4514 // addressing registers are wider than pointers and the platform
4515 // doesn't implicitly ignore high-order bits when doing
4516 // addressing, we need to make sure we zext / sext based on
4517 // the platform's expectations.
4518 //
4519 // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
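// A minimal sketch of the resulting IR on a 32-bit pointer target (names are
// illustrative):
//   %cast = ptrtoint ptr %p to i32
//   %ext = sext i32 %cast to i64   ; or zext, depending on the target hook
// On targets whose pointers are already 64 bits wide, the ptrtoint result is
// returned unchanged.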
4520
4521 // Cast the pointer to intptr_t.
4522 Value *Ptr = EmitScalarExpr(E->getArg(0));
4523 Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
4524
4525 // If that's 64 bits, we're done.
4526 if (IntPtrTy->getBitWidth() == 64)
4527 return RValue::get(Result);
4528
4529 // Otherwise, ask the target hooks whether to sign- or zero-extend.
4530 if (getTargetHooks().extendPointerWithSExt())
4531 return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
4532 else
4533 return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
4534 }
4535 case Builtin::BI__builtin_setjmp: {
4536 // Buffer is a void**.
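// Sketch of the buffer layout produced below (indices in pointer-sized slots):
// slot 0 receives the caller's frame address and slot 2 the saved stack
// pointer; the remaining slots are left for llvm.eh.sjlj.setjmp / the target
// lowering to fill in.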
4537 Address Buf = EmitPointerWithAlignment(E->getArg(0));
4538
4539 // Store the frame pointer to the setjmp buffer.
4540 Value *FrameAddr = Builder.CreateCall(
4541 CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),
4542 ConstantInt::get(Int32Ty, 0));
4543 Builder.CreateStore(FrameAddr, Buf);
4544
4545 // Store the stack pointer to the setjmp buffer.
4546 Value *StackAddr = Builder.CreateStackSave();
4547 assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());
4548
4549 Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
4550 Builder.CreateStore(StackAddr, StackSaveSlot);
4551
4552 // Call LLVM's EH setjmp, which is lightweight.
4553 Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
4554 return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));
4555 }
4556 case Builtin::BI__builtin_longjmp: {
4557 Value *Buf = EmitScalarExpr(E->getArg(0));
4558
4559 // Call LLVM's EH longjmp, which is lightweight.
4560 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
4561
4562 // longjmp doesn't return; mark this as unreachable.
4563 Builder.CreateUnreachable();
4564
4565 // We do need to preserve an insertion point.
4566 EmitBlock(createBasicBlock("longjmp.cont"));
4567
4568 return RValue::get(nullptr);
4569 }
4570 case Builtin::BI__builtin_launder: {
4571 const Expr *Arg = E->getArg(0);
4572 QualType ArgTy = Arg->getType()->getPointeeType();
4573 Value *Ptr = EmitScalarExpr(Arg);
4574 if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
4575 Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
4576
4577 return RValue::get(Ptr);
4578 }
4579 case Builtin::BI__sync_fetch_and_add:
4580 case Builtin::BI__sync_fetch_and_sub:
4581 case Builtin::BI__sync_fetch_and_or:
4582 case Builtin::BI__sync_fetch_and_and:
4583 case Builtin::BI__sync_fetch_and_xor:
4584 case Builtin::BI__sync_fetch_and_nand:
4585 case Builtin::BI__sync_add_and_fetch:
4586 case Builtin::BI__sync_sub_and_fetch:
4587 case Builtin::BI__sync_and_and_fetch:
4588 case Builtin::BI__sync_or_and_fetch:
4589 case Builtin::BI__sync_xor_and_fetch:
4590 case Builtin::BI__sync_nand_and_fetch:
4591 case Builtin::BI__sync_val_compare_and_swap:
4592 case Builtin::BI__sync_bool_compare_and_swap:
4593 case Builtin::BI__sync_lock_test_and_set:
4594 case Builtin::BI__sync_lock_release:
4595 case Builtin::BI__sync_swap:
4596 llvm_unreachable("Shouldn't make it through sema");
4597 case Builtin::BI__sync_fetch_and_add_1:
4598 case Builtin::BI__sync_fetch_and_add_2:
4599 case Builtin::BI__sync_fetch_and_add_4:
4600 case Builtin::BI__sync_fetch_and_add_8:
4601 case Builtin::BI__sync_fetch_and_add_16:
4602 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
4603 case Builtin::BI__sync_fetch_and_sub_1:
4604 case Builtin::BI__sync_fetch_and_sub_2:
4605 case Builtin::BI__sync_fetch_and_sub_4:
4606 case Builtin::BI__sync_fetch_and_sub_8:
4607 case Builtin::BI__sync_fetch_and_sub_16:
4608 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
4609 case Builtin::BI__sync_fetch_and_or_1:
4610 case Builtin::BI__sync_fetch_and_or_2:
4611 case Builtin::BI__sync_fetch_and_or_4:
4612 case Builtin::BI__sync_fetch_and_or_8:
4613 case Builtin::BI__sync_fetch_and_or_16:
4614 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
4615 case Builtin::BI__sync_fetch_and_and_1:
4616 case Builtin::BI__sync_fetch_and_and_2:
4617 case Builtin::BI__sync_fetch_and_and_4:
4618 case Builtin::BI__sync_fetch_and_and_8:
4619 case Builtin::BI__sync_fetch_and_and_16:
4620 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
4621 case Builtin::BI__sync_fetch_and_xor_1:
4622 case Builtin::BI__sync_fetch_and_xor_2:
4623 case Builtin::BI__sync_fetch_and_xor_4:
4624 case Builtin::BI__sync_fetch_and_xor_8:
4625 case Builtin::BI__sync_fetch_and_xor_16:
4626 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
4627 case Builtin::BI__sync_fetch_and_nand_1:
4628 case Builtin::BI__sync_fetch_and_nand_2:
4629 case Builtin::BI__sync_fetch_and_nand_4:
4630 case Builtin::BI__sync_fetch_and_nand_8:
4631 case Builtin::BI__sync_fetch_and_nand_16:
4632 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
4633
4634 // Clang extensions: not overloaded yet.
4635 case Builtin::BI__sync_fetch_and_min:
4636 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
4637 case Builtin::BI__sync_fetch_and_max:
4638 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
4639 case Builtin::BI__sync_fetch_and_umin:
4640 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
4641 case Builtin::BI__sync_fetch_and_umax:
4642 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
4643
4644 case Builtin::BI__sync_add_and_fetch_1:
4645 case Builtin::BI__sync_add_and_fetch_2:
4646 case Builtin::BI__sync_add_and_fetch_4:
4647 case Builtin::BI__sync_add_and_fetch_8:
4648 case Builtin::BI__sync_add_and_fetch_16:
4649 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
4650 llvm::Instruction::Add);
4651 case Builtin::BI__sync_sub_and_fetch_1:
4652 case Builtin::BI__sync_sub_and_fetch_2:
4653 case Builtin::BI__sync_sub_and_fetch_4:
4654 case Builtin::BI__sync_sub_and_fetch_8:
4655 case Builtin::BI__sync_sub_and_fetch_16:
4656 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
4657 llvm::Instruction::Sub);
4658 case Builtin::BI__sync_and_and_fetch_1:
4659 case Builtin::BI__sync_and_and_fetch_2:
4660 case Builtin::BI__sync_and_and_fetch_4:
4661 case Builtin::BI__sync_and_and_fetch_8:
4662 case Builtin::BI__sync_and_and_fetch_16:
4663 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
4664 llvm::Instruction::And);
4665 case Builtin::BI__sync_or_and_fetch_1:
4666 case Builtin::BI__sync_or_and_fetch_2:
4667 case Builtin::BI__sync_or_and_fetch_4:
4668 case Builtin::BI__sync_or_and_fetch_8:
4669 case Builtin::BI__sync_or_and_fetch_16:
4670 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
4671 llvm::Instruction::Or);
4672 case Builtin::BI__sync_xor_and_fetch_1:
4673 case Builtin::BI__sync_xor_and_fetch_2:
4674 case Builtin::BI__sync_xor_and_fetch_4:
4675 case Builtin::BI__sync_xor_and_fetch_8:
4676 case Builtin::BI__sync_xor_and_fetch_16:
4677 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
4678 llvm::Instruction::Xor);
4679 case Builtin::BI__sync_nand_and_fetch_1:
4680 case Builtin::BI__sync_nand_and_fetch_2:
4681 case Builtin::BI__sync_nand_and_fetch_4:
4682 case Builtin::BI__sync_nand_and_fetch_8:
4683 case Builtin::BI__sync_nand_and_fetch_16:
4684 return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
4685 llvm::Instruction::And, true);
4686
4687 case Builtin::BI__sync_val_compare_and_swap_1:
4688 case Builtin::BI__sync_val_compare_and_swap_2:
4689 case Builtin::BI__sync_val_compare_and_swap_4:
4690 case Builtin::BI__sync_val_compare_and_swap_8:
4691 case Builtin::BI__sync_val_compare_and_swap_16:
4692 return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
4693
4694 case Builtin::BI__sync_bool_compare_and_swap_1:
4695 case Builtin::BI__sync_bool_compare_and_swap_2:
4696 case Builtin::BI__sync_bool_compare_and_swap_4:
4697 case Builtin::BI__sync_bool_compare_and_swap_8:
4698 case Builtin::BI__sync_bool_compare_and_swap_16:
4699 return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
4700
4701 case Builtin::BI__sync_swap_1:
4702 case Builtin::BI__sync_swap_2:
4703 case Builtin::BI__sync_swap_4:
4704 case Builtin::BI__sync_swap_8:
4705 case Builtin::BI__sync_swap_16:
4706 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4707
4708 case Builtin::BI__sync_lock_test_and_set_1:
4709 case Builtin::BI__sync_lock_test_and_set_2:
4710 case Builtin::BI__sync_lock_test_and_set_4:
4711 case Builtin::BI__sync_lock_test_and_set_8:
4712 case Builtin::BI__sync_lock_test_and_set_16:
4713 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
4714
4715 case Builtin::BI__sync_lock_release_1:
4716 case Builtin::BI__sync_lock_release_2:
4717 case Builtin::BI__sync_lock_release_4:
4718 case Builtin::BI__sync_lock_release_8:
4719 case Builtin::BI__sync_lock_release_16: {
4720 Address Ptr = CheckAtomicAlignment(*this, E);
4721 QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4722
4723 llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4724 getContext().getTypeSize(ElTy));
4725 llvm::StoreInst *Store =
4726 Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
4727 Store->setAtomic(llvm::AtomicOrdering::Release);
4728 return RValue::get(nullptr);
4729 }
4730
4731 case Builtin::BI__sync_synchronize: {
4732 // We assume this is supposed to correspond to a C++0x-style
4733 // sequentially-consistent fence (i.e. this is only usable for
4734 // synchronization, not device I/O or anything like that). This intrinsic
4735 // is really badly designed in the sense that in theory, there isn't
4736 // any way to safely use it... but in practice, it mostly works
4737 // to use it with non-atomic loads and stores to get acquire/release
4738 // semantics.
4739 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
4740 return RValue::get(nullptr);
4741 }
4742
4743 case Builtin::BI__builtin_nontemporal_load:
4744 return RValue::get(EmitNontemporalLoad(*this, E));
4745 case Builtin::BI__builtin_nontemporal_store:
4746 return RValue::get(EmitNontemporalStore(*this, E));
4747 case Builtin::BI__c11_atomic_is_lock_free:
4748 case Builtin::BI__atomic_is_lock_free: {
4749 // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
4750 // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
4751 // _Atomic(T) is always properly-aligned.
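// For instance (illustrative), __c11_atomic_is_lock_free(sizeof(T)) lowers to
// a call to __atomic_is_lock_free(sizeof(T), (void *)0), while the __atomic_
// form forwards its second argument as the pointer.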
4752 const char *LibCallName = "__atomic_is_lock_free";
4753 CallArgList Args;
4754 Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
4755 getContext().getSizeType());
4756 if (BuiltinID == Builtin::BI__atomic_is_lock_free)
4757 Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
4758 getContext().VoidPtrTy);
4759 else
4760 Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
4761 getContext().VoidPtrTy);
4762 const CGFunctionInfo &FuncInfo =
4763 CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
4764 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
4765 llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
4766 return EmitCall(FuncInfo, CGCallee::forDirect(Func),
4767 ReturnValueSlot(), Args);
4768 }
4769
4770 case Builtin::BI__atomic_test_and_set: {
4771 // Look at the argument type to determine whether this is a volatile
4772 // operation. The parameter type is always volatile.
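// In short, __atomic_test_and_set becomes an i8 atomicrmw xchg of the value 1
// at the requested ordering; the previous byte compared against zero gives
// the boolean result (see the CreateIsNotNull calls below).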
4773 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4774 bool Volatile =
4775 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4776
4777 Address Ptr =
4778 EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
4779
4780 Value *NewVal = Builder.getInt8(1);
4781 Value *Order = EmitScalarExpr(E->getArg(1));
4782 if (isa<llvm::ConstantInt>(Order)) {
4783 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4784 AtomicRMWInst *Result = nullptr;
4785 switch (ord) {
4786 case 0: // memory_order_relaxed
4787 default: // invalid order
4788 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4789 llvm::AtomicOrdering::Monotonic);
4790 break;
4791 case 1: // memory_order_consume
4792 case 2: // memory_order_acquire
4793 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4794 llvm::AtomicOrdering::Acquire);
4795 break;
4796 case 3: // memory_order_release
4797 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4798 llvm::AtomicOrdering::Release);
4799 break;
4800 case 4: // memory_order_acq_rel
4801
4802 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4803 llvm::AtomicOrdering::AcquireRelease);
4804 break;
4805 case 5: // memory_order_seq_cst
4806 Result = Builder.CreateAtomicRMW(
4807 llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
4808 llvm::AtomicOrdering::SequentiallyConsistent);
4809 break;
4810 }
4811 Result->setVolatile(Volatile);
4812 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4813 }
4814
4815 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4816
4817 llvm::BasicBlock *BBs[5] = {
4818 createBasicBlock("monotonic", CurFn),
4819 createBasicBlock("acquire", CurFn),
4820 createBasicBlock("release", CurFn),
4821 createBasicBlock("acqrel", CurFn),
4822 createBasicBlock("seqcst", CurFn)
4823 };
4824 llvm::AtomicOrdering Orders[5] = {
4825 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
4826 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
4827 llvm::AtomicOrdering::SequentiallyConsistent};
4828
4829 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4830 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4831
4832 Builder.SetInsertPoint(ContBB);
4833 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
4834
4835 for (unsigned i = 0; i < 5; ++i) {
4836 Builder.SetInsertPoint(BBs[i]);
4837 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
4838 Ptr, NewVal, Orders[i]);
4839 RMW->setVolatile(Volatile);
4840 Result->addIncoming(RMW, BBs[i]);
4841 Builder.CreateBr(ContBB);
4842 }
4843
4844 SI->addCase(Builder.getInt32(0), BBs[0]);
4845 SI->addCase(Builder.getInt32(1), BBs[1]);
4846 SI->addCase(Builder.getInt32(2), BBs[1]);
4847 SI->addCase(Builder.getInt32(3), BBs[2]);
4848 SI->addCase(Builder.getInt32(4), BBs[3]);
4849 SI->addCase(Builder.getInt32(5), BBs[4]);
4850
4851 Builder.SetInsertPoint(ContBB);
4852 return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
4853 }
4854
4855 case Builtin::BI__atomic_clear: {
4856 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
4857 bool Volatile =
4858 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
4859
4860 Address Ptr = EmitPointerWithAlignment(E->getArg(0));
4861 Ptr = Ptr.withElementType(Int8Ty);
4862 Value *NewVal = Builder.getInt8(0);
4863 Value *Order = EmitScalarExpr(E->getArg(1));
4864 if (isa<llvm::ConstantInt>(Order)) {
4865 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4866 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4867 switch (ord) {
4868 case 0: // memory_order_relaxed
4869 default: // invalid order
4870 Store->setOrdering(llvm::AtomicOrdering::Monotonic);
4871 break;
4872 case 3: // memory_order_release
4873 Store->setOrdering(llvm::AtomicOrdering::Release);
4874 break;
4875 case 5: // memory_order_seq_cst
4876 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
4877 break;
4878 }
4879 return RValue::get(nullptr);
4880 }
4881
4882 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4883
4884 llvm::BasicBlock *BBs[3] = {
4885 createBasicBlock("monotonic", CurFn),
4886 createBasicBlock("release", CurFn),
4887 createBasicBlock("seqcst", CurFn)
4888 };
4889 llvm::AtomicOrdering Orders[3] = {
4890 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
4891 llvm::AtomicOrdering::SequentiallyConsistent};
4892
4893 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4894 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
4895
4896 for (unsigned i = 0; i < 3; ++i) {
4897 Builder.SetInsertPoint(BBs[i]);
4898 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
4899 Store->setOrdering(Orders[i]);
4900 Builder.CreateBr(ContBB);
4901 }
4902
4903 SI->addCase(Builder.getInt32(0), BBs[0]);
4904 SI->addCase(Builder.getInt32(3), BBs[1]);
4905 SI->addCase(Builder.getInt32(5), BBs[2]);
4906
4907 Builder.SetInsertPoint(ContBB);
4908 return RValue::get(nullptr);
4909 }
4910
4911 case Builtin::BI__atomic_thread_fence:
4912 case Builtin::BI__atomic_signal_fence:
4913 case Builtin::BI__c11_atomic_thread_fence:
4914 case Builtin::BI__c11_atomic_signal_fence: {
4915 llvm::SyncScope::ID SSID;
4916 if (BuiltinID == Builtin::BI__atomic_signal_fence ||
4917 BuiltinID == Builtin::BI__c11_atomic_signal_fence)
4918 SSID = llvm::SyncScope::SingleThread;
4919 else
4920 SSID = llvm::SyncScope::System;
4921 Value *Order = EmitScalarExpr(E->getArg(0));
4922 if (isa<llvm::ConstantInt>(Order)) {
4923 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
4924 switch (ord) {
4925 case 0: // memory_order_relaxed
4926 default: // invalid order
4927 break;
4928 case 1: // memory_order_consume
4929 case 2: // memory_order_acquire
4930 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4931 break;
4932 case 3: // memory_order_release
4933 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4934 break;
4935 case 4: // memory_order_acq_rel
4936 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4937 break;
4938 case 5: // memory_order_seq_cst
4939 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4940 break;
4941 }
4942 return RValue::get(nullptr);
4943 }
4944
4945 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
4946 AcquireBB = createBasicBlock("acquire", CurFn);
4947 ReleaseBB = createBasicBlock("release", CurFn);
4948 AcqRelBB = createBasicBlock("acqrel", CurFn);
4949 SeqCstBB = createBasicBlock("seqcst", CurFn);
4950 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
4951
4952 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
4953 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
4954
4955 Builder.SetInsertPoint(AcquireBB);
4956 Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
4957 Builder.CreateBr(ContBB);
4958 SI->addCase(Builder.getInt32(1), AcquireBB);
4959 SI->addCase(Builder.getInt32(2), AcquireBB);
4960
4961 Builder.SetInsertPoint(ReleaseBB);
4962 Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
4963 Builder.CreateBr(ContBB);
4964 SI->addCase(Builder.getInt32(3), ReleaseBB);
4965
4966 Builder.SetInsertPoint(AcqRelBB);
4967 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
4968 Builder.CreateBr(ContBB);
4969 SI->addCase(Builder.getInt32(4), AcqRelBB);
4970
4971 Builder.SetInsertPoint(SeqCstBB);
4972 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
4973 Builder.CreateBr(ContBB);
4974 SI->addCase(Builder.getInt32(5), SeqCstBB);
4975
4976 Builder.SetInsertPoint(ContBB);
4977 return RValue::get(nullptr);
4978 }
4979
4980 case Builtin::BI__builtin_signbit:
4981 case Builtin::BI__builtin_signbitf:
4982 case Builtin::BI__builtin_signbitl: {
4983 return RValue::get(
4984 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
4985 ConvertType(E->getType())));
4986 }
4987 case Builtin::BI__warn_memset_zero_len:
4988 return RValue::getIgnored();
4989 case Builtin::BI__annotation: {
4990 // Re-encode each wide string to UTF8 and make an MDString.
4991 SmallVector<llvm::Metadata *, 1> Strings;
4992 for (const Expr *Arg : E->arguments()) {
4993 const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
4994 assert(Str->getCharByteWidth() == 2);
4995 StringRef WideBytes = Str->getBytes();
4996 std::string StrUtf8;
4997 if (!convertUTF16ToUTF8String(
4998 ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
4999 CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
5000 continue;
5001 }
5002 Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
5003 }
5004
5005 // Build an MDTuple of MDStrings and emit the intrinsic call.
5006 llvm::Function *F =
5007 CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
5008 MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
5009 Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
5010 return RValue::getIgnored();
5011 }
5012 case Builtin::BI__builtin_annotation: {
5013 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
5014 llvm::Function *F =
5015 CGM.getIntrinsic(llvm::Intrinsic::annotation,
5016 {AnnVal->getType(), CGM.ConstGlobalsPtrTy});
5017
5018 // Get the annotation string, going through casts. Sema requires this to be a
5019 // non-wide string literal, potentially cast, so the cast<> is safe.
5020 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
5021 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
5022 return RValue::get(
5023 EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));
5024 }
5025 case Builtin::BI__builtin_addcb:
5026 case Builtin::BI__builtin_addcs:
5027 case Builtin::BI__builtin_addc:
5028 case Builtin::BI__builtin_addcl:
5029 case Builtin::BI__builtin_addcll:
5030 case Builtin::BI__builtin_subcb:
5031 case Builtin::BI__builtin_subcs:
5032 case Builtin::BI__builtin_subc:
5033 case Builtin::BI__builtin_subcl:
5034 case Builtin::BI__builtin_subcll: {
5035
5036 // We translate all of these builtins from expressions of the form:
5037 // int x = ..., y = ..., carryin = ..., carryout, result;
5038 // result = __builtin_addc(x, y, carryin, &carryout);
5039 //
5040 // to LLVM IR of the form:
5041 //
5042 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
5043 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
5044 // %carry1 = extractvalue {i32, i1} %tmp1, 1
5045 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
5046 // i32 %carryin)
5047 // %result = extractvalue {i32, i1} %tmp2, 0
5048 // %carry2 = extractvalue {i32, i1} %tmp2, 1
5049 // %tmp3 = or i1 %carry1, %carry2
5050 // %tmp4 = zext i1 %tmp3 to i32
5051 // store i32 %tmp4, i32* %carryout
5052
5053 // Scalarize our inputs.
5054 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5055 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5056 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
5057 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
5058
5059 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
5060 llvm::Intrinsic::ID IntrinsicId;
5061 switch (BuiltinID) {
5062 default: llvm_unreachable("Unknown multiprecision builtin id.");
5063 case Builtin::BI__builtin_addcb:
5064 case Builtin::BI__builtin_addcs:
5065 case Builtin::BI__builtin_addc:
5066 case Builtin::BI__builtin_addcl:
5067 case Builtin::BI__builtin_addcll:
5068 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5069 break;
5070 case Builtin::BI__builtin_subcb:
5071 case Builtin::BI__builtin_subcs:
5072 case Builtin::BI__builtin_subc:
5073 case Builtin::BI__builtin_subcl:
5074 case Builtin::BI__builtin_subcll:
5075 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5076 break;
5077 }
5078
5079 // Construct our resulting LLVM IR expression.
5080 llvm::Value *Carry1;
5081 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
5082 X, Y, Carry1);
5083 llvm::Value *Carry2;
5084 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
5085 Sum1, Carryin, Carry2);
5086 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
5087 X->getType());
5088 Builder.CreateStore(CarryOut, CarryOutPtr);
5089 return RValue::get(Sum2);
5090 }
5091
5092 case Builtin::BI__builtin_add_overflow:
5093 case Builtin::BI__builtin_sub_overflow:
5094 case Builtin::BI__builtin_mul_overflow: {
5095 const clang::Expr *LeftArg = E->getArg(0);
5096 const clang::Expr *RightArg = E->getArg(1);
5097 const clang::Expr *ResultArg = E->getArg(2);
5098
5099 clang::QualType ResultQTy =
5100 ResultArg->getType()->castAs<PointerType>()->getPointeeType();
5101
5102 WidthAndSignedness LeftInfo =
5103 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
5104 WidthAndSignedness RightInfo =
5105 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
5106 WidthAndSignedness ResultInfo =
5107 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
5108
5109 // Handle mixed-sign multiplication as a special case, because adding
5110 // runtime or backend support for our generic irgen would be too expensive.
5111 if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
5112 return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
5113 RightInfo, ResultArg, ResultQTy,
5114 ResultInfo);
5115
5116 if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,
5117 ResultInfo))
5118 return EmitCheckedUnsignedMultiplySignedResult(
5119 *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,
5120 ResultInfo);
5121
5122 WidthAndSignedness EncompassingInfo =
5123 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
5124
5125 llvm::Type *EncompassingLLVMTy =
5126 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
5127
5128 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
5129
5130 llvm::Intrinsic::ID IntrinsicId;
5131 switch (BuiltinID) {
5132 default:
5133 llvm_unreachable("Unknown overflow builtin id.");
5134 case Builtin::BI__builtin_add_overflow:
5135 IntrinsicId = EncompassingInfo.Signed
5136 ? llvm::Intrinsic::sadd_with_overflow
5137 : llvm::Intrinsic::uadd_with_overflow;
5138 break;
5139 case Builtin::BI__builtin_sub_overflow:
5140 IntrinsicId = EncompassingInfo.Signed
5141 ? llvm::Intrinsic::ssub_with_overflow
5142 : llvm::Intrinsic::usub_with_overflow;
5143 break;
5144 case Builtin::BI__builtin_mul_overflow:
5145 IntrinsicId = EncompassingInfo.Signed
5146 ? llvm::Intrinsic::smul_with_overflow
5147 : llvm::Intrinsic::umul_with_overflow;
5148 break;
5149 }
5150
5151 llvm::Value *Left = EmitScalarExpr(LeftArg);
5152 llvm::Value *Right = EmitScalarExpr(RightArg);
5153 Address ResultPtr = EmitPointerWithAlignment(ResultArg);
5154
5155 // Extend each operand to the encompassing type.
5156 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
5157 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
5158
5159 // Perform the operation on the extended values.
5160 llvm::Value *Overflow, *Result;
5161 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
5162
5163 if (EncompassingInfo.Width > ResultInfo.Width) {
5164 // The encompassing type is wider than the result type, so we need to
5165 // truncate it.
5166 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
5167
5168 // To see if the truncation caused an overflow, we will extend
5169 // the result and then compare it to the original result.
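// E.g. for __builtin_add_overflow(int, int, signed char *) the addition is
// performed in i32, the sum is truncated to i8, sign-extended back to i32,
// and compared against the untruncated sum (illustrative walk-through of the
// code below).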
5170 llvm::Value *ResultTruncExt = Builder.CreateIntCast(
5171 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
5172 llvm::Value *TruncationOverflow =
5173 Builder.CreateICmpNE(Result, ResultTruncExt);
5174
5175 Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
5176 Result = ResultTrunc;
5177 }
5178
5179 // Finally, store the result using the pointer.
5180 bool isVolatile =
5181 ResultArg->getType()->getPointeeType().isVolatileQualified();
5182 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
5183
5184 return RValue::get(Overflow);
5185 }
5186
5187 case Builtin::BI__builtin_uadd_overflow:
5188 case Builtin::BI__builtin_uaddl_overflow:
5189 case Builtin::BI__builtin_uaddll_overflow:
5190 case Builtin::BI__builtin_usub_overflow:
5191 case Builtin::BI__builtin_usubl_overflow:
5192 case Builtin::BI__builtin_usubll_overflow:
5193 case Builtin::BI__builtin_umul_overflow:
5194 case Builtin::BI__builtin_umull_overflow:
5195 case Builtin::BI__builtin_umulll_overflow:
5196 case Builtin::BI__builtin_sadd_overflow:
5197 case Builtin::BI__builtin_saddl_overflow:
5198 case Builtin::BI__builtin_saddll_overflow:
5199 case Builtin::BI__builtin_ssub_overflow:
5200 case Builtin::BI__builtin_ssubl_overflow:
5201 case Builtin::BI__builtin_ssubll_overflow:
5202 case Builtin::BI__builtin_smul_overflow:
5203 case Builtin::BI__builtin_smull_overflow:
5204 case Builtin::BI__builtin_smulll_overflow: {
5205
5206 // We translate all of these builtins directly to the relevant llvm IR node.
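// For example (illustrative), __builtin_sadd_overflow(x, y, &sum) with 'int'
// operands becomes a call to llvm.sadd.with.overflow.i32; the first result is
// stored through the pointer and the i1 overflow flag is returned.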
5207
5208 // Scalarize our inputs.
5209 llvm::Value *X = EmitScalarExpr(E->getArg(0));
5210 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
5211 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
5212
5213 // Decide which of the overflow intrinsics we are lowering to:
5214 llvm::Intrinsic::ID IntrinsicId;
5215 switch (BuiltinID) {
5216 default: llvm_unreachable("Unknown overflow builtin id.");
5217 case Builtin::BI__builtin_uadd_overflow:
5218 case Builtin::BI__builtin_uaddl_overflow:
5219 case Builtin::BI__builtin_uaddll_overflow:
5220 IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
5221 break;
5222 case Builtin::BI__builtin_usub_overflow:
5223 case Builtin::BI__builtin_usubl_overflow:
5224 case Builtin::BI__builtin_usubll_overflow:
5225 IntrinsicId = llvm::Intrinsic::usub_with_overflow;
5226 break;
5227 case Builtin::BI__builtin_umul_overflow:
5228 case Builtin::BI__builtin_umull_overflow:
5229 case Builtin::BI__builtin_umulll_overflow:
5230 IntrinsicId = llvm::Intrinsic::umul_with_overflow;
5231 break;
5232 case Builtin::BI__builtin_sadd_overflow:
5233 case Builtin::BI__builtin_saddl_overflow:
5234 case Builtin::BI__builtin_saddll_overflow:
5235 IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
5236 break;
5237 case Builtin::BI__builtin_ssub_overflow:
5238 case Builtin::BI__builtin_ssubl_overflow:
5239 case Builtin::BI__builtin_ssubll_overflow:
5240 IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
5241 break;
5242 case Builtin::BI__builtin_smul_overflow:
5243 case Builtin::BI__builtin_smull_overflow:
5244 case Builtin::BI__builtin_smulll_overflow:
5245 IntrinsicId = llvm::Intrinsic::smul_with_overflow;
5246 break;
5247 }
5248
5249
5250 llvm::Value *Carry;
5251 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
5252 Builder.CreateStore(Sum, SumOutPtr);
5253
5254 return RValue::get(Carry);
5255 }
5256 case Builtin::BIaddressof:
5257 case Builtin::BI__addressof:
5258 case Builtin::BI__builtin_addressof:
5259 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5260 case Builtin::BI__builtin_function_start:
5261 return RValue::get(CGM.GetFunctionStart(
5262 E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));
5263 case Builtin::BI__builtin_operator_new:
5264 return EmitBuiltinNewDeleteCall(
5265 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
5266 case Builtin::BI__builtin_operator_delete:
5267 EmitBuiltinNewDeleteCall(
5268 E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
5269 return RValue::get(nullptr);
5270
5271 case Builtin::BI__builtin_is_aligned:
5272 return EmitBuiltinIsAligned(E);
5273 case Builtin::BI__builtin_align_up:
5274 return EmitBuiltinAlignTo(E, true);
5275 case Builtin::BI__builtin_align_down:
5276 return EmitBuiltinAlignTo(E, false);
5277
5278 case Builtin::BI__noop:
5279 // __noop always evaluates to an integer literal zero.
5280 return RValue::get(ConstantInt::get(IntTy, 0));
5281 case Builtin::BI__builtin_call_with_static_chain: {
5282 const CallExpr *Call = cast<CallExpr>(E->getArg(0));
5283 const Expr *Chain = E->getArg(1);
5284 return EmitCall(Call->getCallee()->getType(),
5285 EmitCallee(Call->getCallee()), Call, ReturnValue,
5286 EmitScalarExpr(Chain));
5287 }
5288 case Builtin::BI_InterlockedExchange8:
5289 case Builtin::BI_InterlockedExchange16:
5290 case Builtin::BI_InterlockedExchange:
5291 case Builtin::BI_InterlockedExchangePointer:
5292 return RValue::get(
5293 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
5294 case Builtin::BI_InterlockedCompareExchangePointer:
5295 case Builtin::BI_InterlockedCompareExchangePointer_nf: {
5296 llvm::Type *RTy;
5297 llvm::IntegerType *IntType = IntegerType::get(
5298 getLLVMContext(), getContext().getTypeSize(E->getType()));
5299
5300 Address DestAddr = CheckAtomicAlignment(*this, E);
5301
5302 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
5303 RTy = Exchange->getType();
5304 Exchange = Builder.CreatePtrToInt(Exchange, IntType);
5305
5306 llvm::Value *Comparand =
5307 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
5308
5309 auto Ordering =
5310 BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
5311 AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
5312
5313 auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
5314 Ordering, Ordering);
5315 Result->setVolatile(true);
5316
5317 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
5318 0),
5319 RTy));
5320 }
5321 case Builtin::BI_InterlockedCompareExchange8:
5322 case Builtin::BI_InterlockedCompareExchange16:
5323 case Builtin::BI_InterlockedCompareExchange:
5324 case Builtin::BI_InterlockedCompareExchange64:
5325 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange, E));
5326 case Builtin::BI_InterlockedIncrement16:
5327 case Builtin::BI_InterlockedIncrement:
5328 return RValue::get(
5329 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
5330 case Builtin::BI_InterlockedDecrement16:
5331 case Builtin::BI_InterlockedDecrement:
5332 return RValue::get(
5333 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
5334 case Builtin::BI_InterlockedAnd8:
5335 case Builtin::BI_InterlockedAnd16:
5336 case Builtin::BI_InterlockedAnd:
5337 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
5338 case Builtin::BI_InterlockedExchangeAdd8:
5339 case Builtin::BI_InterlockedExchangeAdd16:
5340 case Builtin::BI_InterlockedExchangeAdd:
5341 return RValue::get(
5342 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
5343 case Builtin::BI_InterlockedExchangeSub8:
5344 case Builtin::BI_InterlockedExchangeSub16:
5345 case Builtin::BI_InterlockedExchangeSub:
5346 return RValue::get(
5347 EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
5348 case Builtin::BI_InterlockedOr8:
5349 case Builtin::BI_InterlockedOr16:
5350 case Builtin::BI_InterlockedOr:
5351 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
5352 case Builtin::BI_InterlockedXor8:
5353 case Builtin::BI_InterlockedXor16:
5354 case Builtin::BI_InterlockedXor:
5355 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
5356
5357 case Builtin::BI_bittest64:
5358 case Builtin::BI_bittest:
5359 case Builtin::BI_bittestandcomplement64:
5360 case Builtin::BI_bittestandcomplement:
5361 case Builtin::BI_bittestandreset64:
5362 case Builtin::BI_bittestandreset:
5363 case Builtin::BI_bittestandset64:
5364 case Builtin::BI_bittestandset:
5365 case Builtin::BI_interlockedbittestandreset:
5366 case Builtin::BI_interlockedbittestandreset64:
5367 case Builtin::BI_interlockedbittestandset64:
5368 case Builtin::BI_interlockedbittestandset:
5369 case Builtin::BI_interlockedbittestandset_acq:
5370 case Builtin::BI_interlockedbittestandset_rel:
5371 case Builtin::BI_interlockedbittestandset_nf:
5372 case Builtin::BI_interlockedbittestandreset_acq:
5373 case Builtin::BI_interlockedbittestandreset_rel:
5374 case Builtin::BI_interlockedbittestandreset_nf:
5375 return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
5376
5377 // These builtins exist to emit regular volatile loads and stores not
5378 // affected by the -fms-volatile setting.
5379 case Builtin::BI__iso_volatile_load8:
5380 case Builtin::BI__iso_volatile_load16:
5381 case Builtin::BI__iso_volatile_load32:
5382 case Builtin::BI__iso_volatile_load64:
5383 return RValue::get(EmitISOVolatileLoad(*this, E));
5384 case Builtin::BI__iso_volatile_store8:
5385 case Builtin::BI__iso_volatile_store16:
5386 case Builtin::BI__iso_volatile_store32:
5387 case Builtin::BI__iso_volatile_store64:
5388 return RValue::get(EmitISOVolatileStore(*this, E));
5389
5390 case Builtin::BI__builtin_ptrauth_sign_constant:
5391 return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
5392
5393 case Builtin::BI__builtin_ptrauth_auth:
5394 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5395 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5396 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5397 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5398 case Builtin::BI__builtin_ptrauth_strip: {
5399 // Emit the arguments.
5400 SmallVector<llvm::Value *, 5> Args;
5401 for (auto argExpr : E->arguments())
5402 Args.push_back(EmitScalarExpr(argExpr));
5403
5404 // Cast the value to intptr_t, saving its original type.
5405 llvm::Type *OrigValueType = Args[0]->getType();
5406 if (OrigValueType->isPointerTy())
5407 Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);
5408
5409 switch (BuiltinID) {
5410 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5411 if (Args[4]->getType()->isPointerTy())
5412 Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);
5413 [[fallthrough]];
5414
5415 case Builtin::BI__builtin_ptrauth_auth:
5416 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5417 if (Args[2]->getType()->isPointerTy())
5418 Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy);
5419 break;
5420
5421 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5422 if (Args[1]->getType()->isPointerTy())
5423 Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);
5424 break;
5425
5426 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5427 case Builtin::BI__builtin_ptrauth_strip:
5428 break;
5429 }
5430
5431 // Call the intrinsic.
5432 auto IntrinsicID = [&]() -> unsigned {
5433 switch (BuiltinID) {
5434 case Builtin::BI__builtin_ptrauth_auth:
5435 return llvm::Intrinsic::ptrauth_auth;
5436 case Builtin::BI__builtin_ptrauth_auth_and_resign:
5437 return llvm::Intrinsic::ptrauth_resign;
5438 case Builtin::BI__builtin_ptrauth_blend_discriminator:
5439 return llvm::Intrinsic::ptrauth_blend;
5440 case Builtin::BI__builtin_ptrauth_sign_generic_data:
5441 return llvm::Intrinsic::ptrauth_sign_generic;
5442 case Builtin::BI__builtin_ptrauth_sign_unauthenticated:
5443 return llvm::Intrinsic::ptrauth_sign;
5444 case Builtin::BI__builtin_ptrauth_strip:
5445 return llvm::Intrinsic::ptrauth_strip;
5446 }
5447 llvm_unreachable("bad ptrauth intrinsic");
5448 }();
5449 auto Intrinsic = CGM.getIntrinsic(IntrinsicID);
5450 llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);
5451
5452 if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&
5453 BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&
5454 OrigValueType->isPointerTy()) {
5455 Result = Builder.CreateIntToPtr(Result, OrigValueType);
5456 }
5457 return RValue::get(Result);
5458 }
5459
5460 case Builtin::BI__exception_code:
5461 case Builtin::BI_exception_code:
5462 return RValue::get(EmitSEHExceptionCode());
5463 case Builtin::BI__exception_info:
5464 case Builtin::BI_exception_info:
5465 return RValue::get(EmitSEHExceptionInfo());
5466 case Builtin::BI__abnormal_termination:
5467 case Builtin::BI_abnormal_termination:
5468 return RValue::get(EmitSEHAbnormalTermination());
5469 case Builtin::BI_setjmpex:
5470 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5471 E->getArg(0)->getType()->isPointerType())
5472 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5473 break;
5474 case Builtin::BI_setjmp:
5475 if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&
5476 E->getArg(0)->getType()->isPointerType()) {
5477 if (getTarget().getTriple().getArch() == llvm::Triple::x86)
5478 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
5479 else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
5480 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
5481 return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
5482 }
5483 break;
5484
5485 // C++ std:: builtins.
5486 case Builtin::BImove:
5487 case Builtin::BImove_if_noexcept:
5488 case Builtin::BIforward:
5489 case Builtin::BIforward_like:
5490 case Builtin::BIas_const:
5491 return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));
5492 case Builtin::BI__GetExceptionInfo: {
5493 if (llvm::GlobalVariable *GV =
5494 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
5495 return RValue::get(GV);
5496 break;
5497 }
5498
5499 case Builtin::BI__fastfail:
5500 return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
5501
5502 case Builtin::BI__builtin_coro_id:
5503 return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
5504 case Builtin::BI__builtin_coro_promise:
5505 return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
5506 case Builtin::BI__builtin_coro_resume:
5507 EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
5508 return RValue::get(nullptr);
5509 case Builtin::BI__builtin_coro_frame:
5510 return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
5511 case Builtin::BI__builtin_coro_noop:
5512 return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
5513 case Builtin::BI__builtin_coro_free:
5514 return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
5515 case Builtin::BI__builtin_coro_destroy:
5516 EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
5517 return RValue::get(nullptr);
5518 case Builtin::BI__builtin_coro_done:
5519 return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
5520 case Builtin::BI__builtin_coro_alloc:
5521 return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
5522 case Builtin::BI__builtin_coro_begin:
5523 return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
5524 case Builtin::BI__builtin_coro_end:
5525 return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
5526 case Builtin::BI__builtin_coro_suspend:
5527 return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
5528 case Builtin::BI__builtin_coro_size:
5529 return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);
5530 case Builtin::BI__builtin_coro_align:
5531 return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);
5532
5533 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
5534 case Builtin::BIread_pipe:
5535 case Builtin::BIwrite_pipe: {
5536 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5537 *Arg1 = EmitScalarExpr(E->getArg(1));
5538 CGOpenCLRuntime OpenCLRT(CGM);
5539 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5540 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5541
5542 // Type of the generic packet parameter.
5543 unsigned GenericAS =
5544 getContext().getTargetAddressSpace(LangAS::opencl_generic);
5545 llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);
5546
5547 // Determine which overloaded version we should generate the call for.
5548 if (2U == E->getNumArgs()) {
5549 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
5550 : "__write_pipe_2";
5551 // Create a generic function type so the call works with any builtin or
5552 // user-defined type.
5553 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
5554 llvm::FunctionType *FTy = llvm::FunctionType::get(
5555 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5556 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
5557 return RValue::get(
5558 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5559 {Arg0, BCast, PacketSize, PacketAlign}));
5560 } else {
5561 assert(4 == E->getNumArgs() &&
5562 "Illegal number of parameters to pipe function");
5563 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
5564 : "__write_pipe_4";
5565
5566 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
5567 Int32Ty, Int32Ty};
5568 Value *Arg2 = EmitScalarExpr(E->getArg(2)),
5569 *Arg3 = EmitScalarExpr(E->getArg(3));
5570 llvm::FunctionType *FTy = llvm::FunctionType::get(
5571 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5572 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
5573 // We know the third argument is an integer type, but we may need to cast
5574 // it to i32.
5575 if (Arg2->getType() != Int32Ty)
5576 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
5577 return RValue::get(
5578 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5579 {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
5580 }
5581 }
5582 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
5583 // functions
5584 case Builtin::BIreserve_read_pipe:
5585 case Builtin::BIreserve_write_pipe:
5586 case Builtin::BIwork_group_reserve_read_pipe:
5587 case Builtin::BIwork_group_reserve_write_pipe:
5588 case Builtin::BIsub_group_reserve_read_pipe:
5589 case Builtin::BIsub_group_reserve_write_pipe: {
5590 // Composing the mangled name for the function.
5591 const char *Name;
5592 if (BuiltinID == Builtin::BIreserve_read_pipe)
5593 Name = "__reserve_read_pipe";
5594 else if (BuiltinID == Builtin::BIreserve_write_pipe)
5595 Name = "__reserve_write_pipe";
5596 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
5597 Name = "__work_group_reserve_read_pipe";
5598 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
5599 Name = "__work_group_reserve_write_pipe";
5600 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
5601 Name = "__sub_group_reserve_read_pipe";
5602 else
5603 Name = "__sub_group_reserve_write_pipe";
5604
5605 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5606 *Arg1 = EmitScalarExpr(E->getArg(1));
5607 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
5608 CGOpenCLRuntime OpenCLRT(CGM);
5609 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5610 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5611
5612 // Building the generic function prototype.
5613 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
5614 llvm::FunctionType *FTy = llvm::FunctionType::get(
5615 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5616 // We know the second argument is an integer type, but we may need to cast
5617 // it to i32.
5618 if (Arg1->getType() != Int32Ty)
5619 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
5620 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5621 {Arg0, Arg1, PacketSize, PacketAlign}));
5622 }
5623 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
5624 // functions
5625 case Builtin::BIcommit_read_pipe:
5626 case Builtin::BIcommit_write_pipe:
5627 case Builtin::BIwork_group_commit_read_pipe:
5628 case Builtin::BIwork_group_commit_write_pipe:
5629 case Builtin::BIsub_group_commit_read_pipe:
5630 case Builtin::BIsub_group_commit_write_pipe: {
5631 const char *Name;
5632 if (BuiltinID == Builtin::BIcommit_read_pipe)
5633 Name = "__commit_read_pipe";
5634 else if (BuiltinID == Builtin::BIcommit_write_pipe)
5635 Name = "__commit_write_pipe";
5636 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
5637 Name = "__work_group_commit_read_pipe";
5638 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
5639 Name = "__work_group_commit_write_pipe";
5640 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
5641 Name = "__sub_group_commit_read_pipe";
5642 else
5643 Name = "__sub_group_commit_write_pipe";
5644
5645 Value *Arg0 = EmitScalarExpr(E->getArg(0)),
5646 *Arg1 = EmitScalarExpr(E->getArg(1));
5647 CGOpenCLRuntime OpenCLRT(CGM);
5648 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5649 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5650
5651 // Building the generic function prototype.
5652 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
5653 llvm::FunctionType *FTy =
5654 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
5655 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5656
5657 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5658 {Arg0, Arg1, PacketSize, PacketAlign}));
5659 }
5660 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
5661 case Builtin::BIget_pipe_num_packets:
5662 case Builtin::BIget_pipe_max_packets: {
5663 const char *BaseName;
5664 const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();
5665 if (BuiltinID == Builtin::BIget_pipe_num_packets)
5666 BaseName = "__get_pipe_num_packets";
5667 else
5668 BaseName = "__get_pipe_max_packets";
5669 std::string Name = std::string(BaseName) +
5670 std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
5671
5672 // Building the generic function prototype.
5673 Value *Arg0 = EmitScalarExpr(E->getArg(0));
5674 CGOpenCLRuntime OpenCLRT(CGM);
5675 Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
5676 Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
5677 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
5678 llvm::FunctionType *FTy = llvm::FunctionType::get(
5679 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5680
5681 return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5682 {Arg0, PacketSize, PacketAlign}));
5683 }
5684
5685 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
5686 case Builtin::BIto_global:
5687 case Builtin::BIto_local:
5688 case Builtin::BIto_private: {
5689 auto Arg0 = EmitScalarExpr(E->getArg(0));
5690 auto NewArgT = llvm::PointerType::get(
5691 getLLVMContext(),
5692 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5693 auto NewRetT = llvm::PointerType::get(
5694 getLLVMContext(),
5695 CGM.getContext().getTargetAddressSpace(
5696 E->getType()->getPointeeType().getAddressSpace()));
5697 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
5698 llvm::Value *NewArg;
5699 if (Arg0->getType()->getPointerAddressSpace() !=
5700 NewArgT->getPointerAddressSpace())
5701 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
5702 else
5703 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
5704 auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
5705 auto NewCall =
5706 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
5707 return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
5708 ConvertType(E->getType())));
5709 }
5710
5711 // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
5712 // Table 6.13.17.1 specifies four overload forms of enqueue_kernel.
5713 // The code below expands the builtin call to a call to one of the following
5714 // functions that an OpenCL runtime library will have to provide:
5715 // __enqueue_kernel_basic
5716 // __enqueue_kernel_varargs
5717 // __enqueue_kernel_basic_events
5718 // __enqueue_kernel_events_varargs
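// As an illustrative example, the simplest 4-argument form
//   enqueue_kernel(queue, flags, ndrange, ^{ ... });
// is lowered below to
//   i32 __enqueue_kernel_basic(queue_t, i32, ndrange_t *, ptr, ptr)
// where the two trailing pointers are the block invoke function and the block
// literal, both cast to the generic address space.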
5719 case Builtin::BIenqueue_kernel: {
5720 StringRef Name; // Generated function call name
5721 unsigned NumArgs = E->getNumArgs();
5722
5723 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
5724 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5725 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5726
5727 llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
5728 llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
5729 LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
5730 llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);
5731 llvm::Type *RangeTy = NDRangeL.getAddress().getType();
5732
5733 if (NumArgs == 4) {
5734 // The most basic form of the call with parameters:
5735 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
5736 Name = "__enqueue_kernel_basic";
5737 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
5738 GenericVoidPtrTy};
5739 llvm::FunctionType *FTy = llvm::FunctionType::get(
5740 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5741
5742 auto Info =
5743 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5744 llvm::Value *Kernel =
5745 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5746 llvm::Value *Block =
5747 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5748
5749 AttrBuilder B(Builder.getContext());
5750 B.addByValAttr(NDRangeL.getAddress().getElementType());
5751 llvm::AttributeList ByValAttrSet =
5752 llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
5753
5754 auto RTCall =
5755 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
5756 {Queue, Flags, Range, Kernel, Block});
5757 RTCall->setAttributes(ByValAttrSet);
5758 return RValue::get(RTCall);
5759 }
5760 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
5761
5762 // Create a temporary array to hold the sizes of local pointer arguments
5763 // for the block. \p First is the position of the first size argument.
5764 auto CreateArrayForSizeVar = [=](unsigned First)
5765 -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
5766 llvm::APInt ArraySize(32, NumArgs - First);
5767 QualType SizeArrayTy = getContext().getConstantArrayType(
5768 getContext().getSizeType(), ArraySize, nullptr,
5769 ArraySizeModifier::Normal,
5770 /*IndexTypeQuals=*/0);
5771 auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
5772 llvm::Value *TmpPtr = Tmp.getPointer();
5773 llvm::Value *TmpSize = EmitLifetimeStart(
5774 CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5775 llvm::Value *ElemPtr;
5776 // Each of the following arguments specifies the size of the corresponding
5777 // argument passed to the enqueued block.
5778 auto *Zero = llvm::ConstantInt::get(IntTy, 0);
5779 for (unsigned I = First; I < NumArgs; ++I) {
5780 auto *Index = llvm::ConstantInt::get(IntTy, I - First);
5781 auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,
5782 {Zero, Index});
5783 if (I == First)
5784 ElemPtr = GEP;
5785 auto *V =
5786 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
5787 Builder.CreateAlignedStore(
5788 V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
5789 }
5790 return std::tie(ElemPtr, TmpSize, TmpPtr);
5791 };
5792
5793 // Could have events and/or varargs.
5794 if (E->getArg(3)->getType()->isBlockPointerType()) {
5795 // No events passed, but has variadic arguments.
5796 Name = "__enqueue_kernel_varargs";
5797 auto Info =
5798 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
5799 llvm::Value *Kernel =
5800 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5801 auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5802 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5803 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
5804
5805 // Create a vector of the arguments, as well as a constant value to
5806 // express to the runtime the number of variadic arguments.
5807 llvm::Value *const Args[] = {Queue, Flags,
5808 Range, Kernel,
5809 Block, ConstantInt::get(IntTy, NumArgs - 4),
5810 ElemPtr};
5811 llvm::Type *const ArgTys[] = {
5812 QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
5813 GenericVoidPtrTy, IntTy, ElemPtr->getType()};
5814
5815 llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);
5816 auto Call = RValue::get(
5817 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
5818 if (TmpSize)
5819 EmitLifetimeEnd(TmpSize, TmpPtr);
5820 return Call;
5821 }
5822 // Any calls now have event arguments passed.
5823 if (NumArgs >= 7) {
5824 llvm::PointerType *PtrTy = llvm::PointerType::get(
5825 getLLVMContext(),
5826 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
5827
5828 llvm::Value *NumEvents =
5829 Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
5830
5831 // Since SemaOpenCLBuiltinEnqueueKernel allows the fifth and sixth arguments
5832 // to be a null pointer constant (including a literal `0`), we can detect
5833 // that case and emit a null pointer directly.
5834 llvm::Value *EventWaitList = nullptr;
5835 if (E->getArg(4)->isNullPointerConstant(
5836 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5837 EventWaitList = llvm::ConstantPointerNull::get(PtrTy);
5838 } else {
5839 EventWaitList =
5840 E->getArg(4)->getType()->isArrayType()
5841 ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)
5842 : EmitScalarExpr(E->getArg(4));
5843 // Convert to generic address space.
5844 EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);
5845 }
5846 llvm::Value *EventRet = nullptr;
5847 if (E->getArg(5)->isNullPointerConstant(
5848 getContext(), Expr::NPC_ValueDependentIsNotNull)) {
5849 EventRet = llvm::ConstantPointerNull::get(PtrTy);
5850 } else {
5851 EventRet =
5852 Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);
5853 }
5854
5855 auto Info =
5856 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
5857 llvm::Value *Kernel =
5858 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5859 llvm::Value *Block =
5860 Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5861
5862 std::vector<llvm::Type *> ArgTys = {
5863 QueueTy, Int32Ty, RangeTy, Int32Ty,
5864 PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
5865
5866 std::vector<llvm::Value *> Args = {Queue, Flags, Range,
5867 NumEvents, EventWaitList, EventRet,
5868 Kernel, Block};
5869
5870 if (NumArgs == 7) {
5871 // Has events but no variadics.
5872 Name = "__enqueue_kernel_basic_events";
5873 llvm::FunctionType *FTy = llvm::FunctionType::get(
5874 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5875 return RValue::get(
5876 EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5877 llvm::ArrayRef<llvm::Value *>(Args)));
5878 }
5879 // Has event info and variadics
5880 // Pass the number of variadics to the runtime function too.
5881 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
5882 ArgTys.push_back(Int32Ty);
5883 Name = "__enqueue_kernel_events_varargs";
5884
5885 llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
5886 std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
5887 Args.push_back(ElemPtr);
5888 ArgTys.push_back(ElemPtr->getType());
5889
5890 llvm::FunctionType *FTy = llvm::FunctionType::get(
5891 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
5892 auto Call =
5893 RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),
5894 llvm::ArrayRef<llvm::Value *>(Args)));
5895 if (TmpSize)
5896 EmitLifetimeEnd(TmpSize, TmpPtr);
5897 return Call;
5898 }
5899 llvm_unreachable("Unexpected enqueue_kernel signature");
5900 }
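// Editorial note (illustrative sketch, not in the upstream source): a call
// such as
//   enqueue_kernel(q, flags, ndrange, ^(local int *p){ ... }, n * sizeof(int));
// takes the varargs path above and lowers to
//   __enqueue_kernel_varargs(q, flags, ndrange, kernel, block, 1, sizes_ptr),
// while the event-taking forms additionally pass num_events, the wait list,
// and the returned clk_event_t before the kernel/block pair.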
5901 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
5902 // parameter.
5903 case Builtin::BIget_kernel_work_group_size: {
5904 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5905 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5906 auto Info =
5907 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5908 Value *Kernel =
5909 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5910 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5911 return RValue::get(EmitRuntimeCall(
5912 CGM.CreateRuntimeFunction(
5913 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5914 false),
5915 "__get_kernel_work_group_size_impl"),
5916 {Kernel, Arg}));
5917 }
5918 case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
5919 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5920 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5921 auto Info =
5922 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
5923 Value *Kernel =
5924 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5925 Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5926 return RValue::get(EmitRuntimeCall(
5927 CGM.CreateRuntimeFunction(
5928 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
5929 false),
5930 "__get_kernel_preferred_work_group_size_multiple_impl"),
5931 {Kernel, Arg}));
5932 }
5933 case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
5934 case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
5935 llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(
5936 getContext().getTargetAddressSpace(LangAS::opencl_generic));
5937 LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
5938 llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);
5939 auto Info =
5940 CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
5941 Value *Kernel =
5942 Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);
5943 Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
5944 const char *Name =
5945 BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
5946 ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
5947 : "__get_kernel_sub_group_count_for_ndrange_impl";
5948 return RValue::get(EmitRuntimeCall(
5949 CGM.CreateRuntimeFunction(
5950 llvm::FunctionType::get(
5951 IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
5952 false),
5953 Name),
5954 {NDRange, Kernel, Block}));
5955 }
5956 case Builtin::BI__builtin_store_half:
5957 case Builtin::BI__builtin_store_halff: {
5958 Value *Val = EmitScalarExpr(E->getArg(0));
5959 Address Address = EmitPointerWithAlignment(E->getArg(1));
5960 Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
5961 Builder.CreateStore(HalfVal, Address);
5962 return RValue::get(nullptr);
5963 }
5964 case Builtin::BI__builtin_load_half: {
5965 Address Address = EmitPointerWithAlignment(E->getArg(0));
5966 Value *HalfVal = Builder.CreateLoad(Address);
5967 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
5968 }
5969 case Builtin::BI__builtin_load_halff: {
5970 Address Address = EmitPointerWithAlignment(E->getArg(0));
5971 Value *HalfVal = Builder.CreateLoad(Address);
5972 return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
5973 }
5974 case Builtin::BI__builtin_printf:
5975 case Builtin::BIprintf:
5976 if (getTarget().getTriple().isNVPTX() ||
5977 getTarget().getTriple().isAMDGCN() ||
5978 (getTarget().getTriple().isSPIRV() &&
5979 getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
5980 if (getTarget().getTriple().isNVPTX())
5981 return EmitNVPTXDevicePrintfCallExpr(E);
5982 if ((getTarget().getTriple().isAMDGCN() ||
5983 getTarget().getTriple().isSPIRV()) &&
5984 getLangOpts().HIP)
5985 return EmitAMDGPUDevicePrintfCallExpr(E);
5986 }
5987
5988 break;
5989 case Builtin::BI__builtin_canonicalize:
5990 case Builtin::BI__builtin_canonicalizef:
5991 case Builtin::BI__builtin_canonicalizef16:
5992 case Builtin::BI__builtin_canonicalizel:
5993 return RValue::get(
5994 emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));
5995
5996 case Builtin::BI__builtin_thread_pointer: {
5997 if (!getContext().getTargetInfo().isTLSSupported())
5998 CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
5999 // Fall through - it's already mapped to the intrinsic by ClangBuiltin.
6000 break;
6001 }
6002 case Builtin::BI__builtin_os_log_format:
6003 return emitBuiltinOSLogFormat(*E);
6004
6005 case Builtin::BI__xray_customevent: {
6006 if (!ShouldXRayInstrumentFunction())
6007 return RValue::getIgnored();
6008
6009 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6010 XRayInstrKind::Custom))
6011 return RValue::getIgnored();
6012
6013 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6014 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
6015 return RValue::getIgnored();
6016
6017 Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
6018 auto FTy = F->getFunctionType();
6019 auto Arg0 = E->getArg(0);
6020 auto Arg0Val = EmitScalarExpr(Arg0);
6021 auto Arg0Ty = Arg0->getType();
6022 auto PTy0 = FTy->getParamType(0);
6023 if (PTy0 != Arg0Val->getType()) {
6024 if (Arg0Ty->isArrayType())
6025 Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);
6026 else
6027 Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
6028 }
6029 auto Arg1 = EmitScalarExpr(E->getArg(1));
6030 auto PTy1 = FTy->getParamType(1);
6031 if (PTy1 != Arg1->getType())
6032 Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
6033 return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
6034 }
6035
6036 case Builtin::BI__xray_typedevent: {
6037 // TODO: There should be a way to always emit events even if the current
6038 // function is not instrumented. Losing events in a stream can cripple
6039 // a trace.
6040 if (!ShouldXRayInstrumentFunction())
6041 return RValue::getIgnored();
6042
6043 if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
6044 XRayInstrKind::Typed))
6045 return RValue::getIgnored();
6046
6047 if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
6048 if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
6049 return RValue::getIgnored();
6050
6051 Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
6052 auto FTy = F->getFunctionType();
6053 auto Arg0 = EmitScalarExpr(E->getArg(0));
6054 auto PTy0 = FTy->getParamType(0);
6055 if (PTy0 != Arg0->getType())
6056 Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
6057 auto Arg1 = E->getArg(1);
6058 auto Arg1Val = EmitScalarExpr(Arg1);
6059 auto Arg1Ty = Arg1->getType();
6060 auto PTy1 = FTy->getParamType(1);
6061 if (PTy1 != Arg1Val->getType()) {
6062 if (Arg1Ty->isArrayType())
6063 Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
6064 else
6065 Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
6066 }
6067 auto Arg2 = EmitScalarExpr(E->getArg(2));
6068 auto PTy2 = FTy->getParamType(2);
6069 if (PTy2 != Arg2->getType())
6070 Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
6071 return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
6072 }
6073
6074 case Builtin::BI__builtin_ms_va_start:
6075 case Builtin::BI__builtin_ms_va_end:
6076 return RValue::get(
6077 EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
6078 BuiltinID == Builtin::BI__builtin_ms_va_start));
6079
6080 case Builtin::BI__builtin_ms_va_copy: {
6081 // Lower this manually. We can't reliably determine whether or not any
6082 // given va_copy() is for a Win64 va_list from the calling convention
6083 // alone, because it's legal to do this from a System V ABI function.
6084 // With opaque pointer types, we won't have enough information in LLVM
6085 // IR to determine this from the argument types, either. Best to do it
6086 // now, while we have enough information.
6087 Address DestAddr = EmitMSVAListRef(E->getArg(0));
6088 Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6089
6090 DestAddr = DestAddr.withElementType(Int8PtrTy);
6091 SrcAddr = SrcAddr.withElementType(Int8PtrTy);
6092
6093 Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6094 return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
6095 }
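// Editorial note (illustrative, not in the upstream source): since a Win64
// va_list is a single pointer into the argument area, the lowering above is
// roughly equivalent to
//   *(char **)dest = *(char **)src;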
6096
6097 case Builtin::BI__builtin_get_device_side_mangled_name: {
6098 auto Name = CGM.getCUDARuntime().getDeviceSideName(
6099 cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
6100 auto Str = CGM.GetAddrOfConstantCString(Name, "");
6101 return RValue::get(Str.getPointer());
6102 }
6103 }
6104
6105 // If this is an alias for a lib function (e.g. __builtin_sin), emit
6106 // the call using the normal call path, but using the unmangled
6107 // version of the function name.
6108 if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
6109 return emitLibraryCall(*this, FD, E,
6110 CGM.getBuiltinLibFunction(FD, BuiltinID));
6111
6112 // If this is a predefined lib function (e.g. malloc), emit the call
6113 // using exactly the normal call path.
6114 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
6115 return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));
6116
6117 // Check that a call to a target specific builtin has the correct target
6118 // features.
6119 // This is down here to avoid non-target-specific builtins; however, if
6120 // generic builtins start to require generic target features then we
6121 // can move this up to the beginning of the function.
6122 checkTargetFeatures(E, FD);
6123
6124 if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
6125 LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
6126
6127 // See if we have a target specific intrinsic.
6128 StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
6129 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
6130 StringRef Prefix =
6131 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
6132 if (!Prefix.empty()) {
6133 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
6134 if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
6135 getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
6136 IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
6137 // NOTE: we don't need to perform a compatibility flag check here since the
6138 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the
6139 // MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier.
6140 if (IntrinsicID == Intrinsic::not_intrinsic)
6141 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
6142 }
6143
6144 if (IntrinsicID != Intrinsic::not_intrinsic) {
6145 SmallVector<Value*, 16> Args;
6146
6147 // Find out if any arguments are required to be integer constant
6148 // expressions.
6149 unsigned ICEArguments = 0;
6150 ASTContext::GetBuiltinTypeError Error;
6151 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6152 assert(Error == ASTContext::GE_None && "Should not codegen an error");
6153
6154 Function *F = CGM.getIntrinsic(IntrinsicID);
6155 llvm::FunctionType *FTy = F->getFunctionType();
6156
6157 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
6158 Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
6159 // If the intrinsic arg type is different from the builtin arg type
6160 // we need to do a bit cast.
6161 llvm::Type *PTy = FTy->getParamType(i);
6162 if (PTy != ArgValue->getType()) {
6163 // XXX - vector of pointers?
6164 if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
6165 if (PtrTy->getAddressSpace() !=
6166 ArgValue->getType()->getPointerAddressSpace()) {
6167 ArgValue = Builder.CreateAddrSpaceCast(
6168 ArgValue, llvm::PointerType::get(getLLVMContext(),
6169 PtrTy->getAddressSpace()));
6170 }
6171 }
6172
6173 // Cast vector type (e.g., v256i32) to x86_amx; this only happens
6174 // in AMX intrinsics.
6175 if (PTy->isX86_AMXTy())
6176 ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
6177 {ArgValue->getType()}, {ArgValue});
6178 else
6179 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
6180 }
6181
6182 Args.push_back(ArgValue);
6183 }
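// Editorial note (illustrative, not in the upstream source): in the loop
// above, pointer arguments whose address space differs from the intrinsic
// parameter get an addrspacecast, vector arguments to AMX intrinsics (e.g.
// <256 x i32>) are wrapped in llvm.x86.cast.vector.to.tile, and any other
// mismatch falls back to a plain bitcast.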
6184
6185 Value *V = Builder.CreateCall(F, Args);
6186 QualType BuiltinRetType = E->getType();
6187
6188 llvm::Type *RetTy = VoidTy;
6189 if (!BuiltinRetType->isVoidType())
6190 RetTy = ConvertType(BuiltinRetType);
6191
6192 if (RetTy != V->getType()) {
6193 // XXX - vector of pointers?
6194 if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
6195 if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
6196 V = Builder.CreateAddrSpaceCast(
6197 V, llvm::PointerType::get(getLLVMContext(),
6198 PtrTy->getAddressSpace()));
6199 }
6200 }
6201
6202 // Cast x86_amx to vector type (e.g., v256i32); this only happens
6203 // in AMX intrinsics.
6204 if (V->getType()->isX86_AMXTy())
6205 V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
6206 {V});
6207 else
6208 V = Builder.CreateBitCast(V, RetTy);
6209 }
6210
6211 if (RetTy->isVoidTy())
6212 return RValue::get(nullptr);
6213
6214 return RValue::get(V);
6215 }
6216
6217 // Some target-specific builtins can have aggregate return values, e.g.
6218 // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
6219 // ReturnValue to be non-null, so that the target-specific emission code can
6220 // always just emit into it.
6221 TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
6222 if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
6223 Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
6224 ReturnValue = ReturnValueSlot(DestPtr, false);
6225 }
6226
6227 // Now see if we can emit a target-specific builtin.
6228 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
6229 switch (EvalKind) {
6230 case TEK_Scalar:
6231 if (V->getType()->isVoidTy())
6232 return RValue::get(nullptr);
6233 return RValue::get(V);
6234 case TEK_Aggregate:
6235 return RValue::getAggregate(ReturnValue.getAddress(),
6236 ReturnValue.isVolatile());
6237 case TEK_Complex:
6238 llvm_unreachable("No current target builtin returns complex");
6239 }
6240 llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
6241 }
6242
6243 // EmitHLSLBuiltinExpr will check getLangOpts().HLSL
6244 if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))
6245 return RValue::get(V);
6246
6247 if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)
6248 return EmitHipStdParUnsupportedBuiltin(this, FD);
6249
6250 ErrorUnsupported(E, "builtin function");
6251
6252 // Unknown builtin, for now just dump it out and return undef.
6253 return GetUndefRValue(E->getType());
6254}
6255
6256 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
6257 unsigned BuiltinID, const CallExpr *E,
6258 ReturnValueSlot ReturnValue,
6259 llvm::Triple::ArchType Arch) {
6260 // When compiling in HipStdPar mode we have to be conservative in rejecting
6261 // target specific features in the FE, and defer the possible error to the
6262 // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is
6263 // referenced by an accelerator executable function, we emit an error.
6264 // Returning nullptr here leads to the builtin being handled in
6265 // EmitStdParUnsupportedBuiltin.
6266 if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&
6267 Arch != CGF->getTarget().getTriple().getArch())
6268 return nullptr;
6269
6270 switch (Arch) {
6271 case llvm::Triple::arm:
6272 case llvm::Triple::armeb:
6273 case llvm::Triple::thumb:
6274 case llvm::Triple::thumbeb:
6275 return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
6276 case llvm::Triple::aarch64:
6277 case llvm::Triple::aarch64_32:
6278 case llvm::Triple::aarch64_be:
6279 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
6280 case llvm::Triple::bpfeb:
6281 case llvm::Triple::bpfel:
6282 return CGF->EmitBPFBuiltinExpr(BuiltinID, E);
6283 case llvm::Triple::x86:
6284 case llvm::Triple::x86_64:
6285 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
6286 case llvm::Triple::ppc:
6287 case llvm::Triple::ppcle:
6288 case llvm::Triple::ppc64:
6289 case llvm::Triple::ppc64le:
6290 return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
6291 case llvm::Triple::r600:
6292 case llvm::Triple::amdgcn:
6293 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6294 case llvm::Triple::systemz:
6295 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
6296 case llvm::Triple::nvptx:
6297 case llvm::Triple::nvptx64:
6298 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
6299 case llvm::Triple::wasm32:
6300 case llvm::Triple::wasm64:
6301 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
6302 case llvm::Triple::hexagon:
6303 return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
6304 case llvm::Triple::riscv32:
6305 case llvm::Triple::riscv64:
6306 return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);
6307 case llvm::Triple::spirv64:
6308 if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
6309 return nullptr;
6310 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
6311 default:
6312 return nullptr;
6313 }
6314}
6315
6316 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
6317 const CallExpr *E,
6318 ReturnValueSlot ReturnValue) {
6319 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
6320 assert(getContext().getAuxTargetInfo() && "Missing aux target info");
6321 return EmitTargetArchBuiltinExpr(
6322 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
6323 ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
6324 }
6325
6326 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
6327 getTarget().getTriple().getArch());
6328}
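// Editorial note (assumption, not in the upstream source): the aux-builtin
// path above is taken in offload compilations (e.g. CUDA/HIP), where builtins
// of the auxiliary target are still parsed and must be emitted against the
// aux target's architecture rather than the current triple.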
6329
6330static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,
6331 NeonTypeFlags TypeFlags,
6332 bool HasLegalHalfType = true,
6333 bool V1Ty = false,
6334 bool AllowBFloatArgsAndRet = true) {
6335 int IsQuad = TypeFlags.isQuad();
6336 switch (TypeFlags.getEltType()) {
6337 case NeonTypeFlags::Int8:
6338 case NeonTypeFlags::Poly8:
6339 return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
6340 case NeonTypeFlags::Int16:
6341 case NeonTypeFlags::Poly16:
6342 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6343 case NeonTypeFlags::BFloat16:
6344 if (AllowBFloatArgsAndRet)
6345 return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
6346 else
6347 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6348 case NeonTypeFlags::Float16:
6349 if (HasLegalHalfType)
6350 return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
6351 else
6352 return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
6353 case NeonTypeFlags::Int32:
6354 return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
6355 case NeonTypeFlags::Int64:
6356 case NeonTypeFlags::Poly64:
6357 return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
6358 case NeonTypeFlags::Poly128:
6359 // FIXME: i128 and f128 don't get full support in Clang and LLVM.
6360 // There is a lot of i128 and f128 API missing,
6361 // so we use v16i8 to represent poly128 and get it pattern matched.
6362 return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
6363 case NeonTypeFlags::Float32:
6364 return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
6365 case NeonTypeFlags::Float64:
6366 return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
6367 }
6368 llvm_unreachable("Unknown vector element type!");
6369}
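// Editorial note (illustrative, not in the upstream source): for example,
// NeonTypeFlags with element type Int32 maps to <2 x i32> here, or <4 x i32>
// for the quad ("q") variants, matching the 64-bit and 128-bit NEON registers.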
6370
6371static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
6372 NeonTypeFlags IntTypeFlags) {
6373 int IsQuad = IntTypeFlags.isQuad();
6374 switch (IntTypeFlags.getEltType()) {
6375 case NeonTypeFlags::Int16:
6376 return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
6377 case NeonTypeFlags::Int32:
6378 return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
6379 case NeonTypeFlags::Int64:
6380 return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
6381 default:
6382 llvm_unreachable("Type can't be converted to floating-point!");
6383 }
6384}
6385
6386 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
6387 const ElementCount &Count) {
6388 Value *SV = llvm::ConstantVector::getSplat(Count, C);
6389 return Builder.CreateShuffleVector(V, V, SV, "lane");
6390}
6391
6392 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
6393 ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();
6394 return EmitNeonSplat(V, C, EC);
6395}
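// Editorial note (illustrative, not in the upstream source): EmitNeonSplat
// implements a lane splat as a shufflevector with a constant mask; splatting
// lane 1 of a <4 x i32> vector uses the mask <1, 1, 1, 1>.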
6396
6397 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
6398 const char *name,
6399 unsigned shift, bool rightshift) {
6400 unsigned j = 0;
6401 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
6402 ai != ae; ++ai, ++j) {
6403 if (F->isConstrainedFPIntrinsic())
6404 if (ai->getType()->isMetadataTy())
6405 continue;
6406 if (shift > 0 && shift == j)
6407 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
6408 else
6409 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
6410 }
6411
6412 if (F->isConstrainedFPIntrinsic())
6413 return Builder.CreateConstrainedFPCall(F, Ops, name);
6414 else
6415 return Builder.CreateCall(F, Ops, name);
6416}
6417
6418 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
6419 bool neg) {
6420 int SV = cast<ConstantInt>(V)->getSExtValue();
6421 return ConstantInt::get(Ty, neg ? -SV : SV);
6422}
6423
6424// Right-shift a vector by a constant.
6425 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
6426 llvm::Type *Ty, bool usgn,
6427 const char *name) {
6428 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6429
6430 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
6431 int EltSize = VTy->getScalarSizeInBits();
6432
6433 Vec = Builder.CreateBitCast(Vec, Ty);
6434
6435 // lshr/ashr are undefined when the shift amount is equal to the vector
6436 // element size.
6437 if (ShiftAmt == EltSize) {
6438 if (usgn) {
6439 // Right-shifting an unsigned value by its size yields 0.
6440 return llvm::ConstantAggregateZero::get(VTy);
6441 } else {
6442 // Right-shifting a signed value by its size is equivalent
6443 // to a shift of size-1.
6444 --ShiftAmt;
6445 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
6446 }
6447 }
6448
6449 Shift = EmitNeonShiftVector(Shift, Ty, false);
6450 if (usgn)
6451 return Builder.CreateLShr(Vec, Shift, name);
6452 else
6453 return Builder.CreateAShr(Vec, Shift, name);
6454}
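// Editorial note (illustrative, not in the upstream source): the clamping
// above means a signed right shift of a <4 x i32> vector by 32 is emitted as
// an ashr by 31 (which gives the same result for every input), while the
// corresponding unsigned shift folds to a zero vector.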
6455
6456enum {
6457 AddRetType = (1 << 0),
6458 Add1ArgType = (1 << 1),
6459 Add2ArgTypes = (1 << 2),
6460
6461 VectorizeRetType = (1 << 3),
6462 VectorizeArgTypes = (1 << 4),
6463
6464 InventFloatType = (1 << 5),
6465 UnsignedAlts = (1 << 6),
6466
6467 Use64BitVectors = (1 << 7),
6468 Use128BitVectors = (1 << 8),
6469
6470 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
6471 VectorRet = AddRetType | VectorizeRetType,
6472 VectorRetGetArgs01 =
6473 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
6474 FpCmpzModifiers =
6475 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
6476 };
6477
6478namespace {
6479struct ARMVectorIntrinsicInfo {
6480 const char *NameHint;
6481 unsigned BuiltinID;
6482 unsigned LLVMIntrinsic;
6483 unsigned AltLLVMIntrinsic;
6484 uint64_t TypeModifier;
6485
6486 bool operator<(unsigned RHSBuiltinID) const {
6487 return BuiltinID < RHSBuiltinID;
6488 }
6489 bool operator<(const ARMVectorIntrinsicInfo &TE) const {
6490 return BuiltinID < TE.BuiltinID;
6491 }
6492};
6493} // end anonymous namespace
6494
6495#define NEONMAP0(NameBase) \
6496 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
6497
6498#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
6499 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6500 Intrinsic::LLVMIntrinsic, 0, TypeModifier }
6501
6502#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
6503 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
6504 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
6505 TypeModifier }
6506
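// Editorial note (illustrative, not in the upstream source): for example,
// NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to
//   { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 },
// i.e. a name hint, the Clang builtin ID, the LLVM intrinsic, no alternate
// intrinsic, and no type-modifier flags.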
6507static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
6508 NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
6509 NEONMAP0(splat_lane_v),
6510 NEONMAP0(splat_laneq_v),
6511 NEONMAP0(splatq_lane_v),
6512 NEONMAP0(splatq_laneq_v),
6513 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6514 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
6515 NEONMAP1(vabs_v, arm_neon_vabs, 0),
6516 NEONMAP1(vabsq_v, arm_neon_vabs, 0),
6517 NEONMAP0(vadd_v),
6518 NEONMAP0(vaddhn_v),
6519 NEONMAP0(vaddq_v),
6520 NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),
6521 NEONMAP1(vaeseq_u8, arm_neon_aese, 0),
6522 NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),
6523 NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),
6524 NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),
6525 NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),
6526 NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),
6527 NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),
6528 NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),
6529 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
6530 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
6531 NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6532 NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6533 NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6534 NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6535 NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),
6536 NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),
6537 NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),
6538 NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),
6539 NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),
6540 NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),
6541 NEONMAP1(vcage_v, arm_neon_vacge, 0),
6542 NEONMAP1(vcageq_v, arm_neon_vacge, 0),
6543 NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
6544 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
6545 NEONMAP1(vcale_v, arm_neon_vacge, 0),
6546 NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
6547 NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
6548 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
6549 NEONMAP0(vceqz_v),
6550 NEONMAP0(vceqzq_v),
6551 NEONMAP0(vcgez_v),
6552 NEONMAP0(vcgezq_v),
6553 NEONMAP0(vcgtz_v),
6554 NEONMAP0(vcgtzq_v),
6555 NEONMAP0(vclez_v),
6556 NEONMAP0(vclezq_v),
6557 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
6558 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
6559 NEONMAP0(vcltz_v),
6560 NEONMAP0(vcltzq_v),
6561 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6562 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6563 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6564 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6565 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
6566 NEONMAP0(vcvt_f16_s16),
6567 NEONMAP0(vcvt_f16_u16),
6568 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
6569 NEONMAP0(vcvt_f32_v),
6570 NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6571 NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6572 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6573 NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6574 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6575 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6576 NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6577 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6578 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6579 NEONMAP0(vcvt_s16_f16),
6580 NEONMAP0(vcvt_s32_v),
6581 NEONMAP0(vcvt_s64_v),
6582 NEONMAP0(vcvt_u16_f16),
6583 NEONMAP0(vcvt_u32_v),
6584 NEONMAP0(vcvt_u64_v),
6585 NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),
6586 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
6587 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
6588 NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),
6589 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
6590 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
6591 NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),
6592 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
6593 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
6594 NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),
6595 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
6596 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
6597 NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),
6598 NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),
6599 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
6600 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
6601 NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),
6602 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
6603 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
6604 NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),
6605 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
6606 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
6607 NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),
6608 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
6609 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
6610 NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),
6611 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
6612 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
6613 NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),
6614 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
6615 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
6616 NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),
6617 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
6618 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
6619 NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0),
6620 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
6621 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
6622 NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),
6623 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
6624 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
6625 NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),
6626 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
6627 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
6628 NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),
6629 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
6630 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
6631 NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),
6632 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
6633 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
6634 NEONMAP0(vcvtq_f16_s16),
6635 NEONMAP0(vcvtq_f16_u16),
6636 NEONMAP0(vcvtq_f32_v),
6637 NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),
6638 NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),
6639 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
6640 NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),
6641 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
6642 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
6643 NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),
6644 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
6645 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
6646 NEONMAP0(vcvtq_s16_f16),
6647 NEONMAP0(vcvtq_s32_v),
6648 NEONMAP0(vcvtq_s64_v),
6649 NEONMAP0(vcvtq_u16_f16),
6650 NEONMAP0(vcvtq_u32_v),
6651 NEONMAP0(vcvtq_u64_v),
6652 NEONMAP1(vdot_s32, arm_neon_sdot, 0),
6653 NEONMAP1(vdot_u32, arm_neon_udot, 0),
6654 NEONMAP1(vdotq_s32, arm_neon_sdot, 0),
6655 NEONMAP1(vdotq_u32, arm_neon_udot, 0),
6656 NEONMAP0(vext_v),
6657 NEONMAP0(vextq_v),
6658 NEONMAP0(vfma_v),
6659 NEONMAP0(vfmaq_v),
6660 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6661 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
6662 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6663 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
6664 NEONMAP0(vld1_dup_v),
6665 NEONMAP1(vld1_v, arm_neon_vld1, 0),
6666 NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
6667 NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
6668 NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
6669 NEONMAP0(vld1q_dup_v),
6670 NEONMAP1(vld1q_v, arm_neon_vld1, 0),
6671 NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
6672 NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
6673 NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
6674 NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
6675 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
6676 NEONMAP1(vld2_v, arm_neon_vld2, 0),
6677 NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
6678 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
6679 NEONMAP1(vld2q_v, arm_neon_vld2, 0),
6680 NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
6681 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
6682 NEONMAP1(vld3_v, arm_neon_vld3, 0),
6683 NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
6684 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
6685 NEONMAP1(vld3q_v, arm_neon_vld3, 0),
6686 NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
6687 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
6688 NEONMAP1(vld4_v, arm_neon_vld4, 0),
6689 NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
6690 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
6691 NEONMAP1(vld4q_v, arm_neon_vld4, 0),
6692 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6693 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
6694 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
6695 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
6696 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6697 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
6698 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
6699 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
6700 NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),
6701 NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),
6702 NEONMAP0(vmovl_v),
6703 NEONMAP0(vmovn_v),
6704 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
6705 NEONMAP0(vmull_v),
6706 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
6707 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6708 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
6709 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
6710 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6711 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
6712 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
6713 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
6714 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
6715 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
6716 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
6717 NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6718 NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),
6719 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),
6720 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),
6721 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
6722 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
6723 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
6724 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
6725 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
6726 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
6727 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
6728 NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),
6729 NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),
6730 NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),
6731 NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),
6732 NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),
6733 NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),
6734 NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),
6735 NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),
6736 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
6737 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
6738 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6739 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
6740 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6741 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6742 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
6743 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
6744 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
6745 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
6746 NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6747 NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),
6748 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
6749 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6750 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
6751 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
6752 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
6753 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6754 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
6755 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
6756 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
6757 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
6758 NEONMAP0(vrndi_v),
6759 NEONMAP0(vrndiq_v),
6760 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
6761 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
6762 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
6763 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
6764 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
6765 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
6766 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
6767 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
6768 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
6769 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6770 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
6771 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6772 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
6773 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6774 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
6775 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
6776 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
6777 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
6778 NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),
6779 NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),
6780 NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),
6781 NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),
6782 NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),
6783 NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),
6784 NEONMAP0(vshl_n_v),
6785 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6786 NEONMAP0(vshll_n_v),
6787 NEONMAP0(vshlq_n_v),
6788 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
6789 NEONMAP0(vshr_n_v),
6790 NEONMAP0(vshrn_n_v),
6791 NEONMAP0(vshrq_n_v),
6792 NEONMAP1(vst1_v, arm_neon_vst1, 0),
6793 NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
6794 NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
6795 NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
6796 NEONMAP1(vst1q_v, arm_neon_vst1, 0),
6797 NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
6798 NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
6799 NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
6800 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
6801 NEONMAP1(vst2_v, arm_neon_vst2, 0),
6802 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
6803 NEONMAP1(vst2q_v, arm_neon_vst2, 0),
6804 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
6805 NEONMAP1(vst3_v, arm_neon_vst3, 0),
6806 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
6807 NEONMAP1(vst3q_v, arm_neon_vst3, 0),
6808 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
6809 NEONMAP1(vst4_v, arm_neon_vst4, 0),
6810 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
6811 NEONMAP1(vst4q_v, arm_neon_vst4, 0),
6812 NEONMAP0(vsubhn_v),
6813 NEONMAP0(vtrn_v),
6814 NEONMAP0(vtrnq_v),
6815 NEONMAP0(vtst_v),
6816 NEONMAP0(vtstq_v),
6817 NEONMAP1(vusdot_s32, arm_neon_usdot, 0),
6818 NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),
6819 NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),
6820 NEONMAP0(vuzp_v),
6821 NEONMAP0(vuzpq_v),
6822 NEONMAP0(vzip_v),
6823 NEONMAP0(vzipq_v)
6824};
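// Editorial note (assumption, not in the upstream source): this table is
// expected to remain sorted by builtin ID, since lookups elsewhere in this
// file use binary search through the operator< overloads defined on
// ARMVectorIntrinsicInfo above.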
6825
6826static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
6827 NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),
6828 NEONMAP0(splat_lane_v),
6829 NEONMAP0(splat_laneq_v),
6830 NEONMAP0(splatq_lane_v),
6831 NEONMAP0(splatq_laneq_v),
6832 NEONMAP1(vabs_v, aarch64_neon_abs, 0),
6833 NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
6834 NEONMAP0(vadd_v),
6835 NEONMAP0(vaddhn_v),
6836 NEONMAP0(vaddq_p128),
6837 NEONMAP0(vaddq_v),
6838 NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),
6839 NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),
6840 NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),
6841 NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),
6842 NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6843 NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6844 NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6845 NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6846 NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6847 NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6848 NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6849 NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
6850 NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),
6851 NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),
6852 NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),
6853 NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),
6854 NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0),
6855 NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6856 NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6857 NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6858 NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6859 NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),
6860 NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),
6861 NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),
6862 NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),
6863 NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),
6864 NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),
6865 NEONMAP1(vcage_v, aarch64_neon_facge, 0),
6866 NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
6867 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
6868 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
6869 NEONMAP1(vcale_v, aarch64_neon_facge, 0),
6870 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
6871 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
6872 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
6873 NEONMAP0(vceqz_v),
6874 NEONMAP0(vceqzq_v),
6875 NEONMAP0(vcgez_v),
6876 NEONMAP0(vcgezq_v),
6877 NEONMAP0(vcgtz_v),
6878 NEONMAP0(vcgtzq_v),
6879 NEONMAP0(vclez_v),
6880 NEONMAP0(vclezq_v),
6881 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
6882 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
6883 NEONMAP0(vcltz_v),
6884 NEONMAP0(vcltzq_v),
6885 NEONMAP1(vclz_v, ctlz, Add1ArgType),
6886 NEONMAP1(vclzq_v, ctlz, Add1ArgType),
6887 NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6888 NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6889 NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6890 NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6891 NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6892 NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6893 NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6894 NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6895 NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),
6896 NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),
6897 NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),
6898 NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),
6899 NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),
6900 NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),
6901 NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),
6902 NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),
6903 NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),
6904 NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),
6905 NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),
6906 NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),
6907 NEONMAP1(vcnt_v, ctpop, Add1ArgType),
6908 NEONMAP1(vcntq_v, ctpop, Add1ArgType),
6909 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
6910 NEONMAP0(vcvt_f16_s16),
6911 NEONMAP0(vcvt_f16_u16),
6912 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
6913 NEONMAP0(vcvt_f32_v),
6914 NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6915 NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6916 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6917 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6918 NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6919 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6920 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6921 NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6922 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6923 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6924 NEONMAP0(vcvtq_f16_s16),
6925 NEONMAP0(vcvtq_f16_u16),
6926 NEONMAP0(vcvtq_f32_v),
6927 NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),
6928 NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),
6929 NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),
6930 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6931 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
6932 NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),
6933 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
6934 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
6935 NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),
6936 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
6937 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
6938 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
6939 NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),
6940 NEONMAP1(vdot_u32, aarch64_neon_udot, 0),
6941 NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),
6942 NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),
6943 NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6944 NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6945 NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6946 NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6947 NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6948 NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6949 NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6950 NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
6951 NEONMAP0(vext_v),
6952 NEONMAP0(vextq_v),
6953 NEONMAP0(vfma_v),
6954 NEONMAP0(vfmaq_v),
6955 NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),
6956 NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),
6957 NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),
6958 NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),
6959 NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),
6960 NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),
6961 NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0),
6962 NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),
6963 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6964 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
6965 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6966 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
6967 NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
6968 NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
6969 NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
6970 NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
6971 NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
6972 NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
6973 NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),
6974 NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),
6975 NEONMAP0(vmovl_v),
6976 NEONMAP0(vmovn_v),
6977 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
6978 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
6979 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
6980 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6981 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
6982 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
6983 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
6984 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
6985 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6986 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
6987 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
6988 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
6989 NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),
6990 NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6991 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
6992 NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),
6993 NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),
6994 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
6995 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
6996 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
6997 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
6998 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
6999 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
7000 NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7001 NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7002 NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),
7003 NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7004 NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7005 NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7006 NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),
7007 NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7008 NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7009 NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7010 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
7011 NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),
7012 NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),
7013 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
7014 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7015 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
7016 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
7017 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7018 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
7019 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
7020 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
7021 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
7022 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7023 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
7024 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
7025 NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),
7026 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7027 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
7028 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
7029 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
7030 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7031 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
7032 NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
7033 NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
7034 NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
7035 NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
7036 NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
7037 NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
7038 NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
7039 NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
7040 NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
7041 NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
7042 NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
7043 NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
7044 NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
7045 NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
7046 NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
7047 NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
7048 NEONMAP0(vrndi_v),
7049 NEONMAP0(vrndiq_v),
7050 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7051 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
7052 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7053 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
7054 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7055 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
7056 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
7057 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
7058 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
7059 NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),
7060 NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),
7061 NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),
7062 NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),
7063 NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),
7064 NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),
7065 NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),
7066 NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),
7067 NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),
7068 NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),
7069 NEONMAP0(vshl_n_v),
7070 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7071 NEONMAP0(vshll_n_v),
7072 NEONMAP0(vshlq_n_v),
7073 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
7074 NEONMAP0(vshr_n_v),
7075 NEONMAP0(vshrn_n_v),
7076 NEONMAP0(vshrq_n_v),
7077 NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),
7078 NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),
7079 NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),
7080 NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),
7081 NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),
7082 NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),
7083 NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),
7084 NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),
7085 NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),
7086 NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
7087 NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
7088 NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
7089 NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
7090 NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
7091 NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
7092 NEONMAP0(vsubhn_v),
7093 NEONMAP0(vtst_v),
7094 NEONMAP0(vtstq_v),
7095 NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),
7096 NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),
7097 NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),
7098 NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),
7099};
7100
7101static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
7102 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
7103 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
7104 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
7105 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7106 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7107 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
7108 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
7109 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7110 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7111 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7112 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
7113 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
7114 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
7115 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
7116 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7117 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7118 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7119 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7120 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7121 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7122 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
7123 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
7124 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
7125 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
7126 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7127 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7128 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7129 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7130 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7131 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7132 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7133 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7134 NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7135 NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7136 NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
7137 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7138 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7139 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7140 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7141 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7142 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7143 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7144 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7145 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7146 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7147 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7148 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7149 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7150 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7151 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7152 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7153 NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7154 NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7155 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
7156 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7157 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7158 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7159 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7160 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7161 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7162 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7163 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7164 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
7165 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
7166 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7167 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7168 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7169 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7170 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7171 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7172 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7173 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7174 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
7175 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
7176 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
7177 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
7178 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
7179 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7180 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
7181 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7182 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
7183 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7184 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
7185 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7186 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
7187 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
7188 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
7189 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7190 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
7191 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
7192 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
7193 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7194 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7195 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
7196 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
7197 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
7198 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
7199 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
7200 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
7201 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
7202 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
7203 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
7204 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
7205 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
7206 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
7207 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7208 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7209 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
7210 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
7211 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
7212 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7213 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
7214 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7215 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
7216 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
7217 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
7218 NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),
7219 NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),
7220 NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),
7221 NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),
7222 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
7223 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
7224 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7225 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7226 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
7227 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
7228 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
7229 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
7230 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
7231 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
7232 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
7233 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
7234 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7235 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7236 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
7237 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
7238 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
7239 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7240 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
7241 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7242 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7243 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7244 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7245 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
7246 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
7247 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7248 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7249 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
7250 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
7251 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
7252 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
7253 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
7254 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
7255 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7256 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
7257 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
7258 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
7259 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
7260 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7261 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7262 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
7263 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
7264 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
7265 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7266 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
7267 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7268 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7269 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
7270 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
7271 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
7272 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
7273 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
7274 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
7275 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
7276 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
7277 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
7278 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
7279 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
7280 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
7281 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
7282 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
7283 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
7284 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
7285 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
7286 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
7287 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
7288 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
7289 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
7290 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
7291 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
7292 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
7293 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7294 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
7295 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
7296 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
7297 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
7298 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
7299 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7300 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
7301 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
7302 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
7304 // FP16 scalar intrinsics go here.
7304 NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
7305 NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7306 NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
7307 NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7308 NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
7309 NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7310 NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
7311 NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7312 NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
7313 NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7314 NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
7315 NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7316 NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
7317 NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7318 NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
7319 NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7320 NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
7321 NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7322 NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
7323 NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7324 NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
7325 NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7326 NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
7327 NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7328 NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
7329 NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7330 NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
7331 NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7332 NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
7333 NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
7334 NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
7335 NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
7336 NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
7337 NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
7338};
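// Illustrative reading of the scalar (SISD) table: an entry such as
//   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors)
// is lowered by inserting the i8 operands into lane 0 of <8 x i8> vectors,
// calling llvm.aarch64.neon.sqadd.v8i8, and reading lane 0 of the result back
// out (see the common SISD helper further down).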
7339
7340// Some intrinsics are equivalent for codegen.
7341static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
7342 { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
7343 { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
7344 { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
7345 { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
7346 { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
7347 { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
7348 { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
7349 { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
7350 { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
7351 { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
7352 { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
7353 { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
7354 { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
7355 { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
7356 { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
7357 { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
7358 { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
7359 { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
7360 { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
7361 { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
7362 { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
7363 { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
7364 { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
7365 { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
7366 { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
7367 { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
7368 { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
7369 { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
7370 { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
7371 { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
7372 { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
7373 { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
7374 { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
7375 { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
7376 { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
7377 { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
7378 { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
7379 { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
7380 { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
7381 { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
7382 { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
7383 { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
7384 { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
7385 { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
7386 { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
7387 { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
7388 { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
7389 { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
7390 { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
7391 { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
7392 { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
7393 { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
7394 { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
7395 { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
7396 { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
7397 { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
7398 { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
7399 { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
7400 { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
7401 { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
7402 { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
7403 { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
7404 { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
7405 { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
7406 { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
7407 { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
7408 { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
7409 { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
7410 { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
7411 { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
7412 { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
7413 { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
7414 { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
7415 { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
7416 { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
7417 { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
7418 { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
7419 { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
7420 { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
7421 { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
7422 { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
7423 { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
7424 { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
7425 { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
7426 { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
7427 { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
7428 { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
7429 { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
7430 { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
7431 { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
7432 { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
7433 { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
7434 { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
7435 { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
7436 { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
7437 { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
7438 { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
7439 { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
7440 { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
7441 { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
7442 { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
7443 { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
7444 { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
7445 { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
7446 { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
7447 { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
7448 { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
7449 { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
7450 { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
7451 { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
7452 { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
7453 { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
7454 { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
7455 { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
7456 { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
7457 { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
7458 { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
7459 { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
7460 { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
7461 { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
7462 { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
7463 { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
7464 { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
7465 { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
7466 { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
7467 { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
7468 { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
7469 { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
7470 // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
7471 // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
7472 // arbitrary one to be handled as the canonical variation.
7473 { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7474 { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7475 { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
7476 { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7477 { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7478 { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
7479 { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7480 { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7481 { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
7482 { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7483 { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7484 { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
7485};
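// The pairs above are consulted before the per-builtin tables: when the
// incoming builtin ID matches the first element of a pair, codegen proceeds
// as if the second, canonical ID had been used. For example,
// __builtin_neon_vabs_f16 is emitted exactly like __builtin_neon_vabs_v.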
7486
7487#undef NEONMAP0
7488#undef NEONMAP1
7489#undef NEONMAP2
7490
7491#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7492 { \
7493 #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7494 TypeModifier \
7495 }
7496
7497#define SVEMAP2(NameBase, TypeModifier) \
7498 { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
7499static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
7500#define GET_SVE_LLVM_INTRINSIC_MAP
7501#include "clang/Basic/arm_sve_builtin_cg.inc"
7502#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
7503#undef GET_SVE_LLVM_INTRINSIC_MAP
7504};
7505
7506#undef SVEMAP1
7507#undef SVEMAP2
7508
7509#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
7510 { \
7511 #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
7512 TypeModifier \
7513 }
7514
7515#define SMEMAP2(NameBase, TypeModifier) \
7516 { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
7517static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
7518#define GET_SME_LLVM_INTRINSIC_MAP
7519#include "clang/Basic/arm_sme_builtin_cg.inc"
7520#undef GET_SME_LLVM_INTRINSIC_MAP
7521};
7522
7523#undef SMEMAP1
7524#undef SMEMAP2
7525
7526static bool NEONSIMDIntrinsicsProvenSorted = false;
7527
7528static bool AArch64SIMDIntrinsicsProvenSorted = false;
7529static bool AArch64SISDIntrinsicsProvenSorted = false;
7530static bool AArch64SVEIntrinsicsProvenSorted = false;
7531static bool AArch64SMEIntrinsicsProvenSorted = false;
7532
7533static const ARMVectorIntrinsicInfo *
7534findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
7535 unsigned BuiltinID, bool &MapProvenSorted) {
7536
7537#ifndef NDEBUG
7538 if (!MapProvenSorted) {
7539 assert(llvm::is_sorted(IntrinsicMap));
7540 MapProvenSorted = true;
7541 }
7542#endif
7543
7544 const ARMVectorIntrinsicInfo *Builtin =
7545 llvm::lower_bound(IntrinsicMap, BuiltinID);
7546
7547 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
7548 return Builtin;
7549
7550 return nullptr;
7551}
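// Typical lookup (illustrative sketch; assumes the per-map ProvenSorted flags
// declared above):
//   const ARMVectorIntrinsicInfo *Info = findARMVectorIntrinsicInMap(
//       AArch64SISDIntrinsicMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
//   if (!Info)
//     ; // not in this table; fall back to other handling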
7552
7553Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
7554 unsigned Modifier,
7555 llvm::Type *ArgType,
7556 const CallExpr *E) {
7557 int VectorSize = 0;
7558 if (Modifier & Use64BitVectors)
7559 VectorSize = 64;
7560 else if (Modifier & Use128BitVectors)
7561 VectorSize = 128;
7562
7563 // Return type.
7564 SmallVector<llvm::Type *, 3> Tys;
7565 if (Modifier & AddRetType) {
7566 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
7567 if (Modifier & VectorizeRetType)
7568 Ty = llvm::FixedVectorType::get(
7569 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
7570
7571 Tys.push_back(Ty);
7572 }
7573
7574 // Arguments.
7575 if (Modifier & VectorizeArgTypes) {
7576 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
7577 ArgType = llvm::FixedVectorType::get(ArgType, Elts);
7578 }
7579
7580 if (Modifier & (Add1ArgType | Add2ArgTypes))
7581 Tys.push_back(ArgType);
7582
7583 if (Modifier & Add2ArgTypes)
7584 Tys.push_back(ArgType);
7585
7586 if (Modifier & InventFloatType)
7587 Tys.push_back(FloatTy);
7588
7589 return CGM.getIntrinsic(IntrinsicID, Tys);
7590}
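// The modifier bits drive how the overloaded intrinsic name is formed. For
// instance, AddRetType | Add1ArgType overloads on {return type, first
// argument type}, so a reduction such as vaddvq_f64 resolves to
// llvm.aarch64.neon.faddv.f64.v2f64.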
7591
7592static Value *EmitCommonNeonSISDBuiltinExpr(
7593 CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
7594 SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
7595 unsigned BuiltinID = SISDInfo.BuiltinID;
7596 unsigned int Int = SISDInfo.LLVMIntrinsic;
7597 unsigned Modifier = SISDInfo.TypeModifier;
7598 const char *s = SISDInfo.NameHint;
7599
7600 switch (BuiltinID) {
7601 case NEON::BI__builtin_neon_vcled_s64:
7602 case NEON::BI__builtin_neon_vcled_u64:
7603 case NEON::BI__builtin_neon_vcles_f32:
7604 case NEON::BI__builtin_neon_vcled_f64:
7605 case NEON::BI__builtin_neon_vcltd_s64:
7606 case NEON::BI__builtin_neon_vcltd_u64:
7607 case NEON::BI__builtin_neon_vclts_f32:
7608 case NEON::BI__builtin_neon_vcltd_f64:
7609 case NEON::BI__builtin_neon_vcales_f32:
7610 case NEON::BI__builtin_neon_vcaled_f64:
7611 case NEON::BI__builtin_neon_vcalts_f32:
7612 case NEON::BI__builtin_neon_vcaltd_f64:
7613 // Only one direction of comparisons actually exists; cmle is actually a cmge
7614 // with swapped operands. The table gives us the right intrinsic but we
7615 // still need to do the swap.
7616 std::swap(Ops[0], Ops[1]);
7617 break;
7618 }
7619
7620 assert(Int && "Generic code assumes a valid intrinsic");
7621
7622 // Determine the type(s) of this overloaded AArch64 intrinsic.
7623 const Expr *Arg = E->getArg(0);
7624 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
7625 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
7626
7627 int j = 0;
7628 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
7629 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
7630 ai != ae; ++ai, ++j) {
7631 llvm::Type *ArgTy = ai->getType();
7632 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
7633 ArgTy->getPrimitiveSizeInBits())
7634 continue;
7635
7636 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
7637 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
7638 // it before inserting.
7639 Ops[j] = CGF.Builder.CreateTruncOrBitCast(
7640 Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
7641 Ops[j] =
7642 CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
7643 }
7644
7645 Value *Result = CGF.EmitNeonCall(F, Ops, s);
7646 llvm::Type *ResultType = CGF.ConvertType(E->getType());
7647 if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
7648 Result->getType()->getPrimitiveSizeInBits().getFixedValue())
7649 return CGF.Builder.CreateExtractElement(Result, C0);
7650
7651 return CGF.Builder.CreateBitCast(Result, ResultType, s);
7652}
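// Worked example for the helper above: vqadds_s32 (Add1ArgType) passes its
// i32 operands straight to llvm.aarch64.neon.sqadd.i32, whereas vqaddh_s16
// (Vectorize1ArgType | Use64BitVectors) inserts the i16 operands into lane 0
// of <4 x i16> vectors, calls llvm.aarch64.neon.sqadd.v4i16, and extracts
// lane 0 of the result.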
7653
7654Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
7655 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
7656 const char *NameHint, unsigned Modifier, const CallExpr *E,
7657 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
7658 llvm::Triple::ArchType Arch) {
7659 // Get the last argument, which specifies the vector type.
7660 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
7661 std::optional<llvm::APSInt> NeonTypeConst =
7662 Arg->getIntegerConstantExpr(getContext());
7663 if (!NeonTypeConst)
7664 return nullptr;
7665
7666 // Determine the type of this overloaded NEON intrinsic.
7667 NeonTypeFlags Type(NeonTypeConst->getZExtValue());
7668 bool Usgn = Type.isUnsigned();
7669 bool Quad = Type.isQuad();
7670 const bool HasLegalHalfType = getTarget().hasLegalHalfType();
7671 const bool AllowBFloatArgsAndRet =
7672 getTargetHooks().getABIInfo().allowBFloatArgsAndRet();
7673
7674 llvm::FixedVectorType *VTy =
7675 GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
7676 llvm::Type *Ty = VTy;
7677 if (!Ty)
7678 return nullptr;
7679
7680 auto getAlignmentValue32 = [&](Address addr) -> Value* {
7681 return Builder.getInt32(addr.getAlignment().getQuantity());
7682 };
7683
7684 unsigned Int = LLVMIntrinsic;
7685 if ((Modifier & UnsignedAlts) && !Usgn)
7686 Int = AltLLVMIntrinsic;
7687
7688 switch (BuiltinID) {
7689 default: break;
7690 case NEON::BI__builtin_neon_splat_lane_v:
7691 case NEON::BI__builtin_neon_splat_laneq_v:
7692 case NEON::BI__builtin_neon_splatq_lane_v:
7693 case NEON::BI__builtin_neon_splatq_laneq_v: {
7694 auto NumElements = VTy->getElementCount();
7695 if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
7696 NumElements = NumElements * 2;
7697 if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
7698 NumElements = NumElements.divideCoefficientBy(2);
7699
7700 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7701 return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
7702 }
7703 case NEON::BI__builtin_neon_vpadd_v:
7704 case NEON::BI__builtin_neon_vpaddq_v:
7705 // We don't allow fp/int overloading of intrinsics.
7706 if (VTy->getElementType()->isFloatingPointTy() &&
7707 Int == Intrinsic::aarch64_neon_addp)
7708 Int = Intrinsic::aarch64_neon_faddp;
7709 break;
7710 case NEON::BI__builtin_neon_vabs_v:
7711 case NEON::BI__builtin_neon_vabsq_v:
7712 if (VTy->getElementType()->isFloatingPointTy())
7713 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
7714 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
7715 case NEON::BI__builtin_neon_vadd_v:
7716 case NEON::BI__builtin_neon_vaddq_v: {
7717 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);
7718 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
7719 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
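    // This builtin form is used for the polynomial vadd variants; polynomial
    // addition is carry-less, so it reduces to a bitwise XOR.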
7720 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
7721 return Builder.CreateBitCast(Ops[0], Ty);
7722 }
7723 case NEON::BI__builtin_neon_vaddhn_v: {
7724 llvm::FixedVectorType *SrcTy =
7725 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
7726
7727 // %sum = add <4 x i32> %lhs, %rhs
7728 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
7729 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
7730 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
7731
7732 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
7733 Constant *ShiftAmt =
7734 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
7735 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
7736
7737 // %res = trunc <4 x i32> %high to <4 x i16>
7738 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
7739 }
7740 case NEON::BI__builtin_neon_vcale_v:
7741 case NEON::BI__builtin_neon_vcaleq_v:
7742 case NEON::BI__builtin_neon_vcalt_v:
7743 case NEON::BI__builtin_neon_vcaltq_v:
7744 std::swap(Ops[0], Ops[1]);
7745 [[fallthrough]];
7746 case NEON::BI__builtin_neon_vcage_v:
7747 case NEON::BI__builtin_neon_vcageq_v:
7748 case NEON::BI__builtin_neon_vcagt_v:
7749 case NEON::BI__builtin_neon_vcagtq_v: {
7750 llvm::Type *Ty;
7751 switch (VTy->getScalarSizeInBits()) {
7752 default: llvm_unreachable("unexpected type");
7753 case 32:
7754 Ty = FloatTy;
7755 break;
7756 case 64:
7757 Ty = DoubleTy;
7758 break;
7759 case 16:
7760 Ty = HalfTy;
7761 break;
7762 }
7763 auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
7764 llvm::Type *Tys[] = { VTy, VecFlt };
7765 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7766 return EmitNeonCall(F, Ops, NameHint);
7767 }
7768 case NEON::BI__builtin_neon_vceqz_v:
7769 case NEON::BI__builtin_neon_vceqzq_v:
7770 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
7771 ICmpInst::ICMP_EQ, "vceqz");
7772 case NEON::BI__builtin_neon_vcgez_v:
7773 case NEON::BI__builtin_neon_vcgezq_v:
7774 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
7775 ICmpInst::ICMP_SGE, "vcgez");
7776 case NEON::BI__builtin_neon_vclez_v:
7777 case NEON::BI__builtin_neon_vclezq_v:
7778 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
7779 ICmpInst::ICMP_SLE, "vclez");
7780 case NEON::BI__builtin_neon_vcgtz_v:
7781 case NEON::BI__builtin_neon_vcgtzq_v:
7782 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
7783 ICmpInst::ICMP_SGT, "vcgtz");
7784 case NEON::BI__builtin_neon_vcltz_v:
7785 case NEON::BI__builtin_neon_vcltzq_v:
7786 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
7787 ICmpInst::ICMP_SLT, "vcltz");
7788 case NEON::BI__builtin_neon_vclz_v:
7789 case NEON::BI__builtin_neon_vclzq_v:
7790 // We generate a target-independent intrinsic, which needs a second argument
7791 // for whether or not clz of zero is undefined; on ARM it isn't.
7792 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
7793 break;
7794 case NEON::BI__builtin_neon_vcvt_f32_v:
7795 case NEON::BI__builtin_neon_vcvtq_f32_v:
7796 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7797 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
7798 HasLegalHalfType);
7799 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7800 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7801 case NEON::BI__builtin_neon_vcvt_f16_s16:
7802 case NEON::BI__builtin_neon_vcvt_f16_u16:
7803 case NEON::BI__builtin_neon_vcvtq_f16_s16:
7804 case NEON::BI__builtin_neon_vcvtq_f16_u16:
7805 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7806 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
7807 HasLegalHalfType);
7808 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
7809 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
7810 case NEON::BI__builtin_neon_vcvt_n_f16_s16:
7811 case NEON::BI__builtin_neon_vcvt_n_f16_u16:
7812 case NEON::BI__builtin_neon_vcvtq_n_f16_s16:
7813 case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {
7814 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7815 Function *F = CGM.getIntrinsic(Int, Tys);
7816 return EmitNeonCall(F, Ops, "vcvt_n");
7817 }
7818 case NEON::BI__builtin_neon_vcvt_n_f32_v:
7819 case NEON::BI__builtin_neon_vcvt_n_f64_v:
7820 case NEON::BI__builtin_neon_vcvtq_n_f32_v:
7821 case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
7822 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
7823 Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
7824 Function *F = CGM.getIntrinsic(Int, Tys);
7825 return EmitNeonCall(F, Ops, "vcvt_n");
7826 }
7827 case NEON::BI__builtin_neon_vcvt_n_s16_f16:
7828 case NEON::BI__builtin_neon_vcvt_n_s32_v:
7829 case NEON::BI__builtin_neon_vcvt_n_u16_f16:
7830 case NEON::BI__builtin_neon_vcvt_n_u32_v:
7831 case NEON::BI__builtin_neon_vcvt_n_s64_v:
7832 case NEON::BI__builtin_neon_vcvt_n_u64_v:
7833 case NEON::BI__builtin_neon_vcvtq_n_s16_f16:
7834 case NEON::BI__builtin_neon_vcvtq_n_s32_v:
7835 case NEON::BI__builtin_neon_vcvtq_n_u16_f16:
7836 case NEON::BI__builtin_neon_vcvtq_n_u32_v:
7837 case NEON::BI__builtin_neon_vcvtq_n_s64_v:
7838 case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
7839 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7840 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7841 return EmitNeonCall(F, Ops, "vcvt_n");
7842 }
7843 case NEON::BI__builtin_neon_vcvt_s32_v:
7844 case NEON::BI__builtin_neon_vcvt_u32_v:
7845 case NEON::BI__builtin_neon_vcvt_s64_v:
7846 case NEON::BI__builtin_neon_vcvt_u64_v:
7847 case NEON::BI__builtin_neon_vcvt_s16_f16:
7848 case NEON::BI__builtin_neon_vcvt_u16_f16:
7849 case NEON::BI__builtin_neon_vcvtq_s32_v:
7850 case NEON::BI__builtin_neon_vcvtq_u32_v:
7851 case NEON::BI__builtin_neon_vcvtq_s64_v:
7852 case NEON::BI__builtin_neon_vcvtq_u64_v:
7853 case NEON::BI__builtin_neon_vcvtq_s16_f16:
7854 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
7855 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
7856 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
7857 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
7858 }
7859 case NEON::BI__builtin_neon_vcvta_s16_f16:
7860 case NEON::BI__builtin_neon_vcvta_s32_v:
7861 case NEON::BI__builtin_neon_vcvta_s64_v:
7862 case NEON::BI__builtin_neon_vcvta_u16_f16:
7863 case NEON::BI__builtin_neon_vcvta_u32_v:
7864 case NEON::BI__builtin_neon_vcvta_u64_v:
7865 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
7866 case NEON::BI__builtin_neon_vcvtaq_s32_v:
7867 case NEON::BI__builtin_neon_vcvtaq_s64_v:
7868 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
7869 case NEON::BI__builtin_neon_vcvtaq_u32_v:
7870 case NEON::BI__builtin_neon_vcvtaq_u64_v:
7871 case NEON::BI__builtin_neon_vcvtn_s16_f16:
7872 case NEON::BI__builtin_neon_vcvtn_s32_v:
7873 case NEON::BI__builtin_neon_vcvtn_s64_v:
7874 case NEON::BI__builtin_neon_vcvtn_u16_f16:
7875 case NEON::BI__builtin_neon_vcvtn_u32_v:
7876 case NEON::BI__builtin_neon_vcvtn_u64_v:
7877 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
7878 case NEON::BI__builtin_neon_vcvtnq_s32_v:
7879 case NEON::BI__builtin_neon_vcvtnq_s64_v:
7880 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
7881 case NEON::BI__builtin_neon_vcvtnq_u32_v:
7882 case NEON::BI__builtin_neon_vcvtnq_u64_v:
7883 case NEON::BI__builtin_neon_vcvtp_s16_f16:
7884 case NEON::BI__builtin_neon_vcvtp_s32_v:
7885 case NEON::BI__builtin_neon_vcvtp_s64_v:
7886 case NEON::BI__builtin_neon_vcvtp_u16_f16:
7887 case NEON::BI__builtin_neon_vcvtp_u32_v:
7888 case NEON::BI__builtin_neon_vcvtp_u64_v:
7889 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
7890 case NEON::BI__builtin_neon_vcvtpq_s32_v:
7891 case NEON::BI__builtin_neon_vcvtpq_s64_v:
7892 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
7893 case NEON::BI__builtin_neon_vcvtpq_u32_v:
7894 case NEON::BI__builtin_neon_vcvtpq_u64_v:
7895 case NEON::BI__builtin_neon_vcvtm_s16_f16:
7896 case NEON::BI__builtin_neon_vcvtm_s32_v:
7897 case NEON::BI__builtin_neon_vcvtm_s64_v:
7898 case NEON::BI__builtin_neon_vcvtm_u16_f16:
7899 case NEON::BI__builtin_neon_vcvtm_u32_v:
7900 case NEON::BI__builtin_neon_vcvtm_u64_v:
7901 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
7902 case NEON::BI__builtin_neon_vcvtmq_s32_v:
7903 case NEON::BI__builtin_neon_vcvtmq_s64_v:
7904 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
7905 case NEON::BI__builtin_neon_vcvtmq_u32_v:
7906 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
7907 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
7908 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7909 }
7910 case NEON::BI__builtin_neon_vcvtx_f32_v: {
7911 llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};
7912 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
7913
7914 }
7915 case NEON::BI__builtin_neon_vext_v:
7916 case NEON::BI__builtin_neon_vextq_v: {
7917 int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
7918 SmallVector<int, 16> Indices;
7919 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7920 Indices.push_back(i+CV);
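    // e.g. vextq_u8 with CV == 3 builds the mask <3,4,...,18>: the tail of
    // Ops[0] followed by the leading lanes of Ops[1].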
7921
7922 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7923 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7924 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
7925 }
7926 case NEON::BI__builtin_neon_vfma_v:
7927 case NEON::BI__builtin_neon_vfmaq_v: {
7928 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
7929 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7930 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7931
7932 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7933 return emitCallMaybeConstrainedFPBuiltin(
7934 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
7935 {Ops[1], Ops[2], Ops[0]});
7936 }
7937 case NEON::BI__builtin_neon_vld1_v:
7938 case NEON::BI__builtin_neon_vld1q_v: {
7939 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7940 Ops.push_back(getAlignmentValue32(PtrOp0));
7941 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
7942 }
7943 case NEON::BI__builtin_neon_vld1_x2_v:
7944 case NEON::BI__builtin_neon_vld1q_x2_v:
7945 case NEON::BI__builtin_neon_vld1_x3_v:
7946 case NEON::BI__builtin_neon_vld1q_x3_v:
7947 case NEON::BI__builtin_neon_vld1_x4_v:
7948 case NEON::BI__builtin_neon_vld1q_x4_v: {
7949 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
7950 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7951 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
7952 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7953 }
7954 case NEON::BI__builtin_neon_vld2_v:
7955 case NEON::BI__builtin_neon_vld2q_v:
7956 case NEON::BI__builtin_neon_vld3_v:
7957 case NEON::BI__builtin_neon_vld3q_v:
7958 case NEON::BI__builtin_neon_vld4_v:
7959 case NEON::BI__builtin_neon_vld4q_v:
7960 case NEON::BI__builtin_neon_vld2_dup_v:
7961 case NEON::BI__builtin_neon_vld2q_dup_v:
7962 case NEON::BI__builtin_neon_vld3_dup_v:
7963 case NEON::BI__builtin_neon_vld3q_dup_v:
7964 case NEON::BI__builtin_neon_vld4_dup_v:
7965 case NEON::BI__builtin_neon_vld4q_dup_v: {
7966 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7967 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7968 Value *Align = getAlignmentValue32(PtrOp1);
7969 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
7970 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7971 }
7972 case NEON::BI__builtin_neon_vld1_dup_v:
7973 case NEON::BI__builtin_neon_vld1q_dup_v: {
7974 Value *V = PoisonValue::get(Ty);
7975 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
7976 LoadInst *Ld = Builder.CreateLoad(PtrOp0);
7977 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
7978 Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
7979 return EmitNeonSplat(Ops[0], CI);
7980 }
7981 case NEON::BI__builtin_neon_vld2_lane_v:
7982 case NEON::BI__builtin_neon_vld2q_lane_v:
7983 case NEON::BI__builtin_neon_vld3_lane_v:
7984 case NEON::BI__builtin_neon_vld3q_lane_v:
7985 case NEON::BI__builtin_neon_vld4_lane_v:
7986 case NEON::BI__builtin_neon_vld4q_lane_v: {
7987 llvm::Type *Tys[] = {Ty, Int8PtrTy};
7988 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
7989 for (unsigned I = 2; I < Ops.size() - 1; ++I)
7990 Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
7991 Ops.push_back(getAlignmentValue32(PtrOp1));
7992 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);
7993 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7994 }
7995 case NEON::BI__builtin_neon_vmovl_v: {
7996 llvm::FixedVectorType *DTy =
7997 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
7998 Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
7999 if (Usgn)
8000 return Builder.CreateZExt(Ops[0], Ty, "vmovl");
8001 return Builder.CreateSExt(Ops[0], Ty, "vmovl");
8002 }
8003 case NEON::BI__builtin_neon_vmovn_v: {
8004 llvm::FixedVectorType *QTy =
8005 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8006 Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
8007 return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
8008 }
8009 case NEON::BI__builtin_neon_vmull_v:
8010 // FIXME: the integer vmull operations could be emitted in terms of pure
8011 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
8012 // hoisting the exts outside loops. Until global ISel comes along that can
8013 // see through such movement, this leads to bad CodeGen. So we need an
8014 // intrinsic for now.
8015 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
8016 Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
8017 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8018 case NEON::BI__builtin_neon_vpadal_v:
8019 case NEON::BI__builtin_neon_vpadalq_v: {
8020 // The source operand type has twice as many elements of half the size.
8021 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8022 llvm::Type *EltTy =
8023 llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8024 auto *NarrowTy =
8025 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8026 llvm::Type *Tys[2] = { Ty, NarrowTy };
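    // e.g. for vpadal_s8 the wide accumulator/result type Ty is <4 x i16> and
    // the narrow source type NarrowTy is <8 x i8>.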
8027 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8028 }
8029 case NEON::BI__builtin_neon_vpaddl_v:
8030 case NEON::BI__builtin_neon_vpaddlq_v: {
8031 // The source operand type has twice as many elements of half the size.
8032 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
8033 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
8034 auto *NarrowTy =
8035 llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
8036 llvm::Type *Tys[2] = { Ty, NarrowTy };
8037 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
8038 }
8039 case NEON::BI__builtin_neon_vqdmlal_v:
8040 case NEON::BI__builtin_neon_vqdmlsl_v: {
8041 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
8042 Ops[1] =
8043 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
8044 Ops.resize(2);
8045 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
8046 }
8047 case NEON::BI__builtin_neon_vqdmulhq_lane_v:
8048 case NEON::BI__builtin_neon_vqdmulh_lane_v:
8049 case NEON::BI__builtin_neon_vqrdmulhq_lane_v:
8050 case NEON::BI__builtin_neon_vqrdmulh_lane_v: {
8051 auto *RTy = cast<llvm::FixedVectorType>(Ty);
8052 if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
8053 BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
8054 RTy = llvm::FixedVectorType::get(RTy->getElementType(),
8055 RTy->getNumElements() * 2);
8056 llvm::Type *Tys[2] = {
8057 RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8058 /*isQuad*/ false))};
8059 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8060 }
8061 case NEON::BI__builtin_neon_vqdmulhq_laneq_v:
8062 case NEON::BI__builtin_neon_vqdmulh_laneq_v:
8063 case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:
8064 case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {
8065 llvm::Type *Tys[2] = {
8066 Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
8067 /*isQuad*/ true))};
8068 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
8069 }
8070 case NEON::BI__builtin_neon_vqshl_n_v:
8071 case NEON::BI__builtin_neon_vqshlq_n_v:
8072 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
8073 1, false);
8074 case NEON::BI__builtin_neon_vqshlu_n_v:
8075 case NEON::BI__builtin_neon_vqshluq_n_v:
8076 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
8077 1, false);
8078 case NEON::BI__builtin_neon_vrecpe_v:
8079 case NEON::BI__builtin_neon_vrecpeq_v:
8080 case NEON::BI__builtin_neon_vrsqrte_v:
8081 case NEON::BI__builtin_neon_vrsqrteq_v:
8082 Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
8083 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8084 case NEON::BI__builtin_neon_vrndi_v:
8085 case NEON::BI__builtin_neon_vrndiq_v:
8086 Int = Builder.getIsFPConstrained()
8087 ? Intrinsic::experimental_constrained_nearbyint
8088 : Intrinsic::nearbyint;
8089 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
8090 case NEON::BI__builtin_neon_vrshr_n_v:
8091 case NEON::BI__builtin_neon_vrshrq_n_v:
8092 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
8093 1, true);
8094 case NEON::BI__builtin_neon_vsha512hq_u64:
8095 case NEON::BI__builtin_neon_vsha512h2q_u64:
8096 case NEON::BI__builtin_neon_vsha512su0q_u64:
8097 case NEON::BI__builtin_neon_vsha512su1q_u64: {
8098 Function *F = CGM.getIntrinsic(Int);
8099 return EmitNeonCall(F, Ops, "");
8100 }
8101 case NEON::BI__builtin_neon_vshl_n_v:
8102 case NEON::BI__builtin_neon_vshlq_n_v:
8103 Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
8104 return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
8105 "vshl_n");
8106 case NEON::BI__builtin_neon_vshll_n_v: {
8107 llvm::FixedVectorType *SrcTy =
8108 llvm::FixedVectorType::getTruncatedElementVectorType(VTy);
8109 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8110 if (Usgn)
8111 Ops[0] = Builder.CreateZExt(Ops[0], VTy);
8112 else
8113 Ops[0] = Builder.CreateSExt(Ops[0], VTy);
8114 Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
8115 return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
8116 }
8117 case NEON::BI__builtin_neon_vshrn_n_v: {
8118 llvm::FixedVectorType *SrcTy =
8119 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8120 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8121 Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
8122 if (Usgn)
8123 Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
8124 else
8125 Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
8126 return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
8127 }
8128 case NEON::BI__builtin_neon_vshr_n_v:
8129 case NEON::BI__builtin_neon_vshrq_n_v:
8130 return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
8131 case NEON::BI__builtin_neon_vst1_v:
8132 case NEON::BI__builtin_neon_vst1q_v:
8133 case NEON::BI__builtin_neon_vst2_v:
8134 case NEON::BI__builtin_neon_vst2q_v:
8135 case NEON::BI__builtin_neon_vst3_v:
8136 case NEON::BI__builtin_neon_vst3q_v:
8137 case NEON::BI__builtin_neon_vst4_v:
8138 case NEON::BI__builtin_neon_vst4q_v:
8139 case NEON::BI__builtin_neon_vst2_lane_v:
8140 case NEON::BI__builtin_neon_vst2q_lane_v:
8141 case NEON::BI__builtin_neon_vst3_lane_v:
8142 case NEON::BI__builtin_neon_vst3q_lane_v:
8143 case NEON::BI__builtin_neon_vst4_lane_v:
8144 case NEON::BI__builtin_neon_vst4q_lane_v: {
8145 llvm::Type *Tys[] = {Int8PtrTy, Ty};
8146 Ops.push_back(getAlignmentValue32(PtrOp0));
8147 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
8148 }
8149 case NEON::BI__builtin_neon_vsm3partw1q_u32:
8150 case NEON::BI__builtin_neon_vsm3partw2q_u32:
8151 case NEON::BI__builtin_neon_vsm3ss1q_u32:
8152 case NEON::BI__builtin_neon_vsm4ekeyq_u32:
8153 case NEON::BI__builtin_neon_vsm4eq_u32: {
8154 Function *F = CGM.getIntrinsic(Int);
8155 return EmitNeonCall(F, Ops, "");
8156 }
8157 case NEON::BI__builtin_neon_vsm3tt1aq_u32:
8158 case NEON::BI__builtin_neon_vsm3tt1bq_u32:
8159 case NEON::BI__builtin_neon_vsm3tt2aq_u32:
8160 case NEON::BI__builtin_neon_vsm3tt2bq_u32: {
8161 Function *F = CGM.getIntrinsic(Int);
8162 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
8163 return EmitNeonCall(F, Ops, "");
8164 }
8165 case NEON::BI__builtin_neon_vst1_x2_v:
8166 case NEON::BI__builtin_neon_vst1q_x2_v:
8167 case NEON::BI__builtin_neon_vst1_x3_v:
8168 case NEON::BI__builtin_neon_vst1q_x3_v:
8169 case NEON::BI__builtin_neon_vst1_x4_v:
8170 case NEON::BI__builtin_neon_vst1q_x4_v: {
8171 // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
8172 // in AArch64 it comes last. We may want to stick to one or the other.
8173 if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||
8174 Arch == llvm::Triple::aarch64_32) {
8175 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
8176 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
8177 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8178 }
8179 llvm::Type *Tys[2] = {UnqualPtrTy, VTy};
8180 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
8181 }
8182 case NEON::BI__builtin_neon_vsubhn_v: {
8183 llvm::FixedVectorType *SrcTy =
8184 llvm::FixedVectorType::getExtendedElementVectorType(VTy);
8185
8186 // %diff = sub <4 x i32> %lhs, %rhs
8187 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
8188 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
8189 Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
8190
8191 // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
8192 Constant *ShiftAmt =
8193 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
8194 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
8195
8196 // %res = trunc <4 x i32> %high to <4 x i16>
8197 return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
8198 }
8199 case NEON::BI__builtin_neon_vtrn_v:
8200 case NEON::BI__builtin_neon_vtrnq_v: {
8201 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8202 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8203 Value *SV = nullptr;
8204
8205 for (unsigned vi = 0; vi != 2; ++vi) {
8206 SmallVector<int, 16> Indices;
8207 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8208 Indices.push_back(i+vi);
8209 Indices.push_back(i+e+vi);
8210 }
8211 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8212 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
8213 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8214 }
8215 return SV;
8216 }
8217 case NEON::BI__builtin_neon_vtst_v:
8218 case NEON::BI__builtin_neon_vtstq_v: {
8219 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8220 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8221 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
8222 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
8223 ConstantAggregateZero::get(Ty));
8224 return Builder.CreateSExt(Ops[0], Ty, "vtst");
8225 }
8226 case NEON::BI__builtin_neon_vuzp_v:
8227 case NEON::BI__builtin_neon_vuzpq_v: {
8228 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8229 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8230 Value *SV = nullptr;
8231
8232 for (unsigned vi = 0; vi != 2; ++vi) {
8233 SmallVector<int, 16> Indices;
8234 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
8235 Indices.push_back(2*i+vi);
8236
8237 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8238 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
8239 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8240 }
8241 return SV;
8242 }
8243 case NEON::BI__builtin_neon_vxarq_u64: {
8244 Function *F = CGM.getIntrinsic(Int);
8245 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
8246 return EmitNeonCall(F, Ops, "");
8247 }
8248 case NEON::BI__builtin_neon_vzip_v:
8249 case NEON::BI__builtin_neon_vzipq_v: {
8250 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8251 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8252 Value *SV = nullptr;
8253
8254 for (unsigned vi = 0; vi != 2; ++vi) {
8255 SmallVector<int, 16> Indices;
8256 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
8257 Indices.push_back((i + vi*e) >> 1);
8258 Indices.push_back(((i + vi*e) >> 1)+e);
8259 }
8260 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
8261 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
8262 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
8263 }
8264 return SV;
8265 }
8266 case NEON::BI__builtin_neon_vdot_s32:
8267 case NEON::BI__builtin_neon_vdot_u32:
8268 case NEON::BI__builtin_neon_vdotq_s32:
8269 case NEON::BI__builtin_neon_vdotq_u32: {
8270 auto *InputTy =
8271 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8272 llvm::Type *Tys[2] = { Ty, InputTy };
8273 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
8274 }
8275 case NEON::BI__builtin_neon_vfmlal_low_f16:
8276 case NEON::BI__builtin_neon_vfmlalq_low_f16: {
8277 auto *InputTy =
8278 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8279 llvm::Type *Tys[2] = { Ty, InputTy };
8280 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
8281 }
8282 case NEON::BI__builtin_neon_vfmlsl_low_f16:
8283 case NEON::BI__builtin_neon_vfmlslq_low_f16: {
8284 auto *InputTy =
8285 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8286 llvm::Type *Tys[2] = { Ty, InputTy };
8287 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
8288 }
8289 case NEON::BI__builtin_neon_vfmlal_high_f16:
8290 case NEON::BI__builtin_neon_vfmlalq_high_f16: {
8291 auto *InputTy =
8292 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8293 llvm::Type *Tys[2] = { Ty, InputTy };
8294 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
8295 }
8296 case NEON::BI__builtin_neon_vfmlsl_high_f16:
8297 case NEON::BI__builtin_neon_vfmlslq_high_f16: {
8298 auto *InputTy =
8299 llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
8300 llvm::Type *Tys[2] = { Ty, InputTy };
8301 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
8302 }
8303 case NEON::BI__builtin_neon_vmmlaq_s32:
8304 case NEON::BI__builtin_neon_vmmlaq_u32: {
8305 auto *InputTy =
8306 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8307 llvm::Type *Tys[2] = { Ty, InputTy };
8308 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");
8309 }
8310 case NEON::BI__builtin_neon_vusmmlaq_s32: {
8311 auto *InputTy =
8312 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8313 llvm::Type *Tys[2] = { Ty, InputTy };
8314 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
8315 }
8316 case NEON::BI__builtin_neon_vusdot_s32:
8317 case NEON::BI__builtin_neon_vusdotq_s32: {
8318 auto *InputTy =
8319 llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
8320 llvm::Type *Tys[2] = { Ty, InputTy };
8321 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
8322 }
8323 case NEON::BI__builtin_neon_vbfdot_f32:
8324 case NEON::BI__builtin_neon_vbfdotq_f32: {
8325 llvm::Type *InputTy =
8326 llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);
8327 llvm::Type *Tys[2] = { Ty, InputTy };
8328 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");
8329 }
8330 case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {
8331 llvm::Type *Tys[1] = { Ty };
8332 Function *F = CGM.getIntrinsic(Int, Tys);
8333 return EmitNeonCall(F, Ops, "vcvtfp2bf");
8334 }
8335
8336 }
8337
8338 assert(Int && "Expected valid intrinsic number");
8339
8340 // Determine the type(s) of this overloaded AArch64 intrinsic.
8341 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
8342
8343 Value *Result = EmitNeonCall(F, Ops, NameHint);
8344 llvm::Type *ResultType = ConvertType(E->getType());
8345 // AArch64 intrinsics return a one-element vector; cast it to the
8346 // scalar type expected by the builtin.
8347 return Builder.CreateBitCast(Result, ResultType, NameHint);
8348}
8349
8350Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
8351 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
8352 const CmpInst::Predicate Ip, const Twine &Name) {
8353 llvm::Type *OTy = Op->getType();
8354
8355 // FIXME: this is utterly horrific. We should not be looking at previous
8356 // codegen context to find out what needs doing. Unfortunately TableGen
8357 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
8358 // (etc).
8359 if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
8360 OTy = BI->getOperand(0)->getType();
8361
8362 Op = Builder.CreateBitCast(Op, OTy);
8363 if (OTy->getScalarType()->isFloatingPointTy()) {
8364 if (Fp == CmpInst::FCMP_OEQ)
8365 Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
8366 else
8367 Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));
8368 } else {
8369 Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
8370 }
8371 return Builder.CreateSExt(Op, Ty, Name);
8372}
8373
8374static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
8375 Value *ExtOp, Value *IndexOp,
8376 llvm::Type *ResTy, unsigned IntID,
8377 const char *Name) {
8378 SmallVector<Value *, 16> TblOps;
8379 if (ExtOp)
8380 TblOps.push_back(ExtOp);
8381
8382 // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
8383 SmallVector<int, 16> Indices;
8384 auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());
8385 for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
8386 Indices.push_back(2*i);
8387 Indices.push_back(2*i+1);
8388 }
8389
8390 int PairPos = 0, End = Ops.size() - 1;
8391 while (PairPos < End) {
8392 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8393 Ops[PairPos+1], Indices,
8394 Name));
8395 PairPos += 2;
8396 }
8397
8398 // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
8399 // of the last 128-bit lookup table with zero.
8400 if (PairPos == End) {
8401 Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
8402 TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
8403 ZeroTbl, Indices, Name));
8404 }
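  // For example, a vtbl3-style lookup arrives here with three 64-bit tables:
  // the first two are concatenated into one 128-bit table, the leftover third
  // is zero-padded into a second, and both tables plus IndexOp then feed a
  // single tbl2 intrinsic call.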
8405
8406 Function *TblF;
8407 TblOps.push_back(IndexOp);
8408 TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
8409
8410 return CGF.EmitNeonCall(TblF, TblOps, Name);
8411}
8412
8413Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
8414 unsigned Value;
8415 switch (BuiltinID) {
8416 default:
8417 return nullptr;
8418 case clang::ARM::BI__builtin_arm_nop:
8419 Value = 0;
8420 break;
8421 case clang::ARM::BI__builtin_arm_yield:
8422 case clang::ARM::BI__yield:
8423 Value = 1;
8424 break;
8425 case clang::ARM::BI__builtin_arm_wfe:
8426 case clang::ARM::BI__wfe:
8427 Value = 2;
8428 break;
8429 case clang::ARM::BI__builtin_arm_wfi:
8430 case clang::ARM::BI__wfi:
8431 Value = 3;
8432 break;
8433 case clang::ARM::BI__builtin_arm_sev:
8434 case clang::ARM::BI__sev:
8435 Value = 4;
8436 break;
8437 case clang::ARM::BI__builtin_arm_sevl:
8438 case clang::ARM::BI__sevl:
8439 Value = 5;
8440 break;
8441 }
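  // For example, __builtin_arm_wfi (Value == 3) is emitted as
  //   call void @llvm.arm.hint(i32 3)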
8442
8443 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
8444 llvm::ConstantInt::get(Int32Ty, Value));
8445}
8446
8447enum SpecialRegisterAccessKind {
8448 NormalRead,
8449 VolatileRead,
8450 Write,
8451};
8452
8453// Generates the IR for __builtin_read_exec_*.
8454// Lowers the builtin to amdgcn_ballot intrinsic.
8455static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
8456 llvm::Type *RegisterType,
8457 llvm::Type *ValueType, bool isExecHi) {
8458 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8459 CodeGen::CodeGenModule &CGM = CGF.CGM;
8460
8461 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});
8462 llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});
8463
8464 if (isExecHi) {
8465 Value *Rt2 = Builder.CreateLShr(Call, 32);
8466 Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);
8467 return Rt2;
8468 }
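  // E.g. __builtin_amdgcn_read_exec is emitted as roughly
  //   %exec = call i64 @llvm.amdgcn.ballot.i64(i1 true)
  // while the *_hi variant additionally extracts bits 63:32 as above.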
8469
8470 return Call;
8471}
8472
8473// Generates the IR for the read/write special register builtin.
8474// ValueType is the type of the value that is to be written or read;
8475// RegisterType is the type of the register being written to or read from.
8476static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
8477 const CallExpr *E,
8478 llvm::Type *RegisterType,
8479 llvm::Type *ValueType,
8480 SpecialRegisterAccessKind AccessKind,
8481 StringRef SysReg = "") {
8482 // The read/write register intrinsics only support 32-, 64- and 128-bit operations.
8483 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) ||
8484 RegisterType->isIntegerTy(128)) &&
8485 "Unsupported size for register.");
8486
8487 CodeGen::CGBuilderTy &Builder = CGF.Builder;
8488 CodeGen::CodeGenModule &CGM = CGF.CGM;
8489 LLVMContext &Context = CGM.getLLVMContext();
8490
8491 if (SysReg.empty()) {
8492 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
8493 SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
8494 }
8495
8496 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
8497 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
8498 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
8499
8500 llvm::Type *Types[] = { RegisterType };
8501
8502 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
8503 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
8504 && "Can't fit 64-bit value in 32-bit register");
8505
8506 if (AccessKind != Write) {
8507 assert(AccessKind == NormalRead || AccessKind == VolatileRead);
8508 llvm::Function *F = CGM.getIntrinsic(
8509 AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register
8510 : llvm::Intrinsic::read_register,
8511 Types);
8512 llvm::Value *Call = Builder.CreateCall(F, Metadata);
8513
8514 if (MixedTypes)
8515 // Read into a 64-bit register and then truncate the result to 32 bits.
8516 return Builder.CreateTrunc(Call, ValueType);
8517
8518 if (ValueType->isPointerTy())
8519 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
8520 return Builder.CreateIntToPtr(Call, ValueType);
8521
8522 return Call;
8523 }
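  // For example, __builtin_arm_rsr("cpsr") takes the read path above and
  // becomes roughly
  //   %0 = call i32 @llvm.read_volatile_register.i32(metadata !{!"cpsr"})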
8524
8525 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
8526 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
8527 if (MixedTypes) {
8528 // Extend the 32-bit write value to 64 bits to pass to the write intrinsic.
8529 ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
8530 return Builder.CreateCall(F, { Metadata, ArgValue });
8531 }
8532
8533 if (ValueType->isPointerTy()) {
8534 // ArgValue is a VoidPtrTy, but the register write wants an i32/i64.
8535 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
8536 return Builder.CreateCall(F, { Metadata, ArgValue });
8537 }
8538
8539 return Builder.CreateCall(F, { Metadata, ArgValue });
8540}
8541
8542/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
8543/// argument that specifies the vector type.
8544static bool HasExtraNeonArgument(unsigned BuiltinID) {
8545 switch (BuiltinID) {
8546 default: break;
8547 case NEON::BI__builtin_neon_vget_lane_i8:
8548 case NEON::BI__builtin_neon_vget_lane_i16:
8549 case NEON::BI__builtin_neon_vget_lane_bf16:
8550 case NEON::BI__builtin_neon_vget_lane_i32:
8551 case NEON::BI__builtin_neon_vget_lane_i64:
8552 case NEON::BI__builtin_neon_vget_lane_f32:
8553 case NEON::BI__builtin_neon_vgetq_lane_i8:
8554 case NEON::BI__builtin_neon_vgetq_lane_i16:
8555 case NEON::BI__builtin_neon_vgetq_lane_bf16:
8556 case NEON::BI__builtin_neon_vgetq_lane_i32:
8557 case NEON::BI__builtin_neon_vgetq_lane_i64:
8558 case NEON::BI__builtin_neon_vgetq_lane_f32:
8559 case NEON::BI__builtin_neon_vduph_lane_bf16:
8560 case NEON::BI__builtin_neon_vduph_laneq_bf16:
8561 case NEON::BI__builtin_neon_vset_lane_i8:
8562 case NEON::BI__builtin_neon_vset_lane_i16:
8563 case NEON::BI__builtin_neon_vset_lane_bf16:
8564 case NEON::BI__builtin_neon_vset_lane_i32:
8565 case NEON::BI__builtin_neon_vset_lane_i64:
8566 case NEON::BI__builtin_neon_vset_lane_f32:
8567 case NEON::BI__builtin_neon_vsetq_lane_i8:
8568 case NEON::BI__builtin_neon_vsetq_lane_i16:
8569 case NEON::BI__builtin_neon_vsetq_lane_bf16:
8570 case NEON::BI__builtin_neon_vsetq_lane_i32:
8571 case NEON::BI__builtin_neon_vsetq_lane_i64:
8572 case NEON::BI__builtin_neon_vsetq_lane_f32:
8573 case NEON::BI__builtin_neon_vsha1h_u32:
8574 case NEON::BI__builtin_neon_vsha1cq_u32:
8575 case NEON::BI__builtin_neon_vsha1pq_u32:
8576 case NEON::BI__builtin_neon_vsha1mq_u32:
8577 case NEON::BI__builtin_neon_vcvth_bf16_f32:
8578 case clang::ARM::BI_MoveToCoprocessor:
8579 case clang::ARM::BI_MoveToCoprocessor2:
8580 return false;
8581 }
8582 return true;
8583}
8584
8585Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
8586 const CallExpr *E,
8587 ReturnValueSlot ReturnValue,
8588 llvm::Triple::ArchType Arch) {
8589 if (auto Hint = GetValueForARMHint(BuiltinID))
8590 return Hint;
8591
8592 if (BuiltinID == clang::ARM::BI__emit) {
8593 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
8594 llvm::FunctionType *FTy =
8595 llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
8596
8597 Expr::EvalResult Result;
8598 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
8599 llvm_unreachable("Sema will ensure that the parameter is constant");
8600
8601 llvm::APSInt Value = Result.Val.getInt();
8602 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
8603
8604 llvm::InlineAsm *Emit =
8605 IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
8606 /*hasSideEffects=*/true)
8607 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
8608 /*hasSideEffects=*/true);
8609
8610 return Builder.CreateCall(Emit);
8611 }
8612
8613 if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {
8614 Value *Option = EmitScalarExpr(E->getArg(0));
8615 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
8616 }
8617
8618 if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {
8619 Value *Address = EmitScalarExpr(E->getArg(0));
8620 Value *RW = EmitScalarExpr(E->getArg(1));
8621 Value *IsData = EmitScalarExpr(E->getArg(2));
8622
8623 // Locality is not supported on the ARM target.
8624 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
8625
8626 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
8627 return Builder.CreateCall(F, {Address, RW, Locality, IsData});
8628 }
8629
8630 if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {
8631 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8632 return Builder.CreateCall(
8633 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
8634 }
8635
8636 if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||
8637 BuiltinID == clang::ARM::BI__builtin_arm_clz64) {
8638 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8639 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
8640 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
8641 if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)
8642 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
8643 return Res;
8644 }
8645
8646
8647 if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {
8648 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8649 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");
8650 }
8651 if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {
8652 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
8653 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,
8654 "cls");
8655 }
8656
8657 if (BuiltinID == clang::ARM::BI__clear_cache) {
8658 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
8659 const FunctionDecl *FD = E->getDirectCallee();
8660 Value *Ops[2];
8661 for (unsigned i = 0; i < 2; i++)
8662 Ops[i] = EmitScalarExpr(E->getArg(i));
8663 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
8664 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
8665 StringRef Name = FD->getName();
8666 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
8667 }
8668
8669 if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||
8670 BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {
8671 Function *F;
8672
8673 switch (BuiltinID) {
8674 default: llvm_unreachable("unexpected builtin");
8675 case clang::ARM::BI__builtin_arm_mcrr:
8676 F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
8677 break;
8678 case clang::ARM::BI__builtin_arm_mcrr2:
8679 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
8680 break;
8681 }
8682
8683 // The MCRR{2} instruction has 5 operands, but
8684 // the builtin has only 4 because Rt and Rt2
8685 // are passed to it as a single unsigned 64-bit
8686 // integer; the LLVM intrinsic, like the
8687 // instruction, takes them as two separate
8688 // 32-bit integers.
8689
8690 Value *Coproc = EmitScalarExpr(E->getArg(0));
8691 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8692 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
8693 Value *CRm = EmitScalarExpr(E->getArg(3));
8694
8695 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8696 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
8697 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
8698 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
8699
8700 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
8701 }
8702
8703 if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||
8704 BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {
8705 Function *F;
8706
8707 switch (BuiltinID) {
8708 default: llvm_unreachable("unexpected builtin");
8709 case clang::ARM::BI__builtin_arm_mrrc:
8710 F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
8711 break;
8712 case clang::ARM::BI__builtin_arm_mrrc2:
8713 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
8714 break;
8715 }
8716
8717 Value *Coproc = EmitScalarExpr(E->getArg(0));
8718 Value *Opc1 = EmitScalarExpr(E->getArg(1));
8719 Value *CRm = EmitScalarExpr(E->getArg(2));
8720 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
8721
8722 // The result is an unsigned 64-bit integer, returned
8723 // as two 32-bit integers that we recombine below.
8724
8725 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
8726 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
8727 Rt = Builder.CreateZExt(Rt, Int64Ty);
8728 Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
8729
8730 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
8731 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
8732 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
8733
8734 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
8735 }
8736
8737 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||
8738 ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8739 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&
8740 getContext().getTypeSize(E->getType()) == 64) ||
8741 BuiltinID == clang::ARM::BI__ldrexd) {
8742 Function *F;
8743
8744 switch (BuiltinID) {
8745 default: llvm_unreachable("unexpected builtin");
8746 case clang::ARM::BI__builtin_arm_ldaex:
8747 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
8748 break;
8749 case clang::ARM::BI__builtin_arm_ldrexd:
8750 case clang::ARM::BI__builtin_arm_ldrex:
8751 case clang::ARM::BI__ldrexd:
8752 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
8753 break;
8754 }
8755
8756 Value *LdPtr = EmitScalarExpr(E->getArg(0));
8757 Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
8758
8759 Value *Val0 = Builder.CreateExtractValue(Val, 1);
8760 Value *Val1 = Builder.CreateExtractValue(Val, 0);
8761 Val0 = Builder.CreateZExt(Val0, Int64Ty);
8762 Val1 = Builder.CreateZExt(Val1, Int64Ty);
8763
8764 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
8765 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
8766 Val = Builder.CreateOr(Val, Val1);
8767 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
8768 }
8769
8770 if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||
8771 BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {
8772 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
8773
8774 QualType Ty = E->getType();
8775 llvm::Type *RealResTy = ConvertType(Ty);
8776 llvm::Type *IntTy =
8777 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8778
8779 Function *F = CGM.getIntrinsic(
8780 BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex
8781 : Intrinsic::arm_ldrex,
8782 UnqualPtrTy);
8783 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
8784 Val->addParamAttr(
8785 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
8786
8787 if (RealResTy->isPointerTy())
8788 return Builder.CreateIntToPtr(Val, RealResTy);
8789 else {
8790 llvm::Type *IntResTy = llvm::IntegerType::get(
8791 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
8792 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
8793 RealResTy);
8794 }
8795 }
8796
8797 if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||
8798 ((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||
8799 BuiltinID == clang::ARM::BI__builtin_arm_strex) &&
8800 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
8801 Function *F = CGM.getIntrinsic(
8802 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd
8803 : Intrinsic::arm_strexd);
8804 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
8805
8806 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
8807 Value *Val = EmitScalarExpr(E->getArg(0));
8808 Builder.CreateStore(Val, Tmp);
8809
8810 Address LdPtr = Tmp.withElementType(STy);
8811 Val = Builder.CreateLoad(LdPtr);
8812
8813 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
8814 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
8815 Value *StPtr = EmitScalarExpr(E->getArg(1));
8816 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
8817 }
8818
8819 if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||
8820 BuiltinID == clang::ARM::BI__builtin_arm_stlex) {
8821 Value *StoreVal = EmitScalarExpr(E->getArg(0));
8822 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
8823
8824 QualType Ty = E->getArg(0)->getType();
8825 llvm::Type *StoreTy =
8826 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
8827
8828 if (StoreVal->getType()->isPointerTy())
8829 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
8830 else {
8831 llvm::Type *IntTy = llvm::IntegerType::get(
8832 getLLVMContext(),
8833 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
8834 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
8835 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
8836 }
8837
8838 Function *F = CGM.getIntrinsic(
8839 BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex
8840 : Intrinsic::arm_strex,
8841 StoreAddr->getType());
8842
8843 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
8844 CI->addParamAttr(
8845 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
8846 return CI;
8847 }
8848
8849 if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {
8850 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
8851 return Builder.CreateCall(F);
8852 }
8853
8854 // CRC32
8855 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
8856 switch (BuiltinID) {
8857 case clang::ARM::BI__builtin_arm_crc32b:
8858 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
8859 case clang::ARM::BI__builtin_arm_crc32cb:
8860 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
8861 case clang::ARM::BI__builtin_arm_crc32h:
8862 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
8863 case clang::ARM::BI__builtin_arm_crc32ch:
8864 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
8865 case clang::ARM::BI__builtin_arm_crc32w:
8866 case clang::ARM::BI__builtin_arm_crc32d:
8867 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
8868 case clang::ARM::BI__builtin_arm_crc32cw:
8869 case clang::ARM::BI__builtin_arm_crc32cd:
8870 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
8871 }
8872
8873 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
8874 Value *Arg0 = EmitScalarExpr(E->getArg(0));
8875 Value *Arg1 = EmitScalarExpr(E->getArg(1));
8876
8877 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
8878 // intrinsics, hence we need different codegen for these cases.
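    // E.g. __builtin_arm_crc32d(crc, x) is emitted as
    //   crc32w(crc32w(crc, trunc(x)), trunc(x >> 32))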
8879 if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||
8880 BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {
8881 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
8882 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
8883 Value *Arg1b = Builder.CreateLShr(Arg1, C1);
8884 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
8885
8886 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8887 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
8888 return Builder.CreateCall(F, {Res, Arg1b});
8889 } else {
8890 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
8891
8892 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
8893 return Builder.CreateCall(F, {Arg0, Arg1});
8894 }
8895 }
8896
8897 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8898 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8899 BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8900 BuiltinID == clang::ARM::BI__builtin_arm_wsr ||
8901 BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||
8902 BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {
8903
8904 SpecialRegisterAccessKind AccessKind = Write;
8905 if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||
8906 BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8907 BuiltinID == clang::ARM::BI__builtin_arm_rsrp)
8908 AccessKind = VolatileRead;
8909
8910 bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||
8911 BuiltinID == clang::ARM::BI__builtin_arm_wsrp;
8912
8913 bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||
8914 BuiltinID == clang::ARM::BI__builtin_arm_wsr64;
8915
8916 llvm::Type *ValueType;
8917 llvm::Type *RegisterType;
8918 if (IsPointerBuiltin) {
8919 ValueType = VoidPtrTy;
8920 RegisterType = Int32Ty;
8921 } else if (Is64Bit) {
8922 ValueType = RegisterType = Int64Ty;
8923 } else {
8924 ValueType = RegisterType = Int32Ty;
8925 }
8926
8927 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
8928 AccessKind);
8929 }
8930
8931 if (BuiltinID == ARM::BI__builtin_sponentry) {
8932 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
8933 return Builder.CreateCall(F);
8934 }
8935
8936 // Handle MSVC intrinsics before argument evaluation to prevent double
8937 // evaluation.
8938 if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))
8939 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
8940
8941 // Deal with MVE builtins
8942 if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8943 return Result;
8944 // Handle CDE builtins
8945 if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
8946 return Result;
8947
8948 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
8949 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
8950 return P.first == BuiltinID;
8951 });
8952 if (It != end(NEONEquivalentIntrinsicMap))
8953 BuiltinID = It->second;
8954
8955 // Find out if any arguments are required to be integer constant
8956 // expressions.
8957 unsigned ICEArguments = 0;
8958 ASTContext::GetBuiltinTypeError Error;
8959 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
8960 assert(Error == ASTContext::GE_None && "Should not codegen an error");
8961
8962 auto getAlignmentValue32 = [&](Address addr) -> Value* {
8963 return Builder.getInt32(addr.getAlignment().getQuantity());
8964 };
8965
8966 Address PtrOp0 = Address::invalid();
8967 Address PtrOp1 = Address::invalid();
8968 SmallVector<Value*, 4> Ops;
8969 bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
8970 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
8971 for (unsigned i = 0, e = NumArgs; i != e; i++) {
8972 if (i == 0) {
8973 switch (BuiltinID) {
8974 case NEON::BI__builtin_neon_vld1_v:
8975 case NEON::BI__builtin_neon_vld1q_v:
8976 case NEON::BI__builtin_neon_vld1q_lane_v:
8977 case NEON::BI__builtin_neon_vld1_lane_v:
8978 case NEON::BI__builtin_neon_vld1_dup_v:
8979 case NEON::BI__builtin_neon_vld1q_dup_v:
8980 case NEON::BI__builtin_neon_vst1_v:
8981 case NEON::BI__builtin_neon_vst1q_v:
8982 case NEON::BI__builtin_neon_vst1q_lane_v:
8983 case NEON::BI__builtin_neon_vst1_lane_v:
8984 case NEON::BI__builtin_neon_vst2_v:
8985 case NEON::BI__builtin_neon_vst2q_v:
8986 case NEON::BI__builtin_neon_vst2_lane_v:
8987 case NEON::BI__builtin_neon_vst2q_lane_v:
8988 case NEON::BI__builtin_neon_vst3_v:
8989 case NEON::BI__builtin_neon_vst3q_v:
8990 case NEON::BI__builtin_neon_vst3_lane_v:
8991 case NEON::BI__builtin_neon_vst3q_lane_v:
8992 case NEON::BI__builtin_neon_vst4_v:
8993 case NEON::BI__builtin_neon_vst4q_v:
8994 case NEON::BI__builtin_neon_vst4_lane_v:
8995 case NEON::BI__builtin_neon_vst4q_lane_v:
8996 // Get the alignment for the argument in addition to the value;
8997 // we'll use it later.
8998 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
8999 Ops.push_back(PtrOp0.emitRawPointer(*this));
9000 continue;
9001 }
9002 }
9003 if (i == 1) {
9004 switch (BuiltinID) {
9005 case NEON::BI__builtin_neon_vld2_v:
9006 case NEON::BI__builtin_neon_vld2q_v:
9007 case NEON::BI__builtin_neon_vld3_v:
9008 case NEON::BI__builtin_neon_vld3q_v:
9009 case NEON::BI__builtin_neon_vld4_v:
9010 case NEON::BI__builtin_neon_vld4q_v:
9011 case NEON::BI__builtin_neon_vld2_lane_v:
9012 case NEON::BI__builtin_neon_vld2q_lane_v:
9013 case NEON::BI__builtin_neon_vld3_lane_v:
9014 case NEON::BI__builtin_neon_vld3q_lane_v:
9015 case NEON::BI__builtin_neon_vld4_lane_v:
9016 case NEON::BI__builtin_neon_vld4q_lane_v:
9017 case NEON::BI__builtin_neon_vld2_dup_v:
9018 case NEON::BI__builtin_neon_vld2q_dup_v:
9019 case NEON::BI__builtin_neon_vld3_dup_v:
9020 case NEON::BI__builtin_neon_vld3q_dup_v:
9021 case NEON::BI__builtin_neon_vld4_dup_v:
9022 case NEON::BI__builtin_neon_vld4q_dup_v:
9023 // Get the alignment for the argument in addition to the value;
9024 // we'll use it later.
9025 PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
9026 Ops.push_back(PtrOp1.emitRawPointer(*this));
9027 continue;
9028 }
9029 }
9030
9031 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
9032 }
9033
9034 switch (BuiltinID) {
9035 default: break;
9036
9037 case NEON::BI__builtin_neon_vget_lane_i8:
9038 case NEON::BI__builtin_neon_vget_lane_i16:
9039 case NEON::BI__builtin_neon_vget_lane_i32:
9040 case NEON::BI__builtin_neon_vget_lane_i64:
9041 case NEON::BI__builtin_neon_vget_lane_bf16:
9042 case NEON::BI__builtin_neon_vget_lane_f32:
9043 case NEON::BI__builtin_neon_vgetq_lane_i8:
9044 case NEON::BI__builtin_neon_vgetq_lane_i16:
9045 case NEON::BI__builtin_neon_vgetq_lane_i32:
9046 case NEON::BI__builtin_neon_vgetq_lane_i64:
9047 case NEON::BI__builtin_neon_vgetq_lane_bf16:
9048 case NEON::BI__builtin_neon_vgetq_lane_f32:
9049 case NEON::BI__builtin_neon_vduph_lane_bf16:
9050 case NEON::BI__builtin_neon_vduph_laneq_bf16:
9051 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
9052
9053 case NEON::BI__builtin_neon_vrndns_f32: {
9054 Value *Arg = EmitScalarExpr(E->getArg(0));
9055 llvm::Type *Tys[] = {Arg->getType()};
9056 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
9057 return Builder.CreateCall(F, {Arg}, "vrndn"); }
9058
9059 case NEON::BI__builtin_neon_vset_lane_i8:
9060 case NEON::BI__builtin_neon_vset_lane_i16:
9061 case NEON::BI__builtin_neon_vset_lane_i32:
9062 case NEON::BI__builtin_neon_vset_lane_i64:
9063 case NEON::BI__builtin_neon_vset_lane_bf16:
9064 case NEON::BI__builtin_neon_vset_lane_f32:
9065 case NEON::BI__builtin_neon_vsetq_lane_i8:
9066 case NEON::BI__builtin_neon_vsetq_lane_i16:
9067 case NEON::BI__builtin_neon_vsetq_lane_i32:
9068 case NEON::BI__builtin_neon_vsetq_lane_i64:
9069 case NEON::BI__builtin_neon_vsetq_lane_bf16:
9070 case NEON::BI__builtin_neon_vsetq_lane_f32:
9071 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
9072
9073 case NEON::BI__builtin_neon_vsha1h_u32:
9074 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
9075 "vsha1h");
9076 case NEON::BI__builtin_neon_vsha1cq_u32:
9077 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
9078 "vsha1h");
9079 case NEON::BI__builtin_neon_vsha1pq_u32:
9080 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
9081 "vsha1h");
9082 case NEON::BI__builtin_neon_vsha1mq_u32:
9083 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
9084 "vsha1h");
9085
9086 case NEON::BI__builtin_neon_vcvth_bf16_f32: {
9087 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,
9088 "vcvtbfp2bf");
9089 }
9090
9091 // The ARM _MoveToCoprocessor builtins put the input register value as
9092 // the first argument, but the LLVM intrinsic expects it as the third one.
9093 case clang::ARM::BI_MoveToCoprocessor:
9094 case clang::ARM::BI_MoveToCoprocessor2: {
9095 Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor
9096 ? Intrinsic::arm_mcr
9097 : Intrinsic::arm_mcr2);
9098 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
9099 Ops[3], Ops[4], Ops[5]});
9100 }
9101 }
9102
9103 // Get the last argument, which specifies the vector type.
9104 assert(HasExtraArg);
9105 const Expr *Arg = E->getArg(E->getNumArgs()-1);
9106 std::optional<llvm::APSInt> Result =
9107 Arg->getIntegerConstantExpr(getContext());
9108 if (!Result)
9109 return nullptr;
9110
9111 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||
9112 BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {
9113 // Determine the overloaded type of this builtin.
9114 llvm::Type *Ty;
9115 if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)
9116 Ty = FloatTy;
9117 else
9118 Ty = DoubleTy;
9119
9120 // Determine whether this is an unsigned conversion or not.
9121 bool usgn = Result->getZExtValue() == 1;
9122 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
9123
9124 // Call the appropriate intrinsic.
9125 Function *F = CGM.getIntrinsic(Int, Ty);
9126 return Builder.CreateCall(F, Ops, "vcvtr");
9127 }
9128
9129 // Determine the type of this overloaded NEON intrinsic.
9130 NeonTypeFlags Type = Result->getZExtValue();
9131 bool usgn = Type.isUnsigned();
9132 bool rightShift = false;
9133
9134 llvm::FixedVectorType *VTy =
9135 GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,
9136 getTarget().hasBFloat16Type());
9137 llvm::Type *Ty = VTy;
9138 if (!Ty)
9139 return nullptr;
9140
9141 // Many NEON builtins have identical semantics and uses in ARM and
9142 // AArch64. Emit these in a single function.
9143 auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);
9144 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
9145 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
9146 if (Builtin)
9147 return EmitCommonNeonBuiltinExpr(
9148 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
9149 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
9150
9151 unsigned Int;
9152 switch (BuiltinID) {
9153 default: return nullptr;
9154 case NEON::BI__builtin_neon_vld1q_lane_v:
9155 // Handle 64-bit integer elements as a special case. Use shuffles of
9156 // one-element vectors to avoid poor code for i64 in the backend.
9157 if (VTy->getElementType()->isIntegerTy(64)) {
9158 // Extract the other lane.
9159 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9160 int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
9161 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
9162 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9163 // Load the value as a one-element vector.
9164 Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
9165 llvm::Type *Tys[] = {Ty, Int8PtrTy};
9166 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
9167 Value *Align = getAlignmentValue32(PtrOp0);
9168 Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
9169 // Combine them.
9170 int Indices[] = {1 - Lane, Lane};
9171 return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");
9172 }
9173 [[fallthrough]];
9174 case NEON::BI__builtin_neon_vld1_lane_v: {
9175 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9176 PtrOp0 = PtrOp0.withElementType(VTy->getElementType());
9177 Value *Ld = Builder.CreateLoad(PtrOp0);
9178 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
9179 }
9180 case NEON::BI__builtin_neon_vqrshrn_n_v:
9181 Int =
9182 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
9183 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
9184 1, true);
9185 case NEON::BI__builtin_neon_vqrshrun_n_v:
9186 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
9187 Ops, "vqrshrun_n", 1, true);
9188 case NEON::BI__builtin_neon_vqshrn_n_v:
9189 Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
9190 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
9191 1, true);
9192 case NEON::BI__builtin_neon_vqshrun_n_v:
9193 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
9194 Ops, "vqshrun_n", 1, true);
9195 case NEON::BI__builtin_neon_vrecpe_v:
9196 case NEON::BI__builtin_neon_vrecpeq_v:
9197 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
9198 Ops, "vrecpe");
9199 case NEON::BI__builtin_neon_vrshrn_n_v:
9200 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
9201 Ops, "vrshrn_n", 1, true);
9202 case NEON::BI__builtin_neon_vrsra_n_v:
9203 case NEON::BI__builtin_neon_vrsraq_n_v:
9204 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9205 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9206 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
9207 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
9208 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
9209 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
9210 case NEON::BI__builtin_neon_vsri_n_v:
9211 case NEON::BI__builtin_neon_vsriq_n_v:
9212 rightShift = true;
9213 [[fallthrough]];
9214 case NEON::BI__builtin_neon_vsli_n_v:
9215 case NEON::BI__builtin_neon_vsliq_n_v:
9216 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
9217 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
9218 Ops, "vsli_n");
9219 case NEON::BI__builtin_neon_vsra_n_v:
9220 case NEON::BI__builtin_neon_vsraq_n_v:
9221 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9222 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
9223 return Builder.CreateAdd(Ops[0], Ops[1]);
9224 case NEON::BI__builtin_neon_vst1q_lane_v:
9225 // Handle 64-bit integer elements as a special case. Use a shuffle to get
9226 // a one-element vector and avoid poor code for i64 in the backend.
9227 if (VTy->getElementType()->isIntegerTy(64)) {
9228 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9229 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
9230 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
9231 Ops[2] = getAlignmentValue32(PtrOp0);
9232 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
9233 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
9234 Tys), Ops);
9235 }
9236 [[fallthrough]];
9237 case NEON::BI__builtin_neon_vst1_lane_v: {
9238 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9239 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9240 return Builder.CreateStore(Ops[1],
9241 PtrOp0.withElementType(Ops[1]->getType()));
9242 }
9243 case NEON::BI__builtin_neon_vtbl1_v:
9244 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
9245 Ops, "vtbl1");
9246 case NEON::BI__builtin_neon_vtbl2_v:
9247 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
9248 Ops, "vtbl2");
9249 case NEON::BI__builtin_neon_vtbl3_v:
9250 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
9251 Ops, "vtbl3");
9252 case NEON::BI__builtin_neon_vtbl4_v:
9253 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
9254 Ops, "vtbl4");
9255 case NEON::BI__builtin_neon_vtbx1_v:
9256 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
9257 Ops, "vtbx1");
9258 case NEON::BI__builtin_neon_vtbx2_v:
9259 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
9260 Ops, "vtbx2");
9261 case NEON::BI__builtin_neon_vtbx3_v:
9262 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
9263 Ops, "vtbx3");
9264 case NEON::BI__builtin_neon_vtbx4_v:
9265 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
9266 Ops, "vtbx4");
9267 }
9268}
9269
9270template<typename Integer>
9271static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
9272 return E->getIntegerConstantExpr(Context)->getExtValue();
9273}
9274
9275static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
9276 llvm::Type *T, bool Unsigned) {
9277 // Helper function called by Tablegen-constructed ARM MVE builtin codegen,
9278 // which finds it convenient to specify signed/unsigned as a boolean flag.
9279 return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
9280}
9281
9282static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
9283 uint32_t Shift, bool Unsigned) {
9284 // MVE helper function for integer shift right. This must handle signed vs
9285 // unsigned, and also deal specially with the case where the shift count is
9286 // equal to the lane size. In LLVM IR, an LShr with that parameter would be
9287 // undefined behavior, but in MVE it's legal, so we must convert it to code
9288 // that is not undefined in IR.
9289 unsigned LaneBits = cast<llvm::VectorType>(V->getType())
9290 ->getElementType()
9291 ->getPrimitiveSizeInBits();
9292 if (Shift == LaneBits) {
9293 // An unsigned shift of the full lane size always generates zero, so we can
9294 // simply emit a zero vector. A signed shift of the full lane size does the
9295 // same thing as shifting by one bit fewer.
9296 if (Unsigned)
9297 return llvm::Constant::getNullValue(V->getType());
9298 else
9299 --Shift;
9300 }
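  // E.g. an <8 x i16> input shifted right by 16: unsigned yields a zero
  // vector, signed falls through with Shift reduced to 15.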
9301 return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);
9302}
9303
9304static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
9305 // MVE-specific helper function for a vector splat, which infers the element
9306 // count of the output vector by knowing that MVE vectors are all 128 bits
9307 // wide.
9308 unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();
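  // E.g. splatting an i16 gives an <8 x i16>, splatting an i32 a <4 x i32>.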
9309 return Builder.CreateVectorSplat(Elements, V);
9310}
9311
9312static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,
9313 CodeGenFunction *CGF,
9314 llvm::Value *V,
9315 llvm::Type *DestType) {
9316 // Convert one MVE vector type into another by reinterpreting its in-register
9317 // format.
9318 //
9319 // Little-endian, this is identical to a bitcast (which reinterprets the
9320 // memory format). But big-endian, they're not necessarily the same, because
9321 // the register and memory formats map to each other differently depending on
9322 // the lane size.
9323 //
9324 // We generate a bitcast whenever we can (if we're little-endian, or if the
9325 // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic
9326 // that performs the different kind of reinterpretation.
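  // E.g. reinterpreting <8 x i16> as <4 x i32> is a plain bitcast on
  // little-endian targets, but becomes an arm.mve.vreinterpretq call on
  // big-endian ones because the lane sizes differ.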
9327 if (CGF->getTarget().isBigEndian() &&
9328 V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {
9329 return Builder.CreateCall(
9330 CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,
9331 {DestType, V->getType()}),
9332 V);
9333 } else {
9334 return Builder.CreateBitCast(V, DestType);
9335 }
9336}
9337
9338static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {
9339 // Make a shufflevector that extracts every other element of a vector (evens
9340 // or odds, as desired).
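  // E.g. from <a0,a1,a2,a3> this extracts <a0,a2> (evens) or <a1,a3> (odds).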
9341 SmallVector<int, 16> Indices;
9342 unsigned InputElements =
9343 cast<llvm::FixedVectorType>(V->getType())->getNumElements();
9344 for (unsigned i = 0; i < InputElements; i += 2)
9345 Indices.push_back(i + Odd);
9346 return Builder.CreateShuffleVector(V, Indices);
9347}
9348
9349static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
9350 llvm::Value *V1) {
9351 // Make a shufflevector that interleaves two vectors element by element.
9352 assert(V0->getType() == V1->getType() && "Can't zip different vector types");
9353 SmallVector<int, 16> Indices;
9354 unsigned InputElements =
9355 cast<llvm::FixedVectorType>(V0->getType())->getNumElements();
9356 for (unsigned i = 0; i < InputElements; i++) {
9357 Indices.push_back(i);
9358 Indices.push_back(i + InputElements);
9359 }
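  // E.g. zipping <a0,a1,a2,a3> with <b0,b1,b2,b3> uses indices 0,4,1,5,2,6,3,7
  // and produces <a0,b0,a1,b1,a2,b2,a3,b3>.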
9360 return Builder.CreateShuffleVector(V0, V1, Indices);
9361}
9362
9363template<unsigned HighBit, unsigned OtherBits>
9364static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
9365 // MVE-specific helper function to make a vector splat of a constant such as
9366 // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
9367 llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();
9368 unsigned LaneBits = T->getPrimitiveSizeInBits();
9369 uint32_t Value = HighBit << (LaneBits - 1);
9370 if (OtherBits)
9371 Value |= (1UL << (LaneBits - 1)) - 1;
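  // E.g. with 16-bit lanes: HighBit=0/OtherBits=1 gives 0x7fff, 1/0 gives
  // 0x8000, and 1/1 gives 0xffff in every lane.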
9372 llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
9373 return ARMMVEVectorSplat(Builder, Lane);
9374}
9375
9376static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
9377 llvm::Value *V,
9378 unsigned ReverseWidth) {
9379 // MVE-specific helper function which reverses the elements of a
9380 // vector within every (ReverseWidth)-bit collection of lanes.
9381 SmallVector<int, 16> Indices;
9382 unsigned LaneSize = V->getType()->getScalarSizeInBits();
9383 unsigned Elements = 128 / LaneSize;
9384 unsigned Mask = ReverseWidth / LaneSize - 1;
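  // E.g. with 8-bit lanes and ReverseWidth == 32, Mask is 3 and the indices
  // become 3,2,1,0,7,6,5,4,..., reversing each group of four bytes.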
9385 for (unsigned i = 0; i < Elements; i++)
9386 Indices.push_back(i ^ Mask);
9387 return Builder.CreateShuffleVector(V, Indices);
9388}
9389
9390Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
9391 const CallExpr *E,
9392 ReturnValueSlot ReturnValue,
9393 llvm::Triple::ArchType Arch) {
9394 enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
9395 Intrinsic::ID IRIntr;
9396 unsigned NumVectors;
9397
9398 // Code autogenerated by Tablegen will handle all the simple builtins.
9399 switch (BuiltinID) {
9400 #include "clang/Basic/arm_mve_builtin_cg.inc"
9401
9402 // If we didn't match an MVE builtin id at all, go back to the
9403 // main EmitARMBuiltinExpr.
9404 default:
9405 return nullptr;
9406 }
9407
9408 // Anything that breaks from that switch is an MVE builtin that
9409 // needs handwritten code to generate.
9410
9411 switch (CustomCodeGenType) {
9412
9413 case CustomCodeGen::VLD24: {
9414 llvm::SmallVector<Value *, 4> Ops;
9415 llvm::SmallVector<llvm::Type *, 4> Tys;
9416
9417 auto MvecCType = E->getType();
9418 auto MvecLType = ConvertType(MvecCType);
9419 assert(MvecLType->isStructTy() &&
9420 "Return type for vld[24]q should be a struct");
9421 assert(MvecLType->getStructNumElements() == 1 &&
9422 "Return-type struct for vld[24]q should have one element");
9423 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9424 assert(MvecLTypeInner->isArrayTy() &&
9425 "Return-type struct for vld[24]q should contain an array");
9426 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9427 "Array member of return-type struct vld[24]q has wrong length");
9428 auto VecLType = MvecLTypeInner->getArrayElementType();
9429
9430 Tys.push_back(VecLType);
9431
9432 auto Addr = E->getArg(0);
9433 Ops.push_back(EmitScalarExpr(Addr));
9434 Tys.push_back(ConvertType(Addr->getType()));
9435
9436 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9437 Value *LoadResult = Builder.CreateCall(F, Ops);
9438 Value *MvecOut = PoisonValue::get(MvecLType);
9439 for (unsigned i = 0; i < NumVectors; ++i) {
9440 Value *Vec = Builder.CreateExtractValue(LoadResult, i);
9441 MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
9442 }
9443
9444 if (ReturnValue.isNull())
9445 return MvecOut;
9446 else
9447 return Builder.CreateStore(MvecOut, ReturnValue.getAddress());
9448 }
9449
9450 case CustomCodeGen::VST24: {
9451 llvm::SmallVector<Value *, 4> Ops;
9452 llvm::SmallVector<llvm::Type *, 4> Tys;
9453
9454 auto Addr = E->getArg(0);
9455 Ops.push_back(EmitScalarExpr(Addr));
9456 Tys.push_back(ConvertType(Addr->getType()));
9457
9458 auto MvecCType = E->getArg(1)->getType();
9459 auto MvecLType = ConvertType(MvecCType);
9460 assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
9461 assert(MvecLType->getStructNumElements() == 1 &&
9462 "Data-type struct for vst2q should have one element");
9463 auto MvecLTypeInner = MvecLType->getStructElementType(0);
9464 assert(MvecLTypeInner->isArrayTy() &&
9465 "Data-type struct for vst2q should contain an array");
9466 assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
9467 "Array member of data-type struct for vst2q has wrong length");
9468 auto VecLType = MvecLTypeInner->getArrayElementType();
9469
9470 Tys.push_back(VecLType);
9471
9472 AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
9473 EmitAggExpr(E->getArg(1), MvecSlot);
9474 auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
9475 for (unsigned i = 0; i < NumVectors; i++)
9476 Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
9477
9478 Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));
9479 Value *ToReturn = nullptr;
9480 for (unsigned i = 0; i < NumVectors; i++) {
9481 Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
9482 ToReturn = Builder.CreateCall(F, Ops);
9483 Ops.pop_back();
9484 }
9485 return ToReturn;
9486 }
9487 }
9488 llvm_unreachable("unknown custom codegen type.");
9489}
9490
9491Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,
9492 const CallExpr *E,
9493 ReturnValueSlot ReturnValue,
9494 llvm::Triple::ArchType Arch) {
9495 switch (BuiltinID) {
9496 default:
9497 return nullptr;
9498#include "clang/Basic/arm_cde_builtin_cg.inc"
9499 }
9500}
9501
9502static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
9503 const CallExpr *E,
9504 SmallVectorImpl<Value *> &Ops,
9505 llvm::Triple::ArchType Arch) {
9506 unsigned int Int = 0;
9507 const char *s = nullptr;
9508
9509 switch (BuiltinID) {
9510 default:
9511 return nullptr;
9512 case NEON::BI__builtin_neon_vtbl1_v:
9513 case NEON::BI__builtin_neon_vqtbl1_v:
9514 case NEON::BI__builtin_neon_vqtbl1q_v:
9515 case NEON::BI__builtin_neon_vtbl2_v:
9516 case NEON::BI__builtin_neon_vqtbl2_v:
9517 case NEON::BI__builtin_neon_vqtbl2q_v:
9518 case NEON::BI__builtin_neon_vtbl3_v:
9519 case NEON::BI__builtin_neon_vqtbl3_v:
9520 case NEON::BI__builtin_neon_vqtbl3q_v:
9521 case NEON::BI__builtin_neon_vtbl4_v:
9522 case NEON::BI__builtin_neon_vqtbl4_v:
9523 case NEON::BI__builtin_neon_vqtbl4q_v:
9524 break;
9525 case NEON::BI__builtin_neon_vtbx1_v:
9526 case NEON::BI__builtin_neon_vqtbx1_v:
9527 case NEON::BI__builtin_neon_vqtbx1q_v:
9528 case NEON::BI__builtin_neon_vtbx2_v:
9529 case NEON::BI__builtin_neon_vqtbx2_v:
9530 case NEON::BI__builtin_neon_vqtbx2q_v:
9531 case NEON::BI__builtin_neon_vtbx3_v:
9532 case NEON::BI__builtin_neon_vqtbx3_v:
9533 case NEON::BI__builtin_neon_vqtbx3q_v:
9534 case NEON::BI__builtin_neon_vtbx4_v:
9535 case NEON::BI__builtin_neon_vqtbx4_v:
9536 case NEON::BI__builtin_neon_vqtbx4q_v:
9537 break;
9538 }
9539
9540 assert(E->getNumArgs() >= 3);
9541
9542 // Get the last argument, which specifies the vector type.
9543 const Expr *Arg = E->getArg(E->getNumArgs() - 1);
9544 std::optional<llvm::APSInt> Result =
9545 Arg->getIntegerConstantExpr(CGF.getContext());
9546 if (!Result)
9547 return nullptr;
9548
9549 // Determine the type of this overloaded NEON intrinsic.
9550 NeonTypeFlags Type = Result->getZExtValue();
9551 llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);
9552 if (!Ty)
9553 return nullptr;
9554
9555 CodeGen::CGBuilderTy &Builder = CGF.Builder;
9556
9557 // AArch64 scalar builtins are not overloaded; they do not have an extra
9558 // argument that specifies the vector type, so we need to handle each case.
9559 switch (BuiltinID) {
9560 case NEON::BI__builtin_neon_vtbl1_v: {
9561 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],
9562 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9563 }
9564 case NEON::BI__builtin_neon_vtbl2_v: {
9565 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],
9566 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
9567 }
9568 case NEON::BI__builtin_neon_vtbl3_v: {
9569 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],
9570 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9571 }
9572 case NEON::BI__builtin_neon_vtbl4_v: {
9573 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],
9574 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
9575 }
9576 case NEON::BI__builtin_neon_vtbx1_v: {
9577 Value *TblRes =
9578 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,
9579 Intrinsic::aarch64_neon_tbl1, "vtbl1");
9580
9581 llvm::Constant *EightV = ConstantInt::get(Ty, 8);
9582 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
9583 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9584
9585 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9586 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9587 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9588 }
9589 case NEON::BI__builtin_neon_vtbx2_v: {
9590 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],
9591 Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");
9592 }
9593 case NEON::BI__builtin_neon_vtbx3_v: {
9594 Value *TblRes =
9595 packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,
9596 Intrinsic::aarch64_neon_tbl2, "vtbl2");
9597
9598 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
9599 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
9600 TwentyFourV);
9601 CmpRes = Builder.CreateSExt(CmpRes, Ty);
9602
9603 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
9604 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
9605 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
9606 }
9607 case NEON::BI__builtin_neon_vtbx4_v: {
9608 return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],
9609 Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");
9610 }
9611 case NEON::BI__builtin_neon_vqtbl1_v:
9612 case NEON::BI__builtin_neon_vqtbl1q_v:
9613 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
9614 case NEON::BI__builtin_neon_vqtbl2_v:
9615 case NEON::BI__builtin_neon_vqtbl2q_v: {
9616 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
9617 case NEON::BI__builtin_neon_vqtbl3_v:
9618 case NEON::BI__builtin_neon_vqtbl3q_v:
9619 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
9620 case NEON::BI__builtin_neon_vqtbl4_v:
9621 case NEON::BI__builtin_neon_vqtbl4q_v:
9622 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
9623 case NEON::BI__builtin_neon_vqtbx1_v:
9624 case NEON::BI__builtin_neon_vqtbx1q_v:
9625 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
9626 case NEON::BI__builtin_neon_vqtbx2_v:
9627 case NEON::BI__builtin_neon_vqtbx2q_v:
9628 Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
9629 case NEON::BI__builtin_neon_vqtbx3_v:
9630 case NEON::BI__builtin_neon_vqtbx3q_v:
9631 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
9632 case NEON::BI__builtin_neon_vqtbx4_v:
9633 case NEON::BI__builtin_neon_vqtbx4q_v:
9634 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
9636 }
9637
9638 if (!Int)
9639 return nullptr;
9640
9641 Function *F = CGF.CGM.getIntrinsic(Int, Ty);
9642 return CGF.EmitNeonCall(F, Ops, s);
9643}
9644
9645Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
9646 auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
9647 Op = Builder.CreateBitCast(Op, Int16Ty);
9648 Value *V = PoisonValue::get(VTy);
9649 llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
9650 Op = Builder.CreateInsertElement(V, Op, CI);
9651 return Op;
9652}
9653
9654/// SVEBuiltinMemEltTy - Returns the memory element type for this memory
9655/// access builtin. Only required if it can't be inferred from the base pointer
9656/// operand.
9657llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {
9658 switch (TypeFlags.getMemEltType()) {
9659 case SVETypeFlags::MemEltTyDefault:
9660 return getEltType(TypeFlags);
9661 case SVETypeFlags::MemEltTyInt8:
9662 return Builder.getInt8Ty();
9663 case SVETypeFlags::MemEltTyInt16:
9664 return Builder.getInt16Ty();
9665 case SVETypeFlags::MemEltTyInt32:
9666 return Builder.getInt32Ty();
9667 case SVETypeFlags::MemEltTyInt64:
9668 return Builder.getInt64Ty();
9669 }
9670 llvm_unreachable("Unknown MemEltType");
9671}
9672
9673llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
9674 switch (TypeFlags.getEltType()) {
9675 default:
9676 llvm_unreachable("Invalid SVETypeFlag!");
9677
9678 case SVETypeFlags::EltTyInt8:
9679 return Builder.getInt8Ty();
9680 case SVETypeFlags::EltTyInt16:
9681 return Builder.getInt16Ty();
9682 case SVETypeFlags::EltTyInt32:
9683 return Builder.getInt32Ty();
9684 case SVETypeFlags::EltTyInt64:
9685 return Builder.getInt64Ty();
9686 case SVETypeFlags::EltTyInt128:
9687 return Builder.getInt128Ty();
9688
9689 case SVETypeFlags::EltTyFloat16:
9690 return Builder.getHalfTy();
9691 case SVETypeFlags::EltTyFloat32:
9692 return Builder.getFloatTy();
9693 case SVETypeFlags::EltTyFloat64:
9694 return Builder.getDoubleTy();
9695
9696 case SVETypeFlags::EltTyBFloat16:
9697 return Builder.getBFloatTy();
9698
9699 case SVETypeFlags::EltTyBool8:
9700 case SVETypeFlags::EltTyBool16:
9701 case SVETypeFlags::EltTyBool32:
9702 case SVETypeFlags::EltTyBool64:
9703 return Builder.getInt1Ty();
9704 }
9705}
9706
9707// Return the llvm predicate vector type corresponding to the specified element
9708// TypeFlags.
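// For example, EltTyInt64 and EltTyFloat64 both map to <vscale x 2 x i1>:
// one predicate bit per 64-bit lane of each 128-bit granule.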
9709llvm::ScalableVectorType *
9710CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {
9711 switch (TypeFlags.getEltType()) {
9712 default: llvm_unreachable("Unhandled SVETypeFlag!");
9713
9714 case SVETypeFlags::EltTyInt8:
9715 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9716 case SVETypeFlags::EltTyInt16:
9717 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9718 case SVETypeFlags::EltTyInt32:
9719 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9720 case SVETypeFlags::EltTyInt64:
9721 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9722
9723 case SVETypeFlags::EltTyBFloat16:
9724 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9725 case SVETypeFlags::EltTyFloat16:
9726 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9727 case SVETypeFlags::EltTyFloat32:
9728 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9729 case SVETypeFlags::EltTyFloat64:
9730 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9731
9732 case SVETypeFlags::EltTyBool8:
9733 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9734 case SVETypeFlags::EltTyBool16:
9735 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9736 case SVETypeFlags::EltTyBool32:
9737 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9738 case SVETypeFlags::EltTyBool64:
9739 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9740 }
9741}
9742
9743// Return the llvm vector type corresponding to the specified element TypeFlags.
9744llvm::ScalableVectorType *
9745CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {
9746 switch (TypeFlags.getEltType()) {
9747 default:
9748 llvm_unreachable("Invalid SVETypeFlag!");
9749
9750 case SVETypeFlags::EltTyInt8:
9751 return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
9752 case SVETypeFlags::EltTyInt16:
9753 return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);
9754 case SVETypeFlags::EltTyInt32:
9755 return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);
9756 case SVETypeFlags::EltTyInt64:
9757 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
9758
9759 case SVETypeFlags::EltTyFloat16:
9760 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
9761 case SVETypeFlags::EltTyBFloat16:
9762 return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);
9763 case SVETypeFlags::EltTyFloat32:
9764 return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);
9765 case SVETypeFlags::EltTyFloat64:
9766 return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);
9767
9768 case SVETypeFlags::EltTyBool8:
9769 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);
9770 case SVETypeFlags::EltTyBool16:
9771 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
9772 case SVETypeFlags::EltTyBool32:
9773 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
9774 case SVETypeFlags::EltTyBool64:
9775 return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
9776 }
9777}
9778
9779llvm::Value *
9780CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {
9781 Function *Ptrue =
9782 CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));
9783 return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});
9784}
9785
9786constexpr unsigned SVEBitsPerBlock = 128;
9787
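// Maps an element type to its full-width SVE container: e.g. i32 becomes
// <vscale x 4 x i32> and double becomes <vscale x 2 x double>.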
9788static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
9789 unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();
9790 return llvm::ScalableVectorType::get(EltTy, NumElts);
9791}
9792
9793// Reinterpret the input predicate so that it can be used to correctly isolate
9794// the elements of the specified datatype.
9795llvm::Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
9796 llvm::ScalableVectorType *VTy) {
9797
9798 if (isa<TargetExtType>(Pred->getType()) &&
9799 cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
9800 return Pred;
9801
9802 auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
9803 if (Pred->getType() == RTy)
9804 return Pred;
9805
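 // svbool_t is represented as <vscale x 16 x i1>. Requests for fewer than 16
 // lanes narrow the svbool_t with aarch64.sve.convert.from.svbool; a request
 // for 16 lanes widens a narrower predicate back with convert.to.svbool.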
9806 unsigned IntID;
9807 llvm::Type *IntrinsicTy;
9808 switch (VTy->getMinNumElements()) {
9809 default:
9810 llvm_unreachable("unsupported element count!");
9811 case 1:
9812 case 2:
9813 case 4:
9814 case 8:
9815 IntID = Intrinsic::aarch64_sve_convert_from_svbool;
9816 IntrinsicTy = RTy;
9817 break;
9818 case 16:
9819 IntID = Intrinsic::aarch64_sve_convert_to_svbool;
9820 IntrinsicTy = Pred->getType();
9821 break;
9822 }
9823
9824 Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);
9825 Value *C = Builder.CreateCall(F, Pred);
9826 assert(C->getType() == RTy && "Unexpected return type!");
9827 return C;
9828}
9829
9830Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,
9831 SmallVectorImpl<Value *> &Ops,
9832 unsigned IntID) {
9833 auto *ResultTy = getSVEType(TypeFlags);
9834 auto *OverloadedTy =
9835 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);
9836
9837 Function *F = nullptr;
9838 if (Ops[1]->getType()->isVectorTy())
9839 // This is the "vector base, scalar offset" case. In order to uniquely
9840 // map this built-in to an LLVM IR intrinsic, we need both the return type
9841 // and the type of the vector base.
9842 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});
9843 else
9844 // This is the "scalar base, vector offset" case. The type of the offset
9845 // is encoded in the name of the intrinsic. We only need to specify the
9846 // return type in order to uniquely map this built-in to an LLVM IR
9847 // intrinsic.
9848 F = CGM.getIntrinsic(IntID, OverloadedTy);
9849
9850 // At the ACLE level there's only one predicate type, svbool_t, which is
9851 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9852 // actual type being loaded. For example, when loading doubles (i64) the
9853 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9854 // the predicate and the data being loaded must match. Cast to the type
9855 // expected by the intrinsic. The intrinsic itself should be defined in
9856 // a way that enforces relations between parameter types.
9857 Ops[0] = EmitSVEPredicateCast(
9858 Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));
9859
9860 // Pass 0 when the offset is missing. This can only be applied when using
9861 // the "vector base" addressing mode for which ACLE allows no offset. The
9862 // corresponding LLVM IR always requires an offset.
9863 if (Ops.size() == 2) {
9864 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9865 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9866 }
9867
9868 // For "vector base, scalar index" scale the index so that it becomes a
9869 // scalar offset.
9870 if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
9871 unsigned BytesPerElt =
9872 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9873 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
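 // (For 64-bit elements, for example, this shifts the index left by 3 to
 // turn it into a byte offset.)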
9874 }
9875
9876 Value *Call = Builder.CreateCall(F, Ops);
9877
9878 // The following sext/zext is only needed when ResultTy != OverloadedTy. In
9879 // other cases it's folded into a nop.
9880 return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
9881 : Builder.CreateSExt(Call, ResultTy);
9882}
9883
9884Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
9885 SmallVectorImpl<Value *> &Ops,
9886 unsigned IntID) {
9887 auto *SrcDataTy = getSVEType(TypeFlags);
9888 auto *OverloadedTy =
9889 llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);
9890
9891 // In ACLE the source data is passed in the last argument, whereas in LLVM IR
9892 // it's the first argument. Move it accordingly.
9893 Ops.insert(Ops.begin(), Ops.pop_back_val());
9894
9895 Function *F = nullptr;
9896 if (Ops[2]->getType()->isVectorTy())
9897 // This is the "vector base, scalar offset" case. In order to uniquely
9898 // map this built-in to an LLVM IR intrinsic, we need both the return type
9899 // and the type of the vector base.
9900 F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
9901 else
9902 // This is the "scalar base, vector offset" case. The type of the offset
9903 // is encoded in the name of the intrinsic. We only need to specify the
9904 // return type in order to uniquely map this built-in to an LLVM IR
9905 // intrinsic.
9906 F = CGM.getIntrinsic(IntID, OverloadedTy);
9907
9908 // Pass 0 when the offset is missing. This can only be applied when using
9909 // the "vector base" addressing mode for which ACLE allows no offset. The
9910 // corresponding LLVM IR always requires an offset.
9911 if (Ops.size() == 3) {
9912 assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
9913 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9914 }
9915
9916 // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
9917 // folded into a nop.
9918 Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);
9919
9920 // At the ACLE level there's only one predicate type, svbool_t, which is
9921 // mapped to <n x 16 x i1>. However, this might be incompatible with the
9922 // actual type being stored. For example, when storing doubles (i64) the
9923 // predicate should be <n x 2 x i1> instead. At the IR level the type of
9924 // the predicate and the data being stored must match. Cast to the type
9925 // expected by the intrinsic. The intrinsic itself should be defined in
9926 // a way that enforces relations between parameter types.
9927 Ops[1] = EmitSVEPredicateCast(
9928 Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));
9929
9930 // For "vector base, scalar index" scale the index so that it becomes a
9931 // scalar offset.
9932 if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {
9933 unsigned BytesPerElt =
9934 OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
9935 Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));
9936 }
9937
9938 return Builder.CreateCall(F, Ops);
9939}
9940
9941Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,
9942 SmallVectorImpl<Value *> &Ops,
9943 unsigned IntID) {
9944 // The gather prefetches are overloaded on the vector input - this can either
9945 // be the vector of base addresses or vector of offsets.
9946 auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());
9947 if (!OverloadedTy)
9948 OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());
9949
9950 // Cast the predicate from svbool_t to the right number of elements.
9951 Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
9952
9953 // vector + imm addressing modes
9954 if (Ops[1]->getType()->isVectorTy()) {
9955 if (Ops.size() == 3) {
9956 // Pass 0 for 'vector+imm' when the index is omitted.
9957 Ops.push_back(ConstantInt::get(Int64Ty, 0));
9958
9959 // The sv_prfop is the last operand in the builtin and IR intrinsic.
9960 std::swap(Ops[2], Ops[3]);
9961 } else {
9962 // Index needs to be passed as scaled offset.
9963 llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
9964 unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
9965 if (BytesPerElt > 1)
9966 Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
9967 }
9968 }
9969
9970 Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
9971 return Builder.CreateCall(F, Ops);
9972}
9973
9974Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
9975 SmallVectorImpl<Value *> &Ops,
9976 unsigned IntID) {
9977 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
9978
9979 unsigned N;
9980 switch (IntID) {
9981 case Intrinsic::aarch64_sve_ld2_sret:
9982 case Intrinsic::aarch64_sve_ld1_pn_x2:
9983 case Intrinsic::aarch64_sve_ldnt1_pn_x2:
9984 case Intrinsic::aarch64_sve_ld2q_sret:
9985 N = 2;
9986 break;
9987 case Intrinsic::aarch64_sve_ld3_sret:
9988 case Intrinsic::aarch64_sve_ld3q_sret:
9989 N = 3;
9990 break;
9991 case Intrinsic::aarch64_sve_ld4_sret:
9992 case Intrinsic::aarch64_sve_ld1_pn_x4:
9993 case Intrinsic::aarch64_sve_ldnt1_pn_x4:
9994 case Intrinsic::aarch64_sve_ld4q_sret:
9995 N = 4;
9996 break;
9997 default:
9998 llvm_unreachable("unknown intrinsic!");
9999 }
10000 auto RetTy = llvm::VectorType::get(VTy->getElementType(),
10001 VTy->getElementCount() * N);
10002
10003 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10004 Value *BasePtr = Ops[1];
10005
10006 // Does the load have an offset?
10007 if (Ops.size() > 2)
10008 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10009
10010 Function *F = CGM.getIntrinsic(IntID, {VTy});
10011 Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});
10012 unsigned MinElts = VTy->getMinNumElements();
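 // Concatenate the N sret struct members into one wide result vector,
 // inserting part I at lane offset I * MinElts.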
10013 Value *Ret = llvm::PoisonValue::get(RetTy);
10014 for (unsigned I = 0; I < N; I++) {
10015 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10016 Value *SRet = Builder.CreateExtractValue(Call, I);
10017 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
10018 }
10019 return Ret;
10020}
10021
10022Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
10023 SmallVectorImpl<Value *> &Ops,
10024 unsigned IntID) {
10025 llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
10026
10027 unsigned N;
10028 switch (IntID) {
10029 case Intrinsic::aarch64_sve_st2:
10030 case Intrinsic::aarch64_sve_st1_pn_x2:
10031 case Intrinsic::aarch64_sve_stnt1_pn_x2:
10032 case Intrinsic::aarch64_sve_st2q:
10033 N = 2;
10034 break;
10035 case Intrinsic::aarch64_sve_st3:
10036 case Intrinsic::aarch64_sve_st3q:
10037 N = 3;
10038 break;
10039 case Intrinsic::aarch64_sve_st4:
10040 case Intrinsic::aarch64_sve_st1_pn_x4:
10041 case Intrinsic::aarch64_sve_stnt1_pn_x4:
10042 case Intrinsic::aarch64_sve_st4q:
10043 N = 4;
10044 break;
10045 default:
10046 llvm_unreachable("unknown intrinsic!");
10047 }
10048
10049 Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
10050 Value *BasePtr = Ops[1];
10051
10052 // Does the store have an offset?
10053 if (Ops.size() > (2 + N))
10054 BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);
10055
10056 // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
10057 // need to break up the tuple vector.
10058 SmallVector<llvm::Value *, 5> Operands;
10059 for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)
10060 Operands.push_back(Ops[I]);
10061 Operands.append({Predicate, BasePtr});
10062 Function *F = CGM.getIntrinsic(IntID, { VTy });
10063
10064 return Builder.CreateCall(F, Operands);
10065}
10066
10067// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
10068// svpmullt_pair intrinsics, with the exception that their results are bitcast
10069// to a wider type.
10070Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,
10071 SmallVectorImpl<Value *> &Ops,
10072 unsigned BuiltinID) {
10073 // Splat scalar operand to vector (intrinsics with _n infix)
10074 if (TypeFlags.hasSplatOperand()) {
10075 unsigned OpNo = TypeFlags.getSplatOperand();
10076 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10077 }
10078
10079 // The pair-wise function has a narrower overloaded type.
10080 Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());
10081 Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});
10082
10083 // Now bitcast to the wider result type.
10084 llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);
10085 return EmitSVEReinterpret(Call, Ty);
10086}
10087
10088Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,
10089 ArrayRef<Value *> Ops, unsigned BuiltinID) {
10090 llvm::Type *OverloadedTy = getSVEType(TypeFlags);
10091 Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);
10092 return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)});
10093}
10094
10095Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,
10096 SmallVectorImpl<Value *> &Ops,
10097 unsigned BuiltinID) {
10098 auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
10099 auto *VectorTy = getSVEVectorForElementType(MemEltTy);
10100 auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10101
10102 Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);
10103 Value *BasePtr = Ops[1];
10104
10105 // Apply the index operand to the base pointer if it was not omitted.
10106 if (Ops.size() > 3)
10107 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10108
10109 Value *PrfOp = Ops.back();
10110
10111 Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());
10112 return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});
10113}
10114
10115Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
10116 llvm::Type *ReturnTy,
10117 SmallVectorImpl<Value *> &Ops,
10118 unsigned IntrinsicID,
10119 bool IsZExtReturn) {
10120 QualType LangPTy = E->getArg(1)->getType();
10121 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10122 LangPTy->castAs<PointerType>()->getPointeeType());
10123
10124 // The vector type that is returned may be different from the
10125 // eventual type loaded from memory.
10126 auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);
10127 llvm::ScalableVectorType *MemoryTy = nullptr;
10128 llvm::ScalableVectorType *PredTy = nullptr;
10129 bool IsQuadLoad = false;
10130 switch (IntrinsicID) {
10131 case Intrinsic::aarch64_sve_ld1uwq:
10132 case Intrinsic::aarch64_sve_ld1udq:
10133 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10134 PredTy = llvm::ScalableVectorType::get(
10135 llvm::Type::getInt1Ty(getLLVMContext()), 1);
10136 IsQuadLoad = true;
10137 break;
10138 default:
10139 MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10140 PredTy = MemoryTy;
10141 break;
10142 }
10143
10144 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10145 Value *BasePtr = Ops[1];
10146
10147 // Does the load have an offset?
10148 if (Ops.size() > 2)
10149 BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);
10150
10151 Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);
10152 auto *Load =
10153 cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));
10154 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10155 CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);
10156
10157 if (IsQuadLoad)
10158 return Load;
10159
10160 return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)
10161 : Builder.CreateSExt(Load, VectorTy);
10162}
10163
10164Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
10165 SmallVectorImpl<Value *> &Ops,
10166 unsigned IntrinsicID) {
10167 QualType LangPTy = E->getArg(1)->getType();
10168 llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
10169 LangPTy->castAs<PointerType>()->getPointeeType());
10170
10171 // The vector type that is stored may be different from the
10172 // eventual type stored to memory.
10173 auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());
10174 auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);
10175
10176 auto PredTy = MemoryTy;
10177 auto AddrMemoryTy = MemoryTy;
10178 bool IsQuadStore = false;
10179
10180 switch (IntrinsicID) {
10181 case Intrinsic::aarch64_sve_st1wq:
10182 case Intrinsic::aarch64_sve_st1dq:
10183 AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);
10184 PredTy =
10185 llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);
10186 IsQuadStore = true;
10187 break;
10188 default:
10189 break;
10190 }
10191 Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);
10192 Value *BasePtr = Ops[1];
10193
10194 // Does the store have an offset?
10195 if (Ops.size() == 4)
10196 BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);
10197
10198 // Last value is always the data
10199 Value *Val =
10200 IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);
10201
10202 Function *F =
10203 CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy);
10204 auto *Store =
10205 cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));
10206 auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());
10207 CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);
10208 return Store;
10209}
10210
10211Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
10212 SmallVectorImpl<Value *> &Ops,
10213 unsigned IntID) {
10214 Ops[2] = EmitSVEPredicateCast(
10215 Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));
10216
10217 SmallVector<Value *> NewOps;
10218 NewOps.push_back(Ops[2]);
10219
10220 llvm::Value *BasePtr = Ops[3];
10221
10222 // If the intrinsic contains the vnum parameter, multiply it by the vector
10223 // size in bytes.
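 // (aarch64.sme.cntsb returns the streaming vector length in bytes, so vnum
 // whole vectors correspond to vnum * cntsb bytes.)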
10224 if (Ops.size() == 5) {
10225 Function *StreamingVectorLength =
10226 CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
10227 llvm::Value *StreamingVectorLengthCall =
10228 Builder.CreateCall(StreamingVectorLength);
10229 llvm::Value *Mulvl =
10230 Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");
10231 // The type of the ptr parameter is void *, so use Int8Ty here.
10232 BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);
10233 }
10234 NewOps.push_back(BasePtr);
10235 NewOps.push_back(Ops[0]);
10236 NewOps.push_back(Ops[1]);
10237 Function *F = CGM.getIntrinsic(IntID);
10238 return Builder.CreateCall(F, NewOps);
10239}
10240
10241Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,
10242 SmallVectorImpl<Value *> &Ops,
10243 unsigned IntID) {
10244 auto *VecTy = getSVEType(TypeFlags);
10245 Function *F = CGM.getIntrinsic(IntID, VecTy);
10246 if (TypeFlags.isReadZA())
10247 Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);
10248 else if (TypeFlags.isWriteZA())
10249 Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);
10250 return Builder.CreateCall(F, Ops);
10251}
10252
10253Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,
10254 SmallVectorImpl<Value *> &Ops,
10255 unsigned IntID) {
10256 // The svzero_za() intrinsic zeros the entire ZA array and has no parameters.
10257 if (Ops.size() == 0)
10258 Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));
10259 Function *F = CGM.getIntrinsic(IntID, {});
10260 return Builder.CreateCall(F, Ops);
10261}
10262
10263Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
10264 SmallVectorImpl<Value *> &Ops,
10265 unsigned IntID) {
10266 if (Ops.size() == 2)
10267 Ops.push_back(Builder.getInt32(0));
10268 else
10269 Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);
10270 Function *F = CGM.getIntrinsic(IntID, {});
10271 return Builder.CreateCall(F, Ops);
10272}
10273
10274// Splat a scalar across all lanes of the given scalable vector type using
10275// IRBuilder::CreateVectorSplat.
10276Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {
10277 return Builder.CreateVectorSplat(
10278 cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);
10279}
10280
10281Value *CodeGenFunction::EmitSVEDupX(Value *Scalar) {
10282 return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));
10283}
10284
10285Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {
10286 // FIXME: For big endian this needs an additional REV, or needs a separate
10287 // intrinsic that is code-generated as a no-op, because the LLVM bitcast
10288 // instruction is defined as 'bitwise' equivalent from memory point of
10289 // view (when storing/reloading), whereas the svreinterpret builtin
10290 // implements bitwise equivalent cast from register point of view.
10291 // LLVM CodeGen for a bitcast must add an explicit REV for big-endian.
10292 return Builder.CreateBitCast(Val, Ty);
10293}
10294
10295static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10296 SmallVectorImpl<Value *> &Ops) {
10297 auto *SplatZero = Constant::getNullValue(Ty);
10298 Ops.insert(Ops.begin(), SplatZero);
10299}
10300
10301static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,
10302 SmallVectorImpl<Value *> &Ops) {
10303 auto *SplatUndef = UndefValue::get(Ty);
10304 Ops.insert(Ops.begin(), SplatUndef);
10305}
10306
10307SmallVector<llvm::Type *, 2>
10308CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,
10309 llvm::Type *ResultType,
10310 ArrayRef<Value *> Ops) {
10311 if (TypeFlags.isOverloadNone())
10312 return {};
10313
10314 llvm::Type *DefaultType = getSVEType(TypeFlags);
10315
10316 if (TypeFlags.isOverloadWhileOrMultiVecCvt())
10317 return {DefaultType, Ops[1]->getType()};
10318
10319 if (TypeFlags.isOverloadWhileRW())
10320 return {getSVEPredType(TypeFlags), Ops[0]->getType()};
10321
10322 if (TypeFlags.isOverloadCvt())
10323 return {Ops[0]->getType(), Ops.back()->getType()};
10324
10325 if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&
10326 ResultType->isVectorTy())
10327 return {ResultType, Ops[1]->getType()};
10328
10329 assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
10330 return {DefaultType};
10331}
10332
10333Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,
10334 llvm::Type *Ty,
10335 ArrayRef<Value *> Ops) {
10336 assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&
10337 "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");
10338
10339 unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();
10340 auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(
10341 TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty);
10342
10343 if (!SingleVecTy)
10344 return nullptr;
10345
10346 Value *Idx = ConstantInt::get(CGM.Int64Ty,
10347 I * SingleVecTy->getMinNumElements());
10348
10349 if (TypeFlags.isTupleSet())
10350 return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);
10351 return Builder.CreateExtractVector(Ty, Ops[0], Idx);
10352}
10353
10354Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,
10355 llvm::Type *Ty,
10356 ArrayRef<Value *> Ops) {
10357 assert(TypeFlags.isTupleCreate() && "Expects TypeFlags.isTupleCreate()");
10358
10359 auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());
10360
10361 if (!SrcTy)
10362 return nullptr;
10363
10364 unsigned MinElts = SrcTy->getMinNumElements();
10365 Value *Call = llvm::PoisonValue::get(Ty);
10366 for (unsigned I = 0; I < Ops.size(); I++) {
10367 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10368 Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);
10369 }
10370
10371 return Call;
10372}
10373
10374Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {
10375 // Multi-vector results are returned as an LLVM struct; repack the members
10376 // into a single (wide) result vector.
10377 auto *StructTy = dyn_cast<StructType>(Call->getType());
10378 if (!StructTy)
10379 return Call;
10380
10381 auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));
10382 if (!VTy)
10383 return Call;
10384 unsigned N = StructTy->getNumElements();
10385
10386 // We may need to emit a cast to a svbool_t
10387 bool IsPredTy = VTy->getElementType()->isIntegerTy(1);
10388 unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements();
10389
10390 ScalableVectorType *WideVTy =
10391 ScalableVectorType::get(VTy->getElementType(), MinElts * N);
10392 Value *Ret = llvm::PoisonValue::get(WideVTy);
10393 for (unsigned I = 0; I < N; ++I) {
10394 Value *SRet = Builder.CreateExtractValue(Call, I);
10395 assert(SRet->getType() == VTy && "Unexpected type for result value");
10396 Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);
10397
10398 if (IsPredTy)
10399 SRet = EmitSVEPredicateCast(
10400 SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));
10401
10402 Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);
10403 }
10404 Call = Ret;
10405
10406 return Call;
10407}
10408
10409void CodeGenFunction::GetAArch64SVEProcessedOperands(
10410 unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,
10411 SVETypeFlags TypeFlags) {
10412 // Find out if any arguments are required to be integer constant expressions.
10413 unsigned ICEArguments = 0;
10414 ASTContext::GetBuiltinTypeError Error;
10415 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
10416 assert(Error == ASTContext::GE_None && "Should not codegen an error");
10417
10418 // Tuple set/get only requires one insert/extract vector, which is
10419 // created by EmitSVETupleSetOrGet.
10420 bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();
10421
10422 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
10423 bool IsICE = ICEArguments & (1 << i);
10424 Value *Arg = EmitScalarExpr(E->getArg(i));
10425
10426 if (IsICE) {
10427 // If this is required to be a constant, constant fold it so that we know
10428 // that the generated intrinsic gets a ConstantInt.
10429 std::optional<llvm::APSInt> Result =
10430 E->getArg(i)->getIntegerConstantExpr(getContext());
10431 assert(Result && "Expected argument to be a constant");
10432
10433 // Immediates for SVE llvm intrinsics are always 32bit. We can safely
10434 // truncate because the immediate has been range checked and no valid
10435 // immediate requires more than a handful of bits.
10436 *Result = Result->extOrTrunc(32);
10437 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));
10438 continue;
10439 }
10440
10441 if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {
10442 Ops.push_back(Arg);
10443 continue;
10444 }
10445
10446 auto *VTy = cast<ScalableVectorType>(Arg->getType());
10447 unsigned MinElts = VTy->getMinNumElements();
10448 bool IsPred = VTy->getElementType()->isIntegerTy(1);
10449 unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);
10450
10451 if (N == 1) {
10452 Ops.push_back(Arg);
10453 continue;
10454 }
10455
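 // Otherwise the argument is an SVE tuple (e.g. svint32x2_t) lowered as one
 // wide scalable vector; split it into its N single-register parts with
 // llvm.vector.extract so each part can be passed separately.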
10456 for (unsigned I = 0; I < N; ++I) {
10457 Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);
10458 auto *NewVTy =
10459 ScalableVectorType::get(VTy->getElementType(), MinElts / N);
10460 Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));
10461 }
10462 }
10463}
10464
10464
10465Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
10466 const CallExpr *E) {
10467 llvm::Type *Ty = ConvertType(E->getType());
10468 if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&
10469 BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {
10470 Value *Val = EmitScalarExpr(E->getArg(0));
10471 return EmitSVEReinterpret(Val, Ty);
10472 }
10473
10474 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,
10475 AArch64SVEIntrinsicsProvenSorted);
10476
10477 llvm::SmallVector<Value *, 4> Ops;
10478 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10479 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10480
10481 if (TypeFlags.isLoad())
10482 return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,
10483 TypeFlags.isZExtReturn());
10484 else if (TypeFlags.isStore())
10485 return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);
10486 else if (TypeFlags.isGatherLoad())
10487 return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10488 else if (TypeFlags.isScatterStore())
10489 return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10490 else if (TypeFlags.isPrefetch())
10491 return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10492 else if (TypeFlags.isGatherPrefetch())
10493 return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10494 else if (TypeFlags.isStructLoad())
10495 return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10496 else if (TypeFlags.isStructStore())
10497 return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10498 else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
10499 return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
10500 else if (TypeFlags.isTupleCreate())
10501 return EmitSVETupleCreate(TypeFlags, Ty, Ops);
10502 else if (TypeFlags.isUndef())
10503 return UndefValue::get(Ty);
10504 else if (Builtin->LLVMIntrinsic != 0) {
10505 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
10506 InsertExplicitZeroOperand(Builder, Ty, Ops);
10507
10508 if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
10509 InsertExplicitUndefOperand(Builder, Ty, Ops);
10510
10511 // Some ACLE builtins leave out the argument to specify the predicate
10512 // pattern, which is expected to be expanded to an SV_ALL pattern.
10513 if (TypeFlags.isAppendSVALL())
10514 Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
10515 if (TypeFlags.isInsertOp1SVALL())
10516 Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));
10517
10518 // Predicates must match the main datatype.
10519 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10520 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10521 if (PredTy->getElementType()->isIntegerTy(1))
10522 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10523
10524 // Splat scalar operand to vector (intrinsics with _n infix)
10525 if (TypeFlags.hasSplatOperand()) {
10526 unsigned OpNo = TypeFlags.getSplatOperand();
10527 Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
10528 }
10529
10530 if (TypeFlags.isReverseCompare())
10531 std::swap(Ops[1], Ops[2]);
10532 else if (TypeFlags.isReverseUSDOT())
10533 std::swap(Ops[1], Ops[2]);
10534 else if (TypeFlags.isReverseMergeAnyBinOp() &&
10535 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10536 std::swap(Ops[1], Ops[2]);
10537 else if (TypeFlags.isReverseMergeAnyAccOp() &&
10538 TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
10539 std::swap(Ops[1], Ops[3]);
10540
10541 // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
10542 if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
10543 llvm::Type *OpndTy = Ops[1]->getType();
10544 auto *SplatZero = Constant::getNullValue(OpndTy);
10545 Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
10546 }
10547
10548 Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
10549 getSVEOverloadTypes(TypeFlags, Ty, Ops));
10550 Value *Call = Builder.CreateCall(F, Ops);
10551
10552 // Predicate results must be converted to svbool_t.
10553 if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
10554 if (PredTy->getScalarType()->isIntegerTy(1))
10555 Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10556
10557 return FormSVEBuiltinResult(Call);
10558 }
10559
10560 switch (BuiltinID) {
10561 default:
10562 return nullptr;
10563
10564 case SVE::BI__builtin_sve_svreinterpret_b: {
10565 auto SVCountTy =
10566 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10567 Function *CastFromSVCountF =
10568 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10569 return Builder.CreateCall(CastFromSVCountF, Ops[0]);
10570 }
10571 case SVE::BI__builtin_sve_svreinterpret_c: {
10572 auto SVCountTy =
10573 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10574 Function *CastToSVCountF =
10575 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10576 return Builder.CreateCall(CastToSVCountF, Ops[0]);
10577 }
10578
10579 case SVE::BI__builtin_sve_svpsel_lane_b8:
10580 case SVE::BI__builtin_sve_svpsel_lane_b16:
10581 case SVE::BI__builtin_sve_svpsel_lane_b32:
10582 case SVE::BI__builtin_sve_svpsel_lane_b64:
10583 case SVE::BI__builtin_sve_svpsel_lane_c8:
10584 case SVE::BI__builtin_sve_svpsel_lane_c16:
10585 case SVE::BI__builtin_sve_svpsel_lane_c32:
10586 case SVE::BI__builtin_sve_svpsel_lane_c64: {
10587 bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
10588 assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
10589 "aarch64.svcount")) &&
10590 "Unexpected TargetExtType");
10591 auto SVCountTy =
10592 llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
10593 Function *CastFromSVCountF =
10594 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
10595 Function *CastToSVCountF =
10596 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
10597
10598 auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
10599 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
10600 llvm::Value *Ops0 =
10601 IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
10602 llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
10603 llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
10604 return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
10605 }
10606 case SVE::BI__builtin_sve_svmov_b_z: {
10607 // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
10608 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10609 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10610 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
10611 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
10612 }
10613
10614 case SVE::BI__builtin_sve_svnot_b_z: {
10615 // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
10616 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10617 llvm::Type* OverloadedTy = getSVEType(TypeFlags);
10618 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
10619 return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
10620 }
10621
10622 case SVE::BI__builtin_sve_svmovlb_u16:
10623 case SVE::BI__builtin_sve_svmovlb_u32:
10624 case SVE::BI__builtin_sve_svmovlb_u64:
10625 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);
10626
10627 case SVE::BI__builtin_sve_svmovlb_s16:
10628 case SVE::BI__builtin_sve_svmovlb_s32:
10629 case SVE::BI__builtin_sve_svmovlb_s64:
10630 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);
10631
10632 case SVE::BI__builtin_sve_svmovlt_u16:
10633 case SVE::BI__builtin_sve_svmovlt_u32:
10634 case SVE::BI__builtin_sve_svmovlt_u64:
10635 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);
10636
10637 case SVE::BI__builtin_sve_svmovlt_s16:
10638 case SVE::BI__builtin_sve_svmovlt_s32:
10639 case SVE::BI__builtin_sve_svmovlt_s64:
10640 return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);
10641
10642 case SVE::BI__builtin_sve_svpmullt_u16:
10643 case SVE::BI__builtin_sve_svpmullt_u64:
10644 case SVE::BI__builtin_sve_svpmullt_n_u16:
10645 case SVE::BI__builtin_sve_svpmullt_n_u64:
10646 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);
10647
10648 case SVE::BI__builtin_sve_svpmullb_u16:
10649 case SVE::BI__builtin_sve_svpmullb_u64:
10650 case SVE::BI__builtin_sve_svpmullb_n_u16:
10651 case SVE::BI__builtin_sve_svpmullb_n_u64:
10652 return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);
10653
10654 case SVE::BI__builtin_sve_svdup_n_b8:
10655 case SVE::BI__builtin_sve_svdup_n_b16:
10656 case SVE::BI__builtin_sve_svdup_n_b32:
10657 case SVE::BI__builtin_sve_svdup_n_b64: {
10658 Value *CmpNE =
10659 Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
10660 llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
10661 Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
10662 return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
10663 }
10664
10665 case SVE::BI__builtin_sve_svdupq_n_b8:
10666 case SVE::BI__builtin_sve_svdupq_n_b16:
10667 case SVE::BI__builtin_sve_svdupq_n_b32:
10668 case SVE::BI__builtin_sve_svdupq_n_b64:
10669 case SVE::BI__builtin_sve_svdupq_n_u8:
10670 case SVE::BI__builtin_sve_svdupq_n_s8:
10671 case SVE::BI__builtin_sve_svdupq_n_u64:
10672 case SVE::BI__builtin_sve_svdupq_n_f64:
10673 case SVE::BI__builtin_sve_svdupq_n_s64:
10674 case SVE::BI__builtin_sve_svdupq_n_u16:
10675 case SVE::BI__builtin_sve_svdupq_n_f16:
10676 case SVE::BI__builtin_sve_svdupq_n_bf16:
10677 case SVE::BI__builtin_sve_svdupq_n_s16:
10678 case SVE::BI__builtin_sve_svdupq_n_u32:
10679 case SVE::BI__builtin_sve_svdupq_n_f32:
10680 case SVE::BI__builtin_sve_svdupq_n_s32: {
10681 // These builtins build a fixed-length vector from the scalar operands, insert
10682 // it into the low 128 bits of a scalable vector, and splat it with dupq_lane.
10683 unsigned NumOpnds = Ops.size();
10684
10685 bool IsBoolTy =
10686 cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);
10687
10688 // For svdupq_n_b* the element type is an integer of width 128/numelts,
10689 // so that the compare can use the width that is natural for the expected
10690 // number of predicate lanes.
10691 llvm::Type *EltTy = Ops[0]->getType();
10692 if (IsBoolTy)
10693 EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);
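 // (E.g. svdupq_n_b16 takes 8 operands, so its lanes are compared as i16.)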
10694
10695 SmallVector<llvm::Value *, 16> VecOps;
10696 for (unsigned I = 0; I < NumOpnds; ++I)
10697 VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
10698 Value *Vec = BuildVector(VecOps);
10699
10700 llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
10701 Value *InsertSubVec = Builder.CreateInsertVector(
10702 OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));
10703
10704 Function *F =
10705 CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
10706 Value *DupQLane =
10707 Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});
10708
10709 if (!IsBoolTy)
10710 return DupQLane;
10711
10712 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10713 Value *Pred = EmitSVEAllTruePred(TypeFlags);
10714
10715 // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
10716 F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
10717 : Intrinsic::aarch64_sve_cmpne_wide,
10718 OverloadedTy);
10719 Value *Call = Builder.CreateCall(
10720 F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
10721 return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
10722 }
10723
10724 case SVE::BI__builtin_sve_svpfalse_b:
10725 return ConstantInt::getFalse(Ty);
10726
10727 case SVE::BI__builtin_sve_svpfalse_c: {
10728 auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
10729 Function *CastToSVCountF =
10730 CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
10731 return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
10732 }
10733
10734 case SVE::BI__builtin_sve_svlen_bf16:
10735 case SVE::BI__builtin_sve_svlen_f16:
10736 case SVE::BI__builtin_sve_svlen_f32:
10737 case SVE::BI__builtin_sve_svlen_f64:
10738 case SVE::BI__builtin_sve_svlen_s8:
10739 case SVE::BI__builtin_sve_svlen_s16:
10740 case SVE::BI__builtin_sve_svlen_s32:
10741 case SVE::BI__builtin_sve_svlen_s64:
10742 case SVE::BI__builtin_sve_svlen_u8:
10743 case SVE::BI__builtin_sve_svlen_u16:
10744 case SVE::BI__builtin_sve_svlen_u32:
10745 case SVE::BI__builtin_sve_svlen_u64: {
10746 SVETypeFlags TF(Builtin->TypeModifier);
10747 auto VTy = cast<llvm::VectorType>(getSVEType(TF));
10748 auto *NumEls =
10749 llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());
10750
10751 Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
10752 return Builder.CreateMul(NumEls, Builder.CreateCall(F));
10753 }
10754
10755 case SVE::BI__builtin_sve_svtbl2_u8:
10756 case SVE::BI__builtin_sve_svtbl2_s8:
10757 case SVE::BI__builtin_sve_svtbl2_u16:
10758 case SVE::BI__builtin_sve_svtbl2_s16:
10759 case SVE::BI__builtin_sve_svtbl2_u32:
10760 case SVE::BI__builtin_sve_svtbl2_s32:
10761 case SVE::BI__builtin_sve_svtbl2_u64:
10762 case SVE::BI__builtin_sve_svtbl2_s64:
10763 case SVE::BI__builtin_sve_svtbl2_f16:
10764 case SVE::BI__builtin_sve_svtbl2_bf16:
10765 case SVE::BI__builtin_sve_svtbl2_f32:
10766 case SVE::BI__builtin_sve_svtbl2_f64: {
10767 SVETypeFlags TF(Builtin->TypeModifier);
10768 auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
10769 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
10770 return Builder.CreateCall(F, Ops);
10771 }
10772
10773 case SVE::BI__builtin_sve_svset_neonq_s8:
10774 case SVE::BI__builtin_sve_svset_neonq_s16:
10775 case SVE::BI__builtin_sve_svset_neonq_s32:
10776 case SVE::BI__builtin_sve_svset_neonq_s64:
10777 case SVE::BI__builtin_sve_svset_neonq_u8:
10778 case SVE::BI__builtin_sve_svset_neonq_u16:
10779 case SVE::BI__builtin_sve_svset_neonq_u32:
10780 case SVE::BI__builtin_sve_svset_neonq_u64:
10781 case SVE::BI__builtin_sve_svset_neonq_f16:
10782 case SVE::BI__builtin_sve_svset_neonq_f32:
10783 case SVE::BI__builtin_sve_svset_neonq_f64:
10784 case SVE::BI__builtin_sve_svset_neonq_bf16: {
10785 return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
10786 }
10787
10788 case SVE::BI__builtin_sve_svget_neonq_s8:
10789 case SVE::BI__builtin_sve_svget_neonq_s16:
10790 case SVE::BI__builtin_sve_svget_neonq_s32:
10791 case SVE::BI__builtin_sve_svget_neonq_s64:
10792 case SVE::BI__builtin_sve_svget_neonq_u8:
10793 case SVE::BI__builtin_sve_svget_neonq_u16:
10794 case SVE::BI__builtin_sve_svget_neonq_u32:
10795 case SVE::BI__builtin_sve_svget_neonq_u64:
10796 case SVE::BI__builtin_sve_svget_neonq_f16:
10797 case SVE::BI__builtin_sve_svget_neonq_f32:
10798 case SVE::BI__builtin_sve_svget_neonq_f64:
10799 case SVE::BI__builtin_sve_svget_neonq_bf16: {
10800 return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
10801 }
10802
10803 case SVE::BI__builtin_sve_svdup_neonq_s8:
10804 case SVE::BI__builtin_sve_svdup_neonq_s16:
10805 case SVE::BI__builtin_sve_svdup_neonq_s32:
10806 case SVE::BI__builtin_sve_svdup_neonq_s64:
10807 case SVE::BI__builtin_sve_svdup_neonq_u8:
10808 case SVE::BI__builtin_sve_svdup_neonq_u16:
10809 case SVE::BI__builtin_sve_svdup_neonq_u32:
10810 case SVE::BI__builtin_sve_svdup_neonq_u64:
10811 case SVE::BI__builtin_sve_svdup_neonq_f16:
10812 case SVE::BI__builtin_sve_svdup_neonq_f32:
10813 case SVE::BI__builtin_sve_svdup_neonq_f64:
10814 case SVE::BI__builtin_sve_svdup_neonq_bf16: {
10815 Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],
10816 Builder.getInt64(0));
10817 return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
10818 {Insert, Builder.getInt64(0)});
10819 }
10820 }
10821
10822 // Should not happen.
10823 return nullptr;
10824}
10825
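// Presumably these su* builtins are lowered through the corresponding us*
// intrinsics, whose signed/unsigned multi-vector operands come in the
// opposite order; this helper swaps Ops[1..MultiVec] with the following
// MultiVec operands to compensate (rationale assumed, not confirmed here).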
10826static void swapCommutativeSMEOperands(unsigned BuiltinID,
10827 SmallVectorImpl<Value *> &Ops) {
10828 unsigned MultiVec;
10829 switch (BuiltinID) {
10830 default:
10831 return;
10832 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:
10833 MultiVec = 1;
10834 break;
10835 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:
10836 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:
10837 MultiVec = 2;
10838 break;
10839 case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:
10840 case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:
10841 MultiVec = 4;
10842 break;
10843 }
10844
10845 if (MultiVec > 0)
10846 for (unsigned I = 0; I < MultiVec; ++I)
10847 std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);
10848}
10849
10850Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
10851 const CallExpr *E) {
10852 auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,
10853 AArch64SMEIntrinsicsProvenSorted);
10854
10855 llvm::SmallVector<Value *, 4> Ops;
10856 SVETypeFlags TypeFlags(Builtin->TypeModifier);
10857 GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);
10858
10859 if (TypeFlags.isLoad() || TypeFlags.isStore())
10860 return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10861 else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())
10862 return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10863 else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||
10864 BuiltinID == SME::BI__builtin_sme_svzero_za)
10865 return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10866 else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||
10867 BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||
10868 BuiltinID == SME::BI__builtin_sme_svldr_za ||
10869 BuiltinID == SME::BI__builtin_sme_svstr_za)
10870 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
10871
10872 // Handle builtins which require their multi-vector operands to be swapped
10873 swapCommutativeSMEOperands(BuiltinID, Ops);
10874
10875 // Should not happen!
10876 if (Builtin->LLVMIntrinsic == 0)
10877 return nullptr;
10878
10879 // Predicates must match the main datatype.
10880 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
10881 if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
10882 if (PredTy->getElementType()->isIntegerTy(1))
10883 Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
10884
10885 Function *F =
10886 TypeFlags.isOverloadNone()
10887 ? CGM.getIntrinsic(Builtin->LLVMIntrinsic)
10888 : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});
10889 Value *Call = Builder.CreateCall(F, Ops);
10890
10891 return FormSVEBuiltinResult(Call);
10892}
10893
10894Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
10895 const CallExpr *E,
10896 llvm::Triple::ArchType Arch) {
10897 if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&
10898 BuiltinID <= clang::AArch64::LastSVEBuiltin)
10899 return EmitAArch64SVEBuiltinExpr(BuiltinID, E);
10900
10901 if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&
10902 BuiltinID <= clang::AArch64::LastSMEBuiltin)
10903 return EmitAArch64SMEBuiltinExpr(BuiltinID, E);
10904
10905 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
10906 return EmitAArch64CpuSupports(E);
10907
10908 unsigned HintID = static_cast<unsigned>(-1);
10909 switch (BuiltinID) {
10910 default: break;
10911 case clang::AArch64::BI__builtin_arm_nop:
10912 HintID = 0;
10913 break;
10914 case clang::AArch64::BI__builtin_arm_yield:
10915 case clang::AArch64::BI__yield:
10916 HintID = 1;
10917 break;
10918 case clang::AArch64::BI__builtin_arm_wfe:
10919 case clang::AArch64::BI__wfe:
10920 HintID = 2;
10921 break;
10922 case clang::AArch64::BI__builtin_arm_wfi:
10923 case clang::AArch64::BI__wfi:
10924 HintID = 3;
10925 break;
10926 case clang::AArch64::BI__builtin_arm_sev:
10927 case clang::AArch64::BI__sev:
10928 HintID = 4;
10929 break;
10930 case clang::AArch64::BI__builtin_arm_sevl:
10931 case clang::AArch64::BI__sevl:
10932 HintID = 5;
10933 break;
10934 }
10935
10936 if (HintID != static_cast<unsigned>(-1)) {
10937 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
10938 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
10939 }
10940
10941 if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {
10942 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);
10943 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10944 return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));
10945 }
10946
10947 if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) {
10948 // Create call to __arm_sme_state and store the results to the two pointers.
10949 CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(
10950 llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},
10951 false),
10952 "__arm_sme_state"));
10953 auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),
10954 "aarch64_pstate_sm_compatible");
10955 CI->setAttributes(Attrs);
10956 CI->setCallingConv(
10957 llvm::CallingConv::
10958 AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);
10959 Builder.CreateStore(Builder.CreateExtractValue(CI, 0),
10960 EmitPointerWithAlignment(E->getArg(0)));
10961 return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),
10962 EmitPointerWithAlignment(E->getArg(1)));
10963 }
10964
10965 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {
10966 assert((getContext().getTypeSize(E->getType()) == 32) &&
10967 "rbit of unusual size!");
10968 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10969 return Builder.CreateCall(
10970 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10971 }
10972 if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {
10973 assert((getContext().getTypeSize(E->getType()) == 64) &&
10974 "rbit of unusual size!");
10975 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10976 return Builder.CreateCall(
10977 CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
10978 }
10979
10980 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||
10981 BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {
10982 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10983 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());
10984 Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
10985 if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)
10986 Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());
10987 return Res;
10988 }
10989
10990 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {
10991 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10992 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,
10993 "cls");
10994 }
10995 if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {
10996 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
10997 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,
10998 "cls");
10999 }
11000
11001 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||
11002 BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {
11003 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11004 llvm::Type *Ty = Arg->getType();
11005 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),
11006 Arg, "frint32z");
11007 }
11008
11009 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||
11010 BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {
11011 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11012 llvm::Type *Ty = Arg->getType();
11013 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),
11014 Arg, "frint64z");
11015 }
11016
11017 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||
11018 BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {
11019 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11020 llvm::Type *Ty = Arg->getType();
11021 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),
11022 Arg, "frint32x");
11023 }
11024
11025 if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||
11026 BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {
11027 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11028 llvm::Type *Ty = Arg->getType();
11029 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),
11030 Arg, "frint64x");
11031 }
11032
11033 if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {
11034 assert((getContext().getTypeSize(E->getType()) == 32) &&
11035 "__jcvt of unusual size!");
11036 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
11037 return Builder.CreateCall(
11038 CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
11039 }
11040
11041 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||
11042 BuiltinID == clang::AArch64::BI__builtin_arm_st64b ||
11043 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||
11044 BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {
11045 llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));
11046 llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));
11047
11048 if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {
11049 // Load from the address via an LLVM intrinsic, receiving a
11050 // tuple of 8 i64 words, and store each one to ValPtr.
11051 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);
11052 llvm::Value *Val = Builder.CreateCall(F, MemAddr);
11053 llvm::Value *ToRet;
11054 for (size_t i = 0; i < 8; i++) {
11055 llvm::Value *ValOffsetPtr =
11056 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11057 Address Addr =
11058 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11059 ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);
11060 }
11061 return ToRet;
11062 } else {
11063 // Load 8 i64 words from ValPtr, and store them to the address
11064 // via an LLVM intrinsic.
11065 SmallVector<llvm::Value *, 9> Args;
11066 Args.push_back(MemAddr);
11067 for (size_t i = 0; i < 8; i++) {
11068 llvm::Value *ValOffsetPtr =
11069 Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));
11070 Address Addr =
11071 Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));
11072 Args.push_back(Builder.CreateLoad(Addr));
11073 }
11074
11075 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b
11076 ? Intrinsic::aarch64_st64b
11077 : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv
11078 ? Intrinsic::aarch64_st64bv
11079 : Intrinsic::aarch64_st64bv0);
11080 Function *F = CGM.getIntrinsic(Intr);
11081 return Builder.CreateCall(F, Args);
11082 }
11083 }
11084
11085 if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||
11086 BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {
11087
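 // Both rndr and rndrrs return {random value, success flag}; the value is
 // stored through the pointer argument and the flag, zero-extended to i32,
 // becomes the builtin's result.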
11088 auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr
11089 ? Intrinsic::aarch64_rndr
11090 : Intrinsic::aarch64_rndrrs);
11091 Function *F = CGM.getIntrinsic(Intr);
11092 llvm::Value *Val = Builder.CreateCall(F);
11093 Value *RandomValue = Builder.CreateExtractValue(Val, 0);
11094 Value *Status = Builder.CreateExtractValue(Val, 1);
11095
11096 Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
11097 Builder.CreateStore(RandomValue, MemAddress);
11098 Status = Builder.CreateZExt(Status, Int32Ty);
11099 return Status;
11100 }
11101
11102 if (BuiltinID == clang::AArch64::BI__clear_cache) {
11103 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
11104 const FunctionDecl *FD = E->getDirectCallee();
11105 Value *Ops[2];
11106 for (unsigned i = 0; i < 2; i++)
11107 Ops[i] = EmitScalarExpr(E->getArg(i));
11108 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
11109 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
11110 StringRef Name = FD->getName();
11111 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
11112 }
11113
11114 if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11115 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&
11116 getContext().getTypeSize(E->getType()) == 128) {
11117 Function *F =
11118 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11119 ? Intrinsic::aarch64_ldaxp
11120 : Intrinsic::aarch64_ldxp);
11121
11122 Value *LdPtr = EmitScalarExpr(E->getArg(0));
11123 Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");
11124
11125 Value *Val0 = Builder.CreateExtractValue(Val, 1);
11126 Value *Val1 = Builder.CreateExtractValue(Val, 0);
11127 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11128 Val0 = Builder.CreateZExt(Val0, Int128Ty);
11129 Val1 = Builder.CreateZExt(Val1, Int128Ty);
11130
11131 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
11132 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
11133 Val = Builder.CreateOr(Val, Val1);
11134 return Builder.CreateBitCast(Val, ConvertType(E->getType()));
11135 } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||
11136 BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {
11137 Value *LoadAddr = EmitScalarExpr(E->getArg(0));
11138
11139 QualType Ty = E->getType();
11140 llvm::Type *RealResTy = ConvertType(Ty);
11141 llvm::Type *IntTy =
11142 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11143
11144 Function *F =
11145 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex
11146 ? Intrinsic::aarch64_ldaxr
11147 : Intrinsic::aarch64_ldxr,
11148 UnqualPtrTy);
11149 CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
11150 Val->addParamAttr(
11151 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));
11152
11153 if (RealResTy->isPointerTy())
11154 return Builder.CreateIntToPtr(Val, RealResTy);
11155
11156 llvm::Type *IntResTy = llvm::IntegerType::get(
11157 getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
11158 return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),
11159 RealResTy);
11160 }
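// Illustrative note on the two exclusive-load paths above: 128-bit results go
// through @llvm.aarch64.ldxp / @llvm.aarch64.ldaxp, zero-extend both i64 halves
// to i128, and recombine them as (hi << 64) | lo; narrower results use
// @llvm.aarch64.ldxr / @llvm.aarch64.ldaxr with an elementtype parameter
// attribute and are then cast back to the user-visible type.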
11161
11162 if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11163 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&
11164 getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
11165 Function *F =
11166 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11167 ? Intrinsic::aarch64_stlxp
11168 : Intrinsic::aarch64_stxp);
11169 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
11170
11171 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
11172 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
11173
11174 Tmp = Tmp.withElementType(STy);
11175 llvm::Value *Val = Builder.CreateLoad(Tmp);
11176
11177 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
11178 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
11179 Value *StPtr = EmitScalarExpr(E->getArg(1));
11180 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
11181 }
11182
11183 if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||
11184 BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {
11185 Value *StoreVal = EmitScalarExpr(E->getArg(0));
11186 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
11187
11188 QualType Ty = E->getArg(0)->getType();
11189 llvm::Type *StoreTy =
11190 llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));
11191
11192 if (StoreVal->getType()->isPointerTy())
11193 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
11194 else {
11195 llvm::Type *IntTy = llvm::IntegerType::get(
11196 getLLVMContext(),
11197 CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
11198 StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
11199 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
11200 }
11201
11202 Function *F =
11203 CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex
11204 ? Intrinsic::aarch64_stlxr
11205 : Intrinsic::aarch64_stxr,
11206 StoreAddr->getType());
11207 CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
11208 CI->addParamAttr(
11209 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));
11210 return CI;
11211 }
11212
11213 if (BuiltinID == clang::AArch64::BI__getReg) {
11214 Expr::EvalResult Result;
11215 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11216 llvm_unreachable("Sema will ensure that the parameter is constant");
11217
11218 llvm::APSInt Value = Result.Val.getInt();
11219 LLVMContext &Context = CGM.getLLVMContext();
11220 std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);
11221
11222 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
11223 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11224 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11225
11226 llvm::Function *F =
11227 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11228 return Builder.CreateCall(F, Metadata);
11229 }
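// Illustrative example (assumed usage): __getReg(31) reads "sp", while any
// other constant N reads "xN" via @llvm.read_register with register-name
// metadata, so __getReg(18) would yield the current value of x18 as an i64.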
11230
11231 if (BuiltinID == clang::AArch64::BI__break) {
11232 Expr::EvalResult Result;
11233 if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
11234 llvm_unreachable("Sema will ensure that the parameter is constant");
11235
11236 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);
11237 return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11238 }
11239
11240 if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {
11241 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
11242 return Builder.CreateCall(F);
11243 }
11244
11245 if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)
11246 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
11247 llvm::SyncScope::SingleThread);
11248
11249 // CRC32
11250 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
11251 switch (BuiltinID) {
11252 case clang::AArch64::BI__builtin_arm_crc32b:
11253 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
11254 case clang::AArch64::BI__builtin_arm_crc32cb:
11255 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
11256 case clang::AArch64::BI__builtin_arm_crc32h:
11257 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
11258 case clang::AArch64::BI__builtin_arm_crc32ch:
11259 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
11260 case clang::AArch64::BI__builtin_arm_crc32w:
11261 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
11262 case clang::AArch64::BI__builtin_arm_crc32cw:
11263 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
11264 case clang::AArch64::BI__builtin_arm_crc32d:
11265 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
11266 case clang::AArch64::BI__builtin_arm_crc32cd:
11267 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
11268 }
11269
11270 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
11271 Value *Arg0 = EmitScalarExpr(E->getArg(0));
11272 Value *Arg1 = EmitScalarExpr(E->getArg(1));
11273 Function *F = CGM.getIntrinsic(CRCIntrinsicID);
11274
11275 llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
11276 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
11277
11278 return Builder.CreateCall(F, {Arg0, Arg1});
11279 }
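// Illustrative mapping (a sketch of the IR produced by the CRC32 path above):
//   __builtin_arm_crc32b(acc, b) -> call i32 @llvm.aarch64.crc32b(i32 %acc, i32 %b)
//   __builtin_arm_crc32d(acc, d) -> call i32 @llvm.aarch64.crc32x(i32 %acc, i64 %d)
// The data operand is first zero-extended or bitcast to the intrinsic's second
// parameter type, as done with Arg1 above.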
11280
11281 // Memory Operations (MOPS)
11282 if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {
11283 Value *Dst = EmitScalarExpr(E->getArg(0));
11284 Value *Val = EmitScalarExpr(E->getArg(1));
11285 Value *Size = EmitScalarExpr(E->getArg(2));
11286 Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);
11287 Val = Builder.CreateTrunc(Val, Int8Ty);
11288 Size = Builder.CreateIntCast(Size, Int64Ty, false);
11289 return Builder.CreateCall(
11290 CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});
11291 }
11292
11293 // Memory Tagging Extensions (MTE) Intrinsics
11294 Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
11295 switch (BuiltinID) {
11296 case clang::AArch64::BI__builtin_arm_irg:
11297 MTEIntrinsicID = Intrinsic::aarch64_irg; break;
11298 case clang::AArch64::BI__builtin_arm_addg:
11299 MTEIntrinsicID = Intrinsic::aarch64_addg; break;
11300 case clang::AArch64::BI__builtin_arm_gmi:
11301 MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
11302 case clang::AArch64::BI__builtin_arm_ldg:
11303 MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
11304 case clang::AArch64::BI__builtin_arm_stg:
11305 MTEIntrinsicID = Intrinsic::aarch64_stg; break;
11306 case clang::AArch64::BI__builtin_arm_subp:
11307 MTEIntrinsicID = Intrinsic::aarch64_subp; break;
11308 }
11309
11310 if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
11311 llvm::Type *T = ConvertType(E->getType());
11312
11313 if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
11314 Value *Pointer = EmitScalarExpr(E->getArg(0));
11315 Value *Mask = EmitScalarExpr(E->getArg(1));
11316
11317 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11318 Mask = Builder.CreateZExt(Mask, Int64Ty);
11319 Value *RV = Builder.CreateCall(
11320 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
11321 return Builder.CreatePointerCast(RV, T);
11322 }
11323 if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
11324 Value *Pointer = EmitScalarExpr(E->getArg(0));
11325 Value *TagOffset = EmitScalarExpr(E->getArg(1));
11326
11327 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11328 TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
11329 Value *RV = Builder.CreateCall(
11330 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
11331 return Builder.CreatePointerCast(RV, T);
11332 }
11333 if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
11334 Value *Pointer = EmitScalarExpr(E->getArg(0));
11335 Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
11336
11337 ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
11338 Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
11339 return Builder.CreateCall(
11340 CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
11341 }
11342 // Although it is possible to supply a different return
11343 // address (the first argument) to this intrinsic, for now we set the
11344 // return address to the same as the input address.
11345 if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
11346 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11347 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11348 Value *RV = Builder.CreateCall(
11349 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11350 return Builder.CreatePointerCast(RV, T);
11351 }
11352 // Although it is possible to supply a different tag (to set)
11353 // to this intrinsic (as the first argument), for now we supply
11354 // the tag that is in the input address argument (the common use case).
11355 if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
11356 Value *TagAddress = EmitScalarExpr(E->getArg(0));
11357 TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
11358 return Builder.CreateCall(
11359 CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
11360 }
11361 if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
11362 Value *PointerA = EmitScalarExpr(E->getArg(0));
11363 Value *PointerB = EmitScalarExpr(E->getArg(1));
11364 PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
11365 PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
11366 return Builder.CreateCall(
11367 CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
11368 }
11369 }
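// Illustrative sketch (assumed usage of the MTE builtins handled above):
//   void *p1 = __builtin_arm_irg(p, mask);     // @llvm.aarch64.irg
//   void *p2 = __builtin_arm_addg(p, offset);  // @llvm.aarch64.addg
//   ptrdiff_t d = __builtin_arm_subp(a, b);    // @llvm.aarch64.subp
// Pointer operands are cast to i8*, integer operands are widened to i64, and
// pointer results are cast back to the builtin's return type.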
11370
11371 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11372 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11373 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11374 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11375 BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||
11376 BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||
11377 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||
11378 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {
11379
11380 SpecialRegisterAccessKind AccessKind = Write;
11381 if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11382 BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||
11383 BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11384 BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)
11385 AccessKind = VolatileRead;
11386
11387 bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||
11388 BuiltinID == clang::AArch64::BI__builtin_arm_wsrp;
11389
11390 bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||
11391 BuiltinID == clang::AArch64::BI__builtin_arm_wsr;
11392
11393 bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||
11394 BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;
11395
11396 llvm::Type *ValueType;
11397 llvm::Type *RegisterType = Int64Ty;
11398 if (Is32Bit) {
11399 ValueType = Int32Ty;
11400 } else if (Is128Bit) {
11401 llvm::Type *Int128Ty =
11402 llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());
11403 ValueType = Int128Ty;
11404 RegisterType = Int128Ty;
11405 } else if (IsPointerBuiltin) {
11406 ValueType = VoidPtrTy;
11407 } else {
11408 ValueType = Int64Ty;
11409 };
11410
11411 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,
11412 AccessKind);
11413 }
11414
11415 if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||
11416 BuiltinID == clang::AArch64::BI_WriteStatusReg) {
11417 LLVMContext &Context = CGM.getLLVMContext();
11418
11419 unsigned SysReg =
11420 E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
11421
11422 std::string SysRegStr;
11423 llvm::raw_string_ostream(SysRegStr) <<
11424 ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
11425 ((SysReg >> 11) & 7) << ":" <<
11426 ((SysReg >> 7) & 15) << ":" <<
11427 ((SysReg >> 3) & 15) << ":" <<
11428 ( SysReg & 7);
11429
11430 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
11431 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11432 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11433
11434 llvm::Type *RegisterType = Int64Ty;
11435 llvm::Type *Types[] = { RegisterType };
11436
11437 if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {
11438 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
11439
11440 return Builder.CreateCall(F, Metadata);
11441 }
11442
11443 llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
11444 llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
11445
11446 return Builder.CreateCall(F, { Metadata, ArgValue });
11447 }
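// Illustrative note: the string built above encodes the system register as
// "op0:op1:CRn:CRm:op2", where op0 is 2 or 3 (bit 14 of the MSVC encoding
// OR'ed into 2); a hypothetical encoding with all fields zero would produce
// "2:0:0:0:0".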
11448
11449 if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {
11450 llvm::Function *F =
11451 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
11452 return Builder.CreateCall(F);
11453 }
11454
11455 if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {
11456 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);
11457 return Builder.CreateCall(F);
11458 }
11459
11460 if (BuiltinID == clang::AArch64::BI__mulh ||
11461 BuiltinID == clang::AArch64::BI__umulh) {
11462 llvm::Type *ResType = ConvertType(E->getType());
11463 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
11464
11465 bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;
11466 Value *LHS =
11467 Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
11468 Value *RHS =
11469 Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);
11470
11471 Value *MulResult, *HigherBits;
11472 if (IsSigned) {
11473 MulResult = Builder.CreateNSWMul(LHS, RHS);
11474 HigherBits = Builder.CreateAShr(MulResult, 64);
11475 } else {
11476 MulResult = Builder.CreateNUWMul(LHS, RHS);
11477 HigherBits = Builder.CreateLShr(MulResult, 64);
11478 }
11479 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
11480
11481 return HigherBits;
11482 }
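// Conceptual sketch of the lowering above (illustrative C, not upstream code):
//   int64_t __mulh(int64_t a, int64_t b) {
//     return (int64_t)(((__int128)a * b) >> 64);            // signed high half
//   }
//   uint64_t __umulh(uint64_t a, uint64_t b) {
//     return (uint64_t)(((unsigned __int128)a * b) >> 64);  // unsigned high half
//   }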
11483
11484 if (BuiltinID == AArch64::BI__writex18byte ||
11485 BuiltinID == AArch64::BI__writex18word ||
11486 BuiltinID == AArch64::BI__writex18dword ||
11487 BuiltinID == AArch64::BI__writex18qword) {
11488 // Read x18 as i8*
11489 LLVMContext &Context = CGM.getLLVMContext();
11490 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11491 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11492 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11493 llvm::Function *F =
11494 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11495 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11496 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11497
11498 // Store val at x18 + offset
11499 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11500 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11501 Value *Val = EmitScalarExpr(E->getArg(1));
11502 StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());
11503 return Store;
11504 }
11505
11506 if (BuiltinID == AArch64::BI__readx18byte ||
11507 BuiltinID == AArch64::BI__readx18word ||
11508 BuiltinID == AArch64::BI__readx18dword ||
11509 BuiltinID == AArch64::BI__readx18qword) {
11510 llvm::Type *IntTy = ConvertType(E->getType());
11511
11512 // Read x18 as i8*
11513 LLVMContext &Context = CGM.getLLVMContext();
11514 llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};
11515 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
11516 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
11517 llvm::Function *F =
11518 CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
11519 llvm::Value *X18 = Builder.CreateCall(F, Metadata);
11520 X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);
11521
11522 // Load x18 + offset
11523 Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);
11524 Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);
11525 LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());
11526 return Load;
11527 }
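// Illustrative note (a sketch of the x18 accessors above): __readx18byte(off)
// reads the platform register x18 via @llvm.read_register, adds the
// zero-extended offset, and performs a 1-byte-aligned load of the requested
// width; the __writex18* handlers mirror this with an aligned store.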
11528
11529 if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
11530 BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
11531 BuiltinID == AArch64::BI_CopyInt32FromFloat ||
11532 BuiltinID == AArch64::BI_CopyInt64FromDouble) {
11533 Value *Arg = EmitScalarExpr(E->getArg(0));
11534 llvm::Type *RetTy = ConvertType(E->getType());
11535 return Builder.CreateBitCast(Arg, RetTy);
11536 }
11537
11538 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11539 BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11540 BuiltinID == AArch64::BI_CountLeadingZeros ||
11541 BuiltinID == AArch64::BI_CountLeadingZeros64) {
11542 Value *Arg = EmitScalarExpr(E->getArg(0));
11543 llvm::Type *ArgType = Arg->getType();
11544
11545 if (BuiltinID == AArch64::BI_CountLeadingOnes ||
11546 BuiltinID == AArch64::BI_CountLeadingOnes64)
11547 Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));
11548
11549 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
11550 Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
11551
11552 if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
11553 BuiltinID == AArch64::BI_CountLeadingZeros64)
11554 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11555 return Result;
11556 }
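// Conceptual sketch (illustrative, matching the code above):
//   _CountLeadingOnes(x)  ~  ctlz(~x)   // invert, then count leading zeros
//   _CountLeadingZeros(x) ~  ctlz(x)
// The 64-bit variants truncate the i64 ctlz result back to i32.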
11557
11558 if (BuiltinID == AArch64::BI_CountLeadingSigns ||
11559 BuiltinID == AArch64::BI_CountLeadingSigns64) {
11560 Value *Arg = EmitScalarExpr(E->getArg(0));
11561
11562 Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
11563 ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
11564 : CGM.getIntrinsic(Intrinsic::aarch64_cls64);
11565
11566 Value *Result = Builder.CreateCall(F, Arg, "cls");
11567 if (BuiltinID == AArch64::BI_CountLeadingSigns64)
11568 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11569 return Result;
11570 }
11571
11572 if (BuiltinID == AArch64::BI_CountOneBits ||
11573 BuiltinID == AArch64::BI_CountOneBits64) {
11574 Value *ArgValue = EmitScalarExpr(E->getArg(0));
11575 llvm::Type *ArgType = ArgValue->getType();
11576 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
11577
11578 Value *Result = Builder.CreateCall(F, ArgValue);
11579 if (BuiltinID == AArch64::BI_CountOneBits64)
11580 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11581 return Result;
11582 }
11583
11584 if (BuiltinID == AArch64::BI__prefetch) {
11585 Value *Address = EmitScalarExpr(E->getArg(0));
11586 Value *RW = llvm::ConstantInt::get(Int32Ty, 0);
11587 Value *Locality = ConstantInt::get(Int32Ty, 3);
11588 Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
11589 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
11590 return Builder.CreateCall(F, {Address, RW, Locality, Data});
11591 }
11592
11593 if (BuiltinID == AArch64::BI__hlt) {
11594 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);
11595 Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});
11596
11597 // Return 0 for convenience, even though MSVC returns some other undefined
11598 // value.
11599 return ConstantInt::get(Builder.getInt32Ty(), 0);
11600 }
11601
11602 // Handle MSVC intrinsics before argument evaluation to prevent double
11603 // evaluation.
11604 if (std::optional<MSVCIntrin> MsvcIntId =
11605 translateAarch64ToMsvcIntrin(BuiltinID))
11606 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
11607
11608 // Some intrinsics are equivalent; if they are, use the base intrinsic ID.
11609 auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {
11610 return P.first == BuiltinID;
11611 });
11612 if (It != end(NEONEquivalentIntrinsicMap))
11613 BuiltinID = It->second;
11614
11615 // Find out if any arguments are required to be integer constant
11616 // expressions.
11617 unsigned ICEArguments = 0;
11618 ASTContext::GetBuiltinTypeError Error;
11619 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
11620 assert(Error == ASTContext::GE_None && "Should not codegen an error");
11621
11622 llvm::SmallVector<Value*, 4> Ops;
11623 Address PtrOp0 = Address::invalid();
11624 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
11625 if (i == 0) {
11626 switch (BuiltinID) {
11627 case NEON::BI__builtin_neon_vld1_v:
11628 case NEON::BI__builtin_neon_vld1q_v:
11629 case NEON::BI__builtin_neon_vld1_dup_v:
11630 case NEON::BI__builtin_neon_vld1q_dup_v:
11631 case NEON::BI__builtin_neon_vld1_lane_v:
11632 case NEON::BI__builtin_neon_vld1q_lane_v:
11633 case NEON::BI__builtin_neon_vst1_v:
11634 case NEON::BI__builtin_neon_vst1q_v:
11635 case NEON::BI__builtin_neon_vst1_lane_v:
11636 case NEON::BI__builtin_neon_vst1q_lane_v:
11637 case NEON::BI__builtin_neon_vldap1_lane_s64:
11638 case NEON::BI__builtin_neon_vldap1q_lane_s64:
11639 case NEON::BI__builtin_neon_vstl1_lane_s64:
11640 case NEON::BI__builtin_neon_vstl1q_lane_s64:
11641 // Get the alignment for the argument in addition to the value;
11642 // we'll use it later.
11643 PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
11644 Ops.push_back(PtrOp0.emitRawPointer(*this));
11645 continue;
11646 }
11647 }
11648 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
11649 }
11650
11651 auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
11652 const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
11653 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
11654
11655 if (Builtin) {
11656 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
11657 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
11658 assert(Result && "SISD intrinsic should have been handled");
11659 return Result;
11660 }
11661
11662 const Expr *Arg = E->getArg(E->getNumArgs()-1);
11663 NeonTypeFlags Type(0);
11664 if (std::optional<llvm::APSInt> Result =
11665 Arg->getIntegerConstantExpr(getContext()))
11666 // Determine the type of this overloaded NEON intrinsic.
11667 Type = NeonTypeFlags(Result->getZExtValue());
11668
11669 bool usgn = Type.isUnsigned();
11670 bool quad = Type.isQuad();
11671
11672 // Handle non-overloaded intrinsics first.
11673 switch (BuiltinID) {
11674 default: break;
11675 case NEON::BI__builtin_neon_vabsh_f16:
11676 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11677 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
11678 case NEON::BI__builtin_neon_vaddq_p128: {
11679 llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
11680 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11681 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
11682 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
11683 Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);
11684 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11685 return Builder.CreateBitCast(Ops[0], Int128Ty);
11686 }
11687 case NEON::BI__builtin_neon_vldrq_p128: {
11688 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
11689 Value *Ptr = EmitScalarExpr(E->getArg(0));
11690 return Builder.CreateAlignedLoad(Int128Ty, Ptr,
11691 CharUnits::fromQuantity(16));
11692 }
11693 case NEON::BI__builtin_neon_vstrq_p128: {
11694 Value *Ptr = Ops[0];
11695 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
11696 }
11697 case NEON::BI__builtin_neon_vcvts_f32_u32:
11698 case NEON::BI__builtin_neon_vcvtd_f64_u64:
11699 usgn = true;
11700 [[fallthrough]];
11701 case NEON::BI__builtin_neon_vcvts_f32_s32:
11702 case NEON::BI__builtin_neon_vcvtd_f64_s64: {
11703 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11704 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
11705 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
11706 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
11707 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11708 if (usgn)
11709 return Builder.CreateUIToFP(Ops[0], FTy);
11710 return Builder.CreateSIToFP(Ops[0], FTy);
11711 }
11712 case NEON::BI__builtin_neon_vcvth_f16_u16:
11713 case NEON::BI__builtin_neon_vcvth_f16_u32:
11714 case NEON::BI__builtin_neon_vcvth_f16_u64:
11715 usgn = true;
11716 [[fallthrough]];
11717 case NEON::BI__builtin_neon_vcvth_f16_s16:
11718 case NEON::BI__builtin_neon_vcvth_f16_s32:
11719 case NEON::BI__builtin_neon_vcvth_f16_s64: {
11720 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11721 llvm::Type *FTy = HalfTy;
11722 llvm::Type *InTy;
11723 if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
11724 InTy = Int64Ty;
11725 else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
11726 InTy = Int32Ty;
11727 else
11728 InTy = Int16Ty;
11729 Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
11730 if (usgn)
11731 return Builder.CreateUIToFP(Ops[0], FTy);
11732 return Builder.CreateSIToFP(Ops[0], FTy);
11733 }
11734 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11735 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11736 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11737 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11738 case NEON::BI__builtin_neon_vcvth_u16_f16:
11739 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11740 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11741 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11742 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11743 case NEON::BI__builtin_neon_vcvth_s16_f16: {
11744 unsigned Int;
11745 llvm::Type* InTy = Int32Ty;
11746 llvm::Type* FTy = HalfTy;
11747 llvm::Type *Tys[2] = {InTy, FTy};
11748 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11749 switch (BuiltinID) {
11750 default: llvm_unreachable("missing builtin ID in switch!");
11751 case NEON::BI__builtin_neon_vcvtah_u16_f16:
11752 Int = Intrinsic::aarch64_neon_fcvtau; break;
11753 case NEON::BI__builtin_neon_vcvtmh_u16_f16:
11754 Int = Intrinsic::aarch64_neon_fcvtmu; break;
11755 case NEON::BI__builtin_neon_vcvtnh_u16_f16:
11756 Int = Intrinsic::aarch64_neon_fcvtnu; break;
11757 case NEON::BI__builtin_neon_vcvtph_u16_f16:
11758 Int = Intrinsic::aarch64_neon_fcvtpu; break;
11759 case NEON::BI__builtin_neon_vcvth_u16_f16:
11760 Int = Intrinsic::aarch64_neon_fcvtzu; break;
11761 case NEON::BI__builtin_neon_vcvtah_s16_f16:
11762 Int = Intrinsic::aarch64_neon_fcvtas; break;
11763 case NEON::BI__builtin_neon_vcvtmh_s16_f16:
11764 Int = Intrinsic::aarch64_neon_fcvtms; break;
11765 case NEON::BI__builtin_neon_vcvtnh_s16_f16:
11766 Int = Intrinsic::aarch64_neon_fcvtns; break;
11767 case NEON::BI__builtin_neon_vcvtph_s16_f16:
11768 Int = Intrinsic::aarch64_neon_fcvtps; break;
11769 case NEON::BI__builtin_neon_vcvth_s16_f16:
11770 Int = Intrinsic::aarch64_neon_fcvtzs; break;
11771 }
11772 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
11773 return Builder.CreateTrunc(Ops[0], Int16Ty);
11774 }
11775 case NEON::BI__builtin_neon_vcaleh_f16:
11776 case NEON::BI__builtin_neon_vcalth_f16:
11777 case NEON::BI__builtin_neon_vcageh_f16:
11778 case NEON::BI__builtin_neon_vcagth_f16: {
11779 unsigned Int;
11780 llvm::Type* InTy = Int32Ty;
11781 llvm::Type* FTy = HalfTy;
11782 llvm::Type *Tys[2] = {InTy, FTy};
11783 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11784 switch (BuiltinID) {
11785 default: llvm_unreachable("missing builtin ID in switch!");
11786 case NEON::BI__builtin_neon_vcageh_f16:
11787 Int = Intrinsic::aarch64_neon_facge; break;
11788 case NEON::BI__builtin_neon_vcagth_f16:
11789 Int = Intrinsic::aarch64_neon_facgt; break;
11790 case NEON::BI__builtin_neon_vcaleh_f16:
11791 Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
11792 case NEON::BI__builtin_neon_vcalth_f16:
11793 Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
11794 }
11795 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
11796 return Builder.CreateTrunc(Ops[0], Int16Ty);
11797 }
11798 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11799 case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
11800 unsigned Int;
11801 llvm::Type* InTy = Int32Ty;
11802 llvm::Type* FTy = HalfTy;
11803 llvm::Type *Tys[2] = {InTy, FTy};
11804 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11805 switch (BuiltinID) {
11806 default: llvm_unreachable("missing builtin ID in switch!");
11807 case NEON::BI__builtin_neon_vcvth_n_s16_f16:
11808 Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
11809 case NEON::BI__builtin_neon_vcvth_n_u16_f16:
11810 Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
11811 }
11812 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11813 return Builder.CreateTrunc(Ops[0], Int16Ty);
11814 }
11815 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11816 case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
11817 unsigned Int;
11818 llvm::Type* FTy = HalfTy;
11819 llvm::Type* InTy = Int32Ty;
11820 llvm::Type *Tys[2] = {FTy, InTy};
11821 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11822 switch (BuiltinID) {
11823 default: llvm_unreachable("missing builtin ID in switch!");
11824 case NEON::BI__builtin_neon_vcvth_n_f16_s16:
11825 Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
11826 Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
11827 break;
11828 case NEON::BI__builtin_neon_vcvth_n_f16_u16:
11829 Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
11830 Ops[0] = Builder.CreateZExt(Ops[0], InTy);
11831 break;
11832 }
11833 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
11834 }
11835 case NEON::BI__builtin_neon_vpaddd_s64: {
11836 auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
11837 Value *Vec = EmitScalarExpr(E->getArg(0));
11838 // The vector is v2i64, so make sure it's bitcast to that.
11839 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
11840 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11841 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11842 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11843 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11844 // Pairwise addition of a v2i64 into a scalar i64.
11845 return Builder.CreateAdd(Op0, Op1, "vpaddd");
11846 }
11847 case NEON::BI__builtin_neon_vpaddd_f64: {
11848 auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
11849 Value *Vec = EmitScalarExpr(E->getArg(0));
11850 // The vector is v2f64, so make sure it's bitcast to that.
11851 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
11852 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11853 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11854 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11855 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11856 // Pairwise addition of a v2f64 into a scalar f64.
11857 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11858 }
11859 case NEON::BI__builtin_neon_vpadds_f32: {
11860 auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
11861 Value *Vec = EmitScalarExpr(E->getArg(0));
11862 // The vector is v2f32, so make sure it's bitcast to that.
11863 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
11864 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
11865 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
11866 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
11867 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
11868 // Pairwise addition of a v2f32 into a scalar f32.
11869 return Builder.CreateFAdd(Op0, Op1, "vpaddd");
11870 }
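// Illustrative note on the three pairwise-add cases above: each bitcasts the
// scalar argument to a two-element vector, extracts lane 0 and lane 1, and adds
// them (integer add for vpaddd_s64, fadd for the f64/f32 variants), so
// vpaddd_f64(v) is roughly v[0] + v[1].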
11871 case NEON::BI__builtin_neon_vceqzd_s64:
11872 case NEON::BI__builtin_neon_vceqzd_f64:
11873 case NEON::BI__builtin_neon_vceqzs_f32:
11874 case NEON::BI__builtin_neon_vceqzh_f16:
11875 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11876 return EmitAArch64CompareBuiltinExpr(
11877 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11878 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
11879 case NEON::BI__builtin_neon_vcgezd_s64:
11880 case NEON::BI__builtin_neon_vcgezd_f64:
11881 case NEON::BI__builtin_neon_vcgezs_f32:
11882 case NEON::BI__builtin_neon_vcgezh_f16:
11883 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11884 return EmitAArch64CompareBuiltinExpr(
11885 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11886 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
11887 case NEON::BI__builtin_neon_vclezd_s64:
11888 case NEON::BI__builtin_neon_vclezd_f64:
11889 case NEON::BI__builtin_neon_vclezs_f32:
11890 case NEON::BI__builtin_neon_vclezh_f16:
11891 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11892 return EmitAArch64CompareBuiltinExpr(
11893 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11894 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
11895 case NEON::BI__builtin_neon_vcgtzd_s64:
11896 case NEON::BI__builtin_neon_vcgtzd_f64:
11897 case NEON::BI__builtin_neon_vcgtzs_f32:
11898 case NEON::BI__builtin_neon_vcgtzh_f16:
11899 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11900 return EmitAArch64CompareBuiltinExpr(
11901 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11902 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
11903 case NEON::BI__builtin_neon_vcltzd_s64:
11904 case NEON::BI__builtin_neon_vcltzd_f64:
11905 case NEON::BI__builtin_neon_vcltzs_f32:
11906 case NEON::BI__builtin_neon_vcltzh_f16:
11907 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11908 return EmitAArch64CompareBuiltinExpr(
11909 Ops[0], ConvertType(E->getCallReturnType(getContext())),
11910 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
11911
11912 case NEON::BI__builtin_neon_vceqzd_u64: {
11913 Ops.push_back(EmitScalarExpr(E->getArg(0)));
11914 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
11915 Ops[0] =
11916 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
11917 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
11918 }
11919 case NEON::BI__builtin_neon_vceqd_f64:
11920 case NEON::BI__builtin_neon_vcled_f64:
11921 case NEON::BI__builtin_neon_vcltd_f64:
11922 case NEON::BI__builtin_neon_vcged_f64:
11923 case NEON::BI__builtin_neon_vcgtd_f64: {
11924 llvm::CmpInst::Predicate P;
11925 switch (BuiltinID) {
11926 default: llvm_unreachable("missing builtin ID in switch!");
11927 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
11928 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
11929 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
11930 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
11931 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
11932 }
11933 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11934 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
11935 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
11936 if (P == llvm::FCmpInst::FCMP_OEQ)
11937 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11938 else
11939 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11940 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
11941 }
11942 case NEON::BI__builtin_neon_vceqs_f32:
11943 case NEON::BI__builtin_neon_vcles_f32:
11944 case NEON::BI__builtin_neon_vclts_f32:
11945 case NEON::BI__builtin_neon_vcges_f32:
11946 case NEON::BI__builtin_neon_vcgts_f32: {
11947 llvm::CmpInst::Predicate P;
11948 switch (BuiltinID) {
11949 default: llvm_unreachable("missing builtin ID in switch!");
11950 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
11951 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
11952 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
11953 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
11954 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
11955 }
11956 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11957 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
11958 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
11959 if (P == llvm::FCmpInst::FCMP_OEQ)
11960 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11961 else
11962 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11963 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
11964 }
11965 case NEON::BI__builtin_neon_vceqh_f16:
11966 case NEON::BI__builtin_neon_vcleh_f16:
11967 case NEON::BI__builtin_neon_vclth_f16:
11968 case NEON::BI__builtin_neon_vcgeh_f16:
11969 case NEON::BI__builtin_neon_vcgth_f16: {
11970 llvm::CmpInst::Predicate P;
11971 switch (BuiltinID) {
11972 default: llvm_unreachable("missing builtin ID in switch!");
11973 case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
11974 case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
11975 case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
11976 case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
11977 case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
11978 }
11979 Ops.push_back(EmitScalarExpr(E->getArg(1)));
11980 Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
11981 Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
11982 if (P == llvm::FCmpInst::FCMP_OEQ)
11983 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
11984 else
11985 Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);
11986 return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
11987 }
11988 case NEON::BI__builtin_neon_vceqd_s64:
11989 case NEON::BI__builtin_neon_vceqd_u64:
11990 case NEON::BI__builtin_neon_vcgtd_s64:
11991 case NEON::BI__builtin_neon_vcgtd_u64:
11992 case NEON::BI__builtin_neon_vcltd_s64:
11993 case NEON::BI__builtin_neon_vcltd_u64:
11994 case NEON::BI__builtin_neon_vcged_u64:
11995 case NEON::BI__builtin_neon_vcged_s64:
11996 case NEON::BI__builtin_neon_vcled_u64:
11997 case NEON::BI__builtin_neon_vcled_s64: {
11998 llvm::CmpInst::Predicate P;
11999 switch (BuiltinID) {
12000 default: llvm_unreachable("missing builtin ID in switch!");
12001 case NEON::BI__builtin_neon_vceqd_s64:
12002 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
12003 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
12004 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
12005 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
12006 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
12007 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
12008 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
12009 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
12010 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
12011 }
12012 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12013 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12014 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12015 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
12016 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
12017 }
12018 case NEON::BI__builtin_neon_vtstd_s64:
12019 case NEON::BI__builtin_neon_vtstd_u64: {
12020 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12021 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
12022 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12023 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
12024 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
12025 llvm::Constant::getNullValue(Int64Ty));
12026 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
12027 }
12028 case NEON::BI__builtin_neon_vset_lane_i8:
12029 case NEON::BI__builtin_neon_vset_lane_i16:
12030 case NEON::BI__builtin_neon_vset_lane_i32:
12031 case NEON::BI__builtin_neon_vset_lane_i64:
12032 case NEON::BI__builtin_neon_vset_lane_bf16:
12033 case NEON::BI__builtin_neon_vset_lane_f32:
12034 case NEON::BI__builtin_neon_vsetq_lane_i8:
12035 case NEON::BI__builtin_neon_vsetq_lane_i16:
12036 case NEON::BI__builtin_neon_vsetq_lane_i32:
12037 case NEON::BI__builtin_neon_vsetq_lane_i64:
12038 case NEON::BI__builtin_neon_vsetq_lane_bf16:
12039 case NEON::BI__builtin_neon_vsetq_lane_f32:
12040 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12041 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12042 case NEON::BI__builtin_neon_vset_lane_f64:
12043 // The vector type needs a cast for the v1f64 variant.
12044 Ops[1] =
12045 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
12046 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12047 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12048 case NEON::BI__builtin_neon_vsetq_lane_f64:
12049 // The vector type needs a cast for the v2f64 variant.
12050 Ops[1] =
12051 Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
12052 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12053 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
12054
12055 case NEON::BI__builtin_neon_vget_lane_i8:
12056 case NEON::BI__builtin_neon_vdupb_lane_i8:
12057 Ops[0] =
12058 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
12059 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12060 "vget_lane");
12061 case NEON::BI__builtin_neon_vgetq_lane_i8:
12062 case NEON::BI__builtin_neon_vdupb_laneq_i8:
12063 Ops[0] =
12064 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
12065 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12066 "vgetq_lane");
12067 case NEON::BI__builtin_neon_vget_lane_i16:
12068 case NEON::BI__builtin_neon_vduph_lane_i16:
12069 Ops[0] =
12070 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
12071 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12072 "vget_lane");
12073 case NEON::BI__builtin_neon_vgetq_lane_i16:
12074 case NEON::BI__builtin_neon_vduph_laneq_i16:
12075 Ops[0] =
12076 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
12077 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12078 "vgetq_lane");
12079 case NEON::BI__builtin_neon_vget_lane_i32:
12080 case NEON::BI__builtin_neon_vdups_lane_i32:
12081 Ops[0] =
12082 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
12083 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12084 "vget_lane");
12085 case NEON::BI__builtin_neon_vdups_lane_f32:
12086 Ops[0] =
12087 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12088 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12089 "vdups_lane");
12090 case NEON::BI__builtin_neon_vgetq_lane_i32:
12091 case NEON::BI__builtin_neon_vdups_laneq_i32:
12092 Ops[0] =
12093 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
12094 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12095 "vgetq_lane");
12096 case NEON::BI__builtin_neon_vget_lane_i64:
12097 case NEON::BI__builtin_neon_vdupd_lane_i64:
12098 Ops[0] =
12099 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
12100 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12101 "vget_lane");
12102 case NEON::BI__builtin_neon_vdupd_lane_f64:
12103 Ops[0] =
12104 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12105 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12106 "vdupd_lane");
12107 case NEON::BI__builtin_neon_vgetq_lane_i64:
12108 case NEON::BI__builtin_neon_vdupd_laneq_i64:
12109 Ops[0] =
12110 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
12111 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12112 "vgetq_lane");
12113 case NEON::BI__builtin_neon_vget_lane_f32:
12114 Ops[0] =
12115 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
12116 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12117 "vget_lane");
12118 case NEON::BI__builtin_neon_vget_lane_f64:
12119 Ops[0] =
12120 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
12121 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12122 "vget_lane");
12123 case NEON::BI__builtin_neon_vgetq_lane_f32:
12124 case NEON::BI__builtin_neon_vdups_laneq_f32:
12125 Ops[0] =
12126 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
12127 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12128 "vgetq_lane");
12129 case NEON::BI__builtin_neon_vgetq_lane_f64:
12130 case NEON::BI__builtin_neon_vdupd_laneq_f64:
12131 Ops[0] =
12132 Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
12133 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12134 "vgetq_lane");
12135 case NEON::BI__builtin_neon_vaddh_f16:
12136 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12137 return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
12138 case NEON::BI__builtin_neon_vsubh_f16:
12139 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12140 return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
12141 case NEON::BI__builtin_neon_vmulh_f16:
12142 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12143 return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
12144 case NEON::BI__builtin_neon_vdivh_f16:
12145 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12146 return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
12147 case NEON::BI__builtin_neon_vfmah_f16:
12148 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12149 return emitCallMaybeConstrainedFPBuiltin(
12150 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12151 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
12152 case NEON::BI__builtin_neon_vfmsh_f16: {
12153 Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
12154
12155 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
12156 return emitCallMaybeConstrainedFPBuiltin(
12157 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
12158 {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
12159 }
12160 case NEON::BI__builtin_neon_vaddd_s64:
12161 case NEON::BI__builtin_neon_vaddd_u64:
12162 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
12163 case NEON::BI__builtin_neon_vsubd_s64:
12164 case NEON::BI__builtin_neon_vsubd_u64:
12165 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
12166 case NEON::BI__builtin_neon_vqdmlalh_s16:
12167 case NEON::BI__builtin_neon_vqdmlslh_s16: {
12168 SmallVector<Value *, 2> ProductOps;
12169 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12170 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
12171 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12172 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12173 ProductOps, "vqdmlXl");
12174 Constant *CI = ConstantInt::get(SizeTy, 0);
12175 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12176
12177 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
12178 ? Intrinsic::aarch64_neon_sqadd
12179 : Intrinsic::aarch64_neon_sqsub;
12180 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
12181 }
12182 case NEON::BI__builtin_neon_vqshlud_n_s64: {
12183 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12184 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12185 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
12186 Ops, "vqshlu_n");
12187 }
12188 case NEON::BI__builtin_neon_vqshld_n_u64:
12189 case NEON::BI__builtin_neon_vqshld_n_s64: {
12190 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
12191 ? Intrinsic::aarch64_neon_uqshl
12192 : Intrinsic::aarch64_neon_sqshl;
12193 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12194 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
12195 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
12196 }
12197 case NEON::BI__builtin_neon_vrshrd_n_u64:
12198 case NEON::BI__builtin_neon_vrshrd_n_s64: {
12199 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
12200 ? Intrinsic::aarch64_neon_urshl
12201 : Intrinsic::aarch64_neon_srshl;
12202 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12203 int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
12204 Ops[1] = ConstantInt::get(Int64Ty, -SV);
12205 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
12206 }
12207 case NEON::BI__builtin_neon_vrsrad_n_u64:
12208 case NEON::BI__builtin_neon_vrsrad_n_s64: {
12209 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
12210 ? Intrinsic::aarch64_neon_urshl
12211 : Intrinsic::aarch64_neon_srshl;
12212 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
12213 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
12214 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
12215 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
12216 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
12217 }
12218 case NEON::BI__builtin_neon_vshld_n_s64:
12219 case NEON::BI__builtin_neon_vshld_n_u64: {
12220 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12221 return Builder.CreateShl(
12222 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
12223 }
12224 case NEON::BI__builtin_neon_vshrd_n_s64: {
12225 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12226 return Builder.CreateAShr(
12227 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12228 Amt->getZExtValue())),
12229 "shrd_n");
12230 }
12231 case NEON::BI__builtin_neon_vshrd_n_u64: {
12232 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
12233 uint64_t ShiftAmt = Amt->getZExtValue();
12234 // Right-shifting an unsigned value by its size yields 0.
12235 if (ShiftAmt == 64)
12236 return ConstantInt::get(Int64Ty, 0);
12237 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
12238 "shrd_n");
12239 }
12240 case NEON::BI__builtin_neon_vsrad_n_s64: {
12241 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12242 Ops[1] = Builder.CreateAShr(
12243 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
12244 Amt->getZExtValue())),
12245 "shrd_n");
12246 return Builder.CreateAdd(Ops[0], Ops[1]);
12247 }
12248 case NEON::BI__builtin_neon_vsrad_n_u64: {
12249 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
12250 uint64_t ShiftAmt = Amt->getZExtValue();
12251 // Right-shifting an unsigned value by its size yields 0.
12252 // As Op + 0 = Op, return Ops[0] directly.
12253 if (ShiftAmt == 64)
12254 return Ops[0];
12255 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
12256 "shrd_n");
12257 return Builder.CreateAdd(Ops[0], Ops[1]);
12258 }
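// Illustrative note on the unsigned scalar shifts above: a shift amount equal
// to the bit width would be poison for lshr in LLVM IR, so vshrd_n_u64 folds a
// 64-bit shift to the constant 0 and vsrad_n_u64 folds it to the unmodified
// accumulator, matching the "x >> 64 == 0" semantics described in the comments.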
12259 case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
12260 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
12261 case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
12262 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
12263 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12264 "lane");
12265 SmallVector<Value *, 2> ProductOps;
12266 ProductOps.push_back(vectorWrapScalar16(Ops[1]));
12267 ProductOps.push_back(vectorWrapScalar16(Ops[2]));
12268 auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
12269 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
12270 ProductOps, "vqdmlXl");
12271 Constant *CI = ConstantInt::get(SizeTy, 0);
12272 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
12273 Ops.pop_back();
12274
12275 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
12276 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
12277 ? Intrinsic::aarch64_neon_sqadd
12278 : Intrinsic::aarch64_neon_sqsub;
12279 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
12280 }
12281 case NEON::BI__builtin_neon_vqdmlals_s32:
12282 case NEON::BI__builtin_neon_vqdmlsls_s32: {
12283 SmallVector<Value *, 2> ProductOps;
12284 ProductOps.push_back(Ops[1]);
12285 ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
12286 Ops[1] =
12287 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12288 ProductOps, "vqdmlXl");
12289
12290 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
12291 ? Intrinsic::aarch64_neon_sqadd
12292 : Intrinsic::aarch64_neon_sqsub;
12293 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
12294 }
12295 case NEON::BI__builtin_neon_vqdmlals_lane_s32:
12296 case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
12297 case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
12298 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
12299 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
12300 "lane");
12301 SmallVector<Value *, 2> ProductOps;
12302 ProductOps.push_back(Ops[1]);
12303 ProductOps.push_back(Ops[2]);
12304 Ops[1] =
12305 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
12306 ProductOps, "vqdmlXl");
12307 Ops.pop_back();
12308
12309 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
12310 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
12311 ? Intrinsic::aarch64_neon_sqadd
12312 : Intrinsic::aarch64_neon_sqsub;
12313 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
12314 }
12315 case NEON::BI__builtin_neon_vget_lane_bf16:
12316 case NEON::BI__builtin_neon_vduph_lane_bf16:
12317 case NEON::BI__builtin_neon_vduph_lane_f16: {
12318 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12319 "vget_lane");
12320 }
12321 case NEON::BI__builtin_neon_vgetq_lane_bf16:
12322 case NEON::BI__builtin_neon_vduph_laneq_bf16:
12323 case NEON::BI__builtin_neon_vduph_laneq_f16: {
12324 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
12325 "vgetq_lane");
12326 }
12327
12328 case clang::AArch64::BI_InterlockedAdd:
12329 case clang::AArch64::BI_InterlockedAdd64: {
12330 Address DestAddr = CheckAtomicAlignment(*this, E);
12331 Value *Val = EmitScalarExpr(E->getArg(1));
12332 AtomicRMWInst *RMWI =
12333 Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,
12334 llvm::AtomicOrdering::SequentiallyConsistent);
12335 return Builder.CreateAdd(RMWI, Val);
12336 }
12337 }
12338
12339 llvm::FixedVectorType *VTy = GetNeonType(this, Type);
12340 llvm::Type *Ty = VTy;
12341 if (!Ty)
12342 return nullptr;
12343
12344 // Not all intrinsics handled by the common case work for AArch64 yet, so only
12345 // defer to common code if it's been added to our special map.
12346 Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
12347 AArch64SIMDIntrinsicsProvenSorted);
12348
12349 if (Builtin)
12350 return EmitCommonNeonBuiltinExpr(
12351 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
12352 Builtin->NameHint, Builtin->TypeModifier, E, Ops,
12353 /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
12354
12355 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
12356 return V;
12357
12358 unsigned Int;
12359 switch (BuiltinID) {
12360 default: return nullptr;
12361 case NEON::BI__builtin_neon_vbsl_v:
12362 case NEON::BI__builtin_neon_vbslq_v: {
12363 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
12364 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
12365 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
12366 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
12367
12368 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
12369 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
12370 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
12371 return Builder.CreateBitCast(Ops[0], Ty);
12372 }
12373 case NEON::BI__builtin_neon_vfma_lane_v:
12374 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
12375 // The ARM builtins (and instructions) have the addend as the first
12376 // operand, but the 'fma' intrinsics have it last. Swap it around here.
12377 Value *Addend = Ops[0];
12378 Value *Multiplicand = Ops[1];
12379 Value *LaneSource = Ops[2];
12380 Ops[0] = Multiplicand;
12381 Ops[1] = LaneSource;
12382 Ops[2] = Addend;
12383
12384 // Now adjust things to handle the lane access.
12385 auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
12386 ? llvm::FixedVectorType::get(VTy->getElementType(),
12387 VTy->getNumElements() / 2)
12388 : VTy;
12389 llvm::Constant *cst = cast<Constant>(Ops[3]);
12390 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
12391 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
12392 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
12393
12394 Ops.pop_back();
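      // The call below is fma(multiplicand, splat(lane), addend), matching
      // llvm.fma's a * b + c operand order established by the swap above;
      // the shuffle already broadcast the selected lane across the vector.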
12395 Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_fma
12396 : Intrinsic::fma;
12397 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
12398 }
12399 case NEON::BI__builtin_neon_vfma_laneq_v: {
12400 auto *VTy = cast<llvm::FixedVectorType>(Ty);
12401 // v1f64 fma should be mapped to Neon scalar f64 fma
12402 if (VTy && VTy->getElementType() == DoubleTy) {
12403 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12404 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
12405 llvm::FixedVectorType *VTy =
12406           GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));
12407       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
12408 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12409 Value *Result;
12410       Result = emitCallMaybeConstrainedFPBuiltin(
12411           *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,
12412 DoubleTy, {Ops[1], Ops[2], Ops[0]});
12413 return Builder.CreateBitCast(Result, Ty);
12414 }
12415 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12416 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12417
12418 auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
12419 VTy->getNumElements() * 2);
12420 Ops[2] = Builder.CreateBitCast(Ops[2], STy);
12421 Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
12422 cast<ConstantInt>(Ops[3]));
12423 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
12424
12425     return emitCallMaybeConstrainedFPBuiltin(
12426         *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12427 {Ops[2], Ops[1], Ops[0]});
12428 }
12429 case NEON::BI__builtin_neon_vfmaq_laneq_v: {
12430 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12431 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
12432
12433 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
12434 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
12435     return emitCallMaybeConstrainedFPBuiltin(
12436         *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12437 {Ops[2], Ops[1], Ops[0]});
12438 }
12439 case NEON::BI__builtin_neon_vfmah_lane_f16:
12440 case NEON::BI__builtin_neon_vfmas_lane_f32:
12441 case NEON::BI__builtin_neon_vfmah_laneq_f16:
12442 case NEON::BI__builtin_neon_vfmas_laneq_f32:
12443 case NEON::BI__builtin_neon_vfmad_lane_f64:
12444 case NEON::BI__builtin_neon_vfmad_laneq_f64: {
12445 Ops.push_back(EmitScalarExpr(E->getArg(3)));
12446 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
12447 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
12448     return emitCallMaybeConstrainedFPBuiltin(
12449         *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,
12450 {Ops[1], Ops[2], Ops[0]});
12451 }
12452 case NEON::BI__builtin_neon_vmull_v:
12453 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12454 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
12455 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
12456 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
12457 case NEON::BI__builtin_neon_vmax_v:
12458 case NEON::BI__builtin_neon_vmaxq_v:
12459 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12460 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
12461 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
12462 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
12463 case NEON::BI__builtin_neon_vmaxh_f16: {
12464 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12465 Int = Intrinsic::aarch64_neon_fmax;
12466 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
12467 }
12468 case NEON::BI__builtin_neon_vmin_v:
12469 case NEON::BI__builtin_neon_vminq_v:
12470 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12471 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
12472 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
12473 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
12474 case NEON::BI__builtin_neon_vminh_f16: {
12475 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12476 Int = Intrinsic::aarch64_neon_fmin;
12477 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
12478 }
12479 case NEON::BI__builtin_neon_vabd_v:
12480 case NEON::BI__builtin_neon_vabdq_v:
12481 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12482 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
12483 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
12484 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
12485 case NEON::BI__builtin_neon_vpadal_v:
12486 case NEON::BI__builtin_neon_vpadalq_v: {
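    // vpadal (pairwise add and accumulate long) has no single intrinsic here:
    // it is emitted as a pairwise widening add (saddlp/uaddlp) of the vector
    // operand followed by an ordinary vector add of the accumulator.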
12487 unsigned ArgElts = VTy->getNumElements();
12488 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
12489 unsigned BitWidth = EltTy->getBitWidth();
12490 auto *ArgTy = llvm::FixedVectorType::get(
12491 llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
12492 llvm::Type* Tys[2] = { VTy, ArgTy };
12493 Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
12494     SmallVector<llvm::Value*, 1> TmpOps;
12495     TmpOps.push_back(Ops[1]);
12496 Function *F = CGM.getIntrinsic(Int, Tys);
12497 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
12498 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
12499 return Builder.CreateAdd(tmp, addend);
12500 }
12501 case NEON::BI__builtin_neon_vpmin_v:
12502 case NEON::BI__builtin_neon_vpminq_v:
12503 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12504 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
12505 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
12506 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
12507 case NEON::BI__builtin_neon_vpmax_v:
12508 case NEON::BI__builtin_neon_vpmaxq_v:
12509 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
12510 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
12511 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
12512 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
12513 case NEON::BI__builtin_neon_vminnm_v:
12514 case NEON::BI__builtin_neon_vminnmq_v:
12515 Int = Intrinsic::aarch64_neon_fminnm;
12516 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
12517 case NEON::BI__builtin_neon_vminnmh_f16:
12518 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12519 Int = Intrinsic::aarch64_neon_fminnm;
12520 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
12521 case NEON::BI__builtin_neon_vmaxnm_v:
12522 case NEON::BI__builtin_neon_vmaxnmq_v:
12523 Int = Intrinsic::aarch64_neon_fmaxnm;
12524 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
12525 case NEON::BI__builtin_neon_vmaxnmh_f16:
12526 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12527 Int = Intrinsic::aarch64_neon_fmaxnm;
12528 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
12529 case NEON::BI__builtin_neon_vrecpss_f32: {
12530 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12531 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
12532 Ops, "vrecps");
12533 }
12534 case NEON::BI__builtin_neon_vrecpsd_f64:
12535 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12536 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
12537 Ops, "vrecps");
12538 case NEON::BI__builtin_neon_vrecpsh_f16:
12539 Ops.push_back(EmitScalarExpr(E->getArg(1)));
12540 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
12541 Ops, "vrecps");
12542 case NEON::BI__builtin_neon_vqshrun_n_v:
12543 Int = Intrinsic::aarch64_neon_sqshrun;
12544 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
12545 case NEON::BI__builtin_neon_vqrshrun_n_v:
12546 Int = Intrinsic::aarch64_neon_sqrshrun;
12547 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
12548 case NEON::BI__builtin_neon_vqshrn_n_v:
12549 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
12550 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
12551 case NEON::BI__builtin_neon_vrshrn_n_v:
12552 Int = Intrinsic::aarch64_neon_rshrn;
12553 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
12554 case NEON::BI__builtin_neon_vqrshrn_n_v:
12555 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
12556 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
12557 case NEON::BI__builtin_neon_vrndah_f16: {
12558 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12559 Int = Builder.getIsFPConstrained()
12560 ? Intrinsic::experimental_constrained_round
12561 : Intrinsic::round;
12562 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
12563 }
12564 case NEON::BI__builtin_neon_vrnda_v:
12565 case NEON::BI__builtin_neon_vrndaq_v: {
12566 Int = Builder.getIsFPConstrained()
12567 ? Intrinsic::experimental_constrained_round
12568 : Intrinsic::round;
12569 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
12570 }
12571 case NEON::BI__builtin_neon_vrndih_f16: {
12572 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12573 Int = Builder.getIsFPConstrained()
12574 ? Intrinsic::experimental_constrained_nearbyint
12575 : Intrinsic::nearbyint;
12576 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
12577 }
12578 case NEON::BI__builtin_neon_vrndmh_f16: {
12579 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12580 Int = Builder.getIsFPConstrained()
12581 ? Intrinsic::experimental_constrained_floor
12582 : Intrinsic::floor;
12583 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
12584 }
12585 case NEON::BI__builtin_neon_vrndm_v:
12586 case NEON::BI__builtin_neon_vrndmq_v: {
12587 Int = Builder.getIsFPConstrained()
12588 ? Intrinsic::experimental_constrained_floor
12589 : Intrinsic::floor;
12590 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
12591 }
12592 case NEON::BI__builtin_neon_vrndnh_f16: {
12593 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12594 Int = Builder.getIsFPConstrained()
12595 ? Intrinsic::experimental_constrained_roundeven
12596 : Intrinsic::roundeven;
12597 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
12598 }
12599 case NEON::BI__builtin_neon_vrndn_v:
12600 case NEON::BI__builtin_neon_vrndnq_v: {
12601 Int = Builder.getIsFPConstrained()
12602 ? Intrinsic::experimental_constrained_roundeven
12603 : Intrinsic::roundeven;
12604 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
12605 }
12606 case NEON::BI__builtin_neon_vrndns_f32: {
12607 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12608 Int = Builder.getIsFPConstrained()
12609 ? Intrinsic::experimental_constrained_roundeven
12610 : Intrinsic::roundeven;
12611 return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
12612 }
12613 case NEON::BI__builtin_neon_vrndph_f16: {
12614 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12615 Int = Builder.getIsFPConstrained()
12616 ? Intrinsic::experimental_constrained_ceil
12617 : Intrinsic::ceil;
12618 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
12619 }
12620 case NEON::BI__builtin_neon_vrndp_v:
12621 case NEON::BI__builtin_neon_vrndpq_v: {
12622 Int = Builder.getIsFPConstrained()
12623 ? Intrinsic::experimental_constrained_ceil
12624 : Intrinsic::ceil;
12625 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
12626 }
12627 case NEON::BI__builtin_neon_vrndxh_f16: {
12628 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12629 Int = Builder.getIsFPConstrained()
12630 ? Intrinsic::experimental_constrained_rint
12631 : Intrinsic::rint;
12632 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
12633 }
12634 case NEON::BI__builtin_neon_vrndx_v:
12635 case NEON::BI__builtin_neon_vrndxq_v: {
12636 Int = Builder.getIsFPConstrained()
12637 ? Intrinsic::experimental_constrained_rint
12638 : Intrinsic::rint;
12639 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
12640 }
12641 case NEON::BI__builtin_neon_vrndh_f16: {
12642 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12643 Int = Builder.getIsFPConstrained()
12644 ? Intrinsic::experimental_constrained_trunc
12645 : Intrinsic::trunc;
12646 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
12647 }
12648 case NEON::BI__builtin_neon_vrnd32x_f32:
12649 case NEON::BI__builtin_neon_vrnd32xq_f32:
12650 case NEON::BI__builtin_neon_vrnd32x_f64:
12651 case NEON::BI__builtin_neon_vrnd32xq_f64: {
12652 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12653 Int = Intrinsic::aarch64_neon_frint32x;
12654 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
12655 }
12656 case NEON::BI__builtin_neon_vrnd32z_f32:
12657 case NEON::BI__builtin_neon_vrnd32zq_f32:
12658 case NEON::BI__builtin_neon_vrnd32z_f64:
12659 case NEON::BI__builtin_neon_vrnd32zq_f64: {
12660 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12661 Int = Intrinsic::aarch64_neon_frint32z;
12662 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
12663 }
12664 case NEON::BI__builtin_neon_vrnd64x_f32:
12665 case NEON::BI__builtin_neon_vrnd64xq_f32:
12666 case NEON::BI__builtin_neon_vrnd64x_f64:
12667 case NEON::BI__builtin_neon_vrnd64xq_f64: {
12668 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12669 Int = Intrinsic::aarch64_neon_frint64x;
12670 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
12671 }
12672 case NEON::BI__builtin_neon_vrnd64z_f32:
12673 case NEON::BI__builtin_neon_vrnd64zq_f32:
12674 case NEON::BI__builtin_neon_vrnd64z_f64:
12675 case NEON::BI__builtin_neon_vrnd64zq_f64: {
12676 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12677 Int = Intrinsic::aarch64_neon_frint64z;
12678 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
12679 }
12680 case NEON::BI__builtin_neon_vrnd_v:
12681 case NEON::BI__builtin_neon_vrndq_v: {
12682 Int = Builder.getIsFPConstrained()
12683 ? Intrinsic::experimental_constrained_trunc
12684 : Intrinsic::trunc;
12685 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
12686 }
12687 case NEON::BI__builtin_neon_vcvt_f64_v:
12688 case NEON::BI__builtin_neon_vcvtq_f64_v:
12689 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12690 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
12691 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
12692 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
12693 case NEON::BI__builtin_neon_vcvt_f64_f32: {
12694 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
12695 "unexpected vcvt_f64_f32 builtin");
12696 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
12697 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12698
12699 return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
12700 }
12701 case NEON::BI__builtin_neon_vcvt_f32_f64: {
12702 assert(Type.getEltType() == NeonTypeFlags::Float32 &&
12703 "unexpected vcvt_f32_f64 builtin");
12704 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
12705 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
12706
12707 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
12708 }
12709 case NEON::BI__builtin_neon_vcvt_s32_v:
12710 case NEON::BI__builtin_neon_vcvt_u32_v:
12711 case NEON::BI__builtin_neon_vcvt_s64_v:
12712 case NEON::BI__builtin_neon_vcvt_u64_v:
12713 case NEON::BI__builtin_neon_vcvt_s16_f16:
12714 case NEON::BI__builtin_neon_vcvt_u16_f16:
12715 case NEON::BI__builtin_neon_vcvtq_s32_v:
12716 case NEON::BI__builtin_neon_vcvtq_u32_v:
12717 case NEON::BI__builtin_neon_vcvtq_s64_v:
12718 case NEON::BI__builtin_neon_vcvtq_u64_v:
12719 case NEON::BI__builtin_neon_vcvtq_s16_f16:
12720 case NEON::BI__builtin_neon_vcvtq_u16_f16: {
12721 Int =
12722 usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
12723 llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
12724 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
12725 }
12726 case NEON::BI__builtin_neon_vcvta_s16_f16:
12727 case NEON::BI__builtin_neon_vcvta_u16_f16:
12728 case NEON::BI__builtin_neon_vcvta_s32_v:
12729 case NEON::BI__builtin_neon_vcvtaq_s16_f16:
12730 case NEON::BI__builtin_neon_vcvtaq_s32_v:
12731 case NEON::BI__builtin_neon_vcvta_u32_v:
12732 case NEON::BI__builtin_neon_vcvtaq_u16_f16:
12733 case NEON::BI__builtin_neon_vcvtaq_u32_v:
12734 case NEON::BI__builtin_neon_vcvta_s64_v:
12735 case NEON::BI__builtin_neon_vcvtaq_s64_v:
12736 case NEON::BI__builtin_neon_vcvta_u64_v:
12737 case NEON::BI__builtin_neon_vcvtaq_u64_v: {
12738 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
12739 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12740 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
12741 }
12742 case NEON::BI__builtin_neon_vcvtm_s16_f16:
12743 case NEON::BI__builtin_neon_vcvtm_s32_v:
12744 case NEON::BI__builtin_neon_vcvtmq_s16_f16:
12745 case NEON::BI__builtin_neon_vcvtmq_s32_v:
12746 case NEON::BI__builtin_neon_vcvtm_u16_f16:
12747 case NEON::BI__builtin_neon_vcvtm_u32_v:
12748 case NEON::BI__builtin_neon_vcvtmq_u16_f16:
12749 case NEON::BI__builtin_neon_vcvtmq_u32_v:
12750 case NEON::BI__builtin_neon_vcvtm_s64_v:
12751 case NEON::BI__builtin_neon_vcvtmq_s64_v:
12752 case NEON::BI__builtin_neon_vcvtm_u64_v:
12753 case NEON::BI__builtin_neon_vcvtmq_u64_v: {
12754 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
12755 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12756 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
12757 }
12758 case NEON::BI__builtin_neon_vcvtn_s16_f16:
12759 case NEON::BI__builtin_neon_vcvtn_s32_v:
12760 case NEON::BI__builtin_neon_vcvtnq_s16_f16:
12761 case NEON::BI__builtin_neon_vcvtnq_s32_v:
12762 case NEON::BI__builtin_neon_vcvtn_u16_f16:
12763 case NEON::BI__builtin_neon_vcvtn_u32_v:
12764 case NEON::BI__builtin_neon_vcvtnq_u16_f16:
12765 case NEON::BI__builtin_neon_vcvtnq_u32_v:
12766 case NEON::BI__builtin_neon_vcvtn_s64_v:
12767 case NEON::BI__builtin_neon_vcvtnq_s64_v:
12768 case NEON::BI__builtin_neon_vcvtn_u64_v:
12769 case NEON::BI__builtin_neon_vcvtnq_u64_v: {
12770 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
12771 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12772 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
12773 }
12774 case NEON::BI__builtin_neon_vcvtp_s16_f16:
12775 case NEON::BI__builtin_neon_vcvtp_s32_v:
12776 case NEON::BI__builtin_neon_vcvtpq_s16_f16:
12777 case NEON::BI__builtin_neon_vcvtpq_s32_v:
12778 case NEON::BI__builtin_neon_vcvtp_u16_f16:
12779 case NEON::BI__builtin_neon_vcvtp_u32_v:
12780 case NEON::BI__builtin_neon_vcvtpq_u16_f16:
12781 case NEON::BI__builtin_neon_vcvtpq_u32_v:
12782 case NEON::BI__builtin_neon_vcvtp_s64_v:
12783 case NEON::BI__builtin_neon_vcvtpq_s64_v:
12784 case NEON::BI__builtin_neon_vcvtp_u64_v:
12785 case NEON::BI__builtin_neon_vcvtpq_u64_v: {
12786 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
12787 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
12788 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
12789 }
12790 case NEON::BI__builtin_neon_vmulx_v:
12791 case NEON::BI__builtin_neon_vmulxq_v: {
12792 Int = Intrinsic::aarch64_neon_fmulx;
12793 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
12794 }
12795 case NEON::BI__builtin_neon_vmulxh_lane_f16:
12796 case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
12797 // vmulx_lane should be mapped to Neon scalar mulx after
12798 // extracting the scalar element
12799 Ops.push_back(EmitScalarExpr(E->getArg(2)));
12800 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12801 Ops.pop_back();
12802 Int = Intrinsic::aarch64_neon_fmulx;
12803 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
12804 }
12805 case NEON::BI__builtin_neon_vmul_lane_v:
12806 case NEON::BI__builtin_neon_vmul_laneq_v: {
12807 // v1f64 vmul_lane should be mapped to Neon scalar mul lane
12808 bool Quad = false;
12809 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
12810 Quad = true;
12811 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
12812 llvm::FixedVectorType *VTy =
12813         GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
12814     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
12815 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
12816 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
12817 return Builder.CreateBitCast(Result, Ty);
12818 }
12819 case NEON::BI__builtin_neon_vnegd_s64:
12820 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
12821 case NEON::BI__builtin_neon_vnegh_f16:
12822 return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
12823 case NEON::BI__builtin_neon_vpmaxnm_v:
12824 case NEON::BI__builtin_neon_vpmaxnmq_v: {
12825 Int = Intrinsic::aarch64_neon_fmaxnmp;
12826 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
12827 }
12828 case NEON::BI__builtin_neon_vpminnm_v:
12829 case NEON::BI__builtin_neon_vpminnmq_v: {
12830 Int = Intrinsic::aarch64_neon_fminnmp;
12831 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
12832 }
12833 case NEON::BI__builtin_neon_vsqrth_f16: {
12834 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12835 Int = Builder.getIsFPConstrained()
12836 ? Intrinsic::experimental_constrained_sqrt
12837 : Intrinsic::sqrt;
12838 return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
12839 }
12840 case NEON::BI__builtin_neon_vsqrt_v:
12841 case NEON::BI__builtin_neon_vsqrtq_v: {
12842 Int = Builder.getIsFPConstrained()
12843 ? Intrinsic::experimental_constrained_sqrt
12844 : Intrinsic::sqrt;
12845 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
12846 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
12847 }
12848 case NEON::BI__builtin_neon_vrbit_v:
12849 case NEON::BI__builtin_neon_vrbitq_v: {
12850 Int = Intrinsic::bitreverse;
12851 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
12852 }
12853 case NEON::BI__builtin_neon_vaddv_u8:
12854 // FIXME: These are handled by the AArch64 scalar code.
12855 usgn = true;
12856 [[fallthrough]];
12857 case NEON::BI__builtin_neon_vaddv_s8: {
12858 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12859 Ty = Int32Ty;
12860 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12861 llvm::Type *Tys[2] = { Ty, VTy };
12862 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12863 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12864 return Builder.CreateTrunc(Ops[0], Int8Ty);
12865 }
12866 case NEON::BI__builtin_neon_vaddv_u16:
12867 usgn = true;
12868 [[fallthrough]];
12869 case NEON::BI__builtin_neon_vaddv_s16: {
12870 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12871 Ty = Int32Ty;
12872 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12873 llvm::Type *Tys[2] = { Ty, VTy };
12874 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12875 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12876 return Builder.CreateTrunc(Ops[0], Int16Ty);
12877 }
12878 case NEON::BI__builtin_neon_vaddvq_u8:
12879 usgn = true;
12880 [[fallthrough]];
12881 case NEON::BI__builtin_neon_vaddvq_s8: {
12882 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12883 Ty = Int32Ty;
12884 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12885 llvm::Type *Tys[2] = { Ty, VTy };
12886 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12887 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12888 return Builder.CreateTrunc(Ops[0], Int8Ty);
12889 }
12890 case NEON::BI__builtin_neon_vaddvq_u16:
12891 usgn = true;
12892 [[fallthrough]];
12893 case NEON::BI__builtin_neon_vaddvq_s16: {
12894 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
12895 Ty = Int32Ty;
12896 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12897 llvm::Type *Tys[2] = { Ty, VTy };
12898 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12899 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
12900 return Builder.CreateTrunc(Ops[0], Int16Ty);
12901 }
12902 case NEON::BI__builtin_neon_vmaxv_u8: {
12903 Int = Intrinsic::aarch64_neon_umaxv;
12904 Ty = Int32Ty;
12905 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12906 llvm::Type *Tys[2] = { Ty, VTy };
12907 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12908 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12909 return Builder.CreateTrunc(Ops[0], Int8Ty);
12910 }
12911 case NEON::BI__builtin_neon_vmaxv_u16: {
12912 Int = Intrinsic::aarch64_neon_umaxv;
12913 Ty = Int32Ty;
12914 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12915 llvm::Type *Tys[2] = { Ty, VTy };
12916 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12917 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12918 return Builder.CreateTrunc(Ops[0], Int16Ty);
12919 }
12920 case NEON::BI__builtin_neon_vmaxvq_u8: {
12921 Int = Intrinsic::aarch64_neon_umaxv;
12922 Ty = Int32Ty;
12923 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12924 llvm::Type *Tys[2] = { Ty, VTy };
12925 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12926 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12927 return Builder.CreateTrunc(Ops[0], Int8Ty);
12928 }
12929 case NEON::BI__builtin_neon_vmaxvq_u16: {
12930 Int = Intrinsic::aarch64_neon_umaxv;
12931 Ty = Int32Ty;
12932 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12933 llvm::Type *Tys[2] = { Ty, VTy };
12934 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12935 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12936 return Builder.CreateTrunc(Ops[0], Int16Ty);
12937 }
12938 case NEON::BI__builtin_neon_vmaxv_s8: {
12939 Int = Intrinsic::aarch64_neon_smaxv;
12940 Ty = Int32Ty;
12941 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12942 llvm::Type *Tys[2] = { Ty, VTy };
12943 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12944 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12945 return Builder.CreateTrunc(Ops[0], Int8Ty);
12946 }
12947 case NEON::BI__builtin_neon_vmaxv_s16: {
12948 Int = Intrinsic::aarch64_neon_smaxv;
12949 Ty = Int32Ty;
12950 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
12951 llvm::Type *Tys[2] = { Ty, VTy };
12952 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12953 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12954 return Builder.CreateTrunc(Ops[0], Int16Ty);
12955 }
12956 case NEON::BI__builtin_neon_vmaxvq_s8: {
12957 Int = Intrinsic::aarch64_neon_smaxv;
12958 Ty = Int32Ty;
12959 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
12960 llvm::Type *Tys[2] = { Ty, VTy };
12961 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12962 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12963 return Builder.CreateTrunc(Ops[0], Int8Ty);
12964 }
12965 case NEON::BI__builtin_neon_vmaxvq_s16: {
12966 Int = Intrinsic::aarch64_neon_smaxv;
12967 Ty = Int32Ty;
12968 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
12969 llvm::Type *Tys[2] = { Ty, VTy };
12970 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12971 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12972 return Builder.CreateTrunc(Ops[0], Int16Ty);
12973 }
12974 case NEON::BI__builtin_neon_vmaxv_f16: {
12975 Int = Intrinsic::aarch64_neon_fmaxv;
12976 Ty = HalfTy;
12977 VTy = llvm::FixedVectorType::get(HalfTy, 4);
12978 llvm::Type *Tys[2] = { Ty, VTy };
12979 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12980 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12981 return Builder.CreateTrunc(Ops[0], HalfTy);
12982 }
12983 case NEON::BI__builtin_neon_vmaxvq_f16: {
12984 Int = Intrinsic::aarch64_neon_fmaxv;
12985 Ty = HalfTy;
12986 VTy = llvm::FixedVectorType::get(HalfTy, 8);
12987 llvm::Type *Tys[2] = { Ty, VTy };
12988 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12989 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
12990 return Builder.CreateTrunc(Ops[0], HalfTy);
12991 }
12992 case NEON::BI__builtin_neon_vminv_u8: {
12993 Int = Intrinsic::aarch64_neon_uminv;
12994 Ty = Int32Ty;
12995 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
12996 llvm::Type *Tys[2] = { Ty, VTy };
12997 Ops.push_back(EmitScalarExpr(E->getArg(0)));
12998 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
12999 return Builder.CreateTrunc(Ops[0], Int8Ty);
13000 }
13001 case NEON::BI__builtin_neon_vminv_u16: {
13002 Int = Intrinsic::aarch64_neon_uminv;
13003 Ty = Int32Ty;
13004 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13005 llvm::Type *Tys[2] = { Ty, VTy };
13006 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13007 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13008 return Builder.CreateTrunc(Ops[0], Int16Ty);
13009 }
13010 case NEON::BI__builtin_neon_vminvq_u8: {
13011 Int = Intrinsic::aarch64_neon_uminv;
13012 Ty = Int32Ty;
13013 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13014 llvm::Type *Tys[2] = { Ty, VTy };
13015 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13016 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13017 return Builder.CreateTrunc(Ops[0], Int8Ty);
13018 }
13019 case NEON::BI__builtin_neon_vminvq_u16: {
13020 Int = Intrinsic::aarch64_neon_uminv;
13021 Ty = Int32Ty;
13022 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13023 llvm::Type *Tys[2] = { Ty, VTy };
13024 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13025 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13026 return Builder.CreateTrunc(Ops[0], Int16Ty);
13027 }
13028 case NEON::BI__builtin_neon_vminv_s8: {
13029 Int = Intrinsic::aarch64_neon_sminv;
13030 Ty = Int32Ty;
13031 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13032 llvm::Type *Tys[2] = { Ty, VTy };
13033 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13034 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13035 return Builder.CreateTrunc(Ops[0], Int8Ty);
13036 }
13037 case NEON::BI__builtin_neon_vminv_s16: {
13038 Int = Intrinsic::aarch64_neon_sminv;
13039 Ty = Int32Ty;
13040 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13041 llvm::Type *Tys[2] = { Ty, VTy };
13042 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13043 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13044 return Builder.CreateTrunc(Ops[0], Int16Ty);
13045 }
13046 case NEON::BI__builtin_neon_vminvq_s8: {
13047 Int = Intrinsic::aarch64_neon_sminv;
13048 Ty = Int32Ty;
13049 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13050 llvm::Type *Tys[2] = { Ty, VTy };
13051 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13052 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13053 return Builder.CreateTrunc(Ops[0], Int8Ty);
13054 }
13055 case NEON::BI__builtin_neon_vminvq_s16: {
13056 Int = Intrinsic::aarch64_neon_sminv;
13057 Ty = Int32Ty;
13058 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13059 llvm::Type *Tys[2] = { Ty, VTy };
13060 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13061 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13062 return Builder.CreateTrunc(Ops[0], Int16Ty);
13063 }
13064 case NEON::BI__builtin_neon_vminv_f16: {
13065 Int = Intrinsic::aarch64_neon_fminv;
13066 Ty = HalfTy;
13067 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13068 llvm::Type *Tys[2] = { Ty, VTy };
13069 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13070 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13071 return Builder.CreateTrunc(Ops[0], HalfTy);
13072 }
13073 case NEON::BI__builtin_neon_vminvq_f16: {
13074 Int = Intrinsic::aarch64_neon_fminv;
13075 Ty = HalfTy;
13076 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13077 llvm::Type *Tys[2] = { Ty, VTy };
13078 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13079 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
13080 return Builder.CreateTrunc(Ops[0], HalfTy);
13081 }
13082 case NEON::BI__builtin_neon_vmaxnmv_f16: {
13083 Int = Intrinsic::aarch64_neon_fmaxnmv;
13084 Ty = HalfTy;
13085 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13086 llvm::Type *Tys[2] = { Ty, VTy };
13087 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13088 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13089 return Builder.CreateTrunc(Ops[0], HalfTy);
13090 }
13091 case NEON::BI__builtin_neon_vmaxnmvq_f16: {
13092 Int = Intrinsic::aarch64_neon_fmaxnmv;
13093 Ty = HalfTy;
13094 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13095 llvm::Type *Tys[2] = { Ty, VTy };
13096 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13097 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
13098 return Builder.CreateTrunc(Ops[0], HalfTy);
13099 }
13100 case NEON::BI__builtin_neon_vminnmv_f16: {
13101 Int = Intrinsic::aarch64_neon_fminnmv;
13102 Ty = HalfTy;
13103 VTy = llvm::FixedVectorType::get(HalfTy, 4);
13104 llvm::Type *Tys[2] = { Ty, VTy };
13105 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13106 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13107 return Builder.CreateTrunc(Ops[0], HalfTy);
13108 }
13109 case NEON::BI__builtin_neon_vminnmvq_f16: {
13110 Int = Intrinsic::aarch64_neon_fminnmv;
13111 Ty = HalfTy;
13112 VTy = llvm::FixedVectorType::get(HalfTy, 8);
13113 llvm::Type *Tys[2] = { Ty, VTy };
13114 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13115 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
13116 return Builder.CreateTrunc(Ops[0], HalfTy);
13117 }
13118 case NEON::BI__builtin_neon_vmul_n_f64: {
13119 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
13120 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
13121 return Builder.CreateFMul(Ops[0], RHS);
13122 }
13123 case NEON::BI__builtin_neon_vaddlv_u8: {
13124 Int = Intrinsic::aarch64_neon_uaddlv;
13125 Ty = Int32Ty;
13126 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13127 llvm::Type *Tys[2] = { Ty, VTy };
13128 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13129 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13130 return Builder.CreateTrunc(Ops[0], Int16Ty);
13131 }
13132 case NEON::BI__builtin_neon_vaddlv_u16: {
13133 Int = Intrinsic::aarch64_neon_uaddlv;
13134 Ty = Int32Ty;
13135 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13136 llvm::Type *Tys[2] = { Ty, VTy };
13137 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13138 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13139 }
13140 case NEON::BI__builtin_neon_vaddlvq_u8: {
13141 Int = Intrinsic::aarch64_neon_uaddlv;
13142 Ty = Int32Ty;
13143 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13144 llvm::Type *Tys[2] = { Ty, VTy };
13145 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13146 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13147 return Builder.CreateTrunc(Ops[0], Int16Ty);
13148 }
13149 case NEON::BI__builtin_neon_vaddlvq_u16: {
13150 Int = Intrinsic::aarch64_neon_uaddlv;
13151 Ty = Int32Ty;
13152 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13153 llvm::Type *Tys[2] = { Ty, VTy };
13154 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13155 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13156 }
13157 case NEON::BI__builtin_neon_vaddlv_s8: {
13158 Int = Intrinsic::aarch64_neon_saddlv;
13159 Ty = Int32Ty;
13160 VTy = llvm::FixedVectorType::get(Int8Ty, 8);
13161 llvm::Type *Tys[2] = { Ty, VTy };
13162 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13163 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13164 return Builder.CreateTrunc(Ops[0], Int16Ty);
13165 }
13166 case NEON::BI__builtin_neon_vaddlv_s16: {
13167 Int = Intrinsic::aarch64_neon_saddlv;
13168 Ty = Int32Ty;
13169 VTy = llvm::FixedVectorType::get(Int16Ty, 4);
13170 llvm::Type *Tys[2] = { Ty, VTy };
13171 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13172 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13173 }
13174 case NEON::BI__builtin_neon_vaddlvq_s8: {
13175 Int = Intrinsic::aarch64_neon_saddlv;
13176 Ty = Int32Ty;
13177 VTy = llvm::FixedVectorType::get(Int8Ty, 16);
13178 llvm::Type *Tys[2] = { Ty, VTy };
13179 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13180 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13181 return Builder.CreateTrunc(Ops[0], Int16Ty);
13182 }
13183 case NEON::BI__builtin_neon_vaddlvq_s16: {
13184 Int = Intrinsic::aarch64_neon_saddlv;
13185 Ty = Int32Ty;
13186 VTy = llvm::FixedVectorType::get(Int16Ty, 8);
13187 llvm::Type *Tys[2] = { Ty, VTy };
13188 Ops.push_back(EmitScalarExpr(E->getArg(0)));
13189 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
13190 }
13191 case NEON::BI__builtin_neon_vsri_n_v:
13192 case NEON::BI__builtin_neon_vsriq_n_v: {
13193 Int = Intrinsic::aarch64_neon_vsri;
13194 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13195 return EmitNeonCall(Intrin, Ops, "vsri_n");
13196 }
13197 case NEON::BI__builtin_neon_vsli_n_v:
13198 case NEON::BI__builtin_neon_vsliq_n_v: {
13199 Int = Intrinsic::aarch64_neon_vsli;
13200 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
13201 return EmitNeonCall(Intrin, Ops, "vsli_n");
13202 }
13203 case NEON::BI__builtin_neon_vsra_n_v:
13204 case NEON::BI__builtin_neon_vsraq_n_v:
13205 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
13206 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
13207 return Builder.CreateAdd(Ops[0], Ops[1]);
13208 case NEON::BI__builtin_neon_vrsra_n_v:
13209 case NEON::BI__builtin_neon_vrsraq_n_v: {
13210 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
13211     SmallVector<llvm::Value*,2> TmpOps;
13212     TmpOps.push_back(Ops[1]);
13213 TmpOps.push_back(Ops[2]);
13214 Function* F = CGM.getIntrinsic(Int, Ty);
13215 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
13216 Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
13217 return Builder.CreateAdd(Ops[0], tmp);
13218 }
13219 case NEON::BI__builtin_neon_vld1_v:
13220 case NEON::BI__builtin_neon_vld1q_v: {
13221 return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());
13222 }
13223 case NEON::BI__builtin_neon_vst1_v:
13224 case NEON::BI__builtin_neon_vst1q_v:
13225 Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
13226 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13227 case NEON::BI__builtin_neon_vld1_lane_v:
13228 case NEON::BI__builtin_neon_vld1q_lane_v: {
13229 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13230 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13231 PtrOp0.getAlignment());
13232 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
13233 }
13234 case NEON::BI__builtin_neon_vldap1_lane_s64:
13235 case NEON::BI__builtin_neon_vldap1q_lane_s64: {
13236 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13237 llvm::LoadInst *LI = Builder.CreateAlignedLoad(
13238 VTy->getElementType(), Ops[0], PtrOp0.getAlignment());
13239 LI->setAtomic(llvm::AtomicOrdering::Acquire);
13240 Ops[0] = LI;
13241 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");
13242 }
13243 case NEON::BI__builtin_neon_vld1_dup_v:
13244 case NEON::BI__builtin_neon_vld1q_dup_v: {
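    // Load-and-replicate: load one scalar element, insert it into lane 0 of
    // a poison vector, then splat lane 0 across all lanes with EmitNeonSplat.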
13245 Value *V = PoisonValue::get(Ty);
13246 Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],
13247 PtrOp0.getAlignment());
13248 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
13249 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
13250 return EmitNeonSplat(Ops[0], CI);
13251 }
13252 case NEON::BI__builtin_neon_vst1_lane_v:
13253 case NEON::BI__builtin_neon_vst1q_lane_v:
13254 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13255 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13256 return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13257 case NEON::BI__builtin_neon_vstl1_lane_s64:
13258 case NEON::BI__builtin_neon_vstl1q_lane_s64: {
13259 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13260 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
13261 llvm::StoreInst *SI =
13262 Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());
13263 SI->setAtomic(llvm::AtomicOrdering::Release);
13264 return SI;
13265 }
13266 case NEON::BI__builtin_neon_vld2_v:
13267 case NEON::BI__builtin_neon_vld2q_v: {
13268 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13269 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
13270 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13271 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13272 }
13273 case NEON::BI__builtin_neon_vld3_v:
13274 case NEON::BI__builtin_neon_vld3q_v: {
13275 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13276 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
13277 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13278 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13279 }
13280 case NEON::BI__builtin_neon_vld4_v:
13281 case NEON::BI__builtin_neon_vld4q_v: {
13282 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13283 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
13284 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13285 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13286 }
13287 case NEON::BI__builtin_neon_vld2_dup_v:
13288 case NEON::BI__builtin_neon_vld2q_dup_v: {
13289 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13290 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
13291 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
13292 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13293 }
13294 case NEON::BI__builtin_neon_vld3_dup_v:
13295 case NEON::BI__builtin_neon_vld3q_dup_v: {
13296 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13297 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
13298 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
13299 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13300 }
13301 case NEON::BI__builtin_neon_vld4_dup_v:
13302 case NEON::BI__builtin_neon_vld4q_dup_v: {
13303 llvm::Type *Tys[2] = {VTy, UnqualPtrTy};
13304 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
13305 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
13306 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13307 }
13308 case NEON::BI__builtin_neon_vld2_lane_v:
13309 case NEON::BI__builtin_neon_vld2q_lane_v: {
13310 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13311 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
13312 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13313 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13314 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13315 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13316 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");
13317 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13318 }
13319 case NEON::BI__builtin_neon_vld3_lane_v:
13320 case NEON::BI__builtin_neon_vld3q_lane_v: {
13321 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13322 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
13323 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13324 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13325 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13326 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13327 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13328 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");
13329 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13330 }
13331 case NEON::BI__builtin_neon_vld4_lane_v:
13332 case NEON::BI__builtin_neon_vld4q_lane_v: {
13333 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
13334 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
13335 std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());
13336 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13337 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13338 Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
13339 Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
13340 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
13341 Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");
13342 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
13343 }
13344 case NEON::BI__builtin_neon_vst2_v:
13345 case NEON::BI__builtin_neon_vst2q_v: {
13346 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13347 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
13348 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
13349 Ops, "");
13350 }
13351 case NEON::BI__builtin_neon_vst2_lane_v:
13352 case NEON::BI__builtin_neon_vst2q_lane_v: {
13353 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13354 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
13355 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13356 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
13357 Ops, "");
13358 }
13359 case NEON::BI__builtin_neon_vst3_v:
13360 case NEON::BI__builtin_neon_vst3q_v: {
13361 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13362 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
13363 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
13364 Ops, "");
13365 }
13366 case NEON::BI__builtin_neon_vst3_lane_v:
13367 case NEON::BI__builtin_neon_vst3q_lane_v: {
13368 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13369 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
13370 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13371 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
13372 Ops, "");
13373 }
13374 case NEON::BI__builtin_neon_vst4_v:
13375 case NEON::BI__builtin_neon_vst4q_v: {
13376 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13377 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
13378 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
13379 Ops, "");
13380 }
13381 case NEON::BI__builtin_neon_vst4_lane_v:
13382 case NEON::BI__builtin_neon_vst4q_lane_v: {
13383 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
13384 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
13385 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
13386 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
13387 Ops, "");
13388 }
13389 case NEON::BI__builtin_neon_vtrn_v:
13390 case NEON::BI__builtin_neon_vtrnq_v: {
13391 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13392 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13393 Value *SV = nullptr;
13394
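    // The transpose produces two result vectors; each is built as a
    // shufflevector interleaving lanes of the two inputs (indices below) and
    // stored back through the result pointer in Ops[0].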
13395 for (unsigned vi = 0; vi != 2; ++vi) {
13396 SmallVector<int, 16> Indices;
13397 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13398 Indices.push_back(i+vi);
13399 Indices.push_back(i+e+vi);
13400 }
13401 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13402 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
13403 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13404 }
13405 return SV;
13406 }
13407 case NEON::BI__builtin_neon_vuzp_v:
13408 case NEON::BI__builtin_neon_vuzpq_v: {
13409 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13410 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13411 Value *SV = nullptr;
13412
13413 for (unsigned vi = 0; vi != 2; ++vi) {
13414 SmallVector<int, 16> Indices;
13415 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
13416 Indices.push_back(2*i+vi);
13417
13418 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13419 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
13420 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13421 }
13422 return SV;
13423 }
13424 case NEON::BI__builtin_neon_vzip_v:
13425 case NEON::BI__builtin_neon_vzipq_v: {
13426 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
13427 Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
13428 Value *SV = nullptr;
13429
13430 for (unsigned vi = 0; vi != 2; ++vi) {
13431 SmallVector<int, 16> Indices;
13432 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
13433 Indices.push_back((i + vi*e) >> 1);
13434 Indices.push_back(((i + vi*e) >> 1)+e);
13435 }
13436 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
13437 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
13438 SV = Builder.CreateDefaultAlignedStore(SV, Addr);
13439 }
13440 return SV;
13441 }
13442 case NEON::BI__builtin_neon_vqtbl1q_v: {
13443 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
13444 Ops, "vtbl1");
13445 }
13446 case NEON::BI__builtin_neon_vqtbl2q_v: {
13447 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
13448 Ops, "vtbl2");
13449 }
13450 case NEON::BI__builtin_neon_vqtbl3q_v: {
13451 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
13452 Ops, "vtbl3");
13453 }
13454 case NEON::BI__builtin_neon_vqtbl4q_v: {
13455 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
13456 Ops, "vtbl4");
13457 }
13458 case NEON::BI__builtin_neon_vqtbx1q_v: {
13459 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
13460 Ops, "vtbx1");
13461 }
13462 case NEON::BI__builtin_neon_vqtbx2q_v: {
13463 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
13464 Ops, "vtbx2");
13465 }
13466 case NEON::BI__builtin_neon_vqtbx3q_v: {
13467 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
13468 Ops, "vtbx3");
13469 }
13470 case NEON::BI__builtin_neon_vqtbx4q_v: {
13471 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
13472 Ops, "vtbx4");
13473 }
13474 case NEON::BI__builtin_neon_vsqadd_v:
13475 case NEON::BI__builtin_neon_vsqaddq_v: {
13476 Int = Intrinsic::aarch64_neon_usqadd;
13477 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
13478 }
13479 case NEON::BI__builtin_neon_vuqadd_v:
13480 case NEON::BI__builtin_neon_vuqaddq_v: {
13481 Int = Intrinsic::aarch64_neon_suqadd;
13482 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
13483 }
13484 }
13485}
13486
13487Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,
13488 const CallExpr *E) {
13489 assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||
13490 BuiltinID == BPF::BI__builtin_btf_type_id ||
13491 BuiltinID == BPF::BI__builtin_preserve_type_info ||
13492 BuiltinID == BPF::BI__builtin_preserve_enum_value) &&
13493 "unexpected BPF builtin");
13494
13495   // A sequence number, injected into IR builtin functions, to
13496   // prevent CSE when the only difference between otherwise
13497   // identical calls may be their debuginfo metadata.
13498 static uint32_t BuiltinSeqNum;
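  // For example, two __builtin_btf_type_id() calls on different types would
  // otherwise lower to IR calls that differ only in the attached debug-info
  // metadata; the distinct SeqNumVal operand keeps them from being merged.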
13499
13500 switch (BuiltinID) {
13501 default:
13502 llvm_unreachable("Unexpected BPF builtin");
13503 case BPF::BI__builtin_preserve_field_info: {
13504 const Expr *Arg = E->getArg(0);
13505 bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;
13506
13507 if (!getDebugInfo()) {
13508 CGM.Error(E->getExprLoc(),
13509 "using __builtin_preserve_field_info() without -g");
13510 return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13511 : EmitLValue(Arg).emitRawPointer(*this);
13512 }
13513
13514 // Enable underlying preserve_*_access_index() generation.
13515 bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;
13516 IsInPreservedAIRegion = true;
13517 Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)
13518 : EmitLValue(Arg).emitRawPointer(*this);
13519 IsInPreservedAIRegion = OldIsInPreservedAIRegion;
13520
13521 ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13522 Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());
13523
13524     // Build the IR for the preserve_field_info intrinsic.
13525 llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(
13526 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,
13527 {FieldAddr->getType()});
13528 return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});
13529 }
13530 case BPF::BI__builtin_btf_type_id:
13531 case BPF::BI__builtin_preserve_type_info: {
13532 if (!getDebugInfo()) {
13533 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13534 return nullptr;
13535 }
13536
13537 const Expr *Arg0 = E->getArg(0);
13538 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13539 Arg0->getType(), Arg0->getExprLoc());
13540
13541 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13542 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13543 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13544
13545 llvm::Function *FnDecl;
13546 if (BuiltinID == BPF::BI__builtin_btf_type_id)
13547 FnDecl = llvm::Intrinsic::getDeclaration(
13548 &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});
13549 else
13550 FnDecl = llvm::Intrinsic::getDeclaration(
13551 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});
13552 CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});
13553 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13554 return Fn;
13555 }
13556 case BPF::BI__builtin_preserve_enum_value: {
13557 if (!getDebugInfo()) {
13558 CGM.Error(E->getExprLoc(), "using builtin function without -g");
13559 return nullptr;
13560 }
13561
13562 const Expr *Arg0 = E->getArg(0);
13563 llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(
13564 Arg0->getType(), Arg0->getExprLoc());
13565
13566 // Find enumerator
13567 const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());
13568 const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());
13569 const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());
13570 const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());
13571
13572 auto InitVal = Enumerator->getInitVal();
13573 std::string InitValStr;
13574 if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))
13575 InitValStr = std::to_string(InitVal.getSExtValue());
13576 else
13577 InitValStr = std::to_string(InitVal.getZExtValue());
13578 std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;
13579 Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);
13580
13581 ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
13582 Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());
13583 Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);
13584
13585 llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(
13586 &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});
13587 CallInst *Fn =
13588 Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});
13589 Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);
13590 return Fn;
13591 }
13592 }
13593}
13594
13595llvm::Value *CodeGenFunction::
13596 BuildVector(ArrayRef<llvm::Value*> Ops) {
13597   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
13598 "Not a power-of-two sized vector!");
13599 bool AllConstants = true;
13600 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
13601 AllConstants &= isa<Constant>(Ops[i]);
13602
13603 // If this is a constant vector, create a ConstantVector.
13604 if (AllConstants) {
13605     SmallVector<llvm::Constant*, 16> CstOps;
13606     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13607 CstOps.push_back(cast<Constant>(Ops[i]));
13608 return llvm::ConstantVector::get(CstOps);
13609 }
13610
13611 // Otherwise, insertelement the values to build the vector.
13612 Value *Result = llvm::PoisonValue::get(
13613 llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
13614
13615 for (unsigned i = 0, e = Ops.size(); i != e; ++i)
13616 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));
13617
13618 return Result;
13619}
13620
13621// Convert the mask from an integer type to a vector of i1.
13622 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
13623                               unsigned NumElts) {
13624
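  // E.g. an i8 mask paired with a 4-element vector becomes <8 x i1> via the
  // bitcast below and is then shuffled down to <4 x i1>, so only the low
  // NumElts bits of the integer mask take part in the masked operation.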
13625 auto *MaskTy = llvm::FixedVectorType::get(
13626 CGF.Builder.getInt1Ty(),
13627 cast<IntegerType>(Mask->getType())->getBitWidth());
13628 Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
13629
13630   // If we have fewer than 8 elements, then the starting mask was an i8 and
13631 // we need to extract down to the right number of elements.
13632 if (NumElts < 8) {
13633 int Indices[4];
13634 for (unsigned i = 0; i != NumElts; ++i)
13635 Indices[i] = i;
13636 MaskVec = CGF.Builder.CreateShuffleVector(
13637 MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");
13638 }
13639 return MaskVec;
13640}
13641
13642 static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13643                                  Align Alignment) {
13644 Value *Ptr = Ops[0];
13645
13646 Value *MaskVec = getMaskVecValue(
13647 CGF, Ops[2],
13648 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());
13649
13650 return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);
13651}
13652
13653 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13654                                 Align Alignment) {
13655 llvm::Type *Ty = Ops[1]->getType();
13656 Value *Ptr = Ops[0];
13657
13658 Value *MaskVec = getMaskVecValue(
13659 CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());
13660
13661 return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);
13662}
13663
13664 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
13665                                 ArrayRef<Value *> Ops) {
13666 auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());
13667 Value *Ptr = Ops[0];
13668
13669 Value *MaskVec = getMaskVecValue(
13670 CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());
13671
13672 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
13673 ResultTy);
13674 return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
13675}
13676
13677 static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
13678                                     ArrayRef<Value *> Ops,
13679                                     bool IsCompress) {
13680 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13681
13682 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13683
13684 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
13685 : Intrinsic::x86_avx512_mask_expand;
13686 llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
13687 return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
13688}
13689
13690static Value *EmitX86CompressStore(CodeGenFunction &CGF,
13691 ArrayRef<Value *> Ops) {
13692 auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());
13693 Value *Ptr = Ops[0];
13694
13695 Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());
13696
13697 llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
13698 ResultTy);
13699 return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
13700}
13701
13702static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
13703 ArrayRef<Value *> Ops,
13704 bool InvertLHS = false) {
13705 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
13706 Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
13707 Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
13708
13709 if (InvertLHS)
13710 LHS = CGF.Builder.CreateNot(LHS);
13711
13712 return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
13713 Ops[0]->getType());
13714}
13715
13716static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
13717 Value *Amt, bool IsRight) {
13718 llvm::Type *Ty = Op0->getType();
13719
13720 // The amount may be a scalar immediate, in which case we create a splat vector.
13721 // Funnel shift amounts are treated as modulo and the types are all power-of-2, so
13722 // we only care about the lowest log2 bits anyway.
13723 if (Amt->getType() != Ty) {
13724 unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();
13725 Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
13726 Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
13727 }
13728
13729 unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
13730 Function *F = CGF.CGM.getIntrinsic(IID, Ty);
13731 return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
13732}
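// As an example, a v8i32 rotate builtin that reaches this helper with
// Op0 == Op1 lowers to roughly:
//   %r = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x, <8 x i32> %x, <8 x i32> %amt)
// since rotl(x, n) == fshl(x, x, n).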
13733
13734static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
13735 bool IsSigned) {
13736 Value *Op0 = Ops[0];
13737 Value *Op1 = Ops[1];
13738 llvm::Type *Ty = Op0->getType();
13739 uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
13740
13741 CmpInst::Predicate Pred;
13742 switch (Imm) {
13743 case 0x0:
13744 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
13745 break;
13746 case 0x1:
13747 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
13748 break;
13749 case 0x2:
13750 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
13751 break;
13752 case 0x3:
13753 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
13754 break;
13755 case 0x4:
13756 Pred = ICmpInst::ICMP_EQ;
13757 break;
13758 case 0x5:
13759 Pred = ICmpInst::ICMP_NE;
13760 break;
13761 case 0x6:
13762 return llvm::Constant::getNullValue(Ty); // FALSE
13763 case 0x7:
13764 return llvm::Constant::getAllOnesValue(Ty); // TRUE
13765 default:
13766 llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
13767 }
13768
13769 Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
13770 Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
13771 return Res;
13772}
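// For instance, Imm == 0x2 with IsSigned == true yields a signed greater-than
// compare that is then sign-extended back to the operand type:
//   %cmp = icmp sgt <4 x i32> %a, %b
//   %res = sext <4 x i1> %cmp to <4 x i32>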
13773
13774static Value *EmitX86Select(CodeGenFunction &CGF,
13775 Value *Mask, Value *Op0, Value *Op1) {
13776
13777 // If the mask is all ones just return first argument.
13778 if (const auto *C = dyn_cast<Constant>(Mask))
13779 if (C->isAllOnesValue())
13780 return Op0;
13781
13782 Mask = getMaskVecValue(
13783 CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());
13784
13785 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13786}
13787
13788static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
13789 Value *Mask, Value *Op0, Value *Op1) {
13790 // If the mask is all ones just return first argument.
13791 if (const auto *C = dyn_cast<Constant>(Mask))
13792 if (C->isAllOnesValue())
13793 return Op0;
13794
13795 auto *MaskTy = llvm::FixedVectorType::get(
13796 CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
13797 Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
13798 Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
13799 return CGF.Builder.CreateSelect(Mask, Op0, Op1);
13800}
13801
13802static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
13803 unsigned NumElts, Value *MaskIn) {
13804 if (MaskIn) {
13805 const auto *C = dyn_cast<Constant>(MaskIn);
13806 if (!C || !C->isAllOnesValue())
13807 Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
13808 }
13809
13810 if (NumElts < 8) {
13811 int Indices[8];
13812 for (unsigned i = 0; i != NumElts; ++i)
13813 Indices[i] = i;
13814 for (unsigned i = NumElts; i != 8; ++i)
13815 Indices[i] = i % NumElts + NumElts;
13816 Cmp = CGF.Builder.CreateShuffleVector(
13817 Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
13818 }
13819
13820 return CGF.Builder.CreateBitCast(Cmp,
13821 IntegerType::get(CGF.getLLVMContext(),
13822 std::max(NumElts, 8U)));
13823}
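// For NumElts == 4, for example, the shuffle pads the compare result with
// zero lanes out to 8 elements so the final bitcast always yields at least
// an i8, roughly:
//   %pad = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer,
//                        <8 x i32> <i32 0, ..., i32 7>
//   %res = bitcast <8 x i1> %pad to i8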
13824
13825static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
13826 bool Signed, ArrayRef<Value *> Ops) {
13827 assert((Ops.size() == 2 || Ops.size() == 4) &&
13828 "Unexpected number of arguments");
13829 unsigned NumElts =
13830 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
13831 Value *Cmp;
13832
13833 if (CC == 3) {
13834 Cmp = Constant::getNullValue(
13835 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13836 } else if (CC == 7) {
13837 Cmp = Constant::getAllOnesValue(
13838 llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
13839 } else {
13840 ICmpInst::Predicate Pred;
13841 switch (CC) {
13842 default: llvm_unreachable("Unknown condition code");
13843 case 0: Pred = ICmpInst::ICMP_EQ; break;
13844 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
13845 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
13846 case 4: Pred = ICmpInst::ICMP_NE; break;
13847 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
13848 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
13849 }
13850 Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
13851 }
13852
13853 Value *MaskIn = nullptr;
13854 if (Ops.size() == 4)
13855 MaskIn = Ops[3];
13856
13857 return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
13858}
13859
13860static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
13861 Value *Zero = Constant::getNullValue(In->getType());
13862 return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
13863}
13864
13865static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,
13866 ArrayRef<Value *> Ops, bool IsSigned) {
13867 unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
13868 llvm::Type *Ty = Ops[1]->getType();
13869
13870 Value *Res;
13871 if (Rnd != 4) {
13872 Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
13873 : Intrinsic::x86_avx512_uitofp_round;
13874 Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
13875 Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
13876 } else {
13877 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
13878 Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
13879 : CGF.Builder.CreateUIToFP(Ops[0], Ty);
13880 }
13881
13882 return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
13883}
13884
13885// Lowers X86 FMA intrinsics to IR.
13886static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
13887 ArrayRef<Value *> Ops, unsigned BuiltinID,
13888 bool IsAddSub) {
13889
13890 bool Subtract = false;
13891 Intrinsic::ID IID = Intrinsic::not_intrinsic;
13892 switch (BuiltinID) {
13893 default: break;
13894 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
13895 Subtract = true;
13896 [[fallthrough]];
13897 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
13898 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
13899 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
13900 IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;
13901 break;
13902 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
13903 Subtract = true;
13904 [[fallthrough]];
13905 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
13906 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
13907 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
13908 IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;
13909 break;
13910 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
13911 Subtract = true;
13912 [[fallthrough]];
13913 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
13914 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
13915 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
13916 IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
13917 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
13918 Subtract = true;
13919 [[fallthrough]];
13920 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
13921 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
13922 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
13923 IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
13924 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
13925 Subtract = true;
13926 [[fallthrough]];
13927 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
13928 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
13929 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
13930 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
13931 break;
13932 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
13933 Subtract = true;
13934 [[fallthrough]];
13935 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
13936 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
13937 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
13938 IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
13939 break;
13940 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
13941 Subtract = true;
13942 LLVM_FALLTHROUGH;
13943 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
13944 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
13945 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
13946 IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
13947 break;
13948 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
13949 Subtract = true;
13950 LLVM_FALLTHROUGH;
13951 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
13952 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
13953 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
13954 IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
13955 break;
13956 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
13957 Subtract = true;
13958 LLVM_FALLTHROUGH;
13959 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
13960 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
13961 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
13962 IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
13963 break;
13964 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
13965 Subtract = true;
13966 LLVM_FALLTHROUGH;
13967 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
13968 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
13969 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
13970 IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
13971 break;
13972 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
13973 Subtract = true;
13974 LLVM_FALLTHROUGH;
13975 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
13976 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
13977 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
13978 IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
13979 break;
13980 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
13981 Subtract = true;
13982 LLVM_FALLTHROUGH;
13983 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
13984 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
13985 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
13986 IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
13987 break;
13988 }
13989
13990 Value *A = Ops[0];
13991 Value *B = Ops[1];
13992 Value *C = Ops[2];
13993
13994 if (Subtract)
13995 C = CGF.Builder.CreateFNeg(C);
13996
13997 Value *Res;
13998
13999 // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
14000 if (IID != Intrinsic::not_intrinsic &&
14001 (cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
14002 IsAddSub)) {
14003 Function *Intr = CGF.CGM.getIntrinsic(IID);
14004 Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
14005 } else {
14006 llvm::Type *Ty = A->getType();
14007 Function *FMA;
14008 if (CGF.Builder.getIsFPConstrained()) {
14009 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14010 FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);
14011 Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});
14012 } else {
14013 FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
14014 Res = CGF.Builder.CreateCall(FMA, {A, B, C});
14015 }
14016 }
14017
14018 // Handle any required masking.
14019 Value *MaskFalseVal = nullptr;
14020 switch (BuiltinID) {
14021 case clang::X86::BI__builtin_ia32_vfmaddph512_mask:
14022 case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
14023 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
14024 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
14025 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
14026 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14027 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
14028 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
14029 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14030 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14031 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14032 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14033 MaskFalseVal = Ops[0];
14034 break;
14035 case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
14036 case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
14037 case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
14038 case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14039 case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14040 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14041 case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14042 case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14043 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14044 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14045 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14046 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14047 MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
14048 break;
14049 case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
14050 case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:
14051 case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
14052 case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
14053 case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
14054 case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
14055 case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14056 case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14057 case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14058 case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14059 case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14060 case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14061 case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14062 case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14063 case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14064 case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14065 case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14066 case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14067 case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14068 case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14069 case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14070 case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14071 case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14072 case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14073 MaskFalseVal = Ops[2];
14074 break;
14075 }
14076
14077 if (MaskFalseVal)
14078 return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
14079
14080 return Res;
14081}
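// With default rounding (_MM_FROUND_CUR_DIRECTION == 4) and no add/sub
// interleaving, a masked 512-bit FMA builtin therefore reduces to the generic
// llvm.fma intrinsic followed by a mask select, e.g. for the _mask form:
//   %fma = call <16 x float> @llvm.fma.v16f32(%a, %b, %c)
//   %res = select <16 x i1> %k, <16 x float> %fma, <16 x float> %a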
14082
14083static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,
14084 MutableArrayRef<Value *> Ops, Value *Upper,
14085 bool ZeroMask = false, unsigned PTIdx = 0,
14086 bool NegAcc = false) {
14087 unsigned Rnd = 4;
14088 if (Ops.size() > 4)
14089 Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
14090
14091 if (NegAcc)
14092 Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
14093
14094 Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
14095 Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
14096 Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
14097 Value *Res;
14098 if (Rnd != 4) {
14099 Intrinsic::ID IID;
14100
14101 switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {
14102 case 16:
14103 IID = Intrinsic::x86_avx512fp16_vfmadd_f16;
14104 break;
14105 case 32:
14106 IID = Intrinsic::x86_avx512_vfmadd_f32;
14107 break;
14108 case 64:
14109 IID = Intrinsic::x86_avx512_vfmadd_f64;
14110 break;
14111 default:
14112 llvm_unreachable("Unexpected size");
14113 }
14114 Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14115 {Ops[0], Ops[1], Ops[2], Ops[4]});
14116 } else if (CGF.Builder.getIsFPConstrained()) {
14117 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
14118 Function *FMA = CGF.CGM.getIntrinsic(
14119 Intrinsic::experimental_constrained_fma, Ops[0]->getType());
14120 Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));
14121 } else {
14122 Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
14123 Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
14124 }
14125 // If we have more than 3 arguments, we need to do masking.
14126 if (Ops.size() > 3) {
14127 Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
14128 : Ops[PTIdx];
14129
14130 // If we negated the accumulator and it's the PassThru value, we need to
14131 // bypass the negate. Conveniently, Upper should be the same thing in this
14132 // case.
14133 if (NegAcc && PTIdx == 2)
14134 PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
14135
14136 Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
14137 }
14138 return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
14139}
14140
14141static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
14142 ArrayRef<Value *> Ops) {
14143 llvm::Type *Ty = Ops[0]->getType();
14144 // Arguments have a vXi32 type so cast to vXi64.
14145 Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
14146 Ty->getPrimitiveSizeInBits() / 64);
14147 Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
14148 Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
14149
14150 if (IsSigned) {
14151 // Shift left then arithmetic shift right.
14152 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
14153 LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
14154 LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
14155 RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
14156 RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
14157 } else {
14158 // Clear the upper bits.
14159 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
14160 LHS = CGF.Builder.CreateAnd(LHS, Mask);
14161 RHS = CGF.Builder.CreateAnd(RHS, Mask);
14162 }
14163
14164 return CGF.Builder.CreateMul(LHS, RHS);
14165}
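// In effect each 64-bit lane multiplies the low 32 bits of its inputs,
// sign-extended for pmuldq and zero-extended for pmuludq; the unsigned form
// is roughly:
//   %la = and <2 x i64> %a, splat (i64 4294967295)
//   %lb = and <2 x i64> %b, splat (i64 4294967295)
//   %r  = mul <2 x i64> %la, %lb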
14166
14167// Emit a masked pternlog intrinsic. This only exists because the header has to
14168// use a macro and we aren't able to pass the input argument to a pternlog
14169// builtin and a select builtin without evaluating it twice.
14170static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
14171 ArrayRef<Value *> Ops) {
14172 llvm::Type *Ty = Ops[0]->getType();
14173
14174 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
14175 unsigned EltWidth = Ty->getScalarSizeInBits();
14176 Intrinsic::ID IID;
14177 if (VecWidth == 128 && EltWidth == 32)
14178 IID = Intrinsic::x86_avx512_pternlog_d_128;
14179 else if (VecWidth == 256 && EltWidth == 32)
14180 IID = Intrinsic::x86_avx512_pternlog_d_256;
14181 else if (VecWidth == 512 && EltWidth == 32)
14182 IID = Intrinsic::x86_avx512_pternlog_d_512;
14183 else if (VecWidth == 128 && EltWidth == 64)
14184 IID = Intrinsic::x86_avx512_pternlog_q_128;
14185 else if (VecWidth == 256 && EltWidth == 64)
14186 IID = Intrinsic::x86_avx512_pternlog_q_256;
14187 else if (VecWidth == 512 && EltWidth == 64)
14188 IID = Intrinsic::x86_avx512_pternlog_q_512;
14189 else
14190 llvm_unreachable("Unexpected intrinsic");
14191
14192 Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
14193 Ops.drop_back());
14194 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
14195 return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
14196}
14197
14198static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
14199 llvm::Type *DstTy) {
14200 unsigned NumberOfElements =
14201 cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14202 Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
14203 return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
14204}
14205
14206Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
14207 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
14208 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
14209 return EmitX86CpuIs(CPUStr);
14210}
14211
14212// Convert F16 halves to floats.
14213static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
14214 ArrayRef<Value *> Ops,
14215 llvm::Type *DstTy) {
14216 assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&
14217 "Unknown cvtph2ps intrinsic");
14218
14219 // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
14220 if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {
14221 Function *F =
14222 CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);
14223 return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});
14224 }
14225
14226 unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
14227 Value *Src = Ops[0];
14228
14229 // Extract the subvector.
14230 if (NumDstElts !=
14231 cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {
14232 assert(NumDstElts == 4 && "Unexpected vector size");
14233 Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});
14234 }
14235
14236 // Bitcast from vXi16 to vXf16.
14237 auto *HalfTy = llvm::FixedVectorType::get(
14238 llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
14239 Src = CGF.Builder.CreateBitCast(Src, HalfTy);
14240
14241 // Perform the fp-extension.
14242 Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");
14243
14244 if (Ops.size() >= 3)
14245 Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]);
14246 return Res;
14247}
14248
14249Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
14250
14251 llvm::Type *Int32Ty = Builder.getInt32Ty();
14252
14253 // Matching the struct layout from the compiler-rt/libgcc structure that is
14254 // filled in:
14255 // unsigned int __cpu_vendor;
14256 // unsigned int __cpu_type;
14257 // unsigned int __cpu_subtype;
14258 // unsigned int __cpu_features[1];
14259 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14260 llvm::ArrayType::get(Int32Ty, 1));
14261
14262 // Grab the global __cpu_model.
14263 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14264 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14265
14266 // Calculate the index needed to access the correct field based on the
14267 // range. Also adjust the expected value.
14268 unsigned Index;
14269 unsigned Value;
14270 std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
14271#define X86_VENDOR(ENUM, STRING) \
14272 .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
14273#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \
14274 .Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14275#define X86_CPU_TYPE(ENUM, STR) \
14276 .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
14277#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \
14278 .Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14279#define X86_CPU_SUBTYPE(ENUM, STR) \
14280 .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
14281#include "llvm/TargetParser/X86TargetParser.def"
14282 .Default({0, 0});
14283 assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
14284
14285 // Grab the appropriate field from __cpu_model.
14286 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
14287 ConstantInt::get(Int32Ty, Index)};
14288 llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14289 CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,
14290 CharUnits::fromQuantity(4));
14291
14292 // Check the value of the field against the requested value.
14293 return Builder.CreateICmpEQ(CpuValue,
14294 llvm::ConstantInt::get(Int32Ty, Value));
14295}
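// Conceptually, __builtin_cpu_is("amd") therefore loads the matching field of
// the libgcc/compiler-rt __cpu_model global (here __cpu_vendor) and compares
// it for equality against the enumerator value that X86TargetParser.def
// associates with the string "amd".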
14296
14297Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
14298 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14299 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14300 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14301 return Builder.getFalse();
14302 return EmitX86CpuSupports(FeatureStr);
14303}
14304
14305Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
14306 return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));
14307}
14308
14309llvm::Value *
14310CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {
14311 Value *Result = Builder.getTrue();
14312 if (FeatureMask[0] != 0) {
14313 // Matching the struct layout from the compiler-rt/libgcc structure that is
14314 // filled in:
14315 // unsigned int __cpu_vendor;
14316 // unsigned int __cpu_type;
14317 // unsigned int __cpu_subtype;
14318 // unsigned int __cpu_features[1];
14319 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
14320 llvm::ArrayType::get(Int32Ty, 1));
14321
14322 // Grab the global __cpu_model.
14323 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
14324 cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
14325
14326 // Grab the first (0th) element from the field __cpu_features off of the
14327 // global in the struct STy.
14328 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
14329 Builder.getInt32(0)};
14330 Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);
14331 Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,
14332 CharUnits::fromQuantity(4));
14333
14334 // Check the value of the bit corresponding to the feature requested.
14335 Value *Mask = Builder.getInt32(FeatureMask[0]);
14336 Value *Bitset = Builder.CreateAnd(Features, Mask);
14337 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14338 Result = Builder.CreateAnd(Result, Cmp);
14339 }
14340
14341 llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3);
14342 llvm::Constant *CpuFeatures2 =
14343 CGM.CreateRuntimeVariable(ATy, "__cpu_features2");
14344 cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
14345 for (int i = 1; i != 4; ++i) {
14346 const uint32_t M = FeatureMask[i];
14347 if (!M)
14348 continue;
14349 Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};
14350 Value *Features = Builder.CreateAlignedLoad(
14351 Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),
14352 CharUnits::fromQuantity(4));
14353 // Check the value of the bit corresponding to the feature requested.
14354 Value *Mask = Builder.getInt32(M);
14355 Value *Bitset = Builder.CreateAnd(Features, Mask);
14356 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14357 Result = Builder.CreateAnd(Result, Cmp);
14358 }
14359
14360 return Result;
14361}
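// Conceptually the generated code tests feature bits in the runtime globals,
// e.g. for a single feature in the first mask word:
//   (__cpu_model.__cpu_features[0] & Mask) == Mask
// with any remaining mask words checked against __cpu_features2[i - 1].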
14362
14363Value *CodeGenFunction::EmitAArch64CpuInit() {
14364 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
14365 llvm::FunctionCallee Func =
14366 CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");
14367 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14368 cast<llvm::GlobalValue>(Func.getCallee())
14369 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14370 return Builder.CreateCall(Func);
14371}
14372
14373Value *CodeGenFunction::EmitRISCVCpuInit() {
14374 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {VoidPtrTy}, false);
14375 llvm::FunctionCallee Func =
14376 CGM.CreateRuntimeFunction(FTy, "__init_riscv_feature_bits");
14377 auto *CalleeGV = cast<llvm::GlobalValue>(Func.getCallee());
14378 CalleeGV->setDSOLocal(true);
14379 CalleeGV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14380 return Builder.CreateCall(Func, {llvm::ConstantPointerNull::get(VoidPtrTy)});
14381}
14382
14383Value *CodeGenFunction::EmitX86CpuInit() {
14384 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
14385 /*Variadic*/ false);
14386 llvm::FunctionCallee Func =
14387 CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
14388 cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
14389 cast<llvm::GlobalValue>(Func.getCallee())
14390 ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
14391 return Builder.CreateCall(Func);
14392}
14393
14394Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
14395 const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();
14396 StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();
14397 llvm::SmallVector<StringRef, 8> Features;
14398 ArgStr.split(Features, "+");
14399 for (auto &Feature : Features) {
14400 Feature = Feature.trim();
14401 if (!llvm::AArch64::parseFMVExtension(Feature))
14402 return Builder.getFalse();
14403 if (Feature != "default")
14404 Features.push_back(Feature);
14405 }
14406 return EmitAArch64CpuSupports(Features);
14407}
14408
14409llvm::Value *
14410CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14411 uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
14412 Value *Result = Builder.getTrue();
14413 if (FeaturesMask != 0) {
14414 // Get features from structure in runtime library
14415 // struct {
14416 // unsigned long long features;
14417 // } __aarch64_cpu_features;
14418 llvm::Type *STy = llvm::StructType::get(Int64Ty);
14419 llvm::Constant *AArch64CPUFeatures =
14420 CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");
14421 cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);
14422 llvm::Value *CpuFeatures = Builder.CreateGEP(
14423 STy, AArch64CPUFeatures,
14424 {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
14425 Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
14426 CharUnits::fromQuantity(8));
14427 Value *Mask = Builder.getInt64(FeaturesMask);
14428 Value *Bitset = Builder.CreateAnd(Features, Mask);
14429 Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
14430 Result = Builder.CreateAnd(Result, Cmp);
14431 }
14432 return Result;
14433}
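// Conceptually this reduces to a single bit test against the runtime-filled
// structure:
//   (__aarch64_cpu_features.features & FeaturesMask) == FeaturesMask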
14434
14435Value *CodeGenFunction::EmitRISCVCpuSupports(const CallExpr *E) {
14436
14437 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
14438 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
14439 if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))
14440 return Builder.getFalse();
14441
14442 return EmitRISCVCpuSupports(ArrayRef<StringRef>(FeatureStr));
14443}
14444
14445static Value *loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder,
14446 CodeGenModule &CGM) {
14447 llvm::Type *Int32Ty = Builder.getInt32Ty();
14448 llvm::Type *Int64Ty = Builder.getInt64Ty();
14449 llvm::ArrayType *ArrayOfInt64Ty =
14450 llvm::ArrayType::get(Int64Ty, llvm::RISCVISAInfo::FeatureBitSize);
14451 llvm::Type *StructTy = llvm::StructType::get(Int32Ty, ArrayOfInt64Ty);
14452 llvm::Constant *RISCVFeaturesBits =
14453 CGM.CreateRuntimeVariable(StructTy, "__riscv_feature_bits");
14454 cast<llvm::GlobalValue>(RISCVFeaturesBits)->setDSOLocal(true);
14455 Value *IndexVal = llvm::ConstantInt::get(Int32Ty, Index);
14456 llvm::Value *GEPIndices[] = {Builder.getInt32(0), Builder.getInt32(1),
14457 IndexVal};
14458 Value *Ptr =
14459 Builder.CreateInBoundsGEP(StructTy, RISCVFeaturesBits, GEPIndices);
14460 Value *FeaturesBit =
14461 Builder.CreateAlignedLoad(Int64Ty, Ptr, CharUnits::fromQuantity(8));
14462 return FeaturesBit;
14463}
14464
14465Value *CodeGenFunction::EmitRISCVCpuSupports(ArrayRef<StringRef> FeaturesStrs) {
14466 const unsigned RISCVFeatureLength = llvm::RISCVISAInfo::FeatureBitSize;
14467 uint64_t RequireBitMasks[RISCVFeatureLength] = {0};
14468
14469 for (auto Feat : FeaturesStrs) {
14470 auto [GroupID, BitPos] = RISCVISAInfo::getRISCVFeaturesBitsInfo(Feat);
14471
14472 // If there isn't a BitPos for this feature, skip this version.
14473 // A warning is also reported to the user during compilation.
14474 if (BitPos == -1)
14475 return Builder.getFalse();
14476
14477 RequireBitMasks[GroupID] |= (1ULL << BitPos);
14478 }
14479
14480 Value *Result = nullptr;
14481 for (unsigned Idx = 0; Idx < RISCVFeatureLength; Idx++) {
14482 if (RequireBitMasks[Idx] == 0)
14483 continue;
14484
14485 Value *Mask = Builder.getInt64(RequireBitMasks[Idx]);
14486 Value *Bitset =
14487 Builder.CreateAnd(loadRISCVFeatureBits(Idx, Builder, CGM), Mask);
14488 Value *CmpV = Builder.CreateICmpEQ(Bitset, Mask);
14489 Result = (!Result) ? CmpV : Builder.CreateAnd(Result, CmpV);
14490 }
14491
14492 assert(Result && "Should have value here.");
14493
14494 return Result;
14495}
14496
14497Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
14498 const CallExpr *E) {
14499 if (BuiltinID == Builtin::BI__builtin_cpu_is)
14500 return EmitX86CpuIs(E);
14501 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
14502 return EmitX86CpuSupports(E);
14503 if (BuiltinID == Builtin::BI__builtin_cpu_init)
14504 return EmitX86CpuInit();
14505
14506 // Handle MSVC intrinsics before argument evaluation to prevent double
14507 // evaluation.
14508 if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))
14509 return EmitMSVCBuiltinExpr(*MsvcIntId, E);
14510
14511 SmallVector<Value*, 4> Ops;
14512 bool IsMaskFCmp = false;
14513 bool IsConjFMA = false;
14514
14515 // Find out if any arguments are required to be integer constant expressions.
14516 unsigned ICEArguments = 0;
14517 ASTContext::GetBuiltinTypeError Error;
14518 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
14519 assert(Error == ASTContext::GE_None && "Should not codegen an error");
14520
14521 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
14522 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
14523 }
14524
14525 // These exist so that the builtin that takes an immediate can be bounds
14526 // checked by clang to avoid passing bad immediates to the backend. Since
14527 // AVX has a larger immediate than SSE, we would need separate builtins to
14528 // do the different bounds checking. Rather than create a clang-specific,
14529 // SSE-only builtin, this implements eight separate builtins to match the
14530 // gcc implementation.
14531 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
14532 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
14533 llvm::Function *F = CGM.getIntrinsic(ID);
14534 return Builder.CreateCall(F, Ops);
14535 };
14536
14537 // For the vector forms of FP comparisons, translate the builtins directly to
14538 // IR.
14539 // TODO: The builtins could be removed if the SSE header files used vector
14540 // extension comparisons directly (vector ordered/unordered may need
14541 // additional support via __builtin_isnan()).
14542 auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,
14543 bool IsSignaling) {
14544 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
14545 Value *Cmp;
14546 if (IsSignaling)
14547 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
14548 else
14549 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
14550 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
14551 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
14552 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
14553 return Builder.CreateBitCast(Sext, FPVecTy);
14554 };
14555
14556 switch (BuiltinID) {
14557 default: return nullptr;
14558 case X86::BI_mm_prefetch: {
14559 Value *Address = Ops[0];
14560 ConstantInt *C = cast<ConstantInt>(Ops[1]);
14561 Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
14562 Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
14563 Value *Data = ConstantInt::get(Int32Ty, 1);
14564 Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
14565 return Builder.CreateCall(F, {Address, RW, Locality, Data});
14566 }
14567 case X86::BI_mm_clflush: {
14568 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
14569 Ops[0]);
14570 }
14571 case X86::BI_mm_lfence: {
14572 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
14573 }
14574 case X86::BI_mm_mfence: {
14575 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
14576 }
14577 case X86::BI_mm_sfence: {
14578 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
14579 }
14580 case X86::BI_mm_pause: {
14581 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
14582 }
14583 case X86::BI__rdtsc: {
14584 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
14585 }
14586 case X86::BI__builtin_ia32_rdtscp: {
14587 Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
14588 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
14589 Ops[0]);
14590 return Builder.CreateExtractValue(Call, 0);
14591 }
14592 case X86::BI__builtin_ia32_lzcnt_u16:
14593 case X86::BI__builtin_ia32_lzcnt_u32:
14594 case X86::BI__builtin_ia32_lzcnt_u64: {
14595 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
14596 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14597 }
14598 case X86::BI__builtin_ia32_tzcnt_u16:
14599 case X86::BI__builtin_ia32_tzcnt_u32:
14600 case X86::BI__builtin_ia32_tzcnt_u64: {
14601 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
14602 return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
14603 }
14604 case X86::BI__builtin_ia32_undef128:
14605 case X86::BI__builtin_ia32_undef256:
14606 case X86::BI__builtin_ia32_undef512:
14607 // The x86 definition of "undef" is not the same as the LLVM definition
14608 // (PR32176). We leave optimizing away an unnecessary zero constant to the
14609 // IR optimizer and backend.
14610 // TODO: If we had a "freeze" IR instruction to generate a fixed undef
14611 // value, we should use that here instead of a zero.
14612 return llvm::Constant::getNullValue(ConvertType(E->getType()));
14613 case X86::BI__builtin_ia32_vec_ext_v4hi:
14614 case X86::BI__builtin_ia32_vec_ext_v16qi:
14615 case X86::BI__builtin_ia32_vec_ext_v8hi:
14616 case X86::BI__builtin_ia32_vec_ext_v4si:
14617 case X86::BI__builtin_ia32_vec_ext_v4sf:
14618 case X86::BI__builtin_ia32_vec_ext_v2di:
14619 case X86::BI__builtin_ia32_vec_ext_v32qi:
14620 case X86::BI__builtin_ia32_vec_ext_v16hi:
14621 case X86::BI__builtin_ia32_vec_ext_v8si:
14622 case X86::BI__builtin_ia32_vec_ext_v4di: {
14623 unsigned NumElts =
14624 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14625 uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
14626 Index &= NumElts - 1;
14627 // These builtins exist so we can ensure the index is an ICE and in range.
14628 // Otherwise we could just do this in the header file.
14629 return Builder.CreateExtractElement(Ops[0], Index);
14630 }
14631 case X86::BI__builtin_ia32_vec_set_v4hi:
14632 case X86::BI__builtin_ia32_vec_set_v16qi:
14633 case X86::BI__builtin_ia32_vec_set_v8hi:
14634 case X86::BI__builtin_ia32_vec_set_v4si:
14635 case X86::BI__builtin_ia32_vec_set_v2di:
14636 case X86::BI__builtin_ia32_vec_set_v32qi:
14637 case X86::BI__builtin_ia32_vec_set_v16hi:
14638 case X86::BI__builtin_ia32_vec_set_v8si:
14639 case X86::BI__builtin_ia32_vec_set_v4di: {
14640 unsigned NumElts =
14641 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
14642 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
14643 Index &= NumElts - 1;
14644 // These builtins exist so we can ensure the index is an ICE and in range.
14645 // Otherwise we could just do this in the header file.
14646 return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
14647 }
14648 case X86::BI_mm_setcsr:
14649 case X86::BI__builtin_ia32_ldmxcsr: {
14650 RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());
14651 Builder.CreateStore(Ops[0], Tmp);
14652 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
14653 Tmp.getPointer());
14654 }
14655 case X86::BI_mm_getcsr:
14656 case X86::BI__builtin_ia32_stmxcsr: {
14657 RawAddress Tmp = CreateMemTemp(E->getType());
14658 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
14659 Tmp.getPointer());
14660 return Builder.CreateLoad(Tmp, "stmxcsr");
14661 }
14662 case X86::BI__builtin_ia32_xsave:
14663 case X86::BI__builtin_ia32_xsave64:
14664 case X86::BI__builtin_ia32_xrstor:
14665 case X86::BI__builtin_ia32_xrstor64:
14666 case X86::BI__builtin_ia32_xsaveopt:
14667 case X86::BI__builtin_ia32_xsaveopt64:
14668 case X86::BI__builtin_ia32_xrstors:
14669 case X86::BI__builtin_ia32_xrstors64:
14670 case X86::BI__builtin_ia32_xsavec:
14671 case X86::BI__builtin_ia32_xsavec64:
14672 case X86::BI__builtin_ia32_xsaves:
14673 case X86::BI__builtin_ia32_xsaves64:
14674 case X86::BI__builtin_ia32_xsetbv:
14675 case X86::BI_xsetbv: {
14676 Intrinsic::ID ID;
14677#define INTRINSIC_X86_XSAVE_ID(NAME) \
14678 case X86::BI__builtin_ia32_##NAME: \
14679 ID = Intrinsic::x86_##NAME; \
14680 break
14681 switch (BuiltinID) {
14682 default: llvm_unreachable("Unsupported intrinsic!");
14683 INTRINSIC_X86_XSAVE_ID(xsave);
14684 INTRINSIC_X86_XSAVE_ID(xsave64);
14685 INTRINSIC_X86_XSAVE_ID(xrstor);
14686 INTRINSIC_X86_XSAVE_ID(xrstor64);
14687 INTRINSIC_X86_XSAVE_ID(xsaveopt);
14688 INTRINSIC_X86_XSAVE_ID(xsaveopt64);
14689 INTRINSIC_X86_XSAVE_ID(xrstors);
14690 INTRINSIC_X86_XSAVE_ID(xrstors64);
14691 INTRINSIC_X86_XSAVE_ID(xsavec);
14692 INTRINSIC_X86_XSAVE_ID(xsavec64);
14693 INTRINSIC_X86_XSAVE_ID(xsaves);
14694 INTRINSIC_X86_XSAVE_ID(xsaves64);
14695 INTRINSIC_X86_XSAVE_ID(xsetbv);
14696 case X86::BI_xsetbv:
14697 ID = Intrinsic::x86_xsetbv;
14698 break;
14699 }
14700#undef INTRINSIC_X86_XSAVE_ID
14701 Value *Mhi = Builder.CreateTrunc(
14702 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
14703 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
14704 Ops[1] = Mhi;
14705 Ops.push_back(Mlo);
14706 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14707 }
14708 case X86::BI__builtin_ia32_xgetbv:
14709 case X86::BI_xgetbv:
14710 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
14711 case X86::BI__builtin_ia32_storedqudi128_mask:
14712 case X86::BI__builtin_ia32_storedqusi128_mask:
14713 case X86::BI__builtin_ia32_storedquhi128_mask:
14714 case X86::BI__builtin_ia32_storedquqi128_mask:
14715 case X86::BI__builtin_ia32_storeupd128_mask:
14716 case X86::BI__builtin_ia32_storeups128_mask:
14717 case X86::BI__builtin_ia32_storedqudi256_mask:
14718 case X86::BI__builtin_ia32_storedqusi256_mask:
14719 case X86::BI__builtin_ia32_storedquhi256_mask:
14720 case X86::BI__builtin_ia32_storedquqi256_mask:
14721 case X86::BI__builtin_ia32_storeupd256_mask:
14722 case X86::BI__builtin_ia32_storeups256_mask:
14723 case X86::BI__builtin_ia32_storedqudi512_mask:
14724 case X86::BI__builtin_ia32_storedqusi512_mask:
14725 case X86::BI__builtin_ia32_storedquhi512_mask:
14726 case X86::BI__builtin_ia32_storedquqi512_mask:
14727 case X86::BI__builtin_ia32_storeupd512_mask:
14728 case X86::BI__builtin_ia32_storeups512_mask:
14729 return EmitX86MaskedStore(*this, Ops, Align(1));
14730
14731 case X86::BI__builtin_ia32_storesh128_mask:
14732 case X86::BI__builtin_ia32_storess128_mask:
14733 case X86::BI__builtin_ia32_storesd128_mask:
14734 return EmitX86MaskedStore(*this, Ops, Align(1));
14735
14736 case X86::BI__builtin_ia32_vpopcntb_128:
14737 case X86::BI__builtin_ia32_vpopcntd_128:
14738 case X86::BI__builtin_ia32_vpopcntq_128:
14739 case X86::BI__builtin_ia32_vpopcntw_128:
14740 case X86::BI__builtin_ia32_vpopcntb_256:
14741 case X86::BI__builtin_ia32_vpopcntd_256:
14742 case X86::BI__builtin_ia32_vpopcntq_256:
14743 case X86::BI__builtin_ia32_vpopcntw_256:
14744 case X86::BI__builtin_ia32_vpopcntb_512:
14745 case X86::BI__builtin_ia32_vpopcntd_512:
14746 case X86::BI__builtin_ia32_vpopcntq_512:
14747 case X86::BI__builtin_ia32_vpopcntw_512: {
14748 llvm::Type *ResultType = ConvertType(E->getType());
14749 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
14750 return Builder.CreateCall(F, Ops);
14751 }
14752 case X86::BI__builtin_ia32_cvtmask2b128:
14753 case X86::BI__builtin_ia32_cvtmask2b256:
14754 case X86::BI__builtin_ia32_cvtmask2b512:
14755 case X86::BI__builtin_ia32_cvtmask2w128:
14756 case X86::BI__builtin_ia32_cvtmask2w256:
14757 case X86::BI__builtin_ia32_cvtmask2w512:
14758 case X86::BI__builtin_ia32_cvtmask2d128:
14759 case X86::BI__builtin_ia32_cvtmask2d256:
14760 case X86::BI__builtin_ia32_cvtmask2d512:
14761 case X86::BI__builtin_ia32_cvtmask2q128:
14762 case X86::BI__builtin_ia32_cvtmask2q256:
14763 case X86::BI__builtin_ia32_cvtmask2q512:
14764 return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
14765
14766 case X86::BI__builtin_ia32_cvtb2mask128:
14767 case X86::BI__builtin_ia32_cvtb2mask256:
14768 case X86::BI__builtin_ia32_cvtb2mask512:
14769 case X86::BI__builtin_ia32_cvtw2mask128:
14770 case X86::BI__builtin_ia32_cvtw2mask256:
14771 case X86::BI__builtin_ia32_cvtw2mask512:
14772 case X86::BI__builtin_ia32_cvtd2mask128:
14773 case X86::BI__builtin_ia32_cvtd2mask256:
14774 case X86::BI__builtin_ia32_cvtd2mask512:
14775 case X86::BI__builtin_ia32_cvtq2mask128:
14776 case X86::BI__builtin_ia32_cvtq2mask256:
14777 case X86::BI__builtin_ia32_cvtq2mask512:
14778 return EmitX86ConvertToMask(*this, Ops[0]);
14779
14780 case X86::BI__builtin_ia32_cvtdq2ps512_mask:
14781 case X86::BI__builtin_ia32_cvtqq2ps512_mask:
14782 case X86::BI__builtin_ia32_cvtqq2pd512_mask:
14783 case X86::BI__builtin_ia32_vcvtw2ph512_mask:
14784 case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
14785 case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
14786 case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
14787 case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
14788 case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
14789 case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
14790 case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
14791 case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
14792 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
14793 case X86::BI__builtin_ia32_cvtudq2ps512_mask:
14794 case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
14795 case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
14796 case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
14797 case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
14798 case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
14799 case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
14800 case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
14801 case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
14802 case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
14803 case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
14804 case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
14805 return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
14806
14807 case X86::BI__builtin_ia32_vfmaddss3:
14808 case X86::BI__builtin_ia32_vfmaddsd3:
14809 case X86::BI__builtin_ia32_vfmaddsh3_mask:
14810 case X86::BI__builtin_ia32_vfmaddss3_mask:
14811 case X86::BI__builtin_ia32_vfmaddsd3_mask:
14812 return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);
14813 case X86::BI__builtin_ia32_vfmaddss:
14814 case X86::BI__builtin_ia32_vfmaddsd:
14815 return EmitScalarFMAExpr(*this, E, Ops,
14816 Constant::getNullValue(Ops[0]->getType()));
14817 case X86::BI__builtin_ia32_vfmaddsh3_maskz:
14818 case X86::BI__builtin_ia32_vfmaddss3_maskz:
14819 case X86::BI__builtin_ia32_vfmaddsd3_maskz:
14820 return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);
14821 case X86::BI__builtin_ia32_vfmaddsh3_mask3:
14822 case X86::BI__builtin_ia32_vfmaddss3_mask3:
14823 case X86::BI__builtin_ia32_vfmaddsd3_mask3:
14824 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);
14825 case X86::BI__builtin_ia32_vfmsubsh3_mask3:
14826 case X86::BI__builtin_ia32_vfmsubss3_mask3:
14827 case X86::BI__builtin_ia32_vfmsubsd3_mask3:
14828 return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,
14829 /*NegAcc*/ true);
14830 case X86::BI__builtin_ia32_vfmaddph:
14831 case X86::BI__builtin_ia32_vfmaddps:
14832 case X86::BI__builtin_ia32_vfmaddpd:
14833 case X86::BI__builtin_ia32_vfmaddph256:
14834 case X86::BI__builtin_ia32_vfmaddps256:
14835 case X86::BI__builtin_ia32_vfmaddpd256:
14836 case X86::BI__builtin_ia32_vfmaddph512_mask:
14837 case X86::BI__builtin_ia32_vfmaddph512_maskz:
14838 case X86::BI__builtin_ia32_vfmaddph512_mask3:
14839 case X86::BI__builtin_ia32_vfmaddps512_mask:
14840 case X86::BI__builtin_ia32_vfmaddps512_maskz:
14841 case X86::BI__builtin_ia32_vfmaddps512_mask3:
14842 case X86::BI__builtin_ia32_vfmsubps512_mask3:
14843 case X86::BI__builtin_ia32_vfmaddpd512_mask:
14844 case X86::BI__builtin_ia32_vfmaddpd512_maskz:
14845 case X86::BI__builtin_ia32_vfmaddpd512_mask3:
14846 case X86::BI__builtin_ia32_vfmsubpd512_mask3:
14847 case X86::BI__builtin_ia32_vfmsubph512_mask3:
14848 case X86::BI__builtin_ia32_vfmaddph256_round_mask:
14849 case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
14850 case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
14851 case X86::BI__builtin_ia32_vfmaddps256_round_mask:
14852 case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
14853 case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
14854 case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
14855 case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
14856 case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
14857 case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
14858 case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
14859 case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
14860 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
14861 case X86::BI__builtin_ia32_vfmaddsubph512_mask:
14862 case X86::BI__builtin_ia32_vfmaddsubph512_maskz:
14863 case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
14864 case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
14865 case X86::BI__builtin_ia32_vfmaddsubps512_mask:
14866 case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
14867 case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
14868 case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
14869 case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
14870 case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
14871 case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
14872 case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
14873 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
14874 case X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
14875 case X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
14876 case X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
14877 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
14878 case X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
14879 case X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
14880 case X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
14881 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
14882 case X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
14883 case X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
14884 case X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
14885 return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);
14886
14887 case X86::BI__builtin_ia32_movdqa32store128_mask:
14888 case X86::BI__builtin_ia32_movdqa64store128_mask:
14889 case X86::BI__builtin_ia32_storeaps128_mask:
14890 case X86::BI__builtin_ia32_storeapd128_mask:
14891 case X86::BI__builtin_ia32_movdqa32store256_mask:
14892 case X86::BI__builtin_ia32_movdqa64store256_mask:
14893 case X86::BI__builtin_ia32_storeaps256_mask:
14894 case X86::BI__builtin_ia32_storeapd256_mask:
14895 case X86::BI__builtin_ia32_movdqa32store512_mask:
14896 case X86::BI__builtin_ia32_movdqa64store512_mask:
14897 case X86::BI__builtin_ia32_storeaps512_mask:
14898 case X86::BI__builtin_ia32_storeapd512_mask:
14899 return EmitX86MaskedStore(
14900 *this, Ops,
14901 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14902
14903 case X86::BI__builtin_ia32_loadups128_mask:
14904 case X86::BI__builtin_ia32_loadups256_mask:
14905 case X86::BI__builtin_ia32_loadups512_mask:
14906 case X86::BI__builtin_ia32_loadupd128_mask:
14907 case X86::BI__builtin_ia32_loadupd256_mask:
14908 case X86::BI__builtin_ia32_loadupd512_mask:
14909 case X86::BI__builtin_ia32_loaddquqi128_mask:
14910 case X86::BI__builtin_ia32_loaddquqi256_mask:
14911 case X86::BI__builtin_ia32_loaddquqi512_mask:
14912 case X86::BI__builtin_ia32_loaddquhi128_mask:
14913 case X86::BI__builtin_ia32_loaddquhi256_mask:
14914 case X86::BI__builtin_ia32_loaddquhi512_mask:
14915 case X86::BI__builtin_ia32_loaddqusi128_mask:
14916 case X86::BI__builtin_ia32_loaddqusi256_mask:
14917 case X86::BI__builtin_ia32_loaddqusi512_mask:
14918 case X86::BI__builtin_ia32_loaddqudi128_mask:
14919 case X86::BI__builtin_ia32_loaddqudi256_mask:
14920 case X86::BI__builtin_ia32_loaddqudi512_mask:
14921 return EmitX86MaskedLoad(*this, Ops, Align(1));
14922
14923 case X86::BI__builtin_ia32_loadsh128_mask:
14924 case X86::BI__builtin_ia32_loadss128_mask:
14925 case X86::BI__builtin_ia32_loadsd128_mask:
14926 return EmitX86MaskedLoad(*this, Ops, Align(1));
14927
14928 case X86::BI__builtin_ia32_loadaps128_mask:
14929 case X86::BI__builtin_ia32_loadaps256_mask:
14930 case X86::BI__builtin_ia32_loadaps512_mask:
14931 case X86::BI__builtin_ia32_loadapd128_mask:
14932 case X86::BI__builtin_ia32_loadapd256_mask:
14933 case X86::BI__builtin_ia32_loadapd512_mask:
14934 case X86::BI__builtin_ia32_movdqa32load128_mask:
14935 case X86::BI__builtin_ia32_movdqa32load256_mask:
14936 case X86::BI__builtin_ia32_movdqa32load512_mask:
14937 case X86::BI__builtin_ia32_movdqa64load128_mask:
14938 case X86::BI__builtin_ia32_movdqa64load256_mask:
14939 case X86::BI__builtin_ia32_movdqa64load512_mask:
14940 return EmitX86MaskedLoad(
14941 *this, Ops,
14942 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());
14943
14944 case X86::BI__builtin_ia32_expandloaddf128_mask:
14945 case X86::BI__builtin_ia32_expandloaddf256_mask:
14946 case X86::BI__builtin_ia32_expandloaddf512_mask:
14947 case X86::BI__builtin_ia32_expandloadsf128_mask:
14948 case X86::BI__builtin_ia32_expandloadsf256_mask:
14949 case X86::BI__builtin_ia32_expandloadsf512_mask:
14950 case X86::BI__builtin_ia32_expandloaddi128_mask:
14951 case X86::BI__builtin_ia32_expandloaddi256_mask:
14952 case X86::BI__builtin_ia32_expandloaddi512_mask:
14953 case X86::BI__builtin_ia32_expandloadsi128_mask:
14954 case X86::BI__builtin_ia32_expandloadsi256_mask:
14955 case X86::BI__builtin_ia32_expandloadsi512_mask:
14956 case X86::BI__builtin_ia32_expandloadhi128_mask:
14957 case X86::BI__builtin_ia32_expandloadhi256_mask:
14958 case X86::BI__builtin_ia32_expandloadhi512_mask:
14959 case X86::BI__builtin_ia32_expandloadqi128_mask:
14960 case X86::BI__builtin_ia32_expandloadqi256_mask:
14961 case X86::BI__builtin_ia32_expandloadqi512_mask:
14962 return EmitX86ExpandLoad(*this, Ops);
14963
14964 case X86::BI__builtin_ia32_compressstoredf128_mask:
14965 case X86::BI__builtin_ia32_compressstoredf256_mask:
14966 case X86::BI__builtin_ia32_compressstoredf512_mask:
14967 case X86::BI__builtin_ia32_compressstoresf128_mask:
14968 case X86::BI__builtin_ia32_compressstoresf256_mask:
14969 case X86::BI__builtin_ia32_compressstoresf512_mask:
14970 case X86::BI__builtin_ia32_compressstoredi128_mask:
14971 case X86::BI__builtin_ia32_compressstoredi256_mask:
14972 case X86::BI__builtin_ia32_compressstoredi512_mask:
14973 case X86::BI__builtin_ia32_compressstoresi128_mask:
14974 case X86::BI__builtin_ia32_compressstoresi256_mask:
14975 case X86::BI__builtin_ia32_compressstoresi512_mask:
14976 case X86::BI__builtin_ia32_compressstorehi128_mask:
14977 case X86::BI__builtin_ia32_compressstorehi256_mask:
14978 case X86::BI__builtin_ia32_compressstorehi512_mask:
14979 case X86::BI__builtin_ia32_compressstoreqi128_mask:
14980 case X86::BI__builtin_ia32_compressstoreqi256_mask:
14981 case X86::BI__builtin_ia32_compressstoreqi512_mask:
14982 return EmitX86CompressStore(*this, Ops);
14983
14984 case X86::BI__builtin_ia32_expanddf128_mask:
14985 case X86::BI__builtin_ia32_expanddf256_mask:
14986 case X86::BI__builtin_ia32_expanddf512_mask:
14987 case X86::BI__builtin_ia32_expandsf128_mask:
14988 case X86::BI__builtin_ia32_expandsf256_mask:
14989 case X86::BI__builtin_ia32_expandsf512_mask:
14990 case X86::BI__builtin_ia32_expanddi128_mask:
14991 case X86::BI__builtin_ia32_expanddi256_mask:
14992 case X86::BI__builtin_ia32_expanddi512_mask:
14993 case X86::BI__builtin_ia32_expandsi128_mask:
14994 case X86::BI__builtin_ia32_expandsi256_mask:
14995 case X86::BI__builtin_ia32_expandsi512_mask:
14996 case X86::BI__builtin_ia32_expandhi128_mask:
14997 case X86::BI__builtin_ia32_expandhi256_mask:
14998 case X86::BI__builtin_ia32_expandhi512_mask:
14999 case X86::BI__builtin_ia32_expandqi128_mask:
15000 case X86::BI__builtin_ia32_expandqi256_mask:
15001 case X86::BI__builtin_ia32_expandqi512_mask:
15002 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
15003
15004 case X86::BI__builtin_ia32_compressdf128_mask:
15005 case X86::BI__builtin_ia32_compressdf256_mask:
15006 case X86::BI__builtin_ia32_compressdf512_mask:
15007 case X86::BI__builtin_ia32_compresssf128_mask:
15008 case X86::BI__builtin_ia32_compresssf256_mask:
15009 case X86::BI__builtin_ia32_compresssf512_mask:
15010 case X86::BI__builtin_ia32_compressdi128_mask:
15011 case X86::BI__builtin_ia32_compressdi256_mask:
15012 case X86::BI__builtin_ia32_compressdi512_mask:
15013 case X86::BI__builtin_ia32_compresssi128_mask:
15014 case X86::BI__builtin_ia32_compresssi256_mask:
15015 case X86::BI__builtin_ia32_compresssi512_mask:
15016 case X86::BI__builtin_ia32_compresshi128_mask:
15017 case X86::BI__builtin_ia32_compresshi256_mask:
15018 case X86::BI__builtin_ia32_compresshi512_mask:
15019 case X86::BI__builtin_ia32_compressqi128_mask:
15020 case X86::BI__builtin_ia32_compressqi256_mask:
15021 case X86::BI__builtin_ia32_compressqi512_mask:
15022 return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
15023
15024 case X86::BI__builtin_ia32_gather3div2df:
15025 case X86::BI__builtin_ia32_gather3div2di:
15026 case X86::BI__builtin_ia32_gather3div4df:
15027 case X86::BI__builtin_ia32_gather3div4di:
15028 case X86::BI__builtin_ia32_gather3div4sf:
15029 case X86::BI__builtin_ia32_gather3div4si:
15030 case X86::BI__builtin_ia32_gather3div8sf:
15031 case X86::BI__builtin_ia32_gather3div8si:
15032 case X86::BI__builtin_ia32_gather3siv2df:
15033 case X86::BI__builtin_ia32_gather3siv2di:
15034 case X86::BI__builtin_ia32_gather3siv4df:
15035 case X86::BI__builtin_ia32_gather3siv4di:
15036 case X86::BI__builtin_ia32_gather3siv4sf:
15037 case X86::BI__builtin_ia32_gather3siv4si:
15038 case X86::BI__builtin_ia32_gather3siv8sf:
15039 case X86::BI__builtin_ia32_gather3siv8si:
15040 case X86::BI__builtin_ia32_gathersiv8df:
15041 case X86::BI__builtin_ia32_gathersiv16sf:
15042 case X86::BI__builtin_ia32_gatherdiv8df:
15043 case X86::BI__builtin_ia32_gatherdiv16sf:
15044 case X86::BI__builtin_ia32_gathersiv8di:
15045 case X86::BI__builtin_ia32_gathersiv16si:
15046 case X86::BI__builtin_ia32_gatherdiv8di:
15047 case X86::BI__builtin_ia32_gatherdiv16si: {
15048 Intrinsic::ID IID;
15049 switch (BuiltinID) {
15050 default: llvm_unreachable("Unexpected builtin");
15051 case X86::BI__builtin_ia32_gather3div2df:
15052 IID = Intrinsic::x86_avx512_mask_gather3div2_df;
15053 break;
15054 case X86::BI__builtin_ia32_gather3div2di:
15055 IID = Intrinsic::x86_avx512_mask_gather3div2_di;
15056 break;
15057 case X86::BI__builtin_ia32_gather3div4df:
15058 IID = Intrinsic::x86_avx512_mask_gather3div4_df;
15059 break;
15060 case X86::BI__builtin_ia32_gather3div4di:
15061 IID = Intrinsic::x86_avx512_mask_gather3div4_di;
15062 break;
15063 case X86::BI__builtin_ia32_gather3div4sf:
15064 IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
15065 break;
15066 case X86::BI__builtin_ia32_gather3div4si:
15067 IID = Intrinsic::x86_avx512_mask_gather3div4_si;
15068 break;
15069 case X86::BI__builtin_ia32_gather3div8sf:
15070 IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
15071 break;
15072 case X86::BI__builtin_ia32_gather3div8si:
15073 IID = Intrinsic::x86_avx512_mask_gather3div8_si;
15074 break;
15075 case X86::BI__builtin_ia32_gather3siv2df:
15076 IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
15077 break;
15078 case X86::BI__builtin_ia32_gather3siv2di:
15079 IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
15080 break;
15081 case X86::BI__builtin_ia32_gather3siv4df:
15082 IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
15083 break;
15084 case X86::BI__builtin_ia32_gather3siv4di:
15085 IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
15086 break;
15087 case X86::BI__builtin_ia32_gather3siv4sf:
15088 IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
15089 break;
15090 case X86::BI__builtin_ia32_gather3siv4si:
15091 IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
15092 break;
15093 case X86::BI__builtin_ia32_gather3siv8sf:
15094 IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
15095 break;
15096 case X86::BI__builtin_ia32_gather3siv8si:
15097 IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
15098 break;
15099 case X86::BI__builtin_ia32_gathersiv8df:
15100 IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
15101 break;
15102 case X86::BI__builtin_ia32_gathersiv16sf:
15103 IID = Intrinsic::x86_avx512_mask_gather_dps_512;
15104 break;
15105 case X86::BI__builtin_ia32_gatherdiv8df:
15106 IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
15107 break;
15108 case X86::BI__builtin_ia32_gatherdiv16sf:
15109 IID = Intrinsic::x86_avx512_mask_gather_qps_512;
15110 break;
15111 case X86::BI__builtin_ia32_gathersiv8di:
15112 IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
15113 break;
15114 case X86::BI__builtin_ia32_gathersiv16si:
15115 IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
15116 break;
15117 case X86::BI__builtin_ia32_gatherdiv8di:
15118 IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
15119 break;
15120 case X86::BI__builtin_ia32_gatherdiv16si:
15121 IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
15122 break;
15123 }
15124
15125 unsigned MinElts = std::min(
15126 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),
15127 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());
15128 Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
15129 Function *Intr = CGM.getIntrinsic(IID);
15130 return Builder.CreateCall(Intr, Ops);
15131 }
15132
15133 case X86::BI__builtin_ia32_scattersiv8df:
15134 case X86::BI__builtin_ia32_scattersiv16sf:
15135 case X86::BI__builtin_ia32_scatterdiv8df:
15136 case X86::BI__builtin_ia32_scatterdiv16sf:
15137 case X86::BI__builtin_ia32_scattersiv8di:
15138 case X86::BI__builtin_ia32_scattersiv16si:
15139 case X86::BI__builtin_ia32_scatterdiv8di:
15140 case X86::BI__builtin_ia32_scatterdiv16si:
15141 case X86::BI__builtin_ia32_scatterdiv2df:
15142 case X86::BI__builtin_ia32_scatterdiv2di:
15143 case X86::BI__builtin_ia32_scatterdiv4df:
15144 case X86::BI__builtin_ia32_scatterdiv4di:
15145 case X86::BI__builtin_ia32_scatterdiv4sf:
15146 case X86::BI__builtin_ia32_scatterdiv4si:
15147 case X86::BI__builtin_ia32_scatterdiv8sf:
15148 case X86::BI__builtin_ia32_scatterdiv8si:
15149 case X86::BI__builtin_ia32_scattersiv2df:
15150 case X86::BI__builtin_ia32_scattersiv2di:
15151 case X86::BI__builtin_ia32_scattersiv4df:
15152 case X86::BI__builtin_ia32_scattersiv4di:
15153 case X86::BI__builtin_ia32_scattersiv4sf:
15154 case X86::BI__builtin_ia32_scattersiv4si:
15155 case X86::BI__builtin_ia32_scattersiv8sf:
15156 case X86::BI__builtin_ia32_scattersiv8si: {
15157 Intrinsic::ID IID;
15158 switch (BuiltinID) {
15159 default: llvm_unreachable("Unexpected builtin");
15160 case X86::BI__builtin_ia32_scattersiv8df:
15161 IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
15162 break;
15163 case X86::BI__builtin_ia32_scattersiv16sf:
15164 IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
15165 break;
15166 case X86::BI__builtin_ia32_scatterdiv8df:
15167 IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
15168 break;
15169 case X86::BI__builtin_ia32_scatterdiv16sf:
15170 IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
15171 break;
15172 case X86::BI__builtin_ia32_scattersiv8di:
15173 IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
15174 break;
15175 case X86::BI__builtin_ia32_scattersiv16si:
15176 IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
15177 break;
15178 case X86::BI__builtin_ia32_scatterdiv8di:
15179 IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
15180 break;
15181 case X86::BI__builtin_ia32_scatterdiv16si:
15182 IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
15183 break;
15184 case X86::BI__builtin_ia32_scatterdiv2df:
15185 IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
15186 break;
15187 case X86::BI__builtin_ia32_scatterdiv2di:
15188 IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
15189 break;
15190 case X86::BI__builtin_ia32_scatterdiv4df:
15191 IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
15192 break;
15193 case X86::BI__builtin_ia32_scatterdiv4di:
15194 IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
15195 break;
15196 case X86::BI__builtin_ia32_scatterdiv4sf:
15197 IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
15198 break;
15199 case X86::BI__builtin_ia32_scatterdiv4si:
15200 IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
15201 break;
15202 case X86::BI__builtin_ia32_scatterdiv8sf:
15203 IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
15204 break;
15205 case X86::BI__builtin_ia32_scatterdiv8si:
15206 IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
15207 break;
15208 case X86::BI__builtin_ia32_scattersiv2df:
15209 IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
15210 break;
15211 case X86::BI__builtin_ia32_scattersiv2di:
15212 IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
15213 break;
15214 case X86::BI__builtin_ia32_scattersiv4df:
15215 IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
15216 break;
15217 case X86::BI__builtin_ia32_scattersiv4di:
15218 IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
15219 break;
15220 case X86::BI__builtin_ia32_scattersiv4sf:
15221 IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
15222 break;
15223 case X86::BI__builtin_ia32_scattersiv4si:
15224 IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
15225 break;
15226 case X86::BI__builtin_ia32_scattersiv8sf:
15227 IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
15228 break;
15229 case X86::BI__builtin_ia32_scattersiv8si:
15230 IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
15231 break;
15232 }
15233
15234 unsigned MinElts = std::min(
15235 cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),
15236 cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());
15237 Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
15238 Function *Intr = CGM.getIntrinsic(IID);
15239 return Builder.CreateCall(Intr, Ops);
15240 }
15241
15242 case X86::BI__builtin_ia32_vextractf128_pd256:
15243 case X86::BI__builtin_ia32_vextractf128_ps256:
15244 case X86::BI__builtin_ia32_vextractf128_si256:
15245 case X86::BI__builtin_ia32_extract128i256:
15246 case X86::BI__builtin_ia32_extractf64x4_mask:
15247 case X86::BI__builtin_ia32_extractf32x4_mask:
15248 case X86::BI__builtin_ia32_extracti64x4_mask:
15249 case X86::BI__builtin_ia32_extracti32x4_mask:
15250 case X86::BI__builtin_ia32_extractf32x8_mask:
15251 case X86::BI__builtin_ia32_extracti32x8_mask:
15252 case X86::BI__builtin_ia32_extractf32x4_256_mask:
15253 case X86::BI__builtin_ia32_extracti32x4_256_mask:
15254 case X86::BI__builtin_ia32_extractf64x2_256_mask:
15255 case X86::BI__builtin_ia32_extracti64x2_256_mask:
15256 case X86::BI__builtin_ia32_extractf64x2_512_mask:
15257 case X86::BI__builtin_ia32_extracti64x2_512_mask: {
15258 auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));
15259 unsigned NumElts = DstTy->getNumElements();
15260 unsigned SrcNumElts =
15261 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15262 unsigned SubVectors = SrcNumElts / NumElts;
15263 unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
15264 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15265 Index &= SubVectors - 1; // Remove any extra bits.
15266 Index *= NumElts;
15267
15268 int Indices[16];
15269 for (unsigned i = 0; i != NumElts; ++i)
15270 Indices[i] = i + Index;
15271
15272 Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15273 "extract");
15274
15275 if (Ops.size() == 4)
15276 Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
15277
15278 return Res;
15279 }
15280 case X86::BI__builtin_ia32_vinsertf128_pd256:
15281 case X86::BI__builtin_ia32_vinsertf128_ps256:
15282 case X86::BI__builtin_ia32_vinsertf128_si256:
15283 case X86::BI__builtin_ia32_insert128i256:
15284 case X86::BI__builtin_ia32_insertf64x4:
15285 case X86::BI__builtin_ia32_insertf32x4:
15286 case X86::BI__builtin_ia32_inserti64x4:
15287 case X86::BI__builtin_ia32_inserti32x4:
15288 case X86::BI__builtin_ia32_insertf32x8:
15289 case X86::BI__builtin_ia32_inserti32x8:
15290 case X86::BI__builtin_ia32_insertf32x4_256:
15291 case X86::BI__builtin_ia32_inserti32x4_256:
15292 case X86::BI__builtin_ia32_insertf64x2_256:
15293 case X86::BI__builtin_ia32_inserti64x2_256:
15294 case X86::BI__builtin_ia32_insertf64x2_512:
15295 case X86::BI__builtin_ia32_inserti64x2_512: {
15296 unsigned DstNumElts =
15297 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15298 unsigned SrcNumElts =
15299 cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();
15300 unsigned SubVectors = DstNumElts / SrcNumElts;
15301 unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
15302 assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
15303 Index &= SubVectors - 1; // Remove any extra bits.
15304 Index *= SrcNumElts;
15305
15306 int Indices[16];
15307 for (unsigned i = 0; i != DstNumElts; ++i)
15308 Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
15309
15310 Value *Op1 = Builder.CreateShuffleVector(
15311 Ops[1], ArrayRef(Indices, DstNumElts), "widen");
15312
15313 for (unsigned i = 0; i != DstNumElts; ++i) {
15314 if (i >= Index && i < (Index + SrcNumElts))
15315 Indices[i] = (i - Index) + DstNumElts;
15316 else
15317 Indices[i] = i;
15318 }
15319
15320 return Builder.CreateShuffleVector(Ops[0], Op1,
15321 ArrayRef(Indices, DstNumElts), "insert");
15322 }
15323 case X86::BI__builtin_ia32_pmovqd512_mask:
15324 case X86::BI__builtin_ia32_pmovwb512_mask: {
15325 Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15326 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
15327 }
15328 case X86::BI__builtin_ia32_pmovdb512_mask:
15329 case X86::BI__builtin_ia32_pmovdw512_mask:
15330 case X86::BI__builtin_ia32_pmovqw512_mask: {
15331 if (const auto *C = dyn_cast<Constant>(Ops[2]))
15332 if (C->isAllOnesValue())
15333 return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
15334
15335 Intrinsic::ID IID;
15336 switch (BuiltinID) {
15337 default: llvm_unreachable("Unsupported intrinsic!");
15338 case X86::BI__builtin_ia32_pmovdb512_mask:
15339 IID = Intrinsic::x86_avx512_mask_pmov_db_512;
15340 break;
15341 case X86::BI__builtin_ia32_pmovdw512_mask:
15342 IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
15343 break;
15344 case X86::BI__builtin_ia32_pmovqw512_mask:
15345 IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
15346 break;
15347 }
15348
15349 Function *Intr = CGM.getIntrinsic(IID);
15350 return Builder.CreateCall(Intr, Ops);
15351 }
15352 case X86::BI__builtin_ia32_pblendw128:
15353 case X86::BI__builtin_ia32_blendpd:
15354 case X86::BI__builtin_ia32_blendps:
15355 case X86::BI__builtin_ia32_blendpd256:
15356 case X86::BI__builtin_ia32_blendps256:
15357 case X86::BI__builtin_ia32_pblendw256:
15358 case X86::BI__builtin_ia32_pblendd128:
15359 case X86::BI__builtin_ia32_pblendd256: {
15360 unsigned NumElts =
15361 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15362 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15363
15364 int Indices[16];
15365 // If there are more than 8 elements, the immediate is used twice so make
15366 // sure we handle that.
15367 for (unsigned i = 0; i != NumElts; ++i)
15368 Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
15369
15370 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15371 ArrayRef(Indices, NumElts), "blend");
15372 }
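// A minimal standalone sketch of the blend index computation above (hypothetical
// helper, not an LLVM API): bit i%8 of the immediate selects the second shuffle
// source (indices NumElts..2*NumElts-1), otherwise element i of the first source is
// kept, so vectors wider than 8 elements simply reuse the 8-bit immediate.
#include <vector>
static std::vector<int> blendIndices(unsigned NumElts, unsigned Imm) {
  std::vector<int> Indices(NumElts);
  for (unsigned i = 0; i != NumElts; ++i)
    Indices[i] = ((Imm >> (i % 8)) & 0x1) ? int(NumElts + i) : int(i);
  return Indices;
}
// For example, blendIndices(8, 0xB1) yields {8, 1, 2, 3, 12, 13, 6, 15}.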
15373 case X86::BI__builtin_ia32_pshuflw:
15374 case X86::BI__builtin_ia32_pshuflw256:
15375 case X86::BI__builtin_ia32_pshuflw512: {
15376 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15377 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15378 unsigned NumElts = Ty->getNumElements();
15379
15380 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15381 Imm = (Imm & 0xff) * 0x01010101;
15382
15383 int Indices[32];
15384 for (unsigned l = 0; l != NumElts; l += 8) {
15385 for (unsigned i = 0; i != 4; ++i) {
15386 Indices[l + i] = l + (Imm & 3);
15387 Imm >>= 2;
15388 }
15389 for (unsigned i = 4; i != 8; ++i)
15390 Indices[l + i] = l + i;
15391 }
15392
15393 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15394 "pshuflw");
15395 }
15396 case X86::BI__builtin_ia32_pshufhw:
15397 case X86::BI__builtin_ia32_pshufhw256:
15398 case X86::BI__builtin_ia32_pshufhw512: {
15399 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15400 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15401 unsigned NumElts = Ty->getNumElements();
15402
15403 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15404 Imm = (Imm & 0xff) * 0x01010101;
15405
15406 int Indices[32];
15407 for (unsigned l = 0; l != NumElts; l += 8) {
15408 for (unsigned i = 0; i != 4; ++i)
15409 Indices[l + i] = l + i;
15410 for (unsigned i = 4; i != 8; ++i) {
15411 Indices[l + i] = l + 4 + (Imm & 3);
15412 Imm >>= 2;
15413 }
15414 }
15415
15416 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15417 "pshufhw");
15418 }
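// Standalone sketch (illustrative helper, not an LLVM API) of the "splat the
// immediate" trick used by the pshuflw/pshufhw cases above: copying the 8-bit
// immediate into every byte of a 32-bit value lets the per-lane loop keep consuming
// 2-bit fields with `Imm & 3; Imm >>= 2` without reloading the immediate when the
// 512-bit form walks more than one 128-bit lane.
#include <cstdint>
#include <vector>
static std::vector<int> pshuflwIndices(unsigned NumElts, uint32_t Imm) {
  Imm = (Imm & 0xff) * 0x01010101;      // splat the low byte into all four bytes
  std::vector<int> Indices(NumElts);
  for (unsigned l = 0; l != NumElts; l += 8) {
    for (unsigned i = 0; i != 4; ++i) { // low four words of each lane are permuted
      Indices[l + i] = l + (Imm & 3);
      Imm >>= 2;
    }
    for (unsigned i = 4; i != 8; ++i)   // high four words pass through unchanged
      Indices[l + i] = l + i;
  }
  return Indices;
}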
15419 case X86::BI__builtin_ia32_pshufd:
15420 case X86::BI__builtin_ia32_pshufd256:
15421 case X86::BI__builtin_ia32_pshufd512:
15422 case X86::BI__builtin_ia32_vpermilpd:
15423 case X86::BI__builtin_ia32_vpermilps:
15424 case X86::BI__builtin_ia32_vpermilpd256:
15425 case X86::BI__builtin_ia32_vpermilps256:
15426 case X86::BI__builtin_ia32_vpermilpd512:
15427 case X86::BI__builtin_ia32_vpermilps512: {
15428 uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15429 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15430 unsigned NumElts = Ty->getNumElements();
15431 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15432 unsigned NumLaneElts = NumElts / NumLanes;
15433
15434 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15435 Imm = (Imm & 0xff) * 0x01010101;
15436
15437 int Indices[16];
15438 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15439 for (unsigned i = 0; i != NumLaneElts; ++i) {
15440 Indices[i + l] = (Imm % NumLaneElts) + l;
15441 Imm /= NumLaneElts;
15442 }
15443 }
15444
15445 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15446 "permil");
15447 }
15448 case X86::BI__builtin_ia32_shufpd:
15449 case X86::BI__builtin_ia32_shufpd256:
15450 case X86::BI__builtin_ia32_shufpd512:
15451 case X86::BI__builtin_ia32_shufps:
15452 case X86::BI__builtin_ia32_shufps256:
15453 case X86::BI__builtin_ia32_shufps512: {
15454 uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15455 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15456 unsigned NumElts = Ty->getNumElements();
15457 unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
15458 unsigned NumLaneElts = NumElts / NumLanes;
15459
15460 // Splat the 8-bits of immediate 4 times to help the loop wrap around.
15461 Imm = (Imm & 0xff) * 0x01010101;
15462
15463 int Indices[16];
15464 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15465 for (unsigned i = 0; i != NumLaneElts; ++i) {
15466 unsigned Index = Imm % NumLaneElts;
15467 Imm /= NumLaneElts;
15468 if (i >= (NumLaneElts / 2))
15469 Index += NumElts;
15470 Indices[l + i] = l + Index;
15471 }
15472 }
15473
15474 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15475 ArrayRef(Indices, NumElts), "shufp");
15476 }
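// Standalone sketch (illustrative helper, not an LLVM API) of the per-lane immediate
// decode shared by the pshufd/vpermil case and the shufp case above: each 128-bit
// lane consumes log2(NumLaneElts) immediate bits per element, and in the two-source
// shufp form the upper half of every lane reads from the second operand, hence the
// NumElts offset.
#include <cstdint>
#include <vector>
static std::vector<int> shufpIndices(unsigned NumElts, unsigned NumLaneElts,
                                     uint32_t Imm) {
  Imm = (Imm & 0xff) * 0x01010101;      // same splat trick as the shuffles above
  std::vector<int> Indices(NumElts);
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Index = Imm % NumLaneElts;
      Imm /= NumLaneElts;
      if (i >= NumLaneElts / 2)
        Index += NumElts;               // upper half of the lane: second source
      Indices[l + i] = l + Index;
    }
  }
  return Indices;
}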
15477 case X86::BI__builtin_ia32_permdi256:
15478 case X86::BI__builtin_ia32_permdf256:
15479 case X86::BI__builtin_ia32_permdi512:
15480 case X86::BI__builtin_ia32_permdf512: {
15481 unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
15482 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15483 unsigned NumElts = Ty->getNumElements();
15484
15485 // These intrinsics operate on 256-bit lanes of four 64-bit elements.
15486 int Indices[8];
15487 for (unsigned l = 0; l != NumElts; l += 4)
15488 for (unsigned i = 0; i != 4; ++i)
15489 Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
15490
15491 return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),
15492 "perm");
15493 }
15494 case X86::BI__builtin_ia32_palignr128:
15495 case X86::BI__builtin_ia32_palignr256:
15496 case X86::BI__builtin_ia32_palignr512: {
15497 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15498
15499 unsigned NumElts =
15500 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15501 assert(NumElts % 16 == 0);
15502
15503 // If palignr is shifting the pair of vectors more than the size of two
15504 // lanes, emit zero.
15505 if (ShiftVal >= 32)
15506 return llvm::Constant::getNullValue(ConvertType(E->getType()));
15507
15508 // If palignr is shifting the pair of input vectors more than one lane,
15509 // but less than two lanes, convert to shifting in zeroes.
15510 if (ShiftVal > 16) {
15511 ShiftVal -= 16;
15512 Ops[1] = Ops[0];
15513 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
15514 }
15515
15516 int Indices[64];
15517 // 256-bit palignr operates on 128-bit lanes so we need to handle that
15518 for (unsigned l = 0; l != NumElts; l += 16) {
15519 for (unsigned i = 0; i != 16; ++i) {
15520 unsigned Idx = ShiftVal + i;
15521 if (Idx >= 16)
15522 Idx += NumElts - 16; // End of lane, switch operand.
15523 Indices[l + i] = Idx + l;
15524 }
15525 }
15526
15527 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15528 ArrayRef(Indices, NumElts), "palignr");
15529 }
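// Standalone sketch (illustrative helper, not an LLVM API) of the palignr index
// construction above: within each 128-bit lane, result byte i comes from byte
// ShiftVal+i of the concatenated pair, and any index past the low 16 bytes is
// redirected into the other shuffle operand.
#include <vector>
static std::vector<int> palignrIndices(unsigned NumElts, unsigned ShiftVal) {
  std::vector<int> Indices(NumElts);
  for (unsigned l = 0; l != NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (Idx >= 16)
        Idx += NumElts - 16;            // end of lane: switch to the other operand
      Indices[l + i] = Idx + l;
    }
  }
  return Indices;
}
// For example, palignrIndices(16, 4) yields {4..15, 16..19}: bytes 4..15 of the
// first shuffle operand followed by bytes 0..3 of the second.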
15530 case X86::BI__builtin_ia32_alignd128:
15531 case X86::BI__builtin_ia32_alignd256:
15532 case X86::BI__builtin_ia32_alignd512:
15533 case X86::BI__builtin_ia32_alignq128:
15534 case X86::BI__builtin_ia32_alignq256:
15535 case X86::BI__builtin_ia32_alignq512: {
15536 unsigned NumElts =
15537 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15538 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
15539
15540 // Mask the shift amount to width of a vector.
15541 ShiftVal &= NumElts - 1;
15542
15543 int Indices[16];
15544 for (unsigned i = 0; i != NumElts; ++i)
15545 Indices[i] = i + ShiftVal;
15546
15547 return Builder.CreateShuffleVector(Ops[1], Ops[0],
15548 ArrayRef(Indices, NumElts), "valign");
15549 }
15550 case X86::BI__builtin_ia32_shuf_f32x4_256:
15551 case X86::BI__builtin_ia32_shuf_f64x2_256:
15552 case X86::BI__builtin_ia32_shuf_i32x4_256:
15553 case X86::BI__builtin_ia32_shuf_i64x2_256:
15554 case X86::BI__builtin_ia32_shuf_f32x4:
15555 case X86::BI__builtin_ia32_shuf_f64x2:
15556 case X86::BI__builtin_ia32_shuf_i32x4:
15557 case X86::BI__builtin_ia32_shuf_i64x2: {
15558 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15559 auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());
15560 unsigned NumElts = Ty->getNumElements();
15561 unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
15562 unsigned NumLaneElts = NumElts / NumLanes;
15563
15564 int Indices[16];
15565 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
15566 unsigned Index = (Imm % NumLanes) * NumLaneElts;
15567 Imm /= NumLanes; // Discard the bits we just used.
15568 if (l >= (NumElts / 2))
15569 Index += NumElts; // Switch to other source.
15570 for (unsigned i = 0; i != NumLaneElts; ++i) {
15571 Indices[l + i] = Index + i;
15572 }
15573 }
15574
15575 return Builder.CreateShuffleVector(Ops[0], Ops[1],
15576 ArrayRef(Indices, NumElts), "shuf");
15577 }
15578
15579 case X86::BI__builtin_ia32_vperm2f128_pd256:
15580 case X86::BI__builtin_ia32_vperm2f128_ps256:
15581 case X86::BI__builtin_ia32_vperm2f128_si256:
15582 case X86::BI__builtin_ia32_permti256: {
15583 unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
15584 unsigned NumElts =
15585 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
15586
15587 // This takes a very simple approach since there are two lanes and a
15588 // shuffle can have 2 inputs. So we reserve the first input for the first
15589 // lane and the second input for the second lane. This may result in
15590 // duplicate sources, but this can be dealt with in the backend.
15591
15592 Value *OutOps[2];
15593 int Indices[8];
15594 for (unsigned l = 0; l != 2; ++l) {
15595 // Determine the source for this lane.
15596 if (Imm & (1 << ((l * 4) + 3)))
15597 OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
15598 else if (Imm & (1 << ((l * 4) + 1)))
15599 OutOps[l] = Ops[1];
15600 else
15601 OutOps[l] = Ops[0];
15602
15603 for (unsigned i = 0; i != NumElts/2; ++i) {
15604 // Start with ith element of the source for this lane.
15605 unsigned Idx = (l * NumElts) + i;
15606 // If bit 0 of the immediate half is set, switch to the high half of
15607 // the source.
15608 if (Imm & (1 << (l * 4)))
15609 Idx += NumElts/2;
15610 Indices[(l * (NumElts/2)) + i] = Idx;
15611 }
15612 }
15613
15614 return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
15615 ArrayRef(Indices, NumElts), "vperm");
15616 }
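// Standalone sketch (illustrative only, hypothetical names) of how the
// vperm2f128/permti immediate is decoded above: each output 128-bit lane reads a
// 4-bit field, where bit 3 zeroes the lane, bit 1 picks the second input, and bit 0
// picks the high half of the chosen input.
struct LaneSel { bool Zero; unsigned Src; bool HighHalf; };
static LaneSel decodeVPerm2Lane(unsigned Imm, unsigned Lane) {
  unsigned Field = (Imm >> (Lane * 4)) & 0xf;
  return {(Field & 0x8) != 0, (Field & 0x2) ? 1u : 0u, (Field & 0x1) != 0};
}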
15617
15618 case X86::BI__builtin_ia32_pslldqi128_byteshift:
15619 case X86::BI__builtin_ia32_pslldqi256_byteshift:
15620 case X86::BI__builtin_ia32_pslldqi512_byteshift: {
15621 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15622 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15623 // Builtin type is vXi64 so multiply by 8 to get bytes.
15624 unsigned NumElts = ResultType->getNumElements() * 8;
15625
15626 // If pslldq is shifting the vector more than 15 bytes, emit zero.
15627 if (ShiftVal >= 16)
15628 return llvm::Constant::getNullValue(ResultType);
15629
15630 int Indices[64];
15631 // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
15632 for (unsigned l = 0; l != NumElts; l += 16) {
15633 for (unsigned i = 0; i != 16; ++i) {
15634 unsigned Idx = NumElts + i - ShiftVal;
15635 if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
15636 Indices[l + i] = Idx + l;
15637 }
15638 }
15639
15640 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15641 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15642 Value *Zero = llvm::Constant::getNullValue(VecTy);
15643 Value *SV = Builder.CreateShuffleVector(
15644 Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");
15645 return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
15646 }
15647 case X86::BI__builtin_ia32_psrldqi128_byteshift:
15648 case X86::BI__builtin_ia32_psrldqi256_byteshift:
15649 case X86::BI__builtin_ia32_psrldqi512_byteshift: {
15650 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15651 auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());
15652 // Builtin type is vXi64 so multiply by 8 to get bytes.
15653 unsigned NumElts = ResultType->getNumElements() * 8;
15654
15655 // If psrldq is shifting the vector more than 15 bytes, emit zero.
15656 if (ShiftVal >= 16)
15657 return llvm::Constant::getNullValue(ResultType);
15658
15659 int Indices[64];
15660 // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
15661 for (unsigned l = 0; l != NumElts; l += 16) {
15662 for (unsigned i = 0; i != 16; ++i) {
15663 unsigned Idx = i + ShiftVal;
15664 if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
15665 Indices[l + i] = Idx + l;
15666 }
15667 }
15668
15669 auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
15670 Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
15671 Value *Zero = llvm::Constant::getNullValue(VecTy);
15672 Value *SV = Builder.CreateShuffleVector(
15673 Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");
15674 return Builder.CreateBitCast(SV, ResultType, "cast");
15675 }
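// Standalone sketch (illustrative helper, not an LLVM API) of the byteshift
// lowerings above, shown for one lane of psrldq: shifting right by N bytes reads
// source byte i+N into result byte i, and indices that run off the end of the lane
// come from the all-zero operand; pslldq is the mirror image with the zero vector
// as the first shuffle operand.
#include <array>
#include <cstdint>
static std::array<uint8_t, 16> psrldqLane(std::array<uint8_t, 16> Src,
                                          unsigned ShiftVal) {
  std::array<uint8_t, 16> Out{};        // zero-filled, like the null-value operand
  if (ShiftVal >= 16)
    return Out;                         // shifting a full lane or more yields zero
  for (unsigned i = 0; i + ShiftVal < 16; ++i)
    Out[i] = Src[i + ShiftVal];
  return Out;
}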
15676 case X86::BI__builtin_ia32_kshiftliqi:
15677 case X86::BI__builtin_ia32_kshiftlihi:
15678 case X86::BI__builtin_ia32_kshiftlisi:
15679 case X86::BI__builtin_ia32_kshiftlidi: {
15680 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15681 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15682
15683 if (ShiftVal >= NumElts)
15684 return llvm::Constant::getNullValue(Ops[0]->getType());
15685
15686 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15687
15688 int Indices[64];
15689 for (unsigned i = 0; i != NumElts; ++i)
15690 Indices[i] = NumElts + i - ShiftVal;
15691
15692 Value *Zero = llvm::Constant::getNullValue(In->getType());
15693 Value *SV = Builder.CreateShuffleVector(
15694 Zero, In, ArrayRef(Indices, NumElts), "kshiftl");
15695 return Builder.CreateBitCast(SV, Ops[0]->getType());
15696 }
15697 case X86::BI__builtin_ia32_kshiftriqi:
15698 case X86::BI__builtin_ia32_kshiftrihi:
15699 case X86::BI__builtin_ia32_kshiftrisi:
15700 case X86::BI__builtin_ia32_kshiftridi: {
15701 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
15702 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15703
15704 if (ShiftVal >= NumElts)
15705 return llvm::Constant::getNullValue(Ops[0]->getType());
15706
15707 Value *In = getMaskVecValue(*this, Ops[0], NumElts);
15708
15709 int Indices[64];
15710 for (unsigned i = 0; i != NumElts; ++i)
15711 Indices[i] = i + ShiftVal;
15712
15713 Value *Zero = llvm::Constant::getNullValue(In->getType());
15714 Value *SV = Builder.CreateShuffleVector(
15715 In, Zero, ArrayRef(Indices, NumElts), "kshiftr");
15716 return Builder.CreateBitCast(SV, Ops[0]->getType());
15717 }
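// Standalone sketch (illustrative only) of the kshiftl/kshiftr lowerings above,
// modelled on a plain integer: the mask is widened to NumElts i1 elements, shifted
// by pulling elements in from a null vector, then bitcast back, which on a scalar
// is just a logical shift within the mask width.
#include <cstdint>
static uint64_t kshiftrRef(uint64_t Mask, unsigned NumElts, unsigned ShiftVal) {
  if (ShiftVal >= NumElts)
    return 0;                           // mirrors the early "all bits shifted out" return
  uint64_t Width = (NumElts == 64) ? ~0ull : ((1ull << NumElts) - 1);
  return (Mask & Width) >> ShiftVal;    // kshiftl would shift left and re-mask
}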
15718 case X86::BI__builtin_ia32_movnti:
15719 case X86::BI__builtin_ia32_movnti64:
15720 case X86::BI__builtin_ia32_movntsd:
15721 case X86::BI__builtin_ia32_movntss: {
15722 llvm::MDNode *Node = llvm::MDNode::get(
15723 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
15724
15725 Value *Ptr = Ops[0];
15726 Value *Src = Ops[1];
15727
15728 // Extract the 0'th element of the source vector.
15729 if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
15730 BuiltinID == X86::BI__builtin_ia32_movntss)
15731 Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
15732
15733 // Unaligned nontemporal store of the scalar value.
15734 StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);
15735 SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);
15736 SI->setAlignment(llvm::Align(1));
15737 return SI;
15738 }
15739 // Rotate is a special case of funnel shift - 1st 2 args are the same.
15740 case X86::BI__builtin_ia32_vprotb:
15741 case X86::BI__builtin_ia32_vprotw:
15742 case X86::BI__builtin_ia32_vprotd:
15743 case X86::BI__builtin_ia32_vprotq:
15744 case X86::BI__builtin_ia32_vprotbi:
15745 case X86::BI__builtin_ia32_vprotwi:
15746 case X86::BI__builtin_ia32_vprotdi:
15747 case X86::BI__builtin_ia32_vprotqi:
15748 case X86::BI__builtin_ia32_prold128:
15749 case X86::BI__builtin_ia32_prold256:
15750 case X86::BI__builtin_ia32_prold512:
15751 case X86::BI__builtin_ia32_prolq128:
15752 case X86::BI__builtin_ia32_prolq256:
15753 case X86::BI__builtin_ia32_prolq512:
15754 case X86::BI__builtin_ia32_prolvd128:
15755 case X86::BI__builtin_ia32_prolvd256:
15756 case X86::BI__builtin_ia32_prolvd512:
15757 case X86::BI__builtin_ia32_prolvq128:
15758 case X86::BI__builtin_ia32_prolvq256:
15759 case X86::BI__builtin_ia32_prolvq512:
15760 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
15761 case X86::BI__builtin_ia32_prord128:
15762 case X86::BI__builtin_ia32_prord256:
15763 case X86::BI__builtin_ia32_prord512:
15764 case X86::BI__builtin_ia32_prorq128:
15765 case X86::BI__builtin_ia32_prorq256:
15766 case X86::BI__builtin_ia32_prorq512:
15767 case X86::BI__builtin_ia32_prorvd128:
15768 case X86::BI__builtin_ia32_prorvd256:
15769 case X86::BI__builtin_ia32_prorvd512:
15770 case X86::BI__builtin_ia32_prorvq128:
15771 case X86::BI__builtin_ia32_prorvq256:
15772 case X86::BI__builtin_ia32_prorvq512:
15773 return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
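// Standalone sketch (illustrative only) of the identity behind the two funnel-shift
// calls above: a rotate is a funnel shift whose two data operands are the same
// value, i.e. rotl(x, n) == fshl(x, x, n) and rotr(x, n) == fshr(x, x, n).
#include <cstdint>
static uint32_t rotl32(uint32_t X, unsigned N) {
  N &= 31;                              // funnel shifts take the amount modulo the width
  return N ? (X << N) | (X >> (32 - N)) : X;
}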
15774 case X86::BI__builtin_ia32_selectb_128:
15775 case X86::BI__builtin_ia32_selectb_256:
15776 case X86::BI__builtin_ia32_selectb_512:
15777 case X86::BI__builtin_ia32_selectw_128:
15778 case X86::BI__builtin_ia32_selectw_256:
15779 case X86::BI__builtin_ia32_selectw_512:
15780 case X86::BI__builtin_ia32_selectd_128:
15781 case X86::BI__builtin_ia32_selectd_256:
15782 case X86::BI__builtin_ia32_selectd_512:
15783 case X86::BI__builtin_ia32_selectq_128:
15784 case X86::BI__builtin_ia32_selectq_256:
15785 case X86::BI__builtin_ia32_selectq_512:
15786 case X86::BI__builtin_ia32_selectph_128:
15787 case X86::BI__builtin_ia32_selectph_256:
15788 case X86::BI__builtin_ia32_selectph_512:
15789 case X86::BI__builtin_ia32_selectpbf_128:
15790 case X86::BI__builtin_ia32_selectpbf_256:
15791 case X86::BI__builtin_ia32_selectpbf_512:
15792 case X86::BI__builtin_ia32_selectps_128:
15793 case X86::BI__builtin_ia32_selectps_256:
15794 case X86::BI__builtin_ia32_selectps_512:
15795 case X86::BI__builtin_ia32_selectpd_128:
15796 case X86::BI__builtin_ia32_selectpd_256:
15797 case X86::BI__builtin_ia32_selectpd_512:
15798 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
15799 case X86::BI__builtin_ia32_selectsh_128:
15800 case X86::BI__builtin_ia32_selectsbf_128:
15801 case X86::BI__builtin_ia32_selectss_128:
15802 case X86::BI__builtin_ia32_selectsd_128: {
15803 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
15804 Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
15805 A = EmitX86ScalarSelect(*this, Ops[0], A, B);
15806 return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
15807 }
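// Standalone sketch (illustrative only, assuming the usual AVX-512 merge-masking
// convention) of the scalar select pattern above: only element 0 participates; bit 0
// of the mask chooses between the computed scalar and the passthrough scalar, and
// the chosen value is reinserted into element 0 of the first source vector.
static float selectssRef(unsigned char Mask, float A, float B) {
  return (Mask & 1) ? A : B;            // A: Ops[1] element 0, B: Ops[2] element 0
}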
15808 case X86::BI__builtin_ia32_cmpb128_mask:
15809 case X86::BI__builtin_ia32_cmpb256_mask:
15810 case X86::BI__builtin_ia32_cmpb512_mask:
15811 case X86::BI__builtin_ia32_cmpw128_mask:
15812 case X86::BI__builtin_ia32_cmpw256_mask:
15813 case X86::BI__builtin_ia32_cmpw512_mask:
15814 case X86::BI__builtin_ia32_cmpd128_mask:
15815 case X86::BI__builtin_ia32_cmpd256_mask:
15816 case X86::BI__builtin_ia32_cmpd512_mask:
15817 case X86::BI__builtin_ia32_cmpq128_mask:
15818 case X86::BI__builtin_ia32_cmpq256_mask:
15819 case X86::BI__builtin_ia32_cmpq512_mask: {
15820 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15821 return EmitX86MaskedCompare(*this, CC, true, Ops);
15822 }
15823 case X86::BI__builtin_ia32_ucmpb128_mask:
15824 case X86::BI__builtin_ia32_ucmpb256_mask:
15825 case X86::BI__builtin_ia32_ucmpb512_mask:
15826 case X86::BI__builtin_ia32_ucmpw128_mask:
15827 case X86::BI__builtin_ia32_ucmpw256_mask:
15828 case X86::BI__builtin_ia32_ucmpw512_mask:
15829 case X86::BI__builtin_ia32_ucmpd128_mask:
15830 case X86::BI__builtin_ia32_ucmpd256_mask:
15831 case X86::BI__builtin_ia32_ucmpd512_mask:
15832 case X86::BI__builtin_ia32_ucmpq128_mask:
15833 case X86::BI__builtin_ia32_ucmpq256_mask:
15834 case X86::BI__builtin_ia32_ucmpq512_mask: {
15835 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
15836 return EmitX86MaskedCompare(*this, CC, false, Ops);
15837 }
15838 case X86::BI__builtin_ia32_vpcomb:
15839 case X86::BI__builtin_ia32_vpcomw:
15840 case X86::BI__builtin_ia32_vpcomd:
15841 case X86::BI__builtin_ia32_vpcomq:
15842 return EmitX86vpcom(*this, Ops, true);
15843 case X86::BI__builtin_ia32_vpcomub:
15844 case X86::BI__builtin_ia32_vpcomuw:
15845 case X86::BI__builtin_ia32_vpcomud:
15846 case X86::BI__builtin_ia32_vpcomuq:
15847 return EmitX86vpcom(*this, Ops, false);
15848
15849 case X86::BI__builtin_ia32_kortestcqi:
15850 case X86::BI__builtin_ia32_kortestchi:
15851 case X86::BI__builtin_ia32_kortestcsi:
15852 case X86::BI__builtin_ia32_kortestcdi: {
15853 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15854 Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
15855 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15856 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15857 }
15858 case X86::BI__builtin_ia32_kortestzqi:
15859 case X86::BI__builtin_ia32_kortestzhi:
15860 case X86::BI__builtin_ia32_kortestzsi:
15861 case X86::BI__builtin_ia32_kortestzdi: {
15862 Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
15863 Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
15864 Value *Cmp = Builder.CreateICmpEQ(Or, C);
15865 return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
15866 }
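// Standalone sketch (illustrative only, modelled on the 16-bit "hi" variants) of the
// kortest lowerings above: the "C" form tests whether the OR of the two masks is all
// ones, the "Z" form tests whether it is zero; both reduce to an OR followed by an
// integer equality compare that is then zero-extended.
#include <cstdint>
static int kortestc16(uint16_t A, uint16_t B) { return (uint16_t)(A | B) == 0xffff; }
static int kortestz16(uint16_t A, uint16_t B) { return (uint16_t)(A | B) == 0; }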
15867
15868 case X86::BI__builtin_ia32_ktestcqi:
15869 case X86::BI__builtin_ia32_ktestzqi:
15870 case X86::BI__builtin_ia32_ktestchi:
15871 case X86::BI__builtin_ia32_ktestzhi:
15872 case X86::BI__builtin_ia32_ktestcsi:
15873 case X86::BI__builtin_ia32_ktestzsi:
15874 case X86::BI__builtin_ia32_ktestcdi:
15875 case X86::BI__builtin_ia32_ktestzdi: {
15876 Intrinsic::ID IID;
15877 switch (BuiltinID) {
15878 default: llvm_unreachable("Unsupported intrinsic!");
15879 case X86::BI__builtin_ia32_ktestcqi:
15880 IID = Intrinsic::x86_avx512_ktestc_b;
15881 break;
15882 case X86::BI__builtin_ia32_ktestzqi:
15883 IID = Intrinsic::x86_avx512_ktestz_b;
15884 break;
15885 case X86::BI__builtin_ia32_ktestchi:
15886 IID = Intrinsic::x86_avx512_ktestc_w;
15887 break;
15888 case X86::BI__builtin_ia32_ktestzhi:
15889 IID = Intrinsic::x86_avx512_ktestz_w;
15890 break;
15891 case X86::BI__builtin_ia32_ktestcsi:
15892 IID = Intrinsic::x86_avx512_ktestc_d;
15893 break;
15894 case X86::BI__builtin_ia32_ktestzsi:
15895 IID = Intrinsic::x86_avx512_ktestz_d;
15896 break;
15897 case X86::BI__builtin_ia32_ktestcdi:
15898 IID = Intrinsic::x86_avx512_ktestc_q;
15899 break;
15900 case X86::BI__builtin_ia32_ktestzdi:
15901 IID = Intrinsic::x86_avx512_ktestz_q;
15902 break;
15903 }
15904
15905 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15906 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15907 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15908 Function *Intr = CGM.getIntrinsic(IID);
15909 return Builder.CreateCall(Intr, {LHS, RHS});
15910 }
15911
15912 case X86::BI__builtin_ia32_kaddqi:
15913 case X86::BI__builtin_ia32_kaddhi:
15914 case X86::BI__builtin_ia32_kaddsi:
15915 case X86::BI__builtin_ia32_kadddi: {
15916 Intrinsic::ID IID;
15917 switch (BuiltinID) {
15918 default: llvm_unreachable("Unsupported intrinsic!");
15919 case X86::BI__builtin_ia32_kaddqi:
15920 IID = Intrinsic::x86_avx512_kadd_b;
15921 break;
15922 case X86::BI__builtin_ia32_kaddhi:
15923 IID = Intrinsic::x86_avx512_kadd_w;
15924 break;
15925 case X86::BI__builtin_ia32_kaddsi:
15926 IID = Intrinsic::x86_avx512_kadd_d;
15927 break;
15928 case X86::BI__builtin_ia32_kadddi:
15929 IID = Intrinsic::x86_avx512_kadd_q;
15930 break;
15931 }
15932
15933 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15934 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15935 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15936 Function *Intr = CGM.getIntrinsic(IID);
15937 Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
15938 return Builder.CreateBitCast(Res, Ops[0]->getType());
15939 }
15940 case X86::BI__builtin_ia32_kandqi:
15941 case X86::BI__builtin_ia32_kandhi:
15942 case X86::BI__builtin_ia32_kandsi:
15943 case X86::BI__builtin_ia32_kanddi:
15944 return EmitX86MaskLogic(*this, Instruction::And, Ops);
15945 case X86::BI__builtin_ia32_kandnqi:
15946 case X86::BI__builtin_ia32_kandnhi:
15947 case X86::BI__builtin_ia32_kandnsi:
15948 case X86::BI__builtin_ia32_kandndi:
15949 return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
15950 case X86::BI__builtin_ia32_korqi:
15951 case X86::BI__builtin_ia32_korhi:
15952 case X86::BI__builtin_ia32_korsi:
15953 case X86::BI__builtin_ia32_kordi:
15954 return EmitX86MaskLogic(*this, Instruction::Or, Ops);
15955 case X86::BI__builtin_ia32_kxnorqi:
15956 case X86::BI__builtin_ia32_kxnorhi:
15957 case X86::BI__builtin_ia32_kxnorsi:
15958 case X86::BI__builtin_ia32_kxnordi:
15959 return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
15960 case X86::BI__builtin_ia32_kxorqi:
15961 case X86::BI__builtin_ia32_kxorhi:
15962 case X86::BI__builtin_ia32_kxorsi:
15963 case X86::BI__builtin_ia32_kxordi:
15964 return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
15965 case X86::BI__builtin_ia32_knotqi:
15966 case X86::BI__builtin_ia32_knothi:
15967 case X86::BI__builtin_ia32_knotsi:
15968 case X86::BI__builtin_ia32_knotdi: {
15969 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15970 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15971 return Builder.CreateBitCast(Builder.CreateNot(Res),
15972 Ops[0]->getType());
15973 }
15974 case X86::BI__builtin_ia32_kmovb:
15975 case X86::BI__builtin_ia32_kmovw:
15976 case X86::BI__builtin_ia32_kmovd:
15977 case X86::BI__builtin_ia32_kmovq: {
15978 // Bitcast to vXi1 type and then back to integer. This gets the mask
15979 // register type into the IR, but might be optimized out depending on
15980 // what's around it.
15981 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15982 Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
15983 return Builder.CreateBitCast(Res, Ops[0]->getType());
15984 }
15985
15986 case X86::BI__builtin_ia32_kunpckdi:
15987 case X86::BI__builtin_ia32_kunpcksi:
15988 case X86::BI__builtin_ia32_kunpckhi: {
15989 unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
15990 Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
15991 Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
15992 int Indices[64];
15993 for (unsigned i = 0; i != NumElts; ++i)
15994 Indices[i] = i;
15995
15996 // First extract half of each vector. This gives better codegen than
15997 // doing it in a single shuffle.
15998 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
15999 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
16000 // Concat the vectors.
16001 // NOTE: Operands are swapped to match the intrinsic definition.
16002 Value *Res =
16003 Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
16004 return Builder.CreateBitCast(Res, Ops[0]->getType());
16005 }
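// Standalone sketch (illustrative only, shown for the 16-bit kunpckhi form) of the
// unpack lowering above in scalar terms: the result concatenates the low half of
// each mask, with the first operand supplying the high half of the result, which is
// why the shuffle operands are swapped.
#include <cstdint>
static uint16_t kunpckbwRef(uint16_t A, uint16_t B) {
  return (uint16_t)(((A & 0xff) << 8) | (B & 0xff));
}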
16006
16007 case X86::BI__builtin_ia32_vplzcntd_128:
16008 case X86::BI__builtin_ia32_vplzcntd_256:
16009 case X86::BI__builtin_ia32_vplzcntd_512:
16010 case X86::BI__builtin_ia32_vplzcntq_128:
16011 case X86::BI__builtin_ia32_vplzcntq_256:
16012 case X86::BI__builtin_ia32_vplzcntq_512: {
16013 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
16014 return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
16015 }
16016 case X86::BI__builtin_ia32_sqrtss:
16017 case X86::BI__builtin_ia32_sqrtsd: {
16018 Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
16019 Function *F;
16020 if (Builder.getIsFPConstrained()) {
16021 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16022 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16023 A->getType());
16024 A = Builder.CreateConstrainedFPCall(F, {A});
16025 } else {
16026 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16027 A = Builder.CreateCall(F, {A});
16028 }
16029 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16030 }
16031 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16032 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16033 case X86::BI__builtin_ia32_sqrtss_round_mask: {
16034 unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
16035 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16036 // otherwise keep the intrinsic.
16037 if (CC != 4) {
16038 Intrinsic::ID IID;
16039
16040 switch (BuiltinID) {
16041 default:
16042 llvm_unreachable("Unsupported intrinsic!");
16043 case X86::BI__builtin_ia32_sqrtsh_round_mask:
16044 IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;
16045 break;
16046 case X86::BI__builtin_ia32_sqrtsd_round_mask:
16047 IID = Intrinsic::x86_avx512_mask_sqrt_sd;
16048 break;
16049 case X86::BI__builtin_ia32_sqrtss_round_mask:
16050 IID = Intrinsic::x86_avx512_mask_sqrt_ss;
16051 break;
16052 }
16053 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16054 }
16055 Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
16056 Function *F;
16057 if (Builder.getIsFPConstrained()) {
16058 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16059 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16060 A->getType());
16061 A = Builder.CreateConstrainedFPCall(F, A);
16062 } else {
16063 F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
16064 A = Builder.CreateCall(F, A);
16065 }
16066 Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
16067 A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
16068 return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
16069 }
16070 case X86::BI__builtin_ia32_sqrtpd256:
16071 case X86::BI__builtin_ia32_sqrtpd:
16072 case X86::BI__builtin_ia32_sqrtps256:
16073 case X86::BI__builtin_ia32_sqrtps:
16074 case X86::BI__builtin_ia32_sqrtph256:
16075 case X86::BI__builtin_ia32_sqrtph:
16076 case X86::BI__builtin_ia32_sqrtph512:
16077 case X86::BI__builtin_ia32_sqrtps512:
16078 case X86::BI__builtin_ia32_sqrtpd512: {
16079 if (Ops.size() == 2) {
16080 unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
16081 // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
16082 // otherwise keep the intrinsic.
16083 if (CC != 4) {
16084 Intrinsic::ID IID;
16085
16086 switch (BuiltinID) {
16087 default:
16088 llvm_unreachable("Unsupported intrinsic!");
16089 case X86::BI__builtin_ia32_sqrtph512:
16090 IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;
16091 break;
16092 case X86::BI__builtin_ia32_sqrtps512:
16093 IID = Intrinsic::x86_avx512_sqrt_ps_512;
16094 break;
16095 case X86::BI__builtin_ia32_sqrtpd512:
16096 IID = Intrinsic::x86_avx512_sqrt_pd_512;
16097 break;
16098 }
16099 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16100 }
16101 }
16102 if (Builder.getIsFPConstrained()) {
16103 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16104 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,
16105 Ops[0]->getType());
16106 return Builder.CreateConstrainedFPCall(F, Ops[0]);
16107 } else {
16108 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
16109 return Builder.CreateCall(F, Ops[0]);
16110 }
16111 }
16112
16113 case X86::BI__builtin_ia32_pmuludq128:
16114 case X86::BI__builtin_ia32_pmuludq256:
16115 case X86::BI__builtin_ia32_pmuludq512:
16116 return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
16117
16118 case X86::BI__builtin_ia32_pmuldq128:
16119 case X86::BI__builtin_ia32_pmuldq256:
16120 case X86::BI__builtin_ia32_pmuldq512:
16121 return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
16122
16123 case X86::BI__builtin_ia32_pternlogd512_mask:
16124 case X86::BI__builtin_ia32_pternlogq512_mask:
16125 case X86::BI__builtin_ia32_pternlogd128_mask:
16126 case X86::BI__builtin_ia32_pternlogd256_mask:
16127 case X86::BI__builtin_ia32_pternlogq128_mask:
16128 case X86::BI__builtin_ia32_pternlogq256_mask:
16129 return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
16130
16131 case X86::BI__builtin_ia32_pternlogd512_maskz:
16132 case X86::BI__builtin_ia32_pternlogq512_maskz:
16133 case X86::BI__builtin_ia32_pternlogd128_maskz:
16134 case X86::BI__builtin_ia32_pternlogd256_maskz:
16135 case X86::BI__builtin_ia32_pternlogq128_maskz:
16136 case X86::BI__builtin_ia32_pternlogq256_maskz:
16137 return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
16138
16139 case X86::BI__builtin_ia32_vpshldd128:
16140 case X86::BI__builtin_ia32_vpshldd256:
16141 case X86::BI__builtin_ia32_vpshldd512:
16142 case X86::BI__builtin_ia32_vpshldq128:
16143 case X86::BI__builtin_ia32_vpshldq256:
16144 case X86::BI__builtin_ia32_vpshldq512:
16145 case X86::BI__builtin_ia32_vpshldw128:
16146 case X86::BI__builtin_ia32_vpshldw256:
16147 case X86::BI__builtin_ia32_vpshldw512:
16148 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16149
16150 case X86::BI__builtin_ia32_vpshrdd128:
16151 case X86::BI__builtin_ia32_vpshrdd256:
16152 case X86::BI__builtin_ia32_vpshrdd512:
16153 case X86::BI__builtin_ia32_vpshrdq128:
16154 case X86::BI__builtin_ia32_vpshrdq256:
16155 case X86::BI__builtin_ia32_vpshrdq512:
16156 case X86::BI__builtin_ia32_vpshrdw128:
16157 case X86::BI__builtin_ia32_vpshrdw256:
16158 case X86::BI__builtin_ia32_vpshrdw512:
16159 // Ops 0 and 1 are swapped.
16160 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16161
16162 case X86::BI__builtin_ia32_vpshldvd128:
16163 case X86::BI__builtin_ia32_vpshldvd256:
16164 case X86::BI__builtin_ia32_vpshldvd512:
16165 case X86::BI__builtin_ia32_vpshldvq128:
16166 case X86::BI__builtin_ia32_vpshldvq256:
16167 case X86::BI__builtin_ia32_vpshldvq512:
16168 case X86::BI__builtin_ia32_vpshldvw128:
16169 case X86::BI__builtin_ia32_vpshldvw256:
16170 case X86::BI__builtin_ia32_vpshldvw512:
16171 return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
16172
16173 case X86::BI__builtin_ia32_vpshrdvd128:
16174 case X86::BI__builtin_ia32_vpshrdvd256:
16175 case X86::BI__builtin_ia32_vpshrdvd512:
16176 case X86::BI__builtin_ia32_vpshrdvq128:
16177 case X86::BI__builtin_ia32_vpshrdvq256:
16178 case X86::BI__builtin_ia32_vpshrdvq512:
16179 case X86::BI__builtin_ia32_vpshrdvw128:
16180 case X86::BI__builtin_ia32_vpshrdvw256:
16181 case X86::BI__builtin_ia32_vpshrdvw512:
16182 // Ops 0 and 1 are swapped.
16183 return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
16184
16185 // Reductions
16186 case X86::BI__builtin_ia32_reduce_fadd_pd512:
16187 case X86::BI__builtin_ia32_reduce_fadd_ps512:
16188 case X86::BI__builtin_ia32_reduce_fadd_ph512:
16189 case X86::BI__builtin_ia32_reduce_fadd_ph256:
16190 case X86::BI__builtin_ia32_reduce_fadd_ph128: {
16191 Function *F =
16192 CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
16193 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16194 Builder.getFastMathFlags().setAllowReassoc();
16195 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16196 }
16197 case X86::BI__builtin_ia32_reduce_fmul_pd512:
16198 case X86::BI__builtin_ia32_reduce_fmul_ps512:
16199 case X86::BI__builtin_ia32_reduce_fmul_ph512:
16200 case X86::BI__builtin_ia32_reduce_fmul_ph256:
16201 case X86::BI__builtin_ia32_reduce_fmul_ph128: {
16202 Function *F =
16203 CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
16204 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16205 Builder.getFastMathFlags().setAllowReassoc();
16206 return Builder.CreateCall(F, {Ops[0], Ops[1]});
16207 }
16208 case X86::BI__builtin_ia32_reduce_fmax_pd512:
16209 case X86::BI__builtin_ia32_reduce_fmax_ps512:
16210 case X86::BI__builtin_ia32_reduce_fmax_ph512:
16211 case X86::BI__builtin_ia32_reduce_fmax_ph256:
16212 case X86::BI__builtin_ia32_reduce_fmax_ph128: {
16213 Function *F =
16214 CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());
16215 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16216 Builder.getFastMathFlags().setNoNaNs();
16217 return Builder.CreateCall(F, {Ops[0]});
16218 }
16219 case X86::BI__builtin_ia32_reduce_fmin_pd512:
16220 case X86::BI__builtin_ia32_reduce_fmin_ps512:
16221 case X86::BI__builtin_ia32_reduce_fmin_ph512:
16222 case X86::BI__builtin_ia32_reduce_fmin_ph256:
16223 case X86::BI__builtin_ia32_reduce_fmin_ph128: {
16224 Function *F =
16225 CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());
16226 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
16227 Builder.getFastMathFlags().setNoNaNs();
16228 return Builder.CreateCall(F, {Ops[0]});
16229 }
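// Standalone sketch (illustrative helper, not an LLVM API) of what the fadd
// reduction above computes: a starting accumulator combined with every element. The
// `reassoc` flag set on the vector_reduce_fadd/fmul calls lets the backend evaluate
// this in any association order (for example a pairwise tree) instead of strictly
// left to right, while the fmax/fmin reductions instead set `nnan` because they
// assume NaN-free inputs.
#include <vector>
static float reduceFAddRef(float Acc, const std::vector<float> &V) {
  for (float X : V)
    Acc += X;                           // reference semantics, strictly left to right
  return Acc;
}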
16230
16231 case X86::BI__builtin_ia32_rdrand16_step:
16232 case X86::BI__builtin_ia32_rdrand32_step:
16233 case X86::BI__builtin_ia32_rdrand64_step:
16234 case X86::BI__builtin_ia32_rdseed16_step:
16235 case X86::BI__builtin_ia32_rdseed32_step:
16236 case X86::BI__builtin_ia32_rdseed64_step: {
16237 Intrinsic::ID ID;
16238 switch (BuiltinID) {
16239 default: llvm_unreachable("Unsupported intrinsic!");
16240 case X86::BI__builtin_ia32_rdrand16_step:
16241 ID = Intrinsic::x86_rdrand_16;
16242 break;
16243 case X86::BI__builtin_ia32_rdrand32_step:
16244 ID = Intrinsic::x86_rdrand_32;
16245 break;
16246 case X86::BI__builtin_ia32_rdrand64_step:
16247 ID = Intrinsic::x86_rdrand_64;
16248 break;
16249 case X86::BI__builtin_ia32_rdseed16_step:
16250 ID = Intrinsic::x86_rdseed_16;
16251 break;
16252 case X86::BI__builtin_ia32_rdseed32_step:
16253 ID = Intrinsic::x86_rdseed_32;
16254 break;
16255 case X86::BI__builtin_ia32_rdseed64_step:
16256 ID = Intrinsic::x86_rdseed_64;
16257 break;
16258 }
16259
16260 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
16261 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
16262 Ops[0]);
16263 return Builder.CreateExtractValue(Call, 1);
16264 }
16265 case X86::BI__builtin_ia32_addcarryx_u32:
16266 case X86::BI__builtin_ia32_addcarryx_u64:
16267 case X86::BI__builtin_ia32_subborrow_u32:
16268 case X86::BI__builtin_ia32_subborrow_u64: {
16269 Intrinsic::ID IID;
16270 switch (BuiltinID) {
16271 default: llvm_unreachable("Unsupported intrinsic!");
16272 case X86::BI__builtin_ia32_addcarryx_u32:
16273 IID = Intrinsic::x86_addcarry_32;
16274 break;
16275 case X86::BI__builtin_ia32_addcarryx_u64:
16276 IID = Intrinsic::x86_addcarry_64;
16277 break;
16278 case X86::BI__builtin_ia32_subborrow_u32:
16279 IID = Intrinsic::x86_subborrow_32;
16280 break;
16281 case X86::BI__builtin_ia32_subborrow_u64:
16282 IID = Intrinsic::x86_subborrow_64;
16283 break;
16284 }
16285
16286 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
16287 { Ops[0], Ops[1], Ops[2] });
16288 Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
16289 Ops[3]);
16290 return Builder.CreateExtractValue(Call, 0);
16291 }
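// Standalone sketch (illustrative reference, hypothetical name) of the addcarry
// lowering above: the intrinsic yields a {carry-out, sum} pair, the sum is stored
// through the pointer operand, and the carry-out byte is the value of the builtin
// itself; the subborrow variants follow the same shape with a borrow instead.
#include <cstdint>
static unsigned char addcarryU32Ref(unsigned char CarryIn, uint32_t A, uint32_t B,
                                    uint32_t *Out) {
  uint64_t Sum = (uint64_t)A + (uint64_t)B + (CarryIn ? 1 : 0);
  *Out = (uint32_t)Sum;                 // low 32 bits: the sum
  return (unsigned char)(Sum >> 32);    // bit 32: the carry-out
}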
16292
16293 case X86::BI__builtin_ia32_fpclassps128_mask:
16294 case X86::BI__builtin_ia32_fpclassps256_mask:
16295 case X86::BI__builtin_ia32_fpclassps512_mask:
16296 case X86::BI__builtin_ia32_fpclassph128_mask:
16297 case X86::BI__builtin_ia32_fpclassph256_mask:
16298 case X86::BI__builtin_ia32_fpclassph512_mask:
16299 case X86::BI__builtin_ia32_fpclasspd128_mask:
16300 case X86::BI__builtin_ia32_fpclasspd256_mask:
16301 case X86::BI__builtin_ia32_fpclasspd512_mask: {
16302 unsigned NumElts =
16303 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16304 Value *MaskIn = Ops[2];
16305 Ops.erase(&Ops[2]);
16306
16307 Intrinsic::ID ID;
16308 switch (BuiltinID) {
16309 default: llvm_unreachable("Unsupported intrinsic!");
16310 case X86::BI__builtin_ia32_fpclassph128_mask:
16311 ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;
16312 break;
16313 case X86::BI__builtin_ia32_fpclassph256_mask:
16314 ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;
16315 break;
16316 case X86::BI__builtin_ia32_fpclassph512_mask:
16317 ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;
16318 break;
16319 case X86::BI__builtin_ia32_fpclassps128_mask:
16320 ID = Intrinsic::x86_avx512_fpclass_ps_128;
16321 break;
16322 case X86::BI__builtin_ia32_fpclassps256_mask:
16323 ID = Intrinsic::x86_avx512_fpclass_ps_256;
16324 break;
16325 case X86::BI__builtin_ia32_fpclassps512_mask:
16326 ID = Intrinsic::x86_avx512_fpclass_ps_512;
16327 break;
16328 case X86::BI__builtin_ia32_fpclasspd128_mask:
16329 ID = Intrinsic::x86_avx512_fpclass_pd_128;
16330 break;
16331 case X86::BI__builtin_ia32_fpclasspd256_mask:
16332 ID = Intrinsic::x86_avx512_fpclass_pd_256;
16333 break;
16334 case X86::BI__builtin_ia32_fpclasspd512_mask:
16335 ID = Intrinsic::x86_avx512_fpclass_pd_512;
16336 break;
16337 }
16338
16339 Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16340 return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
16341 }
16342
16343 case X86::BI__builtin_ia32_vp2intersect_q_512:
16344 case X86::BI__builtin_ia32_vp2intersect_q_256:
16345 case X86::BI__builtin_ia32_vp2intersect_q_128:
16346 case X86::BI__builtin_ia32_vp2intersect_d_512:
16347 case X86::BI__builtin_ia32_vp2intersect_d_256:
16348 case X86::BI__builtin_ia32_vp2intersect_d_128: {
16349 unsigned NumElts =
16350 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16351 Intrinsic::ID ID;
16352
16353 switch (BuiltinID) {
16354 default: llvm_unreachable("Unsupported intrinsic!");
16355 case X86::BI__builtin_ia32_vp2intersect_q_512:
16356 ID = Intrinsic::x86_avx512_vp2intersect_q_512;
16357 break;
16358 case X86::BI__builtin_ia32_vp2intersect_q_256:
16359 ID = Intrinsic::x86_avx512_vp2intersect_q_256;
16360 break;
16361 case X86::BI__builtin_ia32_vp2intersect_q_128:
16362 ID = Intrinsic::x86_avx512_vp2intersect_q_128;
16363 break;
16364 case X86::BI__builtin_ia32_vp2intersect_d_512:
16365 ID = Intrinsic::x86_avx512_vp2intersect_d_512;
16366 break;
16367 case X86::BI__builtin_ia32_vp2intersect_d_256:
16368 ID = Intrinsic::x86_avx512_vp2intersect_d_256;
16369 break;
16370 case X86::BI__builtin_ia32_vp2intersect_d_128:
16371 ID = Intrinsic::x86_avx512_vp2intersect_d_128;
16372 break;
16373 }
16374
16375 Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
16376 Value *Result = Builder.CreateExtractValue(Call, 0);
 16377 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
 16378 Builder.CreateDefaultAlignedStore(Result, Ops[2]);
 16379
 16380 Result = Builder.CreateExtractValue(Call, 1);
 16381 Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
 16382 return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
 16383 }
16384
16385 case X86::BI__builtin_ia32_vpmultishiftqb128:
16386 case X86::BI__builtin_ia32_vpmultishiftqb256:
16387 case X86::BI__builtin_ia32_vpmultishiftqb512: {
16388 Intrinsic::ID ID;
16389 switch (BuiltinID) {
16390 default: llvm_unreachable("Unsupported intrinsic!");
16391 case X86::BI__builtin_ia32_vpmultishiftqb128:
16392 ID = Intrinsic::x86_avx512_pmultishift_qb_128;
16393 break;
16394 case X86::BI__builtin_ia32_vpmultishiftqb256:
16395 ID = Intrinsic::x86_avx512_pmultishift_qb_256;
16396 break;
16397 case X86::BI__builtin_ia32_vpmultishiftqb512:
16398 ID = Intrinsic::x86_avx512_pmultishift_qb_512;
16399 break;
16400 }
16401
16402 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16403 }
16404
16405 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16406 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16407 case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
16408 unsigned NumElts =
16409 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16410 Value *MaskIn = Ops[2];
16411 Ops.erase(&Ops[2]);
16412
16413 Intrinsic::ID ID;
16414 switch (BuiltinID) {
16415 default: llvm_unreachable("Unsupported intrinsic!");
16416 case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
16417 ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
16418 break;
16419 case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
16420 ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
16421 break;
16422 case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
16423 ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
16424 break;
16425 }
16426
16427 Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
16428 return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
16429 }
16430
16431 // packed comparison intrinsics
16432 case X86::BI__builtin_ia32_cmpeqps:
16433 case X86::BI__builtin_ia32_cmpeqpd:
16434 return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
16435 case X86::BI__builtin_ia32_cmpltps:
16436 case X86::BI__builtin_ia32_cmpltpd:
16437 return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
16438 case X86::BI__builtin_ia32_cmpleps:
16439 case X86::BI__builtin_ia32_cmplepd:
16440 return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
16441 case X86::BI__builtin_ia32_cmpunordps:
16442 case X86::BI__builtin_ia32_cmpunordpd:
16443 return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
16444 case X86::BI__builtin_ia32_cmpneqps:
16445 case X86::BI__builtin_ia32_cmpneqpd:
16446 return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
16447 case X86::BI__builtin_ia32_cmpnltps:
16448 case X86::BI__builtin_ia32_cmpnltpd:
16449 return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
16450 case X86::BI__builtin_ia32_cmpnleps:
16451 case X86::BI__builtin_ia32_cmpnlepd:
16452 return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
16453 case X86::BI__builtin_ia32_cmpordps:
16454 case X86::BI__builtin_ia32_cmpordpd:
16455 return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
16456 case X86::BI__builtin_ia32_cmpph128_mask:
16457 case X86::BI__builtin_ia32_cmpph256_mask:
16458 case X86::BI__builtin_ia32_cmpph512_mask:
16459 case X86::BI__builtin_ia32_cmpps128_mask:
16460 case X86::BI__builtin_ia32_cmpps256_mask:
16461 case X86::BI__builtin_ia32_cmpps512_mask:
16462 case X86::BI__builtin_ia32_cmppd128_mask:
16463 case X86::BI__builtin_ia32_cmppd256_mask:
16464 case X86::BI__builtin_ia32_cmppd512_mask:
16465 case X86::BI__builtin_ia32_vcmppd256_round_mask:
16466 case X86::BI__builtin_ia32_vcmpps256_round_mask:
16467 case X86::BI__builtin_ia32_vcmpph256_round_mask:
16468 IsMaskFCmp = true;
16469 [[fallthrough]];
16470 case X86::BI__builtin_ia32_cmpps:
16471 case X86::BI__builtin_ia32_cmpps256:
16472 case X86::BI__builtin_ia32_cmppd:
16473 case X86::BI__builtin_ia32_cmppd256: {
16474 // Lower vector comparisons to fcmp instructions, ignoring the requested
16475 // signalling behaviour and the requested rounding mode.
16476 // This is only possible if the fp-model is not strict and FENV_ACCESS is
16477 // off.
16478
16479 // The third argument is the comparison condition, an integer in the
16480 // range [0, 31].
16481 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
16482
16483 // Map the condition code to an IR fcmp predicate and a signalling flag.
16484 // The quiet and signalling forms share the same fcmp predicate,
16485 // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
16486 FCmpInst::Predicate Pred;
16487 bool IsSignaling;
16488 // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
16489 // behavior is inverted. We'll handle that after the switch.
16490 switch (CC & 0xf) {
16491 case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;
16492 case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;
16493 case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;
16494 case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;
16495 case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;
16496 case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;
16497 case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;
16498 case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;
16499 case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;
16500 case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;
16501 case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;
16502 case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
16503 case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;
16504 case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;
16505 case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;
16506 case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;
16507 default: llvm_unreachable("Unhandled CC");
16508 }
16509
16510 // Invert the signalling behavior for 16-31.
16511 if (CC & 0x10)
16512 IsSignaling = !IsSignaling;
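// For example, _CMP_GT_OS (0x0e) stays a signalling FCMP_OGT, while
// _CMP_GT_OQ (0x1e) hits the same table entry and then has the signalling
// flag cleared here.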
16513
16514 // If the predicate is true or false and we're using constrained intrinsics,
16515 // we don't have a compare intrinsic we can use. Just use the legacy X86
16516 // specific intrinsic.
16517 // If the intrinsic is mask enabled and we're using constrained intrinsics,
16518 // use the legacy X86 specific intrinsic.
16519 if (Builder.getIsFPConstrained() &&
16520 (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||
16521 IsMaskFCmp)) {
16522
16523 Intrinsic::ID IID;
16524 switch (BuiltinID) {
16525 default: llvm_unreachable("Unexpected builtin");
16526 case X86::BI__builtin_ia32_cmpps:
16527 IID = Intrinsic::x86_sse_cmp_ps;
16528 break;
16529 case X86::BI__builtin_ia32_cmpps256:
16530 IID = Intrinsic::x86_avx_cmp_ps_256;
16531 break;
16532 case X86::BI__builtin_ia32_cmppd:
16533 IID = Intrinsic::x86_sse2_cmp_pd;
16534 break;
16535 case X86::BI__builtin_ia32_cmppd256:
16536 IID = Intrinsic::x86_avx_cmp_pd_256;
16537 break;
16538 case X86::BI__builtin_ia32_cmpph128_mask:
16539 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;
16540 break;
16541 case X86::BI__builtin_ia32_cmpph256_mask:
16542 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;
16543 break;
16544 case X86::BI__builtin_ia32_cmpph512_mask:
16545 IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;
16546 break;
16547 case X86::BI__builtin_ia32_cmpps512_mask:
16548 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
16549 break;
16550 case X86::BI__builtin_ia32_cmppd512_mask:
16551 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
16552 break;
16553 case X86::BI__builtin_ia32_cmpps128_mask:
16554 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
16555 break;
16556 case X86::BI__builtin_ia32_cmpps256_mask:
16557 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
16558 break;
16559 case X86::BI__builtin_ia32_cmppd128_mask:
16560 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
16561 break;
16562 case X86::BI__builtin_ia32_cmppd256_mask:
16563 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
16564 break;
16565 }
16566
16567 Function *Intr = CGM.getIntrinsic(IID);
16568 if (IsMaskFCmp) {
16569 unsigned NumElts =
16570 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16571 Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);
16572 Value *Cmp = Builder.CreateCall(Intr, Ops);
16573 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);
16574 }
16575
16576 return Builder.CreateCall(Intr, Ops);
16577 }
16578
16579 // Builtins with the _mask suffix return an integer bitmask; builtins
16580 // without it return a vector of integers of the same width as the inputs.
16581 if (IsMaskFCmp) {
16582 // We ignore SAE if strict FP is disabled. We only keep precise
16583 // exception behavior under strict FP.
16584 // NOTE: If strict FP does ever go through here a CGFPOptionsRAII
16585 // object will be required.
16586 unsigned NumElts =
16587 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
16588 Value *Cmp;
16589 if (IsSignaling)
16590 Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
16591 else
16592 Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
16593 return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
16594 }
16595
16596 return getVectorFCmpIR(Pred, IsSignaling);
16597 }
16598
16599 // SSE scalar comparison intrinsics
16600 case X86::BI__builtin_ia32_cmpeqss:
16601 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
16602 case X86::BI__builtin_ia32_cmpltss:
16603 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
16604 case X86::BI__builtin_ia32_cmpless:
16605 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
16606 case X86::BI__builtin_ia32_cmpunordss:
16607 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
16608 case X86::BI__builtin_ia32_cmpneqss:
16609 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
16610 case X86::BI__builtin_ia32_cmpnltss:
16611 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
16612 case X86::BI__builtin_ia32_cmpnless:
16613 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
16614 case X86::BI__builtin_ia32_cmpordss:
16615 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
16616 case X86::BI__builtin_ia32_cmpeqsd:
16617 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
16618 case X86::BI__builtin_ia32_cmpltsd:
16619 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
16620 case X86::BI__builtin_ia32_cmplesd:
16621 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
16622 case X86::BI__builtin_ia32_cmpunordsd:
16623 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
16624 case X86::BI__builtin_ia32_cmpneqsd:
16625 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
16626 case X86::BI__builtin_ia32_cmpnltsd:
16627 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
16628 case X86::BI__builtin_ia32_cmpnlesd:
16629 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
16630 case X86::BI__builtin_ia32_cmpordsd:
16631 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
16632
16633 // f16c half2float intrinsics
16634 case X86::BI__builtin_ia32_vcvtph2ps:
16635 case X86::BI__builtin_ia32_vcvtph2ps256:
16636 case X86::BI__builtin_ia32_vcvtph2ps_mask:
16637 case X86::BI__builtin_ia32_vcvtph2ps256_mask:
16638 case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
16639 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
16640 return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));
16641 }
16642
16643 // AVX512 bf16 intrinsics
16644 case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
16645 Ops[2] = getMaskVecValue(
16646 *this, Ops[2],
16647 cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());
16648 Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
16649 return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16650 }
16651 case X86::BI__builtin_ia32_cvtsbf162ss_32:
16652 return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());
16653
16654 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16655 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
16656 Intrinsic::ID IID;
16657 switch (BuiltinID) {
16658 default: llvm_unreachable("Unsupported intrinsic!");
16659 case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
16660 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
16661 break;
16662 case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
16663 IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
16664 break;
16665 }
16666 Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
16667 return EmitX86Select(*this, Ops[2], Res, Ops[1]);
16668 }
16669
16670 case X86::BI__cpuid:
16671 case X86::BI__cpuidex: {
16672 Value *FuncId = EmitScalarExpr(E->getArg(1));
16673 Value *SubFuncId = BuiltinID == X86::BI__cpuidex
16674 ? EmitScalarExpr(E->getArg(2))
16675 : llvm::ConstantInt::get(Int32Ty, 0);
16676
16677 llvm::StructType *CpuidRetTy =
16678 llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
16679 llvm::FunctionType *FTy =
16680 llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
16681
16682 StringRef Asm, Constraints;
16683 if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
16684 Asm = "cpuid";
16685 Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
16686 } else {
16687 // x86-64 uses %rbx as the base register, so preserve it.
16688 Asm = "xchgq %rbx, ${1:q}\n"
16689 "cpuid\n"
16690 "xchgq %rbx, ${1:q}";
16691 Constraints = "={ax},=r,={cx},={dx},0,2";
16692 }
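// The four asm results are EAX, EBX, ECX and EDX (on x86-64 the EBX value
// travels through a scratch register so that RBX is preserved); they are
// stored below into the int[4] buffer passed as the builtin's first argument.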
16693
16694 llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
16695 /*hasSideEffects=*/false);
16696 Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
16697 Value *BasePtr = EmitScalarExpr(E->getArg(0));
16698 Value *Store = nullptr;
16699 for (unsigned i = 0; i < 4; i++) {
16700 Value *Extracted = Builder.CreateExtractValue(IACall, i);
16701 Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
16702 Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
16703 }
16704
16705 // Return the last store instruction to signal that we have emitted the
16706 // intrinsic.
16707 return Store;
16708 }
16709
16710 case X86::BI__emul:
16711 case X86::BI__emulu: {
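// __emul/__emulu widen both 32-bit operands to i64 (sign- or zero-extended)
// and multiply, producing the full 64-bit product with the matching
// nsw/nuw flag.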
16712 llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
16713 bool isSigned = (BuiltinID == X86::BI__emul);
16714 Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
16715 Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
16716 return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
16717 }
16718 case X86::BI__mulh:
16719 case X86::BI__umulh:
16720 case X86::BI_mul128:
16721 case X86::BI_umul128: {
16722 llvm::Type *ResType = ConvertType(E->getType());
16723 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
16724
16725 bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
16726 Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
16727 Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
16728
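// The product is computed in 128 bits; the top 64 bits are the result of
// __mulh/__umulh, while _mul128/_umul128 additionally store them through
// the third argument and return the low 64 bits.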
16729 Value *MulResult, *HigherBits;
16730 if (IsSigned) {
16731 MulResult = Builder.CreateNSWMul(LHS, RHS);
16732 HigherBits = Builder.CreateAShr(MulResult, 64);
16733 } else {
16734 MulResult = Builder.CreateNUWMul(LHS, RHS);
16735 HigherBits = Builder.CreateLShr(MulResult, 64);
16736 }
16737 HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
16738
16739 if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
16740 return HigherBits;
16741
16742 Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
16743 Builder.CreateStore(HigherBits, HighBitsAddress);
16744 return Builder.CreateIntCast(MulResult, ResType, IsSigned);
16745 }
16746
16747 case X86::BI__faststorefence: {
16748 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16749 llvm::SyncScope::System);
16750 }
16751 case X86::BI__shiftleft128:
16752 case X86::BI__shiftright128: {
16753 llvm::Function *F = CGM.getIntrinsic(
16754 BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
16755 Int64Ty);
16756 // Flip low/high ops and zero-extend amount to matching type.
16757 // shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)
16758 // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)
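// In effect, __shiftleft128(Lo, Hi, N) yields the high 64 bits of the
// 128-bit value (Hi:Lo) << (N & 63), which is exactly fshl(Hi, Lo, N).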
16759 std::swap(Ops[0], Ops[1]);
16760 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
16761 return Builder.CreateCall(F, Ops);
16762 }
16763 case X86::BI_ReadWriteBarrier:
16764 case X86::BI_ReadBarrier:
16765 case X86::BI_WriteBarrier: {
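// These MSVC barriers only need to order the current thread's accesses, so
// a sequentially consistent fence at single-thread scope (effectively a
// compiler-only barrier) is sufficient, unlike __faststorefence above.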
16766 return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
16767 llvm::SyncScope::SingleThread);
16768 }
16769
16770 case X86::BI_AddressOfReturnAddress: {
16771 Function *F =
16772 CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);
16773 return Builder.CreateCall(F);
16774 }
16775 case X86::BI__stosb: {
16776 // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
16777 // instruction, but it will create a memset that won't be optimized away.
16778 return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);
16779 }
16780 case X86::BI__ud2:
16781 // llvm.trap makes a ud2a instruction on x86.
16782 return EmitTrapCall(Intrinsic::trap);
16783 case X86::BI__int2c: {
16784 // This interrupt signals a driver assertion failure in x86 NT kernels.
16785 llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
16786 llvm::InlineAsm *IA =
16787 llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
16788 llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
16789 getLLVMContext(), llvm::AttributeList::FunctionIndex,
16790 llvm::Attribute::NoReturn);
16791 llvm::CallInst *CI = Builder.CreateCall(IA);
16792 CI->setAttributes(NoReturnAttr);
16793 return CI;
16794 }
16795 case X86::BI__readfsbyte:
16796 case X86::BI__readfsword:
16797 case X86::BI__readfsdword:
16798 case X86::BI__readfsqword: {
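// The operand is an FS-relative offset; in the X86 backend address space
// 257 denotes the FS segment (and 256 below denotes GS), so the volatile
// load reads through the segment base.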
16799 llvm::Type *IntTy = ConvertType(E->getType());
16800 Value *Ptr = Builder.CreateIntToPtr(
16801 Ops[0], llvm::PointerType::get(getLLVMContext(), 257));
16802 LoadInst *Load = Builder.CreateAlignedLoad(
16803 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16804 Load->setVolatile(true);
16805 return Load;
16806 }
16807 case X86::BI__readgsbyte:
16808 case X86::BI__readgsword:
16809 case X86::BI__readgsdword:
16810 case X86::BI__readgsqword: {
16811 llvm::Type *IntTy = ConvertType(E->getType());
16812 Value *Ptr = Builder.CreateIntToPtr(
16813 Ops[0], llvm::PointerType::get(getLLVMContext(), 256));
16814 LoadInst *Load = Builder.CreateAlignedLoad(
16815 IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
16816 Load->setVolatile(true);
16817 return Load;
16818 }
16819 case X86::BI__builtin_ia32_encodekey128_u32: {
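// llvm.x86.encodekey128 returns an i32 plus several 128-bit pieces; pieces
// 1..3 (the key handle) are stored to the output buffer at 16-byte
// intervals below, and the i32 is returned to the caller.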
16820 Intrinsic::ID IID = Intrinsic::x86_encodekey128;
16821
16822 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});
16823
16824 for (int i = 0; i < 3; ++i) {
16825 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16826 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);
16827 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16828 }
16829
16830 return Builder.CreateExtractValue(Call, 0);
16831 }
16832 case X86::BI__builtin_ia32_encodekey256_u32: {
16833 Intrinsic::ID IID = Intrinsic::x86_encodekey256;
16834
16835 Value *Call =
16836 Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});
16837
16838 for (int i = 0; i < 4; ++i) {
16839 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16840 Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);
16841 Builder.CreateAlignedStore(Extract, Ptr, Align(1));
16842 }
16843
16844 return Builder.CreateExtractValue(Call, 0);
16845 }
16846 case X86::BI__builtin_ia32_aesenc128kl_u8:
16847 case X86::BI__builtin_ia32_aesdec128kl_u8:
16848 case X86::BI__builtin_ia32_aesenc256kl_u8:
16849 case X86::BI__builtin_ia32_aesdec256kl_u8: {
16850 Intrinsic::ID IID;
16851 StringRef BlockName;
16852 switch (BuiltinID) {
16853 default:
16854 llvm_unreachable("Unexpected builtin");
16855 case X86::BI__builtin_ia32_aesenc128kl_u8:
16856 IID = Intrinsic::x86_aesenc128kl;
16857 BlockName = "aesenc128kl";
16858 break;
16859 case X86::BI__builtin_ia32_aesdec128kl_u8:
16860 IID = Intrinsic::x86_aesdec128kl;
16861 BlockName = "aesdec128kl";
16862 break;
16863 case X86::BI__builtin_ia32_aesenc256kl_u8:
16864 IID = Intrinsic::x86_aesenc256kl;
16865 BlockName = "aesenc256kl";
16866 break;
16867 case X86::BI__builtin_ia32_aesdec256kl_u8:
16868 IID = Intrinsic::x86_aesdec256kl;
16869 BlockName = "aesdec256kl";
16870 break;
16871 }
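// Each of these key-locker intrinsics returns { i8 success, <2 x i64> result }.
// Branch on the success bit: on success the result is stored to Ops[0], on
// failure Ops[0] is zeroed, and the success byte is returned either way.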
16872
16873 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});
16874
16875 BasicBlock *NoError =
16876 createBasicBlock(BlockName + "_no_error", this->CurFn);
16877 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16878 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16879
16880 Value *Ret = Builder.CreateExtractValue(Call, 0);
16881 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16882 Value *Out = Builder.CreateExtractValue(Call, 1);
16883 Builder.CreateCondBr(Succ, NoError, Error);
16884
16885 Builder.SetInsertPoint(NoError);
16886 Builder.CreateDefaultAlignedStore(Out, Ops[0]);
16887 Builder.CreateBr(End);
16888
16889 Builder.SetInsertPoint(Error);
16890 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16891 Builder.CreateDefaultAlignedStore(Zero, Ops[0]);
16892 Builder.CreateBr(End);
16893
16894 Builder.SetInsertPoint(End);
16895 return Builder.CreateExtractValue(Call, 0);
16896 }
16897 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16898 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16899 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16900 case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
16901 Intrinsic::ID IID;
16902 StringRef BlockName;
16903 switch (BuiltinID) {
16904 case X86::BI__builtin_ia32_aesencwide128kl_u8:
16905 IID = Intrinsic::x86_aesencwide128kl;
16906 BlockName = "aesencwide128kl";
16907 break;
16908 case X86::BI__builtin_ia32_aesdecwide128kl_u8:
16909 IID = Intrinsic::x86_aesdecwide128kl;
16910 BlockName = "aesdecwide128kl";
16911 break;
16912 case X86::BI__builtin_ia32_aesencwide256kl_u8:
16913 IID = Intrinsic::x86_aesencwide256kl;
16914 BlockName = "aesencwide256kl";
16915 break;
16916 case X86::BI__builtin_ia32_aesdecwide256kl_u8:
16917 IID = Intrinsic::x86_aesdecwide256kl;
16918 BlockName = "aesdecwide256kl";
16919 break;
16920 }
16921
16922 llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);
16923 Value *InOps[9];
16924 InOps[0] = Ops[2];
16925 for (int i = 0; i != 8; ++i) {
16926 Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);
16927 InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));
16928 }
16929
16930 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);
16931
16932 BasicBlock *NoError =
16933 createBasicBlock(BlockName + "_no_error", this->CurFn);
16934 BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);
16935 BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);
16936
16937 Value *Ret = Builder.CreateExtractValue(Call, 0);
16938 Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());
16939 Builder.CreateCondBr(Succ, NoError, Error);
16940
16941 Builder.SetInsertPoint(NoError);
16942 for (int i = 0; i != 8; ++i) {
16943 Value *Extract = Builder.CreateExtractValue(Call, i + 1);
16944 Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);
16945 Builder.CreateAlignedStore(Extract, Ptr, Align(16));
16946 }
16947 Builder.CreateBr(End);
16948
16949 Builder.SetInsertPoint(Error);
16950 for (int i = 0; i != 8; ++i) {
16951 Value *Out = Builder.CreateExtractValue(Call, i + 1);
16952 Constant *Zero = llvm::Constant::getNullValue(Out->getType());
16953 Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);
16954 Builder.CreateAlignedStore(Zero, Ptr, Align(16));
16955 }
16956 Builder.CreateBr(End);
16957
16958 Builder.SetInsertPoint(End);
16959 return Builder.CreateExtractValue(Call, 0);
16960 }
16961 case X86::BI__builtin_ia32_vfcmaddcph512_mask:
16962 IsConjFMA = true;
16963 [[fallthrough]];
16964 case X86::BI__builtin_ia32_vfmaddcph512_mask: {
16965 Intrinsic::ID IID = IsConjFMA
16966 ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
16967 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
16968 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16969 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16970 }
16971 case X86::BI__builtin_ia32_vfcmaddcph256_round_mask:
16972 IsConjFMA = true;
16973 [[fallthrough]];
16974 case X86::BI__builtin_ia32_vfmaddcph256_round_mask: {
16975 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx10_mask_vfcmaddcph256
16976 : Intrinsic::x86_avx10_mask_vfmaddcph256;
16977 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16978 return EmitX86Select(*this, Ops[3], Call, Ops[0]);
16979 }
16980 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
16981 IsConjFMA = true;
16982 [[fallthrough]];
16983 case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
16984 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16985 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16986 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16987 Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
16988 return EmitX86Select(*this, And, Call, Ops[0]);
16989 }
16990 case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
16991 IsConjFMA = true;
16992 [[fallthrough]];
16993 case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
16994 Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
16995 : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
16996 Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
16997 static constexpr int Mask[] = {0, 5, 6, 7};
16998 return Builder.CreateShuffleVector(Call, Ops[2], Mask);
16999 }
17000 case X86::BI__builtin_ia32_prefetchi:
17001 return Builder.CreateCall(
17002 CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),
17003 {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],
17004 llvm::ConstantInt::get(Int32Ty, 0)});
17005 }
17006}
17007
17008Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
17009 const CallExpr *E) {
17010 // Do not emit the builtin arguments in the arguments of a function call,
17011 // because the evaluation order of function arguments is not specified in C++.
17012 // This is important when testing to ensure the arguments are emitted in the
17013 // same order every time. Eg:
17014 // Instead of:
17015 // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),
17016 // EmitScalarExpr(E->getArg(1)), "swdiv");
17017 // Use:
17018 // Value *Op0 = EmitScalarExpr(E->getArg(0));
17019 // Value *Op1 = EmitScalarExpr(E->getArg(1));
17020 // return Builder.CreateFDiv(Op0, Op1, "swdiv");
17021
17022 Intrinsic::ID ID = Intrinsic::not_intrinsic;
17023
17024#include "llvm/TargetParser/PPCTargetParser.def"
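// On AIX, CPU identity and features are determined either by reading a field
// of the kernel-exported _system_configuration structure (USE_SYS_CONF) or by
// calling getsystemcfg() (SYS_CALL), as selected by PPCTargetParser.def.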
17025 auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,
17026 unsigned Mask, CmpInst::Predicate CompOp,
17027 unsigned OpValue) -> Value * {
17028 if (SupportMethod == BUILTIN_PPC_FALSE)
17029 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17030
17031 if (SupportMethod == BUILTIN_PPC_TRUE)
17032 return llvm::ConstantInt::getTrue(ConvertType(E->getType()));
17033
17034 assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");
17035
17036 llvm::Value *FieldValue = nullptr;
17037 if (SupportMethod == USE_SYS_CONF) {
17038 llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);
17039 llvm::Constant *SysConf =
17040 CGM.CreateRuntimeVariable(STy, "_system_configuration");
17041
17042 // Grab the appropriate field from _system_configuration.
17043 llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
17044 ConstantInt::get(Int32Ty, FieldIdx)};
17045
17046 FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);
17047 FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,
17048 CharUnits::fromQuantity(4).getAsAlign());
17049 } else if (SupportMethod == SYS_CALL) {
17050 llvm::FunctionType *FTy =
17051 llvm::FunctionType::get(Int64Ty, Int32Ty, false);
17052 llvm::FunctionCallee Func =
17053 CGM.CreateRuntimeFunction(FTy, "getsystemcfg");
17054
17055 FieldValue =
17056 Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});
17057 }
17058 assert(FieldValue &&
17059 "SupportMethod value is not defined in PPCTargetParser.def.");
17060
17061 if (Mask)
17062 FieldValue = Builder.CreateAnd(FieldValue, Mask);
17063
17064 llvm::Type *ValueType = FieldValue->getType();
17065 bool IsValueType64Bit = ValueType->isIntegerTy(64);
17066 assert(
17067 (IsValueType64Bit || ValueType->isIntegerTy(32)) &&
17068 "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");
17069
17070 return Builder.CreateICmp(
17071 CompOp, FieldValue,
17072 ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));
17073 };
17074
17075 switch (BuiltinID) {
17076 default: return nullptr;
17077
17078 case Builtin::BI__builtin_cpu_is: {
17079 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17080 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17081 llvm::Triple Triple = getTarget().getTriple();
17082
17083 unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;
17084 typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;
17085
17086 std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =
17087 static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)
17088#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \
17089 AIXID) \
17090 .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})
17091#include "llvm/TargetParser/PPCTargetParser.def"
17092 .Default({BUILTIN_PPC_UNSUPPORTED, 0,
17093 BUILTIN_PPC_UNSUPPORTED, 0}));
17094
17095 if (Triple.isOSAIX()) {
17096 assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17097 "Invalid CPU name. Missed by SemaChecking?");
17098 return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,
17099 ICmpInst::ICMP_EQ, AIXIDValue);
17100 }
17101
17102 assert(Triple.isOSLinux() &&
17103 "__builtin_cpu_is() is only supported for AIX and Linux.");
17104
17105 assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&
17106 "Invalid CPU name. Missed by SemaChecking?");
17107
17108 if (LinuxSupportMethod == BUILTIN_PPC_FALSE)
17109 return llvm::ConstantInt::getFalse(ConvertType(E->getType()));
17110
17111 Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);
17112 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17113 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");
17114 return Builder.CreateICmpEQ(TheCall,
17115 llvm::ConstantInt::get(Int32Ty, LinuxIDValue));
17116 }
17117 case Builtin::BI__builtin_cpu_supports: {
17118 llvm::Triple Triple = getTarget().getTriple();
17119 const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
17120 StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
17121 if (Triple.isOSAIX()) {
17122 unsigned SupportMethod, FieldIdx, Mask, Value;
17123 CmpInst::Predicate CompOp;
17124 typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,
17125 unsigned>
17126 CPUSupportType;
17127 std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =
17128 static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)
17129#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \
17130 VALUE) \
17131 .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})
17132#include "llvm/TargetParser/PPCTargetParser.def"
17133 .Default({BUILTIN_PPC_FALSE, 0, 0,
17134 CmpInst::Predicate(), 0}));
17135 return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,
17136 Value);
17137 }
17138
17139 assert(Triple.isOSLinux() &&
17140 "__builtin_cpu_supports() is only supported for AIX and Linux.");
17141 unsigned FeatureWord;
17142 unsigned BitMask;
17143 std::tie(FeatureWord, BitMask) =
17144 StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
17145#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \
17146 .Case(Name, {FA_WORD, Bitmask})
17147#include "llvm/TargetParser/PPCTargetParser.def"
17148 .Default({0, 0});
17149 if (!BitMask)
17150 return Builder.getFalse();
17151 Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);
17152 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);
17153 Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");
17154 Value *Mask =
17155 Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));
17156 return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));
17157#undef PPC_FAWORD_HWCAP
17158#undef PPC_FAWORD_HWCAP2
17159#undef PPC_FAWORD_CPUID
17160 }
17161
17162 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
17163 // call __builtin_readcyclecounter.
17164 case PPC::BI__builtin_ppc_get_timebase:
17165 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
17166
17167 // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
17168 case PPC::BI__builtin_altivec_lvx:
17169 case PPC::BI__builtin_altivec_lvxl:
17170 case PPC::BI__builtin_altivec_lvebx:
17171 case PPC::BI__builtin_altivec_lvehx:
17172 case PPC::BI__builtin_altivec_lvewx:
17173 case PPC::BI__builtin_altivec_lvsl:
17174 case PPC::BI__builtin_altivec_lvsr:
17175 case PPC::BI__builtin_vsx_lxvd2x:
17176 case PPC::BI__builtin_vsx_lxvw4x:
17177 case PPC::BI__builtin_vsx_lxvd2x_be:
17178 case PPC::BI__builtin_vsx_lxvw4x_be:
17179 case PPC::BI__builtin_vsx_lxvl:
17180 case PPC::BI__builtin_vsx_lxvll:
17181 {
17182 SmallVector<Value *, 2> Ops;
17183 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17184 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17185 if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||
17186 BuiltinID == PPC::BI__builtin_vsx_lxvll)) {
17187 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
17188 Ops.pop_back();
17189 }
17190
17191 switch (BuiltinID) {
17192 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
17193 case PPC::BI__builtin_altivec_lvx:
17194 ID = Intrinsic::ppc_altivec_lvx;
17195 break;
17196 case PPC::BI__builtin_altivec_lvxl:
17197 ID = Intrinsic::ppc_altivec_lvxl;
17198 break;
17199 case PPC::BI__builtin_altivec_lvebx:
17200 ID = Intrinsic::ppc_altivec_lvebx;
17201 break;
17202 case PPC::BI__builtin_altivec_lvehx:
17203 ID = Intrinsic::ppc_altivec_lvehx;
17204 break;
17205 case PPC::BI__builtin_altivec_lvewx:
17206 ID = Intrinsic::ppc_altivec_lvewx;
17207 break;
17208 case PPC::BI__builtin_altivec_lvsl:
17209 ID = Intrinsic::ppc_altivec_lvsl;
17210 break;
17211 case PPC::BI__builtin_altivec_lvsr:
17212 ID = Intrinsic::ppc_altivec_lvsr;
17213 break;
17214 case PPC::BI__builtin_vsx_lxvd2x:
17215 ID = Intrinsic::ppc_vsx_lxvd2x;
17216 break;
17217 case PPC::BI__builtin_vsx_lxvw4x:
17218 ID = Intrinsic::ppc_vsx_lxvw4x;
17219 break;
17220 case PPC::BI__builtin_vsx_lxvd2x_be:
17221 ID = Intrinsic::ppc_vsx_lxvd2x_be;
17222 break;
17223 case PPC::BI__builtin_vsx_lxvw4x_be:
17224 ID = Intrinsic::ppc_vsx_lxvw4x_be;
17225 break;
17226 case PPC::BI__builtin_vsx_lxvl:
17227 ID = Intrinsic::ppc_vsx_lxvl;
17228 break;
17229 case PPC::BI__builtin_vsx_lxvll:
17230 ID = Intrinsic::ppc_vsx_lxvll;
17231 break;
17232 }
17233 llvm::Function *F = CGM.getIntrinsic(ID);
17234 return Builder.CreateCall(F, Ops, "");
17235 }
17236
17237 // vec_st, vec_xst_be
17238 case PPC::BI__builtin_altivec_stvx:
17239 case PPC::BI__builtin_altivec_stvxl:
17240 case PPC::BI__builtin_altivec_stvebx:
17241 case PPC::BI__builtin_altivec_stvehx:
17242 case PPC::BI__builtin_altivec_stvewx:
17243 case PPC::BI__builtin_vsx_stxvd2x:
17244 case PPC::BI__builtin_vsx_stxvw4x:
17245 case PPC::BI__builtin_vsx_stxvd2x_be:
17246 case PPC::BI__builtin_vsx_stxvw4x_be:
17247 case PPC::BI__builtin_vsx_stxvl:
17248 case PPC::BI__builtin_vsx_stxvll:
17249 {
17250 SmallVector<Value *, 3> Ops;
17251 Ops.push_back(EmitScalarExpr(E->getArg(0)));
17252 Ops.push_back(EmitScalarExpr(E->getArg(1)));
17253 Ops.push_back(EmitScalarExpr(E->getArg(2)));
17254 if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||
17255 BuiltinID == PPC::BI__builtin_vsx_stxvll)) {
17256 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
17257 Ops.pop_back();
17258 }
17259
17260 switch (BuiltinID) {
17261 default: llvm_unreachable("Unsupported st intrinsic!");
17262 case PPC::BI__builtin_altivec_stvx:
17263 ID = Intrinsic::ppc_altivec_stvx;
17264 break;
17265 case PPC::BI__builtin_altivec_stvxl:
17266 ID = Intrinsic::ppc_altivec_stvxl;
17267 break;
17268 case PPC::BI__builtin_altivec_stvebx:
17269 ID = Intrinsic::ppc_altivec_stvebx;
17270 break;
17271 case PPC::BI__builtin_altivec_stvehx:
17272 ID = Intrinsic::ppc_altivec_stvehx;
17273 break;
17274 case PPC::BI__builtin_altivec_stvewx:
17275 ID = Intrinsic::ppc_altivec_stvewx;
17276 break;
17277 case PPC::BI__builtin_vsx_stxvd2x:
17278 ID = Intrinsic::ppc_vsx_stxvd2x;
17279 break;
17280 case PPC::BI__builtin_vsx_stxvw4x:
17281 ID = Intrinsic::ppc_vsx_stxvw4x;
17282 break;
17283 case PPC::BI__builtin_vsx_stxvd2x_be:
17284 ID = Intrinsic::ppc_vsx_stxvd2x_be;
17285 break;
17286 case PPC::BI__builtin_vsx_stxvw4x_be:
17287 ID = Intrinsic::ppc_vsx_stxvw4x_be;
17288 break;
17289 case PPC::BI__builtin_vsx_stxvl:
17290 ID = Intrinsic::ppc_vsx_stxvl;
17291 break;
17292 case PPC::BI__builtin_vsx_stxvll:
17293 ID = Intrinsic::ppc_vsx_stxvll;
17294 break;
17295 }
17296 llvm::Function *F = CGM.getIntrinsic(ID);
17297 return Builder.CreateCall(F, Ops, "");
17298 }
17299 case PPC::BI__builtin_vsx_ldrmb: {
17300 // This essentially boils down to performing an unaligned VMX load sequence
17301 // that avoids crossing a page boundary, and then shuffling the elements
17302 // into the right side of the vector register.
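// Concretely: two aligned lvx loads at Op0 and Op0 + NumBytes - 1 bracket the
// (possibly unaligned) region without touching the next page, lvsl/lvsr
// provides the permute mask that extracts the requested bytes, and a final
// shuffle against zero places them at the correct end of the result.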
17303 Value *Op0 = EmitScalarExpr(E->getArg(0));
17304 Value *Op1 = EmitScalarExpr(E->getArg(1));
17305 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17306 llvm::Type *ResTy = ConvertType(E->getType());
17307 bool IsLE = getTarget().isLittleEndian();
17308
17309 // If the user wants the entire vector, just load the entire vector.
17310 if (NumBytes == 16) {
17311 Value *LD =
17312 Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));
17313 if (!IsLE)
17314 return LD;
17315
17316 // Reverse the bytes on LE.
17317 SmallVector<int, 16> RevMask;
17318 for (int Idx = 0; Idx < 16; Idx++)
17319 RevMask.push_back(15 - Idx);
17320 return Builder.CreateShuffleVector(LD, LD, RevMask);
17321 }
17322
17323 llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);
17324 llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr
17325 : Intrinsic::ppc_altivec_lvsl);
17326 llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);
17327 Value *HiMem = Builder.CreateGEP(
17328 Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));
17329 Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");
17330 Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");
17331 Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");
17332
17333 Op0 = IsLE ? HiLd : LoLd;
17334 Op1 = IsLE ? LoLd : HiLd;
17335 Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");
17336 Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());
17337
17338 if (IsLE) {
17339 SmallVector<int, 16> Consts;
17340 for (int Idx = 0; Idx < 16; Idx++) {
17341 int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)
17342 : 16 - (NumBytes - Idx);
17343 Consts.push_back(Val);
17344 }
17345 return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),
17346 Zero, Consts);
17347 }
17348 SmallVector<Constant *, 16> Consts;
17349 for (int Idx = 0; Idx < 16; Idx++)
17350 Consts.push_back(Builder.getInt8(NumBytes + Idx));
17351 Value *Mask2 = ConstantVector::get(Consts);
17352 return Builder.CreateBitCast(
17353 Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);
17354 }
17355 case PPC::BI__builtin_vsx_strmb: {
17356 Value *Op0 = EmitScalarExpr(E->getArg(0));
17357 Value *Op1 = EmitScalarExpr(E->getArg(1));
17358 Value *Op2 = EmitScalarExpr(E->getArg(2));
17359 int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();
17360 bool IsLE = getTarget().isLittleEndian();
17361 auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {
17362 // When storing the whole vector, simply store it on BE; on LE, reverse
17363 // the bytes first and then store.
17364 if (Width == 16) {
17365 Value *StVec = Op2;
17366 if (IsLE) {
17367 SmallVector<int, 16> RevMask;
17368 for (int Idx = 0; Idx < 16; Idx++)
17369 RevMask.push_back(15 - Idx);
17370 StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask);
17371 }
17372 return Builder.CreateStore(
17373 StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));
17374 }
17375 auto *ConvTy = Int64Ty;
17376 unsigned NumElts = 0;
17377 switch (Width) {
17378 default:
17379 llvm_unreachable("width for stores must be a power of 2");
17380 case 8:
17381 ConvTy = Int64Ty;
17382 NumElts = 2;
17383 break;
17384 case 4:
17385 ConvTy = Int32Ty;
17386 NumElts = 4;
17387 break;
17388 case 2:
17389 ConvTy = Int16Ty;
17390 NumElts = 8;
17391 break;
17392 case 1:
17393 ConvTy = Int8Ty;
17394 NumElts = 16;
17395 break;
17396 }
17397 Value *Vec = Builder.CreateBitCast(
17398 Op2, llvm::FixedVectorType::get(ConvTy, NumElts));
17399 Value *Ptr =
17400 Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));
17401 Value *Elt = Builder.CreateExtractElement(Vec, EltNo);
17402 if (IsLE && Width > 1) {
17403 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);
17404 Elt = Builder.CreateCall(F, Elt);
17405 }
17406 return Builder.CreateStore(
17407 Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));
17408 };
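// The code below breaks an arbitrary NumBytes store into power-of-two pieces
// (8, 4, 2, then 1 bytes) using StoreSubVec, working downwards from the
// highest offset; the element index taken from the bitcast vector is
// mirrored between LE and BE.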
17409 unsigned Stored = 0;
17410 unsigned RemainingBytes = NumBytes;
17411 Value *Result;
17412 if (NumBytes == 16)
17413 return StoreSubVec(16, 0, 0);
17414 if (NumBytes >= 8) {
17415 Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);
17416 RemainingBytes -= 8;
17417 Stored += 8;
17418 }
17419 if (RemainingBytes >= 4) {
17420 Result = StoreSubVec(4, NumBytes - Stored - 4,
17421 IsLE ? (Stored >> 2) : 3 - (Stored >> 2));
17422 RemainingBytes -= 4;
17423 Stored += 4;
17424 }
17425 if (RemainingBytes >= 2) {
17426 Result = StoreSubVec(2, NumBytes - Stored - 2,
17427 IsLE ? (Stored >> 1) : 7 - (Stored >> 1));
17428 RemainingBytes -= 2;
17429 Stored += 2;
17430 }
17431 if (RemainingBytes)
17432 Result =
17433 StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored);
17434 return Result;
17435 }
17436 // Square root
17437 case PPC::BI__builtin_vsx_xvsqrtsp:
17438 case PPC::BI__builtin_vsx_xvsqrtdp: {
17439 llvm::Type *ResultType = ConvertType(E->getType());
17440 Value *X = EmitScalarExpr(E->getArg(0));
17441 if (Builder.getIsFPConstrained()) {
17442 llvm::Function *F = CGM.getIntrinsic(
17443 Intrinsic::experimental_constrained_sqrt, ResultType);
17444 return Builder.CreateConstrainedFPCall(F, X);
17445 } else {
17446 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17447 return Builder.CreateCall(F, X);
17448 }
17449 }
17450 // Count leading zeros
17451 case PPC::BI__builtin_altivec_vclzb:
17452 case PPC::BI__builtin_altivec_vclzh:
17453 case PPC::BI__builtin_altivec_vclzw:
17454 case PPC::BI__builtin_altivec_vclzd: {
17455 llvm::Type *ResultType = ConvertType(E->getType());
17456 Value *X = EmitScalarExpr(E->getArg(0));
17457 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17458 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
17459 return Builder.CreateCall(F, {X, Undef});
17460 }
17461 case PPC::BI__builtin_altivec_vctzb:
17462 case PPC::BI__builtin_altivec_vctzh:
17463 case PPC::BI__builtin_altivec_vctzw:
17464 case PPC::BI__builtin_altivec_vctzd: {
17465 llvm::Type *ResultType = ConvertType(E->getType());
17466 Value *X = EmitScalarExpr(E->getArg(0));
17467 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
17468 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
17469 return Builder.CreateCall(F, {X, Undef});
17470 }
17471 case PPC::BI__builtin_altivec_vinsd:
17472 case PPC::BI__builtin_altivec_vinsw:
17473 case PPC::BI__builtin_altivec_vinsd_elt:
17474 case PPC::BI__builtin_altivec_vinsw_elt: {
17475 llvm::Type *ResultType = ConvertType(E->getType());
17476 Value *Op0 = EmitScalarExpr(E->getArg(0));
17477 Value *Op1 = EmitScalarExpr(E->getArg(1));
17478 Value *Op2 = EmitScalarExpr(E->getArg(2));
17479
17480 bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17481 BuiltinID == PPC::BI__builtin_altivec_vinsd);
17482
17483 bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||
17484 BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);
17485
17486 // The third argument must be a compile time constant.
17487 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17488 assert(ArgCI &&
17489 "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
17490
17491 // The valid range for the third argument depends on the input type and
17492 // the builtin called.
17493 int ValidMaxValue = 0;
17494 if (IsUnaligned)
17495 ValidMaxValue = (Is32bit) ? 12 : 8;
17496 else
17497 ValidMaxValue = (Is32bit) ? 3 : 1;
17498
17499 // Get value of third argument.
17500 int64_t ConstArg = ArgCI->getSExtValue();
17501
17502 // Compose range checking error message.
17503 std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
17504 RangeErrMsg += " number " + llvm::to_string(ConstArg);
17505 RangeErrMsg += " is outside of the valid range [0, ";
17506 RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";
17507
17508 // Issue error if third argument is not within the valid range.
17509 if (ConstArg < 0 || ConstArg > ValidMaxValue)
17510 CGM.Error(E->getExprLoc(), RangeErrMsg);
17511
17512 // Input to vec_replace_elt is an element index, convert to byte index.
17513 if (!IsUnaligned) {
17514 ConstArg *= Is32bit ? 4 : 8;
17515 // Fix the constant according to endianness.
17516 if (getTarget().isLittleEndian())
17517 ConstArg = (Is32bit ? 12 : 8) - ConstArg;
17518 }
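// For example, word element 1 becomes byte offset 4, which on little-endian
// targets is remapped to 12 - 4 = 8.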
17519
17520 ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
17521 Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
17522 // Casting input to vector int as per intrinsic definition.
17523 Op0 =
17524 Is32bit
17525 ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
17526 : Builder.CreateBitCast(Op0,
17527 llvm::FixedVectorType::get(Int64Ty, 2));
17528 return Builder.CreateBitCast(
17529 Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
17530 }
17531 case PPC::BI__builtin_altivec_vpopcntb:
17532 case PPC::BI__builtin_altivec_vpopcnth:
17533 case PPC::BI__builtin_altivec_vpopcntw:
17534 case PPC::BI__builtin_altivec_vpopcntd: {
17535 llvm::Type *ResultType = ConvertType(E->getType());
17536 Value *X = EmitScalarExpr(E->getArg(0));
17537 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
17538 return Builder.CreateCall(F, X);
17539 }
17540 case PPC::BI__builtin_altivec_vadduqm:
17541 case PPC::BI__builtin_altivec_vsubuqm: {
17542 Value *Op0 = EmitScalarExpr(E->getArg(0));
17543 Value *Op1 = EmitScalarExpr(E->getArg(1));
17544 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
17545 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
17546 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
17547 if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
17548 return Builder.CreateAdd(Op0, Op1, "vadduqm");
17549 else
17550 return Builder.CreateSub(Op0, Op1, "vsubuqm");
17551 }
17552 case PPC::BI__builtin_altivec_vaddcuq_c:
17553 case PPC::BI__builtin_altivec_vsubcuq_c: {
17554 SmallVector<Value *, 2> Ops;
17555 Value *Op0 = EmitScalarExpr(E->getArg(0));
17556 Value *Op1 = EmitScalarExpr(E->getArg(1));
17557 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17558 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17559 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17560 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17561 ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)
17562 ? Intrinsic::ppc_altivec_vaddcuq
17563 : Intrinsic::ppc_altivec_vsubcuq;
17564 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17565 }
17566 case PPC::BI__builtin_altivec_vaddeuqm_c:
17567 case PPC::BI__builtin_altivec_vaddecuq_c:
17568 case PPC::BI__builtin_altivec_vsubeuqm_c:
17569 case PPC::BI__builtin_altivec_vsubecuq_c: {
17570 SmallVector<Value *, 3> Ops;
17571 Value *Op0 = EmitScalarExpr(E->getArg(0));
17572 Value *Op1 = EmitScalarExpr(E->getArg(1));
17573 Value *Op2 = EmitScalarExpr(E->getArg(2));
17574 llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
17575 llvm::IntegerType::get(getLLVMContext(), 128), 1);
17576 Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
17577 Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
17578 Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));
17579 switch (BuiltinID) {
17580 default:
17581 llvm_unreachable("Unsupported intrinsic!");
17582 case PPC::BI__builtin_altivec_vaddeuqm_c:
17583 ID = Intrinsic::ppc_altivec_vaddeuqm;
17584 break;
17585 case PPC::BI__builtin_altivec_vaddecuq_c:
17586 ID = Intrinsic::ppc_altivec_vaddecuq;
17587 break;
17588 case PPC::BI__builtin_altivec_vsubeuqm_c:
17589 ID = Intrinsic::ppc_altivec_vsubeuqm;
17590 break;
17591 case PPC::BI__builtin_altivec_vsubecuq_c:
17592 ID = Intrinsic::ppc_altivec_vsubecuq;
17593 break;
17594 }
17595 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");
17596 }
17597 case PPC::BI__builtin_ppc_rldimi:
17598 case PPC::BI__builtin_ppc_rlwimi: {
17599 Value *Op0 = EmitScalarExpr(E->getArg(0));
17600 Value *Op1 = EmitScalarExpr(E->getArg(1));
17601 Value *Op2 = EmitScalarExpr(E->getArg(2));
17602 Value *Op3 = EmitScalarExpr(E->getArg(3));
17603 // rldimi is a 64-bit instruction; on 32-bit targets, expand the intrinsic
17604 // before isel to leverage the rotate peephole and avoid legalization effort.
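// In effect the expansion below computes
//   (rotl64(Op0, Op2) & Op3) | (Op1 & ~Op3)
// using fshl as the 64-bit rotate.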
17605 if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&
17606 !getTarget().getTriple().isPPC64()) {
17607 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());
17608 Op2 = Builder.CreateZExt(Op2, Int64Ty);
17609 Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2});
17610 return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),
17611 Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));
17612 }
17613 return Builder.CreateCall(
17614 CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi
17615 ? Intrinsic::ppc_rldimi
17616 : Intrinsic::ppc_rlwimi),
17617 {Op0, Op1, Op2, Op3});
17618 }
17619 case PPC::BI__builtin_ppc_rlwnm: {
17620 Value *Op0 = EmitScalarExpr(E->getArg(0));
17621 Value *Op1 = EmitScalarExpr(E->getArg(1));
17622 Value *Op2 = EmitScalarExpr(E->getArg(2));
17623 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),
17624 {Op0, Op1, Op2});
17625 }
17626 case PPC::BI__builtin_ppc_poppar4:
17627 case PPC::BI__builtin_ppc_poppar8: {
17628 Value *Op0 = EmitScalarExpr(E->getArg(0));
17629 llvm::Type *ArgType = Op0->getType();
17630 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
17631 Value *Tmp = Builder.CreateCall(F, Op0);
17632
17633 llvm::Type *ResultType = ConvertType(E->getType());
17634 Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
17635 if (Result->getType() != ResultType)
17636 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
17637 "cast");
17638 return Result;
17639 }
17640 case PPC::BI__builtin_ppc_cmpb: {
17641 Value *Op0 = EmitScalarExpr(E->getArg(0));
17642 Value *Op1 = EmitScalarExpr(E->getArg(1));
17643 if (getTarget().getTriple().isPPC64()) {
17644 Function *F =
17645 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});
17646 return Builder.CreateCall(F, {Op0, Op1}, "cmpb");
17647 }
17648 // For 32 bit, emit the code as below:
17649 // %conv = trunc i64 %a to i32
17650 // %conv1 = trunc i64 %b to i32
17651 // %shr = lshr i64 %a, 32
17652 // %conv2 = trunc i64 %shr to i32
17653 // %shr3 = lshr i64 %b, 32
17654 // %conv4 = trunc i64 %shr3 to i32
17655 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)
17656 // %conv5 = zext i32 %0 to i64
17657 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)
17658 // %conv614 = zext i32 %1 to i64
17659 // %shl = shl nuw i64 %conv614, 32
17660 // %or = or i64 %shl, %conv5
17661 // ret i64 %or
17662 Function *F =
17663 CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});
17664 Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);
17665 Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);
17666 Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);
17667 Value *ArgOneHi =
17668 Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);
17669 Value *ArgTwoHi =
17670 Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);
17671 Value *ResLo = Builder.CreateZExt(
17672 Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);
17673 Value *ResHiShift = Builder.CreateZExt(
17674 Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);
17675 Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);
17676 return Builder.CreateOr(ResLo, ResHi);
17677 }
17678 // Copy sign
17679 case PPC::BI__builtin_vsx_xvcpsgnsp:
17680 case PPC::BI__builtin_vsx_xvcpsgndp: {
17681 llvm::Type *ResultType = ConvertType(E->getType());
17682 Value *X = EmitScalarExpr(E->getArg(0));
17683 Value *Y = EmitScalarExpr(E->getArg(1));
17684 ID = Intrinsic::copysign;
17685 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17686 return Builder.CreateCall(F, {X, Y});
17687 }
17688 // Rounding/truncation
17689 case PPC::BI__builtin_vsx_xvrspip:
17690 case PPC::BI__builtin_vsx_xvrdpip:
17691 case PPC::BI__builtin_vsx_xvrdpim:
17692 case PPC::BI__builtin_vsx_xvrspim:
17693 case PPC::BI__builtin_vsx_xvrdpi:
17694 case PPC::BI__builtin_vsx_xvrspi:
17695 case PPC::BI__builtin_vsx_xvrdpic:
17696 case PPC::BI__builtin_vsx_xvrspic:
17697 case PPC::BI__builtin_vsx_xvrdpiz:
17698 case PPC::BI__builtin_vsx_xvrspiz: {
17699 llvm::Type *ResultType = ConvertType(E->getType());
17700 Value *X = EmitScalarExpr(E->getArg(0));
17701 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
17702 BuiltinID == PPC::BI__builtin_vsx_xvrspim)
17703 ID = Builder.getIsFPConstrained()
17704 ? Intrinsic::experimental_constrained_floor
17705 : Intrinsic::floor;
17706 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
17707 BuiltinID == PPC::BI__builtin_vsx_xvrspi)
17708 ID = Builder.getIsFPConstrained()
17709 ? Intrinsic::experimental_constrained_round
17710 : Intrinsic::round;
17711 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
17712 BuiltinID == PPC::BI__builtin_vsx_xvrspic)
17713 ID = Builder.getIsFPConstrained()
17714 ? Intrinsic::experimental_constrained_rint
17715 : Intrinsic::rint;
17716 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
17717 BuiltinID == PPC::BI__builtin_vsx_xvrspip)
17718 ID = Builder.getIsFPConstrained()
17719 ? Intrinsic::experimental_constrained_ceil
17720 : Intrinsic::ceil;
17721 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
17722 BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
17723 ID = Builder.getIsFPConstrained()
17724 ? Intrinsic::experimental_constrained_trunc
17725 : Intrinsic::trunc;
17726 llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
17727 return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X)
17728 : Builder.CreateCall(F, X);
17729 }
17730
17731 // Absolute value
17732 case PPC::BI__builtin_vsx_xvabsdp:
17733 case PPC::BI__builtin_vsx_xvabssp: {
17734 llvm::Type *ResultType = ConvertType(E->getType());
17735 Value *X = EmitScalarExpr(E->getArg(0));
17736 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
17737 return Builder.CreateCall(F, X);
17738 }
17739
17740 // Fastmath by default
17741 case PPC::BI__builtin_ppc_recipdivf:
17742 case PPC::BI__builtin_ppc_recipdivd:
17743 case PPC::BI__builtin_ppc_rsqrtf:
17744 case PPC::BI__builtin_ppc_rsqrtd: {
17745 FastMathFlags FMF = Builder.getFastMathFlags();
17746 Builder.getFastMathFlags().setFast();
17747 llvm::Type *ResultType = ConvertType(E->getType());
17748 Value *X = EmitScalarExpr(E->getArg(0));
17749
17750 if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
17751 BuiltinID == PPC::BI__builtin_ppc_recipdivd) {
17752 Value *Y = EmitScalarExpr(E->getArg(1));
17753 Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");
17754 Builder.getFastMathFlags() &= (FMF);
17755 return FDiv;
17756 }
17757 auto *One = ConstantFP::get(ResultType, 1.0);
17758 llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
17759 Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
17760 Builder.getFastMathFlags() &= (FMF);
17761 return FDiv;
17762 }
17763 case PPC::BI__builtin_ppc_alignx: {
17764 Value *Op0 = EmitScalarExpr(E->getArg(0));
17765 Value *Op1 = EmitScalarExpr(E->getArg(1));
17766 ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);
17767 if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))
17768 AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),
17769 llvm::Value::MaximumAlignment);
17770
17771 emitAlignmentAssumption(Op1, E->getArg(1),
17772 /*The expr loc is sufficient.*/ SourceLocation(),
17773 AlignmentCI, nullptr);
17774 return Op1;
17775 }
17776 case PPC::BI__builtin_ppc_rdlam: {
17777 Value *Op0 = EmitScalarExpr(E->getArg(0));
17778 Value *Op1 = EmitScalarExpr(E->getArg(1));
17779 Value *Op2 = EmitScalarExpr(E->getArg(2));
17780 llvm::Type *Ty = Op0->getType();
17781 Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);
17782 Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);
17783 Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});
17784 return Builder.CreateAnd(Rotate, Op2);
17785 }
17786 case PPC::BI__builtin_ppc_load2r: {
17787 Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);
17788 Value *Op0 = EmitScalarExpr(E->getArg(0));
17789 Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});
17790 return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);
17791 }
17792 // FMA variations
17793 case PPC::BI__builtin_ppc_fnmsub:
17794 case PPC::BI__builtin_ppc_fnmsubs:
17795 case PPC::BI__builtin_vsx_xvmaddadp:
17796 case PPC::BI__builtin_vsx_xvmaddasp:
17797 case PPC::BI__builtin_vsx_xvnmaddadp:
17798 case PPC::BI__builtin_vsx_xvnmaddasp:
17799 case PPC::BI__builtin_vsx_xvmsubadp:
17800 case PPC::BI__builtin_vsx_xvmsubasp:
17801 case PPC::BI__builtin_vsx_xvnmsubadp:
17802 case PPC::BI__builtin_vsx_xvnmsubasp: {
17803 llvm::Type *ResultType = ConvertType(E->getType());
17804 Value *X = EmitScalarExpr(E->getArg(0));
17805 Value *Y = EmitScalarExpr(E->getArg(1));
17806 Value *Z = EmitScalarExpr(E->getArg(2));
17807 llvm::Function *F;
17808 if (Builder.getIsFPConstrained())
17809 F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
17810 else
17811 F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
17812 switch (BuiltinID) {
17813 case PPC::BI__builtin_vsx_xvmaddadp:
17814 case PPC::BI__builtin_vsx_xvmaddasp:
17815 if (Builder.getIsFPConstrained())
17816 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
17817 else
17818 return Builder.CreateCall(F, {X, Y, Z});
17819 case PPC::BI__builtin_vsx_xvnmaddadp:
17820 case PPC::BI__builtin_vsx_xvnmaddasp:
17821 if (Builder.getIsFPConstrained())
17822 return Builder.CreateFNeg(
17823 Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
17824 else
17825 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
17826 case PPC::BI__builtin_vsx_xvmsubadp:
17827 case PPC::BI__builtin_vsx_xvmsubasp:
17828 if (Builder.getIsFPConstrained())
17829 return Builder.CreateConstrainedFPCall(
17830 F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17831 else
17832 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
17833 case PPC::BI__builtin_ppc_fnmsub:
17834 case PPC::BI__builtin_ppc_fnmsubs:
17835 case PPC::BI__builtin_vsx_xvnmsubadp:
17836 case PPC::BI__builtin_vsx_xvnmsubasp:
17837 if (Builder.getIsFPConstrained())
17838 return Builder.CreateFNeg(
17839 Builder.CreateConstrainedFPCall(
17840 F, {X, Y, Builder.CreateFNeg(Z, "neg")}),
17841 "neg");
17842 else
17843 return Builder.CreateCall(
17844 CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});
17845 }
17846 llvm_unreachable("Unknown FMA operation");
17847 return nullptr; // Suppress no-return warning
17848 }
17849
17850 case PPC::BI__builtin_vsx_insertword: {
17851 Value *Op0 = EmitScalarExpr(E->getArg(0));
17852 Value *Op1 = EmitScalarExpr(E->getArg(1));
17853 Value *Op2 = EmitScalarExpr(E->getArg(2));
17854 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
17855
17856 // Third argument is a compile time constant int. It must be clamped to
 17857 // the range [0, 12].
17858 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17859 assert(ArgCI &&
17860 "Third arg to xxinsertw intrinsic must be constant integer");
17861 const int64_t MaxIndex = 12;
17862 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17863
 17864 // The builtin semantics don't exactly match the xxinsertw instruction's
17865 // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
17866 // word from the first argument, and inserts it in the second argument. The
17867 // instruction extracts the word from its second input register and inserts
17868 // it into its first input register, so swap the first and second arguments.
17869 std::swap(Op0, Op1);
17870
17871 // Need to cast the second argument from a vector of unsigned int to a
17872 // vector of long long.
17873 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17874
17875 if (getTarget().isLittleEndian()) {
17876 // Reverse the double words in the vector we will extract from.
17877 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17878 Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});
17879
17880 // Reverse the index.
17881 Index = MaxIndex - Index;
17882 }
17883
17884 // Intrinsic expects the first arg to be a vector of int.
17885 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17886 Op2 = ConstantInt::getSigned(Int32Ty, Index);
17887 return Builder.CreateCall(F, {Op0, Op1, Op2});
17888 }
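// [Editor's sketch, not part of CGBuiltin.cpp] The clamping and little-endian
// index reversal performed above for xxinsertw, as a standalone helper
// (assumed name; <algorithm> for std::clamp, <cstdint> for int64_t):
static int64_t xxinsertwEffectiveIndex(int64_t Index, bool IsLittleEndian) {
  const int64_t MaxIndex = 12;
  Index = std::clamp(Index, (int64_t)0, MaxIndex); // clamp to [0, 12]
  return IsLittleEndian ? MaxIndex - Index : Index; // reverse the index on LE
}
// For example, Index = 4 becomes 8 on a little-endian target before the call.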
17889
17890 case PPC::BI__builtin_vsx_extractuword: {
17891 Value *Op0 = EmitScalarExpr(E->getArg(0));
17892 Value *Op1 = EmitScalarExpr(E->getArg(1));
17893 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
17894
17895 // Intrinsic expects the first argument to be a vector of doublewords.
17896 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17897
17898 // The second argument is a compile time constant int that needs to
17899 // be clamped to the range [0, 12].
17900 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);
17901 assert(ArgCI &&
17902 "Second Arg to xxextractuw intrinsic must be a constant integer!");
17903 const int64_t MaxIndex = 12;
17904 int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);
17905
17906 if (getTarget().isLittleEndian()) {
17907 // Reverse the index.
17908 Index = MaxIndex - Index;
17909 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17910
17911 // Emit the call, then reverse the double words of the results vector.
17912 Value *Call = Builder.CreateCall(F, {Op0, Op1});
17913
17914 Value *ShuffleCall =
17915 Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});
17916 return ShuffleCall;
17917 } else {
17918 Op1 = ConstantInt::getSigned(Int32Ty, Index);
17919 return Builder.CreateCall(F, {Op0, Op1});
17920 }
17921 }
17922
17923 case PPC::BI__builtin_vsx_xxpermdi: {
17924 Value *Op0 = EmitScalarExpr(E->getArg(0));
17925 Value *Op1 = EmitScalarExpr(E->getArg(1));
17926 Value *Op2 = EmitScalarExpr(E->getArg(2));
17927 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17928 assert(ArgCI && "Third arg must be constant integer!");
17929
17930 unsigned Index = ArgCI->getZExtValue();
17931 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));
17932 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));
17933
17934 // Account for endianness by treating this as just a shuffle. So we use the
17935 // same indices for both LE and BE in order to produce expected results in
17936 // both cases.
17937 int ElemIdx0 = (Index & 2) >> 1;
17938 int ElemIdx1 = 2 + (Index & 1);
17939
17940 int ShuffleElts[2] = {ElemIdx0, ElemIdx1};
17941 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17942 QualType BIRetType = E->getType();
17943 auto RetTy = ConvertType(BIRetType);
17944 return Builder.CreateBitCast(ShuffleCall, RetTy);
17945 }
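// [Editor's sketch, not part of CGBuiltin.cpp] How the two shuffle indices
// above decode the xxpermdi immediate: bit 1 picks the doubleword taken from
// Op0 (element 0 or 1), bit 0 picks the doubleword taken from Op1 (element 2
// or 3 of the concatenated vector). Assumed helper name:
static void xxpermdiShuffle(unsigned Index, int &ElemIdx0, int &ElemIdx1) {
  ElemIdx0 = (Index & 2) >> 1; // doubleword selected from Op0
  ElemIdx1 = 2 + (Index & 1);  // doubleword selected from Op1
}
// For example, Index = 3 gives {1, 3}: the second doubleword of each input.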
17946
17947 case PPC::BI__builtin_vsx_xxsldwi: {
17948 Value *Op0 = EmitScalarExpr(E->getArg(0));
17949 Value *Op1 = EmitScalarExpr(E->getArg(1));
17950 Value *Op2 = EmitScalarExpr(E->getArg(2));
17951 ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
17952 assert(ArgCI && "Third argument must be a compile time constant");
17953 unsigned Index = ArgCI->getZExtValue() & 0x3;
17954 Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));
17955 Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));
17956
17957 // Create a shuffle mask
17958 int ElemIdx0;
17959 int ElemIdx1;
17960 int ElemIdx2;
17961 int ElemIdx3;
17962 if (getTarget().isLittleEndian()) {
17963 // Little endian element N comes from element 8+N-Index of the
17964 // concatenated wide vector (of course, using modulo arithmetic on
17965 // the total number of elements).
17966 ElemIdx0 = (8 - Index) % 8;
17967 ElemIdx1 = (9 - Index) % 8;
17968 ElemIdx2 = (10 - Index) % 8;
17969 ElemIdx3 = (11 - Index) % 8;
17970 } else {
17971 // Big endian ElemIdx<N> = Index + N
17972 ElemIdx0 = Index;
17973 ElemIdx1 = Index + 1;
17974 ElemIdx2 = Index + 2;
17975 ElemIdx3 = Index + 3;
17976 }
17977
17978 int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};
17979 Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);
17980 QualType BIRetType = E->getType();
17981 auto RetTy = ConvertType(BIRetType);
17982 return Builder.CreateBitCast(ShuffleCall, RetTy);
17983 }
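// [Editor's sketch, not part of CGBuiltin.cpp] The xxsldwi shuffle-mask
// computation above as a standalone helper (assumed name):
static void xxsldwiShuffleMask(unsigned Index, bool IsLittleEndian,
                               int Mask[4]) {
  Index &= 0x3;
  for (unsigned N = 0; N < 4; ++N)
    Mask[N] = IsLittleEndian ? (int)((8 + N - Index) % 8) : (int)(Index + N);
}
// For example, Index = 1 yields {1, 2, 3, 4} on big-endian and {7, 0, 1, 2}
// on little-endian, matching the two branches above.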
17984
17985 case PPC::BI__builtin_pack_vector_int128: {
17986 Value *Op0 = EmitScalarExpr(E->getArg(0));
17987 Value *Op1 = EmitScalarExpr(E->getArg(1));
17988 bool isLittleEndian = getTarget().isLittleEndian();
17989 Value *PoisonValue =
17990 llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));
17991 Value *Res = Builder.CreateInsertElement(
17992 PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));
17993 Res = Builder.CreateInsertElement(Res, Op1,
17994 (uint64_t)(isLittleEndian ? 0 : 1));
17995 return Builder.CreateBitCast(Res, ConvertType(E->getType()));
17996 }
17997
17998 case PPC::BI__builtin_unpack_vector_int128: {
17999 Value *Op0 = EmitScalarExpr(E->getArg(0));
18000 Value *Op1 = EmitScalarExpr(E->getArg(1));
18001 ConstantInt *Index = cast<ConstantInt>(Op1);
18002 Value *Unpacked = Builder.CreateBitCast(
18003 Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
18004
18005 if (getTarget().isLittleEndian())
18006 Index =
18007 ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());
18008
18009 return Builder.CreateExtractElement(Unpacked, Index);
18010 }
18011
18012 case PPC::BI__builtin_ppc_sthcx: {
18013 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);
18014 Value *Op0 = EmitScalarExpr(E->getArg(0));
18015 Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);
18016 return Builder.CreateCall(F, {Op0, Op1});
18017 }
18018
18019 // The PPC MMA builtins take a pointer to a __vector_quad as an argument.
18020 // Some of the MMA instructions accumulate their result into an existing
18021 // accumulator whereas the others generate a new accumulator. So we need to
18022 // use custom code generation to expand a builtin call with a pointer to a
18023 // load (if the corresponding instruction accumulates its result) followed by
18024 // the call to the intrinsic and a store of the result.
18025#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \
18026 case PPC::BI__builtin_##Name:
18027#include "clang/Basic/BuiltinsPPC.def"
 18028 {
 18029 SmallVector<Value *, 4> Ops;
 18030 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
18031 if (E->getArg(i)->getType()->isArrayType())
18032 Ops.push_back(
18033 EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));
18034 else
18035 Ops.push_back(EmitScalarExpr(E->getArg(i)));
 18036 // The first argument of these builtins is a pointer used to store their
18037 // result. However, the llvm intrinsics return their result in multiple
18038 // return values. So, here we emit code extracting these values from the
18039 // intrinsic results and storing them using that pointer.
18040 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||
18041 BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||
18042 BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {
18043 unsigned NumVecs = 2;
18044 auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;
18045 if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {
18046 NumVecs = 4;
18047 Intrinsic = Intrinsic::ppc_mma_disassemble_acc;
18048 }
18049 llvm::Function *F = CGM.getIntrinsic(Intrinsic);
18050 Address Addr = EmitPointerWithAlignment(E->getArg(1));
18051 Value *Vec = Builder.CreateLoad(Addr);
18052 Value *Call = Builder.CreateCall(F, {Vec});
18053 llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);
18054 Value *Ptr = Ops[0];
18055 for (unsigned i=0; i<NumVecs; i++) {
18056 Value *Vec = Builder.CreateExtractValue(Call, i);
18057 llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);
18058 Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);
18059 Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));
18060 }
18061 return Call;
18062 }
18063 if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||
18064 BuiltinID == PPC::BI__builtin_mma_build_acc) {
18065 // Reverse the order of the operands for LE, so the
18066 // same builtin call can be used on both LE and BE
18067 // without the need for the programmer to swap operands.
18068 // The operands are reversed starting from the second argument,
18069 // the first operand is the pointer to the pair/accumulator
18070 // that is being built.
18071 if (getTarget().isLittleEndian())
18072 std::reverse(Ops.begin() + 1, Ops.end());
18073 }
18074 bool Accumulate;
18075 switch (BuiltinID) {
18076 #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \
18077 case PPC::BI__builtin_##Name: \
18078 ID = Intrinsic::ppc_##Intr; \
18079 Accumulate = Acc; \
18080 break;
18081 #include "clang/Basic/BuiltinsPPC.def"
18082 }
18083 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18084 BuiltinID == PPC::BI__builtin_vsx_stxvp ||
18085 BuiltinID == PPC::BI__builtin_mma_lxvp ||
18086 BuiltinID == PPC::BI__builtin_mma_stxvp) {
18087 if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||
18088 BuiltinID == PPC::BI__builtin_mma_lxvp) {
18089 Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);
18090 } else {
18091 Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);
18092 }
18093 Ops.pop_back();
18094 llvm::Function *F = CGM.getIntrinsic(ID);
18095 return Builder.CreateCall(F, Ops, "");
18096 }
18097 SmallVector<Value*, 4> CallOps;
18098 if (Accumulate) {
18099 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18100 Value *Acc = Builder.CreateLoad(Addr);
18101 CallOps.push_back(Acc);
18102 }
18103 for (unsigned i=1; i<Ops.size(); i++)
18104 CallOps.push_back(Ops[i]);
18105 llvm::Function *F = CGM.getIntrinsic(ID);
18106 Value *Call = Builder.CreateCall(F, CallOps);
18107 return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));
18108 }
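// [Editor's note] Rough illustration (approximate IR, not part of
// CGBuiltin.cpp) of the load/call/store expansion described in the comment
// above, for an accumulating MMA builtin such as
// __builtin_mma_xvf32gerpp(&acc, a, b):
//   %acc.in  = load <512 x i1>, ptr %accp
//   %acc.out = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %acc.in,
//                                                       <16 x i8> %a,
//                                                       <16 x i8> %b)
//   store <512 x i1> %acc.out, ptr %accp, align 64
// A non-accumulating builtin omits the initial load of the accumulator.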
18109
18110 case PPC::BI__builtin_ppc_compare_and_swap:
18111 case PPC::BI__builtin_ppc_compare_and_swaplp: {
18112 Address Addr = EmitPointerWithAlignment(E->getArg(0));
18113 Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));
18114 Value *OldVal = Builder.CreateLoad(OldValAddr);
18115 QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();
18116 LValue LV = MakeAddrLValue(Addr, AtomicTy);
18117 Value *Op2 = EmitScalarExpr(E->getArg(2));
18118 auto Pair = EmitAtomicCompareExchange(
18119 LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),
18120 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);
 18121 // Unlike C11's atomic_compare_exchange, according to
 18122 // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp
 18123 // > In either case, the contents of the memory location specified by addr
 18124 // > are copied into the memory location specified by old_val_addr.
 18125 // But it does not specify whether the store to OldValAddr is atomic or
 18126 // which ordering to use, so, following XL's codegen, treat it as a normal
 18127 // store.
18128 Value *LoadedVal = Pair.first.getScalarVal();
18129 Builder.CreateStore(LoadedVal, OldValAddr);
18130 return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());
18131 }
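// [Editor's sketch, not part of clang] Approximate C-level semantics of the
// builtin lowered above, written with the GCC/Clang __atomic builtins
// (assumed helper name; the 'lp' variant operates on long instead of int):
static int ppc_compare_and_swap_sketch(int *Addr, int *OldValAddr, int NewVal) {
  int Expected = *OldValAddr;
  bool Ok = __atomic_compare_exchange_n(Addr, &Expected, NewVal, /*weak=*/false,
                                        __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  // As noted above, the loaded value is copied back in either case.
  *OldValAddr = Expected;
  return Ok;
}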
18132 case PPC::BI__builtin_ppc_fetch_and_add:
18133 case PPC::BI__builtin_ppc_fetch_and_addlp: {
18134 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
18135 llvm::AtomicOrdering::Monotonic);
18136 }
18137 case PPC::BI__builtin_ppc_fetch_and_and:
18138 case PPC::BI__builtin_ppc_fetch_and_andlp: {
18139 return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
18140 llvm::AtomicOrdering::Monotonic);
18141 }
18142
18143 case PPC::BI__builtin_ppc_fetch_and_or:
18144 case PPC::BI__builtin_ppc_fetch_and_orlp: {
18145 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
18146 llvm::AtomicOrdering::Monotonic);
18147 }
18148 case PPC::BI__builtin_ppc_fetch_and_swap:
18149 case PPC::BI__builtin_ppc_fetch_and_swaplp: {
18150 return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
18151 llvm::AtomicOrdering::Monotonic);
18152 }
18153 case PPC::BI__builtin_ppc_ldarx:
18154 case PPC::BI__builtin_ppc_lwarx:
18155 case PPC::BI__builtin_ppc_lharx:
18156 case PPC::BI__builtin_ppc_lbarx:
18157 return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);
18158 case PPC::BI__builtin_ppc_mfspr: {
18159 Value *Op0 = EmitScalarExpr(E->getArg(0));
18160 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18161 ? Int32Ty
18162 : Int64Ty;
18163 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);
18164 return Builder.CreateCall(F, {Op0});
18165 }
18166 case PPC::BI__builtin_ppc_mtspr: {
18167 Value *Op0 = EmitScalarExpr(E->getArg(0));
18168 Value *Op1 = EmitScalarExpr(E->getArg(1));
18169 llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32
18170 ? Int32Ty
18171 : Int64Ty;
18172 Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);
18173 return Builder.CreateCall(F, {Op0, Op1});
18174 }
18175 case PPC::BI__builtin_ppc_popcntb: {
18176 Value *ArgValue = EmitScalarExpr(E->getArg(0));
18177 llvm::Type *ArgType = ArgValue->getType();
18178 Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});
18179 return Builder.CreateCall(F, {ArgValue}, "popcntb");
18180 }
18181 case PPC::BI__builtin_ppc_mtfsf: {
18182 // The builtin takes a uint32 that needs to be cast to an
18183 // f64 to be passed to the intrinsic.
18184 Value *Op0 = EmitScalarExpr(E->getArg(0));
18185 Value *Op1 = EmitScalarExpr(E->getArg(1));
18186 Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);
18187 llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);
18188 return Builder.CreateCall(F, {Op0, Cast}, "");
18189 }
18190
18191 case PPC::BI__builtin_ppc_swdiv_nochk:
18192 case PPC::BI__builtin_ppc_swdivs_nochk: {
18193 Value *Op0 = EmitScalarExpr(E->getArg(0));
18194 Value *Op1 = EmitScalarExpr(E->getArg(1));
18195 FastMathFlags FMF = Builder.getFastMathFlags();
18196 Builder.getFastMathFlags().setFast();
18197 Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");
18198 Builder.getFastMathFlags() &= (FMF);
18199 return FDiv;
18200 }
 18201 case PPC::BI__builtin_ppc_fric:
 18202 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
 18203 *this, E, Intrinsic::rint,
 18204 Intrinsic::experimental_constrained_rint))
 18205 .getScalarVal();
 18206 case PPC::BI__builtin_ppc_frim:
 18207 case PPC::BI__builtin_ppc_frims:
 18208 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
 18209 *this, E, Intrinsic::floor,
 18210 Intrinsic::experimental_constrained_floor))
 18211 .getScalarVal();
 18212 case PPC::BI__builtin_ppc_frin:
 18213 case PPC::BI__builtin_ppc_frins:
 18214 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
 18215 *this, E, Intrinsic::round,
 18216 Intrinsic::experimental_constrained_round))
 18217 .getScalarVal();
 18218 case PPC::BI__builtin_ppc_frip:
 18219 case PPC::BI__builtin_ppc_frips:
 18220 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
 18221 *this, E, Intrinsic::ceil,
 18222 Intrinsic::experimental_constrained_ceil))
 18223 .getScalarVal();
 18224 case PPC::BI__builtin_ppc_friz:
 18225 case PPC::BI__builtin_ppc_frizs:
 18226 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
 18227 *this, E, Intrinsic::trunc,
 18228 Intrinsic::experimental_constrained_trunc))
 18229 .getScalarVal();
 18230 case PPC::BI__builtin_ppc_fsqrt:
 18231 case PPC::BI__builtin_ppc_fsqrts:
 18232 return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(
 18233 *this, E, Intrinsic::sqrt,
 18234 Intrinsic::experimental_constrained_sqrt))
 18235 .getScalarVal();
18236 case PPC::BI__builtin_ppc_test_data_class: {
18237 Value *Op0 = EmitScalarExpr(E->getArg(0));
18238 Value *Op1 = EmitScalarExpr(E->getArg(1));
18239 return Builder.CreateCall(
18240 CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),
18241 {Op0, Op1}, "test_data_class");
18242 }
18243 case PPC::BI__builtin_ppc_maxfe: {
18244 Value *Op0 = EmitScalarExpr(E->getArg(0));
18245 Value *Op1 = EmitScalarExpr(E->getArg(1));
18246 Value *Op2 = EmitScalarExpr(E->getArg(2));
18247 Value *Op3 = EmitScalarExpr(E->getArg(3));
18248 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),
18249 {Op0, Op1, Op2, Op3});
18250 }
18251 case PPC::BI__builtin_ppc_maxfl: {
18252 Value *Op0 = EmitScalarExpr(E->getArg(0));
18253 Value *Op1 = EmitScalarExpr(E->getArg(1));
18254 Value *Op2 = EmitScalarExpr(E->getArg(2));
18255 Value *Op3 = EmitScalarExpr(E->getArg(3));
18256 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),
18257 {Op0, Op1, Op2, Op3});
18258 }
18259 case PPC::BI__builtin_ppc_maxfs: {
18260 Value *Op0 = EmitScalarExpr(E->getArg(0));
18261 Value *Op1 = EmitScalarExpr(E->getArg(1));
18262 Value *Op2 = EmitScalarExpr(E->getArg(2));
18263 Value *Op3 = EmitScalarExpr(E->getArg(3));
18264 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),
18265 {Op0, Op1, Op2, Op3});
18266 }
18267 case PPC::BI__builtin_ppc_minfe: {
18268 Value *Op0 = EmitScalarExpr(E->getArg(0));
18269 Value *Op1 = EmitScalarExpr(E->getArg(1));
18270 Value *Op2 = EmitScalarExpr(E->getArg(2));
18271 Value *Op3 = EmitScalarExpr(E->getArg(3));
18272 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),
18273 {Op0, Op1, Op2, Op3});
18274 }
18275 case PPC::BI__builtin_ppc_minfl: {
18276 Value *Op0 = EmitScalarExpr(E->getArg(0));
18277 Value *Op1 = EmitScalarExpr(E->getArg(1));
18278 Value *Op2 = EmitScalarExpr(E->getArg(2));
18279 Value *Op3 = EmitScalarExpr(E->getArg(3));
18280 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),
18281 {Op0, Op1, Op2, Op3});
18282 }
18283 case PPC::BI__builtin_ppc_minfs: {
18284 Value *Op0 = EmitScalarExpr(E->getArg(0));
18285 Value *Op1 = EmitScalarExpr(E->getArg(1));
18286 Value *Op2 = EmitScalarExpr(E->getArg(2));
18287 Value *Op3 = EmitScalarExpr(E->getArg(3));
18288 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),
18289 {Op0, Op1, Op2, Op3});
18290 }
18291 case PPC::BI__builtin_ppc_swdiv:
18292 case PPC::BI__builtin_ppc_swdivs: {
18293 Value *Op0 = EmitScalarExpr(E->getArg(0));
18294 Value *Op1 = EmitScalarExpr(E->getArg(1));
18295 return Builder.CreateFDiv(Op0, Op1, "swdiv");
18296 }
18297 case PPC::BI__builtin_ppc_set_fpscr_rn:
18298 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),
18299 {EmitScalarExpr(E->getArg(0))});
18300 case PPC::BI__builtin_ppc_mffs:
18301 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));
18302 }
18303}
18304
18305namespace {
 18306 // If \p E is not a null pointer, insert an address space cast to match the
 18307 // return type of \p E if necessary.
18308Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,
18309 const CallExpr *E = nullptr) {
18310 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);
18311 auto *Call = CGF.Builder.CreateCall(F);
18312 Call->addRetAttr(
18313 Attribute::getWithDereferenceableBytes(Call->getContext(), 64));
18314 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));
18315 if (!E)
18316 return Call;
18317 QualType BuiltinRetType = E->getType();
18318 auto *RetTy = cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));
18319 if (RetTy == Call->getType())
18320 return Call;
18321 return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);
18322}
18323
18324Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
18325 auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);
18326 auto *Call = CGF.Builder.CreateCall(F);
18327 Call->addRetAttr(
18328 Attribute::getWithDereferenceableBytes(Call->getContext(), 256));
18329 Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));
18330 return Call;
18331}
18332
18333// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18334/// Emit code based on Code Object ABI version.
18335/// COV_4 : Emit code to use dispatch ptr
18336/// COV_5+ : Emit code to use implicitarg ptr
18337/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
18338/// and use its value for COV_4 or COV_5+ approach. It is used for
18339/// compiling device libraries in an ABI-agnostic way.
18340///
 18341 /// Note: "__oclc_ABI_version" is supposed to be emitted and initialized by
18342/// clang during compilation of user code.
18343Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
18344 llvm::LoadInst *LD;
18345
18346 auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
18347
18348 if (Cov == CodeObjectVersionKind::COV_None) {
18349 StringRef Name = "__oclc_ABI_version";
18350 auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
18351 if (!ABIVersionC)
18352 ABIVersionC = new llvm::GlobalVariable(
18353 CGF.CGM.getModule(), CGF.Int32Ty, false,
18354 llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,
 18355 llvm::GlobalVariable::NotThreadLocal,
 18356 CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
 18357
18358 // This load will be eliminated by the IPSCCP because it is constant
18359 // weak_odr without externally_initialized. Either changing it to weak or
18360 // adding externally_initialized will keep the load.
18361 Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,
18362 CGF.CGM.getIntAlign());
18363
18364 Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
18365 ABIVersion,
18366 llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
18367
18368 // Indexing the implicit kernarg segment.
18369 Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
18370 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18371
18372 // Indexing the HSA kernel_dispatch_packet struct.
18373 Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(
18374 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18375
18376 auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);
 18377 LD = CGF.Builder.CreateLoad(
 18378 Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
 18379 } else {
18380 Value *GEP = nullptr;
18381 if (Cov >= CodeObjectVersionKind::COV_5) {
18382 // Indexing the implicit kernarg segment.
18383 GEP = CGF.Builder.CreateConstGEP1_32(
18384 CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
18385 } else {
18386 // Indexing the HSA kernel_dispatch_packet struct.
18387 GEP = CGF.Builder.CreateConstGEP1_32(
18388 CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);
18389 }
 18390 LD = CGF.Builder.CreateLoad(
 18391 Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));
 18392 }
18393
18394 llvm::MDBuilder MDHelper(CGF.getLLVMContext());
18395 llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),
18396 APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));
18397 LD->setMetadata(llvm::LLVMContext::MD_range, RNode);
18398 LD->setMetadata(llvm::LLVMContext::MD_noundef,
18399 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18400 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18401 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18402 return LD;
18403}
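// [Editor's sketch, not part of CGBuiltin.cpp] The byte offsets used above for
// the 16-bit work-group-size fields, as a standalone helper (assumed name):
// COV_5+ reads the implicit kernarg block at offsets 12/14/16 for x/y/z,
// COV_4 reads the HSA kernel_dispatch_packet at offsets 4/6/8.
static unsigned workGroupSizeByteOffset(unsigned Index, bool UseImplicitArgs) {
  return (UseImplicitArgs ? 12u : 4u) + Index * 2u;
}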
18404
18405// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
18406Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) {
18407 const unsigned XOffset = 12;
18408 auto *DP = EmitAMDGPUDispatchPtr(CGF);
18409 // Indexing the HSA kernel_dispatch_packet struct.
18410 auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);
18411 auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);
 18412 auto *LD = CGF.Builder.CreateLoad(
 18413 Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));
 18414 LD->setMetadata(llvm::LLVMContext::MD_invariant_load,
18415 llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));
18416 return LD;
18417}
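// [Editor's sketch, not part of CGBuiltin.cpp] Likewise, the 32-bit grid-size
// fields are read above from the dispatch packet at offsets 12/16/20 for
// x/y/z (assumed helper name):
static unsigned gridSizeByteOffset(unsigned Index) { return 12u + Index * 4u; }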
18418} // namespace
18419
18420// For processing memory ordering and memory scope arguments of various
18421// amdgcn builtins.
 18422 // \p Order takes a C++11 compatible memory-ordering specifier and converts
18423// it into LLVM's memory ordering specifier using atomic C ABI, and writes
18424// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
 18425 // specific SyncScopeID and writes it to \p SSID.
 18426 void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
 18427 llvm::AtomicOrdering &AO,
18428 llvm::SyncScope::ID &SSID) {
18429 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
18430
18431 // Map C11/C++11 memory ordering to LLVM memory ordering
18432 assert(llvm::isValidAtomicOrderingCABI(ord));
18433 switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
18434 case llvm::AtomicOrderingCABI::acquire:
18435 case llvm::AtomicOrderingCABI::consume:
18436 AO = llvm::AtomicOrdering::Acquire;
18437 break;
18438 case llvm::AtomicOrderingCABI::release:
18439 AO = llvm::AtomicOrdering::Release;
18440 break;
18441 case llvm::AtomicOrderingCABI::acq_rel:
18442 AO = llvm::AtomicOrdering::AcquireRelease;
18443 break;
18444 case llvm::AtomicOrderingCABI::seq_cst:
18445 AO = llvm::AtomicOrdering::SequentiallyConsistent;
18446 break;
18447 case llvm::AtomicOrderingCABI::relaxed:
18448 AO = llvm::AtomicOrdering::Monotonic;
18449 break;
18450 }
18451
18452 // Some of the atomic builtins take the scope as a string name.
18453 StringRef scp;
18454 if (llvm::getConstantStringInfo(Scope, scp)) {
18455 SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
18456 return;
18457 }
18458
18459 // Older builtins had an enum argument for the memory scope.
18460 int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
18461 switch (scope) {
18462 case 0: // __MEMORY_SCOPE_SYSTEM
18463 SSID = llvm::SyncScope::System;
18464 break;
18465 case 1: // __MEMORY_SCOPE_DEVICE
18466 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
18467 break;
18468 case 2: // __MEMORY_SCOPE_WRKGRP
18469 SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
18470 break;
18471 case 3: // __MEMORY_SCOPE_WVFRNT
18472 SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
18473 break;
18474 case 4: // __MEMORY_SCOPE_SINGLE
18475 SSID = llvm::SyncScope::SingleThread;
18476 break;
18477 default:
18478 SSID = llvm::SyncScope::System;
18479 break;
18480 }
18481}
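// [Editor's note, not part of CGBuiltin.cpp] Example of the mapping performed
// above: for a call such as __builtin_amdgcn_fence(__ATOMIC_RELEASE,
// "workgroup"), AO becomes llvm::AtomicOrdering::Release and SSID becomes the
// sync scope registered for the string "workgroup".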
18482
18483llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
18484 unsigned Idx,
18485 const CallExpr *E) {
18486 llvm::Value *Arg = nullptr;
18487 if ((ICEArguments & (1 << Idx)) == 0) {
18488 Arg = EmitScalarExpr(E->getArg(Idx));
18489 } else {
18490 // If this is required to be a constant, constant fold it so that we
18491 // know that the generated intrinsic gets a ConstantInt.
18492 std::optional<llvm::APSInt> Result =
18493 E->getArg(Idx)->getIntegerConstantExpr(getContext());
18494 assert(Result && "Expected argument to be a constant");
18495 Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);
18496 }
18497 return Arg;
18498}
18499
18500Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {
18501 if (QT->hasFloatingRepresentation()) {
18502 switch (elementCount) {
18503 case 2:
18504 return Intrinsic::dx_dot2;
18505 case 3:
18506 return Intrinsic::dx_dot3;
18507 case 4:
18508 return Intrinsic::dx_dot4;
18509 }
18510 }
 18511 if (QT->hasSignedIntegerRepresentation())
 18512 return Intrinsic::dx_sdot;
 18513
 18514 assert(QT->hasUnsignedIntegerRepresentation());
 18515 return Intrinsic::dx_udot;
18516}
18517
 18518 Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
 18519 const CallExpr *E) {
18520 if (!getLangOpts().HLSL)
18521 return nullptr;
18522
18523 switch (BuiltinID) {
18524 case Builtin::BI__builtin_hlsl_all: {
18525 Value *Op0 = EmitScalarExpr(E->getArg(0));
18526 return Builder.CreateIntrinsic(
18527 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18528 CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18529 "hlsl.all");
18530 }
18531 case Builtin::BI__builtin_hlsl_any: {
18532 Value *Op0 = EmitScalarExpr(E->getArg(0));
18533 return Builder.CreateIntrinsic(
18534 /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
18535 CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
18536 "hlsl.any");
18537 }
18538 case Builtin::BI__builtin_hlsl_elementwise_clamp: {
18539 Value *OpX = EmitScalarExpr(E->getArg(0));
18540 Value *OpMin = EmitScalarExpr(E->getArg(1));
18541 Value *OpMax = EmitScalarExpr(E->getArg(2));
18542
18543 QualType Ty = E->getArg(0)->getType();
18544 bool IsUnsigned = false;
18545 if (auto *VecTy = Ty->getAs<VectorType>())
18546 Ty = VecTy->getElementType();
18547 IsUnsigned = Ty->isUnsignedIntegerType();
18548 return Builder.CreateIntrinsic(
18549 /*ReturnType=*/OpX->getType(),
18550 IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
18551 ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
18552 }
18553 case Builtin::BI__builtin_hlsl_dot: {
18554 Value *Op0 = EmitScalarExpr(E->getArg(0));
18555 Value *Op1 = EmitScalarExpr(E->getArg(1));
18556 llvm::Type *T0 = Op0->getType();
18557 llvm::Type *T1 = Op1->getType();
18558 if (!T0->isVectorTy() && !T1->isVectorTy()) {
18559 if (T0->isFloatingPointTy())
18560 return Builder.CreateFMul(Op0, Op1, "dx.dot");
18561
18562 if (T0->isIntegerTy())
18563 return Builder.CreateMul(Op0, Op1, "dx.dot");
18564
18565 // Bools should have been promoted
18566 llvm_unreachable(
18567 "Scalar dot product is only supported on ints and floats.");
18568 }
18569 // A VectorSplat should have happened
18570 assert(T0->isVectorTy() && T1->isVectorTy() &&
18571 "Dot product of vector and scalar is not supported.");
18572
18573 // A vector sext or sitofp should have happened
18574 assert(T0->getScalarType() == T1->getScalarType() &&
18575 "Dot product of vectors need the same element types.");
18576
18577 auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();
18578 [[maybe_unused]] auto *VecTy1 =
18579 E->getArg(1)->getType()->getAs<VectorType>();
 18580 // An HLSLVectorTruncation should have happened
18581 assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&
18582 "Dot product requires vectors to be of the same size.");
18583
18584 return Builder.CreateIntrinsic(
18585 /*ReturnType=*/T0->getScalarType(),
18586 getDotProductIntrinsic(E->getArg(0)->getType(),
18587 VecTy0->getNumElements()),
18588 ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
18589 } break;
18590 case Builtin::BI__builtin_hlsl_lerp: {
18591 Value *X = EmitScalarExpr(E->getArg(0));
18592 Value *Y = EmitScalarExpr(E->getArg(1));
18593 Value *S = EmitScalarExpr(E->getArg(2));
18594 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18595 llvm_unreachable("lerp operand must have a float representation");
18596 return Builder.CreateIntrinsic(
18597 /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
18598 ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
18599 }
18600 case Builtin::BI__builtin_hlsl_length: {
18601 Value *X = EmitScalarExpr(E->getArg(0));
18602
18603 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
18604 "length operand must have a float representation");
18605 // if the operand is a scalar, we can use the fabs llvm intrinsic directly
18606 if (!E->getArg(0)->getType()->isVectorType())
18607 return EmitFAbs(*this, X);
18608
18609 return Builder.CreateIntrinsic(
18610 /*ReturnType=*/X->getType()->getScalarType(),
18611 CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
18612 nullptr, "hlsl.length");
18613 }
18614 case Builtin::BI__builtin_hlsl_normalize: {
18615 Value *X = EmitScalarExpr(E->getArg(0));
18616
18617 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
18618 "normalize operand must have a float representation");
18619
18620 return Builder.CreateIntrinsic(
18621 /*ReturnType=*/X->getType(),
18622 CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
18623 nullptr, "hlsl.normalize");
18624 }
18625 case Builtin::BI__builtin_hlsl_elementwise_frac: {
18626 Value *Op0 = EmitScalarExpr(E->getArg(0));
18627 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18628 llvm_unreachable("frac operand must have a float representation");
18629 return Builder.CreateIntrinsic(
18630 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
18631 ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
 18632 }
 18633 case Builtin::BI__builtin_hlsl_elementwise_isinf: {
18634 Value *Op0 = EmitScalarExpr(E->getArg(0));
18635 llvm::Type *Xty = Op0->getType();
18636 llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
18637 if (Xty->isVectorTy()) {
18638 auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();
18639 retType = llvm::VectorType::get(
18640 retType, ElementCount::getFixed(XVecTy->getNumElements()));
18641 }
18642 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18643 llvm_unreachable("isinf operand must have a float representation");
18644 return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,
18645 ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");
18646 }
18647 case Builtin::BI__builtin_hlsl_mad: {
18648 Value *M = EmitScalarExpr(E->getArg(0));
18649 Value *A = EmitScalarExpr(E->getArg(1));
18650 Value *B = EmitScalarExpr(E->getArg(2));
18651 if (E->getArg(0)->getType()->hasFloatingRepresentation())
18652 return Builder.CreateIntrinsic(
18653 /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
18654 ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");
18655
18656 if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
18657 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18658 return Builder.CreateIntrinsic(
18659 /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
18660 ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");
18661
18662 Value *Mul = Builder.CreateNSWMul(M, A);
18663 return Builder.CreateNSWAdd(Mul, B);
18664 }
18665 assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
18666 if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
18667 return Builder.CreateIntrinsic(
18668 /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
18669 ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
18670
18671 Value *Mul = Builder.CreateNUWMul(M, A);
18672 return Builder.CreateNUWAdd(Mul, B);
18673 }
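// [Editor's sketch, not part of clang] HLSL mad(m, a, b) computes m * a + b;
// the lowering above uses llvm.fmuladd for floats, dx.imad/dx.umad on DXIL
// targets for integers, and a plain mul + add otherwise. A scalar integer
// reference (assumed name):
static int hlsl_mad_reference(int M, int A, int B) { return M * A + B; }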
18674 case Builtin::BI__builtin_hlsl_elementwise_rcp: {
18675 Value *Op0 = EmitScalarExpr(E->getArg(0));
18676 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18677 llvm_unreachable("rcp operand must have a float representation");
18678 llvm::Type *Ty = Op0->getType();
18679 llvm::Type *EltTy = Ty->getScalarType();
18680 Constant *One = Ty->isVectorTy()
18681 ? ConstantVector::getSplat(
18682 ElementCount::getFixed(
18683 cast<FixedVectorType>(Ty)->getNumElements()),
18684 ConstantFP::get(EltTy, 1.0))
18685 : ConstantFP::get(EltTy, 1.0);
18686 return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
18687 }
18688 case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
18689 Value *Op0 = EmitScalarExpr(E->getArg(0));
18690 if (!E->getArg(0)->getType()->hasFloatingRepresentation())
18691 llvm_unreachable("rsqrt operand must have a float representation");
18692 return Builder.CreateIntrinsic(
18693 /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
18694 ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
18695 }
18696 case Builtin::BI__builtin_hlsl_elementwise_saturate: {
18697 Value *Op0 = EmitScalarExpr(E->getArg(0));
18698 assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
18699 "saturate operand must have a float representation");
18700 return Builder.CreateIntrinsic(
18701 /*ReturnType=*/Op0->getType(),
18702 CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
18703 nullptr, "hlsl.saturate");
18704 }
 18705 case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
 18706 return EmitRuntimeCall(CGM.CreateRuntimeFunction(
 18707 llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
18708 {}, false, true));
18709 }
18710 }
18711 return nullptr;
18712}
18713
18714void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
18715 const CallExpr *E) {
18716 constexpr const char *Tag = "amdgpu-as";
18717
 18718 LLVMContext &Ctx = Inst->getContext();
 18719 SmallVector<MMRAMetadata::TagT, 3> MMRAs;
 18720 for (unsigned K = 2; K < E->getNumArgs(); ++K) {
18721 llvm::Value *V = EmitScalarExpr(E->getArg(K));
18722 StringRef AS;
18723 if (llvm::getConstantStringInfo(V, AS)) {
18724 MMRAs.push_back({Tag, AS});
18725 // TODO: Delete the resulting unused constant?
18726 continue;
18727 }
18728 CGM.Error(E->getExprLoc(),
18729 "expected an address space name as a string literal");
18730 }
18731
18732 llvm::sort(MMRAs);
18733 MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());
18734 Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
18735}
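// [Editor's note, not part of CGBuiltin.cpp] Example of the loop above: a call
// such as __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent", "local") passes the
// extra string literal "local" as an address-space hint, which becomes the MMRA
// tag {"amdgpu-as", "local"} on the emitted fence instruction.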
18736
 18737 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
 18738 const CallExpr *E) {
18739 llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
18740 llvm::SyncScope::ID SSID;
18741 switch (BuiltinID) {
18742 case AMDGPU::BI__builtin_amdgcn_div_scale:
18743 case AMDGPU::BI__builtin_amdgcn_div_scalef: {
 18744 // Translate from the intrinsic's struct return to the builtin's out
18745 // argument.
18746
18747 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
18748
18749 llvm::Value *X = EmitScalarExpr(E->getArg(0));
18750 llvm::Value *Y = EmitScalarExpr(E->getArg(1));
18751 llvm::Value *Z = EmitScalarExpr(E->getArg(2));
18752
18753 llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
18754 X->getType());
18755
18756 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
18757
18758 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
18759 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
18760
18761 llvm::Type *RealFlagType = FlagOutPtr.getElementType();
18762
18763 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
18764 Builder.CreateStore(FlagExt, FlagOutPtr);
18765 return Result;
18766 }
18767 case AMDGPU::BI__builtin_amdgcn_div_fmas:
18768 case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
18769 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18770 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18771 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18772 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
18773
18774 llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
18775 Src0->getType());
18776 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
18777 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
18778 }
18779
18780 case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
18781 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18782 Intrinsic::amdgcn_ds_swizzle);
18783 case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
18784 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18785 Intrinsic::amdgcn_mov_dpp8);
18786 case AMDGPU::BI__builtin_amdgcn_mov_dpp:
 18787 case AMDGPU::BI__builtin_amdgcn_update_dpp: {
 18788 llvm::SmallVector<llvm::Value *, 6> Args;
 18789 // Find out if any arguments are required to be integer constant
 18790 // expressions.
 18791 unsigned ICEArguments = 0;
 18792 ASTContext::GetBuiltinTypeError Error;
 18793 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
18794 assert(Error == ASTContext::GE_None && "Should not codegen an error");
18795 for (unsigned I = 0; I != E->getNumArgs(); ++I) {
18796 Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));
18797 }
18798 assert(Args.size() == 5 || Args.size() == 6);
18799 if (Args.size() == 5)
18800 Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));
18801 Function *F =
18802 CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
18803 return Builder.CreateCall(F, Args);
18804 }
18805 case AMDGPU::BI__builtin_amdgcn_permlane16:
18806 case AMDGPU::BI__builtin_amdgcn_permlanex16:
18807 return emitBuiltinWithOneOverloadedType<6>(
18808 *this, E,
18809 BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16
18810 ? Intrinsic::amdgcn_permlane16
18811 : Intrinsic::amdgcn_permlanex16);
18812 case AMDGPU::BI__builtin_amdgcn_permlane64:
18813 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18814 Intrinsic::amdgcn_permlane64);
18815 case AMDGPU::BI__builtin_amdgcn_readlane:
18816 return emitBuiltinWithOneOverloadedType<2>(*this, E,
18817 Intrinsic::amdgcn_readlane);
18818 case AMDGPU::BI__builtin_amdgcn_readfirstlane:
18819 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18820 Intrinsic::amdgcn_readfirstlane);
18821 case AMDGPU::BI__builtin_amdgcn_div_fixup:
18822 case AMDGPU::BI__builtin_amdgcn_div_fixupf:
18823 case AMDGPU::BI__builtin_amdgcn_div_fixuph:
18824 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18825 Intrinsic::amdgcn_div_fixup);
18826 case AMDGPU::BI__builtin_amdgcn_trig_preop:
18827 case AMDGPU::BI__builtin_amdgcn_trig_preopf:
18828 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
18829 case AMDGPU::BI__builtin_amdgcn_rcp:
18830 case AMDGPU::BI__builtin_amdgcn_rcpf:
18831 case AMDGPU::BI__builtin_amdgcn_rcph:
18832 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);
18833 case AMDGPU::BI__builtin_amdgcn_sqrt:
18834 case AMDGPU::BI__builtin_amdgcn_sqrtf:
18835 case AMDGPU::BI__builtin_amdgcn_sqrth:
18836 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18837 Intrinsic::amdgcn_sqrt);
18838 case AMDGPU::BI__builtin_amdgcn_rsq:
18839 case AMDGPU::BI__builtin_amdgcn_rsqf:
18840 case AMDGPU::BI__builtin_amdgcn_rsqh:
18841 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);
18842 case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
18843 case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
18844 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18845 Intrinsic::amdgcn_rsq_clamp);
18846 case AMDGPU::BI__builtin_amdgcn_sinf:
18847 case AMDGPU::BI__builtin_amdgcn_sinh:
18848 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);
18849 case AMDGPU::BI__builtin_amdgcn_cosf:
18850 case AMDGPU::BI__builtin_amdgcn_cosh:
18851 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);
18852 case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
18853 return EmitAMDGPUDispatchPtr(*this, E);
18854 case AMDGPU::BI__builtin_amdgcn_logf:
18855 return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
18856 case AMDGPU::BI__builtin_amdgcn_exp2f:
18857 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18858 Intrinsic::amdgcn_exp2);
18859 case AMDGPU::BI__builtin_amdgcn_log_clampf:
18860 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18861 Intrinsic::amdgcn_log_clamp);
18862 case AMDGPU::BI__builtin_amdgcn_ldexp:
18863 case AMDGPU::BI__builtin_amdgcn_ldexpf: {
18864 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18865 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18866 llvm::Function *F =
18867 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
18868 return Builder.CreateCall(F, {Src0, Src1});
18869 }
18870 case AMDGPU::BI__builtin_amdgcn_ldexph: {
18871 // The raw instruction has a different behavior for out of bounds exponent
18872 // values (implicit truncation instead of saturate to short_min/short_max).
18873 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18874 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18875 llvm::Function *F =
18876 CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
18877 return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
18878 }
18879 case AMDGPU::BI__builtin_amdgcn_frexp_mant:
18880 case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
18881 case AMDGPU::BI__builtin_amdgcn_frexp_manth:
18882 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18883 Intrinsic::amdgcn_frexp_mant);
18884 case AMDGPU::BI__builtin_amdgcn_frexp_exp:
18885 case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
18886 Value *Src0 = EmitScalarExpr(E->getArg(0));
18887 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18888 { Builder.getInt32Ty(), Src0->getType() });
18889 return Builder.CreateCall(F, Src0);
18890 }
18891 case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
18892 Value *Src0 = EmitScalarExpr(E->getArg(0));
18893 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
18894 { Builder.getInt16Ty(), Src0->getType() });
18895 return Builder.CreateCall(F, Src0);
18896 }
18897 case AMDGPU::BI__builtin_amdgcn_fract:
18898 case AMDGPU::BI__builtin_amdgcn_fractf:
18899 case AMDGPU::BI__builtin_amdgcn_fracth:
18900 return emitBuiltinWithOneOverloadedType<1>(*this, E,
18901 Intrinsic::amdgcn_fract);
18902 case AMDGPU::BI__builtin_amdgcn_lerp:
18903 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18904 Intrinsic::amdgcn_lerp);
18905 case AMDGPU::BI__builtin_amdgcn_ubfe:
18906 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18907 Intrinsic::amdgcn_ubfe);
18908 case AMDGPU::BI__builtin_amdgcn_sbfe:
18909 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18910 Intrinsic::amdgcn_sbfe);
18911 case AMDGPU::BI__builtin_amdgcn_ballot_w32:
18912 case AMDGPU::BI__builtin_amdgcn_ballot_w64: {
18913 llvm::Type *ResultType = ConvertType(E->getType());
18914 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
18915 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });
18916 return Builder.CreateCall(F, { Src });
18917 }
18918 case AMDGPU::BI__builtin_amdgcn_uicmp:
18919 case AMDGPU::BI__builtin_amdgcn_uicmpl:
18920 case AMDGPU::BI__builtin_amdgcn_sicmp:
18921 case AMDGPU::BI__builtin_amdgcn_sicmpl: {
18922 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18923 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18924 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18925
18926 // FIXME-GFX10: How should 32 bit mask be handled?
18927 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
18928 { Builder.getInt64Ty(), Src0->getType() });
18929 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18930 }
18931 case AMDGPU::BI__builtin_amdgcn_fcmp:
18932 case AMDGPU::BI__builtin_amdgcn_fcmpf: {
18933 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
18934 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
18935 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
18936
18937 // FIXME-GFX10: How should 32 bit mask be handled?
18938 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
18939 { Builder.getInt64Ty(), Src0->getType() });
18940 return Builder.CreateCall(F, { Src0, Src1, Src2 });
18941 }
18942 case AMDGPU::BI__builtin_amdgcn_class:
18943 case AMDGPU::BI__builtin_amdgcn_classf:
18944 case AMDGPU::BI__builtin_amdgcn_classh:
18945 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
18946 case AMDGPU::BI__builtin_amdgcn_fmed3f:
18947 case AMDGPU::BI__builtin_amdgcn_fmed3h:
18948 return emitBuiltinWithOneOverloadedType<3>(*this, E,
18949 Intrinsic::amdgcn_fmed3);
18950 case AMDGPU::BI__builtin_amdgcn_ds_append:
18951 case AMDGPU::BI__builtin_amdgcn_ds_consume: {
18952 Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
18953 Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
18954 Value *Src0 = EmitScalarExpr(E->getArg(0));
18955 Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
18956 return Builder.CreateCall(F, { Src0, Builder.getFalse() });
18957 }
18958 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18959 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18960 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18961 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
18962 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
18963 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18964 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
18965 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: {
18966
18967 Intrinsic::ID IID;
18968 switch (BuiltinID) {
18969 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
18970 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
18971 IID = Intrinsic::amdgcn_global_load_tr_b64;
18972 break;
18973 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
18974 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:
18975 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:
18976 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:
18977 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:
18978 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:
18979 IID = Intrinsic::amdgcn_global_load_tr_b128;
18980 break;
18981 }
18982 llvm::Type *LoadTy = ConvertType(E->getType());
18983 llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18984 llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
18985 return Builder.CreateCall(F, {Addr});
18986 }
18987 case AMDGPU::BI__builtin_amdgcn_get_fpenv: {
18988 Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,
18989 {llvm::Type::getInt64Ty(getLLVMContext())});
18990 return Builder.CreateCall(F);
18991 }
18992 case AMDGPU::BI__builtin_amdgcn_set_fpenv: {
18993 Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,
18994 {llvm::Type::getInt64Ty(getLLVMContext())});
18995 llvm::Value *Env = EmitScalarExpr(E->getArg(0));
18996 return Builder.CreateCall(F, {Env});
18997 }
18998 case AMDGPU::BI__builtin_amdgcn_read_exec:
18999 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);
19000 case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
19001 return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);
19002 case AMDGPU::BI__builtin_amdgcn_read_exec_hi:
19003 return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);
19004 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:
19005 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:
19006 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:
19007 case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {
19008 llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));
19009 llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));
19010 llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));
19011 llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));
19012 llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));
19013 llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));
19014
19015 // The builtins take these arguments as vec4 where the last element is
19016 // ignored. The intrinsic takes them as vec3.
19017 RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,
19018 ArrayRef<int>{0, 1, 2});
19019 RayDir =
19020 Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});
19021 RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,
19022 ArrayRef<int>{0, 1, 2});
19023
19024 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,
19025 {NodePtr->getType(), RayDir->getType()});
19026 return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,
19027 RayInverseDir, TextureDescr});
19028 }
19029
 19030 case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {
 19031 SmallVector<Value *, 4> Args;
 19032 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19033 Args.push_back(EmitScalarExpr(E->getArg(i)));
19034
19035 Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);
19036 Value *Call = Builder.CreateCall(F, Args);
19037 Value *Rtn = Builder.CreateExtractValue(Call, 0);
19038 Value *A = Builder.CreateExtractValue(Call, 1);
19039 llvm::Type *RetTy = ConvertType(E->getType());
19040 Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,
19041 (uint64_t)0);
19042 return Builder.CreateInsertElement(I0, A, 1);
19043 }
19044
19045 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19046 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19047 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19048 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19049 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19050 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19051 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19052 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19053 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19054 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19055 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19056 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19057 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19058 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19059 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19060 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19061 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19062 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19063 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19064 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19065 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19066 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19067 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19068 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19069 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19070 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19071 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19072 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19073 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19074 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19075 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19076 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19077 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19078 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19079 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19080 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19081 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19082 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19083 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19084 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19085 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19086 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19087 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19088 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19089 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19090 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19091 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19092 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19093 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19094 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19095 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19096 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19097 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19098 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19099 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19100 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19101 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19102 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19103 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19104 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
19105
19106 // These operations perform a matrix multiplication and accumulation of
19107 // the form:
19108 //   D = A * B + C
19109 // We need to specify one type for the A/B matrices and one for the C/D
19110 // matrices. Sparse matrix operations can have different types for A and B
19111 // as well as an additional type for the sparsity index.
19112 // The destination type must be listed before the types for the source operands.
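// For illustration (an indicative sketch; the exact vector types depend on
// the wave size): a call such as
//   __builtin_amdgcn_wmma_f32_16x16x16_f16_w32(A, B, C)
// lowers to the overloaded llvm.amdgcn.wmma.f32.16x16x16.f16 intrinsic,
// whose overload types are taken from the C/D operand (argument index 2)
// and the A/B operand (argument index 0) recorded below.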
19113 SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
19114 // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
19115 // There is no need for the variable opsel argument, so always set it to
19116 // "false".
19117 bool AppendFalseForOpselArg = false;
19118 unsigned BuiltinWMMAOp;
19119
19120 switch (BuiltinID) {
19121 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
19122 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
19123 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
19124 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
19125 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19126 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
19127 break;
19128 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
19129 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
19130 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
19131 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
19132 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19133 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
19134 break;
19135 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
19136 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
19137 AppendFalseForOpselArg = true;
19138 [[fallthrough]];
19139 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
19140 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
19141 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19142 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
19143 break;
19144 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
19145 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
19146 AppendFalseForOpselArg = true;
19147 [[fallthrough]];
19148 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
19149 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
19150 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19151 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
19152 break;
19153 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
19154 case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
19155 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19156 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
19157 break;
19158 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
19159 case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
19160 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19161 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
19162 break;
19163 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
19164 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
19165 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
19166 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
19167 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19168 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
19169 break;
19170 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
19171 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
19172 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
19173 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
19174 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19175 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
19176 break;
19177 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
19178 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
19179 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19180 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
19181 break;
19182 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
19183 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
19184 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19185 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
19186 break;
19187 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
19188 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
19189 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19190 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
19191 break;
19192 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
19193 case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
19194 ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
19195 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
19196 break;
19197 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
19198 case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
19199 ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
19200 BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
19201 break;
19202 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
19203 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
19204 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19205 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
19206 break;
19207 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
19208 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
19209 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19210 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
19211 break;
19212 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
19213 case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
19214 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19215 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
19216 break;
19217 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
19218 case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
19219 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19220 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
19221 break;
19222 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
19223 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
19224 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19225 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
19226 break;
19227 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
19228 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
19229 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19230 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
19231 break;
19232 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
19233 case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
19234 ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
19235 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
19236 break;
19237 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
19238 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
19239 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19240 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
19241 break;
19242 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
19243 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
19244 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19245 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
19246 break;
19247 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
19248 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
19249 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19250 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
19251 break;
19252 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
19253 case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
19254 ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
19255 BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
19256 break;
19257 }
19258
19259 SmallVector<Value *, 6> Args;
19260 for (int i = 0, e = E->getNumArgs(); i != e; ++i)
19261 Args.push_back(EmitScalarExpr(E->getArg(i)));
19262 if (AppendFalseForOpselArg)
19263 Args.push_back(Builder.getFalse());
19264
19265 SmallVector<llvm::Type *, 6> ArgTypes;
19266 for (auto ArgIdx : ArgsForMatchingMatrixTypes)
19267 ArgTypes.push_back(Args[ArgIdx]->getType());
19268
19269 Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);
19270 return Builder.CreateCall(F, Args);
19271 }
19272
19273 // amdgcn workitem
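// The [0, 1024) bounds passed to emitRangedBuiltin presumably reflect the
// 1024 work-item cap on a work-group dimension, letting optimizers assume
// the returned id never exceeds that limit.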
19274 case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
19275 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
19276 case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
19277 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
19278 case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
19279 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
19280
19281 // amdgcn workgroup size
19282 case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:
19283 return EmitAMDGPUWorkGroupSize(*this, 0);
19284 case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:
19285 return EmitAMDGPUWorkGroupSize(*this, 1);
19286 case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:
19287 return EmitAMDGPUWorkGroupSize(*this, 2);
19288
19289 // amdgcn grid size
19290 case AMDGPU::BI__builtin_amdgcn_grid_size_x:
19291 return EmitAMDGPUGridSize(*this, 0);
19292 case AMDGPU::BI__builtin_amdgcn_grid_size_y:
19293 return EmitAMDGPUGridSize(*this, 1);
19294 case AMDGPU::BI__builtin_amdgcn_grid_size_z:
19295 return EmitAMDGPUGridSize(*this, 2);
19296
19297 // r600 intrinsics
19298 case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
19299 case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
19300 return emitBuiltinWithOneOverloadedType<1>(*this, E,
19301 Intrinsic::r600_recipsqrt_ieee);
19302 case AMDGPU::BI__builtin_r600_read_tidig_x:
19303 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
19304 case AMDGPU::BI__builtin_r600_read_tidig_y:
19305 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
19306 case AMDGPU::BI__builtin_r600_read_tidig_z:
19307 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
19308 case AMDGPU::BI__builtin_amdgcn_alignbit: {
19309 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
19310 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
19311 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
19312 Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType());
19313 return Builder.CreateCall(F, { Src0, Src1, Src2 });
19314 }
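// For example, __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup") is
// expected to lower to roughly: fence syncscope("workgroup") seq_cst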
19315 case AMDGPU::BI__builtin_amdgcn_fence: {
19316 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
19317 EmitScalarExpr(E->getArg(1)), AO, SSID);
19318 FenceInst *Fence = Builder.CreateFence(AO, SSID);
19319 if (E->getNumArgs() > 2)
19320 AddAMDGPUFenceAddressSpaceMMRA(Fence, E);
19321 return Fence;
19322 }
19323 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19324 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19325 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19326 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19327 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
19328 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
19329 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
19330 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
19331 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
19332 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
19333 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
19334 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
19335 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
19336 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19337 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19338 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19339 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19340 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19341 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
19342 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
19343 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
19344 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
19345 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: {
19346 llvm::AtomicRMWInst::BinOp BinOp;
19347 switch (BuiltinID) {
19348 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
19349 case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
19350 BinOp = llvm::AtomicRMWInst::UIncWrap;
19351 break;
19352 case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
19353 case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
19354 BinOp = llvm::AtomicRMWInst::UDecWrap;
19355 break;
19356 case AMDGPU::BI__builtin_amdgcn_ds_faddf:
19357 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
19358 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
19359 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
19360 case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
19361 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
19362 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
19363 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
19364 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
19365 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19366 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19367 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19368 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
19369 BinOp = llvm::AtomicRMWInst::FAdd;
19370 break;
19371 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
19372 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:
19373 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:
19374 BinOp = llvm::AtomicRMWInst::FMin;
19375 break;
19376 case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:
19377 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:
19378 case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
19379 BinOp = llvm::AtomicRMWInst::FMax;
19380 break;
19381 }
19382
19383 Address Ptr = CheckAtomicAlignment(*this, E);
19384 Value *Val = EmitScalarExpr(E->getArg(1));
19385 llvm::Type *OrigTy = Val->getType();
19386 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
19387
19388 bool Volatile;
19389
19390 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
19391 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
19392 BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
19393 // __builtin_amdgcn_ds_faddf/fminf/fmaxf have an explicit volatile argument
19394 Volatile =
19395 cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
19396 } else {
19397 // Infer volatile from the passed type.
19398 Volatile =
19399 PtrTy.castAs<PointerType>()->getPointeeType().isVolatileQualified();
19400 }
19401
19402 if (E->getNumArgs() >= 4) {
19403 // Some of the builtins have explicit ordering and scope arguments.
19404 ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
19405 EmitScalarExpr(E->getArg(3)), AO, SSID);
19406 } else {
19407 // Most of the builtins do not have syncscope/order arguments. For DS
19408 // atomics the scope doesn't really matter, as they implicitly operate at
19409 // workgroup scope.
19410 //
19411 // The global/flat cases need to use agent scope to consistently produce
19412 // the native instruction instead of a cmpxchg expansion.
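// For example, __builtin_amdgcn_global_atomic_fadd_f32(p, v) is expected to
// become roughly:
//   atomicrmw fadd ptr addrspace(1) %p, float %v syncscope("agent") monotonic
// with the metadata attached further below.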
19413 SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
19414 AO = AtomicOrdering::Monotonic;
19415
19416 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
19417 if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
19418 BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
19419 BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
19420 llvm::Type *V2BF16Ty = FixedVectorType::get(
19421 llvm::Type::getBFloatTy(Builder.getContext()), 2);
19422 Val = Builder.CreateBitCast(Val, V2BF16Ty);
19423 }
19424 }
19425
19426 llvm::AtomicRMWInst *RMW =
19427 Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
19428 if (Volatile)
19429 RMW->setVolatile(true);
19430
19431 unsigned AddrSpace = Ptr.getType()->getAddressSpace();
19432 if (AddrSpace != llvm::AMDGPUAS::LOCAL_ADDRESS) {
19433 // Most targets require "amdgpu.no.fine.grained.memory" to emit the native
19434 // instruction for flat and global operations.
19435 llvm::MDTuple *EmptyMD = MDNode::get(getLLVMContext(), {});
19436 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
19437
19438 // Most targets require "amdgpu.ignore.denormal.mode" to emit the native
19439 // instruction, but this only matters for float fadd.
19440 if (BinOp == llvm::AtomicRMWInst::FAdd && Val->getType()->isFloatTy())
19441 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
19442 }
19443
19444 return Builder.CreateBitCast(RMW, OrigTy);
19445 }
19446 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
19447 case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
19448 llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
19449 llvm::Type *ResultType = ConvertType(E->getType());
19450 // s_sendmsg_rtn is mangled using return type only.
19451 Function *F =
19452 CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
19453 return Builder.CreateCall(F, {Arg});
19454 }
19455 case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:
19456 return emitBuiltinWithOneOverloadedType<4>(
19457 *this, E, Intrinsic::amdgcn_make_buffer_rsrc);
19458 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:
19459 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:
19460 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:
19461 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:
19462 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:
19463 case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
19464 return emitBuiltinWithOneOverloadedType<5>(
19465 *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
19466 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19467 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19468 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19469 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19470 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19471 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
19472 llvm::Type *RetTy = nullptr;
19473 switch (BuiltinID) {
19474 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19475 RetTy = Int8Ty;
19476 break;
19477 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19478 RetTy = Int16Ty;
19479 break;
19480 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19481 RetTy = Int32Ty;
19482 break;
19483 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19484 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
19485 break;
19486 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19487 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
19488 break;
19489 case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
19490 RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
19491 break;
19492 }
19493 Function *F =
19494 CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
19495 return Builder.CreateCall(
19496 F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
19497 EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
19498 }
19499 default:
19500 return nullptr;
19501 }
19502}
19503
19504/// Handle a SystemZ function in which the final argument is a pointer
19505/// to an int that receives the post-instruction CC value. At the LLVM level
19506/// this is represented as a function that returns a {result, cc} pair.
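/// A sketch of the pattern (variable names are illustrative): for
///   int cc; vec = __builtin_s390_vpkshs(a, b, &cc);
/// the intrinsic call yields {vec, cc}; element 1 is stored through the CC
/// pointer and element 0 becomes the builtin's result.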
19507static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
19508 unsigned IntrinsicID,
19509 const CallExpr *E) {
19510 unsigned NumArgs = E->getNumArgs() - 1;
19511 SmallVector<Value *, 8> Args(NumArgs);
19512 for (unsigned I = 0; I < NumArgs; ++I)
19513 Args[I] = CGF.EmitScalarExpr(E->getArg(I));
19514 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
19515 Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
19516 Value *Call = CGF.Builder.CreateCall(F, Args);
19517 Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
19518 CGF.Builder.CreateStore(CC, CCPtr);
19519 return CGF.Builder.CreateExtractValue(Call, 0);
19520}
19521
19522Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
19523 const CallExpr *E) {
19524 switch (BuiltinID) {
19525 case SystemZ::BI__builtin_tbegin: {
19526 Value *TDB = EmitScalarExpr(E->getArg(0));
19527 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19528 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
19529 return Builder.CreateCall(F, {TDB, Control});
19530 }
19531 case SystemZ::BI__builtin_tbegin_nofloat: {
19532 Value *TDB = EmitScalarExpr(E->getArg(0));
19533 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
19534 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
19535 return Builder.CreateCall(F, {TDB, Control});
19536 }
19537 case SystemZ::BI__builtin_tbeginc: {
19538 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
19539 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
19540 Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
19541 return Builder.CreateCall(F, {TDB, Control});
19542 }
19543 case SystemZ::BI__builtin_tabort: {
19544 Value *Data = EmitScalarExpr(E->getArg(0));
19545 Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
19546 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
19547 }
19548 case SystemZ::BI__builtin_non_tx_store: {
19549 Value *Address = EmitScalarExpr(E->getArg(0));
19550 Value *Data = EmitScalarExpr(E->getArg(1));
19551 Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
19552 return Builder.CreateCall(F, {Data, Address});
19553 }
19554
19555 // Vector builtins. Note that most vector builtins are mapped automatically
19556 // to target-specific LLVM intrinsics. The ones handled specially here can
19557 // be represented via standard LLVM IR, which is preferable since it enables
19558 // common LLVM optimizations.
19559
19560 case SystemZ::BI__builtin_s390_vpopctb:
19561 case SystemZ::BI__builtin_s390_vpopcth:
19562 case SystemZ::BI__builtin_s390_vpopctf:
19563 case SystemZ::BI__builtin_s390_vpopctg: {
19564 llvm::Type *ResultType = ConvertType(E->getType());
19565 Value *X = EmitScalarExpr(E->getArg(0));
19566 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
19567 return Builder.CreateCall(F, X);
19568 }
19569
19570 case SystemZ::BI__builtin_s390_vclzb:
19571 case SystemZ::BI__builtin_s390_vclzh:
19572 case SystemZ::BI__builtin_s390_vclzf:
19573 case SystemZ::BI__builtin_s390_vclzg: {
19574 llvm::Type *ResultType = ConvertType(E->getType());
19575 Value *X = EmitScalarExpr(E->getArg(0));
19576 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19577 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
19578 return Builder.CreateCall(F, {X, Undef});
19579 }
19580
19581 case SystemZ::BI__builtin_s390_vctzb:
19582 case SystemZ::BI__builtin_s390_vctzh:
19583 case SystemZ::BI__builtin_s390_vctzf:
19584 case SystemZ::BI__builtin_s390_vctzg: {
19585 llvm::Type *ResultType = ConvertType(E->getType());
19586 Value *X = EmitScalarExpr(E->getArg(0));
19587 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
19588 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
19589 return Builder.CreateCall(F, {X, Undef});
19590 }
19591
19592 case SystemZ::BI__builtin_s390_verllb:
19593 case SystemZ::BI__builtin_s390_verllh:
19594 case SystemZ::BI__builtin_s390_verllf:
19595 case SystemZ::BI__builtin_s390_verllg: {
19596 llvm::Type *ResultType = ConvertType(E->getType());
19597 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19598 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19599 // Splat scalar rotate amount to vector type.
19600 unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
19601 Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
19602 Amt = Builder.CreateVectorSplat(NumElts, Amt);
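// A rotate left by Amt is equivalent to the funnel shift fshl(Src, Src, Amt).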
19603 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19604 return Builder.CreateCall(F, { Src, Src, Amt });
19605 }
19606
19607 case SystemZ::BI__builtin_s390_verllvb:
19608 case SystemZ::BI__builtin_s390_verllvh:
19609 case SystemZ::BI__builtin_s390_verllvf:
19610 case SystemZ::BI__builtin_s390_verllvg: {
19611 llvm::Type *ResultType = ConvertType(E->getType());
19612 llvm::Value *Src = EmitScalarExpr(E->getArg(0));
19613 llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
19614 Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
19615 return Builder.CreateCall(F, { Src, Src, Amt });
19616 }
19617
19618 case SystemZ::BI__builtin_s390_vfsqsb:
19619 case SystemZ::BI__builtin_s390_vfsqdb: {
19620 llvm::Type *ResultType = ConvertType(E->getType());
19621 Value *X = EmitScalarExpr(E->getArg(0));
19622 if (Builder.getIsFPConstrained()) {
19623 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
19624 return Builder.CreateConstrainedFPCall(F, { X });
19625 } else {
19626 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
19627 return Builder.CreateCall(F, X);
19628 }
19629 }
19630 case SystemZ::BI__builtin_s390_vfmasb:
19631 case SystemZ::BI__builtin_s390_vfmadb: {
19632 llvm::Type *ResultType = ConvertType(E->getType());
19633 Value *X = EmitScalarExpr(E->getArg(0));
19634 Value *Y = EmitScalarExpr(E->getArg(1));
19635 Value *Z = EmitScalarExpr(E->getArg(2));
19636 if (Builder.getIsFPConstrained()) {
19637 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19638 return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
19639 } else {
19640 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19641 return Builder.CreateCall(F, {X, Y, Z});
19642 }
19643 }
19644 case SystemZ::BI__builtin_s390_vfmssb:
19645 case SystemZ::BI__builtin_s390_vfmsdb: {
19646 llvm::Type *ResultType = ConvertType(E->getType());
19647 Value *X = EmitScalarExpr(E->getArg(0));
19648 Value *Y = EmitScalarExpr(E->getArg(1));
19649 Value *Z = EmitScalarExpr(E->getArg(2));
19650 if (Builder.getIsFPConstrained()) {
19651 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19652 return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19653 } else {
19654 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19655 return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
19656 }
19657 }
19658 case SystemZ::BI__builtin_s390_vfnmasb:
19659 case SystemZ::BI__builtin_s390_vfnmadb: {
19660 llvm::Type *ResultType = ConvertType(E->getType());
19661 Value *X = EmitScalarExpr(E->getArg(0));
19662 Value *Y = EmitScalarExpr(E->getArg(1));
19663 Value *Z = EmitScalarExpr(E->getArg(2));
19664 if (Builder.getIsFPConstrained()) {
19665 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19666 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
19667 } else {
19668 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19669 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
19670 }
19671 }
19672 case SystemZ::BI__builtin_s390_vfnmssb:
19673 case SystemZ::BI__builtin_s390_vfnmsdb: {
19674 llvm::Type *ResultType = ConvertType(E->getType());
19675 Value *X = EmitScalarExpr(E->getArg(0));
19676 Value *Y = EmitScalarExpr(E->getArg(1));
19677 Value *Z = EmitScalarExpr(E->getArg(2));
19678 if (Builder.getIsFPConstrained()) {
19679 Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
19680 Value *NegZ = Builder.CreateFNeg(Z, "sub");
19681 return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
19682 } else {
19683 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
19684 Value *NegZ = Builder.CreateFNeg(Z, "neg");
19685 return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
19686 }
19687 }
19688 case SystemZ::BI__builtin_s390_vflpsb:
19689 case SystemZ::BI__builtin_s390_vflpdb: {
19690 llvm::Type *ResultType = ConvertType(E->getType());
19691 Value *X = EmitScalarExpr(E->getArg(0));
19692 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19693 return Builder.CreateCall(F, X);
19694 }
19695 case SystemZ::BI__builtin_s390_vflnsb:
19696 case SystemZ::BI__builtin_s390_vflndb: {
19697 llvm::Type *ResultType = ConvertType(E->getType());
19698 Value *X = EmitScalarExpr(E->getArg(0));
19699 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
19700 return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");
19701 }
19702 case SystemZ::BI__builtin_s390_vfisb:
19703 case SystemZ::BI__builtin_s390_vfidb: {
19704 llvm::Type *ResultType = ConvertType(E->getType());
19705 Value *X = EmitScalarExpr(E->getArg(0));
19706 // Constant-fold the M4 and M5 mask arguments.
19707 llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());
19708 llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19709 // Check whether this instance can be represented via an LLVM standard
19710 // intrinsic. We only support some combinations of M4 and M5.
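// For example, M4 == 4 with M5 == 5 (inexact suppressed, round toward zero)
// maps to llvm.trunc in the switch below.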
19711 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19712 Intrinsic::ID CI;
19713 switch (M4.getZExtValue()) {
19714 default: break;
19715 case 0: // IEEE-inexact exception allowed
19716 switch (M5.getZExtValue()) {
19717 default: break;
19718 case 0: ID = Intrinsic::rint;
19719 CI = Intrinsic::experimental_constrained_rint; break;
19720 }
19721 break;
19722 case 4: // IEEE-inexact exception suppressed
19723 switch (M5.getZExtValue()) {
19724 default: break;
19725 case 0: ID = Intrinsic::nearbyint;
19726 CI = Intrinsic::experimental_constrained_nearbyint; break;
19727 case 1: ID = Intrinsic::round;
19728 CI = Intrinsic::experimental_constrained_round; break;
19729 case 5: ID = Intrinsic::trunc;
19730 CI = Intrinsic::experimental_constrained_trunc; break;
19731 case 6: ID = Intrinsic::ceil;
19732 CI = Intrinsic::experimental_constrained_ceil; break;
19733 case 7: ID = Intrinsic::floor;
19734 CI = Intrinsic::experimental_constrained_floor; break;
19735 }
19736 break;
19737 }
19738 if (ID != Intrinsic::not_intrinsic) {
19739 if (Builder.getIsFPConstrained()) {
19740 Function *F = CGM.getIntrinsic(CI, ResultType);
19741 return Builder.CreateConstrainedFPCall(F, X);
19742 } else {
19743 Function *F = CGM.getIntrinsic(ID, ResultType);
19744 return Builder.CreateCall(F, X);
19745 }
19746 }
19747 switch (BuiltinID) { // FIXME: constrained version?
19748 case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
19749 case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
19750 default: llvm_unreachable("Unknown BuiltinID");
19751 }
19752 Function *F = CGM.getIntrinsic(ID);
19753 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19754 Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
19755 return Builder.CreateCall(F, {X, M4Value, M5Value});
19756 }
19757 case SystemZ::BI__builtin_s390_vfmaxsb:
19758 case SystemZ::BI__builtin_s390_vfmaxdb: {
19759 llvm::Type *ResultType = ConvertType(E->getType());
19760 Value *X = EmitScalarExpr(E->getArg(0));
19761 Value *Y = EmitScalarExpr(E->getArg(1));
19762 // Constant-fold the M4 mask argument.
19763 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19764 // Check whether this instance can be represented via an LLVM standard
19765 // intrinsic. We only support some values of M4.
19766 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19767 Intrinsic::ID CI;
19768 switch (M4.getZExtValue()) {
19769 default: break;
19770 case 4: ID = Intrinsic::maxnum;
19771 CI = Intrinsic::experimental_constrained_maxnum; break;
19772 }
19773 if (ID != Intrinsic::not_intrinsic) {
19774 if (Builder.getIsFPConstrained()) {
19775 Function *F = CGM.getIntrinsic(CI, ResultType);
19776 return Builder.CreateConstrainedFPCall(F, {X, Y});
19777 } else {
19778 Function *F = CGM.getIntrinsic(ID, ResultType);
19779 return Builder.CreateCall(F, {X, Y});
19780 }
19781 }
19782 switch (BuiltinID) {
19783 case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
19784 case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
19785 default: llvm_unreachable("Unknown BuiltinID");
19786 }
19787 Function *F = CGM.getIntrinsic(ID);
19788 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19789 return Builder.CreateCall(F, {X, Y, M4Value});
19790 }
19791 case SystemZ::BI__builtin_s390_vfminsb:
19792 case SystemZ::BI__builtin_s390_vfmindb: {
19793 llvm::Type *ResultType = ConvertType(E->getType());
19794 Value *X = EmitScalarExpr(E->getArg(0));
19795 Value *Y = EmitScalarExpr(E->getArg(1));
19796 // Constant-fold the M4 mask argument.
19797 llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());
19798 // Check whether this instance can be represented via an LLVM standard
19799 // intrinsic. We only support some values of M4.
19800 Intrinsic::ID ID = Intrinsic::not_intrinsic;
19801 Intrinsic::ID CI;
19802 switch (M4.getZExtValue()) {
19803 default: break;
19804 case 4: ID = Intrinsic::minnum;
19805 CI = Intrinsic::experimental_constrained_minnum; break;
19806 }
19807 if (ID != Intrinsic::not_intrinsic) {
19808 if (Builder.getIsFPConstrained()) {
19809 Function *F = CGM.getIntrinsic(CI, ResultType);
19810 return Builder.CreateConstrainedFPCall(F, {X, Y});
19811 } else {
19812 Function *F = CGM.getIntrinsic(ID, ResultType);
19813 return Builder.CreateCall(F, {X, Y});
19814 }
19815 }
19816 switch (BuiltinID) {
19817 case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
19818 case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
19819 default: llvm_unreachable("Unknown BuiltinID");
19820 }
19821 Function *F = CGM.getIntrinsic(ID);
19822 Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
19823 return Builder.CreateCall(F, {X, Y, M4Value});
19824 }
19825
19826 case SystemZ::BI__builtin_s390_vlbrh:
19827 case SystemZ::BI__builtin_s390_vlbrf:
19828 case SystemZ::BI__builtin_s390_vlbrg: {
19829 llvm::Type *ResultType = ConvertType(E->getType());
19830 Value *X = EmitScalarExpr(E->getArg(0));
19831 Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
19832 return Builder.CreateCall(F, X);
19833 }
19834
19835 // Vector intrinsics that output the post-instruction CC value.
19836
19837#define INTRINSIC_WITH_CC(NAME) \
19838 case SystemZ::BI__builtin_##NAME: \
19839 return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
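// For example, INTRINSIC_WITH_CC(s390_vpkshs) expands to:
//   case SystemZ::BI__builtin_s390_vpkshs:
//     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vpkshs, E);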
19840
19841 INTRINSIC_WITH_CC(s390_vpkshs);
19842 INTRINSIC_WITH_CC(s390_vpksfs);
19843 INTRINSIC_WITH_CC(s390_vpksgs);
19844
19845 INTRINSIC_WITH_CC(s390_vpklshs);
19846 INTRINSIC_WITH_CC(s390_vpklsfs);
19847 INTRINSIC_WITH_CC(s390_vpklsgs);
19848
19849 INTRINSIC_WITH_CC(s390_vceqbs);
19850 INTRINSIC_WITH_CC(s390_vceqhs);
19851 INTRINSIC_WITH_CC(s390_vceqfs);
19852 INTRINSIC_WITH_CC(s390_vceqgs);
19853
19854 INTRINSIC_WITH_CC(s390_vchbs);
19855 INTRINSIC_WITH_CC(s390_vchhs);
19856 INTRINSIC_WITH_CC(s390_vchfs);
19857 INTRINSIC_WITH_CC(s390_vchgs);
19858
19859 INTRINSIC_WITH_CC(s390_vchlbs);
19860 INTRINSIC_WITH_CC(s390_vchlhs);
19861 INTRINSIC_WITH_CC(s390_vchlfs);
19862 INTRINSIC_WITH_CC(s390_vchlgs);
19863
19864 INTRINSIC_WITH_CC(s390_vfaebs);
19865 INTRINSIC_WITH_CC(s390_vfaehs);
19866 INTRINSIC_WITH_CC(s390_vfaefs);
19867
19868 INTRINSIC_WITH_CC(s390_vfaezbs);
19869 INTRINSIC_WITH_CC(s390_vfaezhs);
19870 INTRINSIC_WITH_CC(s390_vfaezfs);
19871
19872 INTRINSIC_WITH_CC(s390_vfeebs);
19873 INTRINSIC_WITH_CC(s390_vfeehs);
19874 INTRINSIC_WITH_CC(s390_vfeefs);
19875
19876 INTRINSIC_WITH_CC(s390_vfeezbs);
19877 INTRINSIC_WITH_CC(s390_vfeezhs);
19878 INTRINSIC_WITH_CC(s390_vfeezfs);
19879
19880 INTRINSIC_WITH_CC(s390_vfenebs);
19881 INTRINSIC_WITH_CC(s390_vfenehs);
19882 INTRINSIC_WITH_CC(s390_vfenefs);
19883
19884 INTRINSIC_WITH_CC(s390_vfenezbs);
19885 INTRINSIC_WITH_CC(s390_vfenezhs);
19886 INTRINSIC_WITH_CC(s390_vfenezfs);
19887
19888 INTRINSIC_WITH_CC(s390_vistrbs);
19889 INTRINSIC_WITH_CC(s390_vistrhs);
19890 INTRINSIC_WITH_CC(s390_vistrfs);
19891
19892 INTRINSIC_WITH_CC(s390_vstrcbs);
19893 INTRINSIC_WITH_CC(s390_vstrchs);
19894 INTRINSIC_WITH_CC(s390_vstrcfs);
19895
19896 INTRINSIC_WITH_CC(s390_vstrczbs);
19897 INTRINSIC_WITH_CC(s390_vstrczhs);
19898 INTRINSIC_WITH_CC(s390_vstrczfs);
19899
19900 INTRINSIC_WITH_CC(s390_vfcesbs);
19901 INTRINSIC_WITH_CC(s390_vfcedbs);
19902 INTRINSIC_WITH_CC(s390_vfchsbs);
19903 INTRINSIC_WITH_CC(s390_vfchdbs);
19904 INTRINSIC_WITH_CC(s390_vfchesbs);
19905 INTRINSIC_WITH_CC(s390_vfchedbs);
19906
19907 INTRINSIC_WITH_CC(s390_vftcisb);
19908 INTRINSIC_WITH_CC(s390_vftcidb);
19909
19910 INTRINSIC_WITH_CC(s390_vstrsb);
19911 INTRINSIC_WITH_CC(s390_vstrsh);
19912 INTRINSIC_WITH_CC(s390_vstrsf);
19913
19914 INTRINSIC_WITH_CC(s390_vstrszb);
19915 INTRINSIC_WITH_CC(s390_vstrszh);
19916 INTRINSIC_WITH_CC(s390_vstrszf);
19917
19918#undef INTRINSIC_WITH_CC
19919
19920 default:
19921 return nullptr;
19922 }
19923}
19924
19925namespace {
19926// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.
19927struct NVPTXMmaLdstInfo {
19928 unsigned NumResults; // Number of elements to load/store
19929 // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
19930 unsigned IID_col;
19931 unsigned IID_row;
19932};
19933
19934#define MMA_INTR(geom_op_type, layout) \
19935 Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
19936#define MMA_LDST(n, geom_op_type) \
19937 { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
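// For example, MMA_LDST(8, m16n16k16_load_a_f16) expands to
//   { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//        Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride }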
19938
19939static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
19940 switch (BuiltinID) {
19941 // FP MMA loads
19942 case NVPTX::BI__hmma_m16n16k16_ld_a:
19943 return MMA_LDST(8, m16n16k16_load_a_f16);
19944 case NVPTX::BI__hmma_m16n16k16_ld_b:
19945 return MMA_LDST(8, m16n16k16_load_b_f16);
19946 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
19947 return MMA_LDST(4, m16n16k16_load_c_f16);
19948 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
19949 return MMA_LDST(8, m16n16k16_load_c_f32);
19950 case NVPTX::BI__hmma_m32n8k16_ld_a:
19951 return MMA_LDST(8, m32n8k16_load_a_f16);
19952 case NVPTX::BI__hmma_m32n8k16_ld_b:
19953 return MMA_LDST(8, m32n8k16_load_b_f16);
19954 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
19955 return MMA_LDST(4, m32n8k16_load_c_f16);
19956 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
19957 return MMA_LDST(8, m32n8k16_load_c_f32);
19958 case NVPTX::BI__hmma_m8n32k16_ld_a:
19959 return MMA_LDST(8, m8n32k16_load_a_f16);
19960 case NVPTX::BI__hmma_m8n32k16_ld_b:
19961 return MMA_LDST(8, m8n32k16_load_b_f16);
19962 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
19963 return MMA_LDST(4, m8n32k16_load_c_f16);
19964 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
19965 return MMA_LDST(8, m8n32k16_load_c_f32);
19966
19967 // Integer MMA loads
19968 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
19969 return MMA_LDST(2, m16n16k16_load_a_s8);
19970 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
19971 return MMA_LDST(2, m16n16k16_load_a_u8);
19972 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
19973 return MMA_LDST(2, m16n16k16_load_b_s8);
19974 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
19975 return MMA_LDST(2, m16n16k16_load_b_u8);
19976 case NVPTX::BI__imma_m16n16k16_ld_c:
19977 return MMA_LDST(8, m16n16k16_load_c_s32);
19978 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
19979 return MMA_LDST(4, m32n8k16_load_a_s8);
19980 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
19981 return MMA_LDST(4, m32n8k16_load_a_u8);
19982 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
19983 return MMA_LDST(1, m32n8k16_load_b_s8);
19984 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
19985 return MMA_LDST(1, m32n8k16_load_b_u8);
19986 case NVPTX::BI__imma_m32n8k16_ld_c:
19987 return MMA_LDST(8, m32n8k16_load_c_s32);
19988 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
19989 return MMA_LDST(1, m8n32k16_load_a_s8);
19990 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
19991 return MMA_LDST(1, m8n32k16_load_a_u8);
19992 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
19993 return MMA_LDST(4, m8n32k16_load_b_s8);
19994 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
19995 return MMA_LDST(4, m8n32k16_load_b_u8);
19996 case NVPTX::BI__imma_m8n32k16_ld_c:
19997 return MMA_LDST(8, m8n32k16_load_c_s32);
19998
19999 // Sub-integer MMA loads.
20000 // Only row/col layout is supported by A/B fragments.
20001 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20002 return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
20003 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20004 return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
20005 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20006 return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
20007 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20008 return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
20009 case NVPTX::BI__imma_m8n8k32_ld_c:
20010 return MMA_LDST(2, m8n8k32_load_c_s32);
20011 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20012 return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
20013 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20014 return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
20015 case NVPTX::BI__bmma_m8n8k128_ld_c:
20016 return MMA_LDST(2, m8n8k128_load_c_s32);
20017
20018 // Double MMA loads
20019 case NVPTX::BI__dmma_m8n8k4_ld_a:
20020 return MMA_LDST(1, m8n8k4_load_a_f64);
20021 case NVPTX::BI__dmma_m8n8k4_ld_b:
20022 return MMA_LDST(1, m8n8k4_load_b_f64);
20023 case NVPTX::BI__dmma_m8n8k4_ld_c:
20024 return MMA_LDST(2, m8n8k4_load_c_f64);
20025
20026 // Alternate float MMA loads
20027 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20028 return MMA_LDST(4, m16n16k16_load_a_bf16);
20029 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20030 return MMA_LDST(4, m16n16k16_load_b_bf16);
20031 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20032 return MMA_LDST(2, m8n32k16_load_a_bf16);
20033 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20034 return MMA_LDST(8, m8n32k16_load_b_bf16);
20035 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20036 return MMA_LDST(8, m32n8k16_load_a_bf16);
20037 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20038 return MMA_LDST(2, m32n8k16_load_b_bf16);
20039 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20040 return MMA_LDST(4, m16n16k8_load_a_tf32);
20041 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20042 return MMA_LDST(4, m16n16k8_load_b_tf32);
20043 case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
20044 return MMA_LDST(8, m16n16k8_load_c_f32);
20045
20046 // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
20047 // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
20048 // use fragment C for both loads and stores.
20049 // FP MMA stores.
20050 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20051 return MMA_LDST(4, m16n16k16_store_d_f16);
20052 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20053 return MMA_LDST(8, m16n16k16_store_d_f32);
20054 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20055 return MMA_LDST(4, m32n8k16_store_d_f16);
20056 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20057 return MMA_LDST(8, m32n8k16_store_d_f32);
20058 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20059 return MMA_LDST(4, m8n32k16_store_d_f16);
20060 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20061 return MMA_LDST(8, m8n32k16_store_d_f32);
20062
20063 // Integer and sub-integer MMA stores.
20064 // Another naming quirk. Unlike other MMA builtins that use PTX types in the
20065 // name, integer loads/stores use LLVM's i32.
20066 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20067 return MMA_LDST(8, m16n16k16_store_d_s32);
20068 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20069 return MMA_LDST(8, m32n8k16_store_d_s32);
20070 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20071 return MMA_LDST(8, m8n32k16_store_d_s32);
20072 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20073 return MMA_LDST(2, m8n8k32_store_d_s32);
20074 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20075 return MMA_LDST(2, m8n8k128_store_d_s32);
20076
20077 // Double MMA store
20078 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20079 return MMA_LDST(2, m8n8k4_store_d_f64);
20080
20081 // Alternate float MMA store
20082 case NVPTX::BI__mma_m16n16k8_st_c_f32:
20083 return MMA_LDST(8, m16n16k8_store_d_f32);
20084
20085 default:
20086 llvm_unreachable("Unknown MMA builtin");
20087 }
20088}
20089#undef MMA_LDST
20090#undef MMA_INTR
20091
20092
20093struct NVPTXMmaInfo {
20094 unsigned NumEltsA;
20095 unsigned NumEltsB;
20096 unsigned NumEltsC;
20097 unsigned NumEltsD;
20098
20099 // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
20100 // over 'col' for layout. The index of non-satf variants is expected to match
20101 // the undocumented layout constants used by CUDA's mma.hpp.
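// For example, with the MMA_SATF_VARIANTS ordering below, Layout == 1
// (row-major A, column-major B) and Satf == true select index 1 + 4 == 5,
// i.e. the row_col ..._satfinite variant.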
20102 std::array<unsigned, 8> Variants;
20103
20104 unsigned getMMAIntrinsic(int Layout, bool Satf) {
20105 unsigned Index = Layout + 4 * Satf;
20106 if (Index >= Variants.size())
20107 return 0;
20108 return Variants[Index];
20109 }
20110};
20111
20112// Maps a builtin ID to its NVPTXMmaInfo; the info's getMMAIntrinsic() returns
20113// the intrinsic matching Layout and Satf for valid combinations, 0 otherwise.
20114static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
20115 // clang-format off
20116#define MMA_VARIANTS(geom, type) \
20117 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
20118 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
20119 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
20120 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type
20121#define MMA_SATF_VARIANTS(geom, type) \
20122 MMA_VARIANTS(geom, type), \
20123 Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
20124 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
20125 Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
20126 Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite
20127// Sub-integer MMA only supports row.col layout.
20128#define MMA_VARIANTS_I4(geom, type) \
20129 0, \
20130 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
20131 0, \
20132 0, \
20133 0, \
20134 Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
20135 0, \
20136 0
20137// b1 MMA does not support .satfinite.
20138#define MMA_VARIANTS_B1_XOR(geom, type) \
20139 0, \
20140 Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \
20141 0, \
20142 0, \
20143 0, \
20144 0, \
20145 0, \
20146 0
20147#define MMA_VARIANTS_B1_AND(geom, type) \
20148 0, \
20149 Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \
20150 0, \
20151 0, \
20152 0, \
20153 0, \
20154 0, \
20155 0
20156 // clang-format on
20157 switch (BuiltinID) {
20158 // FP MMA
20159 // Note that the 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while
20160 // the NumEltsN fields of the return value are ordered as A, B, C, D.
20161 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20162 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};
20163 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20164 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};
20165 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20166 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};
20167 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20168 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};
20169 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20170 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};
20171 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20172 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};
20173 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20174 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};
20175 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20176 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};
20177 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20178 return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};
20179 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20180 return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};
20181 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20182 return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};
20183 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20184 return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};
20185
20186 // Integer MMA
20187 case NVPTX::BI__imma_m16n16k16_mma_s8:
20188 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};
20189 case NVPTX::BI__imma_m16n16k16_mma_u8:
20190 return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};
20191 case NVPTX::BI__imma_m32n8k16_mma_s8:
20192 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};
20193 case NVPTX::BI__imma_m32n8k16_mma_u8:
20194 return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};
20195 case NVPTX::BI__imma_m8n32k16_mma_s8:
20196 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};
20197 case NVPTX::BI__imma_m8n32k16_mma_u8:
20198 return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}};
20199
20200 // Sub-integer MMA
20201 case NVPTX::BI__imma_m8n8k32_mma_s4:
20202 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};
20203 case NVPTX::BI__imma_m8n8k32_mma_u4:
20204 return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};
20205 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20206 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};
20207 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20208 return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};
20209
20210 // Double MMA
20211 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20212 return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};
20213
20214 // Alternate FP MMA
20215 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20216 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};
20217 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20218 return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};
20219 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20220 return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};
20221 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:
20222 return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};
20223 default:
20224 llvm_unreachable("Unexpected builtin ID.");
20225 }
20226#undef MMA_VARIANTS
20227#undef MMA_SATF_VARIANTS
20228#undef MMA_VARIANTS_I4
20229#undef MMA_VARIANTS_B1_AND
20230#undef MMA_VARIANTS_B1_XOR
20231}
20232
20233static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
20234 const CallExpr *E) {
20235 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20236 QualType ArgType = E->getArg(0)->getType();
20237 clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
20238 llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
20239 return CGF.Builder.CreateCall(
20240 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
20241 {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
20242}
20243
20244static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
20245 const CallExpr *E) {
20246 Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
20247 llvm::Type *ElemTy =
20248 CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20249 return CGF.Builder.CreateCall(
20250 CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
20251 {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
20252}
20253
20254static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,
20255 CodeGenFunction &CGF, const CallExpr *E,
20256 int SrcSize) {
20257 return E->getNumArgs() == 3
20258 ? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),
20259 {CGF.EmitScalarExpr(E->getArg(0)),
20260 CGF.EmitScalarExpr(E->getArg(1)),
20261 CGF.EmitScalarExpr(E->getArg(2))})
20262 : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),
20263 {CGF.EmitScalarExpr(E->getArg(0)),
20264 CGF.EmitScalarExpr(E->getArg(1))});
20265}
20266
20267static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
20268 const CallExpr *E, CodeGenFunction &CGF) {
20269 auto &C = CGF.CGM.getContext();
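// I.e. report an error when NativeHalfType is off and the target would have
// to go through FP16 conversion intrinsics.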
20270 if (!(C.getLangOpts().NativeHalfType ||
20271 !C.getTargetInfo().useFP16ConversionIntrinsics())) {
20272 CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
20273 " requires native half type support.");
20274 return nullptr;
20275 }
20276
20277 if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
20278 IntrinsicID == Intrinsic::nvvm_ldu_global_f)
20279 return MakeLdgLdu(IntrinsicID, CGF, E);
20280
20281 SmallVector<Value *, 16> Args;
20282 auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
20283 auto *FTy = F->getFunctionType();
20284 unsigned ICEArguments = 0;
20285 ASTContext::GetBuiltinTypeError Error;
20286 C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
20287 assert(Error == ASTContext::GE_None && "Should not codegen an error");
20288 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
20289 assert((ICEArguments & (1 << i)) == 0);
20290 auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
20291 auto *PTy = FTy->getParamType(i);
20292 if (PTy != ArgValue->getType())
20293 ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
20294 Args.push_back(ArgValue);
20295 }
20296
20297 return CGF.Builder.CreateCall(F, Args);
20298}
20299} // namespace
20300
20301Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
20302 const CallExpr *E) {
20303 switch (BuiltinID) {
20304 case NVPTX::BI__nvvm_atom_add_gen_i:
20305 case NVPTX::BI__nvvm_atom_add_gen_l:
20306 case NVPTX::BI__nvvm_atom_add_gen_ll:
20307 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
20308
20309 case NVPTX::BI__nvvm_atom_sub_gen_i:
20310 case NVPTX::BI__nvvm_atom_sub_gen_l:
20311 case NVPTX::BI__nvvm_atom_sub_gen_ll:
20312 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
20313
20314 case NVPTX::BI__nvvm_atom_and_gen_i:
20315 case NVPTX::BI__nvvm_atom_and_gen_l:
20316 case NVPTX::BI__nvvm_atom_and_gen_ll:
20317 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
20318
20319 case NVPTX::BI__nvvm_atom_or_gen_i:
20320 case NVPTX::BI__nvvm_atom_or_gen_l:
20321 case NVPTX::BI__nvvm_atom_or_gen_ll:
20322 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
20323
20324 case NVPTX::BI__nvvm_atom_xor_gen_i:
20325 case NVPTX::BI__nvvm_atom_xor_gen_l:
20326 case NVPTX::BI__nvvm_atom_xor_gen_ll:
20327 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
20328
20329 case NVPTX::BI__nvvm_atom_xchg_gen_i:
20330 case NVPTX::BI__nvvm_atom_xchg_gen_l:
20331 case NVPTX::BI__nvvm_atom_xchg_gen_ll:
20332 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
20333
20334 case NVPTX::BI__nvvm_atom_max_gen_i:
20335 case NVPTX::BI__nvvm_atom_max_gen_l:
20336 case NVPTX::BI__nvvm_atom_max_gen_ll:
20337 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
20338
20339 case NVPTX::BI__nvvm_atom_max_gen_ui:
20340 case NVPTX::BI__nvvm_atom_max_gen_ul:
20341 case NVPTX::BI__nvvm_atom_max_gen_ull:
20342 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
20343
20344 case NVPTX::BI__nvvm_atom_min_gen_i:
20345 case NVPTX::BI__nvvm_atom_min_gen_l:
20346 case NVPTX::BI__nvvm_atom_min_gen_ll:
20347 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
20348
20349 case NVPTX::BI__nvvm_atom_min_gen_ui:
20350 case NVPTX::BI__nvvm_atom_min_gen_ul:
20351 case NVPTX::BI__nvvm_atom_min_gen_ull:
20352 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
20353
20354 case NVPTX::BI__nvvm_atom_cas_gen_i:
20355 case NVPTX::BI__nvvm_atom_cas_gen_l:
20356 case NVPTX::BI__nvvm_atom_cas_gen_ll:
20357 // __nvvm_atom_cas_gen_* should return the old value rather than the
20358 // success flag.
20359 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
20360
20361 case NVPTX::BI__nvvm_atom_add_gen_f:
20362 case NVPTX::BI__nvvm_atom_add_gen_d: {
20363 Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
20364 Value *Val = EmitScalarExpr(E->getArg(1));
20365
20366 return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
20367 AtomicOrdering::SequentiallyConsistent);
20368 }
20369
20370 case NVPTX::BI__nvvm_atom_inc_gen_ui: {
20371 Value *Ptr = EmitScalarExpr(E->getArg(0));
20372 Value *Val = EmitScalarExpr(E->getArg(1));
20373 Function *FnALI32 =
20374 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
20375 return Builder.CreateCall(FnALI32, {Ptr, Val});
20376 }
20377
20378 case NVPTX::BI__nvvm_atom_dec_gen_ui: {
20379 Value *Ptr = EmitScalarExpr(E->getArg(0));
20380 Value *Val = EmitScalarExpr(E->getArg(1));
20381 Function *FnALD32 =
20382 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
20383 return Builder.CreateCall(FnALD32, {Ptr, Val});
20384 }
20385
20386 case NVPTX::BI__nvvm_ldg_c:
20387 case NVPTX::BI__nvvm_ldg_sc:
20388 case NVPTX::BI__nvvm_ldg_c2:
20389 case NVPTX::BI__nvvm_ldg_sc2:
20390 case NVPTX::BI__nvvm_ldg_c4:
20391 case NVPTX::BI__nvvm_ldg_sc4:
20392 case NVPTX::BI__nvvm_ldg_s:
20393 case NVPTX::BI__nvvm_ldg_s2:
20394 case NVPTX::BI__nvvm_ldg_s4:
20395 case NVPTX::BI__nvvm_ldg_i:
20396 case NVPTX::BI__nvvm_ldg_i2:
20397 case NVPTX::BI__nvvm_ldg_i4:
20398 case NVPTX::BI__nvvm_ldg_l:
20399 case NVPTX::BI__nvvm_ldg_l2:
20400 case NVPTX::BI__nvvm_ldg_ll:
20401 case NVPTX::BI__nvvm_ldg_ll2:
20402 case NVPTX::BI__nvvm_ldg_uc:
20403 case NVPTX::BI__nvvm_ldg_uc2:
20404 case NVPTX::BI__nvvm_ldg_uc4:
20405 case NVPTX::BI__nvvm_ldg_us:
20406 case NVPTX::BI__nvvm_ldg_us2:
20407 case NVPTX::BI__nvvm_ldg_us4:
20408 case NVPTX::BI__nvvm_ldg_ui:
20409 case NVPTX::BI__nvvm_ldg_ui2:
20410 case NVPTX::BI__nvvm_ldg_ui4:
20411 case NVPTX::BI__nvvm_ldg_ul:
20412 case NVPTX::BI__nvvm_ldg_ul2:
20413 case NVPTX::BI__nvvm_ldg_ull:
20414 case NVPTX::BI__nvvm_ldg_ull2:
20415 // PTX Interoperability section 2.2: "For a vector with an even number of
20416 // elements, its alignment is set to number of elements times the alignment
20417 // of its member: n*alignof(t)."
20418 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
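 // Worked example of the rule above: for the *_f2 variants the loaded element
 // is a 2 x float vector, so n * alignof(t) = 2 * 4 = 8 bytes.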
20419 case NVPTX::BI__nvvm_ldg_f:
20420 case NVPTX::BI__nvvm_ldg_f2:
20421 case NVPTX::BI__nvvm_ldg_f4:
20422 case NVPTX::BI__nvvm_ldg_d:
20423 case NVPTX::BI__nvvm_ldg_d2:
20424 return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
20425
20426 case NVPTX::BI__nvvm_ldu_c:
20427 case NVPTX::BI__nvvm_ldu_sc:
20428 case NVPTX::BI__nvvm_ldu_c2:
20429 case NVPTX::BI__nvvm_ldu_sc2:
20430 case NVPTX::BI__nvvm_ldu_c4:
20431 case NVPTX::BI__nvvm_ldu_sc4:
20432 case NVPTX::BI__nvvm_ldu_s:
20433 case NVPTX::BI__nvvm_ldu_s2:
20434 case NVPTX::BI__nvvm_ldu_s4:
20435 case NVPTX::BI__nvvm_ldu_i:
20436 case NVPTX::BI__nvvm_ldu_i2:
20437 case NVPTX::BI__nvvm_ldu_i4:
20438 case NVPTX::BI__nvvm_ldu_l:
20439 case NVPTX::BI__nvvm_ldu_l2:
20440 case NVPTX::BI__nvvm_ldu_ll:
20441 case NVPTX::BI__nvvm_ldu_ll2:
20442 case NVPTX::BI__nvvm_ldu_uc:
20443 case NVPTX::BI__nvvm_ldu_uc2:
20444 case NVPTX::BI__nvvm_ldu_uc4:
20445 case NVPTX::BI__nvvm_ldu_us:
20446 case NVPTX::BI__nvvm_ldu_us2:
20447 case NVPTX::BI__nvvm_ldu_us4:
20448 case NVPTX::BI__nvvm_ldu_ui:
20449 case NVPTX::BI__nvvm_ldu_ui2:
20450 case NVPTX::BI__nvvm_ldu_ui4:
20451 case NVPTX::BI__nvvm_ldu_ul:
20452 case NVPTX::BI__nvvm_ldu_ul2:
20453 case NVPTX::BI__nvvm_ldu_ull:
20454 case NVPTX::BI__nvvm_ldu_ull2:
20455 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
20456 case NVPTX::BI__nvvm_ldu_f:
20457 case NVPTX::BI__nvvm_ldu_f2:
20458 case NVPTX::BI__nvvm_ldu_f4:
20459 case NVPTX::BI__nvvm_ldu_d:
20460 case NVPTX::BI__nvvm_ldu_d2:
20461 return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
20462
20463 case NVPTX::BI__nvvm_atom_cta_add_gen_i:
20464 case NVPTX::BI__nvvm_atom_cta_add_gen_l:
20465 case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
20466 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
20467 case NVPTX::BI__nvvm_atom_sys_add_gen_i:
20468 case NVPTX::BI__nvvm_atom_sys_add_gen_l:
20469 case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
20470 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
20471 case NVPTX::BI__nvvm_atom_cta_add_gen_f:
20472 case NVPTX::BI__nvvm_atom_cta_add_gen_d:
20473 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
20474 case NVPTX::BI__nvvm_atom_sys_add_gen_f:
20475 case NVPTX::BI__nvvm_atom_sys_add_gen_d:
20476 return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
20477 case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
20478 case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
20479 case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
20480 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
20481 case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
20482 case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
20483 case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
20484 return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
20485 case NVPTX::BI__nvvm_atom_cta_max_gen_i:
20486 case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
20487 case NVPTX::BI__nvvm_atom_cta_max_gen_l:
20488 case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
20489 case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
20490 case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
20491 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
20492 case NVPTX::BI__nvvm_atom_sys_max_gen_i:
20493 case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
20494 case NVPTX::BI__nvvm_atom_sys_max_gen_l:
20495 case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
20496 case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
20497 case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
20498 return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
20499 case NVPTX::BI__nvvm_atom_cta_min_gen_i:
20500 case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
20501 case NVPTX::BI__nvvm_atom_cta_min_gen_l:
20502 case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
20503 case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
20504 case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
20505 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
20506 case NVPTX::BI__nvvm_atom_sys_min_gen_i:
20507 case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
20508 case NVPTX::BI__nvvm_atom_sys_min_gen_l:
20509 case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
20510 case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
20511 case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
20512 return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
20513 case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
20514 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
20515 case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
20516 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
20517 case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
20518 return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
20519 case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
20520 return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
20521 case NVPTX::BI__nvvm_atom_cta_and_gen_i:
20522 case NVPTX::BI__nvvm_atom_cta_and_gen_l:
20523 case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
20524 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
20525 case NVPTX::BI__nvvm_atom_sys_and_gen_i:
20526 case NVPTX::BI__nvvm_atom_sys_and_gen_l:
20527 case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
20528 return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
20529 case NVPTX::BI__nvvm_atom_cta_or_gen_i:
20530 case NVPTX::BI__nvvm_atom_cta_or_gen_l:
20531 case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
20532 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
20533 case NVPTX::BI__nvvm_atom_sys_or_gen_i:
20534 case NVPTX::BI__nvvm_atom_sys_or_gen_l:
20535 case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
20536 return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
20537 case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
20538 case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
20539 case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
20540 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
20541 case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
20542 case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
20543 case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
20544 return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
20545 case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
20546 case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
20547 case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
20548 Value *Ptr = EmitScalarExpr(E->getArg(0));
20549 llvm::Type *ElemTy =
20550 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20551 return Builder.CreateCall(
20552 CGM.getIntrinsic(
20553 Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),
20554 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20555 }
20556 case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
20557 case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
20558 case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
20559 Value *Ptr = EmitScalarExpr(E->getArg(0));
20560 llvm::Type *ElemTy =
20561 ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
20562 return Builder.CreateCall(
20563 CGM.getIntrinsic(
20564 Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),
20565 {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
20566 }
20567 case NVPTX::BI__nvvm_match_all_sync_i32p:
20568 case NVPTX::BI__nvvm_match_all_sync_i64p: {
20569 Value *Mask = EmitScalarExpr(E->getArg(0));
20570 Value *Val = EmitScalarExpr(E->getArg(1));
20571 Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
20572 Value *ResultPair = Builder.CreateCall(
20573 CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
20574 ? Intrinsic::nvvm_match_all_sync_i32p
20575 : Intrinsic::nvvm_match_all_sync_i64p),
20576 {Mask, Val});
20577 Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
20578 PredOutPtr.getElementType());
20579 Builder.CreateStore(Pred, PredOutPtr);
20580 return Builder.CreateExtractValue(ResultPair, 0);
20581 }
20582
20583 // FP MMA loads
20584 case NVPTX::BI__hmma_m16n16k16_ld_a:
20585 case NVPTX::BI__hmma_m16n16k16_ld_b:
20586 case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
20587 case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
20588 case NVPTX::BI__hmma_m32n8k16_ld_a:
20589 case NVPTX::BI__hmma_m32n8k16_ld_b:
20590 case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
20591 case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
20592 case NVPTX::BI__hmma_m8n32k16_ld_a:
20593 case NVPTX::BI__hmma_m8n32k16_ld_b:
20594 case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
20595 case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
20596 // Integer MMA loads.
20597 case NVPTX::BI__imma_m16n16k16_ld_a_s8:
20598 case NVPTX::BI__imma_m16n16k16_ld_a_u8:
20599 case NVPTX::BI__imma_m16n16k16_ld_b_s8:
20600 case NVPTX::BI__imma_m16n16k16_ld_b_u8:
20601 case NVPTX::BI__imma_m16n16k16_ld_c:
20602 case NVPTX::BI__imma_m32n8k16_ld_a_s8:
20603 case NVPTX::BI__imma_m32n8k16_ld_a_u8:
20604 case NVPTX::BI__imma_m32n8k16_ld_b_s8:
20605 case NVPTX::BI__imma_m32n8k16_ld_b_u8:
20606 case NVPTX::BI__imma_m32n8k16_ld_c:
20607 case NVPTX::BI__imma_m8n32k16_ld_a_s8:
20608 case NVPTX::BI__imma_m8n32k16_ld_a_u8:
20609 case NVPTX::BI__imma_m8n32k16_ld_b_s8:
20610 case NVPTX::BI__imma_m8n32k16_ld_b_u8:
20611 case NVPTX::BI__imma_m8n32k16_ld_c:
20612 // Sub-integer MMA loads.
20613 case NVPTX::BI__imma_m8n8k32_ld_a_s4:
20614 case NVPTX::BI__imma_m8n8k32_ld_a_u4:
20615 case NVPTX::BI__imma_m8n8k32_ld_b_s4:
20616 case NVPTX::BI__imma_m8n8k32_ld_b_u4:
20617 case NVPTX::BI__imma_m8n8k32_ld_c:
20618 case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
20619 case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
20620 case NVPTX::BI__bmma_m8n8k128_ld_c:
20621 // Double MMA loads.
20622 case NVPTX::BI__dmma_m8n8k4_ld_a:
20623 case NVPTX::BI__dmma_m8n8k4_ld_b:
20624 case NVPTX::BI__dmma_m8n8k4_ld_c:
20625 // Alternate float MMA loads.
20626 case NVPTX::BI__mma_bf16_m16n16k16_ld_a:
20627 case NVPTX::BI__mma_bf16_m16n16k16_ld_b:
20628 case NVPTX::BI__mma_bf16_m8n32k16_ld_a:
20629 case NVPTX::BI__mma_bf16_m8n32k16_ld_b:
20630 case NVPTX::BI__mma_bf16_m32n8k16_ld_a:
20631 case NVPTX::BI__mma_bf16_m32n8k16_ld_b:
20632 case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
20633 case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
20634 case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {
20635 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20636 Value *Src = EmitScalarExpr(E->getArg(1));
20637 Value *Ldm = EmitScalarExpr(E->getArg(2));
20638 std::optional<llvm::APSInt> isColMajorArg =
20639 E->getArg(3)->getIntegerConstantExpr(getContext());
20640 if (!isColMajorArg)
20641 return nullptr;
20642 bool isColMajor = isColMajorArg->getSExtValue();
20643 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20644 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20645 if (IID == 0)
20646 return nullptr;
20647
20648 Value *Result =
20649 Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
20650
20651 // Save returned values.
20652 assert(II.NumResults);
20653 if (II.NumResults == 1) {
20654 Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),
20655 CharUnits::fromQuantity(4));
20656 } else {
20657 for (unsigned i = 0; i < II.NumResults; ++i) {
20658 Builder.CreateAlignedStore(
20659 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
20660 Dst.getElementType()),
20661 Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),
20662 llvm::ConstantInt::get(IntTy, i)),
20663 CharUnits::fromQuantity(4));
20664 }
20665 }
20666 return Result;
20667 }
20668
20669 case NVPTX::BI__hmma_m16n16k16_st_c_f16:
20670 case NVPTX::BI__hmma_m16n16k16_st_c_f32:
20671 case NVPTX::BI__hmma_m32n8k16_st_c_f16:
20672 case NVPTX::BI__hmma_m32n8k16_st_c_f32:
20673 case NVPTX::BI__hmma_m8n32k16_st_c_f16:
20674 case NVPTX::BI__hmma_m8n32k16_st_c_f32:
20675 case NVPTX::BI__imma_m16n16k16_st_c_i32:
20676 case NVPTX::BI__imma_m32n8k16_st_c_i32:
20677 case NVPTX::BI__imma_m8n32k16_st_c_i32:
20678 case NVPTX::BI__imma_m8n8k32_st_c_i32:
20679 case NVPTX::BI__bmma_m8n8k128_st_c_i32:
20680 case NVPTX::BI__dmma_m8n8k4_st_c_f64:
20681 case NVPTX::BI__mma_m16n16k8_st_c_f32: {
20682 Value *Dst = EmitScalarExpr(E->getArg(0));
20683 Address Src = EmitPointerWithAlignment(E->getArg(1));
20684 Value *Ldm = EmitScalarExpr(E->getArg(2));
20685 std::optional<llvm::APSInt> isColMajorArg =
20686 E->getArg(3)->getIntegerConstantExpr(getContext());
20687 if (!isColMajorArg)
20688 return nullptr;
20689 bool isColMajor = isColMajorArg->getSExtValue();
20690 NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
20691 unsigned IID = isColMajor ? II.IID_col : II.IID_row;
20692 if (IID == 0)
20693 return nullptr;
20694 Function *Intrinsic =
20695 CGM.getIntrinsic(IID, Dst->getType());
20696 llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
20697 SmallVector<Value *, 10> Values = {Dst};
20698 for (unsigned i = 0; i < II.NumResults; ++i) {
20699 Value *V = Builder.CreateAlignedLoad(
20700 Src.getElementType(),
20701 Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),
20702 llvm::ConstantInt::get(IntTy, i)),
20703 CharUnits::fromQuantity(4));
20704 Values.push_back(Builder.CreateBitCast(V, ParamType));
20705 }
20706 Values.push_back(Ldm);
20707 Value *Result = Builder.CreateCall(Intrinsic, Values);
20708 return Result;
20709 }
20710
20711 // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
20712 // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
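 // Illustrative example (the layout encoding is an assumption; the exact
 // variant is chosen by getNVPTXMmaInfo(BuiltinID).getMMAIntrinsic(Layout, Satf)):
 //   __hmma_m16n16k16_mma_f32f16(d, a, b, c, /*layout=*/0, /*satf=*/0)
 // would select the row/row, D=f32, C=f16 flavor of the wmma mma.sync intrinsic.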
20713 case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
20714 case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
20715 case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
20716 case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
20717 case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
20718 case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
20719 case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
20720 case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
20721 case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
20722 case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
20723 case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
20724 case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
20725 case NVPTX::BI__imma_m16n16k16_mma_s8:
20726 case NVPTX::BI__imma_m16n16k16_mma_u8:
20727 case NVPTX::BI__imma_m32n8k16_mma_s8:
20728 case NVPTX::BI__imma_m32n8k16_mma_u8:
20729 case NVPTX::BI__imma_m8n32k16_mma_s8:
20730 case NVPTX::BI__imma_m8n32k16_mma_u8:
20731 case NVPTX::BI__imma_m8n8k32_mma_s4:
20732 case NVPTX::BI__imma_m8n8k32_mma_u4:
20733 case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
20734 case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:
20735 case NVPTX::BI__dmma_m8n8k4_mma_f64:
20736 case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:
20737 case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:
20738 case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:
20739 case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {
20740 Address Dst = EmitPointerWithAlignment(E->getArg(0));
20741 Address SrcA = EmitPointerWithAlignment(E->getArg(1));
20742 Address SrcB = EmitPointerWithAlignment(E->getArg(2));
20743 Address SrcC = EmitPointerWithAlignment(E->getArg(3));
20744 std::optional<llvm::APSInt> LayoutArg =
20745 E->getArg(4)->getIntegerConstantExpr(getContext());
20746 if (!LayoutArg)
20747 return nullptr;
20748 int Layout = LayoutArg->getSExtValue();
20749 if (Layout < 0 || Layout > 3)
20750 return nullptr;
20751 llvm::APSInt SatfArg;
20752 if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||
20753 BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)
20754 SatfArg = 0; // .b1 does not have satf argument.
20755 else if (std::optional<llvm::APSInt> OptSatfArg =
20756 E->getArg(5)->getIntegerConstantExpr(getContext()))
20757 SatfArg = *OptSatfArg;
20758 else
20759 return nullptr;
20760 bool Satf = SatfArg.getSExtValue();
20761 NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
20762 unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
20763 if (IID == 0) // Unsupported combination of Layout/Satf.
20764 return nullptr;
20765
20766 SmallVector<Value *, 24> Values;
20767 Function *Intrinsic = CGM.getIntrinsic(IID);
20768 llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
20769 // Load A
20770 for (unsigned i = 0; i < MI.NumEltsA; ++i) {
20771 Value *V = Builder.CreateAlignedLoad(
20772 SrcA.getElementType(),
20773 Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),
20774 llvm::ConstantInt::get(IntTy, i)),
20775 CharUnits::fromQuantity(4));
20776 Values.push_back(Builder.CreateBitCast(V, AType));
20777 }
20778 // Load B
20779 llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
20780 for (unsigned i = 0; i < MI.NumEltsB; ++i) {
20781 Value *V = Builder.CreateAlignedLoad(
20782 SrcB.getElementType(),
20783 Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),
20784 llvm::ConstantInt::get(IntTy, i)),
20785 CharUnits::fromQuantity(4));
20786 Values.push_back(Builder.CreateBitCast(V, BType));
20787 }
20788 // Load C
20789 llvm::Type *CType =
20790 Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
20791 for (unsigned i = 0; i < MI.NumEltsC; ++i) {
20792 Value *V = Builder.CreateAlignedLoad(
20793 SrcC.getElementType(),
20794 Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),
20795 llvm::ConstantInt::get(IntTy, i)),
20796 CharUnits::fromQuantity(4));
20797 Values.push_back(Builder.CreateBitCast(V, CType));
20798 }
20799 Value *Result = Builder.CreateCall(Intrinsic, Values);
20800 llvm::Type *DType = Dst.getElementType();
20801 for (unsigned i = 0; i < MI.NumEltsD; ++i)
20802 Builder.CreateAlignedStore(
20803 Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
20804 Builder.CreateGEP(DType, Dst.emitRawPointer(*this),
20805 llvm::ConstantInt::get(IntTy, i)),
20806 CharUnits::fromQuantity(4));
20807 return Result;
20808 }
20809 // The following builtins require half type support
20810 case NVPTX::BI__nvvm_ex2_approx_f16:
20811 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
20812 case NVPTX::BI__nvvm_ex2_approx_f16x2:
20813 return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
20814 case NVPTX::BI__nvvm_ff2f16x2_rn:
20815 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
20816 case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
20817 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
20818 case NVPTX::BI__nvvm_ff2f16x2_rz:
20819 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
20820 case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
20821 return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
20822 case NVPTX::BI__nvvm_fma_rn_f16:
20823 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
20824 case NVPTX::BI__nvvm_fma_rn_f16x2:
20825 return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
20826 case NVPTX::BI__nvvm_fma_rn_ftz_f16:
20827 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
20828 case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
20829 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
20830 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
20831 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
20832 *this);
20833 case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
20834 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
20835 *this);
20836 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
20837 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
20838 *this);
20839 case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
20840 return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
20841 *this);
20842 case NVPTX::BI__nvvm_fma_rn_relu_f16:
20843 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
20844 case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
20845 return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
20846 case NVPTX::BI__nvvm_fma_rn_sat_f16:
20847 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
20848 case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
20849 return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
20850 case NVPTX::BI__nvvm_fmax_f16:
20851 return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
20852 case NVPTX::BI__nvvm_fmax_f16x2:
20853 return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
20854 case NVPTX::BI__nvvm_fmax_ftz_f16:
20855 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
20856 case NVPTX::BI__nvvm_fmax_ftz_f16x2:
20857 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
20858 case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
20859 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
20860 case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
20861 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
20862 *this);
20863 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
20864 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
20865 E, *this);
20866 case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
20867 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
20868 BuiltinID, E, *this);
20869 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
20870 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
20871 *this);
20872 case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
20873 return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
20874 E, *this);
20875 case NVPTX::BI__nvvm_fmax_nan_f16:
20876 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
20877 case NVPTX::BI__nvvm_fmax_nan_f16x2:
20878 return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
20879 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
20880 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
20881 *this);
20882 case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
20883 return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
20884 E, *this);
20885 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
20886 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
20887 *this);
20888 case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
20889 return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
20890 *this);
20891 case NVPTX::BI__nvvm_fmin_f16:
20892 return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
20893 case NVPTX::BI__nvvm_fmin_f16x2:
20894 return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
20895 case NVPTX::BI__nvvm_fmin_ftz_f16:
20896 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
20897 case NVPTX::BI__nvvm_fmin_ftz_f16x2:
20898 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
20899 case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
20900 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
20901 case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
20902 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
20903 *this);
20904 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
20905 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
20906 E, *this);
20907 case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
20908 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
20909 BuiltinID, E, *this);
20910 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
20911 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
20912 *this);
20913 case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
20914 return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
20915 E, *this);
20916 case NVPTX::BI__nvvm_fmin_nan_f16:
20917 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
20918 case NVPTX::BI__nvvm_fmin_nan_f16x2:
20919 return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
20920 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
20921 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
20922 *this);
20923 case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
20924 return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
20925 E, *this);
20926 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
20927 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
20928 *this);
20929 case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
20930 return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
20931 *this);
20932 case NVPTX::BI__nvvm_ldg_h:
20933 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20934 case NVPTX::BI__nvvm_ldg_h2:
20935 return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
20936 case NVPTX::BI__nvvm_ldu_h:
20937 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20938 case NVPTX::BI__nvvm_ldu_h2: {
20939 return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
20940 }
20941 case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
20942 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
20943 Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
20944 4);
20945 case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:
20946 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,
20947 Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,
20948 8);
20949 case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:
20950 return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,
20951 Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,
20952 16);
20953 case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:
20954 return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,
20955 Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,
20956 16);
20957 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:
20958 return Builder.CreateCall(
20959 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));
20960 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:
20961 return Builder.CreateCall(
20962 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));
20963 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:
20964 return Builder.CreateCall(
20965 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));
20966 case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:
20967 return Builder.CreateCall(
20968 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));
20969 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:
20970 return Builder.CreateCall(
20971 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));
20972 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:
20973 return Builder.CreateCall(
20974 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));
20975 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:
20976 return Builder.CreateCall(
20977 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));
20978 case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:
20979 return Builder.CreateCall(
20980 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));
20981 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:
20982 return Builder.CreateCall(
20983 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));
20984 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:
20985 return Builder.CreateCall(
20986 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));
20987 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:
20988 return Builder.CreateCall(
20989 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));
20990 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:
20991 return Builder.CreateCall(
20992 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));
20993 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:
20994 return Builder.CreateCall(
20995 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));
20996 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:
20997 return Builder.CreateCall(
20998 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));
20999 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:
21000 return Builder.CreateCall(
21001 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));
21002 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:
21003 return Builder.CreateCall(
21004 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));
21005 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:
21006 return Builder.CreateCall(
21007 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));
21008 case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:
21009 return Builder.CreateCall(
21010 CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));
21011 case NVPTX::BI__nvvm_is_explicit_cluster:
21012 return Builder.CreateCall(
21013 CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));
21014 case NVPTX::BI__nvvm_isspacep_shared_cluster:
21015 return Builder.CreateCall(
21016 CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),
21017 EmitScalarExpr(E->getArg(0)));
21018 case NVPTX::BI__nvvm_mapa:
21019 return Builder.CreateCall(
21020 CGM.getIntrinsic(Intrinsic::nvvm_mapa),
21021 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21022 case NVPTX::BI__nvvm_mapa_shared_cluster:
21023 return Builder.CreateCall(
21024 CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),
21025 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21026 case NVPTX::BI__nvvm_getctarank:
21027 return Builder.CreateCall(
21028 CGM.getIntrinsic(Intrinsic::nvvm_getctarank),
21029 EmitScalarExpr(E->getArg(0)));
21030 case NVPTX::BI__nvvm_getctarank_shared_cluster:
21031 return Builder.CreateCall(
21032 CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),
21033 EmitScalarExpr(E->getArg(0)));
21034 case NVPTX::BI__nvvm_barrier_cluster_arrive:
21035 return Builder.CreateCall(
21036 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));
21037 case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:
21038 return Builder.CreateCall(
21039 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));
21040 case NVPTX::BI__nvvm_barrier_cluster_wait:
21041 return Builder.CreateCall(
21042 CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));
21043 case NVPTX::BI__nvvm_fence_sc_cluster:
21044 return Builder.CreateCall(
21045 CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));
21046 default:
21047 return nullptr;
21048 }
21049}
21050
21051namespace {
21052struct BuiltinAlignArgs {
21053 llvm::Value *Src = nullptr;
21054 llvm::Type *SrcType = nullptr;
21055 llvm::Value *Alignment = nullptr;
21056 llvm::Value *Mask = nullptr;
21057 llvm::IntegerType *IntType = nullptr;
21058
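 // Gathers the operands shared by the alignment builtins: the source value
 // (arrays decay to a pointer), the integer type used for the arithmetic, the
 // alignment zero-extended/truncated to that type, and Mask = Alignment - 1.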
21059 BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {
21060 QualType AstType = E->getArg(0)->getType();
21061 if (AstType->isArrayType())
21062 Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);
21063 else
21064 Src = CGF.EmitScalarExpr(E->getArg(0));
21065 SrcType = Src->getType();
21066 if (SrcType->isPointerTy()) {
21067 IntType = IntegerType::get(
21068 CGF.getLLVMContext(),
21069 CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));
21070 } else {
21071 assert(SrcType->isIntegerTy());
21072 IntType = cast<llvm::IntegerType>(SrcType);
21073 }
21074 Alignment = CGF.EmitScalarExpr(E->getArg(1));
21075 Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");
21076 auto *One = llvm::ConstantInt::get(IntType, 1);
21077 Mask = CGF.Builder.CreateSub(Alignment, One, "mask");
21078 }
21079};
21080} // namespace
21081
21082/// Generate (x & (y-1)) == 0.
21083RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
21084 BuiltinAlignArgs Args(E, *this);
21085 llvm::Value *SrcAddress = Args.Src;
21086 if (Args.SrcType->isPointerTy())
21087 SrcAddress =
21088 Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");
21089 return RValue::get(Builder.CreateICmpEQ(
21090 Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),
21091 llvm::Constant::getNullValue(Args.IntType), "is_aligned"));
21092}
21093
21094/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
21095/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
21096/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
21097RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
21098 BuiltinAlignArgs Args(E, *this);
21099 llvm::Value *SrcForMask = Args.Src;
21100 if (AlignUp) {
21101 // When aligning up we have to first add the mask to ensure we go over the
21102 // next alignment value and then align down to the next valid multiple.
21103 // By adding the mask, we ensure that align_up on an already aligned
21104 // value will not change the value.
21105 if (Args.Src->getType()->isPointerTy()) {
21106 if (getLangOpts().isSignedOverflowDefined())
21107 SrcForMask =
21108 Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
21109 else
21110 SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
21111 /*SignedIndices=*/true,
21112 /*isSubtraction=*/false,
21113 E->getExprLoc(), "over_boundary");
21114 } else {
21115 SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
21116 }
21117 }
21118 // Invert the mask to only clear the lower bits.
21119 llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
21120 llvm::Value *Result = nullptr;
21121 if (Args.Src->getType()->isPointerTy()) {
21122 Result = Builder.CreateIntrinsic(
21123 Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
21124 {SrcForMask, InvertedMask}, nullptr, "aligned_result");
21125 } else {
21126 Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
21127 }
21128 assert(Result->getType() == Args.SrcType);
21129 return RValue::get(Result);
21130}
21131
21132Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
21133 const CallExpr *E) {
21134 switch (BuiltinID) {
21135 case WebAssembly::BI__builtin_wasm_memory_size: {
21136 llvm::Type *ResultType = ConvertType(E->getType());
21137 Value *I = EmitScalarExpr(E->getArg(0));
21138 Function *Callee =
21139 CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
21140 return Builder.CreateCall(Callee, I);
21141 }
21142 case WebAssembly::BI__builtin_wasm_memory_grow: {
21143 llvm::Type *ResultType = ConvertType(E->getType());
21144 Value *Args[] = {EmitScalarExpr(E->getArg(0)),
21145 EmitScalarExpr(E->getArg(1))};
21146 Function *Callee =
21147 CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
21148 return Builder.CreateCall(Callee, Args);
21149 }
21150 case WebAssembly::BI__builtin_wasm_tls_size: {
21151 llvm::Type *ResultType = ConvertType(E->getType());
21152 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
21153 return Builder.CreateCall(Callee);
21154 }
21155 case WebAssembly::BI__builtin_wasm_tls_align: {
21156 llvm::Type *ResultType = ConvertType(E->getType());
21157 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);
21158 return Builder.CreateCall(Callee);
21159 }
21160 case WebAssembly::BI__builtin_wasm_tls_base: {
21161 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);
21162 return Builder.CreateCall(Callee);
21163 }
21164 case WebAssembly::BI__builtin_wasm_throw: {
21165 Value *Tag = EmitScalarExpr(E->getArg(0));
21166 Value *Obj = EmitScalarExpr(E->getArg(1));
21167 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
21168 return Builder.CreateCall(Callee, {Tag, Obj});
21169 }
21170 case WebAssembly::BI__builtin_wasm_rethrow: {
21171 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
21172 return Builder.CreateCall(Callee);
21173 }
21174 case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {
21175 Value *Addr = EmitScalarExpr(E->getArg(0));
21176 Value *Expected = EmitScalarExpr(E->getArg(1));
21177 Value *Timeout = EmitScalarExpr(E->getArg(2));
21178 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);
21179 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
21180 }
21181 case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {
21182 Value *Addr = EmitScalarExpr(E->getArg(0));
21183 Value *Expected = EmitScalarExpr(E->getArg(1));
21184 Value *Timeout = EmitScalarExpr(E->getArg(2));
21185 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);
21186 return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
21187 }
21188 case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {
21189 Value *Addr = EmitScalarExpr(E->getArg(0));
21190 Value *Count = EmitScalarExpr(E->getArg(1));
21191 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);
21192 return Builder.CreateCall(Callee, {Addr, Count});
21193 }
21194 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:
21195 case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:
21196 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:
21197 case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {
21198 Value *Src = EmitScalarExpr(E->getArg(0));
21199 llvm::Type *ResT = ConvertType(E->getType());
21200 Function *Callee =
21201 CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});
21202 return Builder.CreateCall(Callee, {Src});
21203 }
21204 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:
21205 case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:
21206 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:
21207 case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {
21208 Value *Src = EmitScalarExpr(E->getArg(0));
21209 llvm::Type *ResT = ConvertType(E->getType());
21210 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,
21211 {ResT, Src->getType()});
21212 return Builder.CreateCall(Callee, {Src});
21213 }
21214 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
21215 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
21216 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
21217 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
21218 case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
21219 Value *Src = EmitScalarExpr(E->getArg(0));
21220 llvm::Type *ResT = ConvertType(E->getType());
21221 Function *Callee =
21222 CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});
21223 return Builder.CreateCall(Callee, {Src});
21224 }
21225 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
21226 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
21227 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
21228 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
21229 case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
21230 Value *Src = EmitScalarExpr(E->getArg(0));
21231 llvm::Type *ResT = ConvertType(E->getType());
21232 Function *Callee =
21233 CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});
21234 return Builder.CreateCall(Callee, {Src});
21235 }
21236 case WebAssembly::BI__builtin_wasm_min_f32:
21237 case WebAssembly::BI__builtin_wasm_min_f64:
21238 case WebAssembly::BI__builtin_wasm_min_f16x8:
21239 case WebAssembly::BI__builtin_wasm_min_f32x4:
21240 case WebAssembly::BI__builtin_wasm_min_f64x2: {
21241 Value *LHS = EmitScalarExpr(E->getArg(0));
21242 Value *RHS = EmitScalarExpr(E->getArg(1));
21243 Function *Callee =
21244 CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));
21245 return Builder.CreateCall(Callee, {LHS, RHS});
21246 }
21247 case WebAssembly::BI__builtin_wasm_max_f32:
21248 case WebAssembly::BI__builtin_wasm_max_f64:
21249 case WebAssembly::BI__builtin_wasm_max_f16x8:
21250 case WebAssembly::BI__builtin_wasm_max_f32x4:
21251 case WebAssembly::BI__builtin_wasm_max_f64x2: {
21252 Value *LHS = EmitScalarExpr(E->getArg(0));
21253 Value *RHS = EmitScalarExpr(E->getArg(1));
21254 Function *Callee =
21255 CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));
21256 return Builder.CreateCall(Callee, {LHS, RHS});
21257 }
21258 case WebAssembly::BI__builtin_wasm_pmin_f16x8:
21259 case WebAssembly::BI__builtin_wasm_pmin_f32x4:
21260 case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
21261 Value *LHS = EmitScalarExpr(E->getArg(0));
21262 Value *RHS = EmitScalarExpr(E->getArg(1));
21263 Function *Callee =
21264 CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
21265 return Builder.CreateCall(Callee, {LHS, RHS});
21266 }
21267 case WebAssembly::BI__builtin_wasm_pmax_f16x8:
21268 case WebAssembly::BI__builtin_wasm_pmax_f32x4:
21269 case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
21270 Value *LHS = EmitScalarExpr(E->getArg(0));
21271 Value *RHS = EmitScalarExpr(E->getArg(1));
21272 Function *Callee =
21273 CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
21274 return Builder.CreateCall(Callee, {LHS, RHS});
21275 }
21276 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
21277 case WebAssembly::BI__builtin_wasm_floor_f32x4:
21278 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
21279 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
21280 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
21281 case WebAssembly::BI__builtin_wasm_floor_f64x2:
21282 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
21283 case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
21284 unsigned IntNo;
21285 switch (BuiltinID) {
21286 case WebAssembly::BI__builtin_wasm_ceil_f32x4:
21287 case WebAssembly::BI__builtin_wasm_ceil_f64x2:
21288 IntNo = Intrinsic::ceil;
21289 break;
21290 case WebAssembly::BI__builtin_wasm_floor_f32x4:
21291 case WebAssembly::BI__builtin_wasm_floor_f64x2:
21292 IntNo = Intrinsic::floor;
21293 break;
21294 case WebAssembly::BI__builtin_wasm_trunc_f32x4:
21295 case WebAssembly::BI__builtin_wasm_trunc_f64x2:
21296 IntNo = Intrinsic::trunc;
21297 break;
21298 case WebAssembly::BI__builtin_wasm_nearest_f32x4:
21299 case WebAssembly::BI__builtin_wasm_nearest_f64x2:
21300 IntNo = Intrinsic::nearbyint;
21301 break;
21302 default:
21303 llvm_unreachable("unexpected builtin ID");
21304 }
21305 Value *Value = EmitScalarExpr(E->getArg(0));
21306 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21307 return Builder.CreateCall(Callee, Value);
21308 }
21309 case WebAssembly::BI__builtin_wasm_ref_null_extern: {
21310 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);
21311 return Builder.CreateCall(Callee);
21312 }
21313 case WebAssembly::BI__builtin_wasm_ref_null_func: {
21314 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);
21315 return Builder.CreateCall(Callee);
21316 }
21317 case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {
21318 Value *Src = EmitScalarExpr(E->getArg(0));
21319 Value *Indices = EmitScalarExpr(E->getArg(1));
21320 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);
21321 return Builder.CreateCall(Callee, {Src, Indices});
21322 }
21323 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
21324 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
21325 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
21326 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
21327 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
21328 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
21329 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
21330 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {
21331 unsigned IntNo;
21332 switch (BuiltinID) {
21333 case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:
21334 case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:
21335 IntNo = Intrinsic::sadd_sat;
21336 break;
21337 case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:
21338 case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:
21339 IntNo = Intrinsic::uadd_sat;
21340 break;
21341 case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:
21342 case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:
21343 IntNo = Intrinsic::wasm_sub_sat_signed;
21344 break;
21345 case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:
21346 case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:
21347 IntNo = Intrinsic::wasm_sub_sat_unsigned;
21348 break;
21349 default:
21350 llvm_unreachable("unexpected builtin ID");
21351 }
21352 Value *LHS = EmitScalarExpr(E->getArg(0));
21353 Value *RHS = EmitScalarExpr(E->getArg(1));
21354 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21355 return Builder.CreateCall(Callee, {LHS, RHS});
21356 }
21357 case WebAssembly::BI__builtin_wasm_abs_i8x16:
21358 case WebAssembly::BI__builtin_wasm_abs_i16x8:
21359 case WebAssembly::BI__builtin_wasm_abs_i32x4:
21360 case WebAssembly::BI__builtin_wasm_abs_i64x2: {
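 // Integer vector abs is open-coded as abs(x) = x < 0 ? -x : x using a neg,
 // a signed compare against zero, and a select.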
21361 Value *Vec = EmitScalarExpr(E->getArg(0));
21362 Value *Neg = Builder.CreateNeg(Vec, "neg");
21363 Constant *Zero = llvm::Constant::getNullValue(Vec->getType());
21364 Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");
21365 return Builder.CreateSelect(ICmp, Neg, Vec, "abs");
21366 }
21367 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
21368 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
21369 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
21370 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
21371 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
21372 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
21373 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
21374 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
21375 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
21376 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
21377 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
21378 case WebAssembly::BI__builtin_wasm_max_u_i32x4: {
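 // Integer vector min/max are open-coded as a compare plus select; the
 // signedness and min/max choice below only affect the icmp predicate.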
21379 Value *LHS = EmitScalarExpr(E->getArg(0));
21380 Value *RHS = EmitScalarExpr(E->getArg(1));
21381 Value *ICmp;
21382 switch (BuiltinID) {
21383 case WebAssembly::BI__builtin_wasm_min_s_i8x16:
21384 case WebAssembly::BI__builtin_wasm_min_s_i16x8:
21385 case WebAssembly::BI__builtin_wasm_min_s_i32x4:
21386 ICmp = Builder.CreateICmpSLT(LHS, RHS);
21387 break;
21388 case WebAssembly::BI__builtin_wasm_min_u_i8x16:
21389 case WebAssembly::BI__builtin_wasm_min_u_i16x8:
21390 case WebAssembly::BI__builtin_wasm_min_u_i32x4:
21391 ICmp = Builder.CreateICmpULT(LHS, RHS);
21392 break;
21393 case WebAssembly::BI__builtin_wasm_max_s_i8x16:
21394 case WebAssembly::BI__builtin_wasm_max_s_i16x8:
21395 case WebAssembly::BI__builtin_wasm_max_s_i32x4:
21396 ICmp = Builder.CreateICmpSGT(LHS, RHS);
21397 break;
21398 case WebAssembly::BI__builtin_wasm_max_u_i8x16:
21399 case WebAssembly::BI__builtin_wasm_max_u_i16x8:
21400 case WebAssembly::BI__builtin_wasm_max_u_i32x4:
21401 ICmp = Builder.CreateICmpUGT(LHS, RHS);
21402 break;
21403 default:
21404 llvm_unreachable("unexpected builtin ID");
21405 }
21406 return Builder.CreateSelect(ICmp, LHS, RHS);
21407 }
21408 case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:
21409 case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {
21410 Value *LHS = EmitScalarExpr(E->getArg(0));
21411 Value *RHS = EmitScalarExpr(E->getArg(1));
21412 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,
21413 ConvertType(E->getType()));
21414 return Builder.CreateCall(Callee, {LHS, RHS});
21415 }
21416 case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {
21417 Value *LHS = EmitScalarExpr(E->getArg(0));
21418 Value *RHS = EmitScalarExpr(E->getArg(1));
21419 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);
21420 return Builder.CreateCall(Callee, {LHS, RHS});
21421 }
21422 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
21423 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21424 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
21425 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {
21426 Value *Vec = EmitScalarExpr(E->getArg(0));
21427 unsigned IntNo;
21428 switch (BuiltinID) {
21429 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:
21430 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:
21431 IntNo = Intrinsic::wasm_extadd_pairwise_signed;
21432 break;
21433 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:
21434 case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:
21435 IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;
21436 break;
21437 default:
21438 llvm_unreachable("unexpected builtin ID");
21439 }
21440
21441 Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
21442 return Builder.CreateCall(Callee, Vec);
21443 }
21444 case WebAssembly::BI__builtin_wasm_bitselect: {
21445 Value *V1 = EmitScalarExpr(E->getArg(0));
21446 Value *V2 = EmitScalarExpr(E->getArg(1));
21447 Value *C = EmitScalarExpr(E->getArg(2));
21448 Function *Callee =
21449 CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));
21450 return Builder.CreateCall(Callee, {V1, V2, C});
21451 }
21452 case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {
21453 Value *LHS = EmitScalarExpr(E->getArg(0));
21454 Value *RHS = EmitScalarExpr(E->getArg(1));
21455 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
21456 return Builder.CreateCall(Callee, {LHS, RHS});
21457 }
21458 case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
21459 Value *Vec = EmitScalarExpr(E->getArg(0));
21460 Function *Callee =
21461 CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
21462 return Builder.CreateCall(Callee, {Vec});
21463 }
21464 case WebAssembly::BI__builtin_wasm_any_true_v128:
21465 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21466 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21467 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21468 case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
21469 unsigned IntNo;
21470 switch (BuiltinID) {
21471 case WebAssembly::BI__builtin_wasm_any_true_v128:
21472 IntNo = Intrinsic::wasm_anytrue;
21473 break;
21474 case WebAssembly::BI__builtin_wasm_all_true_i8x16:
21475 case WebAssembly::BI__builtin_wasm_all_true_i16x8:
21476 case WebAssembly::BI__builtin_wasm_all_true_i32x4:
21477 case WebAssembly::BI__builtin_wasm_all_true_i64x2:
21478 IntNo = Intrinsic::wasm_alltrue;
21479 break;
21480 default:
21481 llvm_unreachable("unexpected builtin ID");
21482 }
21483 Value *Vec = EmitScalarExpr(E->getArg(0));
21484 Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
21485 return Builder.CreateCall(Callee, {Vec});
21486 }
21487 case WebAssembly::BI__builtin_wasm_bitmask_i8x16:
21488 case WebAssembly::BI__builtin_wasm_bitmask_i16x8:
21489 case WebAssembly::BI__builtin_wasm_bitmask_i32x4:
21490 case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {
21491 Value *Vec = EmitScalarExpr(E->getArg(0));
21492 Function *Callee =
21493 CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
21494 return Builder.CreateCall(Callee, {Vec});
21495 }
21496 case WebAssembly::BI__builtin_wasm_abs_f32x4:
21497 case WebAssembly::BI__builtin_wasm_abs_f64x2: {
21498 Value *Vec = EmitScalarExpr(E->getArg(0));
21499 Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
21500 return Builder.CreateCall(Callee, {Vec});
21501 }
21502 case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
21503 case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
21504 Value *Vec = EmitScalarExpr(E->getArg(0));
21505 Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
21506 return Builder.CreateCall(Callee, {Vec});
21507 }
21508 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21509 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21510 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21511 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {
21512 Value *Low = EmitScalarExpr(E->getArg(0));
21513 Value *High = EmitScalarExpr(E->getArg(1));
21514 unsigned IntNo;
21515 switch (BuiltinID) {
21516 case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:
21517 case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:
21518 IntNo = Intrinsic::wasm_narrow_signed;
21519 break;
21520 case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:
21521 case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
21522 IntNo = Intrinsic::wasm_narrow_unsigned;
21523 break;
21524 default:
21525 llvm_unreachable("unexpected builtin ID");
21526 }
21527 Function *Callee =
21528 CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
21529 return Builder.CreateCall(Callee, {Low, High});
21530 }
21531 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21532 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {
21533 Value *Vec = EmitScalarExpr(E->getArg(0));
21534 unsigned IntNo;
21535 switch (BuiltinID) {
21536 case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:
21537 IntNo = Intrinsic::fptosi_sat;
21538 break;
21539 case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:
21540 IntNo = Intrinsic::fptoui_sat;
21541 break;
21542 default:
21543 llvm_unreachable("unexpected builtin ID");
21544 }
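 // Saturating-truncate the two f64 lanes to i32, then shuffle with a zero
 // vector to widen the result to i32x4 with the upper two lanes cleared.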
21545 llvm::Type *SrcT = Vec->getType();
21546 llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());
21547 Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});
21548 Value *Trunc = Builder.CreateCall(Callee, Vec);
21549 Value *Splat = Constant::getNullValue(TruncT);
21550 return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});
21551 }
21552 case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
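 // The shuffle takes 18 operands: the two input vectors followed by 16 lane
 // indices, each of which must be an integer constant expression.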
21553 Value *Ops[18];
21554 size_t OpIdx = 0;
21555 Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
21556 Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
21557 while (OpIdx < 18) {
21558 std::optional<llvm::APSInt> LaneConst =
21559 E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
21560 assert(LaneConst && "Constant arg isn't actually constant?");
21561 Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);
21562 }
21563 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);
21564 return Builder.CreateCall(Callee, Ops);
21565 }
21566 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
21567 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
21568 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21569 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21570 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21571 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {
21572 Value *A = EmitScalarExpr(E->getArg(0));
21573 Value *B = EmitScalarExpr(E->getArg(1));
21574 Value *C = EmitScalarExpr(E->getArg(2));
21575 unsigned IntNo;
21576 switch (BuiltinID) {
21577 case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:
21578 case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:
21579 case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:
21580 IntNo = Intrinsic::wasm_relaxed_madd;
21581 break;
21582 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:
21583 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:
21584 case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:
21585 IntNo = Intrinsic::wasm_relaxed_nmadd;
21586 break;
21587 default:
21588 llvm_unreachable("unexpected builtin ID");
21589 }
21590 Function *Callee = CGM.getIntrinsic(IntNo, A->getType());
21591 return Builder.CreateCall(Callee, {A, B, C});
21592 }
21593 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:
21594 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:
21595 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:
21596 case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {
21597 Value *A = EmitScalarExpr(E->getArg(0));
21598 Value *B = EmitScalarExpr(E->getArg(1));
21599 Value *C = EmitScalarExpr(E->getArg(2));
21600 Function *Callee =
21601 CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());
21602 return Builder.CreateCall(Callee, {A, B, C});
21603 }
21604 case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {
21605 Value *Src = EmitScalarExpr(E->getArg(0));
21606 Value *Indices = EmitScalarExpr(E->getArg(1));
21607 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);
21608 return Builder.CreateCall(Callee, {Src, Indices});
21609 }
21610 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21611 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21612 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21613 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {
21614 Value *LHS = EmitScalarExpr(E->getArg(0));
21615 Value *RHS = EmitScalarExpr(E->getArg(1));
21616 unsigned IntNo;
21617 switch (BuiltinID) {
21618 case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:
21619 case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:
21620 IntNo = Intrinsic::wasm_relaxed_min;
21621 break;
21622 case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:
21623 case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:
21624 IntNo = Intrinsic::wasm_relaxed_max;
21625 break;
21626 default:
21627 llvm_unreachable("unexpected builtin ID");
21628 }
21629 Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());
21630 return Builder.CreateCall(Callee, {LHS, RHS});
21631 }
21632 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21633 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21634 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21635 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {
21636 Value *Vec = EmitScalarExpr(E->getArg(0));
21637 unsigned IntNo;
21638 switch (BuiltinID) {
21639 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:
21640 IntNo = Intrinsic::wasm_relaxed_trunc_signed;
21641 break;
21642 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:
21643 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;
21644 break;
21645 case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:
21646 IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;
21647 break;
21648 case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:
21649 IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;
21650 break;
21651 default:
21652 llvm_unreachable("unexpected builtin ID");
21653 }
21654 Function *Callee = CGM.getIntrinsic(IntNo);
21655 return Builder.CreateCall(Callee, {Vec});
21656 }
21657 case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {
21658 Value *LHS = EmitScalarExpr(E->getArg(0));
21659 Value *RHS = EmitScalarExpr(E->getArg(1));
21660 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);
21661 return Builder.CreateCall(Callee, {LHS, RHS});
21662 }
21663 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {
21664 Value *LHS = EmitScalarExpr(E->getArg(0));
21665 Value *RHS = EmitScalarExpr(E->getArg(1));
21666 Function *Callee =
21667 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
21668 return Builder.CreateCall(Callee, {LHS, RHS});
21669 }
21670 case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {
21671 Value *LHS = EmitScalarExpr(E->getArg(0));
21672 Value *RHS = EmitScalarExpr(E->getArg(1));
21673 Value *Acc = EmitScalarExpr(E->getArg(2));
21674 Function *Callee =
21675 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
21676 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21677 }
21678 case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {
21679 Value *LHS = EmitScalarExpr(E->getArg(0));
21680 Value *RHS = EmitScalarExpr(E->getArg(1));
21681 Value *Acc = EmitScalarExpr(E->getArg(2));
21682 Function *Callee =
21683 CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);
21684 return Builder.CreateCall(Callee, {LHS, RHS, Acc});
21685 }
21686 case WebAssembly::BI__builtin_wasm_loadf16_f32: {
21687 Value *Addr = EmitScalarExpr(E->getArg(0));
21688 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);
21689 return Builder.CreateCall(Callee, {Addr});
21690 }
21691 case WebAssembly::BI__builtin_wasm_storef16_f32: {
21692 Value *Val = EmitScalarExpr(E->getArg(0));
21693 Value *Addr = EmitScalarExpr(E->getArg(1));
21694 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);
21695 return Builder.CreateCall(Callee, {Val, Addr});
21696 }
21697 case WebAssembly::BI__builtin_wasm_splat_f16x8: {
21698 Value *Val = EmitScalarExpr(E->getArg(0));
21699 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);
21700 return Builder.CreateCall(Callee, {Val});
21701 }
21702 case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {
21703 Value *Vector = EmitScalarExpr(E->getArg(0));
21704 Value *Index = EmitScalarExpr(E->getArg(1));
21705 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);
21706 return Builder.CreateCall(Callee, {Vector, Index});
21707 }
21708 case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: {
21709 Value *Vector = EmitScalarExpr(E->getArg(0));
21710 Value *Index = EmitScalarExpr(E->getArg(1));
21711 Value *Val = EmitScalarExpr(E->getArg(2));
21712 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8);
21713 return Builder.CreateCall(Callee, {Vector, Index, Val});
21714 }
21715 case WebAssembly::BI__builtin_wasm_table_get: {
21716 assert(E->getArg(0)->getType()->isArrayType());
21717 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21718 Value *Index = EmitScalarExpr(E->getArg(1));
21719 Function *Callee;
21720 if (E->getType().isWebAssemblyExternrefType())
21721 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);
21722 else if (E->getType().isWebAssemblyFuncrefType())
21723 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);
21724 else
21725 llvm_unreachable(
21726 "Unexpected reference type for __builtin_wasm_table_get");
21727 return Builder.CreateCall(Callee, {Table, Index});
21728 }
21729 case WebAssembly::BI__builtin_wasm_table_set: {
21730 assert(E->getArg(0)->getType()->isArrayType());
21731 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21732 Value *Index = EmitScalarExpr(E->getArg(1));
21733 Value *Val = EmitScalarExpr(E->getArg(2));
21734 Function *Callee;
21735 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21736 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);
21737 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21738 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);
21739 else
21740 llvm_unreachable(
21741 "Unexpected reference type for __builtin_wasm_table_set");
21742 return Builder.CreateCall(Callee, {Table, Index, Val});
21743 }
21744 case WebAssembly::BI__builtin_wasm_table_size: {
21745 assert(E->getArg(0)->getType()->isArrayType());
21746 Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21747 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);
21748 return Builder.CreateCall(Callee, Value);
21749 }
21750 case WebAssembly::BI__builtin_wasm_table_grow: {
21751 assert(E->getArg(0)->getType()->isArrayType());
21752 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21753 Value *Val = EmitScalarExpr(E->getArg(1));
21754 Value *NElems = EmitScalarExpr(E->getArg(2));
21755
21756 Function *Callee;
21757 if (E->getArg(1)->getType().isWebAssemblyExternrefType())
21758 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);
21759 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21760 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21761 else
21762 llvm_unreachable(
21763 "Unexpected reference type for __builtin_wasm_table_grow");
21764
21765 return Builder.CreateCall(Callee, {Table, Val, NElems});
21766 }
21767 case WebAssembly::BI__builtin_wasm_table_fill: {
21768 assert(E->getArg(0)->getType()->isArrayType());
21769 Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21770 Value *Index = EmitScalarExpr(E->getArg(1));
21771 Value *Val = EmitScalarExpr(E->getArg(2));
21772 Value *NElems = EmitScalarExpr(E->getArg(3));
21773
21774 Function *Callee;
21775 if (E->getArg(2)->getType().isWebAssemblyExternrefType())
21776 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);
21777 else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())
21778 Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);
21779 else
21780 llvm_unreachable(
21781 "Unexpected reference type for __builtin_wasm_table_fill");
21782
21783 return Builder.CreateCall(Callee, {Table, Index, Val, NElems});
21784 }
21785 case WebAssembly::BI__builtin_wasm_table_copy: {
21786 assert(E->getArg(0)->getType()->isArrayType());
21787 Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);
21788 Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);
21789 Value *DstIdx = EmitScalarExpr(E->getArg(2));
21790 Value *SrcIdx = EmitScalarExpr(E->getArg(3));
21791 Value *NElems = EmitScalarExpr(E->getArg(4));
21792
21793 Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);
21794
21795 return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});
21796 }
21797 default:
21798 return nullptr;
21799 }
21800}
21801
21802static std::pair<Intrinsic::ID, unsigned>
21803 getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {
21804 struct Info {
21805 unsigned BuiltinID;
21806 Intrinsic::ID IntrinsicID;
21807 unsigned VecLen;
21808 };
21809 static Info Infos[] = {
21810#define CUSTOM_BUILTIN_MAPPING(x,s) \
21811 { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },
21812 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)
21813 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)
21814 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)
21815 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)
21816 CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)
21817 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)
21818 CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)
21819 CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)
21820 CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)
21821 CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)
21822 CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)
21823 CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)
21824 CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)
21825 CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)
21826 CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)
21827 CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)
21828 CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0)
21829 CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)
21830 CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)
21831 CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)
21832 CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)
21833 CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)
21834 // Legacy builtins that take a vector in place of a vector predicate.
21835 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)
21836 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)
21837 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)
21838 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)
21839 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)
21840 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)
21841 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)
21842 CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)
21843#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"
21844#undef CUSTOM_BUILTIN_MAPPING
21845 };
21846
21847 auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };
21848 static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);
21849 (void)SortOnce;
21850
21851 const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);
21852 if (F == std::end(Infos) || F->BuiltinID != BuiltinID)
21853 return {Intrinsic::not_intrinsic, 0};
21854
21855 return {F->IntrinsicID, F->VecLen};
21856}
21857
21858 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
21859 const CallExpr *E) {
21860 Intrinsic::ID ID;
21861 unsigned VecLen;
21862 std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);
21863
21864 auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {
21865 // The base pointer is passed by address, so it needs to be loaded.
21866 Address A = EmitPointerWithAlignment(E->getArg(0));
21867 Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());
21868 llvm::Value *Base = Builder.CreateLoad(BP);
21869 // The treatment of both loads and stores is the same: the arguments for
21870 // the builtin are the same as the arguments for the intrinsic.
21871 // Load:
21872 // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start)
21873 // builtin(Base, Mod, Start) -> intr(Base, Mod, Start)
21874 // Store:
21875 // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)
21876 // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)
21877 SmallVector<llvm::Value*,5> Ops = { Base };
21878 for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)
21879 Ops.push_back(EmitScalarExpr(E->getArg(i)));
21880
21881 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
21882 // The load intrinsics generate two results (Value, NewBase), stores
21883 // generate one (NewBase). The new base address needs to be stored.
21884 llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)
21885 : Result;
21886 llvm::Value *LV = EmitScalarExpr(E->getArg(0));
21887 Address Dest = EmitPointerWithAlignment(E->getArg(0));
21888 llvm::Value *RetVal =
21889 Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
21890 if (IsLoad)
21891 RetVal = Builder.CreateExtractValue(Result, 0);
21892 return RetVal;
21893 };
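// A minimal sketch of MakeCircOp for a circular load, assuming i32 element
// type (exact pointer and aggregate types depend on the target lowering):
//   __builtin_HEXAGON_L2_loadri_pci(&base, inc, mod, start)
// becomes roughly
//   %pair = call { i32, ptr } @llvm.hexagon.L2.loadri.pci(ptr %base, i32 %inc,
//                                                         i32 %mod, ptr %start)
// where element 1 (the advanced base) is stored back through &base and
// element 0 (the loaded value) is returned.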
21894
21895 // Handle the conversion of bit-reverse load intrinsics to bit code.
21896 // The intrinsic call after this function only reads from memory and the
21897 // write to memory is handled by the store instruction.
21898 auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {
21899 // The intrinsic generates one result, which is the new value for the base
21900 // pointer. It needs to be returned. The result of the load instruction is
21901 // passed to intrinsic by address, so the value needs to be stored.
21902 llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));
21903
21904 // Expressions like &(*pt++) will be incremented per evaluation.
21905 // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression
21906 // only once per call.
21907 Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
21908 DestAddr = DestAddr.withElementType(Int8Ty);
21909 llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);
21910
21911 // Operands are Base, Dest, Modifier.
21912 // The intrinsic format in LLVM IR is defined as
21913 // { ValueType, i8* } (i8*, i32).
21914 llvm::Value *Result = Builder.CreateCall(
21915 CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});
21916
21917 // The value needs to be stored as the variable is passed by reference.
21918 llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
21919
21920 // The store needs to be truncated to fit the destination type.
21921 // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
21922 // to be handled with stores of the respective destination type.
21923 DestVal = Builder.CreateTrunc(DestVal, DestTy);
21924
21925 Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());
21926 // The updated value of the base pointer is returned.
21927 return Builder.CreateExtractValue(Result, 1);
21928 };
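// A minimal sketch of MakeBrevLd, assuming __builtin_brev_ldh (16-bit load;
// the aggregate and pointer types are approximate):
//   new_base = __builtin_brev_ldh(base, &dst, mod)
// becomes roughly
//   %pair = call { i32, ptr } @llvm.hexagon.L2.loadrh.pbr(ptr %base, i32 %mod)
// where element 0 is truncated to i16 and stored to &dst, and element 1
// (the updated base pointer) is returned.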
21929
21930 auto V2Q = [this, VecLen] (llvm::Value *Vec) {
21931 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B
21932 : Intrinsic::hexagon_V6_vandvrt;
21933 return Builder.CreateCall(CGM.getIntrinsic(ID),
21934 {Vec, Builder.getInt32(-1)});
21935 };
21936 auto Q2V = [this, VecLen] (llvm::Value *Pred) {
21937 Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B
21938 : Intrinsic::hexagon_V6_vandqrt;
21939 return Builder.CreateCall(CGM.getIntrinsic(ID),
21940 {Pred, Builder.getInt32(-1)});
21941 };
21942
21943 switch (BuiltinID) {
21944 // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,
21945 // and the corresponding C/C++ builtins use loads/stores to update
21946 // the predicate.
21947 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
21948 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:
21949 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
21950 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
21951 // Get the type from the 0-th argument.
21952 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21953 Address PredAddr =
21954 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21955 llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));
21956 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21957 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});
21958
21959 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21960 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21961 PredAddr.getAlignment());
21962 return Builder.CreateExtractValue(Result, 0);
21963 }
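// A minimal sketch of the carry builtins, assuming 64-byte HVX vectors
// (<16 x i32>) with <64 x i1> predicates:
//   v = __builtin_HEXAGON_V6_vaddcarry(a, b, &pred)
// becomes roughly
//   %q   = call <64 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %pred.in, i32 -1)
//   %res = call { <16 x i32>, <64 x i1> } @llvm.hexagon.V6.vaddcarry(
//              <16 x i32> %a, <16 x i32> %b, <64 x i1> %q)
// where element 1 is converted back with V6.vandqrt and stored to &pred, and
// element 0 is the vector result.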
21964 // These are identical to the builtins above, except they don't consume
21965 // input carry, only generate carry-out. Since they still produce two
21966 // outputs, generate the store of the predicate, but no load.
21967 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:
21968 case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:
21969 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:
21970 case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {
21971 // Get the type from the 0-th argument.
21972 llvm::Type *VecType = ConvertType(E->getArg(0)->getType());
21973 Address PredAddr =
21974 EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);
21975 llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),
21976 {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});
21977
21978 llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);
21979 Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),
21980 PredAddr.getAlignment());
21981 return Builder.CreateExtractValue(Result, 0);
21982 }
21983
21984 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:
21985 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:
21986 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:
21987 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:
21988 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:
21989 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:
21990 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:
21991 case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {
21992 SmallVector<llvm::Value*,4> Ops;
21993 const Expr *PredOp = E->getArg(0);
21994 // There will be an implicit cast to a boolean vector. Strip it.
21995 if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {
21996 if (Cast->getCastKind() == CK_BitCast)
21997 PredOp = Cast->getSubExpr();
21998 Ops.push_back(V2Q(EmitScalarExpr(PredOp)));
21999 }
22000 for (int i = 1, e = E->getNumArgs(); i != e; ++i)
22001 Ops.push_back(EmitScalarExpr(E->getArg(i)));
22002 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
22003 }
22004
22005 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
22006 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
22007 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
22008 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
22009 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
22010 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
22011 case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
22012 case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
22013 case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
22014 case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
22015 case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
22016 case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
22017 return MakeCircOp(ID, /*IsLoad=*/true);
22018 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
22019 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
22020 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
22021 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
22022 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
22023 case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
22024 case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
22025 case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
22026 case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
22027 case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
22028 return MakeCircOp(ID, /*IsLoad=*/false);
22029 case Hexagon::BI__builtin_brev_ldub:
22030 return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
22031 case Hexagon::BI__builtin_brev_ldb:
22032 return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
22033 case Hexagon::BI__builtin_brev_lduh:
22034 return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
22035 case Hexagon::BI__builtin_brev_ldh:
22036 return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
22037 case Hexagon::BI__builtin_brev_ldw:
22038 return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
22039 case Hexagon::BI__builtin_brev_ldd:
22040 return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
22041 } // switch
22042
22043 return nullptr;
22044}
22045
22046 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
22047 const CallExpr *E,
22048 ReturnValueSlot ReturnValue) {
22049
22050 if (BuiltinID == Builtin::BI__builtin_cpu_supports)
22051 return EmitRISCVCpuSupports(E);
22052 if (BuiltinID == Builtin::BI__builtin_cpu_init)
22053 return EmitRISCVCpuInit();
22054
22055 SmallVector<Value *, 4> Ops;
22056 llvm::Type *ResultType = ConvertType(E->getType());
22057
22058 // Find out if any arguments are required to be integer constant expressions.
22059 unsigned ICEArguments = 0;
22060 ASTContext::GetBuiltinTypeError Error;
22061 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
22062 if (Error == ASTContext::GE_Missing_type) {
22063 // Vector intrinsics don't have a type string.
22064 assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&
22065 BuiltinID <= clang::RISCV::LastRVVBuiltin);
22066 ICEArguments = 0;
22067 if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||
22068 BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)
22069 ICEArguments = 1 << 1;
22070 } else {
22071 assert(Error == ASTContext::GE_None && "Unexpected error");
22072 }
22073
22074 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)
22075 ICEArguments |= (1 << 1);
22076 if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)
22077 ICEArguments |= (1 << 2);
22078
22079 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
22080 // Handle aggregate argument, namely RVV tuple types in segment load/store
22081 if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {
22082 LValue L = EmitAggExprToLValue(E->getArg(i));
22083 llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());
22084 Ops.push_back(AggValue);
22085 continue;
22086 }
22087 Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
22088 }
22089
22090 Intrinsic::ID ID = Intrinsic::not_intrinsic;
22091 unsigned NF = 1;
22092 // The 0th bit simulates the `vta` of RVV
22093 // The 1st bit simulates the `vma` of RVV
22094 constexpr unsigned RVV_VTA = 0x1;
22095 constexpr unsigned RVV_VMA = 0x2;
22096 int PolicyAttrs = 0;
22097 bool IsMasked = false;
22098
22099 // Required for overloaded intrinsics.
22100 llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
22101 switch (BuiltinID) {
22102 default: llvm_unreachable("unexpected builtin ID");
22103 case RISCV::BI__builtin_riscv_orc_b_32:
22104 case RISCV::BI__builtin_riscv_orc_b_64:
22105 case RISCV::BI__builtin_riscv_clz_32:
22106 case RISCV::BI__builtin_riscv_clz_64:
22107 case RISCV::BI__builtin_riscv_ctz_32:
22108 case RISCV::BI__builtin_riscv_ctz_64:
22109 case RISCV::BI__builtin_riscv_clmul_32:
22110 case RISCV::BI__builtin_riscv_clmul_64:
22111 case RISCV::BI__builtin_riscv_clmulh_32:
22112 case RISCV::BI__builtin_riscv_clmulh_64:
22113 case RISCV::BI__builtin_riscv_clmulr_32:
22114 case RISCV::BI__builtin_riscv_clmulr_64:
22115 case RISCV::BI__builtin_riscv_xperm4_32:
22116 case RISCV::BI__builtin_riscv_xperm4_64:
22117 case RISCV::BI__builtin_riscv_xperm8_32:
22118 case RISCV::BI__builtin_riscv_xperm8_64:
22119 case RISCV::BI__builtin_riscv_brev8_32:
22120 case RISCV::BI__builtin_riscv_brev8_64:
22121 case RISCV::BI__builtin_riscv_zip_32:
22122 case RISCV::BI__builtin_riscv_unzip_32: {
22123 switch (BuiltinID) {
22124 default: llvm_unreachable("unexpected builtin ID");
22125 // Zbb
22126 case RISCV::BI__builtin_riscv_orc_b_32:
22127 case RISCV::BI__builtin_riscv_orc_b_64:
22128 ID = Intrinsic::riscv_orc_b;
22129 break;
22130 case RISCV::BI__builtin_riscv_clz_32:
22131 case RISCV::BI__builtin_riscv_clz_64: {
22132 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
22133 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
22134 if (Result->getType() != ResultType)
22135 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
22136 "cast");
22137 return Result;
22138 }
22139 case RISCV::BI__builtin_riscv_ctz_32:
22140 case RISCV::BI__builtin_riscv_ctz_64: {
22141 Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
22142 Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
22143 if (Result->getType() != ResultType)
22144 Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
22145 "cast");
22146 return Result;
22147 }
22148
22149 // Zbc
22150 case RISCV::BI__builtin_riscv_clmul_32:
22151 case RISCV::BI__builtin_riscv_clmul_64:
22152 ID = Intrinsic::riscv_clmul;
22153 break;
22154 case RISCV::BI__builtin_riscv_clmulh_32:
22155 case RISCV::BI__builtin_riscv_clmulh_64:
22156 ID = Intrinsic::riscv_clmulh;
22157 break;
22158 case RISCV::BI__builtin_riscv_clmulr_32:
22159 case RISCV::BI__builtin_riscv_clmulr_64:
22160 ID = Intrinsic::riscv_clmulr;
22161 break;
22162
22163 // Zbkx
22164 case RISCV::BI__builtin_riscv_xperm8_32:
22165 case RISCV::BI__builtin_riscv_xperm8_64:
22166 ID = Intrinsic::riscv_xperm8;
22167 break;
22168 case RISCV::BI__builtin_riscv_xperm4_32:
22169 case RISCV::BI__builtin_riscv_xperm4_64:
22170 ID = Intrinsic::riscv_xperm4;
22171 break;
22172
22173 // Zbkb
22174 case RISCV::BI__builtin_riscv_brev8_32:
22175 case RISCV::BI__builtin_riscv_brev8_64:
22176 ID = Intrinsic::riscv_brev8;
22177 break;
22178 case RISCV::BI__builtin_riscv_zip_32:
22179 ID = Intrinsic::riscv_zip;
22180 break;
22181 case RISCV::BI__builtin_riscv_unzip_32:
22182 ID = Intrinsic::riscv_unzip;
22183 break;
22184 }
22185
22186 IntrinsicTypes = {ResultType};
22187 break;
22188 }
22189
22190 // Zk builtins
22191
22192 // Zknh
22193 case RISCV::BI__builtin_riscv_sha256sig0:
22194 ID = Intrinsic::riscv_sha256sig0;
22195 break;
22196 case RISCV::BI__builtin_riscv_sha256sig1:
22197 ID = Intrinsic::riscv_sha256sig1;
22198 break;
22199 case RISCV::BI__builtin_riscv_sha256sum0:
22200 ID = Intrinsic::riscv_sha256sum0;
22201 break;
22202 case RISCV::BI__builtin_riscv_sha256sum1:
22203 ID = Intrinsic::riscv_sha256sum1;
22204 break;
22205
22206 // Zksed
22207 case RISCV::BI__builtin_riscv_sm4ks:
22208 ID = Intrinsic::riscv_sm4ks;
22209 break;
22210 case RISCV::BI__builtin_riscv_sm4ed:
22211 ID = Intrinsic::riscv_sm4ed;
22212 break;
22213
22214 // Zksh
22215 case RISCV::BI__builtin_riscv_sm3p0:
22216 ID = Intrinsic::riscv_sm3p0;
22217 break;
22218 case RISCV::BI__builtin_riscv_sm3p1:
22219 ID = Intrinsic::riscv_sm3p1;
22220 break;
22221
22222 // Zihintntl
22223 case RISCV::BI__builtin_riscv_ntl_load: {
22224 llvm::Type *ResTy = ConvertType(E->getType());
22225 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
22226 if (Ops.size() == 2)
22227 DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();
22228
22229 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
22230 getLLVMContext(),
22231 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
22232 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
22233 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
22234
22235 int Width;
22236 if (ResTy->isScalableTy()) {
22237 const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);
22238 llvm::Type *ScalarTy = ResTy->getScalarType();
22239 Width = ScalarTy->getPrimitiveSizeInBits() *
22240 SVTy->getElementCount().getKnownMinValue();
22241 } else
22242 Width = ResTy->getPrimitiveSizeInBits();
22243 LoadInst *Load = Builder.CreateLoad(
22244 Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));
22245
22246 Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
22247 Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
22248 RISCVDomainNode);
22249
22250 return Load;
22251 }
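// A minimal sketch of the non-temporal load lowering, assuming an i32 load
// with the default __RISCV_NTLH_ALL domain (DomainVal == 5):
//   __builtin_riscv_ntl_load(p)
// becomes roughly
//   %v = load i32, ptr %p, align 4, !nontemporal !0, !riscv-nontemporal-domain !1
//   !0 = !{i32 1}
//   !1 = !{i32 5}
// The store case below attaches the same two metadata nodes to a store.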
22252 case RISCV::BI__builtin_riscv_ntl_store: {
22253 unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL
22254 if (Ops.size() == 3)
22255 DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();
22256
22257 llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(
22258 getLLVMContext(),
22259 llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));
22260 llvm::MDNode *NontemporalNode = llvm::MDNode::get(
22261 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
22262
22263 StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
22264 Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);
22265 Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),
22266 RISCVDomainNode);
22267
22268 return Store;
22269 }
22270
22271 // Vector builtins are handled from here.
22272#include "clang/Basic/riscv_vector_builtin_cg.inc"
22273 // SiFive Vector builtins are handled from here.
22274#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
22275 }
22276
22277 assert(ID != Intrinsic::not_intrinsic);
22278
22279 llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
22280 return Builder.CreateCall(F, Ops, "");
22281}
Defines the clang::ASTContext interface.
#define V(N, I)
Definition: ASTContext.h:3341
DynTypedNode Node
StringRef P
#define PPC_LNX_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN)
static constexpr SparcCPUInfo CPUInfo[]
Definition: Sparc.cpp:67
#define X86_CPU_SUBTYPE(ENUM, STR)
#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS)
#define X86_VENDOR(ENUM, STRING)
#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS)
#define X86_CPU_TYPE(ENUM, STR)
static constexpr Builtin::Info BuiltinInfo[]
Definition: Builtins.cpp:32
static void Accumulate(SMap &SM, CFGBlock *B)
Definition: CFGStmtMap.cpp:49
static Value * EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg="")
Definition: CGBuiltin.cpp:8476
static llvm::Value * ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType)
Definition: CGBuiltin.cpp:9312
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node.
Definition: CGBuiltin.cpp:214
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:1191
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6502
static Value * EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering)
Definition: CGBuiltin.cpp:393
#define INTRINSIC_X86_XSAVE_ID(NAME)
static CanQualType getOSLogArgType(ASTContext &C, int Size)
Get the argument type for arguments to os_log_helper.
Definition: CGBuiltin.cpp:2057
static Value * EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow)
Definition: CGBuiltin.cpp:2023
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:6371
static Value * tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V)
Definition: CGBuiltin.cpp:2514
static llvm::Value * MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned)
Definition: CGBuiltin.cpp:9282
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p,...
Definition: CGBuiltin.cpp:801
static llvm::Value * SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned)
Definition: CGBuiltin.cpp:9275
static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[]
Definition: CGBuiltin.cpp:7517
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value * > &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:9502
#define MMA_VARIANTS_B1_AND(geom, type)
static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl< Value * > &Ops)
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7529
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsCompress)
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[]
Definition: CGBuiltin.cpp:7499
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:8544
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl * > &Seen)
Definition: CGBuiltin.cpp:2392
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:446
static std::pair< Intrinsic::ID, unsigned > getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID)
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:739
#define MMA_INTR(geom_op_type, layout)
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value * > Ops)
static Value * emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:496
#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:6498
static bool AArch64SVEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7530
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:1389
static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef< ARMVectorIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:7534
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef< Value * > Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
static Value * loadRISCVFeatureBits(unsigned Index, CGBuilderTy &Builder, CodeGenModule &CGM)
#define MUTATE_LDBL(func)
static Value * EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static unsigned CountCountedByAttrs(const RecordDecl *RD)
Definition: CGBuiltin.cpp:861
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:614
static Value * EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:2420
static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl< Value * > &Ops)
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:470
static struct WidthAndSignedness EncompassingIntegerType(ArrayRef< struct WidthAndSignedness > Types)
Definition: CGBuiltin.cpp:769
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context)
Definition: CGBuiltin.cpp:9271
#define MMA_VARIANTS(geom, type)
static bool AArch64SMEIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7531
static llvm::Value * VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1)
Definition: CGBuiltin.cpp:9349
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6256
constexpr unsigned SVEBitsPerBlock
Definition: CGBuiltin.cpp:9786
static std::optional< CodeGenFunction::MSVCIntrin > translateX86ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1774
static const std::pair< unsigned, unsigned > NEONEquivalentIntrinsicMap[]
Definition: CGBuiltin.cpp:7341
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:6495
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Align Alignment)
static Value * emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:513
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node,...
Definition: CGBuiltin.cpp:266
static Value * EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef< Value * > Ops)
static llvm::Value * ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth)
Definition: CGBuiltin.cpp:9376
#define MMA_SATF_VARIANTS(geom, type)
static std::optional< CodeGenFunction::MSVCIntrin > translateAarch64ToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1620
static std::optional< CodeGenFunction::MSVCIntrin > translateArmToMsvcIntrin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:1474
Value * emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name="")
Definition: CGBuiltin.cpp:590
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:1252
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6507
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:660
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to @llvm.fabs().
Definition: CGBuiltin.cpp:651
#define CUSTOM_BUILTIN_MAPPING(x, s)
static Value * EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef< Value * > Ops, llvm::Type *DstTy)
static bool isSpecialUnsignedMultiplySignedResult(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2250
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:809
static llvm::Value * emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:1328
static llvm::Value * VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd)
Definition: CGBuiltin.cpp:9338
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
static Value * emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:550
static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type)
Definition: CGBuiltin.cpp:757
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:2304
static llvm::ScalableVectorType * getSVEVectorForElementType(llvm::Type *EltTy)
Definition: CGBuiltin.cpp:9788
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID)
Definition: CGBuiltin.cpp:2443
#define INTRINSIC_WITH_CC(NAME)
static llvm::FixedVectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false, bool AllowBFloatArgsAndRet=true)
Definition: CGBuiltin.cpp:6330
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:257
static llvm::Value * ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT)
Definition: CGBuiltin.cpp:9364
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value * > Ops, bool InvertLHS=false)
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:185
static Value * EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi)
Definition: CGBuiltin.cpp:8455
static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes)
Definition: CGBuiltin.cpp:75
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
SpecialRegisterAccessKind
Definition: CGBuiltin.cpp:8447
@ VolatileRead
Definition: CGBuiltin.cpp:8449
@ NormalRead
Definition: CGBuiltin.cpp:8448
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft's _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:351
static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:196
static Value * EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, unsigned BuiltinID, bool IsAddSub)
static Value * getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts)
static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo)
Determine if a binop is a checked mixed-sign multiply we can specialize.
Definition: CGBuiltin.cpp:2292
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:311
static Value * emitBinaryExpMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID)
Definition: CGBuiltin.cpp:530
static Value * EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType)
Emit the conversions required to turn the given value into an integer of the given size.
Definition: CGBuiltin.cpp:174
static llvm::Value * ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V)
Definition: CGBuiltin.cpp:9304
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value * > Ops)
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7526
static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:6826
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:238
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:724
@ UnsignedAlts
Definition: CGBuiltin.cpp:6465
@ Vectorize1ArgType
Definition: CGBuiltin.cpp:6470
@ FpCmpzModifiers
Definition: CGBuiltin.cpp:6474
@ Use64BitVectors
Definition: CGBuiltin.cpp:6467
@ VectorizeArgTypes
Definition: CGBuiltin.cpp:6462
@ VectorRetGetArgs01
Definition: CGBuiltin.cpp:6472
@ InventFloatType
Definition: CGBuiltin.cpp:6464
@ AddRetType
Definition: CGBuiltin.cpp:6457
@ Add2ArgTypes
Definition: CGBuiltin.cpp:6459
@ VectorizeRetType
Definition: CGBuiltin.cpp:6461
@ VectorRet
Definition: CGBuiltin.cpp:6471
@ Add1ArgType
Definition: CGBuiltin.cpp:6458
@ Use128BitVectors
Definition: CGBuiltin.cpp:6468
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo, SmallVectorImpl< Value * > &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:7592
static Value * emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:602
#define MMA_LDST(n, geom_op_type)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value * > Ops, bool IsSigned)
static Value * emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID)
Definition: CGBuiltin.cpp:632
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:482
static Value * EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E)
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD)
Definition: CGBuiltin.cpp:2525
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:1201
static RValue EmitCheckedUnsignedMultiplySignedResult(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Definition: CGBuiltin.cpp:2258
static Value * emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef< Value * > Args)
Definition: CGBuiltin.cpp:569
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:689
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:249
static Value * EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef< Value * > Ops)
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:1237
#define MMA_VARIANTS_B1_XOR(geom, type)
#define MMA_VARIANTS_I4(geom, type)
Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount)
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value * > Ops, bool IsSigned)
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value * > Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:8374
static Value * EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW)
Definition: CGBuiltin.cpp:2017
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:457
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:7528
static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:7101
CodeGenFunction::ComplexPairTy ComplexPairTy
const Decl * D
Expr * E
const Environment & Env
Definition: HTMLLogger.cpp:148
unsigned Iter
Definition: HTMLLogger.cpp:154
#define ALIAS(NAME, TOK, FLAGS)
#define X(type, name)
Definition: Value.h:143
llvm::MachO::Record Record
Definition: MachO.h:31
static std::string getName(const CallEvent &Call)
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
SourceRange Range
Definition: SemaObjC.cpp:758
SourceLocation Loc
Definition: SemaObjC.cpp:759
static QualType getPointeeType(const MemRegion *R)
Enumerates target-specific builtins in their own namespaces within namespace clang.
Defines the clang::TargetOptions class.
SourceLocation Begin
__DEVICE__ float modf(float __x, float *__iptr)
__DEVICE__ double nan(const char *)
__device__ int
__device__ __2f16 float __ockl_bool s
APSInt & getInt()
Definition: APValue.h:423
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:187
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
unsigned getIntWidth(QualType T) const
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
CanQualType VoidPtrTy
Definition: ASTContext.h:1146
IdentifierTable & Idents
Definition: ASTContext.h:660
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:662
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType IntTy
Definition: ASTContext.h:1128
QualType getObjCIdType() const
Represents the Objective-CC id type.
Definition: ASTContext.h:2117
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2675
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2394
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1119
QualType GetBuiltinType(unsigned ID, GetBuiltinTypeError &Error, unsigned *IntegerConstantArgs=nullptr) const
Return the type for the specified builtin.
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:779
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
unsigned getTargetAddressSpace(LangAS AS) const
@ GE_None
No error.
Definition: ASTContext.h:2296
@ GE_Missing_type
Missing a type.
Definition: ASTContext.h:2299
ASTRecordLayout - This class contains layout information for one RecordDecl, which is a struct/union/...
Definition: RecordLayout.h:38
CharUnits getSize() const
getSize - Get the record size in characters.
Definition: RecordLayout.h:193
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
Definition: RecordLayout.h:200
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3566
QualType getElementType() const
Definition: Type.h:3578
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e....
Definition: Builtins.h:149
llvm::StringRef getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:103
bool isConstWithoutErrnoAndExceptions(unsigned ID) const
Return true if this function has no side effects and doesn't read memory, except for possibly errno o...
Definition: Builtins.h:247
bool isConstWithoutExceptions(unsigned ID) const
Definition: Builtins.h:251
bool isConst(unsigned ID) const
Return true if this function has no side effects and doesn't read memory.
Definition: Builtins.h:122
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2830
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:58
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
XRayInstrSet XRayInstrumentationBundle
Set of XRay instrumentation kinds to emit.
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
An aggregate value slot.
Definition: CGValue.h:504
Address getAddress() const
Definition: CGValue.h:644
A scoped helper to set the current debug location to the specified location or preferred location of ...
Definition: CGDebugInfo.h:855
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:895
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:912
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:135
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:142
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:291
llvm::CallInst * CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:387
llvm::CallInst * CreateMemCpyInline(Address Dest, Address Src, uint64_t Size)
Definition: CGBuilder.h:379
llvm::AtomicRMWInst * CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val, llvm::AtomicOrdering Ordering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:179
llvm::CallInst * CreateMemSetInline(Address Dest, llvm::Value *Value, uint64_t Size)
Definition: CGBuilder.h:403
llvm::StoreInst * CreateDefaultAlignedStore(llvm::Value *Val, llvm::Value *Addr, bool IsVolatile=false)
Definition: CGBuilder.h:150
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:396
llvm::AtomicCmpXchgInst * CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New, llvm::AtomicOrdering SuccessOrdering, llvm::AtomicOrdering FailureOrdering, llvm::SyncScope::ID SSID=llvm::SyncScope::System)
Definition: CGBuilder.h:168
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:107
Address CreateConstByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:314
Address CreateLaunderInvariantGroup(Address Addr)
Definition: CGBuilder.h:435
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:363
llvm::LoadInst * CreateAlignedLoad(llvm::Type *Ty, llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:127
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:188
Address CreateConstInBoundsGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:260
Address CreateInBoundsGEP(Address Addr, ArrayRef< llvm::Value * > IdxList, llvm::Type *ElementType, CharUnits Align, const Twine &Name="")
Definition: CGBuilder.h:344
virtual std::string getDeviceSideName(const NamedDecl *ND)=0
Returns function or variable name on device side even if the current compilation is for host.
virtual llvm::GlobalVariable * getThrowInfo(QualType T)
Definition: CGCXXABI.h:259
All available information about a concrete callee.
Definition: CGCall.h:63
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:137
llvm::DIType * getOrCreateStandaloneType(QualType Ty, SourceLocation Loc)
Emit standalone debug info for a type.
llvm::DILocation * CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg)
Create a debug location from TrapLocation that adds an artificial inline frame where the frame name i...
CGFunctionInfo - Class to encapsulate the information about a function definition.
virtual void EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, llvm::Value *Size)=0
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E)
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:274
void add(RValue rvalue, QualType type)
Definition: CGCall.h:298
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation.
llvm::Value * EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
llvm::Value * EmitLifetimeStart(llvm::TypeSize Size, llvm::Value *Addr)
std::pair< RValue, llvm::Value * > EmitAtomicCompareExchange(LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, llvm::AtomicOrdering Success=llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering Failure=llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak=false, AggValueSlot Slot=AggValueSlot::ignored())
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
llvm::Value * EmitSVEPredicateCast(llvm::Value *Pred, llvm::ScalableVectorType *VTy)
llvm::CallInst * EmitTrapCall(llvm::Intrinsic::ID IntrID)
Emit a call to trap or debugtrap and attach function attribute "trap-func-name" if specified.
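For illustration, the two builtins that commonly reach this helper (a sketch; the wrapper names fatal and breakpoint are illustrative): __builtin_trap lowers to llvm.trap and __builtin_debugtrap to llvm.debugtrap.
  [[noreturn]] void fatal() { __builtin_trap(); }  // llvm.trap
  void breakpoint() { __builtin_debugtrap(); }     // llvm.debugtrap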
SanitizerSet SanOpts
Sanitizers enabled for this function.
RValue EmitBuiltinIsAligned(const CallExpr *E)
Emit IR for __builtin_is_aligned.
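A usage sketch of the builtin this emits (the wrapper name is illustrative); the generated IR is roughly a mask-and-compare, ((uintptr_t)p & (Align - 1)) == 0, and Align must be an integral constant power of two.
  bool is_cacheline_aligned(const char *p) {
    return __builtin_is_aligned(p, 64);
  }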
LValue EmitAggExprToLValue(const Expr *E)
EmitAggExprToLValue - Emit the computation of the specified expression of aggregate type into a tempo...
void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum)
Create a check for a function parameter that may potentially be declared as non-null.
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void GetAArch64SVEProcessedOperands(unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, SVETypeFlags TypeFlags)
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
void EmitLifetimeEnd(llvm::Value *Size, llvm::Value *Addr)
void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
llvm::Value * EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind)
Emits an argument for a call to a builtin.
llvm::Value * EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
CleanupKind getARCCleanupKind()
Retrieves the default cleanup kind for an ARC cleanup.
llvm::Value * EmitRISCVCpuSupports(const CallExpr *E)
llvm::Value * EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm....
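Sketch of the source pattern that reaches this helper: __builtin_va_start and __builtin_va_end become the llvm.va_start / llvm.va_end intrinsics (sum_ints is an illustrative name).
  int sum_ints(int count, ...) {
    __builtin_va_list ap;
    __builtin_va_start(ap, count);
    int total = 0;
    for (int i = 0; i < count; ++i)
      total += __builtin_va_arg(ap, int);
    __builtin_va_end(ap);
    return total;
  }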
llvm::Value * EmitSVEMaskedStore(const CallExpr *, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitSVEReinterpret(llvm::Value *Val, llvm::Type *Ty)
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
llvm::Value * EmitSEHExceptionInfo()
RValue EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp)
Emit IR for __builtin_align_up/__builtin_align_down.
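Illustrative sketch of the two builtins covered here (wrapper names are made up); the alignment argument must be an integral constant power of two, and the result keeps the type of the first argument.
  char *round_up_16(char *p)   { return __builtin_align_up(p, 16); }
  char *round_down_16(char *p) { return __builtin_align_down(p, 16); }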
const LangOptions & getLangOpts() const
llvm::Value * EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, const FieldDecl *CountDecl)
Build an expression accessing the "counted_by" field.
void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID)
llvm::Constant * EmitCheckTypeDescriptor(QualType T)
Emit a description of a type in a format suitable for passing to a runtime sanitizer handler.
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
void EmitTrapCheck(llvm::Value *Checked, SanitizerHandler CheckHandlerID)
Create a basic block that will call the trap intrinsic, and emit a conditional branch to it,...
void EmitUnreachable(SourceLocation Loc)
Emit a reached-unreachable diagnostic if Loc is valid and runtime checking is enabled.
llvm::Value * EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, bool IsMustTail, SourceLocation Loc, bool IsVirtualFunctionPointerThunk=false)
EmitCall - Generate a call of the given function, expecting the given result type,...
Address makeNaturalAddressForPointer(llvm::Value *Ptr, QualType T, CharUnits Alignment=CharUnits::Zero(), bool ForPointeeType=false, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
Construct an address with the natural alignment of T.
ComplexPairTy EmitComplexExpr(const Expr *E, bool IgnoreReal=false, bool IgnoreImag=false)
EmitComplexExpr - Emit the computation of the specified expression of complex type,...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
TypeCheckKind
Situations in which we might emit a check for the suitability of a pointer or glvalue.
@ TCK_Store
Checking the destination of a store. Must be suitably sized and aligned.
@ TCK_Load
Checking the operand of a load. Must be suitably sized and aligned.
llvm::Value * EmitSMELdrStr(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * FormSVEBuiltinResult(llvm::Value *Call)
FormSVEBuiltinResult - Returns the struct of scalable vectors as a wider vector.
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx, const llvm::ElementCount &Count)
llvm::Type * ConvertTypeForMem(QualType T)
llvm::Value * EmitSVEMaskedLoad(const CallExpr *, llvm::Type *ReturnTy, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID, bool IsZExtReturn)
bool AlwaysEmitXRayCustomEvents() const
AlwaysEmitXRayCustomEvents - Return true if we must unconditionally emit XRay custom event handling c...
llvm::Value * EmitSVEDupX(llvm::Value *Scalar)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
@ Default
No language constraints on evaluation order.
const TargetInfo & getTarget() const
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
llvm::Value * getTypeSize(QualType Ty)
Returns the calculated size of the specified type.
llvm::Value * EmitSEHExceptionCode()
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
EmitTargetBuiltinExpr - Emit the given builtin call.
RValue EmitAnyExprToTemp(const Expr *E)
EmitAnyExprToTemp - Similarly to EmitAnyExpr(), however, the result will always be accessible even if...
RValue EmitCoroutineIntrinsic(const CallExpr *E, unsigned int IID)
llvm::Value * EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E)
Address EmitArrayToPointerDecay(const Expr *Array, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
void EmitCheck(ArrayRef< std::pair< llvm::Value *, SanitizerMask > > Checked, SanitizerHandler Check, ArrayRef< llvm::Constant * > StaticArgs, ArrayRef< llvm::Value * > DynamicArgs)
Create a basic block that will either trap or call a handler function in the UBSan runtime with the p...
RValue EmitBuiltinNewDeleteCall(const FunctionProtoType *Type, const CallExpr *TheCallExpr, bool IsDelete)
llvm::Value * EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty, const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name="")
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
AggValueSlot CreateAggTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateAggTemp - Create a temporary memory object for the given aggregate type.
llvm::ScalableVectorType * getSVEType(const SVETypeFlags &TypeFlags)
RValue emitRotate(const CallExpr *E, bool IsRotateRight)
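A short sketch of the rotate builtins handled here (assuming the __builtin_rotateleft*/__builtin_rotateright* family), which clang emits as funnel-shift intrinsics (llvm.fshl / llvm.fshr) with both data operands equal to the rotated value; the wrapper names are illustrative.
  unsigned rotl_by8(unsigned x) { return __builtin_rotateleft32(x, 8); }
  unsigned rotr_by8(unsigned x) { return __builtin_rotateright32(x, 8); }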
llvm::Constant * EmitCheckSourceLocation(SourceLocation Loc)
Emit a description of a source location in a format suitable for passing to a runtime sanitizer handl...
void ErrorUnsupported(const Stmt *S, const char *Type)
ErrorUnsupported - Print out an error that codegen doesn't support the specified stmt yet.
const FieldDecl * FindFlexibleArrayMemberFieldAndOffset(ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset)
Address EmitVAListRef(const Expr *E)
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
llvm::Value * EmitSVEMovl(const SVETypeFlags &TypeFlags, llvm::ArrayRef< llvm::Value * > Ops, unsigned BuiltinID)
void emitAlignmentAssumption(llvm::Value *PtrValue, QualType Ty, SourceLocation Loc, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue=nullptr)
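One common source construct that funnels into this helper is __builtin_assume_aligned (a sketch; sum4 is an illustrative name): the assumption is attached through an llvm.assume so later passes can exploit the alignment.
  float sum4(const float *p) {
    const float *ap = (const float *)__builtin_assume_aligned(p, 16);
    return ap[0] + ap[1] + ap[2] + ap[3];
  }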
const TargetCodeGenInfo & getTargetHooks() const
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::Type * getEltType(const SVETypeFlags &TypeFlags)
void EmitAggExpr(const Expr *E, AggValueSlot AS)
EmitAggExpr - Emit the computation of the specified expression of aggregate type.
bool ShouldXRayInstrumentFunction() const
ShouldXRayInstrumentFunction - Return true if the current function should be instrumented with XRay nop sleds...
llvm::Value * EmitSVEPMull(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned BuiltinID)
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation.
bool IsInPreservedAIRegion
True if CodeGen currently emits code inside a preserved access index region.
llvm::Value * EmitARCRetain(QualType type, llvm::Value *value)
bool AlwaysEmitXRayTypedEvents() const
AlwaysEmitXRayTypedEvents - Return true if clang must unconditionally emit XRay typed event handling ...
void SetSqrtFPAccuracy(llvm::Value *Val)
Set the minimum required accuracy of the given sqrt operation based on CodeGenOpts.
llvm::Value * EmitSVEScatterStore(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
CGCallee EmitCallee(const Expr *E)
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::Value * EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E)
void checkTargetFeatures(const CallExpr *E, const FunctionDecl *TargetDecl)
llvm::Value * BuildVector(ArrayRef< llvm::Value * > Ops)
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitARMCDEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value * > args, const Twine &name="")
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::Value * EmitSMEReadWrite(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, LValue LV, QualType Type, SanitizerSet SkippedChecks=SanitizerSet(), llvm::Value *ArraySize=nullptr)
llvm::Value * EmitSMELd1St1(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static Destroyer destroyARCStrongPrecise
void EmitARCIntrinsicUse(ArrayRef< llvm::Value * > values)
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E)
llvm::Value * EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address EmitMSVAListRef(const Expr *E)
Emit a "reference" to a __builtin_ms_va_list; this is always the value of the expression,...
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
SmallVector< llvm::Type *, 2 > getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ReturnType, ArrayRef< llvm::Value * > Ops)
static bool hasAggregateEvaluationKind(QualType T)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
llvm::Value * EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch)
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
llvm::Value * EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
llvm::Value * EmitSEHAbnormalTermination()
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
llvm::Value * EmitSVEAllTruePred(const SVETypeFlags &TypeFlags)
RValue GetUndefRValue(QualType Ty)
GetUndefRValue - Get an appropriate 'undef' rvalue for the given type.
llvm::Type * SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags)
SVEBuiltinMemEltTy - Returns the memory element type for this memory access builtin.
llvm::LLVMContext & getLLVMContext()
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E)
llvm::Value * EmitSMEZero(const SVETypeFlags &TypeFlags, llvm::SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
llvm::Value * EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value * > &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value * > &O, const char *name, unsigned shift=0, bool rightshift=false)
llvm::Value * EmitAnnotationCall(llvm::Function *AnnotationFn, llvm::Value *AnnotatedVal, StringRef AnnotationStr, SourceLocation Location, const AnnotateAttr *Attr)
Emit an annotation call (intrinsic).
llvm::ScalableVectorType * getSVEPredType(const SVETypeFlags &TypeFlags)
llvm::Value * EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl< llvm::Value * > &Ops, unsigned IntID)
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
llvm::Value * EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
This class organizes the cross-function state that is used while generating LLVM code.
CGHLSLRuntime & getHLSLRuntime()
Return a reference to the configured HLSL runtime.
llvm::Module & getModule() const
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false, bool AssumeConvergent=false)
Create or return a runtime function declaration with the specified type and name.
llvm::Constant * getRawFunctionPointer(GlobalDecl GD, llvm::Type *Ty=nullptr)
Return a function pointer for a reference to the given function.
Definition: CGExpr.cpp:2879
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:100
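As a sketch (whether the library-call path or an LLVM intrinsic is chosen depends on the math/errno options in effect), a call such as the one below can be emitted as a direct call to the C library's sinf, which is the Function* this helper looks up; angle_sin is an illustrative name.
  float angle_sin(float x) { return __builtin_sinf(x); }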
DiagnosticsEngine & getDiags() const
void ErrorUnsupported(const Stmt *S, const char *Type)
Print out an error that codegen doesn't support the specified stmt yet.
const LangOptions & getLangOpts() const
CGCUDARuntime & getCUDARuntime()
Return a reference to the configured CUDA runtime.
CGOpenCLRuntime & getOpenCLRuntime()
Return a reference to the configured OpenCL runtime.
const TargetInfo & getTarget() const
const llvm::DataLayout & getDataLayout() const
void Error(SourceLocation loc, StringRef error)
Emit a general error that something can't be done.
CGCXXABI & getCXXABI() const
llvm::Constant * GetFunctionStart(const ValueDecl *Decl)
const llvm::Triple & getTriple() const
void DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo)
DecorateInstructionWithTBAA - Decorate the instruction with a TBAA tag.
llvm::Constant * CreateRuntimeVariable(llvm::Type *Ty, StringRef Name)
Create a new runtime global variable with the specified type and name.
TBAAAccessInfo getTBAAAccessInfo(QualType AccessType)
getTBAAAccessInfo - Get TBAA information that describes an access to an object of the given type.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
CharUnits getNaturalPointeeTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
llvm::LLVMContext & getLLVMContext()
CGObjCRuntime & getObjCRuntime()
Return a reference to the configured Objective-C runtime.
void SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk)
Set the LLVM function attributes (sext, zext, etc).
void SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F)
Set the LLVM function attributes which only apply to a function definition.
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type * > Tys=std::nullopt)
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1606
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:679
const CGFunctionInfo & arrangeBuiltinFunctionCall(QualType resultType, const CallArgList &args)
Definition: CGCall.cpp:667
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:368
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
llvm::Value * getRawBitFieldPointer(CodeGenFunction &CGF) const
Definition: CGValue.h:419
llvm::Value * getPointer(CodeGenFunction &CGF) const
Address getAddress() const
Definition: CGValue.h:361
void setNontemporal(bool Value)
Definition: CGValue.h:319
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
static RValue getIgnored()
Definition: CGValue.h:93
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getAggregate(Address addr, bool isVolatile=false)
Convert an Address to an RValue.
Definition: CGValue.h:125
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
ReturnValueSlot - Contains the address where the return value of a function can be stored,...
Definition: CGCall.h:372
virtual llvm::Value * encodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert the address of an instruction into a return address ...
Definition: TargetInfo.h:152
virtual llvm::Value * decodeReturnAddress(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const
Performs the code-generation required to convert a return address as stored by the system into the ac...
Definition: TargetInfo.h:142
const T & getABIInfo() const
Definition: TargetInfo.h:57
virtual int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const
Determines the DWARF register number for the stack pointer, for exception-handling purposes.
Definition: TargetInfo.h:124
virtual llvm::Value * testFPKind(llvm::Value *V, unsigned BuiltinID, CGBuilderTy &Builder, CodeGenModule &CGM) const
Performs a target specific test of a floating point value for things like IsNaN, Infinity,...
Definition: TargetInfo.h:161
Complex values, per C99 6.2.5p11.
Definition: Type.h:3134
Represents a concrete matrix type with constant number of rows and columns.
Definition: Type.h:4219
RecordDecl * getOuterLexicalRecordContext()
Retrieve the outermost lexically enclosing record context.
Definition: DeclBase.cpp:2014
T * getAttr() const
Definition: DeclBase.h:580
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
Definition: DeclBase.h:600
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:249
DeclContext * getDeclContext()
Definition: DeclBase.h:455
static bool isFlexibleArrayMemberLike(ASTContext &Context, const Decl *D, QualType Ty, LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel, bool IgnoreTemplateOrMacroSubstitution)
Whether it resembles a flexible array member.
Definition: DeclBase.cpp:435
bool hasAttr() const
Definition: DeclBase.h:584
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:192
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
This represents one expression.
Definition: Expr.h:110
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer,...
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3075
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3070
bool EvaluateAsFloat(llvm::APFloat &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsFloat - Return true if this is a constant which we can fold and convert to a floating point...
Expr * IgnoreParens() LLVM_READONLY
Skip past any parentheses which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3066
bool isPRValue() const
Definition: Expr.h:278
@ NPC_ValueDependentIsNotNull
Specifies that a value-dependent expression should be considered to never be a null pointer constant.
Definition: Expr.h:830
ExprObjectKind getObjectKind() const
getObjectKind - The object kind that this expression produces.
Definition: Expr.h:444
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3567
std::optional< std::string > tryEvaluateString(ASTContext &Ctx) const
If the current Expr can be evaluated to a pointer to a null-terminated constant string,...
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point.
Definition: Expr.cpp:3050
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant.
Definition: Expr.cpp:3941
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
const ValueDecl * getAsBuiltinConstantDeclRef(const ASTContext &Context) const
If this expression is an unambiguous reference to a single declaration, in the style of __builtin_fun...
Definition: Expr.cpp:226
Represents difference between two FPOptions values.
Definition: LangOptions.h:947
Represents a member of a struct/union/class.
Definition: Decl.h:3030
const FieldDecl * findCountedByField() const
Find the FieldDecl specified in a FAM's "counted_by" attribute.
Definition: Decl.cpp:4681
Represents a function declaration or definition.
Definition: Decl.h:1932
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2669
unsigned getBuiltinID(bool ConsiderWrapperFunctions=false) const
Returns a value indicating whether this function corresponds to a builtin function.
Definition: Decl.cpp:3618
Represents a prototype with parameter type info, e.g.
Definition: Type.h:5002
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5378
StringRef getName() const
Get the name of the identifier for this declaration as a StringRef.
Definition: Decl.h:276
std::string getNameAsString() const
Get a human-readable name for the declaration, even if it is one of the special kinds of names (C++ c...
Definition: Decl.h:292
Flags to identify the types for overloaded Neon builtins.
EltType getEltType() const
PipeType - OpenCL20.
Definition: Type.h:7599
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3187
QualType getPointeeType() const
Definition: Type.h:3197
A (possibly-)qualified type.
Definition: Type.h:941
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:7834
bool isWebAssemblyFuncrefType() const
Returns true if it is a WebAssembly Funcref Type.
Definition: Type.cpp:2848
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:7876
bool isWebAssemblyExternrefType() const
Returns true if it is a WebAssembly Externref Type.
Definition: Type.cpp:2844
The collection of all type qualifiers we support.
Definition: Type.h:319
Represents a struct/union/class.
Definition: Decl.h:4145
field_range fields() const
Definition: Decl.h:4351
Flags to identify the types for overloaded SVE builtins.
bool isZExtReturn() const
bool isReverseUSDOT() const
bool isOverloadNone() const
bool isUndef() const
MemEltType getMemEltType() const
bool isWriteZA() const
bool isGatherLoad() const
bool isOverloadCvt() const
EltType getEltType() const
bool isOverloadDefault() const
bool isPrefetch() const
bool isOverloadWhileRW() const
bool isReadZA() const
bool isTupleSet() const
bool isReverseMergeAnyAccOp() const
bool isReductionQV() const
bool isTupleGet() const
bool isInsertOp1SVALL() const
bool isAppendSVALL() const
bool isReverseMergeAnyBinOp() const
bool isStructStore() const
bool isTupleCreate() const
bool isGatherPrefetch() const
bool hasSplatOperand() const
MergeType getMergeType() const
bool isByteIndexed() const
bool isStructLoad() const
bool isOverloadWhileOrMultiVecCvt() const
unsigned getSplatOperand() const
bool isStore() const
bool isScatterStore() const
bool isReverseCompare() const
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
bool isUnion() const
Definition: Decl.h:3767
Exposes information about the current target.
Definition: TargetInfo.h:218
TargetOptions & getTargetOpts() const
Retrieve the target options.
Definition: TargetInfo.h:312
virtual bool hasLegalHalfType() const
Determine whether _Float16 is supported on this target.
Definition: TargetInfo.h:687
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:1256
bool isLittleEndian() const
Definition: TargetInfo.h:1666
unsigned getMaxOpenCLWorkGroupSize() const
Definition: TargetInfo.h:851
bool isBigEndian() const
Definition: TargetInfo.h:1665
virtual bool checkArithmeticFenceSupported() const
Controls if __arithmetic_fence is supported in the targeted backend.
Definition: TargetInfo.h:1672
unsigned getSuitableAlign() const
Return the alignment that is the largest alignment ever used for any scalar/SIMD data type on the tar...
Definition: TargetInfo.h:723
virtual std::string_view getClobbers() const =0
Returns a string of target-specific clobbers, in LLVM format.
llvm::CodeObjectVersionKind CodeObjectVersion
Code object version for AMDGPU.
Definition: TargetOptions.h:82
The base class of the type hierarchy.
Definition: Type.h:1829
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1882
bool isBlockPointerType() const
Definition: Type.h:8017
bool isVoidType() const
Definition: Type.h:8319
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2146
bool isComplexType() const
isComplexType() does not include complex integers (a GCC extension).
Definition: Type.cpp:677
bool isArrayType() const
Definition: Type.h:8075
bool isCountAttributedType() const
Definition: Type.cpp:694
bool isPointerType() const
Definition: Type.h:8003
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8359
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8607
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:8434
bool hasUnsignedIntegerRepresentation() const
Determine whether this type has an unsigned integer representation of some sort, e....
Definition: Type.cpp:2236
bool hasSignedIntegerRepresentation() const
Determine whether this type has a signed integer representation of some sort, e.g....
Definition: Type.cpp:2186
bool isObjCObjectPointerType() const
Definition: Type.h:8145
bool hasFloatingRepresentation() const
Determine whether this type has a floating-point representation of some sort, e.g....
Definition: Type.cpp:2258
bool isVectorType() const
Definition: Type.h:8115
bool isFloatingType() const
Definition: Type.cpp:2249
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2196
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8540
bool isRecordType() const
Definition: Type.h:8103
bool isSizelessVectorType() const
Returns true for all scalable vector types.
Definition: Type.cpp:2479
QualType getSizelessVectorEltType(const ASTContext &Ctx) const
Returns the representative type for the element of a sizeless vector builtin type.
Definition: Type.cpp:2536
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1886
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:667
QualType getType() const
Definition: Decl.h:678
QualType getType() const
Definition: Value.cpp:234
Represents a GCC generic vector type.
Definition: Type.h:4021
unsigned getNumElements() const
Definition: Type.h:4036
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
unsigned char getSummaryByte() const
Definition: OSLog.h:139
Defines the clang::TargetInfo interface.
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Constant * initializationPatternFor(CodeGenModule &, llvm::Type *)
Definition: PatternInit.cpp:15
TypeEvaluationKind
The kind of evaluation to perform on values of a particular type.
@ EHCleanup
Denotes a cleanup that should run when a scope is exited using exceptional control flow (a throw stat...
Definition: EHScopeStack.h:80
constexpr XRayInstrMask Typed
Definition: XRayInstr.h:42
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:41
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:181
const void * Store
Store - This opaque type encapsulates an immutable mapping from locations to values.
Definition: StoreRef.h:27
llvm::APFloat APFloat
Definition: Floating.h:23
llvm::APInt APInt
Definition: Integral.h:29
bool Dup(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1145
bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.h:276
bool Zero(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2243
bool Mul(InterpState &S, CodePtr OpPC)
Definition: Interp.h:408
bool Neg(InterpState &S, CodePtr OpPC)
Definition: Interp.h:671
bool Load(InterpState &S, CodePtr OpPC)
Definition: Interp.h:1666
bool Cast(InterpState &S, CodePtr OpPC)
Definition: Interp.h:2052
The JSON file list parser is used to communicate input to InstallAPI.
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ DType
'dtype' clause, an alias for 'device_type', stored separately for diagnostic purposes.
@ OK_BitField
A bitfield object is a bitfield on a C or C++ record.
Definition: Specifiers.h:154
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
@ Asm
Assembly: we accept this only so that we can preprocess it.
@ Result
The result type of a method or function.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
const FunctionProtoType * T
@ Success
Template argument deduction was successful.
@ Other
Other implicit parameter.
unsigned long uint64_t
long int64_t
int int32_t
unsigned int uint32_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define true
Definition: stdbool.h:25
llvm::PointerType * ConstGlobalsPtrTy
void* in the address space for constant globals
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::Type * HalfTy
half, bfloat, float, double
llvm::IntegerType * IntTy
int
llvm::PointerType * AllocaInt8PtrTy
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:644
void clear(SanitizerMask K=SanitizerKind::All)
Disable the sanitizers specified in K.
Definition: Sanitizers.h:176
void set(SanitizerMask K, bool Value)
Enable or disable a certain (single) sanitizer.
Definition: Sanitizers.h:168
bool has(SanitizerMask K) const
Check if a certain (single) sanitizer is enabled.
Definition: Sanitizers.h:159
bool has(XRayInstrMask K) const
Definition: XRayInstr.h:48
#define sinh(__x)
Definition: tgmath.h:373
#define asin(__x)
Definition: tgmath.h:112
#define scalbln(__x, __y)
Definition: tgmath.h:1182
#define sqrt(__x)
Definition: tgmath.h:520
#define acos(__x)
Definition: tgmath.h:83
#define fmin(__x, __y)
Definition: tgmath.h:780
#define exp(__x)
Definition: tgmath.h:431
#define ilogb(__x)
Definition: tgmath.h:851
#define copysign(__x, __y)
Definition: tgmath.h:618
#define erf(__x)
Definition: tgmath.h:636
#define atanh(__x)
Definition: tgmath.h:228
#define remquo(__x, __y, __z)
Definition: tgmath.h:1111
#define nextafter(__x, __y)
Definition: tgmath.h:1055
#define frexp(__x, __y)
Definition: tgmath.h:816
#define asinh(__x)
Definition: tgmath.h:199
#define erfc(__x)
Definition: tgmath.h:653
#define atan2(__x, __y)
Definition: tgmath.h:566
#define nexttoward(__x, __y)
Definition: tgmath.h:1073
#define hypot(__x, __y)
Definition: tgmath.h:833
#define exp2(__x)
Definition: tgmath.h:670
#define sin(__x)
Definition: tgmath.h:286
#define cbrt(__x)
Definition: tgmath.h:584
#define log2(__x)
Definition: tgmath.h:970
#define llround(__x)
Definition: tgmath.h:919
#define cosh(__x)
Definition: tgmath.h:344
#define trunc(__x)
Definition: tgmath.h:1216
#define fmax(__x, __y)
Definition: tgmath.h:762
#define ldexp(__x, __y)
Definition: tgmath.h:868
#define acosh(__x)
Definition: tgmath.h:170
#define tgamma(__x)
Definition: tgmath.h:1199
#define scalbn(__x, __y)
Definition: tgmath.h:1165
#define round(__x)
Definition: tgmath.h:1148
#define fmod(__x, __y)
Definition: tgmath.h:798
#define llrint(__x)
Definition: tgmath.h:902
#define tan(__x)
Definition: tgmath.h:315
#define cos(__x)
Definition: tgmath.h:257
#define log10(__x)
Definition: tgmath.h:936
#define fabs(__x)
Definition: tgmath.h:549
#define pow(__x, __y)
Definition: tgmath.h:490
#define log1p(__x)
Definition: tgmath.h:953
#define rint(__x)
Definition: tgmath.h:1131
#define expm1(__x)
Definition: tgmath.h:687
#define remainder(__x, __y)
Definition: tgmath.h:1090
#define fdim(__x, __y)
Definition: tgmath.h:704
#define lgamma(__x)
Definition: tgmath.h:885
#define tanh(__x)
Definition: tgmath.h:402
#define lrint(__x)
Definition: tgmath.h:1004
#define atan(__x)
Definition: tgmath.h:141
#define floor(__x)
Definition: tgmath.h:722
#define ceil(__x)
Definition: tgmath.h:601
#define log(__x)
Definition: tgmath.h:460
#define logb(__x)
Definition: tgmath.h:987
#define nearbyint(__x)
Definition: tgmath.h:1038
#define lround(__x)
Definition: tgmath.h:1021
#define fma(__x, __y, __z)
Definition: tgmath.h:742