author     David Sherwood <david.sherwood@arm.com>   2020-10-13 15:57:08 +0100
committer  Tom Stellard <tstellar@redhat.com>        2020-12-08 16:30:15 -0500
commit     a60b9f1bf32026995080d28f7be33f2658da191e (patch)
tree       e32cfb933544ebf9261a86fafa2e92de70530941
parent     Fix unwind info relocation with large code model on AArch64 (diff)
Add fatal error when running out of registers for SVE tuple call arguments
When passing SVE types as arguments to function calls we can run out of hardware SVE registers. This is normally fine, since we switch to an indirect mode where we pass a pointer to an SVE stack object in a GPR. However, if we switch over part-way through processing an SVE tuple, then part of it will be in registers and the other part will be on the stack. This is wrong, and we'd like to avoid any silent ABI compatibility issues in the future. For now, I've added a fatal error when this happens until we can get a proper fix.

NOTE: This cherry-pick also removes a redundant operand from the min/max tests in 'llvm-ir-to-intrinsic.ll'. Those changes were not originally part of this patch, since the operand had already been removed in D85142 before this landed, but the tests fail without them.

Differential Revision: https://reviews.llvm.org/D89326

(cherry picked from commit af57a0838eba528c2e5bd805d92c611435fca0d8)
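For context, here is a source-level sketch of the scenario described above. It is an illustration only and rests on one assumption of mine: that the ACLE svfloat64x4_t tuple type lowers to the <vscale x 8 x double> arguments used in the new regression test at the end of this diff.

// Sketch only: mirrors sve-calling-convention-tuples-broken.ll at the C++ level.
// Built for AArch64 with SVE enabled, the call below runs out of Z argument
// registers part-way through the second tuple and now hits the fatal error.
#include <arm_sve.h>

float callee(float, svfloat64x4_t, svfloat64x4_t);

float caller(const double *x0, const double *x1) {
  svbool_t pg = svptrue_b64();
  svfloat64x4_t a = svld4_f64(pg, x0);
  svfloat64x4_t b = svld4_f64(pg, x1);
  return callee(1.0f, a, b);
}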
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CallingConvention.cpp               9
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp                     9
-rw-r--r--  llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll                  58
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-calling-convention-tuples-broken.ll  23
4 files changed, 69 insertions, 30 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
index 84ec5afcc9c1..9ae2b465e247 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -35,6 +35,9 @@ static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
AArch64::Q3, AArch64::Q4, AArch64::Q5,
AArch64::Q6, AArch64::Q7};
+static const MCPhysReg ZRegList[] = {AArch64::Z0, AArch64::Z1, AArch64::Z2,
+ AArch64::Z3, AArch64::Z4, AArch64::Z5,
+ AArch64::Z6, AArch64::Z7};
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
@@ -97,6 +100,8 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
RegList = DRegList;
else if (LocVT.SimpleTy == MVT::f128 || LocVT.is128BitVector())
RegList = QRegList;
+ else if (LocVT.isScalableVector())
+ RegList = ZRegList;
else {
// Not an array we want to split up after all.
return false;
@@ -141,6 +146,10 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}
+ if (LocVT.isScalableVector())
+ report_fatal_error(
+ "Passing consecutive scalable vector registers unsupported");
+
// Mark all regs in the class as unavailable
for (auto Reg : RegList)
State.AllocateReg(Reg);
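The exhaustion that triggers the new error can be counted directly against the ZRegList added above. This is a worked example rather than code from the patch, and the register assignments follow my reading of the AAPCS64 SVE convention that floating-point and SVE arguments share V0-V7/Z0-Z7.

// Eight Z argument registers are available (ZRegList above).
constexpr int NumZArgRegs = 8; // Z0..Z7
constexpr int UsedByFloat = 1; // a leading float occupies V0, aliasing Z0
constexpr int FirstTuple  = 4; // first <vscale x 8 x double> -> Z1..Z4
constexpr int SecondTuple = 4; // second tuple would need Z5..Z8
static_assert(UsedByFloat + FirstTuple + SecondTuple > NumZArgRegs,
              "the second tuple cannot be passed entirely in registers, so "
              "the new report_fatal_error path fires instead of silently "
              "splitting it between registers and the stack");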
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 45bfa85bdc07..74a6c28fd32a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14702,7 +14702,14 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
- return Ty->isArrayTy();
+ if (Ty->isArrayTy())
+ return true;
+
+ const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
+ if (TySize.isScalable() && TySize.getKnownMinSize() > 128)
+ return true;
+
+ return false;
}
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
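Restated on its own, the predicate added above treats any scalable type wider than a single 128-bit-minimum SVE vector as needing consecutive registers: a <vscale x 8 x double> tuple (known minimum size 512 bits) qualifies, while a plain <vscale x 2 x double> (exactly 128 bits) does not. The sketch below is a standalone restatement, not code from the patch, and the helper name is hypothetical.

// Hypothetical standalone restatement of the new check.
#include "llvm/Support/TypeSize.h"

static bool needsConsecutiveSVERegs(const llvm::TypeSize &TySize) {
  // True for SVE tuples (known minimum size > 128 bits); false for a single
  // Z-sized vector such as <vscale x 2 x double>.
  return TySize.isScalable() && TySize.getKnownMinSize() > 128;
}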
diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
index 816465f9eaa1..2bf95d1e61fb 100644
--- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
+++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
@@ -182,7 +182,7 @@ define <vscale x 2 x i64> @urem_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
; SMIN
;
-define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smin_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
@@ -193,7 +193,7 @@ define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
ret <vscale x 16 x i8> %min
}
-define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smin_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -204,7 +204,7 @@ define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
ret <vscale x 8 x i16> %min
}
-define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smin_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -215,7 +215,7 @@ define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
ret <vscale x 4 x i32> %min
}
-define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: smin_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
@@ -226,7 +226,7 @@ define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
ret <vscale x 2 x i64> %min
}
-define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c) {
+define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b) {
; CHECK-LABEL: smin_split_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
@@ -238,7 +238,7 @@ define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i
ret <vscale x 32 x i8> %min
}
-define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c) {
+define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
; CHECK-LABEL: smin_split_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -252,7 +252,7 @@ define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32
ret <vscale x 32 x i16> %min
}
-define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
; CHECK-LABEL: smin_split_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -264,7 +264,7 @@ define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i
ret <vscale x 8 x i32> %min
}
-define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
; CHECK-LABEL: smin_split_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
@@ -276,7 +276,7 @@ define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i
ret <vscale x 4 x i64> %min
}
-define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: smin_promote_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -289,7 +289,7 @@ define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8
ret <vscale x 8 x i8> %min
}
-define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
; CHECK-LABEL: smin_promote_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -302,7 +302,7 @@ define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x
ret <vscale x 4 x i16> %min
}
-define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
; CHECK-LABEL: smin_promote_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
@@ -319,7 +319,7 @@ define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x
; UMIN
;
-define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umin_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
@@ -330,7 +330,7 @@ define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
ret <vscale x 16 x i8> %min
}
-define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umin_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -341,7 +341,7 @@ define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
ret <vscale x 8 x i16> %min
}
-define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umin_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -352,7 +352,7 @@ define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
ret <vscale x 4 x i32> %min
}
-define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: umin_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
@@ -363,7 +363,7 @@ define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
ret <vscale x 2 x i64> %min
}
-define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
; CHECK-LABEL: umin_split_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
@@ -375,7 +375,7 @@ define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i
ret <vscale x 4 x i64> %min
}
-define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
; CHECK-LABEL: umin_promote_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -392,7 +392,7 @@ define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8
; SMAX
;
-define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
@@ -403,7 +403,7 @@ define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
ret <vscale x 16 x i8> %max
}
-define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -414,7 +414,7 @@ define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
ret <vscale x 8 x i16> %max
}
-define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -425,7 +425,7 @@ define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
ret <vscale x 4 x i32> %max
}
-define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: smax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
@@ -436,7 +436,7 @@ define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
ret <vscale x 2 x i64> %max
}
-define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
; CHECK-LABEL: smax_split_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -448,7 +448,7 @@ define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i
ret <vscale x 8 x i32> %max
}
-define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
; CHECK-LABEL: smax_promote_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -465,7 +465,7 @@ define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x
; UMAX
;
-define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
@@ -476,7 +476,7 @@ define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
ret <vscale x 16 x i8> %max
}
-define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -487,7 +487,7 @@ define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
ret <vscale x 8 x i16> %max
}
-define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
@@ -498,7 +498,7 @@ define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
ret <vscale x 4 x i32> %max
}
-define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: umax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
@@ -509,7 +509,7 @@ define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
ret <vscale x 2 x i64> %max
}
-define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c) {
+define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
; CHECK-LABEL: umax_split_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
@@ -521,7 +521,7 @@ define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16
ret <vscale x 16 x i16> %max
}
-define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
; CHECK-LABEL: umax_promote_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuples-broken.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-tuples-broken.ll
new file mode 100644
index 000000000000..ee88f0b460ed
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-tuples-broken.ll
@@ -0,0 +1,23 @@
+; RUN: not --crash llc < %s -mtriple aarch64-linux-gnu -mattr=+sve >/dev/null 2>%t
+; RUN: FileCheck %s < %t
+
+; CHECK: Passing consecutive scalable vector registers unsupported
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define float @foo(double* %x0, double* %x1) {
+entry:
+ %0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %1 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1(<vscale x 2 x i1> %1, double* %x0)
+ %3 = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1(<vscale x 2 x i1> %1, double* %x1)
+ %call = call float @callee(float 1.000000e+00, <vscale x 8 x double> %2, <vscale x 8 x double> %3)
+ ret float %call
+}
+
+declare float @callee(float, <vscale x 8 x double>, <vscale x 8 x double>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+declare <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1(<vscale x 2 x i1>, double*)