diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2021-04-11 20:06:53 +0100 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2021-04-11 20:07:09 +0100 |
commit | 231b87618bb61b24674d060721f7004057da9336 (patch) | |
tree | a5ec1071f2809445cb2a77b3c562ead63025dc57 | |
parent | [WebAssembly] Update v128.any_true (diff) | |
download | llvm-project-231b87618bb61b24674d060721f7004057da9336.tar.gz llvm-project-231b87618bb61b24674d060721f7004057da9336.tar.bz2 llvm-project-231b87618bb61b24674d060721f7004057da9336.zip |
[X86][AVX512] Fold not(kmov(x)) -> kmov(not(x)) and not(widen_subvector(x)) -> widen_subvector(not(x))
Improve AVX512 mask inversion: rG38c799bce801 exposed some missing opportunities to move scalar not() back onto the boolvector types for folding with setcc etc.
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/movmsk-cmp.ll | 31 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-reduce-and-bool.ll | 45 |
3 files changed, 48 insertions, 50 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0214745f88f3..4ecd9f86322b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46988,6 +46988,28 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) return RV; + // Fold not(iX bitcast(vXi1)) -> (iX bitcast(not(vec))) for legal boolvecs. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (llvm::isAllOnesConstant(N1) && N0.getOpcode() == ISD::BITCAST && + N0.getOperand(0).getValueType().isVector() && + N0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && + TLI.isTypeLegal(N0.getOperand(0).getValueType()) && N0.hasOneUse()) { + return DAG.getBitcast(VT, DAG.getNOT(SDLoc(N), N0.getOperand(0), + N0.getOperand(0).getValueType())); + } + + // Handle AVX512 mask widening. + // Fold not(insert_subvector(undef,sub)) -> insert_subvector(undef,not(sub)) + if (ISD::isBuildVectorAllOnes(N1.getNode()) && VT.isVector() && + VT.getVectorElementType() == MVT::i1 && + N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(0).isUndef() && + TLI.isTypeLegal(N0.getOperand(1).getValueType())) { + return DAG.getNode( + ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), + DAG.getNOT(SDLoc(N), N0.getOperand(1), N0.getOperand(1).getValueType()), + N0.getOperand(2)); + } + // Fold xor(zext(xor(x,c1)),c2) -> xor(zext(x),xor(zext(c1),c2)) // Fold xor(truncate(xor(x,c1)),c2) -> xor(truncate(x),xor(truncate(c1),c2)) // TODO: Under what circumstances could this be performed in DAGCombine? 
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index 3d0b7cd50fcf..30aa8cacd3cd 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -1017,9 +1017,8 @@ define i1 @allzeros_v2i64_not(<2 x i64> %a0) { ; KNL-LABEL: allzeros_v2i64_not: ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $3, %al ; KNL-NEXT: setne %al ; KNL-NEXT: vzeroupper @@ -1838,9 +1837,8 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) { ; KNL-LABEL: allones_v4i32_and1: ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 +; KNL-NEXT: vptestnmd {{.*}}(%rip){1to16}, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $15, %al ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper @@ -2154,9 +2152,8 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) { ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1] -; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $3, %al ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper @@ -2252,9 +2249,8 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) { ; KNL-LABEL: allones_v4i64_and1: ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0 +; KNL-NEXT: vptestnmq {{.*}}(%rip){1to8}, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $15, %al ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper @@ -3159,9 +3155,8 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) { ; KNL-LABEL: allones_v4i32_and4: ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0 +; 
KNL-NEXT: vptestnmd {{.*}}(%rip){1to16}, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $15, %al ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper @@ -3475,9 +3470,8 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) { ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4] -; KNL-NEXT: vptestmq %zmm1, %zmm0, %k0 +; KNL-NEXT: vptestnmq %zmm1, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $3, %al ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper @@ -3573,9 +3567,8 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) { ; KNL-LABEL: allones_v4i64_and4: ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; KNL-NEXT: vptestmq {{.*}}(%rip){1to8}, %zmm0, %k0 +; KNL-NEXT: vptestnmq {{.*}}(%rip){1to8}, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $15, %al ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper @@ -4018,9 +4011,8 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) { ; KNL: # %bb.0: ; KNL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $-109, %al ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper @@ -4029,8 +4021,8 @@ define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SKX-LABEL: movmsk_v8i16: ; SKX: # %bb.0: ; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 +; SKX-NEXT: knotb %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: notb %al ; SKX-NEXT: testb $-109, %al ; SKX-NEXT: sete %al ; SKX-NEXT: retq @@ -4136,9 +4128,8 @@ define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) { ; KNL: # %bb.0: ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; KNL-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 +; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $3, %al ; 
KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper @@ -4278,8 +4269,8 @@ define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) { ; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; KNL-NEXT: vcmplepd %zmm0, %zmm1, %k0 +; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: notb %al ; KNL-NEXT: testb $3, %al ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll index 4e0410f97346..50aad826e730 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll @@ -31,9 +31,8 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512F-LABEL: trunc_v2i64_v2i1: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: notb %al ; AVX512F-NEXT: testb $3, %al ; AVX512F-NEXT: sete %al ; AVX512F-NEXT: vzeroupper @@ -42,9 +41,8 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512BW-LABEL: trunc_v2i64_v2i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: testb $3, %al ; AVX512BW-NEXT: sete %al ; AVX512BW-NEXT: vzeroupper @@ -53,9 +51,8 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) { ; AVX512VL-LABEL: trunc_v2i64_v2i1: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0 +; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: testb $3, %al ; AVX512VL-NEXT: sete %al ; AVX512VL-NEXT: retq @@ -84,9 +81,8 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512F-LABEL: trunc_v4i32_v4i1: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpslld $31, %xmm0, 
%xmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: notb %al ; AVX512F-NEXT: testb $15, %al ; AVX512F-NEXT: sete %al ; AVX512F-NEXT: vzeroupper @@ -95,9 +91,8 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512BW-LABEL: trunc_v4i32_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: testb $15, %al ; AVX512BW-NEXT: sete %al ; AVX512BW-NEXT: vzeroupper @@ -106,9 +101,8 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) { ; AVX512VL-LABEL: trunc_v4i32_v4i1: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0 +; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: sete %al ; AVX512VL-NEXT: retq @@ -244,9 +238,8 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512F-LABEL: trunc_v4i64_v4i1: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsllq $63, %ymm0, %ymm0 -; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: notb %al ; AVX512F-NEXT: testb $15, %al ; AVX512F-NEXT: sete %al ; AVX512F-NEXT: vzeroupper @@ -255,9 +248,8 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512BW-LABEL: trunc_v4i64_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllq $63, %ymm0, %ymm0 -; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: testb $15, %al ; AVX512BW-NEXT: sete %al ; AVX512BW-NEXT: vzeroupper @@ -266,9 +258,8 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) { ; AVX512VL-LABEL: trunc_v4i64_v4i1: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0 -; AVX512VL-NEXT: vptestmq %ymm0, 
%ymm0, %k0 +; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0 ; AVX512VL-NEXT: kmovd %k0, %eax -; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: testb $15, %al ; AVX512VL-NEXT: sete %al ; AVX512VL-NEXT: vzeroupper @@ -875,9 +866,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX512F-LABEL: icmp_v2i64_v2i1: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: notb %al ; AVX512F-NEXT: testb $3, %al ; AVX512F-NEXT: sete %al ; AVX512F-NEXT: vzeroupper @@ -886,9 +876,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; AVX512BW-LABEL: icmp_v2i64_v2i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: testb $3, %al ; AVX512BW-NEXT: sete %al ; AVX512BW-NEXT: vzeroupper @@ -931,9 +920,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX512F-LABEL: icmp_v4i32_v4i1: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: notb %al ; AVX512F-NEXT: testb $15, %al ; AVX512F-NEXT: sete %al ; AVX512F-NEXT: vzeroupper @@ -942,9 +930,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) { ; AVX512BW-LABEL: icmp_v4i32_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: testb $15, %al ; AVX512BW-NEXT: sete %al ; AVX512BW-NEXT: vzeroupper @@ -1110,9 +1097,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX512F-LABEL: icmp_v4i64_v4i1: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 
-; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: notb %al ; AVX512F-NEXT: testb $15, %al ; AVX512F-NEXT: sete %al ; AVX512F-NEXT: vzeroupper @@ -1121,9 +1107,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; AVX512BW-LABEL: icmp_v4i64_v4i1: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax -; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: testb $15, %al ; AVX512BW-NEXT: sete %al ; AVX512BW-NEXT: vzeroupper |