-
Notifications
You must be signed in to change notification settings - Fork 15.6k
[LV] Add extra check for signed overflow for SDiv/SRem #170818
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-risc-v Author: Shih-Po Hung (arcbbb) Changes: This change complements the zero-divisor guard with a dividend guard for the sdiv/srem overflow case (INT_MIN / -1). Fixes #170775 Full diff: https://github.com/llvm/llvm-project/pull/170818.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f78acf31a0de2..c4844496ebea7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2856,6 +2856,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// having at least one active lane (the first). If the side-effects of the
// instruction are invariant, executing it w/o (the tail-folding) mask is safe
// - it will cause the same side-effects as when masked.
+ auto MayOverflow = [&](Instruction *I) {
+ auto *LHS = I->getOperand(0);
+ auto *RHS = I->getOperand(1);
+ return !this->Legal->isInvariant(LHS) &&
+ (!Legal->isInvariant(RHS) ||
+ (llvm::isa<llvm::ConstantInt>(RHS) &&
+ llvm::cast<llvm::ConstantInt>(RHS)->getValue().isAllOnes()));
+ };
switch(I->getOpcode()) {
default:
llvm_unreachable(
@@ -2878,11 +2886,13 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
}
case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
case Instruction::URem:
// If the divisor is loop-invariant no predication is needed.
return !Legal->isInvariant(I->getOperand(1));
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ // Extra check for signed overflow.
+ return !Legal->isInvariant(I->getOperand(1)) || MayOverflow(I);
}
}
@@ -7866,12 +7876,30 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
// If not provably safe, use a select to form a safe divisor before widening the
// div/rem operation itself. Otherwise fall through to general handling below.
if (CM.isPredicatedInst(I)) {
+ auto MayOverflow = [&](Instruction *I) {
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+ return !Legal->isInvariant(LHS) &&
+ (!Legal->isInvariant(RHS) ||
+ (llvm::isa<llvm::ConstantInt>(RHS) &&
+ llvm::cast<llvm::ConstantInt>(RHS)->getValue().isAllOnes()));
+ };
SmallVector<VPValue *> Ops(VPI->operands());
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
VPValue *One = Plan.getConstantInt(I->getType(), 1u);
auto *SafeRHS =
Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc());
Ops[1] = SafeRHS;
+ if (VPI->getOpcode() == Instruction::SDiv ||
+ VPI->getOpcode() == Instruction::SRem) {
+ if (MayOverflow(I)) {
+ VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
+ VPValue *Zero = Plan.getConstantInt(I->getType(), 0);
+ auto *SafeLHS =
+ Builder.createSelect(Mask, Ops[0], Zero, VPI->getDebugLoc());
+ Ops[0] = SafeLHS;
+ }
+ }
return new VPWidenRecipe(*I, Ops, *VPI, *VPI, VPI->getDebugLoc());
}
[[fallthrough]];
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index e4ba6fe9d757d..de235a85349d7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -575,7 +575,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 -128)
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> splat (i8 -1), <vscale x 16 x i8> splat (i8 1), i32 [[TMP12]])
-; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[WIDE_LOAD]], [[TMP10]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[WIDE_LOAD]], <vscale x 16 x i8> zeroinitializer, i32 [[TMP12]])
+; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[TMP4]], [[TMP10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[WIDE_LOAD]]
; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[PREDPHI]], ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
@@ -599,7 +600,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1
; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], splat (i8 -128)
; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1)
-; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP7]]
+; FIXED-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[WIDE_LOAD1]], <32 x i8> zeroinitializer
+; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[TMP3]], [[TMP7]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP9]], <32 x i8> [[WIDE_LOAD1]]
; FIXED-NEXT: store <32 x i8> [[PREDPHI2]], ptr [[TMP1]], align 1
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
index 8cd540c3888db..9d4274a559f5f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
@@ -23,7 +23,8 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
+; IF-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> zeroinitializer, i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[TMP4]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
@@ -214,7 +215,8 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
+; IF-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> zeroinitializer, i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[TMP4]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
|
| // No predicate if RHS have no poison in masked-off lanes and not -1. | ||
| if (isa<SCEVAddRecExpr>(RHSSC) && !MayBeNegOne) | ||
| if (Legal->isInductionVariable(RHS) && !MayBeNegOne) | ||
| return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will we return false if the RHS is an IV that can be zero at some iteration? Is there a test that covers this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As shown in https://godbolt.org/z/oc6WKh9aT
I think it is ok to return false which is consistent with non-tail-folded loops.
Only the mask-off lanes matter here.
| auto *LHS = I->getOperand(0); | ||
| auto *RHS = I->getOperand(1); | ||
| ScalarEvolution &SE = *PSE.getSE(); | ||
| auto *LHSSC = SE.getSCEV(LHS); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there some guarantee in this code that LHS and RHS are of scevable type?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think SDiv/SRem guarantees that LHS and RHS are integer types.
| case Instruction::SRem: | ||
| case Instruction::URem: | ||
| // If the divisor is loop-invariant no predication is needed. | ||
| return !Legal->isInvariant(I->getOperand(1)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Meta comment: I think your change might be trying to do too much. It looks like you're trying to handle some of the loop varying cases where the udiv case doesn't. My suggestion would be something like the following:
if (!Legal->isInvariant(I->getOperand(1)))
return true;
return RHSCouldBeMinusOne && LHSCouldBeSignedMin;
In particular, I think the poison comments may be a red herring as the udiv case doesn't ensure the invariant RHS is non-poison either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Meta comment: I think your change might be trying to do too much. It looks like you're trying to handle some of the loop-varying cases where the udiv case doesn't. My suggestion would be something like the following:
if (!Legal->isInvariant(I->getOperand(1))) return true; return RHSCouldBeMinusOne && LHSCouldBeSignedMin;
In particular, I think the poison comments may be a red herring, as the udiv case doesn't ensure the invariant RHS is non-poison either.
My understanding: we check the SCEV range only when no lanes can be poison.
If any poison is present, the range is considered full.
Based on this, the udiv case would need to ensure the invariant RHS is non-poison; otherwise zero is possible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on yesterday’s discussion, I’ve updated this to predicate conservatively.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't the conservative option be to always predicate them?
| case Instruction::SRem: | ||
| case Instruction::URem: | ||
| // If the divisor is loop-invariant no predication is needed. | ||
| return !Legal->isInvariant(I->getOperand(1)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't the conservative option be to always predicate them?
| SE.getSignedRangeMin(SE.getSCEV(LHS)).isMinSignedValue(); | ||
| if (!Legal->isInvariant(RHS)) | ||
| return true; | ||
| return RHSCouldBeMinusOne && LHSCouldBeSignedMin; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is still a bit difficult to follow the logic here. IIUC it should be: if either operand is loop-varying, it could be poison in the masked-off lanes. If that is the case, we cannot rely on the range check, as the original input will never be poison, but the masked-off lanes may be. So I think we should only be able to use the ranges for loop-invariant values.
The most conservative solution would be to always treat them as predicated; does the more complicated logic help in practice?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Induction-variable scenarios aren’t covered yet with the more complicated logic.
I'll stick with the safest option until I can address it properly.
Add a check for the SDiv/SRem overflow case (INT_MIN / -1).
SDiv/SRem must be predicated if the RHS has poison in masked-off lanes.
They must also be predicated when the RHS could be -1 and the LHS has masked-off lanes, to avoid signed-division overflow.
Fixes #170775.