-
Notifications
You must be signed in to change notification settings - Fork 15.6k
[LV] Add extra check for signed overflow for SDiv/SRem #170818
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-risc-v Author: Shih-Po Hung (arcbbb) Changes: This change complements the zero-divisor guard with a dividend guard for the sdiv/srem overflow case (INT_MIN / -1). Fixes #170775 Full diff: https://github.com/llvm/llvm-project/pull/170818.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f78acf31a0de2..c4844496ebea7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2856,6 +2856,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// having at least one active lane (the first). If the side-effects of the
// instruction are invariant, executing it w/o (the tail-folding) mask is safe
// - it will cause the same side-effects as when masked.
+ auto MayOverflow = [&](Instruction *I) {
+ auto *LHS = I->getOperand(0);
+ auto *RHS = I->getOperand(1);
+ return !this->Legal->isInvariant(LHS) &&
+ (!Legal->isInvariant(RHS) ||
+ (llvm::isa<llvm::ConstantInt>(RHS) &&
+ llvm::cast<llvm::ConstantInt>(RHS)->getValue().isAllOnes()));
+ };
switch(I->getOpcode()) {
default:
llvm_unreachable(
@@ -2878,11 +2886,13 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
}
case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
case Instruction::URem:
// If the divisor is loop-invariant no predication is needed.
return !Legal->isInvariant(I->getOperand(1));
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ // Extra check for signed overflow.
+ return !Legal->isInvariant(I->getOperand(1)) || MayOverflow(I);
}
}
@@ -7866,12 +7876,30 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(VPInstruction *VPI) {
// If not provably safe, use a select to form a safe divisor before widening the
// div/rem operation itself. Otherwise fall through to general handling below.
if (CM.isPredicatedInst(I)) {
+ auto MayOverflow = [&](Instruction *I) {
+ Value *LHS = I->getOperand(0);
+ Value *RHS = I->getOperand(1);
+ return !Legal->isInvariant(LHS) &&
+ (!Legal->isInvariant(RHS) ||
+ (llvm::isa<llvm::ConstantInt>(RHS) &&
+ llvm::cast<llvm::ConstantInt>(RHS)->getValue().isAllOnes()));
+ };
SmallVector<VPValue *> Ops(VPI->operands());
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
VPValue *One = Plan.getConstantInt(I->getType(), 1u);
auto *SafeRHS =
Builder.createSelect(Mask, Ops[1], One, VPI->getDebugLoc());
Ops[1] = SafeRHS;
+ if (VPI->getOpcode() == Instruction::SDiv ||
+ VPI->getOpcode() == Instruction::SRem) {
+ if (MayOverflow(I)) {
+ VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
+ VPValue *Zero = Plan.getConstantInt(I->getType(), 0);
+ auto *SafeLHS =
+ Builder.createSelect(Mask, Ops[0], Zero, VPI->getDebugLoc());
+ Ops[0] = SafeLHS;
+ }
+ }
return new VPWidenRecipe(*I, Ops, *VPI, *VPI, VPI->getDebugLoc());
}
[[fallthrough]];
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index e4ba6fe9d757d..de235a85349d7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -575,7 +575,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 -128)
; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> splat (i8 -1), <vscale x 16 x i8> splat (i8 1), i32 [[TMP12]])
-; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[WIDE_LOAD]], [[TMP10]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[WIDE_LOAD]], <vscale x 16 x i8> zeroinitializer, i32 [[TMP12]])
+; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[TMP4]], [[TMP10]]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[WIDE_LOAD]]
; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[PREDPHI]], ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
@@ -599,7 +600,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1
; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], splat (i8 -128)
; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1)
-; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP7]]
+; FIXED-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[WIDE_LOAD1]], <32 x i8> zeroinitializer
+; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[TMP3]], [[TMP7]]
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP9]], <32 x i8> [[WIDE_LOAD1]]
; FIXED-NEXT: store <32 x i8> [[PREDPHI2]], ptr [[TMP1]], align 1
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
index 8cd540c3888db..9d4274a559f5f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
@@ -23,7 +23,8 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
+; IF-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> zeroinitializer, i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[TMP4]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
@@ -214,7 +215,8 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD1]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP5]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
+; IF-EVL-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[VP_OP_LOAD]], <vscale x 2 x i64> zeroinitializer, i32 [[TMP5]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[TMP4]], [[TMP11]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
|
| // No predicate if RHS have no poison in masked-off lanes and not -1. | ||
| if (isa<SCEVAddRecExpr>(RHSSC) && !MayBeNegOne) | ||
| if (Legal->isInductionVariable(RHS) && !MayBeNegOne) | ||
| return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will we return false if the RHS is an IV that can be zero at some iteration? Is there a test that covers this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As shown in https://godbolt.org/z/oc6WKh9aT
I think it is ok to return false which is consistent with non-tail-folded loops.
Only the mask-off lanes matter here.
| auto *LHS = I->getOperand(0); | ||
| auto *RHS = I->getOperand(1); | ||
| ScalarEvolution &SE = *PSE.getSE(); | ||
| auto *LHSSC = SE.getSCEV(LHS); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there some guarantee in this code that LHS and RHS are of scevable type?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think SDiv/SRem guarantees that LHS and RHS are integer types.
| case Instruction::SRem: | ||
| case Instruction::URem: | ||
| // If the divisor is loop-invariant no predication is needed. | ||
| return !Legal->isInvariant(I->getOperand(1)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Meta comment: I think your change might be trying to do too much. It looks like you're trying to handle some of the loop varying cases where the udiv case doesn't. My suggestion would be something like the following:
if (!Legal->isInvariant(I->getOperand(1)))
return true;
return RHSCouldBeMinusOne && LHSCouldBeSignedMin;
In particular, I think the poison comments may be a red herring as the udiv case doesn't ensure the invariant RHS is non-poison either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Meta comment: I think your change might be trying to do too much. It looks like you're trying to handle some of the loop-varying cases where the udiv case doesn't. My suggestion would be something like the following:
if (!Legal->isInvariant(I->getOperand(1))) return true; return RHSCouldBeMinusOne && LHSCouldBeSignedMin;
In particular, I think the poison comments may be a red herring, as the udiv case doesn't ensure the invariant RHS is non-poison either.
My understanding: we check the SCEV range only when no lanes can be poison.
If any poison is present, the range is considered full.
Based on this, the udiv case would need to ensure the invariant RHS is non-poison; otherwise zero is possible.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on yesterday’s discussion, I’ve updated this to predicate conservatively.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't the conservative option be to always predicate them?
| case Instruction::SRem: | ||
| case Instruction::URem: | ||
| // If the divisor is loop-invariant no predication is needed. | ||
| return !Legal->isInvariant(I->getOperand(1)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't the conservative option be to always predicate them?
| SE.getSignedRangeMin(SE.getSCEV(LHS)).isMinSignedValue(); | ||
| if (!Legal->isInvariant(RHS)) | ||
| return true; | ||
| return RHSCouldBeMinusOne && LHSCouldBeSignedMin; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is still a bit difficult to follow the logic here. IIUC it should be: if either operand is loop-varying, it could be poison in the masked-off lanes. If that is the case, we cannot rely on the range check, as the original input will never be poison, but the masked-off lanes may be. So I think we should only be able to use the ranges for loop-invariant values.
The most conservative solution would be to always treat them as predicated; does the more complicated logic help in practice?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Induction-variable scenarios aren’t covered yet with the more complicated logic.
I'll stick with the safest option until I can address it properly.
Add a check for the SDiv/SRem overflow case (INT_MIN / -1).
SDiv/SRem must be predicated if the RHS has poison in masked-off lanes.
They must also be predicated when the RHS could be -1 and the LHS has masked-off lanes, to avoid signed-division overflow.
Fixes #170775.