Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2878,11 +2878,14 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
}
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::URem:
// If the divisor is loop-invariant no predication is needed.
return !Legal->isInvariant(I->getOperand(1));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Meta comment: I think your change might be trying to do too much. It looks like you're trying to handle some of the loop varying cases where the udiv case doesn't. My suggestion would be something like the following:

if (!Legal->isInvariant(I->getOperand(1)))
   return true;
return RHSCouldBeMinusOne && LHSCouldBeSignedMin;

In particular, I think the poison comments may be a red herring as the udiv case doesn't ensure the invariant RHS is non-poison either.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Meta comment: I think your change might be trying to do too much. It looks like you're trying to handle some of the loop varying cases where the udiv case doesn't. My suggestion would be something like the following:

if (!Legal->isInvariant(I->getOperand(1)))
   return true;
return RHSCouldBeMinusOne && LHSCouldBeSignedMin;

In particular, I think the poison comments may be a red herring as the udiv case doesn't ensure the invariant RHS is non-poison either.

My understanding: we check the SCEV range only when no lanes can be poison.
If any poison is present, the range is considered full.
Based on this, udiv case needs to ensure the invariant RHS is non-poison, otherwise zero is possible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on yesterday’s discussion, I’ve updated this to predicate conservatively.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't the conservative to always predicate them?

case Instruction::SDiv:
case Instruction::SRem:
// Conservative for now, since masked-off lanes may be poison and could
// trigger signed overflow.
return true;
}
}

Expand Down
6 changes: 4 additions & 2 deletions llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[TMP9:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP10]])
; CHECK-NEXT: [[TMP9:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP2]]
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP9]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]]
Expand Down Expand Up @@ -213,7 +214,8 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[TMP9:%.*]] = srem <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP10]])
; CHECK-NEXT: [[TMP9:%.*]] = srem <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP2]]
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP9]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]]
Expand Down
390 changes: 381 additions & 9 deletions llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll

Large diffs are not rendered by default.

34 changes: 29 additions & 5 deletions llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,45 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
; CHECK-NEXT: [[TMP17:%.*]] = sdiv i64 [[M]], [[CONV6]]
; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP17]] to i32
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP17]], [[CONV61]]
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[X]], [[TMP19]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE8:.*]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]]
; CHECK: [[PRED_SDIV_IF]]:
; CHECK-NEXT: [[TMP7:%.*]] = sdiv i64 [[M]], [[CONV6]]
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]]
; CHECK: [[PRED_SDIV_CONTINUE]]:
; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ poison, %[[VECTOR_BODY]] ], [ [[TMP7]], %[[PRED_SDIV_IF]] ]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_SDIV_IF3:.*]], label %[[PRED_SDIV_CONTINUE4:.*]]
; CHECK: [[PRED_SDIV_IF3]]:
; CHECK-NEXT: [[TMP10:%.*]] = sdiv i64 [[M]], [[CONV6]]
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE4]]
; CHECK: [[PRED_SDIV_CONTINUE4]]:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_SDIV_IF5:.*]], label %[[PRED_SDIV_CONTINUE6:.*]]
; CHECK: [[PRED_SDIV_IF5]]:
; CHECK-NEXT: [[TMP12:%.*]] = sdiv i64 [[M]], [[CONV6]]
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE6]]
; CHECK: [[PRED_SDIV_CONTINUE6]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_SDIV_IF7:.*]], label %[[PRED_SDIV_CONTINUE8]]
; CHECK: [[PRED_SDIV_IF7]]:
; CHECK-NEXT: [[TMP14:%.*]] = sdiv i64 [[M]], [[CONV6]]
; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE8]]
; CHECK: [[PRED_SDIV_CONTINUE8]]:
; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP8]] to i32
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP8]], [[CONV61]]
; CHECK-NEXT: [[TMP21:%.*]] = sub i64 [[INDEX]], [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32
; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[X]], [[TMP15]]
; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[TMP25:%.*]] = sext i32 [[TMP24]] to i64
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP25]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
; CHECK-NEXT: loop.0:
; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED1]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0>, ir<%conv>
; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, ir<%rem>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
Expand All @@ -59,16 +57,18 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%conv>, ir<%rem>
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst>
; CHECK-NEXT: Successor(s): pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.1
; CHECK-NEXT: Successor(s): loop.2
; CHECK-EMPTY:
; CHECK-NEXT: loop.1:
; CHECK-NEXT: loop.2:
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
Expand Down Expand Up @@ -210,10 +210,27 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]>, vp<[[BTC]]>
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next>
; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: WIDEN ir<%add> = add ir<%rem>, ir<%recur.next>
; CHECK-NEXT: Successor(s): pred.srem
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.srem: {
; CHECK-NEXT: pred.srem.entry:
; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.srem.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: Successor(s): pred.srem.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.srem.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: WIDEN ir<%add> = add vp<%8>, ir<%recur.next>
; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add>
; CHECK-NEXT: EMIT vp<[[SEL:%.+]]> = select vp<[[MASK]]>, ir<%and.red.next>, ir<%and.red>
; CHECK-NEXT: EMIT vp<[[SEL:%.+]]> = select vp<%6>, ir<%and.red.next>, ir<%and.red>
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
Expand Down Expand Up @@ -293,7 +310,6 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-NEXT: loop.0:
; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0>, ir<%conv>
; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: Successor(s): pred.load
; CHECK-EMPTY:
; CHECK: <xVFxUF> pred.load: {
Expand All @@ -302,18 +318,20 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
; CHECK-EMPTY:
; CHECK: pred.load.if:
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> (S->V)
; CHECK-NEXT: Successor(s): pred.load.continue
; CHECK-EMPTY:
; CHECK: pred.load.continue:
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%lv.2>
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[REMPHI:%.+]]> = ir<%rem>
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%lv.2>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.1
; CHECK-NEXT: Successor(s): loop.2
; CHECK-EMPTY:
; CHECK-NEXT: loop.1:
; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, ir<%rem>
; CHECK-NEXT: WIDEN-CAST ir<%conv.lv.2> = sext vp<%9> to i32
; CHECK-NEXT: loop.2:
; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<[[REMPHI]]>
; CHECK-NEXT: WIDEN-CAST ir<%conv.lv.2> = sext vp<%10> to i32
; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
Expand All @@ -330,9 +348,9 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.2
; CHECK-NEXT: Successor(s): loop.3
; CHECK-EMPTY:
; CHECK: loop.2:
; CHECK: loop.3:
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
Expand Down Expand Up @@ -394,7 +412,6 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur>, ir<%recur.next>
; CHECK-NEXT: WIDEN ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
Expand All @@ -404,6 +421,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, ir<%rem>
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%rem.div>, ir<%gep>
Expand All @@ -414,9 +432,9 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.2
; CHECK-NEXT: Successor(s): loop.3
; CHECK-EMPTY:
; CHECK-NEXT: loop.2:
; CHECK-NEXT: loop.3:
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
Expand Down Expand Up @@ -474,7 +492,6 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp ule vp<[[WIDE_IV]]>, vp<[[BTC]]>
; CHECK-NEXT: CLONE ir<[[L]]> = load ir<%src>
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%.pn>, ir<[[L]]>
; CHECK-NEXT: WIDEN ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
; CHECK-NEXT: <xVFxUF> pred.store: {
Expand All @@ -483,6 +500,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
; CHECK-EMPTY:
; CHECK-NEXT: pred.store.if:
; CHECK-NEXT: REPLICATE ir<%val> = sdiv vp<[[SPLICE]]>, ir<%x>
; CHECK-NEXT: vp<[[SCALAR_STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>, vp<[[VF]]>
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[SCALAR_STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%val>, ir<%gep.dst>
Expand All @@ -491,9 +509,9 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias
; CHECK-NEXT: pred.store.continue:
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): loop.0
; CHECK-NEXT: Successor(s): loop.1
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: loop.1:
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
; CHECK-NEXT: No successors
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ define void @loop_invariant_srem(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP3]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
; CHECK-NEXT: [[TMP11:%.*]] = srem <4 x i8> [[VEC_IND1]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i8> [[TMP8]], <4 x i8> splat (i8 1)
; CHECK-NEXT: [[TMP11:%.*]] = srem <4 x i8> [[VEC_IND1]], [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; CHECK: [[PRED_STORE_IF]]:
Expand Down
Loading