From 1aeef52fd2de5be894cafb1687ce38860b71d33f Mon Sep 17 00:00:00 2001 From: John Brawn Date: Fri, 12 Dec 2025 16:40:10 +0000 Subject: [PATCH 1/4] Precommit test --- .../AArch64/unrolled-loop.ll | 213 ++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll new file mode 100644 index 0000000000000..b22ae3c175e32 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -mtriple=aarch64-none-elf -loop-reduce < %s | FileCheck %s + +; An unrolled loop that's too complex, causing LSR to collapse the unrolled +; LSRUses into one. +; The last LSRUse should be the one that the others are collapsed into, and IV +; increment GEP should happen at the end of the loop. 
+ +define void @unrolled_loop(ptr %src, ptr %dst, i32 %low, i32 %high, i64 %n) { +; CHECK-LABEL: define void @unrolled_loop( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i32 [[LOW:%.*]], i32 [[HIGH:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 +; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[LSR_IV7:%.*]] = phi ptr [ [[SCEVGEP8:%.*]], %[[FOR_INC_3:.*]] ], [ [[SCEVGEP6]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC_3]] ], [ [[SCEVGEP]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_INC_3]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[LSR_IV1]], i64 -8 +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[SCEVGEP3]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[VAL]], [[HIGH]] +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 -8 +; CHECK-NEXT: store i32 [[HIGH]], ptr [[SCEVGEP10]], align 4 +; CHECK-NEXT: br label %[[FOR_INC:.*]] +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VAL]], [[LOW]] +; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 -8 +; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_ELSE_THEN:.*]], label %[[IF_ELSE_ELSE:.*]] +; CHECK: [[IF_ELSE_THEN]]: +; CHECK-NEXT: store i32 [[LOW]], ptr [[SCEVGEP9]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[IF_ELSE_ELSE]]: +; CHECK-NEXT: store i32 [[VAL]], ptr [[SCEVGEP9]], align 4 +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[LSR_IV1]], i64 -4 +; CHECK-NEXT: [[VAL_1:%.*]] = load i32, ptr [[SCEVGEP5]], align 4 +; CHECK-NEXT: [[CMP1_1:%.*]] = icmp sgt i32 [[VAL_1]], [[HIGH]] +; 
CHECK-NEXT: br i1 [[CMP1_1]], label %[[IF_THEN_1:.*]], label %[[IF_ELSE_1:.*]] +; CHECK: [[IF_ELSE_1]]: +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp slt i32 [[VAL_1]], [[LOW]] +; CHECK-NEXT: [[SCEVGEP14:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 -4 +; CHECK-NEXT: br i1 [[CMP2_1]], label %[[IF_ELSE_THEN_1:.*]], label %[[IF_ELSE_ELSE_1:.*]] +; CHECK: [[IF_ELSE_ELSE_1]]: +; CHECK-NEXT: store i32 [[VAL_1]], ptr [[SCEVGEP14]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_1:.*]] +; CHECK: [[IF_ELSE_THEN_1]]: +; CHECK-NEXT: store i32 [[LOW]], ptr [[SCEVGEP14]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_1]] +; CHECK: [[IF_THEN_1]]: +; CHECK-NEXT: [[SCEVGEP13:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 -4 +; CHECK-NEXT: store i32 [[HIGH]], ptr [[SCEVGEP13]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_1]] +; CHECK: [[FOR_INC_1]]: +; CHECK-NEXT: [[VAL_2:%.*]] = load i32, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[CMP1_2:%.*]] = icmp sgt i32 [[VAL_2]], [[HIGH]] +; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 16 +; CHECK-NEXT: br i1 [[CMP1_2]], label %[[IF_THEN_2:.*]], label %[[IF_ELSE_2:.*]] +; CHECK: [[IF_ELSE_2]]: +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp slt i32 [[VAL_2]], [[LOW]] +; CHECK-NEXT: br i1 [[CMP2_2]], label %[[IF_ELSE_THEN_2:.*]], label %[[IF_ELSE_ELSE_2:.*]] +; CHECK: [[IF_ELSE_ELSE_2]]: +; CHECK-NEXT: store i32 [[VAL_2]], ptr [[LSR_IV7]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_2:.*]] +; CHECK: [[IF_ELSE_THEN_2]]: +; CHECK-NEXT: store i32 [[LOW]], ptr [[LSR_IV7]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_2]] +; CHECK: [[IF_THEN_2]]: +; CHECK-NEXT: store i32 [[HIGH]], ptr [[LSR_IV7]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_2]] +; CHECK: [[FOR_INC_2]]: +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[VAL_3:%.*]] = load i32, ptr [[SCEVGEP4]], align 4 +; CHECK-NEXT: [[CMP1_3:%.*]] = icmp sgt i32 [[VAL_3]], [[HIGH]] +; CHECK-NEXT: br i1 [[CMP1_3]], label %[[IF_THEN_3:.*]], label %[[IF_ELSE_3:.*]] +; 
CHECK: [[IF_ELSE_3]]: +; CHECK-NEXT: [[CMP2_3:%.*]] = icmp slt i32 [[VAL_3]], [[LOW]] +; CHECK-NEXT: [[SCEVGEP12:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 4 +; CHECK-NEXT: br i1 [[CMP2_3]], label %[[IF_ELSE_THEN_3:.*]], label %[[IF_ELSE_ELSE_3:.*]] +; CHECK: [[IF_ELSE_ELSE_3]]: +; CHECK-NEXT: store i32 [[VAL_3]], ptr [[SCEVGEP12]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_3]] +; CHECK: [[IF_ELSE_THEN_3]]: +; CHECK-NEXT: store i32 [[LOW]], ptr [[SCEVGEP12]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_3]] +; CHECK: [[IF_THEN_3]]: +; CHECK-NEXT: [[SCEVGEP11:%.*]] = getelementptr i8, ptr [[LSR_IV7]], i64 4 +; CHECK-NEXT: store i32 [[HIGH]], ptr [[SCEVGEP11]], align 4 +; CHECK-NEXT: br label %[[FOR_INC_3]] +; CHECK: [[FOR_INC_3]]: +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -4 +; CHECK-NEXT: [[SCEVGEP8]] = getelementptr i8, ptr [[LSR_IV7]], i64 16 +; CHECK-NEXT: [[NITER_NCMP:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[NITER_NCMP]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.inc.3 ] + %niter = phi i64 [ 0, %entry ], [ %niter.next, %for.inc.3 ] + %srcidx = getelementptr inbounds nuw i32, ptr %src, i64 %idx + %val = load i32, ptr %srcidx, align 4 + %cmp1 = icmp sgt i32 %val, %high + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %dstidx1 = getelementptr inbounds nuw i32, ptr %dst, i64 %idx + store i32 %high, ptr %dstidx1, align 4 + br label %for.inc + +if.else: + %cmp2 = icmp slt i32 %val, %low + %dstidx2 = getelementptr inbounds nuw i32, ptr %dst, i64 %idx + br i1 %cmp2, label %if.else.then, label %if.else.else + +if.else.then: + store i32 %low, ptr %dstidx2, align 4 + br label %for.inc + +if.else.else: + store i32 %val, ptr %dstidx2, align 4 + br label %for.inc + +for.inc: + %inc = or disjoint i64 %idx, 1 + %srcidx.1 = getelementptr inbounds nuw i32, ptr %src, i64 %inc + %val.1 = load i32, ptr 
%srcidx.1, align 4 + %cmp1.1 = icmp sgt i32 %val.1, %high + br i1 %cmp1.1, label %if.then.1, label %if.else.1 + +if.else.1: + %cmp2.1 = icmp slt i32 %val.1, %low + %dstidx2.1 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc + br i1 %cmp2.1, label %if.else.then.1, label %if.else.else.1 + +if.else.else.1: + store i32 %val.1, ptr %dstidx2.1, align 4 + br label %for.inc.1 + +if.else.then.1: + store i32 %low, ptr %dstidx2.1, align 4 + br label %for.inc.1 + +if.then.1: + %dstidx1.1 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc + store i32 %high, ptr %dstidx1.1, align 4 + br label %for.inc.1 + +for.inc.1: + %inc.1 = or disjoint i64 %idx, 2 + %srcidx.2 = getelementptr inbounds nuw i32, ptr %src, i64 %inc.1 + %val.2 = load i32, ptr %srcidx.2, align 4 + %cmp1.2 = icmp sgt i32 %val.2, %high + br i1 %cmp1.2, label %if.then.2, label %if.else.2 + +if.else.2: + %cmp2.2 = icmp slt i32 %val.2, %low + %dstidx2.2 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc.1 + br i1 %cmp2.2, label %if.else.then.2, label %if.else.else.2 + +if.else.else.2: + store i32 %val.2, ptr %dstidx2.2, align 4 + br label %for.inc.2 + +if.else.then.2: + store i32 %low, ptr %dstidx2.2, align 4 + br label %for.inc.2 + +if.then.2: + %dstidx1.2 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc.1 + store i32 %high, ptr %dstidx1.2, align 4 + br label %for.inc.2 + +for.inc.2: + %inc.2 = or disjoint i64 %idx, 3 + %srcidx.3 = getelementptr inbounds nuw i32, ptr %src, i64 %inc.2 + %val.3 = load i32, ptr %srcidx.3, align 4 + %cmp1.3 = icmp sgt i32 %val.3, %high + br i1 %cmp1.3, label %if.then.3, label %if.else.3 + +if.else.3: + %cmp2.3 = icmp slt i32 %val.3, %low + %dstidx2.3 = getelementptr inbounds nuw i32, ptr %dst, i64 %inc.2 + br i1 %cmp2.3, label %if.else.then.3, label %if.else.else.3 + +if.else.else.3: + store i32 %val.3, ptr %dstidx2.3, align 4 + br label %for.inc.3 + +if.else.then.3: + store i32 %low, ptr %dstidx2.3, align 4 + br label %for.inc.3 + +if.then.3: + %dstidx1.3 = getelementptr 
inbounds nuw i32, ptr %dst, i64 %inc.2 + store i32 %high, ptr %dstidx1.3, align 4 + br label %for.inc.3 + +for.inc.3: + %idx.next = add nuw i64 %idx, 4 + %niter.next = add i64 %niter, 4 + %niter.ncmp = icmp eq i64 %niter.next, %n + br i1 %niter.ncmp, label %exit, label %for.body + +exit: + ret void +} From cf548e5137e753964c0ac9441191bd781f9e8acd Mon Sep 17 00:00:00 2001 From: John Brawn Date: Thu, 11 Dec 2025 15:16:37 +0000 Subject: [PATCH 2/4] [LSR] Reverse order in NarrowSearchSpaceByCollapsingUnrolledCode The order in which NarrowSearchSpaceByCollapsingUnrolledCode iterates through the Uses array determines which LSRUses get deleted, with earlier uses being deleted and collapsed into later ones. The Uses array is generated from IVUsers which places later uses earlier in the array. Currently we iterate forward through the array, so the later uses are deleted and we end up with earlier uses. However we also delete elements by swapping with the last element which changes the order, meaning we can end up with a use in the middle of the loop being the final one. This is bad if we end up with a postincrement solution, as the value before postincrement will still be used later so it needs to be kept live in a register. Fix this by iterating backwards through the Uses array, which means that the last use will be the one that is kept, and we don't have the order changing as uses get deleted. 
--- .../Transforms/Scalar/LoopStrengthReduce.cpp | 4 +- llvm/test/CodeGen/ARM/loop-indexing.ll | 5 +- .../CodeGen/PowerPC/dform-pair-load-store.ll | 4 +- .../CodeGen/PowerPC/lsr-profitable-chain.ll | 62 +-- .../CodeGen/PowerPC/more-dq-form-prepare.ll | 495 +++++++++--------- llvm/test/CodeGen/PowerPC/prefer-dqform.ll | 42 +- llvm/test/CodeGen/PowerPC/swaps-le-1.ll | 40 +- .../LowOverheadLoops/mve-float-loops.ll | 22 +- .../LowOverheadLoops/mve-tail-data-types.ll | 98 ++-- .../CodeGen/X86/dag-update-nodetomatch.ll | 4 +- .../LoopStrengthReduce/AArch64/lsr-ldp.ll | 20 +- .../AArch64/unrolled-loop.ll | 12 +- .../LoopStrengthReduce/ARM/complexity.ll | 3 +- .../LoopStrengthReduce/RISCV/many-geps.ll | 32 +- 14 files changed, 412 insertions(+), 431 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index e12caa2136962..2b0e98c2fcfd8 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4957,7 +4957,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { // This is especially useful for unrolled loops. - for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + for (ssize_t LUIdx = Uses.size()-1; LUIdx >= 0; --LUIdx) { LSRUse &LU = Uses[LUIdx]; for (const Formula &F : LU.Formulae) { if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1)) @@ -5002,8 +5002,6 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { // Delete the old use. DeleteUse(LU, LUIdx); - --LUIdx; - --NumUses; break; } } diff --git a/llvm/test/CodeGen/ARM/loop-indexing.ll b/llvm/test/CodeGen/ARM/loop-indexing.ll index bb859b202bbc0..62fafc53e5e86 100644 --- a/llvm/test/CodeGen/ARM/loop-indexing.ll +++ b/llvm/test/CodeGen/ARM/loop-indexing.ll @@ -68,12 +68,11 @@ exit: } ; CHECK-LABEL: convolve_16bit -; TODO: Both arrays should use indexing ; CHECK-DEFAULT: ldr{{.*}}, #8]! -; CHECK-DEFAULT-NOT: ldr{{.*}}]! 
+; CHECK-DEFAULT: ldr{{.*}}, #8]! ; CHECK-COMPLEX: ldr{{.*}}, #8]! -; CHECK-COMPLEX-NOT: ldr{{.*}}]! +; CHECK-COMPLEX: ldr{{.*}}, #8]! ; DISABLED-NOT: ldr{{.*}}]! ; DISABLED-NOT: str{{.*}}]! diff --git a/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll index f5ae9a20a4ee0..030acb382bb5a 100644 --- a/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll +++ b/llvm/test/CodeGen/PowerPC/dform-pair-load-store.ll @@ -16,8 +16,8 @@ define void @foo(i32 zeroext %n, ptr %ptr, ptr %ptr2) { ; CHECK-NEXT: cmplwi r3, 0 ; CHECK-NEXT: beqlr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.lr.ph -; CHECK-NEXT: addi r4, r4, 64 ; CHECK-NEXT: addi r5, r5, 64 +; CHECK-NEXT: addi r4, r4, 64 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: # %for.body @@ -41,8 +41,8 @@ define void @foo(i32 zeroext %n, ptr %ptr, ptr %ptr2) { ; CHECK-BE-NEXT: cmplwi r3, 0 ; CHECK-BE-NEXT: beqlr cr0 ; CHECK-BE-NEXT: # %bb.1: # %for.body.lr.ph -; CHECK-BE-NEXT: addi r4, r4, 64 ; CHECK-BE-NEXT: addi r5, r5, 64 +; CHECK-BE-NEXT: addi r4, r4, 64 ; CHECK-BE-NEXT: mtctr r3 ; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB0_2: # %for.body diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll index 79f2ef3e3746a..7508ac12e9b46 100644 --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -8,31 +8,31 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: cmpd 5, 7 ; CHECK-NEXT: bgelr 0 ; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: addi 12, 5, 3 ; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill ; CHECK-NEXT: addi 27, 5, 2 +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill +; CHECK-NEXT: addi 29, 5, 1 +; CHECK-NEXT: addi 11, 3, 16 ; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 28, 5, 3 -; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 30, 
5, 1 -; CHECK-NEXT: mulld 12, 8, 5 ; CHECK-NEXT: mulld 0, 9, 8 -; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 29, 3, 16 -; CHECK-NEXT: sldi 11, 10, 3 +; CHECK-NEXT: mulld 28, 8, 5 +; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: sldi 30, 10, 3 ; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill ; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill ; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill ; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill ; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill -; CHECK-NEXT: mulld 30, 8, 30 -; CHECK-NEXT: mulld 28, 8, 28 +; CHECK-NEXT: mulld 12, 8, 12 +; CHECK-NEXT: mulld 29, 8, 29 ; CHECK-NEXT: mulld 8, 8, 27 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: add 5, 5, 9 ; CHECK-NEXT: add 12, 12, 0 -; CHECK-NEXT: add 30, 30, 0 +; CHECK-NEXT: add 29, 29, 0 ; CHECK-NEXT: add 28, 28, 0 ; CHECK-NEXT: add 8, 8, 0 ; CHECK-NEXT: cmpd 5, 7 @@ -43,24 +43,24 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: cmpd 6, 27 ; CHECK-NEXT: bge 0, .LBB0_2 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: add 25, 6, 12 +; CHECK-NEXT: add 24, 6, 28 +; CHECK-NEXT: add 26, 6, 12 +; CHECK-NEXT: add 25, 6, 29 +; CHECK-NEXT: sldi 23, 24, 3 ; CHECK-NEXT: add 24, 6, 8 -; CHECK-NEXT: sldi 26, 6, 3 -; CHECK-NEXT: sldi 23, 25, 3 -; CHECK-NEXT: add 25, 6, 30 -; CHECK-NEXT: sldi 24, 24, 3 -; CHECK-NEXT: add 26, 4, 26 +; CHECK-NEXT: sldi 26, 26, 3 ; CHECK-NEXT: sldi 22, 25, 3 -; CHECK-NEXT: add 25, 6, 28 -; CHECK-NEXT: add 24, 29, 24 +; CHECK-NEXT: sldi 25, 6, 3 +; CHECK-NEXT: sldi 24, 24, 3 +; CHECK-NEXT: add 26, 11, 26 +; CHECK-NEXT: add 25, 4, 25 ; CHECK-NEXT: add 23, 3, 23 -; CHECK-NEXT: sldi 25, 25, 3 ; CHECK-NEXT: add 22, 3, 22 -; CHECK-NEXT: add 25, 29, 25 +; CHECK-NEXT: add 24, 11, 24 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lfd 0, 0(26) +; 
CHECK-NEXT: lfd 0, 0(25) ; CHECK-NEXT: lfd 1, 0(23) ; CHECK-NEXT: add 6, 6, 10 ; CHECK-NEXT: cmpd 6, 27 @@ -70,7 +70,7 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: lfd 1, 16(23) ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, 24(23) -; CHECK-NEXT: add 23, 23, 11 +; CHECK-NEXT: add 23, 23, 30 ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, 0(22) ; CHECK-NEXT: xsadddp 0, 0, 1 @@ -79,7 +79,7 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: lfd 1, 16(22) ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, 24(22) -; CHECK-NEXT: add 22, 22, 11 +; CHECK-NEXT: add 22, 22, 30 ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, -16(24) ; CHECK-NEXT: xsadddp 0, 0, 1 @@ -88,19 +88,19 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-NEXT: lfd 1, 0(24) ; CHECK-NEXT: xsadddp 0, 0, 1 ; CHECK-NEXT: lfd 1, 8(24) -; CHECK-NEXT: add 24, 24, 11 +; CHECK-NEXT: add 24, 24, 30 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -16(25) +; CHECK-NEXT: lfd 1, -16(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -8(25) +; CHECK-NEXT: lfd 1, -8(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(25) +; CHECK-NEXT: lfd 1, 0(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(25) -; CHECK-NEXT: add 25, 25, 11 +; CHECK-NEXT: lfd 1, 8(26) +; CHECK-NEXT: add 26, 26, 30 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: stfd 0, 0(26) -; CHECK-NEXT: add 26, 26, 11 +; CHECK-NEXT: stfd 0, 0(25) +; CHECK-NEXT: add 25, 25, 30 ; CHECK-NEXT: blt 0, .LBB0_5 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_6: diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll index af0942e99182d..5c7eb283aa6f0 100644 --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -18,8 +18,8 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, 
ptr %.vy01, ptr %. ; CHECK-NEXT: cmpwi 3, 1 ; CHECK-NEXT: bltlr 0 ; CHECK-NEXT: # %bb.2: # %_loop_1_do_.preheader -; CHECK-NEXT: stdu 1, -592(1) -; CHECK-NEXT: .cfi_def_cfa_offset 592 +; CHECK-NEXT: stdu 1, -608(1) +; CHECK-NEXT: .cfi_def_cfa_offset 608 ; CHECK-NEXT: .cfi_offset r14, -192 ; CHECK-NEXT: .cfi_offset r15, -184 ; CHECK-NEXT: .cfi_offset r16, -176 @@ -56,300 +56,293 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. ; CHECK-NEXT: .cfi_offset v29, -240 ; CHECK-NEXT: .cfi_offset v30, -224 ; CHECK-NEXT: .cfi_offset v31, -208 -; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 2, 728(1) -; CHECK-NEXT: ld 14, 688(1) -; CHECK-NEXT: ld 11, 704(1) -; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 21, 5 -; CHECK-NEXT: lwa 5, 0(7) -; CHECK-NEXT: ld 7, 720(1) -; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 22, 6 -; CHECK-NEXT: ld 6, 848(1) +; CHECK-NEXT: std 28, 528(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 536(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 28, 5 +; CHECK-NEXT: ld 5, 864(1) ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: ld 15, 736(1) -; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 19, 768(1) -; CHECK-NEXT: ld 18, 760(1) -; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill -; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 12, 696(1) -; CHECK-NEXT: lxv 0, 0(9) -; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 1, 0(8) +; CHECK-NEXT: ld 2, 848(1) +; CHECK-NEXT: ld 12, 784(1) +; CHECK-NEXT: std 22, 480(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 488(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 22, 6 +; CHECK-NEXT: li 6, 9 +; CHECK-NEXT: ld 23, 
800(1) +; CHECK-NEXT: ld 29, 712(1) +; CHECK-NEXT: std 24, 496(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 504(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 25, 816(1) ; CHECK-NEXT: cmpldi 3, 9 -; CHECK-NEXT: ld 30, 824(1) -; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 29, 840(1) -; CHECK-NEXT: ld 28, 832(1) -; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 23, 784(1) -; CHECK-NEXT: ld 20, 776(1) -; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 25, 800(1) -; CHECK-NEXT: ld 24, 792(1) -; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 27, 816(1) -; CHECK-NEXT: ld 26, 808(1) -; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 17, 752(1) -; CHECK-NEXT: extswsli 9, 5, 3 -; CHECK-NEXT: lxv 4, 0(14) -; CHECK-NEXT: std 14, 32(1) # 8-byte Folded Spill -; CHECK-NEXT: std 12, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: mulli 0, 5, 40 -; CHECK-NEXT: sldi 14, 5, 5 -; CHECK-NEXT: mulli 31, 5, 24 -; CHECK-NEXT: lxv 38, 0(2) -; CHECK-NEXT: lxv 2, 0(11) -; CHECK-NEXT: std 2, 80(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill -; CHECK-NEXT: mulli 2, 5, 48 -; CHECK-NEXT: sldi 5, 5, 4 -; CHECK-NEXT: ld 16, 744(1) -; CHECK-NEXT: lxv 5, 0(10) -; CHECK-NEXT: std 6, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 6, 712(1) -; CHECK-NEXT: mr 10, 7 -; CHECK-NEXT: add 7, 14, 21 -; CHECK-NEXT: lxv 13, 0(19) -; CHECK-NEXT: std 8, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: std 6, 56(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 8, 11 -; CHECK-NEXT: li 11, 9 -; CHECK-NEXT: iselgt 3, 3, 11 +; CHECK-NEXT: ld 24, 808(1) +; CHECK-NEXT: std 26, 512(1) # 8-byte Folded 
Spill +; CHECK-NEXT: std 27, 520(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 26, 824(1) +; CHECK-NEXT: ld 27, 832(1) +; CHECK-NEXT: std 14, 416(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 15, 728(1) +; CHECK-NEXT: ld 14, 720(1) +; CHECK-NEXT: std 16, 432(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 17, 744(1) +; CHECK-NEXT: ld 16, 736(1) +; CHECK-NEXT: std 18, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, 456(1) # 8-byte Folded Spill +; CHECK-NEXT: iselgt 3, 3, 6 +; CHECK-NEXT: ld 19, 760(1) +; CHECK-NEXT: ld 18, 752(1) +; CHECK-NEXT: std 20, 464(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 472(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 21, 776(1) +; CHECK-NEXT: ld 20, 768(1) +; CHECK-NEXT: std 30, 544(1) # 8-byte Folded Spill +; CHECK-NEXT: std 31, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 30, 840(1) +; CHECK-NEXT: ld 31, 792(1) +; CHECK-NEXT: std 8, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: std 9, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 11, 704(1) +; CHECK-NEXT: lxv 39, 0(8) +; CHECK-NEXT: stfd 26, 560(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 27, 568(1) # 8-byte Folded Spill ; CHECK-NEXT: addi 3, 3, -2 -; CHECK-NEXT: rldicl 11, 3, 61, 3 -; CHECK-NEXT: lxv 3, 0(12) -; CHECK-NEXT: lxv 40, 0(6) -; CHECK-NEXT: std 18, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 120(1) # 8-byte Folded Spill -; CHECK-NEXT: add 19, 21, 5 -; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 39, 0(10) -; CHECK-NEXT: addi 3, 7, 32 -; CHECK-NEXT: add 12, 31, 21 -; CHECK-NEXT: std 20, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 33, 0(15) -; CHECK-NEXT: lxv 32, 0(16) -; CHECK-NEXT: std 26, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 37, 0(17) -; CHECK-NEXT: lxv 36, 0(18) -; CHECK-NEXT: std 30, 176(1) # 8-byte Folded Spill -; 
CHECK-NEXT: std 28, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 12, 0(20) -; CHECK-NEXT: lxv 11, 0(23) -; CHECK-NEXT: add 20, 21, 9 -; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 10, 0(24) -; CHECK-NEXT: lxv 9, 0(25) -; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 8, 0(26) -; CHECK-NEXT: lxv 7, 0(27) -; CHECK-NEXT: addi 12, 12, 32 -; CHECK-NEXT: li 27, 0 -; CHECK-NEXT: mr 26, 21 -; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill -; CHECK-NEXT: lxv 6, 0(30) -; CHECK-NEXT: lxv 41, 0(28) -; CHECK-NEXT: addi 7, 11, 1 -; CHECK-NEXT: add 11, 0, 21 -; CHECK-NEXT: li 28, 1 -; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill -; CHECK-NEXT: lxv 43, 0(29) -; CHECK-NEXT: lxv 42, 0(5) -; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill -; CHECK-NEXT: addi 11, 11, 32 -; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill -; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill -; CHECK-NEXT: std 16, 96(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 4, 0(23) +; CHECK-NEXT: lxv 1, 0(26) +; CHECK-NEXT: std 5, 216(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 160(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 5, 856(1) +; CHECK-NEXT: lxv 3, 0(24) +; CHECK-NEXT: lxv 2, 0(25) +; CHECK-NEXT: std 25, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 176(1) 
# 8-byte Folded Spill +; CHECK-NEXT: lxv 38, 0(9) +; CHECK-NEXT: lxv 33, 0(10) +; CHECK-NEXT: std 12, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: std 31, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: rldicl 3, 3, 61, 3 +; CHECK-NEXT: lxv 32, 0(11) +; CHECK-NEXT: lxv 37, 0(29) +; CHECK-NEXT: mr 8, 11 +; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 192(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 36, 0(14) +; CHECK-NEXT: lxv 13, 0(15) +; CHECK-NEXT: stfd 28, 576(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 584(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 12, 0(16) +; CHECK-NEXT: lxv 11, 0(17) +; CHECK-NEXT: stfd 30, 592(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 600(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 10, 0(18) +; CHECK-NEXT: lxv 9, 0(19) +; CHECK-NEXT: stxv 52, 224(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 53, 240(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 8, 0(20) +; CHECK-NEXT: lxv 7, 0(21) +; CHECK-NEXT: stxv 54, 256(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 55, 272(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 6, 0(12) +; CHECK-NEXT: lxv 5, 0(31) +; CHECK-NEXT: stxv 56, 288(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 57, 304(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 0, 0(27) +; CHECK-NEXT: lxv 40, 0(30) +; CHECK-NEXT: li 30, 1 +; CHECK-NEXT: stxv 58, 320(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 59, 336(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 41, 0(2) +; CHECK-NEXT: std 5, 208(1) # 8-byte Folded Spill +; CHECK-NEXT: std 2, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: lwa 5, 0(7) +; CHECK-NEXT: addi 7, 3, 1 +; CHECK-NEXT: mulli 3, 5, 40 +; CHECK-NEXT: extswsli 6, 5, 3 +; CHECK-NEXT: mulli 31, 5, 48 +; CHECK-NEXT: add 0, 28, 6 +; CHECK-NEXT: ld 6, 208(1) # 8-byte Folded Reload +; CHECK-NEXT: add 23, 28, 3 +; CHECK-NEXT: sldi 3, 5, 4 +; CHECK-NEXT: stxv 60, 352(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 61, 368(1) # 16-byte Folded Spill +; CHECK-NEXT: stxv 62, 384(1) # 16-byte Folded Spill +; 
CHECK-NEXT: add 26, 28, 3 +; CHECK-NEXT: sldi 3, 5, 5 +; CHECK-NEXT: stxv 63, 400(1) # 16-byte Folded Spill +; CHECK-NEXT: std 10, 56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 14, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 42, 0(6) +; CHECK-NEXT: std 16, 88(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 96(1) # 8-byte Folded Spill +; CHECK-NEXT: add 24, 28, 3 +; CHECK-NEXT: mulli 3, 5, 24 +; CHECK-NEXT: std 18, 104(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: add 25, 28, 3 +; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload +; CHECK-NEXT: std 20, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 43, 0(3) ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2 -; CHECK-NEXT: maddld 5, 2, 27, 0 ; CHECK-NEXT: mr 6, 22 -; CHECK-NEXT: mr 30, 20 -; CHECK-NEXT: mr 29, 19 +; CHECK-NEXT: mr 5, 28 +; CHECK-NEXT: mr 27, 0 +; CHECK-NEXT: mr 11, 26 +; CHECK-NEXT: mr 2, 25 +; CHECK-NEXT: mr 12, 24 +; CHECK-NEXT: mr 3, 23 ; CHECK-NEXT: mtctr 7 -; CHECK-NEXT: add 25, 21, 5 -; CHECK-NEXT: maddld 5, 2, 27, 14 -; CHECK-NEXT: add 24, 21, 5 -; CHECK-NEXT: maddld 5, 2, 27, 31 -; CHECK-NEXT: add 23, 21, 5 -; CHECK-NEXT: mr 5, 26 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_4: # %_loop_2_do_ ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lxvp 34, 0(6) ; CHECK-NEXT: lxvp 44, 0(5) -; CHECK-NEXT: xvmaddadp 1, 45, 35 -; CHECK-NEXT: lxvp 46, 0(30) -; CHECK-NEXT: xvmaddadp 0, 47, 35 -; CHECK-NEXT: lxvp 48, 0(29) -; CHECK-NEXT: lxvp 50, 0(23) -; CHECK-NEXT: lxvp 62, 0(24) -; CHECK-NEXT: lxvp 60, 0(25) +; CHECK-NEXT: xvmaddadp 39, 45, 35 +; CHECK-NEXT: lxvp 46, 0(27) +; CHECK-NEXT: xvmaddadp 38, 47, 35 +; CHECK-NEXT: lxvp 48, 0(11) +; 
CHECK-NEXT: lxvp 50, 0(2) +; CHECK-NEXT: lxvp 62, 0(12) +; CHECK-NEXT: lxvp 60, 0(3) ; CHECK-NEXT: lxvp 58, 32(6) ; CHECK-NEXT: lxvp 56, 32(5) -; CHECK-NEXT: lxvp 54, 32(30) -; CHECK-NEXT: lxvp 52, 32(29) -; CHECK-NEXT: lxvp 30, 32(23) -; CHECK-NEXT: lxvp 28, 32(24) -; CHECK-NEXT: lxvp 26, 32(25) -; CHECK-NEXT: xvmaddadp 5, 49, 35 -; CHECK-NEXT: xvmaddadp 4, 51, 35 -; CHECK-NEXT: xvmaddadp 3, 63, 35 -; CHECK-NEXT: xvmaddadp 2, 61, 35 -; CHECK-NEXT: xvmaddadp 40, 44, 34 -; CHECK-NEXT: xvmaddadp 39, 46, 34 -; CHECK-NEXT: xvmaddadp 38, 48, 34 -; CHECK-NEXT: xvmaddadp 33, 50, 34 -; CHECK-NEXT: xvmaddadp 32, 62, 34 -; CHECK-NEXT: xvmaddadp 37, 60, 34 -; CHECK-NEXT: xvmaddadp 36, 57, 59 -; CHECK-NEXT: xvmaddadp 13, 55, 59 -; CHECK-NEXT: xvmaddadp 12, 53, 59 -; CHECK-NEXT: xvmaddadp 11, 31, 59 -; CHECK-NEXT: xvmaddadp 10, 29, 59 -; CHECK-NEXT: xvmaddadp 9, 27, 59 -; CHECK-NEXT: xvmaddadp 8, 56, 58 -; CHECK-NEXT: xvmaddadp 7, 54, 58 -; CHECK-NEXT: xvmaddadp 6, 52, 58 +; CHECK-NEXT: lxvp 54, 32(27) +; CHECK-NEXT: lxvp 52, 32(11) +; CHECK-NEXT: lxvp 30, 32(2) +; CHECK-NEXT: lxvp 28, 32(12) +; CHECK-NEXT: lxvp 26, 32(3) +; CHECK-NEXT: xvmaddadp 33, 49, 35 +; CHECK-NEXT: xvmaddadp 32, 51, 35 +; CHECK-NEXT: xvmaddadp 37, 63, 35 +; CHECK-NEXT: xvmaddadp 36, 61, 35 +; CHECK-NEXT: xvmaddadp 13, 44, 34 +; CHECK-NEXT: xvmaddadp 12, 46, 34 +; CHECK-NEXT: xvmaddadp 11, 48, 34 +; CHECK-NEXT: xvmaddadp 10, 50, 34 +; CHECK-NEXT: xvmaddadp 9, 62, 34 +; CHECK-NEXT: xvmaddadp 8, 60, 34 +; CHECK-NEXT: xvmaddadp 7, 57, 59 +; CHECK-NEXT: xvmaddadp 6, 55, 59 +; CHECK-NEXT: xvmaddadp 5, 53, 59 +; CHECK-NEXT: xvmaddadp 4, 31, 59 +; CHECK-NEXT: xvmaddadp 3, 29, 59 +; CHECK-NEXT: xvmaddadp 2, 27, 59 +; CHECK-NEXT: xvmaddadp 1, 56, 58 +; CHECK-NEXT: xvmaddadp 0, 54, 58 +; CHECK-NEXT: xvmaddadp 40, 52, 58 ; CHECK-NEXT: xvmaddadp 41, 30, 58 -; CHECK-NEXT: xvmaddadp 43, 28, 58 -; CHECK-NEXT: xvmaddadp 42, 26, 58 +; CHECK-NEXT: xvmaddadp 42, 28, 58 +; CHECK-NEXT: xvmaddadp 43, 26, 58 ; CHECK-NEXT: addi 
6, 6, 64 ; CHECK-NEXT: addi 5, 5, 64 -; CHECK-NEXT: addi 30, 30, 64 -; CHECK-NEXT: addi 29, 29, 64 -; CHECK-NEXT: addi 23, 23, 64 -; CHECK-NEXT: addi 24, 24, 64 -; CHECK-NEXT: addi 25, 25, 64 +; CHECK-NEXT: addi 27, 27, 64 +; CHECK-NEXT: addi 11, 11, 64 +; CHECK-NEXT: addi 2, 2, 64 +; CHECK-NEXT: addi 12, 12, 64 +; CHECK-NEXT: addi 3, 3, 64 ; CHECK-NEXT: bdnz .LBB0_4 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_ ; CHECK-NEXT: # -; CHECK-NEXT: addi 28, 28, 6 -; CHECK-NEXT: add 26, 26, 2 -; CHECK-NEXT: add 20, 20, 2 -; CHECK-NEXT: add 11, 11, 2 -; CHECK-NEXT: add 19, 19, 2 -; CHECK-NEXT: add 3, 3, 2 -; CHECK-NEXT: add 12, 12, 2 -; CHECK-NEXT: addi 27, 27, 1 -; CHECK-NEXT: cmpld 28, 4 +; CHECK-NEXT: addi 30, 30, 6 +; CHECK-NEXT: add 0, 0, 31 +; CHECK-NEXT: add 23, 23, 31 +; CHECK-NEXT: add 26, 26, 31 +; CHECK-NEXT: add 24, 24, 31 +; CHECK-NEXT: add 28, 28, 31 +; CHECK-NEXT: add 25, 25, 31 +; CHECK-NEXT: cmpld 30, 4 ; CHECK-NEXT: ble 0, .LBB0_3 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: lxv 63, 400(1) # 16-byte Folded Reload +; CHECK-NEXT: stxv 39, 0(3) ; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 1, 0(3) +; CHECK-NEXT: lxv 62, 384(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 61, 368(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 60, 352(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 59, 336(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 58, 320(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 57, 304(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 56, 288(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 55, 272(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 54, 256(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 53, 240(1) # 16-byte Folded Reload +; CHECK-NEXT: lxv 52, 224(1) # 16-byte Folded Reload +; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 31, 600(1) # 
8-byte Folded Reload +; CHECK-NEXT: lfd 30, 592(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 584(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 28, 576(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 27, 568(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 26, 560(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 31, 552(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 30, 544(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, 536(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, 528(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, 520(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 33, 0(3) ; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 60, 336(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 59, 320(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 58, 304(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 57, 288(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 56, 272(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 55, 256(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload -; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 0, 0(3) +; CHECK-NEXT: stxv 32, 0(8) +; CHECK-NEXT: ld 26, 512(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 25, 504(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 24, 496(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 23, 488(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 22, 480(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 21, 472(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 20, 464(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 19, 456(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 18, 448(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 17, 440(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 16, 432(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 37, 0(3) ; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 
30, 576(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 5, 0(3) -; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 472(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 464(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 456(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 448(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 440(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 4, 0(3) -; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 3, 0(3) -; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 2, 0(8) -; CHECK-NEXT: stxv 40, 0(3) +; CHECK-NEXT: ld 15, 424(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 14, 416(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 36, 0(3) ; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 39, 0(10) -; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: stxv 13, 0(3) ; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 33, 0(3) +; CHECK-NEXT: stxv 12, 0(3) ; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 32, 0(3) +; CHECK-NEXT: stxv 11, 0(3) ; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded 
Reload -; CHECK-NEXT: stxv 37, 0(3) +; CHECK-NEXT: stxv 10, 0(3) ; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 36, 0(3) +; CHECK-NEXT: stxv 9, 0(3) ; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 13, 0(3) +; CHECK-NEXT: stxv 8, 0(3) ; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 12, 0(3) +; CHECK-NEXT: stxv 7, 0(3) ; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 11, 0(3) +; CHECK-NEXT: stxv 6, 0(3) ; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 10, 0(3) +; CHECK-NEXT: stxv 5, 0(3) ; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 9, 0(3) +; CHECK-NEXT: stxv 4, 0(3) ; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: stxv 3, 0(3) ; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 7, 0(3) +; CHECK-NEXT: stxv 2, 0(3) ; CHECK-NEXT: ld 3, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 6, 0(3) +; CHECK-NEXT: stxv 1, 0(3) ; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 41, 0(3) +; CHECK-NEXT: stxv 0, 0(3) ; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 43, 0(3) +; CHECK-NEXT: stxv 40, 0(3) ; CHECK-NEXT: ld 3, 200(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 41, 0(3) +; CHECK-NEXT: ld 3, 208(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 42, 0(3) -; CHECK-NEXT: addi 1, 1, 592 +; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 43, 0(3) +; CHECK-NEXT: addi 1, 1, 608 ; CHECK-NEXT: blr entry: %_val_l_ = load i32, ptr %.l, align 4 diff --git a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll index 912a74ba8df8f..242f28b4be9d3 100644 --- a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll +++ b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll @@ -41,24 +41,24 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) % ; CHECK-P9-NEXT: lxvdsx vs0, 0, r8 ; CHECK-P9-NEXT: 
rldicl r3, r3, 60, 4 ; CHECK-P9-NEXT: extswsli r5, r5, 3 +; CHECK-P9-NEXT: add r4, r9, r10 ; CHECK-P9-NEXT: addi r3, r3, 1 -; CHECK-P9-NEXT: sub r4, r10, r5 -; CHECK-P9-NEXT: add r5, r9, r10 +; CHECK-P9-NEXT: sub r5, r10, r5 ; CHECK-P9-NEXT: mtctr r3 -; CHECK-P9-NEXT: add r4, r6, r4 +; CHECK-P9-NEXT: add r5, r6, r5 ; CHECK-P9-NEXT: .p2align 4 ; CHECK-P9-NEXT: .LBB0_2: # %_loop_2_do_ ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: lxv vs1, -16(r5) -; CHECK-P9-NEXT: lxv vs2, 0(r5) -; CHECK-P9-NEXT: lxv vs3, -16(r4) -; CHECK-P9-NEXT: lxv vs4, 0(r4) -; CHECK-P9-NEXT: addi r4, r4, 128 +; CHECK-P9-NEXT: lxv vs1, -16(r4) +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: lxv vs3, -16(r5) +; CHECK-P9-NEXT: lxv vs4, 0(r5) +; CHECK-P9-NEXT: addi r5, r5, 128 ; CHECK-P9-NEXT: xvmaddadp vs1, vs3, vs1 -; CHECK-P9-NEXT: stxv vs1, -16(r5) +; CHECK-P9-NEXT: stxv vs1, -16(r4) ; CHECK-P9-NEXT: xvmaddadp vs2, vs4, vs0 -; CHECK-P9-NEXT: stxv vs2, 0(r5) -; CHECK-P9-NEXT: addi r5, r5, 128 +; CHECK-P9-NEXT: stxv vs2, 0(r4) +; CHECK-P9-NEXT: addi r4, r4, 128 ; CHECK-P9-NEXT: bdnz .LBB0_2 ; CHECK-P9-NEXT: # %bb.3: # %_return_bb ; CHECK-P9-NEXT: blr @@ -92,25 +92,25 @@ define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) % ; CHECK-P10-NEXT: sub r3, r7, r3 ; CHECK-P10-NEXT: lxvdsx vs0, 0, r8 ; CHECK-P10-NEXT: rldicl r3, r3, 60, 4 +; CHECK-P10-NEXT: add r4, r9, r10 ; CHECK-P10-NEXT: extswsli r5, r5, 3 ; CHECK-P10-NEXT: addi r3, r3, 1 -; CHECK-P10-NEXT: sub r4, r10, r5 -; CHECK-P10-NEXT: add r5, r9, r10 +; CHECK-P10-NEXT: sub r5, r10, r5 ; CHECK-P10-NEXT: mtctr r3 -; CHECK-P10-NEXT: add r4, r6, r4 +; CHECK-P10-NEXT: add r5, r6, r5 ; CHECK-P10-NEXT: .p2align 4 ; CHECK-P10-NEXT: .LBB0_2: # %_loop_2_do_ ; CHECK-P10-NEXT: # -; CHECK-P10-NEXT: lxv vs1, -16(r5) -; CHECK-P10-NEXT: lxv vs2, 0(r5) -; CHECK-P10-NEXT: lxv vs3, -16(r4) +; CHECK-P10-NEXT: lxv vs1, -16(r4) +; CHECK-P10-NEXT: lxv vs2, 0(r4) +; CHECK-P10-NEXT: lxv vs3, -16(r5) ; CHECK-P10-NEXT: xvmaddadp vs1, vs3, vs1 -; 
CHECK-P10-NEXT: lxv vs4, 0(r4) +; CHECK-P10-NEXT: lxv vs4, 0(r5) ; CHECK-P10-NEXT: xvmaddadp vs2, vs4, vs0 -; CHECK-P10-NEXT: addi r4, r4, 128 -; CHECK-P10-NEXT: stxv vs1, -16(r5) -; CHECK-P10-NEXT: stxv vs2, 0(r5) ; CHECK-P10-NEXT: addi r5, r5, 128 +; CHECK-P10-NEXT: stxv vs1, -16(r4) +; CHECK-P10-NEXT: stxv vs2, 0(r4) +; CHECK-P10-NEXT: addi r4, r4, 128 ; CHECK-P10-NEXT: bdnz .LBB0_2 ; CHECK-P10-NEXT: # %bb.3: # %_return_bb ; CHECK-P10-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll index 5d5445f9f473a..f3e34101efa29 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll @@ -187,34 +187,34 @@ define void @foo() { ; CHECK-P9-NEXT: .p2align 4 ; CHECK-P9-NEXT: .LBB0_1: # %vector.body ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: lxv 2, -32(3) -; CHECK-P9-NEXT: lxv 3, -32(4) -; CHECK-P9-NEXT: lxv 4, -16(4) -; CHECK-P9-NEXT: vadduwm 2, 3, 2 +; CHECK-P9-NEXT: lxv 2, -32(6) ; CHECK-P9-NEXT: lxv 3, -32(5) +; CHECK-P9-NEXT: lxv 4, -16(5) +; CHECK-P9-NEXT: vadduwm 2, 3, 2 +; CHECK-P9-NEXT: lxv 3, -32(4) ; CHECK-P9-NEXT: vmuluwm 2, 2, 3 -; CHECK-P9-NEXT: lxv 3, -16(3) -; CHECK-P9-NEXT: vadduwm 3, 4, 3 -; CHECK-P9-NEXT: lxv 4, 0(4) -; CHECK-P9-NEXT: stxv 2, -32(6) -; CHECK-P9-NEXT: lxv 2, -16(5) -; CHECK-P9-NEXT: vmuluwm 2, 3, 2 -; CHECK-P9-NEXT: lxv 3, 0(3) +; CHECK-P9-NEXT: lxv 3, -16(6) ; CHECK-P9-NEXT: vadduwm 3, 4, 3 -; CHECK-P9-NEXT: lxv 4, 16(4) -; CHECK-P9-NEXT: addi 4, 4, 64 -; CHECK-P9-NEXT: stxv 2, -16(6) -; CHECK-P9-NEXT: lxv 2, 0(5) +; CHECK-P9-NEXT: lxv 4, 0(5) +; CHECK-P9-NEXT: stxv 2, -32(3) +; CHECK-P9-NEXT: lxv 2, -16(4) ; CHECK-P9-NEXT: vmuluwm 2, 3, 2 -; CHECK-P9-NEXT: lxv 3, 16(3) -; CHECK-P9-NEXT: addi 3, 3, 64 +; CHECK-P9-NEXT: lxv 3, 0(6) ; CHECK-P9-NEXT: vadduwm 3, 4, 3 -; CHECK-P9-NEXT: stxv 2, 0(6) -; CHECK-P9-NEXT: lxv 2, 16(5) +; CHECK-P9-NEXT: lxv 4, 16(5) ; CHECK-P9-NEXT: addi 5, 5, 64 +; CHECK-P9-NEXT: stxv 2, -16(3) +; CHECK-P9-NEXT: lxv 2, 0(4) ; 
CHECK-P9-NEXT: vmuluwm 2, 3, 2 -; CHECK-P9-NEXT: stxv 2, 16(6) +; CHECK-P9-NEXT: lxv 3, 16(6) ; CHECK-P9-NEXT: addi 6, 6, 64 +; CHECK-P9-NEXT: vadduwm 3, 4, 3 +; CHECK-P9-NEXT: stxv 2, 0(3) +; CHECK-P9-NEXT: lxv 2, 16(4) +; CHECK-P9-NEXT: addi 4, 4, 64 +; CHECK-P9-NEXT: vmuluwm 2, 3, 2 +; CHECK-P9-NEXT: stxv 2, 16(3) +; CHECK-P9-NEXT: addi 3, 3, 64 ; CHECK-P9-NEXT: bdnz .LBB0_1 ; CHECK-P9-NEXT: # %bb.2: # %for.end ; CHECK-P9-NEXT: blr diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index 7c6c7e90413b1..5df68cc931b6d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -1737,38 +1737,38 @@ define arm_aapcs_vfpcc float @half_short_mac(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vldr s0, .LCPI11_0 -; CHECK-NEXT: adds r4, r0, #4 +; CHECK-NEXT: adds r4, r1, #4 ; CHECK-NEXT: add.w lr, r3, r2, lsr #2 -; CHECK-NEXT: adds r3, r1, #4 +; CHECK-NEXT: adds r3, r0, #4 ; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: .LBB11_5: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh.w r5, [r3, #2] -; CHECK-NEXT: vldr.16 s2, [r4, #2] +; CHECK-NEXT: ldrsh.w r5, [r4, #2] +; CHECK-NEXT: vldr.16 s2, [r3, #2] ; CHECK-NEXT: adds r2, #4 ; CHECK-NEXT: vmov s4, r5 -; CHECK-NEXT: ldrsh r5, [r3], #8 +; CHECK-NEXT: ldrsh r5, [r4], #8 ; CHECK-NEXT: vcvt.f16.s32 s4, s4 -; CHECK-NEXT: ldrsh r6, [r3, #-10] +; CHECK-NEXT: ldrsh r6, [r4, #-10] ; CHECK-NEXT: vmul.f16 s2, s2, s4 ; CHECK-NEXT: vmov s6, r5 -; CHECK-NEXT: vldr.16 s4, [r4] +; CHECK-NEXT: vldr.16 s4, [r3] ; CHECK-NEXT: vcvt.f16.s32 s6, s6 -; CHECK-NEXT: ldrsh r5, [r3, #-12] +; CHECK-NEXT: ldrsh r5, [r4, #-12] ; CHECK-NEXT: vmul.f16 s4, s4, s6 ; CHECK-NEXT: vmov s8, r6 -; CHECK-NEXT: vldr.16 s6, [r4, #-2] +; CHECK-NEXT: vldr.16 s6, [r3, #-2] ; CHECK-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-NEXT: 
vmov s10, r5 ; CHECK-NEXT: vcvtb.f32.f16 s4, s4 ; CHECK-NEXT: vmul.f16 s6, s6, s8 -; CHECK-NEXT: vldr.16 s8, [r4, #-4] +; CHECK-NEXT: vldr.16 s8, [r3, #-4] ; CHECK-NEXT: vcvt.f16.s32 s10, s10 ; CHECK-NEXT: vcvtb.f32.f16 s6, s6 ; CHECK-NEXT: vmul.f16 s8, s8, s10 ; CHECK-NEXT: vcvtb.f32.f16 s2, s2 ; CHECK-NEXT: vcvtb.f32.f16 s8, s8 -; CHECK-NEXT: adds r4, #8 +; CHECK-NEXT: adds r3, #8 ; CHECK-NEXT: vadd.f32 s0, s0, s8 ; CHECK-NEXT: vadd.f32 s0, s0, s6 ; CHECK-NEXT: vadd.f32 s0, s0, s4 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index 8a5a15a57912c..1937e60ce95bb 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -369,31 +369,31 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(ptr nocapture readonly ; CHECK-NEXT: bic r7, r4, #3 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: add.w r5, r3, #8 +; CHECK-NEXT: adds r5, r0, #3 ; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: adds r6, r0, #3 -; CHECK-NEXT: adds r7, r1, #1 +; CHECK-NEXT: adds r6, r1, #3 +; CHECK-NEXT: add.w r7, r3, #8 ; CHECK-NEXT: .LBB5_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r9, [r6, #-3] +; CHECK-NEXT: ldrb r9, [r5, #-3] ; CHECK-NEXT: add.w r8, r8, #4 -; CHECK-NEXT: ldrb r4, [r7, #-1] +; CHECK-NEXT: ldrb r4, [r6, #-3] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-8] -; CHECK-NEXT: ldrb r9, [r6, #-2] -; CHECK-NEXT: ldrb r4, [r7], #4 +; CHECK-NEXT: str r4, [r7, #-8] +; CHECK-NEXT: ldrb r9, [r5, #-2] +; CHECK-NEXT: ldrb r4, [r6, #-2] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-4] -; CHECK-NEXT: ldrb r9, [r6, #-1] -; CHECK-NEXT: ldrb r4, [r7, #-3] +; CHECK-NEXT: str r4, [r7, #-4] +; CHECK-NEXT: ldrb r9, [r5, #-1] +; CHECK-NEXT: ldrb r4, [r6, #-1] ; CHECK-NEXT: 
smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5] -; CHECK-NEXT: ldrb r9, [r6], #4 -; CHECK-NEXT: ldrb r4, [r7, #-2] +; CHECK-NEXT: str r4, [r7] +; CHECK-NEXT: ldrb r9, [r5], #4 +; CHECK-NEXT: ldrb r4, [r6], #4 ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #4] -; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: str r4, [r7, #4] +; CHECK-NEXT: adds r7, #16 ; CHECK-NEXT: le lr, .LBB5_7 ; CHECK-NEXT: .LBB5_8: @ %for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: wls lr, r12, .LBB5_11 @@ -645,31 +645,31 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(ptr nocapture readonl ; CHECK-NEXT: bic r7, r4, #3 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: add.w r5, r3, #8 +; CHECK-NEXT: adds r5, r0, #3 ; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: adds r6, r0, #3 -; CHECK-NEXT: adds r7, r1, #1 +; CHECK-NEXT: adds r6, r1, #3 +; CHECK-NEXT: add.w r7, r3, #8 ; CHECK-NEXT: .LBB7_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r9, [r6, #-3] +; CHECK-NEXT: ldrb r9, [r5, #-3] ; CHECK-NEXT: add.w r8, r8, #4 -; CHECK-NEXT: ldrb r4, [r7, #-1] +; CHECK-NEXT: ldrb r4, [r6, #-3] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-8] -; CHECK-NEXT: ldrb r9, [r6, #-2] -; CHECK-NEXT: ldrb r4, [r7], #4 +; CHECK-NEXT: str r4, [r7, #-8] +; CHECK-NEXT: ldrb r9, [r5, #-2] +; CHECK-NEXT: ldrb r4, [r6, #-2] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-4] -; CHECK-NEXT: ldrb r9, [r6, #-1] -; CHECK-NEXT: ldrb r4, [r7, #-3] +; CHECK-NEXT: str r4, [r7, #-4] +; CHECK-NEXT: ldrb r9, [r5, #-1] +; CHECK-NEXT: ldrb r4, [r6, #-1] ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5] -; CHECK-NEXT: ldrb r9, [r6], #4 -; CHECK-NEXT: ldrb r4, [r7, #-2] +; CHECK-NEXT: str r4, [r7] +; CHECK-NEXT: ldrb r9, [r5], #4 +; CHECK-NEXT: ldrb r4, [r6], #4 ; CHECK-NEXT: smlabb r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #4] -; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: str 
r4, [r7, #4] +; CHECK-NEXT: adds r7, #16 ; CHECK-NEXT: le lr, .LBB7_7 ; CHECK-NEXT: .LBB7_8: @ %for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: wls lr, r12, .LBB7_11 @@ -921,33 +921,33 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(ptr nocapture readonly ; CHECK-NEXT: bic r7, r4, #3 ; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: add.w r5, r3, #8 +; CHECK-NEXT: add.w r5, r0, #8 ; CHECK-NEXT: mov.w r8, #0 ; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: add.w r6, r0, #8 -; CHECK-NEXT: add.w r7, r1, #8 +; CHECK-NEXT: add.w r6, r1, #8 +; CHECK-NEXT: add.w r7, r3, #8 ; CHECK-NEXT: .LBB9_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr r9, [r6, #-8] +; CHECK-NEXT: ldr r9, [r5, #-8] ; CHECK-NEXT: add.w r8, r8, #4 -; CHECK-NEXT: ldr r4, [r7, #-8] +; CHECK-NEXT: ldr r4, [r6, #-8] ; CHECK-NEXT: mla r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-8] -; CHECK-NEXT: ldr r9, [r6, #-4] -; CHECK-NEXT: ldr r4, [r7, #-4] +; CHECK-NEXT: str r4, [r7, #-8] +; CHECK-NEXT: ldr r9, [r5, #-4] +; CHECK-NEXT: ldr r4, [r6, #-4] ; CHECK-NEXT: mla r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #-4] -; CHECK-NEXT: ldr.w r9, [r6] -; CHECK-NEXT: ldr r4, [r7] +; CHECK-NEXT: str r4, [r7, #-4] +; CHECK-NEXT: ldr.w r9, [r5] +; CHECK-NEXT: ldr r4, [r6] ; CHECK-NEXT: mla r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5] -; CHECK-NEXT: ldr.w r9, [r6, #4] +; CHECK-NEXT: str r4, [r7] +; CHECK-NEXT: ldr.w r9, [r5, #4] +; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: ldr r4, [r6, #4] ; CHECK-NEXT: adds r6, #16 -; CHECK-NEXT: ldr r4, [r7, #4] -; CHECK-NEXT: adds r7, #16 ; CHECK-NEXT: mla r4, r4, r9, r2 -; CHECK-NEXT: str r4, [r5, #4] -; CHECK-NEXT: adds r5, #16 +; CHECK-NEXT: str r4, [r7, #4] +; CHECK-NEXT: adds r7, #16 ; CHECK-NEXT: le lr, .LBB9_7 ; CHECK-NEXT: .LBB9_8: @ %for.cond.cleanup.loopexit.unr-lcssa ; CHECK-NEXT: wls lr, r12, .LBB9_11 diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll 
index 71ad598abe683..53985c74440db 100644 --- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll +++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll @@ -136,7 +136,7 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: movl (%r8), %r9d ; CHECK-NEXT: leal 8(,%rdx,8), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: leaq 32(%rsi), %rbx +; CHECK-NEXT: leaq 48(%rsi), %rbx ; CHECK-NEXT: leaq 8(,%rdx,8), %r14 ; CHECK-NEXT: xorl %r15d, %r15d ; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r12 @@ -223,10 +223,10 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: .LBB1_12: # %vector.body ; CHECK-NEXT: # Parent Loop BB1_2 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 +; CHECK-NEXT: movdqu %xmm0, -48(%r11) ; CHECK-NEXT: movdqu %xmm0, -32(%r11) ; CHECK-NEXT: movdqu %xmm0, -16(%r11) ; CHECK-NEXT: movdqu %xmm0, (%r11) -; CHECK-NEXT: movdqu %xmm0, 16(%r11) ; CHECK-NEXT: addq $64, %r11 ; CHECK-NEXT: addq $8, %rax ; CHECK-NEXT: jne .LBB1_12 diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll index 2eb41cd5c2fc6..0bdc3d253ad85 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll @@ -5,25 +5,17 @@ define void @convolution(ptr %src0, ptr %src1, i64 %stride_xm, i64 %stride_xp, ptr %dst, i32 %w) { ; CHECK-LABEL: convolution: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: add x9, x1, x3 -; CHECK-NEXT: add x10, x1, x2 -; CHECK-NEXT: add x11, x0, x2 -; CHECK-NEXT: add x12, x0, x3 ; CHECK-NEXT: .LBB0_1: // %do.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x13, x1, x8 -; CHECK-NEXT: add x14, x0, x8 -; CHECK-NEXT: ldr q0, [x11, x8] -; CHECK-NEXT: ldp q2, q3, [x14] -; CHECK-NEXT: ldr q1, [x12, x8] -; CHECK-NEXT: ldp q6, q7, [x13] +; CHECK-NEXT: ldr q0, [x0, x2] +; CHECK-NEXT: ldr q1, [x0, x3] ; CHECK-NEXT: subs w5, w5, #1 
-; CHECK-NEXT: ldr q4, [x10, x8] -; CHECK-NEXT: ldr q5, [x9, x8] +; CHECK-NEXT: ldp q2, q3, [x0], #32 +; CHECK-NEXT: ldr q4, [x1, x2] +; CHECK-NEXT: ldr q5, [x1, x3] ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldp q6, q7, [x1], #32 ; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s -; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: fadd v2.4s, v4.4s, v5.4s ; CHECK-NEXT: fadd v3.4s, v6.4s, v7.4s ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll index b22ae3c175e32..1ff6f59c94734 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/unrolled-loop.ll @@ -10,13 +10,13 @@ define void @unrolled_loop(ptr %src, ptr %dst, i32 %low, i32 %high, i64 %n) { ; CHECK-LABEL: define void @unrolled_loop( ; CHECK-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i32 [[LOW:%.*]], i32 [[HIGH:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 ; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 8 +; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i8, ptr [[SRC]], i64 8 ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[LSR_IV7:%.*]] = phi ptr [ [[SCEVGEP8:%.*]], %[[FOR_INC_3:.*]] ], [ [[SCEVGEP6]], %[[ENTRY]] ] -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC_3]] ], [ [[SCEVGEP]], %[[ENTRY]] ] -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_INC_3]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV14:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_INC_3:.*]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP16:%.*]], %[[FOR_INC_3]] ], [ [[SCEVGEP15]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV7:%.*]] = phi ptr [ [[SCEVGEP8:%.*]], %[[FOR_INC_3]] ], [ [[SCEVGEP6]], %[[ENTRY]] ] ; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, 
ptr [[LSR_IV1]], i64 -8 ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[SCEVGEP3]], align 4 ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[VAL]], [[HIGH]] @@ -57,7 +57,6 @@ define void @unrolled_loop(ptr %src, ptr %dst, i32 %low, i32 %high, i64 %n) { ; CHECK: [[FOR_INC_1]]: ; CHECK-NEXT: [[VAL_2:%.*]] = load i32, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[CMP1_2:%.*]] = icmp sgt i32 [[VAL_2]], [[HIGH]] -; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 16 ; CHECK-NEXT: br i1 [[CMP1_2]], label %[[IF_THEN_2:.*]], label %[[IF_ELSE_2:.*]] ; CHECK: [[IF_ELSE_2]]: ; CHECK-NEXT: [[CMP2_2:%.*]] = icmp slt i32 [[VAL_2]], [[LOW]] @@ -75,6 +74,7 @@ define void @unrolled_loop(ptr %src, ptr %dst, i32 %low, i32 %high, i64 %n) { ; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: [[VAL_3:%.*]] = load i32, ptr [[SCEVGEP4]], align 4 ; CHECK-NEXT: [[CMP1_3:%.*]] = icmp sgt i32 [[VAL_3]], [[HIGH]] +; CHECK-NEXT: [[SCEVGEP16]] = getelementptr i8, ptr [[LSR_IV1]], i64 16 ; CHECK-NEXT: br i1 [[CMP1_3]], label %[[IF_THEN_3:.*]], label %[[IF_ELSE_3:.*]] ; CHECK: [[IF_ELSE_3]]: ; CHECK-NEXT: [[CMP2_3:%.*]] = icmp slt i32 [[VAL_3]], [[LOW]] @@ -91,8 +91,8 @@ define void @unrolled_loop(ptr %src, ptr %dst, i32 %low, i32 %high, i64 %n) { ; CHECK-NEXT: store i32 [[HIGH]], ptr [[SCEVGEP11]], align 4 ; CHECK-NEXT: br label %[[FOR_INC_3]] ; CHECK: [[FOR_INC_3]]: -; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -4 ; CHECK-NEXT: [[SCEVGEP8]] = getelementptr i8, ptr [[LSR_IV7]], i64 16 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV14]], -4 ; CHECK-NEXT: [[NITER_NCMP:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[NITER_NCMP]], label %[[EXIT:.*]], label %[[FOR_BODY]] ; CHECK: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/complexity.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/complexity.ll index 1b64ade50f219..ab3585387aa5d 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/ARM/complexity.ll +++ 
b/llvm/test/Transforms/LoopStrengthReduce/ARM/complexity.ll @@ -4,10 +4,9 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" ; RUN: opt -mtriple=thumbv7em %s -S -loop-reduce -lsr-complexity-limit=2147483647 -o - | FileCheck %s ; CHECK-LABEL: for.body12.us.us: -; CHECK: [[LSR_IV6:%[^ ]+]] = phi ptr [ [[SCEVGEP7:%[^ ]+]], %for.body12.us.us ], [ [[SCEVGEP5:%[^ ]+]], %for.cond9.preheader.us.us ] ; CHECK: phi i32 +; CHECK: [[LSR_IV6:%[^ ]+]] = phi ptr [ [[SCEVGEP7:%[^ ]+]], %for.body12.us.us ], [ [[SCEVGEP5:%[^ ]+]], %for.cond9.preheader.us.us ] ; CHECK: [[LSR_IV:%[^ ]+]] = phi ptr [ [[SCEVGEP1:%[^ ]+]], %for.body12.us.us ], [ [[SCEVGEP:%[^ ]+]], %for.cond9.preheader.us.us ] -; CHECK: phi i32 ; CHECK: [[SCEVGEP1]] = getelementptr i8, ptr [[LSR_IV]], i32 8 ; CHECK: [[SCEVGEP7]] = getelementptr i8, ptr [[LSR_IV6]], i32 8 diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll index 4914bb72d8945..4f5f8ee16a704 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/many-geps.ll @@ -20,17 +20,17 @@ define i32 @main() { ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[CALL]], align 4 ; CHECK-NEXT: ret i32 0 ; CHECK: [[BB2]]: -; CHECK-NEXT: [[LSR_IV30:%.*]] = phi i64 [ [[LSR_IV_NEXT31:%.*]], %[[BB2]] ], [ 8, [[BB:%.*]] ] -; CHECK-NEXT: [[LSR_IV27:%.*]] = phi i64 [ [[LSR_IV_NEXT28:%.*]], %[[BB2]] ], [ 12, [[BB]] ] -; CHECK-NEXT: [[LSR_IV24:%.*]] = phi i64 [ [[LSR_IV_NEXT25:%.*]], %[[BB2]] ], [ 16, [[BB]] ] -; CHECK-NEXT: [[LSR_IV21:%.*]] = phi i64 [ [[LSR_IV_NEXT22:%.*]], %[[BB2]] ], [ 20, [[BB]] ] -; CHECK-NEXT: [[LSR_IV18:%.*]] = phi i64 [ [[LSR_IV_NEXT19:%.*]], %[[BB2]] ], [ 24, [[BB]] ] -; CHECK-NEXT: [[LSR_IV15:%.*]] = phi i64 [ [[LSR_IV_NEXT16:%.*]], %[[BB2]] ], [ 28, [[BB]] ] -; CHECK-NEXT: [[LSR_IV12:%.*]] = phi i64 [ [[LSR_IV_NEXT13:%.*]], %[[BB2]] ], [ 32, [[BB]] ] -; CHECK-NEXT: [[LSR_IV9:%.*]] = phi i64 
[ [[LSR_IV_NEXT10:%.*]], %[[BB2]] ], [ 36, [[BB]] ] -; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i64 [ [[LSR_IV_NEXT5:%.*]], %[[BB2]] ], [ 40, [[BB]] ] -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[BB2]] ], [ 48, [[BB]] ] -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[BB2]] ], [ 72, [[BB]] ] +; CHECK-NEXT: [[LSR_IV30:%.*]] = phi i64 [ [[LSR_IV_NEXT31:%.*]], %[[BB2]] ], [ 8, [[TMP0:%.*]] ] +; CHECK-NEXT: [[LSR_IV27:%.*]] = phi i64 [ [[LSR_IV_NEXT28:%.*]], %[[BB2]] ], [ 12, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV24:%.*]] = phi i64 [ [[LSR_IV_NEXT25:%.*]], %[[BB2]] ], [ 16, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV21:%.*]] = phi i64 [ [[LSR_IV_NEXT22:%.*]], %[[BB2]] ], [ 20, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV18:%.*]] = phi i64 [ [[LSR_IV_NEXT19:%.*]], %[[BB2]] ], [ 24, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV15:%.*]] = phi i64 [ [[LSR_IV_NEXT16:%.*]], %[[BB2]] ], [ 28, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV12:%.*]] = phi i64 [ [[LSR_IV_NEXT13:%.*]], %[[BB2]] ], [ 32, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV9:%.*]] = phi i64 [ [[LSR_IV_NEXT10:%.*]], %[[BB2]] ], [ 36, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i64 [ [[LSR_IV_NEXT5:%.*]], %[[BB2]] ], [ 48, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[BB2]] ], [ 72, [[TMP0]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[BB2]] ], [ 80, [[TMP0]] ] ; CHECK-NEXT: [[SCEVGEP32:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV30]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP32]], align 8 ; CHECK-NEXT: [[SCEVGEP29:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV27]] @@ -47,14 +47,14 @@ define i32 @main() { ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP14]], align 8 ; CHECK-NEXT: [[SCEVGEP11:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV9]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP11]], align 4 -; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV]] +; 
CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[SCEVGEP1]], i64 -40 ; CHECK-NEXT: store i64 0, ptr [[SCEVGEP6]], align 8 -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV1]] +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP3]], align 8 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV1]] ; CHECK-NEXT: store i32 0, ptr [[SCEVGEP]], align 8 -; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV4]] -; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 40 +; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[CALL]], i64 [[LSR_IV]] ; CHECK-NEXT: store i64 0, ptr [[SCEVGEP8]], align 8 ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 88 ; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], 88 From 7e49992e4ece9c0961caca66d03fba17d4024949 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Mon, 15 Dec 2025 15:27:09 +0000 Subject: [PATCH 3/4] clang-format --- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 2b0e98c2fcfd8..1de5134b5d6d9 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4957,7 +4957,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { // This is especially useful for unrolled loops. 
- for (ssize_t LUIdx = Uses.size()-1; LUIdx >= 0; --LUIdx) { + for (ssize_t LUIdx = Uses.size() - 1; LUIdx >= 0; --LUIdx) { LSRUse &LU = Uses[LUIdx]; for (const Formula &F : LU.Formulae) { if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1)) From a1f41a280c36da524d56f9c8dcb78888fdd6f491 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Mon, 15 Dec 2025 17:01:37 +0000 Subject: [PATCH 4/4] Adjust test --- llvm/test/Transforms/HardwareLoops/ARM/structure.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll index 6993fd16dbad5..04a85fe1a66f6 100644 --- a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll +++ b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll @@ -393,6 +393,7 @@ for.body: ; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]: ; CHECK-UNROLL: le lr, [[PROLOGUE]] ; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]: +; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: ; CHECK-UNROLL: [[BODY:.LBB[0-9_]+]]: ; CHECK-UNROLL: le lr, [[BODY]] ; CHECK-UNROLL-NOT: b