From bebeffd8557c82ef417612f7dae0310cdd1f0263 Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Fri, 28 Nov 2025 18:13:26 +0000 Subject: [PATCH 1/2] [AArch64][NFC] Add test for vector sdiv exact scalarization --- .../AArch64/sdiv-by-const-promoted-ops.ll | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll index 053cbc0616454..528fafdd77aa2 100644 --- a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll +++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll @@ -75,3 +75,137 @@ define <16 x i16> @srem_v16i16_by_7(<16 x i16> %x) { %rem = srem <16 x i16> %x, ret <16 x i16> %rem } + +define <8 x i16> @sdiv_exact_v8i16_by_255(<8 x i16> %x) { +; CHECK-LABEL: sdiv_exact_v8i16_by_255: +; CHECK: // %bb.0: +; CHECK-NEXT: mvni v1.8h, #1, lsl #8 +; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ret + %div = sdiv exact <8 x i16> %x, splat (i16 255) + ret <8 x i16> %div +} + +define <16 x i16> @sdiv_exact_v16i16_by_255(<16 x i16> %x) { +; CHECK-LABEL: sdiv_exact_v16i16_by_255: +; CHECK: // %bb.0: +; CHECK-NEXT: smov x11, v0.h[1] +; CHECK-NEXT: smov x10, v0.h[0] +; CHECK-NEXT: mov x8, #-32639 // =0xffffffffffff8081 +; CHECK-NEXT: smov x13, v0.h[3] +; CHECK-NEXT: smov x14, v1.h[1] +; CHECK-NEXT: movk x8, #32896, lsl #16 +; CHECK-NEXT: smov x16, v1.h[0] +; CHECK-NEXT: smov w12, v0.h[1] +; CHECK-NEXT: smov w15, v0.h[0] +; CHECK-NEXT: smov x18, v1.h[2] +; CHECK-NEXT: smov w0, v0.h[3] +; CHECK-NEXT: smov w1, v1.h[1] +; CHECK-NEXT: smull x11, w11, w8 +; CHECK-NEXT: smov w2, v1.h[0] +; CHECK-NEXT: smov x9, v0.h[2] +; CHECK-NEXT: smull x10, w10, w8 +; CHECK-NEXT: smov w17, v0.h[2] +; CHECK-NEXT: smov w3, v1.h[2] +; CHECK-NEXT: smull x13, w13, w8 +; CHECK-NEXT: smull x14, w14, w8 +; CHECK-NEXT: add x12, x12, x11, lsr #32 +; CHECK-NEXT: smull x16, w16, w8 +; CHECK-NEXT: add x10, x15, x10, lsr #32 +; CHECK-NEXT: smull x15, w18, w8 +; CHECK-NEXT: add x11, x0, x13, lsr #32 +; CHECK-NEXT: smov x0, v0.h[4] +; CHECK-NEXT: add x13, x1, x14, lsr #32 +; CHECK-NEXT: asr w18, w10, #7 +; CHECK-NEXT: smull x9, w9, w8 +; CHECK-NEXT: add x14, x2, x16, lsr #32 +; CHECK-NEXT: asr w16, w12, #7 +; CHECK-NEXT: smov x2, v1.h[3] +; CHECK-NEXT: add w18, w18, w10, lsr #31 +; CHECK-NEXT: add x15, x3, x15, lsr #32 +; CHECK-NEXT: smov w10, v0.h[5] +; CHECK-NEXT: add w12, w16, w12, lsr #31 +; CHECK-NEXT: asr w16, w14, #7 +; CHECK-NEXT: add x9, x17, x9, lsr #32 +; CHECK-NEXT: fmov s2, w18 +; CHECK-NEXT: smov w17, v0.h[4] +; CHECK-NEXT: smull x0, w0, w8 +; CHECK-NEXT: add w14, w16, w14, lsr #31 +; CHECK-NEXT: asr w16, w13, #7 +; CHECK-NEXT: asr w1, w9, #7 +; CHECK-NEXT: smov x18, v0.h[5] +; CHECK-NEXT: fmov s3, w14 +; CHECK-NEXT: mov v2.h[1], w12 +; CHECK-NEXT: add w12, w16, w13, lsr #31 +; CHECK-NEXT: smov w13, v1.h[3] +; CHECK-NEXT: smov x14, v1.h[4] +; CHECK-NEXT: smull x16, w2, w8 +; CHECK-NEXT: add w1, w1, w9, lsr #31 +; CHECK-NEXT: add x17, x17, x0, lsr #32 +; CHECK-NEXT: asr w0, w15, #7 +; CHECK-NEXT: mov v3.h[1], w12 +; CHECK-NEXT: smov w12, v1.h[4] +; CHECK-NEXT: smull x18, w18, w8 +; CHECK-NEXT: mov v2.h[2], w1 +; CHECK-NEXT: asr w1, w11, #7 +; CHECK-NEXT: add w15, w0, w15, lsr #31 +; CHECK-NEXT: add x13, x13, x16, lsr #32 +; CHECK-NEXT: smov x16, v1.h[5] +; CHECK-NEXT: smull x14, w14, w8 +; CHECK-NEXT: add w11, w1, w11, lsr #31 +; CHECK-NEXT: smov x0, v0.h[6] +; CHECK-NEXT: add x10, x10, x18, lsr #32 +; CHECK-NEXT: asr w1, w13, #7 +; CHECK-NEXT: mov v3.h[2], w15 +; CHECK-NEXT: smov w15, v1.h[5] +; CHECK-NEXT: add x12, x12, x14, lsr #32 +; CHECK-NEXT: mov v2.h[3], w11 +; CHECK-NEXT: asr w11, w17, #7 +; CHECK-NEXT: add w13, w1, w13, lsr #31 +; CHECK-NEXT: smull x16, w16, w8 +; CHECK-NEXT: smov x14, v1.h[6] +; CHECK-NEXT: asr w18, w12, #7 +; CHECK-NEXT: add w11, w11, w17, lsr #31 +; CHECK-NEXT: smov w9, v0.h[6] +; CHECK-NEXT: mov v3.h[3], w13 +; CHECK-NEXT: smull x17, w0, w8 +; CHECK-NEXT: smov x0, v1.h[7] +; CHECK-NEXT: add x13, x15, x16, lsr #32 +; CHECK-NEXT: add w12, w18, w12, lsr #31 +; CHECK-NEXT: smov w16, v1.h[6] +; CHECK-NEXT: mov v2.h[4], w11 +; CHECK-NEXT: smov x11, v0.h[7] +; CHECK-NEXT: smull x14, w14, w8 +; CHECK-NEXT: asr w15, w10, #7 +; CHECK-NEXT: asr w18, w13, #7 +; CHECK-NEXT: smov w1, v0.h[7] +; CHECK-NEXT: mov v3.h[4], w12 +; CHECK-NEXT: add x9, x9, x17, lsr #32 +; CHECK-NEXT: add w10, w15, w10, lsr #31 +; CHECK-NEXT: add w12, w18, w13, lsr #31 +; CHECK-NEXT: add x13, x16, x14, lsr #32 +; CHECK-NEXT: smov w14, v1.h[7] +; CHECK-NEXT: smull x11, w11, w8 +; CHECK-NEXT: smull x8, w0, w8 +; CHECK-NEXT: mov v2.h[5], w10 +; CHECK-NEXT: asr w10, w9, #7 +; CHECK-NEXT: mov v3.h[5], w12 +; CHECK-NEXT: asr w12, w13, #7 +; CHECK-NEXT: add w9, w10, w9, lsr #31 +; CHECK-NEXT: add x10, x1, x11, lsr #32 +; CHECK-NEXT: add w11, w12, w13, lsr #31 +; CHECK-NEXT: add x8, x14, x8, lsr #32 +; CHECK-NEXT: mov v2.h[6], w9 +; CHECK-NEXT: asr w9, w10, #7 +; CHECK-NEXT: mov v3.h[6], w11 +; CHECK-NEXT: asr w11, w8, #7 +; CHECK-NEXT: add w9, w9, w10, lsr #31 +; CHECK-NEXT: add w8, w11, w8, lsr #31 +; CHECK-NEXT: mov v2.h[7], w9 +; CHECK-NEXT: mov v3.h[7], w8 +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: ret + %div = sdiv exact <16 x i16> %x, splat (i16 255) + ret <16 x i16> %div +} From 35e2971b1f176105f74aea04a59d5352f02cdcb9 Mon Sep 17 00:00:00 2001 From: Valeriy Savchenko Date: Fri, 28 Nov 2025 18:16:13 +0000 Subject: [PATCH 2/2] [DAGCombiner] Allow promoted constants when lowering vector SDIV exacts --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 8 +- .../AArch64/sdiv-by-const-promoted-ops.ll | 119 +----------------- 2 files changed, 8 insertions(+), 119 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1183f562c274d..b698ef4fcf0cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6345,7 +6345,6 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); EVT VT = N->getValueType(0); - EVT SVT = VT.getScalarType(); EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); EVT ShSVT = ShVT.getScalarType(); @@ -6355,6 +6354,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, auto BuildSDIVPattern = [&](ConstantSDNode *C) { if (C->isZero()) return false; + + EVT CT = C->getValueType(0); APInt Divisor = C->getAPIntValue(); unsigned Shift = Divisor.countr_zero(); if (Shift) { @@ -6363,12 +6364,13 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, } APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); - Factors.push_back(DAG.getConstant(Factor, dl, SVT)); + Factors.push_back(DAG.getConstant(Factor, dl, CT)); return true; }; // Collect all magic values from the build vector. - if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern)) + if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false, + /*AllowTruncation=*/true)) return SDValue(); SDValue Shift, Factor; diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll index 528fafdd77aa2..840501ec48e42 100644 --- a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll +++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll @@ -89,122 +89,9 @@ define <8 x i16> @sdiv_exact_v8i16_by_255(<8 x i16> %x) { define <16 x i16> @sdiv_exact_v16i16_by_255(<16 x i16> %x) { ; CHECK-LABEL: sdiv_exact_v16i16_by_255: ; CHECK: // %bb.0: -; CHECK-NEXT: smov x11, v0.h[1] -; CHECK-NEXT: smov x10, v0.h[0] -; CHECK-NEXT: mov x8, #-32639 // =0xffffffffffff8081 -; CHECK-NEXT: smov x13, v0.h[3] -; CHECK-NEXT: smov x14, v1.h[1] -; CHECK-NEXT: movk x8, #32896, lsl #16 -; CHECK-NEXT: smov x16, v1.h[0] -; CHECK-NEXT: smov w12, v0.h[1] -; CHECK-NEXT: smov w15, v0.h[0] -; CHECK-NEXT: smov x18, v1.h[2] -; CHECK-NEXT: smov w0, v0.h[3] -; CHECK-NEXT: smov w1, v1.h[1] -; CHECK-NEXT: smull x11, w11, w8 -; CHECK-NEXT: smov w2, v1.h[0] -; CHECK-NEXT: smov x9, v0.h[2] -; CHECK-NEXT: smull x10, w10, w8 -; CHECK-NEXT: smov w17, v0.h[2] -; CHECK-NEXT: smov w3, v1.h[2] -; CHECK-NEXT: smull x13, w13, w8 -; CHECK-NEXT: smull x14, w14, w8 -; CHECK-NEXT: add x12, x12, x11, lsr #32 -; CHECK-NEXT: smull x16, w16, w8 -; CHECK-NEXT: add x10, x15, x10, lsr #32 -; CHECK-NEXT: smull x15, w18, w8 -; CHECK-NEXT: add x11, x0, x13, lsr #32 -; CHECK-NEXT: smov x0, v0.h[4] -; CHECK-NEXT: add x13, x1, x14, lsr #32 -; CHECK-NEXT: asr w18, w10, #7 -; CHECK-NEXT: smull x9, w9, w8 -; CHECK-NEXT: add x14, x2, x16, lsr #32 -; CHECK-NEXT: asr w16, w12, #7 -; CHECK-NEXT: smov x2, v1.h[3] -; CHECK-NEXT: add w18, w18, w10, lsr #31 -; CHECK-NEXT: add x15, x3, x15, lsr #32 -; CHECK-NEXT: smov w10, v0.h[5] -; CHECK-NEXT: add w12, w16, w12, lsr #31 -; CHECK-NEXT: asr w16, w14, #7 -; CHECK-NEXT: add x9, x17, x9, lsr #32 -; CHECK-NEXT: fmov s2, w18 -; CHECK-NEXT: smov w17, v0.h[4] -; CHECK-NEXT: smull x0, w0, w8 -; CHECK-NEXT: add w14, w16, w14, lsr #31 -; CHECK-NEXT: asr w16, w13, #7 -; CHECK-NEXT: asr w1, w9, #7 -; CHECK-NEXT: smov x18, v0.h[5] -; CHECK-NEXT: fmov s3, w14 -; CHECK-NEXT: mov v2.h[1], w12 -; CHECK-NEXT: add w12, w16, w13, lsr #31 -; CHECK-NEXT: smov w13, v1.h[3] -; CHECK-NEXT: smov x14, v1.h[4] -; CHECK-NEXT: smull x16, w2, w8 -; CHECK-NEXT: add w1, w1, w9, lsr #31 -; CHECK-NEXT: add x17, x17, x0, lsr #32 -; CHECK-NEXT: asr w0, w15, #7 -; CHECK-NEXT: mov v3.h[1], w12 -; CHECK-NEXT: smov w12, v1.h[4] -; CHECK-NEXT: smull x18, w18, w8 -; CHECK-NEXT: mov v2.h[2], w1 -; CHECK-NEXT: asr w1, w11, #7 -; CHECK-NEXT: add w15, w0, w15, lsr #31 -; CHECK-NEXT: add x13, x13, x16, lsr #32 -; CHECK-NEXT: smov x16, v1.h[5] -; CHECK-NEXT: smull x14, w14, w8 -; CHECK-NEXT: add w11, w1, w11, lsr #31 -; CHECK-NEXT: smov x0, v0.h[6] -; CHECK-NEXT: add x10, x10, x18, lsr #32 -; CHECK-NEXT: asr w1, w13, #7 -; CHECK-NEXT: mov v3.h[2], w15 -; CHECK-NEXT: smov w15, v1.h[5] -; CHECK-NEXT: add x12, x12, x14, lsr #32 -; CHECK-NEXT: mov v2.h[3], w11 -; CHECK-NEXT: asr w11, w17, #7 -; CHECK-NEXT: add w13, w1, w13, lsr #31 -; CHECK-NEXT: smull x16, w16, w8 -; CHECK-NEXT: smov x14, v1.h[6] -; CHECK-NEXT: asr w18, w12, #7 -; CHECK-NEXT: add w11, w11, w17, lsr #31 -; CHECK-NEXT: smov w9, v0.h[6] -; CHECK-NEXT: mov v3.h[3], w13 -; CHECK-NEXT: smull x17, w0, w8 -; CHECK-NEXT: smov x0, v1.h[7] -; CHECK-NEXT: add x13, x15, x16, lsr #32 -; CHECK-NEXT: add w12, w18, w12, lsr #31 -; CHECK-NEXT: smov w16, v1.h[6] -; CHECK-NEXT: mov v2.h[4], w11 -; CHECK-NEXT: smov x11, v0.h[7] -; CHECK-NEXT: smull x14, w14, w8 -; CHECK-NEXT: asr w15, w10, #7 -; CHECK-NEXT: asr w18, w13, #7 -; CHECK-NEXT: smov w1, v0.h[7] -; CHECK-NEXT: mov v3.h[4], w12 -; CHECK-NEXT: add x9, x9, x17, lsr #32 -; CHECK-NEXT: add w10, w15, w10, lsr #31 -; CHECK-NEXT: add w12, w18, w13, lsr #31 -; CHECK-NEXT: add x13, x16, x14, lsr #32 -; CHECK-NEXT: smov w14, v1.h[7] -; CHECK-NEXT: smull x11, w11, w8 -; CHECK-NEXT: smull x8, w0, w8 -; CHECK-NEXT: mov v2.h[5], w10 -; CHECK-NEXT: asr w10, w9, #7 -; CHECK-NEXT: mov v3.h[5], w12 -; CHECK-NEXT: asr w12, w13, #7 -; CHECK-NEXT: add w9, w10, w9, lsr #31 -; CHECK-NEXT: add x10, x1, x11, lsr #32 -; CHECK-NEXT: add w11, w12, w13, lsr #31 -; CHECK-NEXT: add x8, x14, x8, lsr #32 -; CHECK-NEXT: mov v2.h[6], w9 -; CHECK-NEXT: asr w9, w10, #7 -; CHECK-NEXT: mov v3.h[6], w11 -; CHECK-NEXT: asr w11, w8, #7 -; CHECK-NEXT: add w9, w9, w10, lsr #31 -; CHECK-NEXT: add w8, w11, w8, lsr #31 -; CHECK-NEXT: mov v2.h[7], w9 -; CHECK-NEXT: mov v3.h[7], w8 -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: mvni v2.8h, #1, lsl #8 +; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h +; CHECK-NEXT: mul v1.8h, v1.8h, v2.8h ; CHECK-NEXT: ret %div = sdiv exact <16 x i16> %x, splat (i16 255) ret <16 x i16> %div