diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5377f22e5c61f..0f539655ff0ed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5186,7 +5186,8 @@ static bool isDivisorPowerOfTwo(SDValue Divisor) { return false; }; - return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo); + return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false, + /*AllowTruncation=*/true); } SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { @@ -5250,7 +5251,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { // alternate sequence. Targets may check function attributes for size/speed // trade-offs. AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); - if (isConstantOrConstantVector(N1) && + if (isConstantOrConstantVector(N1, /*NoOpaques=*/false, + /*AllowTruncation=*/true) && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildSDIV(N)) return Op; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1e71937372159..e8110ed549653 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6562,8 +6562,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, auto BuildSDIVPattern = [&](ConstantSDNode *C) { if (C->isZero()) return false; - - const APInt &Divisor = C->getAPIntValue(); + // Truncate the divisor to the target scalar type in case it was promoted + // during type legalization. 
+ APInt Divisor = C->getAPIntValue().trunc(EltBits); SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor); int NumeratorFactor = 0; int ShiftMask = -1; @@ -6593,7 +6594,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, SDValue N1 = N->getOperand(1); // Collect the shifts / magic values from each element. - if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern)) + if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false, + /*AllowTruncation=*/true)) return SDValue(); SDValue MagicFactor, Factor, Shift, ShiftMask; diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll index ffaf045fa45c2..c19ded18c94c9 100644 --- a/llvm/test/CodeGen/AArch64/rem-by-const.ll +++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll @@ -893,46 +893,15 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) { ; CHECK-SD-LABEL: sv4i8_7: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493 -; CHECK-SD-NEXT: movk x8, #37449, lsl #16 +; CHECK-SD-NEXT: mov w8, #18725 // =0x4925 +; CHECK-SD-NEXT: movi v2.4h, #7 +; CHECK-SD-NEXT: dup v1.4h, w8 ; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-SD-NEXT: smov x10, v0.h[0] -; CHECK-SD-NEXT: smov x9, v0.h[1] -; CHECK-SD-NEXT: smov w12, v0.h[0] -; CHECK-SD-NEXT: smov w11, v0.h[1] -; CHECK-SD-NEXT: smov x13, v0.h[2] -; CHECK-SD-NEXT: smov w14, v0.h[2] -; CHECK-SD-NEXT: smov x17, v0.h[3] -; CHECK-SD-NEXT: smull x10, w10, w8 -; CHECK-SD-NEXT: smull x9, w9, w8 -; CHECK-SD-NEXT: smull x13, w13, w8 -; CHECK-SD-NEXT: add x10, x12, x10, lsr #32 -; CHECK-SD-NEXT: smull x8, w17, w8 -; CHECK-SD-NEXT: add x9, x11, x9, lsr #32 -; CHECK-SD-NEXT: asr w16, w10, #2 -; CHECK-SD-NEXT: add x13, x14, x13, lsr #32 -; CHECK-SD-NEXT: asr w15, w9, #2 -; CHECK-SD-NEXT: add w10, w16, w10, lsr #31 -; CHECK-SD-NEXT: asr w16, w13, #2 -; CHECK-SD-NEXT: add w9, w15, w9, lsr #31 -; CHECK-SD-NEXT: smov w15, v0.h[3] -; 
CHECK-SD-NEXT: sub w10, w10, w10, lsl #3 -; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3 -; CHECK-SD-NEXT: add w10, w12, w10 -; CHECK-SD-NEXT: fmov s0, w10 -; CHECK-SD-NEXT: add w9, w11, w9 -; CHECK-SD-NEXT: add w10, w16, w13, lsr #31 -; CHECK-SD-NEXT: add x8, x15, x8, lsr #32 -; CHECK-SD-NEXT: mov v0.h[1], w9 -; CHECK-SD-NEXT: sub w9, w10, w10, lsl #3 -; CHECK-SD-NEXT: asr w10, w8, #2 -; CHECK-SD-NEXT: add w9, w14, w9 -; CHECK-SD-NEXT: add w8, w10, w8, lsr #31 -; CHECK-SD-NEXT: mov v0.h[2], w9 -; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3 -; CHECK-SD-NEXT: add w8, w15, w8 -; CHECK-SD-NEXT: mov v0.h[3], w8 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #17 +; CHECK-SD-NEXT: xtn v1.4h, v1.4s +; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15 +; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv4i8_7: @@ -978,39 +947,15 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) { ; CHECK-SD-LABEL: sv4i8_100: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-SD-NEXT: mov w8, #34079 // =0x851f -; CHECK-SD-NEXT: mov w14, #100 // =0x64 -; CHECK-SD-NEXT: movk w8, #20971, lsl #16 -; CHECK-SD-NEXT: sshr v1.4h, v0.4h, #8 -; CHECK-SD-NEXT: smov x9, v1.h[0] -; CHECK-SD-NEXT: smov x10, v1.h[1] -; CHECK-SD-NEXT: smov x11, v1.h[2] -; CHECK-SD-NEXT: smov w12, v1.h[0] -; CHECK-SD-NEXT: smov x13, v1.h[3] -; CHECK-SD-NEXT: smov w15, v1.h[1] -; CHECK-SD-NEXT: smull x9, w9, w8 -; CHECK-SD-NEXT: smull x10, w10, w8 -; CHECK-SD-NEXT: smull x11, w11, w8 -; CHECK-SD-NEXT: asr x9, x9, #37 -; CHECK-SD-NEXT: smull x8, w13, w8 -; CHECK-SD-NEXT: asr x10, x10, #37 -; CHECK-SD-NEXT: add w9, w9, w9, lsr #31 -; CHECK-SD-NEXT: asr x11, x11, #37 -; CHECK-SD-NEXT: add w10, w10, w10, lsr #31 -; CHECK-SD-NEXT: asr x8, x8, #37 -; CHECK-SD-NEXT: msub w9, w9, w14, w12 -; CHECK-SD-NEXT: msub w10, w10, w14, w15 -; CHECK-SD-NEXT: add w8, w8, w8, lsr #31 -; CHECK-SD-NEXT: fmov s0, w9 -; 
CHECK-SD-NEXT: add w9, w11, w11, lsr #31 -; CHECK-SD-NEXT: smov w11, v1.h[2] -; CHECK-SD-NEXT: msub w9, w9, w14, w11 -; CHECK-SD-NEXT: mov v0.h[1], w10 -; CHECK-SD-NEXT: smov w10, v1.h[3] -; CHECK-SD-NEXT: msub w8, w8, w14, w10 -; CHECK-SD-NEXT: mov v0.h[2], w9 -; CHECK-SD-NEXT: mov v0.h[3], w8 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: mov w8, #5243 // =0x147b +; CHECK-SD-NEXT: movi v2.4h, #100 +; CHECK-SD-NEXT: dup v1.4h, w8 +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h +; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #19 +; CHECK-SD-NEXT: xtn v1.4h, v1.4s +; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15 +; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: sv4i8_100: diff --git a/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll new file mode 100644 index 0000000000000..053cbc0616454 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +define <8 x i16> @sdiv_v8i16_by_7(<8 x i16> %x) { +; CHECK-LABEL: sdiv_v8i16_by_7: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #18725 // =0x4925 +; CHECK-NEXT: dup v1.8h, w8 +; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; CHECK-NEXT: sshr v0.8h, v0.8h, #1 +; CHECK-NEXT: usra v0.8h, v0.8h, #15 +; CHECK-NEXT: ret + %div = sdiv <8 x i16> %x, splat (i16 7) + ret <8 x i16> %div +} + +define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) { +; CHECK-LABEL: sdiv_v16i16_by_7: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #18725 // =0x4925 +; CHECK-NEXT: dup v2.8h, w8 +; CHECK-NEXT: smull2 v3.4s, v0.8h, v2.8h +; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h +; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h +; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h +;
CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h +; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h +; CHECK-NEXT: sshr v0.8h, v0.8h, #1 +; CHECK-NEXT: sshr v1.8h, v1.8h, #1 +; CHECK-NEXT: usra v0.8h, v0.8h, #15 +; CHECK-NEXT: usra v1.8h, v1.8h, #15 +; CHECK-NEXT: ret + %div = sdiv <16 x i16> %x, splat (i16 7) + ret <16 x i16> %div +} + +define <8 x i16> @srem_v8i16_by_7(<8 x i16> %x) { +; CHECK-LABEL: srem_v8i16_by_7: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #18725 // =0x4925 +; CHECK-NEXT: dup v1.8h, w8 +; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h +; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h +; CHECK-NEXT: movi v2.8h, #7 +; CHECK-NEXT: sshr v1.8h, v1.8h, #1 +; CHECK-NEXT: usra v1.8h, v1.8h, #15 +; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h +; CHECK-NEXT: ret + %rem = srem <8 x i16> %x, splat (i16 7) + ret <8 x i16> %rem +} + +define <16 x i16> @srem_v16i16_by_7(<16 x i16> %x) { +; CHECK-LABEL: srem_v16i16_by_7: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #18725 // =0x4925 +; CHECK-NEXT: dup v2.8h, w8 +; CHECK-NEXT: smull2 v3.4s, v0.8h, v2.8h +; CHECK-NEXT: smull v4.4s, v0.4h, v2.4h +; CHECK-NEXT: smull2 v5.4s, v1.8h, v2.8h +; CHECK-NEXT: smull v2.4s, v1.4h, v2.4h +; CHECK-NEXT: uzp2 v3.8h, v4.8h, v3.8h +; CHECK-NEXT: movi v4.8h, #7 +; CHECK-NEXT: uzp2 v2.8h, v2.8h, v5.8h +; CHECK-NEXT: sshr v3.8h, v3.8h, #1 +; CHECK-NEXT: sshr v2.8h, v2.8h, #1 +; CHECK-NEXT: usra v3.8h, v3.8h, #15 +; CHECK-NEXT: usra v2.8h, v2.8h, #15 +; CHECK-NEXT: mls v0.8h, v3.8h, v4.8h +; CHECK-NEXT: mls v1.8h, v2.8h, v4.8h +; CHECK-NEXT: ret + %rem = srem <16 x i16> %x, splat (i16 7) + ret <16 x i16> %rem +}