Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5186,7 +5186,8 @@ static bool isDivisorPowerOfTwo(SDValue Divisor) {
return false;
};

return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo, /*AllowUndefs=*/false,
/*AllowTruncation=*/true);
}

SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
Expand Down Expand Up @@ -5250,7 +5251,8 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isConstantOrConstantVector(N1) &&
if (isConstantOrConstantVector(N1, /*NoOpaques=*/false,
/*AllowTruncation=*/true) &&
!TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6562,8 +6562,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
if (C->isZero())
return false;

const APInt &Divisor = C->getAPIntValue();
// Truncate the divisor to the target scalar type in case it was promoted
// during type legalization.
APInt Divisor = C->getAPIntValue().trunc(EltBits);
SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
int NumeratorFactor = 0;
int ShiftMask = -1;
Expand Down Expand Up @@ -6593,7 +6594,8 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);

// Collect the shifts / magic values from each element.
if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
/*AllowTruncation=*/true))
return SDValue();

SDValue MagicFactor, Factor, Shift, ShiftMask;
Expand Down
89 changes: 17 additions & 72 deletions llvm/test/CodeGen/AArch64/rem-by-const.ll
Original file line number Diff line number Diff line change
Expand Up @@ -893,46 +893,15 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: sv4i8_7:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT: mov x8, #-56173 // =0xffffffffffff2493
; CHECK-SD-NEXT: movk x8, #37449, lsl #16
; CHECK-SD-NEXT: mov w8, #18725 // =0x4925
; CHECK-SD-NEXT: movi v2.4h, #7
; CHECK-SD-NEXT: dup v1.4h, w8
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-SD-NEXT: smov x10, v0.h[0]
; CHECK-SD-NEXT: smov x9, v0.h[1]
; CHECK-SD-NEXT: smov w12, v0.h[0]
; CHECK-SD-NEXT: smov w11, v0.h[1]
; CHECK-SD-NEXT: smov x13, v0.h[2]
; CHECK-SD-NEXT: smov w14, v0.h[2]
; CHECK-SD-NEXT: smov x17, v0.h[3]
; CHECK-SD-NEXT: smull x10, w10, w8
; CHECK-SD-NEXT: smull x9, w9, w8
; CHECK-SD-NEXT: smull x13, w13, w8
; CHECK-SD-NEXT: add x10, x12, x10, lsr #32
; CHECK-SD-NEXT: smull x8, w17, w8
; CHECK-SD-NEXT: add x9, x11, x9, lsr #32
; CHECK-SD-NEXT: asr w16, w10, #2
; CHECK-SD-NEXT: add x13, x14, x13, lsr #32
; CHECK-SD-NEXT: asr w15, w9, #2
; CHECK-SD-NEXT: add w10, w16, w10, lsr #31
; CHECK-SD-NEXT: asr w16, w13, #2
; CHECK-SD-NEXT: add w9, w15, w9, lsr #31
; CHECK-SD-NEXT: smov w15, v0.h[3]
; CHECK-SD-NEXT: sub w10, w10, w10, lsl #3
; CHECK-SD-NEXT: sub w9, w9, w9, lsl #3
; CHECK-SD-NEXT: add w10, w12, w10
; CHECK-SD-NEXT: fmov s0, w10
; CHECK-SD-NEXT: add w9, w11, w9
; CHECK-SD-NEXT: add w10, w16, w13, lsr #31
; CHECK-SD-NEXT: add x8, x15, x8, lsr #32
; CHECK-SD-NEXT: mov v0.h[1], w9
; CHECK-SD-NEXT: sub w9, w10, w10, lsl #3
; CHECK-SD-NEXT: asr w10, w8, #2
; CHECK-SD-NEXT: add w9, w14, w9
; CHECK-SD-NEXT: add w8, w10, w8, lsr #31
; CHECK-SD-NEXT: mov v0.h[2], w9
; CHECK-SD-NEXT: sub w8, w8, w8, lsl #3
; CHECK-SD-NEXT: add w8, w15, w8
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #17
; CHECK-SD-NEXT: xtn v1.4h, v1.4s
; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i8_7:
Expand Down Expand Up @@ -978,39 +947,15 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: sv4i8_100:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
; CHECK-SD-NEXT: mov w14, #100 // =0x64
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
; CHECK-SD-NEXT: sshr v1.4h, v0.4h, #8
; CHECK-SD-NEXT: smov x9, v1.h[0]
; CHECK-SD-NEXT: smov x10, v1.h[1]
; CHECK-SD-NEXT: smov x11, v1.h[2]
; CHECK-SD-NEXT: smov w12, v1.h[0]
; CHECK-SD-NEXT: smov x13, v1.h[3]
; CHECK-SD-NEXT: smov w15, v1.h[1]
; CHECK-SD-NEXT: smull x9, w9, w8
; CHECK-SD-NEXT: smull x10, w10, w8
; CHECK-SD-NEXT: smull x11, w11, w8
; CHECK-SD-NEXT: asr x9, x9, #37
; CHECK-SD-NEXT: smull x8, w13, w8
; CHECK-SD-NEXT: asr x10, x10, #37
; CHECK-SD-NEXT: add w9, w9, w9, lsr #31
; CHECK-SD-NEXT: asr x11, x11, #37
; CHECK-SD-NEXT: add w10, w10, w10, lsr #31
; CHECK-SD-NEXT: asr x8, x8, #37
; CHECK-SD-NEXT: msub w9, w9, w14, w12
; CHECK-SD-NEXT: msub w10, w10, w14, w15
; CHECK-SD-NEXT: add w8, w8, w8, lsr #31
; CHECK-SD-NEXT: fmov s0, w9
; CHECK-SD-NEXT: add w9, w11, w11, lsr #31
; CHECK-SD-NEXT: smov w11, v1.h[2]
; CHECK-SD-NEXT: msub w9, w9, w14, w11
; CHECK-SD-NEXT: mov v0.h[1], w10
; CHECK-SD-NEXT: smov w10, v1.h[3]
; CHECK-SD-NEXT: msub w8, w8, w14, w10
; CHECK-SD-NEXT: mov v0.h[2], w9
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: mov w8, #5243 // =0x147b
; CHECK-SD-NEXT: movi v2.4h, #100
; CHECK-SD-NEXT: dup v1.4h, w8
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #19
; CHECK-SD-NEXT: xtn v1.4h, v1.4s
; CHECK-SD-NEXT: usra v1.4h, v1.4h, #15
; CHECK-SD-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sv4i8_100:
Expand Down
77 changes: 77 additions & 0 deletions llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s

; Regression test: signed division of every lane of an <8 x i16> vector by
; the constant 7.
; The assertions below expect the division to be emitted as a vector multiply
; by the constant 0x4925 (smull/smull2) followed by shifts (sshr, usra); no
; division instruction appears in the expected output.
define <8 x i16> @sdiv_v8i16_by_7(<8 x i16> %x) {
; CHECK-LABEL: sdiv_v8i16_by_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #18725 // =0x4925
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h
; CHECK-NEXT: sshr v0.8h, v0.8h, #1
; CHECK-NEXT: usra v0.8h, v0.8h, #15
; CHECK-NEXT: ret
; The divisor is a constant vector with all eight lanes equal to 7.
%div = sdiv <8 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %div
}

; Regression test: signed division of every lane of a <16 x i16> vector by
; the constant 7.
; In the expected output the input is processed as two 8-lane registers
; (v0 and v1), each going through the same multiply-by-0x4925 and shift
; sequence; no division instruction appears in the expected output.
define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) {
; CHECK-LABEL: sdiv_v16i16_by_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #18725 // =0x4925
; CHECK-NEXT: dup v2.8h, w8
; CHECK-NEXT: smull2 v3.4s, v0.8h, v2.8h
; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h
; CHECK-NEXT: smull2 v4.4s, v1.8h, v2.8h
; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v3.8h
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v4.8h
; CHECK-NEXT: sshr v0.8h, v0.8h, #1
; CHECK-NEXT: sshr v1.8h, v1.8h, #1
; CHECK-NEXT: usra v0.8h, v0.8h, #15
; CHECK-NEXT: usra v1.8h, v1.8h, #15
; CHECK-NEXT: ret
; The divisor is a constant vector with all sixteen lanes equal to 7.
%div = sdiv <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <16 x i16> %div
}

; Regression test: signed remainder of every lane of an <8 x i16> vector by
; the constant 7.
; The assertions below expect the same multiply-by-0x4925 and shift sequence
; as the sdiv tests in this file, followed by an mls against a vector of 7s
; (movi v2.8h, #7) that writes the result into v0; no division instruction
; appears in the expected output.
define <8 x i16> @srem_v8i16_by_7(<8 x i16> %x) {
; CHECK-LABEL: srem_v8i16_by_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #18725 // =0x4925
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; CHECK-NEXT: movi v2.8h, #7
; CHECK-NEXT: sshr v1.8h, v1.8h, #1
; CHECK-NEXT: usra v1.8h, v1.8h, #15
; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
; CHECK-NEXT: ret
; The divisor is a constant vector with all eight lanes equal to 7.
%rem = srem <8 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %rem
}

; Regression test: signed remainder of every lane of a <16 x i16> vector by
; the constant 7.
; In the expected output the input is processed as two 8-lane registers
; (v0 and v1). Each goes through the multiply-by-0x4925 and shift sequence,
; then an mls against a shared vector of 7s (movi v4.8h, #7); no division
; instruction appears in the expected output.
define <16 x i16> @srem_v16i16_by_7(<16 x i16> %x) {
; CHECK-LABEL: srem_v16i16_by_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #18725 // =0x4925
; CHECK-NEXT: dup v2.8h, w8
; CHECK-NEXT: smull2 v3.4s, v0.8h, v2.8h
; CHECK-NEXT: smull v4.4s, v0.4h, v2.4h
; CHECK-NEXT: smull2 v5.4s, v1.8h, v2.8h
; CHECK-NEXT: smull v2.4s, v1.4h, v2.4h
; CHECK-NEXT: uzp2 v3.8h, v4.8h, v3.8h
; CHECK-NEXT: movi v4.8h, #7
; CHECK-NEXT: uzp2 v2.8h, v2.8h, v5.8h
; CHECK-NEXT: sshr v3.8h, v3.8h, #1
; CHECK-NEXT: sshr v2.8h, v2.8h, #1
; CHECK-NEXT: usra v3.8h, v3.8h, #15
; CHECK-NEXT: usra v2.8h, v2.8h, #15
; CHECK-NEXT: mls v0.8h, v3.8h, v4.8h
; CHECK-NEXT: mls v1.8h, v2.8h, v4.8h
; CHECK-NEXT: ret
; The divisor is a constant vector with all sixteen lanes equal to 7.
%rem = srem <16 x i16> %x, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <16 x i16> %rem
}