Skip to content

Commit 35e2971

Browse files
[DAGCombiner] Allow promoted constants when lowering vector SDIV exacts
1 parent bebeffd commit 35e2971

File tree

2 files changed

+8
-119
lines changed

2 files changed

+8
-119
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6345,7 +6345,6 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
63456345
SDValue Op0 = N->getOperand(0);
63466346
SDValue Op1 = N->getOperand(1);
63476347
EVT VT = N->getValueType(0);
6348-
EVT SVT = VT.getScalarType();
63496348
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
63506349
EVT ShSVT = ShVT.getScalarType();
63516350

@@ -6355,6 +6354,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
63556354
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
63566355
if (C->isZero())
63576356
return false;
6357+
6358+
EVT CT = C->getValueType(0);
63586359
APInt Divisor = C->getAPIntValue();
63596360
unsigned Shift = Divisor.countr_zero();
63606361
if (Shift) {
@@ -6363,12 +6364,13 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
63636364
}
63646365
APInt Factor = Divisor.multiplicativeInverse();
63656366
Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6366-
Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6367+
Factors.push_back(DAG.getConstant(Factor, dl, CT));
63676368
return true;
63686369
};
63696370

63706371
// Collect all magic values from the build vector.
6371-
if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6372+
if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false,
6373+
/*AllowTruncation=*/true))
63726374
return SDValue();
63736375

63746376
SDValue Shift, Factor;

llvm/test/CodeGen/AArch64/sdiv-by-const-promoted-ops.ll

Lines changed: 3 additions & 116 deletions
Original file line number | Diff line number | Diff line change
@@ -89,122 +89,9 @@ define <8 x i16> @sdiv_exact_v8i16_by_255(<8 x i16> %x) {
8989
define <16 x i16> @sdiv_exact_v16i16_by_255(<16 x i16> %x) {
9090
; CHECK-LABEL: sdiv_exact_v16i16_by_255:
9191
; CHECK: // %bb.0:
92-
; CHECK-NEXT: smov x11, v0.h[1]
93-
; CHECK-NEXT: smov x10, v0.h[0]
94-
; CHECK-NEXT: mov x8, #-32639 // =0xffffffffffff8081
95-
; CHECK-NEXT: smov x13, v0.h[3]
96-
; CHECK-NEXT: smov x14, v1.h[1]
97-
; CHECK-NEXT: movk x8, #32896, lsl #16
98-
; CHECK-NEXT: smov x16, v1.h[0]
99-
; CHECK-NEXT: smov w12, v0.h[1]
100-
; CHECK-NEXT: smov w15, v0.h[0]
101-
; CHECK-NEXT: smov x18, v1.h[2]
102-
; CHECK-NEXT: smov w0, v0.h[3]
103-
; CHECK-NEXT: smov w1, v1.h[1]
104-
; CHECK-NEXT: smull x11, w11, w8
105-
; CHECK-NEXT: smov w2, v1.h[0]
106-
; CHECK-NEXT: smov x9, v0.h[2]
107-
; CHECK-NEXT: smull x10, w10, w8
108-
; CHECK-NEXT: smov w17, v0.h[2]
109-
; CHECK-NEXT: smov w3, v1.h[2]
110-
; CHECK-NEXT: smull x13, w13, w8
111-
; CHECK-NEXT: smull x14, w14, w8
112-
; CHECK-NEXT: add x12, x12, x11, lsr #32
113-
; CHECK-NEXT: smull x16, w16, w8
114-
; CHECK-NEXT: add x10, x15, x10, lsr #32
115-
; CHECK-NEXT: smull x15, w18, w8
116-
; CHECK-NEXT: add x11, x0, x13, lsr #32
117-
; CHECK-NEXT: smov x0, v0.h[4]
118-
; CHECK-NEXT: add x13, x1, x14, lsr #32
119-
; CHECK-NEXT: asr w18, w10, #7
120-
; CHECK-NEXT: smull x9, w9, w8
121-
; CHECK-NEXT: add x14, x2, x16, lsr #32
122-
; CHECK-NEXT: asr w16, w12, #7
123-
; CHECK-NEXT: smov x2, v1.h[3]
124-
; CHECK-NEXT: add w18, w18, w10, lsr #31
125-
; CHECK-NEXT: add x15, x3, x15, lsr #32
126-
; CHECK-NEXT: smov w10, v0.h[5]
127-
; CHECK-NEXT: add w12, w16, w12, lsr #31
128-
; CHECK-NEXT: asr w16, w14, #7
129-
; CHECK-NEXT: add x9, x17, x9, lsr #32
130-
; CHECK-NEXT: fmov s2, w18
131-
; CHECK-NEXT: smov w17, v0.h[4]
132-
; CHECK-NEXT: smull x0, w0, w8
133-
; CHECK-NEXT: add w14, w16, w14, lsr #31
134-
; CHECK-NEXT: asr w16, w13, #7
135-
; CHECK-NEXT: asr w1, w9, #7
136-
; CHECK-NEXT: smov x18, v0.h[5]
137-
; CHECK-NEXT: fmov s3, w14
138-
; CHECK-NEXT: mov v2.h[1], w12
139-
; CHECK-NEXT: add w12, w16, w13, lsr #31
140-
; CHECK-NEXT: smov w13, v1.h[3]
141-
; CHECK-NEXT: smov x14, v1.h[4]
142-
; CHECK-NEXT: smull x16, w2, w8
143-
; CHECK-NEXT: add w1, w1, w9, lsr #31
144-
; CHECK-NEXT: add x17, x17, x0, lsr #32
145-
; CHECK-NEXT: asr w0, w15, #7
146-
; CHECK-NEXT: mov v3.h[1], w12
147-
; CHECK-NEXT: smov w12, v1.h[4]
148-
; CHECK-NEXT: smull x18, w18, w8
149-
; CHECK-NEXT: mov v2.h[2], w1
150-
; CHECK-NEXT: asr w1, w11, #7
151-
; CHECK-NEXT: add w15, w0, w15, lsr #31
152-
; CHECK-NEXT: add x13, x13, x16, lsr #32
153-
; CHECK-NEXT: smov x16, v1.h[5]
154-
; CHECK-NEXT: smull x14, w14, w8
155-
; CHECK-NEXT: add w11, w1, w11, lsr #31
156-
; CHECK-NEXT: smov x0, v0.h[6]
157-
; CHECK-NEXT: add x10, x10, x18, lsr #32
158-
; CHECK-NEXT: asr w1, w13, #7
159-
; CHECK-NEXT: mov v3.h[2], w15
160-
; CHECK-NEXT: smov w15, v1.h[5]
161-
; CHECK-NEXT: add x12, x12, x14, lsr #32
162-
; CHECK-NEXT: mov v2.h[3], w11
163-
; CHECK-NEXT: asr w11, w17, #7
164-
; CHECK-NEXT: add w13, w1, w13, lsr #31
165-
; CHECK-NEXT: smull x16, w16, w8
166-
; CHECK-NEXT: smov x14, v1.h[6]
167-
; CHECK-NEXT: asr w18, w12, #7
168-
; CHECK-NEXT: add w11, w11, w17, lsr #31
169-
; CHECK-NEXT: smov w9, v0.h[6]
170-
; CHECK-NEXT: mov v3.h[3], w13
171-
; CHECK-NEXT: smull x17, w0, w8
172-
; CHECK-NEXT: smov x0, v1.h[7]
173-
; CHECK-NEXT: add x13, x15, x16, lsr #32
174-
; CHECK-NEXT: add w12, w18, w12, lsr #31
175-
; CHECK-NEXT: smov w16, v1.h[6]
176-
; CHECK-NEXT: mov v2.h[4], w11
177-
; CHECK-NEXT: smov x11, v0.h[7]
178-
; CHECK-NEXT: smull x14, w14, w8
179-
; CHECK-NEXT: asr w15, w10, #7
180-
; CHECK-NEXT: asr w18, w13, #7
181-
; CHECK-NEXT: smov w1, v0.h[7]
182-
; CHECK-NEXT: mov v3.h[4], w12
183-
; CHECK-NEXT: add x9, x9, x17, lsr #32
184-
; CHECK-NEXT: add w10, w15, w10, lsr #31
185-
; CHECK-NEXT: add w12, w18, w13, lsr #31
186-
; CHECK-NEXT: add x13, x16, x14, lsr #32
187-
; CHECK-NEXT: smov w14, v1.h[7]
188-
; CHECK-NEXT: smull x11, w11, w8
189-
; CHECK-NEXT: smull x8, w0, w8
190-
; CHECK-NEXT: mov v2.h[5], w10
191-
; CHECK-NEXT: asr w10, w9, #7
192-
; CHECK-NEXT: mov v3.h[5], w12
193-
; CHECK-NEXT: asr w12, w13, #7
194-
; CHECK-NEXT: add w9, w10, w9, lsr #31
195-
; CHECK-NEXT: add x10, x1, x11, lsr #32
196-
; CHECK-NEXT: add w11, w12, w13, lsr #31
197-
; CHECK-NEXT: add x8, x14, x8, lsr #32
198-
; CHECK-NEXT: mov v2.h[6], w9
199-
; CHECK-NEXT: asr w9, w10, #7
200-
; CHECK-NEXT: mov v3.h[6], w11
201-
; CHECK-NEXT: asr w11, w8, #7
202-
; CHECK-NEXT: add w9, w9, w10, lsr #31
203-
; CHECK-NEXT: add w8, w11, w8, lsr #31
204-
; CHECK-NEXT: mov v2.h[7], w9
205-
; CHECK-NEXT: mov v3.h[7], w8
206-
; CHECK-NEXT: mov v0.16b, v2.16b
207-
; CHECK-NEXT: mov v1.16b, v3.16b
92+
; CHECK-NEXT: mvni v2.8h, #1, lsl #8
93+
; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h
94+
; CHECK-NEXT: mul v1.8h, v1.8h, v2.8h
20895
; CHECK-NEXT: ret
20996
%div = sdiv exact <16 x i16> %x, splat (i16 255)
21097
ret <16 x i16> %div

0 commit comments

Comments
 (0)