@@ -89,122 +89,9 @@ define <8 x i16> @sdiv_exact_v8i16_by_255(<8 x i16> %x) {
8989define <16 x i16 > @sdiv_exact_v16i16_by_255 (<16 x i16 > %x ) {
9090; CHECK-LABEL: sdiv_exact_v16i16_by_255:
9191; CHECK: // %bb.0:
92- ; CHECK-NEXT: smov x11, v0.h[1]
93- ; CHECK-NEXT: smov x10, v0.h[0]
94- ; CHECK-NEXT: mov x8, #-32639 // =0xffffffffffff8081
95- ; CHECK-NEXT: smov x13, v0.h[3]
96- ; CHECK-NEXT: smov x14, v1.h[1]
97- ; CHECK-NEXT: movk x8, #32896, lsl #16
98- ; CHECK-NEXT: smov x16, v1.h[0]
99- ; CHECK-NEXT: smov w12, v0.h[1]
100- ; CHECK-NEXT: smov w15, v0.h[0]
101- ; CHECK-NEXT: smov x18, v1.h[2]
102- ; CHECK-NEXT: smov w0, v0.h[3]
103- ; CHECK-NEXT: smov w1, v1.h[1]
104- ; CHECK-NEXT: smull x11, w11, w8
105- ; CHECK-NEXT: smov w2, v1.h[0]
106- ; CHECK-NEXT: smov x9, v0.h[2]
107- ; CHECK-NEXT: smull x10, w10, w8
108- ; CHECK-NEXT: smov w17, v0.h[2]
109- ; CHECK-NEXT: smov w3, v1.h[2]
110- ; CHECK-NEXT: smull x13, w13, w8
111- ; CHECK-NEXT: smull x14, w14, w8
112- ; CHECK-NEXT: add x12, x12, x11, lsr #32
113- ; CHECK-NEXT: smull x16, w16, w8
114- ; CHECK-NEXT: add x10, x15, x10, lsr #32
115- ; CHECK-NEXT: smull x15, w18, w8
116- ; CHECK-NEXT: add x11, x0, x13, lsr #32
117- ; CHECK-NEXT: smov x0, v0.h[4]
118- ; CHECK-NEXT: add x13, x1, x14, lsr #32
119- ; CHECK-NEXT: asr w18, w10, #7
120- ; CHECK-NEXT: smull x9, w9, w8
121- ; CHECK-NEXT: add x14, x2, x16, lsr #32
122- ; CHECK-NEXT: asr w16, w12, #7
123- ; CHECK-NEXT: smov x2, v1.h[3]
124- ; CHECK-NEXT: add w18, w18, w10, lsr #31
125- ; CHECK-NEXT: add x15, x3, x15, lsr #32
126- ; CHECK-NEXT: smov w10, v0.h[5]
127- ; CHECK-NEXT: add w12, w16, w12, lsr #31
128- ; CHECK-NEXT: asr w16, w14, #7
129- ; CHECK-NEXT: add x9, x17, x9, lsr #32
130- ; CHECK-NEXT: fmov s2, w18
131- ; CHECK-NEXT: smov w17, v0.h[4]
132- ; CHECK-NEXT: smull x0, w0, w8
133- ; CHECK-NEXT: add w14, w16, w14, lsr #31
134- ; CHECK-NEXT: asr w16, w13, #7
135- ; CHECK-NEXT: asr w1, w9, #7
136- ; CHECK-NEXT: smov x18, v0.h[5]
137- ; CHECK-NEXT: fmov s3, w14
138- ; CHECK-NEXT: mov v2.h[1], w12
139- ; CHECK-NEXT: add w12, w16, w13, lsr #31
140- ; CHECK-NEXT: smov w13, v1.h[3]
141- ; CHECK-NEXT: smov x14, v1.h[4]
142- ; CHECK-NEXT: smull x16, w2, w8
143- ; CHECK-NEXT: add w1, w1, w9, lsr #31
144- ; CHECK-NEXT: add x17, x17, x0, lsr #32
145- ; CHECK-NEXT: asr w0, w15, #7
146- ; CHECK-NEXT: mov v3.h[1], w12
147- ; CHECK-NEXT: smov w12, v1.h[4]
148- ; CHECK-NEXT: smull x18, w18, w8
149- ; CHECK-NEXT: mov v2.h[2], w1
150- ; CHECK-NEXT: asr w1, w11, #7
151- ; CHECK-NEXT: add w15, w0, w15, lsr #31
152- ; CHECK-NEXT: add x13, x13, x16, lsr #32
153- ; CHECK-NEXT: smov x16, v1.h[5]
154- ; CHECK-NEXT: smull x14, w14, w8
155- ; CHECK-NEXT: add w11, w1, w11, lsr #31
156- ; CHECK-NEXT: smov x0, v0.h[6]
157- ; CHECK-NEXT: add x10, x10, x18, lsr #32
158- ; CHECK-NEXT: asr w1, w13, #7
159- ; CHECK-NEXT: mov v3.h[2], w15
160- ; CHECK-NEXT: smov w15, v1.h[5]
161- ; CHECK-NEXT: add x12, x12, x14, lsr #32
162- ; CHECK-NEXT: mov v2.h[3], w11
163- ; CHECK-NEXT: asr w11, w17, #7
164- ; CHECK-NEXT: add w13, w1, w13, lsr #31
165- ; CHECK-NEXT: smull x16, w16, w8
166- ; CHECK-NEXT: smov x14, v1.h[6]
167- ; CHECK-NEXT: asr w18, w12, #7
168- ; CHECK-NEXT: add w11, w11, w17, lsr #31
169- ; CHECK-NEXT: smov w9, v0.h[6]
170- ; CHECK-NEXT: mov v3.h[3], w13
171- ; CHECK-NEXT: smull x17, w0, w8
172- ; CHECK-NEXT: smov x0, v1.h[7]
173- ; CHECK-NEXT: add x13, x15, x16, lsr #32
174- ; CHECK-NEXT: add w12, w18, w12, lsr #31
175- ; CHECK-NEXT: smov w16, v1.h[6]
176- ; CHECK-NEXT: mov v2.h[4], w11
177- ; CHECK-NEXT: smov x11, v0.h[7]
178- ; CHECK-NEXT: smull x14, w14, w8
179- ; CHECK-NEXT: asr w15, w10, #7
180- ; CHECK-NEXT: asr w18, w13, #7
181- ; CHECK-NEXT: smov w1, v0.h[7]
182- ; CHECK-NEXT: mov v3.h[4], w12
183- ; CHECK-NEXT: add x9, x9, x17, lsr #32
184- ; CHECK-NEXT: add w10, w15, w10, lsr #31
185- ; CHECK-NEXT: add w12, w18, w13, lsr #31
186- ; CHECK-NEXT: add x13, x16, x14, lsr #32
187- ; CHECK-NEXT: smov w14, v1.h[7]
188- ; CHECK-NEXT: smull x11, w11, w8
189- ; CHECK-NEXT: smull x8, w0, w8
190- ; CHECK-NEXT: mov v2.h[5], w10
191- ; CHECK-NEXT: asr w10, w9, #7
192- ; CHECK-NEXT: mov v3.h[5], w12
193- ; CHECK-NEXT: asr w12, w13, #7
194- ; CHECK-NEXT: add w9, w10, w9, lsr #31
195- ; CHECK-NEXT: add x10, x1, x11, lsr #32
196- ; CHECK-NEXT: add w11, w12, w13, lsr #31
197- ; CHECK-NEXT: add x8, x14, x8, lsr #32
198- ; CHECK-NEXT: mov v2.h[6], w9
199- ; CHECK-NEXT: asr w9, w10, #7
200- ; CHECK-NEXT: mov v3.h[6], w11
201- ; CHECK-NEXT: asr w11, w8, #7
202- ; CHECK-NEXT: add w9, w9, w10, lsr #31
203- ; CHECK-NEXT: add w8, w11, w8, lsr #31
204- ; CHECK-NEXT: mov v2.h[7], w9
205- ; CHECK-NEXT: mov v3.h[7], w8
206- ; CHECK-NEXT: mov v0.16b, v2.16b
207- ; CHECK-NEXT: mov v1.16b, v3.16b
92+ ; CHECK-NEXT: mvni v2.8h, #1, lsl #8
93+ ; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h
94+ ; CHECK-NEXT: mul v1.8h, v1.8h, v2.8h
20895; CHECK-NEXT: ret
20996 %div = sdiv exact <16 x i16 > %x , splat (i16 255 )
21097 ret <16 x i16 > %div
0 commit comments