@@ -49,122 +49,9 @@ define <8 x i16> @sdiv_exact_v8i16_by_255(<8 x i16> %x) {
4949define <16 x i16 > @sdiv_exact_v16i16_by_255 (<16 x i16 > %x ) {
5050; CHECK-LABEL: sdiv_exact_v16i16_by_255:
5151; CHECK: // %bb.0:
52- ; CHECK-NEXT: smov x11, v0.h[1]
53- ; CHECK-NEXT: smov x10, v0.h[0]
54- ; CHECK-NEXT: mov x8, #-32639 // =0xffffffffffff8081
55- ; CHECK-NEXT: smov x13, v0.h[3]
56- ; CHECK-NEXT: smov x14, v1.h[1]
57- ; CHECK-NEXT: movk x8, #32896, lsl #16
58- ; CHECK-NEXT: smov x16, v1.h[0]
59- ; CHECK-NEXT: smov w12, v0.h[1]
60- ; CHECK-NEXT: smov w15, v0.h[0]
61- ; CHECK-NEXT: smov x18, v1.h[2]
62- ; CHECK-NEXT: smov w0, v0.h[3]
63- ; CHECK-NEXT: smov w1, v1.h[1]
64- ; CHECK-NEXT: smull x11, w11, w8
65- ; CHECK-NEXT: smov w2, v1.h[0]
66- ; CHECK-NEXT: smov x9, v0.h[2]
67- ; CHECK-NEXT: smull x10, w10, w8
68- ; CHECK-NEXT: smov w17, v0.h[2]
69- ; CHECK-NEXT: smov w3, v1.h[2]
70- ; CHECK-NEXT: smull x13, w13, w8
71- ; CHECK-NEXT: smull x14, w14, w8
72- ; CHECK-NEXT: add x12, x12, x11, lsr #32
73- ; CHECK-NEXT: smull x16, w16, w8
74- ; CHECK-NEXT: add x10, x15, x10, lsr #32
75- ; CHECK-NEXT: smull x15, w18, w8
76- ; CHECK-NEXT: add x11, x0, x13, lsr #32
77- ; CHECK-NEXT: smov x0, v0.h[4]
78- ; CHECK-NEXT: add x13, x1, x14, lsr #32
79- ; CHECK-NEXT: asr w18, w10, #7
80- ; CHECK-NEXT: smull x9, w9, w8
81- ; CHECK-NEXT: add x14, x2, x16, lsr #32
82- ; CHECK-NEXT: asr w16, w12, #7
83- ; CHECK-NEXT: smov x2, v1.h[3]
84- ; CHECK-NEXT: add w18, w18, w10, lsr #31
85- ; CHECK-NEXT: add x15, x3, x15, lsr #32
86- ; CHECK-NEXT: smov w10, v0.h[5]
87- ; CHECK-NEXT: add w12, w16, w12, lsr #31
88- ; CHECK-NEXT: asr w16, w14, #7
89- ; CHECK-NEXT: add x9, x17, x9, lsr #32
90- ; CHECK-NEXT: fmov s2, w18
91- ; CHECK-NEXT: smov w17, v0.h[4]
92- ; CHECK-NEXT: smull x0, w0, w8
93- ; CHECK-NEXT: add w14, w16, w14, lsr #31
94- ; CHECK-NEXT: asr w16, w13, #7
95- ; CHECK-NEXT: asr w1, w9, #7
96- ; CHECK-NEXT: smov x18, v0.h[5]
97- ; CHECK-NEXT: fmov s3, w14
98- ; CHECK-NEXT: mov v2.h[1], w12
99- ; CHECK-NEXT: add w12, w16, w13, lsr #31
100- ; CHECK-NEXT: smov w13, v1.h[3]
101- ; CHECK-NEXT: smov x14, v1.h[4]
102- ; CHECK-NEXT: smull x16, w2, w8
103- ; CHECK-NEXT: add w1, w1, w9, lsr #31
104- ; CHECK-NEXT: add x17, x17, x0, lsr #32
105- ; CHECK-NEXT: asr w0, w15, #7
106- ; CHECK-NEXT: mov v3.h[1], w12
107- ; CHECK-NEXT: smov w12, v1.h[4]
108- ; CHECK-NEXT: smull x18, w18, w8
109- ; CHECK-NEXT: mov v2.h[2], w1
110- ; CHECK-NEXT: asr w1, w11, #7
111- ; CHECK-NEXT: add w15, w0, w15, lsr #31
112- ; CHECK-NEXT: add x13, x13, x16, lsr #32
113- ; CHECK-NEXT: smov x16, v1.h[5]
114- ; CHECK-NEXT: smull x14, w14, w8
115- ; CHECK-NEXT: add w11, w1, w11, lsr #31
116- ; CHECK-NEXT: smov x0, v0.h[6]
117- ; CHECK-NEXT: add x10, x10, x18, lsr #32
118- ; CHECK-NEXT: asr w1, w13, #7
119- ; CHECK-NEXT: mov v3.h[2], w15
120- ; CHECK-NEXT: smov w15, v1.h[5]
121- ; CHECK-NEXT: add x12, x12, x14, lsr #32
122- ; CHECK-NEXT: mov v2.h[3], w11
123- ; CHECK-NEXT: asr w11, w17, #7
124- ; CHECK-NEXT: add w13, w1, w13, lsr #31
125- ; CHECK-NEXT: smull x16, w16, w8
126- ; CHECK-NEXT: smov x14, v1.h[6]
127- ; CHECK-NEXT: asr w18, w12, #7
128- ; CHECK-NEXT: add w11, w11, w17, lsr #31
129- ; CHECK-NEXT: smov w9, v0.h[6]
130- ; CHECK-NEXT: mov v3.h[3], w13
131- ; CHECK-NEXT: smull x17, w0, w8
132- ; CHECK-NEXT: smov x0, v1.h[7]
133- ; CHECK-NEXT: add x13, x15, x16, lsr #32
134- ; CHECK-NEXT: add w12, w18, w12, lsr #31
135- ; CHECK-NEXT: smov w16, v1.h[6]
136- ; CHECK-NEXT: mov v2.h[4], w11
137- ; CHECK-NEXT: smov x11, v0.h[7]
138- ; CHECK-NEXT: smull x14, w14, w8
139- ; CHECK-NEXT: asr w15, w10, #7
140- ; CHECK-NEXT: asr w18, w13, #7
141- ; CHECK-NEXT: smov w1, v0.h[7]
142- ; CHECK-NEXT: mov v3.h[4], w12
143- ; CHECK-NEXT: add x9, x9, x17, lsr #32
144- ; CHECK-NEXT: add w10, w15, w10, lsr #31
145- ; CHECK-NEXT: add w12, w18, w13, lsr #31
146- ; CHECK-NEXT: add x13, x16, x14, lsr #32
147- ; CHECK-NEXT: smov w14, v1.h[7]
148- ; CHECK-NEXT: smull x11, w11, w8
149- ; CHECK-NEXT: smull x8, w0, w8
150- ; CHECK-NEXT: mov v2.h[5], w10
151- ; CHECK-NEXT: asr w10, w9, #7
152- ; CHECK-NEXT: mov v3.h[5], w12
153- ; CHECK-NEXT: asr w12, w13, #7
154- ; CHECK-NEXT: add w9, w10, w9, lsr #31
155- ; CHECK-NEXT: add x10, x1, x11, lsr #32
156- ; CHECK-NEXT: add w11, w12, w13, lsr #31
157- ; CHECK-NEXT: add x8, x14, x8, lsr #32
158- ; CHECK-NEXT: mov v2.h[6], w9
159- ; CHECK-NEXT: asr w9, w10, #7
160- ; CHECK-NEXT: mov v3.h[6], w11
161- ; CHECK-NEXT: asr w11, w8, #7
162- ; CHECK-NEXT: add w9, w9, w10, lsr #31
163- ; CHECK-NEXT: add w8, w11, w8, lsr #31
164- ; CHECK-NEXT: mov v2.h[7], w9
165- ; CHECK-NEXT: mov v3.h[7], w8
166- ; CHECK-NEXT: mov v0.16b, v2.16b
167- ; CHECK-NEXT: mov v1.16b, v3.16b
52+ ; CHECK-NEXT: mvni v2.8h, #1, lsl #8
53+ ; CHECK-NEXT: mul v0.8h, v0.8h, v2.8h
54+ ; CHECK-NEXT: mul v1.8h, v1.8h, v2.8h
16855; CHECK-NEXT: ret
16956 %div = sdiv exact <16 x i16 > %x , splat (i16 255 )
17057 ret <16 x i16 > %div
0 commit comments