@@ -35,3 +35,137 @@ define <16 x i16> @sdiv_v16i16_by_7(<16 x i16> %x) {
3535 %div = sdiv <16 x i16 > %x , <i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 , i16 7 >
3636 ret <16 x i16 > %div
3737}
38+
; Exact signed division of a 128-bit <8 x i16> vector by a splat of 255.
; Because the division is marked `exact`, the expected code is a single vector
; multiply by a per-lane constant: `mvni ..., #1, lsl #8` materialises
; ~0x0100 = 0xFEFF in every halfword, and 255 * 0xFEFF == 1 (mod 2^16), i.e.
; 0xFEFF is the multiplicative inverse of 255 for 16-bit lanes.
39+ define <8 x i16 > @sdiv_exact_v8i16_by_255 (<8 x i16 > %x ) {
40+ ; CHECK-LABEL: sdiv_exact_v8i16_by_255:
41+ ; CHECK: // %bb.0:
42+ ; CHECK-NEXT: mvni v1.8h, #1, lsl #8
43+ ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
44+ ; CHECK-NEXT: ret
45+ %div = sdiv exact <8 x i16 > %x , splat (i16 255 )
46+ ret <8 x i16 > %div
47+ }
48+
; Exact signed division of a <16 x i16> vector (passed in v0 and v1) by a
; splat of 255.
; Unlike the 8-lane variant, the expected output below is fully scalar: every
; lane is extracted with `smov`, multiplied (`smull`) by the constant built in
; w8 (mov/movk give 0x80808081), the high 32 bits of the product are added to
; the sign-extended lane, the sum is shifted right arithmetically by 7, its
; sign bit (`lsr #31`) is added back, and the result is re-inserted into v2/v3
; before being copied to v0/v1.
; NOTE(review): this looks like the generic divide-by-constant sequence, with
; the `exact` flag not being exploited for the wider-than-legal vector;
; presumably the test records that codegen as a baseline -- confirm against
; the accompanying change.
49+ define <16 x i16 > @sdiv_exact_v16i16_by_255 (<16 x i16 > %x ) {
50+ ; CHECK-LABEL: sdiv_exact_v16i16_by_255:
51+ ; CHECK: // %bb.0:
52+ ; CHECK-NEXT: smov x11, v0.h[1]
53+ ; CHECK-NEXT: smov x10, v0.h[0]
54+ ; CHECK-NEXT: mov x8, #-32639 // =0xffffffffffff8081
55+ ; CHECK-NEXT: smov x13, v0.h[3]
56+ ; CHECK-NEXT: smov x14, v1.h[1]
57+ ; CHECK-NEXT: movk x8, #32896, lsl #16
58+ ; CHECK-NEXT: smov x16, v1.h[0]
59+ ; CHECK-NEXT: smov w12, v0.h[1]
60+ ; CHECK-NEXT: smov w15, v0.h[0]
61+ ; CHECK-NEXT: smov x18, v1.h[2]
62+ ; CHECK-NEXT: smov w0, v0.h[3]
63+ ; CHECK-NEXT: smov w1, v1.h[1]
64+ ; CHECK-NEXT: smull x11, w11, w8
65+ ; CHECK-NEXT: smov w2, v1.h[0]
66+ ; CHECK-NEXT: smov x9, v0.h[2]
67+ ; CHECK-NEXT: smull x10, w10, w8
68+ ; CHECK-NEXT: smov w17, v0.h[2]
69+ ; CHECK-NEXT: smov w3, v1.h[2]
70+ ; CHECK-NEXT: smull x13, w13, w8
71+ ; CHECK-NEXT: smull x14, w14, w8
72+ ; CHECK-NEXT: add x12, x12, x11, lsr #32
73+ ; CHECK-NEXT: smull x16, w16, w8
74+ ; CHECK-NEXT: add x10, x15, x10, lsr #32
75+ ; CHECK-NEXT: smull x15, w18, w8
76+ ; CHECK-NEXT: add x11, x0, x13, lsr #32
77+ ; CHECK-NEXT: smov x0, v0.h[4]
78+ ; CHECK-NEXT: add x13, x1, x14, lsr #32
79+ ; CHECK-NEXT: asr w18, w10, #7
80+ ; CHECK-NEXT: smull x9, w9, w8
81+ ; CHECK-NEXT: add x14, x2, x16, lsr #32
82+ ; CHECK-NEXT: asr w16, w12, #7
83+ ; CHECK-NEXT: smov x2, v1.h[3]
84+ ; CHECK-NEXT: add w18, w18, w10, lsr #31
85+ ; CHECK-NEXT: add x15, x3, x15, lsr #32
86+ ; CHECK-NEXT: smov w10, v0.h[5]
87+ ; CHECK-NEXT: add w12, w16, w12, lsr #31
88+ ; CHECK-NEXT: asr w16, w14, #7
89+ ; CHECK-NEXT: add x9, x17, x9, lsr #32
90+ ; CHECK-NEXT: fmov s2, w18
91+ ; CHECK-NEXT: smov w17, v0.h[4]
92+ ; CHECK-NEXT: smull x0, w0, w8
93+ ; CHECK-NEXT: add w14, w16, w14, lsr #31
94+ ; CHECK-NEXT: asr w16, w13, #7
95+ ; CHECK-NEXT: asr w1, w9, #7
96+ ; CHECK-NEXT: smov x18, v0.h[5]
97+ ; CHECK-NEXT: fmov s3, w14
98+ ; CHECK-NEXT: mov v2.h[1], w12
99+ ; CHECK-NEXT: add w12, w16, w13, lsr #31
100+ ; CHECK-NEXT: smov w13, v1.h[3]
101+ ; CHECK-NEXT: smov x14, v1.h[4]
102+ ; CHECK-NEXT: smull x16, w2, w8
103+ ; CHECK-NEXT: add w1, w1, w9, lsr #31
104+ ; CHECK-NEXT: add x17, x17, x0, lsr #32
105+ ; CHECK-NEXT: asr w0, w15, #7
106+ ; CHECK-NEXT: mov v3.h[1], w12
107+ ; CHECK-NEXT: smov w12, v1.h[4]
108+ ; CHECK-NEXT: smull x18, w18, w8
109+ ; CHECK-NEXT: mov v2.h[2], w1
110+ ; CHECK-NEXT: asr w1, w11, #7
111+ ; CHECK-NEXT: add w15, w0, w15, lsr #31
112+ ; CHECK-NEXT: add x13, x13, x16, lsr #32
113+ ; CHECK-NEXT: smov x16, v1.h[5]
114+ ; CHECK-NEXT: smull x14, w14, w8
115+ ; CHECK-NEXT: add w11, w1, w11, lsr #31
116+ ; CHECK-NEXT: smov x0, v0.h[6]
117+ ; CHECK-NEXT: add x10, x10, x18, lsr #32
118+ ; CHECK-NEXT: asr w1, w13, #7
119+ ; CHECK-NEXT: mov v3.h[2], w15
120+ ; CHECK-NEXT: smov w15, v1.h[5]
121+ ; CHECK-NEXT: add x12, x12, x14, lsr #32
122+ ; CHECK-NEXT: mov v2.h[3], w11
123+ ; CHECK-NEXT: asr w11, w17, #7
124+ ; CHECK-NEXT: add w13, w1, w13, lsr #31
125+ ; CHECK-NEXT: smull x16, w16, w8
126+ ; CHECK-NEXT: smov x14, v1.h[6]
127+ ; CHECK-NEXT: asr w18, w12, #7
128+ ; CHECK-NEXT: add w11, w11, w17, lsr #31
129+ ; CHECK-NEXT: smov w9, v0.h[6]
130+ ; CHECK-NEXT: mov v3.h[3], w13
131+ ; CHECK-NEXT: smull x17, w0, w8
132+ ; CHECK-NEXT: smov x0, v1.h[7]
133+ ; CHECK-NEXT: add x13, x15, x16, lsr #32
134+ ; CHECK-NEXT: add w12, w18, w12, lsr #31
135+ ; CHECK-NEXT: smov w16, v1.h[6]
136+ ; CHECK-NEXT: mov v2.h[4], w11
137+ ; CHECK-NEXT: smov x11, v0.h[7]
138+ ; CHECK-NEXT: smull x14, w14, w8
139+ ; CHECK-NEXT: asr w15, w10, #7
140+ ; CHECK-NEXT: asr w18, w13, #7
141+ ; CHECK-NEXT: smov w1, v0.h[7]
142+ ; CHECK-NEXT: mov v3.h[4], w12
143+ ; CHECK-NEXT: add x9, x9, x17, lsr #32
144+ ; CHECK-NEXT: add w10, w15, w10, lsr #31
145+ ; CHECK-NEXT: add w12, w18, w13, lsr #31
146+ ; CHECK-NEXT: add x13, x16, x14, lsr #32
147+ ; CHECK-NEXT: smov w14, v1.h[7]
148+ ; CHECK-NEXT: smull x11, w11, w8
149+ ; CHECK-NEXT: smull x8, w0, w8
150+ ; CHECK-NEXT: mov v2.h[5], w10
151+ ; CHECK-NEXT: asr w10, w9, #7
152+ ; CHECK-NEXT: mov v3.h[5], w12
153+ ; CHECK-NEXT: asr w12, w13, #7
154+ ; CHECK-NEXT: add w9, w10, w9, lsr #31
155+ ; CHECK-NEXT: add x10, x1, x11, lsr #32
156+ ; CHECK-NEXT: add w11, w12, w13, lsr #31
157+ ; CHECK-NEXT: add x8, x14, x8, lsr #32
158+ ; CHECK-NEXT: mov v2.h[6], w9
159+ ; CHECK-NEXT: asr w9, w10, #7
160+ ; CHECK-NEXT: mov v3.h[6], w11
161+ ; CHECK-NEXT: asr w11, w8, #7
162+ ; CHECK-NEXT: add w9, w9, w10, lsr #31
163+ ; CHECK-NEXT: add w8, w11, w8, lsr #31
164+ ; CHECK-NEXT: mov v2.h[7], w9
165+ ; CHECK-NEXT: mov v3.h[7], w8
166+ ; CHECK-NEXT: mov v0.16b, v2.16b
167+ ; CHECK-NEXT: mov v1.16b, v3.16b
168+ ; CHECK-NEXT: ret
169+ %div = sdiv exact <16 x i16 > %x , splat (i16 255 )
170+ ret <16 x i16 > %div
171+ }
0 commit comments