blob: a64218572d3ad3815f08c82cc0e7f56af49dd4f0 [file] [log] [blame]
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
8
/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
*/
Gabe Black169c0df2013-10-08 18:24:10 -070030
31
32#if defined __GNUC__
33
34#include <arch/asm.h>
35
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Core of the unsigned 32-bit shift-and-subtract division.
 *
 *   dividend  in:  numerator.  Clobbered; it is NOT guaranteed to hold the
 *                  remainder on exit, because the trailing steps of the
 *                  unrolled loop may subtract divisor fractions for which
 *                  no quotient bit is left.
 *   divisor   in:  denominator.  Clobbered.
 *   result    out: quotient.
 *   curbit    scratch: quotient bit matching the current divisor shift.
 *
 * The expansions in this file are reached only after the caller has
 * branched away for a zero divisor, a divisor of one, dividend <= divisor
 * and power-of-two divisors.  Condition flags are clobbered.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __COREBOOT_ARM_ARCH__ >= 5

	@ Shift the divisor left until its top set bit lines up with the
	@ top set bit of the dividend, and start curbit at that position.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per iteration, at
	@ divisor, divisor/2, divisor/4 and divisor/8, each one setting
	@ the matching quotient bit when the subtraction is taken.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4
	bne	1b

.endm
100
101
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute the shift count equivalent to dividing by a power-of-two
 * divisor, i.e. the index of its single set bit.
 *
 *   divisor  in:  a power of two (callers check this with "tst r1, r2"
 *                 before expanding the macro).  Clobbered by the pre-v5
 *                 path, preserved by the CLZ path.
 *   order    out: bit index of the set bit in divisor.
 *
 * Condition flags are clobbered by the pre-v5 path.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __COREBOOT_ARM_ARCH__ >= 5

	@ order = 31 - (number of leading zero bits)
	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the set bit: 16, then 8, then 4 bit strides.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ divisor is now 1, 2, 4 or 8: add 3 for 8, otherwise add
	@ divisor / 2, which is 0, 1 or 2 for 1, 2 or 4.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
131
132
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Core of the unsigned 32-bit modulo: shift the divisor up, then walk it
 * back down with compare/subtract steps, keeping only the remainder.
 *
 *   dividend  in:  numerator.  out: remainder.
 *   divisor   in:  denominator.  Clobbered.
 *   order     scratch: number of bit positions the divisor was shifted
 *             up by; order + 1 compare/subtract steps are needed.
 *   spare     scratch, written only by the CLZ path.
 *
 * The expansions in this file are reached only after the caller has
 * returned early for a zero divisor, a divisor of one, dividend <= divisor
 * and power-of-two divisors.  Condition flags are clobbered.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __COREBOOT_ARM_ARCH__ >= 5

	@ Line the top set bit of the divisor up with that of the dividend.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Finished if no steps are left over (order is a multiple of 4)
	@ or if the remainder is already zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order is -3, -2 or -1 here, for 1, 2 or 3 steps respectively.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
202
203
/*
 * unsigned __udivsi3(unsigned dividend, unsigned divisor)
 * Also exported under the name __aeabi_uidiv.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Clobb: r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
.global __aeabi_uidiv
__aeabi_uidiv:

	subs	r2, r1, #1		@ r2 = divisor - 1
	moveq	pc, lr			@ divisor == 1: quotient is the dividend
	bcc	Ldiv0			@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f			@ dividend <= divisor: quotient is 0 or 1
	tst	r1, r2			@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	mov	pc, lr

	@ Flags still come from "cmp r0, r1": equal gives 1, lower gives 0.
11:	moveq	r0, #1
	movne	r0, #0
	mov	pc, lr

	@ Power-of-two divisor: the division is a plain right shift.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	mov	pc, lr

.type __aeabi_uidiv, %function
.size __aeabi_uidiv, .-__aeabi_uidiv
ENDPROC(__udivsi3)
Gabe Black169c0df2013-10-08 18:24:10 -0700233
/*
 * unsigned __umodsi3(unsigned dividend, unsigned divisor)
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = remainder
 * Clobb: r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.  The conditional instructions below
 * form one flag chain and must stay in this order.
 */
ENTRY(__umodsi3)

	subs	r2, r1, #1		@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1			@ compare dividend with divisor
	moveq	r0, #0			@ divisor == 1 or equal operands: 0
	tsthi	r1, r2			@ see if divisor is power of 2
	andeq	r0, r0, r2		@ power of 2: mask with divisor - 1
	movls	pc, lr			@ done unless the full loop is needed

	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr

ENDPROC(__umodsi3)
249
/*
 * int __divsi3(int dividend, int divisor)
 * Also exported under the name __aeabi_idiv.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Clobb: r1, r2, r3, ip, flags
 *
 * The division is done on absolute values; ip keeps dividend ^ divisor so
 * that its sign bit gives the sign of the result.  A zero divisor branches
 * to Ldiv0.
 */
ENTRY(__divsi3)
.global __aeabi_idiv
__aeabi_idiv:

	cmp	r1, #0
	eor	ip, r0, r1		@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	subs	r2, r1, #1		@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0		@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2			@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Divisor was 1 or -1: negate the dividend when the divisor was
	@ negative (ip ^ r0 has the sign of the original divisor).
10:	teq	ip, r0			@ same sign ?
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Flags still come from "cmp r3, r1": lower gives 0, equal gives
	@ 1 or -1 depending on the sign saved in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	mov	pc, lr

	@ Power-of-two divisor: shift the absolute dividend, then apply
	@ the sign of the result.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	mov	pc, lr

.type __aeabi_idiv, %function
.size __aeabi_idiv, .-__aeabi_idiv
ENDPROC(__divsi3)
Gabe Black169c0df2013-10-08 18:24:10 -0700292
/*
 * int __modsi3(int dividend, int divisor)
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = remainder, carrying the sign of the dividend
 * Clobb: r1, r2, r3, ip, flags
 *
 * The remainder is computed on absolute values and then negated when the
 * original dividend (kept in ip) was negative.  A zero divisor branches
 * to Ldiv0.  The conditional instructions below form one flag chain and
 * must stay in this order.
 */
ENTRY(__modsi3)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	movs	ip, r0			@ preserve sign of dividend
	rsbmi	r0, r0, #0		@ if negative make positive
	subs	r2, r1, #1		@ compare divisor with 1
	cmpne	r0, r1			@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2			@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

ENDPROC(__modsi3)
314
/*
 * __aeabi_uidivmod: unsigned division returning quotient and remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 * Clobb: r2, r3, flags (ip and lr are saved and restored)
 *
 * The remainder is derived from the quotient returned by __aeabi_uidiv
 * as dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)

	stmfd	sp!, {r0, r1, ip, lr}	@ save dividend, divisor, ip, lr
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_uidivmod)
325
/*
 * __aeabi_idivmod: signed division returning quotient and remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 * Clobb: r2, r3, flags (ip and lr are saved and restored)
 *
 * The remainder is derived from the quotient returned by __aeabi_idiv
 * as dividend - quotient * divisor.
 */
ENTRY(__aeabi_idivmod)
	stmfd	sp!, {r0, r1, ip, lr}	@ save dividend, divisor, ip, lr
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	mov	pc, lr

ENDPROC(__aeabi_idivmod)
335
336
/*
 * Ldiv0: common division-by-zero path, reached by a branch (not a call)
 * from the division and modulo entry points above, so lr still holds the
 * return address of their caller.
 *
 * Calls __div0 and, if that returns, hands 0 back to the original caller
 * in r0.  lr is kept on the stack in an 8-byte slot across the call.
 */
Ldiv0:
	str	lr, [sp, #-8]!		@ push lr, moving sp down by 8
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ pop the saved lr straight into pc
ENDPROC(Ldiv0)
343
344#endif