Nicolas Pitre <nico@cam.org>
Nicolas Pitre <nico@cam.org> * arm/lib1funcs.asm (ARM_DIV_MOD_BODY) split into ARM_DIV_BODY and ARM_MOD_BODY. (ARM_MOD_BODY): rewritten. added clz insns for __ARM_ARCH__ >= 5. (ARM_DIV_BODY): added clz insns for __ARM_ARCH__ >= 5, added better divisor alignment in the other case. (ARM_DIV2_ORDER): added, finds the order of a single bit divisor. (__divsi3, __udivsi3, __modsi3, __umodsi3): rewritten using the macros above, add fast exits for divisor >= dividend, etc. From-SVN: r71940
This commit is contained in:
parent
47edf59e90
commit
6a436e5e45
@ -1,3 +1,14 @@
|
||||
2003-09-30 Nicolas Pitre <nico@cam.org>
|
||||
|
||||
* arm/lib1funcs.asm (ARM_DIV_MOD_BODY) split into ARM_DIV_BODY
|
||||
and ARM_MOD_BODY.
|
||||
(ARM_MOD_BODY): rewritten. added clz insns for __ARM_ARCH__ >= 5.
|
||||
(ARM_DIV_BODY): added clz insns for __ARM_ARCH__ >= 5,
|
||||
added better divisor alignment in the other case.
|
||||
(ARM_DIV2_ORDER): added, finds the order of a single bit divisor.
|
||||
(__divsi3, __udivsi3, __modsi3, __umodsi3): rewritten using the
|
||||
macros above, add fast exits for divisor >= dividend, etc.
|
||||
|
||||
2003-09-30 Nicolas Pitre <nico@cam.org>
|
||||
|
||||
* arm/ieee754-df.S: split compilation of fixunsdfsi from
|
||||
|
@ -208,6 +208,7 @@ _L__\name: /* A hook to tell gdb that we've switched to ARM */
|
||||
.endm
|
||||
#endif
|
||||
|
||||
#ifdef __thumb__
|
||||
/* Register aliases. */
|
||||
|
||||
work .req r4 @ XXXX is this safe ?
|
||||
@ -216,102 +217,180 @@ divisor .req r1
|
||||
overdone .req r2
|
||||
result .req r2
|
||||
curbit .req r3
|
||||
#endif
|
||||
#if 0
|
||||
ip .req r12
|
||||
sp .req r13
|
||||
lr .req r14
|
||||
pc .req r15
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------ */
|
||||
/* Bodies of the division and modulo routines. */
|
||||
/* ------------------------------------------------------------------------ */
|
||||
.macro ARM_DIV_MOD_BODY modulo
|
||||
LSYM(Loop1):
|
||||
.macro ARM_DIV_BODY dividend, divisor, result, curbit
|
||||
|
||||
#if __ARM_ARCH__ >= 5
|
||||
|
||||
clz \curbit, \divisor
|
||||
clz \result, \dividend
|
||||
sub \result, \curbit, \result
|
||||
mov \curbit, #1
|
||||
mov \divisor, \divisor, lsl \result
|
||||
mov \curbit, \curbit, lsl \result
|
||||
mov \result, #0
|
||||
|
||||
#else
|
||||
|
||||
@ Initially shift the divisor left 3 bits if possible,
|
||||
@ set curbit accordingly. This allows for curbit to be located
|
||||
@ at the left end of each 4 bit nibbles in the division loop
|
||||
@ to save one loop in most cases.
|
||||
tst \divisor, #0xe0000000
|
||||
moveq \divisor, \divisor, lsl #3
|
||||
moveq \curbit, #8
|
||||
movne \curbit, #1
|
||||
|
||||
@ Unless the divisor is very big, shift it up in multiples of
|
||||
@ four bits, since this is the amount of unwinding in the main
|
||||
@ division loop. Continue shifting until the divisor is
|
||||
@ larger than the dividend.
|
||||
cmp divisor, #0x10000000
|
||||
cmplo divisor, dividend
|
||||
movlo divisor, divisor, lsl #4
|
||||
movlo curbit, curbit, lsl #4
|
||||
blo LSYM(Loop1)
|
||||
1: cmp \divisor, #0x10000000
|
||||
cmplo \divisor, \dividend
|
||||
movlo \divisor, \divisor, lsl #4
|
||||
movlo \curbit, \curbit, lsl #4
|
||||
blo 1b
|
||||
|
||||
LSYM(Lbignum):
|
||||
@ For very big divisors, we must shift it a bit at a time, or
|
||||
@ we will be in danger of overflowing.
|
||||
cmp divisor, #0x80000000
|
||||
cmplo divisor, dividend
|
||||
movlo divisor, divisor, lsl #1
|
||||
movlo curbit, curbit, lsl #1
|
||||
blo LSYM(Lbignum)
|
||||
1: cmp \divisor, #0x80000000
|
||||
cmplo \divisor, \dividend
|
||||
movlo \divisor, \divisor, lsl #1
|
||||
movlo \curbit, \curbit, lsl #1
|
||||
blo 1b
|
||||
|
||||
LSYM(Loop3):
|
||||
@ Test for possible subtractions. On the final pass, this may
|
||||
@ subtract too much from the dividend ...
|
||||
mov \result, #0
|
||||
|
||||
#endif
|
||||
|
||||
@ Division loop
|
||||
1: cmp \dividend, \divisor
|
||||
subhs \dividend, \dividend, \divisor
|
||||
orrhs \result, \result, \curbit
|
||||
cmp \dividend, \divisor, lsr #1
|
||||
subhs \dividend, \dividend, \divisor, lsr #1
|
||||
orrhs \result, \result, \curbit, lsr #1
|
||||
cmp \dividend, \divisor, lsr #2
|
||||
subhs \dividend, \dividend, \divisor, lsr #2
|
||||
orrhs \result, \result, \curbit, lsr #2
|
||||
cmp \dividend, \divisor, lsr #3
|
||||
subhs \dividend, \dividend, \divisor, lsr #3
|
||||
orrhs \result, \result, \curbit, lsr #3
|
||||
cmp \dividend, #0 @ Early termination?
|
||||
movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
|
||||
movne \divisor, \divisor, lsr #4
|
||||
bne 1b
|
||||
|
||||
.endm
|
||||
/* ------------------------------------------------------------------------ */
|
||||
.macro ARM_DIV2_ORDER divisor, order
|
||||
|
||||
#if __ARM_ARCH__ >= 5
|
||||
|
||||
clz \order, \divisor
|
||||
rsb \order, \order, #31
|
||||
|
||||
#else
|
||||
|
||||
cmp \divisor, #(1 << 16)
|
||||
movhs \divisor, \divisor, lsr #16
|
||||
movhs \order, #16
|
||||
movlo \order, #0
|
||||
|
||||
cmp \divisor, #(1 << 8)
|
||||
movhs \divisor, \divisor, lsr #8
|
||||
addhs \order, \order, #8
|
||||
|
||||
cmp \divisor, #(1 << 4)
|
||||
movhs \divisor, \divisor, lsr #4
|
||||
addhs \order, \order, #4
|
||||
|
||||
cmp \divisor, #(1 << 2)
|
||||
addhi \order, \order, #3
|
||||
addls \order, \order, \divisor, lsr #1
|
||||
|
||||
#endif
|
||||
|
||||
.endm
|
||||
/* ------------------------------------------------------------------------ */
|
||||
.macro ARM_MOD_BODY dividend, divisor, order, spare
|
||||
|
||||
#if __ARM_ARCH__ >= 5
|
||||
|
||||
clz \order, \divisor
|
||||
clz \spare, \dividend
|
||||
sub \order, \order, \spare
|
||||
mov \divisor, \divisor, lsl \order
|
||||
|
||||
.if \modulo
|
||||
@ ... so keep track of which subtractions are done in OVERDONE.
|
||||
@ We can fix them up afterwards.
|
||||
mov overdone, #0
|
||||
cmp dividend, divisor
|
||||
subhs dividend, dividend, divisor
|
||||
cmp dividend, divisor, lsr #1
|
||||
subhs dividend, dividend, divisor, lsr #1
|
||||
orrhs overdone, overdone, curbit, ror #1
|
||||
cmp dividend, divisor, lsr #2
|
||||
subhs dividend, dividend, divisor, lsr #2
|
||||
orrhs overdone, overdone, curbit, ror #2
|
||||
cmp dividend, divisor, lsr #3
|
||||
subhs dividend, dividend, divisor, lsr #3
|
||||
orrhs overdone, overdone, curbit, ror #3
|
||||
mov ip, curbit
|
||||
.else
|
||||
@ ... so keep track of which subtractions are done in RESULT.
|
||||
@ The result will be ok, since the "bit" will have been
|
||||
@ shifted out at the bottom.
|
||||
cmp dividend, divisor
|
||||
subhs dividend, dividend, divisor
|
||||
orrhs result, result, curbit
|
||||
cmp dividend, divisor, lsr #1
|
||||
subhs dividend, dividend, divisor, lsr #1
|
||||
orrhs result, result, curbit, lsr #1
|
||||
cmp dividend, divisor, lsr #2
|
||||
subhs dividend, dividend, divisor, lsr #2
|
||||
orrhs result, result, curbit, lsr #2
|
||||
cmp dividend, divisor, lsr #3
|
||||
subhs dividend, dividend, divisor, lsr #3
|
||||
orrhs result, result, curbit, lsr #3
|
||||
.endif
|
||||
#else
|
||||
|
||||
cmp dividend, #0 @ Early termination?
|
||||
movnes curbit, curbit, lsr #4 @ No, any more bits to do?
|
||||
movne divisor, divisor, lsr #4
|
||||
bne LSYM(Loop3)
|
||||
mov \order, #0
|
||||
|
||||
.if \modulo
|
||||
LSYM(Lfixup_dividend):
|
||||
@ Any subtractions that we should not have done will be recorded in
|
||||
@ the top three bits of OVERDONE. Exactly which were not needed
|
||||
@ are governed by the position of the bit, stored in IP.
|
||||
ands overdone, overdone, #0xe0000000
|
||||
@ If we terminated early, because dividend became zero, then the
|
||||
@ bit in ip will not be in the bottom nibble, and we should not
|
||||
@ perform the additions below. We must test for this though
|
||||
@ (rather relying upon the TSTs to prevent the additions) since
|
||||
@ the bit in ip could be in the top two bits which might then match
|
||||
@ with one of the smaller RORs.
|
||||
tstne ip, #0x7
|
||||
beq LSYM(Lgot_result)
|
||||
tst overdone, ip, ror #3
|
||||
addne dividend, dividend, divisor, lsr #3
|
||||
tst overdone, ip, ror #2
|
||||
addne dividend, dividend, divisor, lsr #2
|
||||
tst overdone, ip, ror #1
|
||||
addne dividend, dividend, divisor, lsr #1
|
||||
.endif
|
||||
@ Unless the divisor is very big, shift it up in multiples of
|
||||
@ four bits, since this is the amount of unwinding in the main
|
||||
@ division loop. Continue shifting until the divisor is
|
||||
@ larger than the dividend.
|
||||
1: cmp \divisor, #0x10000000
|
||||
cmplo \divisor, \dividend
|
||||
movlo \divisor, \divisor, lsl #4
|
||||
addlo \order, \order, #4
|
||||
blo 1b
|
||||
|
||||
LSYM(Lgot_result):
|
||||
@ For very big divisors, we must shift it a bit at a time, or
|
||||
@ we will be in danger of overflowing.
|
||||
1: cmp \divisor, #0x80000000
|
||||
cmplo \divisor, \dividend
|
||||
movlo \divisor, \divisor, lsl #1
|
||||
addlo \order, \order, #1
|
||||
blo 1b
|
||||
|
||||
#endif
|
||||
|
||||
@ Perform all needed substractions to keep only the reminder.
|
||||
@ Do comparisons in batch of 4 first.
|
||||
subs \order, \order, #3 @ yes, 3 is intended here
|
||||
blt 2f
|
||||
|
||||
1: cmp \dividend, \divisor
|
||||
subhs \dividend, \dividend, \divisor
|
||||
cmp \dividend, \divisor, lsr #1
|
||||
subhs \dividend, \dividend, \divisor, lsr #1
|
||||
cmp \dividend, \divisor, lsr #2
|
||||
subhs \dividend, \dividend, \divisor, lsr #2
|
||||
cmp \dividend, \divisor, lsr #3
|
||||
subhs \dividend, \dividend, \divisor, lsr #3
|
||||
cmp \dividend, #1
|
||||
mov \divisor, \divisor, lsr #4
|
||||
subges \order, \order, #4
|
||||
bge 1b
|
||||
|
||||
tst \order, #3
|
||||
teqne \dividend, #0
|
||||
beq 5f
|
||||
|
||||
@ Either 1, 2 or 3 comparison/substractions are left.
|
||||
2: cmn \order, #2
|
||||
blt 4f
|
||||
beq 3f
|
||||
cmp \dividend, \divisor
|
||||
subhs \dividend, \dividend, \divisor
|
||||
mov \divisor, \divisor, lsr #1
|
||||
3: cmp \dividend, \divisor
|
||||
subhs \dividend, \dividend, \divisor
|
||||
mov \divisor, \divisor, lsr #1
|
||||
4: cmp \dividend, \divisor
|
||||
subhs \dividend, \dividend, \divisor
|
||||
5:
|
||||
.endm
|
||||
/* ------------------------------------------------------------------------ */
|
||||
.macro THUMB_DIV_MOD_BODY modulo
|
||||
@ -495,19 +574,29 @@ LSYM(Lgot_result):
|
||||
RET
|
||||
|
||||
#else /* ARM version. */
|
||||
|
||||
subs r2, r1, #1
|
||||
RETc(eq)
|
||||
bcc LSYM(Ldiv0)
|
||||
cmp r0, r1
|
||||
bls 11f
|
||||
tst r1, r2
|
||||
beq 12f
|
||||
|
||||
cmp divisor, #0
|
||||
beq LSYM(Ldiv0)
|
||||
mov curbit, #1
|
||||
mov result, #0
|
||||
cmp dividend, divisor
|
||||
blo LSYM(Lgot_result)
|
||||
ARM_DIV_BODY r0, r1, r2, r3
|
||||
|
||||
ARM_DIV_MOD_BODY 0
|
||||
|
||||
mov r0, result
|
||||
mov r0, r2
|
||||
RET
|
||||
|
||||
11: moveq r0, #1
|
||||
movne r0, #0
|
||||
RET
|
||||
|
||||
12: ARM_DIV2_ORDER r1, r2
|
||||
|
||||
mov r0, r0, lsr r2
|
||||
RET
|
||||
|
||||
#endif /* ARM version */
|
||||
|
||||
DIV_FUNC_END udivsi3
|
||||
@ -537,15 +626,15 @@ LSYM(Lover10):
|
||||
|
||||
#else /* ARM version. */
|
||||
|
||||
cmp divisor, #0
|
||||
beq LSYM(Ldiv0)
|
||||
cmp divisor, #1
|
||||
cmpne dividend, divisor
|
||||
moveq dividend, #0
|
||||
RETc(lo)
|
||||
mov curbit, #1
|
||||
subs r2, r1, #1 @ compare divisor with 1
|
||||
bcc LSYM(Ldiv0)
|
||||
cmpne r0, r1 @ compare dividend with divisor
|
||||
moveq r0, #0
|
||||
tsthi r1, r2 @ see if divisor is power of 2
|
||||
andeq r0, r0, r2
|
||||
RETc(ls)
|
||||
|
||||
ARM_DIV_MOD_BODY 1
|
||||
ARM_MOD_BODY r0, r1, r2, r3
|
||||
|
||||
RET
|
||||
|
||||
@ -593,24 +682,41 @@ LSYM(Lover12):
|
||||
|
||||
#else /* ARM version. */
|
||||
|
||||
eor ip, dividend, divisor @ Save the sign of the result.
|
||||
mov curbit, #1
|
||||
mov result, #0
|
||||
cmp divisor, #0
|
||||
rsbmi divisor, divisor, #0 @ Loops below use unsigned.
|
||||
cmp r1, #0
|
||||
eor ip, r0, r1 @ save the sign of the result.
|
||||
beq LSYM(Ldiv0)
|
||||
cmp dividend, #0
|
||||
rsbmi dividend, dividend, #0
|
||||
cmp dividend, divisor
|
||||
blo LSYM(Lgot_result)
|
||||
rsbmi r1, r1, #0 @ loops below use unsigned.
|
||||
subs r2, r1, #1 @ division by 1 or -1 ?
|
||||
beq 10f
|
||||
movs r3, r0
|
||||
rsbmi r3, r0, #0 @ positive dividend value
|
||||
cmp r3, r1
|
||||
bls 11f
|
||||
tst r1, r2 @ divisor is power of 2 ?
|
||||
beq 12f
|
||||
|
||||
ARM_DIV_MOD_BODY 0
|
||||
ARM_DIV_BODY r3, r1, r0, r2
|
||||
|
||||
mov r0, result
|
||||
cmp ip, #0
|
||||
rsbmi r0, r0, #0
|
||||
RET
|
||||
|
||||
10: teq ip, r0 @ same sign ?
|
||||
rsbmi r0, r0, #0
|
||||
RET
|
||||
|
||||
11: movlo r0, #0
|
||||
moveq r0, ip, asr #31
|
||||
orreq r0, r0, #1
|
||||
RET
|
||||
|
||||
12: ARM_DIV2_ORDER r1, r2
|
||||
|
||||
cmp ip, #0
|
||||
mov r0, r3, lsr r2
|
||||
rsbmi r0, r0, #0
|
||||
RET
|
||||
|
||||
#endif /* ARM version */
|
||||
|
||||
DIV_FUNC_END divsi3
|
||||
@ -653,23 +759,22 @@ LSYM(Lover12):
|
||||
|
||||
#else /* ARM version. */
|
||||
|
||||
cmp divisor, #0
|
||||
rsbmi divisor, divisor, #0 @ Loops below use unsigned.
|
||||
cmp r1, #0
|
||||
beq LSYM(Ldiv0)
|
||||
@ Need to save the sign of the dividend, unfortunately, we need
|
||||
@ ip later on; this is faster than pushing lr and using that.
|
||||
str dividend, [sp, #-4]!
|
||||
cmp dividend, #0 @ Test dividend against zero
|
||||
rsbmi dividend, dividend, #0 @ If negative make positive
|
||||
cmp dividend, divisor @ else if zero return zero
|
||||
blo LSYM(Lgot_result) @ if smaller return dividend
|
||||
mov curbit, #1
|
||||
rsbmi r1, r1, #0 @ loops below use unsigned.
|
||||
movs ip, r0 @ preserve sign of dividend
|
||||
rsbmi r0, r0, #0 @ if negative make positive
|
||||
subs r2, r1, #1 @ compare divisor with 1
|
||||
cmpne r0, r1 @ compare dividend with divisor
|
||||
moveq r0, #0
|
||||
tsthi r1, r2 @ see if divisor is power of 2
|
||||
andeq r0, r0, r2
|
||||
bls 10f
|
||||
|
||||
ARM_DIV_MOD_BODY 1
|
||||
ARM_MOD_BODY r0, r1, r2, r3
|
||||
|
||||
ldr ip, [sp], #4
|
||||
cmp ip, #0
|
||||
rsbmi dividend, dividend, #0
|
||||
10: cmp ip, #0
|
||||
rsbmi r0, r0, #0
|
||||
RET
|
||||
|
||||
#endif /* ARM version */
|
||||
|
Loading…
Reference in New Issue
Block a user