Nicolas Pitre <nico@cam.org>

Nicolas Pitre <nico@cam.org>
* arm/lib1funcs.asm (ARM_DIV_MOD_BODY) split into ARM_DIV_BODY
and ARM_MOD_BODY.
(ARM_MOD_BODY): rewritten. added clz insns for __ARM_ARCH__ >= 5.
(ARM_DIV_BODY): added clz insns for __ARM_ARCH__ >= 5,
added better divisor alignment in the other case.
(ARM_DIV2_ORDER): added, finds the order of a single bit divisor.
(__divsi3, __udivsi3, __modsi3, __umodsi3): rewritten using the
macros above, add fast exits for divisor >= dividend, etc.

From-SVN: r71940
This commit is contained in:
Nicolas Pitre 2003-09-30 10:30:32 +00:00 committed by Richard Earnshaw
parent 47edf59e90
commit 6a436e5e45
2 changed files with 233 additions and 117 deletions

View File

@ -1,3 +1,14 @@
2003-09-30 Nicolas Pitre <nico@cam.org>
* arm/lib1funcs.asm (ARM_DIV_MOD_BODY) split into ARM_DIV_BODY
and ARM_MOD_BODY.
(ARM_MOD_BODY): rewritten. added clz insns for __ARM_ARCH__ >= 5.
(ARM_DIV_BODY): added clz insns for __ARM_ARCH__ >= 5,
added better divisor alignment in the other case.
(ARM_DIV2_ORDER): added, finds the order of a single bit divisor.
(__divsi3, __udivsi3, __modsi3, __umodsi3): rewritten using the
macros above, add fast exits for divisor >= dividend, etc.
2003-09-30 Nicolas Pitre <nico@cam.org>
* arm/ieee754-df.S: split compilation of fixunsdfsi from

View File

@ -208,6 +208,7 @@ _L__\name: /* A hook to tell gdb that we've switched to ARM */
.endm
#endif
#ifdef __thumb__
/* Register aliases. */
work .req r4 @ XXXX is this safe ?
@ -216,102 +217,180 @@ divisor .req r1
overdone .req r2
result .req r2
curbit .req r3
#endif
#if 0
ip .req r12
sp .req r13
lr .req r14
pc .req r15
#endif
/* ------------------------------------------------------------------------ */
/* Bodies of the division and modulo routines. */
/* ------------------------------------------------------------------------ */
.macro ARM_DIV_MOD_BODY modulo
LSYM(Loop1):
@ ARM_DIV_BODY: unsigned shift-and-subtract division core.
@   dividend - register holding the numerator; it is consumed by the loop
@              and, because the last pass may subtract too much, it may
@              not hold a valid remainder on exit
@   divisor  - register holding the denominator; shifted in place
@   result   - register that receives the quotient
@   curbit   - scratch register holding the quotient bit being tried
@ NOTE(review): the call sites visible in this file branch away beforehand
@ when the divisor is zero or is not below the dividend - presumably this
@ macro relies on that; confirm.
.macro ARM_DIV_BODY dividend, divisor, result, curbit
#if __ARM_ARCH__ >= 5
@ Line the divisor up with the dividend in one step: the difference of
@ the two leading-zero counts is the shift amount.
clz \curbit, \divisor
clz \result, \dividend
sub \result, \curbit, \result
@ curbit becomes the quotient bit that matches the shifted divisor.
mov \curbit, #1
mov \divisor, \divisor, lsl \result
mov \curbit, \curbit, lsl \result
@ The shift count is no longer needed; start with an empty quotient.
mov \result, #0
#else
@ Initially shift the divisor left 3 bits if possible,
@ set curbit accordingly. This allows for curbit to be located
@ at the left end of each 4 bit nibbles in the division loop
@ to save one loop in most cases.
tst \divisor, #0xe0000000
moveq \divisor, \divisor, lsl #3
moveq \curbit, #8
movne \curbit, #1
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
@ NOTE(review): the next five instructions use the register aliases
@ divisor/dividend/curbit instead of the macro arguments and branch to
@ LSYM(Loop1), which is not defined inside this macro. They look like
@ leftovers of the former ARM_DIV_MOD_BODY and duplicate the numbered
@ loop that follows - confirm and remove.
cmp divisor, #0x10000000
cmplo divisor, dividend
movlo divisor, divisor, lsl #4
movlo curbit, curbit, lsl #4
blo LSYM(Loop1)
1: cmp \divisor, #0x10000000
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
movlo \curbit, \curbit, lsl #4
blo 1b
LSYM(Lbignum):
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
@ NOTE(review): same remark as above for the LSYM(Lbignum) label and the
@ five alias-based instructions below; the numbered loop after them does
@ the same work on the macro arguments - confirm and remove.
cmp divisor, #0x80000000
cmplo divisor, dividend
movlo divisor, divisor, lsl #1
movlo curbit, curbit, lsl #1
blo LSYM(Lbignum)
1: cmp \divisor, #0x80000000
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
movlo \curbit, \curbit, lsl #1
blo 1b
LSYM(Loop3):
@ Test for possible subtractions. On the final pass, this may
@ subtract too much from the dividend ...
@ NOTE(review): nothing inside this macro branches to LSYM(Loop3); the
@ label presumably belongs to the former macro as well - confirm.
mov \result, #0
#endif
@ Division loop
@ Each pass tries the divisor at four successive positions (shifted right
@ by 0, 1, 2 and 3 bits), subtracting it wherever it still fits and
@ setting the matching quotient bit. Bits of curbit that are shifted out
@ at the bottom contribute nothing to the quotient.
1: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
orrhs \result, \result, \curbit
cmp \dividend, \divisor, lsr #1
subhs \dividend, \dividend, \divisor, lsr #1
orrhs \result, \result, \curbit, lsr #1
cmp \dividend, \divisor, lsr #2
subhs \dividend, \dividend, \divisor, lsr #2
orrhs \result, \result, \curbit, lsr #2
cmp \dividend, \divisor, lsr #3
subhs \dividend, \dividend, \divisor, lsr #3
orrhs \result, \result, \curbit, lsr #3
@ Leave the loop when nothing is left of the dividend or when curbit has
@ been shifted out completely; otherwise move down four bit positions.
cmp \dividend, #0 @ Early termination?
movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
movne \divisor, \divisor, lsr #4
bne 1b
.endm
/* ------------------------------------------------------------------------ */
@ ARM_DIV2_ORDER: compute the bit index of a divisor that has a single
@ bit set, so that a division by it can be done with a right shift.
@   divisor - register holding the divisor; shifted right in place on
@             the pre-ARMv5 path, left untouched on the clz path
@   order   - register that receives the bit index
@ NOTE(review): both paths assume exactly one bit is set in divisor; the
@ call sites visible in this file test divisor & (divisor - 1) before
@ coming here - confirm for any other user.
.macro ARM_DIV2_ORDER divisor, order
#if __ARM_ARCH__ >= 5
@ With one bit set, the index is 31 minus the number of leading zeros.
clz \order, \divisor
rsb \order, \order, #31
#else
@ Binary search: whenever the bit sits in the upper half of the range
@ still under consideration, shift it down and account for that shift
@ in order. The first step also initialises order (16 or 0).
cmp \divisor, #(1 << 16)
movhs \divisor, \divisor, lsr #16
movhs \order, #16
movlo \order, #0
cmp \divisor, #(1 << 8)
movhs \divisor, \divisor, lsr #8
addhs \order, \order, #8
cmp \divisor, #(1 << 4)
movhs \divisor, \divisor, lsr #4
addhs \order, \order, #4
@ A single-bit divisor is now 1, 2, 4 or 8. The value 8 adds 3; for the
@ other three values divisor >> 1 is exactly the index (0, 1 or 2).
cmp \divisor, #(1 << 2)
addhi \order, \order, #3
addls \order, \order, \divisor, lsr #1
#endif
.endm
/* ------------------------------------------------------------------------ */
@ ARM_MOD_BODY: unsigned shift-and-subtract remainder core.
@   dividend - register holding the numerator; reduced in place by the
@              compare/subtract steps below
@   divisor  - register holding the denominator; shifted in place
@   order    - scratch register: how many bit positions the divisor was
@              shifted left, then a countdown of the remaining steps
@   spare    - scratch register, only written on the clz path
@ NOTE(review): the call sites visible in this file return early when the
@ divisor is zero, one, a power of two, or not below the dividend -
@ presumably this macro relies on that; confirm.
.macro ARM_MOD_BODY dividend, divisor, order, spare
#if __ARM_ARCH__ >= 5
@ Line the divisor up with the dividend in one step; order is the
@ difference of the two leading-zero counts.
clz \order, \divisor
clz \spare, \dividend
sub \order, \order, \spare
mov \divisor, \divisor, lsl \order
@ NOTE(review): everything from the .if below to the matching .endif
@ tests a modulo argument that this macro does not declare and works on
@ the register aliases dividend/divisor/curbit/overdone/result instead
@ of the macro arguments. It looks like a leftover of the former
@ ARM_DIV_MOD_BODY - confirm and remove.
.if \modulo
@ ... so keep track of which subtractions are done in OVERDONE.
@ We can fix them up afterwards.
mov overdone, #0
cmp dividend, divisor
subhs dividend, dividend, divisor
cmp dividend, divisor, lsr #1
subhs dividend, dividend, divisor, lsr #1
orrhs overdone, overdone, curbit, ror #1
cmp dividend, divisor, lsr #2
subhs dividend, dividend, divisor, lsr #2
orrhs overdone, overdone, curbit, ror #2
cmp dividend, divisor, lsr #3
subhs dividend, dividend, divisor, lsr #3
orrhs overdone, overdone, curbit, ror #3
mov ip, curbit
.else
@ ... so keep track of which subtractions are done in RESULT.
@ The result will be ok, since the "bit" will have been
@ shifted out at the bottom.
cmp dividend, divisor
subhs dividend, dividend, divisor
orrhs result, result, curbit
cmp dividend, divisor, lsr #1
subhs dividend, dividend, divisor, lsr #1
orrhs result, result, curbit, lsr #1
cmp dividend, divisor, lsr #2
subhs dividend, dividend, divisor, lsr #2
orrhs result, result, curbit, lsr #2
cmp dividend, divisor, lsr #3
subhs dividend, dividend, divisor, lsr #3
orrhs result, result, curbit, lsr #3
.endif
#else
@ NOTE(review): the next four instructions use the register aliases and
@ branch to LSYM(Loop3), which is not defined inside this macro. They
@ look like the tail of the former division loop - confirm and remove.
cmp dividend, #0 @ Early termination?
movnes curbit, curbit, lsr #4 @ No, any more bits to do?
movne divisor, divisor, lsr #4
bne LSYM(Loop3)
@ No clz available: count the shift amount in order while aligning.
mov \order, #0
@ NOTE(review): the .if block below again tests the undeclared modulo
@ argument and uses the aliases overdone/ip/dividend/divisor; it looks
@ like the fix-up code of the former macro - confirm and remove.
.if \modulo
LSYM(Lfixup_dividend):
@ Any subtractions that we should not have done will be recorded in
@ the top three bits of OVERDONE. Exactly which were not needed
@ are governed by the position of the bit, stored in IP.
ands overdone, overdone, #0xe0000000
@ If we terminated early, because dividend became zero, then the
@ bit in ip will not be in the bottom nibble, and we should not
@ perform the additions below. We must test for this though
@ (rather relying upon the TSTs to prevent the additions) since
@ the bit in ip could be in the top two bits which might then match
@ with one of the smaller RORs.
tstne ip, #0x7
beq LSYM(Lgot_result)
tst overdone, ip, ror #3
addne dividend, dividend, divisor, lsr #3
tst overdone, ip, ror #2
addne dividend, dividend, divisor, lsr #2
tst overdone, ip, ror #1
addne dividend, dividend, divisor, lsr #1
.endif
@ Unless the divisor is very big, shift it up in multiples of
@ four bits, since this is the amount of unwinding in the main
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1: cmp \divisor, #0x10000000
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
addlo \order, \order, #4
blo 1b
@ NOTE(review): LSYM(Lgot_result) is only targeted by the alias-based
@ fix-up code above; presumably a leftover label - confirm.
LSYM(Lgot_result):
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1: cmp \divisor, #0x80000000
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
addlo \order, \order, #1
blo 1b
#endif
@ Perform all needed subtractions to keep only the remainder.
@ Do comparisons in batches of 4 first.
@ A divisor shifted left by order needs order + 1 compare/subtract
@ steps, so after the bias of 3 a non-negative order means that a full
@ batch of four is still due.
subs \order, \order, #3 @ yes, 3 is intended here
blt 2f
1: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
cmp \dividend, \divisor, lsr #1
subhs \dividend, \dividend, \divisor, lsr #1
cmp \dividend, \divisor, lsr #2
subhs \dividend, \dividend, \divisor, lsr #2
cmp \dividend, \divisor, lsr #3
subhs \dividend, \dividend, \divisor, lsr #3
@ Go round again only while the dividend is at least 1 (signed compare)
@ and another full batch remains; the divisor moves down four bit
@ positions either way.
cmp \dividend, #1
mov \divisor, \divisor, lsr #4
subges \order, \order, #4
bge 1b
@ Finished when no partial batch is left over (low two bits of order
@ are clear) or when the dividend has reached zero.
tst \order, #3
teqne \dividend, #0
beq 5f
@ Either 1, 2 or 3 comparisons/subtractions are left.
@ Comparing order against -2 selects the entry point: below means one
@ step (4f), equal means two steps (3f), above falls through to three.
2: cmn \order, #2
blt 4f
beq 3f
cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
3: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
4: cmp \dividend, \divisor
subhs \dividend, \dividend, \divisor
5:
.endm
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
@ -495,19 +574,29 @@ LSYM(Lgot_result):
RET
#else /* ARM version. */
subs r2, r1, #1
RETc(eq)
bcc LSYM(Ldiv0)
cmp r0, r1
bls 11f
tst r1, r2
beq 12f
cmp divisor, #0
beq LSYM(Ldiv0)
mov curbit, #1
mov result, #0
cmp dividend, divisor
blo LSYM(Lgot_result)
ARM_DIV_BODY r0, r1, r2, r3
ARM_DIV_MOD_BODY 0
mov r0, result
mov r0, r2
RET
11: moveq r0, #1
movne r0, #0
RET
12: ARM_DIV2_ORDER r1, r2
mov r0, r0, lsr r2
RET
#endif /* ARM version */
DIV_FUNC_END udivsi3
@ -537,15 +626,15 @@ LSYM(Lover10):
#else /* ARM version. */
cmp divisor, #0
beq LSYM(Ldiv0)
cmp divisor, #1
cmpne dividend, divisor
moveq dividend, #0
RETc(lo)
mov curbit, #1
subs r2, r1, #1 @ compare divisor with 1
bcc LSYM(Ldiv0)
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
RETc(ls)
ARM_DIV_MOD_BODY 1
ARM_MOD_BODY r0, r1, r2, r3
RET
@ -593,24 +682,41 @@ LSYM(Lover12):
#else /* ARM version. */
eor ip, dividend, divisor @ Save the sign of the result.
mov curbit, #1
mov result, #0
cmp divisor, #0
rsbmi divisor, divisor, #0 @ Loops below use unsigned.
cmp r1, #0
eor ip, r0, r1 @ save the sign of the result.
beq LSYM(Ldiv0)
cmp dividend, #0
rsbmi dividend, dividend, #0
cmp dividend, divisor
blo LSYM(Lgot_result)
rsbmi r1, r1, #0 @ loops below use unsigned.
subs r2, r1, #1 @ division by 1 or -1 ?
beq 10f
movs r3, r0
rsbmi r3, r0, #0 @ positive dividend value
cmp r3, r1
bls 11f
tst r1, r2 @ divisor is power of 2 ?
beq 12f
ARM_DIV_MOD_BODY 0
ARM_DIV_BODY r3, r1, r0, r2
mov r0, result
cmp ip, #0
rsbmi r0, r0, #0
RET
10: teq ip, r0 @ same sign ?
rsbmi r0, r0, #0
RET
11: movlo r0, #0
moveq r0, ip, asr #31
orreq r0, r0, #1
RET
12: ARM_DIV2_ORDER r1, r2
cmp ip, #0
mov r0, r3, lsr r2
rsbmi r0, r0, #0
RET
#endif /* ARM version */
DIV_FUNC_END divsi3
@ -653,23 +759,22 @@ LSYM(Lover12):
#else /* ARM version. */
cmp divisor, #0
rsbmi divisor, divisor, #0 @ Loops below use unsigned.
cmp r1, #0
beq LSYM(Ldiv0)
@ Need to save the sign of the dividend, unfortunately, we need
@ ip later on; this is faster than pushing lr and using that.
str dividend, [sp, #-4]!
cmp dividend, #0 @ Test dividend against zero
rsbmi dividend, dividend, #0 @ If negative make positive
cmp dividend, divisor @ else if zero return zero
blo LSYM(Lgot_result) @ if smaller return dividend
mov curbit, #1
rsbmi r1, r1, #0 @ loops below use unsigned.
movs ip, r0 @ preserve sign of dividend
rsbmi r0, r0, #0 @ if negative make positive
subs r2, r1, #1 @ compare divisor with 1
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
bls 10f
ARM_DIV_MOD_BODY 1
ARM_MOD_BODY r0, r1, r2, r3
ldr ip, [sp], #4
cmp ip, #0
rsbmi dividend, dividend, #0
10: cmp ip, #0
rsbmi r0, r0, #0
RET
#endif /* ARM version */