From 6a436e5e453997882b10105880ef33a4322c944f Mon Sep 17 00:00:00 2001
From: Nicolas Pitre
Date: Tue, 30 Sep 2003 10:30:32 +0000
Subject: [PATCH] Nicolas Pitre

Nicolas Pitre

	* arm/lib1funcs.asm (ARM_DIV_MOD_BODY) split into ARM_DIV_BODY
	and ARM_MOD_BODY.
	(ARM_MOD_BODY): rewritten. added clz insns for __ARM_ARCH__ >= 5.
	(ARM_DIV_BODY): added clz insns for __ARM_ARCH__ >= 5,
	added better divisor alignment in the other case.
	(ARM_DIV2_ORDER): added, finds the order of a single bit divisor.
	(__divsi3, __udivsi3, __modsi3, __umodsi3): rewritten using the
	macros above, add fast exits for divisor >= dividend, etc.

From-SVN: r71940
---
 gcc/ChangeLog                |  11 ++
 gcc/config/arm/lib1funcs.asm | 339 +++++++++++++++++++++++------------
 2 files changed, 233 insertions(+), 117 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cb968684491..1a505c1652c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2003-09-30 Nicolas Pitre
+
+	* arm/lib1funcs.asm (ARM_DIV_MOD_BODY) split into ARM_DIV_BODY
+	and ARM_MOD_BODY.
+	(ARM_MOD_BODY): rewritten. added clz insns for __ARM_ARCH__ >= 5.
+	(ARM_DIV_BODY): added clz insns for __ARM_ARCH__ >= 5,
+	added better divisor alignment in the other case.
+	(ARM_DIV2_ORDER): added, finds the order of a single bit divisor.
+	(__divsi3, __udivsi3, __modsi3, __umodsi3): rewritten using the
+	macros above, add fast exits for divisor >= dividend, etc.
+
 2003-09-30 Nicolas Pitre
 
 	* arm/ieee754-df.S: split compilation of fixunsdfsi from
diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm
index 34cf986d03e..fe14070e812 100644
--- a/gcc/config/arm/lib1funcs.asm
+++ b/gcc/config/arm/lib1funcs.asm
@@ -208,6 +208,7 @@ _L__\name: /* A hook to tell gdb that we've switched to ARM */
 .endm
 #endif
 
+#ifdef __thumb__
 /* Register aliases. */
 
 work		.req	r4	@ XXXX is this safe ?
@@ -216,102 +217,180 @@ divisor		.req	r1
 overdone	.req	r2
 result		.req	r2
 curbit		.req	r3
+#endif
 #if 0
 ip		.req	r12
 sp		.req	r13
 lr		.req	r14
 pc		.req	r15
 #endif
+
 /* ------------------------------------------------------------------------ */
 /* Bodies of the division and modulo routines. */
 /* ------------------------------------------------------------------------ */
-.macro ARM_DIV_MOD_BODY modulo
-LSYM(Loop1):
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __ARM_ARCH__ >= 5
+
+	clz	\curbit, \divisor
+	clz	\result, \dividend
+	sub	\result, \curbit, \result	@ shift = clz(divisor) - clz(dividend)
+	mov	\curbit, #1
+	mov	\divisor, \divisor, lsl \result	@ divisor <<= shift
+	mov	\curbit, \curbit, lsl \result	@ curbit = 1 << shift
+	mov	\result, #0			@ quotient bits are ORed in below
+
+#else
+
+	@ Initially shift the divisor left 3 bits if possible,
+	@ set curbit accordingly. This allows for curbit to be located
+	@ at the left end of each 4 bit nibble in the division loop
+	@ to save one loop in most cases.
+	tst	\divisor, #0xe0000000		@ are the top 3 bits clear?
+	moveq	\divisor, \divisor, lsl #3
+	moveq	\curbit, #8
+	movne	\curbit, #1
+
 	@ Unless the divisor is very big, shift it up in multiples of
 	@ four bits, since this is the amount of unwinding in the main
 	@ division loop. Continue shifting until the divisor is
 	@ larger than the dividend.
-	cmp	divisor, #0x10000000
-	cmplo	divisor, dividend
-	movlo	divisor, divisor, lsl #4
-	movlo	curbit, curbit, lsl #4
-	blo	LSYM(Loop1)
+1:	cmp	\divisor, #0x10000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #4
+	movlo	\curbit, \curbit, lsl #4
+	blo	1b
 
-LSYM(Lbignum):
 	@ For very big divisors, we must shift it a bit at a time, or
 	@ we will be in danger of overflowing.
-	cmp	divisor, #0x80000000
-	cmplo	divisor, dividend
-	movlo	divisor, divisor, lsl #1
-	movlo	curbit, curbit, lsl #1
-	blo	LSYM(Lbignum)
+1:	cmp	\divisor, #0x80000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #1
+	movlo	\curbit, \curbit, lsl #1
+	blo	1b
 
-LSYM(Loop3):
-	@ Test for possible subtractions. On the final pass, this may
-	@ subtract too much from the dividend ...
+	mov	\result, #0			@ quotient bits are ORed in below
+
+#endif
+
+	@ Division loop: each pass tries divisor, divisor/2, /4 and /8.
+1:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	orrhs	\result, \result, \curbit
+	cmp	\dividend, \divisor, lsr #1
+	subhs	\dividend, \dividend, \divisor, lsr #1
+	orrhs	\result, \result, \curbit, lsr #1
+	cmp	\dividend, \divisor, lsr #2
+	subhs	\dividend, \dividend, \divisor, lsr #2
+	orrhs	\result, \result, \curbit, lsr #2
+	cmp	\dividend, \divisor, lsr #3
+	subhs	\dividend, \dividend, \divisor, lsr #3
+	orrhs	\result, \result, \curbit, lsr #3
+	cmp	\dividend, #0			@ Early termination?
+	movnes	\curbit, \curbit, lsr #4	@ No, any more bits to do?
+	movne	\divisor, \divisor, lsr #4
+	bne	1b
+
+.endm
+/* ------------------------------------------------------------------------ */
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __ARM_ARCH__ >= 5
+
+	clz	\order, \divisor
+	rsb	\order, \order, #31		@ order = 31 - clz(divisor)
+
+#else
+
+	cmp	\divisor, #(1 << 16)
+	movhs	\divisor, \divisor, lsr #16
+	movhs	\order, #16
+	movlo	\order, #0
+
+	cmp	\divisor, #(1 << 8)
+	movhs	\divisor, \divisor, lsr #8
+	addhs	\order, \order, #8
+
+	cmp	\divisor, #(1 << 4)
+	movhs	\divisor, \divisor, lsr #4
+	addhs	\order, \order, #4
+
+	cmp	\divisor, #(1 << 2)
+	addhi	\order, \order, #3		@ single bit left is 8: add 3
+	addls	\order, \order, \divisor, lsr #1	@ 1, 2, 4: add 0, 1, 2
+
+#endif
+
+.endm
+/* ------------------------------------------------------------------------ */
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __ARM_ARCH__ >= 5
+
+	clz	\order, \divisor
+	clz	\spare, \dividend
+	sub	\order, \order, \spare		@ order = clz(divisor) - clz(dividend)
+	mov	\divisor, \divisor, lsl \order
 
-	.if \modulo
-	@ ... so keep track of which subtractions are done in OVERDONE.
-	@ We can fix them up afterwards.
-	mov	overdone, #0
-	cmp	dividend, divisor
-	subhs	dividend, dividend, divisor
-	cmp	dividend, divisor, lsr #1
-	subhs	dividend, dividend, divisor, lsr #1
-	orrhs	overdone, overdone, curbit, ror #1
-	cmp	dividend, divisor, lsr #2
-	subhs	dividend, dividend, divisor, lsr #2
-	orrhs	overdone, overdone, curbit, ror #2
-	cmp	dividend, divisor, lsr #3
-	subhs	dividend, dividend, divisor, lsr #3
-	orrhs	overdone, overdone, curbit, ror #3
-	mov	ip, curbit
-	.else
-	@ ... so keep track of which subtractions are done in RESULT.
-	@ The result will be ok, since the "bit" will have been
-	@ shifted out at the bottom.
-	cmp	dividend, divisor
-	subhs	dividend, dividend, divisor
-	orrhs	result, result, curbit
-	cmp	dividend, divisor, lsr #1
-	subhs	dividend, dividend, divisor, lsr #1
-	orrhs	result, result, curbit, lsr #1
-	cmp	dividend, divisor, lsr #2
-	subhs	dividend, dividend, divisor, lsr #2
-	orrhs	result, result, curbit, lsr #2
-	cmp	dividend, divisor, lsr #3
-	subhs	dividend, dividend, divisor, lsr #3
-	orrhs	result, result, curbit, lsr #3
-	.endif
+#else
 
-	cmp	dividend, #0			@ Early termination?
-	movnes	curbit, curbit, lsr #4	@ No, any more bits to do?
-	movne	divisor, divisor, lsr #4
-	bne	LSYM(Loop3)
+	mov	\order, #0
 
-	.if \modulo
-LSYM(Lfixup_dividend):
-	@ Any subtractions that we should not have done will be recorded in
-	@ the top three bits of OVERDONE. Exactly which were not needed
-	@ are governed by the position of the bit, stored in IP.
-	ands	overdone, overdone, #0xe0000000
-	@ If we terminated early, because dividend became zero, then the
-	@ bit in ip will not be in the bottom nibble, and we should not
-	@ perform the additions below. We must test for this though
-	@ (rather relying upon the TSTs to prevent the additions) since
-	@ the bit in ip could be in the top two bits which might then match
-	@ with one of the smaller RORs.
-	tstne	ip, #0x7
-	beq	LSYM(Lgot_result)
-	tst	overdone, ip, ror #3
-	addne	dividend, dividend, divisor, lsr #3
-	tst	overdone, ip, ror #2
-	addne	dividend, dividend, divisor, lsr #2
-	tst	overdone, ip, ror #1
-	addne	dividend, dividend, divisor, lsr #1
-	.endif
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop. Continue shifting until the divisor is
+	@ larger than the dividend.
+1:	cmp	\divisor, #0x10000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #4
+	addlo	\order, \order, #4
+	blo	1b
 
-LSYM(Lgot_result):
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+1:	cmp	\divisor, #0x80000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #1
+	addlo	\order, \order, #1
+	blo	1b
+
+#endif
+
+	@ Perform all needed subtractions to keep only the remainder.
+	@ Do comparisons in batch of 4 first.
+	subs	\order, \order, #3		@ yes, 3 is intended here
+	blt	2f				@ order was < 3: no full batch of 4
+
+1:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	cmp	\dividend, \divisor, lsr #1
+	subhs	\dividend, \dividend, \divisor, lsr #1
+	cmp	\dividend, \divisor, lsr #2
+	subhs	\dividend, \dividend, \divisor, lsr #2
+	cmp	\dividend, \divisor, lsr #3
+	subhs	\dividend, \dividend, \divisor, lsr #3
+	cmp	\dividend, #1
+	mov	\divisor, \divisor, lsr #4
+	subges	\order, \order, #4
+	bge	1b
+
+	tst	\order, #3
+	teqne	\dividend, #0
+	beq	5f				@ low order bits clear or dividend 0: done
+
+	@ Either 1, 2 or 3 comparison/subtractions are left.
+2:	cmn	\order, #2
+	blt	4f
+	beq	3f
+	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	mov	\divisor, \divisor, lsr #1
+3:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	mov	\divisor, \divisor, lsr #1
+4:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+5:
 .endm
 /* ------------------------------------------------------------------------ */
 .macro THUMB_DIV_MOD_BODY modulo
@@ -495,19 +574,29 @@ LSYM(Lgot_result):
 	RET
 
 #else /* ARM version. */
+
+	subs	r2, r1, #1			@ compare divisor with 1
+	RETc(eq)
+	bcc	LSYM(Ldiv0)			@ borrow: divisor was 0
+	cmp	r0, r1				@ compare dividend with divisor
+	bls	11f				@ dividend <= divisor
+	tst	r1, r2				@ see if divisor is power of 2
+	beq	12f
 	
-	cmp	divisor, #0
-	beq	LSYM(Ldiv0)
-	mov	curbit, #1
-	mov	result, #0
-	cmp	dividend, divisor
-	blo	LSYM(Lgot_result)
+	ARM_DIV_BODY r0, r1, r2, r3
 	
-	ARM_DIV_MOD_BODY 0
-	
-	mov	r0, result
+	mov	r0, r2
 	RET
 
+11:	moveq	r0, #1				@ dividend == divisor: quotient 1
+	movne	r0, #0				@ dividend < divisor: quotient 0
+	RET
+
+12:	ARM_DIV2_ORDER r1, r2
+
+	mov	r0, r0, lsr r2			@ quotient = dividend >> order
+	RET
+
 #endif /* ARM version */
 
 	DIV_FUNC_END udivsi3
@@ -537,15 +626,15 @@ LSYM(Lover10):
 
 #else /* ARM version. */
 	
-	cmp	divisor, #0
-	beq	LSYM(Ldiv0)
-	cmp	divisor, #1
-	cmpne	dividend, divisor
-	moveq	dividend, #0
-	RETc(lo)
-	mov	curbit, #1
+	subs	r2, r1, #1			@ compare divisor with 1
+	bcc	LSYM(Ldiv0)
+	cmpne	r0, r1				@ compare dividend with divisor
+	moveq	r0, #0
+	tsthi	r1, r2				@ see if divisor is power of 2
+	andeq	r0, r0, r2
+	RETc(ls)
 
-	ARM_DIV_MOD_BODY 1
+	ARM_MOD_BODY r0, r1, r2, r3
 	
 	RET
 
@@ -593,24 +682,41 @@ LSYM(Lover12):
 
 #else /* ARM version. */
 	
-	eor	ip, dividend, divisor		@ Save the sign of the result.
-	mov	curbit, #1
-	mov	result, #0
-	cmp	divisor, #0
-	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
+	cmp	r1, #0
+	eor	ip, r0, r1			@ save the sign of the result.
 	beq	LSYM(Ldiv0)
-	cmp	dividend, #0
-	rsbmi	dividend, dividend, #0
-	cmp	dividend, divisor
-	blo	LSYM(Lgot_result)
+	rsbmi	r1, r1, #0			@ loops below use unsigned.
+	subs	r2, r1, #1			@ division by 1 or -1 ?
+	beq	10f
+	movs	r3, r0
+	rsbmi	r3, r0, #0			@ positive dividend value
+	cmp	r3, r1
+	bls	11f
+	tst	r1, r2				@ divisor is power of 2 ?
+	beq	12f
 
-	ARM_DIV_MOD_BODY 0
+	ARM_DIV_BODY r3, r1, r0, r2
 	
-	mov	r0, result
 	cmp	ip, #0
 	rsbmi	r0, r0, #0
 	RET
 
+10:	teq	ip, r0				@ same sign ?
+	rsbmi	r0, r0, #0
+	RET
+
+11:	movlo	r0, #0				@ magnitude below divisor: 0
+	moveq	r0, ip, asr #31			@ equal magnitudes: 0 or -1 from sign
+	orreq	r0, r0, #1			@ ... which becomes +1 or -1
+	RET
+
+12:	ARM_DIV2_ORDER r1, r2
+
+	cmp	ip, #0
+	mov	r0, r3, lsr r2
+	rsbmi	r0, r0, #0
+	RET
+
 #endif /* ARM version */
 
 	DIV_FUNC_END divsi3
@@ -653,23 +759,22 @@ LSYM(Lover12):
 
 #else /* ARM version. */
 	
-	cmp	divisor, #0
-	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
+	cmp	r1, #0
 	beq	LSYM(Ldiv0)
-	@ Need to save the sign of the dividend, unfortunately, we need
-	@ ip later on; this is faster than pushing lr and using that.
-	str	dividend, [sp, #-4]!
-	cmp	dividend, #0			@ Test dividend against zero
-	rsbmi	dividend, dividend, #0		@ If negative make positive
-	cmp	dividend, divisor		@ else if zero return zero
-	blo	LSYM(Lgot_result)		@ if smaller return dividend
-	mov	curbit, #1
+	rsbmi	r1, r1, #0			@ loops below use unsigned.
+	movs	ip, r0				@ preserve sign of dividend
+	rsbmi	r0, r0, #0			@ if negative make positive
+	subs	r2, r1, #1			@ compare divisor with 1
+	cmpne	r0, r1				@ compare dividend with divisor
+	moveq	r0, #0
+	tsthi	r1, r2				@ see if divisor is power of 2
+	andeq	r0, r0, r2
+	bls	10f				@ a fast path left the result in r0
 
-	ARM_DIV_MOD_BODY 1
+	ARM_MOD_BODY r0, r1, r2, r3
 
-	ldr	ip, [sp], #4
-	cmp	ip, #0
-	rsbmi	dividend, dividend, #0
+10:	cmp	ip, #0
+	rsbmi	r0, r0, #0
 	RET
 
 #endif /* ARM version */