2ecc7cad44
From-SVN: r10059
1600 lines
30 KiB
NASM
1600 lines
30 KiB
NASM
@ libgcc1 routines for ARM cpu.
|
|
@ Division and remainder, from Appendix E of the Sparc Version 8
|
|
@ Architecture Manual, with fixes from Gordon Irlam.
|
|
@ Rewritten for the ARM by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
|
|
|
|
/* Copyright (C) 1995 Free Software Foundation, Inc.
|
|
|
|
This file is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by the
|
|
Free Software Foundation; either version 2, or (at your option) any
|
|
later version.
|
|
|
|
In addition to the permissions in the GNU General Public License, the
|
|
Free Software Foundation gives you unlimited permission to link the
|
|
compiled version of this file with other programs, and to distribute
|
|
those programs without any restriction coming from the use of this
|
|
file. (The General Public License restrictions do apply in other
|
|
respects; for example, they cover modification of the file, and
|
|
distribution when not linked into another program.)
|
|
|
|
This file is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; see the file COPYING. If not, write to
|
|
the Free Software Foundation, 59 Temple Place - Suite 330,
|
|
Boston, MA 02111-1307, USA. */
|
|
|
|
/* As a special exception, if you link this library with other files,
|
|
some of which are compiled with GCC, to produce an executable,
|
|
this library does not by itself cause the resulting executable
|
|
to be covered by the GNU General Public License.
|
|
This exception does not however invalidate any other reasons why
|
|
the executable file might be covered by the GNU General Public License. */
|
|
|
|
/*
|
|
* Input: dividend and divisor in r0 and r1 respectively.
|
|
*
|
|
* m4 parameters:
|
|
* NAME name of function to generate
|
|
* OP OP=div => r0 / r1; OP=mod => r0 % r1
|
|
* S S=true => signed; S=false => unsigned
|
|
*
|
|
* Algorithm parameters:
|
|
* N how many bits per iteration we try to get (4)
|
|
* WORDSIZE total number of bits (32)
|
|
*
|
|
* Derived constants:
|
|
* TOPBITS number of bits in the top `decade' of a number
|
|
*
|
|
* Important variables:
|
|
* Q the partial quotient under development (initially 0)
|
|
* R the remainder so far, initially the dividend
|
|
* ITER number of main division loop iterations required;
|
|
* equal to ceil(log2(quotient) / N). Note that this
|
|
* is the log base (2^N) of the quotient.
|
|
* V the current comparand, initially divisor*2^(ITER*N-1)
|
|
*
|
|
* Cost:
|
|
* Current estimate for non-large dividend is
|
|
* ceil(log2(quotient) / N) * (10 + 7N/2) + C
|
|
* A large dividend is one greater than 2^(31-TOPBITS) and takes a
|
|
* different path, as the upper bits of the quotient must be developed
|
|
* one bit at a time.
|
|
*/
|
|
|
|
/*
|
|
define(N, `4')dnl
|
|
define(WORDSIZE, `32')dnl
|
|
define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N)))dnl
|
|
dnl
|
|
define(dividend, `r0')dnl
|
|
define(divisor, `r1')dnl
|
|
define(Q, `r2')dnl
|
|
define(R, `r3')dnl
|
|
define(ITER, `ip')dnl
|
|
define(V, `lr')dnl
|
|
dnl
|
|
dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
|
|
define(T, `r4')dnl
|
|
define(SC, `r5')dnl
|
|
ifelse(S, `true', `define(SIGN, `r6')')dnl
|
|
define(REGLIST, `ifelse(S, `true', `{r4, r5, r6,', `{r4, r5,')')dnl
|
|
define(ret, `ldmia sp!, REGLIST pc}')dnl
|
|
dnl
|
|
dnl This is the recursive definition for developing quotient digits.
|
|
dnl
|
|
dnl Parameters:
|
|
dnl $1 the current depth, 1 <= $1 <= N
|
|
dnl $2 the current accumulation of quotient bits
|
|
dnl N max depth
|
|
dnl
|
|
dnl We add a new bit to $2 and either recurse or insert the bits in
|
|
dnl the quotient. R, Q, and V are inputs and outputs as defined above;
|
|
dnl the condition codes are expected to reflect the input R, and are
|
|
dnl modified to reflect the output R.
|
|
dnl
|
|
define(DEVELOP_QUOTIENT_BITS,
|
|
` @ depth $1, accumulated bits $2
|
|
mov V, V, lsr #1
|
|
blt L.$1.eval(2^N+$2+999)
|
|
@ remainder is positive
|
|
subs R, R, V
|
|
ifelse($1, N,
|
|
` ifelse(eval(2*$2+1<0), `0',
|
|
`add Q, Q, `#'eval($2*2+1)',
|
|
`sub Q, Q, `#'eval(-($2*2+1))')
|
|
|
|
b 9f
|
|
', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
|
|
L.$1.eval(2^N+$2+999):
|
|
@ remainder is negative
|
|
adds R, R, V
|
|
ifelse($1, N,
|
|
` ifelse(eval(2*$2-1<0), `0',
|
|
`add Q, Q, `#'eval($2*2-1)',
|
|
`sub Q, Q, `#'eval(-($2*2-1))')
|
|
b 9f
|
|
|
|
', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
|
|
ifelse($1, 1, `9:')')dnl
|
|
|
|
#include "trap.h"
|
|
|
|
ip .req r12
|
|
sp .req r13
|
|
lr .req r14
|
|
pc .req r15
|
|
.text
|
|
.globl NAME
|
|
.align 0
|
|
NAME:
|
|
stmdb sp!, REGLIST lr}
|
|
ifelse(S, `true',
|
|
` @ compute sign of result; if neither is negative, no problem
|
|
ifelse(OP, `div', `eor SIGN, divisor, dividend @ compute sign',
|
|
`mov SIGN, dividend')
|
|
cmp divisor, #0
|
|
rsbmi divisor, divisor, #0
|
|
beq Ldiv_zero
|
|
mov V, divisor
|
|
movs R, dividend
|
|
rsbmi R, R, #0 @ make dividend nonnegative
|
|
',
|
|
` @ Ready to divide. Compute size of quotient; scale comparand.
|
|
movs V, divisor
|
|
mov R, dividend
|
|
beq Ldiv_zero
|
|
')
|
|
|
|
cmp R, V @ if divisor exceeds dividend, done
|
|
mov Q, #0
|
|
bcc Lgot_result @ (and algorithm fails otherwise)
|
|
mov T, `#'(1 << (WORDSIZE - TOPBITS - 1))
|
|
cmp R, T
|
|
mov ITER, #0
|
|
bcc Lnot_really_big
|
|
|
|
@ `Here the dividend is >= 2^(31-N) or so. We must be careful here,
|
|
@ as our usual N-at-a-shot divide step will cause overflow and havoc.
|
|
@ The number of bits in the result here is N*ITER+SC, where SC <= N.
|
|
@ Compute ITER in an unorthodox manner: know we need to shift V into
|
|
@ the top decade: so do not even bother to compare to R.'
|
|
mov SC, #1
|
|
1:
|
|
cmp V, T
|
|
bcs 3f
|
|
mov V, V, lsl `#'N
|
|
add ITER, ITER, #1
|
|
b 1b
|
|
|
|
@ Now compute SC.
|
|
2: adds V, V, V
|
|
add SC, SC, #1
|
|
bcc Lnot_too_big
|
|
|
|
@ We get here if the divisor overflowed while shifting.
|
|
@ This means that R has the high-order bit set.
|
|
@ Restore V and subtract from R.
|
|
mov T, T, lsl `#'TOPBITS
|
|
mov V, V, lsr #1
|
|
add V, T, V
|
|
sub SC, SC, #1
|
|
b Ldo_single_div
|
|
|
|
Lnot_too_big:
|
|
3: cmp V, R
|
|
bcc 2b
|
|
@ beq Ldo_single_div
|
|
|
|
/-* NB: these are commented out in the V8-Sparc manual as well *-/
|
|
/-* (I do not understand this) *-/
|
|
@ V > R: went too far: back up 1 step
|
|
@ srl V, 1, V
|
|
@ dec SC
|
|
@ do single-bit divide steps
|
|
@
|
|
@ We have to be careful here. We know that R >= V, so we can do the
|
|
@ first divide step without thinking. BUT, the others are conditional,
|
|
@ and are only done if R >= 0. Because both R and V may have the high-
|
|
@ order bit set in the first step, just falling into the regular
|
|
@ division loop will mess up the first time around.
|
|
@ So we unroll slightly...
|
|
Ldo_single_div:
|
|
subs SC, SC, #1
|
|
blt Lend_regular_divide
|
|
sub R, R, V
|
|
mov Q, #1
|
|
b Lend_single_divloop
|
|
Lsingle_divloop:
|
|
cmp R, #0
|
|
mov Q, Q, lsl #1
|
|
mov V, V, lsr #1
|
|
@ R >= 0
|
|
subpl R, R, V
|
|
addpl Q, Q, #1
|
|
@ R < 0
|
|
addmi R, R, V
|
|
submi Q, Q, #1
|
|
Lend_single_divloop:
|
|
subs SC, SC, #1
|
|
bge Lsingle_divloop
|
|
b Lend_regular_divide
|
|
|
|
1:
|
|
add ITER, ITER, #1
|
|
Lnot_really_big:
|
|
mov V, V, lsl `#'N
|
|
cmp V, R
|
|
bls 1b
|
|
@
|
|
@ HOW CAN ITER EVER BE -1 HERE ?????
|
|
@
|
|
cmn ITER, #1
|
|
beq Lgot_result
|
|
|
|
Ldivloop:
|
|
cmp R, #0 @ set up for initial iteration
|
|
mov Q, Q, lsl `#'N
|
|
DEVELOP_QUOTIENT_BITS(1, 0)
|
|
Lend_regular_divide:
|
|
subs ITER, ITER, #1
|
|
bge Ldivloop
|
|
cmp R, #0
|
|
@ non-restoring fixup here (one instruction only!)
|
|
ifelse(OP, `div',
|
|
` sublt Q, Q, #1
|
|
', ` addlt R, divisor, R
|
|
')
|
|
|
|
Lgot_result:
|
|
ifelse(S, `true',
|
|
` @ check to see if answer should be < 0
|
|
cmp SIGN, #0
|
|
ifelse(OP, `div', `rsbmi Q, Q, #0', `rsbmi R, R, #0')
|
|
')
|
|
ifelse(OP, `div', `mov r0, Q', `mov r0, R')
|
|
ret
|
|
|
|
Ldiv_zero:
|
|
@ Divide by zero trap. If it returns, return 0 (about as
|
|
@ wrong as possible, but that is what SunOS does...).
|
|
bl ___div0
|
|
mov r0, #0
|
|
ret
|
|
*/
|
|
|
|
#ifdef L_udivsi3
|
|
|
|
ip .req r12
|
|
sp .req r13
|
|
lr .req r14
|
|
pc .req r15
|
|
.text
|
|
.globl ___udivsi3
|
|
.align 0
|
|
___udivsi3:
|
|
stmdb sp!, {r4, r5, lr}
|
|
@ Ready to divide. Compute size of quotient; scale comparand.
|
|
movs lr, r1
|
|
mov r3, r0
|
|
beq Ldiv_zero
|
|
|
|
|
|
cmp r3, lr @ if r1 exceeds r0, done
|
|
mov r2, #0
|
|
bcc Lgot_result @ (and algorithm fails otherwise)
|
|
mov r4, #(1 << (32 - 4 - 1))
|
|
cmp r3, r4
|
|
mov ip, #0
|
|
bcc Lnot_really_big
|
|
|
|
@ Here the dividend is >= 2^(31-N) or so. We must be careful here,
|
|
@ as our usual N-at-a-shot divide step will cause overflow and havoc.
|
|
@ The number of bits in the result here is N*ITER+SC, where SC <= N.
|
|
@ Compute ITER in an unorthodox manner: know we need to shift V into
|
|
@ the top decade: so do not even bother to compare to R.
|
|
mov r5, #1
|
|
1:
|
|
cmp lr, r4
|
|
bcs 3f
|
|
mov lr, lr, lsl #4
|
|
add ip, ip, #1
|
|
b 1b
|
|
|
|
@ Now compute r5.
|
|
2: adds lr, lr, lr
|
|
add r5, r5, #1
|
|
bcc Lnot_too_big
|
|
|
|
@ We get here if the r1 overflowed while shifting.
|
|
@ This means that r3 has the high-order bit set.
|
|
@ Restore lr and subtract from r3.
|
|
mov r4, r4, lsl #4
|
|
mov lr, lr, lsr #1
|
|
add lr, r4, lr
|
|
sub r5, r5, #1
|
|
b Ldo_single_div
|
|
|
|
Lnot_too_big:
|
|
3: cmp lr, r3
|
|
bcc 2b
|
|
@ beq Ldo_single_div
|
|
|
|
/* NB: these are commented out in the V8-Sparc manual as well */
|
|
/* (I do not understand this) */
|
|
@ lr > r3: went too far: back up 1 step
|
|
@ srl lr, 1, lr
|
|
@ dec r5
|
|
@ do single-bit divide steps
|
|
@
|
|
@ We have to be careful here. We know that r3 >= lr, so we can do the
|
|
@ first divide step without thinking. BUT, the others are conditional,
|
|
@ and are only done if r3 >= 0. Because both r3 and lr may have the high-
|
|
@ order bit set in the first step, just falling into the regular
|
|
@ division loop will mess up the first time around.
|
|
@ So we unroll slightly...
|
|
Ldo_single_div:
|
|
subs r5, r5, #1
|
|
blt Lend_regular_divide
|
|
sub r3, r3, lr
|
|
mov r2, #1
|
|
b Lend_single_divloop
|
|
Lsingle_divloop:
|
|
cmp r3, #0
|
|
mov r2, r2, lsl #1
|
|
mov lr, lr, lsr #1
|
|
@ r3 >= 0
|
|
subpl r3, r3, lr
|
|
addpl r2, r2, #1
|
|
@ r3 < 0
|
|
addmi r3, r3, lr
|
|
submi r2, r2, #1
|
|
Lend_single_divloop:
|
|
subs r5, r5, #1
|
|
bge Lsingle_divloop
|
|
b Lend_regular_divide
|
|
|
|
1:
|
|
add ip, ip, #1
|
|
Lnot_really_big:
|
|
mov lr, lr, lsl #4
|
|
cmp lr, r3
|
|
bls 1b
|
|
@
|
|
@ HOW CAN ip EVER BE -1 HERE ?????
|
|
@
|
|
cmn ip, #1
|
|
beq Lgot_result
|
|
|
|
Ldivloop:
|
|
cmp r3, #0 @ set up for initial iteration
|
|
mov r2, r2, lsl #4
|
|
@ depth 1, accumulated bits 0
|
|
mov lr, lr, lsr #1
|
|
blt L.1.1015
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 2, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.2.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 3, accumulated bits 3
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1018
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits 7
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1022
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #15
|
|
|
|
b 9f
|
|
|
|
L.4.1022:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #13
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1018:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits 5
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1020
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #11
|
|
|
|
b 9f
|
|
|
|
L.4.1020:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #9
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
L.2.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 3, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits 3
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1018
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #7
|
|
|
|
b 9f
|
|
|
|
L.4.1018:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #5
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #3
|
|
|
|
b 9f
|
|
|
|
L.4.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #1
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
|
|
L.1.1015:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 2, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.2.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 3, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #1
|
|
|
|
b 9f
|
|
|
|
L.4.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #3
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits -3
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1012
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #5
|
|
|
|
b 9f
|
|
|
|
L.4.1012:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #7
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
L.2.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 3, accumulated bits -3
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1012
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits -5
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1010
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #9
|
|
|
|
b 9f
|
|
|
|
L.4.1010:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #11
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1012:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits -7
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1008
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #13
|
|
|
|
b 9f
|
|
|
|
L.4.1008:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #15
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
|
|
9:
|
|
Lend_regular_divide:
|
|
subs ip, ip, #1
|
|
bge Ldivloop
|
|
cmp r3, #0
|
|
@ non-restoring fixup here (one instruction only!)
|
|
sublt r2, r2, #1
|
|
|
|
|
|
Lgot_result:
|
|
|
|
mov r0, r2
|
|
ldmia sp!, {r4, r5, pc}
|
|
|
|
Ldiv_zero:
|
|
@ Divide by zero trap. If it returns, return 0 (about as
|
|
@ wrong as possible, but that is what SunOS does...).
|
|
bl ___div0
|
|
mov r0, #0
|
|
ldmia sp!, {r4, r5, pc}
|
|
|
|
#endif /* L_udivsi3 */
|
|
|
|
#ifdef L_divsi3
|
|
|
|
ip .req r12
|
|
sp .req r13
|
|
lr .req r14
|
|
pc .req r15
|
|
.text
|
|
.globl ___divsi3
|
|
.align 0
|
|
___divsi3:
|
|
stmdb sp!, {r4, r5, r6, lr}
|
|
@ compute sign of result; if neither is negative, no problem
|
|
eor r6, r1, r0 @ compute sign
|
|
cmp r1, #0
|
|
rsbmi r1, r1, #0
|
|
beq Ldiv_zero
|
|
mov lr, r1
|
|
movs r3, r0
|
|
rsbmi r3, r3, #0 @ make dividend nonnegative
|
|
|
|
|
|
cmp r3, lr @ if r1 exceeds r0, done
|
|
mov r2, #0
|
|
bcc Lgot_result @ (and algorithm fails otherwise)
|
|
mov r4, #(1 << (32 - 4 - 1))
|
|
cmp r3, r4
|
|
mov ip, #0
|
|
bcc Lnot_really_big
|
|
|
|
@ Here the dividend is >= 2^(31-N) or so. We must be careful here,
|
|
@ as our usual N-at-a-shot divide step will cause overflow and havoc.
|
|
@ The number of bits in the result here is N*ITER+SC, where SC <= N.
|
|
@ Compute ITER in an unorthodox manner: know we need to shift V into
|
|
@ the top decade: so do not even bother to compare to R.
|
|
mov r5, #1
|
|
1:
|
|
cmp lr, r4
|
|
bcs 3f
|
|
mov lr, lr, lsl #4
|
|
add ip, ip, #1
|
|
b 1b
|
|
|
|
@ Now compute r5.
|
|
2: adds lr, lr, lr
|
|
add r5, r5, #1
|
|
bcc Lnot_too_big
|
|
|
|
@ We get here if the r1 overflowed while shifting.
|
|
@ This means that r3 has the high-order bit set.
|
|
@ Restore lr and subtract from r3.
|
|
mov r4, r4, lsl #4
|
|
mov lr, lr, lsr #1
|
|
add lr, r4, lr
|
|
sub r5, r5, #1
|
|
b Ldo_single_div
|
|
|
|
Lnot_too_big:
|
|
3: cmp lr, r3
|
|
bcc 2b
|
|
@ beq Ldo_single_div
|
|
|
|
/* NB: these are commented out in the V8-Sparc manual as well */
|
|
/* (I do not understand this) */
|
|
@ lr > r3: went too far: back up 1 step
|
|
@ srl lr, 1, lr
|
|
@ dec r5
|
|
@ do single-bit divide steps
|
|
@
|
|
@ We have to be careful here. We know that r3 >= lr, so we can do the
|
|
@ first divide step without thinking. BUT, the others are conditional,
|
|
@ and are only done if r3 >= 0. Because both r3 and lr may have the high-
|
|
@ order bit set in the first step, just falling into the regular
|
|
@ division loop will mess up the first time around.
|
|
@ So we unroll slightly...
|
|
Ldo_single_div:
|
|
subs r5, r5, #1
|
|
blt Lend_regular_divide
|
|
sub r3, r3, lr
|
|
mov r2, #1
|
|
b Lend_single_divloop
|
|
Lsingle_divloop:
|
|
cmp r3, #0
|
|
mov r2, r2, lsl #1
|
|
mov lr, lr, lsr #1
|
|
@ r3 >= 0
|
|
subpl r3, r3, lr
|
|
addpl r2, r2, #1
|
|
@ r3 < 0
|
|
addmi r3, r3, lr
|
|
submi r2, r2, #1
|
|
Lend_single_divloop:
|
|
subs r5, r5, #1
|
|
bge Lsingle_divloop
|
|
b Lend_regular_divide
|
|
|
|
1:
|
|
add ip, ip, #1
|
|
Lnot_really_big:
|
|
mov lr, lr, lsl #4
|
|
cmp lr, r3
|
|
bls 1b
|
|
@
|
|
@ HOW CAN ip EVER BE -1 HERE ?????
|
|
@
|
|
cmn ip, #1
|
|
beq Lgot_result
|
|
|
|
Ldivloop:
|
|
cmp r3, #0 @ set up for initial iteration
|
|
mov r2, r2, lsl #4
|
|
@ depth 1, accumulated bits 0
|
|
mov lr, lr, lsr #1
|
|
blt L.1.1015
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 2, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.2.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 3, accumulated bits 3
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1018
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits 7
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1022
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #15
|
|
|
|
b 9f
|
|
|
|
L.4.1022:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #13
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1018:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits 5
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1020
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #11
|
|
|
|
b 9f
|
|
|
|
L.4.1020:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #9
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
L.2.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 3, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits 3
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1018
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #7
|
|
|
|
b 9f
|
|
|
|
L.4.1018:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #5
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #3
|
|
|
|
b 9f
|
|
|
|
L.4.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #1
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
|
|
L.1.1015:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 2, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.2.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 3, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #1
|
|
|
|
b 9f
|
|
|
|
L.4.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #3
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits -3
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1012
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #5
|
|
|
|
b 9f
|
|
|
|
L.4.1012:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #7
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
L.2.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 3, accumulated bits -3
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1012
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits -5
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1010
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #9
|
|
|
|
b 9f
|
|
|
|
L.4.1010:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #11
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1012:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits -7
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1008
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #13
|
|
|
|
b 9f
|
|
|
|
L.4.1008:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #15
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
|
|
9:
|
|
Lend_regular_divide:
|
|
subs ip, ip, #1
|
|
bge Ldivloop
|
|
cmp r3, #0
|
|
@ non-restoring fixup here (one instruction only!)
|
|
sublt r2, r2, #1
|
|
|
|
|
|
Lgot_result:
|
|
@ check to see if answer should be < 0
|
|
cmp r6, #0
|
|
rsbmi r2, r2, #0
|
|
|
|
mov r0, r2
|
|
ldmia sp!, {r4, r5, r6, pc}
|
|
|
|
Ldiv_zero:
|
|
@ Divide by zero trap. If it returns, return 0 (about as
|
|
@ wrong as possible, but that is what SunOS does...).
|
|
bl ___div0
|
|
mov r0, #0
|
|
ldmia sp!, {r4, r5, r6, pc}
|
|
|
|
#endif /* L_divsi3 */
|
|
|
|
#ifdef L_umodsi3
|
|
|
|
ip .req r12
|
|
sp .req r13
|
|
lr .req r14
|
|
pc .req r15
|
|
.text
|
|
.globl ___umodsi3
|
|
.align 0
|
|
___umodsi3:
|
|
stmdb sp!, {r4, r5, lr}
|
|
@ Ready to divide. Compute size of quotient; scale comparand.
|
|
movs lr, r1
|
|
mov r3, r0
|
|
beq Ldiv_zero
|
|
|
|
|
|
cmp r3, lr @ if r1 exceeds r0, done
|
|
mov r2, #0
|
|
bcc Lgot_result @ (and algorithm fails otherwise)
|
|
mov r4, #(1 << (32 - 4 - 1))
|
|
cmp r3, r4
|
|
mov ip, #0
|
|
bcc Lnot_really_big
|
|
|
|
@ Here the dividend is >= 2^(31-N) or so. We must be careful here,
|
|
@ as our usual N-at-a-shot divide step will cause overflow and havoc.
|
|
@ The number of bits in the result here is N*ITER+SC, where SC <= N.
|
|
@ Compute ITER in an unorthodox manner: know we need to shift V into
|
|
@ the top decade: so do not even bother to compare to R.
|
|
mov r5, #1
|
|
1:
|
|
cmp lr, r4
|
|
bcs 3f
|
|
mov lr, lr, lsl #4
|
|
add ip, ip, #1
|
|
b 1b
|
|
|
|
@ Now compute r5.
|
|
2: adds lr, lr, lr
|
|
add r5, r5, #1
|
|
bcc Lnot_too_big
|
|
|
|
@ We get here if the r1 overflowed while shifting.
|
|
@ This means that r3 has the high-order bit set.
|
|
@ Restore lr and subtract from r3.
|
|
mov r4, r4, lsl #4
|
|
mov lr, lr, lsr #1
|
|
add lr, r4, lr
|
|
sub r5, r5, #1
|
|
b Ldo_single_div
|
|
|
|
Lnot_too_big:
|
|
3: cmp lr, r3
|
|
bcc 2b
|
|
@ beq Ldo_single_div
|
|
|
|
/* NB: these are commented out in the V8-Sparc manual as well */
|
|
/* (I do not understand this) */
|
|
@ lr > r3: went too far: back up 1 step
|
|
@ srl lr, 1, lr
|
|
@ dec r5
|
|
@ do single-bit divide steps
|
|
@
|
|
@ We have to be careful here. We know that r3 >= lr, so we can do the
|
|
@ first divide step without thinking. BUT, the others are conditional,
|
|
@ and are only done if r3 >= 0. Because both r3 and lr may have the high-
|
|
@ order bit set in the first step, just falling into the regular
|
|
@ division loop will mess up the first time around.
|
|
@ So we unroll slightly...
|
|
Ldo_single_div:
|
|
subs r5, r5, #1
|
|
blt Lend_regular_divide
|
|
sub r3, r3, lr
|
|
mov r2, #1
|
|
b Lend_single_divloop
|
|
Lsingle_divloop:
|
|
cmp r3, #0
|
|
mov r2, r2, lsl #1
|
|
mov lr, lr, lsr #1
|
|
@ r3 >= 0
|
|
subpl r3, r3, lr
|
|
addpl r2, r2, #1
|
|
@ r3 < 0
|
|
addmi r3, r3, lr
|
|
submi r2, r2, #1
|
|
Lend_single_divloop:
|
|
subs r5, r5, #1
|
|
bge Lsingle_divloop
|
|
b Lend_regular_divide
|
|
|
|
1:
|
|
add ip, ip, #1
|
|
Lnot_really_big:
|
|
mov lr, lr, lsl #4
|
|
cmp lr, r3
|
|
bls 1b
|
|
@
|
|
@ HOW CAN ip EVER BE -1 HERE ?????
|
|
@
|
|
cmn ip, #1
|
|
beq Lgot_result
|
|
|
|
Ldivloop:
|
|
cmp r3, #0 @ set up for initial iteration
|
|
mov r2, r2, lsl #4
|
|
@ depth 1, accumulated bits 0
|
|
mov lr, lr, lsr #1
|
|
blt L.1.1015
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 2, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.2.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 3, accumulated bits 3
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1018
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits 7
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1022
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #15
|
|
|
|
b 9f
|
|
|
|
L.4.1022:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #13
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1018:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits 5
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1020
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #11
|
|
|
|
b 9f
|
|
|
|
L.4.1020:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #9
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
L.2.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 3, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits 3
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1018
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #7
|
|
|
|
b 9f
|
|
|
|
L.4.1018:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #5
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #3
|
|
|
|
b 9f
|
|
|
|
L.4.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #1
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
|
|
L.1.1015:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 2, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.2.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 3, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #1
|
|
|
|
b 9f
|
|
|
|
L.4.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #3
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits -3
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1012
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #5
|
|
|
|
b 9f
|
|
|
|
L.4.1012:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #7
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
L.2.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 3, accumulated bits -3
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1012
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits -5
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1010
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #9
|
|
|
|
b 9f
|
|
|
|
L.4.1010:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #11
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1012:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits -7
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1008
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #13
|
|
|
|
b 9f
|
|
|
|
L.4.1008:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #15
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
|
|
9:
|
|
Lend_regular_divide:
|
|
subs ip, ip, #1
|
|
bge Ldivloop
|
|
cmp r3, #0
|
|
@ non-restoring fixup here (one instruction only!)
|
|
addlt r3, r1, r3
|
|
|
|
|
|
Lgot_result:
|
|
|
|
mov r0, r3
|
|
ldmia sp!, {r4, r5, pc}
|
|
|
|
Ldiv_zero:
|
|
@ Divide by zero trap. If it returns, return 0 (about as
|
|
@ wrong as possible, but that is what SunOS does...).
|
|
bl ___div0
|
|
mov r0, #0
|
|
ldmia sp!, {r4, r5, pc}
|
|
|
|
#endif /* L_umodsi3 */
|
|
|
|
#ifdef L_modsi3
|
|
|
|
ip .req r12
|
|
sp .req r13
|
|
lr .req r14
|
|
pc .req r15
|
|
.text
|
|
.globl ___modsi3
|
|
.align 0
|
|
___modsi3:
|
|
stmdb sp!, {r4, r5, r6, lr}
|
|
@ compute sign of result; if neither is negative, no problem
|
|
mov r6, r0
|
|
cmp r1, #0
|
|
rsbmi r1, r1, #0
|
|
beq Ldiv_zero
|
|
mov lr, r1
|
|
movs r3, r0
|
|
rsbmi r3, r3, #0 @ make dividend nonnegative
|
|
|
|
|
|
cmp r3, lr @ if r1 exceeds r0, done
|
|
mov r2, #0
|
|
bcc Lgot_result @ (and algorithm fails otherwise)
|
|
mov r4, #(1 << (32 - 4 - 1))
|
|
cmp r3, r4
|
|
mov ip, #0
|
|
bcc Lnot_really_big
|
|
|
|
@ Here the dividend is >= 2^(31-N) or so. We must be careful here,
|
|
@ as our usual N-at-a-shot divide step will cause overflow and havoc.
|
|
@ The number of bits in the result here is N*ITER+SC, where SC <= N.
|
|
@ Compute ITER in an unorthodox manner: know we need to shift V into
|
|
@ the top decade: so do not even bother to compare to R.
|
|
mov r5, #1
|
|
1:
|
|
cmp lr, r4
|
|
bcs 3f
|
|
mov lr, lr, lsl #4
|
|
add ip, ip, #1
|
|
b 1b
|
|
|
|
@ Now compute r5.
|
|
2: adds lr, lr, lr
|
|
add r5, r5, #1
|
|
bcc Lnot_too_big
|
|
|
|
@ We get here if the r1 overflowed while shifting.
|
|
@ This means that r3 has the high-order bit set.
|
|
@ Restore lr and subtract from r3.
|
|
mov r4, r4, lsl #4
|
|
mov lr, lr, lsr #1
|
|
add lr, r4, lr
|
|
sub r5, r5, #1
|
|
b Ldo_single_div
|
|
|
|
Lnot_too_big:
|
|
3: cmp lr, r3
|
|
bcc 2b
|
|
@ beq Ldo_single_div
|
|
|
|
/* NB: these are commented out in the V8-Sparc manual as well */
|
|
/* (I do not understand this) */
|
|
@ lr > r3: went too far: back up 1 step
|
|
@ srl lr, 1, lr
|
|
@ dec r5
|
|
@ do single-bit divide steps
|
|
@
|
|
@ We have to be careful here. We know that r3 >= lr, so we can do the
|
|
@ first divide step without thinking. BUT, the others are conditional,
|
|
@ and are only done if r3 >= 0. Because both r3 and lr may have the high-
|
|
@ order bit set in the first step, just falling into the regular
|
|
@ division loop will mess up the first time around.
|
|
@ So we unroll slightly...
|
|
Ldo_single_div:
|
|
subs r5, r5, #1
|
|
blt Lend_regular_divide
|
|
sub r3, r3, lr
|
|
mov r2, #1
|
|
b Lend_single_divloop
|
|
Lsingle_divloop:
|
|
cmp r3, #0
|
|
mov r2, r2, lsl #1
|
|
mov lr, lr, lsr #1
|
|
@ r3 >= 0
|
|
subpl r3, r3, lr
|
|
addpl r2, r2, #1
|
|
@ r3 < 0
|
|
addmi r3, r3, lr
|
|
submi r2, r2, #1
|
|
Lend_single_divloop:
|
|
subs r5, r5, #1
|
|
bge Lsingle_divloop
|
|
b Lend_regular_divide
|
|
|
|
1:
|
|
add ip, ip, #1
|
|
Lnot_really_big:
|
|
mov lr, lr, lsl #4
|
|
cmp lr, r3
|
|
bls 1b
|
|
@
|
|
@ HOW CAN ip EVER BE -1 HERE ?????
|
|
@
|
|
cmn ip, #1
|
|
beq Lgot_result
|
|
|
|
Ldivloop:
|
|
cmp r3, #0 @ set up for initial iteration
|
|
mov r2, r2, lsl #4
|
|
@ depth 1, accumulated bits 0
|
|
mov lr, lr, lsr #1
|
|
blt L.1.1015
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 2, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.2.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 3, accumulated bits 3
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1018
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits 7
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1022
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #15
|
|
|
|
b 9f
|
|
|
|
L.4.1022:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #13
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1018:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits 5
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1020
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #11
|
|
|
|
b 9f
|
|
|
|
L.4.1020:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #9
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
L.2.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 3, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits 3
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1018
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #7
|
|
|
|
b 9f
|
|
|
|
L.4.1018:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #5
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits 1
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1016
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
add r2, r2, #3
|
|
|
|
b 9f
|
|
|
|
L.4.1016:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
add r2, r2, #1
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
|
|
L.1.1015:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 2, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.2.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 3, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits -1
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1014
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #1
|
|
|
|
b 9f
|
|
|
|
L.4.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #3
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits -3
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1012
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #5
|
|
|
|
b 9f
|
|
|
|
L.4.1012:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #7
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
L.2.1014:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 3, accumulated bits -3
|
|
mov lr, lr, lsr #1
|
|
blt L.3.1012
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
@ depth 4, accumulated bits -5
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1010
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #9
|
|
|
|
b 9f
|
|
|
|
L.4.1010:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #11
|
|
b 9f
|
|
|
|
|
|
|
|
L.3.1012:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
@ depth 4, accumulated bits -7
|
|
mov lr, lr, lsr #1
|
|
blt L.4.1008
|
|
@ remainder is positive
|
|
subs r3, r3, lr
|
|
sub r2, r2, #13
|
|
|
|
b 9f
|
|
|
|
L.4.1008:
|
|
@ remainder is negative
|
|
adds r3, r3, lr
|
|
sub r2, r2, #15
|
|
b 9f
|
|
|
|
|
|
|
|
|
|
|
|
9:
|
|
Lend_regular_divide:
|
|
subs ip, ip, #1
|
|
bge Ldivloop
|
|
cmp r3, #0
|
|
@ non-restoring fixup here (one instruction only!)
|
|
addlt r3, r1, r3
|
|
|
|
|
|
Lgot_result:
|
|
@ check to see if answer should be < 0
|
|
cmp r6, #0
|
|
rsbmi r3, r3, #0
|
|
|
|
mov r0, r3
|
|
ldmia sp!, {r4, r5, r6, pc}
|
|
|
|
Ldiv_zero:
|
|
@ Divide by zero trap. If it returns, return 0 (about as
|
|
@ wrong as possible, but that is what SunOS does...).
|
|
bl ___div0
|
|
mov r0, #0
|
|
ldmia sp!, {r4, r5, r6, pc}
|
|
|
|
#endif /* L_modsi3 */
|
|
|
|
#ifdef L_dvmd_tls
|
|
|
|
.globl ___div0
|
|
.align 0
|
|
___div0:
|
|
mov pc, lr
|
|
|
|
#endif /* L_divmodsi_tools */
|