225e4f43cc
	* config/sh/lib1funcs.asm (___movstr_i4_even, ___movstr_i4_odd):
	Define.
	(___movstrSI12_i4, ___sdivsi3_i4, ___udivsi3_i4): Define.
	* sh.c (reg_class_from_letter, regno_reg_class): Add DF_REGS.
	(fp_reg_names, assembler_dialect): New variables.
	(print_operand_address): Handle SUBREGs.
	(print_operand): Added 'o' case.
	Don't use adj_offsettable_operand on PRE_DEC / POST_INC.
	Name of FP registers depends on mode.
	(expand_block_move): Emit different code for SH4 hardware.
	(prepare_scc_operands): Use emit_sf_insn / emit_df_insn as appropriate.
	(from_compare): Likewise.
	(add_constant): New argument last_value.  Changed all callers.
	(find_barrier): Don't try HImode load for FPUL_REG.
	(machine_dependent_reorg): Likewise.
	(sfunc_uses_reg): A CLOBBER cannot be the address register use.
	(gen_far_branch): Emit a barrier after the new jump.
	(barrier_align): Don't trust instruction lengths before fixing up
	pcloads.
	(machine_dependent_reorg): Add support for FIRST_XD_REG ..
	LAST_XD_REG.
	Use auto-inc addressing for fp registers if doubles need to be
	loaded in two steps.
	Set sh_flag_remove_dead_before_cse.
	(push): Support for TARGET_FMOVD.  Use gen_push_fpul for fpul.
	(pop): Support for TARGET_FMOVD.  Use gen_pop_fpul for fpul.
	(calc_live_regs): Support for TARGET_FMOVD.  Don't save FPSCR.
	Support for FIRST_XD_REG .. LAST_XD_REG.
	(sh_expand_prologue): Support for FIRST_XD_REG .. LAST_XD_REG.
	(sh_expand_epilogue): Likewise.
	(sh_builtin_saveregs): Use DFmode moves for fp regs on SH4.
	(initial_elimination_offset): Take TARGET_ALIGN_DOUBLE into account.
	(arith_reg_operand): FPUL_REG is OK for SH4.
	(fp_arith_reg_operand, fp_extended_operand): New functions.
	(tertiary_reload_operand, fpscr_operand): Likewise.
	(commutative_float_operator, noncommutative_float_operator): Likewise.
	(binary_float_operator, get_fpscr_rtx, emit_sf_insn): Likewise.
	(emit_df_insn, expand_sf_unop, expand_sf_binop): Likewise.
	(expand_df_unop, expand_df_binop, expand_fp_branch): Likewise.
	(emit_fpscr_use, mark_use, remove_dead_before_cse): Likewise.
	* sh.h (CPP_SPEC): Add support for -m4, m4-single, m4-single-only.
	(CONDITIONAL_REGISTER_USAGE): Likewise.
	(HARD_SH4_BIT, FPU_SINGLE_BIT, SH4_BIT, FMOVD_BIT): Define.
	(TARGET_CACHE32, TARGET_SUPERSCALAR, TARGET_HARWARD): Define.
	(TARGET_HARD_SH4, TARGET_FPU_SINGLE, TARGET_SH4, TARGET_FMOVD):
	Define.
	(target_flag): Add -m4, m4-single, m4-single-only, -mfmovd.
	(OPTIMIZATION_OPTIONS): If optimizing, set flag_omit_frame_pointer
	to -1 and sh_flag_remove_dead_before_cse to 1.
	(ASSEMBLER_DIALECT): Define to assembler_dialect.
	(assembler_dialect, fp_reg_names): Declare.
	(OVERRIDE_OPTIONS): Add code for TARGET_SH4.
	Hide names of registers that are not accessible.
	(CACHE_LOG): Take TARGET_CACHE32 into account.
	(LOOP_ALIGN): Take TARGET_HARWARD into account.
	(FIRST_XD_REG, LAST_XD_REG, FPSCR_REG): Define.
	(FIRST_PSEUDO_REGISTER): Now 49.
	(FIXED_REGISTERS, CALL_USED_REGISTERS): Include values for the new
	registers.
	(HARD_REGNO_NREGS): Special treatment of FIRST_XD_REG .. LAST_XD_REG.
	(HARD_REGNO_MODE_OK): Update.
	(enum reg_class): Add DF_REGS and FPSCR_REGS.
	(REG_CLASS_NAMES, REG_CLASS_CONTENTS, REG_ALLOC_ORDER): Likewise.
	(SECONDARY_OUTPUT_RELOAD_CLASS, SECONDARY_INPUT_RELOAD_CLASS): Update.
	(CLASS_CANNOT_CHANGE_SIZE, DEBUG_REGISTER_NAMES): Define.
	(NPARM_REGS): Eight floating point parameter registers on SH4.
	(BASE_RETURN_VALUE_REG): SH4 also passes double values in floating
	point registers.
	(GET_SH_ARG_CLASS): Likewise.
	Complex float types are also returned in float registers.
	(BASE_ARG_REG): Complex float types are also passed in float
	registers.
	(FUNCTION_VALUE): Change mode like PROMOTE_MODE does.
	(LIBCALL_VALUE): Remove trailing semicolon.
	(ROUND_REG): Round when double precision value is passed in floating
	point register(s).
	(FUNCTION_ARG_ADVANCE): No change wanted for SH4 when things are
	passed on the stack.
	(FUNCTION_ARG): Little endian adjustment for SH4 SFmode.
	(FUNCTION_ARG_PARTIAL_NREGS): Zero for SH4.
	(TRAMPOLINE_ALIGNMENT): Take TARGET_HARWARD into account.
	(INITIALIZE_TRAMPOLINE): Emit ic_invalidate_line for TARGET_HARWARD.
	(MODE_DISP_OK_8): Not for SH4 DFmode.
	(GO_IF_LEGITIMATE_ADDRESS): No base reg + index reg for SH4 DFmode.
	Allow indexed addressing for PSImode after reload.
	(LEGITIMIZE_ADDRESS): Not for SH4 DFmode.
	(LEGITIMIZE_RELOAD_ADDRESS): Handle SH3E SFmode.
	Don't change SH4 DFmode nor PSImode RELOAD_FOR_INPUT_ADDRESS.
	(DOUBLE_TYPE_SIZE): 64 for SH4.
	(RTX_COSTS): Add PLUS case.
	Increase cost of ASHIFT, ASHIFTRT, LSHIFTRT case.
	(REGISTER_MOVE_COST): Add handling of R0_REGS, FPUL_REGS, T_REGS,
	MAC_REGS, PR_REGS, DF_REGS.
	(REGISTER_NAMES): Use fp_reg_names.
	(enum processor_type): Add PROCESSOR_SH4.
	(sh_flag_remove_dead_before_cse): Declare.
	(rtx_equal_function_value_matters, fpscr_rtx, get_fpscr_rtx): Declare.
	(PREDICATE_CODES): Add binary_float_operator,
	commutative_float_operator, fp_arith_reg_operand, fp_extended_operand,
	fpscr_operand, noncommutative_float_operator.
	(ADJUST_COST): Use different scale for TARGET_SUPERSCALAR.
	(SH_DYNAMIC_SHIFT_COST): Cheaper for SH4.
	* sh.md (attribute cpu): Add value sh4.
	(attributes fmovd, issues): Define.
	(attribute type): Add values dfp_arith, dfp_cmp, dfp_conv, dfdiv.
	(function units memory, int, mpy, fp): Make dependent on issue rate.
	(function units issue, single_issue, load_si, load): Define.
	(function units load_store, fdiv, gp_fpul): Define.
	(attribute hit_stack): Provide proper default.
	(use_sfunc_addr+1, udivsi3): Predicated on ! TARGET_SH4.
	(udivsi3_i4, udivsi3_i4_single, divsi3_i4, divsi3_i4_single): New
	insns.
	(udivsi3, divsi3): Emit special patterns for SH4 hardware.
	(mulsi3_call): Now uses match_operand for function address.
	(mulsi3): Also emit code for SH1 case.
	Wrap result in REG_LIBCALL / REG_RETVAL notes.
	(push, pop, push_e, pop_e): Now define_expands.
	(push_fpul, push_4, pop_fpul, pop_4, ic_invalidate_line): New
	expanders.
	(movsi_ie): Added y/i alternative.
	(ic_invalidate_line_i, movdf_i4): New insns.
	(movdf_i4+[123], reload_outdf+[12345], movsi_y+[12]): New splitters.
	(reload_indf, reload_outdf, reload_outsf, reload_insi): New
	expanders.
	(movdf): Add special code for SH4.
	(movsf_ie, movsf_ie+1, reload_insf, calli): Make use of fpscr visible.
	(call_valuei, calli, call_value): Likewise.
	(movsf): Emit no-op move.
	(mov_nop, movsi_y): New insns.
	(blt, sge): Generalize to handle DFmode.
	(return predicate): Call emit_fpscr_use and remove_dead_before_cse.
	(block_move_real, block_lump_real): Predicate on ! TARGET_HARD_SH4.
	(block_move_real_i4, block_lump_real_i4, fpu_switch): New insns.
	(fpu_switch0, fpu_switch1, movpsi): New expanders.
	(fpu_switch+[12], fix_truncsfsi2_i4_2+1): New splitters.
	(toggle_sz): New insn.
	(addsf3, subsf3, mulsf3, divsf3): Now define_expands.
	(addsf3_i, subsf3_i, mulsf3_i4, mulsf3_ie, divsf3_i): New insns.
	(macsf3): Make use of fpscr visible.  Disable for SH4.
	(floatsisf2): Make use of fpscr visible.
	(floatsisf2_i4): New insn.
	(floatsisf2_ie, fixsfsi, cmpgtsf_t, cmpeqsf_t): Disable for SH4.
	(ieee_ccmpeqsf_t): Likewise.
	(fix_truncsfsi2): Emit different code for SH4.
	(fix_truncsfsi2_i4, fix_truncsfsi2_i4_2, cmpgtsf_t_i4): New insns.
	(cmpeqsf_t_i4, ieee_ccmpeqsf_t_4): New insns.
	(negsf2, sqrtsf2, abssf2): Now expanders.
	(adddf3, subdf3, muldf3, divdf3, floatsidf2): New expanders.
	(negsf2_i, sqrtsf2_i, abssf2_i, adddf3_i, subdf3_i): New insns.
	(muldf3_i, divdf3_i, floatsidf2_i, fix_truncdfsi2_i): New insns.
	(fix_truncdfsi2, cmpdf, negdf2, sqrtdf2, absdf2): New expanders.
	(fix_truncdfsi2_i4, cmpgtdf_t, cmpeqdf_t, ieee_ccmpeqdf_t): New insns.
	(fix_truncdfsi2_i4_2+1): New splitters.
	(negdf2_i, sqrtdf2_i, absdf2_i, extendsfdf2_i4): New insns.
	(extendsfdf2, truncdfsf2): New expanders.
	(truncdfsf2_i4): New insn.
	* t-sh (LIB1ASMFUNCS): Add _movstr_i4, _sdivsi3_i4, _udivsi3_i4.
	(MULTILIB_OPTIONS): Add m4-single-only/m4-single/m4.
	* float-sh.h: When testing for __SH3E__, also test for
	__SH4_SINGLE_ONLY__.
	* va-sh.h (__va_freg): Define to float.
	(__va_greg, __va_freg, __gnuc_va_list, va_start): Define for
	__SH4_SINGLE_ONLY__ like for __SH3E__.
	(__PASS_AS_FLOAT, __TARGET_SH4_P): Likewise.
	(__PASS_AS_FLOAT): Use different definition for __SH4__ and
	__SH4_SINGLE__.
	(TARGET_SH4_P): Define.
	(va_arg): Use it.
	* sh.md (movdf_k, movsf_i): Tweak the condition so that
	init_expr_once is satisfied about the existence of load / store
	insns.
	* sh.md (movsi_i, movsi_ie, movsi_i_lowpart, movsf_i, movsf_ie):
	Change m constraint in source operand to mr / mf.
	* va-sh.h (__va_arg_sh1): Use __asm instead of asm.
	* va-sh.h (__VA_REEF): Define.
	(__va_arg_sh1): Use it.
	* va-sh.h (va_start, va_arg, va_copy): Add parentheses.

From-SVN: r23777
/* Copyright (C) 1994, 1995, 1997 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file with other programs, and to distribute
those programs without any restriction coming from the use of this
file.  (The General Public License restrictions do apply in other
respects; for example, they cover modification of the file, and
distribution when not linked into another program.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* As a special exception, if you link this library with other files,
   some of which are compiled with GCC, to produce an executable,
   this library does not by itself cause the resulting executable
   to be covered by the GNU General Public License.
   This exception does not however invalidate any other reasons why
   the executable file might be covered by the GNU General Public License.  */

!! libgcc1 routines for the Hitachi SH cpu.
!! Contributed by Steve Chamberlain.
!! sac@cygnus.com

!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
!! recoded in assembly by Toshiyasu Morita
!! tm@netcom.com

/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
   ELF local label prefixes by J"orn Rennecke
   amylaar@cygnus.com  */

#ifdef __ELF__
#define LOCAL(X)	.L_##X
#else
#define LOCAL(X)	L_##X
#endif

#ifdef L_ashiftrt
	.global	___ashiftrt_r4_0
	.global	___ashiftrt_r4_1
	.global	___ashiftrt_r4_2
	.global	___ashiftrt_r4_3
	.global	___ashiftrt_r4_4
	.global	___ashiftrt_r4_5
	.global	___ashiftrt_r4_6
	.global	___ashiftrt_r4_7
	.global	___ashiftrt_r4_8
	.global	___ashiftrt_r4_9
	.global	___ashiftrt_r4_10
	.global	___ashiftrt_r4_11
	.global	___ashiftrt_r4_12
	.global	___ashiftrt_r4_13
	.global	___ashiftrt_r4_14
	.global	___ashiftrt_r4_15
	.global	___ashiftrt_r4_16
	.global	___ashiftrt_r4_17
	.global	___ashiftrt_r4_18
	.global	___ashiftrt_r4_19
	.global	___ashiftrt_r4_20
	.global	___ashiftrt_r4_21
	.global	___ashiftrt_r4_22
	.global	___ashiftrt_r4_23
	.global	___ashiftrt_r4_24
	.global	___ashiftrt_r4_25
	.global	___ashiftrt_r4_26
	.global	___ashiftrt_r4_27
	.global	___ashiftrt_r4_28
	.global	___ashiftrt_r4_29
	.global	___ashiftrt_r4_30
	.global	___ashiftrt_r4_31
	.global	___ashiftrt_r4_32
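
! Each ___ashiftrt_r4_<n> entry below shifts r4 arithmetically right
! by exactly <n> bits.  The entries fall through a chain of
! single-bit "shar" steps, with shlr16 / shlr8 plus sign extension
! used as shortcuts at counts 16 and 24, so the compiler can branch
! straight to the entry for a known constant shift count.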

	.align	1
___ashiftrt_r4_32:
___ashiftrt_r4_31:
	rotcl	r4
	rts
	subc	r4,r4

___ashiftrt_r4_30:
	shar	r4
___ashiftrt_r4_29:
	shar	r4
___ashiftrt_r4_28:
	shar	r4
___ashiftrt_r4_27:
	shar	r4
___ashiftrt_r4_26:
	shar	r4
___ashiftrt_r4_25:
	shar	r4
___ashiftrt_r4_24:
	shlr16	r4
	shlr8	r4
	rts
	exts.b	r4,r4

___ashiftrt_r4_23:
	shar	r4
___ashiftrt_r4_22:
	shar	r4
___ashiftrt_r4_21:
	shar	r4
___ashiftrt_r4_20:
	shar	r4
___ashiftrt_r4_19:
	shar	r4
___ashiftrt_r4_18:
	shar	r4
___ashiftrt_r4_17:
	shar	r4
___ashiftrt_r4_16:
	shlr16	r4
	rts
	exts.w	r4,r4

___ashiftrt_r4_15:
	shar	r4
___ashiftrt_r4_14:
	shar	r4
___ashiftrt_r4_13:
	shar	r4
___ashiftrt_r4_12:
	shar	r4
___ashiftrt_r4_11:
	shar	r4
___ashiftrt_r4_10:
	shar	r4
___ashiftrt_r4_9:
	shar	r4
___ashiftrt_r4_8:
	shar	r4
___ashiftrt_r4_7:
	shar	r4
___ashiftrt_r4_6:
	shar	r4
___ashiftrt_r4_5:
	shar	r4
___ashiftrt_r4_4:
	shar	r4
___ashiftrt_r4_3:
	shar	r4
___ashiftrt_r4_2:
	shar	r4
___ashiftrt_r4_1:
	rts
	shar	r4

___ashiftrt_r4_0:
	rts
	nop
#endif

#ifdef L_ashiftrt_n

!
! ___ashrsi3
!
! Entry:
!
! r4: Value to shift
! r5: Shifts
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! (none)
!
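! The shift count is masked to 0..31 and used to index a table of
! byte offsets; "braf r5" then jumps to the matching unrolled entry,
! with "mov r4,r0" in the delay slot.  SH1 has no braf, so it adds
! the offset to the table address from mova and uses "jmp @r0".
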
	.global	___ashrsi3
	.align	2
___ashrsi3:
	mov	#31,r0
	and	r0,r5
	mova	LOCAL(ashrsi3_table),r0
	mov.b	@(r0,r5),r5
#ifdef __sh1__
	add	r5,r0
	jmp	@r0
#else
	braf	r5
#endif
	mov	r4,r0

	.align	2
LOCAL(ashrsi3_table):
	.byte	LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)

LOCAL(ashrsi3_31):
	rotcl	r0
	rts
	subc	r0,r0

LOCAL(ashrsi3_30):
	shar	r0
LOCAL(ashrsi3_29):
	shar	r0
LOCAL(ashrsi3_28):
	shar	r0
LOCAL(ashrsi3_27):
	shar	r0
LOCAL(ashrsi3_26):
	shar	r0
LOCAL(ashrsi3_25):
	shar	r0
LOCAL(ashrsi3_24):
	shlr16	r0
	shlr8	r0
	rts
	exts.b	r0,r0

LOCAL(ashrsi3_23):
	shar	r0
LOCAL(ashrsi3_22):
	shar	r0
LOCAL(ashrsi3_21):
	shar	r0
LOCAL(ashrsi3_20):
	shar	r0
LOCAL(ashrsi3_19):
	shar	r0
LOCAL(ashrsi3_18):
	shar	r0
LOCAL(ashrsi3_17):
	shar	r0
LOCAL(ashrsi3_16):
	shlr16	r0
	rts
	exts.w	r0,r0

LOCAL(ashrsi3_15):
	shar	r0
LOCAL(ashrsi3_14):
	shar	r0
LOCAL(ashrsi3_13):
	shar	r0
LOCAL(ashrsi3_12):
	shar	r0
LOCAL(ashrsi3_11):
	shar	r0
LOCAL(ashrsi3_10):
	shar	r0
LOCAL(ashrsi3_9):
	shar	r0
LOCAL(ashrsi3_8):
	shar	r0
LOCAL(ashrsi3_7):
	shar	r0
LOCAL(ashrsi3_6):
	shar	r0
LOCAL(ashrsi3_5):
	shar	r0
LOCAL(ashrsi3_4):
	shar	r0
LOCAL(ashrsi3_3):
	shar	r0
LOCAL(ashrsi3_2):
	shar	r0
LOCAL(ashrsi3_1):
	rts
	shar	r0

LOCAL(ashrsi3_0):
	rts
	nop

#endif

#ifdef L_ashiftlt

!
! ___ashlsi3
!
! Entry:
!
! r4: Value to shift
! r5: Shifts
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! (none)
!
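! Dispatch is the same as in ___ashrsi3 above.  Each shift count is
! built from shll (1 bit), shll2, shll8 and shll16 steps, and the
! entries share their tails by falling through: e.g. a shift by 6 is
! a shll2 that falls into the shift-by-4 entry.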
	.global	___ashlsi3
	.align	2
___ashlsi3:
	mov	#31,r0
	and	r0,r5
	mova	LOCAL(ashlsi3_table),r0
	mov.b	@(r0,r5),r5
#ifdef __sh1__
	add	r5,r0
	jmp	@r0
#else
	braf	r5
#endif
	mov	r4,r0

	.align	2
LOCAL(ashlsi3_table):
	.byte	LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)

LOCAL(ashlsi3_6):
	shll2	r0
LOCAL(ashlsi3_4):
	shll2	r0
LOCAL(ashlsi3_2):
	rts
	shll2	r0

LOCAL(ashlsi3_7):
	shll2	r0
LOCAL(ashlsi3_5):
	shll2	r0
LOCAL(ashlsi3_3):
	shll2	r0
LOCAL(ashlsi3_1):
	rts
	shll	r0

LOCAL(ashlsi3_14):
	shll2	r0
LOCAL(ashlsi3_12):
	shll2	r0
LOCAL(ashlsi3_10):
	shll2	r0
LOCAL(ashlsi3_8):
	rts
	shll8	r0

LOCAL(ashlsi3_15):
	shll2	r0
LOCAL(ashlsi3_13):
	shll2	r0
LOCAL(ashlsi3_11):
	shll2	r0
LOCAL(ashlsi3_9):
	shll8	r0
	rts
	shll	r0

LOCAL(ashlsi3_22):
	shll2	r0
LOCAL(ashlsi3_20):
	shll2	r0
LOCAL(ashlsi3_18):
	shll2	r0
LOCAL(ashlsi3_16):
	rts
	shll16	r0

LOCAL(ashlsi3_23):
	shll2	r0
LOCAL(ashlsi3_21):
	shll2	r0
LOCAL(ashlsi3_19):
	shll2	r0
LOCAL(ashlsi3_17):
	shll16	r0
	rts
	shll	r0

LOCAL(ashlsi3_30):
	shll2	r0
LOCAL(ashlsi3_28):
	shll2	r0
LOCAL(ashlsi3_26):
	shll2	r0
LOCAL(ashlsi3_24):
	shll16	r0
	rts
	shll8	r0

LOCAL(ashlsi3_31):
	shll2	r0
LOCAL(ashlsi3_29):
	shll2	r0
LOCAL(ashlsi3_27):
	shll2	r0
LOCAL(ashlsi3_25):
	shll16	r0
	shll8	r0
	rts
	shll	r0

LOCAL(ashlsi3_0):
	rts
	nop

#endif

#ifdef L_lshiftrt

!
! ___lshrsi3
!
! Entry:
!
! r4: Value to shift
! r5: Shifts
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! (none)
!
	.global	___lshrsi3
	.align	2
___lshrsi3:
	mov	#31,r0
	and	r0,r5
	mova	LOCAL(lshrsi3_table),r0
	mov.b	@(r0,r5),r5
#ifdef __sh1__
	add	r5,r0
	jmp	@r0
#else
	braf	r5
#endif
	mov	r4,r0

	.align	2
LOCAL(lshrsi3_table):
	.byte	LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)

LOCAL(lshrsi3_6):
	shlr2	r0
LOCAL(lshrsi3_4):
	shlr2	r0
LOCAL(lshrsi3_2):
	rts
	shlr2	r0

LOCAL(lshrsi3_7):
	shlr2	r0
LOCAL(lshrsi3_5):
	shlr2	r0
LOCAL(lshrsi3_3):
	shlr2	r0
LOCAL(lshrsi3_1):
	rts
	shlr	r0

LOCAL(lshrsi3_14):
	shlr2	r0
LOCAL(lshrsi3_12):
	shlr2	r0
LOCAL(lshrsi3_10):
	shlr2	r0
LOCAL(lshrsi3_8):
	rts
	shlr8	r0

LOCAL(lshrsi3_15):
	shlr2	r0
LOCAL(lshrsi3_13):
	shlr2	r0
LOCAL(lshrsi3_11):
	shlr2	r0
LOCAL(lshrsi3_9):
	shlr8	r0
	rts
	shlr	r0

LOCAL(lshrsi3_22):
	shlr2	r0
LOCAL(lshrsi3_20):
	shlr2	r0
LOCAL(lshrsi3_18):
	shlr2	r0
LOCAL(lshrsi3_16):
	rts
	shlr16	r0

LOCAL(lshrsi3_23):
	shlr2	r0
LOCAL(lshrsi3_21):
	shlr2	r0
LOCAL(lshrsi3_19):
	shlr2	r0
LOCAL(lshrsi3_17):
	shlr16	r0
	rts
	shlr	r0

LOCAL(lshrsi3_30):
	shlr2	r0
LOCAL(lshrsi3_28):
	shlr2	r0
LOCAL(lshrsi3_26):
	shlr2	r0
LOCAL(lshrsi3_24):
	shlr16	r0
	rts
	shlr8	r0

LOCAL(lshrsi3_31):
	shlr2	r0
LOCAL(lshrsi3_29):
	shlr2	r0
LOCAL(lshrsi3_27):
	shlr2	r0
LOCAL(lshrsi3_25):
	shlr16	r0
	shlr8	r0
	rts
	shlr	r0

LOCAL(lshrsi3_0):
	rts
	nop

#endif

#ifdef L_movstr
	.text
	! done all the large groups, do the remainder

	! jump to movstr+
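	! Each ___movstrSInn entry below is exactly four bytes long (one
	! mov.l load plus one mov.l store), so adding 4*r6 -- r6 is <= 0
	! by the time we get here -- to the address of ___movstrSI0
	! selects the entry that copies just the longwords left in the
	! tail.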
done:
	add	#64,r5
	mova	___movstrSI0,r0
	shll2	r6
	add	r6,r0
	jmp	@r0
	add	#64,r4
	.align	4
	.global	___movstrSI64
___movstrSI64:
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)
	.global	___movstrSI60
___movstrSI60:
	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)
	.global	___movstrSI56
___movstrSI56:
	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)
	.global	___movstrSI52
___movstrSI52:
	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)
	.global	___movstrSI48
___movstrSI48:
	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)
	.global	___movstrSI44
___movstrSI44:
	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)
	.global	___movstrSI40
___movstrSI40:
	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)
	.global	___movstrSI36
___movstrSI36:
	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)
	.global	___movstrSI32
___movstrSI32:
	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)
	.global	___movstrSI28
___movstrSI28:
	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)
	.global	___movstrSI24
___movstrSI24:
	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)
	.global	___movstrSI20
___movstrSI20:
	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)
	.global	___movstrSI16
___movstrSI16:
	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)
	.global	___movstrSI12
___movstrSI12:
	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)
	.global	___movstrSI8
___movstrSI8:
	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)
	.global	___movstrSI4
___movstrSI4:
	mov.l	@(0,r5),r0
	mov.l	r0,@(0,r4)
___movstrSI0:
	rts
	or	r0,r0,r0

	.align	4

	.global	___movstr
___movstr:
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)

	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)

	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)

	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)

	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)

	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)

	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)

	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)

	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)

	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)

	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)

	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)

	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)

	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)

	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)

	mov.l	@(0,r5),r0
	mov.l	r0,@(0,r4)

	add	#-16,r6
	cmp/pl	r6
	bf	done

	add	#64,r5
	bra	___movstr
	add	#64,r4
#endif

#ifdef L_movstr_i4
#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
	.text
	.global	___movstr_i4_even
	.global	___movstr_i4_odd
	.global	___movstrSI12_i4
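
! SH4 block move, software-pipelined so that loads stay ahead of
! stores.  r6 counts "dt" loop iterations of four longwords each;
! the _even and _odd entries handle the two parities of the longword
! count (the odd entry biases r4 by -4 and preloads three longwords),
! and L_movstr_2mod4_end flushes the two longwords still in flight.
! ___movstrSI12_i4 is a straight-line twelve byte copy.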

	.p2align	5
L_movstr_2mod4_end:
	mov.l	r0,@(16,r4)
	rts
	mov.l	r1,@(20,r4)

	.p2align	2

___movstr_i4_odd:
	mov.l	@r5+,r1
	add	#-4,r4
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r1,@(4,r4)
	mov.l	r2,@(8,r4)

L_movstr_loop:
	mov.l	r3,@(12,r4)
	dt	r6
	mov.l	@r5+,r0
	bt/s	L_movstr_2mod4_end
	mov.l	@r5+,r1
	add	#16,r4
L_movstr_start_even:
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r0,@r4
	dt	r6
	mov.l	r1,@(4,r4)
	bf/s	L_movstr_loop
	mov.l	r2,@(8,r4)
	rts
	mov.l	r3,@(12,r4)

___movstr_i4_even:
	mov.l	@r5+,r0
	bra	L_movstr_start_even
	mov.l	@r5+,r1

	.p2align	4
___movstrSI12_i4:
	mov.l	@r5,r0
	mov.l	@(4,r5),r1
	mov.l	@(8,r5),r2
	mov.l	r0,@r4
	mov.l	r1,@(4,r4)
	rts
	mov.l	r2,@(8,r4)
#endif /* __SH4__ */
#endif

#ifdef L_mulsi3

	.global	___mulsi3

! r4 =       aabb
! r5 =       ccdd
! r0 =       aabb*ccdd  via partial products
!
! if aa == 0 and cc == 0
! r0 = bb*dd
!
! else
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
!
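! Worked example (added for clarity): 0x00020003 * 0x00040005
!   bb*dd         = 0x0003 * 0x0005                   = 0x000f
!   aa*dd + cc*bb = 0x0002 * 0x0005 + 0x0004 * 0x0003 = 0x0016
!   result        = 0x000f + (0x0016 << 16)           = 0x0016000f
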
___mulsi3:
	mulu	r4,r5		! multiply the lsws  macl=bb*dd
	mov	r5,r3		! r3 = ccdd
	swap.w	r4,r2		! r2 = bbaa
	xtrct	r2,r3		! r3 = aacc
	tst	r3,r3		! msws zero ?
	bf	hiset
	rts			! yes - then we have the answer
	sts	macl,r0

hiset:	sts	macl,r0		! r0 = bb*dd
	mulu	r2,r5		! brewing macl = aa*dd
	sts	macl,r1
	mulu	r3,r4		! brewing macl = cc*bb
	sts	macl,r2
	add	r1,r2
	shll16	r2
	rts
	add	r2,r0

#endif

#ifdef L_sdivsi3_i4
	.title "SH DIVIDE"
!! 4 byte integer Divide code for the Hitachi SH
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber dr0, dr2

	.global	___sdivsi3_i4
___sdivsi3_i4:
	lds	r4,fpul
	float	fpul,dr0
	lds	r5,fpul
	float	fpul,dr2
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul

#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
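! On -m4-single / -m4-single-only the function is entered with
! FPSCR.PR clear (single precision), so this variant saves fpscr,
! sets the PR bit (0x00080000, built by swapping the halves of #8)
! to get a double-precision fdiv, and restores the caller's fpscr
! in the delay slot of the rts.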

	.global	___sdivsi3_i4
___sdivsi3_i4:
	sts.l	fpscr,@-r15
	mov	#8,r2
	swap.w	r2,r2
	lds	r2,fpscr
	lds	r4,fpul
	float	fpul,dr0
	lds	r5,fpul
	float	fpul,dr2
	fdiv	dr2,dr0
	ftrc	dr0,fpul
	rts
	lds.l	@r15+,fpscr

#endif /* __SH4__ */
#endif

#ifdef L_sdivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh3e code.  */
#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!

!! args in r4 and r5, result in r0, clobbers r1, r2, r3
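! Non-restoring signed divide: div0s seeds the sign flags from the
! operands, the subc pair pre-adjusts a negative dividend, and each
! of the 32 unrolled "rotcl r1 / div1 r0,r3" steps develops one
! quotient bit.  The final rotcl / addc applies the sign correction;
! division by zero returns 0 via the div0 exit.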

	.global	___sdivsi3
___sdivsi3:
	mov	r4,r1
	mov	r5,r0

	tst	r0,r0
	bt	div0
	mov	#0,r2
	div0s	r2,r1
	subc	r3,r3
	subc	r2,r1
	div0s	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	addc	r2,r1
	rts
	mov	r1,r0

div0:	rts
	mov	#0,r0

#endif /* ! __SH4__ */
#endif

#ifdef L_udivsi3_i4

	.title "SH DIVIDE"
!! 4 byte integer Divide code for the Hitachi SH
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
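! "float" converts signed values, so the unsigned operands are
! biased first: xor with 0x80000000 (built by "mov #1 / rotr") turns
! x into the signed value x - 2^31, and adding 2147483648.0 (the
! constant at L1) after conversion recovers the exact unsigned value,
! since an IEEE double holds any 32-bit integer exactly.  Divisors
! of 0 or 1 take the "trivial" exit, which just returns the dividend.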

	.global	___udivsi3_i4
___udivsi3_i4:
	mov	#1,r1
	cmp/hi	r1,r5
	bf	trivial
	rotr	r1
	xor	r1,r4
	lds	r4,fpul
	mova	L1,r0
#ifdef FMOVD_WORKS
	fmov.d	@r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
	fmov.s	@r0+,fr5
	fmov.s	@r0,fr4
#else
	fmov.s	@r0+,fr4
	fmov.s	@r0,fr5
#endif
#endif
	float	fpul,dr0
	xor	r1,r5
	lds	r5,fpul
	float	fpul,dr2
	fadd	dr4,dr0
	fadd	dr4,dr2
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul

trivial:
	rts
	lds	r4,fpul

	.align	2
L1:
	.double 2147483648

#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4

	.global	___udivsi3_i4
___udivsi3_i4:
	mov	#1,r1
	cmp/hi	r1,r5
	bf	trivial
	sts.l	fpscr,@-r15
	mova	L1,r0
	lds.l	@r0+,fpscr
	rotr	r1
	xor	r1,r4
	lds	r4,fpul
#ifdef FMOVD_WORKS
	fmov.d	@r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
	fmov.s	@r0+,fr5
	fmov.s	@r0,fr4
#else
	fmov.s	@r0+,fr4
	fmov.s	@r0,fr5
#endif
#endif
	float	fpul,dr0
	xor	r1,r5
	lds	r5,fpul
	float	fpul,dr2
	fadd	dr4,dr0
	fadd	dr4,dr2
	fdiv	dr2,dr0
	ftrc	dr0,fpul
	rts
	lds.l	@r15+,fpscr

trivial:
	rts
	lds	r4,fpul

	.align	2
L1:
#ifdef __LITTLE_ENDIAN__
	.long	0x80000
#else
	.long	0x180000
#endif
	.double 2147483648

#endif /* __SH4__ */
#endif

#ifdef L_udivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh3e code.  */
#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!

!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
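! Unsigned divide by repeated div1: div0u clears the divider state,
! then each "rotcl r4 ; div1 r5,r0" pair feeds one dividend bit in
! and rotates one quotient bit back into r4.  After 32 steps and a
! final rotcl, r4 holds the quotient.  The shortway / vshortway
! labels allow entering partway through when the upper dividend
! bits are known to be zero; only ___udivsi3 (== longway) is
! exported here.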
	.global	___udivsi3

___udivsi3:
longway:
	mov	#0,r0
	div0u
	! get one bit from the msb of the numerator into the T
	! bit and divide it by what's in r5.  Put the answer bit
	! into the T bit so it can come out again at the bottom

	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0

	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
shortway:
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0

vshortway:
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4
ret:	rts
	mov	r4,r0

#endif /* ! __SH4__ */
#endif

#ifdef L_set_fpscr
#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
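! ___set_fpscr installs the new FPSCR value and refreshes the two
! cached settings in ___fpscr_values, the single- / double-precision
! pair that the compiler's fpu_switch patterns toggle between.
! swap.w brings FPSCR bits 16..23 into the low byte of r0 so that
! the or / xor immediates can flip the PR and SZ bits before
! swapping back.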
	.global	___set_fpscr
___set_fpscr:
	lds	r4,fpscr
	mov.l	___set_fpscr_L1,r1
	swap.w	r4,r0
	or	#24,r0
#ifndef FMOVD_WORKS
	xor	#16,r0
#endif
#if defined(__SH4__)
	swap.w	r0,r3
	mov.l	r3,@(4,r1)
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r2
	mov.l	r2,@r1
#endif
#ifndef FMOVD_WORKS
	xor	#8,r0
#else
	xor	#24,r0
#endif
#if defined(__SH4__)
	swap.w	r0,r2
	rts
	mov.l	r2,@r1
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r3
	rts
	mov.l	r3,@(4,r1)
#endif
	.align	2
___set_fpscr_L1:
	.long	___fpscr_values
#ifdef __ELF__
	.comm	___fpscr_values,8,4
#else
	.comm	___fpscr_values,8
#endif /* ELF */
#endif /* SH3E / SH4 */
#endif /* L_set_fpscr */