8sa1-gcc/gcc/config/sh/lib1funcs.asm
J"orn Rennecke 225e4f43cc Add SH4 support:
* config/sh/lib1funcs.asm (___movstr_i4_even, ___movstr_i4_odd): Define.
	(___movstrSI12_i4, ___sdivsi3_i4, ___udivsi3_i4): Define.
	* sh.c (reg_class_from_letter, regno_reg_class): Add DF_REGS.
	(fp_reg_names, assembler_dialect): New variables.
	(print_operand_address): Handle SUBREGs.
	(print_operand): Added 'o' case.
	Don't use adj_offsettable_operand on PRE_DEC / POST_INC.
	Name of FP registers depends on mode.
	(expand_block_move): Emit different code for SH4 hardware.
	(prepare_scc_operands): Use emit_sf_insn / emit_df_insn as appropriate.
	(from_compare): Likewise.
	(add_constant): New argument last_value.  Changed all callers.
	(find_barrier): Don't try HImode load for FPUL_REG.
	(machine_dependent_reorg): Likewise.
	(sfunc_uses_reg): A CLOBBER cannot be the address register use.
	(gen_far_branch): Emit a barrier after the new jump.
	(barrier_align): Don't trust instruction lengths before
	fixing up pcloads.
	(machine_dependent_reorg): Add support for FIRST_XD_REG .. LAST_XD_REG.
	Use auto-inc addressing for fp registers if doubles need to
	be loaded in two steps.
	Set sh_flag_remove_dead_before_cse.
	(push): Support for TARGET_FMOVD.  Use gen_push_fpul for fpul.
	(pop): Support for TARGET_FMOVD.  Use gen_pop_fpul for fpul.
	(calc_live_regs): Support for TARGET_FMOVD.  Don't save FPSCR.
	Support for FIRST_XD_REG .. LAST_XD_REG.
	(sh_expand_prologue): Support for FIRST_XD_REG .. LAST_XD_REG.
	(sh_expand_epilogue): Likewise.
	(sh_builtin_saveregs): Use DFmode moves for fp regs on SH4.
	(initial_elimination_offset): Take TARGET_ALIGN_DOUBLE into account.
	(arith_reg_operand): FPUL_REG is OK for SH4.
	(fp_arith_reg_operand, fp_extended_operand) New functions.
	(tertiary_reload_operand, fpscr_operand): Likewise.
	(commutative_float_operator, noncommutative_float_operator): Likewise.
	(binary_float_operator, get_fpscr_rtx, emit_sf_insn): Likewise.
	(emit_df_insn, expand_sf_unop, expand_sf_binop): Likewise.
	(expand_df_unop, expand_df_binop, expand_fp_branch): Likewise.
	(emit_fpscr_use, mark_use, remove_dead_before_cse): Likewise.
	* sh.h (CPP_SPEC): Add support for -m4, m4-single, m4-single-only.
	(CONDITIONAL_REGISTER_USAGE): Likewise.
	(HARD_SH4_BIT, FPU_SINGLE_BIT, SH4_BIT, FMOVD_BIT): Define.
	(TARGET_CACHE32, TARGET_SUPERSCALAR, TARGET_HARWARD): Define.
	(TARGET_HARD_SH4, TARGET_FPU_SINGLE, TARGET_SH4, TARGET_FMOVD): Define.
	(target_flag): Add -m4, m4-single, m4-single-only, -mfmovd.
	(OPTIMIZATION_OPTIONS): If optimizing, set flag_omit_frame_pointer
	to -1 and sh_flag_remove_dead_before_cse to 1.
	(ASSEMBLER_DIALECT): Define to assembler_dialect.
	(assembler_dialect, fp_reg_names): Declare.
	(OVERRIDE_OPTIONS): Add code for TARGET_SH4.
	Hide names of registers that are not accessible.
	(CACHE_LOG): Take TARGET_CACHE32 into account.
	(LOOP_ALIGN): Take TARGET_HARWARD into account.
	(FIRST_XD_REG, LAST_XD_REG, FPSCR_REG): Define.
	(FIRST_PSEUDO_REGISTER): Now 49.
	(FIXED_REGISTERS, CALL_USED_REGISTERS): Include values for registers.
	(HARD_REGNO_NREGS): Special treatment of FIRST_XD_REG .. LAST_XD_REG.
	(HARD_REGNO_MODE_OK): Update.
	(enum reg_class): Add DF_REGS and FPSCR_REGS.
	(REG_CLASS_NAMES, REG_CLASS_CONTENTS, REG_ALLOC_ORDER): Likewise.
	(SECONDARY_OUTPUT_RELOAD_CLASS, SECONDARY_INPUT_RELOAD_CLASS): Update.
	(CLASS_CANNOT_CHANGE_SIZE, DEBUG_REGISTER_NAMES): Define.
	(NPARM_REGS): Eight floating point parameter registers on SH4.
	(BASE_RETURN_VALUE_REG): SH4 also passes double values
	in floating point registers.
	(GET_SH_ARG_CLASS) Likewise.
	Complex float types are also returned in float registers.
	(BASE_ARG_REG): Complex float types are also passed in float registers.
	(FUNCTION_VALUE): Change mode like PROMOTE_MODE does.
	(LIBCALL_VALUE): Remove trailing semicolon.
	(ROUND_REG): Round when double precision value is passed in floating
	point register(s).
	(FUNCTION_ARG_ADVANCE): No change wanted for SH4 when things are
	passed on the stack.
	(FUNCTION_ARG): Little endian adjustment for SH4 SFmode.
	(FUNCTION_ARG_PARTIAL_NREGS): Zero for SH4.
	(TRAMPOLINE_ALIGNMENT): Take TARGET_HARWARD into account.
	(INITIALIZE_TRAMPOLINE): Emit ic_invalidate_line for TARGET_HARWARD.
	(MODE_DISP_OK_8): Not for SH4 DFmode.
	(GO_IF_LEGITIMATE_ADDRESS): No base reg + index reg for SH4 DFmode.
	Allow indexed addressing for PSImode after reload.
	(LEGITIMIZE_ADDRESS): Not for SH4 DFmode.
	(LEGITIMIZE_RELOAD_ADDRESS): Handle SH3E SFmode.
	Don't change SH4 DFmode nor PSImode RELOAD_FOR_INPUT_ADDRESS.
	(DOUBLE_TYPE_SIZE): 64 for SH4.
	(RTX_COSTS): Add PLUS case.
	Increase cost of ASHIFT, ASHIFTRT, LSHIFTRT case.
	(REGISTER_MOVE_COST): Add handling of R0_REGS, FPUL_REGS, T_REGS,
	MAC_REGS, PR_REGS, DF_REGS.
	(REGISTER_NAMES): Use fp_reg_names.
	(enum processor_type): Add PROCESSOR_SH4.
	(sh_flag_remove_dead_before_cse): Declare.
	(rtx_equal_function_value_matters, fpscr_rtx, get_fpscr_rtx): Declare.
	(PREDICATE_CODES): Add binary_float_operator,
	commutative_float_operator, fp_arith_reg_operand, fp_extended_operand,
	fpscr_operand, noncommutative_float_operator.
	(ADJUST_COST): Use different scale for TARGET_SUPERSCALAR.
	(SH_DYNAMIC_SHIFT_COST): Cheaper for SH4.
	* sh.md (attribute cpu): Add value sh4.
	(attributes fmovd, issues): Define.
	(attribute type): Add values dfp_arith, dfp_cmp, dfp_conv, dfdiv.
	(function units memory, int, mpy, fp): Make dependent on issue rate.
	(function units issue, single_issue, load_si, load): Define.
	(function units load_store, fdiv, gp_fpul): Define.
	(attribute hit_stack): Provide proper default.
	(use_sfunc_addr+1, udivsi3): Predicated on ! TARGET_SH4.
	(udivsi3_i4, udivsi3_i4_single, divsi3_i4, divsi3_i4_single): New insns.
	(udivsi3, divsi3): Emit special patterns for SH4 hardware,
	(mulsi3_call): Now uses match_operand for function address.
	(mulsi3): Also emit code for SH1 case.  Wrap result in REG_LIBCALL /
	REG_RETVAL notes.
	(push, pop, push_e, pop_e): Now define_expands.
	(push_fpul, push_4, pop_fpul, pop_4, ic_invalidate_line): New expanders.
	(movsi_ie): Added y/i alternative.
	(ic_invalidate_line_i, movdf_i4): New insns.
	(movdf_i4+[123], reload_outdf+[12345], movsi_y+[12]): New splitters.
	(reload_indf, reload_outdf, reload_outsf, reload_insi): New expanders.
	(movdf): Add special code for SH4.
	(movsf_ie, movsf_ie+1, reload_insf, calli): Make use of fpscr visible.
	(call_valuei, calli, call_value): Likewise.
	(movsf): Emit no-op move.
	(mov_nop, movsi_y): New insns.
	(blt, sge): generalize to handle DFmode.
	(return predicate): Call emit_fpscr_use and remove_dead_before_cse.
	(block_move_real, block_lump_real): Predicate on ! TARGET_HARD_SH4.
	(block_move_real_i4, block_lump_real_i4, fpu_switch): New insns.
	(fpu_switch0, fpu_switch1, movpsi): New expanders.
	(fpu_switch+[12], fix_truncsfsi2_i4_2+1): New splitters.
	(toggle_sz): New insn.
	(addsf3, subsf3, mulsf3, divsf3): Now define_expands.
	(addsf3_i, subsf3_i, mulsf3_i4, mulsf3_ie, divsf3_i): New insns.
	(macsf3): Make use of fpscr visible.  Disable for SH4.
	(floatsisf2): Make use of fpscr visible.
	(floatsisf2_i4): New insn.
	(floatsisf2_ie, fixsfsi, cmpgtsf_t, cmpeqsf_t): Disable for SH4.
	(ieee_ccmpeqsf_t): Likewise.
	(fix_truncsfsi2): Emit different code for SH4.
	(fix_truncsfsi2_i4, fix_truncsfsi2_i4_2, cmpgtsf_t_i4): New insns.
	(cmpeqsf_t_i4, ieee_ccmpeqsf_t_4): New insns.
	(negsf2, sqrtsf2, abssf2): Now expanders.
	(adddf3, subdf3i, muldf2, divdf3, floatsidf2): New expanders.
	(negsf2_i, sqrtsf2_i, abssf2_i, adddf3_i, subdf3_i): New insns.
	(muldf3_i, divdf3_i, floatsidf2_i, fix_truncdfsi2_i): New insns.
	(fix_truncdfsi2, cmpdf, negdf2, sqrtdf2, absdf2): New expanders.
	(fix_truncdfsi2_i4, cmpgtdf_t, cmpeqdf_t, ieee_ccmpeqdf_t): New insns.
	(fix_truncdfsi2_i4_2+1): New splitters.
	(negdf2_i, sqrtdf2_i, absdf2_i, extendsfdf2_i4): New insns.
	(extendsfdf2, truncdfsf2): New expanders.
	(truncdfsf2_i4): New insn.
	* t-sh (LIB1ASMFUNCS): Add _movstr_i4, _sdivsi3_i4, _udivsi3_i4.
	(MULTILIB_OPTIONS): Add m4-single-only/m4-single/m4.
	* float-sh.h: When testing for __SH3E__, also test for
	__SH4_SINGLE_ONLY__ .
	* va-sh.h (__va_freg): Define to float.
	(__va_greg, __fa_freg, __gnuc_va_list, va_start):
        Define for __SH4_SINGLE_ONLY__ like for __SH3E__ .
        (__PASS_AS_FLOAT, __TARGET_SH4_P): Likewise.
	(__PASS_AS_FLOAT): Use different definition for __SH4__ and
	 __SH4_SINGLE__.
	(TARGET_SH4_P): Define.
	(va_arg): Use it.
	* sh.md (movdf_k, movsf_i): Tweak the condition so that
	init_expr_once is satisfied about the existence of load / store insns.
	* sh.md (movsi_i, movsi_ie, movsi_i_lowpart, movsf_i, movsf_ie):
        change m constraint in source operand to mr / mf .
	* va-sh.h (__va_arg_sh1): Use __asm instead of asm.
	* (__VA_REEF): Define.
	(__va_arg_sh1): Use it.
	* va-sh.h (va_start, va_arg, va_copy): Add parentheses.

From-SVN: r23777
1998-11-23 08:50:42 +00:00

1207 lines
21 KiB
NASM

/* Copyright (C) 1994, 1995, 1997 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file with other programs, and to distribute
those programs without any restriction coming from the use of this
file. (The General Public License restrictions do apply in other
respects; for example, they cover modification of the file, and
distribution when not linked into another program.)
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* As a special exception, if you link this library with other files,
some of which are compiled with GCC, to produce an executable,
this library does not by itself cause the resulting executable
to be covered by the GNU General Public License.
This exception does not however invalidate any other reasons why
the executable file might be covered by the GNU General Public License. */
!! libgcc1 routines for the Hitachi SH cpu.
!! Contributed by Steve Chamberlain.
!! sac@cygnus.com
!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
!! recoded in assembly by Toshiyasu Morita
!! tm@netcom.com
/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
ELF local label prefixes by J"orn Rennecke
amylaar@cygnus.com */
/* LOCAL(X) builds the name of a file-local label from X:
   .L_X when __ELF__ is defined, L_X otherwise.  */
#ifdef __ELF__
#define LOCAL(X) .L_##X
#else
#define LOCAL(X) L_##X
#endif
#ifdef L_ashiftrt
/* ___ashiftrt_r4_N, N = 0..32: arithmetic right shift of r4 by the fixed
   count N.  The value is shifted in place, so r4 is both the input and
   the result.  Most entry points fall through a chain of one-bit shar
   steps into a shared tail that finishes the shift and returns.  The
   instruction following each rts is its delay slot and still executes
   before the return.  The T bit is modified by rotcl, subc and shar.  */
.global ___ashiftrt_r4_0
.global ___ashiftrt_r4_1
.global ___ashiftrt_r4_2
.global ___ashiftrt_r4_3
.global ___ashiftrt_r4_4
.global ___ashiftrt_r4_5
.global ___ashiftrt_r4_6
.global ___ashiftrt_r4_7
.global ___ashiftrt_r4_8
.global ___ashiftrt_r4_9
.global ___ashiftrt_r4_10
.global ___ashiftrt_r4_11
.global ___ashiftrt_r4_12
.global ___ashiftrt_r4_13
.global ___ashiftrt_r4_14
.global ___ashiftrt_r4_15
.global ___ashiftrt_r4_16
.global ___ashiftrt_r4_17
.global ___ashiftrt_r4_18
.global ___ashiftrt_r4_19
.global ___ashiftrt_r4_20
.global ___ashiftrt_r4_21
.global ___ashiftrt_r4_22
.global ___ashiftrt_r4_23
.global ___ashiftrt_r4_24
.global ___ashiftrt_r4_25
.global ___ashiftrt_r4_26
.global ___ashiftrt_r4_27
.global ___ashiftrt_r4_28
.global ___ashiftrt_r4_29
.global ___ashiftrt_r4_30
.global ___ashiftrt_r4_31
.global ___ashiftrt_r4_32
.align 1
! Counts 32 and 31 share one body: the result is 0 or -1 depending on
! the sign bit only.
___ashiftrt_r4_32:
___ashiftrt_r4_31:
rotcl r4 ! T = sign bit of r4
rts
subc r4,r4 ! delay slot: r4 = r4 - r4 - T = -T
! Counts 30..25: one shar per step, then fall into the shift-by-24 tail.
___ashiftrt_r4_30:
shar r4
___ashiftrt_r4_29:
shar r4
___ashiftrt_r4_28:
shar r4
___ashiftrt_r4_27:
shar r4
___ashiftrt_r4_26:
shar r4
___ashiftrt_r4_25:
shar r4
! Shift by 24: move the top byte down logically, then sign-extend it.
___ashiftrt_r4_24:
shlr16 r4
shlr8 r4
rts
exts.b r4,r4
! Counts 23..17: one shar per step, then fall into the shift-by-16 tail.
___ashiftrt_r4_23:
shar r4
___ashiftrt_r4_22:
shar r4
___ashiftrt_r4_21:
shar r4
___ashiftrt_r4_20:
shar r4
___ashiftrt_r4_19:
shar r4
___ashiftrt_r4_18:
shar r4
___ashiftrt_r4_17:
shar r4
! Shift by 16: move the top word down logically, then sign-extend it.
___ashiftrt_r4_16:
shlr16 r4
rts
exts.w r4,r4
! Counts 15..2: one shar per step; the last shar sits in the delay slot
! of the rts at ___ashiftrt_r4_1.
___ashiftrt_r4_15:
shar r4
___ashiftrt_r4_14:
shar r4
___ashiftrt_r4_13:
shar r4
___ashiftrt_r4_12:
shar r4
___ashiftrt_r4_11:
shar r4
___ashiftrt_r4_10:
shar r4
___ashiftrt_r4_9:
shar r4
___ashiftrt_r4_8:
shar r4
___ashiftrt_r4_7:
shar r4
___ashiftrt_r4_6:
shar r4
___ashiftrt_r4_5:
shar r4
___ashiftrt_r4_4:
shar r4
___ashiftrt_r4_3:
shar r4
___ashiftrt_r4_2:
shar r4
___ashiftrt_r4_1:
rts
shar r4
! Count 0: nothing to do.
___ashiftrt_r4_0:
rts
nop
#endif
#ifdef L_ashiftrt_n
!
! ___ashrsi3
!
! Arithmetic right shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (masked, then overwritten with the jump-table offset);
! the T bit may be modified by the shift steps
!
/* Dispatch: the count selects a signed byte from the table below, which
   holds the distance from the table to the matching entry point.  The
   __sh1__ variant adds that byte to the table address and uses jmp; the
   other variant uses braf, which branches relative to the address just
   past its delay slot.  That relies on the table starting directly after
   the delay slot, with no padding inserted by the .align in between.  */
.global ___ashrsi3
.align 2
___ashrsi3:
mov #31,r0
and r0,r5 ! r5 = count & 31
mova LOCAL(ashrsi3_table),r0
mov.b @(r0,r5),r5 ! r5 = sign-extended byte offset of the entry point
#ifdef __sh1__
add r5,r0
jmp @r0
#else
braf r5
#endif
mov r4,r0 ! delay slot: work on a copy of the value in r0
.align 2
LOCAL(ashrsi3_table):
.byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
.byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
! Count 31: the result is 0 or -1 depending on the sign bit only.
LOCAL(ashrsi3_31):
rotcl r0 ! T = sign bit
rts
subc r0,r0 ! delay slot: r0 = -T
! Counts 30..25 fall through one-bit steps into the shift-by-24 tail.
LOCAL(ashrsi3_30):
shar r0
LOCAL(ashrsi3_29):
shar r0
LOCAL(ashrsi3_28):
shar r0
LOCAL(ashrsi3_27):
shar r0
LOCAL(ashrsi3_26):
shar r0
LOCAL(ashrsi3_25):
shar r0
! Shift by 24: logical shift, then sign-extend the remaining byte.
LOCAL(ashrsi3_24):
shlr16 r0
shlr8 r0
rts
exts.b r0,r0
! Counts 23..17 fall through one-bit steps into the shift-by-16 tail.
LOCAL(ashrsi3_23):
shar r0
LOCAL(ashrsi3_22):
shar r0
LOCAL(ashrsi3_21):
shar r0
LOCAL(ashrsi3_20):
shar r0
LOCAL(ashrsi3_19):
shar r0
LOCAL(ashrsi3_18):
shar r0
LOCAL(ashrsi3_17):
shar r0
! Shift by 16: logical shift, then sign-extend the remaining word.
LOCAL(ashrsi3_16):
shlr16 r0
rts
exts.w r0,r0
! Counts 15..2: one shar per step; the final shar is the delay slot of
! the rts at entry 1.
LOCAL(ashrsi3_15):
shar r0
LOCAL(ashrsi3_14):
shar r0
LOCAL(ashrsi3_13):
shar r0
LOCAL(ashrsi3_12):
shar r0
LOCAL(ashrsi3_11):
shar r0
LOCAL(ashrsi3_10):
shar r0
LOCAL(ashrsi3_9):
shar r0
LOCAL(ashrsi3_8):
shar r0
LOCAL(ashrsi3_7):
shar r0
LOCAL(ashrsi3_6):
shar r0
LOCAL(ashrsi3_5):
shar r0
LOCAL(ashrsi3_4):
shar r0
LOCAL(ashrsi3_3):
shar r0
LOCAL(ashrsi3_2):
shar r0
LOCAL(ashrsi3_1):
rts
shar r0
! Count 0: r0 already holds the unshifted value.
LOCAL(ashrsi3_0):
rts
nop
#endif
#ifdef L_ashiftlt
!
! ___ashlsi3
!
! Left shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (masked, then overwritten with the jump-table offset);
! the T bit may be modified by the one-bit shll steps
!
/* Dispatch: the count selects a signed byte from the table below, which
   holds the distance from the table to the matching entry point.  The
   __sh1__ variant adds that byte to the table address and uses jmp; the
   other variant uses braf, which branches relative to the address just
   past its delay slot.  That relies on the table starting directly after
   the delay slot, with no padding inserted by the .align in between.
   Each group of entry points below builds its count from shll2 steps
   that fall through into a tail made of shll, shll8 and shll16.  */
.global ___ashlsi3
.align 2
___ashlsi3:
mov #31,r0
and r0,r5 ! r5 = count & 31
mova LOCAL(ashlsi3_table),r0
mov.b @(r0,r5),r5 ! r5 = sign-extended byte offset of the entry point
#ifdef __sh1__
add r5,r0
jmp @r0
#else
braf r5
#endif
mov r4,r0 ! delay slot: work on a copy of the value in r0
.align 2
LOCAL(ashlsi3_table):
.byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
! Even counts 6, 4, 2: shll2 steps only.
LOCAL(ashlsi3_6):
shll2 r0
LOCAL(ashlsi3_4):
shll2 r0
LOCAL(ashlsi3_2):
rts
shll2 r0
! Odd counts 7, 5, 3, 1: shll2 steps, then one shll in the delay slot.
LOCAL(ashlsi3_7):
shll2 r0
LOCAL(ashlsi3_5):
shll2 r0
LOCAL(ashlsi3_3):
shll2 r0
LOCAL(ashlsi3_1):
rts
shll r0
! Counts 14, 12, 10, 8: shll2 steps, then shll8.
LOCAL(ashlsi3_14):
shll2 r0
LOCAL(ashlsi3_12):
shll2 r0
LOCAL(ashlsi3_10):
shll2 r0
LOCAL(ashlsi3_8):
rts
shll8 r0
! Counts 15, 13, 11, 9: shll2 steps, then shll8 and shll.
LOCAL(ashlsi3_15):
shll2 r0
LOCAL(ashlsi3_13):
shll2 r0
LOCAL(ashlsi3_11):
shll2 r0
LOCAL(ashlsi3_9):
shll8 r0
rts
shll r0
! Counts 22, 20, 18, 16: shll2 steps, then shll16.
LOCAL(ashlsi3_22):
shll2 r0
LOCAL(ashlsi3_20):
shll2 r0
LOCAL(ashlsi3_18):
shll2 r0
LOCAL(ashlsi3_16):
rts
shll16 r0
! Counts 23, 21, 19, 17: shll2 steps, then shll16 and shll.
LOCAL(ashlsi3_23):
shll2 r0
LOCAL(ashlsi3_21):
shll2 r0
LOCAL(ashlsi3_19):
shll2 r0
LOCAL(ashlsi3_17):
shll16 r0
rts
shll r0
! Counts 30, 28, 26, 24: shll2 steps, then shll16 and shll8.
LOCAL(ashlsi3_30):
shll2 r0
LOCAL(ashlsi3_28):
shll2 r0
LOCAL(ashlsi3_26):
shll2 r0
LOCAL(ashlsi3_24):
shll16 r0
rts
shll8 r0
! Counts 31, 29, 27, 25: shll2 steps, then shll16, shll8 and shll.
LOCAL(ashlsi3_31):
shll2 r0
LOCAL(ashlsi3_29):
shll2 r0
LOCAL(ashlsi3_27):
shll2 r0
LOCAL(ashlsi3_25):
shll16 r0
shll8 r0
rts
shll r0
! Count 0: r0 already holds the unshifted value.
LOCAL(ashlsi3_0):
rts
nop
#endif
#ifdef L_lshiftrt
!
! ___lshrsi3
!
! Logical right shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (masked, then overwritten with the jump-table offset);
! the T bit may be modified by the one-bit shlr steps
!
/* Dispatch: the count selects a signed byte from the table below, which
   holds the distance from the table to the matching entry point.  The
   __sh1__ variant adds that byte to the table address and uses jmp; the
   other variant uses braf, which branches relative to the address just
   past its delay slot.  That relies on the table starting directly after
   the delay slot, with no padding inserted by the .align in between.
   Each group of entry points below builds its count from shlr2 steps
   that fall through into a tail made of shlr, shlr8 and shlr16.  */
.global ___lshrsi3
.align 2
___lshrsi3:
mov #31,r0
and r0,r5 ! r5 = count & 31
mova LOCAL(lshrsi3_table),r0
mov.b @(r0,r5),r5 ! r5 = sign-extended byte offset of the entry point
#ifdef __sh1__
add r5,r0
jmp @r0
#else
braf r5
#endif
mov r4,r0 ! delay slot: work on a copy of the value in r0
.align 2
LOCAL(lshrsi3_table):
.byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
! Even counts 6, 4, 2: shlr2 steps only.
LOCAL(lshrsi3_6):
shlr2 r0
LOCAL(lshrsi3_4):
shlr2 r0
LOCAL(lshrsi3_2):
rts
shlr2 r0
! Odd counts 7, 5, 3, 1: shlr2 steps, then one shlr in the delay slot.
LOCAL(lshrsi3_7):
shlr2 r0
LOCAL(lshrsi3_5):
shlr2 r0
LOCAL(lshrsi3_3):
shlr2 r0
LOCAL(lshrsi3_1):
rts
shlr r0
! Counts 14, 12, 10, 8: shlr2 steps, then shlr8.
LOCAL(lshrsi3_14):
shlr2 r0
LOCAL(lshrsi3_12):
shlr2 r0
LOCAL(lshrsi3_10):
shlr2 r0
LOCAL(lshrsi3_8):
rts
shlr8 r0
! Counts 15, 13, 11, 9: shlr2 steps, then shlr8 and shlr.
LOCAL(lshrsi3_15):
shlr2 r0
LOCAL(lshrsi3_13):
shlr2 r0
LOCAL(lshrsi3_11):
shlr2 r0
LOCAL(lshrsi3_9):
shlr8 r0
rts
shlr r0
! Counts 22, 20, 18, 16: shlr2 steps, then shlr16.
LOCAL(lshrsi3_22):
shlr2 r0
LOCAL(lshrsi3_20):
shlr2 r0
LOCAL(lshrsi3_18):
shlr2 r0
LOCAL(lshrsi3_16):
rts
shlr16 r0
! Counts 23, 21, 19, 17: shlr2 steps, then shlr16 and shlr.
LOCAL(lshrsi3_23):
shlr2 r0
LOCAL(lshrsi3_21):
shlr2 r0
LOCAL(lshrsi3_19):
shlr2 r0
LOCAL(lshrsi3_17):
shlr16 r0
rts
shlr r0
! Counts 30, 28, 26, 24: shlr2 steps, then shlr16 and shlr8.
LOCAL(lshrsi3_30):
shlr2 r0
LOCAL(lshrsi3_28):
shlr2 r0
LOCAL(lshrsi3_26):
shlr2 r0
LOCAL(lshrsi3_24):
shlr16 r0
rts
shlr8 r0
! Counts 31, 29, 27, 25: shlr2 steps, then shlr16, shlr8 and shlr.
LOCAL(lshrsi3_31):
shlr2 r0
LOCAL(lshrsi3_29):
shlr2 r0
LOCAL(lshrsi3_27):
shlr2 r0
LOCAL(lshrsi3_25):
shlr16 r0
shlr8 r0
rts
shlr r0
! Count 0: r0 already holds the unshifted value.
LOCAL(lshrsi3_0):
rts
nop
#endif
#ifdef L_movstr
/* Longword block-copy helpers.

   ___movstrSI<N> (N = 4, 8, ... 64) copies N bytes from the address in
   r5 to the address in r4, highest offset first, by falling through a
   chain of load/store pairs that ends at ___movstrSI0.

   ___movstr copies 64 bytes per pass and subtracts 16 from r6 after
   each pass.  While r6 stays positive it advances r4 and r5 by 64 and
   repeats.  Once r6 is zero or negative it branches to done, which
   advances both pointers by 64 and jumps to ___movstrSI0 + 4*r6.  Each
   load/store pair occupies four bytes, so that copies -r6 further
   longwords.

   In:       r4 = destination, r5 = source, r6 = control count (___movstr)
   Clobbers: r0, and for ___movstr also r4, r5, r6 and the T bit.  */
.text
! done all the large groups, do the remainder
! jump to movstr+
done:
add #64,r5
mova ___movstrSI0,r0
shll2 r6 ! r6 = 4 * r6: byte distance back from ___movstrSI0
add r6,r0
jmp @r0
add #64,r4 ! delay slot: advance the destination as well
.align 4
.global ___movstrSI64
___movstrSI64:
mov.l @(60,r5),r0
mov.l r0,@(60,r4)
.global ___movstrSI60
___movstrSI60:
mov.l @(56,r5),r0
mov.l r0,@(56,r4)
.global ___movstrSI56
___movstrSI56:
mov.l @(52,r5),r0
mov.l r0,@(52,r4)
.global ___movstrSI52
___movstrSI52:
mov.l @(48,r5),r0
mov.l r0,@(48,r4)
.global ___movstrSI48
___movstrSI48:
mov.l @(44,r5),r0
mov.l r0,@(44,r4)
.global ___movstrSI44
___movstrSI44:
mov.l @(40,r5),r0
mov.l r0,@(40,r4)
.global ___movstrSI40
___movstrSI40:
mov.l @(36,r5),r0
mov.l r0,@(36,r4)
.global ___movstrSI36
___movstrSI36:
mov.l @(32,r5),r0
mov.l r0,@(32,r4)
.global ___movstrSI32
___movstrSI32:
mov.l @(28,r5),r0
mov.l r0,@(28,r4)
.global ___movstrSI28
___movstrSI28:
mov.l @(24,r5),r0
mov.l r0,@(24,r4)
.global ___movstrSI24
___movstrSI24:
mov.l @(20,r5),r0
mov.l r0,@(20,r4)
.global ___movstrSI20
___movstrSI20:
mov.l @(16,r5),r0
mov.l r0,@(16,r4)
.global ___movstrSI16
___movstrSI16:
mov.l @(12,r5),r0
mov.l r0,@(12,r4)
.global ___movstrSI12
___movstrSI12:
mov.l @(8,r5),r0
mov.l r0,@(8,r4)
.global ___movstrSI8
___movstrSI8:
mov.l @(4,r5),r0
mov.l r0,@(4,r4)
.global ___movstrSI4
___movstrSI4:
mov.l @(0,r5),r0
mov.l r0,@(0,r4)
___movstrSI0:
rts
! The delay slot used to hold a three-operand or of r0 with itself, a
! form the SH or instruction does not have.  A plain nop has the same
! effect and assembles everywhere.
nop
.align 4
.global ___movstr
___movstr:
mov.l @(60,r5),r0
mov.l r0,@(60,r4)
mov.l @(56,r5),r0
mov.l r0,@(56,r4)
mov.l @(52,r5),r0
mov.l r0,@(52,r4)
mov.l @(48,r5),r0
mov.l r0,@(48,r4)
mov.l @(44,r5),r0
mov.l r0,@(44,r4)
mov.l @(40,r5),r0
mov.l r0,@(40,r4)
mov.l @(36,r5),r0
mov.l r0,@(36,r4)
mov.l @(32,r5),r0
mov.l r0,@(32,r4)
mov.l @(28,r5),r0
mov.l r0,@(28,r4)
mov.l @(24,r5),r0
mov.l r0,@(24,r4)
mov.l @(20,r5),r0
mov.l r0,@(20,r4)
mov.l @(16,r5),r0
mov.l r0,@(16,r4)
mov.l @(12,r5),r0
mov.l r0,@(12,r4)
mov.l @(8,r5),r0
mov.l r0,@(8,r4)
mov.l @(4,r5),r0
mov.l r0,@(4,r4)
mov.l @(0,r5),r0
mov.l r0,@(0,r4)
add #-16,r6 ! one 64-byte group (16 longwords) done
cmp/pl r6 ! T = (r6 > 0): another full pass is due
bf done
add #64,r5
bra ___movstr
add #64,r4 ! delay slot
#endif
#ifdef L_movstr_i4
#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
/* Block-copy helpers for SH4 that keep four longwords in flight in
   r0..r3.

   In:       r4 = destination, r5 = source, r6 = loop count (not used by
             ___movstrSI12_i4)
   Clobbers: r0-r3 and r5; the _even and _odd entries also change r4, r6
             and the T bit.

   Each dt step on r6 accounts for eight bytes.  Tracing the code,
   ___movstr_i4_even copies 8*(r6+1) bytes and ___movstr_i4_odd copies
   8*r6+12 bytes, for r6 >= 1 on entry.  ___movstrSI12_i4 copies exactly
   12 bytes.

   The internal labels use the LOCAL() macro, like the shift routines in
   this file, so that they become assembler-local names under ELF.  */
.text
.global ___movstr_i4_even
.global ___movstr_i4_odd
.global ___movstrSI12_i4
.p2align 5
! Exit taken from the loop when the count runs out with r0/r1 pending.
LOCAL(movstr_2mod4_end):
mov.l r0,@(16,r4)
rts
mov.l r1,@(20,r4)
.p2align 2
___movstr_i4_odd:
mov.l @r5+,r1
add #-4,r4 ! bias r4 so the offsets below line up with the loop body
mov.l @r5+,r2
mov.l @r5+,r3
mov.l r1,@(4,r4)
mov.l r2,@(8,r4)
LOCAL(movstr_loop):
mov.l r3,@(12,r4)
dt r6 ! r6 = r6 - 1, T = (r6 == 0)
mov.l @r5+,r0
bt/s LOCAL(movstr_2mod4_end)
mov.l @r5+,r1 ! delay slot
add #16,r4
LOCAL(movstr_start_even):
mov.l @r5+,r2
mov.l @r5+,r3
mov.l r0,@r4
dt r6 ! r6 = r6 - 1, T = (r6 == 0)
mov.l r1,@(4,r4)
bf/s LOCAL(movstr_loop)
mov.l r2,@(8,r4) ! delay slot
rts
mov.l r3,@(12,r4)
___movstr_i4_even:
mov.l @r5+,r0
bra LOCAL(movstr_start_even)
mov.l @r5+,r1 ! delay slot
.p2align 4
___movstrSI12_i4:
mov.l @r5,r0
mov.l @(4,r5),r1
mov.l @(8,r5),r2
mov.l r0,@r4
mov.l r1,@(4,r4)
rts
mov.l r2,@(8,r4)
#endif /* __SH4__ || __SH4_SINGLE__ || __SH4_SINGLE_ONLY__ */
#endif
#ifdef L_mulsi3
.global ___mulsi3
! 32 x 32 -> 32 bit multiply built from 16 x 16 mulu steps.
!
! r4 = aabb
! r5 = ccdd
! r0 = aabb*ccdd via partial products
!
! if aa == 0 and cc == 0
! r0 = bb*dd
!
! else
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
!
! Clobbers r1 (long path only), r2, r3, macl and the T bit.
!
___mulsi3:
mulu r4,r5 ! multiply the lsws macl=bb*dd
mov r5,r3 ! r3 = ccdd
swap.w r4,r2 ! r2 = bbaa
xtrct r2,r3 ! r3 = aacc
tst r3,r3 ! msws zero ?
bf hiset
rts ! yes - then we have the answer
sts macl,r0
hiset: sts macl,r0 ! r0 = bb*dd
mulu r2,r5 ! brewing macl = aa*dd
sts macl,r1
mulu r3,r4 ! brewing macl = cc*bb
sts macl,r2
add r1,r2 ! r2 = aa*dd + cc*bb
shll16 r2 ! scale the cross terms by 65536
rts
add r2,r0 ! delay slot: add them to bb*dd
#endif
#ifdef L_sdivsi3_i4
.title "SH DIVIDE"
!! 4 byte integer Divide code for the Hitachi SH
/* ___sdivsi3_i4: signed 32-bit division done in the FPU.  Both operands
   are converted to double, divided, and the quotient is converted back
   to an integer with ftrc.  The result is left in fpul, not in r0.
   There is no check for a zero divisor.  */
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber dr0, dr2
.global ___sdivsi3_i4
___sdivsi3_i4:
lds r4,fpul
float fpul,dr0 ! dr0 = (double) dividend
lds r5,fpul
float fpul,dr2 ! dr2 = (double) divisor
fdiv dr2,dr0
rts
ftrc dr0,fpul ! delay slot: fpul = quotient converted to integer
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
/* Same computation, but fpscr is saved on the stack, replaced by
   0x00080000 for the duration of the divide, and restored in the delay
   slot of the return.  Bit 19 is the PR (double precision) bit in the
   SH-4 FPSCR layout.  */
.global ___sdivsi3_i4
___sdivsi3_i4:
sts.l fpscr,@-r15
mov #8,r2
swap.w r2,r2 ! r2 = 8 << 16 = 0x00080000
lds r2,fpscr
lds r4,fpul
float fpul,dr0
lds r5,fpul
float fpul,dr2
fdiv dr2,dr0
ftrc dr0,fpul
rts
lds.l @r15+,fpscr
#endif /* __SH4__ / __SH4_SINGLE__ / __SH4_SINGLE_ONLY__ */
#endif
#ifdef L_sdivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
sh3e code. */
#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!
!! args in r4 and r5, result in r0 clobber r1,r2,r3
.global ___sdivsi3
___sdivsi3:
mov r4,r1
mov r5,r0
tst r0,r0
bt div0
mov #0,r2
div0s r2,r1
subc r3,r3
subc r2,r1
div0s r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
div1 r0,r3
rotcl r1
addc r2,r1
rts
mov r1,r0
div0: rts
mov #0,r0
#endif /* ! __SH4__ */
#endif
#ifdef L_udivsi3_i4
.title "SH DIVIDE"
!! 4 byte integer Divide code for the Hitachi SH
/* ___udivsi3_i4: unsigned 32-bit division done in the FPU.  The result
   is left in fpul, not in r0.

   float only converts signed integers, so each operand is first biased
   by XOR with 0x80000000, converted to double, and then corrected by
   adding 2147483648.0 (kept in dr4).  The quotient is converted back
   with ftrc.

   A divisor of 0 or 1 takes the trivial path, which returns the
   dividend unchanged.  */
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
.global ___udivsi3_i4
___udivsi3_i4:
mov #1,r1
cmp/hi r1,r5 ! T = (divisor > 1), unsigned
bf trivial
rotr r1 ! r1 = 0x80000000
xor r1,r4 ! bias the dividend
lds r4,fpul
mova L1,r0
#ifdef FMOVD_WORKS
fmov.d @r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
fmov.s @r0+,fr5
fmov.s @r0,fr4
#else
fmov.s @r0+,fr4
fmov.s @r0,fr5
#endif
#endif
float fpul,dr0
xor r1,r5 ! bias the divisor
lds r5,fpul
float fpul,dr2
fadd dr4,dr0 ! undo the bias on the dividend
fadd dr4,dr2 ! undo the bias on the divisor
fdiv dr2,dr0
rts
ftrc dr0,fpul ! delay slot: fpul = quotient
trivial:
rts
lds r4,fpul ! delay slot: result is the dividend itself
.align 2
/* NOTE(review): under FMOVD_WORKS the fmov.d load may need this constant
   on an 8-byte boundary - confirm against the SH-4 manual.  */
L1:
.double 2147483648
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
/* Same computation as above, but fpscr is saved on the stack, loaded
   from the word at L1 for the duration of the divide, and restored in
   the delay slot of the return.  */
.global ___udivsi3_i4
___udivsi3_i4:
mov #1,r1
cmp/hi r1,r5 ! T = (divisor > 1), unsigned
bf trivial
sts.l fpscr,@-r15
mova L1,r0
lds.l @r0+,fpscr ! r0 now points at the double constant
rotr r1 ! r1 = 0x80000000
xor r1,r4 ! bias the dividend
lds r4,fpul
#ifdef FMOVD_WORKS
fmov.d @r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
fmov.s @r0+,fr5
fmov.s @r0,fr4
#else
fmov.s @r0+,fr4
fmov.s @r0,fr5
#endif
#endif
float fpul,dr0
xor r1,r5 ! bias the divisor
lds r5,fpul
float fpul,dr2
fadd dr4,dr0 ! undo the bias on the dividend
fadd dr4,dr2 ! undo the bias on the divisor
fdiv dr2,dr0
ftrc dr0,fpul
rts
lds.l @r15+,fpscr
trivial:
rts
lds r4,fpul ! delay slot: result is the dividend itself
.align 2
L1:
/* FPSCR image used during the divide.  Bit 19 (0x80000) selects double
   precision.  Bit 20 (0x100000) selects 64-bit fmov transfers, so it
   must be set exactly when the code above loads the constant with
   fmov.d, which is decided by FMOVD_WORKS and not by the byte order.
   Setting it while the fmov.s pair is in use would turn each fmov.s
   into a 64-bit transfer.  */
#ifndef FMOVD_WORKS
.long 0x80000
#else
.long 0x180000
#endif
/* NOTE(review): under FMOVD_WORKS the fmov.d load may need the double
   below on an 8-byte boundary - confirm against the SH-4 manual.  */
.double 2147483648
#endif /* __SH4__ / __SH4_SINGLE__ / __SH4_SINGLE_ONLY__ */
#endif
#ifdef L_udivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
sh3e code. */
#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!
!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
/* ___udivsi3: unsigned 32-bit division using 32 rotcl/div1 steps.
   r4 holds the dividend and collects the quotient bits, r5 holds the
   divisor and r0 is the working remainder, replaced by the quotient on
   return.  There is no check for a zero divisor.
   NOTE(review): the header above lists pr as clobbered, but nothing in
   this routine writes pr - confirm.  The labels longway, shortway,
   vshortway and ret are not referenced within this routine.  */
.global ___udivsi3
___udivsi3:
longway:
mov #0,r0
div0u
! get one bit from the msb of the numerator into the T
! bit and divide it by whats in r5. Put the answer bit
! into the T bit so it can come out again at the bottom
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
shortway:
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
vshortway:
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ; div1 r5,r0
rotcl r4 ! shift in the last quotient bit
ret: rts
mov r4,r0 ! delay slot: return the quotient in r0
#endif /* __SH4__ */
#endif
#ifdef L_set_fpscr
#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
/* ___set_fpscr: load fpscr from r4 and derive two variants of that
   value, which are stored in the two-word array ___fpscr_values.

   The work is done on the word-swapped value, so bits 3 and 4 of r0 are
   bits 19 and 20 of the FPSCR image (PR and SZ in the SH-4 FPSCR
   layout).  The first variant has bit 19 set, and has bit 20 set only
   when FMOVD_WORKS is defined.  The second variant has both bits clear.
   With __SH4__ the first variant goes to word 1 and the second to
   word 0; otherwise the order is reversed.

   In:       r4 = new fpscr value
   Clobbers: r0, r1, r2, r3.  */
.global ___set_fpscr
___set_fpscr:
lds r4,fpscr
mov.l ___set_fpscr_L1,r1 ! r1 = address of ___fpscr_values
swap.w r4,r0 ! bring bits 16..31 down so or/xor immediates can reach them
or #24,r0 ! set bits 19 and 20 of the image
#ifndef FMOVD_WORKS
xor #16,r0 ! clear bit 20 again
#endif
#if defined(__SH4__)
swap.w r0,r3
mov.l r3,@(4,r1)
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
swap.w r0,r2
mov.l r2,@r1
#endif
! Second variant: clear whichever of bits 19 and 20 are still set.
#ifndef FMOVD_WORKS
xor #8,r0
#else
xor #24,r0
#endif
#if defined(__SH4__)
swap.w r0,r2
rts
mov.l r2,@r1
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
swap.w r0,r3
rts
mov.l r3,@(4,r1)
#endif
.align 2
___set_fpscr_L1:
.long ___fpscr_values
/* Storage for the two fpscr variants: 8 bytes of common data, with
   4-byte alignment requested under ELF.  */
#ifdef __ELF__
.comm ___fpscr_values,8,4
#else
.comm ___fpscr_values,8
#endif /* ELF */
#endif /* SH3E / SH4 */
#endif /* L_set_fpscr */