/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

!! libgcc1 routines for the Hitachi SH cpu.
!! Contributed by Steve Chamberlain.
!! sac@cygnus.com

!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
!! recoded in assembly by Toshiyasu Morita
!! tm@netcom.com

/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
   ELF local label prefixes by J"orn Rennecke
   amylaar@cygnus.com  */

/* This file is run through the C preprocessor before it is assembled.
   Each routine lives in its own L_xxx conditional so that one object
   can be produced per routine.  In assembler text `!' starts a comment
   that runs to the end of the line and `;' separates statements, so
   every directive, label and instruction must stay on its own line.

   All SH branch instructions used here (rts, jmp, bra, braf, bt/s,
   bf/s) have a delay slot: the instruction written after the branch
   executes before the branch takes effect.  */

/* LOCAL(X) builds a file-local label; ELF targets use the .L prefix.  */
#ifdef __ELF__
#define LOCAL(X) .L_##X
#else
#define LOCAL(X) L_##X
#endif

/* GLOBAL(X) builds an exported symbol name: two leading underscores on
   linux targets, three everywhere else.  */
#ifdef __linux__
#define GLOBAL(X) __##X
#endif

#ifndef GLOBAL
#define GLOBAL(X) ___##X
#endif

#ifdef L_ashiftrt

/* ashiftrt_r4_N: arithmetic right shift of r4 by the constant N, result
   left in r4.  The entry points form fall-through chains of single-bit
   shar instructions that end in a fast path for 24, 16 or 1 bits.  */

	.global	GLOBAL(ashiftrt_r4_0)
	.global	GLOBAL(ashiftrt_r4_1)
	.global	GLOBAL(ashiftrt_r4_2)
	.global	GLOBAL(ashiftrt_r4_3)
	.global	GLOBAL(ashiftrt_r4_4)
	.global	GLOBAL(ashiftrt_r4_5)
	.global	GLOBAL(ashiftrt_r4_6)
	.global	GLOBAL(ashiftrt_r4_7)
	.global	GLOBAL(ashiftrt_r4_8)
	.global	GLOBAL(ashiftrt_r4_9)
	.global	GLOBAL(ashiftrt_r4_10)
	.global	GLOBAL(ashiftrt_r4_11)
	.global	GLOBAL(ashiftrt_r4_12)
	.global	GLOBAL(ashiftrt_r4_13)
	.global	GLOBAL(ashiftrt_r4_14)
	.global	GLOBAL(ashiftrt_r4_15)
	.global	GLOBAL(ashiftrt_r4_16)
	.global	GLOBAL(ashiftrt_r4_17)
	.global	GLOBAL(ashiftrt_r4_18)
	.global	GLOBAL(ashiftrt_r4_19)
	.global	GLOBAL(ashiftrt_r4_20)
	.global	GLOBAL(ashiftrt_r4_21)
	.global	GLOBAL(ashiftrt_r4_22)
	.global	GLOBAL(ashiftrt_r4_23)
	.global	GLOBAL(ashiftrt_r4_24)
	.global	GLOBAL(ashiftrt_r4_25)
	.global	GLOBAL(ashiftrt_r4_26)
	.global	GLOBAL(ashiftrt_r4_27)
	.global	GLOBAL(ashiftrt_r4_28)
	.global	GLOBAL(ashiftrt_r4_29)
	.global	GLOBAL(ashiftrt_r4_30)
	.global	GLOBAL(ashiftrt_r4_31)
	.global	GLOBAL(ashiftrt_r4_32)

	.align	1
/* 31 and 32 share one body: move the sign bit into T, then compute
   r4 - r4 - T, which is 0 or -1.  */
GLOBAL(ashiftrt_r4_32):
GLOBAL(ashiftrt_r4_31):
	rotcl	r4
	rts
	subc	r4,r4

/* 25..30: single-bit shifts, then fall into the 24-bit fast path.  */
GLOBAL(ashiftrt_r4_30):
	shar	r4
GLOBAL(ashiftrt_r4_29):
	shar	r4
GLOBAL(ashiftrt_r4_28):
	shar	r4
GLOBAL(ashiftrt_r4_27):
	shar	r4
GLOBAL(ashiftrt_r4_26):
	shar	r4
GLOBAL(ashiftrt_r4_25):
	shar	r4
/* 24: logical shift by 16 + 8, then sign-extend the remaining byte.  */
GLOBAL(ashiftrt_r4_24):
	shlr16	r4
	shlr8	r4
	rts
	exts.b	r4,r4

/* 17..23: single-bit shifts, then fall into the 16-bit fast path.  */
GLOBAL(ashiftrt_r4_23):
	shar	r4
GLOBAL(ashiftrt_r4_22):
	shar	r4
GLOBAL(ashiftrt_r4_21):
	shar	r4
GLOBAL(ashiftrt_r4_20):
	shar	r4
GLOBAL(ashiftrt_r4_19):
	shar	r4
GLOBAL(ashiftrt_r4_18):
	shar	r4
GLOBAL(ashiftrt_r4_17):
	shar	r4
/* 16: logical shift by 16, then sign-extend the remaining word.  */
GLOBAL(ashiftrt_r4_16):
	shlr16	r4
	rts
	exts.w	r4,r4

/* 1..15: one shar per remaining bit; the last one sits in the rts
   delay slot.  */
GLOBAL(ashiftrt_r4_15):
	shar	r4
GLOBAL(ashiftrt_r4_14):
	shar	r4
GLOBAL(ashiftrt_r4_13):
	shar	r4
GLOBAL(ashiftrt_r4_12):
	shar	r4
GLOBAL(ashiftrt_r4_11):
	shar	r4
GLOBAL(ashiftrt_r4_10):
	shar	r4
GLOBAL(ashiftrt_r4_9):
	shar	r4
GLOBAL(ashiftrt_r4_8):
	shar	r4
GLOBAL(ashiftrt_r4_7):
	shar	r4
GLOBAL(ashiftrt_r4_6):
	shar	r4
GLOBAL(ashiftrt_r4_5):
	shar	r4
GLOBAL(ashiftrt_r4_4):
	shar	r4
GLOBAL(ashiftrt_r4_3):
	shar	r4
GLOBAL(ashiftrt_r4_2):
	shar	r4
GLOBAL(ashiftrt_r4_1):
	rts
	shar	r4

GLOBAL(ashiftrt_r4_0):
	rts
	nop
#endif

#ifdef L_ashiftrt_n

!
! GLOBAL(ashrsi3)
!
! Arithmetic right shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (overwritten with the jump table offset)
! T bit (depending on the shift count)
!

	.global	GLOBAL(ashrsi3)
	.align	2
GLOBAL(ashrsi3):
	mov	#31,r0
	and	r0,r5
	mova	LOCAL(ashrsi3_table),r0
	mov.b	@(r0,r5),r5		/* r5 = offset of handler from table */
#ifdef __sh1__
	add	r5,r0
	jmp	@r0
#else
	/* braf is relative to the address four bytes past itself, which
	   is where the table starts (right after the delay slot).  */
	braf	r5
#endif
	mov	r4,r0			/* delay slot: r0 = value to shift */

	.align	2
LOCAL(ashrsi3_table):
	.byte	LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
	.byte	LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)

/* 31: sign bit into T, then r0 - r0 - T gives 0 or -1.  */
LOCAL(ashrsi3_31):
	rotcl	r0
	rts
	subc	r0,r0

LOCAL(ashrsi3_30):
	shar	r0
LOCAL(ashrsi3_29):
	shar	r0
LOCAL(ashrsi3_28):
	shar	r0
LOCAL(ashrsi3_27):
	shar	r0
LOCAL(ashrsi3_26):
	shar	r0
LOCAL(ashrsi3_25):
	shar	r0
LOCAL(ashrsi3_24):
	shlr16	r0
	shlr8	r0
	rts
	exts.b	r0,r0

LOCAL(ashrsi3_23):
	shar	r0
LOCAL(ashrsi3_22):
	shar	r0
LOCAL(ashrsi3_21):
	shar	r0
LOCAL(ashrsi3_20):
	shar	r0
LOCAL(ashrsi3_19):
	shar	r0
LOCAL(ashrsi3_18):
	shar	r0
LOCAL(ashrsi3_17):
	shar	r0
LOCAL(ashrsi3_16):
	shlr16	r0
	rts
	exts.w	r0,r0

LOCAL(ashrsi3_15):
	shar	r0
LOCAL(ashrsi3_14):
	shar	r0
LOCAL(ashrsi3_13):
	shar	r0
LOCAL(ashrsi3_12):
	shar	r0
LOCAL(ashrsi3_11):
	shar	r0
LOCAL(ashrsi3_10):
	shar	r0
LOCAL(ashrsi3_9):
	shar	r0
LOCAL(ashrsi3_8):
	shar	r0
LOCAL(ashrsi3_7):
	shar	r0
LOCAL(ashrsi3_6):
	shar	r0
LOCAL(ashrsi3_5):
	shar	r0
LOCAL(ashrsi3_4):
	shar	r0
LOCAL(ashrsi3_3):
	shar	r0
LOCAL(ashrsi3_2):
	shar	r0
LOCAL(ashrsi3_1):
	rts
	shar	r0

LOCAL(ashrsi3_0):
	rts
	nop

#endif

#ifdef L_ashiftlt

!
! GLOBAL(ashlsi3)
!
! Left shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (overwritten with the jump table offset)
! T bit (depending on the shift count)
!
	.global	GLOBAL(ashlsi3)
	.align	2
GLOBAL(ashlsi3):
	mov	#31,r0
	and	r0,r5
	mova	LOCAL(ashlsi3_table),r0
	mov.b	@(r0,r5),r5		/* r5 = offset of handler from table */
#ifdef __sh1__
	add	r5,r0
	jmp	@r0
#else
	/* braf is relative to the address four bytes past itself, which
	   is where the table starts (right after the delay slot).  */
	braf	r5
#endif
	mov	r4,r0			/* delay slot: r0 = value to shift */

	.align	2
LOCAL(ashlsi3_table):
	.byte	LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
	.byte	LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)

/* Each group below is a fall-through chain of two-bit shifts that ends
   in a 0, 8, 16 or 24 bit base shift, plus one single-bit shift for
   the odd counts.  */
LOCAL(ashlsi3_6):
	shll2	r0
LOCAL(ashlsi3_4):
	shll2	r0
LOCAL(ashlsi3_2):
	rts
	shll2	r0

LOCAL(ashlsi3_7):
	shll2	r0
LOCAL(ashlsi3_5):
	shll2	r0
LOCAL(ashlsi3_3):
	shll2	r0
LOCAL(ashlsi3_1):
	rts
	shll	r0

LOCAL(ashlsi3_14):
	shll2	r0
LOCAL(ashlsi3_12):
	shll2	r0
LOCAL(ashlsi3_10):
	shll2	r0
LOCAL(ashlsi3_8):
	rts
	shll8	r0

LOCAL(ashlsi3_15):
	shll2	r0
LOCAL(ashlsi3_13):
	shll2	r0
LOCAL(ashlsi3_11):
	shll2	r0
LOCAL(ashlsi3_9):
	shll8	r0
	rts
	shll	r0

LOCAL(ashlsi3_22):
	shll2	r0
LOCAL(ashlsi3_20):
	shll2	r0
LOCAL(ashlsi3_18):
	shll2	r0
LOCAL(ashlsi3_16):
	rts
	shll16	r0

LOCAL(ashlsi3_23):
	shll2	r0
LOCAL(ashlsi3_21):
	shll2	r0
LOCAL(ashlsi3_19):
	shll2	r0
LOCAL(ashlsi3_17):
	shll16	r0
	rts
	shll	r0

LOCAL(ashlsi3_30):
	shll2	r0
LOCAL(ashlsi3_28):
	shll2	r0
LOCAL(ashlsi3_26):
	shll2	r0
LOCAL(ashlsi3_24):
	shll16	r0
	rts
	shll8	r0

LOCAL(ashlsi3_31):
	shll2	r0
LOCAL(ashlsi3_29):
	shll2	r0
LOCAL(ashlsi3_27):
	shll2	r0
LOCAL(ashlsi3_25):
	shll16	r0
	shll8	r0
	rts
	shll	r0

LOCAL(ashlsi3_0):
	rts
	nop

#endif

#ifdef L_lshiftrt

!
! GLOBAL(lshrsi3)
!
! Logical right shift by a variable count.
!
! Entry:
!
! r4: Value to shift
! r5: Shifts (only the low five bits are used)
!
! Exit:
!
! r0: Result
!
! Destroys:
!
! r5 (overwritten with the jump table offset)
! T bit (depending on the shift count)
!
	.global	GLOBAL(lshrsi3)
	.align	2
GLOBAL(lshrsi3):
	mov	#31,r0
	and	r0,r5
	mova	LOCAL(lshrsi3_table),r0
	mov.b	@(r0,r5),r5		/* r5 = offset of handler from table */
#ifdef __sh1__
	add	r5,r0
	jmp	@r0
#else
	/* braf is relative to the address four bytes past itself, which
	   is where the table starts (right after the delay slot).  */
	braf	r5
#endif
	mov	r4,r0			/* delay slot: r0 = value to shift */

	.align	2
LOCAL(lshrsi3_table):
	.byte	LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
	.byte	LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)

/* Each group below is a fall-through chain of two-bit shifts that ends
   in a 0, 8, 16 or 24 bit base shift, plus one single-bit shift for
   the odd counts.  */
LOCAL(lshrsi3_6):
	shlr2	r0
LOCAL(lshrsi3_4):
	shlr2	r0
LOCAL(lshrsi3_2):
	rts
	shlr2	r0

LOCAL(lshrsi3_7):
	shlr2	r0
LOCAL(lshrsi3_5):
	shlr2	r0
LOCAL(lshrsi3_3):
	shlr2	r0
LOCAL(lshrsi3_1):
	rts
	shlr	r0

LOCAL(lshrsi3_14):
	shlr2	r0
LOCAL(lshrsi3_12):
	shlr2	r0
LOCAL(lshrsi3_10):
	shlr2	r0
LOCAL(lshrsi3_8):
	rts
	shlr8	r0

LOCAL(lshrsi3_15):
	shlr2	r0
LOCAL(lshrsi3_13):
	shlr2	r0
LOCAL(lshrsi3_11):
	shlr2	r0
LOCAL(lshrsi3_9):
	shlr8	r0
	rts
	shlr	r0

LOCAL(lshrsi3_22):
	shlr2	r0
LOCAL(lshrsi3_20):
	shlr2	r0
LOCAL(lshrsi3_18):
	shlr2	r0
LOCAL(lshrsi3_16):
	rts
	shlr16	r0

LOCAL(lshrsi3_23):
	shlr2	r0
LOCAL(lshrsi3_21):
	shlr2	r0
LOCAL(lshrsi3_19):
	shlr2	r0
LOCAL(lshrsi3_17):
	shlr16	r0
	rts
	shlr	r0

LOCAL(lshrsi3_30):
	shlr2	r0
LOCAL(lshrsi3_28):
	shlr2	r0
LOCAL(lshrsi3_26):
	shlr2	r0
LOCAL(lshrsi3_24):
	shlr16	r0
	rts
	shlr8	r0

LOCAL(lshrsi3_31):
	shlr2	r0
LOCAL(lshrsi3_29):
	shlr2	r0
LOCAL(lshrsi3_27):
	shlr2	r0
LOCAL(lshrsi3_25):
	shlr16	r0
	shlr8	r0
	rts
	shlr	r0

LOCAL(lshrsi3_0):
	rts
	nop

#endif

#ifdef L_movstr

/* Block copy helpers.  Every load reads from r5 and every store writes
   to r4, one 32 bit word at a time through r0.

   movstrSI<n> copies n bytes (n = 4 .. 64) by entering a ladder of
   load/store pairs; each rung is two instructions, i.e. four bytes of
   code.  movstr copies 64 bytes per iteration, subtracting 16 from r6
   each time, and once r6 is no longer positive jumps to
   movstrSI0 + 4 * r6 to finish.  NOTE(review): the exact encoding of
   the count in r6 is defined by the compiler that emits the call and
   is not visible in this file.  */

	.text
! done all the large groups, do the remainder

! jump to movstr+
done:
	add	#64,r5
	mova	GLOBAL(movstrSI0),r0
	shll2	r6
	add	r6,r0
	jmp	@r0
	add	#64,r4
	.align	4
	.global	GLOBAL(movstrSI64)
GLOBAL(movstrSI64):
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)
	.global	GLOBAL(movstrSI60)
GLOBAL(movstrSI60):
	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)
	.global	GLOBAL(movstrSI56)
GLOBAL(movstrSI56):
	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)
	.global	GLOBAL(movstrSI52)
GLOBAL(movstrSI52):
	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)
	.global	GLOBAL(movstrSI48)
GLOBAL(movstrSI48):
	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)
	.global	GLOBAL(movstrSI44)
GLOBAL(movstrSI44):
	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)
	.global	GLOBAL(movstrSI40)
GLOBAL(movstrSI40):
	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)
	.global	GLOBAL(movstrSI36)
GLOBAL(movstrSI36):
	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)
	.global	GLOBAL(movstrSI32)
GLOBAL(movstrSI32):
	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)
	.global	GLOBAL(movstrSI28)
GLOBAL(movstrSI28):
	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)
	.global	GLOBAL(movstrSI24)
GLOBAL(movstrSI24):
	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)
	.global	GLOBAL(movstrSI20)
GLOBAL(movstrSI20):
	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)
	.global	GLOBAL(movstrSI16)
GLOBAL(movstrSI16):
	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)
	.global	GLOBAL(movstrSI12)
GLOBAL(movstrSI12):
	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)
	.global	GLOBAL(movstrSI8)
GLOBAL(movstrSI8):
	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)
	.global	GLOBAL(movstrSI4)
GLOBAL(movstrSI4):
	mov.l	@(0,r5),r0
	mov.l	r0,@(0,r4)
GLOBAL(movstrSI0):
	rts
	nop

	.align	4

	.global	GLOBAL(movstr)
GLOBAL(movstr):
	mov.l	@(60,r5),r0
	mov.l	r0,@(60,r4)

	mov.l	@(56,r5),r0
	mov.l	r0,@(56,r4)

	mov.l	@(52,r5),r0
	mov.l	r0,@(52,r4)

	mov.l	@(48,r5),r0
	mov.l	r0,@(48,r4)

	mov.l	@(44,r5),r0
	mov.l	r0,@(44,r4)

	mov.l	@(40,r5),r0
	mov.l	r0,@(40,r4)

	mov.l	@(36,r5),r0
	mov.l	r0,@(36,r4)

	mov.l	@(32,r5),r0
	mov.l	r0,@(32,r4)

	mov.l	@(28,r5),r0
	mov.l	r0,@(28,r4)

	mov.l	@(24,r5),r0
	mov.l	r0,@(24,r4)

	mov.l	@(20,r5),r0
	mov.l	r0,@(20,r4)

	mov.l	@(16,r5),r0
	mov.l	r0,@(16,r4)

	mov.l	@(12,r5),r0
	mov.l	r0,@(12,r4)

	mov.l	@(8,r5),r0
	mov.l	r0,@(8,r4)

	mov.l	@(4,r5),r0
	mov.l	r0,@(4,r4)

	mov.l	@(0,r5),r0
	mov.l	r0,@(0,r4)

	add	#-16,r6
	cmp/pl	r6
	bf	done

	add	#64,r5
	bra	GLOBAL(movstr)
	add	#64,r4
#endif

#ifdef L_movstr_i4

/* Post-increment block copy: reads words through r5, writes through r4,
   with r6 as a loop counter decremented by dt.  The odd and even entry
   points join the same software-pipelined loop at different places.
   movstrSI12_i4 copies exactly three words.  Uses r0-r3 as scratch.  */

	.text

	.global	GLOBAL(movstr_i4_even)
	.global	GLOBAL(movstr_i4_odd)
	.global	GLOBAL(movstrSI12_i4)

	.p2align	5
L_movstr_2mod4_end:
	mov.l	r0,@(16,r4)
	rts
	mov.l	r1,@(20,r4)

	.p2align	2

GLOBAL(movstr_i4_odd):
	mov.l	@r5+,r1
	add	#-4,r4
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r1,@(4,r4)
	mov.l	r2,@(8,r4)

L_movstr_loop:
	mov.l	r3,@(12,r4)
	dt	r6
	mov.l	@r5+,r0
	bt/s	L_movstr_2mod4_end
	mov.l	@r5+,r1
	add	#16,r4
L_movstr_start_even:
	mov.l	@r5+,r2
	mov.l	@r5+,r3
	mov.l	r0,@r4
	dt	r6
	mov.l	r1,@(4,r4)
	bf/s	L_movstr_loop
	mov.l	r2,@(8,r4)
	rts
	mov.l	r3,@(12,r4)

GLOBAL(movstr_i4_even):
	mov.l	@r5+,r0
	bra	L_movstr_start_even
	mov.l	@r5+,r1

	.p2align	4
GLOBAL(movstrSI12_i4):
	mov.l	@r5,r0
	mov.l	@(4,r5),r1
	mov.l	@(8,r5),r2
	mov.l	r0,@r4
	mov.l	r1,@(4,r4)
	rts
	mov.l	r2,@(8,r4)
#endif

#ifdef L_mulsi3

	.global	GLOBAL(mulsi3)

! r4 =       aabb
! r5 =       ccdd
! r0 = aabb*ccdd  via partial products
!
! if aa == 0 and cc == 0
! r0 = bb*dd
!
! else
! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
!
! Uses r1, r2, r3 and macl as scratch.
!

GLOBAL(mulsi3):
	mulu	r4,r5		! multiply the lsws  macl=bb*dd
	mov	r5,r3		! r3 = ccdd
	swap.w	r4,r2		! r2 = bbaa
	xtrct	r2,r3		! r3 = aacc
	tst	r3,r3		! msws zero ?
	bf	hiset
	rts			! yes - then we have the answer
	sts	macl,r0

hiset:	sts	macl,r0		! r0 = bb*dd
	mulu	r2,r5		! brewing macl = aa*dd
	sts	macl,r1
	mulu	r3,r4		! brewing macl = cc*bb
	sts	macl,r2
	add	r1,r2
	shll16	r2
	rts
	add	r2,r0

#endif

#ifdef L_sdivsi3_i4
	.title "SH DIVIDE"
!! 4 byte integer Divide code for the Hitachi SH

/* Signed divide through the FPU: convert both operands to double,
   divide, and truncate the quotient back to an integer in fpul.  */
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber dr0, dr2

	.global	GLOBAL(sdivsi3_i4)
GLOBAL(sdivsi3_i4):
	lds	r4,fpul
	float	fpul,dr0
	lds	r5,fpul
	float	fpul,dr2
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul

#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2

	.global	GLOBAL(sdivsi3_i4)
GLOBAL(sdivsi3_i4):
	sts.l	fpscr,@-r15	/* save the caller fpscr on the stack */
	mov	#8,r2
	swap.w	r2,r2		/* r2 = 0x80000, i.e. only bit 19 set */
	lds	r2,fpscr
	lds	r4,fpul
	float	fpul,dr0
	lds	r5,fpul
	float	fpul,dr2
	fdiv	dr2,dr0
	ftrc	dr0,fpul
	rts
	lds.l	@r15+,fpscr	/* delay slot: restore the caller fpscr */

#endif /* __SH4__ || __SH4_SINGLE__ || __SH4_SINGLE_ONLY__ */
#endif

#ifdef L_sdivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh3e code.  */
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!

!! args in r4 and r5, result in r0 clobber r1,r2,r3

/* Signed divide using the div0s/div1 step instructions: one
   rotcl/div1 pair per quotient bit, 32 pairs in total.  A zero divisor
   is caught up front and yields 0.  */

	.global	GLOBAL(sdivsi3)
GLOBAL(sdivsi3):
	mov	r4,r1
	mov	r5,r0

	tst	r0,r0
	bt	div0
	mov	#0,r2
	div0s	r2,r1
	subc	r3,r3
	subc	r2,r1
	div0s	r0,r3

	/* steps 1-8 */
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3

	/* steps 9-16 */
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3

	/* steps 17-24 */
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3

	/* steps 25-32 */
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3
	rotcl	r1
	div1	r0,r3

	rotcl	r1
	addc	r2,r1
	rts
	mov	r1,r0

div0:	rts
	mov	#0,r0

#endif /* (! __SH4__ && ! __SH4_SINGLE__) || __linux__ */
#endif

#ifdef L_udivsi3_i4

	.title "SH DIVIDE"
!! 4 byte integer Divide code for the Hitachi SH

/* Unsigned divide through the FPU.  The int to double conversion is
   signed, so each operand first has its top bit flipped (turning the
   unsigned value x into the signed value x - 2^31) and 2^31, loaded
   from L1 into dr4, is added back after conversion.  A divisor of 0 or
   1 takes the trivial path and returns the dividend unchanged.  */
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4

	.global	GLOBAL(udivsi3_i4)
GLOBAL(udivsi3_i4):
	mov	#1,r1
	cmp/hi	r1,r5
	bf	trivial
	rotr	r1		/* r1 = 0x80000000 */
	xor	r1,r4
	lds	r4,fpul
	mova	L1,r0
#ifdef FMOVD_WORKS
	fmov.d	@r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
	fmov.s	@r0+,fr5
	fmov.s	@r0,fr4
#else
	fmov.s	@r0+,fr4
	fmov.s	@r0,fr5
#endif
#endif
	float	fpul,dr0
	xor	r1,r5
	lds	r5,fpul
	float	fpul,dr2
	fadd	dr4,dr0
	fadd	dr4,dr2
	fdiv	dr2,dr0
	rts
	ftrc	dr0,fpul

trivial:
	rts
	lds	r4,fpul

	.align	2
#ifdef FMOVD_WORKS
	.align	3	! make double below 8 byte aligned.
#endif
L1:
	.double	2147483648

#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4

	.global	GLOBAL(udivsi3_i4)
GLOBAL(udivsi3_i4):
	mov	#1,r1
	cmp/hi	r1,r5
	bf	trivial
	sts.l	fpscr,@-r15	/* save the caller fpscr on the stack */
	mova	L1,r0
	lds.l	@r0+,fpscr	/* first word at L1 is the fpscr to use */
	rotr	r1		/* r1 = 0x80000000 */
	xor	r1,r4
	lds	r4,fpul
#ifdef FMOVD_WORKS
	fmov.d	@r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
	fmov.s	@r0+,fr5
	fmov.s	@r0,fr4
#else
	fmov.s	@r0+,fr4
	fmov.s	@r0,fr5
#endif
#endif
	float	fpul,dr0
	xor	r1,r5
	lds	r5,fpul
	float	fpul,dr2
	fadd	dr4,dr0
	fadd	dr4,dr2
	fdiv	dr2,dr0
	ftrc	dr0,fpul
	rts
	lds.l	@r15+,fpscr	/* delay slot: restore the caller fpscr */

/* The alignment is applied before the four bytes of code at trivial so
   that the .long at L1 lands at 4 mod 8 and the .double after it is
   8 byte aligned.  */
#ifdef FMOVD_WORKS
	.align	3	! make double below 8 byte aligned.
#endif
trivial:
	rts
	lds	r4,fpul

	.align	2
L1:
#ifndef FMOVD_WORKS
	.long	0x80000
#else
	.long	0x180000
#endif
	.double	2147483648

#endif /* __SH4__ || __SH4_SINGLE__ || __SH4_SINGLE_ONLY__ */
#endif

#ifdef L_udivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
   sh3e code.  */
#if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
!!
!!

!! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
	.global	GLOBAL(udivsi3)

/* Unsigned divide: 32 rotcl/div1 steps (16 + 8 + 8 across the three
   labels below) followed by a final rotcl that shifts in the last
   quotient bit.  The quotient accumulates in r4 and is copied to r0 in
   the rts delay slot.  */
GLOBAL(udivsi3):
longway:
	mov	#0,r0
	div0u
	! get one bit from the msb of the numerator into the T
	! bit and divide it by whats in r5.  Put the answer bit
	! into the T bit so it can come out again at the bottom

	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0

	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
shortway:
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0

vshortway:
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4 ; div1 r5,r0
	rotcl	r4
ret:	rts
	mov	r4,r0

#endif /* (! __SH4__ && ! __SH4_SINGLE__) || __linux__ */
#endif

#ifdef L_set_fpscr
#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)

/* set_fpscr: load fpscr from r4 and store two derived settings into
   the two words of the common symbol fpscr_values.  swap.w brings the
   upper half-word of r4 into the low half-word so that 8 bit
   immediates can reach it; the masks 8 and 16 therefore act on bits 19
   and 20 of the original value.  Which of the two words receives which
   setting depends on the __SH4__ conditionals below.  Uses r0-r3 as
   scratch.  */

	.global	GLOBAL(set_fpscr)
GLOBAL(set_fpscr):
	lds	r4,fpscr
	mov.l	LOCAL(set_fpscr_L1),r1
	swap.w	r4,r0
	or	#24,r0
#ifndef FMOVD_WORKS
	xor	#16,r0
#endif
#if defined(__SH4__)
	swap.w	r0,r3
	mov.l	r3,@(4,r1)
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r2
	mov.l	r2,@r1
#endif
#ifndef FMOVD_WORKS
	xor	#8,r0
#else
	xor	#24,r0
#endif
#if defined(__SH4__)
	swap.w	r0,r2
	rts
	mov.l	r2,@r1
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
	swap.w	r0,r3
	rts
	mov.l	r3,@(4,r1)
#endif
	.align	2
LOCAL(set_fpscr_L1):
	.long	GLOBAL(fpscr_values)
#ifdef __ELF__
	.comm	GLOBAL(fpscr_values),8,4
#else
	.comm	GLOBAL(fpscr_values),8
#endif /* ELF */
#endif /* SH3E / SH4 */
#endif /* L_set_fpscr */

#ifdef L_ic_invalidate
#if defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)

/* ic_invalidate: r4 holds an address.  The data cache block for r4 is
   written back with ocbwb, then control jumps into a table of 2048
   rts/nop pairs (8192 bytes) at the offset (r4 - 0f) & 0x1fe0, which
   returns immediately.  NOTE(review): presumably executing from that
   table slot displaces the instruction cache line that shares its
   index with r4; confirm against the SH-4 cache documentation.

   The add #4 accounts for the distance between the braf base address
   (the label 1 below) and the table start (the label 0); the
   .short/nop pair must therefore stay exactly four bytes long.  */

	.global	GLOBAL(ic_invalidate)
GLOBAL(ic_invalidate):
	ocbwb	@r4
	mova	0f,r0
	mov.w	1f,r1
	sub	r0,r4
	and	r1,r4
	add	#4,r4
	braf	r4
	nop
1:
	.short	0x1fe0
	nop
0:
	.rept	2048
	rts
	nop
	.endr
#endif /* SH4 */
#endif /* L_ic_invalidate */