1998-07-20 09:35:38 -04:00
|
|
|
/* Multibyte Character Functions.
|
|
|
|
Copyright (C) 1998 Free Software Foundation, Inc.
|
|
|
|
|
Makefile.in, [...]: replace "GNU CC" with "GCC".
* Makefile.in, alias.c, basic-block.h, bb-reorder.c, bitmap.c,
bitmap.h, builtin-types.def, builtins.c, builtins.def,
c-aux-info.c, c-common.c, c-common.def, c-common.h,
c-convert.c, c-decl.c, c-dump.c, c-dump.h, c-errors.c,
c-format.c, c-lang.c, c-lex.c, c-lex.h, c-parse.in,
c-pragma.c, c-pragma.h, c-semantics.c, c-tree.h, c-typeck.c,
caller-save.c, calls.c, collect2.c, collect2.h, combine.c,
conditions.h, config.gcc, configure.frag, configure.in,
conflict.c, convert.c, convert.h, cppspec.c, crtstuff.c,
cse.c, cselib.c, cselib.h, dbxout.c, dbxout.h, defaults.h,
dependence.c, df.c, df.h, diagnostic.c, diagnostic.h,
doloop.c, dominance.c, dwarf.h, dwarf2.h, dwarf2asm.c,
dwarf2asm.h, dwarf2out.c, dwarf2out.h, dwarfout.c,
emit-rtl.c, errors.c, errors.h, except.c, except.h,
exgettext, explow.c, expmed.c, expr.c, expr.h, final.c,
fixproto, flags.h, flow.c, fold-const.c, fp-test.c,
function.c, function.h, gbl-ctors.h, gcc.c, gcc.h, gcc.hlp,
gccspec.c, gcov-io.h, gcse.c, genattr.c, genattrtab.c,
gencheck.c, gencodes.c, genconfig.c, genemit.c,
genextract.c, genflags.c, gengenrtl.c, genmultilib,
genopinit.c, genoutput.c, genpeep.c, genrecog.c,
gensupport.c, gensupport.h, ggc-callbacks.c, ggc-common.c,
ggc-none.c, ggc-page.c, ggc-simple.c, ggc.h, global.c,
graph.c, graph.h, gthr-aix.h, gthr-dce.h, gthr-posix.h,
gthr-rtems.h, gthr-single.h, gthr-solaris.h, gthr-vxworks.h,
gthr-win32.h, gthr.h, haifa-sched.c, halfpic.c, halfpic.h,
hard-reg-set.h, hwint.h, ifcvt.c, input.h, insn-addr.h,
integrate.c, integrate.h, jump.c, lcm.c, libgcc2.c,
libgcc2.h, lists.c, local-alloc.c, loop.c, loop.h,
machmode.def, machmode.h, main.c, mbchar.c, mbchar.h,
mips-tdump.c, mips-tfile.c, mklibgcc.in, mkmap-flat.awk,
mkmap-symver.awk, optabs.c, output.h, params.c, params.def,
params.h, predict.c, predict.def, predict.h, prefix.c,
prefix.h, print-rtl.c, print-tree.c, profile.c, protoize.c,
read-rtl.c, real.c, real.h, recog.c, recog.h, reg-stack.c,
regclass.c, regmove.c, regrename.c, regs.h, reload.c,
reload.h, reload1.c, reorg.c, resource.c, resource.h, rtl.c,
rtl.def, rtl.h, rtlanal.c, sbitmap.c, sbitmap.h,
sched-deps.c, sched-ebb.c, sched-int.h, sched-rgn.c,
sched-vis.c, sdbout.c, sdbout.h, sibcall.c, simplify-rtx.c,
ssa-ccp.c, ssa-dce.c, ssa.c, ssa.h, stmt.c, stor-layout.c,
stringpool.c, system.h, timevar.c, timevar.def, timevar.h,
tlink.c, toplev.c, toplev.h, tree.c, tree.def, tree.h,
tsystem.h, unroll.c, unwind-dw2-fde.c, unwind-dw2-fde.h,
unwind-dw2.c, unwind-pe.h, unwind-sjlj.c, unwind.h,
unwind.inc, varasm.c, varray.c, varray.h, xcoffout.c,
xcoffout.h: replace "GNU CC" with "GCC".
From-SVN: r45105
2001-08-22 10:35:51 -04:00
|
|
|
This file is part of GCC.
|
1998-07-20 09:35:38 -04:00
|
|
|
|
Makefile.in, [...]: replace "GNU CC" with "GCC".
* Makefile.in, alias.c, basic-block.h, bb-reorder.c, bitmap.c,
bitmap.h, builtin-types.def, builtins.c, builtins.def,
c-aux-info.c, c-common.c, c-common.def, c-common.h,
c-convert.c, c-decl.c, c-dump.c, c-dump.h, c-errors.c,
c-format.c, c-lang.c, c-lex.c, c-lex.h, c-parse.in,
c-pragma.c, c-pragma.h, c-semantics.c, c-tree.h, c-typeck.c,
caller-save.c, calls.c, collect2.c, collect2.h, combine.c,
conditions.h, config.gcc, configure.frag, configure.in,
conflict.c, convert.c, convert.h, cppspec.c, crtstuff.c,
cse.c, cselib.c, cselib.h, dbxout.c, dbxout.h, defaults.h,
dependence.c, df.c, df.h, diagnostic.c, diagnostic.h,
doloop.c, dominance.c, dwarf.h, dwarf2.h, dwarf2asm.c,
dwarf2asm.h, dwarf2out.c, dwarf2out.h, dwarfout.c,
emit-rtl.c, errors.c, errors.h, except.c, except.h,
exgettext, explow.c, expmed.c, expr.c, expr.h, final.c,
fixproto, flags.h, flow.c, fold-const.c, fp-test.c,
function.c, function.h, gbl-ctors.h, gcc.c, gcc.h, gcc.hlp,
gccspec.c, gcov-io.h, gcse.c, genattr.c, genattrtab.c,
gencheck.c, gencodes.c, genconfig.c, genemit.c,
genextract.c, genflags.c, gengenrtl.c, genmultilib,
genopinit.c, genoutput.c, genpeep.c, genrecog.c,
gensupport.c, gensupport.h, ggc-callbacks.c, ggc-common.c,
ggc-none.c, ggc-page.c, ggc-simple.c, ggc.h, global.c,
graph.c, graph.h, gthr-aix.h, gthr-dce.h, gthr-posix.h,
gthr-rtems.h, gthr-single.h, gthr-solaris.h, gthr-vxworks.h,
gthr-win32.h, gthr.h, haifa-sched.c, halfpic.c, halfpic.h,
hard-reg-set.h, hwint.h, ifcvt.c, input.h, insn-addr.h,
integrate.c, integrate.h, jump.c, lcm.c, libgcc2.c,
libgcc2.h, lists.c, local-alloc.c, loop.c, loop.h,
machmode.def, machmode.h, main.c, mbchar.c, mbchar.h,
mips-tdump.c, mips-tfile.c, mklibgcc.in, mkmap-flat.awk,
mkmap-symver.awk, optabs.c, output.h, params.c, params.def,
params.h, predict.c, predict.def, predict.h, prefix.c,
prefix.h, print-rtl.c, print-tree.c, profile.c, protoize.c,
read-rtl.c, real.c, real.h, recog.c, recog.h, reg-stack.c,
regclass.c, regmove.c, regrename.c, regs.h, reload.c,
reload.h, reload1.c, reorg.c, resource.c, resource.h, rtl.c,
rtl.def, rtl.h, rtlanal.c, sbitmap.c, sbitmap.h,
sched-deps.c, sched-ebb.c, sched-int.h, sched-rgn.c,
sched-vis.c, sdbout.c, sdbout.h, sibcall.c, simplify-rtx.c,
ssa-ccp.c, ssa-dce.c, ssa.c, ssa.h, stmt.c, stor-layout.c,
stringpool.c, system.h, timevar.c, timevar.def, timevar.h,
tlink.c, toplev.c, toplev.h, tree.c, tree.def, tree.h,
tsystem.h, unroll.c, unwind-dw2-fde.c, unwind-dw2-fde.h,
unwind-dw2.c, unwind-pe.h, unwind-sjlj.c, unwind.h,
unwind.inc, varasm.c, varray.c, varray.h, xcoffout.c,
xcoffout.h: replace "GNU CC" with "GCC".
From-SVN: r45105
2001-08-22 10:35:51 -04:00
|
|
|
GCC is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free
|
|
|
|
Software Foundation; either version 2, or (at your option) any later
|
|
|
|
version.
|
1998-07-20 09:35:38 -04:00
|
|
|
|
Makefile.in, [...]: replace "GNU CC" with "GCC".
* Makefile.in, alias.c, basic-block.h, bb-reorder.c, bitmap.c,
bitmap.h, builtin-types.def, builtins.c, builtins.def,
c-aux-info.c, c-common.c, c-common.def, c-common.h,
c-convert.c, c-decl.c, c-dump.c, c-dump.h, c-errors.c,
c-format.c, c-lang.c, c-lex.c, c-lex.h, c-parse.in,
c-pragma.c, c-pragma.h, c-semantics.c, c-tree.h, c-typeck.c,
caller-save.c, calls.c, collect2.c, collect2.h, combine.c,
conditions.h, config.gcc, configure.frag, configure.in,
conflict.c, convert.c, convert.h, cppspec.c, crtstuff.c,
cse.c, cselib.c, cselib.h, dbxout.c, dbxout.h, defaults.h,
dependence.c, df.c, df.h, diagnostic.c, diagnostic.h,
doloop.c, dominance.c, dwarf.h, dwarf2.h, dwarf2asm.c,
dwarf2asm.h, dwarf2out.c, dwarf2out.h, dwarfout.c,
emit-rtl.c, errors.c, errors.h, except.c, except.h,
exgettext, explow.c, expmed.c, expr.c, expr.h, final.c,
fixproto, flags.h, flow.c, fold-const.c, fp-test.c,
function.c, function.h, gbl-ctors.h, gcc.c, gcc.h, gcc.hlp,
gccspec.c, gcov-io.h, gcse.c, genattr.c, genattrtab.c,
gencheck.c, gencodes.c, genconfig.c, genemit.c,
genextract.c, genflags.c, gengenrtl.c, genmultilib,
genopinit.c, genoutput.c, genpeep.c, genrecog.c,
gensupport.c, gensupport.h, ggc-callbacks.c, ggc-common.c,
ggc-none.c, ggc-page.c, ggc-simple.c, ggc.h, global.c,
graph.c, graph.h, gthr-aix.h, gthr-dce.h, gthr-posix.h,
gthr-rtems.h, gthr-single.h, gthr-solaris.h, gthr-vxworks.h,
gthr-win32.h, gthr.h, haifa-sched.c, halfpic.c, halfpic.h,
hard-reg-set.h, hwint.h, ifcvt.c, input.h, insn-addr.h,
integrate.c, integrate.h, jump.c, lcm.c, libgcc2.c,
libgcc2.h, lists.c, local-alloc.c, loop.c, loop.h,
machmode.def, machmode.h, main.c, mbchar.c, mbchar.h,
mips-tdump.c, mips-tfile.c, mklibgcc.in, mkmap-flat.awk,
mkmap-symver.awk, optabs.c, output.h, params.c, params.def,
params.h, predict.c, predict.def, predict.h, prefix.c,
prefix.h, print-rtl.c, print-tree.c, profile.c, protoize.c,
read-rtl.c, real.c, real.h, recog.c, recog.h, reg-stack.c,
regclass.c, regmove.c, regrename.c, regs.h, reload.c,
reload.h, reload1.c, reorg.c, resource.c, resource.h, rtl.c,
rtl.def, rtl.h, rtlanal.c, sbitmap.c, sbitmap.h,
sched-deps.c, sched-ebb.c, sched-int.h, sched-rgn.c,
sched-vis.c, sdbout.c, sdbout.h, sibcall.c, simplify-rtx.c,
ssa-ccp.c, ssa-dce.c, ssa.c, ssa.h, stmt.c, stor-layout.c,
stringpool.c, system.h, timevar.c, timevar.def, timevar.h,
tlink.c, toplev.c, toplev.h, tree.c, tree.def, tree.h,
tsystem.h, unroll.c, unwind-dw2-fde.c, unwind-dw2-fde.h,
unwind-dw2.c, unwind-pe.h, unwind-sjlj.c, unwind.h,
unwind.inc, varasm.c, varray.c, varray.h, xcoffout.c,
xcoffout.h: replace "GNU CC" with "GCC".
From-SVN: r45105
2001-08-22 10:35:51 -04:00
|
|
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
|
|
for more details.
|
1998-07-20 09:35:38 -04:00
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
Makefile.in, [...]: replace "GNU CC" with "GCC".
* Makefile.in, alias.c, basic-block.h, bb-reorder.c, bitmap.c,
bitmap.h, builtin-types.def, builtins.c, builtins.def,
c-aux-info.c, c-common.c, c-common.def, c-common.h,
c-convert.c, c-decl.c, c-dump.c, c-dump.h, c-errors.c,
c-format.c, c-lang.c, c-lex.c, c-lex.h, c-parse.in,
c-pragma.c, c-pragma.h, c-semantics.c, c-tree.h, c-typeck.c,
caller-save.c, calls.c, collect2.c, collect2.h, combine.c,
conditions.h, config.gcc, configure.frag, configure.in,
conflict.c, convert.c, convert.h, cppspec.c, crtstuff.c,
cse.c, cselib.c, cselib.h, dbxout.c, dbxout.h, defaults.h,
dependence.c, df.c, df.h, diagnostic.c, diagnostic.h,
doloop.c, dominance.c, dwarf.h, dwarf2.h, dwarf2asm.c,
dwarf2asm.h, dwarf2out.c, dwarf2out.h, dwarfout.c,
emit-rtl.c, errors.c, errors.h, except.c, except.h,
exgettext, explow.c, expmed.c, expr.c, expr.h, final.c,
fixproto, flags.h, flow.c, fold-const.c, fp-test.c,
function.c, function.h, gbl-ctors.h, gcc.c, gcc.h, gcc.hlp,
gccspec.c, gcov-io.h, gcse.c, genattr.c, genattrtab.c,
gencheck.c, gencodes.c, genconfig.c, genemit.c,
genextract.c, genflags.c, gengenrtl.c, genmultilib,
genopinit.c, genoutput.c, genpeep.c, genrecog.c,
gensupport.c, gensupport.h, ggc-callbacks.c, ggc-common.c,
ggc-none.c, ggc-page.c, ggc-simple.c, ggc.h, global.c,
graph.c, graph.h, gthr-aix.h, gthr-dce.h, gthr-posix.h,
gthr-rtems.h, gthr-single.h, gthr-solaris.h, gthr-vxworks.h,
gthr-win32.h, gthr.h, haifa-sched.c, halfpic.c, halfpic.h,
hard-reg-set.h, hwint.h, ifcvt.c, input.h, insn-addr.h,
integrate.c, integrate.h, jump.c, lcm.c, libgcc2.c,
libgcc2.h, lists.c, local-alloc.c, loop.c, loop.h,
machmode.def, machmode.h, main.c, mbchar.c, mbchar.h,
mips-tdump.c, mips-tfile.c, mklibgcc.in, mkmap-flat.awk,
mkmap-symver.awk, optabs.c, output.h, params.c, params.def,
params.h, predict.c, predict.def, predict.h, prefix.c,
prefix.h, print-rtl.c, print-tree.c, profile.c, protoize.c,
read-rtl.c, real.c, real.h, recog.c, recog.h, reg-stack.c,
regclass.c, regmove.c, regrename.c, regs.h, reload.c,
reload.h, reload1.c, reorg.c, resource.c, resource.h, rtl.c,
rtl.def, rtl.h, rtlanal.c, sbitmap.c, sbitmap.h,
sched-deps.c, sched-ebb.c, sched-int.h, sched-rgn.c,
sched-vis.c, sdbout.c, sdbout.h, sibcall.c, simplify-rtx.c,
ssa-ccp.c, ssa-dce.c, ssa.c, ssa.h, stmt.c, stor-layout.c,
stringpool.c, system.h, timevar.c, timevar.def, timevar.h,
tlink.c, toplev.c, toplev.h, tree.c, tree.def, tree.h,
tsystem.h, unroll.c, unwind-dw2-fde.c, unwind-dw2-fde.h,
unwind-dw2.c, unwind-pe.h, unwind-sjlj.c, unwind.h,
unwind.inc, varasm.c, varray.c, varray.h, xcoffout.c,
xcoffout.h: replace "GNU CC" with "GCC".
From-SVN: r45105
2001-08-22 10:35:51 -04:00
|
|
|
along with GCC; see the file COPYING. If not, write to the Free
|
|
|
|
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|
|
|
02111-1307, USA. */
|
1998-07-20 09:35:38 -04:00
|
|
|
|
|
|
|
/* Note regarding cross compilation:
|
|
|
|
|
1999-09-07 01:49:18 -04:00
|
|
|
In general, translation of multibyte characters to wide characters can
|
1998-07-20 09:35:38 -04:00
|
|
|
only work in a native compiler since the translation function (mbtowc)
|
|
|
|
needs to know about both the source and target character encoding. However,
|
|
|
|
this particular implementation for JIS, SJIS and EUCJP source characters
|
|
|
|
will work for any compiler with a newlib target. Other targets may also
|
|
|
|
work provided that their wchar_t implementation is 2 bytes and the encoding
|
|
|
|
leaves the source character values unchanged (except for removing the
|
|
|
|
state shifting markers). */
|
|
|
|
|
|
|
|
#include "config.h"
|
2001-04-07 18:39:10 -04:00
|
|
|
#ifdef MULTIBYTE_CHARS
|
1998-07-20 09:35:38 -04:00
|
|
|
#include "system.h"
|
|
|
|
#include "mbchar.h"
|
|
|
|
#include <locale.h>
|
|
|
|
|
1999-09-07 01:49:18 -04:00
|
|
|
/* Classification of one input byte while decoding "C-JIS" text.  The
   value is used as the column index into the JIS state and action
   tables.  */
typedef enum
{
  ESCAPE,       /* The byte JIS_ESC_CHAR.  */
  DOLLAR,       /* '$' */
  BRACKET,      /* '(' */
  AT,           /* '@' */
  B,            /* 'B' */
  J,            /* 'J' */
  NUL,          /* '\0' */
  JIS_CHAR,     /* Any other byte for which ISJIS is true.  */
  OTHER,        /* Everything else.  */
  JIS_C_NUM     /* Number of byte classes (table width).  */
} JIS_CHAR_TYPE;
|
1998-07-20 09:35:38 -04:00
|
|
|
|
1999-09-07 01:49:18 -04:00
|
|
|
/* Decoder states for "C-JIS" text.  The value is used as the row index
   into the JIS state and action tables.  */
typedef enum
{
  ASCII,        /* Initial state: plain single-byte text.  */
  A_ESC,        /* ASCII state, ESC seen.  */
  A_ESC_DL,     /* ASCII state, ESC '$' seen.  */
  JIS,          /* Two-byte mode, waiting for the first byte.  */
  JIS_1,        /* Two-byte mode, first byte consumed.  */
  JIS_2,        /* Two-byte mode, second byte consumed.  */
  J_ESC,        /* JIS state, ESC seen.  */
  J_ESC_BR,     /* JIS state, ESC '(' seen.  */
  J2_ESC,       /* JIS_2 state, ESC seen.  */
  J2_ESC_BR,    /* JIS_2 state, ESC '(' seen.  */
  INV,          /* Invalid input.  */
  JIS_S_NUM     /* Number of states (table height).  */
} JIS_STATE;
|
|
|
|
|
|
|
|
/* What the "C-JIS" decoder does after classifying one input byte.  */
typedef enum
{
  COPYA,        /* Store one byte as the result and return.  */
  COPYJ,        /* Store a two-byte character and return, counting the
                   byte just read as consumed.  */
  COPYJ2,       /* Store a two-byte character and return, without
                   counting the byte just read as consumed.  */
  MAKE_A,       /* Handled exactly like MAKE_J.  */
  MAKE_J,       /* Remember that the character data starts at the next
                   byte, then keep scanning.  */
  NOOP,         /* Keep scanning.  */
  EMPTY,        /* Store a zero wide character and return.  */
  ERROR         /* Invalid sequence: return -1.  */
} JIS_ACTION;
|
|
|
|
|
|
|
|
/* State/action tables for processing JIS encoding:
|
|
|
|
|
|
|
|
Where possible, switches to JIS are grouped with following JIS characters
|
|
|
|
and switches to ASCII are grouped with preceding JIS characters.
|
|
|
|
Thus, maximum returned length is:
|
|
|
|
3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8. */
|
1998-07-20 09:35:38 -04:00
|
|
|
|
|
|
|
static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
|
1999-09-07 01:49:18 -04:00
|
|
|
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTH*/
|
1998-07-20 09:35:38 -04:00
|
|
|
/*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
|
|
|
|
/*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
|
|
|
|
/*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
|
|
|
|
/*JIS*/ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1,INV },
|
|
|
|
/*JIS_1*/ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2,INV },
|
|
|
|
/*JIS_2*/ { J2_ESC,JIS, JIS, JIS, JIS, JIS, INV, JIS, JIS },
|
|
|
|
/*J_ESC*/ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
|
|
|
|
/*J_ESC_BR*/{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
|
|
|
|
/*J2_ESC*/ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
|
|
|
|
/*J2_ESC_BR*/{INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
|
|
|
|
};
|
|
|
|
|
|
|
|
static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
|
1999-09-07 01:49:18 -04:00
|
|
|
/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTH */
|
1998-07-20 09:35:38 -04:00
|
|
|
/*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
|
|
|
|
/*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
|
|
|
|
/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
|
1999-09-07 01:49:18 -04:00
|
|
|
/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR},
|
|
|
|
/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR},
|
1998-07-20 09:35:38 -04:00
|
|
|
/*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
|
1999-09-07 01:49:18 -04:00
|
|
|
/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
|
|
|
|
/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, NOOP, NOOP, ERROR, ERROR, ERROR},
|
|
|
|
/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR},
|
|
|
|
/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR},
|
1998-07-20 09:35:38 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
1999-10-05 12:21:34 -04:00
|
|
|
/* Name of the codeset used to interpret multibyte characters.  The
   functions in this file compare it against "C-SJIS", "C-EUCJP" and
   "C-JIS"; when it is NULL or at most one character long they use
   their default handling instead.  */
const char *literal_codeset = NULL;
|
1998-07-20 09:35:38 -04:00
|
|
|
|
1999-09-07 01:49:18 -04:00
|
|
|
/* Store into *PWC (if PWC is not null) the wide character
|
|
|
|
corresponding to the multibyte character at the start of the
|
|
|
|
buffer S of size N. Return the number of bytes in the multibyte
|
|
|
|
character. Return -1 if the bytes do not form a valid character,
|
|
|
|
or 0 if S is null or points to a null byte.
|
|
|
|
|
|
|
|
This function behaves like the Standard C function mbtowc, except
|
|
|
|
it treats locale names of the form "C-..." specially. */
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
int
|
|
|
|
local_mbtowc (pwc, s, n)
|
1999-09-07 01:49:18 -04:00
|
|
|
wchar_t *pwc;
|
1999-10-05 12:21:34 -04:00
|
|
|
const char *s;
|
1999-09-07 01:49:18 -04:00
|
|
|
size_t n;
|
1998-07-20 09:35:38 -04:00
|
|
|
{
|
|
|
|
static JIS_STATE save_state = ASCII;
|
|
|
|
JIS_STATE curr_state = save_state;
|
1999-10-05 12:21:34 -04:00
|
|
|
const unsigned char *t = (const unsigned char *) s;
|
1998-07-20 09:35:38 -04:00
|
|
|
|
|
|
|
if (s != NULL && n == 0)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
|
1999-09-07 01:49:18 -04:00
|
|
|
/* This must be the "C" locale or unknown locale -- fall thru */
|
|
|
|
;
|
1998-07-20 09:35:38 -04:00
|
|
|
else if (! strcmp (literal_codeset, "C-SJIS"))
|
|
|
|
{
|
|
|
|
int char1;
|
|
|
|
if (s == NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
/* Not state-dependent. */
|
|
|
|
return 0;
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
char1 = *t;
|
|
|
|
if (ISSJIS1 (char1))
|
|
|
|
{
|
|
|
|
int char2 = t[1];
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (n <= 1)
|
|
|
|
return -1;
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (ISSJIS2 (char2))
|
|
|
|
{
|
|
|
|
if (pwc != NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
*pwc = (((wchar_t) *t) << 8) + (wchar_t) (*(t + 1));
|
1998-07-20 09:35:38 -04:00
|
|
|
return 2;
|
|
|
|
}
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
return -1;
|
|
|
|
}
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (pwc != NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
*pwc = (wchar_t) *t;
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (*t == '\0')
|
|
|
|
return 0;
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
else if (! strcmp (literal_codeset, "C-EUCJP"))
|
|
|
|
{
|
|
|
|
int char1;
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (s == NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
/* Not state-dependent. */
|
|
|
|
return 0;
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
char1 = *t;
|
|
|
|
if (ISEUCJP (char1))
|
|
|
|
{
|
|
|
|
int char2 = t[1];
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (n <= 1)
|
|
|
|
return -1;
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (ISEUCJP (char2))
|
|
|
|
{
|
|
|
|
if (pwc != NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
*pwc = (((wchar_t) *t) << 8) + (wchar_t) (*(t + 1));
|
1998-07-20 09:35:38 -04:00
|
|
|
return 2;
|
|
|
|
}
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
return -1;
|
|
|
|
}
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (pwc != NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
*pwc = (wchar_t) *t;
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (*t == '\0')
|
|
|
|
return 0;
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
else if (! strcmp (literal_codeset, "C-JIS"))
|
|
|
|
{
|
|
|
|
JIS_ACTION action;
|
|
|
|
JIS_CHAR_TYPE ch;
|
1999-10-05 12:21:34 -04:00
|
|
|
const unsigned char *ptr;
|
|
|
|
size_t i, curr_ch;
|
1998-07-20 09:35:38 -04:00
|
|
|
|
|
|
|
if (s == NULL)
|
|
|
|
{
|
|
|
|
save_state = ASCII;
|
2001-08-12 19:40:53 -04:00
|
|
|
/* State-dependent. */
|
1999-09-07 01:49:18 -04:00
|
|
|
return 1;
|
1998-07-20 09:35:38 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
ptr = t;
|
|
|
|
|
1999-09-07 01:49:18 -04:00
|
|
|
for (i = 0; i < n; i++)
|
1998-07-20 09:35:38 -04:00
|
|
|
{
|
|
|
|
curr_ch = t[i];
|
|
|
|
switch (curr_ch)
|
|
|
|
{
|
|
|
|
case JIS_ESC_CHAR:
|
|
|
|
ch = ESCAPE;
|
|
|
|
break;
|
|
|
|
case '$':
|
|
|
|
ch = DOLLAR;
|
|
|
|
break;
|
|
|
|
case '@':
|
|
|
|
ch = AT;
|
|
|
|
break;
|
|
|
|
case '(':
|
|
|
|
ch = BRACKET;
|
|
|
|
break;
|
|
|
|
case 'B':
|
|
|
|
ch = B;
|
|
|
|
break;
|
|
|
|
case 'J':
|
|
|
|
ch = J;
|
|
|
|
break;
|
|
|
|
case '\0':
|
|
|
|
ch = NUL;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
if (ISJIS (curr_ch))
|
|
|
|
ch = JIS_CHAR;
|
|
|
|
else
|
|
|
|
ch = OTHER;
|
|
|
|
}
|
|
|
|
|
|
|
|
action = JIS_action_table[curr_state][ch];
|
|
|
|
curr_state = JIS_state_table[curr_state][ch];
|
|
|
|
|
|
|
|
switch (action)
|
|
|
|
{
|
|
|
|
case NOOP:
|
|
|
|
break;
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
case EMPTY:
|
|
|
|
if (pwc != NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
*pwc = (wchar_t) 0;
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
save_state = curr_state;
|
|
|
|
return i;
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
case COPYA:
|
|
|
|
if (pwc != NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
*pwc = (wchar_t) *ptr;
|
1998-07-20 09:35:38 -04:00
|
|
|
save_state = curr_state;
|
1999-09-07 01:49:18 -04:00
|
|
|
return i + 1;
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
case COPYJ:
|
|
|
|
if (pwc != NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
*pwc = (((wchar_t) *ptr) << 8) + (wchar_t) (*(ptr + 1));
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
save_state = curr_state;
|
1999-09-07 01:49:18 -04:00
|
|
|
return i + 1;
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
case COPYJ2:
|
|
|
|
if (pwc != NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
*pwc = (((wchar_t) *ptr) << 8) + (wchar_t) (*(ptr + 1));
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
save_state = curr_state;
|
1999-09-07 01:49:18 -04:00
|
|
|
return ptr - t + 2;
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
case MAKE_A:
|
|
|
|
case MAKE_J:
|
1999-10-05 12:21:34 -04:00
|
|
|
ptr = (const unsigned char *) (t + i + 1);
|
1998-07-20 09:35:38 -04:00
|
|
|
break;
|
1999-09-07 01:49:18 -04:00
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
case ERROR:
|
|
|
|
default:
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1999-09-07 01:49:18 -04:00
|
|
|
/* More than n bytes needed. */
|
|
|
|
return -1;
|
1998-07-20 09:35:38 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CROSS_COMPILE
|
|
|
|
if (s == NULL)
|
1999-09-07 01:49:18 -04:00
|
|
|
/* Not state-dependent. */
|
|
|
|
return 0;
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
if (pwc != NULL)
|
|
|
|
*pwc = *s;
|
|
|
|
return 1;
|
|
|
|
#else
|
1999-09-07 01:49:18 -04:00
|
|
|
|
2001-08-12 19:40:53 -04:00
|
|
|
/* This must be the "C" locale or unknown locale. */
|
1998-07-20 09:35:38 -04:00
|
|
|
return mbtowc (pwc, s, n);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
1999-09-07 01:49:18 -04:00
|
|
|
/* Return the length in bytes of the multibyte character at the start
   of the buffer S of size N.  This is local_mbtowc with the wide
   character result discarded, so the return value follows the same
   rules as that function (-1 for an invalid character, 0 for a null
   byte).

   This function behaves like the Standard C function mblen, except
   it treats locale names of the form "C-..." specially.  */

int
local_mblen (s, n)
     const char *s;
     size_t n;
{
  int length;

  /* A null PWC asks local_mbtowc for the length only.  */
  length = local_mbtowc (NULL, s, n);
  return length;
}
|
|
|
|
|
1999-09-07 01:49:18 -04:00
|
|
|
/* Return the maximum mumber of bytes in a multibyte character.
|
|
|
|
|
|
|
|
This function returns the same value as the Standard C macro MB_CUR_MAX,
|
|
|
|
except it treats locale names of the form "C-..." specially. */
|
|
|
|
|
1998-07-20 09:35:38 -04:00
|
|
|
int
|
|
|
|
local_mb_cur_max ()
|
|
|
|
{
|
|
|
|
if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
|
|
|
|
;
|
|
|
|
else if (! strcmp (literal_codeset, "C-SJIS"))
|
|
|
|
return 2;
|
|
|
|
else if (! strcmp (literal_codeset, "C-EUCJP"))
|
|
|
|
return 2;
|
|
|
|
else if (! strcmp (literal_codeset, "C-JIS"))
|
|
|
|
return 8; /* 3 + 2 + 3 */
|
|
|
|
|
|
|
|
#ifdef CROSS_COMPILE
|
|
|
|
return 1;
|
|
|
|
#else
|
1999-02-10 06:00:12 -05:00
|
|
|
if (MB_CUR_MAX > 0)
|
|
|
|
return MB_CUR_MAX;
|
|
|
|
|
|
|
|
return 1; /* default */
|
1998-07-20 09:35:38 -04:00
|
|
|
#endif
|
|
|
|
}
|
cpphash.h (U): New define, to correct type of string constants.
gcc:
* cpphash.h (U): New define, to correct type of string constants.
(ustrcmp, ustrncmp, ustrlen, uxstrdup, ustrchr): New wrapper
routines, to do casts when passing unsigned strings to libc.
* cppexp.c, cppfiles.c, cpphash.c, cppinit.c, cpplib.c: Use them.
* cppfiles.c (_cpp_execute_include): Make filename an U_CHAR *.
* cpphash.c (_cpp_quote_string): Make string an U_CHAR *.
* cppinit.c (dump_special_to_buffer): Make macro name an U_CHAR *.
* cpplex.c (parse_ifdef, parse_include, validate_else): Make
second argument an U_CHAR *.
* cppinit.c (builtin_array): Make name and value U_CHAR *, add
length field, clean up initializer.
(ISTABLE): Add __extension__ to designated-
initializers version.
* cpplex.c (CHARTAB): Likewise.
* mbchar.c: Add dummy external declaration to the !MULTIBYTE_CHARS
case so the file won't be empty.
include:
* symcat.h: Remove #endif label.
From-SVN: r33657
2000-05-04 00:38:01 -04:00
|
|
|
#else /* MULTIBYTE_CHARS */
|
|
|
|
extern int dummy; /* silence 'ANSI C forbids an empty source file' warning */
|
1998-07-20 09:35:38 -04:00
|
|
|
#endif /* MULTIBYTE_CHARS */
|