w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

nkf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3  * Copyright (c) 1996-2009, The nkf Project.
4  *
5  * This software is provided 'as-is', without any express or implied
6  * warranty. In no event will the authors be held liable for any damages
7  * arising from the use of this software.
8  *
9  * Permission is granted to anyone to use this software for any purpose,
10  * including commercial applications, and to alter it and redistribute it
11  * freely, subject to the following restrictions:
12  *
13  * 1. The origin of this software must not be misrepresented; you must not
14  * claim that you wrote the original software. If you use this software
15  * in a product, an acknowledgment in the product documentation would be
16  * appreciated but is not required.
17  *
18  * 2. Altered source versions must be plainly marked as such, and must not be
19  * misrepresented as being the original software.
20  *
21  * 3. This notice may not be removed or altered from any source distribution.
22  */
23 #define NKF_VERSION "2.1.0"
24 #define NKF_RELEASE_DATE "2009-11-17"
25 #define COPY_RIGHT \
26  "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27  "Copyright (C) 1996-2009, The nkf Project."
28 
29 #include "config.h"
30 #include "nkf.h"
31 #include "utf8tbl.h"
32 #ifdef __WIN32__
33 #include <windows.h>
34 #include <locale.h>
35 #endif
36 #if defined(__OS2__)
37 # define INCL_DOS
38 # define INCL_DOSERRORS
39 # include <os2.h>
40 #endif
41 #include <assert.h>
42 
43 
44 /* state of output_mode and input_mode
45 
46  c2 0 means ASCII
47  JIS_X_0201_1976_K
48  ISO_8859_1
49  JIS_X_0208
50  EOF all termination
51  c1 32bit data
52 
53  */
54 
55 /* MIME ENCODE */
56 
57 #define FIXED_MIME 7
58 #define STRICT_MIME 8
59 
60 /* byte order */
61 enum byte_order {
65  ENDIAN_3412 = 4
66 };
67 
68 /* ASCII CODE */
69 
70 #define BS 0x08
71 #define TAB 0x09
72 #define LF 0x0a
73 #define CR 0x0d
74 #define ESC 0x1b
75 #define SP 0x20
76 #define DEL 0x7f
77 #define SI 0x0f
78 #define SO 0x0e
79 #define SS2 0x8e
80 #define SS3 0x8f
81 #define CRLF 0x0D0A
82 
83 
84 /* encodings */
85 
124  JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125  /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126  /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127  /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128  JIS_X_0208 = 0x1168, /* @B */
129  JIS_X_0212 = 0x1159, /* D */
130  /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131  JIS_X_0213_2 = 0x1229, /* P */
132  JIS_X_0213_1 = 0x1233 /* Q */
133 };
134 
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
146 
147 typedef struct {
148  const char *name;
152 
160 
161 typedef struct {
162  const int id;
163  const char *name;
164  const nkf_native_encoding *base_encoding;
165 } nkf_encoding;
166 
168  {ASCII, "US-ASCII", &NkfEncodingASCII},
169  {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170  {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171  {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172  {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173  {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174  {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175  {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176  {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177  {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178  {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179  {CP10001, "CP10001", &NkfEncodingShift_JIS},
180  {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181  {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182  {CP51932, "CP51932", &NkfEncodingEUC_JP},
183  {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184  {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185  {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186  {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187  {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188  {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189  {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190  {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191  {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192  {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193  {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194  {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195  {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196  {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197  {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198  {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199  {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200  {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201  {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202  {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203  {BINARY, "BINARY", &NkfEncodingASCII},
204  {-1, NULL, NULL}
205 };
206 
207 struct {
208  const char *name;
209  const int id;
211  {"US-ASCII", ASCII},
212  {"ASCII", ASCII},
213  {"ISO-2022-JP", ISO_2022_JP},
214  {"ISO2022JP-CP932", CP50220},
215  {"CP50220", CP50220},
216  {"CP50221", CP50221},
217  {"CSISO2022JP", CP50221},
218  {"CP50222", CP50222},
219  {"ISO-2022-JP-1", ISO_2022_JP_1},
220  {"ISO-2022-JP-3", ISO_2022_JP_3},
221  {"ISO-2022-JP-2004", ISO_2022_JP_2004},
222  {"SHIFT_JIS", SHIFT_JIS},
223  {"SJIS", SHIFT_JIS},
224  {"WINDOWS-31J", WINDOWS_31J},
225  {"CSWINDOWS31J", WINDOWS_31J},
226  {"CP932", WINDOWS_31J},
227  {"MS932", WINDOWS_31J},
228  {"CP10001", CP10001},
229  {"EUCJP", EUC_JP},
230  {"EUC-JP", EUC_JP},
231  {"EUCJP-NKF", EUCJP_NKF},
232  {"CP51932", CP51932},
233  {"EUC-JP-MS", EUCJP_MS},
234  {"EUCJP-MS", EUCJP_MS},
235  {"EUCJPMS", EUCJP_MS},
236  {"EUC-JP-ASCII", EUCJP_ASCII},
237  {"EUCJP-ASCII", EUCJP_ASCII},
238  {"SHIFT_JISX0213", SHIFT_JISX0213},
239  {"SHIFT_JIS-2004", SHIFT_JIS_2004},
240  {"EUC-JISX0213", EUC_JISX0213},
241  {"EUC-JIS-2004", EUC_JIS_2004},
242  {"UTF-8", UTF_8},
243  {"UTF-8N", UTF_8N},
244  {"UTF-8-BOM", UTF_8_BOM},
245  {"UTF8-MAC", UTF8_MAC},
246  {"UTF-8-MAC", UTF8_MAC},
247  {"UTF-16", UTF_16},
248  {"UTF-16BE", UTF_16BE},
249  {"UTF-16BE-BOM", UTF_16BE_BOM},
250  {"UTF-16LE", UTF_16LE},
251  {"UTF-16LE-BOM", UTF_16LE_BOM},
252  {"UTF-32", UTF_32},
253  {"UTF-32BE", UTF_32BE},
254  {"UTF-32BE-BOM", UTF_32BE_BOM},
255  {"UTF-32LE", UTF_32LE},
256  {"UTF-32LE-BOM", UTF_32LE_BOM},
257  {"BINARY", BINARY},
258  {NULL, -1}
259 };
260 
261 #if defined(DEFAULT_CODE_JIS)
262 #define DEFAULT_ENCIDX ISO_2022_JP
263 #elif defined(DEFAULT_CODE_SJIS)
264 #define DEFAULT_ENCIDX SHIFT_JIS
265 #elif defined(DEFAULT_CODE_WINDOWS_31J)
266 #define DEFAULT_ENCIDX WINDOWS_31J
267 #elif defined(DEFAULT_CODE_EUC)
268 #define DEFAULT_ENCIDX EUC_JP
269 #elif defined(DEFAULT_CODE_UTF8)
270 #define DEFAULT_ENCIDX UTF_8
271 #endif
272 
273 
/*
 * ASCII-only character classification and conversion helpers.
 *
 * These are macros (not functions) so they work on both char and nkf_char
 * values.  Every use of the argument is parenthesized so that expression
 * arguments such as `a | b` or `x ? y : z` expand correctly.  The argument
 * is still evaluated more than once: do not pass expressions with side
 * effects (e.g. `*p++`).
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* Value of a hexadecimal digit character; 0 for anything that is not one. */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* Upper-case hexadecimal digit for the low four bits of c. */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* True when bits 8..15 of c2 (taken as unsigned short) equal SS3. */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
/* True for CR, LF, and the characters in (SP, DEL) other than ? = _ ( ) . " */
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
299 
300 #define HOLD_SIZE 1024
301 #if defined(INT_IS_SHORT)
302 #define IOBUF_SIZE 2048
303 #else
304 #define IOBUF_SIZE 16384
305 #endif
306 
307 #define DEFAULT_J 'B'
308 #define DEFAULT_R 'B'
309 
310 
311 #define GETA1 0x22
312 #define GETA2 0x2e
313 
314 
315 /* MIME preprocessor */
316 
317 #ifdef EASYWIN /*Easy Win */
318 extern POINT _BufferSize;
319 #endif
320 
321 struct input_code{
322  const char *name;
323  nkf_char stat;
324  nkf_char score;
325  nkf_char index;
326  nkf_char buf[3];
327  void (*status_func)(struct input_code *, nkf_char);
329  int _file_stat;
330 };
331 
332 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
335 
336 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
337 /* UCS Mapping
338  * 0: Shift_JIS, eucJP-ascii
339  * 1: eucJP-ms
340  * 2: CP932, CP51932
341  * 3: CP10001
342  */
343 #define UCS_MAP_ASCII 0
344 #define UCS_MAP_MS 1
345 #define UCS_MAP_CP932 2
346 #define UCS_MAP_CP10001 3
348 #endif
349 #ifdef UTF8_INPUT_ENABLE
350 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
351 static int no_cp932ext_f = FALSE;
352 /* ignore ZERO WIDTH NO-BREAK SPACE */
355 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
357 static void w_status(struct input_code *, nkf_char);
358 #endif
359 #ifdef UTF8_OUTPUT_ENABLE
360 static int output_bom_f = FALSE;
362 #endif
363 
364 static void std_putc(nkf_char c);
365 static nkf_char std_getc(FILE *f);
367 
368 static nkf_char broken_getc(FILE *f);
370 
371 static nkf_char mime_getc(FILE *f);
372 
373 static void mime_putc(nkf_char c);
374 
375 /* buffers */
376 
377 #if !defined(PERL_XS) && !defined(WIN32DLL)
378 static unsigned char stdibuf[IOBUF_SIZE];
379 static unsigned char stdobuf[IOBUF_SIZE];
380 #endif
381 
382 /* flags */
383 static int unbuf_f = FALSE;
384 static int estab_f = FALSE;
385 static int nop_f = FALSE;
386 static int binmode_f = TRUE; /* binary mode */
387 static int rot_f = FALSE; /* rot14/43 mode */
388 static int hira_f = FALSE; /* hira/kata henkan */
389 static int alpha_f = FALSE; /* convert JIS X 0208 alphabet to ASCII */
390 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
391 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
392 static int mimebuf_f = FALSE; /* MIME buffered input */
393 static int broken_f = FALSE; /* convert ESC-less broken JIS */
394 static int iso8859_f = FALSE; /* ISO8859 through */
395 static int mimeout_f = FALSE; /* base64 mode */
396 static int x0201_f = X0201_DEFAULT; /* convert JIS X 0201 */
397 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
398 
399 #ifdef UNICODE_NORMALIZATION
400 static int nfc_f = FALSE;
401 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
403 #endif
404 
405 #ifdef INPUT_OPTION
406 static int cap_f = FALSE;
407 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
409 
410 static int url_f = FALSE;
411 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
413 #endif
414 
/*
 * Bit layout helpers for nkf_char values.
 *
 * CLASS_MASK selects the top 8 bits and VALUE_MASK the low 24 bits of a
 * 32-bit value.  A value whose class bits equal CLASS_UNICODE is treated as
 * a Unicode code point stored in the value bits.  PREFIX_EUCG3 is OR-ed
 * into a value by nkf_char_euc3_new().
 *
 * The macro argument is parenthesized everywhere so expression arguments
 * expand correctly; it may still be evaluated more than once.
 */
#define PREFIX_EUCG3	NKF_INT32_C(0x8F00)
#define CLASS_MASK	NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE	NKF_INT32_C(0x01000000)
#define VALUE_MASK	NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX	NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX	NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
426 
427 #ifdef NUMCHAR_OPTION
428 static int numchar_f = FALSE;
429 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
431 #endif
432 
433 #ifdef CHECK_OPTION
434 static int noout_f = FALSE;
435 static void no_putc(nkf_char c);
436 static int debug_f = FALSE;
437 static void debug(const char *str);
439 #endif
440 
441 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
442 static void set_input_codename(const char *codename);
443 
444 #ifdef EXEC_IO
445 static int exec_f = 0;
446 #endif
447 
448 #ifdef SHIFTJIS_CP932
449 /* invert IBM extended characters to others */
450 static int cp51932_f = FALSE;
451 
452 /* invert NEC-selected IBM extended characters to IBM extended characters */
453 static int cp932inv_f = TRUE;
454 
455 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
456 #endif /* SHIFTJIS_CP932 */
457 
458 static int x0212_f = FALSE;
459 static int x0213_f = FALSE;
460 
461 static unsigned char prefix_table[256];
462 
463 static void e_status(struct input_code *, nkf_char);
464 static void s_status(struct input_code *, nkf_char);
465 
466 struct input_code input_code_list[] = {
467  {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
468  {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
469 #ifdef UTF8_INPUT_ENABLE
470  {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
471  {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
472  {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
473 #endif
474  {0}
475 };
476 
477 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
478 static int base64_count = 0;
479 
480 /* X0208 -> ASCII converter */
481 
482 /* fold parameter */
483 static int f_line = 0; /* chars in line */
484 static int f_prev = 0;
485 static int fold_preserve_f = FALSE; /* preserve new lines */
486 static int fold_f = FALSE;
487 static int fold_len = 0;
488 
489 /* options */
490 static unsigned char kanji_intro = DEFAULT_J;
491 static unsigned char ascii_intro = DEFAULT_R;
492 
493 /* Folding */
494 
495 #define FOLD_MARGIN 10
496 #define DEFAULT_FOLD 60
497 
499 
500 /* process default */
501 
502 static nkf_char
504 {
505  fprintf(stderr,"nkf internal module connection failure.\n");
507  return 0; /* LINT */
508 }
509 
510 static void
512 {
513  no_connection2(c2,c1,0);
514 }
515 
518 
526 
527 /* static redirections */
528 
530 
531 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
533 
534 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
536 
537 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
538 
539 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
541 
542 /* for strict mime */
543 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
545 
546 /* Global states */
547 static int output_mode = ASCII; /* output kanji mode */
548 static int input_mode = ASCII; /* input kanji mode */
549 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
550 
551 /* X0201 / X0208 conversion tables */
552 
553 /* X0201 kana conversion table */
554 /* 90-9F A0-DF */
555 static const unsigned char cv[]= {
556  0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
557  0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
558  0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
559  0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
560  0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
561  0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
562  0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
563  0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
564  0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
565  0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
566  0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
567  0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
568  0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
569  0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
570  0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
571  0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
572  0x00,0x00};
573 
574 
575 /* X0201 kana conversion table for dakuten (voiced sound mark) */
576 /* 90-9F A0-DF */
577 static const unsigned char dv[]= {
578  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
579  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
580  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
581  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
582  0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
583  0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
584  0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
585  0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
586  0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
587  0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
588  0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
589  0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
590  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
591  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
592  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
593  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
594  0x00,0x00};
595 
596 /* X0201 kana conversion table for han-dakuten (semi-voiced sound mark) */
597 /* 90-9F A0-DF */
598 static const unsigned char ev[]= {
599  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
601  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
602  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
603  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
604  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
605  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609  0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
610  0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
611  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615  0x00,0x00};
616 
617 
618 /* X0208 kigou conversion table */
619 /* 0x8140 - 0x819e */
620 static const unsigned char fv[] = {
621 
622  0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
623  0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
624  0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
625  0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
626  0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
627  0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
628  0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
629  0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
630  0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
631  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
632  0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
633  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
634 } ;
635 
636 
637 
638 static int option_mode = 0;
639 static int file_out_f = FALSE;
640 #ifdef OVERWRITE
641 static int overwrite_f = FALSE;
642 static int preserve_time_f = FALSE;
643 static int backup_f = FALSE;
644 static char *backup_suffix = "";
645 #endif
646 
647 static int eolmode_f = 0; /* CR, LF, CRLF */
648 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
649 static nkf_char prev_cr = 0; /* CR or 0 */
650 #ifdef EASYWIN /*Easy Win */
651 static int end_check;
652 #endif /*Easy Win */
653 
654 static void *
656 {
657  void *ptr;
658 
659  if (size == 0) size = 1;
660 
661  ptr = malloc(size);
662  if (ptr == NULL) {
663  perror("can't malloc");
665  }
666 
667  return ptr;
668 }
669 
670 static void *
671 nkf_xrealloc(void *ptr, size_t size)
672 {
673  if (size == 0) size = 1;
674 
675  ptr = realloc(ptr, size);
676  if (ptr == NULL) {
677  perror("can't realloc");
679  }
680 
681  return ptr;
682 }
683 
684 #define nkf_xfree(ptr) free(ptr)
685 
686 static int
687 nkf_str_caseeql(const char *src, const char *target)
688 {
689  int i;
690  for (i = 0; src[i] && target[i]; i++) {
691  if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
692  }
693  if (src[i] || target[i]) return FALSE;
694  else return TRUE;
695 }
696 
697 static nkf_encoding*
699 {
700  if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
701  return 0;
702  }
703  return &nkf_encoding_table[idx];
704 }
705 
706 static int
708 {
709  int i;
710  if (name[0] == 'X' && *(name+1) == '-') name += 2;
711  for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
713  return encoding_name_to_id_table[i].id;
714  }
715  }
716  return -1;
717 }
718 
719 static nkf_encoding*
720 nkf_enc_find(const char *name)
721 {
722  int idx = -1;
724  if (idx < 0) return 0;
725  return nkf_enc_from_index(idx);
726 }
727 
728 #define nkf_enc_name(enc) (enc)->name
729 #define nkf_enc_to_index(enc) (enc)->id
730 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
731 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
732 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
733 #define nkf_enc_asciicompat(enc) (\
734  nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
735  nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
736 #define nkf_enc_unicode_p(enc) (\
737  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
738  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
739  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
740 #define nkf_enc_cp5022x_p(enc) (\
741  nkf_enc_to_index(enc) == CP50220 ||\
742  nkf_enc_to_index(enc) == CP50221 ||\
743  nkf_enc_to_index(enc) == CP50222)
744 
745 #ifdef DEFAULT_CODE_LOCALE
746 static const char*
748 {
749 #ifdef HAVE_LANGINFO_H
750  return nl_langinfo(CODESET);
751 #elif defined(__WIN32__)
752  static char buf[16];
753  sprintf(buf, "CP%d", GetACP());
754  return buf;
755 #elif defined(__OS2__)
756 # if defined(INT_IS_SHORT)
757  /* OS/2 1.x */
758  return NULL;
759 # else
760  /* OS/2 32bit */
761  static char buf[16];
762  ULONG ulCP[1], ulncp;
763  DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
764  if (ulCP[0] == 932 || ulCP[0] == 943)
765  strcpy(buf, "Shift_JIS");
766  else
767  sprintf(buf, "CP%lu", ulCP[0]);
768  return buf;
769 # endif
770 #endif
771  return NULL;
772 }
773 
774 static nkf_encoding*
776 {
777  nkf_encoding *enc = 0;
778  const char *encname = nkf_locale_charmap();
779  if (encname)
780  enc = nkf_enc_find(encname);
781  return enc;
782 }
783 #endif /* DEFAULT_CODE_LOCALE */
784 
785 static nkf_encoding*
787 {
788  return &nkf_encoding_table[UTF_8];
789 }
790 
791 static nkf_encoding*
793 {
794  nkf_encoding *enc = 0;
795 #ifdef DEFAULT_CODE_LOCALE
797 #elif defined(DEFAULT_ENCIDX)
798  enc = nkf_enc_from_index(DEFAULT_ENCIDX);
799 #endif
800  if (!enc) enc = nkf_utf8_encoding();
801  return enc;
802 }
803 
804 typedef struct {
805  long capa;
806  long len;
807  nkf_char *ptr;
808 } nkf_buf_t;
809 
810 static nkf_buf_t *
812 {
813  nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
814  buf->ptr = nkf_xmalloc(length);
815  buf->capa = length;
816  buf->len = 0;
817  return buf;
818 }
819 
820 #if 0
821 static void
822 nkf_buf_dispose(nkf_buf_t *buf)
823 {
824  nkf_xfree(buf->ptr);
825  nkf_xfree(buf);
826 }
827 #endif
828 
829 #define nkf_buf_length(buf) ((buf)->len)
830 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
831 
832 static unsigned char
834 {
836  return buf->ptr[index];
837 }
838 
839 static void
841 {
842  buf->len = 0;
843 }
844 
845 static void
847 {
848  if (buf->capa <= buf->len) {
850  }
851  buf->ptr[buf->len++] = c;
852 }
853 
854 static unsigned char
856 {
858  return buf->ptr[--buf->len];
859 }
860 
861 /* Normalization Form C */
862 #ifndef PERL_XS
863 #ifdef WIN32DLL
864 #define fprintf dllprintf
865 #endif
866 
867 static void
868 version(void)
869 {
870  fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
871 }
872 
873 static void
874 usage(void)
875 {
877  "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
878 #ifdef UTF8_OUTPUT_ENABLE
879  " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
880  " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
881 #else
882 #endif
883 #ifdef UTF8_INPUT_ENABLE
884  " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
885  " UTF option is -W[8,[16,32][B,L]]\n"
886 #else
887  " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
888 #endif
889  );
891  " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
892  " M[BQ] MIME encode [B:base64 Q:quoted]\n"
893  " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
894  );
896  " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
897  " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
898  " 4: JISX0208 Katakana to JISX0201 Katakana\n"
899  " X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n"
900  );
902  " O Output to File (DEFAULT 'nkf.out')\n"
903  " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
904  );
906  " --ic=<encoding> Specify the input encoding\n"
907  " --oc=<encoding> Specify the output encoding\n"
908  " --hiragana --katakana Hiragana/Katakana Conversion\n"
909  " --katakana-hiragana Converts each other\n"
910  );
912 #ifdef INPUT_OPTION
913  " --{cap, url}-input Convert hex after ':' or '%%'\n"
914 #endif
915 #ifdef NUMCHAR_OPTION
916  " --numchar-input Convert Unicode Character Reference\n"
917 #endif
918 #ifdef UTF8_INPUT_ENABLE
919  " --fb-{skip, html, xml, perl, java, subchar}\n"
920  " Specify unassigned character's replacement\n"
921 #endif
922  );
924 #ifdef OVERWRITE
925  " --in-place[=SUF] Overwrite original files\n"
926  " --overwrite[=SUF] Preserve timestamp of original files\n"
927 #endif
928  " -g --guess Guess the input code\n"
929  " -v --version Print the version\n"
930  " --help/-V Print this help / configuration\n"
931  );
932  version();
933 }
934 
935 static void
936 show_configuration(void)
937 {
939  "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
940  " Compile-time options:\n"
941  " Compiled at: " __DATE__ " " __TIME__ "\n"
942  );
944  " Default output encoding: "
945 #ifdef DEFAULT_CODE_LOCALE
946  "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
947 #elif defined(DEFAULT_ENCIDX)
948  "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
949 #else
950  "NONE\n"
951 #endif
952  );
954  " Default output end of line: "
955 #if DEFAULT_NEWLINE == CR
956  "CR"
957 #elif DEFAULT_NEWLINE == CRLF
958  "CRLF"
959 #else
960  "LF"
961 #endif
962  "\n"
963  " Decode MIME encoded string: "
965  "ON"
966 #else
967  "OFF"
968 #endif
969  "\n"
970  " Convert JIS X 0201 Katakana: "
971 #if X0201_DEFAULT
972  "ON"
973 #else
974  "OFF"
975 #endif
976  "\n"
977  " --help, --version output: "
978 #if HELP_OUTPUT_HELP_OUTPUT
979  "HELP_OUTPUT"
980 #else
981  "STDOUT"
982 #endif
983  "\n");
984 }
985 #endif /*PERL_XS*/
986 
987 #ifdef OVERWRITE
/*
 * Build the name of the backup file for `filename`.
 *
 * If `suffix` contains one or more '*' characters, the result is `suffix`
 * with every '*' replaced by `filename`.  Otherwise the result is
 * `filename` followed by `suffix`.
 *
 * Returns a newly allocated NUL-terminated string obtained from
 * nkf_xmalloc(); the caller is responsible for releasing it.
 * NOTE(review): like the original, this does not check the allocation
 * result -- nkf_xmalloc() is assumed not to return NULL; confirm.
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    const size_t filename_length = strlen(filename);
    const size_t suffix_length = strlen(suffix);
    size_t asterisk_count = 0;
    size_t i, j;
    char *backup_filename;

    for (i = 0; i < suffix_length; i++) {
	if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
	/* Each one-byte '*' expands to filename_length bytes. */
	backup_filename = nkf_xmalloc((suffix_length - asterisk_count)
				      + asterisk_count * filename_length + 1);
	for (i = 0, j = 0; i < suffix_length; i++) {
	    if (suffix[i] == '*') {
		/* Copy at the write position; no rescan of the buffer. */
		memcpy(backup_filename + j, filename, filename_length);
		j += filename_length;
	    } else {
		backup_filename[j++] = suffix[i];
	    }
	}
	backup_filename[j] = '\0';
    } else {
	backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
	memcpy(backup_filename, filename, filename_length);
	/* +1 copies the suffix's terminating NUL as well. */
	memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1022 #endif
1023 
1024 #ifdef UTF8_INPUT_ENABLE
1025 static void
1027 {
1028  int shift = 20;
1029  c &= VALUE_MASK;
1030  while(shift >= 0){
1031  if(c >= 1<<shift){
1032  while(shift >= 0){
1033  (*f)(0, bin2hex(c>>shift));
1034  shift -= 4;
1035  }
1036  }else{
1037  shift -= 4;
1038  }
1039  }
1040  return;
1041 }
1042 
1043 static void
1045 {
1046  (*oconv)(0, '&');
1047  (*oconv)(0, '#');
1048  c &= VALUE_MASK;
1049  if(c >= NKF_INT32_C(1000000))
1050  (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1051  if(c >= NKF_INT32_C(100000))
1052  (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1053  if(c >= 10000)
1054  (*oconv)(0, 0x30+(c/10000 )%10);
1055  if(c >= 1000)
1056  (*oconv)(0, 0x30+(c/1000 )%10);
1057  if(c >= 100)
1058  (*oconv)(0, 0x30+(c/100 )%10);
1059  if(c >= 10)
1060  (*oconv)(0, 0x30+(c/10 )%10);
1061  if(c >= 0)
1062  (*oconv)(0, 0x30+ c %10);
1063  (*oconv)(0, ';');
1064  return;
1065 }
1066 
1067 static void
1069 {
1070  (*oconv)(0, '&');
1071  (*oconv)(0, '#');
1072  (*oconv)(0, 'x');
1074  (*oconv)(0, ';');
1075  return;
1076 }
1077 
1078 static void
1080 {
1081  (*oconv)(0, '\\');
1082  c &= VALUE_MASK;
1083  if(!nkf_char_unicode_bmp_p(c)){
1084  (*oconv)(0, 'U');
1085  (*oconv)(0, '0');
1086  (*oconv)(0, '0');
1087  (*oconv)(0, bin2hex(c>>20));
1088  (*oconv)(0, bin2hex(c>>16));
1089  }else{
1090  (*oconv)(0, 'u');
1091  }
1092  (*oconv)(0, bin2hex(c>>12));
1093  (*oconv)(0, bin2hex(c>> 8));
1094  (*oconv)(0, bin2hex(c>> 4));
1095  (*oconv)(0, bin2hex(c ));
1096  return;
1097 }
1098 
1099 static void
1101 {
1102  (*oconv)(0, '\\');
1103  (*oconv)(0, 'x');
1104  (*oconv)(0, '{');
1106  (*oconv)(0, '}');
1107  return;
1108 }
1109 
1110 static void
1112 {
1113  c = unicode_subchar;
1114  (*oconv)((c>>8)&0xFF, c&0xFF);
1115  return;
1116 }
1117 #endif
1118 
/*
 * Table of long (double-dash) command line options.
 *
 * name  - the long option name; a trailing '=' marks options that are
 *         written with a value (e.g. "ic=").
 * alias - a string of short option letters standing for the long option
 *         (e.g. "euc" -> "e").  An empty alias presumably means the option
 *         is handled directly by the option parser -- confirm against the
 *         code that consumes this table.
 *
 * Entries inside #ifdef blocks exist only when the corresponding feature
 * is compiled in.
 */
static const struct {
    const char *name;
    const char *alias;
} long_option[] = {
    {"ic=", ""},
    {"oc=", ""},
    {"base64","jMB"},
    {"euc","e"},
    {"euc-input","E"},
    {"fj","jm"},
    {"help",""},
    {"jis","j"},
    {"jis-input","J"},
    {"mac","sLm"},
    {"mime","jM"},
    {"mime-input","m"},
    {"msdos","sLw"},
    {"sjis","s"},
    {"sjis-input","S"},
    {"unix","eLu"},
    {"version","v"},
    {"windows","sLw"},
    {"hiragana","h1"},
    {"katakana","h2"},
    {"katakana-hiragana","h3"},
    {"guess=", ""},
    {"guess", "g2"},
    {"cp932", ""},
    {"no-cp932", ""},
#ifdef X0212_ENABLE
    {"x0212", ""},
#endif
#ifdef UTF8_OUTPUT_ENABLE
    {"utf8", "w"},
    {"utf16", "w16"},
    {"ms-ucs-map", ""},
    {"fb-skip", ""},
    {"fb-html", ""},
    {"fb-xml", ""},
    {"fb-perl", ""},
    {"fb-java", ""},
    {"fb-subchar", ""},
    {"fb-subchar=", ""},
#endif
#ifdef UTF8_INPUT_ENABLE
    {"utf8-input", "W"},
    {"utf16-input", "W16"},
    {"no-cp932ext", ""},
    {"no-best-fit-chars",""},
#endif
#ifdef UNICODE_NORMALIZATION
    {"utf8mac-input", ""},
#endif
#ifdef OVERWRITE
    {"overwrite", ""},
    {"overwrite=", ""},
    {"in-place", ""},
    {"in-place=", ""},
#endif
#ifdef INPUT_OPTION
    {"cap-input", ""},
    {"url-input", ""},
#endif
#ifdef NUMCHAR_OPTION
    {"numchar-input", ""},
#endif
#ifdef CHECK_OPTION
    {"no-output", ""},
    {"debug", ""},
#endif
#ifdef SHIFTJIS_CP932
    {"cp932inv", ""},
#endif
#ifdef EXEC_IO
    {"exec-in", ""},
    {"exec-out", ""},
#endif
    {"prefix=", ""},
};
1198 
/*
 * Set the global conversion flags that go with encoding `enc`; presumably this
 * is the input-side setup (the UTF cases are guarded by UTF8_INPUT_ENABLE).
 * Dispatches on nkf_enc_to_index(enc) and sets iso8859_f, x0201_f, x0212_f,
 * x0213_f, nfc_f and, under SHIFTJIS_CP932, cp51932_f.
 * NOTE(review): this listing skips several source lines (see the gaps in the
 * embedded numbers: 1200 with the function name/parameters, the bodies of the
 * UTF8_OUTPUT_ENABLE sections, and the bodies of the UTF-16/UTF-32 cases).
 * Confirm those against the complete source.
 */
1199 static void
1201 {
1202  switch (nkf_enc_to_index(enc)) {
1203  case ISO_8859_1:
1204  iso8859_f = TRUE;
1205  break;
1206  case CP50220:
1207  case CP50221:
1208  case CP50222:
1209  x0201_f = TRUE;
1210 #ifdef SHIFTJIS_CP932
1211  cp51932_f = TRUE;
1212 #endif
1213 #ifdef UTF8_OUTPUT_ENABLE
1215 #endif
1216  break;
1217  case ISO_2022_JP_1:
1218  x0212_f = TRUE;
1219  break;
1220  case ISO_2022_JP_3:
1221  x0212_f = TRUE;
1222  x0213_f = TRUE;
1223  break;
1224  case ISO_2022_JP_2004:
1225  x0212_f = TRUE;
1226  x0213_f = TRUE;
1227  break;
1228  case SHIFT_JIS:
1229  break;
1230  case WINDOWS_31J:
1231  x0201_f = TRUE;
1232 #ifdef SHIFTJIS_CP932
1233  cp51932_f = TRUE;
1234 #endif
1235 #ifdef UTF8_OUTPUT_ENABLE
1237 #endif
1238  break;
1239  break; /* redundant: unreachable after the break above */
1240  case CP10001:
1241 #ifdef SHIFTJIS_CP932
1242  cp51932_f = TRUE;
1243 #endif
1244 #ifdef UTF8_OUTPUT_ENABLE
1246 #endif
1247  break;
1248  case EUC_JP:
1249  break;
1250  case EUCJP_NKF:
1251  break;
1252  case CP51932:
1253  x0201_f = TRUE;
1254 #ifdef SHIFTJIS_CP932
1255  cp51932_f = TRUE;
1256 #endif
1257 #ifdef UTF8_OUTPUT_ENABLE
1259 #endif
1260  break;
1261  case EUCJP_MS:
1262 #ifdef SHIFTJIS_CP932
1263  cp51932_f = FALSE;
1264 #endif
1265 #ifdef UTF8_OUTPUT_ENABLE
1267 #endif
1268  break;
1269  case EUCJP_ASCII:
1270 #ifdef SHIFTJIS_CP932
1271  cp51932_f = FALSE;
1272 #endif
1273 #ifdef UTF8_OUTPUT_ENABLE
1275 #endif
1276  break;
1277  case SHIFT_JISX0213:
1278  case SHIFT_JIS_2004:
1279  x0213_f = TRUE;
1280 #ifdef SHIFTJIS_CP932
1281  cp51932_f = FALSE;
1282 #endif
1283  break;
1284  case EUC_JISX0213:
1285  case EUC_JIS_2004:
1286  x0213_f = TRUE;
1287 #ifdef SHIFTJIS_CP932
1288  cp51932_f = FALSE;
1289 #endif
1290  break;
1291 #ifdef UTF8_INPUT_ENABLE
1292 #ifdef UNICODE_NORMALIZATION
1293  case UTF8_MAC:
1294  nfc_f = TRUE;
1295  break;
1296 #endif
1297  case UTF_16:
1298  case UTF_16BE:
1299  case UTF_16BE_BOM:
1301  break;
1302  case UTF_16LE:
1303  case UTF_16LE_BOM:
1305  break;
1306  case UTF_32:
1307  case UTF_32BE:
1308  case UTF_32BE_BOM:
1310  break;
1311  case UTF_32LE:
1312  case UTF_32LE_BOM:
1314  break;
1315 #endif
1316  }
1317 }
1318 
/*
 * Set the global conversion flags that go with encoding `enc` on the output
 * side (the UTF cases set output_bom_f under UTF8_OUTPUT_ENABLE).
 * Dispatches on nkf_enc_to_index(enc) and sets x0201_f, x0212_f and x0213_f;
 * under SHIFTJIS_CP932 most non-Unicode cases also force cp932inv_f to FALSE.
 * NOTE(review): this listing skips several source lines (1320 with the
 * function name/parameters, the bodies of the UTF8_OUTPUT_ENABLE sections in
 * the legacy cases, and one statement in each little-endian UTF case).
 * Confirm those against the complete source.
 */
1319 static void
1321 {
1322  switch (nkf_enc_to_index(enc)) {
1323  case CP50220:
1324  x0201_f = TRUE;
1325 #ifdef SHIFTJIS_CP932
1326  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1327 #endif
1328 #ifdef UTF8_OUTPUT_ENABLE
1330 #endif
1331  break;
1332  case CP50221:
1333  x0201_f = TRUE;
1334 #ifdef SHIFTJIS_CP932
1335  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1336 #endif
1337 #ifdef UTF8_OUTPUT_ENABLE
1339 #endif
1340  break;
1341  case ISO_2022_JP:
1342 #ifdef SHIFTJIS_CP932
1343  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1344 #endif
1345  break;
1346  case ISO_2022_JP_1:
1347  x0212_f = TRUE;
1348 #ifdef SHIFTJIS_CP932
1349  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1350 #endif
1351  break;
1352  case ISO_2022_JP_3:
1353  x0212_f = TRUE;
1354  x0213_f = TRUE;
1355 #ifdef SHIFTJIS_CP932
1356  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1357 #endif
1358  break;
1359  case SHIFT_JIS:
1360  break;
1361  case WINDOWS_31J:
1362  x0201_f = TRUE;
1363 #ifdef UTF8_OUTPUT_ENABLE
1365 #endif
1366  break;
1367  case CP10001:
1368 #ifdef UTF8_OUTPUT_ENABLE
1370 #endif
1371  break;
1372  case EUC_JP:
1373  x0212_f = TRUE;
1374 #ifdef SHIFTJIS_CP932
1375  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1376 #endif
1377 #ifdef UTF8_OUTPUT_ENABLE
1379 #endif
1380  break;
1381  case EUCJP_NKF:
1382  x0212_f = FALSE;
1383 #ifdef SHIFTJIS_CP932
1384  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1385 #endif
1386 #ifdef UTF8_OUTPUT_ENABLE
1388 #endif
1389  break;
1390  case CP51932:
1391  x0201_f = TRUE;
1392 #ifdef SHIFTJIS_CP932
1393  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1394 #endif
1395 #ifdef UTF8_OUTPUT_ENABLE
1397 #endif
1398  break;
1399  case EUCJP_MS:
1400  x0212_f = TRUE;
1401 #ifdef UTF8_OUTPUT_ENABLE
1403 #endif
1404  break;
1405  case EUCJP_ASCII:
1406  x0212_f = TRUE;
1407 #ifdef UTF8_OUTPUT_ENABLE
1409 #endif
1410  break;
1411  case SHIFT_JISX0213:
1412  case SHIFT_JIS_2004:
1413  x0213_f = TRUE;
1414 #ifdef SHIFTJIS_CP932
1415  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1416 #endif
1417  break;
1418  case EUC_JISX0213:
1419  case EUC_JIS_2004:
1420  x0212_f = TRUE;
1421  x0213_f = TRUE;
1422 #ifdef SHIFTJIS_CP932
1423  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1424 #endif
1425  break;
1426 #ifdef UTF8_OUTPUT_ENABLE
1427  case UTF_8_BOM:
1428  output_bom_f = TRUE;
1429  break;
1430  case UTF_16:
1431  case UTF_16BE_BOM:
1432  output_bom_f = TRUE;
1433  break;
1434  case UTF_16LE:
1436  output_bom_f = FALSE;
1437  break;
1438  case UTF_16LE_BOM:
1440  output_bom_f = TRUE;
1441  break;
1442  case UTF_32:
1443  case UTF_32BE_BOM:
1444  output_bom_f = TRUE;
1445  break;
1446  case UTF_32LE:
1448  output_bom_f = FALSE;
1449  break;
1450  case UTF_32LE_BOM:
1452  output_bom_f = TRUE;
1453  break;
1454 #endif
1455  }
1456 }
1457 
/*
 * Find the input_code_list entry whose iconv_func member equals `iconv_func`.
 * The list is walked until an entry with a null `name` is reached.
 * Returns a pointer to the matching entry, or 0 when `iconv_func` is null or
 * no entry matches.
 * NOTE(review): listing line 1459 (function name and parameter list) is
 * missing from this extract.
 */
1458 static struct input_code*
1460 {
1461  if (iconv_func){
1462  struct input_code *p = input_code_list;
1463  while (p->name){
1464  if (iconv_func == p->iconv_func){
1465  return p;
1466  }
1467  p++;
1468  }
1469  }
1470  return 0;
1471 }
1472 
/*
 * Record the "established" state `f` in estab_f and, when `iconv_func` is
 * non-null, install it as the active input converter `iconv`.
 * With INPUT_CODE_FIX defined, estab_f is only updated when f is non-zero or
 * no input_encoding is set, and the converter is only replaced when
 * f == -TRUE or no input_encoding is set.
 * With CHECK_OPTION defined, a change of converter while estab_f is set is
 * reported through set_input_codename() and debug().
 * NOTE(review): listing lines 1474 (function name/parameters), 1492 (where
 * `p` is declared and assigned) and 1497 are missing from this extract.
 */
1473 static void
1475 {
1476 #ifdef INPUT_CODE_FIX
1477  if (f || !input_encoding)
1478 #endif
1479  if (estab_f != f){
1480  estab_f = f;
1481  }
1482 
1483  if (iconv_func
1484 #ifdef INPUT_CODE_FIX
1485  && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1486 #endif
1487  ){
1488  iconv = iconv_func;
1489  }
1490 #ifdef CHECK_OPTION
1491  if (estab_f && iconv_for_check != iconv){
1493  if (p){
1494  set_input_codename(p->name);
1495  debug(p->name);
1496  }
1498  }
1499 #endif
1500 }
1501 
1502 #ifdef X0212_ENABLE
/*
 * Remap a row value whose low seven bits lie in 0x75..0x7f.
 * If the original value satisfies is_eucg3(), the low seven bits are moved to
 * 0x109..0x113; otherwise they are moved to 0x113..0x11d.
 * Any other value is returned unchanged.
 * NOTE(review): listing line 1504 (function name and parameter list) is
 * missing from this extract; the name is presumably x0212_shift, which is
 * called with one argument elsewhere in this file.
 */
1503 static nkf_char
1505 {
1506  nkf_char ret = c;
1507  c &= 0x7f;
1508  if (is_eucg3(ret)){
1509  if (0x75 <= c && c <= 0x7f){
1510  ret = c + (0x109 - 0x75);
1511  }
1512  }else{
1513  if (0x75 <= c && c <= 0x7f){
1514  ret = c + (0x113 - 0x75);
1515  }
1516  }
1517  return ret;
1518 }
1519 
1520 
/*
 * Remap an extended row value back into the 0x75..0x7e range.
 * Values 0x7f..0x88 become 0x75..0x7e; values 0x89..0x92 become 0x75..0x7e
 * combined with PREFIX_EUCG3 | 0x80.  Any other value is returned unchanged.
 * NOTE(review): listing line 1522 (function name and parameter list) is
 * missing from this extract; the name is presumably x0212_unshift, which is
 * called with one argument elsewhere in this file.
 */
1521 static nkf_char
1523 {
1524  nkf_char ret = c;
1525  if (0x7f <= c && c <= 0x88){
1526  ret = c + (0x75 - 0x7f);
1527  }else if (0x89 <= c && c <= 0x92){
1528  ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1529  }
1530  return ret;
1531 }
1532 #endif /* X0212_ENABLE */
1533 
/*
 * Convert the pair (c2, c1) into a two-byte pair stored through p2 and p1
 * (either pointer may be null, in which case that byte is not stored).
 * Presumably this is the EUC/JIS row-cell to Shift_JIS direction: the lookup
 * table used for the is_eucg3() case is named x0212_shiftjis.
 * Returns 0 when a result was produced and 1 when (c2, c1) cannot be
 * converted.
 * NOTE(review): listing line 1535 (function name and parameter list) is
 * missing from this extract.
 */
1534 static nkf_char
1536 {
1537  nkf_char ndx;
1538  if (is_eucg3(c2)){
1539  ndx = c2 & 0x7f;
1540  if (x0213_f){
1541  if((0x21 <= ndx && ndx <= 0x2F)){
1542  if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1543  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1544  return 0;
1545  }else if(0x6E <= ndx && ndx <= 0x7E){
1546  if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1547  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1548  return 0;
1549  }
1550  return 1;
1551  }
1552 #ifdef X0212_ENABLE
1553  else if(nkf_isgraph(ndx)){
1554  nkf_char val = 0;
1555  const unsigned short *ptr;
1556  ptr = x0212_shiftjis[ndx - 0x21];
1557  if (ptr){
1558  val = ptr[(c1 & 0x7f) - 0x21];
1559  }
1560  if (val){
1561  c2 = val >> 8;
1562  c1 = val & 0xff;
1563  if (p2) *p2 = c2;
1564  if (p1) *p1 = c1;
1565  return 0;
1566  }
1567  c2 = x0212_shift(c2); /* no table entry: fall through to the generic formula below */
1568  }
1569 #endif /* X0212_ENABLE */
1570  }
1571  if(0x7F < c2) return 1;
1572  if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1573  if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1574  return 0;
1575 }
1576 
/*
 * Convert the two-byte pair (c2, c1) and store the result through p2 and p1
 * (either pointer may be null).  Presumably the Shift_JIS to EUC/JIS row-cell
 * direction, the counterpart of e2s_conv.
 * Steps, in order:
 *  - reject a trail byte above 0xFC (returns 1);
 *  - under SHIFTJIS_CP932, remap through shiftjis_cp932 or cp932inv,
 *    depending on cp932inv_f;
 *  - under X0212_ENABLE and when x0213_f is off, look the pair up in
 *    shiftjis_x0212 and return the table value directly if one exists;
 *  - otherwise apply the arithmetic conversion for c2 >= 0x80.
 * Returns 0 on success.
 * NOTE(review): listing line 1578 (function name and parameter list) is
 * missing from this extract.
 */
1577 static nkf_char
1579 {
1580 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1581  nkf_char val;
1582 #endif
1583  static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1584  if (0xFC < c1) return 1;
1585 #ifdef SHIFTJIS_CP932
1586  if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1587  val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1588  if (val){
1589  c2 = val >> 8;
1590  c1 = val & 0xff;
1591  }
1592  }
1593  if (cp932inv_f
1594  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1595  val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1596  if (val){
1597  c2 = val >> 8;
1598  c1 = val & 0xff;
1599  }
1600  }
1601 #endif /* SHIFTJIS_CP932 */
1602 #ifdef X0212_ENABLE
1603  if (!x0213_f && is_ibmext_in_sjis(c2)){
1604  val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1605  if (val){
1606  if (val > 0x7FFF){
1607  c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1608  c1 = val & 0xff;
1609  }else{
1610  c2 = val >> 8;
1611  c1 = val & 0xff;
1612  }
1613  if (p2) *p2 = c2;
1614  if (p1) *p1 = c1;
1615  return 0;
1616  }
1617  }
1618 #endif
1619  if(c2 >= 0x80){
1620  if(x0213_f && c2 >= 0xF0){
1621  if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1622  c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1623  }else{ /* 78<=k<=94 */
1624  c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1625  if (0x9E < c1) c2++;
1626  }
1627  }else{
1628 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1629 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1630  c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1631  if (0x9E < c1) c2++;
1632  }
1633  if (c1 < 0x9F)
1634  c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1635  else {
1636  c1 = c1 - 0x7E;
1637  }
1638  }
1639 
1640 #ifdef X0212_ENABLE
1641  c2 = x0212_unshift(c2);
1642 #endif
1643  if (p2) *p2 = c2;
1644  if (p1) *p1 = c1;
1645  return 0;
1646 }
1647 
1648 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
/*
 * Encode the code point `val & VALUE_MASK` as UTF-8 into *p1..*p4.
 * Bytes that are not needed for the encoding are set to 0, so a caller can
 * detect the sequence length by testing for zero.  When the masked value is
 * neither in the BMP nor accepted by nkf_char_unicode_value_p(), all four
 * outputs are set to 0.
 * The four pointers are dereferenced unconditionally.
 * NOTE(review): listing line 1650 (function name and parameter list) is
 * missing from this extract.
 */
1649 static void
1651 {
1652  val &= VALUE_MASK;
1653  if (val < 0x80){
1654  *p1 = val;
1655  *p2 = 0;
1656  *p3 = 0;
1657  *p4 = 0;
1658  }else if (val < 0x800){
1659  *p1 = 0xc0 | (val >> 6);
1660  *p2 = 0x80 | (val & 0x3f);
1661  *p3 = 0;
1662  *p4 = 0;
1663  } else if (nkf_char_unicode_bmp_p(val)) {
1664  *p1 = 0xe0 | (val >> 12);
1665  *p2 = 0x80 | ((val >> 6) & 0x3f);
1666  *p3 = 0x80 | ( val & 0x3f);
1667  *p4 = 0;
1668  } else if (nkf_char_unicode_value_p(val)) {
1669  *p1 = 0xf0 | (val >> 18);
1670  *p2 = 0x80 | ((val >> 12) & 0x3f);
1671  *p3 = 0x80 | ((val >> 6) & 0x3f);
1672  *p4 = 0x80 | ( val & 0x3f);
1673  } else {
1674  *p1 = 0;
1675  *p2 = 0;
1676  *p3 = 0;
1677  *p4 = 0;
1678  }
1679 }
1680 
/*
 * Decode one UTF-8 sequence given as up to four bytes (c1 is the lead byte,
 * c2..c4 the following bytes; unused trailing arguments are ignored).
 * Returns the decoded code point, or -1 when c1 is not a valid lead byte
 * (0x80..0xC1 and 0xF5 and above).
 * The continuation bytes are masked with 0x3F but not otherwise validated.
 * NOTE(review): listing line 1682 (function name and parameter list) is
 * missing from this extract.
 */
1681 static nkf_char
1683 {
1684  nkf_char wc;
1685  if (c1 <= 0x7F) {
1686  /* single byte */
1687  wc = c1;
1688  }
1689  else if (c1 <= 0xC1) {
1690  /* trail byte (0x80-0xBF) or overlong lead (0xC0, 0xC1); 0xC2 and 0xC3 are valid 2-byte leads */
1691  return -1;
1692  }
1693  else if (c1 <= 0xDF) {
1694  /* 2 bytes */
1695  wc = (c1 & 0x1F) << 6;
1696  wc |= (c2 & 0x3F);
1697  }
1698  else if (c1 <= 0xEF) {
1699  /* 3 bytes */
1700  wc = (c1 & 0x0F) << 12;
1701  wc |= (c2 & 0x3F) << 6;
1702  wc |= (c3 & 0x3F);
1703  }
1704  else if (c1 <= 0xF4) {
1705  /* 4 bytes: the range check is on the lead byte, like every branch above */
1706  wc = (c1 & 0x0F) << 18;
1707  wc |= (c2 & 0x3F) << 12;
1708  wc |= (c3 & 0x3F) << 6;
1709  wc |= (c4 & 0x3F);
1710  }
1711  else {
1712  return -1;
1713  }
1714  return wc;
1715 }
1716 #endif
1717 
1718 #ifdef UTF8_INPUT_ENABLE
/*
 * Look up the byte pair (c1, c0) in the two-level table `pp`.
 * c1 - 0x80 selects a row of `pp` (must be below `psize`) and c0 - 0x80
 * selects the entry in that row (must be below sizeof_utf8_to_euc_C2).
 * Returns 1 when the table, the row or the entry is missing or zero, or when
 * no_cp932ext_f is set and the entry is an NEC special or IBM extended
 * character.  Otherwise returns 0 and stores the high byte of the entry in
 * *p2 and the low byte in *p1 (each only if the pointer is non-null).
 * Entries above 0x7FFF get PREFIX_EUCG3 in place of their top bit, and a high
 * byte equal to SO is replaced by JIS_X_0201_1976_K.
 * NOTE(review): listing line 1720 (function name and the first parameters) is
 * missing from this extract.
 */
1719 static int
1721  const unsigned short *const *pp, nkf_char psize,
1722  nkf_char *p2, nkf_char *p1)
1723 {
1724  nkf_char c2;
1725  const unsigned short *p;
1726  unsigned short val;
1727 
1728  if (pp == 0) return 1;
1729 
1730  c1 -= 0x80;
1731  if (c1 < 0 || psize <= c1) return 1;
1732  p = pp[c1];
1733  if (p == 0) return 1;
1734 
1735  c0 -= 0x80;
1736  if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1737  val = p[c0];
1738  if (val == 0) return 1;
1739  if (no_cp932ext_f && (
1740  (val>>8) == 0x2D || /* NEC special characters */
1741  val > NKF_INT32_C(0xF300) /* IBM extended characters */
1742  )) return 1;
1743 
1744  c2 = val >> 8;
1745  if (val > 0x7FFF){
1746  c2 &= 0x7f;
1747  c2 |= PREFIX_EUCG3;
1748  }
1749  if (c2 == SO) c2 = JIS_X_0201_1976_K;
1750  c1 = val & 0xFF;
1751  if (p2) *p2 = c2;
1752  if (p1) *p1 = c1;
1753  return 0;
1754 }
1755 
/*
 * Convert a UTF-8 sequence of up to three bytes (c2 lead, then c1, c0) and
 * store the result through p2 and p1, which are dereferenced unconditionally.
 *  - c2 < 0x80: single byte, stored as (0, c2).
 *  - c2 < 0xe0: two-byte sequence, looked up via unicode_to_jis_common2().
 *  - next branch: three-byte sequence, looked up via a table selected by
 *    c2 - 0xE0.
 *  - otherwise returns -1.
 * When no_best_fit_chars_f is set, certain sequences are rejected (return 1)
 * before the lookup; which ones depends on ms_ucs_map_f and cp932inv_f.
 * Under SHIFTJIS_CP932, a successful is_eucg3() result is passed through
 * e2s_conv() and then s2e_conv() when cp932inv_f is off; failure of e2s_conv()
 * turns the result into 1.
 * Returns 0 on success, 1 when there is no mapping, -1 as noted above.
 * NOTE(review): listing lines 1757 (function name/parameters), 1826-1829 and
 * 1893-1896 (the expressions assigned to `pp` and `ppp`) are missing from
 * this extract.
 */
1756 static int
1758 {
1759  const unsigned short *const *pp;
1760  const unsigned short *const *const *ppp;
1761  static const char no_best_fit_chars_table_C2[] =
1762  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1763  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1764  1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1765  0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1766  static const char no_best_fit_chars_table_C2_ms[] =
1767  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1768  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1769  1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1770  0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1771  static const char no_best_fit_chars_table_932_C2[] =
1772  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1773  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1774  1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1775  0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1776  static const char no_best_fit_chars_table_932_C3[] =
1777  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1778  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1779  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1780  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1781  nkf_char ret = 0;
1782 
1783  if(c2 < 0x80){
1784  *p2 = 0;
1785  *p1 = c2;
1786  }else if(c2 < 0xe0){
1787  if(no_best_fit_chars_f){
1788  if(ms_ucs_map_f == UCS_MAP_CP932){
1789  switch(c2){
1790  case 0xC2:
1791  if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1792  break;
1793  case 0xC3:
1794  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1795  break;
1796  }
1797  }else if(!cp932inv_f){
1798  switch(c2){
1799  case 0xC2:
1800  if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1801  break;
1802  case 0xC3:
1803  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1804  break;
1805  }
1806  }else if(ms_ucs_map_f == UCS_MAP_MS){
1807  if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1808  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1809  switch(c2){
1810  case 0xC2:
1811  switch(c1){
1812  case 0xA2:
1813  case 0xA3:
1814  case 0xA5:
1815  case 0xA6:
1816  case 0xAC:
1817  case 0xAF:
1818  case 0xB8:
1819  return 1;
1820  }
1821  break;
1822  }
1823  }
1824  }
1825  pp =
1830  ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
1831  }else if(c0 < 0xF0){ /* NOTE(review): tests c0 although the branches above test the lead byte c2 -- confirm intended */
1832  if(no_best_fit_chars_f){
1833  if(ms_ucs_map_f == UCS_MAP_CP932){
1834  if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1835  }else if(ms_ucs_map_f == UCS_MAP_MS){
1836  switch(c2){
1837  case 0xE2:
1838  switch(c1){
1839  case 0x80:
1840  if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1841  break;
1842  case 0x88:
1843  if(c0 == 0x92) return 1;
1844  break;
1845  }
1846  break;
1847  case 0xE3:
1848  if(c1 == 0x80 || c0 == 0x9C) return 1;
1849  break;
1850  }
1851  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1852  switch(c2){
1853  case 0xE3:
1854  switch(c1){
1855  case 0x82:
1856  if(c0 == 0x94) return 1;
1857  break;
1858  case 0x83:
1859  if(c0 == 0xBB) return 1;
1860  break;
1861  }
1862  break;
1863  }
1864  }else{
1865  switch(c2){
1866  case 0xE2:
1867  switch(c1){
1868  case 0x80:
1869  if(c0 == 0x95) return 1;
1870  break;
1871  case 0x88:
1872  if(c0 == 0xA5) return 1;
1873  break;
1874  }
1875  break;
1876  case 0xEF:
1877  switch(c1){
1878  case 0xBC:
1879  if(c0 == 0x8D) return 1;
1880  break;
1881  case 0xBD:
1882  if(c0 == 0x9E && !cp932inv_f) return 1;
1883  break;
1884  case 0xBF:
1885  if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1886  break;
1887  }
1888  break;
1889  }
1890  }
1891  }
1892  ppp =
1897  ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1898  }else return -1;
1899 #ifdef SHIFTJIS_CP932
1900  if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1901  nkf_char s2, s1;
1902  if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1903  s2e_conv(s2, s1, p2, p1);
1904  }else{
1905  ret = 1;
1906  }
1907  }
1908 #endif
1909  return ret;
1910 }
1911 
1912 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Map the internal pair (c2, c1) to the value found in the matching
 * euc_to_utf8 table, or 0 when there is no table or no entry in range.
 *  - c2 == JIS_X_0201_1976_K: uses euc_to_utf8_1byte, with two special cases
 *    when ms_ucs_map_f is UCS_MAP_CP10001 (0x20 -> 0xA0, 0x7D -> 0xA9).
 *  - is_eucg3(c2) (X0212_ENABLE only): row table selected by (c2 & 0x7f) - 0x21,
 *    with one special case for UCS_MAP_ASCII.
 *  - otherwise: row table selected by (c2 & 0x7f) - 0x21.
 * The cell index is (c1 & 0x7f) - 0x21.
 * NOTE(review): listing lines 1914 (function name/parameters), 1935 and
 * 1944-1946 (the assignments to `p`) are missing from this extract.
 */
1913 static nkf_char
1915 {
1916  const unsigned short *p;
1917 
1918  if (c2 == JIS_X_0201_1976_K) {
1919  if (ms_ucs_map_f == UCS_MAP_CP10001) {
1920  switch (c1) {
1921  case 0x20:
1922  return 0xA0;
1923  case 0x7D:
1924  return 0xA9;
1925  }
1926  }
1927  p = euc_to_utf8_1byte;
1928 #ifdef X0212_ENABLE
1929  } else if (is_eucg3(c2)){
1930  if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1931  return 0xA6;
1932  }
1933  c2 = (c2&0x7f) - 0x21;
1934  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1936  else
1937  return 0;
1938 #endif
1939  } else {
1940  c2 &= 0x7f;
1941  c2 = (c2&0x7f) - 0x21;
1942  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1943  p =
1947  else
1948  return 0;
1949  }
1950  if (!p) return 0;
1951  c1 = (c1 & 0x7f) - 0x21;
1952  if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
1953  return p[c1];
1954  return 0;
1955 }
1956 #endif
1957 
/*
 * Convert a UTF-8 sequence (c2 lead, then c1, c0) and store the result
 * through p2 and p1.
 *  - c1 == 0: single byte; stores (0, c2).
 *  - lead byte 0xc0..0xef: delegates to unicode_to_jis_common().  With
 *    NUMCHAR_OPTION, a positive (no mapping) result is replaced by storing the
 *    decoded code point wrapped by nkf_char_unicode_new() in *p1 with *p2 = 0,
 *    and the return value becomes 0.
 *  - any other lead byte: nothing is stored and 0 is returned.
 * NOTE(review): listing line 1959 (function name and parameter list) is
 * missing from this extract.
 */
1958 static nkf_char
1960 {
1961  nkf_char ret = 0;
1962 
1963  if (!c1){
1964  *p2 = 0;
1965  *p1 = c2;
1966  }else if (0xc0 <= c2 && c2 <= 0xef) {
1967  ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1968 #ifdef NUMCHAR_OPTION
1969  if (ret > 0){
1970  if (p2) *p2 = 0;
1971  if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1972  ret = 0;
1973  }
1974 #endif
1975  }
1976  return ret;
1977 }
1978 
1979 #ifdef UTF8_INPUT_ENABLE
/*
 * Convert the code point `val & VALUE_MASK` and store the result through p2
 * and p1 (both dereferenced unconditionally).
 *  - below 0x80: stored as (0, val).
 *  - in the BMP: encoded to UTF-8 and passed to unicode_to_jis_common(); a
 *    positive (no mapping) result is reset to 0 after *p2 is cleared.
 *  - otherwise: *p2 is cleared.
 * NOTE(review): listing lines 1981 (function name/parameters), 1995 and 2001
 * are missing from this extract; they presumably store a value in *p1 for the
 * two fallback paths -- confirm against the complete source.
 */
1980 static nkf_char
1982 {
1983  nkf_char c1, c2, c3, c4;
1984  nkf_char ret = 0;
1985  val &= VALUE_MASK;
1986  if (val < 0x80) {
1987  *p2 = 0;
1988  *p1 = val;
1989  }
1990  else if (nkf_char_unicode_bmp_p(val)){
1991  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
1992  ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
1993  if (ret > 0){
1994  *p2 = 0;
1996  ret = 0;
1997  }
1998  }
1999  else {
2000  *p2 = 0;
2002  }
2003  return ret;
2004 }
2005 #endif
2006 
/*
 * Input converter taking one character as (c2, c1, c0), normalising it to an
 * internal (c2, c1) pair and passing that pair to (*oconv).
 *  - JIS_X_0201_1976_K or SS2 lead: replaced by GETA1/GETA2 when iso2022jp_f
 *    is set and x0201_f is not; otherwise c1 is masked to 7 bits.
 *  - 0x8f lead (X0212_ENABLE): returns -1 while c0 is still 0; user-defined
 *    rows 0xF5..0xFE are mapped into the Unicode private use area, other
 *    values are packed as (c2 << 8) | (c1 & 0x7f).
 *  - EOF, 0, control values and ISO_8859_1 are passed through unchanged.
 *  - everything else is masked to 7 bits, with private-use and CP932
 *    round-trip handling depending on cp51932_f and ms_ucs_map_f.
 * Returns 0 after calling (*oconv).
 * NOTE(review): listing lines 2008 (function name/parameters) and 2014 are
 * missing from this extract.
 */
2007 static nkf_char
2009 {
2010  if (c2 == JIS_X_0201_1976_K || c2 == SS2){
2011  if (iso2022jp_f && !x0201_f) {
2012  c2 = GETA1; c1 = GETA2;
2013  } else {
2015  c1 &= 0x7f;
2016  }
2017 #ifdef X0212_ENABLE
2018  }else if (c2 == 0x8f){
2019  if (c0 == 0){
2020  return -1;
2021  }
2022  if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2023  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2024  c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2025  c2 = 0;
2026  } else {
2027  c2 = (c2 << 8) | (c1 & 0x7f);
2028  c1 = c0 & 0x7f;
2029 #ifdef SHIFTJIS_CP932
2030  if (cp51932_f){
2031  nkf_char s2, s1;
2032  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2033  s2e_conv(s2, s1, &c2, &c1);
2034  if (c2 < 0x100){
2035  c1 &= 0x7f;
2036  c2 &= 0x7f;
2037  }
2038  }
2039  }
2040 #endif /* SHIFTJIS_CP932 */
2041  }
2042 #endif /* X0212_ENABLE */
2043  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2044  /* NOP */
2045  } else {
2046  if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2047  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2048  c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2049  c2 = 0;
2050  } else {
2051  c1 &= 0x7f;
2052  c2 &= 0x7f;
2053 #ifdef SHIFTJIS_CP932
2054  if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2055  nkf_char s2, s1;
2056  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2057  s2e_conv(s2, s1, &c2, &c1);
2058  if (c2 < 0x100){
2059  c1 &= 0x7f;
2060  c2 &= 0x7f;
2061  }
2062  }
2063  }
2064 #endif /* SHIFTJIS_CP932 */
2065  }
2066  }
2067  (*oconv)(c2, c1);
2068  return 0;
2069 }
2070 
/*
 * Input converter taking one character as (c2, c1), normalising it to an
 * internal pair and passing that pair to (*oconv).
 *  - JIS_X_0201_1976_K or a lead in 0xA1..0xDF: replaced by GETA1/GETA2 when
 *    iso2022jp_f is set and x0201_f is not; otherwise c1 is masked to 7 bits.
 *  - EOF, 0 and control values are passed through unchanged.
 *  - lead 0xF0..0xF9 with trail 0x40..0xFC while x0213_f is off: mapped into
 *    the Unicode private use area from 0xE000, 188 cells per lead byte; a
 *    trail byte of 0x7F is dropped (returns 0 without calling oconv).
 *  - everything else goes through s2e_conv(); its non-zero result is returned
 *    as-is without calling oconv.
 * Returns 0 after calling (*oconv).
 * NOTE(review): listing line 2072 (function name and parameter list) is
 * missing from this extract.
 */
2071 static nkf_char
2073 {
2074  if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2075  if (iso2022jp_f && !x0201_f) {
2076  c2 = GETA1; c1 = GETA2;
2077  } else {
2078  c1 &= 0x7f;
2079  }
2080  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2081  /* NOP */
2082  } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2083  /* CP932 UDC */
2084  if(c1 == 0x7F) return 0;
2085  c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2086  c2 = 0;
2087  } else {
2088  nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2089  if (ret) return ret;
2090  }
2091  (*oconv)(c2, c1);
2092  return 0;
2093 }
2094 
/*
 * Input converter for a UTF-8 sequence given as (c1, c2, c3); when c3 is
 * above 0xFF it carries two bytes, and its low byte is split off into c4.
 * The lead byte c1 is classified through w_iconv_utf8_1st_byte (indexed by
 * c1 - 0xC0) and the following bytes are range-checked per class:
 *  - returns 0 without output for a stray trail byte or an invalid sequence;
 *  - returns -1 (three-byte classes) or -2 (four-byte classes) while c3 is
 *    still 0, which presumably asks the caller for more input.
 * A valid sequence is converted and the resulting pair is passed to (*oconv)
 * when the conversion result is 0.  The conversion result is returned.
 * NOTE(review): listing lines 2096 (function name/parameters) and 2159 (part
 * of the four-byte branch) are missing from this extract.
 */
2095 static nkf_char
2097 {
2098  nkf_char ret = 0, c4 = 0;
2099  static const char w_iconv_utf8_1st_byte[] =
2100  { /* 0xC0 - 0xFF */
2101  20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2102  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2103  30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2104  40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2105 
2106  if (c3 > 0xFF) {
2107  c4 = c3 & 0xFF;
2108  c3 >>= 8;
2109  }
2110 
2111  if (c1 < 0 || 0xff < c1) {
2112  }else if (c1 == 0) { /* 0 : 1 byte*/
2113  c3 = 0;
2114  } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2115  return 0;
2116  } else{
2117  switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2118  case 21:
2119  if (c2 < 0x80 || 0xBF < c2) return 0;
2120  break;
2121  case 30:
2122  if (c3 == 0) return -1;
2123  if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2124  return 0;
2125  break;
2126  case 31:
2127  case 33:
2128  if (c3 == 0) return -1;
2129  if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2130  return 0;
2131  break;
2132  case 32:
2133  if (c3 == 0) return -1;
2134  if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2135  return 0;
2136  break;
2137  case 40:
2138  if (c3 == 0) return -2;
2139  if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2140  return 0;
2141  break;
2142  case 41:
2143  if (c3 == 0) return -2;
2144  if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2145  return 0;
2146  break;
2147  case 42:
2148  if (c3 == 0) return -2;
2149  if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2150  return 0;
2151  break;
2152  default:
2153  return 0;
2154  break;
2155  }
2156  }
2157  if (c1 == 0 || c1 == EOF){
2158  } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2160  c1 = 0;
2161  } else {
2162  ret = w2e_conv(c1, c2, c3, &c1, &c2);
2163  }
2164  if (ret == 0){
2165  (*oconv)(c1, c2);
2166  }
2167  return ret;
2168 }
2169 
/* Status code; presumably returned for code points outside the accepted range (its uses fall on lines missing from this listing). */
2170 #define NKF_ICONV_INVALID_CODE_RANGE -13
/*
 * Convert one Unicode code point `wc` to an internal (c2, c1) pair and pass
 * it to (*oconv).
 *  - below 0x80: passed as (0, wc).
 *  - (wc >> 11) == 27, i.e. 0xD800..0xDFFF: unpaired surrogate.
 *  - below 0xFFFF: converted by w16e_conv(); a non-zero result is returned.
 *  - below 0x10FFFF: passed as (0, nkf_char_unicode_new(wc)).
 * Returns 0 after calling (*oconv).
 * NOTE(review): listing lines 2172 (function name/parameters), 2182 and 2190
 * (the statements of the surrogate and out-of-range branches) are missing
 * from this extract.
 */
2171 static size_t
2173 {
2174  nkf_char c1, c2;
2175  int ret = 0;
2176 
2177  if (wc < 0x80) {
2178  c2 = 0;
2179  c1 = wc;
2180  }else if ((wc>>11) == 27) {
2181  /* unpaired surrogate */
2183  }else if (wc < 0xFFFF) {
2184  ret = w16e_conv(wc, &c2, &c1);
2185  if (ret) return ret;
2186  }else if (wc < 0x10FFFF) {
2187  c2 = 0;
2188  c1 = nkf_char_unicode_new(wc);
2189  } else {
2191  }
2192  (*oconv)(c2, c1);
2193  return 0;
2194 }
2195 
/* Status codes telling the caller that more input bytes are required. */
2196 #define NKF_ICONV_NEED_ONE_MORE_BYTE -1
2197 #define NKF_ICONV_NEED_TWO_MORE_BYTES -2
/* Combine a UTF-16 surrogate pair (lead, trail) into one code point. */
2198 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
/*
 * Decode one UTF-16 unit, or a surrogate pair, from the bytes c1..c4
 * according to input_endian and pass the code point to (*unicode_iconv).
 * c1 == EOF is forwarded as (*oconv)(EOF, 0) and 0 is returned.
 * When the first unit is a high surrogate (high byte 0xD8..0xDB) and the
 * second unit is not a low surrogate (high byte 0xDC..0xDF), returns
 * NKF_ICONV_NEED_TWO_MORE_BYTES.
 * Any input_endian other than ENDIAN_BIG is decoded as little-endian.
 * NOTE(review): listing line 2200 (function name and parameter list) is
 * missing from this extract.
 */
2199 static size_t
2201 {
2202  nkf_char wc;
2203 
2204  if (c1 == EOF) {
2205  (*oconv)(EOF, 0);
2206  return 0;
2207  }
2208 
2209  if (input_endian == ENDIAN_BIG) {
2210  if (0xD8 <= c1 && c1 <= 0xDB) {
2211  if (0xDC <= c3 && c3 <= 0xDF) {
2212  wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2213  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2214  } else {
2215  wc = c1 << 8 | c2;
2216  }
2217  } else {
2218  if (0xD8 <= c2 && c2 <= 0xDB) {
2219  if (0xDC <= c4 && c4 <= 0xDF) {
2220  wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2221  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2222  } else {
2223  wc = c2 << 8 | c1;
2224  }
2225  }
2226 
2227  return (*unicode_iconv)(wc);
2228 }
2229 
/*
 * Stub that does nothing and returns 0.
 * NOTE(review): listing line 2231 (function name and parameter list) is
 * missing from this extract, so the name and arguments cannot be confirmed
 * here.
 */
2230 static nkf_char
2232 {
2233  return 0;
2234 }
2235 
/*
 * Stub that does nothing and returns 0.
 * NOTE(review): listing line 2237 (function name and parameter list) is
 * missing from this extract, so the name and arguments cannot be confirmed
 * here.
 */
2236 static nkf_char
2238 {
2239  return 0;
2240 }
2241 
/*
 * Assemble one UTF-32 code point from the bytes c1..c4 according to
 * input_endian and pass it to (*unicode_iconv).
 * Only three of the four bytes contribute to the value; the byte in the most
 * significant position for the selected byte order is ignored.
 * c1 == EOF is forwarded as (*oconv)(EOF, 0) and 0 is returned.
 * NOTE(review): listing lines 2243 (function name/parameters) and 2266 (the
 * body of the default case) are missing from this extract.
 */
2242 static size_t
2244 {
2245  nkf_char wc;
2246 
2247  if (c1 == EOF) {
2248  (*oconv)(EOF, 0);
2249  return 0;
2250  }
2251 
2252  switch(input_endian){
2253  case ENDIAN_BIG:
2254  wc = c2 << 16 | c3 << 8 | c4;
2255  break;
2256  case ENDIAN_LITTLE:
2257  wc = c3 << 16 | c2 << 8 | c1;
2258  break;
2259  case ENDIAN_2143:
2260  wc = c1 << 16 | c4 << 8 | c3;
2261  break;
2262  case ENDIAN_3412:
2263  wc = c4 << 16 | c1 << 8 | c2;
2264  break;
2265  default:
2267  }
2268 
2269  return (*unicode_iconv)(wc);
2270 }
2271 #endif
2272 
/*
 * Emit the three bytes ESC, '(' and ascii_intro through (*o_putc) and set
 * output_mode to `mode`, unless output_mode is already ASCII or ISO_8859_1,
 * in which case nothing is emitted and output_mode is left unchanged.
 */
2273 #define output_ascii_escape_sequence(mode) do { \
2274  if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2275  (*o_putc)(ESC); \
2276  (*o_putc)('('); \
2277  (*o_putc)(ascii_intro); \
2278  output_mode = mode; \
2279  } \
2280  } while (0)
2281 
/*
 * Switch the output to character set `mode`: emit the escape sequence for
 * that set through (*o_putc) and record `mode` in output_mode.
 * Does nothing when output_mode already equals `mode`.  For a mode without a
 * case below, no bytes are emitted but output_mode is still updated.
 * Sequences emitted:
 *   ISO_8859_1         ESC . A
 *   JIS_X_0201_1976_K  ESC ( I
 *   JIS_X_0208         ESC $ <kanji_intro>
 *   JIS_X_0212         ESC $ ( D
 *   JIS_X_0213_1       ESC $ ( Q
 *   JIS_X_0213_2       ESC $ ( P
 * NOTE(review): listing line 2283 (function name and parameter list) is
 * missing from this extract.
 */
2282 static void
2284 {
2285  if (output_mode == mode)
2286  return;
2287  switch(mode) {
2288  case ISO_8859_1:
2289  (*o_putc)(ESC);
2290  (*o_putc)('.');
2291  (*o_putc)('A');
2292  break;
2293  case JIS_X_0201_1976_K:
2294  (*o_putc)(ESC);
2295  (*o_putc)('(');
2296  (*o_putc)('I');
2297  break;
2298  case JIS_X_0208:
2299  (*o_putc)(ESC);
2300  (*o_putc)('$');
2301  (*o_putc)(kanji_intro);
2302  break;
2303  case JIS_X_0212:
2304  (*o_putc)(ESC);
2305  (*o_putc)('$');
2306  (*o_putc)('(');
2307  (*o_putc)('D');
2308  break;
2309  case JIS_X_0213_1:
2310  (*o_putc)(ESC);
2311  (*o_putc)('$');
2312  (*o_putc)('(');
2313  (*o_putc)('Q');
2314  break;
2315  case JIS_X_0213_2:
2316  (*o_putc)(ESC);
2317  (*o_putc)('$');
2318  (*o_putc)('(');
2319  (*o_putc)('P');
2320  break;
2321  }
2322  output_mode = mode;
2323 }
2324 
/*
 * Output converter: write the internal pair (c2, c1) through (*o_putc).
 * With NUMCHAR_OPTION, a Unicode-tagged c1 is first converted by w16e_conv();
 * if it is still Unicode-tagged, values 0xE000..0xE757 are mapped to
 * user-defined rows starting at 0x7F when ms_ucs_map_f is set, and anything
 * else is handed to encode_fallback (if set) and dropped.
 * In the final branch the pair is silently dropped when either byte is
 * outside the accepted range (row limit 0x92 with ms_ucs_map_f, 0x7e
 * without).
 * NOTE(review): listing lines 2326 (function name/parameters), 2346, 2350,
 * 2354, 2358, 2362 and 2370 are missing from this extract, one at the top of
 * each output branch -- confirm against the complete source.
 */
2325 static void
2327 {
2328 #ifdef NUMCHAR_OPTION
2329  if (c2 == 0 && nkf_char_unicode_p(c1)){
2330  w16e_conv(c1, &c2, &c1);
2331  if (c2 == 0 && nkf_char_unicode_p(c1)){
2332  c2 = c1 & VALUE_MASK;
2333  if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2334  /* CP5022x UDC */
2335  c1 &= 0xFFF;
2336  c2 = 0x7F + c1 / 94;
2337  c1 = 0x21 + c1 % 94;
2338  } else {
2339  if (encode_fallback) (*encode_fallback)(c1);
2340  return;
2341  }
2342  }
2343  }
2344 #endif
2345  if (c2 == 0) {
2347  (*o_putc)(c1);
2348  }
2349  else if (c2 == EOF) {
2351  (*o_putc)(EOF);
2352  }
2353  else if (c2 == ISO_8859_1) {
2355  (*o_putc)(c1|0x80);
2356  }
2357  else if (c2 == JIS_X_0201_1976_K) {
2359  (*o_putc)(c1);
2360 #ifdef X0212_ENABLE
2361  } else if (is_eucg3(c2)){
2363  (*o_putc)(c2 & 0x7f);
2364  (*o_putc)(c1);
2365 #endif
2366  } else {
2367  if(ms_ucs_map_f
2368  ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2369  : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2371  (*o_putc)(c2);
2372  (*o_putc)(c1);
2373  }
2374 }
2375 
/*
 * Output converter: write the internal pair (c2, c1) through (*o_putc) with
 * the high bit set on double-byte characters.
 * A Unicode-tagged c1 is first converted by w16e_conv(); if it is still
 * Unicode-tagged, values 0xE000..0xE757 are written as user-defined rows when
 * x0212_f is set (rows past the tenth use the 0x8f three-byte form), and
 * anything else is handed to encode_fallback (if set) and dropped.
 * Remaining cases:
 *  - EOF is forwarded; c2 == 0 is written as a single byte;
 *  - JIS_X_0201_1976_K is written as SS2 followed by c1 | 0x80;
 *  - is_eucg3(c2) (X0212_ENABLE) is written in the 0x8f three-byte form only
 *    when x0212_f is set; otherwise such a character produces no output;
 *  - otherwise both bytes must pass nkf_isgraph(); if not, set_iconv(FALSE, 0)
 *    is called and the character is dropped.
 * NOTE(review): listing lines 2377 (function name/parameters) and 2414 are
 * missing from this extract.
 */
2376 static void
2378 {
2379  if (c2 == 0 && nkf_char_unicode_p(c1)){
2380  w16e_conv(c1, &c2, &c1);
2381  if (c2 == 0 && nkf_char_unicode_p(c1)){
2382  c2 = c1 & VALUE_MASK;
2383  if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2384  /* eucJP-ms UDC */
2385  c1 &= 0xFFF;
2386  c2 = c1 / 94;
2387  c2 += c2 < 10 ? 0x75 : 0x8FEB;
2388  c1 = 0x21 + c1 % 94;
2389  if (is_eucg3(c2)){
2390  (*o_putc)(0x8f);
2391  (*o_putc)((c2 & 0x7f) | 0x080);
2392  (*o_putc)(c1 | 0x080);
2393  }else{
2394  (*o_putc)((c2 & 0x7f) | 0x080);
2395  (*o_putc)(c1 | 0x080);
2396  }
2397  return;
2398  } else {
2399  if (encode_fallback) (*encode_fallback)(c1);
2400  return;
2401  }
2402  }
2403  }
2404 
2405  if (c2 == EOF) {
2406  (*o_putc)(EOF);
2407  } else if (c2 == 0) {
2408  output_mode = ASCII;
2409  (*o_putc)(c1);
2410  } else if (c2 == JIS_X_0201_1976_K) {
2411  output_mode = EUC_JP;
2412  (*o_putc)(SS2); (*o_putc)(c1|0x80);
2413  } else if (c2 == ISO_8859_1) {
2415  (*o_putc)(c1 | 0x080);
2416 #ifdef X0212_ENABLE
2417  } else if (is_eucg3(c2)){
2418  output_mode = EUC_JP;
2419 #ifdef SHIFTJIS_CP932
2420  if (!cp932inv_f){
2421  nkf_char s2, s1;
2422  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2423  s2e_conv(s2, s1, &c2, &c1);
2424  }
2425  }
2426 #endif
2427  if (c2 == 0) {
2428  output_mode = ASCII;
2429  (*o_putc)(c1);
2430  }else if (is_eucg3(c2)){
2431  if (x0212_f){
2432  (*o_putc)(0x8f);
2433  (*o_putc)((c2 & 0x7f) | 0x080);
2434  (*o_putc)(c1 | 0x080);
2435  }
2436  }else{
2437  (*o_putc)((c2 & 0x7f) | 0x080);
2438  (*o_putc)(c1 | 0x080);
2439  }
2440 #endif
2441  } else {
2442  if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2443  set_iconv(FALSE, 0);
2444  return; /* too late to rescue this char */
2445  }
2446  output_mode = EUC_JP;
2447  (*o_putc)(c2 | 0x080);
2448  (*o_putc)(c1 | 0x080);
2449  }
2450 }
2451 
/*
 * Output converter: write the internal pair (c2, c1) through (*o_putc),
 * converting double-byte characters with e2s_conv().
 * With NUMCHAR_OPTION, a Unicode-tagged c1 is first converted by w16e_conv();
 * if it is still Unicode-tagged, values 0xE000..0xE757 are written as
 * user-defined characters (188 per lead byte, starting at lead 0xF0 or 0xEB
 * depending on cp932inv_f) when x0213_f is off, and anything else is handed
 * to encode_fallback (if set) and dropped.
 * In the final branch both bytes must pass nkf_isprint(); if not,
 * set_iconv(FALSE, 0) is called and the character is dropped.  After the
 * lead byte, a non-zero prefix_table entry for the trail byte is written
 * before the trail byte itself.
 * NOTE(review): listing lines 2453 (function name/parameters), 2483, 2486,
 * 2490 and 2501 are missing from this extract.
 */
2452 static void
2454 {
2455 #ifdef NUMCHAR_OPTION
2456  if (c2 == 0 && nkf_char_unicode_p(c1)){
2457  w16e_conv(c1, &c2, &c1);
2458  if (c2 == 0 && nkf_char_unicode_p(c1)){
2459  c2 = c1 & VALUE_MASK;
2460  if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2461  /* CP932 UDC */
2462  c1 &= 0xFFF;
2463  c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2464  c1 = c1 % 188;
2465  c1 += 0x40 + (c1 > 0x3e);
2466  (*o_putc)(c2);
2467  (*o_putc)(c1);
2468  return;
2469  } else {
2470  if(encode_fallback)(*encode_fallback)(c1);
2471  return;
2472  }
2473  }
2474  }
2475 #endif
2476  if (c2 == EOF) {
2477  (*o_putc)(EOF);
2478  return;
2479  } else if (c2 == 0) {
2480  output_mode = ASCII;
2481  (*o_putc)(c1);
2482  } else if (c2 == JIS_X_0201_1976_K) {
2484  (*o_putc)(c1|0x80);
2485  } else if (c2 == ISO_8859_1) {
2487  (*o_putc)(c1 | 0x080);
2488 #ifdef X0212_ENABLE
2489  } else if (is_eucg3(c2)){
2491  if (e2s_conv(c2, c1, &c2, &c1) == 0){
2492  (*o_putc)(c2);
2493  (*o_putc)(c1);
2494  }
2495 #endif
2496  } else {
2497  if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2498  set_iconv(FALSE, 0);
2499  return; /* too late to rescue this char */
2500  }
2502  e2s_conv(c2, c1, &c2, &c1);
2503 
2504 #ifdef SHIFTJIS_CP932
2505  if (cp932inv_f
2506  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2507  nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2508  if (c){
2509  c2 = c >> 8;
2510  c1 = c & 0xff;
2511  }
2512  }
2513 #endif /* SHIFTJIS_CP932 */
2514 
2515  (*o_putc)(c2);
2516  if (prefix_table[(unsigned char)c1]){
2517  (*o_putc)(prefix_table[(unsigned char)c1]);
2518  }
2519  (*o_putc)(c1);
2520  }
2521 }
2522 
2523 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Output converter: write the internal pair (c2, c1) as UTF-8 through
 * (*o_putc).
 * On the first call with output_bom_f set, the three-byte BOM (EF BB BF) is
 * written and the flag is cleared; this happens before the EOF check.
 *  - EOF is forwarded.
 *  - a Unicode-tagged c1 is encoded directly from its masked value.
 *  - c2 == 0 is written as a single byte.
 *  - otherwise the pair is mapped by e2w_conv(); a zero result produces no
 *    output.
 * Continuation bytes are written only when non-zero.
 * NOTE(review): listing line 2525 (function name and parameter list) is
 * missing from this extract.
 */
2524 static void
2526 {
2527  nkf_char c3, c4;
2528  nkf_char val;
2529 
2530  if (output_bom_f) {
2531  output_bom_f = FALSE;
2532  (*o_putc)('\357');
2533  (*o_putc)('\273');
2534  (*o_putc)('\277');
2535  }
2536 
2537  if (c2 == EOF) {
2538  (*o_putc)(EOF);
2539  return;
2540  }
2541 
2542  if (c2 == 0 && nkf_char_unicode_p(c1)){
2543  val = c1 & VALUE_MASK;
2544  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2545  (*o_putc)(c1);
2546  if (c2) (*o_putc)(c2);
2547  if (c3) (*o_putc)(c3);
2548  if (c4) (*o_putc)(c4);
2549  return;
2550  }
2551 
2552  if (c2 == 0) {
2553  (*o_putc)(c1);
2554  } else {
2555  val = e2w_conv(c2, c1);
2556  if (val){
2557  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2558  (*o_putc)(c1);
2559  if (c2) (*o_putc)(c2);
2560  if (c3) (*o_putc)(c3);
2561  if (c4) (*o_putc)(c4);
2562  }
2563  }
2564 }
2565 
/*
 * Output converter: write the internal pair (c2, c1) as UTF-16 through
 * (*o_putc), in the byte order selected by output_endian.
 * On the first call with output_bom_f set, the BOM is written in that byte
 * order and the flag is cleared; this happens before the EOF check.
 *  - EOF is forwarded.
 *  - a Unicode-tagged c1 in the BMP is split into its two bytes; above the
 *    BMP it is written as a surrogate pair when the masked value is at most
 *    UNICODE_MAX, and dropped otherwise.
 *  - any other pair with c2 != 0 is mapped by e2w_conv(); a zero result
 *    produces no output.
 *  - c2 == 0 is written as the unit (0, c1).
 * NOTE(review): listing line 2567 (function name and parameter list) is
 * missing from this extract.
 */
2566 static void
2568 {
2569  if (output_bom_f) {
2570  output_bom_f = FALSE;
2571  if (output_endian == ENDIAN_LITTLE){
2572  (*o_putc)(0xFF);
2573  (*o_putc)(0xFE);
2574  }else{
2575  (*o_putc)(0xFE);
2576  (*o_putc)(0xFF);
2577  }
2578  }
2579 
2580  if (c2 == EOF) {
2581  (*o_putc)(EOF);
2582  return;
2583  }
2584 
2585  if (c2 == 0 && nkf_char_unicode_p(c1)) {
2586  if (nkf_char_unicode_bmp_p(c1)) {
2587  c2 = (c1 >> 8) & 0xff;
2588  c1 &= 0xff;
2589  } else {
2590  c1 &= VALUE_MASK;
2591  if (c1 <= UNICODE_MAX) {
2592  c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2593  c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2594  if (output_endian == ENDIAN_LITTLE){
2595  (*o_putc)(c2 & 0xff);
2596  (*o_putc)((c2 >> 8) & 0xff);
2597  (*o_putc)(c1 & 0xff);
2598  (*o_putc)((c1 >> 8) & 0xff);
2599  }else{
2600  (*o_putc)((c2 >> 8) & 0xff);
2601  (*o_putc)(c2 & 0xff);
2602  (*o_putc)((c1 >> 8) & 0xff);
2603  (*o_putc)(c1 & 0xff);
2604  }
2605  }
2606  return;
2607  }
2608  } else if (c2) {
2609  nkf_char val = e2w_conv(c2, c1);
2610  c2 = (val >> 8) & 0xff;
2611  c1 = val & 0xff;
2612  if (!val) return;
2613  }
2614 
2615  if (output_endian == ENDIAN_LITTLE){
2616  (*o_putc)(c1);
2617  (*o_putc)(c2);
2618  }else{
2619  (*o_putc)(c2);
2620  (*o_putc)(c1);
2621  }
2622 }
2623 
/*
 * Output converter: write the internal pair (c2, c1) as UTF-32 through
 * (*o_putc), in the byte order selected by output_endian.
 * On the first call with output_bom_f set, the four-byte BOM is written in
 * that byte order and the flag is cleared; this happens before the EOF check.
 *  - EOF is forwarded.
 *  - ISO_8859_1: the high bit is set on c1.
 *  - a Unicode-tagged c1 is reduced to its masked value.
 *  - any other pair with c2 != 0 is mapped by e2w_conv(); a zero result
 *    produces no output.
 * The most significant output byte is always 0; the other three come from
 * bits 0..23 of the value.
 * NOTE(review): listing line 2625 (function name and parameter list) is
 * missing from this extract.
 */
2624 static void
2626 {
2627  if (output_bom_f) {
2628  output_bom_f = FALSE;
2629  if (output_endian == ENDIAN_LITTLE){
2630  (*o_putc)(0xFF);
2631  (*o_putc)(0xFE);
2632  (*o_putc)(0);
2633  (*o_putc)(0);
2634  }else{
2635  (*o_putc)(0);
2636  (*o_putc)(0);
2637  (*o_putc)(0xFE);
2638  (*o_putc)(0xFF);
2639  }
2640  }
2641 
2642  if (c2 == EOF) {
2643  (*o_putc)(EOF);
2644  return;
2645  }
2646 
2647  if (c2 == ISO_8859_1) {
2648  c1 |= 0x80;
2649  } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2650  c1 &= VALUE_MASK;
2651  } else if (c2) {
2652  c1 = e2w_conv(c2, c1);
2653  if (!c1) return;
2654  }
2655  if (output_endian == ENDIAN_LITTLE){
2656  (*o_putc)( c1 & 0xFF);
2657  (*o_putc)((c1 >> 8) & 0xFF);
2658  (*o_putc)((c1 >> 16) & 0xFF);
2659  (*o_putc)(0);
2660  }else{
2661  (*o_putc)(0);
2662  (*o_putc)((c1 >> 16) & 0xFF);
2663  (*o_putc)((c1 >> 8) & 0xFF);
2664  (*o_putc)( c1 & 0xFF);
2665  }
2666 }
2667 #endif
2668 
/*
 * Score bit flags.  Each flag is the previous one shifted left by one bit, so
 * they can be OR-ed together in a single score value; SCORE_L2 is the lowest
 * bit and SCORE_ERROR the highest.
 */
2669 #define SCORE_L2 (1) /* Kanji Level 2 */
2670 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2671 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */
2672 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2673 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2674 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* Undefined Characters */
2675 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2676 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
2677 
/* Initial score value: only the SCORE_iMIME bit is set. */
2678 #define SCORE_INIT (SCORE_iMIME)
2679 
/*
 * Score lookup table; the call site visible in this file indexes it with
 * (c2 & 0x0f).
 * NOTE(review): the listing's numbering skips 2683-2684, so only the first
 * two rows of the initializer are visible in this dump.
 */
2680 static const nkf_char score_table_A0[] = {
2681  0, 0, 0, 0,
2682  0, 0, 0, 0,
2685 };
2686 
/*
 * Score lookup table; the call site visible in this file indexes it with
 * (c2 & 0x0f).
 * NOTE(review): the listing's numbering skips 2688-2691, so every row of the
 * initializer is missing from this dump.
 */
2687 static const nkf_char score_table_F0[] = {
2692 };
2693 
/*
 * ORs the bits of score into ptr->score; does nothing when ptr is null.
 * NOTE(review): the listing's numbering skips 2695, so the declarator line is
 * missing from this dump; the body reads ptr and score.
 */
2694 static void
2696 {
2697  if (ptr){
2698  ptr->score |= score;
2699  }
2700 }
2701 
/*
 * Clears the bits of score from ptr->score; does nothing when ptr is null.
 * NOTE(review): the listing's numbering skips 2703, so the declarator line is
 * missing from this dump; the body reads ptr and score.
 */
2702 static void
2704 {
2705  if (ptr){
/*
 * A single complement is required here.  The previous "~~score" cancelled
 * itself out, so the statement kept only the bits it was meant to remove.
 */
2706  ptr->score &= ~score;
2707  }
2708 }
2709 
/*
 * Classifies the character held in ptr->buf[0] (c2) and ptr->buf[1] (c1,
 * only read when UTF8_OUTPUT_ENABLE is defined) and records a score for it.
 * NOTE(review): the listing's numbering skips 2711 (the declarator) and
 * 2718, 2720, 2722, 2725 and 2732 (the bodies of five branches), so those
 * branches appear empty in this dump.
 */
2710 static void
2712 {
2713  nkf_char c2 = ptr->buf[0];
2714 #ifdef UTF8_OUTPUT_ENABLE
2715  nkf_char c1 = ptr->buf[1];
2716 #endif
2717  if (c2 < 0){
2719  }else if (c2 == SS2){
2721  }else if (c2 == 0x8f){
2723 #ifdef UTF8_OUTPUT_ENABLE
/* e2w_conv returning 0 selects this branch. */
2724  }else if (!e2w_conv(c2, c1)){
2726 #endif
/* Bits 4-6 of c2 choose the table; the low nibble is the index into it. */
2727  }else if ((c2 & 0x70) == 0x20){
2728  set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2729  }else if ((c2 & 0x70) == 0x70){
2730  set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2731  }else if ((c2 & 0x70) >= 0x50){
2733  }
2734 }
2735 
/*
 * Marks a detector entry as disabled: stat and buf[0] become -1, the entry
 * is scored via code_score, and if the global iconv currently points at this
 * entry's iconv_func, set_iconv(FALSE, 0) is called.
 * NOTE(review): the listing's numbering skips 2737, so the declarator line is
 * missing from this dump; the body reads ptr.
 */
2736 static void
2738 {
2739  ptr->stat = -1;
2740  ptr->buf[0] = -1;
2741  code_score(ptr);
2742  if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2743 }
2744 
/*
 * Appends c to ptr->buf at position ptr->index and advances the index.
 * No bounds check is performed here.
 * NOTE(review): the listing's numbering skips 2746, so the declarator line is
 * missing from this dump; the body reads ptr and c.
 */
2744 static void
2747 {
2748  ptr->buf[ptr->index++] = c;
2749 }
2750 
/*
 * Returns a detector entry to its idle state: stat = 0 and index = 0.
 * NOTE(review): the listing's numbering skips 2752, so the declarator line is
 * missing from this dump; the body reads ptr.
 */
2750 static void
2753 {
2754  ptr->stat = 0;
2755  ptr->index = 0;
2756 }
2757 
/*
 * status_clear plus resetting the accumulated score to SCORE_INIT.
 * NOTE(review): the listing's numbering skips 2759, so the declarator line is
 * missing from this dump; the body reads ptr.
 */
2757 static void
2760 {
2761  status_clear(ptr);
2762  ptr->score = SCORE_INIT;
2763 }
2764 
/*
 * status_reset plus clearing the entry's _file_stat field.
 * NOTE(review): the listing's numbering skips 2766, so the declarator line is
 * missing from this dump; the body reads ptr.
 */
2764 static void
2767 {
2768  status_reset(ptr);
2769  ptr->_file_stat = 0;
2770 }
2771 
/*
 * Resets the entry (status_reset) when c is at most DEL and estab_f is set;
 * otherwise leaves it untouched.
 * NOTE(review): the listing's numbering skips 2773, so the declarator line is
 * missing from this dump; the body reads ptr and c.
 */
2771 static void
2774 {
2775  if (c <= DEL && estab_f){
2776  status_reset(ptr);
2777  }
2778 }
2779 
/*
 * Per-byte state machine for one detector entry.  ptr->stat is the state:
 * -1 disabled, 0 idle, 1/2/3 waiting for the second byte of a two-byte
 * character.  Completed pairs are converted with s2e_conv and scored.
 * Presumably this is the Shift_JIS detector (it uses s2e_conv and
 * is_ibmext_in_sjis) -- confirm against the input_code table.
 * NOTE(review): the listing's numbering skips 2781 (the declarator) and
 * 2793, 2814, 2824, 2832, 2838, 2844 and 2847, so several statements and the
 * bodies of the rejecting else-branches are missing from this dump.
 */
2779 static void
2782 {
2783  switch(ptr->stat){
2784  case -1:
2785  status_check(ptr, c);
2786  break;
2787  case 0:
/* Bytes up to DEL and internal Unicode values leave the state unchanged. */
2788  if (c <= DEL){
2789  break;
2790  }else if (nkf_char_unicode_p(c)){
2791  break;
/* 0xa1-0xdf is complete on its own: pushed, scored, and the state cleared. */
2792  }else if (0xa1 <= c && c <= 0xdf){
2794  status_push_ch(ptr, c);
2795  code_score(ptr);
2796  status_clear(ptr);
/* Lead bytes: remember the byte and wait for its trail byte. */
2797  }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2798  ptr->stat = 1;
2799  status_push_ch(ptr, c);
2800  }else if (0xed <= c && c <= 0xee){
2801  ptr->stat = 3;
2802  status_push_ch(ptr, c);
2803 #ifdef SHIFTJIS_CP932
2804  }else if (is_ibmext_in_sjis(c)){
2805  ptr->stat = 2;
2806  status_push_ch(ptr, c);
2807 #endif /* SHIFTJIS_CP932 */
2808 #ifdef X0212_ENABLE
2809  }else if (0xf0 <= c && c <= 0xfc){
2810  ptr->stat = 1;
2811  status_push_ch(ptr, c);
2812 #endif /* X0212_ENABLE */
2813  }else{
2815  }
2816  break;
2817  case 1:
/* Trail byte accepted: convert the pair in place, score it, go idle. */
2818  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2819  status_push_ch(ptr, c);
2820  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2821  code_score(ptr);
2822  status_clear(ptr);
2823  }else{
2825  }
2826  break;
2827  case 2:
2828 #ifdef SHIFTJIS_CP932
2829  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2830  status_push_ch(ptr, c);
/* Only a conversion result of 0 takes the early exit below. */
2831  if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2833  status_clear(ptr);
2834  break;
2835  }
2836  }
2837 #endif /* SHIFTJIS_CP932 */
2839  break;
2840  case 3:
2841  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2842  status_push_ch(ptr, c);
2843  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2845  status_clear(ptr);
2846  }else{
2848  }
2849  break;
2850  }
2851 }
2852 
/*
 * Per-byte state machine for one detector entry.  ptr->stat is the state:
 * -1 disabled, 0 idle, 1 waiting for the final byte, 2 waiting for the
 * middle byte of a three-byte sequence introduced by 0x8f.
 * Presumably this is the EUC-JP detector (SS2 / 0x8f prefixes, 0xa1-0xfe
 * bytes) -- confirm against the input_code table.
 * NOTE(review): the listing's numbering skips 2854 (the declarator) and
 * 2874, 2883 and 2892, so the bodies of the rejecting else-branches are
 * missing from this dump.
 */
2853 static void
2855 {
2856  switch (ptr->stat){
2857  case -1:
2858  status_check(ptr, c);
2859  break;
2860  case 0:
/* Bytes up to DEL and internal Unicode values leave the state unchanged. */
2861  if (c <= DEL){
2862  break;
2863  }else if (nkf_char_unicode_p(c)){
2864  break;
2865  }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2866  ptr->stat = 1;
2867  status_push_ch(ptr, c);
2868 #ifdef X0212_ENABLE
2869  }else if (0x8f == c){
2870  ptr->stat = 2;
2871  status_push_ch(ptr, c);
2872 #endif /* X0212_ENABLE */
2873  }else{
2875  }
2876  break;
2877  case 1:
/* Final byte: score the buffered character and go idle. */
2878  if (0xa1 <= c && c <= 0xfe){
2879  status_push_ch(ptr, c);
2880  code_score(ptr);
2881  status_clear(ptr);
2882  }else{
2884  }
2885  break;
2886 #ifdef X0212_ENABLE
2887  case 2:
/* Middle byte: buffer it and wait for the final byte in state 1. */
2888  if (0xa1 <= c && c <= 0xfe){
2889  ptr->stat = 1;
2890  status_push_ch(ptr, c);
2891  }else{
2893  }
2894 #endif /* X0212_ENABLE */
2895  }
2896 }
2897 
2898 #ifdef UTF8_INPUT_ENABLE
/*
 * Per-byte state machine for one detector entry.  ptr->stat is the state and
 * doubles as the number of continuation bytes expected: 1 after 0xc0-0xdf,
 * 2 after 0xe0-0xef, 3 after 0xf0-0xf4.  Continuation bytes are 0x80-0xbf.
 * Presumably this is the UTF-8 detector (lead/continuation byte ranges and
 * w2e_conv) -- confirm against the input_code table.
 * NOTE(review): the listing's numbering skips 2900 (the declarator) and
 * 2921, 2939 and 2950, so the bodies of the rejecting else-branches are
 * missing from this dump.
 */
2899 static void
2901 {
2902  switch (ptr->stat){
2903  case -1:
2904  status_check(ptr, c);
2905  break;
2906  case 0:
/* Bytes up to DEL and internal Unicode values leave the state unchanged. */
2907  if (c <= DEL){
2908  break;
2909  }else if (nkf_char_unicode_p(c)){
2910  break;
2911  }else if (0xc0 <= c && c <= 0xdf){
2912  ptr->stat = 1;
2913  status_push_ch(ptr, c);
2914  }else if (0xe0 <= c && c <= 0xef){
2915  ptr->stat = 2;
2916  status_push_ch(ptr, c);
2917  }else if (0xf0 <= c && c <= 0xf4){
2918  ptr->stat = 3;
2919  status_push_ch(ptr, c);
2920  }else{
2922  }
2923  break;
2924  case 1:
2925  case 2:
2926  if (0x80 <= c && c <= 0xbf){
2927  status_push_ch(ptr, c);
/* index > stat means the lead byte plus all continuation bytes are buffered. */
2928  if (ptr->index > ptr->stat){
/* EF BB BF is recognized before conversion and is not scored. */
2929  int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2930  && ptr->buf[2] == 0xbf);
2931  w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2932  &ptr->buf[0], &ptr->buf[1]);
2933  if (!bom){
2934  code_score(ptr);
2935  }
2936  status_clear(ptr);
2937  }
2938  }else{
2940  }
2941  break;
2942  case 3:
/* Four-byte sequences are only counted through; no conversion or scoring. */
2943  if (0x80 <= c && c <= 0xbf){
2944  if (ptr->index < ptr->stat){
2945  status_push_ch(ptr, c);
2946  } else {
2947  status_clear(ptr);
2948  }
2949  }else{
2951  }
2952  break;
2953  }
2954 }
2955 #endif
2956 
/*
 * Feeds one input value c to every entry of input_code_list that has a
 * status_func, then decides whether the input encoding is determined.
 * NOTE(review): the listing's numbering skips 2958, so the declarator line is
 * missing from this dump; the body reads c.
 */
2957 static void
2959 {
/* Stays 1 only if no entry is mid-character and at most one entry is idle. */
2960  int action_flag = 1;
2961  struct input_code *result = 0;
2962  struct input_code *p = input_code_list;
2963  while (p->name){
2964  if (!p->status_func) {
2965  ++p;
2966  continue;
2967  }
/* NOTE(review): this second test can never be true after the one above. */
2968  if (!p->status_func)
2969  continue;
2970  (p->status_func)(p, c);
2971  if (p->stat > 0){
2972  action_flag = 0;
2973  }else if(p->stat == 0){
2974  if (result){
2975  action_flag = 0;
2976  }else{
2977  result = p;
2978  }
2979  }
2980  ++p;
2981  }
2982 
2983  if (action_flag){
/* Exactly one candidate left and nothing established yet: select it. */
2984  if (result && !estab_f){
2985  set_iconv(TRUE, result->iconv_func);
/* Otherwise, on a byte up to DEL, reset every entry in the list. */
2986  }else if (c <= DEL){
2987  struct input_code *ptr = input_code_list;
2988  while (ptr->name){
2989  status_reset(ptr);
2990  ++ptr;
2991  }
2992  }
2993  }
2994 }
2995 
/*
 * Converter-wide state: buffer pointers plus the broken_state value.
 * NOTE(review): the listing's numbering skips 3000 (one struct member) and
 * 3004 (a line between the typedef and the define), so both are missing from
 * this dump.
 */
2996 typedef struct {
2997  nkf_buf_t *std_gc_buf;
2998  nkf_char broken_state;
2999  nkf_buf_t *broken_buf;
3001  nkf_buf_t *nfc_buf;
3002 } nkf_state_t;
3003 
3005 
3006 #define STD_GC_BUFSIZE (256)
3007 
/*
 * Prepares the global nkf_state: when it does not exist yet it is allocated
 * with nkf_xmalloc; in both cases broken_state and mimeout_state are zeroed.
 * NOTE(review): the listing's numbering skips 3009 (the declarator),
 * 3012-3014 (the body of the "already allocated" branch) and 3018-3020 (the
 * rest of the allocation branch), so those lines are missing from this dump.
 */
3008 static void
3010 {
3011  if (nkf_state) {
3015  }
3016  else {
3017  nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3021  }
3022  nkf_state->broken_state = 0;
3023  nkf_state->mimeout_state = 0;
3024 }
3025 
3026 #ifndef WIN32DLL
/*
 * Reads one character: pops from nkf_state->std_gc_buf on the guarded path,
 * otherwise falls back to getc(f).
 * NOTE(review): the listing's numbering skips 3028 (the declarator) and 3030
 * (the condition guarding the pop), so both are missing from this dump.
 */
3027 static nkf_char
3029 {
3031  return nkf_buf_pop(nkf_state->std_gc_buf);
3032  }
3033  return getc(f);
3034 }
3035 #endif /*WIN32DLL*/
3036 
/*
 * Returns c to the caller.
 * NOTE(review): the listing's numbering skips 3038 (the declarator) and 3040
 * (the only statement before the return), so what is done with c before it
 * is returned cannot be seen in this dump.
 */
3037 static nkf_char
3039 {
3041  return c;
3042 }
3043 
3044 #ifndef WIN32DLL
/*
 * Writes c to standard output with putchar; EOF is silently ignored.
 * NOTE(review): the listing's numbering skips 3046, so the declarator line is
 * missing from this dump; the body reads c.
 */
3045 static void
3047 {
3048  if(c!=EOF)
3049  putchar(c);
3050 }
3051 #endif /*WIN32DLL*/
3052 
/* Look-ahead buffer and its fill count. */
3053 static unsigned char hold_buf[HOLD_SIZE*2];
3054 static int hold_count = 0;
/*
 * Appends the low byte of c2 to hold_buf.
 * Returns EOF when the buffer was already full (nothing is stored) or when
 * this store filled it; otherwise returns the new fill count.
 * NOTE(review): the listing's numbering skips 3056, so the declarator line is
 * missing from this dump; the body reads c2.
 */
3055 static nkf_char
3057 {
3058  if (hold_count >= HOLD_SIZE*2)
3059  return (EOF);
3060  hold_buf[hold_count++] = (unsigned char)c2;
3061  return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3062 }
3063 
/*
 * Look-ahead conversion used while the input encoding is still undecided.
 *
 * Buffers c1 and c2, then keeps reading from i_getc (running code_status on
 * each byte) until EOF, an ESC (which is pushed back), a full hold buffer, or
 * estab_f becoming set.  If nothing was established, the input_code_list
 * entry with a status_func and the lowest score is selected (the first entry
 * is the starting candidate).  The buffered bytes are then replayed through
 * iconv.
 *
 * Returns the last value read by the look-ahead loop, or EOF when the input
 * ends while a third or fourth byte of a character is being fetched.
 */
3064 static int
3065 h_conv(FILE *f, int c1, int c2)
3066 {
3067  int ret, c4, c3;
3068  int hold_index;
3069 
3070 
3071  /** it must NOT be in the kanji shift sequence */
3072  /** it must NOT be written in JIS7 */
3073  /** and it must be after 2 byte 8bit code */
3074 
3075  hold_count = 0;
3076  push_hold_buf(c1);
3077  push_hold_buf(c2);
3078 
3079  while ((c2 = (*i_getc)(f)) != EOF) {
3080  if (c2 == ESC){
3081  (*i_ungetc)(c2,f);
3082  break;
3083  }
3084  code_status(c2);
3085  if (push_hold_buf(c2) == EOF || estab_f) {
3086  break;
3087  }
3088  }
3089 
3090  if (!estab_f) {
3091  struct input_code *p = input_code_list;
3092  struct input_code *result = p;
/* Let the detectors see the end of input before the scores are compared. */
3093  if (c2 == EOF) {
3094  code_status(c2);
3095  }
3096  while (p->name) {
3097  if (p->status_func && p->score < result->score) {
3098  result = p;
3099  }
3100  p++;
3101  }
3102  set_iconv(TRUE, result->iconv_func);
3103  }
3104 
3105 
3106  /** now,
3107  ** 1) EOF is detected, or
3108  ** 2) Code is established, or
3109  ** 3) Buffer is FULL (but last word is pushed)
3110  **
3111  ** in 1) and 3) cases, we continue to use
3112  ** Kanji codes by oconv and leave estab_f unchanged.
3113  **/
3114 
3115  ret = c2;
3116  hold_index = 0;
3117  while (hold_index < hold_count){
3118  c1 = hold_buf[hold_index++];
/* Single-byte cases: values up to DEL, and 0xa1-0xdf when s_iconv is selected. */
3119  if (c1 <= DEL){
3120  (*iconv)(0, c1, 0);
3121  continue;
3122  }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3123  (*iconv)(JIS_X_0201_1976_K, c1, 0);
3124  continue;
3125  }
/* Second byte: from the hold buffer if any is left, else from the stream. */
3126  if (hold_index < hold_count){
3127  c2 = hold_buf[hold_index++];
3128  }else{
3129  c2 = (*i_getc)(f);
3130  if (c2 == EOF){
3131  c4 = EOF;
3132  break;
3133  }
3134  code_status(c2);
3135  }
3136  c3 = 0;
/* A result of -1 asks for one more byte, -2 for two more. */
3137  switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3138  case -2:
3139  /* 4 bytes UTF-8 */
3140  if (hold_index < hold_count){
3141  c3 = hold_buf[hold_index++];
3142  } else if ((c3 = (*i_getc)(f)) == EOF) {
3143  ret = EOF;
3144  break;
3145  }
3146  code_status(c3);
3147  if (hold_index < hold_count){
3148  c4 = hold_buf[hold_index++];
3149  } else if ((c4 = (*i_getc)(f)) == EOF) {
/* c3 is overwritten with EOF so the check after the switch ends the replay. */
3150  c3 = ret = EOF;
3151  break;
3152  }
3153  code_status(c4);
/* Bytes three and four are packed into the third argument. */
3154  (*iconv)(c1, c2, (c3<<8)|c4);
3155  break;
3156  case -1:
3157  /* 3 bytes EUC or UTF-8 */
3158  if (hold_index < hold_count){
3159  c3 = hold_buf[hold_index++];
3160  } else if ((c3 = (*i_getc)(f)) == EOF) {
3161  ret = EOF;
3162  break;
3163  } else {
3164  code_status(c3);
3165  }
3166  (*iconv)(c1, c2, c3);
3167  break;
3168  }
3169  if (c3 == EOF) break;
3170  }
3171  return ret;
3172 }
3173 
3174 /*
3175  * Check and Ignore BOM
3176  *
3177  * Reads up to four bytes from i_getc and compares them with the byte-order
3178  * marks 00 00 FE FF, 00 00 FF FE, EF BB BF, FE FF [00 00] and FF FE [00 00].
3179  * When a mark matches and the selected iconv is the matching converter the
3180  * mark is consumed and the function returns; in every other case the bytes
3181  * read are pushed back with i_ungetc in reverse order so the stream is
3182  * unchanged.
3183  * NOTE(review): the numbers on these comment lines are illustrative; the
3184  * listing's own numbering skips the declarator (3178) and the statements
3185  * inside every "if(!input_encoding)" and "if (iconv == ...)" body that
3186  * precede the visible "return;" lines, so they are missing from this dump.
3187  */
3177 static void
3179 {
3180  int c2;
3181  switch(c2 = (*i_getc)(f)){
3182  case 0x00:
3183  if((c2 = (*i_getc)(f)) == 0x00){
3184  if((c2 = (*i_getc)(f)) == 0xFE){
3185  if((c2 = (*i_getc)(f)) == 0xFF){
3186  if(!input_encoding){
3188  }
3189  if (iconv == w_iconv32) {
3191  return;
3192  }
3193  (*i_ungetc)(0xFF,f);
3194  }else (*i_ungetc)(c2,f);
3195  (*i_ungetc)(0xFE,f);
3196  }else if(c2 == 0xFF){
3197  if((c2 = (*i_getc)(f)) == 0xFE){
3198  if(!input_encoding){
3200  }
3201  if (iconv == w_iconv32) {
3203  return;
3204  }
/*
 * The byte consumed by the test above is 0xFE, so that is what must go back.
 * Pushing 0xFF here (as before) turned 00 00 FF FE into 00 00 FF FF whenever
 * the mark was recognized but not accepted.
 */
3205  (*i_ungetc)(0xFE,f);
3206  }else (*i_ungetc)(c2,f);
3207  (*i_ungetc)(0xFF,f);
3208  }else (*i_ungetc)(c2,f);
3209  (*i_ungetc)(0x00,f);
3210  }else (*i_ungetc)(c2,f);
3211  (*i_ungetc)(0x00,f);
3212  break;
3213  case 0xEF:
3214  if((c2 = (*i_getc)(f)) == 0xBB){
3215  if((c2 = (*i_getc)(f)) == 0xBF){
3216  if(!input_encoding){
3218  }
3219  if (iconv == w_iconv) {
3220  return;
3221  }
3222  (*i_ungetc)(0xBF,f);
3223  }else (*i_ungetc)(c2,f);
3224  (*i_ungetc)(0xBB,f);
3225  }else (*i_ungetc)(c2,f);
3226  (*i_ungetc)(0xEF,f);
3227  break;
3228  case 0xFE:
3229  if((c2 = (*i_getc)(f)) == 0xFF){
/* FE FF may be a 16-bit mark or the start of the 32-bit mark FE FF 00 00. */
3230  if((c2 = (*i_getc)(f)) == 0x00){
3231  if((c2 = (*i_getc)(f)) == 0x00){
3232  if(!input_encoding){
3234  }
3235  if (iconv == w_iconv32) {
3237  return;
3238  }
3239  (*i_ungetc)(0x00,f);
3240  }else (*i_ungetc)(c2,f);
3241  (*i_ungetc)(0x00,f);
3242  }else (*i_ungetc)(c2,f);
3243  if(!input_encoding){
3245  }
3246  if (iconv == w_iconv16) {
3248  return;
3249  }
3250  (*i_ungetc)(0xFF,f);
3251  }else (*i_ungetc)(c2,f);
3252  (*i_ungetc)(0xFE,f);
3253  break;
3254  case 0xFF:
3255  if((c2 = (*i_getc)(f)) == 0xFE){
/* FF FE may be a 16-bit mark or the start of the 32-bit mark FF FE 00 00. */
3256  if((c2 = (*i_getc)(f)) == 0x00){
3257  if((c2 = (*i_getc)(f)) == 0x00){
3258  if(!input_encoding){
3260  }
3261  if (iconv == w_iconv32) {
3263  return;
3264  }
3265  (*i_ungetc)(0x00,f);
3266  }else (*i_ungetc)(c2,f);
3267  (*i_ungetc)(0x00,f);
3268  }else (*i_ungetc)(c2,f);
3269  if(!input_encoding){
3271  }
3272  if (iconv == w_iconv16) {
3274  return;
3275  }
3276  (*i_ungetc)(0xFE,f);
3277  }else (*i_ungetc)(c2,f);
3278  (*i_ungetc)(0xFF,f);
3279  break;
3280  default:
3281  (*i_ungetc)(c2,f);
3282  break;
3283  }
3284 }
3285 
/*
 * Input filter layered on i_bgetc.  When it sees '$' followed by '@' or 'B',
 * or '(' followed by 'J' or 'B', it returns ESC instead of the first
 * character; otherwise the look-ahead character is pushed back with
 * i_bungetc and the first character is returned unchanged.
 * Presumably this restores JIS escape sequences whose ESC byte was lost --
 * confirm with the option that installs this filter.
 * NOTE(review): the listing's numbering skips 3287 (the declarator), 3291,
 * 3300-3301, 3308, 3312-3313 and 3320, so the guard on the first return, the
 * statements executed just before each "return ESC", the second half of the
 * '(' condition and one statement in the final else are missing from this
 * dump.
 */
3286 static nkf_char
3288 {
3289  nkf_char c, c1;
3290 
3292  return nkf_buf_pop(nkf_state->broken_buf);
3293  }
3294  c = (*i_bgetc)(f);
/* Only considered outside a real escape and in ASCII / JIS X 0201 kana mode. */
3295  if (c=='$' && nkf_state->broken_state != ESC
3296  && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3297  c1= (*i_bgetc)(f);
3298  nkf_state->broken_state = 0;
3299  if (c1=='@'|| c1=='B') {
3302  return ESC;
3303  } else {
3304  (*i_bungetc)(c1,f);
3305  return c;
3306  }
3307  } else if (c=='(' && nkf_state->broken_state != ESC
3309  c1= (*i_bgetc)(f);
3310  nkf_state->broken_state = 0;
3311  if (c1=='J'|| c1=='B') {
3314  return ESC;
3315  } else {
3316  (*i_bungetc)(c1,f);
3317  return c;
3318  }
3319  } else {
3321  return c;
3322  }
3323 }
3324 
/*
 * Returns c to the caller.
 * NOTE(review): the listing's numbering skips 3326 (the declarator) and
 * 3328-3329 (the statements before the return), so what is done with c
 * before it is returned cannot be seen in this dump.
 */
3325 static nkf_char
3327 {
3330  return c;
3331 }
3332 
/*
 * End-of-line filter in front of o_eol_conv.  A CR is held back in prev_cr
 * until the next character shows whether it was part of CR LF; each input
 * line ending is then re-emitted according to eolmode_f.
 * NOTE(review): the listing's numbering skips 3334, so the declarator line is
 * missing from this dump; the body reads c2 and c1.
 */
3333 static void
3335 {
/*
 * Guessing only: remember which line-ending style the input uses (LF, CRLF
 * or CR) and set input_eol to EOF once two different styles have been seen.
 */
3336  if (guess_f && input_eol != EOF) {
3337  if (c2 == 0 && c1 == LF) {
3338  if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3339  else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3340  } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
/* No CR pending: nothing to record for this character. */
3341  else if (!prev_cr);
3342  else if (!input_eol) input_eol = CR;
3343  else if (input_eol != CR) input_eol = EOF;
3344  }
/* A pending CR or an LF ends a line: emit CR unless eolmode_f is LF, and LF unless it is CR. */
3345  if (prev_cr || (c2 == 0 && c1 == LF)) {
3346  prev_cr = 0;
3347  if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3348  if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3349  }
/* CR is deferred; everything except a bare LF is passed through. */
3350  if (c2 == 0 && c1 == CR) prev_cr = CR;
3351  else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3352 }
3353 
3354 /*
3355  Return value of fold_conv()
3356 
3357  LF add newline and output char
3358  CR add newline and output nothing
3359  SP space
3360  0 skip
3361  1 (or else) normal output
3362 
3363  fold state in prev (previous character)
3364 
3365  >0x80 Japanese (X0208/X0201)
3366  <0x80 ASCII
3367  LF new line
3368  SP space
3369 
3370  This fold algorithm does not preserve leading spaces in a line.
3371  This is the main difference from fmt.
3372  */
3373 
/*
 * Column width of a character: 2 when c2 is non-zero, 1 otherwise.  c1 is
 * accepted for symmetry with the converters and is not evaluated.
 * Both the argument and the whole expansion are parenthesized so operator
 * expressions can be passed safely (char_size(a - b, c) used to expand to
 * a - (b ? 2 : 1)).
 */
#define char_size(c2,c1) ((c2) ? 2 : 1)
3375 
/*
 * Line-folding filter in front of o_fconv.  The first part chooses a
 * fold_state for the incoming character from the current column (f_line),
 * the previous character class (f_prev), fold_len, fold_margin and
 * fold_preserve_f; the switch at the end acts on it:
 *   LF      emit a newline, then the character
 *   CR      emit a newline only
 *   SP/TAB  emit a single ASCII space
 *   0       emit nothing
 *   other   emit the character
 * NOTE(review): the listing's numbering skips 3377, so the declarator line is
 * missing from this dump; the body reads c2 and c1.
 */
3376 static void
3378 {
3379  nkf_char prev0;
3380  nkf_char fold_state;
3381 
3382  if (c1== CR && !fold_preserve_f) {
3383  fold_state=0; /* ignore cr */
3384  }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3385  f_prev = LF;
3386  fold_state=0; /* ignore the LF of a CR LF pair */
3387  } else if (c1== BS) {
3388  if (f_line>0) f_line--;
3389  fold_state = 1;
3390  } else if (c2==EOF && f_line != 0) { /* close open last line */
3391  fold_state = LF;
3392  } else if ((c1==LF && !fold_preserve_f)
3393  || ((c1==CR||(c1==LF&&f_prev!=CR))
3394  && fold_preserve_f)) {
3395  /* new line */
3396  if (fold_preserve_f) {
3397  f_prev = c1;
3398  f_line = 0;
3399  fold_state = CR;
3400  } else if ((f_prev == c1 && !fold_preserve_f)
3401  || (f_prev == LF && fold_preserve_f)
3402  ) { /* duplicate newline */
3403  if (f_line) {
3404  f_line = 0;
3405  fold_state = LF; /* output two newline */
3406  } else {
3407  f_line = 0;
3408  fold_state = 1;
3409  }
3410  } else {
3411  if (f_prev&0x80) { /* Japanese? */
3412  f_prev = c1;
3413  fold_state = 0; /* ignore given single newline */
3414  } else if (f_prev==SP) {
3415  fold_state = 0;
3416  } else {
/* A single newline after ASCII text becomes a space, or a fold at the limit. */
3417  f_prev = c1;
3418  if (++f_line<=fold_len)
3419  fold_state = SP;
3420  else {
3421  f_line = 0;
3422  fold_state = CR; /* fold and output nothing */
3423  }
3424  }
3425  }
3426  } else if (c1=='\f') {
3427  f_prev = LF;
3428  f_line = 0;
3429  fold_state = LF; /* output newline and clear */
3430  } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
3431  /* JIS X 0208 space (0x2121) or ASCII blank */
3432  if (f_prev == SP) {
3433  fold_state = 0; /* remove duplicate spaces */
3434  } else {
3435  f_prev = SP;
3436  if (++f_line<=fold_len)
3437  fold_state = SP; /* output ASCII space only */
3438  else {
3439  f_prev = SP; f_line = 0;
3440  fold_state = CR; /* fold and output nothing */
3441  }
3442  }
3443  } else {
3444  prev0 = f_prev; /* we still need this one... , but almost done */
3445  f_prev = c1;
3446  if (c2 || c2 == JIS_X_0201_1976_K)
3447  f_prev |= 0x80; /* this is Japanese */
3448  f_line += char_size(c2,c1);
3449  if (f_line<=fold_len) { /* normal case */
3450  fold_state = 1;
3451  } else {
3452  if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3453  f_line = char_size(c2,c1);
3454  fold_state = LF; /* We can't wait, do fold now */
3455  } else if (c2 == JIS_X_0201_1976_K) {
3456  /* simple kinsoku rules return 1 means no folding */
3457  if (c1==(0xde&0x7f)) fold_state = 1; /* halfwidth voiced sound mark */
3458  else if (c1==(0xdf&0x7f)) fold_state = 1; /* halfwidth semi-voiced sound mark */
3459  else if (c1==(0xa4&0x7f)) fold_state = 1; /* halfwidth ideographic comma */
3460  else if (c1==(0xa3&0x7f)) fold_state = 1; /* halfwidth right corner bracket */
3461  else if (c1==(0xa1&0x7f)) fold_state = 1; /* halfwidth ideographic full stop */
3462  else if (c1==(0xb0&0x7f)) fold_state = 1; /* halfwidth prolonged sound mark */
3463  else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3464  f_line = 1;
3465  fold_state = LF;/* add one new f_line before this character */
3466  } else {
3467  f_line = 1;
3468  fold_state = LF;/* add one new f_line before this character */
3469  }
3470  } else if (c2==0) {
3471  /* kinsoku point in ASCII */
3472  if ( c1==')'|| /* closing counterparts of { [ ( */
3473  c1==']'||
3474  c1=='}'||
3475  c1=='.'||
3476  c1==','||
3477  c1=='!'||
3478  c1=='?'||
3479  c1=='/'||
3480  c1==':'||
3481  c1==';') {
3482  fold_state = 1;
3483  /* just after special */
3484  } else if (!is_alnum(prev0)) {
3485  f_line = char_size(c2,c1);
3486  fold_state = LF;
3487  } else if ((prev0==SP) || /* ignored new f_line */
3488  (prev0==LF)|| /* ignored new f_line */
3489  (prev0&0x80)) { /* X0208 - ASCII */
3490  f_line = char_size(c2,c1);
3491  fold_state = LF;/* add one new f_line before this character */
3492  } else {
3493  fold_state = 1; /* default no fold in ASCII */
3494  }
3495  } else {
/* JIS X 0208 row 0x21 punctuation that must not start a line. */
3496  if (c2=='!') {
3497  if (c1=='"') fold_state = 1; /* 0x2122 ideographic comma */
3498  else if (c1=='#') fold_state = 1; /* 0x2123 ideographic full stop */
3499  else if (c1=='W') fold_state = 1; /* 0x2157 right corner bracket */
3500  else if (c1=='K') fold_state = 1; /* 0x214B fullwidth right parenthesis */
3501  else if (c1=='$') fold_state = 1; /* 0x2124 fullwidth comma */
3502  else if (c1=='%') fold_state = 1; /* 0x2125 fullwidth full stop */
3503  else if (c1=='\'') fold_state = 1; /* 0x2127 fullwidth colon */
3504  else if (c1=='(') fold_state = 1; /* 0x2128 fullwidth semicolon */
3505  else if (c1==')') fold_state = 1; /* 0x2129 fullwidth question mark */
3506  else if (c1=='*') fold_state = 1; /* 0x212A fullwidth exclamation mark */
3507  else if (c1=='+') fold_state = 1; /* 0x212B voiced sound mark */
3508  else if (c1==',') fold_state = 1; /* 0x212C semi-voiced sound mark */
3509  /* default no fold in kinsoku */
3510  else {
3511  fold_state = LF;
3512  f_line = char_size(c2,c1);
3513  /* add one new f_line before this character */
3514  }
3515  } else {
3516  f_line = char_size(c2,c1);
3517  fold_state = LF;
3518  /* add one new f_line before this character */
3519  }
3520  }
3521  }
3522  }
3523  /* terminator process */
3524  switch(fold_state) {
3525  case LF:
3526  OCONV_NEWLINE((*o_fconv));
3527  (*o_fconv)(c2,c1);
3528  break;
3529  case 0:
3530  return;
3531  case CR:
3532  OCONV_NEWLINE((*o_fconv));
3533  break;
3534  case TAB:
3535  case SP:
3536  (*o_fconv)(0,SP);
3537  break;
3538  default:
3539  (*o_fconv)(c2,c1);
3540  }
3541 }
3542 
3544 
/*
 * Width-conversion filter in front of o_zconv.  Depending on x0201_f and the
 * bits of alpha_f it
 *   - combines a JIS X 0201 kana with a following voiced / semi-voiced mark
 *     and maps it through the cv / dv / ev tables (x0201_f),
 *   - maps JIS X 0208 row 0x23 and some row 0x21 symbols to ASCII (alpha_f&1),
 *   - maps the JIS X 0208 space to one (alpha_f&2) or two (alpha_f&4) ASCII
 *     spaces,
 *   - replaces > < " & by HTML entities (alpha_f&8),
 *   - maps JIS X 0208 katakana and some punctuation to JIS X 0201 (alpha_f&16).
 * NOTE(review): the listing's numbering skips 3546, so the declarator line is
 * missing from this dump; the body reads c2 and c1 and the file-level
 * z_prev1 / z_prev2.
 */
3545 static void
3547 {
3548 
3549  /* if (c2) c1 &= 0x7f; assertion */
3550 
/* These three JIS X 0201 codes are passed through untouched. */
3551  if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3552  (*o_zconv)(c2,c1);
3553  return;
3554  }
3555 
3556  if (x0201_f) {
/* A kana is pending from the previous call: resolve it first. */
3557  if (z_prev2 == JIS_X_0201_1976_K) {
3558  if (c2 == JIS_X_0201_1976_K) {
3559  if (c1 == (0xde&0x7f)) { /* voiced sound mark (dakuten) */
3560  z_prev2 = 0;
3561  (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3562  return;
3563  } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* semi-voiced sound mark (handakuten) */
3564  z_prev2 = 0;
3565  (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3566  return;
3567  }
3568  }
/* No combining mark followed: emit the pending kana through cv. */
3569  z_prev2 = 0;
3570  (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3571  }
3572  if (c2 == JIS_X_0201_1976_K) {
3573  if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3574  /* wait for a voiced or semi-voiced sound mark */
3575  z_prev1 = c1;
3576  z_prev2 = c2;
3577  return;
3578  } else {
3579  (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3580  return;
3581  }
3582  }
3583  }
3584 
3585  if (c2 == EOF) {
3586  (*o_zconv)(c2, c1);
3587  return;
3588  }
3589 
3590  if (alpha_f&1 && c2 == 0x23) {
3591  /* JISX0208 Alphabet */
3592  c2 = 0;
3593  } else if (c2 == 0x21) {
3594  /* JIS X 0208 symbols (row 0x21) */
3595  if (0x21==c1) {
3596  if (alpha_f&2) {
3597  c2 = 0;
3598  c1 = SP;
3599  } else if (alpha_f&4) {
3600  (*o_zconv)(0, SP);
3601  (*o_zconv)(0, SP);
3602  return;
3603  }
3604  } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3605  c2 = 0;
3606  c1 = fv[c1-0x20];
3607  }
3608  }
3609 
3610  if (alpha_f&8 && c2 == 0) {
3611  /* HTML Entity */
3612  const char *entity = 0;
3613  switch (c1){
3614  case '>': entity = "&gt;"; break;
3615  case '<': entity = "&lt;"; break;
3616  case '\"': entity = "&quot;"; break;
3617  case '&': entity = "&amp;"; break;
3618  }
3619  if (entity){
3620  while (*entity) (*o_zconv)(0, *entity++);
3621  return;
3622  }
3623  }
3624 
3625  if (alpha_f & 16) {
3626  /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3627  if (c2 == 0x21) {
3628  nkf_char c = 0;
3629  switch (c1) {
3630  case 0x23:
3631  /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3632  c = 0xA1;
3633  break;
3634  case 0x56:
3635  /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3636  c = 0xA2;
3637  break;
3638  case 0x57:
3639  /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3640  c = 0xA3;
3641  break;
3642  case 0x22:
3643  /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3644  c = 0xA4;
3645  break;
3646  case 0x26:
3647  /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3648  c = 0xA5;
3649  break;
3650  case 0x3C:
3651  /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3652  c = 0xB0;
3653  break;
3654  case 0x2B:
3655  /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3656  c = 0xDE;
3657  break;
3658  case 0x2C:
3659  /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3660  c = 0xDF;
3661  break;
3662  }
3663  if (c) {
3664  (*o_zconv)(JIS_X_0201_1976_K, c);
3665  return;
3666  }
3667  } else if (c2 == 0x25) {
3668  /* JISX0208 Katakana */
/*
 * Indexed by c1 - 0x20.  High byte: the JIS X 0201 code to emit; low byte:
 * an optional second code (0x5E / 0x5F) emitted after it; 0 means no mapping.
 * NOTE(review): c1 is not range-checked before indexing this 96-entry table.
 */
3669  static const int fullwidth_to_halfwidth[] =
3670  {
3671  0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3672  0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3673  0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3674  0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3675  0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3676  0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3677  0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3678  0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3679  0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3680  0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3681  0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3682  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3683  };
3684  if (fullwidth_to_halfwidth[c1-0x20]){
3685  c2 = fullwidth_to_halfwidth[c1-0x20];
3686  (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3687  if (c2 & 0xFF) {
3688  (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
3689  }
3690  return;
3691  }
3692  }
3693  }
3694  (*o_zconv)(c2,c1);
3695 }
3696 
3697 
/*
 * rot13(c): rotate an ASCII letter 13 places within its own case; any other
 * value is returned unchanged.
 * The argument is parenthesized at every use so an operator expression such
 * as rot13(x & 0x7f) is compared and shifted as a whole.  It is still
 * expanded several times, so do not pass an expression with side effects.
 */
#define rot13(c) ( \
    ((c) <  'A') ? (c) : \
    ((c) <= 'M') ? ((c) + 13) : \
    ((c) <= 'Z') ? ((c) - 13) : \
    ((c) <  'a') ? (c) : \
    ((c) <= 'm') ? ((c) + 13) : \
    ((c) <= 'z') ? ((c) - 13) : \
    (c) \
    )
3707 
/*
 * rot47(c): rotate a value in '!'..'~' by 47 places within that range; any
 * other value is returned unchanged.
 * The argument is parenthesized at every use so an operator expression such
 * as rot47(x & 0x7f) is compared and shifted as a whole.  It is still
 * expanded several times, so do not pass an expression with side effects.
 */
#define rot47(c) ( \
    ((c) <  '!') ? (c) : \
    ((c) <= 'O') ? ((c) + 47) : \
    ((c) <= '~') ? ((c) - 47) : \
    (c) \
    )
3714 
/*
 * ROT filter in front of o_rot_conv: single-byte characters (c2 is 0,
 * JIS_X_0201_1976_K or ISO_8859_1) have c1 passed through rot13; for every
 * other non-zero c2 both bytes are passed through rot47.
 * NOTE(review): the listing's numbering skips 3716, so the declarator line is
 * missing from this dump; the body reads c2 and c1.
 */
3715 static void
3717 {
3718  if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3719  c1 = rot13(c1);
3720  } else if (c2) {
3721  c1 = rot47(c1);
3722  c2 = rot47(c2);
3723  }
3724  (*o_rot_conv)(c2,c1);
3725 }
3726 
/*
 * Kana-conversion filter in front of o_hira_conv, controlled by hira_f:
 *   bit 1 (value 1): row 0x25 -> row 0x24 (presumably katakana -> hiragana),
 *   bit 2 (value 2): row 0x24 -> row 0x25 (the reverse direction).
 * A character converted by the first direction is emitted immediately, so it
 * is not converted back when both bits are set.
 * NOTE(review): the listing's numbering skips 3728, so the declarator line is
 * missing from this dump; the body reads c2 and c1.
 */
3727 static void
3729 {
3730  if (hira_f & 1) {
3731  if (c2 == 0x25) {
3732  if (0x20 < c1 && c1 < 0x74) {
3733  c2 = 0x24;
3734  (*o_hira_conv)(c2,c1);
3735  return;
/* 0x2574 has no row-0x24 counterpart; with Unicode output U+3094 is used. */
3736  } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3737  c2 = 0;
3738  c1 = nkf_char_unicode_new(0x3094);
3739  (*o_hira_conv)(c2,c1);
3740  return;
3741  }
/* Row 0x21 codes 0x33/0x34 are shifted to 0x35/0x36. */
3742  } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3743  c1 += 2;
3744  (*o_hira_conv)(c2,c1);
3745  return;
3746  }
3747  }
3748  if (hira_f & 2) {
3749  if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3750  c2 = 0x25;
3751  c1 = 0x74;
3752  } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3753  c2 = 0x25;
/* Row 0x21 codes 0x35/0x36 are shifted back to 0x33/0x34. */
3754  } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3755  c1 -= 2;
3756  }
3757  }
3758  (*o_hira_conv)(c2,c1);
3759 }
3760 
3761 
/*
 * Filter in front of o_iso2022jp_check_conv that replaces a character by the
 * substitute pair (GETA1, GETA2) when its code falls in one of the ranges
 * tested below; everything else is passed through unchanged.
 * NOTE(review): the listing's numbering skips 3763, so the declarator line is
 * missing from this dump; the body reads c2 and c1.
 */
3762 static void
3764 {
3765 #define RANGE_NUM_MAX 18
/* Inclusive {start, end} ranges, compared against (c2 << 8) + c1. */
3766  static const nkf_char range[RANGE_NUM_MAX][2] = {
3767  {0x222f, 0x2239,},
3768  {0x2242, 0x2249,},
3769  {0x2251, 0x225b,},
3770  {0x226b, 0x2271,},
3771  {0x227a, 0x227d,},
3772  {0x2321, 0x232f,},
3773  {0x233a, 0x2340,},
3774  {0x235b, 0x2360,},
3775  {0x237b, 0x237e,},
3776  {0x2474, 0x247e,},
3777  {0x2577, 0x257e,},
3778  {0x2639, 0x2640,},
3779  {0x2659, 0x267e,},
3780  {0x2742, 0x2750,},
3781  {0x2772, 0x277e,},
3782  {0x2841, 0x287e,},
3783  {0x4f54, 0x4f7e,},
3784  {0x7425, 0x747e},
3785  };
3786  nkf_char i;
3787  nkf_char start, end, c;
3788 
/* c2 in 0x00-0x20 combined with c1 in 0x7f-0xff. */
3789  if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3790  c2 = GETA1;
3791  c1 = GETA2;
3792  }
/* Whole rows 0x29-0x2f and 0x75-0x7e. */
3793  if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3794  c2 = GETA1;
3795  c1 = GETA2;
3796  }
3797 
3798  for (i = 0; i < RANGE_NUM_MAX; i++) {
3799  start = range[i][0];
3800  end = range[i][1];
3801  c = (c2 << 8) + c1;
3802  if (c >= start && c <= end) {
3803  c2 = GETA1;
3804  c1 = GETA2;
3805  }
3806  }
3807  (*o_iso2022jp_check_conv)(c2,c1);
3808 }
3809 
3810 
3811 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3812 
/*
 * NULL-terminated list of recognized encoded-word prefixes
 * ("=?charset?encoding?").  The leading '=' is written as the octal escape
 * \075.  The second-to-last character of each entry is the encoding letter
 * (B or Q).  Entry order matches mime_encode_method below, including the two
 * entries that exist only when UTF8_INPUT_ENABLE is defined.
 */
3813 static const unsigned char *mime_pattern[] = {
3814  (const unsigned char *)"\075?EUC-JP?B?",
3815  (const unsigned char *)"\075?SHIFT_JIS?B?",
3816  (const unsigned char *)"\075?ISO-8859-1?Q?",
3817  (const unsigned char *)"\075?ISO-8859-1?B?",
3818  (const unsigned char *)"\075?ISO-2022-JP?B?",
3819  (const unsigned char *)"\075?ISO-2022-JP?Q?",
3820 #if defined(UTF8_INPUT_ENABLE)
3821  (const unsigned char *)"\075?UTF-8?B?",
3822  (const unsigned char *)"\075?UTF-8?Q?",
3823 #endif
3824  (const unsigned char *)"\075?US-ASCII?Q?",
3825  NULL
3826 };
3827 
3828 
3829 /* Marker used to raise the priority of the matching input code. */
/*
 * One converter (or 0) per MIME pattern slot; the two UTF-8 slots exist only
 * when UTF8_INPUT_ENABLE is defined.
 * NOTE(review): the listing's numbering skips 3830, so the declaration line
 * (type and array name) is missing from this dump.
 */
3831  e_iconv, s_iconv, 0, 0, 0, 0,
3832 #if defined(UTF8_INPUT_ENABLE)
3833  w_iconv, w_iconv,
3834 #endif
3835  0,
3836 };
3837 
/*
 * Character set identifier per MIME pattern slot, terminated by 0.
 * NOTE(review): the listing's numbering skips 3839, so the first row of the
 * initializer (the entries before the UTF-8 slots) is missing from this dump.
 */
3838 static const nkf_char mime_encode[] = {
3840 #if defined(UTF8_INPUT_ENABLE)
3841  UTF_8, UTF_8,
3842 #endif
3843  ASCII,
3844  0
3845 };
3846 
/*
 * Encoding letter per MIME pattern slot ('B' or 'Q'), terminated by 0.  The
 * letters follow the same order as the encoding letters in mime_pattern:
 * EUC-JP B, SHIFT_JIS B, ISO-8859-1 Q, ISO-8859-1 B, ISO-2022-JP B,
 * ISO-2022-JP Q, [UTF-8 B, UTF-8 Q,] US-ASCII Q.
 */
3847 static const nkf_char mime_encode_method[] = {
3848  'B', 'B','Q', 'B', 'B', 'Q',
3849 #if defined(UTF8_INPUT_ENABLE)
3850  'B', 'Q',
3851 #endif
3852  'Q',
3853  0
3854 };
3855 
3856 
3857 /* MIME preprocessor fifo */
3858 
/*
 * The buffer size is a power of two so that an ever-growing (or wrapping)
 * unsigned position can be reduced to an index with a single mask.
 */
3859 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3860 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
/* Accessor for the buffer slot that position n maps to. */
3861 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
/*
 * NOTE(review): the listing's numbering skips 3867-3868, so the end of this
 * struct definition (closing brace and object name) is missing from this
 * dump.
 */
3862 static struct {
3863  unsigned char buf[MIME_BUF_SIZE];
3864  unsigned int top;
3865  unsigned int last; /* decoded */
3866  unsigned int input; /* undecoded */
3869 
/* Size of the recovery buffers used while matching a MIME preamble. */
3870 #define MAXRECOVER 20
3871 
/*
 * Pushes the low byte of c in front of the buffered data by decrementing
 * mime_input_state.top and storing at the new position.
 * NOTE(review): the listing's numbering skips 3873, so the declarator line is
 * missing from this dump; the body reads c.
 */
3872 static void
3874 {
3875  mime_input_buf(--mime_input_state.top) = (unsigned char)c;
3876 }
3877 
/*
 * Returns c to the caller.
 * NOTE(review): the listing's numbering skips 3879 (the declarator) and 3881
 * (the only statement before the return), so what is done with c before it
 * is returned cannot be seen in this dump.
 */
3878 static nkf_char
3880 {
3882  return c;
3883 }
3884 
/*
 * Pushes c back: through i_mungetc_buf when mimebuf_f is set, otherwise into
 * the ring buffer just before mime_input_state.input.  Returns c.
 * NOTE(review): the listing's numbering skips 3886, so the declarator line is
 * missing from this dump; the body reads c and f.
 */
3885 static nkf_char
3887 {
3888  if (mimebuf_f)
3889  (*i_mungetc_buf)(c,f);
3890  else
3891  mime_input_buf(--mime_input_state.input) = (unsigned char)c;
3892  return c;
3893 }
3894 
/*
 * Reads one character, choosing the source according to mimebuf_f.
 * NOTE(review): the listing's numbering skips 3896 (the declarator) and 3901
 * (both operands of the conditional expression), so the two sources are not
 * visible in this dump.
 */
3895 static nkf_char
3897 {
3898  /* we don't keep eof of mime_input_buf, because it contains ?= as
3899  a terminator. It was checked in mime_integrity. */
3900  return ((mimebuf_f)?
3902 }
3903 
/*
 * Acts only when i_getc is not already mime_getc; STRICT_MIME mode has an
 * additional step.
 * NOTE(review): the listing's numbering skips 3905 (the declarator),
 * 3908-3909 and 3911-3912 (the statements inside both conditionals), so the
 * actual switching is not visible in this dump.
 */
3904 static void
3906 {
3907  if (i_getc!=mime_getc) {
3910  if(mime_f==STRICT_MIME) {
3913  }
3914  }
3915 }
3916 
/*
 * Restores the input hooks: i_getc and i_ungetc are set back to i_mgetc and
 * i_mungetc; in STRICT_MIME mode i_mgetc is first restored from i_mgetc_buf.
 * NOTE(review): the listing's numbering skips 3918 (the declarator), 3922 and
 * 3926-3927, so further statements of this function are missing from this
 * dump.
 */
3917 static void
3919 {
3920  if(mime_f==STRICT_MIME) {
3921  i_mgetc = i_mgetc_buf;
3923  }
3924  i_getc = i_mgetc;
3925  i_ungetc = i_mungetc;
3928 }
3929 
/*
 * Pre-reads an encoded word into the MIME ring buffer to check that it is
 * terminated by "?=" before decoding starts.
 *
 * p is the already matched preamble; it is copied into the buffer first.
 * Input is then read until "?=" is found, a character outside
 * [A-Za-z0-9+/=?] appears, EOF is reached, or the buffer is full.
 *   - Terminator found: the read position is rewound to just after the
 *     preamble and decoding is switched on.
 *   - Otherwise: the buffered text is marked as undecoded
 *     (mime_decode_mode = 1) so it is replayed as-is.
 * Returns 1 in both cases.
 * NOTE(review): the listing's numbering skips 3938, so one statement after
 * the first assignment is missing from this dump.
 */
3930 static nkf_char
3931 mime_integrity(FILE *f, const unsigned char *p)
3932 {
3933  nkf_char c,d;
3934  unsigned int q;
3935  /* In buffered mode, read until =? or NL or buffer full
3936  */
3937  mime_input_state.input = mime_input_state.top;
3939 
3940  while(*p) mime_input_buf(mime_input_state.input++) = *p++;
/* d holds the previous character; q marks the first byte after the preamble. */
3941  d = 0;
3942  q = mime_input_state.input;
3943  while((c=(*i_getc)(f))!=EOF) {
3944  if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3945  break; /* buffer full */
3946  }
3947  if (c=='=' && d=='?') {
3948  /* checked. skip header, start decode */
3949  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3950  /* mime_last_input = mime_input_state.input; */
3951  mime_input_state.input = q;
3952  switch_mime_getc();
3953  return 1;
3954  }
3955  if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
3956  break;
3957  /* Should we check length mod 4? */
3958  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3959  d=c;
3960  }
3961  /* In case of Incomplete MIME, no MIME decode */
/* NOTE(review): when the loop ended on EOF, c is EOF and its low byte is stored here. */
3962  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
3963  mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
3964  mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
3965  switch_mime_getc(); /* anyway we need buffered getc */
3966  return 1;
3967 }
3968 
/*
 * Matches the input that follows "=?" against the entries of mime_pattern,
 * case-insensitively.  On a mismatch the next pattern sharing the prefix
 * matched so far is tried; when none is left the characters read are sent to
 * oconv unchanged and the offending character is pushed back.  On a match
 * mime_decode_mode is set to the pattern's encoding letter and, for 'B' in
 * buffered mode, mime_integrity takes over.
 * NOTE(review): the listing's numbering skips 3970 (the declarator), 3977 and
 * 4005-4007, so one statement before the matching loop and the statements
 * following the assignment of mime_decode_mode are missing from this dump.
 */
3969 static nkf_char
3971 {
3972  nkf_char c1 = 0;
3973  int i,j,k;
3974  const unsigned char *p,*q;
3975  nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
3976 
3978  /* =? has been checked */
3979  j = 0;
3980  p = mime_pattern[j];
3981  r[0]='='; r[1]='?';
3982 
3983  for(i=2;p[i]>SP;i++) { /* start at =? */
3984  if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
3985  /* pattern fails, try next one */
3986  q = p;
3987  while (mime_pattern[++j]) {
3988  p = mime_pattern[j];
3989  for(k=2;k<i;k++) /* assume length(p) > i */
3990  if (p[k]!=q[k]) break;
3991  if (k==i && nkf_toupper(c1)==p[k]) break;
3992  }
3993  p = mime_pattern[j];
3994  if (p) continue; /* found next one, continue */
3995  /* all fails, output from recovery buffer */
3996  (*i_ungetc)(c1,f);
3997  for(j=0;j<i;j++) {
3998  (*oconv)(0,r[j]);
3999  }
4000  return c1;
4001  }
4002  }
/* The character before the trailing '?' of the pattern is the encoding letter. */
4003  mime_decode_mode = p[i-2];
4004 
4008 
4009  if (mime_decode_mode=='B') {
4010  mimebuf_f = unbuf_f;
4011  if (!unbuf_f) {
4012  /* do MIME integrity check */
4013  return mime_integrity(f,mime_pattern[j]);
4014  }
4015  }
4016  switch_mime_getc();
4017  mimebuf_f = TRUE;
4018  return c1;
4019 }
4020 
/*
 * Non-strict recognition of a MIME preamble after "=?".  Up to MAXRECOVER
 * characters are read and recorded in the MIME buffer while looking for
 * "?B?" or "?Q?" (either case).  If the preamble is accepted the recorded
 * characters are discarded by restoring mime_input_state.last; otherwise
 * mime_decode_mode is set to 1 so the recorded text is replayed undecoded.
 * The return value is the last character read and is only meaningful for
 * detecting EOF.
 * NOTE(review): the listing's numbering skips 4022 (the declarator), 4032 and
 * 4059, so one statement before the loop and the body of the final
 * "if (c1!='?')" are missing from this dump.
 */
4021 static nkf_char
4023 {
4024  nkf_char c1;
4025  int i,k;
4026 
4027  /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4028  /* re-read and convert again from mime_buffer. */
4029 
4030  /* =? has been checked */
/* Remember where recording starts so the preamble can be dropped later. */
4031  k = mime_input_state.last;
4033  for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4034  /* We accept any character type even if it is broken by new lines */
4035  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4036  if (c1==LF||c1==SP||c1==CR||
4037  c1=='-'||c1=='_'||is_alnum(c1)) continue;
4038  if (c1=='=') {
4039  /* Failed. But this could be another MIME preamble */
4040  (*i_ungetc)(c1,f);
4041  mime_input_state.last--;
4042  break;
4043  }
4044  if (c1!='?') break;
4045  else {
4046  /* c1=='?' */
4047  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4048  if (!(++i<MAXRECOVER) || c1==EOF) break;
4049  if (c1=='b'||c1=='B') {
4050  mime_decode_mode = 'B';
4051  } else if (c1=='q'||c1=='Q') {
4052  mime_decode_mode = 'Q';
4053  } else {
4054  break;
4055  }
4056  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4057  if (!(++i<MAXRECOVER) || c1==EOF) break;
4058  if (c1!='?') {
4060  }
4061  break;
4062  }
4063  }
4064  switch_mime_getc();
4065  if (!mime_decode_mode) {
4066  /* false MIME preamble, restart from mime_buffer */
4067  mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4068  /* Since we are in MIME mode until buffer becomes empty, */
4069  /* we never go into mime_begin again for a while. */
4070  return c1;
4071  }
4072  /* discard mime preamble, and goto MIME mode */
4073  mime_input_state.last = k;
4074  /* do no MIME integrity check */
4075  return c1; /* used only for checking EOF */
4076 }
4077 
4078 #ifdef CHECK_OPTION
/*
 * Function with an empty body: the only statement is a null statement, so
 * calling it has no effect.
 * NOTE(review): the declarator line (source line 4080) is absent from this
 * listing, so the name and parameter list cannot be confirmed here.
 */
4079 static void
4081 {
4082  ;
4083 }
4084 
4085 static void
4086 debug(const char *str)
4087 {
4088  if (debug_f){
4089  fprintf(stderr, "%s\n", str ? str : "NULL");
4090  }
4091 }
4092 #endif
4093 
4094 static void
4095 set_input_codename(const char *codename)
4096 {
4097  if (!input_codename) {
4098  input_codename = codename;
4099  } else if (strcmp(codename, input_codename) != 0) {
4100  input_codename = "";
4101  }
4102 }
4103 
/*
 * Refine the global input_codename into a more specific encoding name,
 * store the result back into input_codename and return it.
 * NOTE(review): the declarator line and source line 4110, which presumably
 * declares `p`, are absent from this listing.
 *
 * - An empty-string input_codename (the value set_input_codename stores when
 *   names conflict) becomes "BINARY".
 * - A null input_codename becomes "ASCII".
 * - "Shift_JIS", "EUC-JP" and "ISO-2022-JP" are replaced by a variant name
 *   when the corresponding SCORE_* bits are set in p->score; otherwise they
 *   are left unchanged.
 * - Any other name is returned as is.
 */
4104 static const char*
4106 {
4107  if (input_codename && !*input_codename) {
4108  input_codename = "BINARY";
4109  } else {
4111  if (!input_codename) {
4112  input_codename = "ASCII";
4113  } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4114  if (p->score & (SCORE_DEPEND|SCORE_CP932))
4115  input_codename = "CP932";
4116  } else if (strcmp(input_codename, "EUC-JP") == 0) {
/* SCORE_X0212 is tested first, so it wins over DEPEND/CP932. */
4117  if (p->score & (SCORE_X0212))
4118  input_codename = "EUCJP-MS";
4119  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4120  input_codename = "CP51932";
4121  } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
/* SCORE_KANA is tested first, so it wins over DEPEND/CP932. */
4122  if (p->score & (SCORE_KANA))
4123  input_codename = "CP50221";
4124  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4125  input_codename = "CP50220";
4126  }
4127  }
4128  return input_codename;
4129 }
4130 
4131 #if !defined(PERL_XS) && !defined(WIN32DLL)
/*
 * Print the guessed input encoding on stdout.
 *
 * filename: when not NULL, "<filename>: " is printed first.
 *
 * An empty-string input_codename prints "BINARY". Otherwise, when
 * guess_f == 1 only the name is printed; for any other guess_f value a
 * suffix chosen from input_eol follows the name: " (CR)", " (LF)",
 * " (CRLF)", " (MIXED NL)" when input_eol is EOF, or nothing.
 *
 * NOTE(review): source lines 4139 and 4144 are absent from this listing.
 * The second printf has two %s conversions but only the end-of-line suffix
 * argument is visible here -- confirm against the original file.
 */
4132 static void
4133 print_guessed_code(char *filename)
4134 {
4135  if (filename != NULL) printf("%s: ", filename);
4136  if (input_codename != NULL && input_codename[0] == '\0') {
4137  printf("BINARY\n");
4138  } else {
4140  if (guess_f == 1) {
4141  printf("%s\n", input_codename);
4142  } else {
4143  printf("%s%s\n",
4145  input_eol == CR ? " (CR)" :
4146  input_eol == LF ? " (LF)" :
4147  input_eol == CRLF ? " (CRLF)" :
4148  input_eol == EOF ? " (MIXED NL)" :
4149  "");
4150  }
4151  }
4152 }
4153 #endif /*WIN32DLL*/
4154 
4155 #ifdef INPUT_OPTION
4156 
4157 static nkf_char
4159 {
4160  nkf_char c1, c2, c3;
4161  c1 = (*g)(f);
4162  if (c1 != ch){
4163  return c1;
4164  }
4165  c2 = (*g)(f);
4166  if (!nkf_isxdigit(c2)){
4167  (*u)(c2, f);
4168  return c1;
4169  }
4170  c3 = (*g)(f);
4171  if (!nkf_isxdigit(c3)){
4172  (*u)(c2, f);
4173  (*u)(c3, f);
4174  return c1;
4175  }
4176  return (hex2bin(c2) << 4) | hex2bin(c3);
4177 }
4178 
/*
 * Read one character from f with ':' as the hex-escape introducer:
 * forwards to hex_getc using i_cgetc / i_cungetc as the underlying
 * get and unget functions and returns its result.
 * NOTE(review): the declarator line (source line 4180) is absent from this
 * listing.
 */
4179 static nkf_char
4181 {
4182  return hex_getc(':', f, i_cgetc, i_cungetc);
4183 }
4184 
/*
 * Push character c back for stream f by forwarding to i_cungetc, and
 * return whatever that function returns.
 * NOTE(review): the declarator line (source line 4186) is absent from this
 * listing.
 */
4185 static nkf_char
4187 {
4188  return (*i_cungetc)(c, f);
4189 }
4190 
/*
 * Read one character from f with '%' as the hex-escape introducer:
 * forwards to hex_getc using i_ugetc / i_uungetc as the underlying
 * get and unget functions and returns its result.
 * NOTE(review): the declarator line (source line 4192) is absent from this
 * listing.
 */
4191 static nkf_char
4193 {
4194  return hex_getc('%', f, i_ugetc, i_uungetc);
4195 }
4196 
/*
 * Push character c back for stream f by forwarding to i_uungetc, and
 * return whatever that function returns.
 * NOTE(review): the declarator line (source line 4198) is absent from this
 * listing.
 */
4197 static nkf_char
4199 {
4200  return (*i_uungetc)(c, f);
4201 }
4202 #endif
4203 
4204 #ifdef NUMCHAR_OPTION
/*
 * Read one character from f via i_ngetc, decoding a numeric character
 * reference of the form "&#DDD;" (decimal) or "&#xHHH;" / "&#XHHH;" (hex).
 * NOTE(review): the declarator line (source line 4206) is absent from this
 * listing; the body reads from a stream `f`.
 *
 * Returns nkf_char_unicode_new(value) when a reference was recognised.
 * Otherwise every character read after the first is pushed back through
 * i_nungetc, most recently read first, and the first character is returned.
 *
 * At most 7 hex digits or 8 decimal digits are read; buf[] holds every
 * character consumed so that it can be pushed back.
 *
 * NOTE(review): if the digit loop runs to its limit without meeting a
 * non-digit, the value is accepted although no ';' has been read.
 * NOTE(review): a reference with no digits at all ("&#;" or "&#x;") leaves
 * c at 0 and is therefore returned as code point 0 -- confirm intended.
 */
4205 static nkf_char
4207 {
4208  nkf_char (*g)(FILE *) = i_ngetc;
4209  nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4210  int i = 0, j;
4211  nkf_char buf[12];
/* c == -1 means "no reference recognised". */
4212  long c = -1;
4213 
4214  buf[i] = (*g)(f);
4215  if (buf[i] == '&'){
4216  buf[++i] = (*g)(f);
4217  if (buf[i] == '#'){
4218  c = 0;
4219  buf[++i] = (*g)(f);
4220  if (buf[i] == 'x' || buf[i] == 'X'){
/* Hexadecimal form: accumulate four bits per digit. */
4221  for (j = 0; j < 7; j++){
4222  buf[++i] = (*g)(f);
4223  if (!nkf_isxdigit(buf[i])){
/* Any terminator other than ';' invalidates the reference. */
4224  if (buf[i] != ';'){
4225  c = -1;
4226  }
4227  break;
4228  }
4229  c <<= 4;
4230  c |= hex2bin(buf[i]);
4231  }
4232  }else{
/* Decimal form: the first digit candidate is already in buf[i], so a new
   character is read only from the second iteration on. */
4233  for (j = 0; j < 8; j++){
4234  if (j){
4235  buf[++i] = (*g)(f);
4236  }
4237  if (!nkf_isdigit(buf[i])){
4238  if (buf[i] != ';'){
4239  c = -1;
4240  }
4241  break;
4242  }
4243  c *= 10;
/* hex2bin is applied to a decimal digit here. */
4244  c += hex2bin(buf[i]);
4245  }
4246  }
4247  }
4248  }
4249  if (c != -1){
4250  return nkf_char_unicode_new(c);
4251  }
/* Not a reference: push back buf[i]..buf[1], last read first. */
4252  while (i > 0){
4253  (*u)(buf[i], f);
4254  --i;
4255  }
4256  return buf[0];
4257 }
4258 
/*
 * Push character c back for stream f by forwarding to i_nungetc, and
 * return whatever that function returns.
 * NOTE(review): the declarator line (source line 4260) is absent from this
 * listing.
 */
4259 static nkf_char
4261 {
4262  return (*i_nungetc)(c, f);
4263 }
4264 #endif
4265 
4266 #ifdef UNICODE_NORMALIZATION
4267 
/*
 * Read one byte from f via i_nfc_getc and, when the bytes starting there
 * equal the `nfd` byte sequence of a normalization_table entry, replace
 * them with that entry's `nfc` byte sequence.
 * NOTE(review): the declarator line and source lines 4272-4273, which
 * presumably declare `u` and `buf`, are absent from this listing.
 *
 * The table is probed by binary search (lower/upper/mid); further bytes are
 * read on demand while an entry is being compared. On return the first byte
 * of the buffer is the result and all remaining buffered bytes have been
 * pushed back through u, last byte first.
 *
 * EOF, values above 0xFF and bytes whose top two bits are 10 (presumably
 * UTF-8 continuation bytes) are returned immediately without a lookup.
 */
4268 static nkf_char
4270 {
4271  nkf_char (*g)(FILE *f) = i_nfc_getc;
4274  const unsigned char *array;
4275  int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4276  nkf_char c = (*g)(f);
4277 
4278  if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4279 
4280  nkf_buf_push(buf, c);
4281  do {
4282  while (lower <= upper) {
4283  int mid = (lower+upper) / 2;
4284  int len;
4285  array = normalization_table[mid].nfd;
/* Compare the entry byte by byte; len is reset to 0 on any failure so that
   len > 0 after the loop means the whole nfd sequence matched. */
4286  for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4287  if (len >= nkf_buf_length(buf)) {
4288  c = (*g)(f);
4289  if (c == EOF) {
/* Input exhausted: make lower > upper so both loops terminate. */
4290  len = 0;
4291  lower = 1, upper = 0;
4292  break;
4293  }
4294  nkf_buf_push(buf, c);
4295  }
4296  if (array[len] != nkf_buf_at(buf, len)) {
4297  if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4298  else upper = mid - 1;
4299  len = 0;
4300  break;
4301  }
4302  }
4303  if (len > 0) {
4304  int i;
/* Match: the buffer content is replaced by the entry's nfc bytes. */
4305  array = normalization_table[mid].nfc;
4306  nkf_buf_clear(buf);
4307  for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4308  nkf_buf_push(buf, array[i]);
4309  break;
4310  }
4311  }
4312  } while (lower <= upper);
4313 
/* Keep only the first byte; everything after it goes back to the stream. */
4314  while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4315  c = nkf_buf_pop(buf);
4316 
4317  return c;
4318 }
4319 
/*
 * Push character c back for stream f by forwarding to i_nfc_ungetc, and
 * return whatever that function returns.
 * NOTE(review): the declarator line (source line 4321) is absent from this
 * listing.
 */
4320 static nkf_char
4322 {
4323  return (*i_nfc_ungetc)(c, f);
4324 }
4325 #endif /* UNICODE_NORMALIZATION */
4326 
4327 
4328 static nkf_char
4330 {
4331  int i;
4332  if (c > '@') {
4333  if (c < '[') {
4334  i = c - 'A'; /* A..Z 0-25 */
4335  } else if (c == '_') {
4336  i = '?' /* 63 */ ; /* _ 63 */
4337  } else {
4338  i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4339  }
4340  } else if (c > '/') {
4341  i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4342  } else if (c == '+' || c == '-') {
4343  i = '>' /* 62 */ ; /* + and - 62 */
4344  } else {
4345  i = '?' /* 63 */ ; /* / 63 */
4346  }
4347  return (i);
4348 }
4349 
4350 static nkf_char
4352 {
4353  nkf_char c1, c2, c3, c4, cc;
4354  nkf_char t1, t2, t3, t4, mode, exit_mode;
4355  nkf_char lwsp_count;
4356  char *lwsp_buf;
4357  char *lwsp_buf_new;
4358  nkf_char lwsp_size = 128;
4359 
4360  if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4361  return mime_input_buf(mime_input_state.top++);
4362  }
4366  return (*i_getc)(f);
4367  }
4368 
4369  if (mimebuf_f == FIXED_MIME)
4370  exit_mode = mime_decode_mode;
4371  else
4372  exit_mode = FALSE;
4373  if (mime_decode_mode == 'Q') {
4374  if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4375  restart_mime_q:
4376  if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4377  if (c1<=SP || DEL<=c1) {
4378  mime_decode_mode = exit_mode; /* prepare for quit */
4379  return c1;
4380  }
4381  if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4382  return c1;
4383  }
4384 
4385  mime_decode_mode = exit_mode; /* prepare for quit */
4386  if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4387  if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4388  /* end Q encoding */
4389  input_mode = exit_mode;
4390  lwsp_count = 0;
4391  lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4392  while ((c1=(*i_getc)(f))!=EOF) {
4393  switch (c1) {
4394  case LF:
4395  case CR:
4396  if (c1==LF) {
4397  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4398  i_ungetc(SP,f);
4399  continue;
4400  } else {
4401  i_ungetc(c1,f);
4402  }
4403  c1 = LF;
4404  } else {
4405  if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4406  if ((c1