"Fossies" - the Fresh Open Source Software Archive 
Member "xterm-379/ptydata.c" (10 Oct 2022, 23394 Bytes) of package /linux/misc/xterm-379.tgz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "ptydata.c" see the
Fossies "Dox" file reference documentation and the last
Fossies "Diffs" side-by-side code changes report:
373_vs_374.
1 /* $XTermId: ptydata.c,v 1.158 2022/10/10 19:27:56 tom Exp $ */
2
3 /*
4 * Copyright 1999-2020,2022 by Thomas E. Dickey
5 *
6 * All Rights Reserved
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sublicense, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 * Except as contained in this notice, the name(s) of the above copyright
28 * holders shall not be used in advertising or otherwise to promote the
29 * sale, use or other dealings in this Software without prior written
30 * authorization.
31 */
32
33 #include <data.h>
34
35 #if OPT_WIDE_CHARS
36 #include <menu.h>
37 #include <wcwidth.h>
38 #endif
39
40 #ifdef TEST_DRIVER
/*
 * Tracing for the standalone test driver: TRACE() always prints and TRACE2()
 * never does, although its arguments are still compiled.  Both take a
 * parenthesized printf argument list, e.g., TRACE(("%d\n", n)).
 *
 * Each body is wrapped in do/while(0) so that a call is exactly one
 * statement; with a bare "if" in the macro, an "else" written after the call
 * would attach to the macro's hidden "if" instead of the caller's.
 */
#undef TRACE
#define TRACE(p) do { printf p; } while (0)
#undef TRACE2
#define TRACE2(p) do { if (0) printf p; } while (0)
/* the test driver substitutes a fixed label for the buffer formatter */
#define visibleChars(buf, len) "buffer"
46 #endif
47
/*
 * E_TEST(err) is true when 'err' is the "try again later" error code.  Some
 * systems which claim POSIX conformance report EWOULDBLOCK where EAGAIN is
 * expected, so both are accepted when both are defined.
 *
 * The argument may be evaluated more than once: do not pass an expression
 * which has side effects.
 */
#if defined(EAGAIN) && defined(EWOULDBLOCK)
#define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define E_TEST(err) ((err) == EAGAIN)
#else
#define E_TEST(err) ((err) == EWOULDBLOCK)
#endif
62
63 #if OPT_WIDE_CHARS
64 /*
65 * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data.
66 * The number of bytes converted will be nonzero iff there is data.
67 */
68 Bool
69 decodeUtf8(TScreen *screen, PtyData *data)
70 {
71 size_t i;
72 size_t length = (size_t) (data->last - data->next);
73 int utf_count = 0;
74 unsigned utf_char = 0;
75
76 data->utf_size = 0;
77 for (i = 0; i < length; i++) {
78 unsigned c = data->next[i];
79
80 /* Combine UTF-8 into Unicode */
81 if (c < 0x80) {
82 /* We received an ASCII character */
83 if (utf_count > 0) {
84 data->utf_data = UCS_REPL; /* prev. sequence incomplete */
85 data->utf_size = i;
86 } else {
87 data->utf_data = (IChar) c;
88 data->utf_size = 1;
89 }
90 break;
91 } else if (screen->vt100_graphics
92 && (c < 0x100)
93 && (utf_count == 0)
94 && screen->gsets[(int) screen->curgr] != nrc_ASCII) {
95 data->utf_data = (IChar) c;
96 data->utf_size = 1;
97 break;
98 } else if (c < 0xc0) {
99 /* We received a continuation byte */
100 if (utf_count < 1) {
101 /*
102 * We received a continuation byte before receiving a sequence
103 * state. Or an attempt to use a C1 control string. Either
104 * way, it is mapped to the replacement character, unless
105 * allowed by optional feature.
106 */
107 data->utf_data = (IChar) (screen->c1_printable ? c : UCS_REPL);
108 data->utf_size = (i + 1);
109 break;
110 } else if (screen->utf8_weblike
111 && (utf_count == 3
112 && utf_char == 0x04
113 && c >= 0x90)) {
114 /* The encoding would form a code point beyond U+10FFFF. */
115 data->utf_size = i;
116 data->utf_data = UCS_REPL;
117 break;
118 } else if (screen->utf8_weblike
119 && (utf_count == 2
120 && utf_char == 0x0d
121 && c >= 0xa0)) {
122 /* The encoding would form a surrogate code point. */
123 data->utf_size = i;
124 data->utf_data = UCS_REPL;
125 break;
126 } else {
127 /* Check for overlong UTF-8 sequences for which a shorter
128 * encoding would exist and replace them with UCS_REPL.
129 * An overlong UTF-8 sequence can have any of the following
130 * forms:
131 * 1100000x 10xxxxxx
132 * 11100000 100xxxxx 10xxxxxx
133 * 11110000 1000xxxx 10xxxxxx 10xxxxxx
134 * 11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx
135 * 11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
136 */
137 if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) {
138 if (screen->utf8_weblike) {
139 /* overlong sequence continued */
140 data->utf_data = UCS_REPL;
141 data->utf_size = i;
142 break;
143 } else {
144 utf_char = UCS_REPL;
145 }
146 }
147 utf_char <<= 6;
148 utf_char |= (c & 0x3f);
149 if ((utf_char >= 0xd800 &&
150 utf_char <= 0xdfff) ||
151 (utf_char == 0xfffe) ||
152 (utf_char == HIDDEN_CHAR)) {
153 utf_char = UCS_REPL;
154 }
155 utf_count--;
156 if (utf_count == 0) {
157 #if !OPT_WIDER_ICHAR
158 /* characters outside UCS-2 become UCS_REPL */
159 if (utf_char > NARROW_ICHAR) {
160 TRACE(("using replacement for %#x\n", utf_char));
161 utf_char = UCS_REPL;
162 }
163 #endif
164 data->utf_data = (IChar) utf_char;
165 data->utf_size = (i + 1);
166 break;
167 }
168 }
169 } else {
170 /* We received a sequence start byte */
171 if (utf_count > 0) {
172 /* previous sequence is incomplete */
173 data->utf_data = UCS_REPL;
174 data->utf_size = i;
175 break;
176 }
177 if (screen->utf8_weblike) {
178 if (c < 0xe0) {
179 if (!(c & 0x1e)) {
180 /* overlong sequence start */
181 data->utf_data = UCS_REPL;
182 data->utf_size = (i + 1);
183 break;
184 }
185 utf_count = 1;
186 utf_char = (c & 0x1f);
187 } else if (c < 0xf0) {
188 utf_count = 2;
189 utf_char = (c & 0x0f);
190 } else if (c < 0xf5) {
191 utf_count = 3;
192 utf_char = (c & 0x07);
193 } else {
194 data->utf_data = UCS_REPL;
195 data->utf_size = (i + 1);
196 break;
197 }
198 } else {
199 if (c < 0xe0) {
200 utf_count = 1;
201 utf_char = (c & 0x1f);
202 if (!(c & 0x1e)) {
203 /* overlong sequence */
204 utf_char = UCS_REPL;
205 }
206 } else if (c < 0xf0) {
207 utf_count = 2;
208 utf_char = (c & 0x0f);
209 } else if (c < 0xf8) {
210 utf_count = 3;
211 utf_char = (c & 0x07);
212 } else if (c < 0xfc) {
213 utf_count = 4;
214 utf_char = (c & 0x03);
215 } else if (c < 0xfe) {
216 utf_count = 5;
217 utf_char = (c & 0x01);
218 } else {
219 data->utf_data = UCS_REPL;
220 data->utf_size = (i + 1);
221 break;
222 }
223 }
224 }
225 }
226 #if OPT_TRACE > 1
227 TRACE(("UTF-8 char %04X [%d..%d]\n",
228 data->utf_data,
229 (size_t) (data->next - data->buffer),
230 (size_t) (data->next - data->buffer + data->utf_size - 1)));
231 #endif
232
233 return (data->utf_size != 0);
234 }
235 #endif
236
237 int
238 readPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data)
239 {
240 TScreen *screen = TScreenOf(xw);
241 int size = 0;
242
243 #ifdef VMS
244 if (*select_mask & pty_mask) {
245 trimPtyData(xw, data);
246 if (read_queue.flink != 0) {
247 size = tt_read(data->next);
248 if (size == 0) {
249 Panic("input: read returned zero\n", 0);
250 }
251 } else {
252 sys$hiber();
253 }
254 }
255 #else /* !VMS */
256 if (FD_ISSET(screen->respond, select_mask)) {
257 int save_err;
258 trimPtyData(xw, data);
259
260 size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE);
261 save_err = errno;
262 #if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__)
263 /*
264 * Yes, I know this is a majorly f*ugly hack, however it seems to
265 * be necessary for Solaris x86. DWH 11/15/94
266 * Dunno why though..
267 * (and now CYGWIN, alanh@xfree86.org 08/15/01
268 */
269 if (size <= 0) {
270 if (save_err == EIO || save_err == 0)
271 NormalExit();
272 else if (!E_TEST(save_err))
273 Panic("input: read returned unexpected error (%d)\n", save_err);
274 size = 0;
275 }
276 #else /* !f*ugly */
277 if (size < 0) {
278 if (save_err == EIO)
279 NormalExit();
280 else if (!E_TEST(save_err))
281 Panic("input: read returned unexpected error (%d)\n", save_err);
282 size = 0;
283 } else if (size == 0) {
284 #if defined(__FreeBSD__)
285 NormalExit();
286 #else
287 Panic("input: read returned zero\n", 0);
288 #endif
289 }
290 #endif /* f*ugly */
291 }
292 #endif /* VMS */
293
294 if (size) {
295 #if OPT_TRACE
296 int i;
297
298 TRACE(("read %d bytes from pty\n", size));
299 for (i = 0; i < size; i++) {
300 if (!(i % 16))
301 TRACE(("%s", i ? "\n " : "READ"));
302 TRACE((" %02X", data->last[i]));
303 }
304 TRACE(("\n"));
305 #endif
306 data->last += size;
307 #ifdef ALLOWLOGGING
308 TScreenOf(term)->logstart = VTbuffer->next;
309 #endif
310 }
311
312 return (size);
313 }
314
/*
 * Fetch the next input value from the buffer.
 *
 * morePtyData() is always called before this function, so the UTF-8 input
 * conversion is done there and the result is simply collected here.
 *
 * While screen->utf8_inparse is set, the value comes from skipPtyData()
 * (defined elsewhere; presumably it yields the already-decoded character and
 * steps past its bytes -- confirm against its definition).  Otherwise one
 * byte is taken from data->next, reduced to 7 bits unless
 * screen->output_eight_bits is set.
 */
#if OPT_WIDE_CHARS
IChar
nextPtyData(TScreen *screen, PtyData *data)
{
    IChar ch;

    if (!screen->utf8_inparse) {
        ch = *(data->next);
        data->next++;
        if (!screen->output_eight_bits) {
            ch = (IChar) (ch & 0x7f);
        }
    } else {
        skipPtyData(data, ch);
    }
    TRACE2(("nextPtyData returns %#x\n", ch));
    return ch;
}
#endif
337
338 #if OPT_WIDE_CHARS
339 /*
340 * Called when UTF-8 mode has been turned on/off.
341 */
342 void
343 switchPtyData(TScreen *screen, int flag)
344 {
345 if (screen->utf8_mode != flag) {
346 screen->utf8_mode = flag;
347 screen->utf8_inparse = (Boolean) (flag != 0);
348 mk_wcwidth_init(screen->utf8_mode);
349
350 TRACE(("turning UTF-8 mode %s\n", BtoS(flag)));
351 update_font_utf8_mode();
352 }
353 }
354 #endif
355
356 /*
357 * Allocate a buffer.
358 */
359 void
360 initPtyData(PtyData **result)
361 {
362 PtyData *data;
363
364 TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n",
365 FRG_SIZE, BUF_SIZE));
366
367 if (FRG_SIZE < 64)
368 FRG_SIZE = 64;
369 if (BUF_SIZE < FRG_SIZE)
370 BUF_SIZE = FRG_SIZE;
371 if (BUF_SIZE % FRG_SIZE)
372 BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE);
373
374 TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n",
375 FRG_SIZE, BUF_SIZE));
376
377 data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE));
378
379 memset(data, 0, sizeof(*data));
380 data->next = data->buffer;
381 data->last = data->buffer;
382 *result = data;
383 }
384
/*
 * Set up the caller's PtyData so that it describes the caller's own bytes:
 * the structure is zeroed, then 'next' and 'last' are stored as given.
 * No memory is allocated; the same pointer 'result' is returned.
 */
#if OPT_WIDE_CHARS
PtyData *
fakePtyData(PtyData *result, Char *next, Char *last)
{
    memset(result, 0, sizeof(*result));
    result->next = next;
    result->last = last;

    return result;
}
#endif
401
402 /*
403 * Remove used data by shifting the buffer down, to make room for more data,
404 * e.g., a continuation-read.
405 */
406 void
407 trimPtyData(XtermWidget xw, PtyData *data)
408 {
409 (void) xw;
410 FlushLog(xw);
411
412 if (data->next != data->buffer) {
413 size_t i;
414 size_t n = (size_t) (data->last - data->next);
415
416 TRACE(("shifting buffer down by %lu\n", (unsigned long) n));
417 for (i = 0; i < n; ++i) {
418 data->buffer[i] = data->next[i];
419 }
420 data->next = data->buffer;
421 data->last = data->next + n;
422 }
423
424 }
425
426 /*
427 * Insert new data into the input buffer so the next calls to morePtyData()
428 * and nextPtyData() will return that.
429 */
430 void
431 fillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length)
432 {
433 size_t size;
434 size_t n;
435
436 /* remove the used portion of the buffer */
437 trimPtyData(xw, data);
438
439 VTbuffer->last += length;
440 size = (size_t) (VTbuffer->last - VTbuffer->next);
441
442 /* shift the unused portion up to make room */
443 for (n = size; n >= length; --n)
444 VTbuffer->next[n] = VTbuffer->next[n - length];
445
446 /* insert the new bytes to interpret */
447 for (n = 0; n < length; n++)
448 VTbuffer->next[n] = CharOf(value[n]);
449 }
450
451 #if OPT_WIDE_CHARS
452 /*
453 * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target
454 * 'lp', and returning a pointer past the converted character.
455 */
456 Char *
457 convertToUTF8(Char *lp, unsigned c)
458 {
459 #define CH(n) (Char)((c) >> ((n) * 8))
460 if (c < 0x80) {
461 /* 0******* */
462 *lp++ = (Char) CH(0);
463 } else if (c < 0x800) {
464 /* 110***** 10****** */
465 *lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2));
466 *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
467 } else if (c < 0x00010000) {
468 /* 1110**** 10****** 10****** */
469 *lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4));
470 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
471 *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
472 } else if (c < 0x00200000) {
473 *lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2));
474 *lp++ = (Char) (0x80 |
475 ((int) (CH(1) & 0xf0) >> 4) |
476 ((int) (CH(2) & 0x03) << 4));
477 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
478 *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
479 } else if (c < 0x04000000) {
480 *lp++ = (Char) (0xf8 | (CH(3) & 0x03));
481 *lp++ = (Char) (0x80 | (CH(2) >> 2));
482 *lp++ = (Char) (0x80 |
483 ((int) (CH(1) & 0xf0) >> 4) |
484 ((int) (CH(2) & 0x03) << 4));
485 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
486 *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
487 } else {
488 *lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6));
489 *lp++ = (Char) (0x80 | (CH(3) & 0x3f));
490 *lp++ = (Char) (0x80 | (CH(2) >> 2));
491 *lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4));
492 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
493 *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
494 }
495 return lp;
496 #undef CH
497 }
498
499 /*
500 * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer
501 * past the converted UTF-8 input. The first 256 values align with ISO-8859-1,
502 * making it possible to use this to convert to Latin-1.
503 *
504 * If the conversion fails, return null.
505 */
506 Char *
507 convertFromUTF8(Char *lp, unsigned *cp)
508 {
509 int want;
510
511 /*
512 * Find the number of bytes we will need from the source.
513 */
514 if ((*lp & 0x80) == 0) {
515 want = 1;
516 } else if ((*lp & 0xe0) == 0xc0) {
517 want = 2;
518 } else if ((*lp & 0xf0) == 0xe0) {
519 want = 3;
520 } else if ((*lp & 0xf8) == 0xf0) {
521 want = 4;
522 } else if ((*lp & 0xfc) == 0xf8) {
523 want = 5;
524 } else if ((*lp & 0xfe) == 0xfc) {
525 want = 6;
526 } else {
527 want = 0;
528 }
529
530 if (want) {
531 int have = 1;
532
533 while (lp[have] != '\0') {
534 if ((lp[have] & 0xc0) != 0x80)
535 break;
536 ++have;
537 }
538 if (want == have) {
539 unsigned mask = 0;
540 int j;
541 int shift = 0;
542
543 *cp = 0;
544 switch (want) {
545 case 1:
546 mask = (*lp);
547 break;
548 case 2:
549 mask = (*lp & 0x1f);
550 break;
551 case 3:
552 mask = (*lp & 0x0f);
553 break;
554 case 4:
555 mask = (*lp & 0x07);
556 break;
557 case 5:
558 mask = (*lp & 0x03);
559 break;
560 case 6:
561 mask = (*lp & 0x01);
562 break;
563 default:
564 mask = 0;
565 break;
566 }
567
568 for (j = 1; j < want; j++) {
569 *cp |= (unsigned) ((lp[want - j] & 0x3f) << shift);
570 shift += 6;
571 }
572 *cp |= mask << shift;
573 lp += want;
574 } else {
575 *cp = BAD_ASCII;
576 lp = NULL;
577 }
578 } else {
579 *cp = BAD_ASCII;
580 lp = NULL;
581 }
582 return lp;
583 }
584
585 /*
586 * Returns true if the entire string is valid UTF-8.
587 */
588 Boolean
589 isValidUTF8(Char *lp)
590 {
591 Boolean result = True;
592 while (*lp) {
593 unsigned ch;
594 Char *next = convertFromUTF8(lp, &ch);
595 if (next == NULL || ch == 0) {
596 result = False;
597 break;
598 }
599 lp = next;
600 }
601 return result;
602 }
603
604 /*
605 * Write data back to the PTY
606 */
607 void
608 writePtyData(int f, IChar *d, size_t len)
609 {
610 size_t n = (len << 1);
611
612 if (VTbuffer->write_len <= len) {
613 VTbuffer->write_len = n;
614 VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len);
615 }
616
617 for (n = 0; n < len; n++)
618 VTbuffer->write_buf[n] = (Char) d[n];
619
620 TRACE(("writePtyData %lu:%s\n", (unsigned long) n,
621 visibleChars(VTbuffer->write_buf, n)));
622 v_write(f, VTbuffer->write_buf, n);
623 }
624 #endif /* OPT_WIDE_CHARS */
625
626 #ifdef NO_LEAKS
627 void
628 noleaks_ptydata(void)
629 {
630 if (VTbuffer != 0) {
631 #if OPT_WIDE_CHARS
632 free(VTbuffer->write_buf);
633 #endif
634 FreeAndNull(VTbuffer);
635 }
636 }
637 #endif
638
639 #ifdef TEST_DRIVER
640
641 #include "data.c"
642
/*
 * Test-driver replacement: announce the call on stderr and end the process
 * with a success status.
 */
void
NormalExit(void)
{
    fputs("NormalExit!\n", stderr);
    exit(EXIT_SUCCESS);
}
649
/*
 * Test-driver replacement: both arguments are ignored; announce the call on
 * stderr and end the process with a failure status.
 */
void
Panic(const char *s, int a)
{
    (void) a;
    (void) s;
    fputs("Panic!\n", stderr);
    exit(EXIT_FAILURE);
}
658
659 #if OPT_WIDE_CHARS
660
661 #ifdef ALLOWLOGGING
662 void
663 FlushLog(XtermWidget xw)
664 {
665 (void) xw;
666 }
667 #endif
668
669 void
670 v_write(int f, const Char *data, size_t len)
671 {
672 (void) f;
673 (void) data;
674 (void) len;
675 }
676
/* Test-driver stub: the mode is accepted and ignored. */
void
mk_wcwidth_init(int mode)
{
    (void) mode;
}
682
/* Test-driver stub: there are no fonts to update, so this does nothing. */
void
update_font_utf8_mode(void)
{
    /* intentionally empty */
}
687
/* verbosity: raised by -v, lowered by -q */
static int message_level = 0;

/* command-line switches, set to 1 by -a, -i, -c and -r respectively */
static int opt_all = 0;
static int opt_illegal = 0;
static int opt_convert = 0;
static int opt_reverse = 0;

/* running totals reported at the end of main() */
static long total_test = 0;
static long total_errs = 0;
695
/*
 * Print the command-line summary on stderr and exit with a failure status.
 * The range syntax shown is what decode_range() accepts: a value, optionally
 * followed by a separator and a second value.
 */
static void
usage(void)
{
    static const char *msg[] =
    {
        "Usage: test_ptydata [options] [c1[-c1b] [c2[-c2b] [...]]]",
        "",
        "Options:",
        " -a exercise all legal encode/decode to/from UTF-8",
        " -c call convertFromUTF8 rather than decodeUtf8",
        " -i ignore illegal UTF-8 when testing -r option",
        " -q quieter",
        " -r reverse/decode from UTF-8 byte-string to/from Unicode",
        " -v more verbose"
    };
    size_t n;

    for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) {
        fprintf(stderr, "%s\n", msg[n]);
    }
    exit(EXIT_FAILURE);
}
717
718 /*
719 * http://www.unicode.org/versions/corrigendum1.html, table 3.1B
720 */
721 #define OkRange(n,lo,hi) \
722 if (value[n] < lo || value[n] > hi) { \
723 result = False; \
724 break; \
725 }
726 static Bool
727 is_legal_utf8(const Char *value)
728 {
729 Bool result = True;
730 Char ch;
731 while ((ch = *value) != '\0') {
732 if (ch <= 0x7f) {
733 ++value;
734 } else if (ch >= 0xc2 && ch <= 0xdf) {
735 OkRange(1, 0x80, 0xbf);
736 value += 2;
737 } else if (ch == 0xe0) {
738 OkRange(1, 0xa0, 0xbf);
739 OkRange(2, 0x80, 0xbf);
740 value += 3;
741 } else if (ch >= 0xe1 && ch <= 0xef) {
742 OkRange(1, 0x80, 0xbf);
743 OkRange(2, 0x80, 0xbf);
744 value += 3;
745 } else if (ch == 0xf0) {
746 OkRange(1, 0x90, 0xbf);
747 OkRange(2, 0x80, 0xbf);
748 OkRange(3, 0x80, 0xbf);
749 value += 4;
750 } else if (ch >= 0xf1 && ch <= 0xf3) {
751 OkRange(1, 0x80, 0xbf);
752 OkRange(2, 0x80, 0xbf);
753 OkRange(3, 0x80, 0xbf);
754 value += 4;
755 } else if (ch == 0xf4) {
756 OkRange(1, 0x80, 0x8f);
757 OkRange(2, 0x80, 0xbf);
758 OkRange(3, 0x80, 0xbf);
759 value += 4;
760 } else {
761 result = False;
762 break;
763 }
764 }
765 return result;
766 }
767
768 static void
769 test_utf8_convert(void)
770 {
771 unsigned c_in, c_out;
772 Char buffer[10];
773 Char *result;
774 unsigned limit = 0x110000;
775 unsigned success = 0;
776 unsigned bucket[256];
777
778 memset(bucket, 0, sizeof(bucket));
779 for (c_in = 0; c_in < limit; ++c_in) {
780 memset(buffer, 0, sizeof(buffer));
781 if ((result = convertToUTF8(buffer, c_in)) == 0) {
782 TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in));
783 } else {
784 if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
785 TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in));
786 } else if (c_in != c_out) {
787 TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n",
788 c_in, c_out));
789 } else {
790 while (result-- != buffer) {
791 bucket[*result]++;
792 }
793 ++success;
794 }
795 }
796 }
797 TRACE(("%u/%u successful\n", success, limit));
798 for (c_in = 0; c_in < 256; ++c_in) {
799 if ((c_in % 8) == 0) {
800 TRACE((" %02X:", c_in));
801 }
802 TRACE((" %8X", bucket[c_in]));
803 if (((c_in + 1) % 8) == 0) {
804 TRACE(("\n"));
805 }
806 }
807 }
808
/*
 * Parse one number from the start of 'source'.
 *
 * Accepted forms: "U+hex" or "u+hex" (hexadecimal), "0bdigits" (binary), or
 * anything strtol() accepts with base 0 (decimal, 0x hex, leading-0 octal).
 *
 * source - text to parse
 * target - receives a pointer to the first character which was not consumed
 *
 * Returns the value, or -1 if no digits were found or the value does not fit
 * in the range 0..0x7fffffff.  (Without the range check an oversized value
 * would be truncated to int and could be mistaken for a valid small number.)
 */
static int
decode_one(const char *source, char **target)
{
    int result = -1;
    long check;
    int radix = 0;

    if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') {
        source += 2;
        radix = 16;
    } else if (source[0] == '0' && source[1] == 'b') {
        source += 2;
        radix = 2;
    }

    errno = 0;                  /* so that ERANGE can be told apart */
    check = strtol(source, target, radix);
    if (*target != NULL
        && *target != source
        && errno != ERANGE
        && check >= 0
        && check <= 0x7fffffffL) {
        result = (int) check;
    }
    return result;
}
827
/*
 * Parse "lo" or "lo<sep>hi" from 'source', where <sep> is any run of the
 * characters ':', '-', '.', tab and space.  If the second number is missing
 * or unusable, '*hi' is set equal to '*lo'.
 *
 * Returns 1 if at least the first number was parsed, otherwise 0 (in which
 * case '*lo' holds a negative value and '*hi' is not written).
 */
static int
decode_range(const char *source, int *lo, int *hi)
{
    char *rest;
    char *ignored;

    *lo = decode_one(source, &rest);
    if (*lo < 0)
        return 0;

    rest += strspn(rest, ":-.\t ");
    *hi = decode_one(rest, &ignored);
    if (*hi < 0)
        *hi = *lo;

    return 1;
}
843
844 #define MAX_BYTES 6
845
846 static void
847 do_range(const char *source)
848 {
849 int lo, hi;
850
851 TScreen screen;
852 memset(&screen, 0, sizeof(screen));
853
854 if (decode_range(source, &lo, &hi)) {
855 while (lo <= hi) {
856 unsigned c_in = (unsigned) lo++;
857 PtyData *data;
858 Char *next;
859 Char buffer[MAX_BYTES + 1];
860
861 if (opt_reverse) {
862 Bool skip = False;
863 Bool first = True;
864 int j, k;
865 for (j = 0; j < MAX_BYTES; ++j) {
866 unsigned long bits = ((unsigned long) c_in >> (8 * j));
867 if ((buffer[j] = (Char) bits) == 0) {
868 skip = (bits != 0);
869 break;
870 }
871 }
872 if (skip)
873 continue;
874 initPtyData(&data);
875 for (k = 0; k <= j; ++k) {
876 data->buffer[k] = buffer[j - k - 1];
877 }
878 if (opt_illegal && !is_legal_utf8(data->buffer)) {
879 free(data);
880 continue;
881 }
882 if (message_level > 1) {
883 printf("TEST ");
884 for (k = 0; k < j; ++k) {
885 printf("%02X", data->buffer[k]);
886 }
887 }
888 data->next = data->buffer;
889 data->last = data->buffer + j;
890 while (decodeUtf8(&screen, data)) {
891 total_test++;
892 if (data->utf_data == UCS_REPL)
893 total_errs++;
894 data->next += data->utf_size;
895 if (message_level > 1) {
896 printf("%s%04X", first ? " ->" : ", ", data->utf_data);
897 }
898 first = False;
899 }
900 if (!first)
901 total_test--;
902 if (message_level > 1) {
903 printf("\n");
904 fflush(stdout);
905 }
906 free(data);
907 } else if (opt_convert) {
908 unsigned c_out;
909 Char *result;
910
911 memset(buffer, 0, sizeof(buffer));
912 if ((result = next = convertToUTF8(buffer, c_in)) == 0) {
913 fprintf(stderr,
914 "conversion of U+%04X to UTF-8 failed\n", c_in);
915 } else if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
916 fprintf(stderr,
917 "conversion of U+%04X from UTF-8 failed\n", c_in);
918 total_errs++;
919 } else if (c_in != c_out) {
920 fprintf(stderr,
921 "conversion of U+%04X to/from UTF-8 gave U+%04X\n",
922 c_in, c_out);
923 } else if (message_level > 1) {
924 *next = '\0';
925 printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
926 (unsigned long) (next - buffer),
927 buffer,
928 c_out);
929 fflush(stdout);
930 }
931 } else {
932 initPtyData(&data);
933 next = convertToUTF8(data->buffer, c_in);
934 *next = 0;
935 data->next = data->buffer;
936 data->last = next;
937 decodeUtf8(&screen, data);
938 if (message_level > 1) {
939 printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
940 (unsigned long) (next - data->buffer),
941 data->buffer,
942 data->utf_data);
943 fflush(stdout);
944 }
945 if (c_in != data->utf_data) {
946 fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data);
947 total_errs++;
948 }
949 free(data);
950 }
951 total_test++;
952 }
953 }
954 }
955
956 int
957 main(int argc, char **argv)
958 {
959 int ch;
960
961 setlocale(LC_ALL, "");
962 while ((ch = getopt(argc, argv, "aciqrv")) != -1) {
963 switch (ch) {
964 case 'a':
965 opt_all = 1;
966 break;
967 case 'c':
968 opt_convert = 1;
969 break;
970 case 'i':
971 opt_illegal = 1;
972 break;
973 case 'q':
974 message_level--;
975 break;
976 case 'r':
977 opt_reverse = 1;
978 break;
979 case 'v':
980 message_level++;
981 break;
982 default:
983 usage();
984 }
985 }
986 if (opt_all) {
987 test_utf8_convert();
988 } else {
989 if (optind >= argc)
990 usage();
991 while (optind < argc) {
992 do_range(argv[optind++]);
993 }
994 if (total_test) {
995 printf("%ld/%ld mismatches (%.0f%%)\n",
996 total_errs,
997 total_test,
998 (100.0 * (double) total_errs) / (double) total_test);
999 }
1000 }
1001 return EXIT_SUCCESS;
1002 }
1003 #else
/*
 * Test driver entry point when wide-character support is not compiled in:
 * there is nothing to test, so say so and succeed.
 */
int
main(int argc, char **argv)
{
    (void) argv;
    (void) argc;
    puts("Nothing to be done here...");
    return EXIT_SUCCESS;
}
1012 #endif /* OPT_WIDE_CHARS */
1013 #endif