"Fossies" - the Fresh Open Source Software Archive

Member "xterm-379/ptydata.c" (10 Oct 2022, 23394 Bytes) of package /linux/misc/xterm-379.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ptydata.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 373_vs_374.

    1 /* $XTermId: ptydata.c,v 1.158 2022/10/10 19:27:56 tom Exp $ */
    2 
    3 /*
    4  * Copyright 1999-2020,2022 by Thomas E. Dickey
    5  *
    6  *                         All Rights Reserved
    7  *
    8  * Permission is hereby granted, free of charge, to any person obtaining a
    9  * copy of this software and associated documentation files (the
   10  * "Software"), to deal in the Software without restriction, including
   11  * without limitation the rights to use, copy, modify, merge, publish,
   12  * distribute, sublicense, and/or sell copies of the Software, and to
   13  * permit persons to whom the Software is furnished to do so, subject to
   14  * the following conditions:
   15  *
   16  * The above copyright notice and this permission notice shall be included
   17  * in all copies or substantial portions of the Software.
   18  *
   19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   22  * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
   23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   26  *
   27  * Except as contained in this notice, the name(s) of the above copyright
   28  * holders shall not be used in advertising or otherwise to promote the
   29  * sale, use or other dealings in this Software without prior written
   30  * authorization.
   31  */
   32 
   33 #include <data.h>
   34 
   35 #if OPT_WIDE_CHARS
   36 #include <menu.h>
   37 #include <wcwidth.h>
   38 #endif
   39 
   40 #ifdef TEST_DRIVER
   41 #undef TRACE
   42 #define TRACE(p) if (1) printf p
   43 #undef TRACE2
   44 #define TRACE2(p) if (0) printf p
   45 #define visibleChars(buf, len) "buffer"
   46 #endif
   47 
   48 /*
   49  * Check for both EAGAIN and EWOULDBLOCK, because some supposedly POSIX
   50  * systems are broken and return EWOULDBLOCK when they should return EAGAIN.
   51  * Note that this macro may evaluate its argument more than once.
   52  */
   53 #if defined(EAGAIN) && defined(EWOULDBLOCK)
   54 #define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK)
   55 #else
   56 #ifdef EAGAIN
   57 #define E_TEST(err) ((err) == EAGAIN)
   58 #else
   59 #define E_TEST(err) ((err) == EWOULDBLOCK)
   60 #endif
   61 #endif
   62 
   63 #if OPT_WIDE_CHARS
   64 /*
   65  * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data.
   66  * The number of bytes converted will be nonzero iff there is data.
   67  */
   68 Bool
   69 decodeUtf8(TScreen *screen, PtyData *data)
   70 {
   71     size_t i;
   72     size_t length = (size_t) (data->last - data->next);
   73     int utf_count = 0;
   74     unsigned utf_char = 0;
   75 
   76     data->utf_size = 0;
   77     for (i = 0; i < length; i++) {
   78     unsigned c = data->next[i];
   79 
   80     /* Combine UTF-8 into Unicode */
   81     if (c < 0x80) {
   82         /* We received an ASCII character */
   83         if (utf_count > 0) {
   84         data->utf_data = UCS_REPL;  /* prev. sequence incomplete */
   85         data->utf_size = i;
   86         } else {
   87         data->utf_data = (IChar) c;
   88         data->utf_size = 1;
   89         }
   90         break;
   91     } else if (screen->vt100_graphics
   92            && (c < 0x100)
   93            && (utf_count == 0)
   94            && screen->gsets[(int) screen->curgr] != nrc_ASCII) {
   95         data->utf_data = (IChar) c;
   96         data->utf_size = 1;
   97         break;
   98     } else if (c < 0xc0) {
   99         /* We received a continuation byte */
  100         if (utf_count < 1) {
  101         /*
  102          * We received a continuation byte before receiving a sequence
  103          * state.  Or an attempt to use a C1 control string.  Either
  104          * way, it is mapped to the replacement character, unless
  105          * allowed by optional feature.
  106          */
  107         data->utf_data = (IChar) (screen->c1_printable ? c : UCS_REPL);
  108         data->utf_size = (i + 1);
  109         break;
  110         } else if (screen->utf8_weblike
  111                && (utf_count == 3
  112                && utf_char == 0x04
  113                && c >= 0x90)) {
  114         /* The encoding would form a code point beyond U+10FFFF. */
  115         data->utf_size = i;
  116         data->utf_data = UCS_REPL;
  117         break;
  118         } else if (screen->utf8_weblike
  119                && (utf_count == 2
  120                && utf_char == 0x0d
  121                && c >= 0xa0)) {
  122         /* The encoding would form a surrogate code point. */
  123         data->utf_size = i;
  124         data->utf_data = UCS_REPL;
  125         break;
  126         } else {
  127         /* Check for overlong UTF-8 sequences for which a shorter
  128          * encoding would exist and replace them with UCS_REPL.
  129          * An overlong UTF-8 sequence can have any of the following
  130          * forms:
  131          *   1100000x 10xxxxxx
  132          *   11100000 100xxxxx 10xxxxxx
  133          *   11110000 1000xxxx 10xxxxxx 10xxxxxx
  134          *   11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx
  135          *   11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  136          */
  137         if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) {
  138             if (screen->utf8_weblike) {
  139             /* overlong sequence continued */
  140             data->utf_data = UCS_REPL;
  141             data->utf_size = i;
  142             break;
  143             } else {
  144             utf_char = UCS_REPL;
  145             }
  146         }
  147         utf_char <<= 6;
  148         utf_char |= (c & 0x3f);
  149         if ((utf_char >= 0xd800 &&
  150              utf_char <= 0xdfff) ||
  151             (utf_char == 0xfffe) ||
  152             (utf_char == HIDDEN_CHAR)) {
  153             utf_char = UCS_REPL;
  154         }
  155         utf_count--;
  156         if (utf_count == 0) {
  157 #if !OPT_WIDER_ICHAR
  158             /* characters outside UCS-2 become UCS_REPL */
  159             if (utf_char > NARROW_ICHAR) {
  160             TRACE(("using replacement for %#x\n", utf_char));
  161             utf_char = UCS_REPL;
  162             }
  163 #endif
  164             data->utf_data = (IChar) utf_char;
  165             data->utf_size = (i + 1);
  166             break;
  167         }
  168         }
  169     } else {
  170         /* We received a sequence start byte */
  171         if (utf_count > 0) {
  172         /* previous sequence is incomplete */
  173         data->utf_data = UCS_REPL;
  174         data->utf_size = i;
  175         break;
  176         }
  177         if (screen->utf8_weblike) {
  178         if (c < 0xe0) {
  179             if (!(c & 0x1e)) {
  180             /* overlong sequence start */
  181             data->utf_data = UCS_REPL;
  182             data->utf_size = (i + 1);
  183             break;
  184             }
  185             utf_count = 1;
  186             utf_char = (c & 0x1f);
  187         } else if (c < 0xf0) {
  188             utf_count = 2;
  189             utf_char = (c & 0x0f);
  190         } else if (c < 0xf5) {
  191             utf_count = 3;
  192             utf_char = (c & 0x07);
  193         } else {
  194             data->utf_data = UCS_REPL;
  195             data->utf_size = (i + 1);
  196             break;
  197         }
  198         } else {
  199         if (c < 0xe0) {
  200             utf_count = 1;
  201             utf_char = (c & 0x1f);
  202             if (!(c & 0x1e)) {
  203             /* overlong sequence */
  204             utf_char = UCS_REPL;
  205             }
  206         } else if (c < 0xf0) {
  207             utf_count = 2;
  208             utf_char = (c & 0x0f);
  209         } else if (c < 0xf8) {
  210             utf_count = 3;
  211             utf_char = (c & 0x07);
  212         } else if (c < 0xfc) {
  213             utf_count = 4;
  214             utf_char = (c & 0x03);
  215         } else if (c < 0xfe) {
  216             utf_count = 5;
  217             utf_char = (c & 0x01);
  218         } else {
  219             data->utf_data = UCS_REPL;
  220             data->utf_size = (i + 1);
  221             break;
  222         }
  223         }
  224     }
  225     }
  226 #if OPT_TRACE > 1
  227     TRACE(("UTF-8 char %04X [%d..%d]\n",
  228        data->utf_data,
  229        (size_t) (data->next - data->buffer),
  230        (size_t) (data->next - data->buffer + data->utf_size - 1)));
  231 #endif
  232 
  233     return (data->utf_size != 0);
  234 }
  235 #endif
  236 
  237 int
  238 readPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data)
  239 {
  240     TScreen *screen = TScreenOf(xw);
  241     int size = 0;
  242 
  243 #ifdef VMS
  244     if (*select_mask & pty_mask) {
  245     trimPtyData(xw, data);
  246     if (read_queue.flink != 0) {
  247         size = tt_read(data->next);
  248         if (size == 0) {
  249         Panic("input: read returned zero\n", 0);
  250         }
  251     } else {
  252         sys$hiber();
  253     }
  254     }
  255 #else /* !VMS */
  256     if (FD_ISSET(screen->respond, select_mask)) {
  257     int save_err;
  258     trimPtyData(xw, data);
  259 
  260     size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE);
  261     save_err = errno;
  262 #if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__)
  263     /*
  264      * Yes, I know this is a majorly f*ugly hack, however it seems to
  265      * be necessary for Solaris x86.  DWH 11/15/94
  266      * Dunno why though..
  267      * (and now CYGWIN, alanh@xfree86.org 08/15/01
  268      */
  269     if (size <= 0) {
  270         if (save_err == EIO || save_err == 0)
  271         NormalExit();
  272         else if (!E_TEST(save_err))
  273         Panic("input: read returned unexpected error (%d)\n", save_err);
  274         size = 0;
  275     }
  276 #else /* !f*ugly */
  277     if (size < 0) {
  278         if (save_err == EIO)
  279         NormalExit();
  280         else if (!E_TEST(save_err))
  281         Panic("input: read returned unexpected error (%d)\n", save_err);
  282         size = 0;
  283     } else if (size == 0) {
  284 #if defined(__FreeBSD__)
  285         NormalExit();
  286 #else
  287         Panic("input: read returned zero\n", 0);
  288 #endif
  289     }
  290 #endif /* f*ugly */
  291     }
  292 #endif /* VMS */
  293 
  294     if (size) {
  295 #if OPT_TRACE
  296     int i;
  297 
  298     TRACE(("read %d bytes from pty\n", size));
  299     for (i = 0; i < size; i++) {
  300         if (!(i % 16))
  301         TRACE(("%s", i ? "\n    " : "READ"));
  302         TRACE((" %02X", data->last[i]));
  303     }
  304     TRACE(("\n"));
  305 #endif
  306     data->last += size;
  307 #ifdef ALLOWLOGGING
  308     TScreenOf(term)->logstart = VTbuffer->next;
  309 #endif
  310     }
  311 
  312     return (size);
  313 }
  314 
  315 /*
  316  * Return the next value from the input buffer.  Note that morePtyData() is
  317  * always called before this function, so we can do the UTF-8 input conversion
  318  * in that function and simply return the result here.
  319  */
  320 #if OPT_WIDE_CHARS
  321 IChar
  322 nextPtyData(TScreen *screen, PtyData *data)
  323 {
  324     IChar result;
  325     if (screen->utf8_inparse) {
  326     skipPtyData(data, result);
  327     } else {
  328     result = *((data)->next++);
  329     if (!screen->output_eight_bits) {
  330         result = (IChar) (result & 0x7f);
  331     }
  332     }
  333     TRACE2(("nextPtyData returns %#x\n", result));
  334     return result;
  335 }
  336 #endif
  337 
  338 #if OPT_WIDE_CHARS
  339 /*
  340  * Called when UTF-8 mode has been turned on/off.
  341  */
  342 void
  343 switchPtyData(TScreen *screen, int flag)
  344 {
  345     if (screen->utf8_mode != flag) {
  346     screen->utf8_mode = flag;
  347     screen->utf8_inparse = (Boolean) (flag != 0);
  348     mk_wcwidth_init(screen->utf8_mode);
  349 
  350     TRACE(("turning UTF-8 mode %s\n", BtoS(flag)));
  351     update_font_utf8_mode();
  352     }
  353 }
  354 #endif
  355 
  356 /*
  357  * Allocate a buffer.
  358  */
  359 void
  360 initPtyData(PtyData **result)
  361 {
  362     PtyData *data;
  363 
  364     TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n",
  365         FRG_SIZE, BUF_SIZE));
  366 
  367     if (FRG_SIZE < 64)
  368     FRG_SIZE = 64;
  369     if (BUF_SIZE < FRG_SIZE)
  370     BUF_SIZE = FRG_SIZE;
  371     if (BUF_SIZE % FRG_SIZE)
  372     BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE);
  373 
  374     TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n",
  375         FRG_SIZE, BUF_SIZE));
  376 
  377     data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE));
  378 
  379     memset(data, 0, sizeof(*data));
  380     data->next = data->buffer;
  381     data->last = data->buffer;
  382     *result = data;
  383 }
  384 
  385 /*
  386  * Initialize a buffer for the caller, using its data in 'next'.
  387  */
  388 #if OPT_WIDE_CHARS
  389 PtyData *
  390 fakePtyData(PtyData *result, Char *next, Char *last)
  391 {
  392     PtyData *data = result;
  393 
  394     memset(data, 0, sizeof(*data));
  395     data->next = next;
  396     data->last = last;
  397 
  398     return data;
  399 }
  400 #endif
  401 
  402 /*
  403  * Remove used data by shifting the buffer down, to make room for more data,
  404  * e.g., a continuation-read.
  405  */
  406 void
  407 trimPtyData(XtermWidget xw, PtyData *data)
  408 {
  409     (void) xw;
  410     FlushLog(xw);
  411 
  412     if (data->next != data->buffer) {
  413     size_t i;
  414     size_t n = (size_t) (data->last - data->next);
  415 
  416     TRACE(("shifting buffer down by %lu\n", (unsigned long) n));
  417     for (i = 0; i < n; ++i) {
  418         data->buffer[i] = data->next[i];
  419     }
  420     data->next = data->buffer;
  421     data->last = data->next + n;
  422     }
  423 
  424 }
  425 
  426 /*
  427  * Insert new data into the input buffer so the next calls to morePtyData()
  428  * and nextPtyData() will return that.
  429  */
  430 void
  431 fillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length)
  432 {
  433     size_t size;
  434     size_t n;
  435 
  436     /* remove the used portion of the buffer */
  437     trimPtyData(xw, data);
  438 
  439     VTbuffer->last += length;
  440     size = (size_t) (VTbuffer->last - VTbuffer->next);
  441 
  442     /* shift the unused portion up to make room */
  443     for (n = size; n >= length; --n)
  444     VTbuffer->next[n] = VTbuffer->next[n - length];
  445 
  446     /* insert the new bytes to interpret */
  447     for (n = 0; n < length; n++)
  448     VTbuffer->next[n] = CharOf(value[n]);
  449 }
  450 
  451 #if OPT_WIDE_CHARS
  452 /*
  453  * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target
  454  * 'lp', and returning a pointer past the converted character.
  455  */
  456 Char *
  457 convertToUTF8(Char *lp, unsigned c)
  458 {
  459 #define CH(n) (Char)((c) >> ((n) * 8))
  460     if (c < 0x80) {
  461     /*  0*******  */
  462     *lp++ = (Char) CH(0);
  463     } else if (c < 0x800) {
  464     /*  110***** 10******  */
  465     *lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2));
  466     *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
  467     } else if (c < 0x00010000) {
  468     /*  1110**** 10****** 10******  */
  469     *lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4));
  470     *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
  471     *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
  472     } else if (c < 0x00200000) {
  473     *lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2));
  474     *lp++ = (Char) (0x80 |
  475             ((int) (CH(1) & 0xf0) >> 4) |
  476             ((int) (CH(2) & 0x03) << 4));
  477     *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
  478     *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
  479     } else if (c < 0x04000000) {
  480     *lp++ = (Char) (0xf8 | (CH(3) & 0x03));
  481     *lp++ = (Char) (0x80 | (CH(2) >> 2));
  482     *lp++ = (Char) (0x80 |
  483             ((int) (CH(1) & 0xf0) >> 4) |
  484             ((int) (CH(2) & 0x03) << 4));
  485     *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
  486     *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
  487     } else {
  488     *lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6));
  489     *lp++ = (Char) (0x80 | (CH(3) & 0x3f));
  490     *lp++ = (Char) (0x80 | (CH(2) >> 2));
  491     *lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4));
  492     *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
  493     *lp++ = (Char) (0x80 | (CH(0) & 0x3f));
  494     }
  495     return lp;
  496 #undef CH
  497 }
  498 
  499 /*
  500  * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer
  501  * past the converted UTF-8 input.  The first 256 values align with ISO-8859-1,
  502  * making it possible to use this to convert to Latin-1.
  503  *
  504  * If the conversion fails, return null.
  505  */
  506 Char *
  507 convertFromUTF8(Char *lp, unsigned *cp)
  508 {
  509     int want;
  510 
  511     /*
  512      * Find the number of bytes we will need from the source.
  513      */
  514     if ((*lp & 0x80) == 0) {
  515     want = 1;
  516     } else if ((*lp & 0xe0) == 0xc0) {
  517     want = 2;
  518     } else if ((*lp & 0xf0) == 0xe0) {
  519     want = 3;
  520     } else if ((*lp & 0xf8) == 0xf0) {
  521     want = 4;
  522     } else if ((*lp & 0xfc) == 0xf8) {
  523     want = 5;
  524     } else if ((*lp & 0xfe) == 0xfc) {
  525     want = 6;
  526     } else {
  527     want = 0;
  528     }
  529 
  530     if (want) {
  531     int have = 1;
  532 
  533     while (lp[have] != '\0') {
  534         if ((lp[have] & 0xc0) != 0x80)
  535         break;
  536         ++have;
  537     }
  538     if (want == have) {
  539         unsigned mask = 0;
  540         int j;
  541         int shift = 0;
  542 
  543         *cp = 0;
  544         switch (want) {
  545         case 1:
  546         mask = (*lp);
  547         break;
  548         case 2:
  549         mask = (*lp & 0x1f);
  550         break;
  551         case 3:
  552         mask = (*lp & 0x0f);
  553         break;
  554         case 4:
  555         mask = (*lp & 0x07);
  556         break;
  557         case 5:
  558         mask = (*lp & 0x03);
  559         break;
  560         case 6:
  561         mask = (*lp & 0x01);
  562         break;
  563         default:
  564         mask = 0;
  565         break;
  566         }
  567 
  568         for (j = 1; j < want; j++) {
  569         *cp |= (unsigned) ((lp[want - j] & 0x3f) << shift);
  570         shift += 6;
  571         }
  572         *cp |= mask << shift;
  573         lp += want;
  574     } else {
  575         *cp = BAD_ASCII;
  576         lp = NULL;
  577     }
  578     } else {
  579     *cp = BAD_ASCII;
  580     lp = NULL;
  581     }
  582     return lp;
  583 }
  584 
  585 /*
  586  * Returns true if the entire string is valid UTF-8.
  587  */
  588 Boolean
  589 isValidUTF8(Char *lp)
  590 {
  591     Boolean result = True;
  592     while (*lp) {
  593     unsigned ch;
  594     Char *next = convertFromUTF8(lp, &ch);
  595     if (next == NULL || ch == 0) {
  596         result = False;
  597         break;
  598     }
  599     lp = next;
  600     }
  601     return result;
  602 }
  603 
  604 /*
  605  * Write data back to the PTY
  606  */
  607 void
  608 writePtyData(int f, IChar *d, size_t len)
  609 {
  610     size_t n = (len << 1);
  611 
  612     if (VTbuffer->write_len <= len) {
  613     VTbuffer->write_len = n;
  614     VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len);
  615     }
  616 
  617     for (n = 0; n < len; n++)
  618     VTbuffer->write_buf[n] = (Char) d[n];
  619 
  620     TRACE(("writePtyData %lu:%s\n", (unsigned long) n,
  621        visibleChars(VTbuffer->write_buf, n)));
  622     v_write(f, VTbuffer->write_buf, n);
  623 }
  624 #endif /* OPT_WIDE_CHARS */
  625 
  626 #ifdef NO_LEAKS
  627 void
  628 noleaks_ptydata(void)
  629 {
  630     if (VTbuffer != 0) {
  631 #if OPT_WIDE_CHARS
  632     free(VTbuffer->write_buf);
  633 #endif
  634     FreeAndNull(VTbuffer);
  635     }
  636 }
  637 #endif
  638 
  639 #ifdef TEST_DRIVER
  640 
  641 #include "data.c"
  642 
  643 void
  644 NormalExit(void)
  645 {
  646     fprintf(stderr, "NormalExit!\n");
  647     exit(EXIT_SUCCESS);
  648 }
  649 
  650 void
  651 Panic(const char *s, int a)
  652 {
  653     (void) s;
  654     (void) a;
  655     fprintf(stderr, "Panic!\n");
  656     exit(EXIT_FAILURE);
  657 }
  658 
  659 #if OPT_WIDE_CHARS
  660 
  661 #ifdef ALLOWLOGGING
  662 void
  663 FlushLog(XtermWidget xw)
  664 {
  665     (void) xw;
  666 }
  667 #endif
  668 
  669 void
  670 v_write(int f, const Char *data, size_t len)
  671 {
  672     (void) f;
  673     (void) data;
  674     (void) len;
  675 }
  676 
  677 void
  678 mk_wcwidth_init(int mode)
  679 {
  680     (void) mode;
  681 }
  682 
  683 void
  684 update_font_utf8_mode(void)
  685 {
  686 }
  687 
  688 static int message_level = 0;
  689 static int opt_all = 0;
  690 static int opt_illegal = 0;
  691 static int opt_convert = 0;
  692 static int opt_reverse = 0;
  693 static long total_test = 0;
  694 static long total_errs = 0;
  695 
  696 static void
  697 usage(void)
  698 {
  699     static const char *msg[] =
  700     {
  701     "Usage: test_ptydata [options] [c1[-c1b] [c2-[c2b] [...]]]",
  702     "",
  703     "Options:",
  704     " -a  exercise all legal encode/decode to/from UTF-8",
  705     " -c  call convertFromUTF8 rather than decodeUTF8",
  706     " -i  ignore illegal UTF-8 when testing -r option",
  707     " -q  quieter",
  708     " -r  reverse/decode from UTF-8 byte-string to/from Unicode",
  709     " -v  more verbose"
  710     };
  711     size_t n;
  712     for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) {
  713     fprintf(stderr, "%s\n", msg[n]);
  714     }
  715     exit(EXIT_FAILURE);
  716 }
  717 
  718 /*
  719  * http://www.unicode.org/versions/corrigendum1.html, table 3.1B
  720  */
  721 #define OkRange(n,lo,hi) \
  722     if (value[n] < lo || value[n] > hi) { \
  723         result = False; \
  724         break; \
  725     }
  726 static Bool
  727 is_legal_utf8(const Char *value)
  728 {
  729     Bool result = True;
  730     Char ch;
  731     while ((ch = *value) != '\0') {
  732     if (ch <= 0x7f) {
  733         ++value;
  734     } else if (ch >= 0xc2 && ch <= 0xdf) {
  735         OkRange(1, 0x80, 0xbf);
  736         value += 2;
  737     } else if (ch == 0xe0) {
  738         OkRange(1, 0xa0, 0xbf);
  739         OkRange(2, 0x80, 0xbf);
  740         value += 3;
  741     } else if (ch >= 0xe1 && ch <= 0xef) {
  742         OkRange(1, 0x80, 0xbf);
  743         OkRange(2, 0x80, 0xbf);
  744         value += 3;
  745     } else if (ch == 0xf0) {
  746         OkRange(1, 0x90, 0xbf);
  747         OkRange(2, 0x80, 0xbf);
  748         OkRange(3, 0x80, 0xbf);
  749         value += 4;
  750     } else if (ch >= 0xf1 && ch <= 0xf3) {
  751         OkRange(1, 0x80, 0xbf);
  752         OkRange(2, 0x80, 0xbf);
  753         OkRange(3, 0x80, 0xbf);
  754         value += 4;
  755     } else if (ch == 0xf4) {
  756         OkRange(1, 0x80, 0x8f);
  757         OkRange(2, 0x80, 0xbf);
  758         OkRange(3, 0x80, 0xbf);
  759         value += 4;
  760     } else {
  761         result = False;
  762         break;
  763     }
  764     }
  765     return result;
  766 }
  767 
  768 static void
  769 test_utf8_convert(void)
  770 {
  771     unsigned c_in, c_out;
  772     Char buffer[10];
  773     Char *result;
  774     unsigned limit = 0x110000;
  775     unsigned success = 0;
  776     unsigned bucket[256];
  777 
  778     memset(bucket, 0, sizeof(bucket));
  779     for (c_in = 0; c_in < limit; ++c_in) {
  780     memset(buffer, 0, sizeof(buffer));
  781     if ((result = convertToUTF8(buffer, c_in)) == 0) {
  782         TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in));
  783     } else {
  784         if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
  785         TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in));
  786         } else if (c_in != c_out) {
  787         TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n",
  788                c_in, c_out));
  789         } else {
  790         while (result-- != buffer) {
  791             bucket[*result]++;
  792         }
  793         ++success;
  794         }
  795     }
  796     }
  797     TRACE(("%u/%u successful\n", success, limit));
  798     for (c_in = 0; c_in < 256; ++c_in) {
  799     if ((c_in % 8) == 0) {
  800         TRACE((" %02X:", c_in));
  801     }
  802     TRACE((" %8X", bucket[c_in]));
  803     if (((c_in + 1) % 8) == 0) {
  804         TRACE(("\n"));
  805     }
  806     }
  807 }
  808 
  809 static int
  810 decode_one(const char *source, char **target)
  811 {
  812     int result = -1;
  813     long check;
  814     int radix = 0;
  815     if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') {
  816     source += 2;
  817     radix = 16;
  818     } else if (source[0] == '0' && source[1] == 'b') {
  819     source += 2;
  820     radix = 2;
  821     }
  822     check = strtol(source, target, radix);
  823     if (*target != NULL && *target != source)
  824     result = (int) check;
  825     return result;
  826 }
  827 
  828 static int
  829 decode_range(const char *source, int *lo, int *hi)
  830 {
  831     int result = 0;
  832     char *after1;
  833     char *after2;
  834     if ((*lo = decode_one(source, &after1)) >= 0) {
  835     after1 += strspn(after1, ":-.\t ");
  836     if ((*hi = decode_one(after1, &after2)) < 0) {
  837         *hi = *lo;
  838     }
  839     result = 1;
  840     }
  841     return result;
  842 }
  843 
  844 #define MAX_BYTES 6
  845 
  846 static void
  847 do_range(const char *source)
  848 {
  849     int lo, hi;
  850 
  851     TScreen screen;
  852     memset(&screen, 0, sizeof(screen));
  853 
  854     if (decode_range(source, &lo, &hi)) {
  855     while (lo <= hi) {
  856         unsigned c_in = (unsigned) lo++;
  857         PtyData *data;
  858         Char *next;
  859         Char buffer[MAX_BYTES + 1];
  860 
  861         if (opt_reverse) {
  862         Bool skip = False;
  863         Bool first = True;
  864         int j, k;
  865         for (j = 0; j < MAX_BYTES; ++j) {
  866             unsigned long bits = ((unsigned long) c_in >> (8 * j));
  867             if ((buffer[j] = (Char) bits) == 0) {
  868             skip = (bits != 0);
  869             break;
  870             }
  871         }
  872         if (skip)
  873             continue;
  874         initPtyData(&data);
  875         for (k = 0; k <= j; ++k) {
  876             data->buffer[k] = buffer[j - k - 1];
  877         }
  878         if (opt_illegal && !is_legal_utf8(data->buffer)) {
  879             free(data);
  880             continue;
  881         }
  882         if (message_level > 1) {
  883             printf("TEST ");
  884             for (k = 0; k < j; ++k) {
  885             printf("%02X", data->buffer[k]);
  886             }
  887         }
  888         data->next = data->buffer;
  889         data->last = data->buffer + j;
  890         while (decodeUtf8(&screen, data)) {
  891             total_test++;
  892             if (data->utf_data == UCS_REPL)
  893             total_errs++;
  894             data->next += data->utf_size;
  895             if (message_level > 1) {
  896             printf("%s%04X", first ? " ->" : ", ", data->utf_data);
  897             }
  898             first = False;
  899         }
  900         if (!first)
  901             total_test--;
  902         if (message_level > 1) {
  903             printf("\n");
  904             fflush(stdout);
  905         }
  906         free(data);
  907         } else if (opt_convert) {
  908         unsigned c_out;
  909         Char *result;
  910 
  911         memset(buffer, 0, sizeof(buffer));
  912         if ((result = next = convertToUTF8(buffer, c_in)) == 0) {
  913             fprintf(stderr,
  914                 "conversion of U+%04X to UTF-8 failed\n", c_in);
  915         } else if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
  916             fprintf(stderr,
  917                 "conversion of U+%04X from UTF-8 failed\n", c_in);
  918             total_errs++;
  919         } else if (c_in != c_out) {
  920             fprintf(stderr,
  921                 "conversion of U+%04X to/from UTF-8 gave U+%04X\n",
  922                 c_in, c_out);
  923         } else if (message_level > 1) {
  924             *next = '\0';
  925             printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
  926                (unsigned long) (next - buffer),
  927                buffer,
  928                c_out);
  929             fflush(stdout);
  930         }
  931         } else {
  932         initPtyData(&data);
  933         next = convertToUTF8(data->buffer, c_in);
  934         *next = 0;
  935         data->next = data->buffer;
  936         data->last = next;
  937         decodeUtf8(&screen, data);
  938         if (message_level > 1) {
  939             printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
  940                (unsigned long) (next - data->buffer),
  941                data->buffer,
  942                data->utf_data);
  943             fflush(stdout);
  944         }
  945         if (c_in != data->utf_data) {
  946             fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data);
  947             total_errs++;
  948         }
  949         free(data);
  950         }
  951         total_test++;
  952     }
  953     }
  954 }
  955 
  956 int
  957 main(int argc, char **argv)
  958 {
  959     int ch;
  960 
  961     setlocale(LC_ALL, "");
  962     while ((ch = getopt(argc, argv, "aciqrv")) != -1) {
  963     switch (ch) {
  964     case 'a':
  965         opt_all = 1;
  966         break;
  967     case 'c':
  968         opt_convert = 1;
  969         break;
  970     case 'i':
  971         opt_illegal = 1;
  972         break;
  973     case 'q':
  974         message_level--;
  975         break;
  976     case 'r':
  977         opt_reverse = 1;
  978         break;
  979     case 'v':
  980         message_level++;
  981         break;
  982     default:
  983         usage();
  984     }
  985     }
  986     if (opt_all) {
  987     test_utf8_convert();
  988     } else {
  989     if (optind >= argc)
  990         usage();
  991     while (optind < argc) {
  992         do_range(argv[optind++]);
  993     }
  994     if (total_test) {
  995         printf("%ld/%ld mismatches (%.0f%%)\n",
  996            total_errs,
  997            total_test,
  998            (100.0 * (double) total_errs) / (double) total_test);
  999     }
 1000     }
 1001     return EXIT_SUCCESS;
 1002 }
 1003 #else
 1004 int
 1005 main(int argc, char **argv)
 1006 {
 1007     (void) argc;
 1008     (void) argv;
 1009     printf("Nothing to be done here...\n");
 1010     return EXIT_SUCCESS;
 1011 }
 1012 #endif /* OPT_WIDE_CHARS */
 1013 #endif