"Fossies" - the Fresh Open Source Software Archive

Member "lynx2.9.0dev.1/src/LYCharUtils.c" (5 Mar 2018, 87554 Bytes) of package /linux/www/lynx2.9.0dev.1.tar.bz2:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "LYCharUtils.c", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2.8.8rel.2_vs_2.8.9rel.1.

    1 /*
    2  * $LynxId: LYCharUtils.c,v 1.131 2018/03/05 22:32:14 tom Exp $
    3  *
    4  *  Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM
    5  *  ==========================================================================
    6  */
    7 #include <HTUtils.h>
    8 #include <SGML.h>
    9 
   10 #define Lynx_HTML_Handler
   11 #include <HTChunk.h>
   12 #include <HText.h>
   13 #include <HTStyle.h>
   14 #include <HTMIME.h>
   15 #include <HTML.h>
   16 
   17 #include <HTCJK.h>
   18 #include <HTAtom.h>
   19 #include <HTMLGen.h>
   20 #include <HTParse.h>
   21 #include <UCMap.h>
   22 #include <UCDefs.h>
   23 #include <UCAux.h>
   24 
   25 #include <LYGlobalDefs.h>
   26 #include <LYCharUtils.h>
   27 #include <LYCharSets.h>
   28 
   29 #include <HTAlert.h>
   30 #include <HTForms.h>
   31 #include <HTNestedList.h>
   32 #include <GridText.h>
   33 #include <LYStrings.h>
   34 #include <LYUtils.h>
   35 #include <LYMap.h>
   36 #include <LYBookmark.h>
   37 #include <LYCurses.h>
   38 #include <LYCookie.h>
   39 
   40 #include <LYexit.h>
   41 #include <LYLeaks.h>
   42 
/*
 * Sentinel values used for nested (ordered) lists.  - FM
 *
 * NOTE(review): both are large negative numbers, presumably chosen so they
 * cannot be mistaken for a real list count -- confirm against the code that
 * reads OL_Counter[].
 */
int OL_CONTINUE = -29999;   /* flag for whether CONTINUE is set */
int OL_VOID = -29998;       /* flag for whether a count is set; LYZero_OL_Counter() stores it in every OL_Counter[] slot */
   48 
   49 static size_t count_char(const char *value, int ch)
   50 {
   51     const char *found;
   52     size_t result = 0;
   53 
   54     while ((*value != '\0') && (found = StrChr(value, ch)) != NULL) {
   55     ++result;
   56     value = (found + 1);
   57     }
   58     return result;
   59 }
   60 
   61 /*
   62  * This function converts any ampersands in a pre-allocated string to "&amp;". 
   63  * If brackets is TRUE, it also converts any angle-brackets to "&lt;" or "&gt;".
   64  */
   65 void LYEntify(char **in_out,
   66           int brackets)
   67 {
   68     char *source = *in_out;
   69     char *target;
   70     char *result = NULL;
   71     size_t count_AMPs = 0;
   72     size_t count_LTs = 0;
   73     size_t count_GTs = 0;
   74 
   75 #ifdef CJK_EX
   76     enum _state {
   77     S_text,
   78     S_esc,
   79     S_dollar,
   80     S_paren,
   81     S_nonascii_text,
   82     S_dollar_paren
   83     } state = S_text;
   84     int in_sjis = 0;
   85 #endif
   86 
   87     if (non_empty(source)) {
   88     count_AMPs = count_char(*in_out, '&');
   89     if (brackets) {
   90         count_LTs = count_char(*in_out, '<');
   91         count_GTs = count_char(*in_out, '>');
   92     }
   93 
   94     if (count_AMPs != 0 || count_LTs != 0 || count_GTs != 0) {
   95 
   96         target = typecallocn(char,
   97                    (strlen(*in_out)
   98                     + (4 * count_AMPs)
   99                     + (3 * count_LTs)
  100                     + (3 * count_GTs) + 1));
  101 
  102         if ((result = target) == NULL)
  103         outofmem(__FILE__, "LYEntify");
  104 
  105         for (source = *in_out; *source; source++) {
  106 #ifdef CJK_EX
  107         if (IS_CJK_TTY) {
  108             switch (state) {
  109             case S_text:
  110             if (*source == '\033') {
  111                 state = S_esc;
  112                 *target++ = *source;
  113                 continue;
  114             }
  115             break;
  116 
  117             case S_esc:
  118             if (*source == '$') {
  119                 state = S_dollar;
  120             } else if (*source == '(') {
  121                 state = S_paren;
  122             } else {
  123                 state = S_text;
  124             }
  125             *target++ = *source;
  126             continue;
  127 
  128             case S_dollar:
  129             if (*source == '@' || *source == 'B' || *source == 'A') {
  130                 state = S_nonascii_text;
  131             } else if (*source == '(') {
  132                 state = S_dollar_paren;
  133             } else {
  134                 state = S_text;
  135             }
  136             *target++ = *source;
  137             continue;
  138 
  139             case S_dollar_paren:
  140             if (*source == 'C') {
  141                 state = S_nonascii_text;
  142             } else {
  143                 state = S_text;
  144             }
  145             *target++ = *source;
  146             continue;
  147 
  148             case S_paren:
  149             if (*source == 'B' || *source == 'J' || *source == 'T') {
  150                 state = S_text;
  151             } else if (*source == 'I') {
  152                 state = S_nonascii_text;
  153             } else if (*source == '\033') {
  154                 state = S_esc;
  155             }
  156             *target++ = *source;
  157             continue;
  158 
  159             case S_nonascii_text:
  160             if (*source == '\033')
  161                 state = S_esc;
  162             *target++ = *source;
  163             continue;
  164 
  165             default:
  166             break;
  167             }
  168             if (*(source + 1) != '\0' &&
  169             (IS_EUC(UCH(*source), UCH(*(source + 1))) ||
  170              IS_SJIS(UCH(*source), UCH(*(source + 1)), in_sjis) ||
  171              IS_BIG5(UCH(*source), UCH(*(source + 1))))) {
  172             *target++ = *source++;
  173             *target++ = *source;
  174             continue;
  175             }
  176         }
  177 #endif
  178         switch (*source) {
  179         case '&':
  180             *target++ = '&';
  181             *target++ = 'a';
  182             *target++ = 'm';
  183             *target++ = 'p';
  184             *target++ = ';';
  185             break;
  186         case '<':
  187             if (brackets) {
  188             *target++ = '&';
  189             *target++ = 'l';
  190             *target++ = 't';
  191             *target++ = ';';
  192             break;
  193             }
  194             /* FALLTHRU */
  195         case '>':
  196             if (brackets) {
  197             *target++ = '&';
  198             *target++ = 'g';
  199             *target++ = 't';
  200             *target++ = ';';
  201             break;
  202             }
  203             /* FALLTHRU */
  204         default:
  205             *target++ = *source;
  206             break;
  207         }
  208         }
  209         *target = '\0';
  210         FREE(*in_out);
  211         *in_out = result;
  212     }
  213     }
  214 }
  215 
  216 /*
  217  * Callers to LYEntifyTitle/LYEntifyValue do not look at the 'target' param.
  218  * Optimize things a little by avoiding the memory allocation if not needed,
  219  * as is usually the case.
  220  */
  221 static BOOL MustEntify(const char *source)
  222 {
  223     BOOL result;
  224 
  225 #ifdef CJK_EX
  226     if (IS_CJK_TTY && StrChr(source, '\033') != 0) {
  227     result = TRUE;
  228     } else
  229 #endif
  230     {
  231     size_t length = strlen(source);
  232     size_t reject = strcspn(source, "<&>");
  233 
  234     result = (BOOL) (length != reject);
  235     }
  236 
  237     return result;
  238 }
  239 
  240 /*
  241  * Wrappers for LYEntify() which do not assume that the source was allocated,
  242  * e.g., output from gettext().
  243  */
  244 const char *LYEntifyTitle(char **target, const char *source)
  245 {
  246     const char *result = 0;
  247 
  248     if (MustEntify(source)) {
  249     StrAllocCopy(*target, source);
  250     LYEntify(target, TRUE);
  251     result = *target;
  252     } else {
  253     result = source;
  254     }
  255     return result;
  256 }
  257 
  258 const char *LYEntifyValue(char **target, const char *source)
  259 {
  260     const char *result = 0;
  261 
  262     if (MustEntify(source)) {
  263     StrAllocCopy(*target, source);
  264     LYEntify(target, FALSE);
  265     result = *target;
  266     } else {
  267     result = source;
  268     }
  269     return result;
  270 }
  271 
  272 /*
  273  *  This function trims characters <= that of a space (32),
  274  *  including HT_NON_BREAK_SPACE (1) and HT_EN_SPACE (2),
  275  *  but not ESC, from the heads of strings. - FM
  276  */
  277 void LYTrimHead(char *str)
  278 {
  279     const char *s = str;
  280 
  281     if (isEmpty(s))
  282     return;
  283 
  284     while (*s && WHITE(*s) && UCH(*s) != UCH(CH_ESC))   /* S/390 -- gil -- 1669 */
  285     s++;
  286     if (s > str) {
  287     char *ns = str;
  288 
  289     while (*s) {
  290         *ns++ = *s++;
  291     }
  292     *ns = '\0';
  293     }
  294 }
  295 
  296 /*
  297  *  This function trims characters <= that of a space (32),
  298  *  including HT_NON_BREAK_SPACE (1), HT_EN_SPACE (2), and
  299  *  ESC from the tails of strings. - FM
  300  */
  301 void LYTrimTail(char *str)
  302 {
  303     int i;
  304 
  305     if (isEmpty(str))
  306     return;
  307 
  308     i = (int) strlen(str) - 1;
  309     while (i >= 0) {
  310     if (WHITE(str[i]))
  311         str[i] = '\0';
  312     else
  313         break;
  314     i--;
  315     }
  316 }
  317 
  318 /*
  319  * This function should receive a pointer to the start
  320  * of a comment.  It returns a pointer to the end ('>')
  321  * character of comment, or it's best guess if the comment
  322  * is invalid. - FM
  323  */
  324 char *LYFindEndOfComment(char *str)
  325 {
  326     char *cp, *cp1;
  327     enum comment_state {
  328     start1,
  329     start2,
  330     end1,
  331     end2
  332     } state;
  333 
  334     if (str == NULL)
  335     /*
  336      * We got NULL, so return NULL.  - FM
  337      */
  338     return NULL;
  339 
  340     if (StrNCmp(str, "<!--", 4))
  341     /*
  342      * We don't have the start of a comment, so return the beginning of the
  343      * string.  - FM
  344      */
  345     return str;
  346 
  347     cp = (str + 4);
  348     if (*cp == '>')
  349     /*
  350      * It's an invalid comment, so
  351      * return this end character. - FM
  352      */
  353     return cp;
  354 
  355     if ((cp1 = StrChr(cp, '>')) == NULL)
  356     /*
  357      * We don't have an end character, so return the beginning of the
  358      * string.  - FM
  359      */
  360     return str;
  361 
  362     if (*cp == '-')
  363     /*
  364      * Ugh, it's a "decorative" series of dashes, so return the next end
  365      * character.  - FM
  366      */
  367     return cp1;
  368 
  369     /*
  370      * OK, we're ready to start parsing.  - FM
  371      */
  372     state = start2;
  373     while (*cp != '\0') {
  374     switch (state) {
  375     case start1:
  376         if (*cp == '-')
  377         state = start2;
  378         else
  379         /*
  380          * Invalid comment, so return the first '>' from the start of
  381          * the string.  - FM
  382          */
  383         return cp1;
  384         break;
  385 
  386     case start2:
  387         if (*cp == '-')
  388         state = end1;
  389         break;
  390 
  391     case end1:
  392         if (*cp == '-')
  393         state = end2;
  394         else
  395         /*
  396          * Invalid comment, so return the first '>' from the start of
  397          * the string.  - FM
  398          */
  399         return cp1;
  400         break;
  401 
  402     case end2:
  403         if (*cp == '>')
  404         /*
  405          * Valid comment, so return the end character.  - FM
  406          */
  407         return cp;
  408         if (*cp == '-') {
  409         state = start1;
  410         } else if (!(WHITE(*cp) && UCH(*cp) != UCH(CH_ESC))) {  /* S/390 -- gil -- 1686 */
  411         /*
  412          * Invalid comment, so return the first '>' from the start of
  413          * the string.  - FM
  414          */
  415         return cp1;
  416         }
  417         break;
  418 
  419     default:
  420         break;
  421     }
  422     cp++;
  423     }
  424 
  425     /*
  426      * Invalid comment, so return the first '>' from the start of the string. 
  427      * - FM
  428      */
  429     return cp1;
  430 }
  431 
/*
 *  If an HREF, itself or if resolved against a base,
 *  represents a file URL, and the host is defaulted,
 *  force in "//localhost".  We need this until
 *  all the other Lynx code which performs security
 *  checks based on the "localhost" string is changed
 *  to assume "//localhost" when a host field is not
 *  present in file URLs - FM
 *
 *  href - address of the URL string; it is rewritten in place through
 *         StrAllocCopy/StrAllocCat and the other helpers called below.
 *  base - base URL, may be NULL; only consulted when *href is "//" or
 *         starts with "///".
 *
 *  The steps below run in sequence, each one seeing the *href produced
 *  by the previous one.
 */
void LYFillLocalFileURL(char **href,
            const char *base)
{
    char *temp = NULL;		/* scratch string, released at the end */

    if (isEmpty(*href))
    return;

    /*
     * A bare "//" or a leading "///" carries no scheme; when the base is a
     * file URL, put STR_FILE_URL in front of it.
     */
    if (!strcmp(*href, "//") || !StrNCmp(*href, "///", 3)) {
    if (base != NULL && isFILE_URL(base)) {
        StrAllocCopy(temp, STR_FILE_URL);
        StrAllocCat(temp, *href);
        StrAllocCopy(*href, temp);
    }
    }
    if (isFILE_URL(*href)) {
    /*
     * Offset 5 is the first character after the scheme prefix
     * (presumably "file:", judging by the literals compared below).
     */
    if (*(*href + 5) == '\0') {
        /* scheme only: append the whole host part */
        StrAllocCat(*href, "//localhost");
    } else if (!strcmp(*href, "file://")) {
        /* empty host: append just the host name */
        StrAllocCat(*href, "localhost");
    } else if (!StrNCmp(*href, "file:///", 8)) {
        /* empty host followed by a path: pass the path (from offset 7) on */
        StrAllocCopy(temp, (*href + 7));
        LYLocalFileToURL(href, temp);
    } else if (!StrNCmp(*href, "file:/", 6) && !LYIsHtmlSep(*(*href + 6))) {
        /* single slash after the scheme: pass the path (from offset 5) on */
        StrAllocCopy(temp, (*href + 5));
        LYLocalFileToURL(href, temp);
    }
    }
#if defined(USE_DOS_DRIVES)
    if (LYIsDosDrive(*href)) {
    /*
     * If it's a local DOS path beginning with drive letter,
     * add file://localhost/ prefix and go ahead.
     */
    StrAllocCopy(temp, *href);
    LYLocalFileToURL(href, temp);
    }

    /* use below: strlen("file://localhost/") = 17 */
    /* 19 == 17 + 2, i.e. exactly two characters follow the prefix */
    if (!StrNCmp(*href, "file://localhost/", 17)
    && (strlen(*href) == 19)
    && LYIsDosDrive(*href + 17)) {
    /*
     * Terminate DOS drive letter with a slash to surf root successfully.
     * Here seems a proper place to do so.
     */
    LYAddPathSep(href);
    }
#endif /* USE_DOS_DRIVES */

    /*
     * No path in a file://localhost URL means a
     * directory listing for the current default. - FM
     */
    if (!strcmp(*href, "file://localhost")) {
    const char *temp2;		/* path to append, from wwwName()/HTVMS_wwwName() */

#ifdef VMS
    temp2 = HTVMS_wwwName(LYGetEnv("PATH"));
#else
    char curdir[LY_MAXPATH];

    temp2 = wwwName(Current_Dir(curdir));
#endif /* VMS */
    /* make sure a separator sits between the host and the path */
    if (!LYIsHtmlSep(*temp2))
        LYAddHtmlSep(href);
    /*
     * Check for pathological cases - current dir has chars which MUST BE
     * URL-escaped - kw
     */
    if (StrChr(temp2, '%') != NULL || StrChr(temp2, '#') != NULL) {
        /* keep the escaped copy in temp so the final FREE() releases it */
        FREE(temp);
        temp = HTEscape(temp2, URL_PATH);
        StrAllocCat(*href, temp);
    } else {
        StrAllocCat(*href, temp2);
    }
    }
#ifdef VMS
    /*
     * On VMS, a file://localhost/ URL means
     * a listing for the login directory. - FM
     */
    if (!strcmp(*href, "file://localhost/"))
    StrAllocCat(*href, (HTVMS_wwwName(Home_Dir()) + 1));
#endif /* VMS */

    FREE(temp);
    return;
}
  531 
  532 void LYAddMETAcharsetToStream(HTStream *target, int disp_chndl)
  533 {
  534     char *buf = 0;
  535 
  536     if (disp_chndl == -1)
  537     /*
  538      * -1 means use current_char_set.
  539      */
  540     disp_chndl = current_char_set;
  541 
  542     if (target != 0 && disp_chndl >= 0) {
  543     HTSprintf0(&buf, "<META %s content=\"" STR_HTML ";charset=%s\">\n",
  544            "http-equiv=\"content-type\"",
  545            LYCharSet_UC[disp_chndl].MIMEname);
  546     (*target->isa->put_string) (target, buf);
  547     FREE(buf);
  548     }
  549 }
  550 
  551 /*
  552  *  This function writes a line with a META tag to an open file,
  553  *  which will specify a charset parameter to use when the file is
  554  *  read back in.  It is meant for temporary HTML files used by the
  555  *  various special pages which may show titles of documents.  When those
  556  *  files are created, the title strings normally have been translated and
  557  *  expanded to the display character set, so we have to make sure they
  558  *  don't get translated again.
  559  *  If the user has changed the display character set during the lifetime
  560  *  of the Lynx session (or, more exactly, during the time the title
  561  *  strings to be written were generated), they may now have different
  562  *  character encodings and there is currently no way to get it all right.
  563  *  To change this, we would have to add a variable for each string which
  564  *  keeps track of its character encoding.
  565  *  But at least we can try to ensure that reading the file after future
  566  *  display character set changes will give reasonable output.
  567  *
  568  *  The META tag is not written if the display character set (passed as
  569  *  disp_chndl) already corresponds to the charset assumption that
  570  *  would be made when the file is read. - KW
  571  *
  572  *  Currently this function is used for temporary files like "Lynx Info Page"
  573  *  and for one permanent - bookmarks (so it may be a problem if you change
  574  *  the display charset later: new bookmark entries may be mistranslated).
  575  *                               - LP
  576  */
  577 void LYAddMETAcharsetToFD(FILE *fd, int disp_chndl)
  578 {
  579     if (disp_chndl == -1)
  580     /*
  581      * -1 means use current_char_set.
  582      */
  583     disp_chndl = current_char_set;
  584 
  585     if (fd == NULL || disp_chndl < 0)
  586     /*
  587      * Should not happen.
  588      */
  589     return;
  590 
  591     if (UCLYhndl_HTFile_for_unspec == disp_chndl)
  592     /*
  593      * Not need to do, so we don't.
  594      */
  595     return;
  596 
  597     if (LYCharSet_UC[disp_chndl].enc == UCT_ENC_7BIT)
  598     /*
  599      * There shouldn't be any 8-bit characters in this case.
  600      */
  601     return;
  602 
  603     /*
  604      * In other cases we don't know because UCLYhndl_for_unspec may change
  605      * during the lifetime of the file (by toggling raw mode or changing the
  606      * display character set), so proceed.
  607      */
  608     fprintf(fd, "<META %s content=\"" STR_HTML ";charset=%s\">\n",
  609         "http-equiv=\"content-type\"",
  610         LYCharSet_UC[disp_chndl].MIMEname);
  611 }
  612 
  613 /*
  614  * This function returns OL TYPE="A" strings in
  615  * the range of " A." (1) to "ZZZ." (18278). - FM
  616  */
  617 char *LYUppercaseA_OL_String(int seqnum)
  618 {
  619     static char OLstring[8];
  620 
  621     if (seqnum <= 1) {
  622     strcpy(OLstring, " A.");
  623     return OLstring;
  624     }
  625     if (seqnum < 27) {
  626     sprintf(OLstring, " %c.", (seqnum + 64));
  627     return OLstring;
  628     }
  629     if (seqnum < 703) {
  630     sprintf(OLstring, "%c%c.", ((seqnum - 1) / 26 + 64),
  631         (seqnum - ((seqnum - 1) / 26) * 26 + 64));
  632     return OLstring;
  633     }
  634     if (seqnum < 18279) {
  635     sprintf(OLstring, "%c%c%c.", ((seqnum - 27) / 676 + 64),
  636         (((seqnum - ((seqnum - 27) / 676) * 676) - 1) / 26 + 64),
  637         (seqnum - ((seqnum - 1) / 26) * 26 + 64));
  638     return OLstring;
  639     }
  640     strcpy(OLstring, "ZZZ.");
  641     return OLstring;
  642 }
  643 
  644 /*
  645  * This function returns OL TYPE="a" strings in
  646  * the range of " a." (1) to "zzz." (18278). - FM
  647  */
  648 char *LYLowercaseA_OL_String(int seqnum)
  649 {
  650     static char OLstring[8];
  651 
  652     if (seqnum <= 1) {
  653     strcpy(OLstring, " a.");
  654     return OLstring;
  655     }
  656     if (seqnum < 27) {
  657     sprintf(OLstring, " %c.", (seqnum + 96));
  658     return OLstring;
  659     }
  660     if (seqnum < 703) {
  661     sprintf(OLstring, "%c%c.", ((seqnum - 1) / 26 + 96),
  662         (seqnum - ((seqnum - 1) / 26) * 26 + 96));
  663     return OLstring;
  664     }
  665     if (seqnum < 18279) {
  666     sprintf(OLstring, "%c%c%c.", ((seqnum - 27) / 676 + 96),
  667         (((seqnum - ((seqnum - 27) / 676) * 676) - 1) / 26 + 96),
  668         (seqnum - ((seqnum - 1) / 26) * 26 + 96));
  669     return OLstring;
  670     }
  671     strcpy(OLstring, "zzz.");
  672     return OLstring;
  673 }
  674 
  675 /*
  676  * This function returns OL TYPE="I" strings in the
  677  * range of " I." (1) to "MMM." (3000).- FM
  678  * Maximum length: 16 -TD
  679  */
  680 char *LYUppercaseI_OL_String(int seqnum)
  681 {
  682     static char OLstring[20];
  683     int Arabic = seqnum;
  684 
  685     if (Arabic >= 3000) {
  686     strcpy(OLstring, "MMM.");
  687     return OLstring;
  688     }
  689 
  690     switch (Arabic) {
  691     case 1:
  692     strcpy(OLstring, " I.");
  693     return OLstring;
  694     case 5:
  695     strcpy(OLstring, " V.");
  696     return OLstring;
  697     case 10:
  698     strcpy(OLstring, " X.");
  699     return OLstring;
  700     case 50:
  701     strcpy(OLstring, " L.");
  702     return OLstring;
  703     case 100:
  704     strcpy(OLstring, " C.");
  705     return OLstring;
  706     case 500:
  707     strcpy(OLstring, " D.");
  708     return OLstring;
  709     case 1000:
  710     strcpy(OLstring, " M.");
  711     return OLstring;
  712     default:
  713     OLstring[0] = '\0';
  714     break;
  715     }
  716 
  717     while (Arabic >= 1000) {
  718     strcat(OLstring, "M");
  719     Arabic -= 1000;
  720     }
  721 
  722     if (Arabic >= 900) {
  723     strcat(OLstring, "CM");
  724     Arabic -= 900;
  725     }
  726 
  727     if (Arabic >= 500) {
  728     strcat(OLstring, "D");
  729     Arabic -= 500;
  730     }
  731 
  732     if (Arabic >= 400) {
  733     strcat(OLstring, "CD");
  734     Arabic -= 400;
  735     }
  736 
  737     while (Arabic >= 100) {
  738     strcat(OLstring, "C");
  739     Arabic -= 100;
  740     }
  741 
  742     if (Arabic >= 90) {
  743     strcat(OLstring, "XC");
  744     Arabic -= 90;
  745     }
  746 
  747     if (Arabic >= 50) {
  748     strcat(OLstring, "L");
  749     Arabic -= 50;
  750     }
  751 
  752     if (Arabic >= 40) {
  753     strcat(OLstring, "XL");
  754     Arabic -= 40;
  755     }
  756 
  757     while (Arabic > 10) {
  758     strcat(OLstring, "X");
  759     Arabic -= 10;
  760     }
  761 
  762     switch (Arabic) {
  763     case 1:
  764     strcat(OLstring, "I.");
  765     break;
  766     case 2:
  767     strcat(OLstring, "II.");
  768     break;
  769     case 3:
  770     strcat(OLstring, "III.");
  771     break;
  772     case 4:
  773     strcat(OLstring, "IV.");
  774     break;
  775     case 5:
  776     strcat(OLstring, "V.");
  777     break;
  778     case 6:
  779     strcat(OLstring, "VI.");
  780     break;
  781     case 7:
  782     strcat(OLstring, "VII.");
  783     break;
  784     case 8:
  785     strcat(OLstring, "VIII.");
  786     break;
  787     case 9:
  788     strcat(OLstring, "IX.");
  789     break;
  790     case 10:
  791     strcat(OLstring, "X.");
  792     break;
  793     default:
  794     strcat(OLstring, ".");
  795     break;
  796     }
  797 
  798     return OLstring;
  799 }
  800 
  801 /*
  802  * This function returns OL TYPE="i" strings in
  803  * range of " i." (1) to "mmm." (3000).- FM
  804  * Maximum length: 16 -TD
  805  */
  806 char *LYLowercaseI_OL_String(int seqnum)
  807 {
  808     static char OLstring[20];
  809     int Arabic = seqnum;
  810 
  811     if (Arabic >= 3000) {
  812     strcpy(OLstring, "mmm.");
  813     return OLstring;
  814     }
  815 
  816     switch (Arabic) {
  817     case 1:
  818     strcpy(OLstring, " i.");
  819     return OLstring;
  820     case 5:
  821     strcpy(OLstring, " v.");
  822     return OLstring;
  823     case 10:
  824     strcpy(OLstring, " x.");
  825     return OLstring;
  826     case 50:
  827     strcpy(OLstring, " l.");
  828     return OLstring;
  829     case 100:
  830     strcpy(OLstring, " c.");
  831     return OLstring;
  832     case 500:
  833     strcpy(OLstring, " d.");
  834     return OLstring;
  835     case 1000:
  836     strcpy(OLstring, " m.");
  837     return OLstring;
  838     default:
  839     OLstring[0] = '\0';
  840     break;
  841     }
  842 
  843     while (Arabic >= 1000) {
  844     strcat(OLstring, "m");
  845     Arabic -= 1000;
  846     }
  847 
  848     if (Arabic >= 900) {
  849     strcat(OLstring, "cm");
  850     Arabic -= 900;
  851     }
  852 
  853     if (Arabic >= 500) {
  854     strcat(OLstring, "d");
  855     Arabic -= 500;
  856     }
  857 
  858     if (Arabic >= 400) {
  859     strcat(OLstring, "cd");
  860     Arabic -= 400;
  861     }
  862 
  863     while (Arabic >= 100) {
  864     strcat(OLstring, "c");
  865     Arabic -= 100;
  866     }
  867 
  868     if (Arabic >= 90) {
  869     strcat(OLstring, "xc");
  870     Arabic -= 90;
  871     }
  872 
  873     if (Arabic >= 50) {
  874     strcat(OLstring, "l");
  875     Arabic -= 50;
  876     }
  877 
  878     if (Arabic >= 40) {
  879     strcat(OLstring, "xl");
  880     Arabic -= 40;
  881     }
  882 
  883     while (Arabic > 10) {
  884     strcat(OLstring, "x");
  885     Arabic -= 10;
  886     }
  887 
  888     switch (Arabic) {
  889     case 1:
  890     strcat(OLstring, "i.");
  891     break;
  892     case 2:
  893     strcat(OLstring, "ii.");
  894     break;
  895     case 3:
  896     strcat(OLstring, "iii.");
  897     break;
  898     case 4:
  899     strcat(OLstring, "iv.");
  900     break;
  901     case 5:
  902     strcat(OLstring, "v.");
  903     break;
  904     case 6:
  905     strcat(OLstring, "vi.");
  906     break;
  907     case 7:
  908     strcat(OLstring, "vii.");
  909     break;
  910     case 8:
  911     strcat(OLstring, "viii.");
  912     break;
  913     case 9:
  914     strcat(OLstring, "ix.");
  915     break;
  916     case 10:
  917     strcat(OLstring, "x.");
  918     break;
  919     default:
  920     strcat(OLstring, ".");
  921     break;
  922     }
  923 
  924     return OLstring;
  925 }
  926 
  927 /*
  928  *  This function initializes the Ordered List counter. - FM
  929  */
  930 void LYZero_OL_Counter(HTStructured * me)
  931 {
  932     int i;
  933 
  934     if (!me)
  935     return;
  936 
  937     for (i = 0; i < 12; i++) {
  938     me->OL_Counter[i] = OL_VOID;
  939     me->OL_Type[i] = '1';
  940     }
  941 
  942     me->Last_OL_Count = 0;
  943     me->Last_OL_Type = '1';
  944 
  945     return;
  946 }
  947 
  948 /*
  949  *  This function is used by the HTML Structured object. - KW
  950  */
  951 void LYGetChartransInfo(HTStructured * me)
  952 {
  953     me->UCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor,
  954                     UCT_STAGE_STRUCTURED);
  955     if (me->UCLYhndl < 0) {
  956     int chndl = HTAnchor_getUCLYhndl(me->node_anchor, UCT_STAGE_HTEXT);
  957 
  958     if (chndl < 0) {
  959         chndl = current_char_set;
  960         HTAnchor_setUCInfoStage(me->node_anchor, chndl,
  961                     UCT_STAGE_HTEXT,
  962                     UCT_SETBY_STRUCTURED);
  963     }
  964     HTAnchor_setUCInfoStage(me->node_anchor, chndl,
  965                 UCT_STAGE_STRUCTURED,
  966                 UCT_SETBY_STRUCTURED);
  967     me->UCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor,
  968                         UCT_STAGE_STRUCTURED);
  969     }
  970     me->UCI = HTAnchor_getUCInfoStage(me->node_anchor,
  971                       UCT_STAGE_STRUCTURED);
  972 }
  973 
    /*
     * Uppercase hexadecimal digits; hex[v] is the digit for value v (0-15).
     * As in HTParse.c, saves some calls - kw
     * NOTE(review): presumably used for the URL-encoding described in the
     * comment below -- the use site is outside this view, confirm there.
     */
static const char *hex = "0123456789ABCDEF";
  976 
  977 /*
  978  *    Any raw 8-bit or multibyte characters already have been
  979  *    handled in relation to the display character set
  980  *    in SGML_character(), including named and numeric entities.
  981  *
  982  *  This function used for translations HTML special fields inside tags
  983  *  (ALT=, VALUE=, etc.) from charset `cs_from' to charset `cs_to'.
  984  *  It also unescapes non-ASCII characters from URL (#fragments !)
  985  *  if st_URL is active.
  986  *
  987  *  If `do_ent' is YES, it converts named entities
  988  *  and numeric character references (NCRs) to their `cs_to' replacements.
  989  *
  990  *  Named entities converted to unicodes.  NCRs (unicodes) converted
  991  *  by UCdomap.c chartrans functions.
  992  *  ???NCRs with values in the ISO-8859-1 range 160-255 may be converted
  993  *  to their HTML entity names (via old-style entities) and then translated
  994  *  according to the LYCharSets.c array for `cs_out'???.
  995  *
  996  *  Some characters (see descriptions in `put_special_unicodes' from SGML.c)
  997  *  translated in relation with the state of boolean variables
  998  *  `use_lynx_specials', `plain_space' and `hidden'.  It is not clear yet:
  999  *
 1000  *  If plain_space is TRUE, nbsp (160) will be treated as an ASCII
 1001  *  space (32).  If hidden is TRUE, entities will be translated
 1002  *  (if `do_ent' is YES) but escape sequences will be passed unaltered.
 1003  *  If `hidden' is FALSE, some characters are converted to Lynx special
 1004  *  codes (see `put_special_unicodes') or ASCII space if `plain_space'
 1005  *  applies).  @@ is `use_lynx_specials' needed, does it have any effect? @@
 1006  *  If `use_lynx_specials' is YES, translate byte values 160 and 173
 1007  *  meaning U+00A0 and U+00AD given as or converted from raw char input
 1008  *  are converted to HT_NON_BREAK_SPACE and LY_SOFT_HYPHEN, respectively
 1009  *  (unless input and output charset are both iso-8859-1, for compatibility
 1010  *  with previous usage in HTML.c) even if `hidden' or `plain_space' is set.
 1011  *
 1012  *  If `Back' is YES, the reverse is done instead i.e., Lynx special codes
 1013  *  in the input are translated back to character values.
 1014  *
 1015  *  If `Back' is YES, an attempt is made to use UCReverseTransChar() for
 1016  *  back translation which may be more efficient. (?)
 1017  *
 1018  *  If `stype' is st_URL, non-ASCII characters are URL-encoded instead.
 1019  *  The sequence of bytes being URL-encoded is the raw input character if
 1020  *  we couldn't translate it from `cs_in' (CJK etc.); otherwise it is the
 1021  *  UTF-8 representation if either `cs_to' requires this or if the
 1022  *  character's Unicode value is > 255, otherwise it should be the iso-8859-1
 1023  *  representation.
 1024  *  No general URL-encoding occurs for displayable ASCII characters and
 1025  *  spaces and some C0 controls valid in HTML (LF, TAB), it is expected
 1026  *  that other functions will take care of that as appropriate.
 1027  *
 1028  *  Escape characters (0x1B, '\033') are
 1029  *  - URL-encoded   if `stype'  is st_URL,   otherwise
 1030  *  - dropped       if `stype'  is st_other, otherwise (i.e., st_HTML)
 1031  *  - passed        if `hidden' is TRUE or HTCJK is set, otherwise
 1032  *  - dropped.
 1033  *
 1034  *  (If `stype' is st_URL or st_other, most of the parameters are really
 1035  *  predefined: cs_from=cs_to, use_lynx_specials=plain_space=NO, and hidden=YES)
 1036  *
 1037  *
 1038  *  Returns pointer to the char** passed in
 1039  *       if string translated or translation unnecessary,
 1040  *      NULL otherwise
 1041  *       (in which case something probably went wrong.)
 1042  *
 1043  *
 1044  *  In general, this somewhat ugly function (KW)
 1045  *  covers three functions from v.2.7.2 (FM):
 1046  *          extern void LYExpandString (
 1047  *             HTStructured *          me,
 1048  *             char **             str);
 1049  *          extern void LYUnEscapeEntities (
 1050  *             HTStructured *          me,
 1051  *             char **             str);
 1052  *          extern void LYUnEscapeToLatinOne (
 1053  *             HTStructured *          me,
 1054  *             char **             str,
 1055  *             BOOLEAN             isURL);
 1056  */
 1057 
      /*
       * Translates *str from charset cs_from to cs_to using a state machine that
       * reads at p and writes at q into the same buffer, falling back to a lazily
       * created HTChunk when REPLACE_STRING/REPLACE_CHAR need one.
       *
       * Returns str when the string was translated or needed no work, and NULL
       * when UCCanTranslateFromTo() rejects the charset pair and either cs_to < 0
       * or neither entity expansion nor byte translation is left to do.
       */
 1058 char **LYUCFullyTranslateString(char **str,
 1059                 int cs_from,
 1060                 int cs_to,
 1061                 int do_ent,
 1062                 int use_lynx_specials,
 1063                 int plain_space,
 1064                 int hidden,
 1065                 int Back,
 1066                 CharUtil_st stype)
 1067 {
 1068     char *p;			/* read position in the input */
 1069     char *q, *qs;		/* q: write position; qs: start of output not yet copied to chunk */
 1070     HTChunk *chunk = NULL;	/* created on first use by CHUNK */
 1071     char *cp = 0;		/* start of the digits/name of the reference being parsed */
 1072     char cpe = 0;		/* byte that terminated the digits/name (it gets overwritten by '\0') */
 1073     char *esc = NULL;		/* result of HTEscape() in S_put_urlstring */
 1074     char replace_buf[64];	/* replacement string for the current character */
 1075     int uck;			/* result of UCTransUniChar() */
 1076     int lowest_8;		/* LYlowest_eightbit[cs_to] */
 1077     UCode_t code = 0;		/* value of the character currently being processed */
 1078     BOOL output_utf8 = 0, repl_translated_C0 = 0;
 1079     size_t len;
 1080     const char *name = NULL;	/* entity name handed to HTMLGetEntityUCValue() */
 1081     BOOLEAN no_bytetrans;	/* TRUE: S_trans_byte passes bytes through untranslated */
 1082     UCTransParams T;		/* only set up when !no_bytetrans */
 1083     BOOL from_is_utf8 = FALSE;
 1084     char *puni = 0;		/* for P_utf8: where p is moved to before the output step */
 1085     enum _state {
 1086     S_text,			/* ordinary text: dispatch on the current byte */
 1087     S_esc,			/* just saw ESC (0x1B) */
 1088     S_dollar,			/* saw ESC $ */
 1089     S_paren,			/* saw ESC ( */
 1090     S_nonascii_text,		/* copy bytes verbatim; only ESC is examined */
 1091     S_dollar_paren,		/* saw ESC $ ( */
 1092     S_trans_byte,		/* translate one raw byte/sequence from cs_from */
 1093     S_check_ent,		/* look for a character reference at '&' */
 1094     S_ncr,			/* parse a numeric character reference */
 1095     S_check_uni,		/* decide how to output the Unicode value in `code' */
 1096     S_named,			/* isolate the name of a named entity */
 1097     S_check_name,		/* look up the entity name */
 1098     S_recover,			/* could not convert: emit input verbatim or a "U<code>" string */
 1099     S_got_oututf8,		/* output `code' as UTF-8, or hand over to S_got_outchar */
 1100     S_got_outstring,		/* output replace_buf */
 1101     S_put_urlstring,		/* output replace_buf after HTEscape() */
 1102     S_got_outchar,		/* output `code' as one char */
 1103     S_put_urlchar,		/* output `code' as %XX */
 1104     S_next_char,		/* advance p, then back to S_text */
 1105     S_done			/* back to S_text without advancing p */
 1106     } state = S_text;
          /* What kind of input unit is in progress; the output states use it to reposition p. */
 1107     enum _parsing_what {
 1108     P_text,
 1109     P_utf8,
 1110     P_hex,
 1111     P_decimal,
 1112     P_named
 1113     } what = P_text;
 1114 
 1115 #ifdef KANJI_CODE_OVERRIDE
 1116     static unsigned char sjis_1st = '\0';
 1117 
 1118     unsigned char sjis_str[3];
 1119 #endif
 1120 
 1121     /*
 1122      * Make sure we have a non-empty string.  - FM
 1123      */
 1124     if (isEmpty(*str))
 1125     return str;
 1126 
 1127     /*
 1128      * FIXME: something's wrong with the limit checks here (clearing the
 1129      * buffer helps).
 1130      */
 1131     memset(replace_buf, 0, sizeof(replace_buf));
 1132 
 1133     /*
 1134      * Don't do byte translation if original AND target character sets are both
 1135      * iso-8859-1 (and we are not called to back-translate), or if we are in
 1136      * CJK mode.
 1137      */
 1138     if (IS_CJK_TTY
 1139 #ifdef EXP_JAPANESEUTF8_SUPPORT
 1140     && (strcmp(LYCharSet_UC[cs_from].MIMEname, "utf-8") != 0)
 1141     && (strcmp(LYCharSet_UC[cs_to].MIMEname, "utf-8") != 0)
 1142 #endif
 1143     ) {
 1144     no_bytetrans = TRUE;
 1145     } else if (cs_to <= 0 && cs_from == cs_to && (!Back || cs_to < 0)) {
 1146     no_bytetrans = TRUE;
 1147     } else {
 1148     /* No need to translate or examine the string any further */
 1149     no_bytetrans = (BOOL) (!use_lynx_specials && !Back &&
 1150                    UCNeedNotTranslate(cs_from, cs_to));
 1151     }
 1152     /*
 1153      * Save malloc/calloc overhead in simple case - kw
 1154      */
 1155     if (do_ent && hidden && (stype != st_URL) && (StrChr(*str, '&') == NULL))
 1156     do_ent = FALSE;
 1157 
 1158     /* Can't do, caller should figure out what to do... */
 1159     if (!UCCanTranslateFromTo(cs_from, cs_to)) {
 1160     if (cs_to < 0)
 1161         return NULL;
 1162     if (!do_ent && no_bytetrans)
 1163         return NULL;
 1164     no_bytetrans = TRUE;
 1165     } else if (cs_to < 0) {
 1166     do_ent = FALSE;
 1167     }
 1168 
          /* Nothing to expand and nothing to translate: leave the string untouched. */
 1169     if (!do_ent && no_bytetrans)
 1170     return str;
 1171     p = *str;
 1172 
 1173     if (!no_bytetrans) {
 1174     UCTransParams_clear(&T);
 1175     UCSetTransParams(&T, cs_from, &LYCharSet_UC[cs_from],
 1176              cs_to, &LYCharSet_UC[cs_to]);
 1177     from_is_utf8 = (BOOL) (LYCharSet_UC[cs_from].enc == UCT_ENC_UTF8);
 1178     output_utf8 = T.output_utf8;
 1179     repl_translated_C0 = T.repl_translated_C0;
 1180     puni = p;
 1181     } else if (do_ent) {
 1182     output_utf8 = (BOOL) (LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8 ||
 1183                   HText_hasUTF8OutputSet(HTMainText));
 1184     repl_translated_C0 = (BOOL) (LYCharSet_UC[cs_to].enc == UCT_ENC_8BIT_C0);
 1185     }
 1186 
 1187     lowest_8 = LYlowest_eightbit[cs_to];
 1188 
 1189     /*
 1190      * Work in place: q writes into the original buffer while p reads ahead.
 1191      * len is only passed to HTChunkCreate2() by CHUNK (and reused later on).
 1192      */
 1193     len = strlen(p) + 16;
 1194     q = p;
 1195 
 1196     qs = q;
 1197 
 1198 /*  Create the HTChunk only if we need it */
 1199 #define CHUNK (chunk ? chunk : (chunk = HTChunkCreate2(128, len+1)))
 1200 
      /*
       * Copy the pending output [qs, q) to the chunk, append s, and restart
       * in-place writing at the beginning of *str.  Expands to several
       * statements, so it must only be used as a complete statement.
       */
 1201 #define REPLACE_STRING(s) \
 1202         if (q != qs) HTChunkPutb(CHUNK, qs, (int) (q - qs)); \
 1203         HTChunkPuts(CHUNK, s); \
 1204         qs = q = *str
 1205 
      /*
       * Store c at q.  If q has moved past the read pointer p, first copy the
       * pending output to the chunk and restart writing at the beginning of *str.
       */
 1206 #define REPLACE_CHAR(c) if (q > p) { \
 1207         HTChunkPutb(CHUNK, qs, (int) (q - qs)); \
 1208         qs = q = *str; \
 1209         *q++ = c; \
 1210         } else \
 1211         *q++ = c
 1212 
 1213     /*
 1214      * Loop through string, making conversions as needed.
 1215      *
 1216      * The while() checks for a non-'\0' char only for the normal text states
 1217      * since other states may temporarily modify p or *p (which should be
 1218      * restored before S_done!) - kw
 1219      */
 1220     while (*p || (state != S_text && state != S_nonascii_text)) {
 1221     switch (state) {
 1222     case S_text:
 1223         code = UCH(*p);
 1224 #ifdef KANJI_CODE_OVERRIDE
 1225         if (HTCJK == JAPANESE && last_kcode == SJIS) {
 1226         if (sjis_1st == '\0' && (IS_SJIS_HI1(code) || IS_SJIS_HI2(code))) {
 1227             sjis_1st = UCH(code);
 1228         } else if (sjis_1st && IS_SJIS_LO(code)) {
 1229             sjis_1st = '\0';
 1230         } else {
 1231             if (conv_jisx0201kana && 0xA1 <= code && code <= 0xDF) {
 1232             sjis_str[2] = '\0';
 1233             JISx0201TO0208_SJIS(UCH(code),
 1234                         sjis_str, sjis_str + 1);
 1235             REPLACE_STRING(sjis_str);
 1236             p++;
 1237             continue;
 1238             }
 1239         }
 1240         }
 1241 #endif
 1242         if (*p == '\033') {
 1243         if ((IS_CJK_TTY && !hidden) || stype != st_HTML) {
 1244             state = S_esc;
 1245             if (stype == st_URL) {
 1246             REPLACE_STRING("%1B");
 1247             p++;
 1248             continue;
 1249             } else if (stype != st_HTML) {
 1250             p++;
 1251             continue;
 1252             } else {
 1253             *q++ = *p++;
 1254             continue;
 1255             }
 1256         } else if (!hidden) {
 1257             /*
 1258              * CJK handling not on, and not a hidden INPUT, so block
 1259              * escape.  - FM
 1260              */
 1261             state = S_next_char;
 1262         } else {
 1263             state = S_trans_byte;
 1264         }
 1265         } else {
 1266         state = (do_ent ? S_check_ent : S_trans_byte);
 1267         }
 1268         break;
 1269 
 1270     case S_esc:
 1271         if (*p == '$') {
 1272         state = S_dollar;
 1273         *q++ = *p++;
 1274         continue;
 1275         } else if (*p == '(') {
 1276         state = S_paren;
 1277         *q++ = *p++;
 1278         continue;
 1279         } else {
 1280         state = S_text;
 1281         }
 1282         break;
 1283 
 1284     case S_dollar:
 1285         if (*p == '@' || *p == 'B' || *p == 'A') {
 1286         state = S_nonascii_text;
 1287         *q++ = *p++;
 1288         continue;
 1289         } else if (*p == '(') {
 1290         state = S_dollar_paren;
 1291         *q++ = *p++;
 1292         continue;
 1293         } else {
 1294         state = S_text;
 1295         }
 1296         break;
 1297 
 1298     case S_dollar_paren:
 1299         if (*p == 'C') {
 1300         state = S_nonascii_text;
 1301         *q++ = *p++;
 1302         continue;
 1303         } else {
 1304         state = S_text;
 1305         }
 1306         break;
 1307 
 1308     case S_paren:
 1309         if (*p == 'B' || *p == 'J' || *p == 'T') {
 1310         state = S_text;
 1311         *q++ = *p++;
 1312         continue;
 1313         } else if (*p == 'I') {
 1314         state = S_nonascii_text;
 1315         *q++ = *p++;
 1316         continue;
 1317         } else {
 1318         state = S_text;
 1319         }
 1320         break;
 1321 
 1322     case S_nonascii_text:
 1323         if (*p == '\033') {
 1324         if ((IS_CJK_TTY && !hidden) || stype != st_HTML) {
 1325             state = S_esc;
 1326             if (stype == st_URL) {
 1327             REPLACE_STRING("%1B");
 1328             p++;
 1329             continue;
 1330             } else if (stype != st_HTML) {
 1331             p++;
 1332             continue;
 1333             }
 1334         }
 1335         }
 1336         *q++ = *p++;
 1337         continue;
 1338 
 1339     case S_trans_byte:
 1340         /* character translation goes here */
 1341         /*
 1342          * Don't do anything if we have no string, or if original AND
 1343          * target character sets are both iso-8859-1, or if we are in CJK
 1344          * mode.
 1345          */
 1346         if (*p == '\0' || no_bytetrans) {
 1347         state = S_got_outchar;
 1348         break;
 1349         }
 1350 
 1351         if (Back) {
 1352         int rev_c;
 1353 
 1354         if ((*p) == HT_NON_BREAK_SPACE ||
 1355             (*p) == HT_EN_SPACE) {
 1356             if (plain_space) {
 1357             code = *p = ' ';
 1358             state = S_got_outchar;
 1359             break;
 1360             } else {
 1361             code = 160;
 1362             if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||
 1363                 (LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) {
 1364                 state = S_got_outchar;
 1365                 break;
 1366             } else if (!(LYCharSet_UC[cs_from].enc == UCT_ENC_8859
 1367                      || (LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) {
 1368                 state = S_check_uni;
 1369                 break;
 1370             } else {
 1371                 *(unsigned char *) p = UCH(160);
 1372             }
 1373             }
 1374         } else if ((*p) == LY_SOFT_HYPHEN) {
 1375             code = 173;
 1376             if (LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||
 1377             (LYCharSet_UC[cs_to].like8859 & UCT_R_8859SPECL)) {
 1378             state = S_got_outchar;
 1379             break;
 1380             } else if (!(LYCharSet_UC[cs_from].enc == UCT_ENC_8859
 1381                  || (LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) {
 1382             state = S_check_uni;
 1383             break;
 1384             } else {
 1385             *(unsigned char *) p = UCH(173);
 1386             }
 1387 #ifdef EXP_JAPANESEUTF8_SUPPORT
 1388         } else if (output_utf8) {
 1389             if ((!strcmp(LYCharSet_UC[cs_from].MIMEname, "euc-jp") &&
 1390              (IS_EUC((unsigned char) (*p),
 1391                  (unsigned char) (*(p + 1))))) ||
 1392             (!strcmp(LYCharSet_UC[cs_from].MIMEname, "shift_jis") &&
 1393              (IS_SJIS_2BYTE((unsigned char) (*p),
 1394                     (unsigned char) (*(p + 1)))))) {
 1395             code = UCTransJPToUni(p, 2, cs_from);
 1396             p++;
 1397             state = S_check_uni;
 1398             break;
 1399             }
 1400 #endif
 1401         } else if (code < 127 || T.transp) {
 1402             state = S_got_outchar;
 1403             break;
 1404         }
 1405         rev_c = UCReverseTransChar(*p, cs_to, cs_from);
 1406         if (rev_c > 127) {
 1407             *p = (char) rev_c;
 1408             code = rev_c;
 1409             state = S_got_outchar;
 1410             break;
 1411         }
 1412         } else if (code < 127) {
 1413         state = S_got_outchar;
 1414         break;
 1415         }
 1416 
 1417         if (from_is_utf8) {
 1418         if (((*p) & 0xc0) == 0xc0) {
 1419             const char *pq = p;
 1420 
 1421             puni = p;
 1422             code = UCGetUniFromUtf8String(&pq);
 1423             if (code <= 0) {
 1424             code = UCH(*p);
 1425             } else {
 1426             what = P_utf8;
 1427             puni += (pq - (const char *) p);
 1428             }
 1429         }
 1430         } else if (use_lynx_specials && !Back &&
 1431                (code == 160 || code == 173) &&
 1432                (LYCharSet_UC[cs_from].enc == UCT_ENC_8859 ||
 1433             (LYCharSet_UC[cs_from].like8859 & UCT_R_8859SPECL))) {
 1434         if (code == 160)
 1435             code = *p = HT_NON_BREAK_SPACE;
 1436         else if (code == 173)
 1437             code = *p = LY_SOFT_HYPHEN;
 1438         state = S_got_outchar;
 1439         break;
 1440         } else if (T.trans_to_uni) {
 1441         code = UCTransToUni(*p, cs_from);
 1442         if (code <= 0) {
 1443             /* What else can we do? */
 1444             code = UCH(*p);
 1445         }
 1446         } else if (!T.trans_from_uni) {
 1447         state = S_got_outchar;
 1448         break;
 1449         }
 1450         /*
 1451          * Substitute Lynx special character for 160 (nbsp) if
 1452          * use_lynx_specials is set.
 1453          */
 1454         if (use_lynx_specials && !Back &&
 1455         (code == 160 || code == 173)) {
 1456         code = ((code == 160 ? HT_NON_BREAK_SPACE : LY_SOFT_HYPHEN));
 1457         state = S_got_outchar;
 1458         break;
 1459         }
 1460 
 1461         state = S_check_uni;
 1462         break;
 1463 
 1464     case S_check_ent:
 1465         if (*p == '&') {
 1466         char *pp = p + 1;
 1467 
 1468         len = strlen(pp);
 1469         /*
 1470          * Check for a numeric entity.  - FM
 1471          */
 1472         if (*pp == '#' && len > 2 &&
 1473             (*(pp + 1) == 'x' || *(pp + 1) == 'X') &&
 1474             UCH(*(pp + 2)) < 127 &&
 1475             isxdigit(UCH(*(pp + 2)))) {
 1476             what = P_hex;
 1477             state = S_ncr;
 1478         } else if (*pp == '#' && len > 2 &&
 1479                UCH(*(pp + 1)) < 127 &&
 1480                isdigit(UCH(*(pp + 1)))) {
 1481             what = P_decimal;
 1482             state = S_ncr;
 1483         } else if (UCH(*pp) < 127 &&
 1484                isalpha(UCH(*pp))) {
 1485             what = P_named;
 1486             state = S_named;
 1487         } else {
 1488             state = S_trans_byte;
 1489         }
 1490         } else {
 1491         state = S_trans_byte;
 1492         }
 1493         break;
 1494 
 1495     case S_ncr:
              /* Skip "&#x" (3 bytes) or "&#" (2 bytes) to reach the first digit. */
 1496         if (what == P_hex) {
 1497         p += 3;
 1498         } else {        /* P_decimal */
 1499         p += 2;
 1500         }
 1501         cp = p;
 1502         while (*p && UCH(*p) < 127 &&
 1503            (what == P_hex ? isxdigit(UCH(*p)) :
 1504             isdigit(UCH(*p)))) {
 1505         p++;
 1506         }
 1507         /*
 1508          * Save the terminator and isolate the digit(s).  - FM
 1509          */
 1510         cpe = *p;
 1511         if (*p)
 1512         *p++ = '\0';
 1513         /*
 1514          * Show the numeric entity if the value:
 1515          * (1) Is greater than 255 and unhandled Unicode.
 1516          * (2) Is less than 32, and not valid and we don't have HTCJK set.
 1517          * (3) Is 127 and we don't have HTPassHighCtrlRaw or HTCJK set.
 1518          * (4) Is 128 - 159 and we don't have HTPassHighCtrlNum set.
 1519          */
 1520         if (UCScanCode(&code, cp, (BOOL) (what == P_hex))) {
 1521         code = LYcp1252ToUnicode(code);
 1522         state = S_check_uni;
 1523         } else {
 1524         state = S_recover;
 1525         break;
 1526         }
 1527         break;
 1528 
 1529     case S_check_uni:
 1530         /*
 1531          * Show the numeric entity if the value:
 1532          * (2) Is less than 32, and not valid and we don't have HTCJK set.
 1533          * (3) Is 127 and we don't have HTPassHighCtrlRaw or HTCJK set.
 1534          * (4) Is 128 - 159 and we don't have HTPassHighCtrlNum set.
 1535          */
 1536         if ((code < 32 &&
 1537          code != 9 && code != 10 && code != 13 &&
 1538          !IS_CJK_TTY) ||
 1539         (code == 127 &&
 1540          !(HTPassHighCtrlRaw || IS_CJK_TTY)) ||
 1541         (code > 127 && code < 160 &&
 1542          !HTPassHighCtrlNum)) {
 1543         state = S_recover;
 1544         break;
 1545         }
 1546         /*
 1547          * Convert the value as an unsigned char, hex escaped if isURL is
 1548          * set and it's 8-bit, and then recycle the terminator if it is not
 1549          * a semicolon.  - FM
 1550          */
 1551         if (code > 159 && stype == st_URL) {
 1552         state = S_got_oututf8;
 1553         break;
 1554         }
 1555         /*
 1556          * For 160 (nbsp), use that value if it's a hidden INPUT, otherwise
 1557          * use an ASCII space (32) if plain_space is TRUE, otherwise use
 1558          * the Lynx special character.  - FM
 1559          */
 1560         if (code == 160) {
 1561         if (plain_space) {
 1562             code = ' ';
 1563             state = S_got_outchar;
 1564             break;
 1565         } else if (use_lynx_specials) {
 1566             code = HT_NON_BREAK_SPACE;
 1567             state = S_got_outchar;
 1568             break;
 1569         } else if ((hidden && !Back)
 1570                || (LYCharSet_UC[cs_to].codepoints & UCT_CP_SUPERSETOF_LAT1)
 1571                || LYCharSet_UC[cs_to].enc == UCT_ENC_8859
 1572                || (LYCharSet_UC[cs_to].like8859 &
 1573                    UCT_R_8859SPECL)) {
 1574             state = S_got_outchar;
 1575             break;
 1576         } else if (
 1577                   (LYCharSet_UC[cs_to].repertoire & UCT_REP_SUPERSETOF_LAT1)) {
 1578             ;       /* nothing, may be translated later */
 1579         } else {
 1580             code = ' ';
 1581             state = S_got_outchar;
 1582             break;
 1583         }
 1584         }
 1585         /*
 1586          * For 173 (shy), use that value if it's a hidden INPUT, otherwise
 1587          * ignore it if plain_space is TRUE, otherwise use the Lynx special
 1588          * character.  - FM
 1589          */
 1590         if (code == 173) {
 1591         if (plain_space) {
 1592             replace_buf[0] = '\0';
 1593             state = S_got_outstring;
 1594             break;
 1595         } else if (Back &&
 1596                !(LYCharSet_UC[cs_to].enc == UCT_ENC_8859 ||
 1597                  (LYCharSet_UC[cs_to].like8859 &
 1598                   UCT_R_8859SPECL))) {
 1599             ;       /* nothing, may be translated later */
 1600         } else if (hidden || Back) {
 1601             state = S_got_outchar;
 1602             break;
 1603         } else if (use_lynx_specials) {
 1604             code = LY_SOFT_HYPHEN;
 1605             state = S_got_outchar;
 1606             break;
 1607         }
 1608         }
 1609         /*
 1610          * Seek a translation from the chartrans tables.
 1611          */
 1612         if ((uck = UCTransUniChar(code,
 1613                       cs_to)) >= 32 &&
 1614         uck < 256 &&
 1615         (uck < 127 || uck >= lowest_8)) {
 1616         code = uck;
 1617         state = S_got_outchar;
 1618         break;
 1619         } else if ((uck == -4 ||
 1620             (repl_translated_C0 &&
 1621              uck > 0 && uck < 32)) &&
 1622         /*
 1623          * Not found; look for replacement string.
 1624          */
 1625                UCTransUniCharStr(replace_buf,
 1626                      60, code,
 1627                      cs_to,
 1628                      0) >= 0) {
 1629         state = S_got_outstring;
 1630         break;
 1631         }
 1632         if (output_utf8 &&
 1633         code > 127 && code < 0x7fffffffL) {
 1634         state = S_got_oututf8;
 1635         break;
 1636         }
 1637         /*
 1638          * For 8194 (ensp), 8195 (emsp), or 8201 (thinsp), use the
 1639          * character reference if it's a hidden INPUT, otherwise use an
 1640          * ASCII space (32) if plain_space is TRUE, otherwise use the Lynx
 1641          * special character.  - FM
 1642          */
 1643         if (code == 8194 || code == 8195 || code == 8201) {
 1644         if (hidden) {
 1645             state = S_recover;
 1646         } else if (plain_space) {
 1647             code = ' ';
 1648             state = S_got_outchar;
 1649         } else {
 1650             code = HT_EN_SPACE;
 1651             state = S_got_outchar;
 1652         }
 1653         break;
 1654         /*
 1655          * Ignore 8204 (zwnj), 8205 (zwj) 8206 (lrm), and 8207 (rlm),
 1656          * for now, if we got this far without finding a representation
 1657          * for them.
 1658          */
 1659         } else if (code == 8204 || code == 8205 ||
 1660                code == 8206 || code == 8207) {
 1661         CTRACE((tfp, "LYUCFullyTranslateString: Ignoring '%"
 1662             PRI_UCode_t "'.\n", code));
 1663         replace_buf[0] = '\0';
 1664         state = S_got_outstring;
 1665         break;
 1666         /*
 1667          * Show the numeric entity if the value:  (1) Is greater than
 1668          * 255 and unhandled Unicode.
 1669          */
 1670         } else if (code > 255) {
 1671         /*
 1672          * Illegal or not yet handled value.  Return "&#" verbatim and
 1673          * continue from there.  - FM
 1674          */
 1675         state = S_recover;
 1676         break;
 1677         /*
 1678          * If it's ASCII, or is 8-bit but HTPassEightBitNum is set or
 1679          * the character set is "ISO Latin 1", use its value.  - FM
 1680          */
 1681         } else if (code < 161 ||
 1682                (code < 256 &&
 1683             (HTPassEightBitNum || cs_to == LATIN1))) {
 1684         /*
 1685          * No conversion needed.
 1686          */
 1687         state = S_got_outchar;
 1688         break;
 1689 
 1690         /* The following disabled section doesn't make sense any more. 
 1691          * It used to make sense in the past, when S_check_named would
 1692          * look in "old style" tables in addition to what it does now. 
 1693          * Disabling of going to S_check_name here prevents endless
 1694          * looping between S_check_uni and S_check_names states, which
 1695          * could occur here for Latin 1 codes for some cs_to if they
 1696          * had no translation in that cs_to.  Normally all cs_to
 1697          * *should* now have valid translations via UCTransUniChar or
 1698          * UCTransUniCharStr for all Latin 1 codes, so that we would
 1699          * not get here anyway, and no loop could occur.  Still, if we
 1700          * *do* get here, FALL THROUGH to case S_recover now.  - kw
 1701          */
 1702 #if 0
 1703         /*
 1704          * If we get to here, convert and handle the character as a
 1705          * named entity.  - FM
 1706          */
 1707         } else {
 1708         name = HTMLGetEntityName(code - 160);
 1709         state = S_check_name;
 1710         break;
 1711 #endif
 1712         }
 1713         /* FALLTHRU */
 1714 
 1715     case S_recover:
 1716         if (what == P_decimal || what == P_hex) {
 1717         /*
 1718          * Illegal or not yet handled value.  Return "&#" verbatim and
 1719          * continue from there.  - FM
 1720          */
 1721         *q++ = '&';
 1722         *q++ = '#';
 1723         if (what == P_hex)
 1724             *q++ = 'x';
 1725         if (cpe != '\0')
 1726             *(p - 1) = cpe;
 1727         p = cp;
 1728         state = S_done;
 1729         } else if (what == P_named) {
              /* Restore the byte after the name and emit the '&' literally. */
 1730         *cp = cpe;
 1731         *q++ = '&';
 1732         state = S_done;
 1733         } else if (!T.output_utf8 && stype == st_HTML && !hidden &&
 1734                !(HTPassEightBitRaw &&
 1735              UCH(*p) >= lowest_8)) {
              /* Raw character without a usable translation: output "U" + code. */
 1736         sprintf(replace_buf, "U%.2" PRI_UCode_t "", code);
 1737 
 1738         state = S_got_outstring;
 1739         } else {
              /* Otherwise pass the current input byte through as it is. */
 1740         puni = p;
 1741         code = UCH(*p);
 1742         state = S_got_outchar;
 1743         }
 1744         break;
 1745 
 1746     case S_named:
 1747         cp = ++p;
 1748         while (*cp && UCH(*cp) < 127 &&
 1749            isalnum(UCH(*cp)))
 1750         cp++;
 1751         cpe = *cp;
 1752         *cp = '\0';
 1753         name = p;
 1754         state = S_check_name;
 1755         break;
 1756 
 1757     case S_check_name:
 1758         /*
 1759          * Seek the Unicode value for the named entity.
 1760          *
 1761          * !!!!  We manually recover the case of '=' terminator which is
 1762          * commonly found on query to CGI-scripts enclosed as href= URLs
 1763          * like "somepath/?x=1&yz=2" Without this dirty fix, submission of
 1764          * such URLs was broken if &yz string happened to be a recognized
 1765          * entity name.  - LP
 1766          */
 1767         if (((code = HTMLGetEntityUCValue(name)) > 0) &&
 1768         !((cpe == '=') && (stype == st_URL))) {
 1769         state = S_check_uni;
 1770         break;
 1771         }
 1772         /*
 1773          * Didn't find the entity.  Return verbatim.
 1774          */
 1775         state = S_recover;
 1776         break;
 1777 
 1778         /* * * O U T P U T   S T A T E S * * */
 1779 
 1780     case S_got_oututf8:
 1781         if (code > 255 ||
 1782         (code >= 128 && LYCharSet_UC[cs_to].enc == UCT_ENC_UTF8)) {
 1783         UCConvertUniToUtf8(code, replace_buf);
 1784         state = S_got_outstring;
 1785         } else {
 1786         state = S_got_outchar;
 1787         }
 1788         break;
 1789     case S_got_outstring:
              /*
               * Put back a non-';' terminator that was overwritten with '\0' and
               * leave p on the last input byte consumed; S_next_char steps past it.
               */
 1790         if (what == P_decimal || what == P_hex) {
 1791         if (cpe != ';' && cpe != '\0')
 1792             *(--p) = cpe;
 1793         p--;
 1794         } else if (what == P_named) {
 1795         *cp = cpe;
 1796         p = (*cp != ';') ? (cp - 1) : cp;
 1797         } else if (what == P_utf8) {
 1798         p = puni;
 1799         }
              /* An empty replacement means the character is simply dropped. */
 1800         if (replace_buf[0] == '\0') {
 1801         state = S_next_char;
 1802         break;
 1803         }
 1804         if (stype == st_URL) {
 1805         code = replace_buf[0];  /* assume string OK if first char is */
 1806         if (code >= 127 ||
 1807             (code < 32 && (code != 9 && code != 10 && code != 0))) {
 1808             state = S_put_urlstring;
 1809             break;
 1810         }
 1811         }
 1812         REPLACE_STRING(replace_buf);
 1813         state = S_next_char;
 1814         break;
 1815     case S_put_urlstring:
 1816         esc = HTEscape(replace_buf, URL_XALPHAS);
 1817         REPLACE_STRING(esc);
 1818         FREE(esc);
 1819         state = S_next_char;
 1820         break;
 1821     case S_got_outchar:
              /* Same repositioning of p as in S_got_outstring. */
 1822         if (what == P_decimal || what == P_hex) {
 1823         if (cpe != ';' && cpe != '\0')
 1824             *(--p) = cpe;
 1825         p--;
 1826         } else if (what == P_named) {
 1827         *cp = cpe;
 1828         p = (*cp != ';') ? (cp - 1) : cp;
 1829         } else if (what == P_utf8) {
 1830         p = puni;
 1831         }
 1832         if (stype == st_URL &&
 1833         /*  Not a full HTEscape, only for 8bit and ctrl chars */
 1834         (TOASCII(code) >= 127 ||    /* S/390 -- gil -- 1925 */
 1835          (code < ' ' && (code != '\t' && code != '\n')))) {
 1836         state = S_put_urlchar;
 1837         break;
 1838         } else if (!hidden && code == 10 && *p == 10
 1839                && q != qs && *(q - 1) == 13) {
 1840         /*
 1841          * If this is not a hidden string, and the current char is the
 1842          * LF ('\n') of a CRLF pair, drop the CR ('\r').  - KW
 1843          */
 1844         *(q - 1) = *p++;
 1845         state = S_done;
 1846         break;
 1847         }
 1848         *q++ = (char) code;
 1849         state = S_next_char;
 1850         break;
 1851     case S_put_urlchar:
 1852         *q++ = '%';
 1853         REPLACE_CHAR(hex[(TOASCII(code) >> 4) & 15]);   /* S/390 -- gil -- 1944 */
 1854         REPLACE_CHAR(hex[(TOASCII(code) & 15)]);
 1855         /* fall through */
 1856     case S_next_char:
 1857         p++;        /* fall through */
 1858     case S_done:
 1859         state = S_text;
 1860         what = P_text;
 1861         /* for next round */
 1862     }
 1863     }
 1864 
          /*
           * Terminate the in-place output.  If a chunk was created, append the
           * remaining output to it and replace *str with a copy of the chunk data;
           * otherwise the result already sits in the original buffer.
           */
 1865     *q = '\0';
 1866     if (chunk) {
 1867     HTChunkPutb(CHUNK, qs, (int) (q - qs + 1)); /* also terminates */
 1868     if (stype == st_URL || stype == st_other) {
 1869         LYTrimHead(chunk->data);
 1870         LYTrimTail(chunk->data);
 1871     }
 1872     StrAllocCopy(*str, chunk->data);
 1873     HTChunkFree(chunk);
 1874     } else {
 1875     if (stype == st_URL || stype == st_other) {
 1876         LYTrimHead(qs);
 1877         LYTrimTail(qs);
 1878     }
 1879     }
 1880     return str;
 1881 }
 1882 
      /* NOTE(review): CHUNK is defined inside the function as well but is not #undef'd here. */
 1883 #undef REPLACE_CHAR
 1884 #undef REPLACE_STRING
 1885 
 1886 BOOL LYUCTranslateHTMLString(char **str,
 1887                  int cs_from,
 1888                  int cs_to,
 1889                  int use_lynx_specials,
 1890                  int plain_space,
 1891                  int hidden,
 1892                  CharUtil_st stype)
 1893 {
 1894     BOOL ret = YES;
 1895 
 1896     /* May reallocate *str even if cs_to == 0 */
 1897     if (!LYUCFullyTranslateString(str, cs_from, cs_to, TRUE,
 1898                   use_lynx_specials, plain_space, hidden,
 1899                   NO, stype)) {
 1900     ret = NO;
 1901     }
 1902     return ret;
 1903 }
 1904 
 1905 BOOL LYUCTranslateBackFormData(char **str,
 1906                    int cs_from,
 1907                    int cs_to,
 1908                    int plain_space)
 1909 {
 1910     char **ret;
 1911 
 1912     /* May reallocate *str */
 1913     ret = (LYUCFullyTranslateString(str, cs_from, cs_to, FALSE,
 1914                     NO, plain_space, YES,
 1915                     YES, st_HTML));
 1916     return (BOOL) (ret != NULL);
 1917 }
 1918 
 1919 /*
 1920  * Parse a parameter from an HTML META tag, i.e., the CONTENT.
 1921  */
 1922 char *LYParseTagParam(char *from,
 1923               const char *name)
 1924 {
 1925     size_t len = strlen(name);
 1926     char *result = NULL;
 1927     char *string = from;
 1928 
 1929     do {
 1930     if ((string = StrChr(string, ';')) == NULL)
 1931         return NULL;
 1932     while (*string != '\0' && (*string == ';' || isspace(UCH(*string)))) {
 1933         string++;
 1934     }
 1935     if (strlen(string) < len)
 1936         return NULL;
 1937     } while (strncasecomp(string, name, (int) len) != 0);
 1938     string += len;
 1939     while (*string != '\0' && (isspace(UCH(*string)) || *string == '=')) {
 1940     string++;
 1941     }
 1942 
 1943     StrAllocCopy(result, string);
 1944     len = 0;
 1945     while (isprint(UCH(string[len])) && !isspace(UCH(string[len]))) {
 1946     len++;
 1947     }
 1948     result[len] = '\0';
 1949 
 1950     /*
 1951      * Strip single quotes, just in case.
 1952      */
 1953     if (len > 2 && result[0] == '\'' && result[len - 1] == result[0]) {
 1954     result[len - 1] = '\0';
 1955     for (string = result; (string[0] = string[1]) != '\0'; ++string) ;
 1956     }
 1957     return result;
 1958 }
 1959 
 1960 /*
 1961  * Given a refresh-URL content string, parses the delay time and the URL
 1962  * string.  Ignore the remainder of the content.
 1963  */
 1964 void LYParseRefreshURL(char *content,
 1965                char **p_seconds,
 1966                char **p_address)
 1967 {
 1968     char *cp;
 1969     char *cp1 = NULL;
 1970     char *Seconds = NULL;
 1971 
 1972     /*
 1973      * Look for the Seconds field.  - FM
 1974      */
 1975     cp = LYSkipBlanks(content);
 1976     if (*cp && isdigit(UCH(*cp))) {
 1977     cp1 = cp;
 1978     while (*cp1 && isdigit(UCH(*cp1)))
 1979         cp1++;
 1980     StrnAllocCopy(Seconds, cp, (size_t) (cp1 - cp));
 1981     }
 1982     *p_seconds = Seconds;
 1983     *p_address = LYParseTagParam(content, "URL");
 1984 
 1985     CTRACE((tfp,
 1986         "LYParseRefreshURL\n\tcontent: %s\n\tseconds: %s\n\taddress: %s\n",
 1987         content, NonNull(*p_seconds), NonNull(*p_address)));
 1988 }
 1989 
 1990 /*
 1991  *  This function processes META tags in HTML streams. - FM
 1992  */
 1993 void LYHandleMETA(HTStructured * me, const BOOL *present,
 1994           STRING2PTR value,
 1995           char **include GCC_UNUSED)
 1996 {
 1997     char *http_equiv = NULL, *name = NULL, *content = NULL, *charset = NULL;
 1998     char *href = NULL, *id_string = NULL, *temp = NULL;
 1999     char *cp, *cp0, *cp1 = NULL;
 2000     int url_type = 0;
 2001 
 2002     if (!me || !present)
 2003     return;
 2004 
 2005     /*
 2006      * Load the attributes for possible use by Lynx.  - FM
 2007      */
 2008     if (present[HTML_META_HTTP_EQUIV] &&
 2009     non_empty(value[HTML_META_HTTP_EQUIV])) {
 2010     StrAllocCopy(http_equiv, value[HTML_META_HTTP_EQUIV]);
 2011     convert_to_spaces(http_equiv, TRUE);
 2012     LYUCTranslateHTMLString(&http_equiv, me->tag_charset, me->tag_charset,
 2013                 NO, NO, YES, st_other);
 2014     if (*http_equiv == '\0') {
 2015         FREE(http_equiv);
 2016     }
 2017     }
 2018     if (present[HTML_META_NAME] &&
 2019     non_empty(value[HTML_META_NAME])) {
 2020     StrAllocCopy(name, value[HTML_META_NAME]);
 2021     convert_to_spaces(name, TRUE);
 2022     LYUCTranslateHTMLString(&name, me->tag_charset, me->tag_charset,
 2023                 NO, NO, YES, st_other);
 2024     if (*name == '\0') {
 2025         FREE(name);
 2026     }
 2027     }
 2028     if (present[HTML_META_CONTENT] &&
 2029     non_empty(value[HTML_META_CONTENT])) {
 2030     /*
 2031      * Technically, we should be creating a comma-separated list, but META
 2032      * tags come one at a time, and we'll handle (or ignore) them as each
 2033      * is received.  Also, at this point, we only trim leading and trailing
 2034      * blanks from the CONTENT value, without translating any named
 2035      * entities or numeric character references, because how we should do
 2036      * that depends on what type of information it contains, and whether or
 2037      * not any of it might be sent to the screen.  - FM
 2038      */
 2039     StrAllocCopy(content, value[HTML_META_CONTENT]);
 2040     convert_to_spaces(content, FALSE);
 2041     LYTrimHead(content);
 2042     LYTrimTail(content);
 2043     if (*content == '\0') {
 2044         FREE(content);
 2045     }
 2046     }
 2047     if (present[HTML_META_CHARSET] &&
 2048     non_empty(value[HTML_META_CHARSET])) {
 2049     StrAllocCopy(charset, value[HTML_META_CHARSET]);
 2050     convert_to_spaces(charset, TRUE);
 2051     LYUCTranslateHTMLString(&charset, me->tag_charset, me->tag_charset,
 2052                 NO, NO, YES, st_other);
 2053     if (*charset == '\0') {
 2054         FREE(charset);
 2055     }
 2056     }
 2057     CTRACE((tfp,
 2058         "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\" CHARSET=\"%s\"\n",
 2059         NONNULL(http_equiv),
 2060         NONNULL(name),
 2061         NONNULL(content),
 2062         NONNULL(charset)));
 2063 
 2064     /*
 2065      * Check for a text/html Content-Type with a charset directive, if we
 2066      * didn't already set the charset via a server's header.  - AAC & FM
 2067      */
 2068     if (isEmpty(me->node_anchor->charset) &&
 2069     (charset ||
 2070      (!strcasecomp(NonNull(http_equiv), "Content-Type") && content))) {
 2071     LYUCcharset *p_in = NULL;
 2072     LYUCcharset *p_out = NULL;
 2073 
 2074     if (charset) {
 2075         LYLowerCase(charset);
 2076     } else {
 2077         LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
 2078                     NO, NO, YES, st_other);
 2079         LYLowerCase(content);
 2080     }
 2081 
 2082     if ((cp1 = charset) != NULL ||
 2083         (cp1 = strstr(content, "charset")) != NULL) {
 2084         BOOL chartrans_ok = NO;
 2085         char *cp3 = NULL, *cp4;
 2086         int chndl;
 2087 
 2088         if (!charset)
 2089         cp1 += 7;
 2090         while (*cp1 == ' ' || *cp1 == '=' || *cp1 == '"')
 2091         cp1++;
 2092 
 2093         StrAllocCopy(cp3, cp1); /* copy to mutilate more */
 2094         for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '"' &&
 2095                  *cp4 != ';' && *cp4 != ':' &&
 2096                  !WHITE(*cp4)); cp4++) {
 2097         ;       /* do nothing */
 2098         }
 2099         *cp4 = '\0';
 2100         cp4 = cp3;
 2101         chndl = UCGetLYhndl_byMIME(cp3);
 2102 
 2103 #ifdef CAN_SWITCH_DISPLAY_CHARSET
 2104         /* Allow a switch to a more suitable display charset */
 2105         if (Switch_Display_Charset(chndl, SWITCH_DISPLAY_CHARSET_MAYBE)) {
 2106         /* UCT_STAGE_STRUCTURED and UCT_STAGE_HTEXT
 2107            should have the same setting for UCInfoStage. */
 2108         HTAnchor_getUCInfoStage(me->node_anchor, UCT_STAGE_STRUCTURED);
 2109 
 2110         me->outUCLYhndl = current_char_set;
 2111         HTAnchor_setUCInfoStage(me->node_anchor,
 2112                     current_char_set,
 2113                     UCT_STAGE_HTEXT,
 2114                     UCT_SETBY_MIME);    /* highest priorty! */
 2115         HTAnchor_setUCInfoStage(me->node_anchor,
 2116                     current_char_set,
 2117                     UCT_STAGE_STRUCTURED,
 2118                     UCT_SETBY_MIME);    /* highest priorty! */
 2119         me->outUCI = HTAnchor_getUCInfoStage(me->node_anchor,
 2120                              UCT_STAGE_HTEXT);
 2121         /* The SGML stage will be reset in change_chartrans_handling */
 2122         }
 2123 #endif
 2124 
 2125         if (UCCanTranslateFromTo(chndl, current_char_set)) {
 2126         chartrans_ok = YES;
 2127         StrAllocCopy(me->node_anchor->charset, cp4);
 2128         HTAnchor_setUCInfoStage(me->node_anchor, chndl,
 2129                     UCT_STAGE_PARSER,
 2130                     UCT_SETBY_STRUCTURED);
 2131         } else if (chndl < 0) {
 2132         /*
 2133          * Got something but we don't recognize it.
 2134          */
 2135         chndl = UCLYhndl_for_unrec;
 2136         if (chndl < 0)  /* UCLYhndl_for_unrec not defined :-( */
 2137             chndl = UCLYhndl_for_unspec;    /* always >= 0 */
 2138         if (UCCanTranslateFromTo(chndl, current_char_set)) {
 2139             chartrans_ok = YES;
 2140             HTAnchor_setUCInfoStage(me->node_anchor, chndl,
 2141                         UCT_STAGE_PARSER,
 2142                         UCT_SETBY_STRUCTURED);
 2143         }
 2144         }
 2145         if (chartrans_ok) {
 2146         p_in = HTAnchor_getUCInfoStage(me->node_anchor,
 2147                            UCT_STAGE_PARSER);
 2148         p_out = HTAnchor_setUCInfoStage(me->node_anchor,
 2149                         current_char_set,
 2150                         UCT_STAGE_HTEXT,
 2151                         UCT_SETBY_DEFAULT);
 2152         if (!p_out) {
 2153             /*
 2154              * Try again.
 2155              */
 2156             p_out = HTAnchor_getUCInfoStage(me->node_anchor,
 2157                             UCT_STAGE_HTEXT);
 2158         }
 2159         if (!strcmp(p_in->MIMEname, "x-transparent")) {
 2160             HTPassEightBitRaw = TRUE;
 2161             HTAnchor_setUCInfoStage(me->node_anchor,
 2162                         HTAnchor_getUCLYhndl(me->node_anchor,
 2163                                  UCT_STAGE_HTEXT),
 2164                         UCT_STAGE_PARSER,
 2165                         UCT_SETBY_DEFAULT);
 2166         }
 2167         if (!strcmp(p_out->MIMEname, "x-transparent")) {
 2168             HTPassEightBitRaw = TRUE;
 2169             HTAnchor_setUCInfoStage(me->node_anchor,
 2170                         HTAnchor_getUCLYhndl(me->node_anchor,
 2171                                  UCT_STAGE_PARSER),
 2172                         UCT_STAGE_HTEXT,
 2173                         UCT_SETBY_DEFAULT);
 2174         }
 2175         if ((p_in->enc != UCT_ENC_CJK)
 2176 #ifdef EXP_JAPANESEUTF8_SUPPORT
 2177             && (p_in->enc != UCT_ENC_UTF8)
 2178 #endif
 2179             ) {
 2180             HTCJK = NOCJK;
 2181             if (!(p_in->codepoints &
 2182               UCT_CP_SUBSETOF_LAT1) &&
 2183             chndl == current_char_set) {
 2184             HTPassEightBitRaw = TRUE;
 2185             }
 2186         } else if (p_out->enc == UCT_ENC_CJK) {
 2187             Set_HTCJK(p_in->MIMEname, p_out->MIMEname);
 2188         }
 2189         LYGetChartransInfo(me);
 2190         /*
 2191          * Update the chartrans info homologously to a Content-Type
 2192          * MIME header with a charset parameter.  - FM
 2193          */
 2194         if (me->UCLYhndl != chndl) {
 2195             HTAnchor_setUCInfoStage(me->node_anchor, chndl,
 2196                         UCT_STAGE_MIME,
 2197                         UCT_SETBY_STRUCTURED);
 2198             HTAnchor_setUCInfoStage(me->node_anchor, chndl,
 2199                         UCT_STAGE_PARSER,
 2200                         UCT_SETBY_STRUCTURED);
 2201             me->inUCLYhndl = HTAnchor_getUCLYhndl(me->node_anchor,
 2202                               UCT_STAGE_PARSER);
 2203             me->inUCI = HTAnchor_getUCInfoStage(me->node_anchor,
 2204                             UCT_STAGE_PARSER);
 2205         }
 2206         UCSetTransParams(&me->T,
 2207                  me->inUCLYhndl, me->inUCI,
 2208                  me->outUCLYhndl, me->outUCI);
 2209         } else {
 2210         /*
 2211          * Cannot translate.  If according to some heuristic the given
 2212          * charset and the current display character both are likely to
 2213          * be like ISO-8859 in structure, pretend we have some kind of
 2214          * match.
 2215          */
 2216         BOOL given_is_8859 = (BOOL) (!StrNCmp(cp4, "iso-8859-", 9) &&
 2217                          isdigit(UCH(cp4[9])));
 2218         BOOL given_is_8859like = (BOOL) (given_is_8859
 2219                          || !StrNCmp(cp4, "windows-", 8)
 2220                          || !StrNCmp(cp4, "cp12", 4)
 2221                          || !StrNCmp(cp4, "cp-12", 5));
 2222         BOOL given_and_display_8859like = (BOOL) (given_is_8859like &&
 2223                               (strstr(LYchar_set_names[current_char_set],
 2224                                   "ISO-8859") ||
 2225                                strstr(LYchar_set_names[current_char_set],
 2226                                   "windows-")));
 2227 
 2228         if (given_is_8859) {
 2229             cp1 = &cp4[10];
 2230             while (*cp1 &&
 2231                isdigit(UCH((*cp1))))
 2232             cp1++;
 2233             *cp1 = '\0';
 2234         }
 2235         if (given_and_display_8859like) {
 2236             StrAllocCopy(me->node_anchor->charset, cp4);
 2237             HTPassEightBitRaw = TRUE;
 2238         }
 2239         HTAlert(*cp4 ? cp4 : me->node_anchor->charset);
 2240 
 2241         }
 2242         FREE(cp3);
 2243 
 2244         if (me->node_anchor->charset) {
 2245         CTRACE((tfp,
 2246             "LYHandleMETA: New charset: %s\n",
 2247             me->node_anchor->charset));
 2248         }
 2249     }
 2250     /*
 2251      * Set the kcode element based on the charset.  - FM
 2252      */
 2253     HText_setKcode(me->text, me->node_anchor->charset, p_in);
 2254     }
 2255 
 2256     /*
 2257      * Make sure we have META name/value pairs to handle.  - FM
 2258      */
 2259     if (!(http_equiv || name) || !content)
 2260     goto free_META_copies;
 2261 
 2262     /*
 2263      * Check for a no-cache Pragma
 2264      * or Cache-Control directive. - FM
 2265      */
 2266     if (!strcasecomp(NonNull(http_equiv), "Pragma") ||
 2267     !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
 2268     LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
 2269                 NO, NO, YES, st_other);
 2270     if (!strcasecomp(content, "no-cache")) {
 2271         me->node_anchor->no_cache = TRUE;
 2272         HText_setNoCache(me->text);
 2273     }
 2274 
 2275     /*
 2276      * If we didn't get a Cache-Control MIME header, and the META has one,
 2277      * convert to lowercase, store it in the anchor element, and if we
 2278      * haven't yet set no_cache, check whether we should.  - FM
 2279      */
 2280     if ((!me->node_anchor->cache_control) &&
 2281         !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
 2282         LYLowerCase(content);
 2283         StrAllocCopy(me->node_anchor->cache_control, content);
 2284         if (me->node_anchor->no_cache == FALSE) {
 2285         cp0 = content;
 2286         while ((cp = strstr(cp0, "no-cache")) != NULL) {
 2287             cp += 8;
 2288             while (*cp != '\0' && WHITE(*cp))
 2289             cp++;
 2290             if (*cp == '\0' || *cp == ';') {
 2291             me->node_anchor->no_cache = TRUE;
 2292             HText_setNoCache(me->text);
 2293             break;
 2294             }
 2295             cp0 = cp;
 2296         }
 2297         if (me->node_anchor->no_cache == TRUE)
 2298             goto free_META_copies;
 2299         cp0 = content;
 2300         while ((cp = strstr(cp0, "max-age")) != NULL) {
 2301             cp += 7;
 2302             while (*cp != '\0' && WHITE(*cp))
 2303             cp++;
 2304             if (*cp == '=') {
 2305             cp++;
 2306             while (*cp != '\0' && WHITE(*cp))
 2307                 cp++;
 2308             if (isdigit(UCH(*cp))) {
 2309                 cp0 = cp;
 2310                 while (isdigit(UCH(*cp)))
 2311                 cp++;
 2312                 if (*cp0 == '0' && cp == (cp0 + 1)) {
 2313                 me->node_anchor->no_cache = TRUE;
 2314                 HText_setNoCache(me->text);
 2315                 break;
 2316                 }
 2317             }
 2318             }
 2319             cp0 = cp;
 2320         }
 2321         }
 2322     }
 2323 
 2324     /*
 2325      * Check for an Expires directive. - FM
 2326      */
 2327     } else if (!strcasecomp(NonNull(http_equiv), "Expires")) {
 2328     /*
 2329      * If we didn't get an Expires MIME header, store it in the anchor
 2330      * element, and if we haven't yet set no_cache, check whether we
 2331      * should.  Note that we don't accept a Date header via META tags,
 2332      * because it's likely to be untrustworthy, but do check for a Date
 2333      * header from a server when making the comparison.  - FM
 2334      */
 2335     LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
 2336                 NO, NO, YES, st_other);
 2337     StrAllocCopy(me->node_anchor->expires, content);
 2338     if (me->node_anchor->no_cache == FALSE) {
 2339         if (!strcmp(content, "0")) {
 2340         /*
 2341          * The value is zero, which we treat as an absolute no-cache
 2342          * directive.  - FM
 2343          */
 2344         me->node_anchor->no_cache = TRUE;
 2345         HText_setNoCache(me->text);
 2346         } else if (me->node_anchor->date != NULL) {
 2347         /*
 2348          * We have a Date header, so check if the value is less than or
 2349          * equal to that.  - FM
 2350          */
 2351         if (LYmktime(content, TRUE) <=
 2352             LYmktime(me->node_anchor->date, TRUE)) {
 2353             me->node_anchor->no_cache = TRUE;
 2354             HText_setNoCache(me->text);
 2355         }
 2356         } else if (LYmktime(content, FALSE) == 0) {
 2357         /*
 2358          * We don't have a Date header, and the value is in past for
 2359          * us.  - FM
 2360          */
 2361         me->node_anchor->no_cache = TRUE;
 2362         HText_setNoCache(me->text);
 2363         }
 2364     }
 2365 
 2366     /*
 2367      * Check for a Refresh directive.  - FM
 2368      */
 2369     } else if (!strcasecomp(NonNull(http_equiv), "Refresh")) {
 2370     char *Seconds = NULL;
 2371 
 2372     LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
 2373                 NO, NO, YES, st_other);
 2374     LYParseRefreshURL(content, &Seconds, &href);
 2375 
 2376     if (Seconds) {
 2377         if (href) {
 2378         /*
 2379          * We found a URL field, so check it out.  - FM
 2380          */
 2381         if (!LYLegitimizeHREF(me, &href, TRUE, FALSE)) {
 2382             /*
 2383              * The specs require a complete URL, but this is a
 2384              * Netscapism, so don't expect the author to know that.  -
 2385              * FM
 2386              */
 2387             HTUserMsg(REFRESH_URL_NOT_ABSOLUTE);
 2388             /*
 2389              * Use the document's address as the base.  - FM
 2390              */
 2391             if (*href != '\0') {
 2392             temp = HTParse(href,
 2393                        me->node_anchor->address, PARSE_ALL);
 2394             StrAllocCopy(href, temp);
 2395             FREE(temp);
 2396             } else {
 2397             StrAllocCopy(href, me->node_anchor->address);
 2398             HText_setNoCache(me->text);
 2399             }
 2400 
 2401         } else {
 2402             /*
 2403              * Check whether to fill in localhost.  - FM
 2404              */
 2405             LYFillLocalFileURL(&href,
 2406                        (me->inBASE ?
 2407                     me->base_href : me->node_anchor->address));
 2408         }
 2409 
 2410         /*
 2411          * Set the no_cache flag if the Refresh URL is the same as the
 2412          * document's address.  - FM
 2413          */
 2414         if (!strcmp(href, me->node_anchor->address)) {
 2415             HText_setNoCache(me->text);
 2416         }
 2417         } else {
 2418         /*
 2419          * We didn't find a URL field, so use the document's own
 2420          * address and set the no_cache flag.  - FM
 2421          */
 2422         StrAllocCopy(href, me->node_anchor->address);
 2423         HText_setNoCache(me->text);
 2424         }
 2425         /*
 2426          * Check for an anchor in http or https URLs.  - FM
 2427          */
 2428         cp = NULL;
 2429         /* id_string seems to be used wrong below if given.
 2430            not that it matters much.  avoid setting it here. - kw */
 2431         if (track_internal_links &&
 2432         (StrNCmp(href, "http", 4) == 0) &&
 2433         (cp = StrChr(href, '#')) != NULL) {
 2434         StrAllocCopy(id_string, cp);
 2435         *cp = '\0';
 2436         }
 2437         if (me->inA) {
 2438         /*
 2439          * Ugh!  The META tag, which is a HEAD element, is in an
 2440          * Anchor, which is BODY element.  All we can do is close the
 2441          * Anchor and cross our fingers.  - FM
 2442          */
 2443         if (me->inBoldA == TRUE && me->inBoldH == FALSE)
 2444             HText_appendCharacter(me->text, LY_BOLD_END_CHAR);
 2445         me->inBoldA = FALSE;
 2446         HText_endAnchor(me->text, me->CurrentANum);
 2447         me->inA = FALSE;
 2448         me->CurrentANum = 0;
 2449         }
 2450         me->CurrentA = HTAnchor_findChildAndLink
 2451         (
 2452             me->node_anchor,    /* Parent */
 2453             id_string,  /* Tag */
 2454             href,   /* Addresss */
 2455             (HTLinkType *) 0);  /* Type */
 2456         if (id_string)
 2457         *cp = '#';
 2458         FREE(id_string);
 2459         LYEnsureSingleSpace(me);
 2460         if (me->inUnderline == FALSE)
 2461         HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR);
 2462         HTML_put_string(me, "REFRESH(");
 2463         HTML_put_string(me, Seconds);
 2464         HTML_put_string(me, " sec):");
 2465         FREE(Seconds);
 2466         if (me->inUnderline == FALSE)
 2467         HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR);
 2468         HTML_put_character(me, ' ');
 2469         me->in_word = NO;
 2470         HText_beginAnchor(me->text, me->inUnderline, me->CurrentA);
 2471         if (me->inBoldH == FALSE)
 2472         HText_appendCharacter(me->text, LY_BOLD_START_CHAR);
 2473         HTML_put_string(me, href);
 2474         FREE(href);
 2475         if (me->inBoldH == FALSE)
 2476         HText_appendCharacter(me->text, LY_BOLD_END_CHAR);
 2477         HText_endAnchor(me->text, 0);
 2478         LYEnsureSingleSpace(me);
 2479     }
 2480 
 2481     /*
 2482      * Check for a suggested filename via a Content-Disposition with a
 2483      * filename=name.suffix in it, if we don't already have it via a server
 2484      * header.  - FM
 2485      */
 2486     } else if (isEmpty(me->node_anchor->SugFname) &&
 2487            !strcasecomp((http_equiv ?
 2488                  http_equiv : ""), "Content-Disposition")) {
 2489     cp = content;
 2490     while (*cp != '\0' && strncasecomp(cp, "filename", 8))
 2491         cp++;
 2492     if (*cp != '\0') {
 2493         cp = LYSkipBlanks(cp + 8);
 2494         if (*cp == '=')
 2495         cp++;
 2496         cp = LYSkipBlanks(cp);
 2497         if (*cp != '\0') {
 2498         StrAllocCopy(me->node_anchor->SugFname, cp);
 2499         if (*me->node_anchor->SugFname == '"') {
 2500             if ((cp = StrChr((me->node_anchor->SugFname + 1),
 2501                      '"')) != NULL) {
 2502             *(cp + 1) = '\0';
 2503             HTMIME_TrimDoubleQuotes(me->node_anchor->SugFname);
 2504             if (isEmpty(me->node_anchor->SugFname)) {
 2505                 FREE(me->node_anchor->SugFname);
 2506             }
 2507             } else {
 2508             FREE(me->node_anchor->SugFname);
 2509             }
 2510         }
 2511 #if defined(UNIX) && !defined(DOSPATH)
 2512         /*
 2513          * If blanks are not legal for local filenames, replace them
 2514          * with underscores.
 2515          */
 2516         if ((cp = me->node_anchor->SugFname) != NULL) {
 2517             while (*cp != '\0') {
 2518             if (isspace(UCH(*cp)))
 2519                 *cp = '_';
 2520             ++cp;
 2521             }
 2522         }
 2523 #endif
 2524         }
 2525     }
 2526     /*
 2527      * Check for a Set-Cookie directive.  - AK
 2528      */
 2529     } else if (!strcasecomp(NonNull(http_equiv), "Set-Cookie")) {
 2530     /*
 2531      * This will need to be updated when Set-Cookie/Set-Cookie2 handling is
 2532      * finalized.  For now, we'll still assume "historical" cookies in META
 2533      * directives.  - FM
 2534      */
 2535     url_type = is_url(me->inBASE ?
 2536               me->base_href : me->node_anchor->address);
 2537     if (url_type == HTTP_URL_TYPE || url_type == HTTPS_URL_TYPE) {
 2538         LYSetCookie(content,
 2539             NULL,
 2540             (me->inBASE ?
 2541              me->base_href : me->node_anchor->address));
 2542     }
 2543     }
 2544 
 2545     /*
 2546      * Free the copies.  - FM
 2547      */
 2548   free_META_copies:
 2549     FREE(http_equiv);
 2550     FREE(name);
 2551     FREE(content);
 2552     FREE(charset);
 2553 }
 2554 
 2555 /*
 2556  *  This function handles P elements in HTML streams.
 2557  *  If start is TRUE it handles a start tag, and if
 2558  *  FALSE, an end tag.  We presently handle start
 2559  *  and end tags identically, but this can lead to
 2560  *  a different number of blank lines between the
 2561  *  current paragraph and subsequent text when a P
 2562  *  end tag is present or not in the markup. - FM
 2563  */
 2564 void LYHandlePlike(HTStructured * me, const BOOL *present,
 2565            STRING2PTR value,
 2566            char **include GCC_UNUSED,
 2567            int align_idx,
 2568            int start)
 2569 {
 2570     /*
 2571      * FIG content should be a true block, which like P inherits the current
 2572      * style.  APPLET is like character elements or an ALT attribute, unless
 2573      * its content contains a block element.  If we encounter a P in either's
 2574      * content, we set flags to treat the content as a block - FM
 2575      */
 2576     if (start) {
 2577     if (me->inFIG)
 2578         me->inFIGwithP = TRUE;
 2579 
 2580     if (me->inAPPLET)
 2581         me->inAPPLETwithP = TRUE;
 2582     }
 2583 
 2584     UPDATE_STYLE;
 2585     if (me->List_Nesting_Level >= 0) {
 2586     /*
 2587      * We're in a list.  Treat P as an instruction to create one blank
 2588      * line, if not already present, then fall through to handle
 2589      * attributes, with the "second line" margins - FM
 2590      */
 2591     if (me->inP) {
 2592         if (me->inFIG || me->inAPPLET ||
 2593         me->inCAPTION || me->inCREDIT ||
 2594         me->sp->style->spaceAfter > 0 ||
 2595         (start && me->sp->style->spaceBefore > 0)) {
 2596         LYEnsureDoubleSpace(me);
 2597         } else {
 2598         LYEnsureSingleSpace(me);
 2599         }
 2600     }
 2601     } else if (me->sp[0].tag_number == HTML_ADDRESS) {
 2602     /*
 2603      * We're in an ADDRESS.  Treat P as an instruction to start a newline,
 2604      * if needed, then fall through to handle attributes - FM
 2605      */
 2606     if (!HText_LastLineEmpty(me->text, FALSE)) {
 2607         HText_setLastChar(me->text, ' ');   /* absorb white space */
 2608         HText_appendCharacter(me->text, '\r');
 2609     }
 2610     } else {
 2611     if (start) {
 2612         if (!(me->inLABEL && !me->inP)) {
 2613         HText_appendParagraph(me->text);
 2614         }
 2615     } else if (me->sp->style->spaceAfter > 0) {
 2616         LYEnsureDoubleSpace(me);
 2617     } else {
 2618         LYEnsureSingleSpace(me);
 2619     }
 2620     me->inLABEL = FALSE;
 2621     }
 2622     me->in_word = NO;
 2623 
 2624     if (LYoverride_default_alignment(me)) {
 2625     me->sp->style->alignment = LYstyles(me->sp[0].tag_number)->alignment;
 2626     } else if ((me->List_Nesting_Level >= 0 &&
 2627         (me->sp->style->id == ST_DivCenter ||
 2628          me->sp->style->id == ST_DivLeft ||
 2629          me->sp->style->id == ST_DivRight)) ||
 2630            ((me->Division_Level < 0) &&
 2631         (me->sp->style->id == ST_Normal ||
 2632          me->sp->style->id == ST_Preformatted))) {
 2633     me->sp->style->alignment = HT_LEFT;
 2634     } else {
 2635     me->sp->style->alignment = (short) me->current_default_alignment;
 2636     }
 2637 
 2638     if (start && align_idx >= 0) {
 2639     if (present && present[align_idx] && value[align_idx]) {
 2640         if (!strcasecomp(value[align_idx], "center") &&
 2641         !(me->List_Nesting_Level >= 0 && !me->inP))
 2642         me->sp->style->alignment = HT_CENTER;
 2643         else if (!strcasecomp(value[align_idx], "right") &&
 2644              !(me->List_Nesting_Level >= 0 && !me->inP))
 2645         me->sp->style->alignment = HT_RIGHT;
 2646         else if (!strcasecomp(value[align_idx], "left") ||
 2647              !strcasecomp(value[align_idx], "justify"))
 2648         me->sp->style->alignment = HT_LEFT;
 2649     }
 2650 
 2651     }
 2652 
 2653     /*
 2654      * Mark that we are starting a new paragraph and don't have any of its
 2655      * text yet - FM
 2656      */
 2657     me->inP = FALSE;
 2658 
 2659     return;
 2660 }
 2661 
 2662 /*
 2663  *  This function handles SELECT elements in HTML streams.
 2664  *  If start is TRUE it handles a start tag, and if FALSE,
 2665  *  an end tag. - FM
 2666  */
 2667 void LYHandleSELECT(HTStructured * me, const BOOL *present,
 2668             STRING2PTR value,
 2669             char **include GCC_UNUSED,
 2670             int start)
 2671 {
 2672     int i;
 2673 
 2674     if (start == TRUE) {
 2675     char *name = NULL;
 2676     BOOLEAN multiple = NO;
 2677     char *size = NULL;
 2678 
 2679     /*
 2680      * Initialize the disable attribute.
 2681      */
 2682     me->select_disabled = FALSE;
 2683 
 2684     /*
 2685      * Check for unclosed TEXTAREA.
 2686      */
 2687     if (me->inTEXTAREA) {
 2688         if (LYBadHTML(me)) {
 2689         LYShowBadHTML("Bad HTML: Missing TEXTAREA end tag\n");
 2690         }
 2691     }
 2692 
 2693     /*
 2694      * Set to know we are in a select tag.
 2695      */
 2696     me->inSELECT = TRUE;
 2697 
 2698     if (!(present && present[HTML_SELECT_NAME] &&
 2699           non_empty(value[HTML_SELECT_NAME]))) {
 2700         StrAllocCopy(name, "");
 2701     } else if (StrChr(value[HTML_SELECT_NAME], '&') == NULL) {
 2702         StrAllocCopy(name, value[HTML_SELECT_NAME]);
 2703     } else {
 2704         StrAllocCopy(name, value[HTML_SELECT_NAME]);
 2705         UNESCAPE_FIELDNAME_TO_STD(&name);
 2706     }
 2707     if (present && present[HTML_SELECT_MULTIPLE])
 2708         multiple = YES;
 2709     if (present && present[HTML_SELECT_DISABLED])
 2710         me->select_disabled = TRUE;
 2711     if (present && present[HTML_SELECT_SIZE] &&
 2712         non_empty(value[HTML_SELECT_SIZE])) {
 2713         /*
 2714          * Let the size be determined by the number of OPTIONs.  - FM
 2715          */
 2716         CTRACE((tfp, "LYHandleSELECT: Ignoring SIZE=\"%s\" for SELECT.\n",
 2717             value[HTML_SELECT_SIZE]));
 2718     }
 2719 
 2720     if (me->inBoldH == TRUE &&
 2721         (multiple == NO || LYSelectPopups == FALSE)) {
 2722         HText_appendCharacter(me->text, LY_BOLD_END_CHAR);
 2723         me->inBoldH = FALSE;
 2724         me->needBoldH = TRUE;
 2725     }
 2726     if (me->inUnderline == TRUE &&
 2727         (multiple == NO || LYSelectPopups == FALSE)) {
 2728         HText_appendCharacter(me->text, LY_UNDERLINE_END_CHAR);
 2729         me->inUnderline = FALSE;
 2730     }
 2731 
 2732     if ((multiple == NO && LYSelectPopups == TRUE) &&
 2733         (me->sp[0].tag_number == HTML_PRE || me->inPRE == TRUE ||
 2734          !me->sp->style->freeFormat) &&
 2735         HText_LastLineSize(me->text, FALSE) > (LYcolLimit - 7)) {
 2736         /*
 2737          * Force a newline when we're using a popup in a PRE block and are
 2738          * within 7 columns from the right margin.  This will allow for the
 2739          * '[' popup designator and help avoid a wrap in the underscore
 2740          * placeholder for the retracted popup entry in the HText
 2741          * structure.  - FM
 2742          */
 2743         HTML_put_character(me, '\n');
 2744         me->in_word = NO;
 2745     }
 2746 
 2747     LYCheckForID(me, present, value, (int) HTML_SELECT_ID);
 2748 
 2749     HText_beginSelect(name, ATTR_CS_IN, multiple, size);
 2750     FREE(name);
 2751     FREE(size);
 2752 
 2753     me->first_option = TRUE;
 2754     } else {
 2755     /*
 2756      * Handle end tag.
 2757      */
 2758     char *ptr;
 2759 
 2760     /*
 2761      * Make sure we had a select start tag.
 2762      */
 2763     if (!me->inSELECT) {
 2764         if (LYBadHTML(me)) {
 2765         LYShowBadHTML("Bad HTML: Unmatched SELECT end tag\n");
 2766         }
 2767         return;
 2768     }
 2769 
 2770     /*
 2771      * Set to know that we are no longer in a select tag.
 2772      */
 2773     me->inSELECT = FALSE;
 2774 
 2775     /*
 2776      * Clear the disable attribute.
 2777      */
 2778     me->select_disabled = FALSE;
 2779 
 2780     /*
 2781      * Finish the data off.
 2782      */
 2783     HTChunkTerminate(&me->option);
 2784     /*
 2785      * Finish the previous option.
 2786      */
 2787     ptr = HText_setLastOptionValue(me->text,
 2788                        me->option.data,
 2789                        me->LastOptionValue,
 2790                        LAST_ORDER,
 2791                        me->LastOptionChecked,
 2792                        me->UCLYhndl,
 2793                        ATTR_CS_IN);
 2794     FREE(me->LastOptionValue);
 2795 
 2796     me->LastOptionChecked = FALSE;
 2797 
 2798     if (HTCurSelectGroupType == F_CHECKBOX_TYPE ||
 2799         LYSelectPopups == FALSE) {
 2800         /*
 2801          * Start a newline after the last checkbox/button option.
 2802          */
 2803         LYEnsureSingleSpace(me);
 2804     } else {
 2805         /*
 2806          * Output popup box with the default option to screen, but use
 2807          * non-breaking spaces for output.
 2808          */
 2809         if (ptr &&
 2810         me->sp[0].tag_number == HTML_PRE && strlen(ptr) > 6) {
 2811         /*
 2812          * The code inadequately handles OPTION fields in PRE tags. 
 2813          * We'll put up a minimum of 6 characters, and if any more
 2814          * would exceed the wrap column, we'll ignore them.
 2815          */
 2816         for (i = 0; i < 6; i++) {
 2817             if (*ptr == ' ')
 2818             HText_appendCharacter(me->text, HT_NON_BREAK_SPACE);
 2819             else
 2820             HText_appendCharacter(me->text, *ptr);
 2821             ptr++;
 2822         }
 2823         }
 2824         for (; non_empty(ptr); ptr++) {
 2825         if (*ptr == ' ')
 2826             HText_appendCharacter(me->text, HT_NON_BREAK_SPACE);
 2827         else
 2828             HText_appendCharacter(me->text, *ptr);
 2829         }
 2830         /*
 2831          * Add end option character.
 2832          */
 2833         if (!me->first_option) {
 2834         HText_appendCharacter(me->text, ']');
 2835         HText_setLastChar(me->text, ']');
 2836         me->in_word = YES;
 2837         }
 2838     }
 2839     HTChunkClear(&me->option);
 2840 
 2841     if (me->Underline_Level > 0 && me->inUnderline == FALSE) {
 2842         HText_appendCharacter(me->text, LY_UNDERLINE_START_CHAR);
 2843         me->inUnderline = TRUE;
 2844     }
 2845     if (me->needBoldH == TRUE && me->inBoldH == FALSE) {
 2846         HText_appendCharacter(me->text, LY_BOLD_START_CHAR);
 2847         me->inBoldH = TRUE;
 2848         me->needBoldH = FALSE;
 2849     }
 2850     }
 2851 }
 2852 
 2853 /*
 2854  *  This function strips white characters and
 2855  *  generally fixes up attribute values that
 2856  *  were received from the SGML parser and
 2857  *  are to be treated as partial or absolute
 2858  *  URLs. - FM
 2859  */
 2860 int LYLegitimizeHREF(HTStructured * me, char **href,
 2861              int force_slash,
 2862              int strip_dots)
 2863 {
 2864     int url_type = 0;
 2865     char *p = NULL;
 2866     char *pound = NULL;
 2867     const char *Base = NULL;
 2868 
 2869     if (!me || !href || isEmpty(*href))
 2870     return (url_type);
 2871 
 2872     if (!LYTrimStartfile(*href)) {
 2873     /*
 2874      * Collapse spaces in the actual URL, but just protect against tabs or
 2875      * newlines in the fragment, if present.  This seeks to cope with
 2876      * atrocities inflicted on the Web by authoring tools such as
 2877      * Frontpage.  - FM
 2878      */
 2879 
 2880     /*  Before working on spaces check if we have any, usually none. */
 2881     p = LYSkipNonBlanks(*href);
 2882 
 2883     if (*p) {       /* p == first space character */
 2884         /* no reallocs below, all converted in place */
 2885 
 2886         pound = findPoundSelector(*href);
 2887 
 2888         if (pound != NULL && pound < p) {
 2889         convert_to_spaces(p, FALSE);    /* done */
 2890 
 2891         } else {
 2892         if (pound != NULL)
 2893             *pound = '\0';  /* mark */
 2894 
 2895         /*
 2896          * No blanks really belong in the HREF,
 2897          * but if it refers to an actual file,
 2898          * it may actually have blanks in the name.
 2899          * Try to accommodate. See also HTParse().
 2900          */
 2901         if (LYRemoveNewlines(p) || StrChr(p, '\t') != 0) {
 2902             LYRemoveBlanks(p);  /* a compromise... */
 2903         }
 2904 
 2905         if (pound != NULL) {
 2906             p = StrChr(p, '\0');
 2907             *pound = '#';   /* restore */
 2908             convert_to_spaces(pound, FALSE);
 2909             if (p < pound)
 2910             strcpy(p, pound);
 2911         }
 2912         }
 2913     }
 2914     }
 2915     if (**href == '\0')
 2916     return (url_type);
 2917 
 2918     TRANSLATE_AND_UNESCAPE_TO_STD(href);
 2919 
 2920     Base = me->inBASE ?
 2921     me->base_href : me->node_anchor->address;
 2922 
 2923     url_type = is_url(*href);
 2924     if (!url_type && force_slash && **href == '.' &&
 2925     (!strcmp(*href, ".") || !strcmp(*href, "..")) &&
 2926     !isFILE_URL(Base)) {
 2927     /*
 2928      * The Fielding RFC/ID for resolving partial HREFs says that a slash
 2929      * should be on the end of the preceding symbolic element for "." and
 2930      * "..", but all tested browsers only do that for an explicit "./" or
 2931      * "../", so we'll respect the RFC/ID only if force_slash was TRUE and
 2932      * it's not a file URL.  - FM
 2933      */
 2934     StrAllocCat(*href, "/");
 2935     }
 2936     if ((!url_type && LYStripDotDotURLs && strip_dots && **href == '.') &&
 2937     !strncasecomp(Base, "http", 4)) {
 2938     /*
 2939      * We will be resolving a partial reference versus an http or https
 2940      * URL, and it has lead dots, which may be retained when resolving via
 2941      * HTParse(), but the request would fail if the first element of the
 2942      * resultant path is two dots, because no http or https server accepts
 2943      * such paths, and the current URL draft, likely to become an RFC, says
 2944      * that it's optional for the UA to strip them as a form of error
 2945      * recovery.  So we will, recursively, for http/https URLs, like the
 2946      * "major market browsers" which made this problem so common on the
 2947      * Web, but we'll also issue a message about it, such that the bad
 2948      * partial reference might get corrected by the document provider.  -
 2949      * FM
 2950      */
 2951     char *temp = NULL, *path = NULL, *cp;
 2952     const char *str = "";
 2953 
 2954     temp = HTParse(*href, Base, PARSE_ALL);
 2955     path = HTParse(temp, "", PARSE_PATH + PARSE_PUNCTUATION);
 2956     if (!StrNCmp(path, "/..", 3)) {
 2957         cp = (path + 3);
 2958         if (LYIsHtmlSep(*cp) || *cp == '\0') {
 2959         if (Base[4] == 's') {
 2960             str = "s";
 2961         }
 2962         CTRACE((tfp,
 2963             "LYLegitimizeHREF: Bad value '%s' for http%s URL.\n",
 2964             *href, str));
 2965         CTRACE((tfp, "                  Stripping lead dots.\n"));
 2966         if (!me->inBadHREF) {
 2967             HTUserMsg(BAD_PARTIAL_REFERENCE);
 2968             me->inBadHREF = TRUE;
 2969         }
 2970         }
 2971         if (*cp == '\0') {
 2972         StrAllocCopy(*href, "/");
 2973         } else if (LYIsHtmlSep(*cp)) {
 2974         while (!StrNCmp(cp, "/..", 3)) {
 2975             if (*(cp + 3) == '/') {
 2976             cp += 3;
 2977             continue;
 2978             } else if (*(cp + 3) == '\0') {
 2979             *(cp + 1) = '\0';
 2980             *(cp + 2) = '\0';
 2981             }
 2982             break;
 2983         }
 2984         StrAllocCopy(*href, cp);
 2985         }
 2986     }
 2987     FREE(temp);
 2988     FREE(path);
 2989     }
 2990     return (url_type);
 2991 }
 2992 
 2993 /*
 2994  *  This function checks for a Content-Base header,
 2995  *  and if not present, a Content-Location header
 2996  *  which is an absolute URL, and sets the BASE
 2997  *  accordingly.  If set, it will be replaced by
 2998  *  any BASE tag in the HTML stream, itself. - FM
 2999  */
 3000 void LYCheckForContentBase(HTStructured * me)
 3001 {
 3002     char *cp = NULL;
 3003     BOOL present[HTML_BASE_ATTRIBUTES];
 3004     const char *value[HTML_BASE_ATTRIBUTES];
 3005     int i;
 3006 
 3007     if (!(me && me->node_anchor))
 3008     return;
 3009 
 3010     if (me->node_anchor->content_base != NULL) {
 3011     /*
 3012      * We have a Content-Base value.  Use it if it's non-zero length.  - FM
 3013      */
 3014     if (*me->node_anchor->content_base == '\0')
 3015         return;
 3016     StrAllocCopy(cp, me->node_anchor->content_base);
 3017     LYRemoveBlanks(cp);
 3018     } else if (me->node_anchor->content_location != NULL) {
 3019     /*
 3020      * We didn't have a Content-Base value, but do have a Content-Location
 3021      * value.  Use it if it's an absolute URL.  - FM
 3022      */
 3023     if (*me->node_anchor->content_location == '\0')
 3024         return;
 3025     StrAllocCopy(cp, me->node_anchor->content_location);
 3026     LYRemoveBlanks(cp);
 3027     if (!is_url(cp)) {
 3028         FREE(cp);
 3029         return;
 3030     }
 3031     } else {
 3032     /*
 3033      * We had neither a Content-Base nor Content-Location value.  - FM
 3034      */
 3035     return;
 3036     }
 3037 
 3038     /*
 3039      * If we collapsed to a zero-length value, ignore it.  - FM
 3040      */
 3041     if (*cp == '\0') {
 3042     FREE(cp);
 3043     return;
 3044     }
 3045 
 3046     /*
 3047      * Pass the value to HTML_start_element as the HREF of a BASE tag.  - FM
 3048      */
 3049     for (i = 0; i < HTML_BASE_ATTRIBUTES; i++)
 3050     present[i] = NO;
 3051     present[HTML_BASE_HREF] = YES;
 3052     value[HTML_BASE_HREF] = (const char *) cp;
 3053     (*me->isa->start_element) (me, HTML_BASE, present, value,
 3054                    0, 0);
 3055     FREE(cp);
 3056 }
 3057 
 3058 /*
 3059  *  This function creates NAMEd Anchors if a non-zero-length NAME
 3060  *  or ID attribute was present in the tag. - FM
 3061  */
 3062 void LYCheckForID(HTStructured * me, const BOOL *present,
 3063           STRING2PTR value,
 3064           int attribute)
 3065 {
 3066     HTChildAnchor *ID_A = NULL;
 3067     char *temp = NULL;
 3068 
 3069     if (!(me && me->text))
 3070     return;
 3071 
 3072     if (present && present[attribute]
 3073     && non_empty(value[attribute])) {
 3074     /*
 3075      * Translate any named or numeric character references.  - FM
 3076      */
 3077     StrAllocCopy(temp, value[attribute]);
 3078     LYUCTranslateHTMLString(&temp, me->tag_charset, me->tag_charset,
 3079                 NO, NO, YES, st_URL);
 3080 
 3081     /*
 3082      * Create the link if we still have a non-zero-length string.  - FM
 3083      */
 3084     if ((temp[0] != '\0') &&
 3085         (ID_A = HTAnchor_findChildAndLink
 3086          (
 3087          me->node_anchor,   /* Parent */
 3088          temp,      /* Tag */
 3089          NULL,      /* Addresss */
 3090          (HTLinkType *) 0))) {  /* Type */
 3091         HText_beginAnchor(me->text, me->inUnderline, ID_A);
 3092         HText_endAnchor(me->text, 0);
 3093     }
 3094     FREE(temp);
 3095     }
 3096 }
 3097 
 3098 /*
 3099  *  This function creates a NAMEd Anchor for the ID string
 3100  *  passed to it directly as an argument.  It assumes the
 3101  *  does not need checking for character references. - FM
 3102  */
 3103 void LYHandleID(HTStructured * me, const char *id)
 3104 {
 3105     HTChildAnchor *ID_A = NULL;
 3106 
 3107     if (!(me && me->text) ||
 3108     isEmpty(id))
 3109     return;
 3110 
 3111     /*
 3112      * Create the link if we still have a non-zero-length string.  - FM
 3113      */
 3114     if ((ID_A = HTAnchor_findChildAndLink
 3115      (
 3116          me->node_anchor,   /* Parent */
 3117          id,        /* Tag */
 3118          NULL,      /* Addresss */
 3119          (HTLinkType *) 0)) != NULL) {  /* Type */
 3120     HText_beginAnchor(me->text, me->inUnderline, ID_A);
 3121     HText_endAnchor(me->text, 0);
 3122     }
 3123 }
 3124 
 3125 /*
 3126  *  This function checks whether we want to override
 3127  *  the current default alignment for paragraphs and
 3128  *  instead use that specified in the element's style
 3129  *  sheet. - FM
 3130  */
 3131 BOOLEAN LYoverride_default_alignment(HTStructured * me)
 3132 {
 3133     if (!me)
 3134     return NO;
 3135 
 3136     switch (me->sp[0].tag_number) {
 3137     case HTML_BLOCKQUOTE:
 3138     case HTML_BQ:
 3139     case HTML_NOTE:
 3140     case HTML_FN:
 3141     case HTML_ADDRESS:
 3142     me->sp->style->alignment = HT_LEFT;
 3143     return YES;
 3144 
 3145     default:
 3146     break;
 3147     }
 3148     return NO;
 3149 }
 3150 
 3151 /*
 3152  *  This function inserts newlines if needed to create double spacing,
 3153  *  and sets the left margin for subsequent text to the second line
 3154  *  indentation of the current style. - FM
 3155  */
 3156 void LYEnsureDoubleSpace(HTStructured * me)
 3157 {
 3158     if (!me || !me->text)
 3159     return;
 3160 
 3161     if (!HText_LastLineEmpty(me->text, FALSE)) {
 3162     HText_setLastChar(me->text, ' ');   /* absorb white space */
 3163     HText_appendCharacter(me->text, '\r');
 3164     HText_appendCharacter(me->text, '\r');
 3165     } else if (!HText_PreviousLineEmpty(me->text, FALSE)) {
 3166     HText_setLastChar(me->text, ' ');   /* absorb white space */
 3167     HText_appendCharacter(me->text, '\r');
 3168     } else if (me->List_Nesting_Level >= 0) {
 3169     HText_NegateLineOne(me->text);
 3170     }
 3171     me->in_word = NO;
 3172     return;
 3173 }
 3174 
 3175 /*
 3176  *  This function inserts a newline if needed to create single spacing,
 3177  *  and sets the left margin for subsequent text to the second line
 3178  *  indentation of the current style. - FM
 3179  */
 3180 void LYEnsureSingleSpace(HTStructured * me)
 3181 {
 3182     if (!me || !me->text)
 3183     return;
 3184 
 3185     if (!HText_LastLineEmpty(me->text, FALSE)) {
 3186     HText_setLastChar(me->text, ' ');   /* absorb white space */
 3187     HText_appendCharacter(me->text, '\r');
 3188     } else if (me->List_Nesting_Level >= 0) {
 3189     HText_NegateLineOne(me->text);
 3190     }
 3191     me->in_word = NO;
 3192     return;
 3193 }
 3194 
 3195 /*
 3196  *  This function resets paragraph alignments for block
 3197  *  elements which do not have a defined style sheet. - FM
 3198  */
 3199 void LYResetParagraphAlignment(HTStructured * me)
 3200 {
 3201     if (!me)
 3202     return;
 3203 
 3204     if (me->List_Nesting_Level >= 0 ||
 3205     ((me->Division_Level < 0) &&
 3206      (me->sp->style->id == ST_Normal ||
 3207       me->sp->style->id == ST_Preformatted))) {
 3208     me->sp->style->alignment = HT_LEFT;
 3209     } else {
 3210     me->sp->style->alignment = (short) me->current_default_alignment;
 3211     }
 3212     return;
 3213 }
 3214 
 3215 /*
 3216  *  This example function checks whether the given anchor has
 3217  *  an address with a file scheme, and if so, loads it into the
 3218  *  the SGML parser's context->url element, which was passed as
 3219  *  the second argument.  The handle_comment() calling function in
 3220  *  SGML.c then calls LYDoCSI() in LYUtils.c to insert HTML markup
 3221  *  into the corresponding stream, homologously to an SSI by an
 3222  *  HTTP server. - FM
 3223  *
 3224  *  For functions similar to this but which depend on details of
 3225  *  the HTML handler's internal data, the calling interface should
 3226  *  be changed, and functions in SGML.c would have to make sure not
 3227  *  to call such functions inappropriately (e.g., calling a function
 3228  *  specific to the Lynx_HTML_Handler when SGML.c output goes to
 3229  *  some other HTStructured object like in HTMLGen.c), or the new
 3230  *  functions could be added to the SGML.h interface.
 3231  */
 3232 BOOLEAN LYCheckForCSI(HTParentAnchor *anchor,
 3233               char **url)
 3234 {
 3235     if (!(anchor && anchor->address))
 3236     return FALSE;
 3237 
 3238     if (!isFILE_URL(anchor->address))
 3239     return FALSE;
 3240 
 3241     if (!LYisLocalHost(anchor->address))
 3242     return FALSE;
 3243 
 3244     StrAllocCopy(*url, anchor->address);
 3245     return TRUE;
 3246 }
 3247 
 3248 /*
 3249  *  This function is called from the SGML parser to look at comments
 3250  *  and see whether we should collect some info from them.  Currently
 3251  *  it only looks for comments with Message-Id and Subject info, in the
 3252  *  exact form generated by MHonArc for archived mailing list.  If found,
 3253  *  the info is stored in the document's HTParentAnchor.  It can later be
 3254  *  used for generating a mail response.
 3255  *
 3256  *  We are extra picky here because there isn't any official definition
 3257  *  for these kinds of comments - we might (and still can) misinterpret
 3258  *  arbitrary comments as something they aren't.
 3259  *
 3260  *  If something doesn't look right, for example invalid characters, the
 3261  *  strings are not stored.  Mail responses will use something else as
 3262  *  the subject, probably the document URL, and will not have an
 3263  *  In-Reply-To header.
 3264  *
 3265  *  All this is a hack - to do this the right way, mailing list archivers
 3266  *  would have to agree on some better mechanism to make this kind of info
 3267  *  from original mail headers available, for example using LINK.  - kw
 3268  */
 3269 BOOLEAN LYCommentHacks(HTParentAnchor *anchor,
 3270                const char *comment)
 3271 {
 3272     const char *cp;
 3273     size_t len;
 3274 
 3275     if (comment == NULL)
 3276     return FALSE;
 3277 
 3278     if (!(anchor && anchor->address))
 3279     return FALSE;
 3280 
 3281     if (StrNCmp(comment, "!--X-Message-Id: ", 17) == 0) {
 3282     char *messageid = NULL;
 3283     char *p;
 3284 
 3285     for (cp = comment + 17; *cp; cp++) {
 3286         if (UCH(*cp) >= 127 || !isgraph(UCH(*cp))) {
 3287         break;
 3288         }
 3289     }
 3290     if (strcmp(cp, " --")) {
 3291         return FALSE;
 3292     }
 3293     cp = comment + 17;
 3294     StrAllocCopy(messageid, cp);
 3295     /* This should be ok - message-id should only contain 7-bit ASCII */
 3296     if (!LYUCTranslateHTMLString(&messageid, 0, 0, NO, NO, YES, st_URL))
 3297         return FALSE;
 3298     for (p = messageid; *p; p++) {
 3299         if (UCH(*p) >= 127 || !isgraph(UCH(*p))) {
 3300         break;
 3301         }
 3302     }
 3303     if (strcmp(p, " --")) {
 3304         FREE(messageid);
 3305         return FALSE;
 3306     }
 3307     if ((p = StrChr(messageid, '@')) == NULL || p[1] == '\0') {
 3308         FREE(messageid);
 3309         return FALSE;
 3310     }
 3311     p = messageid;
 3312     if ((len = strlen(p)) >= 8 && !strcmp(&p[len - 3], " --")) {
 3313         p[len - 3] = '\0';
 3314     } else {
 3315         FREE(messageid);
 3316         return FALSE;
 3317     }
 3318     if (HTAnchor_setMessageID(anchor, messageid)) {
 3319         FREE(messageid);
 3320         return TRUE;
 3321     } else {
 3322         FREE(messageid);
 3323         return FALSE;
 3324     }
 3325     }
 3326     if (StrNCmp(comment, "!--X-Subject: ", 14) == 0) {
 3327     char *subject = NULL;
 3328     char *p;
 3329 
 3330     for (cp = comment + 14; *cp; cp++) {
 3331         if (UCH(*cp) >= 127 || !isprint(UCH(*cp))) {
 3332         return FALSE;
 3333         }
 3334     }
 3335     cp = comment + 14;
 3336     StrAllocCopy(subject, cp);
 3337     /* @@@
 3338      * This may not be the right thing for the subject - but mail
 3339      * subjects shouldn't contain 8-bit characters in raw form anyway.
 3340      * We have to unescape character entities, since that's what MHonArc
 3341      * seems to generate.  But if after that there are 8-bit characters
 3342      * the string is rejected.  We would probably not know correctly
 3343      * what charset to assume anyway - the mail sender's can differ from
 3344      * the archive's.  And the code for sending mail cannot deal well
 3345      * with 8-bit characters - we should not put them in the Subject
 3346      * header in raw form, but don't have MIME encoding implemented.
 3347      * Someone may want to do more about this...  - kw
 3348      */
 3349     if (!LYUCTranslateHTMLString(&subject, 0, 0, NO, YES, NO, st_HTML))
 3350         return FALSE;
 3351     for (p = subject; *p; p++) {
 3352         if (UCH(*p) >= 127 || !isprint(UCH(*p))) {
 3353         FREE(subject);
 3354         return FALSE;
 3355         }
 3356     }
 3357     p = subject;
 3358     if ((len = strlen(p)) >= 4 && !strcmp(&p[len - 3], " --")) {
 3359         p[len - 3] = '\0';
 3360     } else {
 3361         FREE(subject);
 3362         return FALSE;
 3363     }
 3364     if (HTAnchor_setSubject(anchor, subject)) {
 3365         FREE(subject);
 3366         return TRUE;
 3367     } else {
 3368         FREE(subject);
 3369         return FALSE;
 3370     }
 3371     }
 3372 
 3373     return FALSE;
 3374 }
 3375 
 3376     /*
 3377      * Create the Title with any left-angle-brackets converted to &lt; entities
 3378      * and any ampersands converted to &amp; entities.  - FM
 3379      *
 3380      * Convert 8-bit letters to &#xUUUU to avoid dependencies from display
 3381      * character set which may need changing.  Do NOT convert any 8-bit chars
 3382      * if we have CJK display.  - LP
 3383      */
 3384 void LYformTitle(char **dst,
 3385          const char *src)
 3386 {
 3387     if (HTCJK == JAPANESE) {
 3388     char *tmp_buffer = NULL;
 3389 
 3390     if ((tmp_buffer = (char *) malloc(strlen(src) + 1)) == 0)
 3391         outofmem(__FILE__, "LYformTitle");
 3392 
 3393     switch (kanji_code) {   /* 1997/11/22 (Sat) 09:28:00 */
 3394     case EUC:
 3395         TO_EUC((const unsigned char *) src, (unsigned char *) tmp_buffer);
 3396         break;
 3397     case SJIS:
 3398         TO_SJIS((const unsigned char *) src, (unsigned char *) tmp_buffer);
 3399         break;
 3400     default:
 3401         CTRACE((tfp, "\nLYformTitle: kanji_code is an unexpected value."));
 3402         strcpy(tmp_buffer, src);
 3403         break;
 3404     }
 3405     StrAllocCopy(*dst, tmp_buffer);
 3406     FREE(tmp_buffer);
 3407     } else {
 3408     StrAllocCopy(*dst, src);
 3409     }
 3410 }