"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.6.2/src/cook.c" (9 Dec 2022, 28541 Bytes) of package /linux/misc/tin-2.6.2.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "cook.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.6.1_vs_2.6.2.

A hint: This file contains one or more very long lines, so it may be more readable in the pure text view mode, which shows the contents as wrapped lines within the browser window.


    1 /*
    2  *  Project   : tin - a Usenet reader
    3  *  Module    : cook.c
    4  *  Author    : J. Faultless
    5  *  Created   : 2000-03-08
    6  *  Updated   : 2022-09-19
    7  *  Notes     : Split from page.c
    8  *
    9  * Copyright (c) 2000-2023 Jason Faultless <jason@altarstone.com>
   10  * All rights reserved.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  *
   16  * 1. Redistributions of source code must retain the above copyright notice,
   17  *    this list of conditions and the following disclaimer.
   18  *
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  *
   23  * 3. Neither the name of the copyright holder nor the names of its
   24  *    contributors may be used to endorse or promote products derived from
   25  *    this software without specific prior written permission.
   26  *
   27  * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   28  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   30  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   31  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   37  * POSSIBILITY OF SUCH DAMAGE.
   38  */
   39 
   40 #ifndef TIN_H
   41 #   include "tin.h"
   42 #endif /* !TIN_H */
   43 #ifndef TCURSES_H
   44 #   include "tcurses.h"
   45 #endif /* !TCURSES_H */
   46 
   47 
/*
 * We malloc() this many t_lineinfo's at a time
 * (put_cooked() sizes and grows art->cookl in multiples of CHUNK)
 */
#define CHUNK       50

/*
 * TRUE when the current group has alternative_handling enabled and the
 * article context x describes (via hdr.ext) a multipart/alternative
 * content type. The subtype comparison is case-insensitive.
 * Reads the global curr_group.
 */
#define STRIP_ALTERNATIVE(x) \
            (curr_group->attribute->alternative_handling && \
            (x)->hdr.ext->type == TYPE_MULTIPART && \
            strcasecmp("alternative", (x)->hdr.ext->subtype) == 0)

/*
 * TRUE when match_regex_ex() returns >= 0 for string y of length z
 * against regex x. Within this file it is always called with a line and
 * its strlen(). NOTE(review): a result >= 0 is presumably "matched" -
 * confirm against match_regex_ex().
 */
#define MATCH_REGEX(x,y,z)  (match_regex_ex(y, z, 0, 0, &(x)) >= 0)
   59 
   60 
/*
 * Forward declarations of the helpers that are private to this module
 */
static t_bool charset_unsupported(const char *charset);
static t_bool header_wanted(const char *line);
static t_part *new_uue(t_part **part, char *name);
static void process_text_body_part(t_bool wrap_lines, FILE *in, t_part *part, int hide_uue);
static void put_cooked(size_t buf_len, t_bool wrap_lines, int flags, const char *fmt, ...);
#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
    /* wide-char worker behind expand_ctrl_chars() */
    static t_bool wexpand_ctrl_chars(wchar_t **wline, size_t *length, size_t lcook_width);
#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
#ifdef DEBUG_ART
    /* debugging aid, only built when DEBUG_ART is defined */
    static void dump_cooked(void);
#endif /* DEBUG_ART */


/*
 * These are used globally within this module for access to the context
 * currently being built. They must not leak outside.
 *
 * art is the article context whose cooked stream (art->cooked) and line
 * table (art->cookl / art->cooked_lines) the helpers below write to.
 */
static t_openartinfo *art;
   79 
   80 
   81 /*
   82  * Handle backspace, expand tabs, expand control chars to a literal ^[A-Z]
   83  * Allows \n through
   84  * Return TRUE if line contains a ^L (form-feed)
   85  */
   86 t_bool
   87 expand_ctrl_chars(
   88     char **line,
   89     size_t *length,
   90     size_t lcook_width)
   91 {
   92     t_bool ctrl_L = FALSE;
   93 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
   94     wchar_t *wline = char2wchar_t(*line);
   95     size_t wlen;
   96 
   97     /*
   98      * remove the assert() before release
   99      * it should help us find problems with wide-char strings
  100      * in the development branch
  101      */
  102     assert(wline != NULL);
  103     wlen = wcslen(wline);
  104     ctrl_L = wexpand_ctrl_chars(&wline, &wlen, lcook_width);
  105     free(*line);
  106     *line = wchar_t2char(wline);
  107     free(wline);
  108     assert(line != NULL);
  109     *length = strlen(*line);
  110 #else
  111     int curr_len = LEN;
  112     unsigned int i = 0, j, ln = 0;
  113     char *buf = my_malloc(curr_len);
  114     unsigned char *c;
  115 
  116     c = (unsigned char *) *line;
  117     while (*c) {
  118         if (i > curr_len - (lcook_width + 1)) {
  119             curr_len <<= 1;
  120             buf = my_realloc(buf, curr_len);
  121         }
  122         if (*c == '\n')
  123             ln = i + 1;
  124         if (*c == '\t') { /* expand tabs */
  125             j = i + lcook_width - ((i - ln) % lcook_width);
  126             for (; i < j; i++)
  127                 buf[i] = ' ';
  128         } else if (((*c) & 0xFF) < ' ' && *c != '\n' && (!IS_LOCAL_CHARSET("Big5") || *c != 27)) {  /* literal ctrl chars */
  129             buf[i++] = '^';
  130             buf[i++] = ((*c) & 0xFF) + '@';
  131             if (*c == '\f')     /* ^L detected */
  132                 ctrl_L = TRUE;
  133         } else {
  134             if (!my_isprint(*c) && *c != '\n')
  135                 buf[i++] = '?';
  136             else
  137                 buf[i++] = *c;
  138         }
  139         c++;
  140     }
  141     buf[i] = '\0';
  142     *length = i + 1;
  143     *line = my_realloc(*line, *length);
  144     strcpy(*line, buf);
  145     free(buf);
  146 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
  147     return ctrl_L;
  148 }
  149 
  150 
  151 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
  152 static t_bool
  153 wexpand_ctrl_chars(
  154     wchar_t **wline,
  155     size_t *length,
  156     size_t lcook_width)
  157 {
  158     size_t cur_len = LEN, i = 0, j, ln = 0;
  159     wchar_t *wbuf = my_malloc(cur_len * sizeof(wchar_t));
  160     wchar_t *wc;
  161     t_bool ctrl_L = FALSE;
  162 
  163     wc = *wline;
  164     while (*wc) {
  165         if (i > cur_len - (lcook_width + 1)) {
  166             cur_len <<= 1;
  167             wbuf = my_realloc(wbuf, cur_len * sizeof(wchar_t));
  168         }
  169         if (*wc == '\n')
  170             ln = i + 1;
  171         if (*wc == '\t') {      /* expand_tabs */
  172             j = i + lcook_width - ((i - ln) % lcook_width);
  173             for (; i < j; i++)
  174                 wbuf[i] = ' ';
  175         } else if (*wc < ' ' && *wc != '\n' && (!IS_LOCAL_CHARSET("Big5") || *wc != 27)) {  /* literal ctrl chars */
  176             wbuf[i++] = '^';
  177             wbuf[i++] = *wc + '@';
  178             if (*wc == '\f')    /* ^L detected */
  179                 ctrl_L = TRUE;
  180         } else {
  181             if (!iswprint((wint_t) *wc) && *wc != '\n')
  182                 wbuf[i++] = '?';
  183             else
  184                 wbuf[i++] = *wc;
  185         }
  186         wc++;
  187     }
  188     wbuf[i] = '\0';
  189     *length = i + 1;
  190     *wline = my_realloc(*wline, *length * sizeof(wchar_t));
  191     wcscpy(*wline, wbuf);
  192     free(wbuf);
  193     return ctrl_L;
  194 }
  195 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
  196 
  197 
  198 /*
  199  * Output text to the cooked stream. Wrap lines as necessary.
  200  * Update the line count and the array of line offsets
  201  * Extend the lineoffset array as needed in CHUNK amounts.
  202  * flags are 'hints' to the pager about line content.
  203  * buf_len is the size put_cooked should use for its buffer.
  204  */
  205 static void
  206 put_cooked(
  207     size_t buf_len,
  208     t_bool wrap_lines,
  209     int flags,
  210     const char *fmt,
  211     ...)
  212 {
  213     char *p, *bufp, *buf;
  214     int wrap_column;
  215     int space;
  216     static int saved_flags = 0;
  217     va_list ap;
  218 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
  219     int bytes;
  220     wint_t *wp;
  221 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
  222 
  223     buf = my_malloc(buf_len + 1);
  224 
  225     va_start(ap, fmt);
  226     vsnprintf(buf, buf_len + 1, fmt, ap);
  227 
  228     if (tinrc.wrap_column < 0)
  229         wrap_column = ((tinrc.wrap_column > -cCOLS) ? cCOLS + tinrc.wrap_column : cCOLS);
  230     else
  231 #if 1
  232         wrap_column = ((tinrc.wrap_column > 0) ? tinrc.wrap_column : cCOLS);
  233 #else   /* never cut off long lines */
  234         wrap_column = (((tinrc.wrap_column > 0) && (tinrc.wrap_column < cCOLS)) ? tinrc.wrap_column : cCOLS);
  235 #endif /* 1 */
  236 
  237     p = bufp = buf;
  238 
  239 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
  240     wp = my_malloc((MB_CUR_MAX + 1) * sizeof(wint_t));
  241 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
  242 
  243     while (*p) {
  244         if (wrap_lines) {
  245             space = wrap_column;
  246             while (space > 0 && *p && *p != '\n') {
  247 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
  248                 if ((bytes = mbtowc((wchar_t *) wp, p, MB_CUR_MAX)) > 0) {
  249                     if ((space -= wcwidth((wchar_t) *wp)) < 0)
  250                         break;
  251                     p += bytes;
  252                 } else
  253                     p++;
  254 #else
  255                 p++;
  256                 space--;
  257 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
  258             }
  259         } else {
  260             while (*p && *p != '\n')
  261                 p++;
  262         }
  263         fwrite(bufp, 1, (size_t) (p - bufp), art->cooked);
  264         fputs("\n", art->cooked);
  265         if (*p == '\n')
  266             p++;
  267         bufp = p;
  268 
  269         if (art->cooked_lines == 0) {
  270             art->cookl = my_malloc(sizeof(t_lineinfo) * CHUNK);
  271             art->cookl[0].offset = 0;
  272         }
  273 
  274         /*
  275          * Pick up flags from a previous partial write
  276          */
  277         art->cookl[art->cooked_lines].flags = flags | saved_flags;
  278         saved_flags = 0;
  279         art->cooked_lines++;
  280 
  281         /*
  282          * Grow the array of lines if needed - we resize it properly at the end
  283          */
  284         if (art->cooked_lines % CHUNK == 0)
  285             art->cookl = my_realloc(art->cookl, sizeof(t_lineinfo) * CHUNK * (size_t) ((art->cooked_lines / CHUNK) + 1));
  286 
  287         art->cookl[art->cooked_lines].offset = ftell(art->cooked);
  288     }
  289 
  290 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
  291     free(wp);
  292 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
  293 
  294     /*
  295      * If there is anything left over, then it must be a non \n terminated
  296      * partial line from base64 decoding etc.. Dump it now and the rest of
  297      * the line (with the \n) will fill in the t_lineinfo
  298      * We must save the flags now as the rest of the line may not have the same properties
  299      * We need to keep the length for accounting purposes
  300      */
  301     if (*bufp != '\0') {
  302         fputs(bufp, art->cooked);
  303         saved_flags = flags;
  304     }
  305 
  306     va_end(ap);
  307     free(buf);
  308 }
  309 
  310 
  311 /*
  312  * Add a new uuencode attachment description to the current part
  313  */
  314 static t_part *
  315 new_uue(
  316     t_part **part,
  317     char *name)
  318 {
  319     t_part *ptr = new_part((*part)->uue);
  320 
  321     if (!(*part)->uue)          /* new_part() is simple and doesn't attach list heads */
  322         (*part)->uue = ptr;
  323 
  324     free_list(ptr->params);
  325     /*
  326      * Load the name into the parameter list
  327      */
  328     ptr->params = new_params();
  329     ptr->params->name = my_strdup("name");
  330     ptr->params->value = my_strdup(str_trim(name));
  331 
  332     ptr->encoding = ENCODING_UUE;   /* treat as x-uuencode */
  333 
  334     ptr->offset = ftell(art->cooked);
  335     ptr->depth = (*part)->depth;    /* uue is at the same depth as the envelope */
  336 
  337     /*
  338      * If an extension is present, try and add a Content-Type
  339      */
  340     if ((name = strrchr(name, '.')) != NULL)
  341         lookup_mimetype(name + 1, ptr);
  342 
  343     return ptr;
  344 }
  345 
  346 
  347 /*
  348  * Get the suggested filename for an attachment. RFC says Content-Disposition
  349  * 'filename' supersedes Content-Type 'name'. We must also remove path
  350  * information.
  351  */
  352 const char *
  353 get_filename(
  354     t_param *ptr)
  355 {
  356     const char *name;
  357     char *p;
  358 
  359     if (!(name = get_param(ptr, "filename"))) {
  360         if (!(name = get_param(ptr, "name")))
  361             return NULL;
  362     }
  363 
  364     if ((p = strrchr(name, DIRSEP)))
  365         return p + 1;
  366 
  367     return name;
  368 }
  369 
  370 
  371 #define PUT_UUE(part, qualifier_text) \
  372     put_cooked(LEN, wrap_lines, C_UUE, _(txt_uue), \
  373         part->depth ? (part->depth - 1) * 4 : 0, "", \
  374         content_types[part->type], part->subtype, \
  375         qualifier_text, part->line_count, get_filename(part->params))
  376 
  377 #define PUT_ATTACH(part, depth, name, charset) \
  378     put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach), \
  379         depth, "", \
  380         content_types[part->type], part->subtype, \
  381         content_encodings[part->encoding], \
  382         charset ? _(txt_attach_charset) : "", BlankIfNull(charset), \
  383         part->line_count, \
  384         name ? _(txt_name) : "", BlankIfNull(name)); \
  385         \
  386     if (part->description) \
  387         put_cooked(LEN, wrap_lines, C_ATTACH, \
  388             _(txt_attach_description), \
  389             depth, "", \
  390             part->description); \
  391     if (part->next != NULL || IS_PLAINTEXT(part)) \
  392         put_cooked(1, wrap_lines, C_ATTACH, "\n")
  393 
/*
 * Decodes text bodies, remove sig's, detects uuencoded sections
 *
 * Reads part->line_count lines of the part starting at part->offset in
 * 'in', decodes them according to part->encoding, converts the charset and
 * writes the result to the cooked stream via put_cooked().
 *
 * wrap_lines  passed on to put_cooked() for ordinary and uue/verbatim lines
 * in          stream holding the raw article
 * part        the text part to process; its uue list is rebuilt here
 * hide_uue    when non-zero, uuencoded sections are replaced by a one-line
 *             summary; UUE_ALL additionally treats lines that merely look
 *             like uuencoded data as the start of a section
 *
 * Per-line state: in_sig (after a SIGDASHES line), in_uue (inside a
 * uuencoded section), in_verbatim / verbatim_begin (verbatim block
 * handling) and the blank-line bookkeeping used by trim_article_body.
 */
static void
process_text_body_part(
    t_bool wrap_lines,
    FILE *in,
    t_part *part,
    int hide_uue)
{
    char *rest = NULL;
    char *line = NULL, *buf, *tmpline;
    const char *ncharset;
    size_t max_line_len = 0;
    int flags, len, lines_left, len_blank;
    unsigned int lines_skipped = 0;
    t_bool in_sig = FALSE;          /* Set when in sig portion */
    t_bool in_uue = FALSE;          /* Set when in uuencoded section */
    t_bool in_verbatim = FALSE;     /* Set when in verbatim section */
    t_bool verbatim_begin = FALSE;  /* Set when verbatim_begin_regex matches */
    t_bool is_uubody;               /* Set when current line looks like a uuencoded line */
    t_bool first_line_blank = TRUE; /* Unset when first non-blank line is reached */
    t_bool put_blank_lines = FALSE; /* Set when previously skipped lines needs to put */
    t_part *curruue = NULL;

    if (part->uue) {                /* These are redone each time we recook/resize etc.. */
        free_parts(part->uue);
        part->uue = NULL;
    }

    fseek(in, part->offset, SEEK_SET);

    if (part->encoding == ENCODING_BASE64)
        (void) mmdecode(NULL, 'b', 0, NULL);        /* flush */

    lines_left = part->line_count;
    /*
     * Keep going while raw lines remain or the base64 reader still holds
     * data in 'rest'.
     * NOTE(review): 'rest' is presumably decoded text left over beyond the
     * line just returned - confirm in read_decoded_base64_line().
     */
    while ((lines_left > 0) || rest) {
        switch (part->encoding) {
            case ENCODING_BASE64:
                lines_left -= read_decoded_base64_line(in, &line, &max_line_len, lines_left, &rest);
                break;

            case ENCODING_QP:
                lines_left -= read_decoded_qp_line(in, &line, &max_line_len, lines_left);
                break;

            default:
                if ((buf = tin_fgets(in, FALSE)) == NULL) {
                    FreeAndNull(line);
                    break;
                }

                /*
                 * tin_fgets() uses the returned space also internally
                 * so it's not advisable to use it for our own purposes
                 * especially if we must resize it.
                 * So copy buf to line (and resize line if necessary).
                 */
                /* + 2: room for the '\n' re-added below and the '\0' */
                if (max_line_len < strlen(buf) + 2 || !line) {
                    max_line_len = strlen(buf) + 2;
                    line = my_realloc(line, max_line_len);
                }
                strcpy(line, buf);

                /*
                 * FIXME: Some code in cook.c expects a '\n' at the end
                 * of the line. As tin_fgets() strips trailing '\n', re-add it.
                 * This should probably be fixed in that other code.
                 */
                strcat(line, "\n");

                lines_left--;
                break;
        }
        if (!(line && strlen(line))) {
            FreeIfNeeded(rest);
            break;  /* premature end of file, file error etc. */
        }

        /* convert network to local charset, tex2iso, iso2asc etc. */
        ncharset = get_param(part->params, "charset");
        process_charsets(&line, &max_line_len, ncharset ? ncharset : "US-ASCII", tinrc.mm_local_charset, curr_group->attribute->tex2iso_conv && art->tex2iso);

#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
        if (IS_LOCAL_CHARSET("UTF-8")) {
            utf8_valid(line);

            if (!in_verbatim && curr_group->attribute->suppress_soft_hyphens && ncharset && !strcasecmp(ncharset, "UTF-8"))
                remove_soft_hyphens(line);
        }
#endif /* MULTIBYTE_ABLE && !NO_LOCALE */

        len = (int) strlen(line);

        /*
         * trim article body and sig (not verbatim blocks):
         * - skip leading blank lines
         * - replace multiple blank lines with one empty line
         * - skip trailing blank lines, keep one if an
         *   attachment follows
         */
        if (curr_group->attribute->trim_article_body && !in_uue && !in_verbatim && !verbatim_begin) {
            /*
             * len_blank starts at 1 to account for the trailing '\n', so
             * len_blank == len means: leading whitespace plus exactly one
             * more character
             */
            len_blank = 1;
            tmpline = line;
            /* check if line contains only whitespace */
            while ((*tmpline == ' ') || (*tmpline == '\t')) {
                len_blank++;
                tmpline++;
            }
            if (len_blank == len) {     /* line is blank */
                /* blank line at the very end of the part */
                if (lines_left == 0 && (curr_group->attribute->trim_article_body & SKIP_TRAILING)) {
                    if (!(part->next == NULL || (STRIP_ALTERNATIVE(art) && !IS_PLAINTEXT(part->next))))
                        put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
                    continue;
                }
                if (first_line_blank) {
                    if (curr_group->attribute->trim_article_body & SKIP_LEADING)
                        continue;
                } else if ((curr_group->attribute->trim_article_body & (COMPACT_MULTIPLE | SKIP_TRAILING)) && (!in_sig || curr_group->attribute->show_signatures)) {
                    /* hold the blank line back until we know what follows */
                    lines_skipped++;
                    if (lines_left == 0 && !(curr_group->attribute->trim_article_body & SKIP_TRAILING)) {
                        for (; lines_skipped > 0; lines_skipped--)
                            put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
                    }
                    continue;
                }
            } else {    /* line is not blank */
                if (first_line_blank)
                    first_line_blank = FALSE;
                /* decide what to do with the blank lines held back so far */
                if (lines_skipped && (!in_sig || curr_group->attribute->show_signatures)) {
                    if (strcmp(line, SIGDASHES) != 0 || curr_group->attribute->show_signatures) {
                        if (curr_group->attribute->trim_article_body & COMPACT_MULTIPLE)
                            put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
                        else
                            put_blank_lines = TRUE;
                    } else if (!(curr_group->attribute->trim_article_body & SKIP_TRAILING))
                        put_blank_lines = TRUE;
                    if (put_blank_lines) {
                        for (; lines_skipped > 0; lines_skipped--)
                            put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
                    }
                    put_blank_lines = FALSE;
                    lines_skipped = 0;
                }
            }
        } /* if (curr_group->attribute->trim_article_body...) */

        /* look for verbatim marks, set in_verbatim only for lines in between */
        if (curr_group->attribute->verbatim_handling) {
            if (verbatim_begin) {
                /* the previous line was the begin mark */
                in_verbatim = TRUE;
                verbatim_begin = FALSE;
            } else if (!in_sig && !in_uue && !in_verbatim && MATCH_REGEX(verbatim_begin_regex, line, len))
                verbatim_begin = TRUE;
            if (in_verbatim && MATCH_REGEX(verbatim_end_regex, line, len))
                in_verbatim = FALSE;
        }

        if (!in_verbatim) {
            /*
             * Detect and skip signatures if necessary
             */
            if (!in_sig) {
                if (strcmp(line, SIGDASHES) == 0) {
                    in_sig = TRUE;
                    /* a signature ends an unfinished uuencoded section */
                    if (in_uue) {
                        in_uue = FALSE;
                        if (hide_uue)
                            PUT_UUE(curruue, _(txt_incomplete));
                    }
                }
            }

            if (in_sig && !(curr_group->attribute->show_signatures))
                continue;                   /* No further processing needed */

            /*
             * Detect and process uuencoded sections
             * Look for the start or the end of a uuencoded section
             *
             * TODO: look for a trailing size line after end (non standard
             *       extension)?
             */
            if (match_regex_ex(line, len, 0, 0, &uubegin_regex) >= 0) {
                REGEX_SIZE *ovector = regex_get_ovector_pointer(&uubegin_regex);

                in_uue = TRUE;
                /*
                 * NOTE(review): ovector[1] is presumably the end offset of
                 * the match, i.e. where the filename starts - confirm
                 * against uubegin_regex
                 */
                curruue = new_uue(&part, line + ovector[1]);
                if (hide_uue)
                    continue;               /* Don't cook the 'begin' line */
            } else if (strncmp(line, "end\n", 4) == 0) {
                if (in_uue) {
                    in_uue = FALSE;
                    if (hide_uue) {
                        PUT_UUE(curruue, "");
                        continue;           /* Don't cook the 'end' line */
                    }
                }
            }

            /*
             * See if this line looks like a uuencoded 'body' line
             */
            is_uubody = FALSE;

            if (MATCH_REGEX(uubody_regex, line, len)) {
                /*
                 * Value derived from the first character of the line; it
                 * is compared against the line length below
                 */
                int sum = (((*line) - ' ') & 077) * 4 / 3;      /* uuencode octet checksum */

                /* sum = 0 in a uubody only on the last line, a single ` */
                if (sum == 0 && len == 1 + 1)           /* +1 for the \n */
                    is_uubody = TRUE;
                else if (len == sum + 1 + 1)
                    is_uubody = TRUE;
#ifdef DEBUG_ART
                if (debug & DEBUG_MISC)
                    fprintf(stderr, "%s sum=%d len=%d (%s)\n", bool_unparse(is_uubody), sum, len, line);
#endif /* DEBUG_ART */
            }

            if (in_uue) {
                if (is_uubody)
                    curruue->line_count++;
                else {
                    if (line[0] == '\n') {      /* Blank line in a uubody - definitely a failure */
                        /* fprintf(stderr, "not a uue line while reading a uue body?\n"); */
                        in_uue = FALSE;
                        if (hide_uue)
                            /* don't continue here, so we see the line that 'broke' in_uue */
                            PUT_UUE(curruue, _(txt_incomplete));
                    }
                }
            } else {
                /*
                 * UUE_ALL = 'Try harder' - we never saw a begin line, but useful
                 * when uue sections are split across > 1 article
                 */
                if (is_uubody && hide_uue == UUE_ALL) {
                    char name[] = N_("(unknown)");

                    curruue = new_uue(&part, name);
                    curruue->line_count++;
                    in_uue = TRUE;
                    continue;
                }
            }

            /*
             * Skip output if we're hiding uue or the sig
             */
            if (in_uue && hide_uue)
                continue;   /* No further processing needed */
        }

        flags = in_verbatim ? C_VERBATIM : in_sig ? C_SIG : C_BODY;

        /*
         * Don't do any further handling of uue || verbatim lines
         */
        if (in_uue) {
            put_cooked(max_line_len, wrap_lines, flags, "%s", line);
            continue;
        } else if (in_verbatim) {
            /* verbatim text always uses a tab width of 8 */
            expand_ctrl_chars(&line, &max_line_len, 8);
            put_cooked(max_line_len, wrap_lines, flags, "%s", line);
            continue;
        }

#ifdef HAVE_COLOR
        /* keep order in sync with color.c:draw_pager_line() */
        if (quote_regex3.re) {
            if (MATCH_REGEX(quote_regex3, line, len))
                flags |= C_QUOTE3;
            else if (quote_regex2.re) {
                if (MATCH_REGEX(quote_regex2, line, len))
                    flags |= C_QUOTE2;
                else if (curr_group->attribute->extquote_handling && extquote_regex.re) {
                    if (MATCH_REGEX(extquote_regex, line, len))
                        flags |= C_EXTQUOTE;
                    else if (quote_regex.re) {
                        if (MATCH_REGEX(quote_regex, line, len))
                            flags |= C_QUOTE1;
                    }
                } else if (quote_regex.re) {
                    if (MATCH_REGEX(quote_regex, line, len))
                        flags |= C_QUOTE1;
                }
            }
        }
#endif /* HAVE_COLOR */

        /* these matches run on the line before control chars are expanded */
        if (MATCH_REGEX(url_regex, line, len))
            flags |= C_URL;
        if (MATCH_REGEX(mail_regex, line, len))
            flags |= C_MAIL;
        if (MATCH_REGEX(news_regex, line, len))
            flags |= C_NEWS;

        if (expand_ctrl_chars(&line, &max_line_len, tabwidth))
            flags |= C_CTRLL;               /* Line contains form-feed */

        buf = line;

        /*
         * Skip over the first space in case of Format=Flowed (space-stuffing)
         */
        if (part->format == FORMAT_FLOWED) {
            if (line[0] == ' ')
                ++buf;
        }

        /* wrapping is switched off when the local charset is Big5 */
        put_cooked(max_line_len, wrap_lines && (!IS_LOCAL_CHARSET("Big5")), flags, "%s", buf);
    } /* while */

    /*
     * Were we reading uue and ran off the end ?
     */
    if (in_uue && hide_uue)
        PUT_UUE(curruue, _(txt_incomplete));

    free(line);
}
  715 
  716 
  717 /*
  718  * Return TRUE if this header should be printed as per
  719  * news_headers_to_[not_]display
  720  */
  721 static t_bool
  722 header_wanted(
  723     const char *line)
  724 {
  725     int i;
  726     t_bool ret = FALSE;
  727 
  728     if (curr_group->attribute->headers_to_display->num && (curr_group->attribute->headers_to_display->header[0][0] == '*'))
  729         ret = TRUE; /* wild do */
  730     else {
  731         for (i = 0; i < curr_group->attribute->headers_to_display->num; i++) {
  732             if (!strncasecmp(line, curr_group->attribute->headers_to_display->header[i], strlen(curr_group->attribute->headers_to_display->header[i]))) {
  733                 ret = TRUE;
  734                 break;
  735             }
  736         }
  737     }
  738 
  739     if (curr_group->attribute->headers_to_not_display->num && (curr_group->attribute->headers_to_not_display->header[0][0] == '*'))
  740         ret = FALSE; /* wild don't: doesn't make sense! */
  741     else {
  742         for (i = 0; i < curr_group->attribute->headers_to_not_display->num; i++) {
  743             if (!strncasecmp(line, curr_group->attribute->headers_to_not_display->header[i], strlen(curr_group->attribute->headers_to_not_display->header[i]))) {
  744                 ret = FALSE;
  745                 break;
  746             }
  747         }
  748     }
  749 
  750     return ret;
  751 }
  752 
  753 
  754 /* #define DEBUG_ART    1 */
  755 #ifdef DEBUG_ART
  756 static void
  757 dump_cooked(
  758     void)
  759 {
  760     char *line;
  761     int i;
  762 
  763     for (i = 0; i < art->cooked_lines; i++) {
  764         fseek(art->cooked, art->cookl[i].offset, SEEK_SET);
  765         line = tin_fgets(art->cooked, FALSE);
  766         fprintf(stderr, "[%3d] %4ld %3x [%s]\n", i, art->cookl[i].offset, art->cookl[i].flags, line);
  767     }
  768     fprintf(stderr, "%d lines cooked\n", art->cooked_lines);
  769 }
  770 #endif /* DEBUG_ART */
  771 
  772 
  773 /*
  774  * Check for charsets which may contain NULL bytes and thus break string
  775  * functions. Possibly incomplete.
  776  *
  777  * TODO: fix the other code to handle those charsets properly.
  778  */
  779 static t_bool
  780 charset_unsupported(
  781     const char *charset)
  782 {
  783     static const char *charsets[] = {
  784         "csUnicode",    /* alias for ISO-10646-UCS-2 */
  785         "csUCS4",       /* alias for ISO-10646-UCS-4 */
  786         "ISO-10646-UCS-2",
  787         "ISO-10646-UCS-4",
  788         "UTF-16",       /* covers also BE/LE */
  789         "UTF-32",       /* covers also BE/LE */
  790         NULL };
  791     const char **charsetptr = charsets;
  792     t_bool ret = FALSE;
  793 
  794     if (!charset)
  795         return ret;
  796 
  797     do {
  798         if (!strncasecmp(charset, *charsetptr, strlen(*charsetptr)))
  799             ret = TRUE;
  800     } while (!ret && *(++charsetptr) != NULL);
  801 
  802     return ret;
  803 }
  804 
  805 
  806 /*
  807  * 'cooks' an article, ie, prepare what will actually appear on the screen
  808  * It is not easy to do this in the same pass as the initial read since
  809  * boundary conditions for multipart articles make it harder to do on the
  810  * fly decoding.
  811  * We could have cooked the headers whilst they were being read but we're
  812  * trying to keep this simple.
  813  *
  814  * Expects:
  815  *      Fresh article context to write into
  816  *      parse_uue is set only when the art is opened to create t_parts for
  817  *      uue sections found, when resizing this is not needed
  818  *      hide_uue determines the folding of uue sections
  819  * Handles:
  820  *      multipart articles
  821  *      stripping of non text sections if skip_alternative
  822  *      Q and B decoding of text sections
  823  *      handling of uuencoded sections
  824  *      stripping of sigs if !show_signatures
  825  * Returns:
  826  *      TRUE on success
  827  *
  828  * TODO:
  829  *      give an error-message on at least disk-full
  830  */
  831 t_bool
  832 cook_article(
  833     t_bool wrap_lines,
  834     t_openartinfo *artinfo,
  835     int hide_uue,
  836     t_bool show_all_headers)
  837 {
  838     const char *charset;
  839     const char *name;
  840     char *line;
  841     struct t_header *hdr = &artinfo->hdr;
  842     t_bool header_put = FALSE;
  843     static const char *struct_header[] = {
  844         "Approved: ", "From: ", "Originator: ",
  845         "Reply-To: ", "Sender: ", "X-Cancelled-By: ", "X-Comment-To: ",
  846         "X-Submissions-To: ", "To: ", "Cc: ", "Bcc: ", "X-Originator: ", NULL };
  847 
  848     art = artinfo;              /* Global saves lots of passing artinfo around */
  849 
  850     if (!(art->cooked = tmpfile()))
  851         return FALSE;
  852 
  853     art->cooked_lines = 0;
  854 
  855     rewind(artinfo->raw);
  856 
  857     /*
  858      * Put down just the headers we want
  859      */
  860     while ((line = tin_fgets(artinfo->raw, TRUE)) != NULL) {
  861         if (line[0] == '\0') {              /* End of headers? */
  862             if (STRIP_ALTERNATIVE(artinfo)) {
  863                 if (header_wanted(_(txt_info_x_conversion_note))) {
  864                     header_put = TRUE;
  865                     put_cooked(LEN, wrap_lines, C_HEADER, _(txt_info_x_conversion_note));
  866                 }
  867             }
  868             if (header_put)
  869                 put_cooked(1, TRUE, 0, "\n");       /* put a newline after headers */
  870             break;
  871         }
  872 
  873         if (show_all_headers || header_wanted(line)) {  /* Put cooked data */
  874             const char **strptr = struct_header;
  875             char *l = NULL, *ptr, *foo, *bar;
  876             size_t i = LEN;
  877             t_bool found = FALSE;
  878 
  879             /* structured headers */
  880             do {
  881                 if (!strncasecmp(line, *strptr, strlen(*strptr))) {
  882                     foo = my_strdup(*strptr);
  883                     if ((ptr = strchr(foo, ':'))) {
  884                         *ptr = '\0';
  885                         unfold_header(line);
  886                         if ((ptr = parse_header(line, foo, TRUE, TRUE, FALSE))) {
  887 #if 0
  888                             /*
  889                              * TODO:
  890                              * idna_decode() currently expects just a FQDN
  891                              * or a mailaddress (with all comments stripped).
  892                              *
  893                              * we need to look for something like
  894                              * (?i)((?:\S+\.)?xn--[a-z0-9\.\-]{3,}\S+)\b
  895                              * and just decode $1
  896                              * maybe also in process_text_body_part()
  897                              */
  898                             bar = idna_decode(ptr);
  899 #else
  900                             bar = my_strdup(ptr);
  901 #endif /* 0 */
  902                             l = my_calloc(1, strlen(bar) + strlen(*strptr) + 1);
  903                             strncpy(l, line, strlen(*strptr));
  904                             strcat(l, bar);
  905                             free(bar);
  906                         }
  907                     }
  908                     free(foo);
  909                     found = TRUE;
  910                 }
  911             } while (!found && *(++strptr) != NULL);
  912 
  913             /* unstructured but must not be decoded */
  914             if (l == NULL && (!strncasecmp(line, "References: ", 12) || !strncasecmp(line, "Message-ID: ", 12) || !strncasecmp(line, "Date: ", 6) || !strncasecmp(line, "Newsgroups: ", 12) || !strncasecmp(line, "Distribution: ", 14) || !strncasecmp(line, "Followup-To: ", 13) || !strncasecmp(line, "X-Face: ", 8) || !strncasecmp(line, "Cancel-Lock: ", 13) || !strncasecmp(line, "Cancel-Key: ", 12) || !strncasecmp(line, "Supersedes: ", 12)))
  915                 l = my_strdup(line);
  916 
  917             if (l == NULL)
  918                 l = my_strdup(rfc1522_decode(line));
  919 
  920 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
  921             if (IS_LOCAL_CHARSET("UTF-8"))
  922                 utf8_valid(l);
  923 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
  924             header_put = TRUE;
  925             expand_ctrl_chars(&l, &i, tabwidth);
  926             put_cooked(i, wrap_lines, C_HEADER, "%s", l);
  927             free(l);
  928         }
  929     }
  930 
  931     if (tin_errno != 0)
  932         return FALSE;
  933 
  934     /*
  935      * Process the attachments in turn, print a neato header, and process/decode
  936      * the body if of text type
  937      */
  938     if (hdr->mime && hdr->ext->type == TYPE_MULTIPART) {
  939         t_part *ptr;
  940 
  941         for (ptr = hdr->ext->next; ptr != NULL; ptr = ptr->next) {
  942             /*
  943              * Ignore non text/plain sections with alternative handling
  944              */
  945             if (STRIP_ALTERNATIVE(artinfo) && !IS_PLAINTEXT(ptr))
  946                 continue;
  947 
  948             name = get_filename(ptr->params);
  949             if (!strcmp(content_types[ptr->type], "text"))
  950                 charset = get_param(ptr->params, "charset");
  951             else
  952                 charset = NULL;
  953             PUT_ATTACH(ptr, (ptr->depth - 1) * 4, name, charset);
  954 
  955             /* Try to view anything of type text, may need to review this */
  956             if (IS_PLAINTEXT(ptr)) {
  957                 if (charset_unsupported(charset)) {
  958                     put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), (ptr->depth - 1) * 4, "", charset);
  959                     if (ptr->next)
  960                         put_cooked(1, wrap_lines, C_ATTACH, "\n");
  961                 } else
  962                     process_text_body_part(wrap_lines, artinfo->raw, ptr, hide_uue);
  963             }
  964         }
  965     } else {
  966         if (!strcmp(content_types[hdr->ext->type], "text"))
  967             charset = get_param(hdr->ext->params, "charset");
  968         else
  969             charset = NULL;
  970         /*
  971          * A regular single-body article
  972          */
  973         if (IS_PLAINTEXT(hdr->ext)) {
  974             if (charset_unsupported(charset))
  975                 put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), 0, "", charset);
  976             else
  977                 process_text_body_part(wrap_lines, artinfo->raw, hdr->ext, hide_uue);
  978         } else {
  979             /*
  980              * Non-textual main body
  981              */
  982             name = get_filename(hdr->ext->params);
  983             PUT_ATTACH(hdr->ext, 0, name, charset);
  984         }
  985     }
  986 
  987 #ifdef DEBUG_ART
  988     dump_cooked();
  989 #endif /* DEBUG_ART */
  990 
  991     if (art->cooked_lines > 0)
  992         art->cookl = my_realloc(art->cookl, sizeof(t_lineinfo) * (size_t) art->cooked_lines);
  993 
  994     rewind(art->cooked);
  995     return (tin_errno != 0) ? FALSE : TRUE;
  996 }