/* Viewer hint: this file contains one or more very long lines, so it may be easier to read in the plain-text view mode, which wraps long lines within the browser window. */
1 /* 2 * Project : tin - a Usenet reader 3 * Module : cook.c 4 * Author : J. Faultless 5 * Created : 2000-03-08 6 * Updated : 2021-03-13 7 * Notes : Split from page.c 8 * 9 * Copyright (c) 2000-2022 Jason Faultless <jason@altarstone.com> 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 16 * 1. Redistributions of source code must retain the above copyright notice, 17 * this list of conditions and the following disclaimer. 18 * 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * 3. Neither the name of the copyright holder nor the names of its 24 * contributors may be used to endorse or promote products derived from 25 * this software without specific prior written permission. 26 * 27 * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 
38 */ 39 40 #ifndef TIN_H 41 # include "tin.h" 42 #endif /* !TIN_H */ 43 #ifndef TCURSES_H 44 # include "tcurses.h" 45 #endif /* !TCURSES_H */ 46 47 48 /* 49 * We malloc() this many t_lineinfo's at a time 50 */ 51 #define CHUNK 50 52 53 #define STRIP_ALTERNATIVE(x) \ 54 (curr_group->attribute->alternative_handling && \ 55 (x)->hdr.ext->type == TYPE_MULTIPART && \ 56 strcasecmp("alternative", (x)->hdr.ext->subtype) == 0) 57 58 #define MATCH_REGEX(x,y,z) (pcre_exec(x.re, x.extra, y, z, 0, 0, NULL, 0) >= 0) 59 60 61 static t_bool charset_unsupported(const char *charset); 62 static t_bool header_wanted(const char *line); 63 static t_part *new_uue(t_part **part, char *name); 64 static void process_text_body_part(t_bool wrap_lines, FILE *in, t_part *part, int hide_uue); 65 static void put_cooked(size_t buf_len, t_bool wrap_lines, int flags, const char *fmt, ...); 66 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 67 static t_bool wexpand_ctrl_chars(wchar_t **wline, size_t *length, size_t lcook_width); 68 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 69 #ifdef DEBUG_ART 70 static void dump_cooked(void); 71 #endif /* DEBUG_ART */ 72 73 74 /* 75 * These are used globally within this module for access to the context 76 * currently being built. They must not leak outside. 
77 */ 78 static t_openartinfo *art; 79 80 81 /* 82 * Handle backspace, expand tabs, expand control chars to a literal ^[A-Z] 83 * Allows \n through 84 * Return TRUE if line contains a ^L (form-feed) 85 */ 86 t_bool 87 expand_ctrl_chars( 88 char **line, 89 size_t *length, 90 size_t lcook_width) 91 { 92 t_bool ctrl_L = FALSE; 93 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 94 wchar_t *wline = char2wchar_t(*line); 95 size_t wlen; 96 97 /* 98 * remove the assert() before release 99 * it should help us find problems with wide-char strings 100 * in the development branch 101 */ 102 assert(wline != NULL); 103 wlen = wcslen(wline); 104 ctrl_L = wexpand_ctrl_chars(&wline, &wlen, lcook_width); 105 free(*line); 106 *line = wchar_t2char(wline); 107 free(wline); 108 assert(line != NULL); 109 *length = strlen(*line); 110 #else 111 int curr_len = LEN; 112 unsigned int i = 0, j, ln = 0; 113 char *buf = my_malloc(curr_len); 114 unsigned char *c; 115 116 c = (unsigned char *) *line; 117 while (*c) { 118 if (i > curr_len - (lcook_width + 1)) { 119 curr_len <<= 1; 120 buf = my_realloc(buf, curr_len); 121 } 122 if (*c == '\n') 123 ln = i + 1; 124 if (*c == '\t') { /* expand tabs */ 125 j = i + lcook_width - ((i - ln) % lcook_width); 126 for (; i < j; i++) 127 buf[i] = ' '; 128 } else if (((*c) & 0xFF) < ' ' && *c != '\n' && (!IS_LOCAL_CHARSET("Big5") || *c != 27)) { /* literal ctrl chars */ 129 buf[i++] = '^'; 130 buf[i++] = ((*c) & 0xFF) + '@'; 131 if (*c == '\f') /* ^L detected */ 132 ctrl_L = TRUE; 133 } else { 134 if (!my_isprint(*c) && *c != '\n') 135 buf[i++] = '?'; 136 else 137 buf[i++] = *c; 138 } 139 c++; 140 } 141 buf[i] = '\0'; 142 *length = i + 1; 143 *line = my_realloc(*line, *length); 144 strcpy(*line, buf); 145 free(buf); 146 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 147 return ctrl_L; 148 } 149 150 151 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 152 static t_bool 153 wexpand_ctrl_chars( 154 wchar_t **wline, 155 size_t *length, 156 size_t lcook_width) 157 { 
158 size_t cur_len = LEN, i = 0, j, ln = 0; 159 wchar_t *wbuf = my_malloc(cur_len * sizeof(wchar_t)); 160 wchar_t *wc; 161 t_bool ctrl_L = FALSE; 162 163 wc = *wline; 164 while (*wc) { 165 if (i > cur_len - (lcook_width + 1)) { 166 cur_len <<= 1; 167 wbuf = my_realloc(wbuf, cur_len * sizeof(wchar_t)); 168 } 169 if (*wc == '\n') 170 ln = i + 1; 171 if (*wc == '\t') { /* expand_tabs */ 172 j = i + lcook_width - ((i - ln) % lcook_width); 173 for (; i < j; i++) 174 wbuf[i] = ' '; 175 } else if (*wc < ' ' && *wc != '\n' && (!IS_LOCAL_CHARSET("Big5") || *wc != 27)) { /* literal ctrl chars */ 176 wbuf[i++] = '^'; 177 wbuf[i++] = *wc + '@'; 178 if (*wc == '\f') /* ^L detected */ 179 ctrl_L = TRUE; 180 } else { 181 if (!iswprint((wint_t) *wc) && *wc != '\n') 182 wbuf[i++] = '?'; 183 else 184 wbuf[i++] = *wc; 185 } 186 wc++; 187 } 188 wbuf[i] = '\0'; 189 *length = i + 1; 190 *wline = my_realloc(*wline, *length * sizeof(wchar_t)); 191 wcscpy(*wline, wbuf); 192 free(wbuf); 193 return ctrl_L; 194 } 195 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 196 197 198 /* 199 * Output text to the cooked stream. Wrap lines as necessary. 200 * Update the line count and the array of line offsets 201 * Extend the lineoffset array as needed in CHUNK amounts. 202 * flags are 'hints' to the pager about line content. 203 * buf_len is the size put_cooked should use for its buffer. 204 */ 205 static void 206 put_cooked( 207 size_t buf_len, 208 t_bool wrap_lines, 209 int flags, 210 const char *fmt, 211 ...) 212 { 213 char *p, *bufp, *buf; 214 int wrap_column; 215 int space; 216 static int saved_flags = 0; 217 va_list ap; 218 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 219 int bytes; 220 wint_t *wp; 221 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 222 223 buf = my_malloc(buf_len + 1); 224 225 va_start(ap, fmt); 226 vsnprintf(buf, buf_len + 1, fmt, ap); 227 228 if (tinrc.wrap_column < 0) 229 wrap_column = ((tinrc.wrap_column > -cCOLS) ? 
cCOLS + tinrc.wrap_column : cCOLS); 230 else 231 #if 1 232 wrap_column = ((tinrc.wrap_column > 0) ? tinrc.wrap_column : cCOLS); 233 #else /* never cut off long lines */ 234 wrap_column = (((tinrc.wrap_column > 0) && (tinrc.wrap_column < cCOLS)) ? tinrc.wrap_column : cCOLS); 235 #endif /* 1 */ 236 237 p = bufp = buf; 238 239 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 240 wp = my_malloc((MB_CUR_MAX + 1) * sizeof(wint_t)); 241 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 242 243 while (*p) { 244 if (wrap_lines) { 245 space = wrap_column; 246 while (space > 0 && *p && *p != '\n') { 247 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 248 if ((bytes = mbtowc((wchar_t *) wp, p, MB_CUR_MAX)) > 0) { 249 if ((space -= wcwidth((wchar_t) *wp)) < 0) 250 break; 251 p += bytes; 252 } else 253 p++; 254 #else 255 p++; 256 space--; 257 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 258 } 259 } else { 260 while (*p && *p != '\n') 261 p++; 262 } 263 fwrite(bufp, 1, (size_t) (p - bufp), art->cooked); 264 fputs("\n", art->cooked); 265 if (*p == '\n') 266 p++; 267 bufp = p; 268 269 if (art->cooked_lines == 0) { 270 art->cookl = my_malloc(sizeof(t_lineinfo) * CHUNK); 271 art->cookl[0].offset = 0; 272 } 273 274 /* 275 * Pick up flags from a previous partial write 276 */ 277 art->cookl[art->cooked_lines].flags = flags | saved_flags; 278 saved_flags = 0; 279 art->cooked_lines++; 280 281 /* 282 * Grow the array of lines if needed - we resize it properly at the end 283 */ 284 if (art->cooked_lines % CHUNK == 0) 285 art->cookl = my_realloc(art->cookl, sizeof(t_lineinfo) * CHUNK * (size_t) ((art->cooked_lines / CHUNK) + 1)); 286 287 art->cookl[art->cooked_lines].offset = ftell(art->cooked); 288 } 289 290 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 291 free(wp); 292 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 293 294 /* 295 * If there is anything left over, then it must be a non \n terminated 296 * partial line from base64 decoding etc.. 
Dump it now and the rest of 297 * the line (with the \n) will fill in the t_lineinfo 298 * We must save the flags now as the rest of the line may not have the same properties 299 * We need to keep the length for accounting purposes 300 */ 301 if (*bufp != '\0') { 302 fputs(bufp, art->cooked); 303 saved_flags = flags; 304 } 305 306 va_end(ap); 307 free(buf); 308 } 309 310 311 /* 312 * Add a new uuencode attachment description to the current part 313 */ 314 static t_part * 315 new_uue( 316 t_part **part, 317 char *name) 318 { 319 t_part *ptr = new_part((*part)->uue); 320 321 if (!(*part)->uue) /* new_part() is simple and doesn't attach list heads */ 322 (*part)->uue = ptr; 323 324 free_list(ptr->params); 325 /* 326 * Load the name into the parameter list 327 */ 328 ptr->params = new_params(); 329 ptr->params->name = my_strdup("name"); 330 ptr->params->value = my_strdup(str_trim(name)); 331 332 ptr->encoding = ENCODING_UUE; /* treat as x-uuencode */ 333 334 ptr->offset = ftell(art->cooked); 335 ptr->depth = (*part)->depth; /* uue is at the same depth as the envelope */ 336 337 /* 338 * If an extension is present, try and add a Content-Type 339 */ 340 if ((name = strrchr(name, '.')) != NULL) 341 lookup_mimetype(name + 1, ptr); 342 343 return ptr; 344 } 345 346 347 /* 348 * Get the suggested filename for an attachment. RFC says Content-Disposition 349 * 'filename' supersedes Content-Type 'name'. We must also remove path 350 * information. 351 */ 352 const char * 353 get_filename( 354 t_param *ptr) 355 { 356 const char *name; 357 char *p; 358 359 if (!(name = get_param(ptr, "filename"))) { 360 if (!(name = get_param(ptr, "name"))) 361 return NULL; 362 } 363 364 if ((p = strrchr(name, DIRSEP))) 365 return p + 1; 366 367 return name; 368 } 369 370 371 #define PUT_UUE(part, qualifier_text) \ 372 put_cooked(LEN, wrap_lines, C_UUE, _(txt_uue), \ 373 part->depth ? 
(part->depth - 1) * 4 : 0, "", \ 374 content_types[part->type], part->subtype, \ 375 qualifier_text, part->line_count, get_filename(part->params)) 376 377 #define PUT_ATTACH(part, depth, name, charset) \ 378 put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach), \ 379 depth, "", \ 380 content_types[part->type], part->subtype, \ 381 content_encodings[part->encoding], \ 382 charset ? _(txt_attach_charset) : "", BlankIfNull(charset), \ 383 part->line_count, \ 384 name ? _(txt_name) : "", BlankIfNull(name)); \ 385 \ 386 if (part->description) \ 387 put_cooked(LEN, wrap_lines, C_ATTACH, \ 388 _(txt_attach_description), \ 389 depth, "", \ 390 part->description); \ 391 if (part->next != NULL || IS_PLAINTEXT(part)) \ 392 put_cooked(1, wrap_lines, C_ATTACH, "\n") 393 394 /* 395 * Decodes text bodies, remove sig's, detects uuencoded sections 396 */ 397 static void 398 process_text_body_part( 399 t_bool wrap_lines, 400 FILE *in, 401 t_part *part, 402 int hide_uue) 403 { 404 char *rest = NULL; 405 char *line = NULL, *buf, *tmpline; 406 const char *ncharset; 407 size_t max_line_len = 0; 408 int flags, len, lines_left, len_blank; 409 int offsets[6]; 410 int size_offsets = ARRAY_SIZE(offsets); 411 unsigned int lines_skipped = 0; 412 t_bool in_sig = FALSE; /* Set when in sig portion */ 413 t_bool in_uue = FALSE; /* Set when in uuencoded section */ 414 t_bool in_verbatim = FALSE; /* Set when in verbatim section */ 415 t_bool verbatim_begin = FALSE; /* Set when verbatim_begin_regex matches */ 416 t_bool is_uubody; /* Set when current line looks like a uuencoded line */ 417 t_bool first_line_blank = TRUE; /* Unset when first non-blank line is reached */ 418 t_bool put_blank_lines = FALSE; /* Set when previously skipped lines needs to put */ 419 t_part *curruue = NULL; 420 421 if (part->uue) { /* These are redone each time we recook/resize etc.. 
*/ 422 free_parts(part->uue); 423 part->uue = NULL; 424 } 425 426 fseek(in, part->offset, SEEK_SET); 427 428 if (part->encoding == ENCODING_BASE64) 429 (void) mmdecode(NULL, 'b', 0, NULL); /* flush */ 430 431 lines_left = part->line_count; 432 while ((lines_left > 0) || rest) { 433 switch (part->encoding) { 434 case ENCODING_BASE64: 435 lines_left -= read_decoded_base64_line(in, &line, &max_line_len, lines_left, &rest); 436 break; 437 438 case ENCODING_QP: 439 lines_left -= read_decoded_qp_line(in, &line, &max_line_len, lines_left); 440 break; 441 442 default: 443 if ((buf = tin_fgets(in, FALSE)) == NULL) { 444 FreeAndNull(line); 445 break; 446 } 447 448 /* 449 * tin_fgets() uses the returned space also internally 450 * so it's not advisable to use it for our own purposes 451 * especially if we must resize it. 452 * So copy buf to line (and resize line if necessary). 453 */ 454 if (max_line_len < strlen(buf) + 2 || !line) { 455 max_line_len = strlen(buf) + 2; 456 line = my_realloc(line, max_line_len); 457 } 458 strcpy(line, buf); 459 460 /* 461 * FIXME: Some code in cook.c expects a '\n' at the end 462 * of the line. As tin_fgets() strips trailing '\n', re-add it. 463 * This should probably be fixed in that other code. 464 */ 465 strcat(line, "\n"); 466 467 lines_left--; 468 break; 469 } 470 if (!(line && strlen(line))) { 471 FreeIfNeeded(rest); 472 break; /* premature end of file, file error etc. */ 473 } 474 475 /* convert network to local charset, tex2iso, iso2asc etc. */ 476 ncharset = get_param(part->params, "charset"); 477 process_charsets(&line, &max_line_len, ncharset ? 
ncharset : "US-ASCII", tinrc.mm_local_charset, curr_group->attribute->tex2iso_conv && art->tex2iso); 478 479 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 480 if (IS_LOCAL_CHARSET("UTF-8")) 481 utf8_valid(line); 482 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 483 484 len = (int) strlen(line); 485 486 /* 487 * trim article body and sig (not verbatim blocks): 488 * - skip leading blank lines 489 * - replace multiple blank lines with one empty line 490 * - skip tailing blank lines, keep one if an 491 * attachment follows 492 */ 493 if (curr_group->attribute->trim_article_body && !in_uue && !in_verbatim && !verbatim_begin) { 494 len_blank = 1; 495 tmpline = line; 496 /* check if line contains only whitespace */ 497 while ((*tmpline == ' ') || (*tmpline == '\t')) { 498 len_blank++; 499 tmpline++; 500 } 501 if (len_blank == len) { /* line is blank */ 502 if (lines_left == 0 && (curr_group->attribute->trim_article_body & SKIP_TRAILING)) { 503 if (!(part->next == NULL || (STRIP_ALTERNATIVE(art) && !IS_PLAINTEXT(part->next)))) 504 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n"); 505 continue; 506 } 507 if (first_line_blank) { 508 if (curr_group->attribute->trim_article_body & SKIP_LEADING) 509 continue; 510 } else if ((curr_group->attribute->trim_article_body & (COMPACT_MULTIPLE | SKIP_TRAILING)) && (!in_sig || curr_group->attribute->show_signatures)) { 511 lines_skipped++; 512 if (lines_left == 0 && !(curr_group->attribute->trim_article_body & SKIP_TRAILING)) { 513 for (; lines_skipped > 0; lines_skipped--) 514 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n"); 515 } 516 continue; 517 } 518 } else { /* line is not blank */ 519 if (first_line_blank) 520 first_line_blank = FALSE; 521 if (lines_skipped && (!in_sig || curr_group->attribute->show_signatures)) { 522 if (strcmp(line, SIGDASHES) != 0 || curr_group->attribute->show_signatures) { 523 if (curr_group->attribute->trim_article_body & COMPACT_MULTIPLE) 524 put_cooked(1, TRUE, in_sig ? 
C_SIG : C_BODY, "\n"); 525 else 526 put_blank_lines = TRUE; 527 } else if (!(curr_group->attribute->trim_article_body & SKIP_TRAILING)) 528 put_blank_lines = TRUE; 529 if (put_blank_lines) { 530 for (; lines_skipped > 0; lines_skipped--) 531 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n"); 532 } 533 put_blank_lines = FALSE; 534 lines_skipped = 0; 535 } 536 } 537 } /* if (tinrc.trim_article_body...) */ 538 539 /* look for verbatim marks, set in_verbatim only for lines in between */ 540 if (curr_group->attribute->verbatim_handling) { 541 if (verbatim_begin) { 542 in_verbatim = TRUE; 543 verbatim_begin = FALSE; 544 } else if (!in_sig && !in_uue && !in_verbatim && MATCH_REGEX(verbatim_begin_regex, line, len)) 545 verbatim_begin = TRUE; 546 if (in_verbatim && MATCH_REGEX(verbatim_end_regex, line, len)) 547 in_verbatim = FALSE; 548 } 549 550 if (!in_verbatim) { 551 /* 552 * Detect and skip signatures if necessary 553 */ 554 if (!in_sig) { 555 if (strcmp(line, SIGDASHES) == 0) { 556 in_sig = TRUE; 557 if (in_uue) { 558 in_uue = FALSE; 559 if (hide_uue) 560 PUT_UUE(curruue, _(txt_incomplete)); 561 } 562 } 563 } 564 565 if (in_sig && !(curr_group->attribute->show_signatures)) 566 continue; /* No further processing needed */ 567 568 /* 569 * Detect and process uuencoded sections 570 * Look for the start or the end of a uuencoded section 571 * 572 * TODO: look for a tailing size line after end (non standard 573 * extension)? 
574 */ 575 if (pcre_exec(uubegin_regex.re, uubegin_regex.extra, line, len, 0, 0, offsets, size_offsets) != PCRE_ERROR_NOMATCH) { 576 in_uue = TRUE; 577 curruue = new_uue(&part, line + offsets[1]); 578 if (hide_uue) 579 continue; /* Don't cook the 'begin' line */ 580 } else if (strncmp(line, "end\n", 4) == 0) { 581 if (in_uue) { 582 in_uue = FALSE; 583 if (hide_uue) { 584 PUT_UUE(curruue, ""); 585 continue; /* Don't cook the 'end' line */ 586 } 587 } 588 } 589 590 /* 591 * See if this line looks like a uuencoded 'body' line 592 */ 593 is_uubody = FALSE; 594 595 if (MATCH_REGEX(uubody_regex, line, len)) { 596 int sum = (((*line) - ' ') & 077) * 4 / 3; /* uuencode octet checksum */ 597 598 /* sum = 0 in a uubody only on the last line, a single ` */ 599 if (sum == 0 && len == 1 + 1) /* +1 for the \n */ 600 is_uubody = TRUE; 601 else if (len == sum + 1 + 1) 602 is_uubody = TRUE; 603 #ifdef DEBUG_ART 604 if (debug & DEBUG_MISC) 605 fprintf(stderr, "%s sum=%d len=%d (%s)\n", bool_unparse(is_uubody), sum, len, line); 606 #endif /* DEBUG_ART */ 607 } 608 609 if (in_uue) { 610 if (is_uubody) 611 curruue->line_count++; 612 else { 613 if (line[0] == '\n') { /* Blank line in a uubody - definitely a failure */ 614 /* fprintf(stderr, "not a uue line while reading a uue body?\n"); */ 615 in_uue = FALSE; 616 if (hide_uue) 617 /* don't continue here, so we see the line that 'broke' in_uue */ 618 PUT_UUE(curruue, _(txt_incomplete)); 619 } 620 } 621 } else { 622 /* 623 * UUE_ALL = 'Try harder' - we never saw a begin line, but useful 624 * when uue sections are split across > 1 article 625 */ 626 if (is_uubody && hide_uue == UUE_ALL) { 627 char name[] = N_("(unknown)"); 628 629 curruue = new_uue(&part, name); 630 curruue->line_count++; 631 in_uue = TRUE; 632 continue; 633 } 634 } 635 636 /* 637 * Skip output if we're hiding uue or the sig 638 */ 639 if (in_uue && hide_uue) 640 continue; /* No further processing needed */ 641 } 642 643 flags = in_verbatim ? C_VERBATIM : in_sig ? 
C_SIG : C_BODY; 644 645 /* 646 * Don't do any further handling of uue || verbatim lines 647 */ 648 if (in_uue) { 649 put_cooked(max_line_len, wrap_lines, flags, "%s", line); 650 continue; 651 } else if (in_verbatim) { 652 expand_ctrl_chars(&line, &max_line_len, 8); 653 put_cooked(max_line_len, wrap_lines, flags, "%s", line); 654 continue; 655 } 656 657 #ifdef HAVE_COLOR 658 /* keep order in sync with color.c:draw_pager_line() */ 659 if (quote_regex3.re) { 660 if (MATCH_REGEX(quote_regex3, line, len)) 661 flags |= C_QUOTE3; 662 else if (quote_regex2.re) { 663 if (MATCH_REGEX(quote_regex2, line, len)) 664 flags |= C_QUOTE2; 665 else if (curr_group->attribute->extquote_handling && extquote_regex.re) { 666 if (MATCH_REGEX(extquote_regex, line, len)) 667 flags |= C_EXTQUOTE; 668 else if (quote_regex.re) { 669 if (MATCH_REGEX(quote_regex, line, len)) 670 flags |= C_QUOTE1; 671 } 672 } else if (quote_regex.re) { 673 if (MATCH_REGEX(quote_regex, line, len)) 674 flags |= C_QUOTE1; 675 } 676 } 677 } 678 #endif /* HAVE_COLOR */ 679 680 if (MATCH_REGEX(url_regex, line, len)) 681 flags |= C_URL; 682 if (MATCH_REGEX(mail_regex, line, len)) 683 flags |= C_MAIL; 684 if (MATCH_REGEX(news_regex, line, len)) 685 flags |= C_NEWS; 686 687 if (expand_ctrl_chars(&line, &max_line_len, tabwidth)) 688 flags |= C_CTRLL; /* Line contains form-feed */ 689 690 buf = line; 691 692 /* 693 * Skip over the first space in case of Format=Flowed (space-stuffing) 694 */ 695 if (part->format == FORMAT_FLOWED) { 696 if (line[0] == ' ') 697 ++buf; 698 } 699 700 put_cooked(max_line_len, wrap_lines && (!IS_LOCAL_CHARSET("Big5")), flags, "%s", buf); 701 } /* while */ 702 703 /* 704 * Were we reading uue and ran off the end ? 
705 */ 706 if (in_uue && hide_uue) 707 PUT_UUE(curruue, _(txt_incomplete)); 708 709 free(line); 710 } 711 712 713 /* 714 * Return TRUE if this header should be printed as per 715 * news_headers_to_[not_]display 716 */ 717 static t_bool 718 header_wanted( 719 const char *line) 720 { 721 int i; 722 t_bool ret = FALSE; 723 724 if (curr_group->attribute->headers_to_display->num && (curr_group->attribute->headers_to_display->header[0][0] == '*')) 725 ret = TRUE; /* wild do */ 726 else { 727 for (i = 0; i < curr_group->attribute->headers_to_display->num; i++) { 728 if (!strncasecmp(line, curr_group->attribute->headers_to_display->header[i], strlen(curr_group->attribute->headers_to_display->header[i]))) { 729 ret = TRUE; 730 break; 731 } 732 } 733 } 734 735 if (curr_group->attribute->headers_to_not_display->num && (curr_group->attribute->headers_to_not_display->header[0][0] == '*')) 736 ret = FALSE; /* wild don't: doesn't make sense! */ 737 else { 738 for (i = 0; i < curr_group->attribute->headers_to_not_display->num; i++) { 739 if (!strncasecmp(line, curr_group->attribute->headers_to_not_display->header[i], strlen(curr_group->attribute->headers_to_not_display->header[i]))) { 740 ret = FALSE; 741 break; 742 } 743 } 744 } 745 746 return ret; 747 } 748 749 750 /* #define DEBUG_ART 1 */ 751 #ifdef DEBUG_ART 752 static void 753 dump_cooked( 754 void) 755 { 756 char *line; 757 int i; 758 759 for (i = 0; i < art->cooked_lines; i++) { 760 fseek(art->cooked, art->cookl[i].offset, SEEK_SET); 761 line = tin_fgets(art->cooked, FALSE); 762 fprintf(stderr, "[%3d] %4ld %3x [%s]\n", i, art->cookl[i].offset, art->cookl[i].flags, line); 763 } 764 fprintf(stderr, "%d lines cooked\n", art->cooked_lines); 765 } 766 #endif /* DEBUG_ART */ 767 768 769 /* 770 * Check for charsets which may contain NULL bytes and thus break string 771 * functions. Possibly incomplete. 772 * 773 * TODO: fix the other code to handle those charsets properly. 
774 */ 775 static t_bool 776 charset_unsupported( 777 const char *charset) 778 { 779 static const char *charsets[] = { 780 "csUnicode", /* alias for ISO-10646-UCS-2 */ 781 "csUCS4", /* alias for ISO-10646-UCS-4 */ 782 "ISO-10646-UCS-2", 783 "ISO-10646-UCS-4", 784 "UTF-16", /* covers also BE/LE */ 785 "UTF-32", /* covers also BE/LE */ 786 NULL }; 787 const char **charsetptr = charsets; 788 t_bool ret = FALSE; 789 790 if (!charset) 791 return ret; 792 793 do { 794 if (!strncasecmp(charset, *charsetptr, strlen(*charsetptr))) 795 ret = TRUE; 796 } while (!ret && *(++charsetptr) != NULL); 797 798 return ret; 799 } 800 801 802 /* 803 * 'cooks' an article, ie, prepare what will actually appear on the screen 804 * It is not easy to do this in the same pass as the initial read since 805 * boundary conditions for multipart articles make it harder to do on the 806 * fly decoding. 807 * We could have cooked the headers whilst they were being read but we're 808 * trying to keep this simple. 809 * 810 * Expects: 811 * Fresh article context to write into 812 * parse_uue is set only when the art is opened to create t_parts for 813 * uue sections found, when resizing this is not needed 814 * hide_uue determines the folding of uue sections 815 * Handles: 816 * multipart articles 817 * stripping of non text sections if skip_alternative 818 * Q and B decoding of text sections 819 * handling of uuencoded sections 820 * stripping of sigs if !show_signatures 821 * Returns: 822 * TRUE on success 823 * 824 * TODO: 825 * give an error-message on at least disk-full 826 */ 827 t_bool 828 cook_article( 829 t_bool wrap_lines, 830 t_openartinfo *artinfo, 831 int hide_uue, 832 t_bool show_all_headers) 833 { 834 const char *charset; 835 const char *name; 836 char *line; 837 struct t_header *hdr = &artinfo->hdr; 838 t_bool header_put = FALSE; 839 static const char *struct_header[] = { 840 "Approved: ", "From: ", "Originator: ", 841 "Reply-To: ", "Sender: ", "X-Cancelled-By: ", "X-Comment-To: ", 842 
"X-Submissions-To: ", "To: ", "Cc: ", "Bcc: ", "X-Originator: ", NULL }; 843 844 art = artinfo; /* Global saves lots of passing artinfo around */ 845 846 if (!(art->cooked = tmpfile())) 847 return FALSE; 848 849 art->cooked_lines = 0; 850 851 rewind(artinfo->raw); 852 853 /* 854 * Put down just the headers we want 855 */ 856 while ((line = tin_fgets(artinfo->raw, TRUE)) != NULL) { 857 if (line[0] == '\0') { /* End of headers? */ 858 if (STRIP_ALTERNATIVE(artinfo)) { 859 if (header_wanted(_(txt_info_x_conversion_note))) { 860 header_put = TRUE; 861 put_cooked(LEN, wrap_lines, C_HEADER, _(txt_info_x_conversion_note)); 862 } 863 } 864 if (header_put) 865 put_cooked(1, TRUE, 0, "\n"); /* put a newline after headers */ 866 break; 867 } 868 869 if (show_all_headers || header_wanted(line)) { /* Put cooked data */ 870 const char **strptr = struct_header; 871 char *l = NULL, *ptr, *foo, *bar; 872 size_t i = LEN; 873 t_bool found = FALSE; 874 875 /* structured headers */ 876 do { 877 if (!strncasecmp(line, *strptr, strlen(*strptr))) { 878 foo = my_strdup(*strptr); 879 if ((ptr = strchr(foo, ':'))) { 880 *ptr = '\0'; 881 unfold_header(line); 882 if ((ptr = parse_header(line, foo, TRUE, TRUE, FALSE))) { 883 #if 0 884 /* 885 * TODO: 886 * idna_decode() currently expects just a FQDN 887 * or a mailaddress (with all comments stripped). 
888 * 889 * we need to look for something like 890 * (?i)((?:\S+\.)?xn--[a-z0-9\.\-]{3,}\S+)\b 891 * and just decode $1 892 * maybe also in process_text_body_part() 893 */ 894 bar = idna_decode(ptr); 895 #else 896 bar = my_strdup(ptr); 897 #endif /* 0 */ 898 l = my_calloc(1, strlen(bar) + strlen(*strptr) + 1); 899 strncpy(l, line, strlen(*strptr)); 900 strcat(l, bar); 901 free(bar); 902 } 903 } 904 free(foo); 905 found = TRUE; 906 } 907 } while (!found && *(++strptr) != NULL); 908 909 /* unstructured but must not be decoded */ 910 if (l == NULL && (!strncasecmp(line, "References: ", 12) || !strncasecmp(line, "Message-ID: ", 12) || !strncasecmp(line, "Date: ", 6) || !strncasecmp(line, "Newsgroups: ", 12) || !strncasecmp(line, "Distribution: ", 14) || !strncasecmp(line, "Followup-To: ", 13) || !strncasecmp(line, "X-Face: ", 8) || !strncasecmp(line, "Cancel-Lock: ", 13) || !strncasecmp(line, "Cancel-Key: ", 12) || !strncasecmp(line, "Supersedes: ", 12))) 911 l = my_strdup(line); 912 913 if (l == NULL) 914 l = my_strdup(rfc1522_decode(line)); 915 916 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 917 if (IS_LOCAL_CHARSET("UTF-8")) 918 utf8_valid(l); 919 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 920 header_put = TRUE; 921 expand_ctrl_chars(&l, &i, tabwidth); 922 put_cooked(i, wrap_lines, C_HEADER, "%s", l); 923 free(l); 924 } 925 } 926 927 if (tin_errno != 0) 928 return FALSE; 929 930 /* 931 * Process the attachments in turn, print a neato header, and process/decode 932 * the body if of text type 933 */ 934 if (hdr->mime && hdr->ext->type == TYPE_MULTIPART) { 935 t_part *ptr; 936 937 for (ptr = hdr->ext->next; ptr != NULL; ptr = ptr->next) { 938 /* 939 * Ignore non text/plain sections with alternative handling 940 */ 941 if (STRIP_ALTERNATIVE(artinfo) && !IS_PLAINTEXT(ptr)) 942 continue; 943 944 name = get_filename(ptr->params); 945 if (!strcmp(content_types[ptr->type], "text")) 946 charset = get_param(ptr->params, "charset"); 947 else 948 charset = NULL; 949 
PUT_ATTACH(ptr, (ptr->depth - 1) * 4, name, charset); 950 951 /* Try to view anything of type text, may need to review this */ 952 if (IS_PLAINTEXT(ptr)) { 953 if (charset_unsupported(charset)) { 954 put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), (ptr->depth - 1) * 4, "", charset); 955 if (ptr->next) 956 put_cooked(1, wrap_lines, C_ATTACH, "\n"); 957 } else 958 process_text_body_part(wrap_lines, artinfo->raw, ptr, hide_uue); 959 } 960 } 961 } else { 962 if (!strcmp(content_types[hdr->ext->type], "text")) 963 charset = get_param(hdr->ext->params, "charset"); 964 else 965 charset = NULL; 966 /* 967 * A regular single-body article 968 */ 969 if (IS_PLAINTEXT(hdr->ext)) { 970 if (charset_unsupported(charset)) 971 put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), 0, "", charset); 972 else 973 process_text_body_part(wrap_lines, artinfo->raw, hdr->ext, hide_uue); 974 } else { 975 /* 976 * Non-textual main body 977 */ 978 name = get_filename(hdr->ext->params); 979 PUT_ATTACH(hdr->ext, 0, name, charset); 980 } 981 } 982 983 #ifdef DEBUG_ART 984 dump_cooked(); 985 #endif /* DEBUG_ART */ 986 987 if (art->cooked_lines > 0) 988 art->cookl = my_realloc(art->cookl, sizeof(t_lineinfo) * (size_t) art->cooked_lines); 989 990 rewind(art->cooked); 991 return (tin_errno != 0) ? FALSE : TRUE; 992 }