/* Note: this file contains one or more very long lines, so it may be easier to read in a plain-text view that wraps the contents within the browser window. */
1 /* 2 * Project : tin - a Usenet reader 3 * Module : cook.c 4 * Author : J. Faultless 5 * Created : 2000-03-08 6 * Updated : 2022-09-19 7 * Notes : Split from page.c 8 * 9 * Copyright (c) 2000-2023 Jason Faultless <jason@altarstone.com> 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 16 * 1. Redistributions of source code must retain the above copyright notice, 17 * this list of conditions and the following disclaimer. 18 * 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * 3. Neither the name of the copyright holder nor the names of its 24 * contributors may be used to endorse or promote products derived from 25 * this software without specific prior written permission. 26 * 27 * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 
38 */ 39 40 #ifndef TIN_H 41 # include "tin.h" 42 #endif /* !TIN_H */ 43 #ifndef TCURSES_H 44 # include "tcurses.h" 45 #endif /* !TCURSES_H */ 46 47 48 /* 49 * We malloc() this many t_lineinfo's at a time 50 */ 51 #define CHUNK 50 52 53 #define STRIP_ALTERNATIVE(x) \ 54 (curr_group->attribute->alternative_handling && \ 55 (x)->hdr.ext->type == TYPE_MULTIPART && \ 56 strcasecmp("alternative", (x)->hdr.ext->subtype) == 0) 57 58 #define MATCH_REGEX(x,y,z) (match_regex_ex(y, z, 0, 0, &(x)) >= 0) 59 60 61 static t_bool charset_unsupported(const char *charset); 62 static t_bool header_wanted(const char *line); 63 static t_part *new_uue(t_part **part, char *name); 64 static void process_text_body_part(t_bool wrap_lines, FILE *in, t_part *part, int hide_uue); 65 static void put_cooked(size_t buf_len, t_bool wrap_lines, int flags, const char *fmt, ...); 66 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 67 static t_bool wexpand_ctrl_chars(wchar_t **wline, size_t *length, size_t lcook_width); 68 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 69 #ifdef DEBUG_ART 70 static void dump_cooked(void); 71 #endif /* DEBUG_ART */ 72 73 74 /* 75 * These are used globally within this module for access to the context 76 * currently being built. They must not leak outside. 
77 */ 78 static t_openartinfo *art; 79 80 81 /* 82 * Handle backspace, expand tabs, expand control chars to a literal ^[A-Z] 83 * Allows \n through 84 * Return TRUE if line contains a ^L (form-feed) 85 */ 86 t_bool 87 expand_ctrl_chars( 88 char **line, 89 size_t *length, 90 size_t lcook_width) 91 { 92 t_bool ctrl_L = FALSE; 93 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 94 wchar_t *wline = char2wchar_t(*line); 95 size_t wlen; 96 97 /* 98 * remove the assert() before release 99 * it should help us find problems with wide-char strings 100 * in the development branch 101 */ 102 assert(wline != NULL); 103 wlen = wcslen(wline); 104 ctrl_L = wexpand_ctrl_chars(&wline, &wlen, lcook_width); 105 free(*line); 106 *line = wchar_t2char(wline); 107 free(wline); 108 assert(line != NULL); 109 *length = strlen(*line); 110 #else 111 int curr_len = LEN; 112 unsigned int i = 0, j, ln = 0; 113 char *buf = my_malloc(curr_len); 114 unsigned char *c; 115 116 c = (unsigned char *) *line; 117 while (*c) { 118 if (i > curr_len - (lcook_width + 1)) { 119 curr_len <<= 1; 120 buf = my_realloc(buf, curr_len); 121 } 122 if (*c == '\n') 123 ln = i + 1; 124 if (*c == '\t') { /* expand tabs */ 125 j = i + lcook_width - ((i - ln) % lcook_width); 126 for (; i < j; i++) 127 buf[i] = ' '; 128 } else if (((*c) & 0xFF) < ' ' && *c != '\n' && (!IS_LOCAL_CHARSET("Big5") || *c != 27)) { /* literal ctrl chars */ 129 buf[i++] = '^'; 130 buf[i++] = ((*c) & 0xFF) + '@'; 131 if (*c == '\f') /* ^L detected */ 132 ctrl_L = TRUE; 133 } else { 134 if (!my_isprint(*c) && *c != '\n') 135 buf[i++] = '?'; 136 else 137 buf[i++] = *c; 138 } 139 c++; 140 } 141 buf[i] = '\0'; 142 *length = i + 1; 143 *line = my_realloc(*line, *length); 144 strcpy(*line, buf); 145 free(buf); 146 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 147 return ctrl_L; 148 } 149 150 151 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 152 static t_bool 153 wexpand_ctrl_chars( 154 wchar_t **wline, 155 size_t *length, 156 size_t lcook_width) 157 { 
158 size_t cur_len = LEN, i = 0, j, ln = 0; 159 wchar_t *wbuf = my_malloc(cur_len * sizeof(wchar_t)); 160 wchar_t *wc; 161 t_bool ctrl_L = FALSE; 162 163 wc = *wline; 164 while (*wc) { 165 if (i > cur_len - (lcook_width + 1)) { 166 cur_len <<= 1; 167 wbuf = my_realloc(wbuf, cur_len * sizeof(wchar_t)); 168 } 169 if (*wc == '\n') 170 ln = i + 1; 171 if (*wc == '\t') { /* expand_tabs */ 172 j = i + lcook_width - ((i - ln) % lcook_width); 173 for (; i < j; i++) 174 wbuf[i] = ' '; 175 } else if (*wc < ' ' && *wc != '\n' && (!IS_LOCAL_CHARSET("Big5") || *wc != 27)) { /* literal ctrl chars */ 176 wbuf[i++] = '^'; 177 wbuf[i++] = *wc + '@'; 178 if (*wc == '\f') /* ^L detected */ 179 ctrl_L = TRUE; 180 } else { 181 if (!iswprint((wint_t) *wc) && *wc != '\n') 182 wbuf[i++] = '?'; 183 else 184 wbuf[i++] = *wc; 185 } 186 wc++; 187 } 188 wbuf[i] = '\0'; 189 *length = i + 1; 190 *wline = my_realloc(*wline, *length * sizeof(wchar_t)); 191 wcscpy(*wline, wbuf); 192 free(wbuf); 193 return ctrl_L; 194 } 195 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 196 197 198 /* 199 * Output text to the cooked stream. Wrap lines as necessary. 200 * Update the line count and the array of line offsets 201 * Extend the lineoffset array as needed in CHUNK amounts. 202 * flags are 'hints' to the pager about line content. 203 * buf_len is the size put_cooked should use for its buffer. 204 */ 205 static void 206 put_cooked( 207 size_t buf_len, 208 t_bool wrap_lines, 209 int flags, 210 const char *fmt, 211 ...) 212 { 213 char *p, *bufp, *buf; 214 int wrap_column; 215 int space; 216 static int saved_flags = 0; 217 va_list ap; 218 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 219 int bytes; 220 wint_t *wp; 221 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 222 223 buf = my_malloc(buf_len + 1); 224 225 va_start(ap, fmt); 226 vsnprintf(buf, buf_len + 1, fmt, ap); 227 228 if (tinrc.wrap_column < 0) 229 wrap_column = ((tinrc.wrap_column > -cCOLS) ? 
cCOLS + tinrc.wrap_column : cCOLS); 230 else 231 #if 1 232 wrap_column = ((tinrc.wrap_column > 0) ? tinrc.wrap_column : cCOLS); 233 #else /* never cut off long lines */ 234 wrap_column = (((tinrc.wrap_column > 0) && (tinrc.wrap_column < cCOLS)) ? tinrc.wrap_column : cCOLS); 235 #endif /* 1 */ 236 237 p = bufp = buf; 238 239 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 240 wp = my_malloc((MB_CUR_MAX + 1) * sizeof(wint_t)); 241 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 242 243 while (*p) { 244 if (wrap_lines) { 245 space = wrap_column; 246 while (space > 0 && *p && *p != '\n') { 247 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 248 if ((bytes = mbtowc((wchar_t *) wp, p, MB_CUR_MAX)) > 0) { 249 if ((space -= wcwidth((wchar_t) *wp)) < 0) 250 break; 251 p += bytes; 252 } else 253 p++; 254 #else 255 p++; 256 space--; 257 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 258 } 259 } else { 260 while (*p && *p != '\n') 261 p++; 262 } 263 fwrite(bufp, 1, (size_t) (p - bufp), art->cooked); 264 fputs("\n", art->cooked); 265 if (*p == '\n') 266 p++; 267 bufp = p; 268 269 if (art->cooked_lines == 0) { 270 art->cookl = my_malloc(sizeof(t_lineinfo) * CHUNK); 271 art->cookl[0].offset = 0; 272 } 273 274 /* 275 * Pick up flags from a previous partial write 276 */ 277 art->cookl[art->cooked_lines].flags = flags | saved_flags; 278 saved_flags = 0; 279 art->cooked_lines++; 280 281 /* 282 * Grow the array of lines if needed - we resize it properly at the end 283 */ 284 if (art->cooked_lines % CHUNK == 0) 285 art->cookl = my_realloc(art->cookl, sizeof(t_lineinfo) * CHUNK * (size_t) ((art->cooked_lines / CHUNK) + 1)); 286 287 art->cookl[art->cooked_lines].offset = ftell(art->cooked); 288 } 289 290 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 291 free(wp); 292 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 293 294 /* 295 * If there is anything left over, then it must be a non \n terminated 296 * partial line from base64 decoding etc.. 
Dump it now and the rest of 297 * the line (with the \n) will fill in the t_lineinfo 298 * We must save the flags now as the rest of the line may not have the same properties 299 * We need to keep the length for accounting purposes 300 */ 301 if (*bufp != '\0') { 302 fputs(bufp, art->cooked); 303 saved_flags = flags; 304 } 305 306 va_end(ap); 307 free(buf); 308 } 309 310 311 /* 312 * Add a new uuencode attachment description to the current part 313 */ 314 static t_part * 315 new_uue( 316 t_part **part, 317 char *name) 318 { 319 t_part *ptr = new_part((*part)->uue); 320 321 if (!(*part)->uue) /* new_part() is simple and doesn't attach list heads */ 322 (*part)->uue = ptr; 323 324 free_list(ptr->params); 325 /* 326 * Load the name into the parameter list 327 */ 328 ptr->params = new_params(); 329 ptr->params->name = my_strdup("name"); 330 ptr->params->value = my_strdup(str_trim(name)); 331 332 ptr->encoding = ENCODING_UUE; /* treat as x-uuencode */ 333 334 ptr->offset = ftell(art->cooked); 335 ptr->depth = (*part)->depth; /* uue is at the same depth as the envelope */ 336 337 /* 338 * If an extension is present, try and add a Content-Type 339 */ 340 if ((name = strrchr(name, '.')) != NULL) 341 lookup_mimetype(name + 1, ptr); 342 343 return ptr; 344 } 345 346 347 /* 348 * Get the suggested filename for an attachment. RFC says Content-Disposition 349 * 'filename' supersedes Content-Type 'name'. We must also remove path 350 * information. 351 */ 352 const char * 353 get_filename( 354 t_param *ptr) 355 { 356 const char *name; 357 char *p; 358 359 if (!(name = get_param(ptr, "filename"))) { 360 if (!(name = get_param(ptr, "name"))) 361 return NULL; 362 } 363 364 if ((p = strrchr(name, DIRSEP))) 365 return p + 1; 366 367 return name; 368 } 369 370 371 #define PUT_UUE(part, qualifier_text) \ 372 put_cooked(LEN, wrap_lines, C_UUE, _(txt_uue), \ 373 part->depth ? 
(part->depth - 1) * 4 : 0, "", \ 374 content_types[part->type], part->subtype, \ 375 qualifier_text, part->line_count, get_filename(part->params)) 376 377 #define PUT_ATTACH(part, depth, name, charset) \ 378 put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach), \ 379 depth, "", \ 380 content_types[part->type], part->subtype, \ 381 content_encodings[part->encoding], \ 382 charset ? _(txt_attach_charset) : "", BlankIfNull(charset), \ 383 part->line_count, \ 384 name ? _(txt_name) : "", BlankIfNull(name)); \ 385 \ 386 if (part->description) \ 387 put_cooked(LEN, wrap_lines, C_ATTACH, \ 388 _(txt_attach_description), \ 389 depth, "", \ 390 part->description); \ 391 if (part->next != NULL || IS_PLAINTEXT(part)) \ 392 put_cooked(1, wrap_lines, C_ATTACH, "\n") 393 394 /* 395 * Decodes text bodies, remove sig's, detects uuencoded sections 396 */ 397 static void 398 process_text_body_part( 399 t_bool wrap_lines, 400 FILE *in, 401 t_part *part, 402 int hide_uue) 403 { 404 char *rest = NULL; 405 char *line = NULL, *buf, *tmpline; 406 const char *ncharset; 407 size_t max_line_len = 0; 408 int flags, len, lines_left, len_blank; 409 unsigned int lines_skipped = 0; 410 t_bool in_sig = FALSE; /* Set when in sig portion */ 411 t_bool in_uue = FALSE; /* Set when in uuencoded section */ 412 t_bool in_verbatim = FALSE; /* Set when in verbatim section */ 413 t_bool verbatim_begin = FALSE; /* Set when verbatim_begin_regex matches */ 414 t_bool is_uubody; /* Set when current line looks like a uuencoded line */ 415 t_bool first_line_blank = TRUE; /* Unset when first non-blank line is reached */ 416 t_bool put_blank_lines = FALSE; /* Set when previously skipped lines needs to put */ 417 t_part *curruue = NULL; 418 419 if (part->uue) { /* These are redone each time we recook/resize etc.. 
*/ 420 free_parts(part->uue); 421 part->uue = NULL; 422 } 423 424 fseek(in, part->offset, SEEK_SET); 425 426 if (part->encoding == ENCODING_BASE64) 427 (void) mmdecode(NULL, 'b', 0, NULL); /* flush */ 428 429 lines_left = part->line_count; 430 while ((lines_left > 0) || rest) { 431 switch (part->encoding) { 432 case ENCODING_BASE64: 433 lines_left -= read_decoded_base64_line(in, &line, &max_line_len, lines_left, &rest); 434 break; 435 436 case ENCODING_QP: 437 lines_left -= read_decoded_qp_line(in, &line, &max_line_len, lines_left); 438 break; 439 440 default: 441 if ((buf = tin_fgets(in, FALSE)) == NULL) { 442 FreeAndNull(line); 443 break; 444 } 445 446 /* 447 * tin_fgets() uses the returned space also internally 448 * so it's not advisable to use it for our own purposes 449 * especially if we must resize it. 450 * So copy buf to line (and resize line if necessary). 451 */ 452 if (max_line_len < strlen(buf) + 2 || !line) { 453 max_line_len = strlen(buf) + 2; 454 line = my_realloc(line, max_line_len); 455 } 456 strcpy(line, buf); 457 458 /* 459 * FIXME: Some code in cook.c expects a '\n' at the end 460 * of the line. As tin_fgets() strips trailing '\n', re-add it. 461 * This should probably be fixed in that other code. 462 */ 463 strcat(line, "\n"); 464 465 lines_left--; 466 break; 467 } 468 if (!(line && strlen(line))) { 469 FreeIfNeeded(rest); 470 break; /* premature end of file, file error etc. */ 471 } 472 473 /* convert network to local charset, tex2iso, iso2asc etc. */ 474 ncharset = get_param(part->params, "charset"); 475 process_charsets(&line, &max_line_len, ncharset ? 
ncharset : "US-ASCII", tinrc.mm_local_charset, curr_group->attribute->tex2iso_conv && art->tex2iso); 476 477 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 478 if (IS_LOCAL_CHARSET("UTF-8")) { 479 utf8_valid(line); 480 481 if (!in_verbatim && curr_group->attribute->suppress_soft_hyphens && ncharset && !strcasecmp(ncharset, "UTF-8")) 482 remove_soft_hyphens(line); 483 } 484 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 485 486 len = (int) strlen(line); 487 488 /* 489 * trim article body and sig (not verbatim blocks): 490 * - skip leading blank lines 491 * - replace multiple blank lines with one empty line 492 * - skip tailing blank lines, keep one if an 493 * attachment follows 494 */ 495 if (curr_group->attribute->trim_article_body && !in_uue && !in_verbatim && !verbatim_begin) { 496 len_blank = 1; 497 tmpline = line; 498 /* check if line contains only whitespace */ 499 while ((*tmpline == ' ') || (*tmpline == '\t')) { 500 len_blank++; 501 tmpline++; 502 } 503 if (len_blank == len) { /* line is blank */ 504 if (lines_left == 0 && (curr_group->attribute->trim_article_body & SKIP_TRAILING)) { 505 if (!(part->next == NULL || (STRIP_ALTERNATIVE(art) && !IS_PLAINTEXT(part->next)))) 506 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n"); 507 continue; 508 } 509 if (first_line_blank) { 510 if (curr_group->attribute->trim_article_body & SKIP_LEADING) 511 continue; 512 } else if ((curr_group->attribute->trim_article_body & (COMPACT_MULTIPLE | SKIP_TRAILING)) && (!in_sig || curr_group->attribute->show_signatures)) { 513 lines_skipped++; 514 if (lines_left == 0 && !(curr_group->attribute->trim_article_body & SKIP_TRAILING)) { 515 for (; lines_skipped > 0; lines_skipped--) 516 put_cooked(1, TRUE, in_sig ? 
C_SIG : C_BODY, "\n"); 517 } 518 continue; 519 } 520 } else { /* line is not blank */ 521 if (first_line_blank) 522 first_line_blank = FALSE; 523 if (lines_skipped && (!in_sig || curr_group->attribute->show_signatures)) { 524 if (strcmp(line, SIGDASHES) != 0 || curr_group->attribute->show_signatures) { 525 if (curr_group->attribute->trim_article_body & COMPACT_MULTIPLE) 526 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n"); 527 else 528 put_blank_lines = TRUE; 529 } else if (!(curr_group->attribute->trim_article_body & SKIP_TRAILING)) 530 put_blank_lines = TRUE; 531 if (put_blank_lines) { 532 for (; lines_skipped > 0; lines_skipped--) 533 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n"); 534 } 535 put_blank_lines = FALSE; 536 lines_skipped = 0; 537 } 538 } 539 } /* if (tinrc.trim_article_body...) */ 540 541 /* look for verbatim marks, set in_verbatim only for lines in between */ 542 if (curr_group->attribute->verbatim_handling) { 543 if (verbatim_begin) { 544 in_verbatim = TRUE; 545 verbatim_begin = FALSE; 546 } else if (!in_sig && !in_uue && !in_verbatim && MATCH_REGEX(verbatim_begin_regex, line, len)) 547 verbatim_begin = TRUE; 548 if (in_verbatim && MATCH_REGEX(verbatim_end_regex, line, len)) 549 in_verbatim = FALSE; 550 } 551 552 if (!in_verbatim) { 553 /* 554 * Detect and skip signatures if necessary 555 */ 556 if (!in_sig) { 557 if (strcmp(line, SIGDASHES) == 0) { 558 in_sig = TRUE; 559 if (in_uue) { 560 in_uue = FALSE; 561 if (hide_uue) 562 PUT_UUE(curruue, _(txt_incomplete)); 563 } 564 } 565 } 566 567 if (in_sig && !(curr_group->attribute->show_signatures)) 568 continue; /* No further processing needed */ 569 570 /* 571 * Detect and process uuencoded sections 572 * Look for the start or the end of a uuencoded section 573 * 574 * TODO: look for a tailing size line after end (non standard 575 * extension)? 
576 */ 577 if (match_regex_ex(line, len, 0, 0, &uubegin_regex) >= 0) { 578 REGEX_SIZE *ovector = regex_get_ovector_pointer(&uubegin_regex); 579 580 in_uue = TRUE; 581 curruue = new_uue(&part, line + ovector[1]); 582 if (hide_uue) 583 continue; /* Don't cook the 'begin' line */ 584 } else if (strncmp(line, "end\n", 4) == 0) { 585 if (in_uue) { 586 in_uue = FALSE; 587 if (hide_uue) { 588 PUT_UUE(curruue, ""); 589 continue; /* Don't cook the 'end' line */ 590 } 591 } 592 } 593 594 /* 595 * See if this line looks like a uuencoded 'body' line 596 */ 597 is_uubody = FALSE; 598 599 if (MATCH_REGEX(uubody_regex, line, len)) { 600 int sum = (((*line) - ' ') & 077) * 4 / 3; /* uuencode octet checksum */ 601 602 /* sum = 0 in a uubody only on the last line, a single ` */ 603 if (sum == 0 && len == 1 + 1) /* +1 for the \n */ 604 is_uubody = TRUE; 605 else if (len == sum + 1 + 1) 606 is_uubody = TRUE; 607 #ifdef DEBUG_ART 608 if (debug & DEBUG_MISC) 609 fprintf(stderr, "%s sum=%d len=%d (%s)\n", bool_unparse(is_uubody), sum, len, line); 610 #endif /* DEBUG_ART */ 611 } 612 613 if (in_uue) { 614 if (is_uubody) 615 curruue->line_count++; 616 else { 617 if (line[0] == '\n') { /* Blank line in a uubody - definitely a failure */ 618 /* fprintf(stderr, "not a uue line while reading a uue body?\n"); */ 619 in_uue = FALSE; 620 if (hide_uue) 621 /* don't continue here, so we see the line that 'broke' in_uue */ 622 PUT_UUE(curruue, _(txt_incomplete)); 623 } 624 } 625 } else { 626 /* 627 * UUE_ALL = 'Try harder' - we never saw a begin line, but useful 628 * when uue sections are split across > 1 article 629 */ 630 if (is_uubody && hide_uue == UUE_ALL) { 631 char name[] = N_("(unknown)"); 632 633 curruue = new_uue(&part, name); 634 curruue->line_count++; 635 in_uue = TRUE; 636 continue; 637 } 638 } 639 640 /* 641 * Skip output if we're hiding uue or the sig 642 */ 643 if (in_uue && hide_uue) 644 continue; /* No further processing needed */ 645 } 646 647 flags = in_verbatim ? 
C_VERBATIM : in_sig ? C_SIG : C_BODY; 648 649 /* 650 * Don't do any further handling of uue || verbatim lines 651 */ 652 if (in_uue) { 653 put_cooked(max_line_len, wrap_lines, flags, "%s", line); 654 continue; 655 } else if (in_verbatim) { 656 expand_ctrl_chars(&line, &max_line_len, 8); 657 put_cooked(max_line_len, wrap_lines, flags, "%s", line); 658 continue; 659 } 660 661 #ifdef HAVE_COLOR 662 /* keep order in sync with color.c:draw_pager_line() */ 663 if (quote_regex3.re) { 664 if (MATCH_REGEX(quote_regex3, line, len)) 665 flags |= C_QUOTE3; 666 else if (quote_regex2.re) { 667 if (MATCH_REGEX(quote_regex2, line, len)) 668 flags |= C_QUOTE2; 669 else if (curr_group->attribute->extquote_handling && extquote_regex.re) { 670 if (MATCH_REGEX(extquote_regex, line, len)) 671 flags |= C_EXTQUOTE; 672 else if (quote_regex.re) { 673 if (MATCH_REGEX(quote_regex, line, len)) 674 flags |= C_QUOTE1; 675 } 676 } else if (quote_regex.re) { 677 if (MATCH_REGEX(quote_regex, line, len)) 678 flags |= C_QUOTE1; 679 } 680 } 681 } 682 #endif /* HAVE_COLOR */ 683 684 if (MATCH_REGEX(url_regex, line, len)) 685 flags |= C_URL; 686 if (MATCH_REGEX(mail_regex, line, len)) 687 flags |= C_MAIL; 688 if (MATCH_REGEX(news_regex, line, len)) 689 flags |= C_NEWS; 690 691 if (expand_ctrl_chars(&line, &max_line_len, tabwidth)) 692 flags |= C_CTRLL; /* Line contains form-feed */ 693 694 buf = line; 695 696 /* 697 * Skip over the first space in case of Format=Flowed (space-stuffing) 698 */ 699 if (part->format == FORMAT_FLOWED) { 700 if (line[0] == ' ') 701 ++buf; 702 } 703 704 put_cooked(max_line_len, wrap_lines && (!IS_LOCAL_CHARSET("Big5")), flags, "%s", buf); 705 } /* while */ 706 707 /* 708 * Were we reading uue and ran off the end ? 
709 */ 710 if (in_uue && hide_uue) 711 PUT_UUE(curruue, _(txt_incomplete)); 712 713 free(line); 714 } 715 716 717 /* 718 * Return TRUE if this header should be printed as per 719 * news_headers_to_[not_]display 720 */ 721 static t_bool 722 header_wanted( 723 const char *line) 724 { 725 int i; 726 t_bool ret = FALSE; 727 728 if (curr_group->attribute->headers_to_display->num && (curr_group->attribute->headers_to_display->header[0][0] == '*')) 729 ret = TRUE; /* wild do */ 730 else { 731 for (i = 0; i < curr_group->attribute->headers_to_display->num; i++) { 732 if (!strncasecmp(line, curr_group->attribute->headers_to_display->header[i], strlen(curr_group->attribute->headers_to_display->header[i]))) { 733 ret = TRUE; 734 break; 735 } 736 } 737 } 738 739 if (curr_group->attribute->headers_to_not_display->num && (curr_group->attribute->headers_to_not_display->header[0][0] == '*')) 740 ret = FALSE; /* wild don't: doesn't make sense! */ 741 else { 742 for (i = 0; i < curr_group->attribute->headers_to_not_display->num; i++) { 743 if (!strncasecmp(line, curr_group->attribute->headers_to_not_display->header[i], strlen(curr_group->attribute->headers_to_not_display->header[i]))) { 744 ret = FALSE; 745 break; 746 } 747 } 748 } 749 750 return ret; 751 } 752 753 754 /* #define DEBUG_ART 1 */ 755 #ifdef DEBUG_ART 756 static void 757 dump_cooked( 758 void) 759 { 760 char *line; 761 int i; 762 763 for (i = 0; i < art->cooked_lines; i++) { 764 fseek(art->cooked, art->cookl[i].offset, SEEK_SET); 765 line = tin_fgets(art->cooked, FALSE); 766 fprintf(stderr, "[%3d] %4ld %3x [%s]\n", i, art->cookl[i].offset, art->cookl[i].flags, line); 767 } 768 fprintf(stderr, "%d lines cooked\n", art->cooked_lines); 769 } 770 #endif /* DEBUG_ART */ 771 772 773 /* 774 * Check for charsets which may contain NULL bytes and thus break string 775 * functions. Possibly incomplete. 776 * 777 * TODO: fix the other code to handle those charsets properly. 
778 */ 779 static t_bool 780 charset_unsupported( 781 const char *charset) 782 { 783 static const char *charsets[] = { 784 "csUnicode", /* alias for ISO-10646-UCS-2 */ 785 "csUCS4", /* alias for ISO-10646-UCS-4 */ 786 "ISO-10646-UCS-2", 787 "ISO-10646-UCS-4", 788 "UTF-16", /* covers also BE/LE */ 789 "UTF-32", /* covers also BE/LE */ 790 NULL }; 791 const char **charsetptr = charsets; 792 t_bool ret = FALSE; 793 794 if (!charset) 795 return ret; 796 797 do { 798 if (!strncasecmp(charset, *charsetptr, strlen(*charsetptr))) 799 ret = TRUE; 800 } while (!ret && *(++charsetptr) != NULL); 801 802 return ret; 803 } 804 805 806 /* 807 * 'cooks' an article, ie, prepare what will actually appear on the screen 808 * It is not easy to do this in the same pass as the initial read since 809 * boundary conditions for multipart articles make it harder to do on the 810 * fly decoding. 811 * We could have cooked the headers whilst they were being read but we're 812 * trying to keep this simple. 813 * 814 * Expects: 815 * Fresh article context to write into 816 * parse_uue is set only when the art is opened to create t_parts for 817 * uue sections found, when resizing this is not needed 818 * hide_uue determines the folding of uue sections 819 * Handles: 820 * multipart articles 821 * stripping of non text sections if skip_alternative 822 * Q and B decoding of text sections 823 * handling of uuencoded sections 824 * stripping of sigs if !show_signatures 825 * Returns: 826 * TRUE on success 827 * 828 * TODO: 829 * give an error-message on at least disk-full 830 */ 831 t_bool 832 cook_article( 833 t_bool wrap_lines, 834 t_openartinfo *artinfo, 835 int hide_uue, 836 t_bool show_all_headers) 837 { 838 const char *charset; 839 const char *name; 840 char *line; 841 struct t_header *hdr = &artinfo->hdr; 842 t_bool header_put = FALSE; 843 static const char *struct_header[] = { 844 "Approved: ", "From: ", "Originator: ", 845 "Reply-To: ", "Sender: ", "X-Cancelled-By: ", "X-Comment-To: ", 846 
"X-Submissions-To: ", "To: ", "Cc: ", "Bcc: ", "X-Originator: ", NULL }; 847 848 art = artinfo; /* Global saves lots of passing artinfo around */ 849 850 if (!(art->cooked = tmpfile())) 851 return FALSE; 852 853 art->cooked_lines = 0; 854 855 rewind(artinfo->raw); 856 857 /* 858 * Put down just the headers we want 859 */ 860 while ((line = tin_fgets(artinfo->raw, TRUE)) != NULL) { 861 if (line[0] == '\0') { /* End of headers? */ 862 if (STRIP_ALTERNATIVE(artinfo)) { 863 if (header_wanted(_(txt_info_x_conversion_note))) { 864 header_put = TRUE; 865 put_cooked(LEN, wrap_lines, C_HEADER, _(txt_info_x_conversion_note)); 866 } 867 } 868 if (header_put) 869 put_cooked(1, TRUE, 0, "\n"); /* put a newline after headers */ 870 break; 871 } 872 873 if (show_all_headers || header_wanted(line)) { /* Put cooked data */ 874 const char **strptr = struct_header; 875 char *l = NULL, *ptr, *foo, *bar; 876 size_t i = LEN; 877 t_bool found = FALSE; 878 879 /* structured headers */ 880 do { 881 if (!strncasecmp(line, *strptr, strlen(*strptr))) { 882 foo = my_strdup(*strptr); 883 if ((ptr = strchr(foo, ':'))) { 884 *ptr = '\0'; 885 unfold_header(line); 886 if ((ptr = parse_header(line, foo, TRUE, TRUE, FALSE))) { 887 #if 0 888 /* 889 * TODO: 890 * idna_decode() currently expects just a FQDN 891 * or a mailaddress (with all comments stripped). 
892 * 893 * we need to look for something like 894 * (?i)((?:\S+\.)?xn--[a-z0-9\.\-]{3,}\S+)\b 895 * and just decode $1 896 * maybe also in process_text_body_part() 897 */ 898 bar = idna_decode(ptr); 899 #else 900 bar = my_strdup(ptr); 901 #endif /* 0 */ 902 l = my_calloc(1, strlen(bar) + strlen(*strptr) + 1); 903 strncpy(l, line, strlen(*strptr)); 904 strcat(l, bar); 905 free(bar); 906 } 907 } 908 free(foo); 909 found = TRUE; 910 } 911 } while (!found && *(++strptr) != NULL); 912 913 /* unstructured but must not be decoded */ 914 if (l == NULL && (!strncasecmp(line, "References: ", 12) || !strncasecmp(line, "Message-ID: ", 12) || !strncasecmp(line, "Date: ", 6) || !strncasecmp(line, "Newsgroups: ", 12) || !strncasecmp(line, "Distribution: ", 14) || !strncasecmp(line, "Followup-To: ", 13) || !strncasecmp(line, "X-Face: ", 8) || !strncasecmp(line, "Cancel-Lock: ", 13) || !strncasecmp(line, "Cancel-Key: ", 12) || !strncasecmp(line, "Supersedes: ", 12))) 915 l = my_strdup(line); 916 917 if (l == NULL) 918 l = my_strdup(rfc1522_decode(line)); 919 920 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE) 921 if (IS_LOCAL_CHARSET("UTF-8")) 922 utf8_valid(l); 923 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */ 924 header_put = TRUE; 925 expand_ctrl_chars(&l, &i, tabwidth); 926 put_cooked(i, wrap_lines, C_HEADER, "%s", l); 927 free(l); 928 } 929 } 930 931 if (tin_errno != 0) 932 return FALSE; 933 934 /* 935 * Process the attachments in turn, print a neato header, and process/decode 936 * the body if of text type 937 */ 938 if (hdr->mime && hdr->ext->type == TYPE_MULTIPART) { 939 t_part *ptr; 940 941 for (ptr = hdr->ext->next; ptr != NULL; ptr = ptr->next) { 942 /* 943 * Ignore non text/plain sections with alternative handling 944 */ 945 if (STRIP_ALTERNATIVE(artinfo) && !IS_PLAINTEXT(ptr)) 946 continue; 947 948 name = get_filename(ptr->params); 949 if (!strcmp(content_types[ptr->type], "text")) 950 charset = get_param(ptr->params, "charset"); 951 else 952 charset = NULL; 953 
PUT_ATTACH(ptr, (ptr->depth - 1) * 4, name, charset); 954 955 /* Try to view anything of type text, may need to review this */ 956 if (IS_PLAINTEXT(ptr)) { 957 if (charset_unsupported(charset)) { 958 put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), (ptr->depth - 1) * 4, "", charset); 959 if (ptr->next) 960 put_cooked(1, wrap_lines, C_ATTACH, "\n"); 961 } else 962 process_text_body_part(wrap_lines, artinfo->raw, ptr, hide_uue); 963 } 964 } 965 } else { 966 if (!strcmp(content_types[hdr->ext->type], "text")) 967 charset = get_param(hdr->ext->params, "charset"); 968 else 969 charset = NULL; 970 /* 971 * A regular single-body article 972 */ 973 if (IS_PLAINTEXT(hdr->ext)) { 974 if (charset_unsupported(charset)) 975 put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), 0, "", charset); 976 else 977 process_text_body_part(wrap_lines, artinfo->raw, hdr->ext, hide_uue); 978 } else { 979 /* 980 * Non-textual main body 981 */ 982 name = get_filename(hdr->ext->params); 983 PUT_ATTACH(hdr->ext, 0, name, charset); 984 } 985 } 986 987 #ifdef DEBUG_ART 988 dump_cooked(); 989 #endif /* DEBUG_ART */ 990 991 if (art->cooked_lines > 0) 992 art->cookl = my_realloc(art->cookl, sizeof(t_lineinfo) * (size_t) art->cooked_lines); 993 994 rewind(art->cooked); 995 return (tin_errno != 0) ? FALSE : TRUE; 996 }