tin  2.6.1
About: TIN is a threaded NNTP and spool based UseNet newsreader.
  Fossies Dox: tin-2.6.1.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

cook.c
Go to the documentation of this file.
1/*
2 * Project : tin - a Usenet reader
3 * Module : cook.c
4 * Author : J. Faultless
5 * Created : 2000-03-08
6 * Updated : 2021-03-13
7 * Notes : Split from page.c
8 *
9 * Copyright (c) 2000-2022 Jason Faultless <jason@altarstone.com>
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * 1. Redistributions of source code must retain the above copyright notice,
17 * this list of conditions and the following disclaimer.
18 *
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 *
23 * 3. Neither the name of the copyright holder nor the names of its
24 * contributors may be used to endorse or promote products derived from
25 * this software without specific prior written permission.
26 *
27 * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40#ifndef TIN_H
41# include "tin.h"
42#endif /* !TIN_H */
43#ifndef TCURSES_H
44# include "tcurses.h"
45#endif /* !TCURSES_H */
46
47
48/*
49 * We malloc() this many t_lineinfo's at a time
50 */
51#define CHUNK 50
52
/*
 * STRIP_ALTERNATIVE(x): true when the current group has alternative_handling
 * enabled and the article x (a pointer with a hdr.ext part) is of type
 * multipart with subtype "alternative" (compared case-insensitively).
 */
#define STRIP_ALTERNATIVE(x) \
	(curr_group->attribute->alternative_handling && \
	(x)->hdr.ext->type == TYPE_MULTIPART && \
	strcasecmp("alternative", (x)->hdr.ext->subtype) == 0)

/*
 * MATCH_REGEX(x,y,z): true when the compiled regex cache entry x matches
 * the subject string y of length z. No capture offsets are requested.
 * Arguments are parenthesized so that expressions can be passed safely.
 */
#define MATCH_REGEX(x,y,z) (pcre_exec((x).re, (x).extra, (y), (z), 0, 0, NULL, 0) >= 0)
59
60
61static t_bool charset_unsupported(const char *charset);
62static t_bool header_wanted(const char *line);
63static t_part *new_uue(t_part **part, char *name);
64static void process_text_body_part(t_bool wrap_lines, FILE *in, t_part *part, int hide_uue);
65static void put_cooked(size_t buf_len, t_bool wrap_lines, int flags, const char *fmt, ...);
66#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
67 static t_bool wexpand_ctrl_chars(wchar_t **wline, size_t *length, size_t lcook_width);
68#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
69#ifdef DEBUG_ART
70 static void dump_cooked(void);
71#endif /* DEBUG_ART */
72
73
74/*
75 * These are used globally within this module for access to the context
76 * currently being built. They must not leak outside.
77 */
79
80
81/*
82 * Handle backspace, expand tabs, expand control chars to a literal ^[A-Z]
83 * Allows \n through
84 * Return TRUE if line contains a ^L (form-feed)
85 */
88 char **line,
89 size_t *length,
90 size_t lcook_width)
91{
92 t_bool ctrl_L = FALSE;
93#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
94 wchar_t *wline = char2wchar_t(*line);
95 size_t wlen;
96
97 /*
98 * remove the assert() before release
99 * it should help us find problems with wide-char strings
100 * in the development branch
101 */
102 assert(wline != NULL);
103 wlen = wcslen(wline);
104 ctrl_L = wexpand_ctrl_chars(&wline, &wlen, lcook_width);
105 free(*line);
106 *line = wchar_t2char(wline);
107 free(wline);
108 assert(line != NULL);
109 *length = strlen(*line);
110#else
111 int curr_len = LEN;
112 unsigned int i = 0, j, ln = 0;
113 char *buf = my_malloc(curr_len);
114 unsigned char *c;
115
116 c = (unsigned char *) *line;
117 while (*c) {
118 if (i > curr_len - (lcook_width + 1)) {
119 curr_len <<= 1;
120 buf = my_realloc(buf, curr_len);
121 }
122 if (*c == '\n')
123 ln = i + 1;
124 if (*c == '\t') { /* expand tabs */
125 j = i + lcook_width - ((i - ln) % lcook_width);
126 for (; i < j; i++)
127 buf[i] = ' ';
128 } else if (((*c) & 0xFF) < ' ' && *c != '\n' && (!IS_LOCAL_CHARSET("Big5") || *c != 27)) { /* literal ctrl chars */
129 buf[i++] = '^';
130 buf[i++] = ((*c) & 0xFF) + '@';
131 if (*c == '\f') /* ^L detected */
132 ctrl_L = TRUE;
133 } else {
134 if (!my_isprint(*c) && *c != '\n')
135 buf[i++] = '?';
136 else
137 buf[i++] = *c;
138 }
139 c++;
140 }
141 buf[i] = '\0';
142 *length = i + 1;
143 *line = my_realloc(*line, *length);
144 strcpy(*line, buf);
145 free(buf);
146#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
147 return ctrl_L;
148}
149
150
151#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
152static t_bool
153wexpand_ctrl_chars(
154 wchar_t **wline,
155 size_t *length,
156 size_t lcook_width)
157{
158 size_t cur_len = LEN, i = 0, j, ln = 0;
159 wchar_t *wbuf = my_malloc(cur_len * sizeof(wchar_t));
160 wchar_t *wc;
161 t_bool ctrl_L = FALSE;
162
163 wc = *wline;
164 while (*wc) {
165 if (i > cur_len - (lcook_width + 1)) {
166 cur_len <<= 1;
167 wbuf = my_realloc(wbuf, cur_len * sizeof(wchar_t));
168 }
169 if (*wc == '\n')
170 ln = i + 1;
171 if (*wc == '\t') { /* expand_tabs */
172 j = i + lcook_width - ((i - ln) % lcook_width);
173 for (; i < j; i++)
174 wbuf[i] = ' ';
175 } else if (*wc < ' ' && *wc != '\n' && (!IS_LOCAL_CHARSET("Big5") || *wc != 27)) { /* literal ctrl chars */
176 wbuf[i++] = '^';
177 wbuf[i++] = *wc + '@';
178 if (*wc == '\f') /* ^L detected */
179 ctrl_L = TRUE;
180 } else {
181 if (!iswprint((wint_t) *wc) && *wc != '\n')
182 wbuf[i++] = '?';
183 else
184 wbuf[i++] = *wc;
185 }
186 wc++;
187 }
188 wbuf[i] = '\0';
189 *length = i + 1;
190 *wline = my_realloc(*wline, *length * sizeof(wchar_t));
191 wcscpy(*wline, wbuf);
192 free(wbuf);
193 return ctrl_L;
194}
195#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
196
197
198/*
199 * Output text to the cooked stream. Wrap lines as necessary.
200 * Update the line count and the array of line offsets
201 * Extend the lineoffset array as needed in CHUNK amounts.
202 * flags are 'hints' to the pager about line content.
203 * buf_len is the size put_cooked should use for its buffer.
204 */
205static void
207 size_t buf_len,
208 t_bool wrap_lines,
209 int flags,
210 const char *fmt,
211 ...)
212{
213 char *p, *bufp, *buf;
214 int wrap_column;
215 int space;
216 static int saved_flags = 0;
217 va_list ap;
218#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
219 int bytes;
220 wint_t *wp;
221#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
222
223 buf = my_malloc(buf_len + 1);
224
225 va_start(ap, fmt);
226 vsnprintf(buf, buf_len + 1, fmt, ap);
227
228 if (tinrc.wrap_column < 0)
229 wrap_column = ((tinrc.wrap_column > -cCOLS) ? cCOLS + tinrc.wrap_column : cCOLS);
230 else
231#if 1
232 wrap_column = ((tinrc.wrap_column > 0) ? tinrc.wrap_column : cCOLS);
233#else /* never cut off long lines */
234 wrap_column = (((tinrc.wrap_column > 0) && (tinrc.wrap_column < cCOLS)) ? tinrc.wrap_column : cCOLS);
235#endif /* 1 */
236
237 p = bufp = buf;
238
239#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
240 wp = my_malloc((MB_CUR_MAX + 1) * sizeof(wint_t));
241#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
242
243 while (*p) {
244 if (wrap_lines) {
245 space = wrap_column;
246 while (space > 0 && *p && *p != '\n') {
247#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
248 if ((bytes = mbtowc((wchar_t *) wp, p, MB_CUR_MAX)) > 0) {
249 if ((space -= wcwidth((wchar_t) *wp)) < 0)
250 break;
251 p += bytes;
252 } else
253 p++;
254#else
255 p++;
256 space--;
257#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
258 }
259 } else {
260 while (*p && *p != '\n')
261 p++;
262 }
263 fwrite(bufp, 1, (size_t) (p - bufp), art->cooked);
264 fputs("\n", art->cooked);
265 if (*p == '\n')
266 p++;
267 bufp = p;
268
269 if (art->cooked_lines == 0) {
270 art->cookl = my_malloc(sizeof(t_lineinfo) * CHUNK);
271 art->cookl[0].offset = 0;
272 }
273
274 /*
275 * Pick up flags from a previous partial write
276 */
277 art->cookl[art->cooked_lines].flags = flags | saved_flags;
278 saved_flags = 0;
279 art->cooked_lines++;
280
281 /*
282 * Grow the array of lines if needed - we resize it properly at the end
283 */
284 if (art->cooked_lines % CHUNK == 0)
285 art->cookl = my_realloc(art->cookl, sizeof(t_lineinfo) * CHUNK * (size_t) ((art->cooked_lines / CHUNK) + 1));
286
288 }
289
290#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
291 free(wp);
292#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
293
294 /*
295 * If there is anything left over, then it must be a non \n terminated
296 * partial line from base64 decoding etc.. Dump it now and the rest of
297 * the line (with the \n) will fill in the t_lineinfo
298 * We must save the flags now as the rest of the line may not have the same properties
299 * We need to keep the length for accounting purposes
300 */
301 if (*bufp != '\0') {
302 fputs(bufp, art->cooked);
303 saved_flags = flags;
304 }
305
306 va_end(ap);
307 free(buf);
308}
309
310
311/*
312 * Add a new uuencode attachment description to the current part
313 */
314static t_part *
316 t_part **part,
317 char *name)
318{
319 t_part *ptr = new_part((*part)->uue);
320
321 if (!(*part)->uue) /* new_part() is simple and doesn't attach list heads */
322 (*part)->uue = ptr;
323
324 free_list(ptr->params);
325 /*
326 * Load the name into the parameter list
327 */
328 ptr->params = new_params();
329 ptr->params->name = my_strdup("name");
331
332 ptr->encoding = ENCODING_UUE; /* treat as x-uuencode */
333
334 ptr->offset = ftell(art->cooked);
335 ptr->depth = (*part)->depth; /* uue is at the same depth as the envelope */
336
337 /*
338 * If an extension is present, try and add a Content-Type
339 */
340 if ((name = strrchr(name, '.')) != NULL)
341 lookup_mimetype(name + 1, ptr);
342
343 return ptr;
344}
345
346
347/*
348 * Get the suggested filename for an attachment. RFC says Content-Disposition
349 * 'filename' supersedes Content-Type 'name'. We must also remove path
350 * information.
351 */
352const char *
354 t_param *ptr)
355{
356 const char *name;
357 char *p;
358
359 if (!(name = get_param(ptr, "filename"))) {
360 if (!(name = get_param(ptr, "name")))
361 return NULL;
362 }
363
364 if ((p = strrchr(name, DIRSEP)))
365 return p + 1;
366
367 return name;
368}
369
370
/*
 * PUT_UUE(part, qualifier_text): cook the one-line description (txt_uue)
 * of the uuencoded section 'part', indented by its depth.
 * Expects a variable 'wrap_lines' in the scope of the caller.
 */
#define PUT_UUE(part, qualifier_text) \
	put_cooked(LEN, wrap_lines, C_UUE, _(txt_uue), \
		(part)->depth ? ((part)->depth - 1) * 4 : 0, "", \
		content_types[(part)->type], (part)->subtype, \
		(qualifier_text), (part)->line_count, get_filename((part)->params))

/*
 * PUT_ATTACH(part, depth, name, charset): cook the attachment header
 * (txt_attach) for 'part', followed by its description if it has one and
 * by an empty line if another part follows or the part is plain text.
 * 'name' and 'charset' may be NULL.
 * Expects a variable 'wrap_lines' in the scope of the caller.
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * e.g. under an unbraced if/else.
 */
#define PUT_ATTACH(part, depth, name, charset) \
	do { \
		put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach), \
			(depth), "", \
			content_types[(part)->type], (part)->subtype, \
			content_encodings[(part)->encoding], \
			(charset) ? _(txt_attach_charset) : "", BlankIfNull(charset), \
			(part)->line_count, \
			(name) ? _(txt_name) : "", BlankIfNull(name)); \
		\
		if ((part)->description) \
			put_cooked(LEN, wrap_lines, C_ATTACH, \
				_(txt_attach_description), \
				(depth), "", \
				(part)->description); \
		if ((part)->next != NULL || IS_PLAINTEXT(part)) \
			put_cooked(1, wrap_lines, C_ATTACH, "\n"); \
	} while (0)
393
394/*
395 * Decodes text bodies, remove sig's, detects uuencoded sections
396 */
397static void
399 t_bool wrap_lines,
400 FILE *in,
401 t_part *part,
402 int hide_uue)
403{
404 char *rest = NULL;
405 char *line = NULL, *buf, *tmpline;
406 const char *ncharset;
407 size_t max_line_len = 0;
408 int flags, len, lines_left, len_blank;
409 int offsets[6];
410 int size_offsets = ARRAY_SIZE(offsets);
411 unsigned int lines_skipped = 0;
412 t_bool in_sig = FALSE; /* Set when in sig portion */
413 t_bool in_uue = FALSE; /* Set when in uuencoded section */
414 t_bool in_verbatim = FALSE; /* Set when in verbatim section */
415 t_bool verbatim_begin = FALSE; /* Set when verbatim_begin_regex matches */
416 t_bool is_uubody; /* Set when current line looks like a uuencoded line */
417 t_bool first_line_blank = TRUE; /* Unset when first non-blank line is reached */
418 t_bool put_blank_lines = FALSE; /* Set when previously skipped lines needs to put */
419 t_part *curruue = NULL;
420
421 if (part->uue) { /* These are redone each time we recook/resize etc.. */
423 part->uue = NULL;
424 }
425
426 fseek(in, part->offset, SEEK_SET);
427
429 (void) mmdecode(NULL, 'b', 0, NULL); /* flush */
430
431 lines_left = part->line_count;
432 while ((lines_left > 0) || rest) {
433 switch (part->encoding) {
434 case ENCODING_BASE64:
435 lines_left -= read_decoded_base64_line(in, &line, &max_line_len, lines_left, &rest);
436 break;
437
438 case ENCODING_QP:
439 lines_left -= read_decoded_qp_line(in, &line, &max_line_len, lines_left);
440 break;
441
442 default:
443 if ((buf = tin_fgets(in, FALSE)) == NULL) {
444 FreeAndNull(line);
445 break;
446 }
447
448 /*
449 * tin_fgets() uses the returned space also internally
450 * so it's not advisable to use it for our own purposes
451 * especially if we must resize it.
452 * So copy buf to line (and resize line if necessary).
453 */
454 if (max_line_len < strlen(buf) + 2 || !line) {
455 max_line_len = strlen(buf) + 2;
456 line = my_realloc(line, max_line_len);
457 }
458 strcpy(line, buf);
459
460 /*
461 * FIXME: Some code in cook.c expects a '\n' at the end
462 * of the line. As tin_fgets() strips trailing '\n', re-add it.
463 * This should probably be fixed in that other code.
464 */
465 strcat(line, "\n");
466
467 lines_left--;
468 break;
469 }
470 if (!(line && strlen(line))) {
471 FreeIfNeeded(rest);
472 break; /* premature end of file, file error etc. */
473 }
474
475 /* convert network to local charset, tex2iso, iso2asc etc. */
476 ncharset = get_param(part->params, "charset");
477 process_charsets(&line, &max_line_len, ncharset ? ncharset : "US-ASCII", tinrc.mm_local_charset, curr_group->attribute->tex2iso_conv && art->tex2iso);
478
479#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
480 if (IS_LOCAL_CHARSET("UTF-8"))
481 utf8_valid(line);
482#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
483
484 len = (int) strlen(line);
485
486 /*
487 * trim article body and sig (not verbatim blocks):
488 * - skip leading blank lines
489 * - replace multiple blank lines with one empty line
490 * - skip tailing blank lines, keep one if an
491 * attachment follows
492 */
493 if (curr_group->attribute->trim_article_body && !in_uue && !in_verbatim && !verbatim_begin) {
494 len_blank = 1;
495 tmpline = line;
496 /* check if line contains only whitespace */
497 while ((*tmpline == ' ') || (*tmpline == '\t')) {
498 len_blank++;
499 tmpline++;
500 }
501 if (len_blank == len) { /* line is blank */
502 if (lines_left == 0 && (curr_group->attribute->trim_article_body & SKIP_TRAILING)) {
503 if (!(part->next == NULL || (STRIP_ALTERNATIVE(art) && !IS_PLAINTEXT(part->next))))
504 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
505 continue;
506 }
507 if (first_line_blank) {
509 continue;
511 lines_skipped++;
512 if (lines_left == 0 && !(curr_group->attribute->trim_article_body & SKIP_TRAILING)) {
513 for (; lines_skipped > 0; lines_skipped--)
514 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
515 }
516 continue;
517 }
518 } else { /* line is not blank */
519 if (first_line_blank)
520 first_line_blank = FALSE;
521 if (lines_skipped && (!in_sig || curr_group->attribute->show_signatures)) {
522 if (strcmp(line, SIGDASHES) != 0 || curr_group->attribute->show_signatures) {
524 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
525 else
526 put_blank_lines = TRUE;
528 put_blank_lines = TRUE;
529 if (put_blank_lines) {
530 for (; lines_skipped > 0; lines_skipped--)
531 put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
532 }
533 put_blank_lines = FALSE;
534 lines_skipped = 0;
535 }
536 }
537 } /* if (tinrc.trim_article_body...) */
538
539 /* look for verbatim marks, set in_verbatim only for lines in between */
541 if (verbatim_begin) {
542 in_verbatim = TRUE;
543 verbatim_begin = FALSE;
544 } else if (!in_sig && !in_uue && !in_verbatim && MATCH_REGEX(verbatim_begin_regex, line, len))
545 verbatim_begin = TRUE;
546 if (in_verbatim && MATCH_REGEX(verbatim_end_regex, line, len))
547 in_verbatim = FALSE;
548 }
549
550 if (!in_verbatim) {
551 /*
552 * Detect and skip signatures if necessary
553 */
554 if (!in_sig) {
555 if (strcmp(line, SIGDASHES) == 0) {
556 in_sig = TRUE;
557 if (in_uue) {
558 in_uue = FALSE;
559 if (hide_uue)
560 PUT_UUE(curruue, _(txt_incomplete));
561 }
562 }
563 }
564
565 if (in_sig && !(curr_group->attribute->show_signatures))
566 continue; /* No further processing needed */
567
568 /*
569 * Detect and process uuencoded sections
570 * Look for the start or the end of a uuencoded section
571 *
572 * TODO: look for a tailing size line after end (non standard
573 * extension)?
574 */
575 if (pcre_exec(uubegin_regex.re, uubegin_regex.extra, line, len, 0, 0, offsets, size_offsets) != PCRE_ERROR_NOMATCH) {
576 in_uue = TRUE;
577 curruue = new_uue(&part, line + offsets[1]);
578 if (hide_uue)
579 continue; /* Don't cook the 'begin' line */
580 } else if (strncmp(line, "end\n", 4) == 0) {
581 if (in_uue) {
582 in_uue = FALSE;
583 if (hide_uue) {
584 PUT_UUE(curruue, "");
585 continue; /* Don't cook the 'end' line */
586 }
587 }
588 }
589
590 /*
591 * See if this line looks like a uuencoded 'body' line
592 */
593 is_uubody = FALSE;
594
595 if (MATCH_REGEX(uubody_regex, line, len)) {
596 int sum = (((*line) - ' ') & 077) * 4 / 3; /* uuencode octet checksum */
597
598 /* sum = 0 in a uubody only on the last line, a single ` */
599 if (sum == 0 && len == 1 + 1) /* +1 for the \n */
600 is_uubody = TRUE;
601 else if (len == sum + 1 + 1)
602 is_uubody = TRUE;
603#ifdef DEBUG_ART
604 if (debug & DEBUG_MISC)
605 fprintf(stderr, "%s sum=%d len=%d (%s)\n", bool_unparse(is_uubody), sum, len, line);
606#endif /* DEBUG_ART */
607 }
608
609 if (in_uue) {
610 if (is_uubody)
611 curruue->line_count++;
612 else {
613 if (line[0] == '\n') { /* Blank line in a uubody - definitely a failure */
614 /* fprintf(stderr, "not a uue line while reading a uue body?\n"); */
615 in_uue = FALSE;
616 if (hide_uue)
617 /* don't continue here, so we see the line that 'broke' in_uue */
618 PUT_UUE(curruue, _(txt_incomplete));
619 }
620 }
621 } else {
622 /*
623 * UUE_ALL = 'Try harder' - we never saw a begin line, but useful
624 * when uue sections are split across > 1 article
625 */
626 if (is_uubody && hide_uue == UUE_ALL) {
627 char name[] = N_("(unknown)");
628
629 curruue = new_uue(&part, name);
630 curruue->line_count++;
631 in_uue = TRUE;
632 continue;
633 }
634 }
635
636 /*
637 * Skip output if we're hiding uue or the sig
638 */
639 if (in_uue && hide_uue)
640 continue; /* No further processing needed */
641 }
642
643 flags = in_verbatim ? C_VERBATIM : in_sig ? C_SIG : C_BODY;
644
645 /*
646 * Don't do any further handling of uue || verbatim lines
647 */
648 if (in_uue) {
649 put_cooked(max_line_len, wrap_lines, flags, "%s", line);
650 continue;
651 } else if (in_verbatim) {
652 expand_ctrl_chars(&line, &max_line_len, 8);
653 put_cooked(max_line_len, wrap_lines, flags, "%s", line);
654 continue;
655 }
656
657#ifdef HAVE_COLOR
658 /* keep order in sync with color.c:draw_pager_line() */
659 if (quote_regex3.re) {
660 if (MATCH_REGEX(quote_regex3, line, len))
661 flags |= C_QUOTE3;
662 else if (quote_regex2.re) {
663 if (MATCH_REGEX(quote_regex2, line, len))
664 flags |= C_QUOTE2;
665 else if (curr_group->attribute->extquote_handling && extquote_regex.re) {
666 if (MATCH_REGEX(extquote_regex, line, len))
667 flags |= C_EXTQUOTE;
668 else if (quote_regex.re) {
669 if (MATCH_REGEX(quote_regex, line, len))
670 flags |= C_QUOTE1;
671 }
672 } else if (quote_regex.re) {
673 if (MATCH_REGEX(quote_regex, line, len))
674 flags |= C_QUOTE1;
675 }
676 }
677 }
678#endif /* HAVE_COLOR */
679
680 if (MATCH_REGEX(url_regex, line, len))
681 flags |= C_URL;
682 if (MATCH_REGEX(mail_regex, line, len))
683 flags |= C_MAIL;
684 if (MATCH_REGEX(news_regex, line, len))
685 flags |= C_NEWS;
686
687 if (expand_ctrl_chars(&line, &max_line_len, tabwidth))
688 flags |= C_CTRLL; /* Line contains form-feed */
689
690 buf = line;
691
692 /*
693 * Skip over the first space in case of Format=Flowed (space-stuffing)
694 */
695 if (part->format == FORMAT_FLOWED) {
696 if (line[0] == ' ')
697 ++buf;
698 }
699
700 put_cooked(max_line_len, wrap_lines && (!IS_LOCAL_CHARSET("Big5")), flags, "%s", buf);
701 } /* while */
702
703 /*
704 * Were we reading uue and ran off the end ?
705 */
706 if (in_uue && hide_uue)
707 PUT_UUE(curruue, _(txt_incomplete));
708
709 free(line);
710}
711
712
713/*
714 * Return TRUE if this header should be printed as per
715 * news_headers_to_[not_]display
716 */
717static t_bool
719 const char *line)
720{
721 int i;
722 t_bool ret = FALSE;
723
725 ret = TRUE; /* wild do */
726 else {
727 for (i = 0; i < curr_group->attribute->headers_to_display->num; i++) {
729 ret = TRUE;
730 break;
731 }
732 }
733 }
734
736 ret = FALSE; /* wild don't: doesn't make sense! */
737 else {
738 for (i = 0; i < curr_group->attribute->headers_to_not_display->num; i++) {
740 ret = FALSE;
741 break;
742 }
743 }
744 }
745
746 return ret;
747}
748
749
750/* #define DEBUG_ART 1 */
751#ifdef DEBUG_ART
752static void
753dump_cooked(
754 void)
755{
756 char *line;
757 int i;
758
759 for (i = 0; i < art->cooked_lines; i++) {
760 fseek(art->cooked, art->cookl[i].offset, SEEK_SET);
761 line = tin_fgets(art->cooked, FALSE);
762 fprintf(stderr, "[%3d] %4ld %3x [%s]\n", i, art->cookl[i].offset, art->cookl[i].flags, line);
763 }
764 fprintf(stderr, "%d lines cooked\n", art->cooked_lines);
765}
766#endif /* DEBUG_ART */
767
768
769/*
770 * Check for charsets which may contain NUL bytes and thus break string
771 * functions. Possibly incomplete.
772 *
773 * TODO: fix the other code to handle those charsets properly.
774 */
775static t_bool
777 const char *charset)
778{
779 static const char *charsets[] = {
780 "csUnicode", /* alias for ISO-10646-UCS-2 */
781 "csUCS4", /* alias for ISO-10646-UCS-4 */
782 "ISO-10646-UCS-2",
783 "ISO-10646-UCS-4",
784 "UTF-16", /* covers also BE/LE */
785 "UTF-32", /* covers also BE/LE */
786 NULL };
787 const char **charsetptr = charsets;
788 t_bool ret = FALSE;
789
790 if (!charset)
791 return ret;
792
793 do {
794 if (!strncasecmp(charset, *charsetptr, strlen(*charsetptr)))
795 ret = TRUE;
796 } while (!ret && *(++charsetptr) != NULL);
797
798 return ret;
799}
800
801
802/*
803 * 'cooks' an article, ie, prepare what will actually appear on the screen
804 * It is not easy to do this in the same pass as the initial read since
805 * boundary conditions for multipart articles make it harder to do on the
806 * fly decoding.
807 * We could have cooked the headers whilst they were being read but we're
808 * trying to keep this simple.
809 *
810 * Expects:
811 * Fresh article context to write into
812 * parse_uue is set only when the art is opened to create t_parts for
813 * uue sections found, when resizing this is not needed
814 * hide_uue determines the folding of uue sections
815 * Handles:
816 * multipart articles
817 * stripping of non text sections if skip_alternative
818 * Q and B decoding of text sections
819 * handling of uuencoded sections
820 * stripping of sigs if !show_signatures
821 * Returns:
822 * TRUE on success
823 *
824 * TODO:
825 * give an error-message on at least disk-full
826 */
827t_bool
829 t_bool wrap_lines,
830 t_openartinfo *artinfo,
831 int hide_uue,
833{
834 const char *charset;
835 const char *name;
836 char *line;
837 struct t_header *hdr = &artinfo->hdr;
838 t_bool header_put = FALSE;
839 static const char *struct_header[] = {
840 "Approved: ", "From: ", "Originator: ",
841 "Reply-To: ", "Sender: ", "X-Cancelled-By: ", "X-Comment-To: ",
842 "X-Submissions-To: ", "To: ", "Cc: ", "Bcc: ", "X-Originator: ", NULL };
843
844 art = artinfo; /* Global saves lots of passing artinfo around */
845
846 if (!(art->cooked = tmpfile()))
847 return FALSE;
848
849 art->cooked_lines = 0;
850
851 rewind(artinfo->raw);
852
853 /*
854 * Put down just the headers we want
855 */
856 while ((line = tin_fgets(artinfo->raw, TRUE)) != NULL) {
857 if (line[0] == '\0') { /* End of headers? */
858 if (STRIP_ALTERNATIVE(artinfo)) {
860 header_put = TRUE;
862 }
863 }
864 if (header_put)
865 put_cooked(1, TRUE, 0, "\n"); /* put a newline after headers */
866 break;
867 }
868
869 if (show_all_headers || header_wanted(line)) { /* Put cooked data */
870 const char **strptr = struct_header;
871 char *l = NULL, *ptr, *foo, *bar;
872 size_t i = LEN;
873 t_bool found = FALSE;
874
875 /* structured headers */
876 do {
877 if (!strncasecmp(line, *strptr, strlen(*strptr))) {
878 foo = my_strdup(*strptr);
879 if ((ptr = strchr(foo, ':'))) {
880 *ptr = '\0';
881 unfold_header(line);
882 if ((ptr = parse_header(line, foo, TRUE, TRUE, FALSE))) {
883#if 0
884 /*
885 * TODO:
886 * idna_decode() currently expects just a FQDN
887 * or a mailaddress (with all comments stripped).
888 *
889 * we need to look for something like
890 * (?i)((?:\S+\.)?xn--[a-z0-9\.\-]{3,}\S+)\b
891 * and just decode $1
892 * maybe also in process_text_body_part()
893 */
894 bar = idna_decode(ptr);
895#else
896 bar = my_strdup(ptr);
897#endif /* 0 */
898 l = my_calloc(1, strlen(bar) + strlen(*strptr) + 1);
899 strncpy(l, line, strlen(*strptr));
900 strcat(l, bar);
901 free(bar);
902 }
903 }
904 free(foo);
905 found = TRUE;
906 }
907 } while (!found && *(++strptr) != NULL);
908
909 /* unstructured but must not be decoded */
910 if (l == NULL && (!strncasecmp(line, "References: ", 12) || !strncasecmp(line, "Message-ID: ", 12) || !strncasecmp(line, "Date: ", 6) || !strncasecmp(line, "Newsgroups: ", 12) || !strncasecmp(line, "Distribution: ", 14) || !strncasecmp(line, "Followup-To: ", 13) || !strncasecmp(line, "X-Face: ", 8) || !strncasecmp(line, "Cancel-Lock: ", 13) || !strncasecmp(line, "Cancel-Key: ", 12) || !strncasecmp(line, "Supersedes: ", 12)))
911 l = my_strdup(line);
912
913 if (l == NULL)
914 l = my_strdup(rfc1522_decode(line));
915
916#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
917 if (IS_LOCAL_CHARSET("UTF-8"))
918 utf8_valid(l);
919#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
920 header_put = TRUE;
922 put_cooked(i, wrap_lines, C_HEADER, "%s", l);
923 free(l);
924 }
925 }
926
927 if (tin_errno != 0)
928 return FALSE;
929
930 /*
931 * Process the attachments in turn, print a neato header, and process/decode
932 * the body if of text type
933 */
934 if (hdr->mime && hdr->ext->type == TYPE_MULTIPART) {
935 t_part *ptr;
936
937 for (ptr = hdr->ext->next; ptr != NULL; ptr = ptr->next) {
938 /*
939 * Ignore non text/plain sections with alternative handling
940 */
941 if (STRIP_ALTERNATIVE(artinfo) && !IS_PLAINTEXT(ptr))
942 continue;
943
944 name = get_filename(ptr->params);
945 if (!strcmp(content_types[ptr->type], "text"))
946 charset = get_param(ptr->params, "charset");
947 else
948 charset = NULL;
949 PUT_ATTACH(ptr, (ptr->depth - 1) * 4, name, charset);
950
951 /* Try to view anything of type text, may need to review this */
952 if (IS_PLAINTEXT(ptr)) {
953 if (charset_unsupported(charset)) {
954 put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), (ptr->depth - 1) * 4, "", charset);
955 if (ptr->next)
956 put_cooked(1, wrap_lines, C_ATTACH, "\n");
957 } else
958 process_text_body_part(wrap_lines, artinfo->raw, ptr, hide_uue);
959 }
960 }
961 } else {
962 if (!strcmp(content_types[hdr->ext->type], "text"))
963 charset = get_param(hdr->ext->params, "charset");
964 else
965 charset = NULL;
966 /*
967 * A regular single-body article
968 */
969 if (IS_PLAINTEXT(hdr->ext)) {
970 if (charset_unsupported(charset))
971 put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), 0, "", charset);
972 else
973 process_text_body_part(wrap_lines, artinfo->raw, hdr->ext, hide_uue);
974 } else {
975 /*
976 * Non-textual main body
977 */
978 name = get_filename(hdr->ext->params);
979 PUT_ATTACH(hdr->ext, 0, name, charset);
980 }
981 }
982
983#ifdef DEBUG_ART
984 dump_cooked();
985#endif /* DEBUG_ART */
986
987 if (art->cooked_lines > 0)
988 art->cookl = my_realloc(art->cookl, sizeof(t_lineinfo) * (size_t) art->cooked_lines);
989
990 rewind(art->cooked);
991 return (tin_errno != 0) ? FALSE : TRUE;
992}
unsigned t_bool
Definition: bool.h:77
#define TRUE
Definition: bool.h:74
#define FALSE
Definition: bool.h:70
#define bool_unparse(b)
Definition: bool.h:83
static t_openartinfo * art
Definition: cook.c:78
#define CHUNK
Definition: cook.c:51
static t_part * new_uue(t_part **part, char *name)
Definition: cook.c:315
t_bool cook_article(t_bool wrap_lines, t_openartinfo *artinfo, int hide_uue, t_bool show_all_headers)
Definition: cook.c:828
const char * get_filename(t_param *ptr)
Definition: cook.c:353
static t_bool header_wanted(const char *line)
Definition: cook.c:718
static void process_text_body_part(t_bool wrap_lines, FILE *in, t_part *part, int hide_uue)
Definition: cook.c:398
#define PUT_UUE(part, qualifier_text)
Definition: cook.c:371
t_bool expand_ctrl_chars(char **line, size_t *length, size_t lcook_width)
Definition: cook.c:87
static void put_cooked(size_t buf_len, t_bool wrap_lines, int flags, const char *fmt,...)
Definition: cook.c:206
#define PUT_ATTACH(part, depth, name, charset)
Definition: cook.c:377
static t_bool charset_unsupported(const char *charset)
Definition: cook.c:776
#define STRIP_ALTERNATIVE(x)
Definition: cook.c:53
#define MATCH_REGEX(x, y, z)
Definition: cook.c:58
#define DEBUG_MISC
Definition: debug.h:54
constext txt_incomplete[]
Definition: lang.c:565
struct regex_cache verbatim_begin_regex
struct regex_cache uubody_regex
int tin_errno
Definition: read.c:59
size_t tabwidth
Definition: page.c:73
struct regex_cache uubegin_regex
constext txt_info_x_conversion_note[]
Definition: lang.c:561
constext txt_attach_unsup_charset[]
Definition: lang.c:81
struct regex_cache url_regex
struct regex_cache verbatim_end_regex
struct regex_cache news_regex
int cCOLS
Definition: curses.c:53
struct regex_cache mail_regex
constext * content_types[]
Definition: lang.c:1466
struct t_group * curr_group
Definition: group.c:55
struct t_config tinrc
Definition: init.c:192
unsigned short debug
Definition: debug.c:51
static char buf[16]
Definition: langinfo.c:50
static t_bool show_all_headers
Definition: page.c:88
static int hide_uue
Definition: page.c:80
t_part * new_part(t_part *part)
Definition: rfc2046.c:792
char * idna_decode(char *in)
Definition: misc.c:3810
int mmdecode(const char *what, int encoding, int delimiter, char *where)
Definition: rfc2047.c:147
char * parse_header(char *buf, const char *pat, t_bool decode, t_bool structured, t_bool keep_tab)
Definition: rfc2046.c:908
FILE * tmpfile(void)
Definition: tmpfile.c:53
char * str_trim(char *string)
Definition: string.c:539
void process_charsets(char **line, size_t *max_line_len, const char *network_charset, const char *local_charset, t_bool conv_tex2iso)
Definition: misc.c:2656
void free_list(t_param *list)
Definition: rfc2046.c:549
const char * get_param(t_param *list, const char *name)
Definition: rfc2046.c:568
void lookup_mimetype(const char *ext, t_part *part)
Definition: mimetypes.c:105
char * my_strdup(const char *str)
Definition: string.c:139
t_param * new_params(void)
Definition: rfc2046.c:527
void free_parts(t_part *ptr)
Definition: rfc2046.c:846
char * tin_fgets(FILE *fp, t_bool header)
Definition: read.c:317
void unfold_header(char *line)
Definition: rfc2046.c:1148
int read_decoded_qp_line(FILE *file, char **line, size_t *max_line_len, const int max_lines_to_read)
Definition: rfc2045.c:436
int strncasecmp(const char *p, const char *q, size_t n)
Definition: string.c:491
int read_decoded_base64_line(FILE *file, char **line, size_t *max_line_len, const int max_lines_to_read, char **rest)
Definition: rfc2045.c:323
int my_isprint(int c)
Definition: misc.c:978
char * rfc1522_decode(const char *s)
Definition: rfc2047.c:232
#define C_NEWS
Definition: rfc2046.h:165
#define C_URL
Definition: rfc2046.h:163
#define C_CTRLL
Definition: rfc2046.h:166
#define FORMAT_FLOWED
Definition: rfc2046.h:70
#define C_MAIL
Definition: rfc2046.h:164
#define C_BODY
Definition: rfc2046.h:153
#define C_SIG
Definition: rfc2046.h:154
#define C_VERBATIM
Definition: rfc2046.h:167
#define C_QUOTE1
Definition: rfc2046.h:159
#define ENCODING_BASE64
Definition: rfc2046.h:57
#define C_HEADER
Definition: rfc2046.h:152
#define C_QUOTE3
Definition: rfc2046.h:161
#define TYPE_MULTIPART
Definition: rfc2046.h:48
#define C_ATTACH
Definition: rfc2046.h:155
#define ENCODING_QP
Definition: rfc2046.h:56
#define ENCODING_UUE
Definition: rfc2046.h:60
#define C_QUOTE2
Definition: rfc2046.h:160
const char * name
Definition: signal.c:117
int flags
Definition: rfc2046.h:176
long offset
Definition: rfc2046.h:175
t_lineinfo * cookl
Definition: rfc2046.h:191
t_bool tex2iso
Definition: rfc2046.h:186
int cooked_lines
Definition: rfc2046.h:187
FILE * cooked
Definition: rfc2046.h:189
struct t_header hdr
Definition: rfc2046.h:185
FILE * raw
Definition: rfc2046.h:188
Definition: rfc2046.h:77
char * name
Definition: rfc2046.h:78
char * value
Definition: rfc2046.h:79
Definition: rfc2046.h:93
long offset
Definition: rfc2046.h:103
unsigned type
Definition: rfc2046.h:94
int line_count
Definition: rfc2046.h:104
struct part * uue
Definition: rfc2046.h:106
int depth
Definition: rfc2046.h:105
unsigned format
Definition: rfc2046.h:96
t_param * params
Definition: rfc2046.h:102
unsigned encoding
Definition: rfc2046.h:95
struct part * next
Definition: rfc2046.h:107
pcre_extra * extra
Definition: tin.h:1963
pcre * re
Definition: tin.h:1962
unsigned show_signatures
Definition: tin.h:1681
struct t_newsheader * headers_to_display
Definition: tin.h:1642
unsigned verbatim_handling
Definition: tin.h:1689
unsigned trim_article_body
Definition: tin.h:1682
struct t_newsheader * headers_to_not_display
Definition: tin.h:1643
unsigned tex2iso_conv
Definition: tin.h:1702
char mm_local_charset[LEN]
Definition: tinrc.h:115
int wrap_column
Definition: tinrc.h:207
struct t_attribute * attribute
Definition: tin.h:1834
t_part * ext
Definition: rfc2046.h:146
t_bool mime
Definition: rfc2046.h:145
int num
Definition: tin.h:1581
char ** header
Definition: tin.h:1580
#define vsnprintf
Definition: tin.h:2467
#define LEN
Definition: tin.h:860
#define SEEK_SET
Definition: tin.h:2512
#define IS_LOCAL_CHARSET(c)
Definition: tin.h:782
#define DIRSEP
Definition: tin.h:2154
#define SKIP_LEADING
Definition: tin.h:981
#define my_malloc(size)
Definition: tin.h:2245
#define N_(Str)
Definition: tin.h:82
#define ARRAY_SIZE(array)
Definition: tin.h:2250
#define FreeIfNeeded(p)
Definition: tin.h:2252
#define _(Text)
Definition: tin.h:94
#define IS_PLAINTEXT(x)
Definition: tin.h:1036
#define COMPACT_MULTIPLE
Definition: tin.h:983
#define FreeAndNull(p)
Definition: tin.h:2253
#define SIGDASHES
Definition: tin.h:749
#define my_realloc(ptr, size)
Definition: tin.h:2247
#define UUE_ALL
Definition: tin.h:1251
#define assert(p)
Definition: tin.h:1320
#define my_calloc(nmemb, size)
Definition: tin.h:2246
#define SKIP_TRAILING
Definition: tin.h:982