tin  2.4.4
About: TIN is a threaded NNTP and spool based UseNet newsreader.
  Fossies Dox: tin-2.4.4.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

cook.c
Go to the documentation of this file.
1 /*
2  * Project : tin - a Usenet reader
3  * Module : cook.c
4  * Author : J. Faultless
5  * Created : 2000-03-08
6  * Updated : 2019-02-03
7  * Notes : Split from page.c
8  *
9  * Copyright (c) 2000-2020 Jason Faultless <jason@altarstone.com>
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  *
16  * 1. Redistributions of source code must retain the above copyright notice,
17  * this list of conditions and the following disclaimer.
18  *
19  * 2. Redistributions in binary form must reproduce the above copyright
20  * notice, this list of conditions and the following disclaimer in the
21  * documentation and/or other materials provided with the distribution.
22  *
23  * 3. Neither the name of the copyright holder nor the names of its
24  * contributors may be used to endorse or promote products derived from
25  * this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
31  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 #ifndef TIN_H
41 # include "tin.h"
42 #endif /* !TIN_H */
43 #ifndef TCURSES_H
44 # include "tcurses.h"
45 #endif /* !TCURSES_H */
46 
47 
48 /*
49  * We malloc() this many t_lineinfo's at a time
50  */
51 #define CHUNK 50
52 
/*
 * True when the current group has alternative_handling enabled and the
 * article header of x describes a multipart/alternative message.
 * x must point to a structure with an hdr.ext part description; the macro
 * also reads the global curr_group.
 */
53 #define STRIP_ALTERNATIVE(x) \
54  (curr_group->attribute->alternative_handling && \
55  (x)->hdr.ext->type == TYPE_MULTIPART && \
56  strcasecmp("alternative", (x)->hdr.ext->subtype) == 0)
57 
/*
 * True when pcre_exec() reports a match (return value >= 0) of the cached
 * regex x against subject y with length z. No capture offsets are requested.
 * x is expanded as x.re / x.extra, so it must be a struct object, not a
 * pointer.
 */
58 #define MATCH_REGEX(x,y,z) (pcre_exec(x.re, x.extra, y, z, 0, 0, NULL, 0) >= 0)
59 
60 
61 static t_bool charset_unsupported(const char *charset);
62 static t_bool header_wanted(const char *line);
63 static t_part *new_uue(t_part **part, char *name);
64 static void process_text_body_part(t_bool wrap_lines, FILE *in, t_part *part, int hide_uue);
65 static void put_cooked(size_t buf_len, t_bool wrap_lines, int flags, const char *fmt, ...);
66 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
67  static t_bool wexpand_ctrl_chars(wchar_t **wline, size_t *length, size_t lcook_width);
68 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
69 #ifdef DEBUG_ART
70  static void dump_cooked(void);
71 #endif /* DEBUG_ART */
72 
73 
74 /*
75  * These are used globally within this module for access to the context
76  * currently being built. They must not leak outside.
77  */
79 
80 
81 /*
82  * Handle backspace, expand tabs, expand control chars to a literal ^[A-Z]
83  * Allows \n through
84  * Return TRUE if line contains a ^L (form-feed)
85  */
86 t_bool
88  char **line,
89  size_t *length,
90  size_t lcook_width)
91 {
92  t_bool ctrl_L = FALSE;
93 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
94  wchar_t *wline = char2wchar_t(*line);
95  size_t wlen;
96 
97  /*
98  * remove the assert() before release
99  * it should help us find problems with wide-char strings
100  * in the development branch
101  */
102  assert(wline != NULL);
103  wlen = wcslen(wline);
104  ctrl_L = wexpand_ctrl_chars(&wline, &wlen, lcook_width);
105  free(*line);
106  *line = wchar_t2char(wline);
107  free(wline);
108  assert(line != NULL);
109  *length = strlen(*line);
110 #else
111  int curr_len = LEN;
112  unsigned int i = 0, j, ln = 0;
113  char *buf = my_malloc(curr_len);
114  unsigned char *c;
115 
116  c = (unsigned char *) *line;
117  while (*c) {
118  if (i > curr_len - (lcook_width + 1)) {
119  curr_len <<= 1;
120  buf = my_realloc(buf, curr_len);
121  }
122  if (*c == '\n')
123  ln = i + 1;
124  if (*c == '\t') { /* expand tabs */
125  j = i + lcook_width - ((i - ln) % lcook_width);
126  for (; i < j; i++)
127  buf[i] = ' ';
128  } else if (((*c) & 0xFF) < ' ' && *c != '\n' && (!IS_LOCAL_CHARSET("Big5") || *c != 27)) { /* literal ctrl chars */
129  buf[i++] = '^';
130  buf[i++] = ((*c) & 0xFF) + '@';
131  if (*c == '\f') /* ^L detected */
132  ctrl_L = TRUE;
133  } else {
134  if (!my_isprint(*c) && *c != '\n')
135  buf[i++] = '?';
136  else
137  buf[i++] = *c;
138  }
139  c++;
140  }
141  buf[i] = '\0';
142  *length = i + 1;
143  *line = my_realloc(*line, *length);
144  strcpy(*line, buf);
145  free(buf);
146 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
147  return ctrl_L;
148 }
149 
150 
#if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
/*
 * Wide-character worker for expand_ctrl_chars().
 * Builds an expanded copy of *wline (tabs -> spaces up to the next multiple
 * of lcook_width, control characters -> ^X, non-printables -> '?', '\n'
 * kept), then reallocates *wline to the exact size and copies the result
 * back. *length receives the number of wide characters including the
 * terminator.
 * Returns TRUE if a form-feed was seen.
 */
static t_bool
wexpand_ctrl_chars(
	wchar_t **wline,
	size_t *length,
	size_t lcook_width)
{
	size_t capacity = LEN;
	size_t out = 0;			/* next free slot in cooked */
	size_t line_start = 0;		/* output index just past the last '\n' */
	size_t tab_stop;
	wchar_t *cooked = my_malloc(capacity * sizeof(wchar_t));
	const wchar_t *src;
	t_bool saw_formfeed = FALSE;

	for (src = *wline; *src; src++) {
		/* one step adds at most lcook_width chars; keep space for '\0' too */
		if (out > capacity - (lcook_width + 1)) {
			capacity <<= 1;
			cooked = my_realloc(cooked, capacity * sizeof(wchar_t));
		}

		if (*src == '\n')
			line_start = out + 1;

		if (*src == '\t') {		/* pad with blanks up to the next tab stop */
			tab_stop = out + lcook_width - ((out - line_start) % lcook_width);
			while (out < tab_stop)
				cooked[out++] = ' ';
			continue;
		}

		if (*src < ' ' && *src != '\n' && (!IS_LOCAL_CHARSET("Big5") || *src != 27)) {
			/* show the control character in caret notation */
			cooked[out++] = '^';
			cooked[out++] = *src + '@';
			if (*src == '\f')
				saw_formfeed = TRUE;
			continue;
		}

		/* ordinary character: keep printables and '\n', mask the rest */
		if (iswprint((wint_t) *src) || *src == '\n')
			cooked[out++] = *src;
		else
			cooked[out++] = '?';
	}
	cooked[out] = '\0';

	*length = out + 1;
	*wline = my_realloc(*wline, *length * sizeof(wchar_t));
	wcscpy(*wline, cooked);
	free(cooked);

	return saw_formfeed;
}
#endif /* MULTIBYTE_ABLE && !NO_LOCALE */
196 
197 
198 /*
199  * Output text to the cooked stream. Wrap lines as necessary.
200  * Update the line count and the array of line offsets
201  * Extend the lineoffset array as needed in CHUNK amounts.
202  * flags are 'hints' to the pager about line content.
203  * buf_len is the size put_cooked should use for its buffer.
204  */
205 static void
207  size_t buf_len,
208  t_bool wrap_lines,
209  int flags,
210  const char *fmt,
211  ...)
212 {
213  char *p, *bufp, *buf;
214  int wrap_column;
215  int space;
216  static int saved_flags = 0;
217  va_list ap;
218 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
219  int bytes;
220  wint_t *wp;
221 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
222 
223  buf = my_malloc(buf_len + 1);
224 
225  va_start(ap, fmt);
226  vsnprintf(buf, buf_len + 1, fmt, ap);
227 
228  if (tinrc.wrap_column < 0)
229  wrap_column = ((tinrc.wrap_column > -cCOLS) ? cCOLS + tinrc.wrap_column : cCOLS);
230  else
231 #if 1
232  wrap_column = ((tinrc.wrap_column > 0) ? tinrc.wrap_column : cCOLS);
233 #else /* never cut off long lines */
234  wrap_column = (((tinrc.wrap_column > 0) && (tinrc.wrap_column < cCOLS)) ? tinrc.wrap_column : cCOLS);
235 #endif /* 1 */
236 
237  p = bufp = buf;
238 
239 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
240  wp = my_malloc((MB_CUR_MAX + 1) * sizeof(wint_t));
241 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
242 
243  while (*p) {
244  if (wrap_lines) {
245  space = wrap_column;
246  while (space > 0 && *p && *p != '\n') {
247 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
248  if ((bytes = mbtowc((wchar_t *) wp, p, MB_CUR_MAX)) > 0) {
249  if ((space -= wcwidth(*wp)) < 0)
250  break;
251  p += bytes;
252  } else
253  p++;
254 #else
255  p++;
256  space--;
257 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
258  }
259  } else {
260  while (*p && *p != '\n')
261  p++;
262  }
263  fwrite(bufp, 1, p - bufp, art->cooked);
264  fputs("\n", art->cooked);
265  if (*p == '\n')
266  p++;
267  bufp = p;
268 
269  if (art->cooked_lines == 0) {
270  art->cookl = my_malloc(sizeof(t_lineinfo) * CHUNK);
271  art->cookl[0].offset = 0;
272  }
273 
274  /*
275  * Pick up flags from a previous partial write
276  */
277  art->cookl[art->cooked_lines].flags = flags | saved_flags;
278  saved_flags = 0;
279  art->cooked_lines++;
280 
281  /*
282  * Grow the array of lines if needed - we resize it properly at the end
283  */
284  if (art->cooked_lines % CHUNK == 0)
285  art->cookl = my_realloc(art->cookl, sizeof(t_lineinfo) * CHUNK * ((art->cooked_lines / CHUNK) + 1));
286 
287  art->cookl[art->cooked_lines].offset = ftell(art->cooked);
288  }
289 
290 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
291  free(wp);
292 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
293 
294  /*
295  * If there is anything left over, then it must be a non \n terminated
296  * partial line from base64 decoding etc.. Dump it now and the rest of
297  * the line (with the \n) will fill in the t_lineinfo
298  * We must save the flags now as the rest of the line may not have the same properties
299  * We need to keep the length for accounting purposes
300  */
301  if (*bufp != '\0') {
302  fputs(bufp, art->cooked);
303  saved_flags = flags;
304  }
305 
306  va_end(ap);
307  free(buf);
308 }
309 
310 
311 /*
312  * Add a new uuencode attachment description to the current part
313  */
314 static t_part *
316  t_part **part,
317  char *name)
318 {
319  t_part *ptr = new_part((*part)->uue);
320 
321  if (!(*part)->uue) /* new_part() is simple and doesn't attach list heads */
322  (*part)->uue = ptr;
323 
324  free_list(ptr->params);
325  /*
326  * Load the name into the parameter list
327  */
328  ptr->params = new_params();
329  ptr->params->name = my_strdup("name");
330  ptr->params->value = my_strdup(str_trim(name));
331 
332  ptr->encoding = ENCODING_UUE; /* treat as x-uuencode */
333 
334  ptr->offset = ftell(art->cooked);
335  ptr->depth = (*part)->depth; /* uue is at the same depth as the envelope */
336 
337  /*
338  * If an extension is present, try and add a Content-Type
339  */
340  if ((name = strrchr(name, '.')) != NULL)
341  lookup_mimetype(name + 1, ptr);
342 
343  return ptr;
344 }
345 
346 
347 /*
348  * Get the suggested filename for an attachment. RFC says Content-Disposition
349  * 'filename' supersedes Content-Type 'name'. We must also remove path
350  * information.
351  */
352 const char *
354  t_param *ptr)
355 {
356  const char *name;
357  char *p;
358 
359  if (!(name = get_param(ptr, "filename"))) {
360  if (!(name = get_param(ptr, "name")))
361  return NULL;
362  }
363 
364  if ((p = strrchr(name, DIRSEP)))
365  return p + 1;
366 
367  return name;
368 }
369 
370 
/*
 * Write the one-line summary that represents a uuencoded section.
 * Expects a variable 'wrap_lines' to be in scope where it is used.
 */
#define PUT_UUE(part, qualifier_text) \
	put_cooked(LEN, wrap_lines, C_UUE, _(txt_uue), \
		(part)->depth ? ((part)->depth - 1) * 4 : 0, "", \
		content_types[(part)->type], (part)->subtype, \
		qualifier_text, (part)->line_count, get_filename((part)->params))

/*
 * Write the attachment header for a MIME part: type/subtype, encoding,
 * optional charset, line count and optional name, followed by the
 * description (if any) and a separating blank line when another part
 * follows or the part is plain text.
 * Expects a variable 'wrap_lines' to be in scope where it is used.
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * e.g. as the body of an unbraced if.
 */
#define PUT_ATTACH(part, depth, name, charset) \
	do { \
		put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach), \
			depth, "", \
			content_types[(part)->type], (part)->subtype, \
			content_encodings[(part)->encoding], \
			(charset) ? _(txt_attach_charset) : "", BlankIfNull(charset), \
			(part)->line_count, \
			(name) ? _(txt_name) : "", BlankIfNull(name)); \
		\
		if ((part)->description) \
			put_cooked(LEN, wrap_lines, C_ATTACH, \
				_(txt_attach_description), \
				depth, "", \
				(part)->description); \
		if ((part)->next != NULL || IS_PLAINTEXT(part)) \
			put_cooked(1, wrap_lines, C_ATTACH, "\n"); \
	} while (0)
393 
394 /*
395  * Decodes text bodies, remove sig's, detects uuencoded sections
396  */
/*
 * Cook one text body part.
 *
 * Seeks 'in' to part->offset and reads up to part->line_count lines,
 * decoding base64 / quoted-printable according to part->encoding (other
 * encodings are read with tin_fgets() and get their '\n' re-added).
 * Each line is converted from the part's "charset" parameter (default
 * "US-ASCII") to tinrc.mm_local_charset, optionally trimmed according to
 * curr_group->attribute->trim_article_body, classified (verbatim block,
 * signature, uuencoded data, quote levels, URL/mail/news references),
 * run through expand_ctrl_chars() and written with put_cooked().
 *
 * wrap_lines  passed on to put_cooked()
 * in          stream holding the raw article
 * part        the part to cook; part->uue is freed and rebuilt here
 * hide_uue    non-zero: replace uuencoded sections by a PUT_UUE() summary;
 *             UUE_ALL additionally starts a section on a line that merely
 *             looks like uuencoded data
 *
 * NOTE(review): this listing has lost the lines numbered 398 (function
 * name; the prototype near the top of the file names it
 * process_text_body_part), 508, 510, 523, 527 and 540. The conditions that
 * stood there are not visible, so the block does not compile as shown -
 * restore them from the upstream source before building.
 */
397 static void
399  t_bool wrap_lines,
400  FILE *in,
401  t_part *part,
402  int hide_uue)
403 {
404  char *rest = NULL;
405  char *line = NULL, *buf, *tmpline;
406  const char *ncharset;
407  size_t max_line_len = 0;
408  int flags, len, lines_left, len_blank;
409  int offsets[6];
410  int size_offsets = ARRAY_SIZE(offsets);
411  unsigned int lines_skipped = 0;
412  t_bool in_sig = FALSE; /* Set when in sig portion */
413  t_bool in_uue = FALSE; /* Set when in uuencoded section */
414  t_bool in_verbatim = FALSE; /* Set when in verbatim section */
415  t_bool verbatim_begin = FALSE; /* Set when verbatim_begin_regex matches */
416  t_bool is_uubody; /* Set when current line looks like a uuencoded line */
417  t_bool first_line_blank = TRUE; /* Unset when first non-blank line is reached */
418  t_bool put_blank_lines = FALSE; /* Set when previously skipped lines needs to put */
419  t_part *curruue = NULL;
420 
421  if (part->uue) { /* These are redone each time we recook/resize etc.. */
422  free_parts(part->uue);
423  part->uue = NULL;
424  }
425 
426  fseek(in, part->offset, SEEK_SET);
427 
428  if (part->encoding == ENCODING_BASE64)
429  (void) mmdecode(NULL, 'b', 0, NULL); /* flush */
430 
431  lines_left = part->line_count;
 /* 'rest' is handed to the base64 reader; the loop keeps running while it is non-NULL */
432  while ((lines_left > 0) || rest) {
433  switch (part->encoding) {
434  case ENCODING_BASE64:
435  lines_left -= read_decoded_base64_line(in, &line, &max_line_len, lines_left, &rest);
436  break;
437 
438  case ENCODING_QP:
439  lines_left -= read_decoded_qp_line(in, &line, &max_line_len, lines_left);
440  break;
441 
442  default:
443  if ((buf = tin_fgets(in, FALSE)) == NULL) {
444  FreeAndNull(line);
445  break;
446  }
447 
448  /*
449  * tin_fgets() uses the returned space also internally
450  * so it's not advisable to use it for our own purposes
451  * especially if we must resize it.
452  * So copy buf to line (and resize line if necessary).
453  */
454  if (max_line_len < strlen(buf) + 2) {
455  max_line_len = strlen(buf) + 2;
456  line = my_realloc(line, max_line_len);
457  }
458  strcpy(line, buf);
459 
460  /*
461  * FIXME: Some code in cook.c expects a '\n' at the end
462  * of the line. As tin_fgets() strips trailing '\n', re-add it.
463  * This should probably be fixed in that other code.
464  */
465  strcat(line, "\n");
466 
467  lines_left--;
468  break;
469  }
470  if (!(line && strlen(line))) {
471  FreeIfNeeded(rest);
472  break; /* premature end of file, file error etc. */
473  }
474 
475  /* convert network to local charset, tex2iso, iso2asc etc. */
476  ncharset = get_param(part->params, "charset");
477  process_charsets(&line, &max_line_len, ncharset ? ncharset : "US-ASCII", tinrc.mm_local_charset, curr_group->attribute->tex2iso_conv && art->tex2iso);
478 
479 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
480  if (IS_LOCAL_CHARSET("UTF-8"))
481  utf8_valid(line);
482 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
483 
484  len = (int) strlen(line);
485 
486  /*
487  * trim article body and sig (not verbatim blocks):
488  * - skip leading blank lines
489  * - replace multiple blank lines with one empty line
490  * - skip tailing blank lines, keep one if an
491  * attachment follows
492  */
493  if (curr_group->attribute->trim_article_body && !in_uue && !in_verbatim && !verbatim_begin) {
 /* len_blank starts at 1 to account for the trailing '\n' in len */
494  len_blank = 1;
495  tmpline = line;
496  /* check if line contains only whitespace */
497  while ((*tmpline == ' ') || (*tmpline == '\t')) {
498  len_blank++;
499  tmpline++;
500  }
501  if (len_blank == len) { /* line is blank */
502  if (lines_left == 0 && (curr_group->attribute->trim_article_body & SKIP_TRAILING)) {
503  if (!(part->next == NULL || (STRIP_ALTERNATIVE(art) && !IS_PLAINTEXT(part->next))))
504  put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
505  continue;
506  }
507  if (first_line_blank) {
 /* NOTE(review): listing line 508 (a condition) is missing here */
509  continue;
 /* NOTE(review): listing line 510 (presumably the matching else branch) is missing here */
511  lines_skipped++;
512  if (lines_left == 0 && !(curr_group->attribute->trim_article_body & SKIP_TRAILING)) {
513  for (; lines_skipped > 0; lines_skipped--)
514  put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
515  }
516  continue;
517  }
518  } else { /* line is not blank */
519  if (first_line_blank)
520  first_line_blank = FALSE;
521  if (lines_skipped && (!in_sig || curr_group->attribute->show_signatures)) {
522  if (strcmp(line, SIGDASHES) != 0 || curr_group->attribute->show_signatures) {
 /* NOTE(review): listing line 523 (a condition) is missing here */
524  put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
525  else
526  put_blank_lines = TRUE;
 /* NOTE(review): listing line 527 (an else-if) is missing here */
528  put_blank_lines = TRUE;
529  if (put_blank_lines) {
530  for (; lines_skipped > 0; lines_skipped--)
531  put_cooked(1, TRUE, in_sig ? C_SIG : C_BODY, "\n");
532  }
533  put_blank_lines = FALSE;
534  lines_skipped = 0;
535  }
536  }
537  } /* if (tinrc.trim_article_body...) */
538 
539  /* look for verbatim marks, set in_verbatim only for lines in between */
 /* NOTE(review): listing line 540 (the if that opens this block) is missing here */
541  if (verbatim_begin) {
542  in_verbatim = TRUE;
543  verbatim_begin = FALSE;
544  } else if (!in_sig && !in_uue && !in_verbatim && MATCH_REGEX(verbatim_begin_regex, line, len))
545  verbatim_begin = TRUE;
546  if (in_verbatim && MATCH_REGEX(verbatim_end_regex, line, len))
547  in_verbatim = FALSE;
548  }
549 
550  if (!in_verbatim) {
551  /*
552  * Detect and skip signatures if necessary
553  */
554  if (!in_sig) {
555  if (strcmp(line, SIGDASHES) == 0) {
556  in_sig = TRUE;
557  if (in_uue) {
558  in_uue = FALSE;
559  if (hide_uue)
560  PUT_UUE(curruue, _(txt_incomplete));
561  }
562  }
563  }
564 
565  if (in_sig && !(curr_group->attribute->show_signatures))
566  continue; /* No further processing needed */
567 
568  /*
569  * Detect and process uuencoded sections
570  * Look for the start or the end of a uuencoded section
571  *
572  * TODO: look for a tailing size line after end (non standard
573  * extension)?
574  */
 /* NOTE(review): any result other than PCRE_ERROR_NOMATCH (including other negative error codes) is treated as a match and offsets[1] is then used - confirm this is intended */
575  if (pcre_exec(uubegin_regex.re, uubegin_regex.extra, line, len, 0, 0, offsets, size_offsets) != PCRE_ERROR_NOMATCH) {
576  in_uue = TRUE;
577  curruue = new_uue(&part, line + offsets[1]);
578  if (hide_uue)
579  continue; /* Don't cook the 'begin' line */
580  } else if (strncmp(line, "end\n", 4) == 0) {
581  if (in_uue) {
582  in_uue = FALSE;
583  if (hide_uue) {
584  PUT_UUE(curruue, "");
585  continue; /* Don't cook the 'end' line */
586  }
587  }
588  }
589 
590  /*
591  * See if this line looks like a uuencoded 'body' line
592  */
593  is_uubody = FALSE;
594 
595  if (MATCH_REGEX(uubody_regex, line, len)) {
596  int sum = (((*line) - ' ') & 077) * 4 / 3; /* uuencode octet checksum */
597 
598  /* sum = 0 in a uubody only on the last line, a single ` */
599  if (sum == 0 && len == 1 + 1) /* +1 for the \n */
600  is_uubody = TRUE;
601  else if (len == sum + 1 + 1)
602  is_uubody = TRUE;
603 #ifdef DEBUG_ART
604  if (debug & DEBUG_MISC)
605  fprintf(stderr, "%s sum=%d len=%d (%s)\n", bool_unparse(is_uubody), sum, len, line);
606 #endif /* DEBUG_ART */
607  }
608 
609  if (in_uue) {
610  if (is_uubody)
611  curruue->line_count++;
612  else {
613  if (line[0] == '\n') { /* Blank line in a uubody - definitely a failure */
614  /* fprintf(stderr, "not a uue line while reading a uue body?\n"); */
615  in_uue = FALSE;
616  if (hide_uue)
617  /* don't continue here, so we see the line that 'broke' in_uue */
618  PUT_UUE(curruue, _(txt_incomplete));
619  }
620  }
621  } else {
622  /*
623  * UUE_ALL = 'Try harder' - we never saw a begin line, but useful
624  * when uue sections are split across > 1 article
625  */
626  if (is_uubody && hide_uue == UUE_ALL) {
627  char name[] = N_("(unknown)");
628 
629  curruue = new_uue(&part, name);
630  curruue->line_count++;
631  in_uue = TRUE;
632  continue;
633  }
634  }
635 
636  /*
637  * Skip output if we're hiding uue or the sig
638  */
639  if (in_uue && hide_uue)
640  continue; /* No further processing needed */
641  }
642 
643  flags = in_verbatim ? C_VERBATIM : in_sig ? C_SIG : C_BODY;
644 
645  /*
646  * Don't do any further handling of uue || verbatim lines
647  */
648  if (in_uue) {
649  put_cooked(max_line_len, wrap_lines, flags, "%s", line);
650  continue;
651  } else if (in_verbatim) {
 /* verbatim text is expanded with a fixed tab width of 8 */
652  expand_ctrl_chars(&line, &max_line_len, 8);
653  put_cooked(max_line_len, wrap_lines, flags, "%s", line);
654  continue;
655  }
656 
657 #ifdef HAVE_COLOR
658  /* keep order in sync with color.c:draw_pager_line() */
659  if (quote_regex3.re) {
660  if (MATCH_REGEX(quote_regex3, line, len))
661  flags |= C_QUOTE3;
662  else if (quote_regex2.re) {
663  if (MATCH_REGEX(quote_regex2, line, len))
664  flags |= C_QUOTE2;
665  else if (curr_group->attribute->extquote_handling && extquote_regex.re) {
666  if (MATCH_REGEX(extquote_regex, line, len))
667  flags |= C_EXTQUOTE;
668  else if (quote_regex.re) {
669  if (MATCH_REGEX(quote_regex, line, len))
670  flags |= C_QUOTE1;
671  }
672  } else if (quote_regex.re) {
673  if (MATCH_REGEX(quote_regex, line, len))
674  flags |= C_QUOTE1;
675  }
676  }
677  }
678 #endif /* HAVE_COLOR */
679 
680  if (MATCH_REGEX(url_regex, line, len))
681  flags |= C_URL;
682  if (MATCH_REGEX(mail_regex, line, len))
683  flags |= C_MAIL;
684  if (MATCH_REGEX(news_regex, line, len))
685  flags |= C_NEWS;
686 
687  if (expand_ctrl_chars(&line, &max_line_len, tabwidth))
688  flags |= C_CTRLL; /* Line contains form-feed */
689 
690  buf = line;
691 
692  /*
693  * Skip over the first space in case of Format=Flowed (space-stuffing)
694  */
695  if (part->format == FORMAT_FLOWED) {
696  if (line[0] == ' ')
697  ++buf;
698  }
699 
 /* line wrapping is switched off when the local charset is Big5 */
700  put_cooked(max_line_len, wrap_lines && (!IS_LOCAL_CHARSET("Big5")), flags, "%s", buf);
701  } /* while */
702 
703  /*
704  * Were we reading uue and ran off the end ?
705  */
706  if (in_uue && hide_uue)
707  PUT_UUE(curruue, _(txt_incomplete));
708 
709  free(line);
710 }
711 
712 
713 /*
714  * Return TRUE if this header should be printed as per
715  * news_headers_to_[not_]display
716  */
/*
 * Decide whether the header in 'line' is to be shown.
 * The first half can set the result to TRUE based on
 * curr_group->attribute->headers_to_display, the second half can reset it
 * to FALSE based on curr_group->attribute->headers_to_not_display, so the
 * "not display" list is evaluated last.
 *
 * NOTE(review): this listing has lost the lines numbered 718 (function
 * name; the prototype near the top of the file names it header_wanted),
 * 724, 728, 735 and 739, i.e. all four test conditions. The block does not
 * compile as shown - restore them from the upstream source.
 */
717 static t_bool
719  const char *line)
720 {
721  int i;
722  t_bool ret = FALSE;
723 
 /* NOTE(review): listing line 724 (wildcard test for the display list) is missing here */
725  ret = TRUE; /* wild do */
726  else {
727  for (i = 0; i < curr_group->attribute->headers_to_display->num; i++) {
 /* NOTE(review): listing line 728 (per-entry comparison) is missing here */
729  ret = TRUE;
730  break;
731  }
732  }
733  }
734 
 /* NOTE(review): listing line 735 (wildcard test for the not-display list) is missing here */
736  ret = FALSE; /* wild don't: doesn't make sense! */
737  else {
738  for (i = 0; i < curr_group->attribute->headers_to_not_display->num; i++) {
 /* NOTE(review): listing line 739 (per-entry comparison) is missing here */
740  ret = FALSE;
741  break;
742  }
743  }
744  }
745 
746  return ret;
747 }
748 
749 
750 /* #define DEBUG_ART 1 */
#ifdef DEBUG_ART
/*
 * Debugging aid: print every cooked line of the current article to stderr
 * as "[index] offset flags [text]", followed by the total line count.
 * Moves the file position of art->cooked.
 */
static void
dump_cooked(
	void)
{
	char *line;
	int i;

	for (i = 0; i < art->cooked_lines; i++) {
		fseek(art->cooked, art->cookl[i].offset, SEEK_SET);
		line = tin_fgets(art->cooked, FALSE);
		/* tin_fgets() may return NULL; never hand that to %s */
		fprintf(stderr, "[%3d] %4ld %3x [%s]\n", i, (long) art->cookl[i].offset, (unsigned int) art->cookl[i].flags, line ? line : "");
	}
	fprintf(stderr, "%d lines cooked\n", art->cooked_lines);
}
#endif /* DEBUG_ART */
767 
768 
769 /*
770  * Check for charsets which may contain NULL bytes and thus break string
771  * functions. Possibly incomplete.
772  *
773  * TODO: fix the other code to handle those charsets properly.
774  */
775 static t_bool
777  const char *charset)
778 {
779  static const char *charsets[] = {
780  "csUnicode", /* alias for ISO-10646-UCS-2 */
781  "csUCS4", /* alias for ISO-10646-UCS-4 */
782  "ISO-10646-UCS-2",
783  "ISO-10646-UCS-4",
784  "UTF-16", /* covers also BE/LE */
785  "UTF-32", /* covers also BE/LE */
786  NULL };
787  const char **charsetptr = charsets;
788  t_bool ret = FALSE;
789 
790  if (!charset)
791  return ret;
792 
793  do {
794  if (!strncasecmp(charset, *charsetptr, strlen(*charsetptr)))
795  ret = TRUE;
796  } while (!ret && *(++charsetptr) != NULL);
797 
798  return ret;
799 }
800 
801 
802 /*
803  * 'cooks' an article, ie, prepare what will actually appear on the screen
804  * It is not easy to do this in the same pass as the initial read since
805  * boundary conditions for multipart articles make it harder to do on the
806  * fly decoding.
807  * We could have cooked the headers whilst they were being read but we're
808  * trying to keep this simple.
809  *
810  * Expects:
811  * Fresh article context to write into
812  * parse_uue is set only when the art is opened to create t_parts for
813  * uue sections found, when resizing this is not needed
814  * hide_uue determines the folding of uue sections
815  * Handles:
816  * multipart articles
817  * stripping of non text sections if skip_alternative
818  * Q and B decoding of text sections
819  * handling of uuencoded sections
820  * stripping of sigs if !show_signatures
821  * Returns:
822  * TRUE on success
823  *
824  * TODO:
825  * give an error-message on at least disk-full
826  */
/*
 * Entry point of this module: build the cooked (displayable) version of
 * an article in a temporary file.
 *
 * Sets the module-global 'art' to artinfo, creates art->cooked with
 * tmpfile(), writes the wanted headers, then the body part(s) via
 * PUT_ATTACH() / process_text_body_part(), and finally rewinds
 * art->cooked.
 * Returns FALSE if tmpfile() fails or tin_errno is set, TRUE otherwise.
 *
 * NOTE(review): this listing has lost the lines numbered 828 (function
 * name), 832 (last parameter), 859, 861 and 988. The cross-reference
 * index at the end of the listing gives the signature as
 * cook_article(t_bool wrap_lines, t_openartinfo *artinfo, int hide_uue,
 * t_bool show_all_headers). The block does not compile as shown - restore
 * the missing lines from the upstream source.
 */
827 t_bool
829  t_bool wrap_lines,
830  t_openartinfo *artinfo,
831  int hide_uue,
833 {
834  const char *charset;
835  const char *name;
836  char *line;
837  struct t_header *hdr = &artinfo->hdr;
838  t_bool header_put = FALSE;
 /* headers that are taken apart with parse_header() instead of being decoded as a whole */
839  static const char *struct_header[] = {
840  "Approved: ", "From: ", "Originator: ",
841  "Reply-To: ", "Sender: ", "X-Cancelled-By: ", "X-Comment-To: ",
842  "X-Submissions-To: ", "To: ", "Cc: ", "Bcc: ", "X-Originator: ", NULL };
843 
844  art = artinfo; /* Global saves lots of passing artinfo around */
845 
846  if (!(art->cooked = tmpfile()))
847  return FALSE;
848 
849  art->cooked_lines = 0;
850 
851  rewind(artinfo->raw);
852 
853  /*
854  * Put down just the headers we want
855  */
856  while ((line = tin_fgets(artinfo->raw, TRUE)) != NULL) {
857  if (line[0] == '\0') { /* End of headers? */
858  if (STRIP_ALTERNATIVE(artinfo)) {
 /* NOTE(review): listing lines 859 and 861 are missing around the next statement */
860  header_put = TRUE;
862  }
863  }
864  if (header_put)
865  put_cooked(1, TRUE, 0, "\n"); /* put a newline after headers */
866  break;
867  }
868 
869  if (show_all_headers || header_wanted(line)) { /* Put cooked data */
870  const char **strptr = struct_header;
871  char *l = NULL, *ptr, *foo, *bar;
872  size_t i = LEN;
873  t_bool found = FALSE;
874 
875  /* structured headers */
876  do {
877  if (!strncasecmp(line, *strptr, strlen(*strptr))) {
 /* foo becomes the bare header name (text before the ':') */
878  foo = my_strdup(*strptr);
879  if ((ptr = strchr(foo, ':'))) {
880  *ptr = '\0';
881  unfold_header(line);
882  if ((ptr = parse_header(line, foo, TRUE, TRUE, FALSE))) {
883 #if 0
884  /*
885  * TODO:
886  * idna_decode() currently expects just a FQDN
887  * or a mailaddress (with all comments stripped).
888  *
889  * we need to look for something like
890  * (?i)((?:\S+\.)?xn--[a-z0-9\.\-]{3,}\S+)\b
891  * and just decode $1
892  * maybe also in process_text_body_part()
893  */
894  bar = idna_decode(ptr);
895 #else
896  bar = my_strdup(ptr);
897 #endif /* 0 */
 /* rebuild "Name: value" from the original header prefix and the parsed value */
898  l = my_calloc(1, strlen(bar) + strlen(*strptr) + 1);
899  strncpy(l, line, strlen(*strptr));
900  strcat(l, bar);
901  free(bar);
902  }
903  }
904  free(foo);
905  found = TRUE;
906  }
907  } while (!found && *(++strptr) != NULL);
908 
909  /* unstructured but must not be decoded */
910  if (l == NULL && (!strncasecmp(line, "References: ", 12) || !strncasecmp(line, "Message-ID: ", 12) || !strncasecmp(line, "Date: ", 6) || !strncasecmp(line, "Newsgroups: ", 12) || !strncasecmp(line, "Distribution: ", 14) || !strncasecmp(line, "Followup-To: ", 13) || !strncasecmp(line, "X-Face: ", 8) || !strncasecmp(line, "Cancel-Lock: ", 13) || !strncasecmp(line, "Cancel-Key: ", 12) || !strncasecmp(line, "Supersedes: ", 12)))
911  l = my_strdup(line);
912 
 /* everything else goes through rfc1522_decode() */
913  if (l == NULL)
914  l = my_strdup(rfc1522_decode(line));
915 
916 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
917  if (IS_LOCAL_CHARSET("UTF-8"))
918  utf8_valid(l);
919 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
920  header_put = TRUE;
921  expand_ctrl_chars(&l, &i, tabwidth);
922  put_cooked(i, wrap_lines, C_HEADER, "%s", l);
923  free(l);
924  }
925  }
926 
 /* NOTE(review): art->cooked stays open on this error return - presumably the caller closes the article context; verify */
927  if (tin_errno != 0)
928  return FALSE;
929 
930  /*
931  * Process the attachments in turn, print a neato header, and process/decode
932  * the body if of text type
933  */
934  if (hdr->mime && hdr->ext->type == TYPE_MULTIPART) {
935  t_part *ptr;
936 
937  for (ptr = hdr->ext->next; ptr != NULL; ptr = ptr->next) {
938  /*
939  * Ignore non text/plain sections with alternative handling
940  */
941  if (STRIP_ALTERNATIVE(artinfo) && !IS_PLAINTEXT(ptr))
942  continue;
943 
944  name = get_filename(ptr->params);
 /* a charset is only looked up for text/ parts */
945  if (!strcmp(content_types[ptr->type], "text"))
946  charset = get_param(ptr->params, "charset");
947  else
948  charset = NULL;
949  PUT_ATTACH(ptr, (ptr->depth - 1) * 4, name, charset);
950 
951  /* Try to view anything of type text, may need to review this */
952  if (IS_PLAINTEXT(ptr)) {
953  if (charset_unsupported(charset)) {
954  put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), (ptr->depth - 1) * 4, "", charset);
955  if (ptr->next)
956  put_cooked(1, wrap_lines, C_ATTACH, "\n");
957  } else
958  process_text_body_part(wrap_lines, artinfo->raw, ptr, hide_uue);
959  }
960  }
961  } else {
962  if (!strcmp(content_types[hdr->ext->type], "text"))
963  charset = get_param(hdr->ext->params, "charset");
964  else
965  charset = NULL;
966  /*
967  * A regular single-body article
968  */
969  if (IS_PLAINTEXT(hdr->ext)) {
970  if (charset_unsupported(charset))
971  put_cooked(LEN, wrap_lines, C_ATTACH, _(txt_attach_unsup_charset), 0, "", charset);
972  else
973  process_text_body_part(wrap_lines, artinfo->raw, hdr->ext, hide_uue);
974  } else {
975  /*
976  * Non-textual main body
977  */
978  name = get_filename(hdr->ext->params);
979  PUT_ATTACH(hdr->ext, 0, name, charset);
980  }
981  }
982 
983 #ifdef DEBUG_ART
984  dump_cooked();
985 #endif /* DEBUG_ART */
986 
987  if (art->cooked_lines > 0)
 /* NOTE(review): listing line 988 (the statement controlled by the if above) is missing here */
989 
990  rewind(art->cooked);
991  return (tin_errno != 0) ? FALSE : TRUE;
992 }
name
const char * name
Definition: signal.c:117
get_filename
const char * get_filename(t_param *ptr)
Definition: cook.c:353
expand_ctrl_chars
t_bool expand_ctrl_chars(char **line, size_t *length, size_t lcook_width)
Definition: cook.c:87
DEBUG_MISC
#define DEBUG_MISC
Definition: debug.h:54
_
#define _(Text)
Definition: tin.h:94
my_realloc
#define my_realloc(ptr, size)
Definition: tin.h:2198
url_regex
struct regex_cache url_regex
UUE_ALL
#define UUE_ALL
Definition: tin.h:1239
hide_uue
static int hide_uue
Definition: page.c:80
my_strdup
char * my_strdup(const char *str)
Definition: string.c:133
C_URL
#define C_URL
Definition: rfc2046.h:163
TYPE_MULTIPART
#define TYPE_MULTIPART
Definition: rfc2046.h:48
bool_unparse
#define bool_unparse(b)
Definition: bool.h:83
str_trim
char * str_trim(char *string)
Definition: string.c:532
verbatim_end_regex
struct regex_cache verbatim_end_regex
CHUNK
#define CHUNK
Definition: cook.c:51
pcre_exec
int pcre_exec(const pcre *, const pcre_extra *, const char *, int, int, int, int *, int)
Definition: pcre_exec.c:3690
openartinfo
Definition: rfc2046.h:183
lineinfo::flags
int flags
Definition: rfc2046.h:176
ENCODING_QP
#define ENCODING_QP
Definition: rfc2046.h:56
t_attribute::tex2iso_conv
unsigned tex2iso_conv
Definition: tin.h:1656
art
static t_openartinfo * art
Definition: cook.c:78
free_list
void free_list(t_param *list)
Definition: rfc2046.c:549
tinrc
struct t_config tinrc
Definition: init.c:191
txt_info_x_conversion_note
constext txt_info_x_conversion_note[]
Definition: lang.c:552
COMPACT_MULTIPLE
#define COMPACT_MULTIPLE
Definition: tin.h:972
FreeAndNull
#define FreeAndNull(p)
Definition: tin.h:2204
curr_group
struct t_group * curr_group
Definition: group.c:55
C_VERBATIM
#define C_VERBATIM
Definition: rfc2046.h:167
lineinfo
Definition: rfc2046.h:173
C_NEWS
#define C_NEWS
Definition: rfc2046.h:165
regex_cache::extra
pcre_extra * extra
Definition: tin.h:1919
t_newsheader::header
char ** header
Definition: tin.h:1557
free_parts
void free_parts(t_part *ptr)
Definition: rfc2046.c:846
openartinfo::cooked_lines
int cooked_lines
Definition: rfc2046.h:187
unfold_header
void unfold_header(char *line)
Definition: rfc2046.c:1143
tcurses.h
openartinfo::tex2iso
t_bool tex2iso
Definition: rfc2046.h:186
get_param
const char * get_param(t_param *list, const char *name)
Definition: rfc2046.c:568
tin.h
openartinfo::cooked
FILE * cooked
Definition: rfc2046.h:189
C_HEADER
#define C_HEADER
Definition: rfc2046.h:152
N_
#define N_(Str)
Definition: tin.h:82
param::value
char * value
Definition: rfc2046.h:79
tin_fgets
char * tin_fgets(FILE *fp, t_bool header)
Definition: read.c:320
part::next
struct part * next
Definition: rfc2046.h:107
t_attribute::verbatim_handling
unsigned verbatim_handling
Definition: tin.h:1643
part::line_count
int line_count
Definition: rfc2046.h:104
t_newsheader::num
int num
Definition: tin.h:1558
C_QUOTE3
#define C_QUOTE3
Definition: rfc2046.h:161
part
Definition: rfc2046.h:92
t_attribute::show_signatures
unsigned show_signatures
Definition: tin.h:1635
vsnprintf
#define vsnprintf
Definition: tin.h:2420
content_types
constext * content_types[]
Definition: lang.c:1453
IS_PLAINTEXT
#define IS_PLAINTEXT(x)
Definition: tin.h:1025
new_part
t_part * new_part(t_part *part)
Definition: rfc2046.c:792
param::name
char * name
Definition: rfc2046.h:78
PUT_ATTACH
#define PUT_ATTACH(part, depth, name, charset)
Definition: cook.c:377
MATCH_REGEX
#define MATCH_REGEX(x, y, z)
Definition: cook.c:58
C_BODY
#define C_BODY
Definition: rfc2046.h:153
rfc1522_decode
char * rfc1522_decode(const char *s)
Definition: rfc2047.c:232
C_CTRLL
#define C_CTRLL
Definition: rfc2046.h:166
cook_article
t_bool cook_article(t_bool wrap_lines, t_openartinfo *artinfo, int hide_uue, t_bool show_all_headers)
Definition: cook.c:828
t_config::mm_local_charset
char mm_local_charset[LEN]
Definition: tinrc.h:103
new_params
t_param * new_params(void)
Definition: rfc2046.c:527
my_isprint
int my_isprint(int c)
Definition: misc.c:986
uubegin_regex
struct regex_cache uubegin_regex
new_uue
static t_part * new_uue(t_part **part, char *name)
Definition: cook.c:315
buf
static char buf[16]
Definition: langinfo.c:50
ARRAY_SIZE
#define ARRAY_SIZE(array)
Definition: tin.h:2201
FreeIfNeeded
#define FreeIfNeeded(p)
Definition: tin.h:2203
read_decoded_base64_line
int read_decoded_base64_line(FILE *file, char **line, size_t *max_line_len, const int max_lines_to_read, char **rest)
Definition: rfc2045.c:323
openartinfo::cookl
t_lineinfo * cookl
Definition: rfc2046.h:191
assert
#define assert(p)
Definition: tin.h:1295
show_all_headers
static t_bool show_all_headers
Definition: page.c:88
process_text_body_part
static void process_text_body_part(t_bool wrap_lines, FILE *in, t_part *part, int hide_uue)
Definition: cook.c:398
news_regex
struct regex_cache news_regex
cCOLS
int cCOLS
Definition: curses.c:53
LEN
#define LEN
Definition: tin.h:854
tabwidth
size_t tabwidth
Definition: page.c:73
tmpfile
FILE * tmpfile(void)
Definition: tmpfile.c:53
part::type
unsigned type
Definition: rfc2046.h:94
ENCODING_UUE
#define ENCODING_UUE
Definition: rfc2046.h:60
SKIP_TRAILING
#define SKIP_TRAILING
Definition: tin.h:971
part::offset
long offset
Definition: rfc2046.h:103
regex_cache::re
pcre * re
Definition: tin.h:1918
idna_decode
char * idna_decode(char *in)
Definition: misc.c:3758
C_MAIL
#define C_MAIL
Definition: rfc2046.h:164
my_calloc
#define my_calloc(nmemb, size)
Definition: tin.h:2197
SEEK_SET
#define SEEK_SET
Definition: tin.h:2441
t_header
Definition: rfc2046.h:126
param
Definition: rfc2046.h:76
FORMAT_FLOWED
#define FORMAT_FLOWED
Definition: rfc2046.h:70
PCRE_ERROR_NOMATCH
#define PCRE_ERROR_NOMATCH
Definition: pcre.h:125
part::depth
int depth
Definition: rfc2046.h:105
SIGDASHES
#define SIGDASHES
Definition: tin.h:743
C_QUOTE1
#define C_QUOTE1
Definition: rfc2046.h:159
C_SIG
#define C_SIG
Definition: rfc2046.h:154
STRIP_ALTERNATIVE
#define STRIP_ALTERNATIVE(x)
Definition: cook.c:53
C_QUOTE2
#define C_QUOTE2
Definition: rfc2046.h:160
txt_attach_unsup_charset
constext txt_attach_unsup_charset[]
Definition: lang.c:81
FALSE
#define FALSE
Definition: bool.h:70
charset_unsupported
static t_bool charset_unsupported(const char *charset)
Definition: cook.c:776
debug
unsigned short debug
Definition: debug.c:51
lookup_mimetype
void lookup_mimetype(const char *ext, t_part *part)
Definition: mimetypes.c:105
t_attribute::headers_to_not_display
struct t_newsheader * headers_to_not_display
Definition: tin.h:1595
openartinfo::raw
FILE * raw
Definition: rfc2046.h:188
openartinfo::hdr
struct t_header hdr
Definition: rfc2046.h:185
process_charsets
void process_charsets(char **line, size_t *max_line_len, const char *network_charset, const char *local_charset, t_bool conv_tex2iso)
Definition: misc.c:2634
verbatim_begin_regex
struct regex_cache verbatim_begin_regex
txt_incomplete
constext txt_incomplete[]
Definition: lang.c:556
t_group::attribute
struct t_attribute * attribute
Definition: tin.h:1790
t_header::mime
t_bool mime
Definition: rfc2046.h:145
mail_regex
struct regex_cache mail_regex
part::uue
struct part * uue
Definition: rfc2046.h:106
parse_header
char * parse_header(char *buf, const char *pat, t_bool decode, t_bool structured, t_bool keep_tab)
Definition: rfc2046.c:908
t_config::wrap_column
int wrap_column
Definition: tinrc.h:195
IS_LOCAL_CHARSET
#define IS_LOCAL_CHARSET(c)
Definition: tin.h:776
read_decoded_qp_line
int read_decoded_qp_line(FILE *file, char **line, size_t *max_line_len, const int max_lines_to_read)
Definition: rfc2045.c:436
t_bool
unsigned t_bool
Definition: bool.h:77
mmdecode
int mmdecode(const char *what, int encoding, int delimiter, char *where)
Definition: rfc2047.c:147
t_attribute::trim_article_body
unsigned trim_article_body
Definition: tin.h:1636
TRUE
#define TRUE
Definition: bool.h:74
part::params
t_param * params
Definition: rfc2046.h:102
tin_errno
int tin_errno
Definition: read.c:59
strncasecmp
int strncasecmp(const char *p, const char *q, size_t n)
Definition: string.c:484
SKIP_LEADING
#define SKIP_LEADING
Definition: tin.h:970
t_attribute::headers_to_display
struct t_newsheader * headers_to_display
Definition: tin.h:1594
t_header::ext
t_part * ext
Definition: rfc2046.h:146
PUT_UUE
#define PUT_UUE(part, qualifier_text)
Definition: cook.c:371
part::encoding
unsigned encoding
Definition: rfc2046.h:95
part::format
unsigned format
Definition: rfc2046.h:96
header_wanted
static t_bool header_wanted(const char *line)
Definition: cook.c:718
put_cooked
static void put_cooked(size_t buf_len, t_bool wrap_lines, int flags, const char *fmt,...)
Definition: cook.c:206
ENCODING_BASE64
#define ENCODING_BASE64
Definition: rfc2046.h:57
DIRSEP
#define DIRSEP
Definition: tin.h:2104
C_ATTACH
#define C_ATTACH
Definition: rfc2046.h:155
lineinfo::offset
long offset
Definition: rfc2046.h:175
uubody_regex
struct regex_cache uubody_regex
my_malloc
#define my_malloc(size)
Definition: tin.h:2196