tin  2.4.4
About: TIN is a threaded NNTP and spool based UseNet newsreader.
  Fossies Dox: tin-2.4.4.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

rfc2046.c
Go to the documentation of this file.
1 /*
2  * Project : tin - a Usenet reader
3  * Module : rfc2046.c
4  * Author : Jason Faultless <jason@altarstone.com>
5  * Created : 2000-02-18
6  * Updated : 2019-10-25
7  * Notes : RFC 2046 MIME article parsing
8  *
9  * Copyright (c) 2000-2020 Jason Faultless <jason@altarstone.com>
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  *
16  * 1. Redistributions of source code must retain the above copyright notice,
17  * this list of conditions and the following disclaimer.
18  *
19  * 2. Redistributions in binary form must reproduce the above copyright
20  * notice, this list of conditions and the following disclaimer in the
21  * documentation and/or other materials provided with the distribution.
22  *
23  * 3. Neither the name of the copyright holder nor the names of its
24  * contributors may be used to endorse or promote products derived from
25  * this software without specific prior written permission.
26  *
27  * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
31  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 
41 #ifndef TIN_H
42 # include "tin.h"
43 #endif /* !TIN_H */
44 
45 
46 /*
47  * local prototypes
48  */
49 static char *get_charset(char *value);
50 static char *get_quoted_string(char *source, char **dest);
51 static char *get_token(const char *source);
52 static char *strip_charset(char **value);
53 static char *skip_equal_sign(char *source);
54 static char *skip_space(char *source);
55 static int boundary_cmp(const char *line, const char *boundary);
56 static int count_lines(char *line);
57 static int parse_multipart_article(FILE *infile, t_openartinfo *artinfo, t_part *part, int depth, t_bool show_progress_meter);
58 static int parse_normal_article(FILE *in, t_openartinfo *artinfo, t_bool show_progress_meter);
59 static int parse_rfc2045_article(FILE *infile, int line_count, t_openartinfo *artinfo, t_bool show_progress_meter);
60 static unsigned int parse_content_encoding(char *encoding);
61 static void decode_value(const char *charset, t_param *part);
62 static void parse_content_type(char *type, t_part *content);
63 static void parse_content_disposition(char *disp, t_part *part);
64 static void parse_params(char *params, t_part *content);
65 static void progress(int line_count);
66 static void remove_cwsp(char *source);
67 #ifdef DEBUG_ART
68  static void dump_art(t_openartinfo *art);
69 #endif /* DEBUG_ART */
70 
71 
72 /*
73  * Local variables
74  */
75 static int art_lines = 0; /* lines in art on spool */
76 static const char *progress_mesg = NULL; /* message progress() should display */
/*
 * RFC 2231 decoding table: maps a 7bit character code to the value of the
 * hexadecimal digit it represents, or -1 if it is no hex digit.
 * Explicitly signed so that -1 survives where plain char is unsigned.
 */
static const signed char xtbl[] = {
/*          0  1  2  3   4  5  6  7   8  9  a  b   c  d  e  f */
/* 0 */	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 1 */	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 2 */	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 3 */	 0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
/* 4 */	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 5 */	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 6 */	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 7 */	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/*
 * Value of the hex digit c or -1. Characters beyond the end of xtbl[]
 * (8bit input) yield -1 instead of indexing outside the table.
 * Beware: c is evaluated more than once.
 */
#define XVAL(c)	((unsigned char) (c) < sizeof(xtbl) ? xtbl[(unsigned char) (c)] : -1)
/* C90: isxdigit(3) */
#define IS_XDIGIT(c)	(((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
93 #define PARAM_SEP "; \n"
94 /* default parameters for Content-Type */
95 #define CT_DEFPARMS "charset=US-ASCII"
96 
97 /*
98  * Use the default message if one hasn't been supplied
99  * Body search is currently the only function that has a different message
100  */
101 static void
103  int line_count)
104 {
105  if (progress_mesg != NULL && art_lines > 0 && line_count && line_count % MODULO_COUNT_NUM == 0)
106  show_progress(progress_mesg, line_count, art_lines);
107 }
108 
109 
110 /*
111  * Lookup content type in content_types[] array and return matching
112  * index or -1
113  */
114 int
116  char *type)
117 {
118  int i;
119 
120  if (type == NULL)
121  return -1;
122 
123  for (i = 0; content_types[i] != NULL; ++i) {
124  if (strcasecmp(type, content_types[i]) == 0)
125  return i;
126  }
127 
128  return -1;
129 }
130 
131 
132 /*
133  * check if a line is a MIME boundary
134  * returns BOUND_NONE if it is not, BOUND_START if normal boundary and
135  * BOUND_END if closing boundary
136  */
137 static int
139  const char *line,
140  const char *boundary)
141 {
142  size_t blen = strlen(boundary);
143  size_t len;
144  char *e, *l;
145  int nl;
146 
147  if ((len = strlen(line)) == 0)
148  return BOUND_NONE;
149 
150  if (blen + 2 > len)
151  return BOUND_NONE;
152 
153  /* remove trailing whites as per RFC 2046 5.1.1 */
154  l = my_strdup(line);
155  e = l + len - 1;
156  while(e > l + blen + 1 && isspace(*e))
157  *e-- = '\0';
158 
159  len = strlen(l);
160 
161  nl = l[len - 1] == '\n';
162 
163  if (len != blen + 2 + nl && len != blen + 4 + nl) {
164  free(l);
165  return BOUND_NONE;
166  }
167  if (l[0] != '-' || l[1] != '-') {
168  free(l);
169  return BOUND_NONE;
170  }
171 
172  if (strncmp(l + 2, boundary, blen) != 0) {
173  free(l);
174  return BOUND_NONE;
175  }
176 
177  if (l[blen + 2] != '-') {
178  if (nl ? l[blen + 2] == '\n' : l[blen + 2] == '\0') {
179  free(l);
180  return BOUND_START;
181  } else {
182  free(l);
183  return BOUND_NONE;
184  }
185  }
186 
187  if (l[blen + 3] != '-') {
188  free(l);
189  return BOUND_NONE;
190  }
191 
192  if (nl ? l[blen + 4] == '\n' : l[blen + 4] == '\0') {
193  free(l);
194  return BOUND_END;
195  }
196  free(l);
197  return BOUND_NONE;
198 }
199 
200 
201 /*
202  * RFC2046 5.1.2 says that we are required to check for all possible
203  * boundaries, not only the one that is expected. Iterate through all
204  * the parts.
205  */
206 static int
208  const char *line,
209  t_part *part)
210 {
211  const char *boundary;
212  int bnd = BOUND_NONE;
213 
214  for (; part != NULL; part = part->next) {
215  /* We may not have even parsed a boundary for this part yet */
216  if ((boundary = get_param(part->params, "boundary")) == NULL)
217  continue;
218  if ((bnd = boundary_cmp(line, boundary)) != BOUND_NONE)
219  break;
220  }
221 
222  return bnd;
223 }
224 
225 
226 #define ATTRIBUTE_DELIMS "()<>@,;:\\\"/[]?="
227 
/*
 * Skip leading blanks and tabs.
 * Returns a pointer to the first other character or NULL if the end of
 * the string was reached.
 */
static char *
skip_space(
	char *source)
{
	while (*source == ' ' || *source == '\t')
		source++;

	return (*source != '\0') ? source : NULL;
}
236 
237 
/*
 * Removes comments and white space from a header body, in place.
 *
 * - text inside double quotes is kept verbatim, including quoted pairs
 * - (possibly nested) comments in parentheses are dropped
 * - blanks and tabs outside of quotes are dropped
 *
 * Processing stops at an unbalanced ')'; whatever has been collected up
 * to that point is kept so that the rest of the code can pick up usable
 * pieces of a damaged header.
 */
static void
remove_cwsp(
	char *source)
{
	char *from = source;
	char *to = source;
	char src;
	int c_cnt = 0;		/* comment nesting depth */
	int inquotes = 0;	/* inside a quoted-string? */

	while ((src = *from++) != '\0' && c_cnt >= 0) {
		if (src == '"' && c_cnt == 0)
			inquotes = !inquotes;

		if (inquotes) {
			/* keep quoted pairs as they are */
			if (src == '\\' && *from != '\0') {
				*to++ = src;
				*to++ = *from++;
				continue;
			}
		} else {
			/* skip over quoted pairs inside comments */
			if (c_cnt && src == '\\') {
				if (*from != '\0')	/* don't step over the terminator */
					++from;
				continue;
			}
			if (src == '(') {
				++c_cnt;
				continue;
			}
			if (src == ')') {
				--c_cnt;
				continue;
			}
			if (c_cnt > 0 || src == ' ' || src == '\t')
				continue;
		}

		*to++ = src;
	}

	*to = '\0';
}
295 
296 
297 static char *
299  const char *source)
300 {
301  char *dest = my_strdup(source);
302  char *ptr = dest;
303 
304  while (isascii((int) *ptr) && isprint((int) *ptr) && *ptr != ' ' && !strchr(ATTRIBUTE_DELIMS, *ptr))
305  ptr++;
306  *ptr = '\0';
307 
308  return my_realloc(dest, strlen(dest) + 1);
309 }
310 
311 
/*
 * Extract a quoted-string. source must point to the opening double quote.
 * The unquoted text (quoted pairs resolved to the character itself) is
 * stored in freshly allocated memory at *dest, the caller has to free it.
 * Returns a pointer behind the closing double quote, or to the
 * terminating '\0' if the string was not closed.
 */
static char *
get_quoted_string(
	char *source,
	char **dest)
{
	char *out;

	*dest = my_malloc(strlen(source) + 1);
	out = *dest;
	source++;	/* skip over double quote */

	while (*source != '\0' && *source != '"') {
		/* quoted pair: take the next char as-is */
		if (*source == '\\' && *++source == '\0')
			break;	/* dangling backslash at end of input */
		*out++ = *source++;
	}
	*out = '\0';
	*dest = my_realloc(*dest, (size_t) (out - *dest) + 1);

	return (*source != '\0') ? source + 1 : source;
}
341 
342 
/*
 * RFC 2231: Extract character set from parameter value
 * (charset'language'text).
 * Returns NULL if value holds no single quote at all, "US-ASCII" if the
 * charset field is empty and the text in front of the first single quote
 * otherwise. A non-NULL result is freshly allocated.
 */
static char *
get_charset(
	char *value)
{
	char *charset;
	const char *quote = strchr(value, '\'');

	/* no charset information present */
	if (quote == NULL)
		return NULL;

	/* no charset given -> fall back to us-ascii */
	if (quote == value)
		return my_strdup("US-ASCII");

	charset = my_strdup(value);
	charset[quote - value] = '\0';

	return charset;
}
367 
368 
369 /*
370  * RFC 2231: Decode parameter value according to the given
371  * character set
372  */
373 static void
375  const char *charset,
376  t_param *part)
377 {
378  char *rptr, *wptr;
379  const char *cset;
380  size_t max_line_len = strlen(part->value);
381 
382  /*
383  * we prefer part->charset if present, even if rfc 2231
384  * forbids different charsets for each part
385  */
386  cset = part->charset ? part->charset : charset;
387  rptr = wptr = part->value;
388 
389  while (*rptr) {
390  if (*rptr == '%' && IS_XDIGIT(*(rptr + 1)) && IS_XDIGIT(*(rptr + 2))) {
391  *wptr++ = XVAL(*(rptr + 1)) << 4 | XVAL(*(rptr + 2));
392  rptr += 3;
393  } else
394  *wptr++ = *rptr++;
395  }
396  *wptr = '\0';
397 
398  process_charsets(&(part->value), &max_line_len, cset, tinrc.mm_local_charset, FALSE);
399  part->encoded = FALSE;
400  FreeAndNull(part->charset);
401 }
402 
403 
/*
 * RFC 2231: Remove character set (and language information)
 * from parameter value.
 * If *value holds a single quote it is replaced by a freshly allocated
 * copy of the text behind the last single quote and the old string is
 * freed. Returns *value.
 */
static char *
strip_charset(
	char **value)
{
	char *newval;
	const char *quote = strrchr(*value, '\'');

	if (quote != NULL) {
		newval = my_strdup(quote + 1);
		free(*value);
		*value = newval;
	}

	return *value;
}
422 
423 
/*
 * Skip equal sign and (non compliant) white space around it.
 * Returns a pointer to the first character of the value, or NULL if
 * there is no equal sign or nothing follows it.
 */
static char *
skip_equal_sign(
	char *source)
{
	source = skip_space(source);

	/* no equal sign, invalid header, stop parsing here */
	if (source == NULL || *source != '=')
		return NULL;

	return skip_space(source + 1);
}
440 
441 
442 /*
443  * Parse a Content-* parameter list into a linked list
444  * Ensure the ->params element is correctly initialised before calling
445  * TODO: may still not catch everything permitted in the RFC
446  */
447 static void
449  char *params,
450  t_part *content)
451 {
452  char *name, *param, *value, *contp;
453  int idx;
454  t_bool encoded;
455  t_param *ptr;
456 
457  param = params;
458  while (*param) {
459  idx = -1;
460  encoded = FALSE;
461  /* Skip over white space */
462  if (!(param = skip_space(param)))
463  break;
464 
465  /* catch parameter name */
466  name = get_token(param);
467  param += strlen(name);
468 
469  if (!*param) {
470  /* Nothing follows, invalid, stop here */
472  break;
473  }
474 
475  /* RFC 2231 Character set and language information */
476  if ((contp = strrchr(name, '*')) && !*(contp + 1)) {
477  encoded = TRUE;
478  *contp = '\0';
479  }
480 
481  /* RFC 2231 Parameter Value Continuations */
482  if ((contp = strchr(name, '*')) && *(contp + 1) >= '0' && *(contp + 1) <= '9') {
483  idx = atoi(contp + 1);
484  *contp = '\0';
485  }
486 
487  if (!(param = skip_equal_sign(param))) {
489  break;
490  }
491 
492  /* catch parameter value; may be surrounded by double quotes */
493  if (*param == '"') /* parse quoted-string */
495  else {
496  /* parse token */
497  value = get_token(param);
498  param += strlen(value);
499  }
500 
501  ptr = new_params();
502  ptr->name = name;
503  if (encoded) {
504  ptr->encoded = TRUE;
505  ptr->charset = get_charset(value);
506  ptr->value = strip_charset(&value);
507  } else
508  ptr->value = value;
509 
510  ptr->part = idx;
511  ptr->next = content->params; /* Push onto start of list */
512  content->params = ptr;
513 
514  /* advance pointer to next parameter */
515  while ((*param) && (*param != ';'))
516  param++;
517  if (*param == ';')
518  param++;
519  }
520 }
521 
522 
523 /*
524  * Return a freshly allocated and initialised t_param structure
525  */
526 t_param *
528  void)
529 {
530  t_param *ptr;
531 
532  ptr = my_malloc(sizeof(t_param));
533  ptr->name = NULL;
534  ptr->value = NULL;
535  ptr->charset = NULL;
536  ptr->part = -1;
537  ptr->encoded = FALSE;
538  ptr->enc_fallback = TRUE;
539  ptr->next = NULL;
540 
541  return ptr;
542 }
543 
544 
545 /*
546  * Free up a generic list object
547  */
548 void
550  t_param *list)
551 {
552  while (list->next != NULL) {
553  free_list(list->next);
554  list->next = NULL;
555  }
556 
557  free(list->name);
558  free(list->value);
559  FreeIfNeeded(list->charset);
560  free(list);
561 }
562 
563 
564 /*
565  * Return a parameter value from a param list or NULL
566  */
567 const char *
569  t_param *list,
570  const char *name)
571 {
572  char *tmpval, *charset = NULL;
573  int i, j;
574  size_t newlen;
575  t_param *p_list, *c_list;
576 
577  for (p_list = list; p_list != NULL; p_list = p_list->next) {
578  /*
579  * RFC 2231 Parameter Value Continuations + Character Set
580  *
581  * part == 0,1,2...: parameter has several parts, must be concatenated
582  * part == -1 : parameter has only one part
583  * part == -2 : part has already been concatenated, main part has
584  * part == -1
585  *
586  * charset : character set if present
587  */
588  if (strcasecmp(name, p_list->name) == 0 && p_list->part > -2) {
589  if (p_list->part == -1 && p_list->encoded && p_list->charset) {
590  decode_value(p_list->charset, p_list);
591  p_list->encoded = FALSE;
592  p_list->enc_fallback = FALSE;
593  }
594  if (p_list->part >= 0) {
595  newlen = 0;
596  if (p_list->charset) {
597  FreeIfNeeded(charset);
598  charset = my_strdup(p_list->charset);
599  }
600  for (j = 0, c_list = list; c_list != NULL; c_list = c_list->next) {
601  if (strcasecmp(name, c_list->name) == 0) {
602  if (c_list->part < 0)
603  continue;
604  if (c_list->part < p_list->part) {
605  if (c_list->charset) {
606  FreeIfNeeded(charset);
607  charset = my_strdup(c_list->charset);
608  }
609  p_list = c_list;
610  }
611 
612  if (j < c_list->part)
613  j = c_list->part;
614 
615  newlen += strlen(c_list->value);
616  }
617  }
618  p_list->value = my_realloc(p_list->value, newlen + 1);
619  if (charset)
620  decode_value(charset, p_list);
621  for (i = p_list->part + 1; i <= j; ++i) {
622  for (c_list = list; c_list != NULL; c_list = c_list->next) {
623  if (strcasecmp(name, c_list->name) == 0) {
624  if (c_list->part == i) {
625  if (c_list->encoded && charset)
626  decode_value(charset, c_list);
627  strcat(p_list->value, c_list->value);
628  c_list->part = -2;
629  }
630  }
631  }
632  }
633  p_list->part = -1;
634  p_list->encoded = FALSE;
635  p_list->enc_fallback = FALSE;
636  FreeAndNull(charset);
637  }
638  /*
639  * RFC 2047 'encoded-word' is not allowed at this place but
640  * some clients use this nevertheless -> we try to decode that
641  */
642  if (p_list->enc_fallback) {
643  tmpval = p_list->value;
644  if (*tmpval == '=' && *(++tmpval) == '?') {
645  if ((tmpval = rfc1522_decode(p_list->value))) {
646  free(p_list->value);
647  p_list->value = my_strdup(tmpval);
648  }
649  }
650  p_list->enc_fallback = FALSE;
651  }
652  return p_list->value;
653  }
654  }
655 
656  return NULL;
657 }
658 
659 
660 /*
661  * Split a Content-Type header into a t_part structure
662  */
663 static void
665  char *type,
666  t_part *content)
667 {
668  char *subtype, *params;
669  int i;
670 
671  /* Remove comments and white space */
672  remove_cwsp(type);
673 
674  /*
675  * Split the type/subtype
676  */
677  if ((type = strtok(type, "/")) == NULL)
678  return;
679 
680  /* Look up major type */
681 
682  /*
683  * Unrecognised type, treat according to RFC
684  */
685  if ((i = content_type(type)) == -1) {
686  content->type = TYPE_APPLICATION;
687  free(content->subtype);
688  content->subtype = my_strdup("octet-stream");
689  return;
690  } else
691  content->type = i;
692 
693  subtype = strtok(NULL, PARAM_SEP);
694  /* save new subtype, or use pre-initialised value "plain" */
695  if (subtype != NULL) { /* check for broken Content-Type: is header without a subtype */
696  free(content->subtype); /* Pre-initialised to plain */
697  content->subtype = my_strdup(subtype);
698  str_lwr(content->subtype);
699  }
700 
701  /*
702  * Parse any parameters into a list
703  */
704  if ((params = strtok(NULL, "\n")) != NULL) {
705  const char *format;
706 #ifndef CHARSET_CONVERSION
707  char defparms[] = CT_DEFPARMS; /* must be writable */
708 #endif /* !CHARSET_CONVERSION */
709 
710  parse_params(params, content);
711  if (!get_param(content->params, "charset")) { /* add default charset if needed */
712 #ifndef CHARSET_CONVERSION
713  parse_params(defparms, content);
714 #else
715  if (curr_group->attribute->undeclared_charset) {
716  char *charsetheader;
717 
718  charsetheader = my_malloc(strlen(curr_group->attribute->undeclared_charset) + 9); /* 9=len('charset=\0') */
719  sprintf(charsetheader, "charset=%s", curr_group->attribute->undeclared_charset);
720  parse_params(charsetheader, content);
721  free(charsetheader);
722  } else {
723  char defparms[] = CT_DEFPARMS; /* must be writable */
724 
725  parse_params(defparms, content);
726  }
727 #endif /* !CHARSET_CONVERSION */
728  }
729  if ((format = get_param(content->params, "format"))) {
730  if (!strcasecmp(format, "flowed"))
731  content->format = FORMAT_FLOWED;
732  }
733  }
734 }
735 
736 
737 static unsigned int
739  char *encoding)
740 {
741  unsigned int i;
742 
743  /* Remove comments and white space */
744  remove_cwsp(encoding);
745 
746  for (i = 0; content_encodings[i] != NULL; ++i) {
747  if (strcasecmp(encoding, content_encodings[i]) == 0)
748  return i;
749  }
750 
751  /*
752  * TODO: check rfc - may need to switch Content-Type to
753  * application/octet-steam where this header exists but is unparsable.
754  *
755  * RFC 2045 6.2:
756  * Labelling unencoded data containing 8bit characters as "7bit" is not
757  * allowed, nor is labelling unencoded non-line-oriented data as anything
758  * other than "binary" allowed.
759  */
760  return ENCODING_BINARY;
761 }
762 
763 
764 /*
765  * We're only really interested in the filename parameter, which has
766  * a higher precedence than the name parameter from Content-Type (RFC 1806)
767  * Attach the parsed params to the part passed in 'part'
768  */
769 static void
771  char *disp,
772  t_part *part)
773 {
774  char *ptr;
775 
776  /* Remove comments and white space */
777  remove_cwsp(disp);
778 
779  strtok(disp, PARAM_SEP);
780  if ((ptr = strtok(NULL, "\n")) == NULL)
781  return;
782 
783  parse_params(ptr, part);
784 }
785 
786 
787 /*
788  * Return a freshly allocated and initialised part structure attached to the
789  * end of the list of article parts given
790  */
791 t_part *
793  t_part *part)
794 {
795  t_part *p;
796  t_part *ptr = my_malloc(sizeof(t_part));
797 #ifndef CHARSET_CONVERSION
798  char defparms[] = CT_DEFPARMS; /* must be writable */
799 #endif /* !CHARSET_CONVERSION */
800 
801  ptr->type = TYPE_TEXT; /* Defaults per RFC */
802  ptr->subtype = my_strdup("plain");
803  ptr->description = NULL;
804  ptr->encoding = ENCODING_7BIT;
805  ptr->format = FORMAT_FIXED;
806  ptr->params = NULL;
807 
808 #ifndef CHARSET_CONVERSION
809  parse_params(defparms, ptr);
810 #else
811  if (curr_group && curr_group->attribute->undeclared_charset) {
812  char *charsetheader;
813 
814  charsetheader = my_malloc(strlen(curr_group->attribute->undeclared_charset) + 9); /* 9=len('charset=\0') */
815  sprintf(charsetheader, "charset=%s", curr_group->attribute->undeclared_charset);
816  parse_params(charsetheader, ptr);
817  free(charsetheader);
818  } else {
819  char defparms[] = CT_DEFPARMS; /* must be writable */
820 
821  parse_params(defparms, ptr);
822  }
823 #endif /* !CHARSET_CONVERSION */
824 
825  ptr->offset = 0;
826  ptr->line_count = 0;
827  ptr->depth = 0; /* Not an embedded object (yet) */
828  ptr->uue = NULL;
829  ptr->next = NULL;
830 
831  if (part == NULL) /* List head - we don't do this */
832  return ptr;
833 
834  for (p = part; p->next != NULL; p = p->next)
835  ;
836  p->next = ptr;
837 
838  return ptr;
839 }
840 
841 
842 /*
843  * Free a linked list of t_part
844  */
845 void
847  t_part *ptr)
848 {
849  while (ptr->next != NULL) {
850  free_parts(ptr->next);
851  ptr->next = NULL;
852  }
853 
854  free(ptr->subtype);
855  FreeAndNull(ptr->description);
856  if (ptr->params)
857  free_list(ptr->params);
858  if (ptr->uue)
859  free_parts(ptr->uue);
860  free(ptr);
861 }
862 
863 
864 void
866  struct t_header *hdr)
867 {
868  /*
869  * Initialise the header struct
870  */
871  FreeAndNull(hdr->from);
872  FreeAndNull(hdr->to);
873  FreeAndNull(hdr->cc);
874  FreeAndNull(hdr->bcc);
875  FreeAndNull(hdr->date);
876  FreeAndNull(hdr->subj);
877  FreeAndNull(hdr->org);
878  FreeAndNull(hdr->replyto);
879  FreeAndNull(hdr->newsgroups);
880  FreeAndNull(hdr->messageid);
881  FreeAndNull(hdr->references);
882  FreeAndNull(hdr->distrib);
883  FreeAndNull(hdr->keywords);
884  FreeAndNull(hdr->summary);
885  FreeAndNull(hdr->followup);
886  FreeAndNull(hdr->ftnto);
887 #ifdef XFACE_ABLE
888  FreeAndNull(hdr->xface);
889 #endif /* XFACE_ABLE */
890  hdr->mime = FALSE;
891 
892  if (hdr->ext)
893  free_parts(hdr->ext);
894  hdr->ext = NULL;
895 }
896 
897 
898 /*
899  * buf: Article header
900  * pat: Text to match in header
901  * decode: RFC2047-decode the header
902  * structured: extract address-part before decoding the header
903  *
904  * Returns:
905  * (decoded) body of header if matched or NULL
906  */
907 char *
909  char *buf,
910  const char *pat,
911  t_bool decode,
912  t_bool structured,
913  t_bool keep_tab)
914 {
915  size_t plen = strlen(pat);
916  char *ptr = buf + plen;
917 
918  /*
919  * Does ': ' follow the header text?
920  */
921  if (!(*ptr && *(ptr + 1) && *ptr == ':' && *(ptr + 1) == ' '))
922  return NULL;
923 
924  /*
925  * If the header matches, skip past the ': ' and any leading whitespace
926  */
927  if (strncasecmp(buf, pat, plen) != 0)
928  return NULL;
929 
930  ptr += 2;
931 
932  str_trim(ptr);
933  if (!*ptr)
934  return NULL;
935 
936  if (decode) {
937  if (structured) {
938  char addr[HEADER_LEN];
939  char name[HEADER_LEN];
940  int type;
941 
942  if (gnksa_split_from(ptr, addr, name, &type) == GNKSA_OK) {
943  buffer_to_ascii(addr);
944 
945  if (*name) {
946  if (type == GNKSA_ADDRTYPE_OLDSTYLE)
947  sprintf(ptr, "%s (%s)", addr, convert_to_printable(rfc1522_decode(name), keep_tab));
948  else
949  sprintf(ptr, "%s <%s>", convert_to_printable(rfc1522_decode(name), keep_tab), addr);
950  } else
951  strcpy(ptr, addr);
952  } else
953  return convert_to_printable(ptr, keep_tab);
954  } else
955  return (convert_to_printable(rfc1522_decode(ptr), keep_tab));
956  }
957 
958  return ptr;
959 }
960 
961 
962 /*
963  * Read main article headers into a blank header structure.
964  * Pass the data 'from' -> 'to' when reading via NNTP
965  * Return tin_errno (basically will be !=0 if reading was 'q'uit)
966  * We have to guard against 'to' here since this function is exported
967  */
968 int
970  struct t_header *hdr,
971  FILE *from,
972  FILE *to)
973 {
974  char *line;
975  char *ptr;
976 
977  memset(hdr, 0, sizeof(struct t_header));
978  hdr->mime = FALSE;
979  hdr->ext = new_part(NULL); /* Initialise MIME data */
980 
981  while ((line = tin_fgets(from, TRUE)) != NULL) {
982  if (read_news_via_nntp && to) {
983  fprintf(to, "%s\n", line); /* Put raw data */
984 #ifdef DEBUG
985  if ((debug & DEBUG_NNTP) && verbose > 1)
986  debug_print_file("NNTP", "<<<%s%s", logtime(), line);
987 #endif /* DEBUG */
988  }
989  /*
990  * End of headers ?
991  */
992  if (line[0] == '\0') {
993  if (to)
994  hdr->ext->offset = ftell(to); /* Offset of main body */
995  return 0;
996  }
997 
998  /*
999  * FIXME: multiple headers of the same name could lead to information
1000  * loss (multiple Cc: lines are allowed, for example)
1001  */
1002  unfold_header(line);
1003  if ((ptr = parse_header(line, "From", TRUE, TRUE, FALSE))) {
1004  FreeIfNeeded(hdr->from);
1005  hdr->from = my_strdup(ptr);
1006  continue;
1007  }
1008  if ((ptr = parse_header(line, "To", TRUE, TRUE, FALSE))) {
1009  FreeIfNeeded(hdr->to);
1010  hdr->to = my_strdup(ptr);
1011  continue;
1012  }
1013  if ((ptr = parse_header(line, "Cc", TRUE, TRUE, FALSE))) {
1014  FreeIfNeeded(hdr->cc);
1015  hdr->cc = my_strdup(ptr);
1016  continue;
1017  }
1018  if ((ptr = parse_header(line, "Bcc", TRUE, TRUE, FALSE))) {
1019  FreeIfNeeded(hdr->bcc);
1020  hdr->bcc = my_strdup(ptr);
1021  continue;
1022  }
1023  if ((ptr = parse_header(line, "Date", FALSE, FALSE, FALSE))) {
1024  FreeIfNeeded(hdr->date);
1025  hdr->date = my_strdup(ptr);
1026  continue;
1027  }
1028  if ((ptr = parse_header(line, "Subject", TRUE, FALSE, TRUE))) {
1029  FreeIfNeeded(hdr->subj);
1030  hdr->subj = my_strdup(ptr);
1031  continue;
1032  }
1033  if ((ptr = parse_header(line, "Organization", TRUE, FALSE, TRUE))) {
1034  FreeIfNeeded(hdr->org);
1035  hdr->org = my_strdup(ptr);
1036  continue;
1037  }
1038  if ((ptr = parse_header(line, "Reply-To", TRUE, TRUE, FALSE))) {
1039  FreeIfNeeded(hdr->replyto);
1040  hdr->replyto = my_strdup(ptr);
1041  continue;
1042  }
1043  if ((ptr = parse_header(line, "Newsgroups", FALSE, FALSE, FALSE))) {
1044  FreeIfNeeded(hdr->newsgroups);
1045  hdr->newsgroups = my_strdup(ptr);
1046  continue;
1047  }
1048  if ((ptr = parse_header(line, "Message-ID", FALSE, FALSE, FALSE))) {
1049  FreeIfNeeded(hdr->messageid);
1050  hdr->messageid = my_strdup(ptr);
1051  continue;
1052  }
1053  if ((ptr = parse_header(line, "References", FALSE, FALSE, FALSE))) {
1054  FreeIfNeeded(hdr->references);
1055  hdr->references = my_strdup(ptr);
1056  continue;
1057  }
1058  if ((ptr = parse_header(line, "Distribution", FALSE, FALSE, FALSE))) {
1059  FreeIfNeeded(hdr->distrib);
1060  hdr->distrib = my_strdup(ptr);
1061  continue;
1062  }
1063  if ((ptr = parse_header(line, "Keywords", TRUE, FALSE, FALSE))) {
1064  FreeIfNeeded(hdr->keywords);
1065  hdr->keywords = my_strdup(ptr);
1066  continue;
1067  }
1068  if ((ptr = parse_header(line, "Summary", TRUE, FALSE, FALSE))) {
1069  FreeIfNeeded(hdr->summary);
1070  hdr->summary = my_strdup(ptr);
1071  continue;
1072  }
1073  if ((ptr = parse_header(line, "Followup-To", FALSE, FALSE, FALSE))) {
1074  FreeIfNeeded(hdr->followup);
1075  hdr->followup = my_strdup(ptr);
1076  continue;
1077  }
1078  if ((ptr = parse_header(line, "X-Comment-To", TRUE, TRUE, FALSE))) {
1079  FreeIfNeeded(hdr->ftnto);
1080  hdr->ftnto = my_strdup(ptr);
1081  continue;
1082  }
1083 #ifdef XFACE_ABLE
1084  if ((ptr = parse_header(line, "X-Face", FALSE, FALSE, FALSE))) {
1085  FreeIfNeeded(hdr->xface);
1086  hdr->xface = my_strdup(ptr);
1087  continue;
1088  }
1089 #endif /* XFACE_ABLE */
1090  /* TODO: check version */
1091  if (parse_header(line, "MIME-Version", FALSE, FALSE, FALSE)) {
1092  hdr->mime = TRUE;
1093  continue;
1094  }
1095  if ((ptr = parse_header(line, "Content-Type", FALSE, FALSE, FALSE))) {
1096  parse_content_type(ptr, hdr->ext);
1097  continue;
1098  }
1099  if ((ptr = parse_header(line, "Content-Transfer-Encoding", FALSE, FALSE, FALSE))) {
1100  hdr->ext->encoding = parse_content_encoding(ptr);
1101  continue;
1102  }
1103  if ((ptr = parse_header(line, "Content-Description", TRUE, FALSE, FALSE))) {
1104  FreeIfNeeded(hdr->ext->description);
1105  hdr->ext->description = my_strdup(ptr);
1106  continue;
1107  }
1108  if ((ptr = parse_header(line, "Content-Disposition", FALSE, FALSE, FALSE))) {
1109  parse_content_disposition(ptr, hdr->ext);
1110  continue;
1111  }
1112  }
1113 
1114  return tin_errno;
1115 }
1116 
1117 
/*
 * Count lines in a continuated header: one more than the number of
 * newlines embedded in line.
 * line MUST NOT end in a newline.
 */
static int
count_lines(
	char *line)
{
	int lines = 1;

	for (; *line != '\0'; line++) {
		if (*line == '\n')
			lines++;
	}

	return lines;
}
1135 
1136 
/*
 * Unfold header, i.e. strip any newline off it (in place). Don't strip
 * other whitespace, it depends on the header if this is legal (structured
 * headers) or not (unstructured headers, e.g. Subject)
 */
void
unfold_header(
	char *line)
{
	const char *src;
	char *dst = line;

	for (src = line; *src != '\0'; src++) {
		if (*src != '\n')
			*dst++ = *src;
	}
	*dst = '\0';
}
1155 
1156 
1157 #define M_SEARCHING 1 /* Looking for boundary */
1158 #define M_HDR 2 /* In MIME headers */
1159 #define M_BODY 3 /* In MIME body */
1160 
1161 #define TIN_EOF 0xf00 /* Used internally for error recovery */
1162 
1163 /*
1164  * Handles multipart/ article types, write data to a raw stream when reading via NNTP
1165  * artinfo is used for generic article pointers
1166  * part contains content info about the attachment we're parsing
1167  * depth is the number of levels by which the current part is embedded
1168  * Returns a tin_errno value which is ORed with TIN_EOF if the end of the
1169  * article is reached (to prevent broken articles from hanging the NNTP socket)
1170  */
/*
 * infile is the stream the article is read from; show_progress_meter
 * enables the progress() call made for every line read.
 *
 * Return value in detail:
 *   tin_errno            an end boundary terminated this part
 *   non-zero from below  a nested multipart part returned non-zero
 *   tin_errno | TIN_EOF  tin_fgets() returned NULL before an end boundary
 *
 * NOTE(review): listing line 1172 (the function name) is absent from this
 * listing; the cross-reference index names the function defined on that
 * line parse_multipart_article.
 */
1171 static int
1173  FILE *infile,
1174  t_openartinfo *artinfo,
1175  t_part *part,
1176  int depth,
1177  t_bool show_progress_meter)
1178 {
1179  char *line;
1180  char *ptr;
1181  int bnd;
1182  int state = M_SEARCHING;
1183  t_bool is_rfc822 = FALSE;
1184  t_part *curr_part = NULL, *rfc822_part = NULL;
1185 
/*
 * The second tin_fgets() argument is TRUE only while in state M_HDR.
 * NOTE(review): presumably this selects header-style reading - confirm
 * against tin_fgets() in read.c.
 */
1186  while ((line = tin_fgets(infile, (state == M_HDR))) != NULL) {
1187 /* fprintf(stderr, "%d---:%s\n", depth, line); */
1188 
1189  /*
1190  * Check current line for boundary markers
1191  */
1192  bnd = boundary_check(line, artinfo->hdr.ext);
1193 
/* Over NNTP every line read, boundaries included, is copied to the raw stream */
1194  if (read_news_via_nntp) {
1195  fprintf(artinfo->raw, "%s\n", line);
1196 #ifdef DEBUG
1197  if ((debug & DEBUG_NNTP) && verbose > 1)
1198  debug_print_file("NNTP", "<<<%s%s", logtime(), line);
1199 #endif /* DEBUG */
1200  }
1201 
/*
 * Three counters are fed from count_lines(line): the whole article, the
 * enclosing part (unless that is the top-level part itself) and, while
 * one is open, the message/rfc822 wrapper part.
 */
1202  artinfo->hdr.ext->line_count += count_lines(line);
1203  if (show_progress_meter)
1204  progress(artinfo->hdr.ext->line_count); /* Overall line count */
1205 
1206  if (part && part != artinfo->hdr.ext)
1207  part->line_count += count_lines(line);
1208 
1209  if (is_rfc822 && rfc822_part)
1210  rfc822_part->line_count += count_lines(line);
1211 
1212  if (bnd == BOUND_END) { /* End of this part detected */
/* Take back what was just added to the rfc822 wrapper for the boundary line */
1213  if (is_rfc822 && rfc822_part)
1214  rfc822_part->line_count -= count_lines(line);
1215  /*
1216  * When we have reached the end boundary of the outermost envelope
1217  * just log any trailing data for the raw article format.
1218  */
1219  if (boundary_cmp(line, get_param(artinfo->hdr.ext->params, "boundary")) == BOUND_END)
1220  depth = 0;
1221 #if 0 /* doesn't count tailing lines after envelop mime part - correct but confusing */
1222  if (read_news_via_nntp && depth == 0)
1223  while ((line = tin_fgets(infile, FALSE)) != NULL)
1224  fprintf(artinfo->raw, "%s\n", line);
1225 #else
/* Active variant: drain the rest of the input, counting each trailing line once */
1226  if (depth == 0) {
1227  while ((line = tin_fgets(infile, FALSE)) != NULL) {
1228  if (read_news_via_nntp)
1229  fprintf(artinfo->raw, "%s\n", line);
1230  artinfo->hdr.ext->line_count++;
1231  }
1232  }
1233 #endif /* 0 */
1234  return tin_errno;
1235  }
1236 
1237  switch (state) {
1238  case M_SEARCHING:
1239  switch (bnd) {
1240  case BOUND_NONE:
1241  break; /* Keep looking */
1242 
1243  case BOUND_START:
1244  state = M_HDR; /* Now parsing headers of a part */
1245  curr_part = new_part(part);
1246  curr_part->depth = depth;
1247  break;
1248  }
1249  break;
1250 
1251  case M_HDR:
1252  switch (bnd) {
/*
 * NOTE(review): listing line 1254 is absent here. Given the TODO text it
 * was presumably an error-message call - confirm against the real source.
 */
1253  case BOUND_START: /* TODO: skip error message if not -DDEBUG? */
1255  continue;
1256 
1257  case BOUND_NONE:
1258  break; /* Correct - No boundary */
1259  }
1260 
1261  if (*line == '\0') { /* End of MIME headers */
1262  state = M_BODY;
/* Position of the raw stream right after the blank line that ended the headers */
1263  curr_part->offset = ftell(artinfo->raw);
1264 
1265  if (curr_part->type == TYPE_MULTIPART) { /* Complex multipart article */
1266  int ret, old_line_count;
1267 
/* Recurse one level deeper, then credit the lines the nested part gained to the enclosing counters */
1268  old_line_count = curr_part->line_count;
1269  if ((ret = parse_multipart_article(infile, artinfo, curr_part, depth + 1, show_progress_meter)) != 0)
1270  return ret; /* User abort or EOF reached */
1271  if (part && part != artinfo->hdr.ext)
1272  part->line_count += curr_part->line_count - old_line_count;
1273  if (is_rfc822 && rfc822_part)
1274  rfc822_part->line_count += curr_part->line_count - old_line_count;
1275  } else if (curr_part->type == TYPE_MESSAGE && !strcasecmp("RFC822", curr_part->subtype)) {
/*
 * message/rfc822: remember the wrapper part and go straight back to
 * header parsing with a fresh part one level deeper.
 */
1276  is_rfc822 = TRUE;
1277  rfc822_part = curr_part;
1278  state = M_HDR;
1279  curr_part = new_part(part);
1280  curr_part->depth = ++depth;
1281  }
1282  break;
1283  }
1284 
1285  /*
1286  * Keep headers that interest us
1287  */
1288 /* fprintf(stderr, "HDR:%s\n", line); */
/* The first parse_header() match handles the line; anything else is ignored */
1289  unfold_header(line);
1290  if ((ptr = parse_header(line, "Content-Type", FALSE, FALSE, FALSE))) {
1291  parse_content_type(ptr, curr_part);
1292  break;
1293  }
1294  if ((ptr = parse_header(line, "Content-Transfer-Encoding", FALSE, FALSE, FALSE))) {
1295  curr_part->encoding = parse_content_encoding(ptr);
1296  break;
1297  }
1298  if ((ptr = parse_header(line, "Content-Disposition", FALSE, FALSE, FALSE))) {
1299  parse_content_disposition(ptr, curr_part);
1300  break;
1301  }
1302  if ((ptr = parse_header(line, "Content-Description", TRUE, FALSE, FALSE))) {
/* A repeated Content-Description replaces the earlier copy */
1303  FreeIfNeeded(curr_part->description);
1304  curr_part->description = my_strdup(ptr);
1305  break;
1306  }
1307  break;
1308 
1309  case M_BODY:
1310  switch (bnd) {
1311  case BOUND_NONE:
1312 /* fprintf(stderr, "BOD:%s\n", line); */
1313  curr_part->line_count++;
1314  break;
1315 
1316  case BOUND_START: /* Start new attachment */
/* A start boundary closes an open rfc822 wrapper: undo its depth bump and the count added for this boundary line */
1317  if (is_rfc822) {
1318  --depth;
1319  rfc822_part->line_count--;
1320  rfc822_part = NULL;
1321  is_rfc822 = FALSE;
1322  }
1323  state = M_HDR;
1324  curr_part = new_part(part);
1325  curr_part->depth = depth;
1326  break;
1327  }
1328  break;
1329  } /* switch (state) */
1330  } /* while() */
1331 
1332  /*
1333  * We only reach this point when we (unexpectedly) reach the end of the
1334  * article
1335  */
1336  return tin_errno | TIN_EOF; /* Flag EOF */
1337 }
1338 
1339 
1340 /*
1341  * Parse a non-multipart article, merely a passthrough and bean counter
1342  */
1343 static int
1345  FILE *in,
1346  t_openartinfo *artinfo,
1347  t_bool show_progress_meter)
1348 {
1349  char *line;
1350 
1351  while ((line = tin_fgets(in, FALSE)) != NULL) {
1352  if (read_news_via_nntp) {
1353  fprintf(artinfo->raw, "%s\n", line);
1354 #ifdef DEBUG
1355  if ((debug & DEBUG_NNTP) && verbose > 1)
1356  debug_print_file("NNTP", "<<<%s%s", logtime(), line);
1357 #endif /* DEBUG */
1358  }
1359 
1360  ++artinfo->hdr.ext->line_count;
1361 
1362  if (show_progress_meter)
1363  progress(artinfo->hdr.ext->line_count);
1364  }
1365  return tin_errno;
1366 }
1367 
1368 
1369 #ifdef DEBUG_ART
1370 /* DEBUG dump of what we got */
1371 static void
1372 dump_uue(
1373  t_part *ptr,
1374  t_openartinfo *art)
1375 {
1376  if (ptr->uue != NULL) {
1377  t_part *uu;
1378  for (uu = ptr->uue; uu != NULL; uu = uu->next) {
1379  fprintf(stderr, "UU: %s\n", get_param(uu->params, "name"));
1380  fprintf(stderr, " Content-Type: %s/%s\n Content-Transfer-Encoding: %s\n",
1381  content_types[uu->type], uu->subtype,
1383  fprintf(stderr, " Offset: %ld Lines: %d\n", uu->offset, uu->line_count);
1384  fprintf(stderr, " Depth: %d\n", uu->depth);
1385  fseek(art->raw, uu->offset, SEEK_SET);
1386  fprintf(stderr, "[%s]\n\n", tin_fgets(art->raw, FALSE));
1387  }
1388  }
1389 }
1390 
1391 
1392 static void
1393 dump_art(
1394  t_openartinfo *art)
1395 {
1396  t_part *ptr;
1397  t_param *pptr;
1398  struct t_header note_h = art->hdr;
1399 
1400  fprintf(stderr, "\nMain body\nMIME-Version: %u\n", note_h.mime);
1401  fprintf(stderr, "Content-Type: %s/%s\nContent-Transfer-Encoding: %s\n",
1404  if (note_h.ext->description)
1405  fprintf(stderr, "Content-Description: %s\n", note_h.ext->description);
1406  fprintf(stderr, "Offset: %ld\nLines: %d\n", note_h.ext->offset, note_h.ext->line_count);
1407  for (pptr = note_h.ext->params; pptr != NULL; pptr = pptr->next)
1408  fprintf(stderr, "P: %s = %s\n", pptr->name, pptr->value);
1409  dump_uue(note_h.ext, art);
1410  fseek(art->raw, note_h.ext->offset, SEEK_SET);
1411  fprintf(stderr, "[%s]\n\n", tin_fgets(art->raw, FALSE));
1412  fprintf(stderr, "\n");
1413 
1414  for (ptr = note_h.ext->next; ptr != NULL; ptr = ptr->next) {
1415  fprintf(stderr, "Attachment:\n");
1416  fprintf(stderr, "\tContent-Type: %s/%s\n\tContent-Transfer-Encoding: %s\n",
1417  content_types[ptr->type], ptr->subtype,
1418  content_encodings[ptr->encoding]);
1419  if (ptr->description)
1420  fprintf(stderr, "\tContent-Description: %s\n", ptr->description);
1421  fprintf(stderr, "\tOffset: %ld\n\tLines: %d\n", ptr->offset, ptr->line_count);
1422  fprintf(stderr, "\tDepth: %d\n", ptr->depth);
1423  for (pptr = ptr->params; pptr != NULL; pptr = pptr->next)
1424  fprintf(stderr, "\tP: %s = %s\n", pptr->name, pptr->value);
1425  dump_uue(ptr, art);
1426  fseek(art->raw, ptr->offset, SEEK_SET);
1427  fprintf(stderr, "[%s]\n\n", tin_fgets(art->raw, FALSE));
1428  }
1429 }
1430 #endif /* DEBUG_ART */
1431 
1432 
1433 /*
1434  * Core parser for all article types
1435  * Return NULL if we couldn't open an output stream when reading via NNTP
1436  * When reading from local spool we assign the filehandle of the on-spool
1437  * article directly to artinfo->raw
1438  */
1439 static int
1441  FILE *infile,
1442  int line_count,
1443  t_openartinfo *artinfo,
1444  t_bool show_progress_meter)
1445 {
1446  int ret = ART_ABORT;
1447 
1448  if (read_news_via_nntp && !(artinfo->raw = tmpfile()))
1449  goto error;
1450 
1451  if (!read_news_via_nntp)
1452  artinfo->raw = infile;
1453 
1454  art_lines = line_count;
1455 
1456  if ((ret = parse_rfc822_headers(&artinfo->hdr, infile, artinfo->raw)) != 0)
1457  goto error;
1458 
1459  /*
1460  * Is this a MIME article ?
1461  * We don't bother to parse all plain text articles
1462  */
1463  if (artinfo->hdr.mime && artinfo->hdr.ext->type == TYPE_MULTIPART) {
1464  if ((ret = parse_multipart_article(infile, artinfo, artinfo->hdr.ext, 1, show_progress_meter)) != 0) {
1465  /* Strip off EOF condition if present */
1466  if (ret & TIN_EOF) {
1467  ret ^= TIN_EOF;
1468  /* TODO: skip error message if not -DDEBUG? */
1469  error_message(2, _(txt_error_mime_end), content_types[artinfo->hdr.ext->type], artinfo->hdr.ext->subtype);
1470  if (ret != 0)
1471  goto error;
1472  } else
1473  goto error;
1474  }
1475  } else {
1476  if ((ret = parse_normal_article(infile, artinfo, show_progress_meter)) != 0)
1477  goto error;
1478  }
1479 
1480  if (read_news_via_nntp)
1481  TIN_FCLOSE(infile);
1482 
1483  return 0;
1484 
1485 error:
1486  if (read_news_via_nntp)
1487  TIN_FCLOSE(infile);
1488  art_close(artinfo);
1489  return ret;
1490 }
1491 
1492 
1493 /*
1494  * Open a mail/news article using NNTP ARTICLE command
1495  * or directly off local spool
1496  * Return:
1497  * A pointer to the open postprocessed file
1498  * NULL pointer if article open fails in some way
1499  */
1500 FILE *
1502  struct t_group *group,
1503  t_artnum art)
1504 {
1505  FILE *art_fp;
1506 
1507 #ifdef NNTP_ABLE
1508  if (read_news_via_nntp && group->type == GROUP_TYPE_NEWS) {
1509  char buf[NNTP_STRLEN];
1510  snprintf(buf, sizeof(buf), "ARTICLE %"T_ARTNUM_PFMT, art);
1511  art_fp = nntp_command(buf, OK_ARTICLE, NULL, 0);
1512  } else {
1513 #endif /* NNTP_ABLE */
1514  char buf[PATH_LEN];
1515  char pbuf[PATH_LEN];
1516  char fbuf[NAME_LEN + 1];
1517  char *group_path = my_malloc(strlen(group->name) + 2); /* tailing "/\0" */;
1518 
1519  make_group_path(group->name, group_path);
1520  joinpath(buf, sizeof(buf), group->spooldir, group_path);
1521  free(group_path);
1522  snprintf(fbuf, sizeof(fbuf), "%"T_ARTNUM_PFMT, art);
1523  joinpath(pbuf, sizeof(pbuf), buf, fbuf);
1524 
1525  art_fp = fopen(pbuf, "r");
1526 #ifdef NNTP_ABLE
1527  }
1528 #endif /* NNTP_ABLE */
1529 
1530  return art_fp;
1531 }
1532 
1533 
1534 /* ----------- art_open() and art_close() are the only interface --------- */
1535 /* ------------------------for accessing articles ------------------- */
1536 
1537 /*
1538  * Opens and postprocesses an article
1539  * Populates the passed in artinfo structure if successful
1540  *
1541  * Returns:
1542  * 0 Art opened successfully
1543  * ART_UNAVAILABLE Couldn't find article
1544  * ART_ABORT User aborted during read of article
1545  */
/*
 * wrap_lines is handed to cook_article(); art supplies the article number
 * and expected line count; group selects where the article is read from;
 * pmesg is installed as progress_mesg while the article is being parsed.
 * ART_ABORT is also returned when parsing or cooking fails.
 *
 * NOTE(review): listing line 1547 (the function name) is absent from this
 * listing; the cross-reference index names the function defined on that
 * line art_open.
 */
1546 int
1548  t_bool wrap_lines,
1549  struct t_article *art,
1550  struct t_group *group,
1551  t_openartinfo *artinfo,
1552  t_bool show_progress_meter,
1553  const char *pmesg)
1554 {
1555  FILE *fp;
1556 
/* Start from an all-zero handle */
1557  memset(artinfo, 0, sizeof(t_openartinfo));
1558 
/* No stream: tin_errno decides between not available and abort */
1559  if ((fp = open_art_fp(group, art->artnum)) == NULL)
1560  return ((tin_errno == 0) ? ART_UNAVAILABLE : ART_ABORT);
1561 
1562 #ifdef DEBUG_ART
1563  fprintf(stderr, "art_open(%p)\n", (void *) artinfo);
1564 #endif /* DEBUG_ART */
1565 
/* progress_mesg is only set for the duration of the parse */
1566  progress_mesg = pmesg;
1567  if (parse_rfc2045_article(fp, art->line_count, artinfo, show_progress_meter) != 0) {
1568  progress_mesg = NULL;
1569  return ART_ABORT;
1570  }
1571  progress_mesg = NULL;
1572 
1573  /*
1574  * TODO: compare art->msgid and artinfo->hdr.messageid and issue a
1575  * warning (once) about broken overviews if they differ
1576  */
1577 
/*
 * NOTE(review): listing line 1579, the body of this if, is absent from
 * this listing. As printed here the cook_article() test below would become
 * the body - restore the missing statement from the real source.
 */
1578  if ((artinfo->tex2iso = ((group->attribute->tex2iso_conv) ? is_art_tex_encoded(artinfo->raw) : FALSE)))
1580 
1581  /* Maybe fix it so if this fails, we default to raw? */
/* NOTE(review): artinfo is not closed on this failure path - confirm callers call art_close() */
1582  if (!cook_article(wrap_lines, artinfo, tinrc.hide_uue, FALSE))
1583  return ART_ABORT;
1584 
1585 #ifdef DEBUG_ART
1586  dump_art(artinfo);
1587 #endif /* DEBUG_ART */
1588 
1589  /*
1590  * If Newsgroups is empty it is a good bet the article is a mail article
1591  * TODO: Why do this ?
1592  */
1593  if (!artinfo->hdr.newsgroups)
1594  artinfo->hdr.newsgroups = my_strdup(group->name);
1595 
1596  return 0;
1597 }
1598 
1599 
1600 /*
1601  * Close an open article identified by an 'artinfo' handle
1602  */
1603 void
1605  t_openartinfo *artinfo)
1606 {
1607 #ifdef DEBUG_ART
1608  fprintf(stderr, "art_close(%p)\n", (void *) artinfo);
1609 #endif /* DEBUG_ART */
1610 
1611  if (artinfo == NULL)
1612  return;
1613 
1614  free_and_init_header(&artinfo->hdr);
1615 
1616  artinfo->tex2iso = FALSE;
1617 
1618  if (artinfo->raw) {
1619  fclose(artinfo->raw);
1620  artinfo->raw = NULL;
1621  }
1622 
1623  if (artinfo->cooked) {
1624  fclose(artinfo->cooked);
1625  artinfo->cooked = NULL;
1626  }
1627 
1628  FreeAndNull(artinfo->rawl);
1629  FreeAndNull(artinfo->cookl);
1630 }
name
const char * name
Definition: signal.c:117
DEBUG_NNTP
#define DEBUG_NNTP
Definition: debug.h:47
t_config::hide_uue
int hide_uue
Definition: tinrc.h:138
get_param
const char * get_param(t_param *list, const char *name)
Definition: rfc2046.c:568
t_article
Definition: tin.h:1510
t_header::date
char * date
Definition: rfc2046.h:132
NNTP_STRLEN
#define NNTP_STRLEN
Definition: nntplib.h:155
txt_error_mime_end
constext txt_error_mime_end[]
Definition: lang.c:248
parse_rfc822_headers
int parse_rfc822_headers(struct t_header *hdr, FILE *from, FILE *to)
Definition: rfc2046.c:969
strcasecmp
int strcasecmp(const char *p, const char *q)
Definition: string.c:468
_
#define _(Text)
Definition: tin.h:94
t_header::bcc
char * bcc
Definition: rfc2046.h:131
my_realloc
#define my_realloc(ptr, size)
Definition: tin.h:2198
FORMAT_FIXED
#define FORMAT_FIXED
Definition: rfc2046.h:69
t_header::messageid
char * messageid
Definition: rfc2046.h:137
my_strdup
char * my_strdup(const char *str)
Definition: string.c:133
t_group
Definition: tin.h:1772
TYPE_MULTIPART
#define TYPE_MULTIPART
Definition: rfc2046.h:48
t_header::ftnto
char * ftnto
Definition: rfc2046.h:143
TYPE_TEXT
#define TYPE_TEXT
Definition: rfc2046.h:47
boundary_check
static int boundary_check(const char *line, t_part *part)
Definition: rfc2046.c:207
str_trim
char * str_trim(char *string)
Definition: string.c:532
parse_content_encoding
static unsigned int parse_content_encoding(char *encoding)
Definition: rfc2046.c:738
get_token
static char * get_token(const char *source)
Definition: rfc2046.c:298
count_lines
static int count_lines(char *line)
Definition: rfc2046.c:1123
ENCODING_BINARY
#define ENCODING_BINARY
Definition: rfc2046.h:59
GROUP_TYPE_NEWS
#define GROUP_TYPE_NEWS
Definition: tin.h:1059
parse_content_type
static void parse_content_type(char *type, t_part *content)
Definition: rfc2046.c:664
OK_ARTICLE
#define OK_ARTICLE
Definition: nntplib.h:99
content_encodings
constext * content_encodings[]
Definition: lang.c:1448
get_quoted_string
static char * get_quoted_string(char *source, char **dest)
Definition: rfc2046.c:313
parse_multipart_article
static int parse_multipart_article(FILE *infile, t_openartinfo *artinfo, t_part *part, int depth, t_bool show_progress_meter)
Definition: rfc2046.c:1172
M_HDR
#define M_HDR
Definition: rfc2046.c:1158
openartinfo
Definition: rfc2046.h:183
read_news_via_nntp
t_bool read_news_via_nntp
Definition: init.c:150
ENCODING_7BIT
#define ENCODING_7BIT
Definition: rfc2046.h:55
t_header::keywords
char * keywords
Definition: rfc2046.h:140
art_lines
static int art_lines
Definition: rfc2046.c:75
t_attribute::tex2iso_conv
unsigned tex2iso_conv
Definition: tin.h:1656
art
static t_openartinfo * art
Definition: cook.c:78
t_header::cc
char * cc
Definition: rfc2046.h:130
tinrc
struct t_config tinrc
Definition: init.c:191
buffer_to_ascii
char * buffer_to_ascii(char *c)
Definition: misc.c:2610
skip_equal_sign
static char * skip_equal_sign(char *source)
Definition: rfc2046.c:428
value
Definition: plp_snprintf.c:180
wait_message
void wait_message(unsigned int sdelay, const char *fmt,...)
Definition: screen.c:133
t_header::replyto
char * replyto
Definition: rfc2046.h:135
FreeAndNull
#define FreeAndNull(p)
Definition: tin.h:2204
curr_group
struct t_group * curr_group
Definition: group.c:55
ART_UNAVAILABLE
#define ART_UNAVAILABLE
Definition: tin.h:1323
content_type
int content_type(char *type)
Definition: rfc2046.c:115
BOUND_START
#define BOUND_START
Definition: rfc2046.h:66
t_group::type
unsigned int type
Definition: tin.h:1781
M_BODY
#define M_BODY
Definition: rfc2046.c:1159
state
state
Definition: save.c:56
BOUND_END
#define BOUND_END
Definition: rfc2046.h:67
TIN_FCLOSE
#define TIN_FCLOSE(x)
Definition: tin.h:1037
art_close
void art_close(t_openartinfo *artinfo)
Definition: rfc2046.c:1604
t_header::followup
char * followup
Definition: rfc2046.h:142
tin.h
t_header::org
char * org
Definition: rfc2046.h:134
strip_charset
static char * strip_charset(char **value)
Definition: rfc2046.c:409
TYPE_MESSAGE
#define TYPE_MESSAGE
Definition: rfc2046.h:50
is_art_tex_encoded
t_bool is_art_tex_encoded(FILE *fp)
Definition: charset.c:343
decode_value
static void decode_value(const char *charset, t_param *part)
Definition: rfc2046.c:374
show_progress
void show_progress(const char *txt, t_artnum count, t_artnum total)
Definition: screen.c:477
PATH_LEN
#define PATH_LEN
Definition: tin.h:837
BOUND_NONE
#define BOUND_NONE
Definition: rfc2046.h:65
param::value
char * value
Definition: rfc2046.h:79
t_header::xface
char * xface
Definition: rfc2046.h:144
TIN_EOF
#define TIN_EOF
Definition: rfc2046.c:1161
tin_fgets
char * tin_fgets(FILE *fp, t_bool header)
Definition: read.c:320
part::next
struct part * next
Definition: rfc2046.h:107
part::line_count
int line_count
Definition: rfc2046.h:104
make_group_path
void make_group_path(const char *name, char *path)
Definition: misc.c:2067
part
Definition: rfc2046.h:92
content_types
constext * content_types[]
Definition: lang.c:1453
param::name
char * name
Definition: rfc2046.h:78
xtbl
static const char xtbl[]
Definition: rfc2046.c:78
T_ARTNUM_PFMT
#define T_ARTNUM_PFMT
Definition: tin.h:227
ART_ABORT
#define ART_ABORT
Definition: tin.h:1335
free_and_init_header
void free_and_init_header(struct t_header *hdr)
Definition: rfc2046.c:865
rfc1522_decode
char * rfc1522_decode(const char *s)
Definition: rfc2047.c:232
t_header::subj
char * subj
Definition: rfc2046.h:133
t_header::distrib
char * distrib
Definition: rfc2046.h:139
NAME_LEN
#define NAME_LEN
Definition: tin.h:852
t_config::mm_local_charset
char mm_local_charset[LEN]
Definition: tinrc.h:103
GNKSA_ADDRTYPE_OLDSTYLE
#define GNKSA_ADDRTYPE_OLDSTYLE
Definition: extern.h:1605
param::encoded
t_bool encoded
Definition: rfc2046.h:82
ATTRIBUTE_DELIMS
#define ATTRIBUTE_DELIMS
Definition: rfc2046.c:226
t_header::newsgroups
char * newsgroups
Definition: rfc2046.h:136
parse_content_disposition
static void parse_content_disposition(char *disp, t_part *part)
Definition: rfc2046.c:770
part::description
char * description
Definition: rfc2046.h:101
parse_header
char * parse_header(char *buf, const char *pat, t_bool decode, t_bool structured, t_bool keep_tab)
Definition: rfc2046.c:908
art_open
int art_open(t_bool wrap_lines, struct t_article *art, struct t_group *group, t_openartinfo *artinfo, t_bool show_progress_meter, const char *pmesg)
Definition: rfc2046.c:1547
buf
static char buf[16]
Definition: langinfo.c:50
convert_to_printable
char * convert_to_printable(char *buf, t_bool keep_tab)
Definition: charset.c:385
FreeIfNeeded
#define FreeIfNeeded(p)
Definition: tin.h:2203
param::charset
char * charset
Definition: rfc2046.h:80
param::enc_fallback
t_bool enc_fallback
Definition: rfc2046.h:83
t_header::references
char * references
Definition: rfc2046.h:138
isascii
#define isascii(c)
Definition: tin.h:408
tmpfile
FILE * tmpfile(void)
Definition: tmpfile.c:53
part::type
unsigned type
Definition: rfc2046.h:94
parse_rfc2045_article
static int parse_rfc2045_article(FILE *infile, int line_count, t_openartinfo *artinfo, t_bool show_progress_meter)
Definition: rfc2046.c:1440
str_lwr
void str_lwr(char *str)
Definition: string.c:284
unfold_header
void unfold_header(char *line)
Definition: rfc2046.c:1143
free_parts
void free_parts(t_part *ptr)
Definition: rfc2046.c:846
new_part
t_part * new_part(t_part *part)
Definition: rfc2046.c:792
part::offset
long offset
Definition: rfc2046.h:103
XVAL
#define XVAL(c)
Definition: rfc2046.c:90
SEEK_SET
#define SEEK_SET
Definition: tin.h:2441
bool_not
#define bool_not(b)
Definition: bool.h:81
t_header
Definition: rfc2046.h:126
param
Definition: rfc2046.h:76
FORMAT_FLOWED
#define FORMAT_FLOWED
Definition: rfc2046.h:70
t_header::summary
char * summary
Definition: rfc2046.h:141
TYPE_APPLICATION
#define TYPE_APPLICATION
Definition: rfc2046.h:49
atoi
int atoi(const char *s)
part::depth
int depth
Definition: rfc2046.h:105
CT_DEFPARMS
#define CT_DEFPARMS
Definition: rfc2046.c:95
t_group::name
char * name
Definition: tin.h:1773
t_header::to
char * to
Definition: rfc2046.h:129
FALSE
#define FALSE
Definition: bool.h:70
txt_error_mime_start
constext txt_error_mime_start[]
Definition: lang.c:249
M_SEARCHING
#define M_SEARCHING
Definition: rfc2046.c:1157
progress
static void progress(int line_count)
Definition: rfc2046.c:102
skip_space
static char * skip_space(char *source)
Definition: rfc2046.c:229
debug
unsigned short debug
Definition: debug.c:51
open_art_fp
FILE * open_art_fp(struct t_group *group, t_artnum art)
Definition: rfc2046.c:1501
verbose
int verbose
Definition: init.c:153
PARAM_SEP
#define PARAM_SEP
Definition: rfc2046.c:93
joinpath
void joinpath(char *result, size_t result_size, const char *dir, const char *file)
Definition: joinpath.c:50
new_params
t_param * new_params(void)
Definition: rfc2046.c:527
param::part
int part
Definition: rfc2046.h:81
free_list
void free_list(t_param *list)
Definition: rfc2046.c:549
snprintf
#define snprintf
Definition: tin.h:2417
openartinfo::raw
FILE * raw
Definition: rfc2046.h:188
openartinfo::hdr
struct t_header hdr
Definition: rfc2046.h:185
t_artnum
long t_artnum
Definition: tin.h:226
error_message
void error_message(unsigned int sdelay, const char *fmt,...)
Definition: screen.c:184
t_group::spooldir
char * spooldir
Definition: tin.h:1776
parse_normal_article
static int parse_normal_article(FILE *in, t_openartinfo *artinfo, t_bool show_progress_meter)
Definition: rfc2046.c:1344
process_charsets
void process_charsets(char **line, size_t *max_line_len, const char *network_charset, const char *local_charset, t_bool conv_tex2iso)
Definition: misc.c:2634
param::next
struct param * next
Definition: rfc2046.h:84
t_group::attribute
struct t_attribute * attribute
Definition: tin.h:1790
t_header::mime
t_bool mime
Definition: rfc2046.h:145
part::uue
struct part * uue
Definition: rfc2046.h:106
remove_cwsp
static void remove_cwsp(char *source)
Definition: rfc2046.c:242
MODULO_COUNT_NUM
#define MODULO_COUNT_NUM
Definition: tin.h:862
parse_params
static void parse_params(char *params, t_part *content)
Definition: rfc2046.c:448
txt_is_tex_encoded
constext txt_is_tex_encoded[]
Definition: lang.c:555
cook_article
t_bool cook_article(t_bool wrap_lines, t_openartinfo *artinfo, int hide_uue, t_bool show_all_headers)
Definition: cook.c:828
t_bool
unsigned t_bool
Definition: bool.h:77
TRUE
#define TRUE
Definition: bool.h:74
gnksa_split_from
int gnksa_split_from(const char *from, char *address, char *realname, int *addrtype)
Definition: misc.c:3358
part::params
t_param * params
Definition: rfc2046.h:102
tin_errno
int tin_errno
Definition: read.c:59
HEADER_LEN
#define HEADER_LEN
Definition: tin.h:857
strncasecmp
int strncasecmp(const char *p, const char *q, size_t n)
Definition: string.c:484
boundary_cmp
static int boundary_cmp(const char *line, const char *boundary)
Definition: rfc2046.c:138
t_header::ext
t_part * ext
Definition: rfc2046.h:146
t_header::from
char * from
Definition: rfc2046.h:128
part::encoding
unsigned encoding
Definition: rfc2046.h:95
part::format
unsigned format
Definition: rfc2046.h:96
get_charset
static char * get_charset(char *value)
Definition: rfc2046.c:347
GNKSA_OK
#define GNKSA_OK
Definition: extern.h:1571
part::subtype
char * subtype
Definition: rfc2046.h:100
note_h
static struct t_header * note_h
Definition: page.c:75
IS_XDIGIT
#define IS_XDIGIT(c)
Definition: rfc2046.c:92
my_malloc
#define my_malloc(size)
Definition: tin.h:2196
progress_mesg
static const char * progress_mesg
Definition: rfc2046.c:76