tin  2.6.1
About: TIN is a threaded NNTP and spool based UseNet newsreader.
  Fossies Dox: tin-2.6.1.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

rfc2046.c
Go to the documentation of this file.
1/*
2 * Project : tin - a Usenet reader
3 * Module : rfc2046.c
4 * Author : Jason Faultless <jason@altarstone.com>
5 * Created : 2000-02-18
6 * Updated : 2021-11-01
7 * Notes : RFC 2046 MIME article parsing
8 *
9 * Copyright (c) 2000-2022 Jason Faultless <jason@altarstone.com>
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * 1. Redistributions of source code must retain the above copyright notice,
17 * this list of conditions and the following disclaimer.
18 *
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 *
23 * 3. Neither the name of the copyright holder nor the names of its
24 * contributors may be used to endorse or promote products derived from
25 * this software without specific prior written permission.
26 *
27 * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40
41#ifndef TIN_H
42# include "tin.h"
43#endif /* !TIN_H */
44
45
46/*
47 * local prototypes
48 */
49static char *get_charset(char *value);
50static char *get_quoted_string(char *source, char **dest);
51static char *get_token(const char *source);
52static char *strip_charset(char **value);
53static char *skip_equal_sign(char *source);
54static char *skip_space(char *source);
55static int boundary_cmp(const char *line, const char *boundary);
56static int count_lines(char *line);
57static int parse_multipart_article(FILE *infile, t_openartinfo *artinfo, t_part *part, int depth, t_bool show_progress_meter);
58static int parse_normal_article(FILE *in, t_openartinfo *artinfo, t_bool show_progress_meter);
59static int parse_rfc2045_article(FILE *infile, int line_count, t_openartinfo *artinfo, t_bool show_progress_meter);
60static unsigned int parse_content_encoding(char *encoding);
61static void decode_value(const char *charset, t_param *part);
62static void parse_content_type(char *type, t_part *content);
63static void parse_content_disposition(char *disp, t_part *part);
64static void parse_params(char *params, t_part *content);
65static void progress(int line_count);
66static void remove_cwsp(char *source);
67#ifdef DEBUG_ART
68 static void dump_art(t_openartinfo *art);
69#endif /* DEBUG_ART */
70
71
72/*
73 * Local variables
74 */
75static int art_lines = 0; /* lines in art on spool */
76static const char *progress_mesg = NULL; /* message progress() should display */
/*
 * xtbl maps a 7-bit character code to its hexadecimal digit value
 * (0..15), or -1 if the character is not a hex digit. The table has
 * 8 rows of 16 entries, i.e. 128 entries, one row per high nibble.
 */
77/* RFC 2231 decoding table */
78static const char xtbl[] = {
79/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
80/* 0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
81/* 1 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
82/* 2 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
83/* 3 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1,
84/* 4 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
85/* 5 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
86/* 6 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
87/* 7 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
88};
89
/*
 * XVAL performs no range check: the argument is used directly as an index
 * into the 128-entry table, so it must only be applied to characters that
 * already passed IS_XDIGIT (as decode_value() does).
 */
90#define XVAL(c) (xtbl[(unsigned int) (c)])
91/* C90: isxdigit(3) */
92#define IS_XDIGIT(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
/* separators used with strtok() when splitting Content-* header values */
93#define PARAM_SEP "; \n"
94/* default parameters for Content-Type */
95#define CT_DEFPARMS "charset=US-ASCII"
96
97/*
98 * Use the default message if one hasn't been supplied
99 * Body search is currently the only function that has a different message
100 */
/*
 * progress(): called with the running line count while an article is read.
 * The visible condition only fires when a progress message is set, the
 * article's total line count (art_lines) is known, and line_count is a
 * non-zero multiple of MODULO_COUNT_NUM.
 *
 * NOTE(review): this listing has lost two lines of the definition - the
 * identifier line (embedded number 102; the prototype near the top of the
 * file names the function "progress") and the statement controlled by the
 * if (embedded number 106). Restore both from the upstream source before
 * compiling; the missing statement is presumably the call that displays
 * progress_mesg - confirm.
 */
101static void
103 int line_count)
104{
105 if (progress_mesg != NULL && art_lines > 0 && line_count && line_count % MODULO_COUNT_NUM == 0)
107}
108
109
110/*
111 * Lookup content type in content_types[] array and return matching
112 * index or -1
113 */
114int
116 char *type)
117{
118 int i;
119
120 if (type == NULL)
121 return -1;
122
123 for (i = 0; content_types[i] != NULL; ++i) {
124 if (strcasecmp(type, content_types[i]) == 0)
125 return i;
126 }
127
128 return -1;
129}
130
131
132/*
133 * check if a line is a MIME boundary
134 * returns BOUND_NONE if it is not, BOUND_START if normal boundary and
135 * BOUND_END if closing boundary
136 */
137static int
139 const char *line,
140 const char *boundary)
141{
142 size_t blen = strlen(boundary);
143 size_t len;
144 char *e, *l;
145 int nl;
146
147 if ((len = strlen(line)) == 0)
148 return BOUND_NONE;
149
150 if (blen + 2 > len)
151 return BOUND_NONE;
152
153 /* remove trailing whites as per RFC 2046 5.1.1 */
154 l = my_strdup(line);
155 e = l + len - 1;
156 while (e > l + blen + 1 && isspace((unsigned char) *e))
157 *e-- = '\0';
158
159 len = strlen(l);
160
161 nl = l[len - 1] == '\n';
162
163 if (len != blen + 2 + (size_t) nl && len != blen + 4 + (size_t) nl) {
164 free(l);
165 return BOUND_NONE;
166 }
167 if (l[0] != '-' || l[1] != '-') {
168 free(l);
169 return BOUND_NONE;
170 }
171
172 if (strncmp(l + 2, boundary, blen) != 0) {
173 free(l);
174 return BOUND_NONE;
175 }
176
177 if (l[blen + 2] != '-') {
178 if (nl ? l[blen + 2] == '\n' : l[blen + 2] == '\0') {
179 free(l);
180 return BOUND_START;
181 } else {
182 free(l);
183 return BOUND_NONE;
184 }
185 }
186
187 if (l[blen + 3] != '-') {
188 free(l);
189 return BOUND_NONE;
190 }
191
192 if (nl ? l[blen + 4] == '\n' : l[blen + 4] == '\0') {
193 free(l);
194 return BOUND_END;
195 }
196 free(l);
197 return BOUND_NONE;
198}
199
200
201/*
202 * RFC2046 5.1.2 says that we are required to check for all possible
203 * boundaries, not only the one that is expected. Iterate through all
204 * the parts.
205 */
206static int
208 const char *line,
209 t_part *part)
210{
211 const char *boundary;
212 int bnd = BOUND_NONE;
213
214 for (; part != NULL; part = part->next) {
215 /* We may not have even parsed a boundary for this part yet */
216 if ((boundary = get_param(part->params, "boundary")) == NULL)
217 continue;
218 if ((bnd = boundary_cmp(line, boundary)) != BOUND_NONE)
219 break;
220 }
221
222 return bnd;
223}
224
225
226#define ATTRIBUTE_DELIMS "()<>@,;:\\\"/[]?="
227
/*
 * Skip leading blanks and tabs.
 *
 * Returns a pointer to the first character of source that is neither a
 * space nor a tab, or NULL if only white space (or nothing) is left.
 */
static char *
skip_space(
	char *source)
{
	while (*source == ' ' || *source == '\t')
		source++;

	return *source ? source : NULL;
}
236
237
/*
 * Removes comments and white space from a header value, in place.
 *
 * - Text inside double quotes is kept verbatim, including quoted pairs.
 * - Parenthesised comments (which may nest) are dropped, as are blanks
 *   and tabs outside of quotes.
 * - A closing parenthesis without a matching opening one stops
 *   processing; everything from that point on is cut off.
 *
 * A header with unbalanced parentheses is damaged; clearing the whole
 * string might be the right thing then, but the usable pieces are kept
 * so the rest of the code can still pick them up.
 */
static void
remove_cwsp(
	char *source)
{
	char *from, *to, src;
	int c_cnt = 0;		/* comment nesting depth */
	int inquotes = 0;	/* non-zero while inside a quoted-string */

	from = to = source;

	while ((src = *from++) && c_cnt >= 0) {
		if (src == '"' && c_cnt == 0)
			inquotes = !inquotes;

		/* keep a quoted pair inside a quoted-string as it is */
		if (inquotes && src == '\\' && *from) {
			*to++ = src;
			*to++ = *from++;
			continue;
		}

		if (!inquotes) {
			/*
			 * skip over quoted pairs inside a comment; a backslash
			 * that is the last character has nothing to escape and
			 * must not step past the terminating '\0'
			 */
			if (c_cnt && src == '\\') {
				if (*from)
					++from;
				continue;
			}
			if (src == '(') {
				++c_cnt;
				continue;
			}
			if (src == ')') {
				--c_cnt;
				continue;
			}
			if (c_cnt > 0 || src == ' ' || src == '\t')
				continue;
		}

		*to++ = src;
	}

	*to = '\0';
}
295
296
297static char *
299 const char *source)
300{
301 char *dest = my_strdup(source);
302 char *ptr = dest;
303
304 while (isascii((int) *ptr) && isprint((int) *ptr) && *ptr != ' ' && !strchr(ATTRIBUTE_DELIMS, *ptr))
305 ptr++;
306 *ptr = '\0';
307
308 return my_realloc(dest, strlen(dest) + 1);
309}
310
311
312static char *
314 char *source,
315 char **dest)
316{
317 char *ptr;
318 t_bool quote = FALSE;
319
320 *dest = my_malloc(strlen(source) + 1);
321 ptr = *dest;
322 source++; /* skip over double quote */
323 while (*source) {
324 if (*source == '\\') {
325 quote = TRUE; /* next char as-is */
326 if (*++source == '\\') {
327 *ptr++ = *source++;
328 quote = FALSE;
329 }
330 continue;
331 }
332 if ((*source == '"') && !quote)
333 break; /* end of quoted-string */
334 *ptr++ = *source++;
335 quote = FALSE;
336 }
337 *ptr = '\0';
338 *dest = my_realloc(*dest, strlen(*dest) + 1);
339 return *source ? source + 1 : source;
340}
341
342
/*
 * RFC 2231: Extract character set from parameter value
 *
 * value: parameter value of the form charset'language'text
 *
 * Returns a newly allocated string holding everything in front of the
 * first single quote, "US-ASCII" if the value starts with a single quote
 * (no charset given), or NULL if the value holds no single quote at all.
 * The caller has to free the result.
 */
static char *
get_charset(
	char *value)
{
	char *charset;
	char *quote = strchr(value, '\'');

	/* no charset information present */
	if (quote == NULL)
		return NULL;

	/* no charset given -> fall back to us-ascii */
	if (quote == value)
		return my_strdup("US-ASCII");

	/* copy the value and cut the copy at the first single quote */
	charset = my_strdup(value);
	charset[quote - value] = '\0';

	return charset;
}
367
368
369/*
370 * RFC 2231: Decode parameter value according to the given
371 * character set
372 */
373static void
375 const char *charset,
376 t_param *part)
377{
378 char *rptr, *wptr;
379 const char *cset;
380 size_t max_line_len = strlen(part->value);
381
382 /*
383 * we prefer part->charset if present, even if rfc 2231
384 * forbids different charsets for each part
385 */
386 cset = part->charset ? part->charset : charset;
387 rptr = wptr = part->value;
388
389 while (*rptr) {
390 if (*rptr == '%' && IS_XDIGIT(*(rptr + 1)) && IS_XDIGIT(*(rptr + 2))) {
391 *wptr++ = (char) (XVAL(*(rptr + 1)) << 4 | XVAL(*(rptr + 2)));
392 rptr += 3;
393 } else
394 *wptr++ = *rptr++;
395 }
396 *wptr = '\0';
397
398 process_charsets(&(part->value), &max_line_len, cset, tinrc.mm_local_charset, FALSE);
399 part->encoded = FALSE;
400 FreeAndNull(part->charset);
401}
402
403
/*
 * RFC 2231: Remove character set (and language information)
 * from parameter value
 *
 * value: address of an allocated string of the form charset'language'text
 *
 * If the string holds a single quote, *value is replaced by a newly
 * allocated copy of the text behind the last single quote and the old
 * string is freed. Otherwise *value is left alone.
 *
 * Returns *value.
 */
static char *
strip_charset(
	char **value)
{
	char *newval, *ptr;

	if ((ptr = strrchr(*value, '\'')) != NULL) {
		/* ptr points into *value, so copy before freeing it */
		newval = my_strdup(ptr + 1);
		free(*value);
		*value = newval;
	}

	return *value;
}
422
423
/*
 * Skip equal sign and (non compliant) white space around it
 *
 * Returns a pointer to the first character of the value, or NULL if
 * there is no equal sign or nothing but white space follows it.
 */
static char *
skip_equal_sign(
	char *source)
{
	source = skip_space(source);
	if (source == NULL)
		return NULL;

	/* no equal sign, invalid header, stop parsing here */
	if (*source != '=')
		return NULL;

	return skip_space(source + 1);
}
440
441
442/*
443 * Parse a Content-* parameter list into a linked list
444 * Ensure the ->params element is correctly initialised before calling
445 * TODO: may still not catch everything permitted in the RFC
446 */
/*
 * parse_params(): walks a ';' separated "name=value" list and pushes one
 * t_param per parameter onto the front of content->params. A trailing '*'
 * on the name marks an RFC 2231 encoded value, "*<digit>" marks a
 * continuation whose index is stored in ->part.
 *
 * NOTE(review): several lines of this definition are absent from the
 * listing (the embedded numbering skips 448, 466, 471, 488, 494 and 497).
 * The gaps are marked below; restore them from the upstream source before
 * compiling.
 */
447static void
/* NOTE(review): identifier line (448) missing; prototype says parse_params */
449 char *params,
450 t_part *content)
451{
452 char *name, *param, *value, *contp;
453 int idx;
454 t_bool encoded;
455 t_param *ptr;
456
457 param = params;
458 while (*param) {
459 idx = -1;
460 encoded = FALSE;
461 /* Skip over white space */
462 if (!(param = skip_space(param)))
463 break;
464
465 /* catch parameter name */
/* NOTE(review): line 466 missing - name is used below without a visible
 * assignment; presumably name = get_token(param), confirm */
467 param += strlen(name);
468
469 if (!*param) {
470 /* Nothing follows, invalid, stop here */
/* NOTE(review): line 471 missing - presumably releases name, confirm */
472 break;
473 }
474
475 /* RFC 2231 Character set and language information */
476 if ((contp = strrchr(name, '*')) && !*(contp + 1)) {
477 encoded = TRUE;
478 *contp = '\0';
479 }
480
481 /* RFC 2231 Parameter Value Continuations */
482 if ((contp = strchr(name, '*')) && *(contp + 1) >= '0' && *(contp + 1) <= '9') {
483 idx = atoi(contp + 1);
484 *contp = '\0';
485 }
486
487 if (!(param = skip_equal_sign(param))) {
/* NOTE(review): line 488 missing - presumably releases name, confirm */
489 break;
490 }
491
492 /* catch parameter value; may be surrounded by double quotes */
493 if (*param == '"') /* parse quoted-string */
/* NOTE(review): line 494 missing - the statement for the quoted-string
 * case; presumably a get_quoted_string() call filling value, confirm */
495 else {
496 /* parse token */
/* NOTE(review): line 497 missing - value is used below without a visible
 * assignment; presumably value = get_token(param), confirm */
498 param += strlen(value);
499 }
500
/* the new node takes over the name and value strings */
501 ptr = new_params();
502 ptr->name = name;
503 if (encoded) {
504 ptr->encoded = TRUE;
505 ptr->charset = get_charset(value);
506 ptr->value = strip_charset(&value);
507 } else
508 ptr->value = value;
509
510 ptr->part = idx;
511 ptr->next = content->params; /* Push onto start of list */
512 content->params = ptr;
513
514 /* advance pointer to next parameter */
515 while ((*param) && (*param != ';'))
516 param++;
517 if (*param == ';')
518 param++;
519 }
520}
521
522
523/*
524 * Return a freshly allocated and initialised t_param structure
525 */
526t_param *
528 void)
529{
530 t_param *ptr;
531
532 ptr = my_malloc(sizeof(t_param));
533 ptr->name = NULL;
534 ptr->value = NULL;
535 ptr->charset = NULL;
536 ptr->part = -1;
537 ptr->encoded = FALSE;
538 ptr->enc_fallback = TRUE;
539 ptr->next = NULL;
540
541 return ptr;
542}
543
544
545/*
546 * Free up a generic list object
547 */
548void
550 t_param *list)
551{
552 while (list->next != NULL) {
553 free_list(list->next);
554 list->next = NULL;
555 }
556
557 free(list->name);
558 free(list->value);
559 FreeIfNeeded(list->charset);
560 free(list);
561}
562
563
564/*
565 * Return a parameter value from a param list or NULL
566 */
567const char *
569 t_param *list,
570 const char *name)
571{
572 char *tmpval, *charset = NULL;
573 int i, j;
574 size_t newlen;
575 t_param *p_list, *c_list;
576
577 for (p_list = list; p_list != NULL; p_list = p_list->next) {
578 /*
579 * RFC 2231 Parameter Value Continuations + Character Set
580 *
581 * part == 0,1,2...: parameter has several parts, must be concatenated
582 * part == -1 : parameter has only one part
583 * part == -2 : part has already been concatenated, main part has
584 * part == -1
585 *
586 * charset : character set if present
587 */
588 if (strcasecmp(name, p_list->name) == 0 && p_list->part > -2) {
589 if (p_list->part == -1 && p_list->encoded && p_list->charset) {
590 decode_value(p_list->charset, p_list);
591 p_list->encoded = FALSE;
592 p_list->enc_fallback = FALSE;
593 }
594 if (p_list->part >= 0) {
595 newlen = 0;
596 if (p_list->charset) {
597 FreeIfNeeded(charset);
598 charset = my_strdup(p_list->charset);
599 }
600 for (j = 0, c_list = list; c_list != NULL; c_list = c_list->next) {
601 if (strcasecmp(name, c_list->name) == 0) {
602 if (c_list->part < 0)
603 continue;
604 if (c_list->part < p_list->part) {
605 if (c_list->charset) {
606 FreeIfNeeded(charset);
607 charset = my_strdup(c_list->charset);
608 }
609 p_list = c_list;
610 }
611
612 if (j < c_list->part)
613 j = c_list->part;
614
615 newlen += strlen(c_list->value);
616 }
617 }
618 p_list->value = my_realloc(p_list->value, newlen + 1);
619 if (charset)
620 decode_value(charset, p_list);
621 for (i = p_list->part + 1; i <= j; ++i) {
622 for (c_list = list; c_list != NULL; c_list = c_list->next) {
623 if (strcasecmp(name, c_list->name) == 0) {
624 if (c_list->part == i) {
625 if (c_list->encoded && charset)
626 decode_value(charset, c_list);
627 strcat(p_list->value, c_list->value);
628 c_list->part = -2;
629 }
630 }
631 }
632 }
633 p_list->part = -1;
634 p_list->encoded = FALSE;
635 p_list->enc_fallback = FALSE;
636 FreeAndNull(charset);
637 }
638 /*
639 * RFC 2047 'encoded-word' is not allowed at this place but
640 * some clients use this nevertheless -> we try to decode that
641 */
642 if (p_list->enc_fallback) {
643 tmpval = p_list->value;
644 if (*tmpval == '=' && *(++tmpval) == '?') {
645 if ((tmpval = rfc1522_decode(p_list->value))) {
646 free(p_list->value);
647 p_list->value = my_strdup(tmpval);
648 }
649 }
650 p_list->enc_fallback = FALSE;
651 }
652 return p_list->value;
653 }
654 }
655
656 return NULL;
657}
658
659
660/*
661 * Split a Content-Type header into a t_part structure
662 */
663static void
665 char *type,
666 t_part *content)
667{
668 char *subtype, *params;
669 int i;
670
671 /* Remove comments and white space */
672 remove_cwsp(type);
673
674 /*
675 * Split the type/subtype
676 */
677 if ((type = strtok(type, "/")) == NULL)
678 return;
679
680 /* Look up major type */
681
682 /*
683 * Unrecognised type, treat according to RFC
684 */
685 if ((i = content_type(type)) == -1) {
686 content->type = TYPE_APPLICATION;
687 free(content->subtype);
688 content->subtype = my_strdup("octet-stream");
689 return;
690 } else
691 content->type = i;
692
693 subtype = strtok(NULL, PARAM_SEP);
694 /* save new subtype, or use pre-initialised value "plain" */
695 if (subtype != NULL) { /* check for broken Content-Type: is header without a subtype */
696 free(content->subtype); /* Pre-initialised to plain */
697 content->subtype = my_strdup(subtype);
698 str_lwr(content->subtype);
699 }
700
701 /*
702 * Parse any parameters into a list
703 */
704 if ((params = strtok(NULL, "\n")) != NULL) {
705 const char *format;
706#ifndef CHARSET_CONVERSION
707 char defparms[] = CT_DEFPARMS; /* must be writable */
708#endif /* !CHARSET_CONVERSION */
709
710 parse_params(params, content);
711 if (!get_param(content->params, "charset")) { /* add default charset if needed */
712#ifndef CHARSET_CONVERSION
713 parse_params(defparms, content);
714#else
715 if (curr_group->attribute->undeclared_charset) {
716 char *charsetheader;
717
718 charsetheader = my_malloc(strlen(curr_group->attribute->undeclared_charset) + 9); /* 9=len('charset=\0') */
719 sprintf(charsetheader, "charset=%s", curr_group->attribute->undeclared_charset);
720 parse_params(charsetheader, content);
721 free(charsetheader);
722 } else {
723 char defparms[] = CT_DEFPARMS; /* must be writable */
724
725 parse_params(defparms, content);
726 }
727#endif /* !CHARSET_CONVERSION */
728 }
729 if ((format = get_param(content->params, "format"))) {
730 if (!strcasecmp(format, "flowed"))
731 content->format = FORMAT_FLOWED;
732 }
733 }
734}
735
736
737static unsigned int
739 char *encoding)
740{
741 unsigned int i;
742
743 /* Remove comments and white space */
744 remove_cwsp(encoding);
745
746 for (i = 0; content_encodings[i] != NULL; ++i) {
747 if (strcasecmp(encoding, content_encodings[i]) == 0)
748 return i;
749 }
750
751 /*
752 * TODO: check rfc - may need to switch Content-Type to
753 * application/octet-steam where this header exists but is unparsable.
754 *
755 * RFC 2045 6.2:
756 * Labelling unencoded data containing 8bit characters as "7bit" is not
757 * allowed, nor is labelling unencoded non-line-oriented data as anything
758 * other than "binary" allowed.
759 */
760 return ENCODING_BINARY;
761}
762
763
764/*
765 * We're only really interested in the filename parameter, which has
766 * a higher precedence than the name parameter from Content-Type (RFC 1806)
767 * Attach the parsed params to the part passed in 'part'
768 */
769static void
771 char *disp,
772 t_part *part)
773{
774 char *ptr;
775
776 /* Remove comments and white space */
777 remove_cwsp(disp);
778
779 strtok(disp, PARAM_SEP);
780 if ((ptr = strtok(NULL, "\n")) == NULL)
781 return;
782
783 parse_params(ptr, part);
784}
785
786
787/*
788 * Return a freshly allocated and initialised part structure attached to the
789 * end of the list of article parts given
790 */
791t_part *
793 t_part *part)
794{
795 t_part *p;
796 t_part *ptr = my_malloc(sizeof(t_part));
797#ifndef CHARSET_CONVERSION
798 char defparms[] = CT_DEFPARMS; /* must be writable */
799#endif /* !CHARSET_CONVERSION */
800
801 ptr->type = TYPE_TEXT; /* Defaults per RFC */
802 ptr->subtype = my_strdup("plain");
803 ptr->description = NULL;
804 ptr->encoding = ENCODING_7BIT;
805 ptr->format = FORMAT_FIXED;
806 ptr->params = NULL;
807
808#ifndef CHARSET_CONVERSION
809 parse_params(defparms, ptr);
810#else
811 if (curr_group && curr_group->attribute->undeclared_charset) {
812 char *charsetheader;
813
814 charsetheader = my_malloc(strlen(curr_group->attribute->undeclared_charset) + 9); /* 9=len('charset=\0') */
815 sprintf(charsetheader, "charset=%s", curr_group->attribute->undeclared_charset);
816 parse_params(charsetheader, ptr);
817 free(charsetheader);
818 } else {
819 char defparms[] = CT_DEFPARMS; /* must be writable */
820
821 parse_params(defparms, ptr);
822 }
823#endif /* !CHARSET_CONVERSION */
824
825 ptr->offset = 0;
826 ptr->line_count = 0;
827 ptr->depth = 0; /* Not an embedded object (yet) */
828 ptr->uue = NULL;
829 ptr->next = NULL;
830
831 if (part == NULL) /* List head - we don't do this */
832 return ptr;
833
834 for (p = part; p->next != NULL; p = p->next)
835 ;
836 p->next = ptr;
837
838 return ptr;
839}
840
841
842/*
843 * Free a linked list of t_part
844 */
845void
847 t_part *ptr)
848{
849 while (ptr->next != NULL) {
850 free_parts(ptr->next);
851 ptr->next = NULL;
852 }
853
854 free(ptr->subtype);
856 if (ptr->params)
857 free_list(ptr->params);
858 if (ptr->uue)
859 free_parts(ptr->uue);
860 free(ptr);
861}
862
863
/*
 * Release every string held by a header structure and reset the structure:
 * each pointer is freed and set to NULL, the mime flag is cleared and the
 * list of parts hanging off ->ext is freed.
 *
 * NOTE(review): the identifier line (embedded number 865) is missing from
 * this listing and the name is not visible anywhere else in this part of
 * the file; restore it from the upstream source.
 */
864void
866 struct t_header *hdr)
867{
868 /*
869 * Initialise the header struct
870 */
871 FreeAndNull(hdr->from);
872 FreeAndNull(hdr->to);
873 FreeAndNull(hdr->cc);
874 FreeAndNull(hdr->bcc);
875 FreeAndNull(hdr->date);
876 FreeAndNull(hdr->subj);
877 FreeAndNull(hdr->org);
878 FreeAndNull(hdr->replyto);
/*
 * NOTE(review): the embedded numbering skips 879-881 here. The header
 * reader further down fills hdr->newsgroups, hdr->messageid and
 * hdr->references, none of which is released in the visible code -
 * presumably the three missing lines do that; confirm against upstream.
 */
882 FreeAndNull(hdr->distrib);
883 FreeAndNull(hdr->keywords);
884 FreeAndNull(hdr->summary);
885 FreeAndNull(hdr->followup);
886 FreeAndNull(hdr->ftnto);
887#ifdef XFACE_ABLE
888 FreeAndNull(hdr->xface);
889#endif /* XFACE_ABLE */
890 hdr->mime = FALSE;
891
/* free_parts() is only called when there is a list of parts to release */
892 if (hdr->ext)
893 free_parts(hdr->ext);
894 hdr->ext = NULL;
895}
896
897
898/*
899 * buf: Article header
900 * pat: Text to match in header
901 * decode: RFC2047-decode the header
902 * structured: extract address-part before decoding the header
903 *
904 * Returns:
905 * (decoded) body of header if matched or NULL
906 */
907char *
909 char *buf,
910 const char *pat,
911 t_bool decode,
912 t_bool structured,
913 t_bool keep_tab)
914{
915 size_t plen = strlen(pat);
916 char *ptr = buf + plen;
917
918 /*
919 * Does ': ' follow the header text?
920 */
921 if (!(*ptr && *(ptr + 1) && *ptr == ':' && *(ptr + 1) == ' '))
922 return NULL;
923
924 /*
925 * If the header matches, skip past the ': ' and any leading whitespace
926 */
927 if (strncasecmp(buf, pat, plen) != 0)
928 return NULL;
929
930 ptr += 2;
931
932 str_trim(ptr);
933 if (!*ptr)
934 return NULL;
935
936 if (decode) {
937 if (structured) {
938 char addr[HEADER_LEN];
939 char name[HEADER_LEN];
940 int type;
941
942 if (gnksa_split_from(ptr, addr, name, &type) == GNKSA_OK) {
943 buffer_to_ascii(addr);
944
945 if (*name) {
946 if (type == GNKSA_ADDRTYPE_OLDSTYLE)
947 sprintf(ptr, "%s (%s)", addr, convert_to_printable(rfc1522_decode(name), keep_tab));
948 else
949 sprintf(ptr, "%s <%s>", convert_to_printable(rfc1522_decode(name), keep_tab), addr);
950 } else
951 strcpy(ptr, addr);
952 } else
953 return convert_to_printable(ptr, keep_tab);
954 } else
955 return (convert_to_printable(rfc1522_decode(ptr), keep_tab));
956 }
957
958 return ptr;
959}
960
961
962/*
963 * Read main article headers into a blank header structure.
964 * Pass the data 'from' -> 'to' when reading via NNTP
965 * Return tin_errno (basically will be !=0 if reading was 'q'uit)
966 * We have to guard against 'to' here since this function is exported
967 */
/*
 * Reads header lines from 'from' until the empty line that ends the
 * headers, storing a copy of each header of interest in *hdr and feeding
 * the Content-* headers into hdr->ext. Returns 0 once the end of the
 * headers was seen, tin_errno if the input ended before that.
 *
 * NOTE(review): the identifier line (embedded number 969) is missing from
 * this listing and the name is not visible anywhere else in this part of
 * the file; restore it from the upstream source.
 */
968int
970 struct t_header *hdr,
971 FILE *from,
972 FILE *to)
973{
974 char *line;
975 char *ptr;
976
/* start from an all-zero structure with a default MIME part attached */
977 memset(hdr, 0, sizeof(struct t_header));
978 hdr->mime = FALSE;
979 hdr->ext = new_part(NULL); /* Initialise MIME data */
980
981 while ((line = tin_fgets(from, TRUE)) != NULL) {
982 if (read_news_via_nntp && to) {
983 fprintf(to, "%s\n", line); /* Put raw data */
984#ifdef DEBUG
985 if ((debug & DEBUG_NNTP) && verbose > 1)
986 debug_print_file("NNTP", "<<<%s%s", logtime(), line);
987#endif /* DEBUG */
988 }
989 /*
990 * End of headers ?
991 */
992 if (line[0] == '\0') {
993 if (to)
994 hdr->ext->offset = ftell(to); /* Offset of main body */
995
996 /* avoid null subject */
997 if (!hdr->subj)
998 hdr->subj = my_strdup("");
999
1000 return 0;
1001 }
1002
1003 /*
1004 * FIXME: multiple headers of the same name could lead to information
1005 * loss (multiple Cc: lines are allowed, for example)
1006 */
/* each match below replaces any earlier value and moves on to the next line */
1007 unfold_header(line);
1008 if ((ptr = parse_header(line, "From", TRUE, TRUE, FALSE))) {
1009 FreeIfNeeded(hdr->from);
1010 hdr->from = my_strdup(ptr);
1011 continue;
1012 }
1013 if ((ptr = parse_header(line, "To", TRUE, TRUE, FALSE))) {
1014 FreeIfNeeded(hdr->to);
1015 hdr->to = my_strdup(ptr);
1016 continue;
1017 }
1018 if ((ptr = parse_header(line, "Cc", TRUE, TRUE, FALSE))) {
1019 FreeIfNeeded(hdr->cc);
1020 hdr->cc = my_strdup(ptr);
1021 continue;
1022 }
1023 if ((ptr = parse_header(line, "Bcc", TRUE, TRUE, FALSE))) {
1024 FreeIfNeeded(hdr->bcc);
1025 hdr->bcc = my_strdup(ptr);
1026 continue;
1027 }
1028 if ((ptr = parse_header(line, "Date", FALSE, FALSE, FALSE))) {
1029 FreeIfNeeded(hdr->date);
1030 hdr->date = my_strdup(ptr);
1031 continue;
1032 }
1033 if ((ptr = parse_header(line, "Subject", TRUE, FALSE, TRUE))) {
1034 FreeIfNeeded(hdr->subj);
1035 hdr->subj = my_strdup(ptr);
1036 continue;
1037 }
1038 if ((ptr = parse_header(line, "Organization", TRUE, FALSE, TRUE))) {
1039 FreeIfNeeded(hdr->org);
1040 hdr->org = my_strdup(ptr);
1041 continue;
1042 }
1043 if ((ptr = parse_header(line, "Reply-To", TRUE, TRUE, FALSE))) {
1044 FreeIfNeeded(hdr->replyto);
1045 hdr->replyto = my_strdup(ptr);
1046 continue;
1047 }
1048 if ((ptr = parse_header(line, "Newsgroups", FALSE, FALSE, FALSE))) {
/* NOTE(review): line 1049 missing - presumably frees the old
 * hdr->newsgroups like the sibling branches do; confirm */
1050 hdr->newsgroups = my_strdup(ptr);
1051 continue;
1052 }
1053 if ((ptr = parse_header(line, "Message-ID", FALSE, FALSE, FALSE))) {
1054 FreeIfNeeded(hdr->messageid);
1055 hdr->messageid = my_strdup(ptr);
1056 continue;
1057 }
1058 if ((ptr = parse_header(line, "References", FALSE, FALSE, FALSE))) {
/* NOTE(review): line 1059 missing - presumably frees the old
 * hdr->references like the sibling branches do; confirm */
1060 hdr->references = my_strdup(ptr);
1061 continue;
1062 }
1063 if ((ptr = parse_header(line, "Distribution", FALSE, FALSE, FALSE))) {
1064 FreeIfNeeded(hdr->distrib);
1065 hdr->distrib = my_strdup(ptr);
1066 continue;
1067 }
1068 if ((ptr = parse_header(line, "Keywords", TRUE, FALSE, FALSE))) {
1069 FreeIfNeeded(hdr->keywords);
1070 hdr->keywords = my_strdup(ptr);
1071 continue;
1072 }
1073 if ((ptr = parse_header(line, "Summary", TRUE, FALSE, FALSE))) {
1074 FreeIfNeeded(hdr->summary);
1075 hdr->summary = my_strdup(ptr);
1076 continue;
1077 }
1078 if ((ptr = parse_header(line, "Followup-To", FALSE, FALSE, FALSE))) {
1079 FreeIfNeeded(hdr->followup);
1080 hdr->followup = my_strdup(ptr);
1081 continue;
1082 }
1083 if ((ptr = parse_header(line, "X-Comment-To", TRUE, TRUE, FALSE))) {
1084 FreeIfNeeded(hdr->ftnto);
1085 hdr->ftnto = my_strdup(ptr);
1086 continue;
1087 }
1088#ifdef XFACE_ABLE
1089 if ((ptr = parse_header(line, "X-Face", FALSE, FALSE, FALSE))) {
1090 FreeIfNeeded(hdr->xface);
1091 hdr->xface = my_strdup(ptr);
1092 continue;
1093 }
1094#endif /* XFACE_ABLE */
1095 /* TODO: check version */
1096 if (parse_header(line, "MIME-Version", FALSE, FALSE, FALSE)) {
1097 hdr->mime = TRUE;
1098 continue;
1099 }
1100 if ((ptr = parse_header(line, "Content-Type", FALSE, FALSE, FALSE))) {
1101 parse_content_type(ptr, hdr->ext);
1102 continue;
1103 }
1104 if ((ptr = parse_header(line, "Content-Transfer-Encoding", FALSE, FALSE, FALSE))) {
1105 hdr->ext->encoding = parse_content_encoding(ptr);
1106 continue;
1107 }
1108 if ((ptr = parse_header(line, "Content-Description", TRUE, FALSE, FALSE))) {
/* NOTE(review): line 1109 missing - presumably frees the old
 * hdr->ext->description first; confirm */
1110 hdr->ext->description = my_strdup(ptr);
1111 continue;
1112 }
1113 if ((ptr = parse_header(line, "Content-Disposition", FALSE, FALSE, FALSE))) {
1114 parse_content_disposition(ptr, hdr->ext);
1115 continue;
1116 }
1117 }
1118
/* input ended before the empty line that terminates the headers */
1119 return tin_errno;
1120}
1121
1122
/*
 * Count lines in a continued (folded) header.
 * line MUST NOT end in a newline.
 *
 * Returns one more than the number of newline characters in line, so a
 * header that is not folded counts as 1.
 */
static int
count_lines(
	char *line)
{
	const char *p;
	int lines = 1;

	for (p = line; *p != '\0'; p++) {
		if (*p == '\n')
			lines++;
	}

	return lines;
}
1140
1141
/*
 * Unfold header, i.e. strip any newline off it. Don't strip other
 * whitespace, it depends on the header if this is legal (structured
 * headers) or not (unstructured headers, e.g. Subject)
 *
 * line is modified in place; all other characters keep their order.
 */
void
unfold_header(
	char *line)
{
	char *src, *dst;

	for (src = dst = line; *src != '\0'; src++) {
		if (*src != '\n')
			*dst++ = *src;
	}
	*dst = '\0';
}
1160
1161
1162#define M_SEARCHING 1 /* Looking for boundary */
1163#define M_HDR 2 /* In MIME headers */
1164#define M_BODY 3 /* In MIME body */
1165
1166#define TIN_EOF 0xf00 /* Used internally for error recovery */
1167
1168/*
1169 * Handles multipart/ article types, write data to a raw stream when reading via NNTP
1170 * artinfo is used for generic article pointers
1171 * part contains content info about the attachment we're parsing
1172 * depth is the number of levels by which the current part is embedded
1173 * Returns a tin_errno value which is '&'ed with TIN_EOF if the end of the
1174 * article is reached (to prevent broken articles from hanging the NNTP socket)
1175 */
1176static int
1178 FILE *infile,
1179 t_openartinfo *artinfo,
1180 t_part *part,
1181 int depth,
1182 t_bool show_progress_meter)
1183{
1184 char *line;
1185 char *ptr;
1186 const char *bd;
1187 int bnd;
1188 int state = M_SEARCHING;
1189 t_bool is_rfc822 = FALSE;
1190 t_part *curr_part = NULL, *rfc822_part = NULL;
1191
1192 while ((line = tin_fgets(infile, (state == M_HDR))) != NULL) {
1193/* fprintf(stderr, "%d---:%s\n", depth, line); */
1194
1195 /*
1196 * Check current line for boundary markers
1197 */
1198 bnd = boundary_check(line, artinfo->hdr.ext);
1199
1200 if (read_news_via_nntp) {
1201 fprintf(artinfo->raw, "%s\n", line);
1202#ifdef DEBUG
1203 if ((debug & DEBUG_NNTP) && verbose > 1)
1204 debug_print_file("NNTP", "<<<%s%s", logtime(), line);
1205#endif /* DEBUG */
1206 }
1207
1208 artinfo->hdr.ext->line_count += count_lines(line);
1209 if (show_progress_meter)
1210 progress(artinfo->hdr.ext->line_count); /* Overall line count */
1211
1212 if (part && part != artinfo->hdr.ext)
1213 part->line_count += count_lines(line);
1214
1215 if (is_rfc822 && rfc822_part)
1216 rfc822_part->line_count += count_lines(line);
1217
1218 if (bnd == BOUND_END) { /* End of this part detected */
1219 if (is_rfc822 && rfc822_part)
1220 rfc822_part->line_count -= count_lines(line);
1221 /*
1222 * When we have reached the end boundary of the outermost envelope
1223 * just log any trailing data for the raw article format.
1224 */
1225 if ((bd = get_param(artinfo->hdr.ext->params, "boundary")) != NULL) {
1226 if (boundary_cmp(line, bd) == BOUND_END)
1227 depth = 0;
1228 }
1229#if 0 /* doesn't count tailing lines after envelop mime part - correct but confusing */
1230 if (read_news_via_nntp && depth == 0)
1231 while ((line = tin_fgets(infile, FALSE)) != NULL)
1232 fprintf(artinfo->raw, "%s\n", line);
1233#else
1234 if (depth == 0) {
1235 while ((line = tin_fgets(infile, FALSE)) != NULL) {
1237 fprintf(artinfo->raw, "%s\n", line);
1238 artinfo->hdr.ext->line_count++;
1239 }
1240 }
1241#endif /* 0 */
1242 return tin_errno;
1243 }
1244
1245 switch (state) {
1246 case M_SEARCHING:
1247 switch (bnd) {
1248 case BOUND_NONE:
1249 break; /* Keep looking */
1250
1251 case BOUND_START:
1252 state = M_HDR; /* Now parsing headers of a part */
1253 curr_part = new_part(part);
1254 curr_part->depth = depth;
1255 break;
1256 }
1257 break;
1258
1259 case M_HDR:
1260 switch (bnd) {
1261 case BOUND_START:
1262#ifdef DEBUG
1263 if (debug & DEBUG_MISC)
1264 error_message(2, _(txt_error_mime_start));
1265#endif /* DEBUG */
1266 continue;
1267
1268 case BOUND_NONE:
1269 break; /* Correct - No boundary */
1270 }
1271
1272 if (*line == '\0') { /* End of MIME headers */
1273 state = M_BODY;
1274 curr_part->offset = ftell(artinfo->raw);
1275
1276 if (curr_part->type == TYPE_MULTIPART) { /* Complex multipart article */
1277 int ret, old_line_count;
1278
1279 old_line_count = curr_part->line_count;
1280 if ((ret = parse_multipart_article(infile, artinfo, curr_part, depth + 1, show_progress_meter)) != 0)
1281 return ret; /* User abort or EOF reached */
1282 if (part && part != artinfo->hdr.ext)
1283 part->line_count += curr_part->line_count - old_line_count;
1284 if (is_rfc822 && rfc822_part)
1285 rfc822_part->line_count += curr_part->line_count - old_line_count;
1286 } else if (curr_part->type == TYPE_MESSAGE && !strcasecmp("RFC822", curr_part->subtype)) {
1287 is_rfc822 = TRUE;
1288 rfc822_part = curr_part;
1289 state = M_HDR;
1290 curr_part = new_part(part);
1291 curr_part->depth = ++depth;
1292 }
1293 break;
1294 }
1295
1296 /*
1297 * Keep headers that interest us
1298 */
1299/* fprintf(stderr, "HDR:%s\n", line); */
1300 unfold_header(line);
1301 if ((ptr = parse_header(line, "Content-Type", FALSE, FALSE, FALSE))) {
1302 parse_content_type(ptr, curr_part);
1303 break;
1304 }
1305 if ((ptr = parse_header(line, "Content-Transfer-Encoding", FALSE, FALSE, FALSE))) {
1306 curr_part->encoding = parse_content_encoding(ptr);
1307 break;
1308 }
1309 if ((ptr = parse_header(line, "Content-Disposition", FALSE, FALSE, FALSE))) {
1310 parse_content_disposition(ptr, curr_part);
1311 break;
1312 }
1313 if ((ptr = parse_header(line, "Content-Description", TRUE, FALSE, FALSE))) {
1314 FreeIfNeeded(curr_part->description);
1315 curr_part->description = my_strdup(ptr);
1316 break;
1317 }
1318 break;
1319
1320 case M_BODY:
1321 switch (bnd) {
1322 case BOUND_NONE:
1323/* fprintf(stderr, "BOD:%s\n", line); */
1324 curr_part->line_count++;
1325 break;
1326
1327 case BOUND_START: /* Start new attachment */
1328 if (is_rfc822) {
1329 --depth;
1330 rfc822_part->line_count--;
1331 rfc822_part = NULL;
1332 is_rfc822 = FALSE;
1333 }
1334 state = M_HDR;
1335 curr_part = new_part(part);
1336 curr_part->depth = depth;
1337 break;
1338 }
1339 break;
1340 } /* switch (state) */
1341 } /* while() */
1342
1343 /*
1344 * We only reach this point when we (unexpectedly) reach the end of the
1345 * article
1346 */
1347 return tin_errno | TIN_EOF; /* Flag EOF */
1348}
1349
1350
1351/*
1352 * Parse a non-multipart article, merely a passthrough and bean counter
1353 */
1354static int
1356 FILE *in,
1357 t_openartinfo *artinfo,
1358 t_bool show_progress_meter)
1359{
1360 char *line;
1361
1362 while ((line = tin_fgets(in, FALSE)) != NULL) {
1363 if (read_news_via_nntp) {
1364 fprintf(artinfo->raw, "%s\n", line);
1365#ifdef DEBUG
1366 if ((debug & DEBUG_NNTP) && verbose > 1)
1367 debug_print_file("NNTP", "<<<%s%s", logtime(), line);
1368#endif /* DEBUG */
1369 }
1370
1371 ++artinfo->hdr.ext->line_count;
1372
1373 if (show_progress_meter)
1374 progress(artinfo->hdr.ext->line_count);
1375 }
1376 return tin_errno;
1377}
1378
1379
1380#ifdef DEBUG_ART
1381/* DEBUG dump of what we got */
1382static void
1383dump_uue(
1384 t_part *ptr,
1386{
1387 if (ptr->uue != NULL) {
1388 t_part *uu;
1389 for (uu = ptr->uue; uu != NULL; uu = uu->next) {
1390 fprintf(stderr, "UU: %s\n", get_param(uu->params, "name"));
1391 fprintf(stderr, " Content-Type: %s/%s\n Content-Transfer-Encoding: %s\n",
1392 content_types[uu->type], uu->subtype,
1394 fprintf(stderr, " Offset: %ld Lines: %d\n", uu->offset, uu->line_count);
1395 fprintf(stderr, " Depth: %d\n", uu->depth);
1396 fseek(art->raw, uu->offset, SEEK_SET);
1397 fprintf(stderr, "[%s]\n\n", tin_fgets(art->raw, FALSE));
1398 }
1399 }
1400}
1401
1402
1403static void
1404dump_art(
1406{
1407 t_part *ptr;
1408 t_param *pptr;
1409 struct t_header note_h = art->hdr;
1410
1411 fprintf(stderr, "\nMain body\nMIME-Version: %u\n", note_h.mime);
1412 fprintf(stderr, "Content-Type: %s/%s\nContent-Transfer-Encoding: %s\n",
1415 if (note_h.ext->description)
1416 fprintf(stderr, "Content-Description: %s\n", note_h.ext->description);
1417 fprintf(stderr, "Offset: %ld\nLines: %d\n", note_h.ext->offset, note_h.ext->line_count);
1418 for (pptr = note_h.ext->params; pptr != NULL; pptr = pptr->next)
1419 fprintf(stderr, "P: %s = %s\n", pptr->name, pptr->value);
1420 dump_uue(note_h.ext, art);
1421 fseek(art->raw, note_h.ext->offset, SEEK_SET);
1422 fprintf(stderr, "[%s]\n\n", tin_fgets(art->raw, FALSE));
1423 fprintf(stderr, "\n");
1424
1425 for (ptr = note_h.ext->next; ptr != NULL; ptr = ptr->next) {
1426 fprintf(stderr, "Attachment:\n");
1427 fprintf(stderr, "\tContent-Type: %s/%s\n\tContent-Transfer-Encoding: %s\n",
1428 content_types[ptr->type], ptr->subtype,
1430 if (ptr->description)
1431 fprintf(stderr, "\tContent-Description: %s\n", ptr->description);
1432 fprintf(stderr, "\tOffset: %ld\n\tLines: %d\n", ptr->offset, ptr->line_count);
1433 fprintf(stderr, "\tDepth: %d\n", ptr->depth);
1434 for (pptr = ptr->params; pptr != NULL; pptr = pptr->next)
1435 fprintf(stderr, "\tP: %s = %s\n", pptr->name, pptr->value);
1436 dump_uue(ptr, art);
1437 fseek(art->raw, ptr->offset, SEEK_SET);
1438 fprintf(stderr, "[%s]\n\n", tin_fgets(art->raw, FALSE));
1439 }
1440}
1441#endif /* DEBUG_ART */
1442
1443
1444/*
1445 * Core parser for all article types
1446 * Return NULL if we couldn't open an output stream when reading via NNTP
1447 * When reading from local spool we assign the filehandle of the on-spool
1448 * article directly to artinfo->raw
1449 */
1450static int
1452 FILE *infile,
1453 int line_count,
1454 t_openartinfo *artinfo,
1455 t_bool show_progress_meter)
1456{
1457 int ret = ART_ABORT;
1458
1459 if (read_news_via_nntp && !(artinfo->raw = tmpfile()))
1460 goto error;
1461
1462 if (!read_news_via_nntp)
1463 artinfo->raw = infile;
1464
1465 art_lines = line_count;
1466
1467 if ((ret = parse_rfc822_headers(&artinfo->hdr, infile, artinfo->raw)) != 0)
1468 goto error;
1469
1470 /* no article data returned, just a '.' after 220er response */
1471 if (artinfo->hdr.ext->offset == 0) {
1472 ret = ART_UNAVAILABLE;
1473 goto error;
1474 }
1475
1476 /*
1477 * Is this a MIME article ?
1478 * We don't bother to parse all plain text articles
1479 */
1480 if (artinfo->hdr.mime && artinfo->hdr.ext->type == TYPE_MULTIPART) {
1481 if ((ret = parse_multipart_article(infile, artinfo, artinfo->hdr.ext, 1, show_progress_meter)) != 0) {
1482 /* Strip off EOF condition if present */
1483 if (ret & TIN_EOF) {
1484 ret ^= TIN_EOF;
1485#ifdef DEBUG
1486 if (debug & DEBUG_MISC)
1487 error_message(2, _(txt_error_mime_end), content_types[artinfo->hdr.ext->type], artinfo->hdr.ext->subtype);
1488#endif /* DEBUG */
1489 if (ret != 0)
1490 goto error;
1491 } else
1492 goto error;
1493 }
1494 } else {
1495 if ((ret = parse_normal_article(infile, artinfo, show_progress_meter)) != 0)
1496 goto error;
1497 }
1498
1500 TIN_FCLOSE(infile);
1501
1502 return 0;
1503
1504error:
1506 TIN_FCLOSE(infile);
1507 art_close(artinfo);
1508 return ret;
1509}
1510
1511
1512/*
1513 * Open a mail/news article using NNTP ARTICLE command
1514 * or directly off local spool
1515 * Return:
1516 * A pointer to the open postprocessed file
1517 * NULL pointer if article open fails in some way
1518 */
1519FILE *
1521 struct t_group *group,
1522 t_artnum art)
1523{
1524 FILE *art_fp;
1525
1526#ifdef NNTP_ABLE
1527 if (read_news_via_nntp && group->type == GROUP_TYPE_NEWS) {
1528 char buf[NNTP_STRLEN];
1529 snprintf(buf, sizeof(buf), "ARTICLE %"T_ARTNUM_PFMT, art);
1530 art_fp = nntp_command(buf, OK_ARTICLE, NULL, 0);
1531 } else {
1532#endif /* NNTP_ABLE */
1533 char buf[PATH_LEN];
1534 char pbuf[PATH_LEN];
1535 char fbuf[NAME_LEN + 1];
1536 char *group_path = my_malloc(strlen(group->name) + 2); /* tailing "/\0" */;
1537
1538 make_group_path(group->name, group_path);
1539 joinpath(buf, sizeof(buf), group->spooldir, group_path);
1540 free(group_path);
1541 snprintf(fbuf, sizeof(fbuf), "%"T_ARTNUM_PFMT, art);
1542 joinpath(pbuf, sizeof(pbuf), buf, fbuf);
1543
1544 art_fp = fopen(pbuf, "r");
1545#ifdef NNTP_ABLE
1546 }
1547#endif /* NNTP_ABLE */
1548
1549 return art_fp;
1550}
1551
1552
1553/* ----------- art_open() and art_close() are the only interface --------- */
1554/* ------------------------for accessing articles ------------------- */
1555
1556/*
1557 * Opens and postprocesses an article
1558 * Populates the passed in artinfo structure if successful
1559 *
1560 * Returns:
1561 * 0 Art opened successfully
1562 * ART_UNAVAILABLE Couldn't find article
1563 * ART_ABORT User aborted during read of article
1564 */
1565int
1567 t_bool wrap_lines,
1568 struct t_article *art,
1569 struct t_group *group,
1570 t_openartinfo *artinfo,
1571 t_bool show_progress_meter,
1572 const char *pmesg)
1573{
1574 FILE *fp;
1575
1576 memset(artinfo, 0, sizeof(t_openartinfo));
1577
1578 if ((fp = open_art_fp(group, art->artnum)) == NULL)
1579 return ((tin_errno == 0) ? ART_UNAVAILABLE : ART_ABORT);
1580
1581#ifdef DEBUG_ART
1582 fprintf(stderr, "art_open(%p)\n", (void *) artinfo);
1583#endif /* DEBUG_ART */
1584
1585 progress_mesg = pmesg;
1586 if (parse_rfc2045_article(fp, art->line_count, artinfo, show_progress_meter) != 0) {
1587 progress_mesg = NULL;
1588 return ((tin_errno == 0) ? ART_UNAVAILABLE : ART_ABORT);
1589 }
1590 progress_mesg = NULL;
1591
1592 /*
1593 * TODO: compare art->msgid and artinfo->hdr.messageid and issue a
1594 * warning (once) about broken overviews if they differ
1595 */
1596
1597 if ((artinfo->tex2iso = ((group->attribute->tex2iso_conv) ? is_art_tex_encoded(artinfo->raw) : FALSE)))
1599
1600 /* Maybe fix it so if this fails, we default to raw? */
1601 if (!cook_article(wrap_lines, artinfo, tinrc.hide_uue, FALSE))
1602 return ART_ABORT;
1603
1604#ifdef DEBUG_ART
1605 dump_art(artinfo);
1606#endif /* DEBUG_ART */
1607
1608 /*
1609 * If Newsgroups is empty it is a good bet the article is a mail article
1610 * TODO: Why do this ?
1611 */
1612 if (!artinfo->hdr.newsgroups)
1613 artinfo->hdr.newsgroups = my_strdup(group->name);
1614
1615 return 0;
1616}
1617
1618
1619/*
1620 * Close an open article identified by an 'artinfo' handle
1621 */
1622void
1624 t_openartinfo *artinfo)
1625{
1626#ifdef DEBUG_ART
1627 fprintf(stderr, "art_close(%p)\n", (void *) artinfo);
1628#endif /* DEBUG_ART */
1629
1630 if (artinfo == NULL)
1631 return;
1632
1633 free_and_init_header(&artinfo->hdr);
1634
1635 artinfo->tex2iso = FALSE;
1636
1637 if (artinfo->raw) {
1638 fclose(artinfo->raw);
1639 artinfo->raw = NULL;
1640 }
1641
1642 if (artinfo->cooked) {
1643 fclose(artinfo->cooked);
1644 artinfo->cooked = NULL;
1645 }
1646
1647 FreeAndNull(artinfo->rawl);
1648 FreeAndNull(artinfo->cookl);
1649}
unsigned t_bool
Definition: bool.h:77
#define bool_not(b)
Definition: bool.h:81
#define TRUE
Definition: bool.h:74
#define FALSE
Definition: bool.h:70
static t_openartinfo * art
Definition: cook.c:78
#define DEBUG_MISC
Definition: debug.h:54
#define DEBUG_NNTP
Definition: debug.h:47
int verbose
Definition: init.c:154
int tin_errno
Definition: read.c:59
#define GNKSA_OK
Definition: extern.h:1590
constext txt_is_tex_encoded[]
Definition: lang.c:564
constext * content_encodings[]
Definition: lang.c:1461
#define GNKSA_ADDRTYPE_OLDSTYLE
Definition: extern.h:1626
constext * content_types[]
Definition: lang.c:1466
struct t_group * curr_group
Definition: group.c:55
struct t_config tinrc
Definition: init.c:192
unsigned short debug
Definition: debug.c:51
t_bool read_news_via_nntp
Definition: init.c:151
static char buf[16]
Definition: langinfo.c:50
#define NNTP_STRLEN
Definition: nntplib.h:155
#define OK_ARTICLE
Definition: nntplib.h:99
static struct t_header * note_h
Definition: page.c:75
t_bool is_art_tex_encoded(FILE *fp)
Definition: charset.c:352
FILE * tmpfile(void)
Definition: tmpfile.c:53
void make_group_path(const char *name, char *path)
Definition: misc.c:2078
char * str_trim(char *string)
Definition: string.c:539
int atoi(const char *s)
void process_charsets(char **line, size_t *max_line_len, const char *network_charset, const char *local_charset, t_bool conv_tex2iso)
Definition: misc.c:2656
void error_message(unsigned int sdelay, const char *fmt,...)
Definition: screen.c:224
void str_lwr(char *str)
Definition: string.c:291
char * convert_to_printable(char *buf, t_bool keep_tab)
Definition: charset.c:394
void joinpath(char *result, size_t result_size, const char *dir, const char *file)
Definition: joinpath.c:50
char * my_strdup(const char *str)
Definition: string.c:139
t_bool cook_article(t_bool wrap_lines, t_openartinfo *artinfo, int hide_uue, t_bool show_all_headers)
Definition: cook.c:828
int gnksa_split_from(const char *from, char *address, char *realname, int *addrtype)
Definition: misc.c:3390
char * tin_fgets(FILE *fp, t_bool header)
Definition: read.c:317
void wait_message(unsigned int sdelay, const char *fmt,...)
Definition: screen.c:133
int strncasecmp(const char *p, const char *q, size_t n)
Definition: string.c:491
char * buffer_to_ascii(char *c)
Definition: misc.c:2632
int strcasecmp(const char *p, const char *q)
Definition: string.c:475
char * rfc1522_decode(const char *s)
Definition: rfc2047.c:232
void show_progress(const char *txt, t_artnum count, t_artnum total)
Definition: screen.c:529
FILE * open_art_fp(struct t_group *group, t_artnum art)
Definition: rfc2046.c:1520
t_part * new_part(t_part *part)
Definition: rfc2046.c:792
#define CT_DEFPARMS
Definition: rfc2046.c:95
char * parse_header(char *buf, const char *pat, t_bool decode, t_bool structured, t_bool keep_tab)
Definition: rfc2046.c:908
int content_type(char *type)
Definition: rfc2046.c:115
static int art_lines
Definition: rfc2046.c:75
#define M_SEARCHING
Definition: rfc2046.c:1162
static int boundary_check(const char *line, t_part *part)
Definition: rfc2046.c:207
void free_list(t_param *list)
Definition: rfc2046.c:549
static void remove_cwsp(char *source)
Definition: rfc2046.c:242
static int count_lines(char *line)
Definition: rfc2046.c:1128
#define M_HDR
Definition: rfc2046.c:1163
static int parse_normal_article(FILE *in, t_openartinfo *artinfo, t_bool show_progress_meter)
Definition: rfc2046.c:1355
#define M_BODY
Definition: rfc2046.c:1164
const char * get_param(t_param *list, const char *name)
Definition: rfc2046.c:568
#define TIN_EOF
Definition: rfc2046.c:1166
static char * get_charset(char *value)
Definition: rfc2046.c:347
#define XVAL(c)
Definition: rfc2046.c:90
t_param * new_params(void)
Definition: rfc2046.c:527
static int parse_rfc2045_article(FILE *infile, int line_count, t_openartinfo *artinfo, t_bool show_progress_meter)
Definition: rfc2046.c:1451
void art_close(t_openartinfo *artinfo)
Definition: rfc2046.c:1623
#define ATTRIBUTE_DELIMS
Definition: rfc2046.c:226
static void parse_content_type(char *type, t_part *content)
Definition: rfc2046.c:664
#define PARAM_SEP
Definition: rfc2046.c:93
static char * strip_charset(char **value)
Definition: rfc2046.c:409
void free_parts(t_part *ptr)
Definition: rfc2046.c:846
#define IS_XDIGIT(c)
Definition: rfc2046.c:92
static int boundary_cmp(const char *line, const char *boundary)
Definition: rfc2046.c:138
static void parse_params(char *params, t_part *content)
Definition: rfc2046.c:448
static char * skip_space(char *source)
Definition: rfc2046.c:229
void unfold_header(char *line)
Definition: rfc2046.c:1148
static const char * progress_mesg
Definition: rfc2046.c:76
static void decode_value(const char *charset, t_param *part)
Definition: rfc2046.c:374
int art_open(t_bool wrap_lines, struct t_article *art, struct t_group *group, t_openartinfo *artinfo, t_bool show_progress_meter, const char *pmesg)
Definition: rfc2046.c:1566
static const char xtbl[]
Definition: rfc2046.c:78
static char * get_token(const char *source)
Definition: rfc2046.c:298
static void parse_content_disposition(char *disp, t_part *part)
Definition: rfc2046.c:770
static unsigned int parse_content_encoding(char *encoding)
Definition: rfc2046.c:738
static void progress(int line_count)
Definition: rfc2046.c:102
static char * skip_equal_sign(char *source)
Definition: rfc2046.c:428
void free_and_init_header(struct t_header *hdr)
Definition: rfc2046.c:865
int parse_rfc822_headers(struct t_header *hdr, FILE *from, FILE *to)
Definition: rfc2046.c:969
static char * get_quoted_string(char *source, char **dest)
Definition: rfc2046.c:313
static int parse_multipart_article(FILE *infile, t_openartinfo *artinfo, t_part *part, int depth, t_bool show_progress_meter)
Definition: rfc2046.c:1177
#define FORMAT_FLOWED
Definition: rfc2046.h:70
#define TYPE_MESSAGE
Definition: rfc2046.h:50
#define FORMAT_FIXED
Definition: rfc2046.h:69
#define ENCODING_BINARY
Definition: rfc2046.h:59
#define BOUND_NONE
Definition: rfc2046.h:65
#define TYPE_MULTIPART
Definition: rfc2046.h:48
#define TYPE_APPLICATION
Definition: rfc2046.h:49
#define TYPE_TEXT
Definition: rfc2046.h:47
#define ENCODING_7BIT
Definition: rfc2046.h:55
#define BOUND_END
Definition: rfc2046.h:67
#define BOUND_START
Definition: rfc2046.h:66
state
Definition: save.c:56
const char * name
Definition: signal.c:117
t_lineinfo * cookl
Definition: rfc2046.h:191
t_bool tex2iso
Definition: rfc2046.h:186
FILE * cooked
Definition: rfc2046.h:189
struct t_header hdr
Definition: rfc2046.h:185
FILE * raw
Definition: rfc2046.h:188
t_lineinfo * rawl
Definition: rfc2046.h:190
Definition: rfc2046.h:77
struct param * next
Definition: rfc2046.h:84
char * charset
Definition: rfc2046.h:80
t_bool encoded
Definition: rfc2046.h:82
t_bool enc_fallback
Definition: rfc2046.h:83
int part
Definition: rfc2046.h:81
char * name
Definition: rfc2046.h:78
char * value
Definition: rfc2046.h:79
Definition: rfc2046.h:93
long offset
Definition: rfc2046.h:103
unsigned type
Definition: rfc2046.h:94
char * subtype
Definition: rfc2046.h:100
int line_count
Definition: rfc2046.h:104
struct part * uue
Definition: rfc2046.h:106
int depth
Definition: rfc2046.h:105
char * description
Definition: rfc2046.h:101
unsigned format
Definition: rfc2046.h:96
t_param * params
Definition: rfc2046.h:102
unsigned encoding
Definition: rfc2046.h:95
struct part * next
Definition: rfc2046.h:107
Definition: tin.h:1533
unsigned tex2iso_conv
Definition: tin.h:1702
int hide_uue
Definition: tinrc.h:150
char mm_local_charset[LEN]
Definition: tinrc.h:115
Definition: tin.h:1816
struct t_attribute * attribute
Definition: tin.h:1834
unsigned int type
Definition: tin.h:1825
char * name
Definition: tin.h:1817
char * spooldir
Definition: tin.h:1820
char * to
Definition: rfc2046.h:129
char * replyto
Definition: rfc2046.h:135
char * subj
Definition: rfc2046.h:133
char * distrib
Definition: rfc2046.h:139
char * ftnto
Definition: rfc2046.h:143
char * org
Definition: rfc2046.h:134
char * followup
Definition: rfc2046.h:142
char * cc
Definition: rfc2046.h:130
char * date
Definition: rfc2046.h:132
char * keywords
Definition: rfc2046.h:140
char * messageid
Definition: rfc2046.h:137
t_part * ext
Definition: rfc2046.h:146
t_bool mime
Definition: rfc2046.h:145
char * bcc
Definition: rfc2046.h:131
char * xface
Definition: rfc2046.h:144
char * references
Definition: rfc2046.h:138
char * from
Definition: rfc2046.h:128
char * summary
Definition: rfc2046.h:141
char * newsgroups
Definition: rfc2046.h:136
long t_artnum
Definition: tin.h:229
#define SEEK_SET
Definition: tin.h:2512
#define TIN_FCLOSE(x)
Definition: tin.h:1048
#define MODULO_COUNT_NUM
Definition: tin.h:868
#define NAME_LEN
Definition: tin.h:858
#define my_malloc(size)
Definition: tin.h:2245
#define FreeIfNeeded(p)
Definition: tin.h:2252
#define isascii(c)
Definition: tin.h:411
#define ART_ABORT
Definition: tin.h:1360
#define _(Text)
Definition: tin.h:94
#define PATH_LEN
Definition: tin.h:843
#define snprintf
Definition: tin.h:2464
#define FreeAndNull(p)
Definition: tin.h:2253
#define GROUP_TYPE_NEWS
Definition: tin.h:1070
#define HEADER_LEN
Definition: tin.h:863
#define T_ARTNUM_PFMT
Definition: tin.h:230
#define my_realloc(ptr, size)
Definition: tin.h:2247
#define ART_UNAVAILABLE
Definition: tin.h:1348