"Fossies" - the Fresh Open Source Software Archive 
Member "tin-2.6.1/src/rfc2046.c" (22 Dec 2021, 40193 Bytes) of package /linux/misc/tin-2.6.1.tar.xz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "rfc2046.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
2.6.0_vs_2.6.1.
1 /*
2 * Project : tin - a Usenet reader
3 * Module : rfc2046.c
4 * Author : Jason Faultless <jason@altarstone.com>
5 * Created : 2000-02-18
6 * Updated : 2021-11-01
7 * Notes : RFC 2046 MIME article parsing
8 *
9 * Copyright (c) 2000-2022 Jason Faultless <jason@altarstone.com>
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * 1. Redistributions of source code must retain the above copyright notice,
17 * this list of conditions and the following disclaimer.
18 *
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 *
23 * 3. Neither the name of the copyright holder nor the names of its
24 * contributors may be used to endorse or promote products derived from
25 * this software without specific prior written permission.
26 *
27 * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40
41 #ifndef TIN_H
42 # include "tin.h"
43 #endif /* !TIN_H */
44
45
46 /*
47 * local prototypes
48 */
49 static char *get_charset(char *value);
50 static char *get_quoted_string(char *source, char **dest);
51 static char *get_token(const char *source);
52 static char *strip_charset(char **value);
53 static char *skip_equal_sign(char *source);
54 static char *skip_space(char *source);
55 static int boundary_cmp(const char *line, const char *boundary);
56 static int count_lines(char *line);
57 static int parse_multipart_article(FILE *infile, t_openartinfo *artinfo, t_part *part, int depth, t_bool show_progress_meter);
58 static int parse_normal_article(FILE *in, t_openartinfo *artinfo, t_bool show_progress_meter);
59 static int parse_rfc2045_article(FILE *infile, int line_count, t_openartinfo *artinfo, t_bool show_progress_meter);
60 static unsigned int parse_content_encoding(char *encoding);
61 static void decode_value(const char *charset, t_param *part);
62 static void parse_content_type(char *type, t_part *content);
63 static void parse_content_disposition(char *disp, t_part *part);
64 static void parse_params(char *params, t_part *content);
65 static void progress(int line_count);
66 static void remove_cwsp(char *source);
67 #ifdef DEBUG_ART
68 static void dump_art(t_openartinfo *art);
69 #endif /* DEBUG_ART */
70
71
72 /*
73 * Local variables
74 */
static int art_lines = 0;						/* lines in art on spool */
static const char *progress_mesg = NULL;	/* message progress() should display */

/*
 * RFC 2231 decoding table: maps an ASCII character to the value of the
 * hexadecimal digit it represents, or -1 if it is not a hex digit.
 * The element type is explicitly signed so that -1 stays -1 on platforms
 * where plain char is unsigned.
 */
static const signed char xtbl[] = {
/*          0  1  2  3   4  5  6  7   8  9  a  b   c  d  e  f */
/* 0 */    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 1 */    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 2 */    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 3 */     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
/* 4 */    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 5 */    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 6 */    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
/* 7 */    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/*
 * Value of hex digit c, or -1 for anything else. The index is taken as
 * unsigned char and range-checked, so 8bit input can never read outside
 * the table. Note: evaluates its argument more than once.
 */
#define XVAL(c)	((unsigned char) (c) < sizeof(xtbl) ? xtbl[(unsigned char) (c)] : -1)
/* C90: isxdigit(3) */
#define IS_XDIGIT(c)	(((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
#define PARAM_SEP	"; \n"
/* default parameters for Content-Type */
#define CT_DEFPARMS	"charset=US-ASCII"
96
97 /*
98 * Use the default message if one hasn't been supplied
99 * Body search is currently the only function that has a different message
100 */
101 static void
102 progress(
103 int line_count)
104 {
105 if (progress_mesg != NULL && art_lines > 0 && line_count && line_count % MODULO_COUNT_NUM == 0)
106 show_progress(progress_mesg, line_count, art_lines);
107 }
108
109
110 /*
111 * Lookup content type in content_types[] array and return matching
112 * index or -1
113 */
114 int
115 content_type(
116 char *type)
117 {
118 int i;
119
120 if (type == NULL)
121 return -1;
122
123 for (i = 0; content_types[i] != NULL; ++i) {
124 if (strcasecmp(type, content_types[i]) == 0)
125 return i;
126 }
127
128 return -1;
129 }
130
131
132 /*
133 * check if a line is a MIME boundary
134 * returns BOUND_NONE if it is not, BOUND_START if normal boundary and
135 * BOUND_END if closing boundary
136 */
137 static int
138 boundary_cmp(
139 const char *line,
140 const char *boundary)
141 {
142 size_t blen = strlen(boundary);
143 size_t len;
144 char *e, *l;
145 int nl;
146
147 if ((len = strlen(line)) == 0)
148 return BOUND_NONE;
149
150 if (blen + 2 > len)
151 return BOUND_NONE;
152
153 /* remove trailing whites as per RFC 2046 5.1.1 */
154 l = my_strdup(line);
155 e = l + len - 1;
156 while (e > l + blen + 1 && isspace((unsigned char) *e))
157 *e-- = '\0';
158
159 len = strlen(l);
160
161 nl = l[len - 1] == '\n';
162
163 if (len != blen + 2 + (size_t) nl && len != blen + 4 + (size_t) nl) {
164 free(l);
165 return BOUND_NONE;
166 }
167 if (l[0] != '-' || l[1] != '-') {
168 free(l);
169 return BOUND_NONE;
170 }
171
172 if (strncmp(l + 2, boundary, blen) != 0) {
173 free(l);
174 return BOUND_NONE;
175 }
176
177 if (l[blen + 2] != '-') {
178 if (nl ? l[blen + 2] == '\n' : l[blen + 2] == '\0') {
179 free(l);
180 return BOUND_START;
181 } else {
182 free(l);
183 return BOUND_NONE;
184 }
185 }
186
187 if (l[blen + 3] != '-') {
188 free(l);
189 return BOUND_NONE;
190 }
191
192 if (nl ? l[blen + 4] == '\n' : l[blen + 4] == '\0') {
193 free(l);
194 return BOUND_END;
195 }
196 free(l);
197 return BOUND_NONE;
198 }
199
200
201 /*
202 * RFC2046 5.1.2 says that we are required to check for all possible
203 * boundaries, not only the one that is expected. Iterate through all
204 * the parts.
205 */
206 static int
207 boundary_check(
208 const char *line,
209 t_part *part)
210 {
211 const char *boundary;
212 int bnd = BOUND_NONE;
213
214 for (; part != NULL; part = part->next) {
215 /* We may not have even parsed a boundary for this part yet */
216 if ((boundary = get_param(part->params, "boundary")) == NULL)
217 continue;
218 if ((bnd = boundary_cmp(line, boundary)) != BOUND_NONE)
219 break;
220 }
221
222 return bnd;
223 }
224
225
226 #define ATTRIBUTE_DELIMS "()<>@,;:\\\"/[]?="
227
/*
 * Skip leading blanks and tabs.
 * Returns a pointer to the first other character, or NULL if the rest
 * of the string consists of blanks/tabs only (or is empty).
 */
static char *
skip_space(
	char *source)
{
	char *rest = source + strspn(source, " \t");

	return (*rest != '\0') ? rest : NULL;
}
236
237
/*
 * Removes comments and white space from a structured header, in place.
 *
 * - text inside double quotes is kept verbatim, including quoted pairs
 * - (possibly nested) comments in parentheses are dropped
 * - blanks and tabs outside of quoted strings are dropped
 *
 * Parsing stops at the first unbalanced ')'; what has been collected so
 * far is kept so the rest of the code can pick up usable pieces of a
 * damaged header.
 */
static void
remove_cwsp(
	char *source)
{
	char *from, *to, src;
	int c_cnt = 0;		/* current comment nesting depth */
	int inquotes = 0;	/* inside a quoted-string? */

	from = to = source;

	while ((src = *from++) && c_cnt >= 0) {
		if (src == '"' && c_cnt == 0)
			inquotes = !inquotes;

		/* keep quoted pairs inside quoted strings as they are */
		if (inquotes && src == '\\' && *from) {
			*to++ = src;
			*to++ = *from++;
			continue;
		}

		if (!inquotes) {
			/*
			 * skip over quoted pairs in comments; a backslash at the
			 * very end of the string has no partner, so don't step
			 * over the terminating '\0'
			 */
			if (c_cnt && src == '\\') {
				if (*from)
					++from;
				continue;
			}
			if (src == '(') {
				++c_cnt;
				continue;
			}
			if (src == ')') {
				--c_cnt;
				continue;
			}
			if (c_cnt > 0 || src == ' ' || src == '\t')
				continue;
		}

		*to++ = src;
	}

	/*
	 * With unbalanced parentheses (c_cnt != 0) the header is damaged and
	 * emptying it completely might be the right thing. Anyway, we let
	 * the rest of the code pick up usable pieces.
	 */
	*to = '\0';
}
295
296
297 static char *
298 get_token(
299 const char *source)
300 {
301 char *dest = my_strdup(source);
302 char *ptr = dest;
303
304 while (isascii((int) *ptr) && isprint((int) *ptr) && *ptr != ' ' && !strchr(ATTRIBUTE_DELIMS, *ptr))
305 ptr++;
306 *ptr = '\0';
307
308 return my_realloc(dest, strlen(dest) + 1);
309 }
310
311
312 static char *
313 get_quoted_string(
314 char *source,
315 char **dest)
316 {
317 char *ptr;
318 t_bool quote = FALSE;
319
320 *dest = my_malloc(strlen(source) + 1);
321 ptr = *dest;
322 source++; /* skip over double quote */
323 while (*source) {
324 if (*source == '\\') {
325 quote = TRUE; /* next char as-is */
326 if (*++source == '\\') {
327 *ptr++ = *source++;
328 quote = FALSE;
329 }
330 continue;
331 }
332 if ((*source == '"') && !quote)
333 break; /* end of quoted-string */
334 *ptr++ = *source++;
335 quote = FALSE;
336 }
337 *ptr = '\0';
338 *dest = my_realloc(*dest, strlen(*dest) + 1);
339 return *source ? source + 1 : source;
340 }
341
342
/*
 * RFC 2231: Extract character set from parameter value
 *
 * The character set is everything up to the first single quote.
 * Returns NULL if the value contains no single quote at all, "US-ASCII"
 * if the character set is empty, the character set otherwise. A non-NULL
 * result is freshly allocated and has to be freed by the caller.
 */
static char *
get_charset(
	char *value)
{
	const char *quote = strchr(value, '\'');
	char *charset;

	/* no charset information present */
	if (quote == NULL)
		return NULL;

	/* no charset given -> fall back to us-ascii */
	if (quote == value)
		return my_strdup("US-ASCII");

	/* copy the value and cut it off at the first single quote */
	charset = my_strdup(value);
	charset[quote - value] = '\0';

	return charset;
}
367
368
369 /*
370 * RFC 2231: Decode parameter value according to the given
371 * character set
372 */
373 static void
374 decode_value(
375 const char *charset,
376 t_param *part)
377 {
378 char *rptr, *wptr;
379 const char *cset;
380 size_t max_line_len = strlen(part->value);
381
382 /*
383 * we prefer part->charset if present, even if rfc 2231
384 * forbids different charsets for each part
385 */
386 cset = part->charset ? part->charset : charset;
387 rptr = wptr = part->value;
388
389 while (*rptr) {
390 if (*rptr == '%' && IS_XDIGIT(*(rptr + 1)) && IS_XDIGIT(*(rptr + 2))) {
391 *wptr++ = (char) (XVAL(*(rptr + 1)) << 4 | XVAL(*(rptr + 2)));
392 rptr += 3;
393 } else
394 *wptr++ = *rptr++;
395 }
396 *wptr = '\0';
397
398 process_charsets(&(part->value), &max_line_len, cset, tinrc.mm_local_charset, FALSE);
399 part->encoded = FALSE;
400 FreeAndNull(part->charset);
401 }
402
403
/*
 * RFC 2231: Remove character set (and language information)
 * from parameter value
 *
 * Everything up to and including the last single quote is dropped. In
 * that case *value is freed and replaced by a new allocation. Returns the
 * (possibly new) *value.
 */
static char *
strip_charset(
	char **value)
{
	char *quote = strrchr(*value, '\'');
	char *stripped;

	if (quote == NULL)	/* nothing to strip */
		return *value;

	stripped = my_strdup(quote + 1);
	free(*value);
	*value = stripped;

	return stripped;
}
422
423
/*
 * Skip equal sign and (non compliant) white space around it
 *
 * Returns a pointer to the first character of the value, or NULL if
 * there is no equal sign or nothing follows it.
 */
static char *
skip_equal_sign(
	char *source)
{
	char *pos = skip_space(source);

	/* nothing left or no equal sign: invalid header, stop parsing here */
	if (pos == NULL || *pos != '=')
		return NULL;

	return skip_space(pos + 1);
}
440
441
442 /*
443 * Parse a Content-* parameter list into a linked list
444 * Ensure the ->params element is correctly initialised before calling
445 * TODO: may still not catch everything permitted in the RFC
446 */
447 static void
448 parse_params(
449 char *params,
450 t_part *content)
451 {
452 char *name, *param, *value, *contp;
453 int idx;
454 t_bool encoded;
455 t_param *ptr;
456
457 param = params;
458 while (*param) {
459 idx = -1;
460 encoded = FALSE;
461 /* Skip over white space */
462 if (!(param = skip_space(param)))
463 break;
464
465 /* catch parameter name */
466 name = get_token(param);
467 param += strlen(name);
468
469 if (!*param) {
470 /* Nothing follows, invalid, stop here */
471 FreeIfNeeded(name);
472 break;
473 }
474
475 /* RFC 2231 Character set and language information */
476 if ((contp = strrchr(name, '*')) && !*(contp + 1)) {
477 encoded = TRUE;
478 *contp = '\0';
479 }
480
481 /* RFC 2231 Parameter Value Continuations */
482 if ((contp = strchr(name, '*')) && *(contp + 1) >= '0' && *(contp + 1) <= '9') {
483 idx = atoi(contp + 1);
484 *contp = '\0';
485 }
486
487 if (!(param = skip_equal_sign(param))) {
488 FreeIfNeeded(name);
489 break;
490 }
491
492 /* catch parameter value; may be surrounded by double quotes */
493 if (*param == '"') /* parse quoted-string */
494 param = get_quoted_string(param, &value);
495 else {
496 /* parse token */
497 value = get_token(param);
498 param += strlen(value);
499 }
500
501 ptr = new_params();
502 ptr->name = name;
503 if (encoded) {
504 ptr->encoded = TRUE;
505 ptr->charset = get_charset(value);
506 ptr->value = strip_charset(&value);
507 } else
508 ptr->value = value;
509
510 ptr->part = idx;
511 ptr->next = content->params; /* Push onto start of list */
512 content->params = ptr;
513
514 /* advance pointer to next parameter */
515 while ((*param) && (*param != ';'))
516 param++;
517 if (*param == ';')
518 param++;
519 }
520 }
521
522
523 /*
524 * Return a freshly allocated and initialised t_param structure
525 */
526 t_param *
527 new_params(
528 void)
529 {
530 t_param *ptr;
531
532 ptr = my_malloc(sizeof(t_param));
533 ptr->name = NULL;
534 ptr->value = NULL;
535 ptr->charset = NULL;
536 ptr->part = -1;
537 ptr->encoded = FALSE;
538 ptr->enc_fallback = TRUE;
539 ptr->next = NULL;
540
541 return ptr;
542 }
543
544
545 /*
546 * Free up a generic list object
547 */
548 void
549 free_list(
550 t_param *list)
551 {
552 while (list->next != NULL) {
553 free_list(list->next);
554 list->next = NULL;
555 }
556
557 free(list->name);
558 free(list->value);
559 FreeIfNeeded(list->charset);
560 free(list);
561 }
562
563
564 /*
565 * Return a parameter value from a param list or NULL
566 */
567 const char *
568 get_param(
569 t_param *list,
570 const char *name)
571 {
572 char *tmpval, *charset = NULL;
573 int i, j;
574 size_t newlen;
575 t_param *p_list, *c_list;
576
577 for (p_list = list; p_list != NULL; p_list = p_list->next) {
578 /*
579 * RFC 2231 Parameter Value Continuations + Character Set
580 *
581 * part == 0,1,2...: parameter has several parts, must be concatenated
582 * part == -1 : parameter has only one part
583 * part == -2 : part has already been concatenated, main part has
584 * part == -1
585 *
586 * charset : character set if present
587 */
588 if (strcasecmp(name, p_list->name) == 0 && p_list->part > -2) {
589 if (p_list->part == -1 && p_list->encoded && p_list->charset) {
590 decode_value(p_list->charset, p_list);
591 p_list->encoded = FALSE;
592 p_list->enc_fallback = FALSE;
593 }
594 if (p_list->part >= 0) {
595 newlen = 0;
596 if (p_list->charset) {
597 FreeIfNeeded(charset);
598 charset = my_strdup(p_list->charset);
599 }
600 for (j = 0, c_list = list; c_list != NULL; c_list = c_list->next) {
601 if (strcasecmp(name, c_list->name) == 0) {
602 if (c_list->part < 0)
603 continue;
604 if (c_list->part < p_list->part) {
605 if (c_list->charset) {
606 FreeIfNeeded(charset);
607 charset = my_strdup(c_list->charset);
608 }
609 p_list = c_list;
610 }
611
612 if (j < c_list->part)
613 j = c_list->part;
614
615 newlen += strlen(c_list->value);
616 }
617 }
618 p_list->value = my_realloc(p_list->value, newlen + 1);
619 if (charset)
620 decode_value(charset, p_list);
621 for (i = p_list->part + 1; i <= j; ++i) {
622 for (c_list = list; c_list != NULL; c_list = c_list->next) {
623 if (strcasecmp(name, c_list->name) == 0) {
624 if (c_list->part == i) {
625 if (c_list->encoded && charset)
626 decode_value(charset, c_list);
627 strcat(p_list->value, c_list->value);
628 c_list->part = -2;
629 }
630 }
631 }
632 }
633 p_list->part = -1;
634 p_list->encoded = FALSE;
635 p_list->enc_fallback = FALSE;
636 FreeAndNull(charset);
637 }
638 /*
639 * RFC 2047 'encoded-word' is not allowed at this place but
640 * some clients use this nevertheless -> we try to decode that
641 */
642 if (p_list->enc_fallback) {
643 tmpval = p_list->value;
644 if (*tmpval == '=' && *(++tmpval) == '?') {
645 if ((tmpval = rfc1522_decode(p_list->value))) {
646 free(p_list->value);
647 p_list->value = my_strdup(tmpval);
648 }
649 }
650 p_list->enc_fallback = FALSE;
651 }
652 return p_list->value;
653 }
654 }
655
656 return NULL;
657 }
658
659
660 /*
661 * Split a Content-Type header into a t_part structure
662 */
663 static void
664 parse_content_type(
665 char *type,
666 t_part *content)
667 {
668 char *subtype, *params;
669 int i;
670
671 /* Remove comments and white space */
672 remove_cwsp(type);
673
674 /*
675 * Split the type/subtype
676 */
677 if ((type = strtok(type, "/")) == NULL)
678 return;
679
680 /* Look up major type */
681
682 /*
683 * Unrecognised type, treat according to RFC
684 */
685 if ((i = content_type(type)) == -1) {
686 content->type = TYPE_APPLICATION;
687 free(content->subtype);
688 content->subtype = my_strdup("octet-stream");
689 return;
690 } else
691 content->type = i;
692
693 subtype = strtok(NULL, PARAM_SEP);
694 /* save new subtype, or use pre-initialised value "plain" */
695 if (subtype != NULL) { /* check for broken Content-Type: is header without a subtype */
696 free(content->subtype); /* Pre-initialised to plain */
697 content->subtype = my_strdup(subtype);
698 str_lwr(content->subtype);
699 }
700
701 /*
702 * Parse any parameters into a list
703 */
704 if ((params = strtok(NULL, "\n")) != NULL) {
705 const char *format;
706 #ifndef CHARSET_CONVERSION
707 char defparms[] = CT_DEFPARMS; /* must be writable */
708 #endif /* !CHARSET_CONVERSION */
709
710 parse_params(params, content);
711 if (!get_param(content->params, "charset")) { /* add default charset if needed */
712 #ifndef CHARSET_CONVERSION
713 parse_params(defparms, content);
714 #else
715 if (curr_group->attribute->undeclared_charset) {
716 char *charsetheader;
717
718 charsetheader = my_malloc(strlen(curr_group->attribute->undeclared_charset) + 9); /* 9=len('charset=\0') */
719 sprintf(charsetheader, "charset=%s", curr_group->attribute->undeclared_charset);
720 parse_params(charsetheader, content);
721 free(charsetheader);
722 } else {
723 char defparms[] = CT_DEFPARMS; /* must be writable */
724
725 parse_params(defparms, content);
726 }
727 #endif /* !CHARSET_CONVERSION */
728 }
729 if ((format = get_param(content->params, "format"))) {
730 if (!strcasecmp(format, "flowed"))
731 content->format = FORMAT_FLOWED;
732 }
733 }
734 }
735
736
737 static unsigned int
738 parse_content_encoding(
739 char *encoding)
740 {
741 unsigned int i;
742
743 /* Remove comments and white space */
744 remove_cwsp(encoding);
745
746 for (i = 0; content_encodings[i] != NULL; ++i) {
747 if (strcasecmp(encoding, content_encodings[i]) == 0)
748 return i;
749 }
750
751 /*
752 * TODO: check rfc - may need to switch Content-Type to
753 * application/octet-steam where this header exists but is unparsable.
754 *
755 * RFC 2045 6.2:
756 * Labelling unencoded data containing 8bit characters as "7bit" is not
757 * allowed, nor is labelling unencoded non-line-oriented data as anything
758 * other than "binary" allowed.
759 */
760 return ENCODING_BINARY;
761 }
762
763
764 /*
765 * We're only really interested in the filename parameter, which has
766 * a higher precedence than the name parameter from Content-Type (RFC 1806)
767 * Attach the parsed params to the part passed in 'part'
768 */
769 static void
770 parse_content_disposition(
771 char *disp,
772 t_part *part)
773 {
774 char *ptr;
775
776 /* Remove comments and white space */
777 remove_cwsp(disp);
778
779 strtok(disp, PARAM_SEP);
780 if ((ptr = strtok(NULL, "\n")) == NULL)
781 return;
782
783 parse_params(ptr, part);
784 }
785
786
787 /*
788 * Return a freshly allocated and initialised part structure attached to the
789 * end of the list of article parts given
790 */
791 t_part *
792 new_part(
793 t_part *part)
794 {
795 t_part *p;
796 t_part *ptr = my_malloc(sizeof(t_part));
797 #ifndef CHARSET_CONVERSION
798 char defparms[] = CT_DEFPARMS; /* must be writable */
799 #endif /* !CHARSET_CONVERSION */
800
801 ptr->type = TYPE_TEXT; /* Defaults per RFC */
802 ptr->subtype = my_strdup("plain");
803 ptr->description = NULL;
804 ptr->encoding = ENCODING_7BIT;
805 ptr->format = FORMAT_FIXED;
806 ptr->params = NULL;
807
808 #ifndef CHARSET_CONVERSION
809 parse_params(defparms, ptr);
810 #else
811 if (curr_group && curr_group->attribute->undeclared_charset) {
812 char *charsetheader;
813
814 charsetheader = my_malloc(strlen(curr_group->attribute->undeclared_charset) + 9); /* 9=len('charset=\0') */
815 sprintf(charsetheader, "charset=%s", curr_group->attribute->undeclared_charset);
816 parse_params(charsetheader, ptr);
817 free(charsetheader);
818 } else {
819 char defparms[] = CT_DEFPARMS; /* must be writable */
820
821 parse_params(defparms, ptr);
822 }
823 #endif /* !CHARSET_CONVERSION */
824
825 ptr->offset = 0;
826 ptr->line_count = 0;
827 ptr->depth = 0; /* Not an embedded object (yet) */
828 ptr->uue = NULL;
829 ptr->next = NULL;
830
831 if (part == NULL) /* List head - we don't do this */
832 return ptr;
833
834 for (p = part; p->next != NULL; p = p->next)
835 ;
836 p->next = ptr;
837
838 return ptr;
839 }
840
841
842 /*
843 * Free a linked list of t_part
844 */
845 void
846 free_parts(
847 t_part *ptr)
848 {
849 while (ptr->next != NULL) {
850 free_parts(ptr->next);
851 ptr->next = NULL;
852 }
853
854 free(ptr->subtype);
855 FreeAndNull(ptr->description);
856 if (ptr->params)
857 free_list(ptr->params);
858 if (ptr->uue)
859 free_parts(ptr->uue);
860 free(ptr);
861 }
862
863
864 void
865 free_and_init_header(
866 struct t_header *hdr)
867 {
868 /*
869 * Initialise the header struct
870 */
871 FreeAndNull(hdr->from);
872 FreeAndNull(hdr->to);
873 FreeAndNull(hdr->cc);
874 FreeAndNull(hdr->bcc);
875 FreeAndNull(hdr->date);
876 FreeAndNull(hdr->subj);
877 FreeAndNull(hdr->org);
878 FreeAndNull(hdr->replyto);
879 FreeAndNull(hdr->newsgroups);
880 FreeAndNull(hdr->messageid);
881 FreeAndNull(hdr->references);
882 FreeAndNull(hdr->distrib);
883 FreeAndNull(hdr->keywords);
884 FreeAndNull(hdr->summary);
885 FreeAndNull(hdr->followup);
886 FreeAndNull(hdr->ftnto);
887 #ifdef XFACE_ABLE
888 FreeAndNull(hdr->xface);
889 #endif /* XFACE_ABLE */
890 hdr->mime = FALSE;
891
892 if (hdr->ext)
893 free_parts(hdr->ext);
894 hdr->ext = NULL;
895 }
896
897
898 /*
899 * buf: Article header
900 * pat: Text to match in header
901 * decode: RFC2047-decode the header
902 * structured: extract address-part before decoding the header
903 *
904 * Returns:
905 * (decoded) body of header if matched or NULL
906 */
907 char *
908 parse_header(
909 char *buf,
910 const char *pat,
911 t_bool decode,
912 t_bool structured,
913 t_bool keep_tab)
914 {
915 size_t plen = strlen(pat);
916 char *ptr = buf + plen;
917
918 /*
919 * Does ': ' follow the header text?
920 */
921 if (!(*ptr && *(ptr + 1) && *ptr == ':' && *(ptr + 1) == ' '))
922 return NULL;
923
924 /*
925 * If the header matches, skip past the ': ' and any leading whitespace
926 */
927 if (strncasecmp(buf, pat, plen) != 0)
928 return NULL;
929
930 ptr += 2;
931
932 str_trim(ptr);
933 if (!*ptr)
934 return NULL;
935
936 if (decode) {
937 if (structured) {
938 char addr[HEADER_LEN];
939 char name[HEADER_LEN];
940 int type;
941
942 if (gnksa_split_from(ptr, addr, name, &type) == GNKSA_OK) {
943 buffer_to_ascii(addr);
944
945 if (*name) {
946 if (type == GNKSA_ADDRTYPE_OLDSTYLE)
947 sprintf(ptr, "%s (%s)", addr, convert_to_printable(rfc1522_decode(name), keep_tab));
948 else
949 sprintf(ptr, "%s <%s>", convert_to_printable(rfc1522_decode(name), keep_tab), addr);
950 } else
951 strcpy(ptr, addr);
952 } else
953 return convert_to_printable(ptr, keep_tab);
954 } else
955 return (convert_to_printable(rfc1522_decode(ptr), keep_tab));
956 }
957
958 return ptr;
959 }
960
961
962 /*
963 * Read main article headers into a blank header structure.
964 * Pass the data 'from' -> 'to' when reading via NNTP
965 * Return tin_errno (basically will be !=0 if reading was 'q'uit)
966 * We have to guard against 'to' here since this function is exported
967 */
968 int
969 parse_rfc822_headers(
970 struct t_header *hdr,
971 FILE *from,
972 FILE *to)
973 {
974 char *line;
975 char *ptr;
976
977 memset(hdr, 0, sizeof(struct t_header));
978 hdr->mime = FALSE;
979 hdr->ext = new_part(NULL); /* Initialise MIME data */
980
981 while ((line = tin_fgets(from, TRUE)) != NULL) {
982 if (read_news_via_nntp && to) {
983 fprintf(to, "%s\n", line); /* Put raw data */
984 #ifdef DEBUG
985 if ((debug & DEBUG_NNTP) && verbose > 1)
986 debug_print_file("NNTP", "<<<%s%s", logtime(), line);
987 #endif /* DEBUG */
988 }
989 /*
990 * End of headers ?
991 */
992 if (line[0] == '\0') {
993 if (to)
994 hdr->ext->offset = ftell(to); /* Offset of main body */
995
996 /* avoid null subject */
997 if (!hdr->subj)
998 hdr->subj = my_strdup("");
999
1000 return 0;
1001 }
1002
1003 /*
1004 * FIXME: multiple headers of the same name could lead to information
1005 * loss (multiple Cc: lines are allowed, for example)
1006 */
1007 unfold_header(line);
1008 if ((ptr = parse_header(line, "From", TRUE, TRUE, FALSE))) {
1009 FreeIfNeeded(hdr->from);
1010 hdr->from = my_strdup(ptr);
1011 continue;
1012 }
1013 if ((ptr = parse_header(line, "To", TRUE, TRUE, FALSE))) {
1014 FreeIfNeeded(hdr->to);
1015 hdr->to = my_strdup(ptr);
1016 continue;
1017 }
1018 if ((ptr = parse_header(line, "Cc", TRUE, TRUE, FALSE))) {
1019 FreeIfNeeded(hdr->cc);
1020 hdr->cc = my_strdup(ptr);
1021 continue;
1022 }
1023 if ((ptr = parse_header(line, "Bcc", TRUE, TRUE, FALSE))) {
1024 FreeIfNeeded(hdr->bcc);
1025 hdr->bcc = my_strdup(ptr);
1026 continue;
1027 }
1028 if ((ptr = parse_header(line, "Date", FALSE, FALSE, FALSE))) {
1029 FreeIfNeeded(hdr->date);
1030 hdr->date = my_strdup(ptr);
1031 continue;
1032 }
1033 if ((ptr = parse_header(line, "Subject", TRUE, FALSE, TRUE))) {
1034 FreeIfNeeded(hdr->subj);
1035 hdr->subj = my_strdup(ptr);
1036 continue;
1037 }
1038 if ((ptr = parse_header(line, "Organization", TRUE, FALSE, TRUE))) {
1039 FreeIfNeeded(hdr->org);
1040 hdr->org = my_strdup(ptr);
1041 continue;
1042 }
1043 if ((ptr = parse_header(line, "Reply-To", TRUE, TRUE, FALSE))) {
1044 FreeIfNeeded(hdr->replyto);
1045 hdr->replyto = my_strdup(ptr);
1046 continue;
1047 }
1048 if ((ptr = parse_header(line, "Newsgroups", FALSE, FALSE, FALSE))) {
1049 FreeIfNeeded(hdr->newsgroups);
1050 hdr->newsgroups = my_strdup(ptr);
1051 continue;
1052 }
1053 if ((ptr = parse_header(line, "Message-ID", FALSE, FALSE, FALSE))) {
1054 FreeIfNeeded(hdr->messageid);
1055 hdr->messageid = my_strdup(ptr);
1056 continue;
1057 }
1058 if ((ptr = parse_header(line, "References", FALSE, FALSE, FALSE))) {
1059 FreeIfNeeded(hdr->references);
1060 hdr->references = my_strdup(ptr);
1061 continue;
1062 }
1063 if ((ptr = parse_header(line, "Distribution", FALSE, FALSE, FALSE))) {
1064 FreeIfNeeded(hdr->distrib);
1065 hdr->distrib = my_strdup(ptr);
1066 continue;
1067 }
1068 if ((ptr = parse_header(line, "Keywords", TRUE, FALSE, FALSE))) {
1069 FreeIfNeeded(hdr->keywords);
1070 hdr->keywords = my_strdup(ptr);
1071 continue;
1072 }
1073 if ((ptr = parse_header(line, "Summary", TRUE, FALSE, FALSE))) {
1074 FreeIfNeeded(hdr->summary);
1075 hdr->summary = my_strdup(ptr);
1076 continue;
1077 }
1078 if ((ptr = parse_header(line, "Followup-To", FALSE, FALSE, FALSE))) {
1079 FreeIfNeeded(hdr->followup);
1080 hdr->followup = my_strdup(ptr);
1081 continue;
1082 }
1083 if ((ptr = parse_header(line, "X-Comment-To", TRUE, TRUE, FALSE))) {
1084 FreeIfNeeded(hdr->ftnto);
1085 hdr->ftnto = my_strdup(ptr);
1086 continue;
1087 }
1088 #ifdef XFACE_ABLE
1089 if ((ptr = parse_header(line, "X-Face", FALSE, FALSE, FALSE))) {
1090 FreeIfNeeded(hdr->xface);
1091 hdr->xface = my_strdup(ptr);
1092 continue;
1093 }
1094 #endif /* XFACE_ABLE */
1095 /* TODO: check version */
1096 if (parse_header(line, "MIME-Version", FALSE, FALSE, FALSE)) {
1097 hdr->mime = TRUE;
1098 continue;
1099 }
1100 if ((ptr = parse_header(line, "Content-Type", FALSE, FALSE, FALSE))) {
1101 parse_content_type(ptr, hdr->ext);
1102 continue;
1103 }
1104 if ((ptr = parse_header(line, "Content-Transfer-Encoding", FALSE, FALSE, FALSE))) {
1105 hdr->ext->encoding = parse_content_encoding(ptr);
1106 continue;
1107 }
1108 if ((ptr = parse_header(line, "Content-Description", TRUE, FALSE, FALSE))) {
1109 FreeIfNeeded(hdr->ext->description);
1110 hdr->ext->description = my_strdup(ptr);
1111 continue;
1112 }
1113 if ((ptr = parse_header(line, "Content-Disposition", FALSE, FALSE, FALSE))) {
1114 parse_content_disposition(ptr, hdr->ext);
1115 continue;
1116 }
1117 }
1118
1119 return tin_errno;
1120 }
1121
1122
/*
 * Count the lines a continued (folded) header consists of, i.e. the
 * number of newlines in it plus one.
 * line MUST NOT end in a newline.
 */
static int
count_lines(
	char *line)
{
	const char *nl;
	int total = 1;

	for (nl = strchr(line, '\n'); nl != NULL; nl = strchr(nl + 1, '\n'))
		total++;

	return total;
}
1140
1141
/*
 * Unfold header in place, i.e. remove every newline from it. Other
 * whitespace is left alone, it depends on the header if stripping it is
 * legal (structured headers) or not (unstructured headers, e.g. Subject)
 */
void
unfold_header(
	char *line)
{
	char *rd, *wr;

	for (rd = wr = line; *rd != '\0'; rd++) {
		if (*rd != '\n')
			*wr++ = *rd;
	}
	*wr = '\0';
}
1160
1161
1162 #define M_SEARCHING 1 /* Looking for boundary */
1163 #define M_HDR 2 /* In MIME headers */
1164 #define M_BODY 3 /* In MIME body */
1165
1166 #define TIN_EOF 0xf00 /* Used internally for error recovery */
1167
1168 /*
1169 * Handles multipart/ article types, write data to a raw stream when reading via NNTP
1170 * artinfo is used for generic article pointers
1171 * part contains content info about the attachment we're parsing
1172 * depth is the number of levels by which the current part is embedded
1173 * Returns a tin_errno value which is '&'ed with TIN_EOF if the end of the
1174 * article is reached (to prevent broken articles from hanging the NNTP socket)
1175 */
1176 static int
1177 parse_multipart_article(
1178 FILE *infile,
1179 t_openartinfo *artinfo,
1180 t_part *part,
1181 int depth,
1182 t_bool show_progress_meter)
1183 {
1184 char *line;
1185 char *ptr;
1186 const char *bd;
1187 int bnd;
1188 int state = M_SEARCHING;
1189 t_bool is_rfc822 = FALSE;
1190 t_part *curr_part = NULL, *rfc822_part = NULL;
1191
1192 while ((line = tin_fgets(infile, (state == M_HDR))) != NULL) {
1193 /* fprintf(stderr, "%d---:%s\n", depth, line); */
1194
1195 /*
1196 * Check current line for boundary markers
1197 */
1198 bnd = boundary_check(line, artinfo->hdr.ext);
1199
1200 if (read_news_via_nntp) {
1201 fprintf(artinfo->raw, "%s\n", line);
1202 #ifdef DEBUG
1203 if ((debug & DEBUG_NNTP) && verbose > 1)
1204 debug_print_file("NNTP", "<<<%s%s", logtime(), line);
1205 #endif /* DEBUG */
1206 }
1207
1208 artinfo->hdr.ext->line_count += count_lines(line);
1209 if (show_progress_meter)
1210 progress(artinfo->hdr.ext->line_count); /* Overall line count */
1211
1212 if (part && part != artinfo->hdr.ext)
1213 part->line_count += count_lines(line);
1214
1215 if (is_rfc822 && rfc822_part)
1216 rfc822_part->line_count += count_lines(line);
1217
1218 if (bnd == BOUND_END) { /* End of this part detected */
1219 if (is_rfc822 && rfc822_part)
1220 rfc822_part->line_count -= count_lines(line);
1221 /*
1222 * When we have reached the end boundary of the outermost envelope
1223 * just log any trailing data for the raw article format.
1224 */
1225 if ((bd = get_param(artinfo->hdr.ext->params, "boundary")) != NULL) {
1226 if (boundary_cmp(line, bd) == BOUND_END)
1227 depth = 0;
1228 }
1229 #if 0 /* doesn't count tailing lines after envelop mime part - correct but confusing */
1230 if (read_news_via_nntp && depth == 0)
1231 while ((line = tin_fgets(infile, FALSE)) != NULL)
1232 fprintf(artinfo->raw, "%s\n", line);
1233 #else
1234 if (depth == 0) {
1235 while ((line = tin_fgets(infile, FALSE)) != NULL) {
1236 if (read_news_via_nntp)
1237 fprintf(artinfo->raw, "%s\n", line);
1238 artinfo->hdr.ext->line_count++;
1239 }
1240 }
1241 #endif /* 0 */
1242 return tin_errno;
1243 }
1244
1245 switch (state) {
1246 case M_SEARCHING:
1247 switch (bnd) {
1248 case BOUND_NONE:
1249 break; /* Keep looking */
1250
1251 case BOUND_START:
1252 state = M_HDR; /* Now parsing headers of a part */
1253 curr_part = new_part(part);
1254 curr_part->depth = depth;
1255 break;
1256 }
1257 break;
1258
1259 case M_HDR:
1260 switch (bnd) {
1261 case BOUND_START:
1262 #ifdef DEBUG
1263 if (debug & DEBUG_MISC)
1264 error_message(2, _(txt_error_mime_start));
1265 #endif /* DEBUG */
1266 continue;
1267
1268 case BOUND_NONE:
1269 break; /* Correct - No boundary */
1270 }
1271
1272 if (*line == '\0') { /* End of MIME headers */
1273 state = M_BODY;
1274 curr_part->offset = ftell(artinfo->raw);
1275
1276 if (curr_part->type == TYPE_MULTIPART) { /* Complex multipart article */
1277 int ret, old_line_count;
1278
1279 old_line_count = curr_part->line_count;
1280 if ((ret = parse_multipart_article(infile, artinfo, curr_part, depth + 1, show_progress_meter)) != 0)
1281 return ret; /* User abort or EOF reached */
1282 if (part && part != artinfo->hdr.ext)
1283 part->line_count += curr_part->line_count - old_line_count;
1284 if (is_rfc822 && rfc822_part)
1285 rfc822_part->line_count += curr_part->line_count - old_line_count;
1286 } else if (curr_part->type == TYPE_MESSAGE && !strcasecmp("RFC822", curr_part->subtype)) {
1287 is_rfc822 = TRUE;
1288 rfc822_part = curr_part;
1289 state = M_HDR;
1290 curr_part = new_part(part);
1291 curr_part->depth = ++depth;
1292 }
1293 break;
1294 }
1295
1296 /*
1297 * Keep headers that interest us
1298 */
1299 /* fprintf(stderr, "HDR:%s\n", line); */
1300 unfold_header(line);
1301 if ((ptr = parse_header(line, "Content-Type", FALSE, FALSE, FALSE))) {
1302 parse_content_type(ptr, curr_part);
1303 break;
1304 }
1305 if ((ptr = parse_header(line, "Content-Transfer-Encoding", FALSE, FALSE, FALSE))) {
1306 curr_part->encoding = parse_content_encoding(ptr);
1307 break;
1308 }
1309 if ((ptr = parse_header(line, "Content-Disposition", FALSE, FALSE, FALSE))) {
1310 parse_content_disposition(ptr, curr_part);
1311 break;
1312 }
1313 if ((ptr = parse_header(line, "Content-Description", TRUE, FALSE, FALSE))) {
1314 FreeIfNeeded(curr_part->description);
1315 curr_part->description = my_strdup(ptr);
1316 break;
1317 }
1318 break;
1319
1320 case M_BODY:
1321 switch (bnd) {
1322 case BOUND_NONE:
1323 /* fprintf(stderr, "BOD:%s\n", line); */
1324 curr_part->line_count++;
1325 break;
1326
1327 case BOUND_START: /* Start new attachment */
1328 if (is_rfc822) {
1329 --depth;
1330 rfc822_part->line_count--;
1331 rfc822_part = NULL;
1332 is_rfc822 = FALSE;
1333 }
1334 state = M_HDR;
1335 curr_part = new_part(part);
1336 curr_part->depth = depth;
1337 break;
1338 }
1339 break;
1340 } /* switch (state) */
1341 } /* while() */
1342
1343 /*
1344 * We only reach this point when we (unexpectedly) reach the end of the
1345 * article
1346 */
1347 return tin_errno | TIN_EOF; /* Flag EOF */
1348 }
1349
1350
1351 /*
1352 * Parse a non-multipart article, merely a passthrough and bean counter
1353 */
1354 static int
1355 parse_normal_article(
1356 FILE *in,
1357 t_openartinfo *artinfo,
1358 t_bool show_progress_meter)
1359 {
1360 char *line;
1361
1362 while ((line = tin_fgets(in, FALSE)) != NULL) {
1363 if (read_news_via_nntp) {
1364 fprintf(artinfo->raw, "%s\n", line);
1365 #ifdef DEBUG
1366 if ((debug & DEBUG_NNTP) && verbose > 1)
1367 debug_print_file("NNTP", "<<<%s%s", logtime(), line);
1368 #endif /* DEBUG */
1369 }
1370
1371 ++artinfo->hdr.ext->line_count;
1372
1373 if (show_progress_meter)
1374 progress(artinfo->hdr.ext->line_count);
1375 }
1376 return tin_errno;
1377 }
1378
1379
1380 #ifdef DEBUG_ART
1381 /* DEBUG dump of what we got */
1382 static void
1383 dump_uue(
1384 t_part *ptr,
1385 t_openartinfo *art)
1386 {
1387 if (ptr->uue != NULL) {
1388 t_part *uu;
1389 for (uu = ptr->uue; uu != NULL; uu = uu->next) {
1390 fprintf(stderr, "UU: %s\n", get_param(uu->params, "name"));
1391 fprintf(stderr, " Content-Type: %s/%s\n Content-Transfer-Encoding: %s\n",
1392 content_types[uu->type], uu->subtype,
1393 content_encodings[uu->encoding]);
1394 fprintf(stderr, " Offset: %ld Lines: %d\n", uu->offset, uu->line_count);
1395 fprintf(stderr, " Depth: %d\n", uu->depth);
1396 fseek(art->raw, uu->offset, SEEK_SET);
1397 fprintf(stderr, "[%s]\n\n", tin_fgets(art->raw, FALSE));
1398 }
1399 }
1400 }
1401
1402
1403 static void
1404 dump_art(
1405 t_openartinfo *art)
1406 {
1407 t_part *ptr;
1408 t_param *pptr;
1409 struct t_header note_h = art->hdr;
1410
1411 fprintf(stderr, "\nMain body\nMIME-Version: %u\n", note_h.mime);
1412 fprintf(stderr, "Content-Type: %s/%s\nContent-Transfer-Encoding: %s\n",
1413 content_types[note_h.ext->type], note_h.ext->subtype,
1414 content_encodings[note_h.ext->encoding]);
1415 if (note_h.ext->description)
1416 fprintf(stderr, "Content-Description: %s\n", note_h.ext->description);
1417 fprintf(stderr, "Offset: %ld\nLines: %d\n", note_h.ext->offset, note_h.ext->line_count);
1418 for (pptr = note_h.ext->params; pptr != NULL; pptr = pptr->next)
1419 fprintf(stderr, "P: %s = %s\n", pptr->name, pptr->value);
1420 dump_uue(note_h.ext, art);
1421 fseek(art->raw, note_h.ext->offset, SEEK_SET);
1422 fprintf(stderr, "[%s]\n\n", tin_fgets(art->raw, FALSE));
1423 fprintf(stderr, "\n");
1424
1425 for (ptr = note_h.ext->next; ptr != NULL; ptr = ptr->next) {
1426 fprintf(stderr, "Attachment:\n");
1427 fprintf(stderr, "\tContent-Type: %s/%s\n\tContent-Transfer-Encoding: %s\n",
1428 content_types[ptr->type], ptr->subtype,
1429 content_encodings[ptr->encoding]);
1430 if (ptr->description)
1431 fprintf(stderr, "\tContent-Description: %s\n", ptr->description);
1432 fprintf(stderr, "\tOffset: %ld\n\tLines: %d\n", ptr->offset, ptr->line_count);
1433 fprintf(stderr, "\tDepth: %d\n", ptr->depth);
1434 for (pptr = ptr->params; pptr != NULL; pptr = pptr->next)
1435 fprintf(stderr, "\tP: %s = %s\n", pptr->name, pptr->value);
1436 dump_uue(ptr, art);
1437 fseek(art->raw, ptr->offset, SEEK_SET);
1438 fprintf(stderr, "[%s]\n\n", tin_fgets(art->raw, FALSE));
1439 }
1440 }
1441 #endif /* DEBUG_ART */
1442
1443
1444 /*
1445 * Core parser for all article types
1446 * Return NULL if we couldn't open an output stream when reading via NNTP
1447 * When reading from local spool we assign the filehandle of the on-spool
1448 * article directly to artinfo->raw
1449 */
1450 static int
1451 parse_rfc2045_article(
1452 FILE *infile,
1453 int line_count,
1454 t_openartinfo *artinfo,
1455 t_bool show_progress_meter)
1456 {
1457 int ret = ART_ABORT;
1458
1459 if (read_news_via_nntp && !(artinfo->raw = tmpfile()))
1460 goto error;
1461
1462 if (!read_news_via_nntp)
1463 artinfo->raw = infile;
1464
1465 art_lines = line_count;
1466
1467 if ((ret = parse_rfc822_headers(&artinfo->hdr, infile, artinfo->raw)) != 0)
1468 goto error;
1469
1470 /* no article data returned, just a '.' after 220er response */
1471 if (artinfo->hdr.ext->offset == 0) {
1472 ret = ART_UNAVAILABLE;
1473 goto error;
1474 }
1475
1476 /*
1477 * Is this a MIME article ?
1478 * We don't bother to parse all plain text articles
1479 */
1480 if (artinfo->hdr.mime && artinfo->hdr.ext->type == TYPE_MULTIPART) {
1481 if ((ret = parse_multipart_article(infile, artinfo, artinfo->hdr.ext, 1, show_progress_meter)) != 0) {
1482 /* Strip off EOF condition if present */
1483 if (ret & TIN_EOF) {
1484 ret ^= TIN_EOF;
1485 #ifdef DEBUG
1486 if (debug & DEBUG_MISC)
1487 error_message(2, _(txt_error_mime_end), content_types[artinfo->hdr.ext->type], artinfo->hdr.ext->subtype);
1488 #endif /* DEBUG */
1489 if (ret != 0)
1490 goto error;
1491 } else
1492 goto error;
1493 }
1494 } else {
1495 if ((ret = parse_normal_article(infile, artinfo, show_progress_meter)) != 0)
1496 goto error;
1497 }
1498
1499 if (read_news_via_nntp)
1500 TIN_FCLOSE(infile);
1501
1502 return 0;
1503
1504 error:
1505 if (read_news_via_nntp)
1506 TIN_FCLOSE(infile);
1507 art_close(artinfo);
1508 return ret;
1509 }
1510
1511
1512 /*
1513 * Open a mail/news article using NNTP ARTICLE command
1514 * or directly off local spool
1515 * Return:
1516 * A pointer to the open postprocessed file
1517 * NULL pointer if article open fails in some way
1518 */
1519 FILE *
1520 open_art_fp(
1521 struct t_group *group,
1522 t_artnum art)
1523 {
1524 FILE *art_fp;
1525
1526 #ifdef NNTP_ABLE
1527 if (read_news_via_nntp && group->type == GROUP_TYPE_NEWS) {
1528 char buf[NNTP_STRLEN];
1529 snprintf(buf, sizeof(buf), "ARTICLE %"T_ARTNUM_PFMT, art);
1530 art_fp = nntp_command(buf, OK_ARTICLE, NULL, 0);
1531 } else {
1532 #endif /* NNTP_ABLE */
1533 char buf[PATH_LEN];
1534 char pbuf[PATH_LEN];
1535 char fbuf[NAME_LEN + 1];
1536 char *group_path = my_malloc(strlen(group->name) + 2); /* tailing "/\0" */;
1537
1538 make_group_path(group->name, group_path);
1539 joinpath(buf, sizeof(buf), group->spooldir, group_path);
1540 free(group_path);
1541 snprintf(fbuf, sizeof(fbuf), "%"T_ARTNUM_PFMT, art);
1542 joinpath(pbuf, sizeof(pbuf), buf, fbuf);
1543
1544 art_fp = fopen(pbuf, "r");
1545 #ifdef NNTP_ABLE
1546 }
1547 #endif /* NNTP_ABLE */
1548
1549 return art_fp;
1550 }
1551
1552
1553 /* ----------- art_open() and art_close() are the only interface --------- */
1554 /* ------------------------for accessing articles ------------------- */
1555
1556 /*
1557 * Opens and postprocesses an article
1558 * Populates the passed in artinfo structure if successful
1559 *
1560 * Returns:
1561 * 0 Art opened successfully
1562 * ART_UNAVAILABLE Couldn't find article
1563 * ART_ABORT User aborted during read of article
1564 */
1565 int
1566 art_open(
1567 t_bool wrap_lines,
1568 struct t_article *art,
1569 struct t_group *group,
1570 t_openartinfo *artinfo,
1571 t_bool show_progress_meter,
1572 const char *pmesg)
1573 {
1574 FILE *fp;
1575
1576 memset(artinfo, 0, sizeof(t_openartinfo));
1577
1578 if ((fp = open_art_fp(group, art->artnum)) == NULL)
1579 return ((tin_errno == 0) ? ART_UNAVAILABLE : ART_ABORT);
1580
1581 #ifdef DEBUG_ART
1582 fprintf(stderr, "art_open(%p)\n", (void *) artinfo);
1583 #endif /* DEBUG_ART */
1584
1585 progress_mesg = pmesg;
1586 if (parse_rfc2045_article(fp, art->line_count, artinfo, show_progress_meter) != 0) {
1587 progress_mesg = NULL;
1588 return ((tin_errno == 0) ? ART_UNAVAILABLE : ART_ABORT);
1589 }
1590 progress_mesg = NULL;
1591
1592 /*
1593 * TODO: compare art->msgid and artinfo->hdr.messageid and issue a
1594 * warning (once) about broken overviews if they differ
1595 */
1596
1597 if ((artinfo->tex2iso = ((group->attribute->tex2iso_conv) ? is_art_tex_encoded(artinfo->raw) : FALSE)))
1598 wait_message(0, _(txt_is_tex_encoded));
1599
1600 /* Maybe fix it so if this fails, we default to raw? */
1601 if (!cook_article(wrap_lines, artinfo, tinrc.hide_uue, FALSE))
1602 return ART_ABORT;
1603
1604 #ifdef DEBUG_ART
1605 dump_art(artinfo);
1606 #endif /* DEBUG_ART */
1607
1608 /*
1609 * If Newsgroups is empty it is a good bet the article is a mail article
1610 * TODO: Why do this ?
1611 */
1612 if (!artinfo->hdr.newsgroups)
1613 artinfo->hdr.newsgroups = my_strdup(group->name);
1614
1615 return 0;
1616 }
1617
1618
1619 /*
1620 * Close an open article identified by an 'artinfo' handle
1621 */
1622 void
1623 art_close(
1624 t_openartinfo *artinfo)
1625 {
1626 #ifdef DEBUG_ART
1627 fprintf(stderr, "art_close(%p)\n", (void *) artinfo);
1628 #endif /* DEBUG_ART */
1629
1630 if (artinfo == NULL)
1631 return;
1632
1633 free_and_init_header(&artinfo->hdr);
1634
1635 artinfo->tex2iso = FALSE;
1636
1637 if (artinfo->raw) {
1638 fclose(artinfo->raw);
1639 artinfo->raw = NULL;
1640 }
1641
1642 if (artinfo->cooked) {
1643 fclose(artinfo->cooked);
1644 artinfo->cooked = NULL;
1645 }
1646
1647 FreeAndNull(artinfo->rawl);
1648 FreeAndNull(artinfo->cookl);
1649 }