tin  2.6.1
About: TIN is a threaded NNTP and spool based UseNet newsreader.
  Fossies Dox: tin-2.6.1.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

rfc2045.c
Go to the documentation of this file.
1/*
2 * Project : tin - a Usenet reader
3 * Module : rfc2045.c
4 * Author : Chris Blum <chris@resolution.de>
5 * Created : 1995-09-01
6 * Updated : 2021-02-23
7 * Notes : RFC 2045/2047 encoding
8 *
9 * Copyright (c) 1995-2022 Chris Blum <chris@resolution.de>
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * 1. Redistributions of source code must retain the above copyright notice,
17 * this list of conditions and the following disclaimer.
18 *
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 *
23 * 3. Neither the name of the copyright holder nor the names of its
24 * contributors may be used to endorse or promote products derived from
25 * this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40
41#ifndef TIN_H
42# include "tin.h"
43#endif /* !TIN_H */
44
45/*
46 * local prototypes
47 */
48static int put_rest(char **rest, char **line, size_t *max_line_len, const int offset);
49static unsigned char bin2hex(unsigned int x);
50static void set_rest(char **rest, const char *ptr);
51
52
/*
 * Map a 4-bit value to its uppercase hexadecimal digit ('0'-'9', 'A'-'F').
 * Only the low four bits of x are looked at, so an out-of-range argument
 * can never yield a character that is not a hex digit.
 */
static unsigned char
bin2hex(
	unsigned int x)
{
	static const char hex_digits[] = "0123456789ABCDEF";

	return (unsigned char) hex_digits[x & 0xfU];
}
61
62
/*
 * Nibble extraction for the byte that c points to.
 * HI4BITS goes through EIGHT_BIT() before shifting; LO4BITS masks the low
 * four bits. The argument of LO4BITS is parenthesized so that pointer
 * expressions such as (p + 1) are dereferenced as a whole.
 */
#define HI4BITS(c)	((unsigned char) (*EIGHT_BIT(c) >> 4))
#define LO4BITS(c)	((unsigned char) (*(c) & 0xf))
65
66/*
67 * A MIME replacement for fputs. e can be 'b' for base64, 'q' for
68 * quoted-printable, or 8 (default) for 8bit. Long lines get broken in
69 * encoding modes. If line is the null pointer, flush internal buffers.
70 * NOTE: Use only with text encodings, because line feed characters (0x0A)
71 * will be encoded as CRLF line endings when using base64! This will
72 * certainly break any binary format ...
73 */
74void
76 char *line,
77 FILE *f,
78 int e)
79{
80 int i;
81 static char *b = NULL; /* they must be static for base64 */
82 static char buffer[80];
83 static int bits = 0;
84 static int xpos = 0;
85 static unsigned long pattern = 0;
86
87 if (e == 'b') {
88 if (!b) {
89 b = buffer;
90 *buffer = '\0';
91 }
92 if (!line) { /* flush */
93 if (bits) {
94 if (xpos >= 73) {
95 *b++ = '\n';
96 *b = '\0';
97 fputs(buffer, f);
98 b = buffer;
99 xpos = 0;
100 }
101 pattern <<= 24 - bits;
102 for (i = 0; i < 4; i++) {
103 if (bits >= 0) {
104 *b++ = base64_alphabet[(pattern & 0xfc0000) >> 18];
105 pattern <<= 6;
106 bits -= 6;
107 } else
108 *b++ = '=';
109 xpos++;
110 }
111 pattern = 0;
112 bits = 0;
113 }
114 if (xpos) {
115 *b = '\0';
116 fputs(buffer, f);
117 xpos = 0;
118 }
119 b = NULL;
120 } else {
121 char *line_crlf = line;
122 size_t len = strlen(line);
123 char tmpbuf[2050]; /* FIXME: this is sizeof(buffer)+2 from rfc15211522_encode() */
124
125 /*
126 * base64 requires CRLF line endings in text types
127 * convert LF to CRLF if not CRLF already (Windows?)
128 */
129 if ((len > 0) && (line[len - 1] == '\n') &&
130 ((len == 1) || (line[len - 2] != '\r'))) {
131 STRCPY(tmpbuf, line);
132 line_crlf = tmpbuf;
133 line_crlf[len - 1] = '\r';
134 line_crlf[len] = '\n';
135 line_crlf[len + 1] = '\0';
136 }
137
138 while (*line_crlf) {
139 pattern <<= 8;
140 pattern |= *EIGHT_BIT(line_crlf)++;
141 bits += 8;
142 if (bits >= 24) {
143 if (xpos >= 73) {
144 *b++ = '\n';
145 *b = '\0';
146 b = buffer;
147 xpos = 0;
148 fputs(buffer, f);
149 }
150 for (i = 0; i < 4; i++) {
151 *b++ = base64_alphabet[(pattern >> (bits - 6)) & 0x3f];
152 xpos++;
153 bits -= 6;
154 }
155 pattern = 0;
156 }
157 }
158 }
159 } else if (e == 'q') {
160 if (!line) {
161 /*
162 * we don't really flush anything in qp mode, just set
163 * xpos to 0 in case the last line wasn't terminated by
164 * \n.
165 */
166 xpos = 0;
167 b = NULL;
168 return;
169 }
170 b = buffer;
171 while (*line) {
172 if (isspace((unsigned char) *line) && *line != '\n') {
173 char *l = line + 1;
174
175 while (*l) {
176 if (!isspace((unsigned char) *l)) { /* it's not trailing whitespace, no encoding needed */
177 *b++ = *line++;
178 xpos++;
179 break;
180 }
181 l++;
182 }
183 if (!*l) { /* trailing whitespace must be encoded */
184 *b++ = '=';
185 *b++ = (char) bin2hex(HI4BITS(line));
186 *b++ = (char) bin2hex(LO4BITS(line));
187 xpos += 3;
188 line++;
189 }
190 } else if ((!is_EIGHT_BIT(line) && *line != '=')
191 || (*line == '\n')) {
192 *b++ = *line++;
193 xpos++;
194 if (*(line - 1) == '\n')
195 break;
196 } else {
197 *b++ = '=';
198 *b++ = (char) bin2hex(HI4BITS(line));
199 *b++ = (char) bin2hex(LO4BITS(line));
200 xpos += 3;
201 line++;
202 }
203 if (xpos > 72 && *line != '\n') { /* 72 +3 [worst case] + equal sign = 76 :-) */
204 *b++ = '='; /* break long lines with a 'soft line break' */
205 *b++ = '\n';
206 *b++ = '\0';
207 fputs(buffer, f);
208 b = buffer;
209 xpos = 0;
210 }
211 }
212 *b = '\0';
213 if (b != buffer)
214 fputs(buffer, f);
215 if (b != buffer && b[-1] == '\n')
216 xpos = 0;
217 } else if (line)
218 fputs(line, f);
219}
220
221
/*
 * Remember the text in ptr as the still unprocessed remainder of a
 * physical line. A null or empty ptr clears the stored rest. The previous
 * rest is released only after ptr has been duplicated, so ptr may point
 * into the old rest.
 */
static void
set_rest(
	char **rest,
	const char *ptr)
{
	char *previous = *rest;

	if (ptr != NULL && strlen(ptr) != 0) {
		*rest = my_strdup(ptr);
		FreeIfNeeded(previous);
	} else {
		FreeAndNull(*rest);
	}
}
240
241
242/*
243 * Copy things that were left over from the last decoding into the new line.
244 * If there's a newline in the rest, copy everything up to and including that
245 * newline into the expected buffer, adjust rest and return. If there's no
246 * newline in the rest, copy all of it to the expected buffer and return.
247 *
248 * Side effects: resizes line if necessary, adjusts max_line_len
249 * accordingly.
250 *
251 * This function returns the number of characters written to the line buffer.
252 */
253static int
255 char **rest,
256 char **line,
257 size_t *max_line_len,
258 const int offset)
259{
260 char *my_rest = *rest;
261 char *ptr;
262 char c;
263 int put_chars = offset;
264
265 if ((ptr = my_rest) == NULL)
266 return put_chars;
267 if (strlen(my_rest) == 0) {
268 FreeAndNull(*rest);
269 return put_chars;
270 }
271
272 while ((c = *ptr++) && (c != '\n')) {
273 if ((c == '\r') && (*ptr == '\n'))
274 continue; /* step over CRLF */
275 /*
276 * Resize line if necessary. Keep in mind that we add LF and \0 later.
277 */
278 if (put_chars >= (int) *max_line_len - 2) {
279 if (*max_line_len == 0)
280 *max_line_len = LEN;
281 else
282 *max_line_len <<= 1;
283 *line = my_realloc(*line, *max_line_len);
284 }
285 (*line)[put_chars++] = c;
286 }
287 if (c == '\n') {
288 /* Look for CRLF spread over two lines. */
289 if (put_chars && (*line)[put_chars - 1] == '\r')
290 --put_chars;
291 /*
292 * FIXME: Adding a newline may be not correct. At least it may
293 * be not what the author of that article intended.
294 * Unfortunately, a newline is expected at the end of a line by
295 * some other code in cook.c and even those functions invoking
296 * this one rely on it.
297 */
298 (*line)[put_chars++] = '\n';
299 set_rest(rest, ptr);
300 } else /* c == 0 */
301 /* rest is now empty */
302 FreeAndNull(*rest);
303
304 (*line)[put_chars] = '\0'; /* don't count the terminating NULL! */
305 return put_chars;
306}
307
308
309/*
310 * Read a logical base64 encoded line into the specified line buffer.
311 * Logical lines can be split over several physical base64 encoded lines and
312 * a single physical base64 encoded line can contain several logical lines.
313 * This function keeps track of all these cases and always copies only one
314 * decoded line to the line buffer.
315 *
316 * Side effects: resizes line if necessary, adjusts max_line_len
317 * accordingly.
318 *
319 * This function returns the number of physical lines read or a negative
320 * value on error.
321 */
322int
324 FILE *file,
325 char **line,
326 size_t *max_line_len,
327 const int max_lines_to_read,
328 char **rest)
329{
330 char *buf2; /* holds the entire decoded line */
331 char *buf; /* holds the entire encoded line */
332 int count;
333 int lines_read = 0;
334 int put_chars;
335
336 /*
337 * First of all, catch everything that is left over from the last decoding.
338 * If there's a newline in that rest, copy everything up to and including
339 * that newline in the expected buffer, adjust rest and return. If there's
340 * no newline in the rest, copy all of it (modulo length of the buffer) to
341 * the expected buffer and continue as if there was no rest.
342 */
343 put_chars = put_rest(rest, line, max_line_len, 0);
344 if (put_chars && ((*line)[put_chars - 1] == '\n'))
345 return 0; /* we didn't read any new lines but filled the line */
346
347 /*
348 * At this point, either there was no rest or there was no newline in the
349 * rest. In any case, we need to read further encoded lines and decode
350 * them until we find a newline or there are no more (encoded or physical)
351 * lines in this part of the posting. To be sure, now allocate memory for
352 * the output if it wasn't already done.
353 */
354 if (*max_line_len == 0) {
355 *max_line_len = LEN;
356 *line = my_malloc(*max_line_len);
357 }
358
359 /*
360 * max_lines_to_read==0 occurs at end of an encoded part and if there was
361 * no trailing newline in the encoded text. So we put one there and exit.
362 * FIXME: Adding a newline may be not correct. At least it may be not
363 * what the author of that article intended. Unfortunately, a newline is
364 * expected at the end of a line by some other code in cook.c.
365 */
366 if (max_lines_to_read <= 0) {
367 if (put_chars) {
368 (*line)[put_chars++] = '\n';
369 (*line)[put_chars] = '\0';
370 }
371 return max_lines_to_read;
372 }
373 /*
374 * Ok, now read a new line from the original article.
375 */
376 do {
377 if ((buf = tin_fgets(file, FALSE)) == NULL) {
378 /*
379 * Premature end of file (or file error), leave loop. To prevent
380 * re-invoking of this function, set the numbers of read lines to
381 * the expected maximum that should be read at most.
382 *
383 * FIXME: Adding a newline may be not correct. At least it may be
384 * not what the author of that article intended. Unfortunately, a
385 * newline is expected at the end of a line by some other code in
386 * cook.c.
387 */
388 if (put_chars > (int) *max_line_len - 2) {
389 *max_line_len <<= 1;
390 *line = my_realloc(*line, *max_line_len);
391 }
392 (*line)[put_chars++] = '\n';
393 (*line)[put_chars] = '\0';
394 return max_lines_to_read;
395 }
396 lines_read++;
397 buf2 = my_malloc(strlen(buf) + 1); /* decoded string is always shorter than encoded string, so this is safe */
398 count = mmdecode(buf, 'b', '\0', buf2);
399 buf2[count] = '\0';
400 FreeIfNeeded(*rest);
401 *rest = buf2;
402 put_chars = put_rest(rest, line, max_line_len, put_chars);
403 if (put_chars && ((*line)[put_chars - 1] == '\n')) /* end of logical line reached */
404 return lines_read;
405 } while (lines_read < max_lines_to_read);
406 /*
407 * FIXME: Adding a newline may be not correct. At least it may be
408 * not what the author of that article intended. Unfortunately, a
409 * newline is expected at the end of a line by some other code in
410 * cook.c.
411 */
412 if (put_chars > (int) *max_line_len - 2) {
413 *max_line_len <<= 1;
414 *line = my_realloc(*line, *max_line_len);
415 }
416 if ((put_chars == 0) || ((*line)[put_chars - 1] != '\n'))
417 (*line)[put_chars++] = '\n';
418 (*line)[put_chars] = '\0';
419 return lines_read;
420}
421
422
423/*
424 * Read a logical quoted-printable encoded line into the specified line
425 * buffer. Quoted-printable lines can be split over several physical lines,
426 * so this function collects all affected lines, concatenates and decodes
427 * them.
428 *
429 * Side effects: resizes line if necessary, adjusts max_line_len
430 * accordingly.
431 *
432 * This function returns the number of physical lines read or a negative
433 * value on error.
434 */
435int
437 FILE *file,
438 char **line, /* where to copy the decoded line */
439 size_t *max_line_len, /* (maximum) line length */
440 const int max_lines_to_read) /* don't read more physical lines than told here */
441{
442 char *buf, *buf2;
443 char *ptr;
444 char c;
445 int buflen = LEN;
446 int count;
447 int lines_read = 0;
448 size_t chars_to_add;
449
450 buf = my_malloc(buflen); /* initial internal line buffer */
451 *buf = '\0';
452 do {
453 if ((buf2 = tin_fgets(file, FALSE)) == NULL) {
454 /*
455 * Premature end of file (or file error, leave loop. To prevent
456 * re-invocation of this function, set the numbers of read lines
457 * to the expected maximum that should be read at most.
458 */
459 lines_read = max_lines_to_read;
460 break;
461 }
462 lines_read++;
463 if ((chars_to_add = strlen(buf2)) == 0) /* Empty line, leave loop. */
464 break;
465
466 /*
467 * Strip trailing white space at the end of the line.
468 * See RFC 2045, section 6.7, #3
469 */
470 c = buf2[chars_to_add - 1];
471 while ((chars_to_add > 0) && ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'))) {
472 --chars_to_add;
473 c = (chars_to_add > 0 ? buf2[chars_to_add - 1] : '\0');
474 }
475
476 /*
477 * '=' at the end of a line indicates a soft break meaning
478 * that the following physical line "belongs" to this one.
479 * (See RFC 2045, section 6.7, #5)
480 *
481 * Skip that equal sign now; since c holds this char, the
482 * loop is not left but the next line is read and concatenated
483 * with this one while the '=' is overwritten.
484 */
485 if (c == '=') /* c is 0 when chars_to_add is 0 so this is safe */
486 buf2[--chars_to_add] = '\0';
487
488 /*
489 * Join physical lines to a logical one; keep in mind that a LF is
490 * added afterwards.
491 */
492 if (chars_to_add > ((size_t) buflen - strlen(buf) - 2)) {
493 buflen <<= 1;
494 buf = my_realloc(buf, buflen);
495 }
496 strncat(buf, buf2, (size_t) (buflen - 2));
497 } while ((c == '=') && (lines_read < max_lines_to_read));
498 /*
499 * re-add newline and NULL termination at end of line
500 * FIXME: Adding a newline may be not correct. At least it may be not
501 * what the author of that article intended. Unfortunately, a newline is
502 * expected at the end of a line by some other code in cook.c.
503 */
504 strcat(buf, "\n");
505
506 /*
507 * Now decode complete (logical) line from buf to buf2 and copy it to the
508 * buffer where the invoking function expects it. Don't decode directly
509 * to the buffer of the other function to prevent buffer overruns and to
510 * decide if the encoding was ok.
511 */
512 buf2 = my_malloc(strlen(buf) + 1); /* Don't use realloc here, tin_fgets relies on its internal state! */
513 count = mmdecode(buf, 'q', '\0', buf2);
514
515 if (count >= 0) {
516 buf2[count] = '\0';
517 ptr = buf2;
518 } else /* error in encoding: copy raw line */
519 ptr = buf;
520
521 if (*max_line_len < strlen(ptr) + 1) {
522 *max_line_len = strlen(ptr) + 1;
523 *line = my_realloc(*line, *max_line_len);
524 }
525 strncpy(*line, ptr, *max_line_len);
526 (*line)[*max_line_len - 1] = '\0'; /* be sure to terminate string */
527 free(buf);
528 free(buf2);
529 return lines_read;
530}
#define FALSE
Definition: bool.h:70
const char base64_alphabet[64]
Definition: rfc2047.c:73
static char buf[16]
Definition: langinfo.c:50
int mmdecode(const char *what, int encoding, int delimiter, char *where)
Definition: rfc2047.c:147
char * my_strdup(const char *str)
Definition: string.c:139
char * tin_fgets(FILE *fp, t_bool header)
Definition: read.c:317
static int offset
Definition: read.c:62
static int put_rest(char **rest, char **line, size_t *max_line_len, const int offset)
Definition: rfc2045.c:254
static void set_rest(char **rest, const char *ptr)
Definition: rfc2045.c:227
void rfc1521_encode(char *line, FILE *f, int e)
Definition: rfc2045.c:75
static unsigned char bin2hex(unsigned int x)
Definition: rfc2045.c:54
#define HI4BITS(c)
Definition: rfc2045.c:63
#define LO4BITS(c)
Definition: rfc2045.c:64
int read_decoded_qp_line(FILE *file, char **line, size_t *max_line_len, const int max_lines_to_read)
Definition: rfc2045.c:436
int read_decoded_base64_line(FILE *file, char **line, size_t *max_line_len, const int max_lines_to_read, char **rest)
Definition: rfc2045.c:323
#define LEN
Definition: tin.h:860
#define STRCPY(dst, src)
Definition: tin.h:820
#define my_malloc(size)
Definition: tin.h:2245
#define FreeIfNeeded(p)
Definition: tin.h:2252
#define FreeAndNull(p)
Definition: tin.h:2253
#define my_realloc(ptr, size)
Definition: tin.h:2247
#define EIGHT_BIT(ptr)
Definition: tin.h:2267
#define is_EIGHT_BIT(p)
Definition: tin.h:2268