"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.4.1/src/rfc2045.c" (12 Oct 2016, 15181 Bytes) of archive /linux/misc/tin-2.4.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "rfc2045.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.4.0_vs_2.4.1.

    1 /*
    2  *  Project   : tin - a Usenet reader
    3  *  Module    : rfc2045.c
    4  *  Author    : Chris Blum <chris@resolution.de>
    5  *  Created   : 1995-09-01
    6  *  Updated   : 2015-08-24
    7  *  Notes     : RFC 2045/2047 encoding
    8  *
    9  * Copyright (c) 1995-2017 Chris Blum <chris@resolution.de>
   10  * All rights reserved.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. The name of the author may not be used to endorse or promote
   21  *    products derived from this software without specific prior written
   22  *    permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
   25  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   26  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
   28  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
   30  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   31  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
   32  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   33  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   34  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   35  */
   36 
   37 
   38 #ifndef TIN_H
   39 #   include "tin.h"
   40 #endif /* !TIN_H */
   41 
/*
 * local prototypes
 *
 * Forward declarations of the file-local (static) helpers defined below.
 */
static int put_rest(char **rest, char **line, size_t *max_line_len, const int offset);
static unsigned char bin2hex(unsigned int x);
static void set_rest(char **rest, const char *ptr);
   48 
   49 
   50 static unsigned char
   51 bin2hex(
   52     unsigned int x)
   53 {
   54     if (x < 10)
   55         return x + '0';
   56     return x - 10 + 'A';
   57 }
   58 
   59 
   60 #define HI4BITS(c) ((unsigned char) (*EIGHT_BIT(c) >> 4))
   61 #define LO4BITS(c) ((unsigned char) (*c & 0xf))
   62 
   63 /*
   64  * A MIME replacement for fputs. e can be 'b' for base64, 'q' for
   65  * quoted-printable, or 8 (default) for 8bit. Long lines get broken in
   66  * encoding modes. If line is the null pointer, flush internal buffers.
   67  * NOTE: Use only with text encodings, because line feed characters (0x0A)
   68  *       will be encoded as CRLF line endings when using base64! This will
   69  *       certainly break any binary format ...
   70  */
   71 void
   72 rfc1521_encode(
   73     char *line,
   74     FILE *f,
   75     int e)
   76 {
   77     int i;
   78     static char *b = NULL;  /* they must be static for base64 */
   79     static char buffer[80];
   80     static int bits = 0;
   81     static int xpos = 0;
   82     static unsigned long pattern = 0;
   83 
   84     if (e == 'b') {
   85         if (!b) {
   86             b = buffer;
   87             *buffer = '\0';
   88         }
   89         if (!line) {        /* flush */
   90             if (bits) {
   91                 if (xpos >= 73) {
   92                     *b++ = '\n';
   93                     *b = 0;
   94                     fputs(buffer, f);
   95                     b = buffer;
   96                     xpos = 0;
   97                 }
   98                 pattern <<= 24 - bits;
   99                 for (i = 0; i < 4; i++) {
  100                     if (bits >= 0) {
  101                         *b++ = base64_alphabet[(pattern & 0xfc0000) >> 18];
  102                         pattern <<= 6;
  103                         bits -= 6;
  104                     } else
  105                         *b++ = '=';
  106                     xpos++;
  107                 }
  108                 pattern = 0;
  109                 bits = 0;
  110             }
  111             if (xpos) {
  112                 *b = 0;
  113                 fputs(buffer, f);
  114                 xpos = 0;
  115             }
  116             b = NULL;
  117         } else {
  118             char *line_crlf = line;
  119             size_t len = strlen(line);
  120             char tmpbuf[2050]; /* FIXME: this is sizeof(buffer)+2 from rfc15211522_encode() */
  121 
  122             /*
  123              * base64 requires CRLF line endings in text types
  124              * convert LF to CRLF if not CRLF already (Windows?)
  125              */
  126             if ((len > 0) && (line[len - 1] == '\n') &&
  127                     ((len == 1) || (line[len - 2] != '\r'))) {
  128                 STRCPY(tmpbuf, line);
  129                 line_crlf = tmpbuf;
  130                 line_crlf[len - 1] = '\r';
  131                 line_crlf[len] = '\n';
  132                 line_crlf[len + 1] = '\0';
  133             }
  134 
  135             while (*line_crlf) {
  136                 pattern <<= 8;
  137                 pattern |= *EIGHT_BIT(line_crlf)++;
  138                 bits += 8;
  139                 if (bits >= 24) {
  140                     if (xpos >= 73) {
  141                         *b++ = '\n';
  142                         *b = 0;
  143                         b = buffer;
  144                         xpos = 0;
  145                         fputs(buffer, f);
  146                     }
  147                     for (i = 0; i < 4; i++) {
  148                         *b++ = base64_alphabet[(pattern >> (bits - 6)) & 0x3f];
  149                         xpos++;
  150                         bits -= 6;
  151                     }
  152                     pattern = 0;
  153                 }
  154             }
  155         }
  156     } else if (e == 'q') {
  157         if (!line) {
  158             /*
  159              * we don't really flush anything in qp mode, just set
  160              * xpos to 0 in case the last line wasn't terminated by
  161              * \n.
  162              */
  163             xpos = 0;
  164             b = NULL;
  165             return;
  166         }
  167         b = buffer;
  168         while (*line) {
  169             if (isspace((unsigned char) *line) && *line != '\n') {
  170                 char *l = line + 1;
  171 
  172                 while (*l) {
  173                     if (!isspace((unsigned char) *l)) {     /* it's not trailing whitespace, no encoding needed */
  174                         *b++ = *line++;
  175                         xpos++;
  176                         break;
  177                     }
  178                     l++;
  179                 }
  180                 if (!*l) {      /* trailing whitespace must be encoded */
  181                     *b++ = '=';
  182                     *b++ = bin2hex(HI4BITS(line));
  183                     *b++ = bin2hex(LO4BITS(line));
  184                     xpos += 3;
  185                     line++;
  186                 }
  187             } else if ((!is_EIGHT_BIT(line) && *line != '=')
  188                           || (*line == '\n')) {
  189                 *b++ = *line++;
  190                 xpos++;
  191                 if (*(line - 1) == '\n')
  192                     break;
  193             } else {
  194                 *b++ = '=';
  195                 *b++ = bin2hex(HI4BITS(line));
  196                 *b++ = bin2hex(LO4BITS(line));
  197                 xpos += 3;
  198                 line++;
  199             }
  200             if (xpos > 72 && *line != '\n') {   /* 72 +3 [worst case] + equal sign = 76 :-) */
  201                 *b++ = '=';     /* break long lines with a 'soft line break' */
  202                 *b++ = '\n';
  203                 *b++ = '\0';
  204                 fputs(buffer, f);
  205                 b = buffer;
  206                 xpos = 0;
  207             }
  208         }
  209         *b = 0;
  210         if (b != buffer)
  211             fputs(buffer, f);
  212         if (b != buffer && b[-1] == '\n')
  213             xpos = 0;
  214     } else if (line)
  215         fputs(line, f);
  216 }
  217 
  218 
  219 /*
  220  * Set everything in ptr as the rest of a physical line to be processed
  221  * later.
  222  */
  223 static void
  224 set_rest(
  225     char **rest,
  226     const char *ptr)
  227 {
  228     char *old_rest = *rest;
  229 
  230     if (ptr == NULL || strlen(ptr) == 0) {
  231         FreeAndNull(*rest);
  232         return;
  233     }
  234     *rest = my_strdup(ptr);
  235     FreeIfNeeded(old_rest);
  236 }
  237 
  238 
  239 /*
  240  * Copy things that were left over from the last decoding into the new line.
  241  * If there's a newline in the rest, copy everything up to and including that
  242  * newline into the expected buffer, adjust rest and return. If there's no
  243  * newline in the rest, copy all of it to the expected buffer and return.
  244  *
  245  * Side effects: resizes line if necessary, adjusts max_line_len
  246  * accordingly.
  247  *
  248  * This function returns the number of characters written to the line buffer.
  249  */
  250 static int
  251 put_rest(
  252     char **rest,
  253     char **line,
  254     size_t *max_line_len,
  255     const int offset)
  256 {
  257     char *my_rest = *rest;
  258     char *ptr;
  259     char c;
  260     int put_chars = offset;
  261 
  262     if ((ptr = my_rest) == NULL)
  263         return put_chars;
  264     if (strlen(my_rest) == 0) {
  265         FreeAndNull(*rest);
  266         return put_chars;
  267     }
  268 
  269     while ((c = *ptr++) && (c != '\n')) {
  270         if ((c == '\r') && (*ptr == '\n'))
  271             continue;   /* step over CRLF */
  272         /*
  273          * Resize line if necessary. Keep in mind that we add LF and \0 later.
  274          */
  275         if (put_chars >= (int) *max_line_len - 2) {
  276             if (*max_line_len == 0)
  277                 *max_line_len = LEN;
  278             else
  279                 *max_line_len <<= 1;
  280             *line = my_realloc(*line, *max_line_len);
  281         }
  282         (*line)[put_chars++] = c;
  283     }
  284     if (c == '\n') {
  285         /* Look for CRLF spread over two lines. */
  286         if (put_chars && (*line)[put_chars -1] == '\r')
  287             --put_chars;
  288         /*
  289          * FIXME: Adding a newline may be not correct. At least it may
  290          * be not what the author of that article intended.
  291          * Unfortunately, a newline is expected at the end of a line by
  292          * some other code in cook.c and even those functions invoking
  293          * this one rely on it.
  294          */
  295         (*line)[put_chars++] = '\n';
  296         set_rest(rest, ptr);
  297     } else /* c == 0 */
  298         /* rest is now empty */
  299         FreeAndNull(*rest);
  300 
  301     (*line)[put_chars] = '\0';  /* don't count the termining NULL! */
  302     return put_chars;
  303 }
  304 
  305 
  306 /*
  307  * Read a logical base64 encoded line into the specified line buffer.
  308  * Logical lines can be split over several physical base64 encoded lines and
  309  * a single physical base64 encoded line can contain several logical lines.
  310  * This function keeps track of all these cases and always copies only one
  311  * decoded line to the line buffer.
  312  *
  313  * Side effects: resizes line if necessary, adjusts max_line_len
  314  * accordingly.
  315  *
  316  * This function returns the number of physical lines read or a negative
  317  * value on error.
  318  */
  319 int
  320 read_decoded_base64_line(
  321     FILE *file,
  322     char **line,
  323     size_t *max_line_len,
  324     const int max_lines_to_read,
  325     char **rest)
  326 {
  327     char *buf2; /* holds the entire decoded line */
  328     char *buf;  /* holds the entire encoded line */
  329     int count;
  330     int lines_read = 0;
  331     int put_chars;
  332 
  333     /*
  334      * First of all, catch everything that is left over from the last decoding.
  335      * If there's a newline in that rest, copy everything up to and including
  336      * that newline in the expected buffer, adjust rest and return. If there's
  337      * no newline in the rest, copy all of it (modulo length of the buffer) to
  338      * the expected buffer and continue as if there was no rest.
  339      */
  340     put_chars = put_rest(rest, line, max_line_len, 0);
  341     if (put_chars && ((*line)[put_chars - 1] == '\n'))
  342         return 0;   /* we didn't read any new lines but filled the line */
  343 
  344     /*
  345      * At this point, either there was no rest or there was no newline in the
  346      * rest. In any case, we need to read further encoded lines and decode
  347      * them until we find a newline or there are no more (encoded or physical)
  348      * lines in this part of the posting. To be sure, now allocate memory for
  349      * the output if it wasn't already done.
  350      */
  351     if (*max_line_len == 0) {
  352         *max_line_len = LEN;
  353         *line = my_malloc(*max_line_len);
  354     }
  355 
  356     /*
  357      * max_lines_to_read==0 occurs at end of an encoded part and if there was
  358      * no trailing newline in the encoded text. So we put one there and exit.
  359      * FIXME: Adding a newline may be not correct. At least it may be not
  360      * what the author of that article intended. Unfortunately, a newline is
  361      * expected at the end of a line by some other code in cook.c.
  362      */
  363     if (max_lines_to_read <= 0) {
  364         if (put_chars) {
  365             (*line)[put_chars++] = '\n';
  366             (*line)[put_chars] = '\0';
  367         }
  368         return max_lines_to_read;
  369     }
  370     /*
  371      * Ok, now read a new line from the original article.
  372      */
  373     do {
  374         if ((buf = tin_fgets(file, FALSE)) == NULL) {
  375             /*
  376              * Premature end of file (or file error), leave loop. To prevent
  377              * re-invoking of this function, set the numbers of read lines to
  378              * the expected maximum that should be read at most.
  379              *
  380              * FIXME: Adding a newline may be not correct. At least it may be
  381              * not what the author of that article intended. Unfortunately, a
  382              * newline is expected at the end of a line by some other code in
  383              * cook.c.
  384              */
  385             if (put_chars > (int) *max_line_len - 2) {
  386                 *max_line_len <<= 1;
  387                 *line = my_realloc(*line, *max_line_len);
  388             }
  389             (*line)[put_chars++] = '\n';
  390             (*line)[put_chars] = '\0';
  391             return max_lines_to_read;
  392         }
  393         lines_read++;
  394         buf2 = my_malloc(strlen(buf) + 1); /* decoded string is always shorter than encoded string, so this is safe */
  395         count = mmdecode(buf, 'b', '\0', buf2);
  396         buf2[count] = '\0';
  397         FreeIfNeeded(*rest);
  398         *rest = buf2;
  399         put_chars = put_rest(rest, line, max_line_len, put_chars);
  400         if (put_chars && ((*line)[put_chars - 1] == '\n')) /* end of logical line reached */
  401             return lines_read;
  402     } while (lines_read < max_lines_to_read);
  403     /*
  404      * FIXME: Adding a newline may be not correct. At least it may be
  405      * not what the author of that article intended. Unfortunately, a
  406      * newline is expected at the end of a line by some other code in
  407      * cook.c.
  408      */
  409     if (put_chars > (int) *max_line_len - 2) {
  410         *max_line_len <<= 1;
  411         *line = my_realloc(*line, *max_line_len);
  412     }
  413     if ((0 == put_chars) || ('\n' != (*line)[put_chars - 1]))
  414             (*line)[put_chars++] = '\n';
  415     (*line)[put_chars] = '\0';
  416     return lines_read;
  417 }
  418 
  419 
  420 /*
  421  * Read a logical quoted-printable encoded line into the specified line
  422  * buffer. Quoted-printable lines can be split over several physical lines,
  423  * so this function collects all affected lines, concatenates and decodes
  424  * them.
  425  *
  426  * Side effects: resizes line if necessary, adjusts max_line_len
  427  * accordingly.
  428  *
  429  * This function returns the number of physical lines read or a negative
  430  * value on error.
  431  */
  432 int
  433 read_decoded_qp_line(
  434     FILE *file,
  435     char **line,                    /* where to copy the decoded line */
  436     size_t *max_line_len,               /* (maximum) line length */
  437     const int max_lines_to_read)    /* don't read more physical lines than told here */
  438 {
  439     char *buf, *buf2;
  440     char *ptr;
  441     char c;
  442     int buflen = LEN;
  443     int count;
  444     int lines_read = 0;
  445     size_t chars_to_add;
  446 
  447     buf = my_malloc(buflen); /* initial internal line buffer */
  448     *buf = '\0';
  449     do {
  450         if ((buf2 = tin_fgets(file, FALSE)) == NULL) {
  451             /*
  452              * Premature end of file (or file error, leave loop. To prevent
  453              * re-invocation of this function, set the numbers of read lines
  454              * to the expected maximum that should be read at most.
  455              */
  456             lines_read = max_lines_to_read;
  457             break;
  458         }
  459         lines_read++;
  460         if ((chars_to_add = strlen(buf2)) == 0) /* Empty line, leave loop. */
  461             break;
  462 
  463         /*
  464          * Strip trailing white space at the end of the line.
  465          * See RFC 2045, section 6.7, #3
  466          */
  467         c = buf2[chars_to_add - 1];
  468         while ((chars_to_add > 0) && ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'))) {
  469             --chars_to_add;
  470             c = (chars_to_add > 0 ? buf2[chars_to_add - 1] : '\0');
  471         }
  472 
  473         /*
  474          * '=' at the end of a line indicates a soft break meaning
  475          * that the following physical line "belongs" to this one.
  476          * (See RFC 2045, section 6.7, #5)
  477          *
  478          * Skip that equal sign now; since c holds this char, the
  479          * loop is not left but the next line is read and concatenated
  480          * with this one while the '=' is overwritten.
  481          */
  482         if (c == '=') /* c is 0 when chars_to_add is 0 so this is safe */
  483             buf2[--chars_to_add] = '\0';
  484 
  485         /*
  486          * Join physical lines to a logical one; keep in mind that a LF is
  487          * added afterwards.
  488          */
  489         if (chars_to_add > buflen - strlen(buf) - 2) {
  490             buflen <<= 1;
  491             buf = my_realloc(buf, buflen);
  492         }
  493         strncat(buf, buf2, buflen);
  494     } while ((c == '=') && (lines_read < max_lines_to_read));
  495     /*
  496      * re-add newline and NULL termination at end of line
  497      * FIXME: Adding a newline may be not correct. At least it may be not
  498      * what the author of that article intended. Unfortunately, a newline is
  499      * expected at the end of a line by some other code in cook.c.
  500      */
  501     strcat(buf, "\n");
  502 
  503     /*
  504      * Now decode complete (logical) line from buf to buf2 and copy it to the
  505      * buffer where the invoking function expects it. Don't decode directly
  506      * to the buffer of the other function to prevent buffer overruns and to
  507      * decide if the encoding was ok.
  508      */
  509     buf2 = my_malloc(strlen(buf) + 1); /* Don't use realloc here, tin_fgets relies on its internal state! */
  510     count = mmdecode(buf, 'q', '\0', buf2);
  511 
  512     if (count >= 0) {
  513         buf2[count] = '\0';
  514         ptr = buf2;
  515     } else  /* error in encoding: copy raw line */
  516         ptr = buf;
  517 
  518     if (*max_line_len < strlen(ptr) + 1) {
  519         *max_line_len = strlen(ptr) + 1;
  520         *line = my_realloc(*line, *max_line_len);
  521     }
  522     strncpy(*line, ptr, *max_line_len);
  523     (*line)[*max_line_len - 1] = '\0'; /* be sure to terminate string */
  524     free(buf);
  525     free(buf2);
  526     return lines_read;
  527 }