"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.4.4/src/rfc2045.c" (20 Nov 2019, 15301 Bytes) of package /linux/misc/tin-2.4.4.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "rfc2045.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.4.3_vs_2.4.4.

    1 /*
    2  *  Project   : tin - a Usenet reader
    3  *  Module    : rfc2045.c
    4  *  Author    : Chris Blum <chris@resolution.de>
    5  *  Created   : 1995-09-01
    6  *  Updated   : 2018-11-22
    7  *  Notes     : RFC 2045/2047 encoding
    8  *
    9  * Copyright (c) 1995-2020 Chris Blum <chris@resolution.de>
   10  * All rights reserved.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  *
   16  * 1. Redistributions of source code must retain the above copyright notice,
   17  *    this list of conditions and the following disclaimer.
   18  *
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  *
   23  * 3. Neither the name of the copyright holder nor the names of its
   24  *    contributors may be used to endorse or promote products derived from
   25  *    this software without specific prior written permission.
   26  *
   27  * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   28  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   30  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   31  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   37  * POSSIBILITY OF SUCH DAMAGE.
   38  */
   39 
   40 
   41 #ifndef TIN_H
   42 #   include "tin.h"
   43 #endif /* !TIN_H */
   44 
   45 /*
   46  * local prototypes
   47  */
   48 static int put_rest(char **rest, char **line, size_t *max_line_len, const int offset);
   49 static unsigned char bin2hex(unsigned int x);
   50 static void set_rest(char **rest, const char *ptr);
   51 
   52 
   53 static unsigned char
   54 bin2hex(
   55     unsigned int x)
   56 {
   57     if (x < 10)
   58         return x + '0';
   59     return x - 10 + 'A';
   60 }
   61 
   62 
   63 #define HI4BITS(c) ((unsigned char) (*EIGHT_BIT(c) >> 4))
   64 #define LO4BITS(c) ((unsigned char) (*c & 0xf))
   65 
   66 /*
   67  * A MIME replacement for fputs. e can be 'b' for base64, 'q' for
   68  * quoted-printable, or 8 (default) for 8bit. Long lines get broken in
   69  * encoding modes. If line is the null pointer, flush internal buffers.
   70  * NOTE: Use only with text encodings, because line feed characters (0x0A)
   71  *       will be encoded as CRLF line endings when using base64! This will
   72  *       certainly break any binary format ...
   73  */
   74 void
   75 rfc1521_encode(
   76     char *line,
   77     FILE *f,
   78     int e)
   79 {
   80     int i;
   81     static char *b = NULL;  /* they must be static for base64 */
   82     static char buffer[80];
   83     static int bits = 0;
   84     static int xpos = 0;
   85     static unsigned long pattern = 0;
   86 
   87     if (e == 'b') {
   88         if (!b) {
   89             b = buffer;
   90             *buffer = '\0';
   91         }
   92         if (!line) {        /* flush */
   93             if (bits) {
   94                 if (xpos >= 73) {
   95                     *b++ = '\n';
   96                     *b = '\0';
   97                     fputs(buffer, f);
   98                     b = buffer;
   99                     xpos = 0;
  100                 }
  101                 pattern <<= 24 - bits;
  102                 for (i = 0; i < 4; i++) {
  103                     if (bits >= 0) {
  104                         *b++ = base64_alphabet[(pattern & 0xfc0000) >> 18];
  105                         pattern <<= 6;
  106                         bits -= 6;
  107                     } else
  108                         *b++ = '=';
  109                     xpos++;
  110                 }
  111                 pattern = 0;
  112                 bits = 0;
  113             }
  114             if (xpos) {
  115                 *b = '\0';
  116                 fputs(buffer, f);
  117                 xpos = 0;
  118             }
  119             b = NULL;
  120         } else {
  121             char *line_crlf = line;
  122             size_t len = strlen(line);
  123             char tmpbuf[2050]; /* FIXME: this is sizeof(buffer)+2 from rfc15211522_encode() */
  124 
  125             /*
  126              * base64 requires CRLF line endings in text types
  127              * convert LF to CRLF if not CRLF already (Windows?)
  128              */
  129             if ((len > 0) && (line[len - 1] == '\n') &&
  130                     ((len == 1) || (line[len - 2] != '\r'))) {
  131                 STRCPY(tmpbuf, line);
  132                 line_crlf = tmpbuf;
  133                 line_crlf[len - 1] = '\r';
  134                 line_crlf[len] = '\n';
  135                 line_crlf[len + 1] = '\0';
  136             }
  137 
  138             while (*line_crlf) {
  139                 pattern <<= 8;
  140                 pattern |= *EIGHT_BIT(line_crlf)++;
  141                 bits += 8;
  142                 if (bits >= 24) {
  143                     if (xpos >= 73) {
  144                         *b++ = '\n';
  145                         *b = '\0';
  146                         b = buffer;
  147                         xpos = 0;
  148                         fputs(buffer, f);
  149                     }
  150                     for (i = 0; i < 4; i++) {
  151                         *b++ = base64_alphabet[(pattern >> (bits - 6)) & 0x3f];
  152                         xpos++;
  153                         bits -= 6;
  154                     }
  155                     pattern = 0;
  156                 }
  157             }
  158         }
  159     } else if (e == 'q') {
  160         if (!line) {
  161             /*
  162              * we don't really flush anything in qp mode, just set
  163              * xpos to 0 in case the last line wasn't terminated by
  164              * \n.
  165              */
  166             xpos = 0;
  167             b = NULL;
  168             return;
  169         }
  170         b = buffer;
  171         while (*line) {
  172             if (isspace((unsigned char) *line) && *line != '\n') {
  173                 char *l = line + 1;
  174 
  175                 while (*l) {
  176                     if (!isspace((unsigned char) *l)) {     /* it's not trailing whitespace, no encoding needed */
  177                         *b++ = *line++;
  178                         xpos++;
  179                         break;
  180                     }
  181                     l++;
  182                 }
  183                 if (!*l) {      /* trailing whitespace must be encoded */
  184                     *b++ = '=';
  185                     *b++ = bin2hex(HI4BITS(line));
  186                     *b++ = bin2hex(LO4BITS(line));
  187                     xpos += 3;
  188                     line++;
  189                 }
  190             } else if ((!is_EIGHT_BIT(line) && *line != '=')
  191                           || (*line == '\n')) {
  192                 *b++ = *line++;
  193                 xpos++;
  194                 if (*(line - 1) == '\n')
  195                     break;
  196             } else {
  197                 *b++ = '=';
  198                 *b++ = bin2hex(HI4BITS(line));
  199                 *b++ = bin2hex(LO4BITS(line));
  200                 xpos += 3;
  201                 line++;
  202             }
  203             if (xpos > 72 && *line != '\n') {   /* 72 +3 [worst case] + equal sign = 76 :-) */
  204                 *b++ = '=';     /* break long lines with a 'soft line break' */
  205                 *b++ = '\n';
  206                 *b++ = '\0';
  207                 fputs(buffer, f);
  208                 b = buffer;
  209                 xpos = 0;
  210             }
  211         }
  212         *b = '\0';
  213         if (b != buffer)
  214             fputs(buffer, f);
  215         if (b != buffer && b[-1] == '\n')
  216             xpos = 0;
  217     } else if (line)
  218         fputs(line, f);
  219 }
  220 
  221 
  222 /*
  223  * Set everything in ptr as the rest of a physical line to be processed
  224  * later.
  225  */
  226 static void
  227 set_rest(
  228     char **rest,
  229     const char *ptr)
  230 {
  231     char *old_rest = *rest;
  232 
  233     if (ptr == NULL || strlen(ptr) == 0) {
  234         FreeAndNull(*rest);
  235         return;
  236     }
  237     *rest = my_strdup(ptr);
  238     FreeIfNeeded(old_rest);
  239 }
  240 
  241 
  242 /*
  243  * Copy things that were left over from the last decoding into the new line.
  244  * If there's a newline in the rest, copy everything up to and including that
  245  * newline into the expected buffer, adjust rest and return. If there's no
  246  * newline in the rest, copy all of it to the expected buffer and return.
  247  *
  248  * Side effects: resizes line if necessary, adjusts max_line_len
  249  * accordingly.
  250  *
  251  * This function returns the number of characters written to the line buffer.
  252  */
  253 static int
  254 put_rest(
  255     char **rest,
  256     char **line,
  257     size_t *max_line_len,
  258     const int offset)
  259 {
  260     char *my_rest = *rest;
  261     char *ptr;
  262     char c;
  263     int put_chars = offset;
  264 
  265     if ((ptr = my_rest) == NULL)
  266         return put_chars;
  267     if (strlen(my_rest) == 0) {
  268         FreeAndNull(*rest);
  269         return put_chars;
  270     }
  271 
  272     while ((c = *ptr++) && (c != '\n')) {
  273         if ((c == '\r') && (*ptr == '\n'))
  274             continue;   /* step over CRLF */
  275         /*
  276          * Resize line if necessary. Keep in mind that we add LF and \0 later.
  277          */
  278         if (put_chars >= (int) *max_line_len - 2) {
  279             if (*max_line_len == 0)
  280                 *max_line_len = LEN;
  281             else
  282                 *max_line_len <<= 1;
  283             *line = my_realloc(*line, *max_line_len);
  284         }
  285         (*line)[put_chars++] = c;
  286     }
  287     if (c == '\n') {
  288         /* Look for CRLF spread over two lines. */
  289         if (put_chars && (*line)[put_chars -1] == '\r')
  290             --put_chars;
  291         /*
  292          * FIXME: Adding a newline may be not correct. At least it may
  293          * be not what the author of that article intended.
  294          * Unfortunately, a newline is expected at the end of a line by
  295          * some other code in cook.c and even those functions invoking
  296          * this one rely on it.
  297          */
  298         (*line)[put_chars++] = '\n';
  299         set_rest(rest, ptr);
  300     } else /* c == 0 */
  301         /* rest is now empty */
  302         FreeAndNull(*rest);
  303 
  304     (*line)[put_chars] = '\0';  /* don't count the terminating NULL! */
  305     return put_chars;
  306 }
  307 
  308 
  309 /*
  310  * Read a logical base64 encoded line into the specified line buffer.
  311  * Logical lines can be split over several physical base64 encoded lines and
  312  * a single physical base64 encoded line can contain several logical lines.
  313  * This function keeps track of all these cases and always copies only one
  314  * decoded line to the line buffer.
  315  *
  316  * Side effects: resizes line if necessary, adjusts max_line_len
  317  * accordingly.
  318  *
  319  * This function returns the number of physical lines read or a negative
  320  * value on error.
  321  */
  322 int
  323 read_decoded_base64_line(
  324     FILE *file,
  325     char **line,
  326     size_t *max_line_len,
  327     const int max_lines_to_read,
  328     char **rest)
  329 {
  330     char *buf2; /* holds the entire decoded line */
  331     char *buf;  /* holds the entire encoded line */
  332     int count;
  333     int lines_read = 0;
  334     int put_chars;
  335 
  336     /*
  337      * First of all, catch everything that is left over from the last decoding.
  338      * If there's a newline in that rest, copy everything up to and including
  339      * that newline in the expected buffer, adjust rest and return. If there's
  340      * no newline in the rest, copy all of it (modulo length of the buffer) to
  341      * the expected buffer and continue as if there was no rest.
  342      */
  343     put_chars = put_rest(rest, line, max_line_len, 0);
  344     if (put_chars && ((*line)[put_chars - 1] == '\n'))
  345         return 0;   /* we didn't read any new lines but filled the line */
  346 
  347     /*
  348      * At this point, either there was no rest or there was no newline in the
  349      * rest. In any case, we need to read further encoded lines and decode
  350      * them until we find a newline or there are no more (encoded or physical)
  351      * lines in this part of the posting. To be sure, now allocate memory for
  352      * the output if it wasn't already done.
  353      */
  354     if (*max_line_len == 0) {
  355         *max_line_len = LEN;
  356         *line = my_malloc(*max_line_len);
  357     }
  358 
  359     /*
  360      * max_lines_to_read==0 occurs at end of an encoded part and if there was
  361      * no trailing newline in the encoded text. So we put one there and exit.
  362      * FIXME: Adding a newline may be not correct. At least it may be not
  363      * what the author of that article intended. Unfortunately, a newline is
  364      * expected at the end of a line by some other code in cook.c.
  365      */
  366     if (max_lines_to_read <= 0) {
  367         if (put_chars) {
  368             (*line)[put_chars++] = '\n';
  369             (*line)[put_chars] = '\0';
  370         }
  371         return max_lines_to_read;
  372     }
  373     /*
  374      * Ok, now read a new line from the original article.
  375      */
  376     do {
  377         if ((buf = tin_fgets(file, FALSE)) == NULL) {
  378             /*
  379              * Premature end of file (or file error), leave loop. To prevent
  380              * re-invoking of this function, set the numbers of read lines to
  381              * the expected maximum that should be read at most.
  382              *
  383              * FIXME: Adding a newline may be not correct. At least it may be
  384              * not what the author of that article intended. Unfortunately, a
  385              * newline is expected at the end of a line by some other code in
  386              * cook.c.
  387              */
  388             if (put_chars > (int) *max_line_len - 2) {
  389                 *max_line_len <<= 1;
  390                 *line = my_realloc(*line, *max_line_len);
  391             }
  392             (*line)[put_chars++] = '\n';
  393             (*line)[put_chars] = '\0';
  394             return max_lines_to_read;
  395         }
  396         lines_read++;
  397         buf2 = my_malloc(strlen(buf) + 1); /* decoded string is always shorter than encoded string, so this is safe */
  398         count = mmdecode(buf, 'b', '\0', buf2);
  399         buf2[count] = '\0';
  400         FreeIfNeeded(*rest);
  401         *rest = buf2;
  402         put_chars = put_rest(rest, line, max_line_len, put_chars);
  403         if (put_chars && ((*line)[put_chars - 1] == '\n')) /* end of logical line reached */
  404             return lines_read;
  405     } while (lines_read < max_lines_to_read);
  406     /*
  407      * FIXME: Adding a newline may be not correct. At least it may be
  408      * not what the author of that article intended. Unfortunately, a
  409      * newline is expected at the end of a line by some other code in
  410      * cook.c.
  411      */
  412     if (put_chars > (int) *max_line_len - 2) {
  413         *max_line_len <<= 1;
  414         *line = my_realloc(*line, *max_line_len);
  415     }
  416     if ((put_chars == 0) || ((*line)[put_chars - 1] != '\n'))
  417             (*line)[put_chars++] = '\n';
  418     (*line)[put_chars] = '\0';
  419     return lines_read;
  420 }
  421 
  422 
  423 /*
  424  * Read a logical quoted-printable encoded line into the specified line
  425  * buffer. Quoted-printable lines can be split over several physical lines,
  426  * so this function collects all affected lines, concatenates and decodes
  427  * them.
  428  *
  429  * Side effects: resizes line if necessary, adjusts max_line_len
  430  * accordingly.
  431  *
  432  * This function returns the number of physical lines read or a negative
  433  * value on error.
  434  */
  435 int
  436 read_decoded_qp_line(
  437     FILE *file,
  438     char **line,                    /* where to copy the decoded line */
  439     size_t *max_line_len,               /* (maximum) line length */
  440     const int max_lines_to_read)    /* don't read more physical lines than told here */
  441 {
  442     char *buf, *buf2;
  443     char *ptr;
  444     char c;
  445     int buflen = LEN;
  446     int count;
  447     int lines_read = 0;
  448     size_t chars_to_add;
  449 
  450     buf = my_malloc(buflen); /* initial internal line buffer */
  451     *buf = '\0';
  452     do {
  453         if ((buf2 = tin_fgets(file, FALSE)) == NULL) {
  454             /*
  455              * Premature end of file (or file error, leave loop. To prevent
  456              * re-invocation of this function, set the numbers of read lines
  457              * to the expected maximum that should be read at most.
  458              */
  459             lines_read = max_lines_to_read;
  460             break;
  461         }
  462         lines_read++;
  463         if ((chars_to_add = strlen(buf2)) == 0) /* Empty line, leave loop. */
  464             break;
  465 
  466         /*
  467          * Strip trailing white space at the end of the line.
  468          * See RFC 2045, section 6.7, #3
  469          */
  470         c = buf2[chars_to_add - 1];
  471         while ((chars_to_add > 0) && ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\r'))) {
  472             --chars_to_add;
  473             c = (chars_to_add > 0 ? buf2[chars_to_add - 1] : '\0');
  474         }
  475 
  476         /*
  477          * '=' at the end of a line indicates a soft break meaning
  478          * that the following physical line "belongs" to this one.
  479          * (See RFC 2045, section 6.7, #5)
  480          *
  481          * Skip that equal sign now; since c holds this char, the
  482          * loop is not left but the next line is read and concatenated
  483          * with this one while the '=' is overwritten.
  484          */
  485         if (c == '=') /* c is 0 when chars_to_add is 0 so this is safe */
  486             buf2[--chars_to_add] = '\0';
  487 
  488         /*
  489          * Join physical lines to a logical one; keep in mind that a LF is
  490          * added afterwards.
  491          */
  492         if (chars_to_add > buflen - strlen(buf) - 2) {
  493             buflen <<= 1;
  494             buf = my_realloc(buf, buflen);
  495         }
  496         strncat(buf, buf2, buflen - 2);
  497     } while ((c == '=') && (lines_read < max_lines_to_read));
  498     /*
  499      * re-add newline and NULL termination at end of line
  500      * FIXME: Adding a newline may be not correct. At least it may be not
  501      * what the author of that article intended. Unfortunately, a newline is
  502      * expected at the end of a line by some other code in cook.c.
  503      */
  504     strcat(buf, "\n");
  505 
  506     /*
  507      * Now decode complete (logical) line from buf to buf2 and copy it to the
  508      * buffer where the invoking function expects it. Don't decode directly
  509      * to the buffer of the other function to prevent buffer overruns and to
  510      * decide if the encoding was ok.
  511      */
  512     buf2 = my_malloc(strlen(buf) + 1); /* Don't use realloc here, tin_fgets relies on its internal state! */
  513     count = mmdecode(buf, 'q', '\0', buf2);
  514 
  515     if (count >= 0) {
  516         buf2[count] = '\0';
  517         ptr = buf2;
  518     } else  /* error in encoding: copy raw line */
  519         ptr = buf;
  520 
  521     if (*max_line_len < strlen(ptr) + 1) {
  522         *max_line_len = strlen(ptr) + 1;
  523         *line = my_realloc(*line, *max_line_len);
  524     }
  525     strncpy(*line, ptr, *max_line_len);
  526     (*line)[*max_line_len - 1] = '\0'; /* be sure to terminate string */
  527     free(buf);
  528     free(buf2);
  529     return lines_read;
  530 }