"Fossies" - the Fresh Open Source Software Archive

Member "sitecopy-0.16.6/lib/neon/ne_compress.c" (26 Nov 2007, 13612 Bytes) of archive /linux/www/sitecopy-0.16.6.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ne_compress.c", see the Fossies "Dox" file reference documentation.

    1 /* 
    2    Handling of compressed HTTP responses
    3    Copyright (C) 2001-2006, Joe Orton <joe@manyfish.co.uk>
    4 
    5    This library is free software; you can redistribute it and/or
    6    modify it under the terms of the GNU Library General Public
    7    License as published by the Free Software Foundation; either
    8    version 2 of the License, or (at your option) any later version.
    9    
   10    This library is distributed in the hope that it will be useful,
   11    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13    Library General Public License for more details.
   14 
   15    You should have received a copy of the GNU Library General Public
   16    License along with this library; if not, write to the Free
   17    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   18    MA 02111-1307, USA
   19 
   20 */
   21 
   22 #include "config.h"
   23 
   24 #ifdef HAVE_STRING_H
   25 #include <string.h>
   26 #endif
   27 #ifdef HAVE_STDLIB_H
   28 #include <stdlib.h>
   29 #endif
   30 
   31 #include "ne_request.h"
   32 #include "ne_compress.h"
   33 #include "ne_utils.h"
   34 #include "ne_internal.h"
   35 
   36 #ifdef NE_HAVE_ZLIB
   37 
   38 #include <zlib.h>
   39 
   40 /* Adds support for the 'gzip' Content-Encoding in HTTP.  gzip is a
   41  * file format which wraps the DEFLATE compression algorithm.  zlib
   42  * implements DEFLATE: we have to unwrap the gzip format (specified in
   43  * RFC1952) as it comes off the wire, and hand off chunks of data to
   44  * be inflated. */
   45 
   46 struct ne_decompress_s {
   47     ne_request *request; /* associated request. */
   48     ne_session *session; /* associated session. */
   49     /* temporary buffer for holding inflated data. */
   50     char outbuf[NE_BUFSIZ];
   51     z_stream zstr;
   52     int zstrinit; /* non-zero if zstr has been initialized */
   53 
   54     /* pass blocks back to this. */
   55     ne_block_reader reader;
   56     ne_accept_response acceptor;
   57     void *userdata;
   58 
   59     /* buffer for gzip header bytes. */
   60     unsigned char header[10];
   61     size_t hdrcount;    /* bytes in header */
   62 
   63     unsigned char footer[8];
   64     size_t footcount; /* bytes in footer. */
   65 
   66     /* CRC32 checksum: odd that zlib uses uLong for this since it is a
   67      * 64-bit integer on LP64 platforms. */
   68     uLong checksum;
   69 
   70     /* current state. */
   71     enum state {
   72     NE_Z_BEFORE_DATA, /* not received any response blocks yet. */
   73     NE_Z_PASSTHROUGH, /* response not compressed: passing through. */
   74     NE_Z_IN_HEADER, /* received a few bytes of response data, but not
   75              * got past the gzip header yet. */
   76     NE_Z_POST_HEADER, /* waiting for the end of the NUL-terminated bits. */
   77     NE_Z_INFLATING, /* inflating response bytes. */
   78     NE_Z_AFTER_DATA, /* after data; reading CRC32 & ISIZE */
   79     NE_Z_FINISHED /* stream is finished. */
   80     } state;
   81 };
   82 
   /* Decode the four little-endian bytes at 'buf' into an unsigned int;
    * 'buf' must be 'unsigned char *'.  Every byte is widened to unsigned
    * int *before* it is shifted: a bare unsigned char promotes to signed
    * int, and shifting a byte >= 0x80 left by 24 overflows that int,
    * which is undefined behaviour (and in practice gave a negative
    * result).  Assumes unsigned int has at least 32 bits. */
   #define BUF2UINT(buf) (((unsigned int)(buf)[3] << 24) \
                          | ((unsigned int)(buf)[2] << 16) \
                          | ((unsigned int)(buf)[1] << 8) \
                          | (unsigned int)(buf)[0])

   /* The two magic bytes which parse_header requires at the start of the
    * gzip header. */
   #define ID1 0x1f
   #define ID2 0x8b

   /* Return values of parse_header. */
   #define HDR_DONE 0
   #define HDR_EXTENDED 1
   #define HDR_ERROR 2

   /* Accessors for the fields of the ten-byte header stored in
    * 'ctx->header'; the multi-byte MTIME field is little-endian. */
   #define HDR_ID1(ctx) ((ctx)->header[0])
   #define HDR_ID2(ctx) ((ctx)->header[1])
   #define HDR_CMETH(ctx) ((ctx)->header[2])
   #define HDR_FLAGS(ctx) ((ctx)->header[3])
   #define HDR_MTIME(ctx) (BUF2UINT(&(ctx)->header[4]))
   #define HDR_XFLAGS(ctx) ((ctx)->header[8])
   #define HDR_OS(ctx) ((ctx)->header[9])
  100 
  101 /* parse_header parses the gzip header, sets the next state and returns
  102  *   HDR_DONE: all done, bytes following are raw DEFLATE data.
  103  *   HDR_EXTENDED: all done, expect a NUL-termianted string
  104  *                 before the DEFLATE data
  105  *   HDR_ERROR: invalid header, give up (session error is set).
  106  */
  107 static int parse_header(ne_decompress *ctx)
  108 {
  109     NE_DEBUG(NE_DBG_HTTP, "ID1: %d  ID2: %d, cmeth %d, flags %d\n", 
  110              HDR_ID1(ctx), HDR_ID2(ctx), HDR_CMETH(ctx), HDR_FLAGS(ctx));
  111     
  112     if (HDR_ID1(ctx) != ID1 || HDR_ID2(ctx) != ID2 || HDR_CMETH(ctx) != 8) {
  113     ne_set_error(ctx->session, "Compressed stream invalid");
  114     return HDR_ERROR;
  115     }
  116 
  117     NE_DEBUG(NE_DBG_HTTP, "mtime: %d, xflags: %d, os: %d\n",
  118          HDR_MTIME(ctx), HDR_XFLAGS(ctx), HDR_OS(ctx));
  119     
  120     /* TODO: we can only handle one NUL-terminated extensions field
  121      * currently.  Really, we should count the number of bits set, and
  122      * skip as many fields as bits set (bailing if any reserved bits
  123      * are set. */
  124     if (HDR_FLAGS(ctx) == 8) {
  125     ctx->state = NE_Z_POST_HEADER;
  126     return HDR_EXTENDED;
  127     } else if (HDR_FLAGS(ctx) != 0) {
  128     ne_set_error(ctx->session, "Compressed stream not supported");
  129     return HDR_ERROR;
  130     }
  131 
  132     NE_DEBUG(NE_DBG_HTTP, "compress: Good stream.\n");
  133     
  134     ctx->state = NE_Z_INFLATING;
  135     return HDR_DONE;
  136 }
  137 
  138 /* Process extra 'len' bytes of 'buf' which were received after the
  139  * DEFLATE data. */
  140 static int process_footer(ne_decompress *ctx, 
  141                const unsigned char *buf, size_t len)
  142 {
  143     if (len + ctx->footcount > 8) {
  144         ne_set_error(ctx->session, 
  145                      "Too many bytes (%" NE_FMT_SIZE_T ") in gzip footer",
  146                      len);
  147         return -1;
  148     } else {
  149     memcpy(ctx->footer + ctx->footcount, buf, len);
  150     ctx->footcount += len;
  151     if (ctx->footcount == 8) {
  152         uLong crc = BUF2UINT(ctx->footer) & 0xFFFFFFFF;
  153         if (crc == ctx->checksum) {
  154         ctx->state = NE_Z_FINISHED;
  155         NE_DEBUG(NE_DBG_HTTP, "compress: End of response; checksum match.\n");
  156         } else {
  157         NE_DEBUG(NE_DBG_HTTP, "compress: End of response; checksum mismatch: "
  158              "given %lu vs computed %lu\n", crc, ctx->checksum);
  159         ne_set_error(ctx->session, 
  160                  "Checksum invalid for compressed stream");
  161                 return -1;
  162         }
  163     }
  164     }
  165     return 0;
  166 }
  167 
  168 /* A zlib function failed with 'code'; set the session error string
  169  * appropriately. */
  170 static void set_zlib_error(ne_decompress *ctx, const char *msg, int code)
  171 {
  172     if (ctx->zstr.msg)
  173         ne_set_error(ctx->session, "%s: %s", msg, ctx->zstr.msg);
  174     else {
  175         const char *err;
  176         switch (code) {
  177         case Z_STREAM_ERROR: err = "stream error"; break;
  178         case Z_DATA_ERROR: err = "data corrupt"; break;
  179         case Z_MEM_ERROR: err = "out of memory"; break;
  180         case Z_BUF_ERROR: err = "buffer error"; break;
  181         case Z_VERSION_ERROR: err = "library version mismatch"; break;
  182         default: err = "unknown error"; break;
  183         }
  184         ne_set_error(ctx->session, _("%s: %s (code %d)"), msg, err, code);
  185     }
  186 }
  187 
  188 /* Inflate response buffer 'buf' of length 'len'. */
  189 static int do_inflate(ne_decompress *ctx, const char *buf, size_t len)
  190 {
  191     int ret;
  192 
  193     ctx->zstr.avail_in = len;
  194     ctx->zstr.next_in = (unsigned char *)buf;
  195     ctx->zstr.total_in = 0;
  196     
  197     do {
  198     ctx->zstr.avail_out = sizeof ctx->outbuf;
  199     ctx->zstr.next_out = (unsigned char *)ctx->outbuf;
  200     ctx->zstr.total_out = 0;
  201     
  202     ret = inflate(&ctx->zstr, Z_NO_FLUSH);
  203     
  204     NE_DEBUG(NE_DBG_HTTP, 
  205          "compress: inflate %d, %ld bytes out, %d remaining\n",
  206          ret, ctx->zstr.total_out, ctx->zstr.avail_in);
  207 #if 0
  208     NE_DEBUG(NE_DBG_HTTPBODY,
  209          "Inflated body block (%ld):\n[%.*s]\n", 
  210          ctx->zstr.total_out, (int)ctx->zstr.total_out, 
  211          ctx->outbuf);
  212 #endif
  213     /* update checksum. */
  214     ctx->checksum = crc32(ctx->checksum, (unsigned char *)ctx->outbuf, 
  215                   ctx->zstr.total_out);
  216 
  217     /* pass on the inflated data, if any */
  218         if (ctx->zstr.total_out > 0) {
  219             int rret = ctx->reader(ctx->userdata, ctx->outbuf,
  220                                    ctx->zstr.total_out);
  221             if (rret) return rret;
  222         }   
  223     } while (ret == Z_OK && ctx->zstr.avail_in > 0);
  224     
  225     if (ret == Z_STREAM_END) {
  226     NE_DEBUG(NE_DBG_HTTP, "compress: end of data stream, %d bytes remain.\n",
  227          ctx->zstr.avail_in);
  228     /* process the footer. */
  229     ctx->state = NE_Z_AFTER_DATA;
  230     return process_footer(ctx, ctx->zstr.next_in, ctx->zstr.avail_in);
  231     } else if (ret != Z_OK) {
  232         set_zlib_error(ctx, _("Could not inflate data"), ret);
  233         return NE_ERROR;
  234     }
  235     return 0;
  236 }
  237 
  238 /* Callback which is passed blocks of the response body. */
  239 static int gz_reader(void *ud, const char *buf, size_t len)
  240 {
  241     ne_decompress *ctx = ud;
  242     const char *zbuf;
  243     size_t count;
  244     const char *hdr;
  245 
  246     if (len == 0) {
  247         /* End of response: */
  248         switch (ctx->state) {
  249         case NE_Z_BEFORE_DATA:
  250             hdr = ne_get_response_header(ctx->request, "Content-Encoding");
  251             if (hdr && ne_strcasecmp(hdr, "gzip") == 0) {
  252                 /* response was truncated: return error. */
  253                 break;
  254             }
  255             /* else, fall through */
  256         case NE_Z_FINISHED: /* complete gzip response */
  257         case NE_Z_PASSTHROUGH: /* complete uncompressed response */
  258             return ctx->reader(ctx->userdata, buf, 0);
  259         default:
  260             /* invalid state: truncated response. */
  261             break;
  262         }
  263     /* else: truncated response, fail. */
  264     ne_set_error(ctx->session, "Compressed response was truncated");
  265     return NE_ERROR;
  266     }        
  267 
  268     switch (ctx->state) {
  269     case NE_Z_PASSTHROUGH:
  270     /* move along there. */
  271     return ctx->reader(ctx->userdata, buf, len);
  272 
  273     case NE_Z_FINISHED:
  274     /* Could argue for tolerance, and ignoring trailing content;
  275      * but it could mean something more serious. */
  276     if (len > 0) {
  277         ne_set_error(ctx->session,
  278              "Unexpected content received after compressed stream");
  279             return NE_ERROR;
  280     }
  281         break;
  282 
  283     case NE_Z_BEFORE_DATA:
  284     /* work out whether this is a compressed response or not. */
  285         hdr = ne_get_response_header(ctx->request, "Content-Encoding");
  286         if (hdr && ne_strcasecmp(hdr, "gzip") == 0) {
  287             int ret;
  288         NE_DEBUG(NE_DBG_HTTP, "compress: got gzipped stream.\n");
  289 
  290             /* inflateInit2() works here where inflateInit() doesn't. */
  291             ret = inflateInit2(&ctx->zstr, -MAX_WBITS);
  292             if (ret != Z_OK) {
  293                 set_zlib_error(ctx, _("Could not initialize zlib"), ret);
  294                 return -1;
  295             }
  296         ctx->zstrinit = 1;
  297 
  298     } else {
  299         /* No Content-Encoding header: pass it on.  TODO: we could
  300          * hack it and register the real callback now. But that
  301          * would require add_resp_body_rdr to have defined
  302          * ordering semantics etc etc */
  303         ctx->state = NE_Z_PASSTHROUGH;
  304         return ctx->reader(ctx->userdata, buf, len);
  305     }
  306 
  307     ctx->state = NE_Z_IN_HEADER;
  308     /* FALLTHROUGH */
  309 
  310     case NE_Z_IN_HEADER:
  311     /* copy as many bytes as possible into the buffer. */
  312     if (len + ctx->hdrcount > 10) {
  313         count = 10 - ctx->hdrcount;
  314     } else {
  315         count = len;
  316     }
  317     memcpy(ctx->header + ctx->hdrcount, buf, count);
  318     ctx->hdrcount += count;
  319     /* have we got the full header yet? */
  320     if (ctx->hdrcount != 10) {
  321         return 0;
  322     }
  323 
  324     buf += count;
  325     len -= count;
  326 
  327     switch (parse_header(ctx)) {
  328     case HDR_EXTENDED:
  329         if (len == 0)
  330         return 0;
  331         break;
  332         case HDR_ERROR:
  333             return NE_ERROR;
  334     case HDR_DONE:
  335         if (len > 0) {
  336         return do_inflate(ctx, buf, len);
  337         }
  338             break;
  339     }
  340 
  341     /* FALLTHROUGH */
  342 
  343     case NE_Z_POST_HEADER:
  344     /* eating the filename string. */
  345     zbuf = memchr(buf, '\0', len);
  346     if (zbuf == NULL) {
  347         /* not found it yet. */
  348         return 0;
  349     }
  350 
  351     NE_DEBUG(NE_DBG_HTTP,
  352          "compresss: skipped %" NE_FMT_SIZE_T " header bytes.\n", 
  353          zbuf - buf);
  354     /* found end of string. */
  355     len -= (1 + zbuf - buf);
  356     buf = zbuf + 1;
  357     ctx->state = NE_Z_INFLATING;
  358     if (len == 0) {
  359         /* end of string was at end of buffer. */
  360         return 0;
  361     }
  362 
  363     /* FALLTHROUGH */
  364 
  365     case NE_Z_INFLATING:
  366     return do_inflate(ctx, buf, len);
  367 
  368     case NE_Z_AFTER_DATA:
  369     return process_footer(ctx, (unsigned char *)buf, len);
  370     }
  371 
  372     return 0;
  373 }
  374 
  375 /* Prepare for a compressed response; may be called many times per
  376  * request, for auth retries etc. */
  377 static void gz_pre_send(ne_request *r, void *ud, ne_buffer *req)
  378 {
  379     ne_decompress *ctx = ud;
  380 
  381     if (ctx->request == r) {
  382         NE_DEBUG(NE_DBG_HTTP, "compress: Initialization.\n");
  383         
  384         /* (Re-)Initialize the context */
  385         ctx->state = NE_Z_BEFORE_DATA;
  386         if (ctx->zstrinit) inflateEnd(&ctx->zstr);
  387         ctx->zstrinit = 0;
  388         ctx->hdrcount = ctx->footcount = 0;
  389         ctx->checksum = crc32(0L, Z_NULL, 0);
  390     }
  391 }
  392 
  393 /* Wrapper for user-passed acceptor function. */
  394 static int gz_acceptor(void *userdata, ne_request *req, const ne_status *st)
  395 {
  396     ne_decompress *ctx = userdata;
  397     return ctx->acceptor(ctx->userdata, req, st);
  398 }
  399 
  400 /* A slightly ugly hack: the pre_send hook is scoped per-session, so
  401  * must check that the invoking request is this one, before doing
  402  * anything, and must be unregistered when the context is
  403  * destroyed. */
  404 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
  405                     ne_block_reader rdr, void *userdata)
  406 {
  407     ne_decompress *ctx = ne_calloc(sizeof *ctx);
  408 
  409     ne_add_request_header(req, "Accept-Encoding", "gzip");
  410 
  411     ne_add_response_body_reader(req, gz_acceptor, gz_reader, ctx);
  412 
  413     ctx->reader = rdr;
  414     ctx->userdata = userdata;
  415     ctx->session = ne_get_session(req);
  416     ctx->request = req;
  417     ctx->acceptor = acpt;
  418 
  419     ne_hook_pre_send(ne_get_session(req), gz_pre_send, ctx);
  420 
  421     return ctx;    
  422 }
  423 
  424 void ne_decompress_destroy(ne_decompress *ctx)
  425 {
  426     if (ctx->zstrinit) inflateEnd(&ctx->zstr);
  427 
  428     ne_unhook_pre_send(ctx->session, gz_pre_send, ctx);
  429 
  430     ne_free(ctx);
  431 }
  432 
  433 #else /* !NE_HAVE_ZLIB */
  434 
  435 /* Pass-through interface present to provide ABI compatibility. */
  436 
  437 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
  438                     ne_block_reader rdr, void *userdata)
  439 {
  440     ne_add_response_body_reader(req, acpt, rdr, userdata);
  441     /* an arbitrary return value: don't confuse them by returning NULL. */
  442     return (ne_decompress *)req;
  443 }
  444 
  445 void ne_decompress_destroy(ne_decompress *dc)
  446 {
  447 }
  448 
  449 #endif /* NE_HAVE_ZLIB */