"Fossies" - the Fresh Open Source Software Archive

Member "zsync-0.6.2/libzsync/zmap.c" (16 Sep 2010, 14013 Bytes) of package /linux/privat/old/zsync-0.6.2.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "zmap.c" see the Fossies "Dox" file reference documentation.

    1 
    2 /*
    3  *   zsync - client side rsync over http
    4  *   Copyright (C) 2004,2005,2007,2009 Colin Phipps <cph@moria.org.uk>
    5  *
    6  *   This program is free software; you can redistribute it and/or modify
    7  *   it under the terms of the Artistic License v2 (see the accompanying 
    8  *   file COPYING for the full license terms), or, at your option, any later 
    9  *   version of the same license.
   10  *
   11  *   This program is distributed in the hope that it will be useful,
   12  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
   13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   14  *   COPYING file for details.
   15  */
   16 
   17 /* zmap part of libzsync
   18  * Random access for gzip files. Yes, really.
   19  * Code to read a zmap made by zsyncmake, map block ranges in the uncompressed
   20  * data to block ranges in the compressed data, and then to configure a zlib
   21  * zstream object to commence reading a compressed stream mid-stream.
   22  */
   23 
   24 #include "zsglobal.h"
   25 
   26 #include <stdio.h>
   27 #include <stdlib.h>
   28 #include <string.h>
   29 #include <sys/types.h>
   30 #include <arpa/inet.h>
   31 #ifdef HAVE_INTTYPES_H
   32 #include <inttypes.h>
   33 #endif
   34 
   35 #ifdef WITH_DMALLOC
   36 # include <dmalloc.h>
   37 #endif
   38 
   39 #include "zmap.h"
   40 #include "format_string.h"
   41 
   42 /* This is a record of a checkpoint in a zlib stream - we have the bit position
   43  * (yes, bit - zlib compresses input bytes down to a variable number of bits)
   44  * and the corresponding output byte offset.
   45  * blockcount is 0 if this bit position in the zlib stream is the start of a
   46  *  zlib block, and is 1, 2, 3 etc for subsequent points that are in the same
   47  *  zlib block. */
   48 
   49 struct zmapentry {
   50     long long inbits;
   51     long long outbytes;
   52     int blockcount;
   53 };
   54 
   55 /* Store all the zmapentry's as an array, and the # of entries */
   56 struct zmap {
   57     int n;
   58     struct zmapentry *e;
   59 };
   60 
   61 /* zmap_make(gzblock[], numblocks)
   62  * Constructor. Supply the gzblocks (read from the Z-Map in the .zsync file)
   63  */
   64 struct zmap *zmap_make(const struct gzblock *zb, int n) {
   65     int i;
   66 
   67     /* Entries in the gzblock format are relative. We want absolute offsets; so
   68      * here are the absolute position on the 'in' (compressed) and 'out'
   69      * (uncompressed) streams as state for the loop below. */
   70     long long in = 0;
   71     long long out = 0;
   72     int bc = 0;     /* And this is the number of map points see in the current zlib block */
   73 
   74     /* Allocate zmap, space for all its entries, fill in fields */
   75     struct zmap *m = malloc(sizeof(struct zmap));
   76     if (!m)
   77         return m;
   78     m->n = n;
   79     m->e = malloc(sizeof(struct zmapentry) * n);
   80     if (!m->e) {
   81         free(m);
   82         return NULL;
   83     }
   84 
   85     /* Convert the packed on-disk platform-neutral storage into our in-memory
   86      * native storage with absolute offsets. Entry-by-entry. */
   87     for (i = 0; i < n; i++) {
   88         uint16_t ob = ntohs(zb[i].outbyteoffset);
   89 
   90         /* Identify zlib block starts and adjust in-block count accordingly */
   91         if (ob & GZB_NOTBLOCKSTART) {
   92             ob &= ~GZB_NOTBLOCKSTART;
   93             bc++;
   94         }
   95         else {
   96             bc = 0;
   97         }
   98 
   99         /* Calculate absolute position of this map entry */
  100         in += ntohs(zb[i].inbitoffset);
  101         out += ob;
  102 
  103         /* And write the entry */
  104         m->e[i].inbits = in;
  105         m->e[i].outbytes = out;
  106         m->e[i].blockcount = bc;
  107     }
  108     return m;
  109 }
  110 
  111 /* zmap_free - destructor */
  112 void zmap_free(struct zmap *m) {
  113     free(m->e);
  114     free(m);
  115 }
  116 
  117 /* consolidate_byteranges
  118  * The byte ranges in the compressed content determined as sufficient to return
  119  * each range in the uncompressed content could contain duplication/overlap.
  120  * So we go through and consolidate any overlapping ranges.
  121  */
  122 static off_t* consolidate_byteranges(off_t* zbyterange, int* num) {
  123     int k = *num;
  124     int i;
  125 
  126     for (i = 0; i < k - 1;) {
  127         if (zbyterange[2 * i + 1] >= zbyterange[2 * (i + 1)]) {
  128             /* Ranges overlap, merge
  129             * The end of the first range need not be before the end of the
  130              *  second, so this if () block is to set the end of the combined block
  131              *  to the greater of the two.
  132              * The start of the second block could be before the start of the first:
  133              *  but this only occurs where the second range is a block header, and
  134              *  the first range is some data that an earlier uncompressed range
  135              *  needed out of the same block, in which case we are guaranteed that
  136              *  the block header must have been requested earlier, and so the second
  137              *  block here can be dropped anyway.
  138              */
  139             if (zbyterange[2 * i + 1] < zbyterange[2 * (i + 1) + 1])
  140                 zbyterange[2 * i + 1] = zbyterange[2 * (i + 1) + 1];
  141 
  142             /* Now eliminate zbyterange[2*(i+1) +0 and +1]; */
  143             memmove(&zbyterange[2 * i + 2], &zbyterange[2 * i + 4],
  144                     2 * (k - 2 - i) * sizeof(zbyterange[0]));
  145             k--;
  146         }
  147         else
  148             i++;
  149     }
  150     /* Update the number of ranges with the new number, and fit the memory
  151      * allocation to the actual number of ranges it contains */
  152     *num = k;
  153     if (k > 0) {
  154         zbyterange = realloc(zbyterange, 2 * k * sizeof *zbyterange);
  155     }
  156     return zbyterange;
  157 }
  158 
  159 /* num_ranges = find_compressed_ranges_for(
  160  *      zmap, ranges[], num_ranges, &state, start_offset, end_offset)
  161  * Adds byte ranges to the supplied ranges structure (and returns the new total
  162  * number) such that the compressed content contained will certainly produce
  163  * the uncompressed content [start_offset, end_offset).
  164  *
  165  * &state is a long long that the caller provides to keep state between calls
  166  * (records the last block header that we added a range for, so we don't re-add
  167  * it again.) Returns -1 on error.
  168  *
  169  * Adds at most 2 byte ranges per call, and the caller is responsible for
  170  * ensuring that ranges[] has enough room for at least that.
  171  */
  172 static int find_compressed_ranges_for(const struct zmap* zm, off_t* zbyterange,
  173         int k, long long* lastwroteblockstart_inbitoffset,
  174         long long start, long long end)
  175 {
  176     int j;
  177 
  178     /* the zstart, zend vars are the main state for the loop below:
  179      *  zstart == -1: we are looking for the first compressed block containing
  180      *    data from our target range.
  181      *  zstart is an offset, zend == -1: we found the start, now looking for
  182      *    the first compressed block that is outside the range that we need.
  183      *  zstart, zend are offsets: we got the end of the block too, all done. */
  184     long long zstart = -1;
  185     long long zend = -1;
  186 
  187     /* This is the offset of the previous compressed block start. See comment
  188      * in the loop below for where/why we need it. */
  189     long long lastblockstart_inbitoffset = 0;
  190 
  191     /* Step through the blocks of compressed data */
  192     for (j = 0; j < zm->n && (zstart == -1 || zend == -1); j++) {
  193         register long long inbitoffset = zm->e[j].inbits;
  194         register long long outbyteoffset = zm->e[j].outbytes;
  195 
  196         /* Is this the first block that comes after the start point - if so, the
  197          * previous block is the place to start */
  198         if (start < outbyteoffset && zstart == -1) {
  199             if (j == 0)
  200                 break;
  201             zstart = zm->e[j - 1].inbits;
  202 
  203             /* We need the zlib block header for range of compressed data
  204              * - you can't decompress the data without knowing the huffman tree
  205              * for this block of data.
  206              * So, immediately add a range of at least
  207              *  *** WARNING MAGIC NUMBER ***           200 bytes
  208              * (which is a guess by me, I think the zlib header never exceeds that)
  209              * covering the preceding zlib block header */
  210 
  211             if (*lastwroteblockstart_inbitoffset !=
  212                     lastblockstart_inbitoffset) {
  213                 zbyterange[2 * k] = lastblockstart_inbitoffset / 8;
  214                 zbyterange[2 * k + 1] = zbyterange[2 * k] + 200;
  215                 k++;
  216                 *lastwroteblockstart_inbitoffset = lastblockstart_inbitoffset;
  217             }
  218         }
  219 
  220         /* We need to remember the most recent zlib block header, for the above.
  221          * (Note this is after the section above, because the code above is
  222          *  looking at the previous checkpoint, zm->e[j-1]. Only now do we worry
  223          *  about data in zm->e[j] .)
  224          * If blockcount == 0, this point in the compressed data is a block header */
  225         if (zm->e[j].blockcount == 0) {     /* Block starts here */
  226             lastblockstart_inbitoffset = inbitoffset;
  227         }
  228 
  229         /* If we have passed the start, and we have now passed the end, then
  230          * the end of this block is the end of the range to fetch.  Special
  231          * case end of stream, where the range libzsync knows about could
  232          * extend beyond the range of the zlib stream. */
  233         if (start < outbyteoffset
  234                 && (end <= outbyteoffset || j == zm->n - 1)) {
  235             zend = inbitoffset;
  236         }
  237     }
  238 
  239     /* If we failed to get either start or end, we're in trouble */
  240     if (zend == -1 || zstart == -1) {
  241         return -1;
  242     }
  243 
  244     /* Finally, translate bits to bytes and store these in our list of ranges
  245      * to get, and return the number of ranges to the caller so they know how
  246      * many they have now */
  247     zbyterange[2 * k] = zstart / 8;
  248     zbyterange[2 * k + 1] = (zend + 7) / 8;
  249     return k+1;
  250 }
  251 
  252 /* zmap_to_compressed_ranges(self, byteranges[], num_ranges, &num_zranges)
  253  * For each range of data that we want from the uncompressed file, work out a
  254  * corresponding byte range in the compressed file that definitely includes the
  255  * data in the target range. Return the byte ranges for the compressed file.
  256  *
  257  * Returns: byte ranges in the compressed file; and number of ranges in *num_zranges.
  258  * Ranges are an array of off_t[2*num_ranges], each byte range having 2 entries
  259  * for start and end.
  260  */
  261 off_t *zmap_to_compressed_ranges(const struct zmap *zm, off_t * byterange,
  262                                  int nrange, int *num) {
  263     int i;
  264     long long lastwroteblockstart_inbitoffset = 0;
  265 
  266     /* Allocate enough space to contain the byte ranges in the compressed file.
  267      * Allocate more than we need and shrink to fit at the end -
  268      *  2 byte ranges (of 2 off_t each) per range is the limit. */
  269     off_t *zbyterange = malloc(2 * 2 * nrange * sizeof *byterange);
  270     int k = 0; /* The number of zbyterange entries we actually have so far (each of 2 off_t) */
  271 
  272     for (i = 0; i < nrange; i++) {
  273         /* (try to) Find byte ranges in the compressed file to get this the ith
  274          * byterange. */
  275         k = find_compressed_ranges_for(zm, zbyterange, k, &lastwroteblockstart_inbitoffset,
  276                                        byterange[2 * i], byterange[2 * i + 1]);
  277         if (k < 0) {
  278             fprintf(stderr, "Z-Map couldn't tell us how to find " OFF_T_PF "-" OFF_T_PF "\n", byterange[2 * i], byterange[2 * i + 1]);
  279             free(zbyterange);
  280             return NULL;
  281         }
  282     }
  283 
  284     /* Return the # of ranges and the array of byte ranges we have built 
  285      * after consolidating ranges where possible */
  286     *num = k;
  287     return consolidate_byteranges(zbyterange, num);
  288 }
  289 
  290 #include "zlib/zlib.h"
  291 /* zmap_search(self, offset)
  292  * Find this offset in the Z-Map */
  293 int zmap_search(const struct zmap* zm, long zoffset) {
  294     /* State for binary search */
  295     int low = 0;
  296     int high = zm->n - 1;
  297 
  298     while (low <= high) {
  299         int m = (low + high) / 2;
  300         long long inbyte = zm->e[m].inbits / 8;
  301         if (inbyte == zoffset) {
  302             low = high = m;
  303             break;
  304         }
  305         else if (zoffset < inbyte) {
  306             high = m - 1;
  307         }
  308         else {
  309             low = m + 1;
  310         }
  311     }
  312     if (low > high) {
  313         fprintf(stderr, "bad offset %ld, not in z-map\n", zoffset);
  314         exit(3);
  315     }
  316     return low;
  317 }
  318 
  319 /* configure_zstream_for_zdata(self, zstream, offset, &poutoffset)
  320  * Given an zoffset and a zmap, configure the supplied zstream to be in the
  321  * correct state to interpret the compressed data stream read from the
  322  * compressed file at this offset. And return the offset in the uncompressed
  323  * stream that this corresponds to in the supplied long long* .
  324  * NOTE: the caller must call zlib:updatewindow() on the zstream to supply it
  325  * with 32k of leading context in the uncompressed stream, before the zstream
  326  * can be used to actually decompress.
  327  *
  328  * Requires some cooperation from the caller - it should not be called with an
  329  * offset that is not a block start unless it has been previously called with
  330  * an offset that is a block start, and the most recent block start call should
  331  * be for the same block as the current offset is contained in. If you call
  332  * this with the start offsets of blocks returned by zmap_to_compressed_ranges
  333  * and in the order that it returned them, this condition is satisfied.
  334  */
  335 void configure_zstream_for_zdata(const struct zmap *zm, z_stream * zs,
  336                                  long zoffset, long long *poutoffset) {
  337     /* Find the zmap entry corresponding to this offset */
  338     int i = zmap_search(zm, zoffset);
  339 
  340     /* If this is a compressed block start (so a new block), restart the
  341      * decompression fresh at this point */
  342     if (!zm->e[i].blockcount) {
  343         /* Release any old inflate object */
  344         if (zs->total_in > 0)
  345             inflateEnd(zs);
  346 
  347         inflateInit2(zs, -MAX_WBITS);
  348     }
  349 
  350     /* Else, the stream should already be configured for decompressing this
  351      * block. Sanity check - this zstream should have read data (the block
  352      * header) already). */
  353     else if (zs->total_in == 0) {
  354         fprintf(stderr, "bad first offset %ld, not a block start.\n", zoffset);
  355         exit(3);
  356     }
  357 
  358     /* Work out what the decompressed data will correspond to */
  359     *poutoffset = zm->e[i].outbytes;
  360 
  361     /* Align with the bitstream */
  362     inflate_advance(zs, zoffset, zm->e[i].inbits % 8, !zm->e[i].blockcount);
  363 }