"Fossies" - the Fresh Open Source Software Archive 
Member "zsync-0.6.2/libzsync/zmap.c" (16 Sep 2010, 14013 Bytes) of package /linux/privat/old/zsync-0.6.2.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "zmap.c" see the
Fossies "Dox" file reference documentation.
1
2 /*
3 * zsync - client side rsync over http
4 * Copyright (C) 2004,2005,2007,2009 Colin Phipps <cph@moria.org.uk>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the Artistic License v2 (see the accompanying
8 * file COPYING for the full license terms), or, at your option, any later
9 * version of the same license.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * COPYING file for details.
15 */
16
17 /* zmap part of libzsync
18 * Random access for gzip files. Yes, really.
19 * Code to read a zmap made by zsyncmake, map block ranges in the uncompressed
20 * data to block ranges in the compressed data, and then to configure a zlib
21 * zstream object to commence reading a compressed stream mid-stream.
22 */
23
24 #include "zsglobal.h"
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <sys/types.h>
30 #include <arpa/inet.h>
31 #ifdef HAVE_INTTYPES_H
32 #include <inttypes.h>
33 #endif
34
35 #ifdef WITH_DMALLOC
36 # include <dmalloc.h>
37 #endif
38
39 #include "zmap.h"
40 #include "format_string.h"
41
42 /* This is a record of a checkpoint in a zlib stream - we have the bit position
43 * (yes, bit - zlib compresses input bytes down to a variable number of bits)
44 * and the corresponding output byte offset.
45 * blockcount is 0 if this bit position in the zlib stream is the start of a
46 * zlib block, and is 1, 2, 3 etc for subsequent points that are in the same
47 * zlib block. */
48
49 struct zmapentry {
50 long long inbits;
51 long long outbytes;
52 int blockcount;
53 };
54
55 /* Store all the zmapentry's as an array, and the # of entries */
56 struct zmap {
57 int n;
58 struct zmapentry *e;
59 };
60
61 /* zmap_make(gzblock[], numblocks)
62 * Constructor. Supply the gzblocks (read from the Z-Map in the .zsync file)
63 */
64 struct zmap *zmap_make(const struct gzblock *zb, int n) {
65 int i;
66
67 /* Entries in the gzblock format are relative. We want absolute offsets; so
68 * here are the absolute position on the 'in' (compressed) and 'out'
69 * (uncompressed) streams as state for the loop below. */
70 long long in = 0;
71 long long out = 0;
72 int bc = 0; /* And this is the number of map points see in the current zlib block */
73
74 /* Allocate zmap, space for all its entries, fill in fields */
75 struct zmap *m = malloc(sizeof(struct zmap));
76 if (!m)
77 return m;
78 m->n = n;
79 m->e = malloc(sizeof(struct zmapentry) * n);
80 if (!m->e) {
81 free(m);
82 return NULL;
83 }
84
85 /* Convert the packed on-disk platform-neutral storage into our in-memory
86 * native storage with absolute offsets. Entry-by-entry. */
87 for (i = 0; i < n; i++) {
88 uint16_t ob = ntohs(zb[i].outbyteoffset);
89
90 /* Identify zlib block starts and adjust in-block count accordingly */
91 if (ob & GZB_NOTBLOCKSTART) {
92 ob &= ~GZB_NOTBLOCKSTART;
93 bc++;
94 }
95 else {
96 bc = 0;
97 }
98
99 /* Calculate absolute position of this map entry */
100 in += ntohs(zb[i].inbitoffset);
101 out += ob;
102
103 /* And write the entry */
104 m->e[i].inbits = in;
105 m->e[i].outbytes = out;
106 m->e[i].blockcount = bc;
107 }
108 return m;
109 }
110
111 /* zmap_free - destructor */
112 void zmap_free(struct zmap *m) {
113 free(m->e);
114 free(m);
115 }
116
117 /* consolidate_byteranges
118 * The byte ranges in the compressed content determined as sufficient to return
119 * each range in the uncompressed content could contain duplication/overlap.
120 * So we go through and consolidate any overlapping ranges.
121 */
122 static off_t* consolidate_byteranges(off_t* zbyterange, int* num) {
123 int k = *num;
124 int i;
125
126 for (i = 0; i < k - 1;) {
127 if (zbyterange[2 * i + 1] >= zbyterange[2 * (i + 1)]) {
128 /* Ranges overlap, merge
129 * The end of the first range need not be before the end of the
130 * second, so this if () block is to set the end of the combined block
131 * to the greater of the two.
132 * The start of the second block could be before the start of the first:
133 * but this only occurs where the second range is a block header, and
134 * the first range is some data that an earlier uncompressed range
135 * needed out of the same block, in which case we are guaranteed that
136 * the block header must have been requested earlier, and so the second
137 * block here can be dropped anyway.
138 */
139 if (zbyterange[2 * i + 1] < zbyterange[2 * (i + 1) + 1])
140 zbyterange[2 * i + 1] = zbyterange[2 * (i + 1) + 1];
141
142 /* Now eliminate zbyterange[2*(i+1) +0 and +1]; */
143 memmove(&zbyterange[2 * i + 2], &zbyterange[2 * i + 4],
144 2 * (k - 2 - i) * sizeof(zbyterange[0]));
145 k--;
146 }
147 else
148 i++;
149 }
150 /* Update the number of ranges with the new number, and fit the memory
151 * allocation to the actual number of ranges it contains */
152 *num = k;
153 if (k > 0) {
154 zbyterange = realloc(zbyterange, 2 * k * sizeof *zbyterange);
155 }
156 return zbyterange;
157 }
158
159 /* num_ranges = find_compressed_ranges_for(
160 * zmap, ranges[], num_ranges, &state, start_offset, end_offset)
161 * Adds byte ranges to the supplied ranges structure (and returns the new total
162 * number) such that the compressed content contained will certainly produce
163 * the uncompressed content [start_offset, end_offset).
164 *
165 * &state is a long long that the caller provides to keep state between calls
166 * (records the last block header that we added a range for, so we don't re-add
167 * it again.) Returns -1 on error.
168 *
169 * Adds at most 2 byte ranges per call, and the caller is responsible for
170 * ensuring that ranges[] has enough room for at least that.
171 */
172 static int find_compressed_ranges_for(const struct zmap* zm, off_t* zbyterange,
173 int k, long long* lastwroteblockstart_inbitoffset,
174 long long start, long long end)
175 {
176 int j;
177
178 /* the zstart, zend vars are the main state for the loop below:
179 * zstart == -1: we are looking for the first compressed block containing
180 * data from our target range.
181 * zstart is an offset, zend == -1: we found the start, now looking for
182 * the first compressed block that is outside the range that we need.
183 * zstart, zend are offsets: we got the end of the block too, all done. */
184 long long zstart = -1;
185 long long zend = -1;
186
187 /* This is the offset of the previous compressed block start. See comment
188 * in the loop below for where/why we need it. */
189 long long lastblockstart_inbitoffset = 0;
190
191 /* Step through the blocks of compressed data */
192 for (j = 0; j < zm->n && (zstart == -1 || zend == -1); j++) {
193 register long long inbitoffset = zm->e[j].inbits;
194 register long long outbyteoffset = zm->e[j].outbytes;
195
196 /* Is this the first block that comes after the start point - if so, the
197 * previous block is the place to start */
198 if (start < outbyteoffset && zstart == -1) {
199 if (j == 0)
200 break;
201 zstart = zm->e[j - 1].inbits;
202
203 /* We need the zlib block header for range of compressed data
204 * - you can't decompress the data without knowing the huffman tree
205 * for this block of data.
206 * So, immediately add a range of at least
207 * *** WARNING MAGIC NUMBER *** 200 bytes
208 * (which is a guess by me, I think the zlib header never exceeds that)
209 * covering the preceding zlib block header */
210
211 if (*lastwroteblockstart_inbitoffset !=
212 lastblockstart_inbitoffset) {
213 zbyterange[2 * k] = lastblockstart_inbitoffset / 8;
214 zbyterange[2 * k + 1] = zbyterange[2 * k] + 200;
215 k++;
216 *lastwroteblockstart_inbitoffset = lastblockstart_inbitoffset;
217 }
218 }
219
220 /* We need to remember the most recent zlib block header, for the above.
221 * (Note this is after the section above, because the code above is
222 * looking at the previous checkpoint, zm->e[j-1]. Only now do we worry
223 * about data in zm->e[j] .)
224 * If blockcount == 0, this point in the compressed data is a block header */
225 if (zm->e[j].blockcount == 0) { /* Block starts here */
226 lastblockstart_inbitoffset = inbitoffset;
227 }
228
229 /* If we have passed the start, and we have now passed the end, then
230 * the end of this block is the end of the range to fetch. Special
231 * case end of stream, where the range libzsync knows about could
232 * extend beyond the range of the zlib stream. */
233 if (start < outbyteoffset
234 && (end <= outbyteoffset || j == zm->n - 1)) {
235 zend = inbitoffset;
236 }
237 }
238
239 /* If we failed to get either start or end, we're in trouble */
240 if (zend == -1 || zstart == -1) {
241 return -1;
242 }
243
244 /* Finally, translate bits to bytes and store these in our list of ranges
245 * to get, and return the number of ranges to the caller so they know how
246 * many they have now */
247 zbyterange[2 * k] = zstart / 8;
248 zbyterange[2 * k + 1] = (zend + 7) / 8;
249 return k+1;
250 }
251
252 /* zmap_to_compressed_ranges(self, byteranges[], num_ranges, &num_zranges)
253 * For each range of data that we want from the uncompressed file, work out a
254 * corresponding byte range in the compressed file that definitely includes the
255 * data in the target range. Return the byte ranges for the compressed file.
256 *
257 * Returns: byte ranges in the compressed file; and number of ranges in *num_zranges.
258 * Ranges are an array of off_t[2*num_ranges], each byte range having 2 entries
259 * for start and end.
260 */
261 off_t *zmap_to_compressed_ranges(const struct zmap *zm, off_t * byterange,
262 int nrange, int *num) {
263 int i;
264 long long lastwroteblockstart_inbitoffset = 0;
265
266 /* Allocate enough space to contain the byte ranges in the compressed file.
267 * Allocate more than we need and shrink to fit at the end -
268 * 2 byte ranges (of 2 off_t each) per range is the limit. */
269 off_t *zbyterange = malloc(2 * 2 * nrange * sizeof *byterange);
270 int k = 0; /* The number of zbyterange entries we actually have so far (each of 2 off_t) */
271
272 for (i = 0; i < nrange; i++) {
273 /* (try to) Find byte ranges in the compressed file to get this the ith
274 * byterange. */
275 k = find_compressed_ranges_for(zm, zbyterange, k, &lastwroteblockstart_inbitoffset,
276 byterange[2 * i], byterange[2 * i + 1]);
277 if (k < 0) {
278 fprintf(stderr, "Z-Map couldn't tell us how to find " OFF_T_PF "-" OFF_T_PF "\n", byterange[2 * i], byterange[2 * i + 1]);
279 free(zbyterange);
280 return NULL;
281 }
282 }
283
284 /* Return the # of ranges and the array of byte ranges we have built
285 * after consolidating ranges where possible */
286 *num = k;
287 return consolidate_byteranges(zbyterange, num);
288 }
289
290 #include "zlib/zlib.h"
291 /* zmap_search(self, offset)
292 * Find this offset in the Z-Map */
293 int zmap_search(const struct zmap* zm, long zoffset) {
294 /* State for binary search */
295 int low = 0;
296 int high = zm->n - 1;
297
298 while (low <= high) {
299 int m = (low + high) / 2;
300 long long inbyte = zm->e[m].inbits / 8;
301 if (inbyte == zoffset) {
302 low = high = m;
303 break;
304 }
305 else if (zoffset < inbyte) {
306 high = m - 1;
307 }
308 else {
309 low = m + 1;
310 }
311 }
312 if (low > high) {
313 fprintf(stderr, "bad offset %ld, not in z-map\n", zoffset);
314 exit(3);
315 }
316 return low;
317 }
318
319 /* configure_zstream_for_zdata(self, zstream, offset, &poutoffset)
320 * Given an zoffset and a zmap, configure the supplied zstream to be in the
321 * correct state to interpret the compressed data stream read from the
322 * compressed file at this offset. And return the offset in the uncompressed
323 * stream that this corresponds to in the supplied long long* .
324 * NOTE: the caller must call zlib:updatewindow() on the zstream to supply it
325 * with 32k of leading context in the uncompressed stream, before the zstream
326 * can be used to actually decompress.
327 *
328 * Requires some cooperation from the caller - it should not be called with an
329 * offset that is not a block start unless it has been previously called with
330 * an offset that is a block start, and the most recent block start call should
331 * be for the same block as the current offset is contained in. If you call
332 * this with the start offsets of blocks returned by zmap_to_compressed_ranges
333 * and in the order that it returned them, this condition is satisfied.
334 */
335 void configure_zstream_for_zdata(const struct zmap *zm, z_stream * zs,
336 long zoffset, long long *poutoffset) {
337 /* Find the zmap entry corresponding to this offset */
338 int i = zmap_search(zm, zoffset);
339
340 /* If this is a compressed block start (so a new block), restart the
341 * decompression fresh at this point */
342 if (!zm->e[i].blockcount) {
343 /* Release any old inflate object */
344 if (zs->total_in > 0)
345 inflateEnd(zs);
346
347 inflateInit2(zs, -MAX_WBITS);
348 }
349
350 /* Else, the stream should already be configured for decompressing this
351 * block. Sanity check - this zstream should have read data (the block
352 * header) already). */
353 else if (zs->total_in == 0) {
354 fprintf(stderr, "bad first offset %ld, not a block start.\n", zoffset);
355 exit(3);
356 }
357
358 /* Work out what the decompressed data will correspond to */
359 *poutoffset = zm->e[i].outbytes;
360
361 /* Align with the bitstream */
362 inflate_advance(zs, zoffset, zm->e[i].inbits % 8, !zm->e[i].blockcount);
363 }