"Fossies" - the Fresh Open Source Software Archive

Member "zsync-0.6.2/make.c" (16 Sep 2010, 30392 Bytes) of package /linux/privat/old/zsync-0.6.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "make.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  *   zsync - client side rsync over http
    3  *   Copyright (C) 2004,2005,2009 Colin Phipps <cph@moria.org.uk>
    4  *
    5  *   This program is free software; you can redistribute it and/or modify
    6  *   it under the terms of the Artistic License v2 (see the accompanying 
    7  *   file COPYING for the full license terms), or, at your option, any later 
    8  *   version of the same license.
    9  *
   10  *   This program is distributed in the hope that it will be useful,
   11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
   12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13  *   COPYING file for details.
   14  */
   15 
   16 /* Command-line utility to create .zsync files */
   17 
   18 #include "zsglobal.h"
   19 
   20 #include <stdio.h>
   21 #include <stdlib.h>
   22 #include <string.h>
   23 #include <unistd.h>
   24 #include <ctype.h>
   25 #include <errno.h>
   26 #include <libgen.h>
   27 #include <math.h>
   28 #include <time.h>
   29 
   30 #include <arpa/inet.h>
   31 #ifdef HAVE_INTTYPES_H
   32 #include <inttypes.h>
   33 #endif
   34 
   35 #include <sys/types.h>
   36 #include <sys/stat.h>
   37 
   38 #ifdef WITH_DMALLOC
   39 # include <dmalloc.h>
   40 #endif
   41 
   42 #include "makegz.h"
   43 #include "librcksum/rcksum.h"
   44 #include "libzsync/zmap.h"
   45 #include "libzsync/sha1.h"
   46 #include "zlib/zlib.h"
   47 #include "format_string.h"
   48 
/* We're only doing one file per run, so these are global state for the current
 * file being processed */
SHA1_CTX shactx;        /* running SHA-1 state; fed with SHA1Update() as data is read */
size_t blocksize = 0;   /* zsync block size in bytes; 0 means "not chosen yet" */
off_t len = 0;          /* bytes of (uncompressed) target data accounted for so far */

/* And settings from the command line */
int verbose = 0;            /* incremented once per -v */
static int no_look_inside;  /* set by -Z; when set, gzip input is not detected/decoded */
   58 
   59 /* stream_error(function, stream) - Exit with IO-related error message */
   60 void __attribute__ ((noreturn)) stream_error(const char *func, FILE * stream) {
   61     fprintf(stderr, "%s: %s\n", func, strerror(ferror(stream)));
   62     exit(2);
   63 }
   64 
   65 /* write_block_sums(buffer[], num_bytes, output_stream)
   66  * Given one block of data, calculate the checksums for this block and write
   67  * them (as raw bytes) to the given output stream */
   68 static void write_block_sums(unsigned char *buf, size_t got, FILE * f) {
   69     struct rsum r;
   70     unsigned char checksum[CHECKSUM_SIZE];
   71 
   72     /* Pad for our checksum, if this is a short last block  */
   73     if (got < blocksize)
   74         memset(buf + got, 0, blocksize - got);
   75 
   76     /* Do rsum and checksum, and convert to network endian */
   77     r = rcksum_calc_rsum_block(buf, blocksize);
   78     rcksum_calc_checksum(&checksum[0], buf, blocksize);
   79     r.a = htons(r.a);
   80     r.b = htons(r.b);
   81 
   82     /* Write them raw to the stream */
   83     if (fwrite(&r, sizeof r, 1, f) != 1)
   84         stream_error("fwrite", f);
   85     if (fwrite(checksum, sizeof checksum, 1, f) != 1)
   86         stream_error("fwrite", f);
   87 }
   88 
   89 /* long long pos = in_position(z_stream*)
   90  * Returns the position (in bits) that zlib has used in the compressed data
   91  * stream so far */
   92 static inline long long in_position(z_stream * pz) {
   93     return pz->total_in * (long long)8 - (63 & pz->data_type);
   94 }
   95 
/* State for compressed file handling */
static FILE *zmap;          /* temp file receiving zmap entries; created by tmpfile() in do_zstream() */
static int zmapentries;     /* count of entries written so far by write_zmap_delta() */
static char *zhead;         /* malloc'd hex string of the gzip header bytes; set in do_zstream() */
  100 
  101 /* write_zmap_delta(*prev_in, *prev_out, new_in, new_out, blockstart)
  102  * Given a position in the compressed and uncompressed streams, write a
  103  * checkpoint/map entry (to the stream held in the global variable zmap).
  104  * This is relative to the previous position supplied, and positions must be
  105  * supplied in order; caller provide two long long* as the first two parameters
  106  * for write_zmap_delta to use to keep state in.
  107  * blockstart is a boolean, is true if this is the start of a zlib block
  108  * (otherwise, this is a mid-block marker).
  109  */
  110 static void write_zmap_delta(long long *prev_in, long long *prev_out,
  111                              long long new_in, long long new_out,
  112                              int blockstart) {
  113     struct gzblock g;
  114     {   /* Calculate number of bits that the input (compressed stream) pointer
  115          * has advanced from the previous entry. */
  116         uint16_t inbits = new_in - *prev_in;
  117 
  118         if (*prev_in + inbits != new_in) {
  119             fprintf(stderr,
  120                     "too long between blocks (try a smaller block size with -b)\n");
  121             exit(1);
  122         }
  123 
  124         /* Convert to network endian, save in zmap struct, update state */
  125         inbits = htons(inbits);
  126         g.inbitoffset = inbits;
  127         *prev_in = new_in;
  128     }
  129     {   /* Calculate number of bits that the output (uncompressed stream)
  130          * pointer has advanced from the previous entry. */
  131         uint16_t outbytes = new_out - *prev_out;
  132 
  133         outbytes &= ~GZB_NOTBLOCKSTART;
  134         if ((long long)outbytes + *prev_out != new_out) {
  135             fprintf(stderr, "too long output of block blocks?");
  136             exit(1);
  137         }
  138         /* Encode blockstart marker in this value */
  139         if (!blockstart)
  140             outbytes |= GZB_NOTBLOCKSTART;
  141 
  142         /* Convert to network endian, save in zmap struct, update state */
  143         outbytes = htons(outbytes);
  144         g.outbyteoffset = outbytes;
  145         *prev_out = new_out;
  146     }
  147 
  148     /* Write out the zmap delta struct */
  149     if (fwrite(&g, sizeof(g), 1, zmap) != 1) {
  150         perror("write");
  151         exit(1);
  152     }
  153 
  154     /* And keep state */
  155     zmapentries++;
  156 }
  157 
  158 /* do_zstream(data_stream, zsync_stream, buffer, buffer_len)
  159  * Constructs the zmap for a compressed data stream, in a temporary file.
  160  * The compressed data is from data_stream, except that some bytes have already
  161  * been read from it - those are supplied in buffer (buffer_len of them).
  162  * The zsync block checksums are written to zsync_stream, and the zmap is
  163  * written to a temp file and the handle returned in the global var zmap.
  164  */
  165 void do_zstream(FILE * fin, FILE * fout, const char *bufsofar, size_t got) {
  166     z_stream zs;
  167     Bytef *inbuf = malloc(blocksize);
  168     const size_t inbufsz = blocksize;
  169     Bytef *outbuf = malloc(blocksize);
  170     int eoz = 0;
  171     int header_bits;
  172     long long prev_in = 0;
  173     long long prev_out = 0;
  174     long long midblock_in = 0;
  175     long long midblock_out = 0;
  176     int want_zdelta = 0;
  177 
  178     if (!inbuf || !outbuf) {
  179         fprintf(stderr, "memory allocation failure\n");
  180         exit(1);
  181     }
  182 
  183     /* Initialize decompressor */
  184     zs.zalloc = Z_NULL;
  185     zs.zfree = Z_NULL;
  186     zs.opaque = NULL;
  187     zs.next_in = inbuf;
  188     zs.avail_in = 0;
  189     zs.total_in = 0;
  190     zs.next_out = outbuf;
  191     zs.avail_out = 0;
  192     if (inflateInit2(&zs, -MAX_WBITS) != Z_OK)
  193         exit(-1);
  194 
  195     {   /* Skip gzip header and do initial buffer fill */
  196         const char *p = skip_zhead(bufsofar);
  197 
  198         {   /* Store hex version of gzip header in zhead */
  199             int header_bytes = p - bufsofar;
  200             int i;
  201 
  202             header_bits = 8 * header_bytes;
  203             got -= header_bytes;
  204 
  205             zhead = malloc(1 + 2 * header_bytes);
  206             for (i = 0; i < header_bytes; i++)
  207                 sprintf(zhead + 2 * i, "%02x", (unsigned char)bufsofar[i]);
  208         }
  209         if (got > inbufsz) {
  210             fprintf(stderr,
  211                     "internal failure, " SIZE_T_PF " > " SIZE_T_PF
  212                     " input buffer available\n", got, inbufsz);
  213             exit(2);
  214         }
  215 
  216         /* Copy any remaining already-read data from the buffer to the
  217          * decompressor input buffer */
  218         memcpy(inbuf, p, got);
  219         zs.avail_in = got;
  220 
  221         /* Fill the buffer up to offset inbufsz of the input file - we want to
  222          * try and keep the input blocks aligned with block boundaries in the
  223          * underlying filesystem and physical storage */
  224         if (inbufsz > got + (header_bits / 8))
  225             zs.avail_in +=
  226                 fread(inbuf + got, 1, inbufsz - got - (header_bits / 8), fin);
  227     }
  228 
  229     /* Start the zmap. We write into a temp file, which the caller then copies into the zsync file later. */
  230     zmap = tmpfile();
  231     if (!zmap) {
  232         perror("tmpfile");
  233         exit(2);
  234     }
  235 
  236     /* We are past the header, so we are now at the start of the first block */
  237     write_zmap_delta(&prev_in, &prev_out, header_bits, zs.total_out, 1);
  238     zs.avail_out = blocksize;
  239 
  240     /* keep going until the end of the compressed stream */
  241     while (!eoz) {
  242         /* refill input buffer if empty */
  243         if (zs.avail_in == 0) {
  244             int rc = fread(inbuf, 1, inbufsz, fin);
  245             if (rc < 0) {
  246                 perror("read");
  247                 exit(2);
  248             }
  249 
  250             /* Still expecting data (!eoz and avail_in == 0) but none there. */
  251             if (rc == 0) {
  252                 fprintf(stderr, "Premature end of compressed data.\n");
  253                 exit(1);
  254             }
  255 
  256             zs.next_in = inbuf;
  257             zs.avail_in = rc;
  258         }
  259         {
  260             int rc;
  261 
  262             /* Okay, decompress more data from inbuf to outbuf.
  263              * Z_BLOCK means that decompression will halt if we reach the end of a
  264              *  compressed block in the input file.
  265              * And decompression will also stop if outbuf is filled (at which point
  266              *  we have a whole block of uncompressed data and so should write its
  267              *  checksums)
  268              *
  269              * Terminology note:
  270              * Compressed block   = zlib block (stream of bytes compressed with
  271              *                      common huffman table)
  272              * Uncompressed block = Block of blocksize bytes (starting at an
  273              *                      offset that is a whole number of blocksize
  274              *                      bytes blocks from the start of the
  275              *                      (uncompressed) data. I.e. a zsync block.
  276              */
  277             rc = inflate(&zs, Z_BLOCK);
  278             switch (rc) {
  279             case Z_STREAM_END:
  280                 eoz = 1;
  281             case Z_BUF_ERROR:  /* Not really an error, just means we provided stingy buffers */
  282             case Z_OK:
  283                 break;
  284             default:
  285                 fprintf(stderr, "zlib error %s\n", zs.msg);
  286                 exit(1);
  287             }
  288 
  289             /* If the output buffer is filled, i.e. we've now got a whole block of uncompressed data. */
  290             if (zs.avail_out == 0 || rc == Z_STREAM_END) {
  291                 /* Add to the running SHA1 of the entire file. */
  292                 SHA1Update(&shactx, outbuf, blocksize - zs.avail_out);
  293 
  294                 /* Completed a block; write out its checksums */
  295                 write_block_sums(outbuf, blocksize - zs.avail_out, fout);
  296 
  297                 /* Clear the decompressed data buffer, ready for the next block of uncompressed data. */
  298                 zs.next_out = outbuf;
  299                 zs.avail_out = blocksize;
  300 
  301                 /* Having passed a block boundary in the uncompressed data */
  302                 want_zdelta = 1;
  303             }
  304 
  305             /* If we have reached a block boundary in the compressed data */
  306             if (zs.data_type & 128 || rc == Z_STREAM_END) {
  307                 /* write out info on this block */
  308                 write_zmap_delta(&prev_in, &prev_out,
  309                                  header_bits + in_position(&zs), zs.total_out,
  310                                  1);
  311 
  312                 midblock_in = midblock_out = 0;
  313                 want_zdelta = 0;
  314             }
  315 
  316             /* If we passed a block boundary in the uncompressed data, record the
  317              * next available point at which we could stop or start decompression.
  318              * Write a zmap delta with the 1st when we see the 2nd, etc */
  319             if (want_zdelta && inflateSafePoint(&zs)) {
  320                 long long cur_in = header_bits + in_position(&zs);
  321                 if (midblock_in) {
  322                     write_zmap_delta(&prev_in, &prev_out, midblock_in,
  323                                      midblock_out, 0);
  324                 }
  325                 midblock_in = cur_in;
  326                 midblock_out = zs.total_out;
  327                 want_zdelta = 0;
  328             }
  329         }
  330     }
  331 
  332     /* Record uncompressed length */
  333     len += zs.total_out;
  334     fputc('\n', fout);
  335     /* Move back to the start of the zmap constructed, ready for the caller to read it back in */
  336     rewind(zmap);
  337 
  338     /* Clean up */
  339     inflateEnd(&zs);
  340     free(inbuf);
  341     free(outbuf);
  342 }
  343 
  344 /* read_stream_write_blocksums(data_stream, zsync_stream)
  345  * Reads the data stream and writes to the zsync stream the blocksums for the
  346  * given data. No compression handling.
  347  */
  348 void read_stream_write_blocksums(FILE * fin, FILE * fout) {
  349     unsigned char *buf = malloc(blocksize);
  350 
  351     if (!buf) {
  352         fprintf(stderr, "out of memory\n");
  353         exit(1);
  354     }
  355 
  356     while (!feof(fin)) {
  357         int got = fread(buf, 1, blocksize, fin);
  358 
  359         if (got > 0) {
  360             if (!no_look_inside && len == 0 && buf[0] == 0x1f && buf[1] == 0x8b) {
  361                 do_zstream(fin, fout, (char *)buf, got);
  362                 break;
  363             }
  364 
  365             /* The SHA-1 sum, unlike our internal block-based sums, is on the whole file and nothing else - no padding */
  366             SHA1Update(&shactx, buf, got);
  367 
  368             write_block_sums(buf, got, fout);
  369             len += got;
  370         }
  371         else {
  372             if (ferror(fin))
  373                 stream_error("fread", fin);
  374         }
  375     }
  376     free(buf);
  377 }
  378 
  379 /* fcopy(instream, outstream)
  380  * Copies data from one stream to the other until EOF on the input.
  381  */
  382 void fcopy(FILE * fin, FILE * fout) {
  383     unsigned char buf[4096];
  384     size_t len;
  385 
  386     while ((len = fread(buf, 1, sizeof(buf), fin)) > 0) {
  387         if (fwrite(buf, 1, len, fout) < len)
  388             break;
  389     }
  390     if (ferror(fin)) {
  391         stream_error("fread", fin);
  392     }
  393     if (ferror(fout)) {
  394         stream_error("fwrite", fout);
  395     }
  396 }
  397 
  398 /* fcopy_hashes(hash_stream, zsync_stream, rsum_bytes, hash_bytes)
  399  * Copy the full block checksums from their temporary store file to the .zsync,
  400  * stripping the hashes down to the desired lengths specified by the last 2
  401  * parameters.
  402  */
  403 void fcopy_hashes(FILE * fin, FILE * fout, size_t rsum_bytes, size_t hash_bytes) {
  404     unsigned char buf[20];
  405     size_t len;
  406 
  407     while ((len = fread(buf, 1, sizeof(buf), fin)) > 0) {
  408         /* write trailing rsum_bytes of the rsum (trailing because the second part of the rsum is more useful in practice for hashing), and leading checksum_bytes of the checksum */
  409         if (fwrite(buf + 4 - rsum_bytes, 1, rsum_bytes, fout) < rsum_bytes)
  410             break;
  411         if (fwrite(buf + 4, 1, hash_bytes, fout) < hash_bytes)
  412             break;
  413     }
  414     if (ferror(fin)) {
  415         stream_error("fread", fin);
  416     }
  417     if (ferror(fout)) {
  418         stream_error("fwrite", fout);
  419     }
  420 }
  421 
  422 /* read_sample_and_close(stream, len, buf)
  423  * Reads len bytes from stream into buffer */
  424 static int read_sample_and_close(FILE * f, size_t l, void *buf) {
  425     int rc = 0;
  426     if (fread(buf, 1, l, f) == l)
  427         rc = 1;
  428     else if (errno != EBADF)
  429         perror("read");
  430     fclose(f);
  431     return rc;
  432 }
  433 
  434 /* str = encode_filename(filename_str)
  435  * Returns shell-escaped version of a given (filename) string */
  436 static char *encode_filename(const char *fname) {
  437     char *cmd = malloc(2 + strlen(fname) * 2);
  438     if (!cmd)
  439         return NULL;
  440 
  441     {   /* pass through string character by character */
  442         int i, j;
  443         for (i = j = 0; fname[i]; i++) {
  444             if (!isalnum(fname[i]))
  445                 cmd[j++] = '\\';
  446             cmd[j++] = fname[i];
  447         }
  448         cmd[j] = 0;
  449     }
  450     return cmd;
  451 }
  452 
  453 /* opt_str = guess_gzip_options(filename_str)
  454  * For the given (gzip) file, try to guess the options that were used with gzip
  455  * to create it.
  456  * Returns a malloced string containing the options for gzip, or NULL */
  457 static const char *const try_opts[] =
  458     { "--best", "", "--rsync", "--rsync --best", NULL };
  459 #define SAMPLE 1024
  460 
  461 char *guess_gzip_options(const char *f) {
  462     char orig[SAMPLE];
  463     {   /* Read sample of the header of the compressed file */
  464         FILE *s = fopen(f, "r");
  465         if (!s) {
  466             perror("open");
  467             return NULL;
  468         }
  469         if (!read_sample_and_close(s, SAMPLE, orig))
  470             return NULL;
  471     }
  472     {
  473         int i;
  474         const char *o;
  475         char *enc_f = encode_filename(f);
  476         int has_mtime_fname;
  477 
  478         {
  479             int has_mtime = zhead_has_mtime(orig);
  480             int has_fname = zhead_has_fname(orig);
  481 
  482             if (has_mtime && !has_fname) {
  483                 fprintf(stderr, "can't recompress, stream has mtime but no fname\n");
  484                 return NULL;
  485             }
  486             else if (has_fname && !has_mtime) {
  487                 fprintf(stderr, "can't recompress, stream has fname but no mtime\n");
  488                 return NULL;
  489             }
  490             else {
  491                 has_mtime_fname = has_fname; /* which = has_mtime */
  492             }
  493         }
  494 
  495         /* For each likely set of options, try recompressing the content with
  496          * those options */
  497         for (i = 0; (o = try_opts[i]) != NULL; i++) {
  498             FILE *p;
  499             {   /* Compose command line */
  500                 char cmd[1024];
  501                 snprintf(cmd, sizeof(cmd), "zcat %s | gzip -n %s 2> /dev/null",
  502                         enc_f, o);
  503 
  504                 /* And run it */
  505                 if (verbose)
  506                     fprintf(stderr, "running %s to determine gzip options\n",
  507                             cmd);
  508                 p = popen(cmd, "r");
  509                 if (!p) {
  510                     perror(cmd);
  511                 }
  512             }
  513 
  514             if (p) {   /* Read the recompressed content */
  515                 char samp[SAMPLE];
  516                 if (!read_sample_and_close(p, SAMPLE, samp)) {
  517                     ;       /* Read error - just fail this one and let the loop
  518                              * try another */
  519                 }
  520                 else {
  521                     /* We have the compressed version with these options.
  522                      * Compare with the original */
  523                     const char *a = skip_zhead(orig);
  524                     const char *b = skip_zhead(samp);
  525                     if (!memcmp(a, b, 900))
  526                         break;
  527                 }
  528             }
  529         }
  530         free(enc_f);
  531 
  532         if (!o) {
  533             return NULL;
  534         }
  535         else if (has_mtime_fname) {
  536             return strdup(o);
  537         }
  538         else {  /* Add --no-name to options to return */
  539             static const char noname[] = { "--no-name" };
  540             char* opts = malloc(strlen(o)+strlen(noname)+2);
  541             if (o[0]) {
  542                 strcpy(opts, o);
  543                 strcat(opts, " ");
  544             }
  545             else { opts[0] = 0; }
  546             strcat(opts, noname);
  547             return opts;
  548         }
  549     }
  550 }
  551 
  552 /* len = get_len(stream)
  553  * Returns the length of the file underlying this stream */
  554 off_t get_len(FILE * f) {
  555     struct stat s;
  556 
  557     if (fstat(fileno(f), &s) == -1)
  558         return 0;
  559     return s.st_size;
  560 }
  561 
  562 /****************************************************************************
  563  *
  564  * Main program
  565  */
  566 int main(int argc, char **argv) {
  567     FILE *instream;
  568     char *fname = NULL, *zfname = NULL;
  569     char **url = NULL;
  570     int nurls = 0;
  571     char **Uurl = NULL;
  572     int nUurls = 0;
  573     char *outfname = NULL;
  574     FILE *fout;
  575     char *infname = NULL;
  576     int rsum_len, checksum_len, seq_matches;
  577     int do_compress = 0;
  578     int do_recompress = -1;     // -1 means we decide for ourselves
  579     int do_exact = 0;
  580     char *gzopts = NULL;
  581     time_t mtime = -1;
  582 
  583     /* Open temporary file */
  584     FILE *tf = tmpfile();
  585 
  586     {   /* Options parsing */
  587         int opt;
  588         while ((opt = getopt(argc, argv, "b:Ceo:f:u:U:vVzZ")) != -1) {
  589             switch (opt) {
  590             case 'e':
  591                 do_exact = 1;
  592                 break;
  593             case 'C':
  594                 do_recompress = 0;
  595                 break;
  596             case 'o':
  597                 if (outfname) {
  598                     fprintf(stderr, "specify -o only once\n");
  599                     exit(2);
  600                 }
  601                 outfname = strdup(optarg);
  602                 break;
  603             case 'f':
  604                 if (fname) {
  605                     fprintf(stderr, "specify -f only once\n");
  606                     exit(2);
  607                 }
  608                 fname = strdup(optarg);
  609                 break;
  610             case 'b':
  611                 blocksize = atoi(optarg);
  612                 if ((blocksize & (blocksize - 1)) != 0) {
  613                     fprintf(stderr,
  614                             "blocksize must be a power of 2 (512, 1024, 2048, ...)\n");
  615                     exit(2);
  616                 }
  617                 break;
  618             case 'u':
  619                 url = realloc(url, (nurls + 1) * sizeof *url);
  620                 url[nurls++] = optarg;
  621                 break;
  622             case 'U':
  623                 Uurl = realloc(Uurl, (nUurls + 1) * sizeof *Uurl);
  624                 Uurl[nUurls++] = optarg;
  625                 break;
  626             case 'v':
  627                 verbose++;
  628                 break;
  629             case 'V':
  630                 printf(PACKAGE " v" VERSION " (zsyncmake compiled " __DATE__ " "
  631                        __TIME__ ")\n" "By Colin Phipps <cph@moria.org.uk>\n"
  632                        "Published under the Artistic License v2, see the COPYING file for details.\n");
  633                 exit(0);
  634             case 'z':
  635                 do_compress = 1;
  636                 break;
  637             case 'Z':
  638                 no_look_inside = 1;
  639                 break;
  640             }
  641         }
  642 
  643         /* Open data to create .zsync for - either it's a supplied filename, or stdin */
  644         if (optind == argc - 1) {
  645             infname = strdup(argv[optind]);
  646             instream = fopen(infname, "rb");
  647             if (!instream) {
  648                 perror("open");
  649                 exit(2);
  650             }
  651 
  652             {   /* Get mtime if available */
  653                 struct stat st;
  654                 if (fstat(fileno(instream), &st) == 0) {
  655                     mtime = st.st_mtime;
  656                 }
  657             }
  658 
  659             /* Use supplied filename as the target filename */
  660             if (!fname)
  661                 fname = basename(argv[optind]);
  662         }
  663         else {
  664             instream = stdin;
  665         }
  666     }
  667 
  668     /* If not user-specified, choose a blocksize based on size of the input file */
  669     if (!blocksize) {
  670         blocksize = (get_len(instream) < 100000000) ? 2048 : 4096;
  671     }
  672 
  673     /* If we've been asked to compress this file, do so and substitute the
  674      * compressed version for the original */
  675     if (do_compress) {
  676         char *newfname = NULL;
  677 
  678         {   /* Try adding .gz to the input filename */
  679             char *tryfname = infname;
  680             if (!tryfname) {
  681                 tryfname = fname;
  682             }
  683             if (tryfname) {
  684                 newfname = malloc(strlen(tryfname) + 4);
  685                 if (!newfname)
  686                     exit(1);
  687                 strcpy(newfname, tryfname);
  688                 strcat(newfname, ".gz");
  689             }
  690         }
  691 
  692         /* If we still don't know what to call it, default name */
  693         if (!newfname) {
  694             newfname = strdup("zsync-target.gz");
  695             if (!newfname)
  696                 exit(1);
  697         }
  698 
  699         /* Create optimal compressed version */
  700         instream = optimal_gzip(instream, newfname, blocksize);
  701         if (!instream) {
  702             fprintf(stderr, "failed to compress\n");
  703             exit(-1);
  704         }
  705 
  706         /* This replaces the original input stream for creating the .zsync */
  707         if (infname) {
  708             free(infname);
  709             infname = newfname;
  710         }
  711         else
  712             free(newfname);
  713     }
  714 
  715     /* Read the input file and construct the checksum of the whole file, and
  716      * the per-block checksums */
  717     SHA1Init(&shactx);
  718     read_stream_write_blocksums(instream, tf);
  719 
  720     {   /* Decide how long a rsum hash and checksum hash per block we need for this file */
  721         seq_matches = len > blocksize ? 2 : 1;
  722         rsum_len = ceil(((log(len) + log(blocksize)) / log(2) - 8.6) / seq_matches / 8);
  723 
  724         /* min and max lengths of rsums to store */
  725         if (rsum_len > 4) rsum_len = 4;
  726         if (rsum_len < 2) rsum_len = 2;
  727 
  728         /* Now the checksum length; min of two calculations */
  729         checksum_len = ceil(
  730                 (20 + (log(len) + log(1 + len / blocksize)) / log(2))
  731                 / seq_matches / 8);
  732         {
  733             int checksum_len2 =
  734                 (7.9 + (20 + log(1 + len / blocksize) / log(2))) / 8;
  735             if (checksum_len < checksum_len2)
  736                 checksum_len = checksum_len2;
  737         }
  738     }
  739 
  740     /* Recompression:
  741      * Where we were given a compressed file (not an uncompressed file that we
  742      * then compressed), but we nonetheless looked inside and made a .zsync for
  743      * the uncompressed data, the user may want to actually have the client
  744      * have the compressed version once the whole operation is done. 
  745      * If so, if possible we want the compressed version that the client gets
  746      * to exactly match the original; but as the client will have to compress
  747      * it after completion of zsyncing, it might not be possible to achieve
  748      * that.
  749      * So a load of code here to work out whether (the client should)
  750      * recompress, what options it should use to do so, and to inform the
  751      * creator of the zsync if we don't think the recompression will work. 
  752      */
  753 
  754     /* The only danger of the client not getting the original file is if we have compressed;
  755      * in that case we want to recompress iff the compressed version was supplied
  756      * (i.e. we weren't told to generate it ourselves with -z). */
  757     if (do_exact) {
  758         int old_do_recompress = do_recompress;
  759         do_recompress = (zmapentries && !do_compress) ? 2 : 0;
  760         if (old_do_recompress != -1 && (!old_do_recompress) != (!do_recompress)) {
  761             fprintf(stderr,
  762                     "conflicting request for compression and exactness\n");
  763             exit(2);
  764         }
  765     }
  766 
  767     /* We recompress if we were told to, OR if
  768      *  we were left to make our own decision about recompression
  769      *  the original was compressed & the zsync is of the uncompressed (i.e. there is a zmap)
  770      *  AND this compressed original isn't one we made ourselves just for transmission
  771      */
  772     if ((do_recompress > 0)
  773         || (do_recompress == -1 && zmapentries && !do_compress))
  774         gzopts = guess_gzip_options(infname);
  775     /* We now know whether to recompress - if the above and guess_gzip_options worked */
  776     if (do_recompress == -1)
  777         do_recompress = (gzopts != NULL) ? 1 : 0;
  778     if (do_recompress > 1 && gzopts == NULL) {
  779         fprintf(stderr, "recompression required, but %s\n",
  780                 zmap ?
  781                 "could not determine gzip options to reproduce this archive" :
  782                 "we are not looking into a compressed stream");
  783         exit(2);
  784     }
  785 
  786     /* Work out filename for the .zsync */
  787     if (fname && zmapentries) {
  788         /* Remove any trailing .gz, as it is the uncompressed file being transferred */
  789         char *p = strrchr(fname, '.');
  790         if (p) {
  791             zfname = strdup(fname);
  792             if (!strcmp(p, ".gz"))
  793                 *p = 0;
  794             if (!strcmp(p, ".tgz"))
  795                 strcpy(p, ".tar");
  796         }
  797     }
  798     if (!outfname && fname) {
  799         outfname = malloc(strlen(fname) + 10);
  800         sprintf(outfname, "%s.zsync", fname);
  801     }
  802 
  803     /* Open output file */
  804     if (outfname) {
  805         fout = fopen(outfname, "wb");
  806         if (!fout) {
  807             perror("open");
  808             exit(2);
  809         }
  810         free(outfname);
  811     }
  812     else {
  813         fout = stdout;
  814     }
  815 
  816     /* Okay, start writing the zsync file */
  817     fprintf(fout, "zsync: " VERSION "\n");
  818 
  819     /* Lines we might include but which older clients can ignore */
  820     if (do_recompress) {
  821         if (zfname)
  822             fprintf(fout, "Safe: Z-Filename Recompress MTime\nZ-Filename: %s\n",
  823                     zfname);
  824         else
  825             fprintf(fout, "Safe: Recompress MTime:\n");
  826     }
  827 
  828     if (fname) {
  829         fprintf(fout, "Filename: %s\n", fname);
  830         if (mtime != -1) {
  831             char buf[32];
  832             struct tm mtime_tm;
  833 
  834             if (gmtime_r(&mtime, &mtime_tm) != NULL) {
  835                 if (strftime(buf, sizeof buf, "%a, %d %b %Y %H:%M:%S %z", &mtime_tm) > 0)
  836                     fprintf(fout, "MTime: %s\n", buf);
  837             }
  838             else {
  839                 fprintf(stderr, "error converting %d to struct tm\n", mtime);
  840             }
  841         }
  842     }
  843     fprintf(fout, "Blocksize: " SIZE_T_PF "\n", blocksize);
  844     fprintf(fout, "Length: " OFF_T_PF "\n", len);
  845     fprintf(fout, "Hash-Lengths: %d,%d,%d\n", seq_matches, rsum_len,
  846             checksum_len);
  847     {                           /* Write URLs */
  848         int i;
  849         for (i = 0; i < nurls; i++)
  850             fprintf(fout, "%s: %s\n", zmapentries ? "Z-URL" : "URL", url[i]);
  851         for (i = 0; i < nUurls; i++)
  852             fprintf(fout, "URL: %s\n", Uurl[i]);
  853     }
  854     if (nurls == 0 && infname) {
  855         /* Assume that we are in the public dir, and use relative paths.
  856          * Look for an uncompressed version and add a URL for that too, if appropriate. */
  857         fprintf(fout, "%s: %s\n", zmapentries ? "Z-URL" : "URL", infname);
  858         if (zmapentries && fname && !access(fname, R_OK)) {
  859             fprintf(fout, "URL: %s\n", fname);
  860         }
  861         fprintf(stderr,
  862                 "No URL given, so I am including a relative URL in the .zsync file - you must keep the file being served and the .zsync in the same public directory. Use -u %s to get this same result without this warning.\n",
  863                 infname);
  864     }
  865 
  866     {   /* Write out SHA1 checksum of the entire file */
  867         unsigned char digest[SHA1_DIGEST_LENGTH];
  868         unsigned int i;
  869 
  870         fputs("SHA-1: ", fout);
  871 
  872         SHA1Final(digest, &shactx);
  873 
  874         for (i = 0; i < sizeof digest; i++)
  875             fprintf(fout, "%02x", digest[i]);
  876         fputc('\n', fout);
  877     }
  878 
  879     if (do_recompress)      /* Write Recompress header if wanted */
  880         fprintf(fout, "Recompress: %s %s\n", zhead, gzopts);
  881     if (gzopts)
  882         free(gzopts);
  883 
  884     /* If we have a zmap, write it, header first and then the map itself */
  885     if (zmapentries) {
  886         fprintf(fout, "Z-Map2: %d\n", zmapentries);
  887         fcopy(zmap, fout);
  888         fclose(zmap);
  889     }
  890 
  891     /* End of headers */
  892     fputc('\n', fout);
  893 
  894     /* Now copy the actual block hashes to the .zsync */
  895     rewind(tf);
  896     fcopy_hashes(tf, fout, rsum_len, checksum_len);
  897 
  898     /* And cleanup */
  899     fclose(tf);
  900     fclose(fout);
  901 
  902     return 0;
  903 }