"Fossies" - the Fresh Open Source Software Archive

Member "zsync-0.6.2/http.c" (19 Sep 2010, 40092 Bytes) of package /linux/privat/old/zsync-0.6.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "http.c", see the Fossies "Dox" file reference documentation.

A hint: This file contains one or more very long lines, so it may be easier to read in the pure text view mode, which shows the contents as wrapped lines within the browser window.


    1 
    2 /*
    3  *   zsync - client side rsync over http
    4  *   Copyright (C) 2004,2005,2007,2009 Colin Phipps <cph@moria.org.uk>
    5  *
    6  *   This program is free software; you can redistribute it and/or modify
    7  *   it under the terms of the Artistic License v2 (see the accompanying 
    8  *   file COPYING for the full license terms), or, at your option, any later 
    9  *   version of the same license.
   10  *
   11  *   This program is distributed in the hope that it will be useful,
   12  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
   13  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   14  *   COPYING file for details.
   15  */
   16 
   17 /* HTTP client code for zsync.
   18  * Including pipeline HTTP Range fetching code.  */
   19 
   20 #include "zsglobal.h"
   21 
   22 #include <stdio.h>
   23 #include <stdlib.h>
   24 #include <string.h>
   25 #include <errno.h>
   26 #include <unistd.h>
   27 
   28 #include <sys/types.h>
   29 #include <sys/stat.h>
   30 #include <sys/socket.h>
   31 #include <netdb.h>
   32 #include <time.h>
   33 
   34 #ifndef HAVE_GETADDRINFO
   35 #include "getaddrinfo.h"
   36 #endif
   37 
   38 #ifdef WITH_DMALLOC
   39 # include <dmalloc.h>
   40 #endif
   41 
   42 #include "http.h"
   43 #include "url.h"
   44 #include "progress.h"
   45 #include "format_string.h"
   46 
   /* socket = connect_to(host, service/port)
    * Establishes a TCP connection to the named host and port (the port may
    * also be supplied as a service name from /etc/services). Every address
    * returned by the resolver is tried in turn until one connects.
    * Returns the connected socket handle, or -1 on error (a diagnostic is
    * written to stderr for each failure). */
   int connect_to(const char *node, const char *service) {
       struct addrinfo hint;
       struct addrinfo *ai;
       struct addrinfo *p;
       int sd = -1;
       int rc;

       memset(&hint, 0, sizeof hint);
       hint.ai_family = AF_UNSPEC;
       hint.ai_socktype = SOCK_STREAM;

       rc = getaddrinfo(node, service, &hint, &ai);
       if (rc != 0) {
           /* getaddrinfo reports failure through its return value rather than
            * errno, so the message has to come from gai_strerror, not perror */
           fprintf(stderr, "%s: %s\n", node ? node : "(null)", gai_strerror(rc));
           return -1;
       }

       /* Stop at the first address that accepts the connection */
       for (p = ai; sd == -1 && p != NULL; p = p->ai_next) {
           sd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
           if (sd == -1) {
               perror("socket");
           }
           else if (connect(sd, p->ai_addr, p->ai_addrlen) < 0) {
               /* Report before close(), which could overwrite errno */
               perror(node);
               close(sd);
               sd = -1;
           }
       }
       freeaddrinfo(ai);
       return sd;
   }
   83 
   /* fh = http_get_stream(filedesc, &status_code)
    * Converts a socket into a stream, and reads the first line from it as an
    * HTTP status line (the response to a request that the caller should have
    * already sent).
    * On success returns the stream and stores the numeric status code in
    * *code. On failure (the descriptor cannot be wrapped, nothing can be read,
    * or the line is not an HTTP/1.x status line) returns NULL with *code set
    * to 0; the descriptor has been closed in that case.
    */
   FILE *http_get_stream(int fd, int *code) {
       char buf[256];
       char *p;
       FILE *f = fdopen(fd, "r");

       if (f == NULL) {
           /* No stream took ownership of the descriptor, so release it here;
            * callers treat a NULL return as "connection already gone". */
           *code = 0;
           close(fd);
           return NULL;
       }

       if (fgets(buf, sizeof(buf), f) == NULL || memcmp(buf, "HTTP/1", 6) != 0
           || (p = strchr(buf, ' ')) == NULL) {
           *code = 0;
           fclose(f);
           return NULL;
       }

       /* The status code follows the first space of the status line */
       *code = atoi(p + 1);

       return f;
   }
  106 
  /* url = get_location_url(stream, current_url)
   * Scans the HTTP response headers on the given stream for a Location
   * header and returns its value, made absolute relative to the current URL,
   * as a malloced string. (By the RFC it ought to be absolute already, but
   * many servers send relative URIs.)
   * Returns NULL if the headers end, a line without a colon is met, the
   * Location value is empty, or the stream runs out first. */
  char *get_location_url(FILE * f, const char *cur_url) {
      char line[1024];

      for (;;) {
          char *value;

          if (fgets(line, sizeof(line), f) == NULL)
              break;

          /* A blank line terminates the header section */
          if (line[0] == '\r' || line[0] == '\n')
              return NULL;

          /* Split "Name: value" at the colon */
          value = strchr(line, ':');
          if (value == NULL)
              return NULL;
          *value = 0;
          value++;

          /* Only the Location header is of interest */
          if (strcasecmp(line, "Location") != 0)
              continue;

          /* Drop leading spaces, then cut at the first space or line end */
          value += strspn(value, " ");
          value[strcspn(value, "\r\n ")] = 0;

          /* Hand back the URL after making it absolute */
          return *value ? make_url_absolute(cur_url, value) : NULL;
      }
      return NULL;                // TODO
  }
  149 
  /* Settings for HTTP connections. */

  /* Proxy host and port (or service name) to connect through; proxy is NULL
   * when connecting directly. */
  static char *proxy = NULL;
  static char *pport = NULL;

  /* Authentication table: a realloced array holding 3 strings per entry
   * (host, user, pass), with num_auth_details entries in total. */
  static char **auth_details = NULL;
  static int num_auth_details = 0;

  /* Remembered referrer; when set, range requests send it as Referer: */
  char *referer = NULL;
  158 
  159 /* set_proxy_from_string(str)
  160  * Sets the proxy settings for HTTP connections to use; these can be either as
  161  * a host[:port] or as http://host[:port].
  162  * Returns non-zero if the settings were obtained successfully. */
  163 int set_proxy_from_string(const char *s) {
  164     if (!memcmp(s, http_scheme, strlen(http_scheme))) {
  165         /* http:// style proxy string */
  166         proxy = malloc(256);
  167         if (!proxy)
  168             return 0;
  169         if (!get_http_host_port(s, proxy, 256, &pport))
  170             return 0;
  171         if (!pport) {
  172             pport = strdup("webcache");
  173         }
  174         return 1;
  175     }
  176     else {
  177         /* host:port style proxy string; have to parse this ourselves */
  178         char *p;
  179         proxy = strdup(s);
  180         p = strchr(proxy, ':');
  181         if (!p) {
  182             pport = strdup("webcache");
  183             return 1;
  184         }
  185         *p++ = 0;
  186         pport = strdup(p);
  187         return 1;
  188     }
  189 }
  190 
  191 /* add_auth(host, user, pass)
  192  * Specify user & password combination to use connecting to the given host.
  193  */
  194 void add_auth(char *host, char *user, char *pass) {
  195     auth_details =
  196         realloc(auth_details, (num_auth_details + 1) * sizeof *auth_details);
  197     auth_details[num_auth_details * 3] = host;
  198     auth_details[num_auth_details * 3 + 1] = user;
  199     auth_details[num_auth_details * 3 + 2] = pass;
  200     num_auth_details++;
  201 }
  202 
  203 /* str = get_auth_hdr(host)
  204  * For the given host, returns the extra HTTP header(s) that should be included
  205  * to provide authentication information. Returned as a malloced string.
  206  */
  207 const char auth_header_tmpl[] = { "Authorization: Basic %s\r\n" };
  208 
  209 static char *get_auth_hdr(const char *hn) {
  210     /* Find any relevant entry in the auth table */
  211     int i;
  212     for (i = 0; i < num_auth_details * 3; i += 3) {
  213         if (!strcasecmp(auth_details[i], hn)) {
  214             char *b;
  215             char *header;
  216 
  217             /* We have found an entry in the auth details table for this
  218              * hostname; get the user & pass to use */
  219             char *u = auth_details[i + 1];
  220             char *p = auth_details[i + 2];
  221 
  222             /* Store unencoded user:pass */
  223             size_t l = strlen(u) + strlen(p) + 2;
  224             char *w = malloc(l);
  225             snprintf(w, l, "%s:%s", u, p);
  226 
  227             /* Now base64-encode that, and compose the header */
  228             b = base64(w);
  229             l = strlen(b) + strlen(auth_header_tmpl) + 1;
  230             header = malloc(l);
  231             snprintf(header, l, auth_header_tmpl, b);
  232 
  233             /* And clean up */
  234             free(w);
  235             free(b);
  236             return header;
  237         }
  238     }
  239     return NULL;
  240 }
  241 
  /* http_date_string(time, buf, buflen)
   * Formats the supplied time, broken down as UTC, into the supplied buffer
   * (of length buflen) in the textual form used for HTTP date headers.
   * Returns buf on success, or NULL if the time could not be converted or the
   * result did not fit in the buffer.
   */
  static char *http_date_string(time_t t, char *const buf, const int blen) {
      struct tm utc;

      if (gmtime_r(&t, &utc) == NULL)
          return NULL;

      /* strftime returns 0 when the formatted text does not fit */
      if (strftime(buf, blen, "%a, %d %h %Y %T GMT", &utc) == 0)
          return NULL;

      return buf;
  }
  256 
  257 FILE *http_get(const char *orig_url, char **track_referer, const char *tfname) {
  258     int allow_redirects = 5;
  259     char *url;
  260     FILE *f = NULL;
  261     FILE *g = NULL;
  262     char *fname = NULL;
  263     char ifrange[200] = { "" };
  264     char *authhdr = NULL;
  265     int code;
  266 
  267     /* If we have a (possibly older or incomplete) copy of this file already,
  268      * add a suitable headers to only retrieve new/additional content */
  269     if (tfname) {
  270         struct stat st;
  271 
  272         /* Construct the name of the incomplete transfer file that would have
  273          * been used by a previous transfer */
  274         fname = malloc(strlen(tfname) + 6);
  275         strcpy(fname, tfname);
  276         strcat(fname, ".part");
  277 
  278         /* If we have an incomplete previous transfer, then our complete copy
  279          * must be older but the incomplete copy may be current still and we
  280          * could continue from that. */
  281         if (stat(fname, &st) == 0) {
  282             char buf[50];
  283             if (http_date_string(st.st_mtime, buf, sizeof(buf)) != NULL)
  284                 snprintf(ifrange, sizeof(ifrange),
  285                          "If-Unmodified-Since: %s\r\nRange: bytes=" OFF_T_PF
  286                          "-\r\n", buf, st.st_size);
  287         }
  288         else if (errno == ENOENT && stat(tfname, &st) == 0) {
  289             /* Else, if we have a complete possibly-old version, so only transfer
  290              * if the remote has newer. */
  291             char buf[50];
  292             if (http_date_string(st.st_mtime, buf, sizeof(buf)) != NULL)
  293                 snprintf(ifrange, sizeof(ifrange), "If-Modified-Since: %s\r\n",
  294                          buf);
  295         }
  296     }
  297 
  298     /* Take a malloced copy of the URL, so we treat it the same as strduped
  299      * URLs for any redirects followed. */
  300     url = strdup(orig_url);
  301     if (!url) {
  302         free(fname);
  303         return NULL;
  304     }
  305 
  306     /* Loop for redirect handling */
  307     for (; allow_redirects-- && url && !f;) {
  308         char hostn[256];
  309         const char *connecthost;
  310         char *connectport;
  311         char *p;
  312         char *port;
  313 
  314         /* Extract host and port to connect to */
  315         if ((p = get_http_host_port(url, hostn, sizeof(hostn), &port)) == NULL)
  316             break;
  317         if (!proxy) {
  318             connecthost = hostn;
  319             connectport = strdup(port);
  320         }
  321         else {
  322             connecthost = proxy;
  323             connectport = strdup(pport);
  324         }
  325 
  326         {   /* Connect */
  327             int sfd = connect_to(connecthost, connectport);
  328             free(connectport);
  329             if (sfd == -1)
  330                 break;
  331 
  332             {   /* Compose request */
  333                 char buf[1024];
  334                 snprintf(buf, sizeof(buf),
  335                          "GET %s HTTP/1.0\r\nHost: %s%s%s\r\nUser-Agent: zsync/%s\r\n%s%s\r\n",
  336                          proxy ? url : p, hostn, !strcmp(port,
  337                                                          "http") ? "" : ":",
  338                          !strcmp(port, "http") ? "" : port, VERSION,
  339                          ifrange[0] ? ifrange : "", authhdr ? authhdr : "");
  340 
  341                 /* Send request to remote */
  342                 if (send(sfd, buf, strlen(buf), 0) == -1) {
  343                     perror("sendmsg");
  344                     close(sfd);
  345                     break;
  346                 }
  347             }
  348 
  349             /* Wrap the socket in a stream for convenient line reading of the
  350              * response. */
  351             f = http_get_stream(sfd, &code);
  352             if (!f)
  353                 break;
  354 
  355             /* Redirect - go around again with new URL. */
  356             if (code == 301 || code == 302 || code == 307) {
  357                 char *oldurl = url;
  358                 url = get_location_url(f, oldurl);
  359                 free(oldurl);
  360                 fclose(f);
  361                 f = NULL;
  362             }
  363             else if (code == 401) {   /* Authorization required */
  364                 authhdr = get_auth_hdr(hostn);
  365                 if (authhdr) { /* Go around again with auth header */
  366                     fclose(f);
  367                     f = NULL;
  368                 }
  369                 else { /* No auth details available for this host - error out */
  370                     fclose(f);
  371                     f = NULL;
  372                     break;
  373                 }
  374             }
  375             else if (code == 412) {     // Precondition (i.e. if-unmodified-since) failed
  376                 ifrange[0] = 0;
  377                 fclose(f);
  378                 f = NULL;       // and go round again without the conditional Range:
  379             }
  380             else if (code == 200) {     // Downloading whole file
  381                 /* Write new file (plus allow reading once we finish) */
  382                 g = fname ? fopen(fname, "w+") : tmpfile();
  383             }
  384             else if (code == 206 && fname) {    // Had partial content and server confirms not modified
  385                 /* Append to existing on-disk content (plus allow reading once we finish) */
  386                 g = fopen(fname, "a+");
  387             }
  388             else if (code == 304) {     // Unchanged (if-modified-since was false)
  389                 /* No fetching, just reuse on-disk file */
  390                 g = fopen(tfname, "r");
  391             }
  392             else {                      /* Don't know - error */
  393                 fclose(f);
  394                 f = NULL;
  395                 break;
  396             }
  397         }
  398     }
  399 
  400     /* Store the referrer - we'll supply this when retrieving any content
  401      * referrer to by this file retrieved. */
  402     if (track_referer)
  403         *track_referer = url;
  404     else
  405         free(url);
  406 
  407     /* If we got a 304 Not Modified, return the existing content as-is */
  408     if (code == 304) {
  409         fclose(f);
  410         free(fname);
  411         return g;
  412     }
  413 
  414     /* Return errors from the above loop */
  415     if (!f) {
  416         fprintf(stderr, "failed on url %s\n", url ? url : "(missing redirect)");
  417         return NULL;
  418     }
  419 
  420     /* If our open of the output file failed, flag that error */
  421     if (!g) {
  422         fclose(f);
  423         perror("fopen");
  424         return NULL;
  425     }
  426 
  427     {   /* Read data returned by the request above, writing to the output file */
  428         size_t len = 0;
  429         {   /* Skip headers. TODO support content-encodings, Content-Location etc */
  430             char buf[512];
  431             do {
  432                 if (fgets(buf, sizeof(buf), f) == NULL) {
  433                     perror("read");
  434                     exit(1);
  435                 }
  436 
  437                 sscanf(buf, "Content-Length: " SIZE_T_PF, &len);
  438 
  439             } while (buf[0] != '\r' && !feof(f));
  440         }
  441 
  442         {   /* Now the actual content. Show progress as we go. */
  443             size_t got = 0;
  444             struct progress p = { 0, 0, 0, 0 };
  445             size_t r;
  446             if (!no_progress)
  447                 do_progress(&p, 0, got);
  448 
  449             while (!feof(f)) {
  450                 /* Read from the network */
  451                 char buf[1024];
  452                 r = fread(buf, 1, sizeof(buf), f);
  453                 if (r == 0 && ferror(f)) {
  454                     perror("read");
  455                     break;
  456                 }
  457 
  458                 /* And write anything received to the temp file */
  459                 if (r > 0) {
  460                     if (r > fwrite(buf, 1, r, g)) {
  461                         fprintf(stderr, "short write on %s\n", fname);
  462                         break;
  463                     }
  464 
  465                     /* And maintain progress indication */
  466                     got += r;
  467                     if (!no_progress)
  468                         do_progress(&p, len ? (100.0 * got / len) : 0, got);
  469                 }
  470             }
  471             if (!no_progress)
  472                 end_progress(&p, feof(f) ? 2 : 0);
  473         }
  474         fclose(f);
  475     }
  476 
  477     /* The caller wants the content we just downloaded; return the handle to
  478      * the start of the file that we have just written. */
  479     rewind(g);
  480 
  481     /* If we are keeping the download too, move it to the desired name. */
  482     if (fname) {
  483         rename(fname, tfname);
  484         free(fname);
  485     }
  486 
  487     return g;
  488 }
  489 
  490 /****************************************************************************
  491  *
  492  * HTTP Range: / 206 response interface 
  493  * 
  494  * The state engine here is:
  495  * If sd == -1, not connected;
  496  * else, if block_left is 0
  497  *     if boundary is unset, we're reading HTTP headers
  498  *     if boundary is set, we're reading a MIME boundary
  499  * else we're reading a block of actual data; block_left bytes still to read.
  500  */
  501 
  struct range_fetch {
      /* What to ask for: the URL to put in the request line, the value for
       * the Host: header, and any auth header to send (or NULL) */
      char *url;
      char hosth[256];
      char *authh;

      /* Where to connect: host and port (either the URL's own, or the
       * proxy's) */
      char *chost;
      char *cport;

      /* Currently open socket to the server, or -1 */
      int sd;

      /* Boundary string, if we're in the middle of reading a mime/multipart
       * response */
      char *boundary;

      /* Block currently being read: bytes still to come (non-zero while in
       * the middle of a block) and the offset at which that block starts */
      size_t block_left;
      off_t offset;

      /* Data buffered from the remote server; bytes buf_start .. buf_end-1 of
       * buf[] are valid */
      char buf[4096];
      int buf_start;
      int buf_end;

      /* Running total of bytes retrieved */
      off_t bytes_down;

      /* 0: can send more,
       * 1: cannot send more (but one set of headers still to read),
       * 2: cannot send more and all existing headers read */
      int server_close;

      /* Queue of byte ranges to fetch: 2*nranges offsets, as start and stop
       * for each range */
      off_t *ranges_todo;
      int nranges;
      int rangessent;     /* the first rangessent ranges have been requested */
      int rangesdone;     /* and this many have been received */
  };
  535 
  536 /* range_fetch methods */
  537 
  538 /* range_fetch_set_url(rf, url)
  539  * Set up a range_fetch to fetch from a given URL. Private method. 
  540  * C is a nightmare for memory allocation here. At least the errors should be
  541  * caught, but minor memory leaks may occur on some error paths. */
  542 static int range_fetch_set_url(struct range_fetch* rf, const char* orig_url) {
  543     /* Get the host, port and path from the URL. */
  544     char hostn[sizeof(rf->hosth)];
  545     char* cport;
  546     char* p = get_http_host_port(orig_url, hostn, sizeof(hostn), &cport);
  547     if (!p) {
  548         return 0;
  549     }
  550 
  551     free(rf->url);
  552     if (rf->authh) free(rf->authh);
  553 
  554     /* Get host:port for Host: header */
  555     if (strcmp(cport, "http") != 0)
  556         snprintf(rf->hosth, sizeof(rf->hosth), "%s:%s", hostn, cport);
  557     else
  558         snprintf(rf->hosth, sizeof(rf->hosth), "%s", hostn);
  559 
  560     if (proxy) {
  561         /* URL must be absolute; don't need cport anymore, just need full URL
  562          * to give to proxy. */
  563         free(cport);
  564         rf->url = strdup(orig_url);
  565     }
  566     else {
  567         free(rf->cport);
  568         free(rf->chost);
  569         // Set url to relative part and chost, cport to the target
  570         if ((rf->chost = strdup(hostn)) == NULL) {
  571             free(cport);
  572             return 0;
  573         }
  574         rf->cport = cport;
  575         rf->url = strdup(p);
  576     }
  577 
  578     /* Get any auth header that we should use */
  579     rf->authh = get_auth_hdr(hostn);
  580 
  581     return !!rf->url;
  582 }
  583 
  584 /* get_more_data - this is the method which owns all reads from the remote.
  585  * Nothing else reads from the remote. This buffers data, so that the
  586  * higher-level methods below can easily read whole lines from the remote. 
  587  * The higher-level methods call this function when they need more data: 
  588  * it refills the buffer with data from the network. Returns the bytes read. */
  589 static int get_more_data(struct range_fetch *rf) {
  590     /* First, garbage collect - move the 'live' data in the buffer to the start
  591      * of the buffer. */
  592     if (rf->buf_start) {
  593         memmove(rf->buf, &(rf->buf[rf->buf_start]),
  594                 rf->buf_end - rf->buf_start);
  595         rf->buf_end -= rf->buf_start;
  596         rf->buf_start = 0;
  597     }
  598 
  599     {   /* Read as much as the OS wants to give us, up to a limit of filling
  600          * the rest of the buffer; ignore EINTR. */
  601         int n;
  602         do {
  603             n = read(rf->sd, &(rf->buf[rf->buf_end]),
  604                      sizeof(rf->buf) - rf->buf_end);
  605         } while (n == -1 && errno == EINTR);
  606         if (n < 0) {
  607             perror("read");
  608         }
  609         else {
  610 
  611             /* Add new bytes to buffer, and update total bytes count */
  612             rf->buf_end += n;
  613             rf->bytes_down += n;
  614         }
  615         return n;
  616     }
  617 }
  618 
  619 /* rfgets - get next line from the remote (terminated by LF or end-of-file)
  620  * (using the buffer, fetching more data if there's no full line in the buffer
  621  * yet) */
  622 static char *rfgets(char *buf, size_t len, struct range_fetch *rf) {
  623     char *p;
  624     while (1) {
  625         /* Look for a line end in the in buffer */
  626         p = memchr(rf->buf + rf->buf_start, '\n', rf->buf_end - rf->buf_start);
  627 
  628         /* If we don't have the end of the line yet, fetch more data into the
  629          * buffer (and go around again) */
  630         if (!p) {
  631             int n = get_more_data(rf);
  632             if (n <= 0) {
  633                 /* EOF - just return all that we have left */
  634                 p = &(rf->buf[rf->buf_end]);
  635             }
  636         }
  637         else    /* We have a \n; set p to point just past it */
  638             p++;
  639 
  640         if (p) {
  641             register char *bufstart = &(rf->buf[rf->buf_start]);
  642 
  643             /* Work out how much data to return - the line, or at most 'len' bytes */
  644             len--;              /* leave space for trailing \0 */
  645             if (len > (size_t) (p - bufstart))
  646                 len = p - bufstart;
  647 
  648             /* Copy from input buffer to return buffer, nul terminate, and advance
  649              * current position in the input buffer */
  650             memcpy(buf, bufstart, len);
  651             buf[len] = 0;
  652             rf->buf_start += len;
  653             return buf;
  654         }
  655     }
  656 }
  657 
  658 /* range_fetch_start(origin_url)
  659  * Returns a new range fetch object, for the given URL.
  660  */
  661 struct range_fetch *range_fetch_start(const char *orig_url) {
  662     struct range_fetch *rf = malloc(sizeof(struct range_fetch));
  663     if (!rf)
  664         return NULL;
  665 
  666     /* If going through a proxy, we can immediately set up the host and port to
  667      * connect to */
  668     if (proxy) {
  669         rf->cport = strdup(pport);
  670         rf->chost = strdup(proxy);
  671     }
  672     else {
  673         rf->cport = NULL;
  674         rf->chost = NULL;
  675     }
  676     /* Blank initialisation for other fields before set_url call */
  677     rf->url = NULL;
  678     rf->authh = NULL;
  679 
  680     if (!range_fetch_set_url(rf, orig_url)) {
  681         free(rf->cport);
  682         free(rf->chost);
  683         free(rf);
  684         return NULL;
  685     }
  686 
  687     /* Initialise other state fields */
  688     rf->block_left = 0;
  689     rf->bytes_down = 0;
  690     rf->boundary = NULL;
  691     rf->sd = -1;                        /* Socket not open */
  692     rf->ranges_todo = NULL;             /* And no ranges given yet */
  693     rf->nranges = rf->rangesdone = 0;
  694 
  695     return rf;
  696 }
  697 
  698 /* range_fetch_addranges(self, off_t[], nranges)
  699  * Adds ranges to fetch, supplied as an array of 2*nranges offsets (start and
  700  * stop for each range) */
  701 void range_fetch_addranges(struct range_fetch *rf, off_t * ranges, int nranges) {
  702     int existing_ranges = rf->nranges - rf->rangesdone;
  703 
  704     /* Allocate new memory, enough for valid existing entries and new entries */
  705     off_t *nr = malloc(2 * sizeof(*ranges) * (nranges + existing_ranges));
  706     if (!nr)
  707         return;
  708 
  709     /* Copy only still-valid entries from the existing queue over */
  710     memcpy(nr, &(rf->ranges_todo[2 * rf->rangesdone]),
  711            2 * sizeof(*ranges) * existing_ranges);
  712 
  713     /* And replace existing queue with new one */
  714     free(rf->ranges_todo);
  715     rf->ranges_todo = nr;
  716     rf->rangessent -= rf->rangesdone;
  717     rf->rangesdone = 0;
  718     rf->nranges = existing_ranges;
  719 
  720     /* And append the new stuff */
  721     memcpy(&nr[2 * existing_ranges], ranges, 2 * sizeof(*ranges) * nranges);
  722     rf->nranges += nranges;
  723 }
  724 
  725 /* range_fetch_connect
  726  * Connect this rf to its remote server */
  727 static void range_fetch_connect(struct range_fetch *rf) {
  728     rf->sd = connect_to(rf->chost, rf->cport);
  729     rf->server_close = 0;
  730     rf->rangessent = rf->rangesdone;
  731     rf->buf_start = rf->buf_end = 0;    /* Buffer initially empty */
  732 }
  733 
  734 /* range_fetch_getmore
  735  * On a connected range fetch, send another request to the remote */
  736 static void range_fetch_getmore(struct range_fetch *rf) {
  737     char request[2048];
  738     int l;
  739     int max_range_per_request = 20;
  740 
  741     /* Only if there's stuff queued to get */
  742     if (rf->rangessent == rf->nranges)
  743         return;
  744 
  745     /* Build the base request, everything up to the Range: bytes= */
  746     snprintf(request, sizeof(request),
  747              "GET %s HTTP/1.1\r\n"
  748              "User-Agent: zsync/" VERSION "\r\n"
  749              "Host: %s"
  750              "%s%s\r\n"
  751              "%s"
  752              "Range: bytes=",
  753              rf->url, rf->hosth,
  754              referer ? "\r\nReferer: " : "", referer ? referer : "",
  755              rf->authh ? rf->authh : "");
  756 
  757     /* The for loop here is just a sanity check, lastrange is the real loop control */
  758     for (; rf->rangessent < rf->nranges;) {
  759         int i = rf->rangessent;
  760         int lastrange = 0;
  761 
  762         /* Add at least one byterange to the request; but is this the last one? 
  763          * That's decided based on whether there are any more to add, whether
  764          * we've reached our self-imposed limit per request, and whether
  765          * there's buffer space to add more.
  766          */
  767         l = strlen(request);
  768         if (l > 1200 || !(--max_range_per_request) || i == rf->nranges - 1)
  769             lastrange = 1;
  770 
  771         /* Append to the request */
  772         snprintf(request + l, sizeof(request) - l, OFF_T_PF "-" OFF_T_PF "%s",
  773                  rf->ranges_todo[2 * i], rf->ranges_todo[2 * i + 1],
  774                  lastrange ? "" : ",");
  775 
  776         /* And record that we have sent this one */
  777         rf->rangessent++;
  778 
  779         /* Exit loop if that is the last to add */
  780         if (lastrange)
  781             break;
  782     }
  783     l = strlen(request);
  784 
  785     /* Possibly close the connection (and record the fact, so we definitely
  786      * don't send more stuff) if this is the last */
  787     snprintf(request + l, sizeof(request) - l, "\r\n%s\r\n",
  788              rf->rangessent == rf->nranges ? (rf->server_close =
  789                                               1, "Connection: close\r\n") : "");
  790 
  791     {   /* Send the request */
  792         size_t len = strlen(request);
  793         char *p = request;
  794         int r = 0;
  795 
  796         while (len > 0
  797                && ((r = send(rf->sd, p, len, 0)) != -1 || errno == EINTR)) {
  798             if (r >= 0) {
  799                 p += r;
  800                 len -= r;
  801             }
  802         }
  803         if (r == -1) {
  804             perror("send");
  805         }
  806     }
  807 }
  808 
  /* buflwr(str) - convert the string to lower case in place. Only the ASCII
   * letters A-Z are changed; every other byte is left as it is. */
  static void buflwr(char *s) {
      for (; *s != 0; s++) {
          if (*s >= 'A' && *s <= 'Z')
              *s += 'a' - 'A';
      }
  }
  818 
  819 /* range_fetch_read_http_headers - read a set of HTTP headers, updating state
  820  * appropriately.
  821  * Returns: EOF returns 0, good returns 206 (reading a range block) or 30x
  822  *  (redirect), error returns <0 */
  823 int range_fetch_read_http_headers(struct range_fetch *rf) {
  824     char buf[512];
  825     int status;
  826     int seen_location = 0;
  827 
  828     {                           /* read status line */
  829         char *p;
  830 
  831         if (rfgets(buf, sizeof(buf), rf) == NULL)
  832             return -1;
  833         if (buf[0] == 0)
  834             return 0;           /* EOF, caller decides if that's an error */
  835         if (memcmp(buf, "HTTP/1", 6) != 0 || (p = strchr(buf, ' ')) == NULL) {
  836             fprintf(stderr, "got non-HTTP response '%s'\n", buf);
  837             return -1;
  838         }
  839         status = atoi(p + 1);
  840         if (status != 206 && status != 301 && status != 302) {
  841             if (status >= 300 && status < 400) {
  842                 fprintf(stderr,
  843                         "\nzsync received a redirect/further action required status code: %d\nzsync specifically refuses to proceed when a server requests further action. This is because zsync makes a very large number of requests per file retrieved, and so if zsync has to perform additional actions per request, it further increases the load on the target server. The person/entity who created this zsync file should change it to point directly to a URL where the target file can be retrieved without additional actions/redirects needing to be followed.\nSee http://zsync.moria.orc.uk/server-issues\n",
  844                         status);
  845             }
  846             else if (status == 200) {
  847                 fprintf(stderr,
  848                         "\nzsync received a data response (code %d) but this is not a partial content response\nzsync can only work with servers that support returning partial content from files. The person/entity creating this .zsync has tried to use a server that is not returning partial content. zsync cannot be used with this server.\nSee http://zsync.moria.orc.uk/server-issues\n",
  849                         status);
  850             }
  851             else {
  852                 /* generic error message otherwise */
  853                 fprintf(stderr, "bad status code %d\n", status);
  854             }
  855             return -1;
  856         }
  857         if (*(p - 1) == '0') {  /* HTTP/1.0 server? */
  858             rf->server_close = 2;
  859         }
  860     }
  861 
  862     /* Read other headers */
  863     while (1) {
  864         char *p;
  865 
  866         /* Get next line */
  867         if (rfgets(buf, sizeof(buf), rf) == NULL)
  868             return -1;
  869 
  870         /* If it's the end of the headers */
  871         if (buf[0] == '\r' || buf[0] == '\0') {
  872             /* We are happy provided we got the block boundary, or an actual block is starting. */
  873             if (((rf->boundary || rf->block_left)
  874                  && !(rf->boundary && rf->block_left))
  875                 || (status >= 300 && status < 400 && seen_location))
  876                 return status;
  877             break;
  878         }
  879 
  880         /* Parse header */
  881         p = strstr(buf, ": ");
  882         if (!p)
  883             break;
  884         *p = 0;
  885         p += 2;
  886         buflwr(buf);
  887         {   /* Remove the trailing \r\n from the value */
  888             int len = strcspn(p, "\r\n");
  889             p[len] = 0;
  890         }
  891         /* buf is the header name (lower-cased), p the value */
  892         /* Switch based on header */
  893 
  894         /* If remote closes the connection on us, record that */
  895         if (!strcmp(buf, "connection") && !strcmp(p, "close")) {
  896             rf->server_close = 2;
  897         }
  898 
  899         if (status == 206 && !strcmp(buf, "content-range")) {
  900             /* Okay, we're getting a non-MIME block from the remote. Get the
  901              * range and set our state appropriately */
  902             off_t from, to;
  903             sscanf(p, "bytes " OFF_T_PF "-" OFF_T_PF "/", &from, &to);
  904             if (from <= to) {
  905                 rf->block_left = to + 1 - from;
  906                 rf->offset = from;
  907             }
  908 
  909             /* Can only have got one range. */
  910             rf->rangesdone++;
  911             rf->rangessent = rf->rangesdone;
  912         }
  913 
  914         /* If we're about to get a MIME multipart block set */
  915         if (status == 206 && !strcasecmp(buf, "content-type")
  916             && !strncasecmp(p, "multipart/byteranges", 20)) {
  917 
  918             /* Get the multipart boundary string */
  919             char *q = strstr(p, "boundary=");
  920             if (!q)
  921                 break;
  922             q += 9;
  923 
  924             /* Gah, we could really use a regexp here. Could be quoted... */
  925             if (*q == '"') {
  926                 rf->boundary = strdup(q + 1);
  927                 q = strchr(rf->boundary, '"');
  928                 if (q)
  929                     *q = 0;
  930             }
  931             else {  /* or unquoted */
  932                 rf->boundary = strdup(q);
  933                 q = rf->boundary + strlen(rf->boundary) - 1;
  934 
  935                 while (*q == '\r' || *q == ' ' || *q == '\n')
  936                     *q-- = '\0';
  937             }
  938         }
  939 
  940         /* If remote is telling us to change URL */
  941         if ((status == 302 || status == 301)
  942             && !strcmp(buf, "location")) {
  943             if (seen_location++) {
  944                 fprintf(stderr, "Error: multiple Location headers on redirect\n");
  945                 break;
  946             }
  947 
  948             /* Set new target URL 
  949              * NOTE: we are violating the "the client SHOULD continue to use
  950              * the Request-URI for future requests" of RFC2616 10.3.3 for 302s.
  951              * It's not practical given the number of requests we are making to
  952              * follow the RFC here, and at least we're only remembering it for
  953              * the duration of this transfer. */
  954             if (!no_progress)
  955                 fprintf(stderr, "followed redirect to %s\n", p);
  956             range_fetch_set_url(rf, p);
  957 
  958             /* Flag caller to reconnect; the new URL might be a new target. */
  959             rf->server_close = 2;
  960         }
  961         /* No other headers that we care about. In particular:
  962          *
  963          * FIXME: non-conformant to HTTP/1.1 because we ignore
  964          * Transfer-Encoding: chunked.
  965          */
  966     }
  967     return -1;
  968 }
  969 
  970 /* get_range_block(self, &offset, buf[], buflen)
  971  *
  972  * This is where it all happens. This is a complex function to present a very
  973  * simple read(2)-like interface to the caller over the top of all the HTTP
  974  * going on.
  975  *
  976  * It returns blocks of actual data, retrieved from the origin URL, to the
  977  * caller. Data is returned in the buffer, up to the specified length, and the
  978  * offset in the file from which the data comes is written to the offset
  979  * parameter.
  980  *
  981  * Like read(2), it returns the total bytes read, 0 for EOF, -1 for error.
  982  *
  983  * The blocks that it returns are the ones previously registered by calls to
  984  * range_fetch_addranges (although it doesn't guarantee that only those block
  985  * are returned - that's just what it asks the remote for, but if the remote
  986  * returns more then it'll pass more to the caller - which doesn't matter).
  987  */
  988 int get_range_block(struct range_fetch *rf, off_t * offset, unsigned char *data,
  989                     size_t dlen) {
  990     size_t bytes_to_caller = 0;
  991 
  992     /* If we're not in the middle of reading a block of actual data */
  993     if (!rf->block_left) {
  994       check_boundary:
  995         /* And if not reading a MIME multipart boundary */
  996         if (!rf->boundary) {
  997 
  998             /* Then we're reading the start of a new set of HTTP headers
  999              * (possibly after connecting and sending a request first. */
 1000             int newconn = 0;
 1001             int header_result;
 1002 
 1003             /* If the server closed the connection on us, close our end. */
 1004             if (rf->sd != -1 && rf->server_close == 2) {
 1005                 close(rf->sd);
 1006                 rf->sd = -1;
 1007             }
 1008 
 1009             /* If not connected, connect and immediately request a block */
 1010             if (rf->sd == -1) {
 1011                 if (rf->rangesdone == rf->nranges)
 1012                     return 0;
 1013                 range_fetch_connect(rf);
 1014                 if (rf->sd == -1)
 1015                     return -1;
 1016                 newconn = 1;
 1017                 range_fetch_getmore(rf);
 1018             }
 1019 
 1020             /* read the response headers */
 1021             header_result = range_fetch_read_http_headers(rf);
 1022 
 1023             /* Might be the last */
 1024             if (rf->server_close == 1)
 1025                 rf->server_close = 2;
 1026 
 1027             /* EOF on first connect is fatal */
 1028             if (newconn && header_result == 0) {
 1029                 fprintf(stderr, "EOF from %s\n", rf->url);
 1030                 return -1;
 1031             }
 1032 
 1033             /* Return EOF or error to caller */
 1034             if (header_result <= 0)
 1035                 return header_result ? -1 : 0;
 1036 
 1037             /* Reconnect for a redirect */
 1038             if (header_result >= 300 && header_result < 400) {
 1039                 rf->server_close = 2;
 1040                 goto check_boundary;
 1041             }
 1042 
 1043             /* HTTP Pipelining - send next request before reading current response */
 1044             if (!rf->server_close)
 1045                 range_fetch_getmore(rf);
 1046         }
 1047 
 1048         /* Okay, if we're (now) reading a MIME boundary */
 1049         if (rf->boundary) {
 1050             /* Throw away blank line */
 1051             char buf[512];
 1052             int gotr = 0;
 1053             if (!rfgets(buf, sizeof(buf), rf))
 1054                 return 0;
 1055 
 1056             /* Get, hopefully, boundary marker line */
 1057             if (!rfgets(buf, sizeof(buf), rf))
 1058                 return 0;
 1059             if (buf[0] != '-' || buf[1] != '-')
 1060                 return 0;
 1061 
 1062             if (memcmp(&buf[2], rf->boundary, strlen(rf->boundary))) {
 1063                 fprintf(stderr, "got bad block boundary: %s != %s",
 1064                         rf->boundary, buf);
 1065                 return -1;      /* This is an error now */
 1066             }
 1067 
 1068             /* Last record marker has boundary followed by - */
 1069             if (buf[2 + strlen(rf->boundary)] == '-') {
 1070                 free(rf->boundary);
 1071                 rf->boundary = NULL;
 1072                 goto check_boundary;
 1073             }
 1074 
 1075             /* Otherwise, we're reading the MIME headers for this part until we get \r\n alone */
 1076             for (; buf[0] != '\r' && buf[0] != '\n' && buf[0] != '\0';) {
 1077                 off_t from, to;
 1078 
 1079                 /* Get next header */
 1080                 if (!rfgets(buf, sizeof(buf), rf))
 1081                     return 0;
 1082                 buflwr(buf);  /* HTTP headers are case insensitive */
 1083 
 1084                 /* We're looking for the Content-Range: header, to tell us how
 1085                  * many bytes and what part of the target file they represent.
 1086                  */
 1087                 if (2 ==
 1088                     sscanf(buf,
 1089                            "content-range: bytes " OFF_T_PF "-" OFF_T_PF "/",
 1090                            &from, &to)) {
 1091                     rf->offset = from;
 1092                     rf->block_left = to - from + 1;
 1093                     gotr = 1;
 1094                 }
 1095             }
 1096 
 1097             /* If we didn't get the byte range that this block represents, it's busted. */
 1098             if (!gotr) {
 1099                 fprintf(stderr,
 1100                         "got multipart/byteranges but no Content-Range?");
 1101                 return -1;
 1102             }
 1103 
 1104             /* Else, record that this range is (being) received */
 1105             rf->rangesdone++;
 1106         }
 1107     }
 1108 
 1109     /* Now the easy bit - we are reading a block of actual data */
 1110     if (!rf->block_left)
 1111         return 0;   /* pass EOF back to caller */
 1112     *offset = rf->offset;   /* caller wants to know what this data is */
 1113 
 1114     /* Loop until we've retrieved a whole block */
 1115     for (;;) {
 1116         /* Calculate how much more we can return to the caller now. This is the
 1117          * minimum of:
 1118          *   the amount left in this block from the remote
 1119          *   space left in the caller's buffer
 1120          *   the amount we have actually read from the remote
 1121          */
 1122         size_t rl = rf->block_left;
 1123         if (rl > dlen)
 1124             rl = dlen;
 1125         if ((size_t) (rf->buf_end - rf->buf_start) < rl) {
 1126             rl = rf->buf_end - rf->buf_start;
 1127 
 1128             /* There is more data in this block, and space for more in the
 1129              * caller's buffer, but we don't have any more read from the remote
 1130              * into our buffer yet. So read more now.
 1131              * If we don't get data, drop through and return what we have got.
 1132              * If we do, back to top of loop and try again.
 1133              */
 1134             if (!rl && get_more_data(rf) > 0)
 1135                 continue;
 1136         }
 1137 
 1138         /* If the caller's buffer is full or there's no more data in this block
 1139          * to give, we can now return. */
 1140         if (!rl)
 1141             return bytes_to_caller;
 1142 
 1143         /* Copy that amount to the caller's their buffer from our buffer */
 1144         memcpy(data, &(rf->buf[rf->buf_start]), rl);
 1145         rf->buf_start += rl;    /* Track pos in our buffer... */
 1146         data += rl;
 1147         dlen -= rl;             /* ...and caller's */
 1148         bytes_to_caller += rl;  /* ...and the return value */
 1149 
 1150         /* Keep track of how much of the current block is left to read */
 1151         rf->block_left -= rl;
 1152         /* and what position we are up to in the whole source file */
 1153         rf->offset += rl;
 1154         /* And go around again */
 1155     }
 1156 }
 1157 
 1158 /* range_fetch_bytes_down
 1159  * Simple getter method, returns the total bytes retrieved */
 1160 off_t range_fetch_bytes_down(const struct range_fetch * rf) {
 1161     return rf->bytes_down;
 1162 }
 1163 
 1164 /* Destructor */
 1165 void range_fetch_end(struct range_fetch *rf) {
 1166     if (rf->sd != -1)
 1167         close(rf->sd);
 1168     free(rf->ranges_todo);
 1169     free(rf->boundary);
 1170     free(rf->url);
 1171     free(rf->cport);
 1172     free(rf->chost);
 1173     free(rf);
 1174 }