/*
 *   zsync - client side rsync over http
 *   Copyright (C) 2004,2005,2007,2009 Colin Phipps <cph@moria.org.uk>
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the Artistic License v2 (see the accompanying
 *   file COPYING for the full license terms), or, at your option, any later
 *   version of the same license.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   COPYING file for details.
 */

/* HTTP client code for zsync.
 * Including pipeline HTTP Range fetching code. */

#include "zsglobal.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <time.h>

#ifndef HAVE_GETADDRINFO
#include "getaddrinfo.h"
#endif

#ifdef WITH_DMALLOC
# include <dmalloc.h>
#endif

#include "http.h"
#include "url.h"
#include "progress.h"
#include "format_string.h"

/* socket = connect_to(host, service/port)
 * Establishes a TCP connection to the named host and port (which can be
 * supplied as a service name from /etc/services). Returns the socket handle, or
 * -1 on error.
*/
int connect_to(const char *node, const char *service) {
    struct addrinfo hint;
    struct addrinfo *ai = NULL;
    struct addrinfo *p;
    int sd = -1;
    int rc;

    /* Any address family, but TCP only */
    memset(&hint, 0, sizeof hint);
    hint.ai_family = AF_UNSPEC;
    hint.ai_socktype = SOCK_STREAM;

    /* getaddrinfo reports failure through its return value, not errno, so the
     * message must come from gai_strerror rather than perror. */
    rc = getaddrinfo(node, service, &hint, &ai);
    if (rc != 0) {
        fprintf(stderr, "%s: %s\n", node, gai_strerror(rc));
        return -1;
    }

    /* Try each candidate address in turn until one of them connects */
    for (p = ai; sd == -1 && p != NULL; p = p->ai_next) {
        sd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
        if (sd == -1) {
            perror("socket");
        }
        else if (connect(sd, p->ai_addr, p->ai_addrlen) < 0) {
            perror(node);
            close(sd);
            sd = -1;
        }
    }
    freeaddrinfo(ai);

    /* Still -1 here if no address could be connected to */
    return sd;
}

/* fh = http_get_stream(filedesc, &status_code)
 * Converts a socket into a stream, and reads the first line from it as an HTTP
 * status line (response to a request that the caller should have already sent)
 * and returns the stream, and the status code to the location specified by the
 * second parameter.
 * On any failure NULL is returned, *code is set to 0 and the descriptor has
 * been closed - the caller must not close it again.
 */
FILE *http_get_stream(int fd, int *code) {
    char buf[256];
    char *p;
    FILE *f = fdopen(fd, "r");

    *code = 0;

    if (f == NULL) {
        /* The descriptor was not adopted by a stream, so close it directly */
        perror("fdopen");
        close(fd);
        return NULL;
    }

    /* Expect "HTTP/1.x <code> <reason>"; strncmp (not memcmp) so that a short
     * line is never read beyond its terminating NUL. */
    if (fgets(buf, sizeof(buf), f) == NULL
        || strncmp(buf, "HTTP/1", 6) != 0
        || (p = strchr(buf, ' ')) == NULL) {
        fclose(f);
        return NULL;
    }

    /* The status code is the token following the first space */
    *code = atoi(p + 1);

    return f;
}

/* url = get_location_url(stream, current_url)
 * Reads the HTTP response from the given stream and extracts the Location
 * header, making this URL absolute using the current URL. Returned as a
 * malloced string.
 * (it ought to be absolute anyway, by the RFC, but many servers send
 * relative URIs).
*/ 113 char *get_location_url(FILE * f, const char *cur_url) { 114 char buf[1024]; 115 116 while (fgets(buf, sizeof(buf), f)) { 117 char *p; 118 119 /* exit if end of headers */ 120 if (buf[0] == '\r' || buf[0] == '\n') 121 return NULL; 122 123 /* Look for Location header */ 124 p = strchr(buf, ':'); 125 if (!p) 126 return NULL; 127 *p++ = 0; 128 if (strcasecmp(buf, "Location")) 129 continue; 130 131 /* Skip leading whitespace */ 132 while (*p == ' ') 133 p++; 134 135 { /* Remove trailing whitespace */ 136 char *q = p; 137 while (*q != '\r' && *q != '\n' && *q != ' ' && *q) 138 q++; 139 *q = 0; 140 } 141 if (!*p) 142 return NULL; 143 144 /* Return URL after making absolute */ 145 return make_url_absolute(cur_url, p); 146 } 147 return NULL; // TODO 148 } 149 150 /* Settings for HTTP connections - proxy host and port, auth details */ 151 static char *proxy; 152 static char *pport; 153 static char **auth_details; /* This is a realloced array with 3*num_auth_details entries */ 154 static int num_auth_details; /* The groups of 3 strings are host, user, pass */ 155 156 /* Remember referrer */ 157 char *referer; 158 159 /* set_proxy_from_string(str) 160 * Sets the proxy settings for HTTP connections to use; these can be either as 161 * a host[:port] or as http://host[:port]. 162 * Returns non-zero if the settings were obtained successfully. 
*/ 163 int set_proxy_from_string(const char *s) { 164 if (!memcmp(s, http_scheme, strlen(http_scheme))) { 165 /* http:// style proxy string */ 166 proxy = malloc(256); 167 if (!proxy) 168 return 0; 169 if (!get_http_host_port(s, proxy, 256, &pport)) 170 return 0; 171 if (!pport) { 172 pport = strdup("webcache"); 173 } 174 return 1; 175 } 176 else { 177 /* host:port style proxy string; have to parse this ourselves */ 178 char *p; 179 proxy = strdup(s); 180 p = strchr(proxy, ':'); 181 if (!p) { 182 pport = strdup("webcache"); 183 return 1; 184 } 185 *p++ = 0; 186 pport = strdup(p); 187 return 1; 188 } 189 } 190 191 /* add_auth(host, user, pass) 192 * Specify user & password combination to use connecting to the given host. 193 */ 194 void add_auth(char *host, char *user, char *pass) { 195 auth_details = 196 realloc(auth_details, (num_auth_details + 1) * sizeof *auth_details); 197 auth_details[num_auth_details * 3] = host; 198 auth_details[num_auth_details * 3 + 1] = user; 199 auth_details[num_auth_details * 3 + 2] = pass; 200 num_auth_details++; 201 } 202 203 /* str = get_auth_hdr(host) 204 * For the given host, returns the extra HTTP header(s) that should be included 205 * to provide authentication information. Returned as a malloced string. 
206 */ 207 const char auth_header_tmpl[] = { "Authorization: Basic %s\r\n" }; 208 209 static char *get_auth_hdr(const char *hn) { 210 /* Find any relevant entry in the auth table */ 211 int i; 212 for (i = 0; i < num_auth_details * 3; i += 3) { 213 if (!strcasecmp(auth_details[i], hn)) { 214 char *b; 215 char *header; 216 217 /* We have found an entry in the auth details table for this 218 * hostname; get the user & pass to use */ 219 char *u = auth_details[i + 1]; 220 char *p = auth_details[i + 2]; 221 222 /* Store unencoded user:pass */ 223 size_t l = strlen(u) + strlen(p) + 2; 224 char *w = malloc(l); 225 snprintf(w, l, "%s:%s", u, p); 226 227 /* Now base64-encode that, and compose the header */ 228 b = base64(w); 229 l = strlen(b) + strlen(auth_header_tmpl) + 1; 230 header = malloc(l); 231 snprintf(header, l, auth_header_tmpl, b); 232 233 /* And clean up */ 234 free(w); 235 free(b); 236 return header; 237 } 238 } 239 return NULL; 240 } 241 242 /* http_date_string(time, buf, buflen) 243 * Stores a valid ASCII representation of the supplied datetime in the supplied 244 * buffer (length given as buflen). Returns non-NULL if successful. 
245 */ 246 static char *http_date_string(time_t t, char *const buf, const int blen) { 247 struct tm d; 248 249 if (gmtime_r(&t, &d) != NULL) { 250 if (strftime(buf, blen, "%a, %d %h %Y %T GMT", &d) > 0) { 251 return buf; 252 } 253 } 254 return NULL; 255 } 256 257 FILE *http_get(const char *orig_url, char **track_referer, const char *tfname) { 258 int allow_redirects = 5; 259 char *url; 260 FILE *f = NULL; 261 FILE *g = NULL; 262 char *fname = NULL; 263 char ifrange[200] = { "" }; 264 char *authhdr = NULL; 265 int code; 266 267 /* If we have a (possibly older or incomplete) copy of this file already, 268 * add a suitable headers to only retrieve new/additional content */ 269 if (tfname) { 270 struct stat st; 271 272 /* Construct the name of the incomplete transfer file that would have 273 * been used by a previous transfer */ 274 fname = malloc(strlen(tfname) + 6); 275 strcpy(fname, tfname); 276 strcat(fname, ".part"); 277 278 /* If we have an incomplete previous transfer, then our complete copy 279 * must be older but the incomplete copy may be current still and we 280 * could continue from that. */ 281 if (stat(fname, &st) == 0) { 282 char buf[50]; 283 if (http_date_string(st.st_mtime, buf, sizeof(buf)) != NULL) 284 snprintf(ifrange, sizeof(ifrange), 285 "If-Unmodified-Since: %s\r\nRange: bytes=" OFF_T_PF 286 "-\r\n", buf, st.st_size); 287 } 288 else if (errno == ENOENT && stat(tfname, &st) == 0) { 289 /* Else, if we have a complete possibly-old version, so only transfer 290 * if the remote has newer. */ 291 char buf[50]; 292 if (http_date_string(st.st_mtime, buf, sizeof(buf)) != NULL) 293 snprintf(ifrange, sizeof(ifrange), "If-Modified-Since: %s\r\n", 294 buf); 295 } 296 } 297 298 /* Take a malloced copy of the URL, so we treat it the same as strduped 299 * URLs for any redirects followed. 
*/ 300 url = strdup(orig_url); 301 if (!url) { 302 free(fname); 303 return NULL; 304 } 305 306 /* Loop for redirect handling */ 307 for (; allow_redirects-- && url && !f;) { 308 char hostn[256]; 309 const char *connecthost; 310 char *connectport; 311 char *p; 312 char *port; 313 314 /* Extract host and port to connect to */ 315 if ((p = get_http_host_port(url, hostn, sizeof(hostn), &port)) == NULL) 316 break; 317 if (!proxy) { 318 connecthost = hostn; 319 connectport = strdup(port); 320 } 321 else { 322 connecthost = proxy; 323 connectport = strdup(pport); 324 } 325 326 { /* Connect */ 327 int sfd = connect_to(connecthost, connectport); 328 free(connectport); 329 if (sfd == -1) 330 break; 331 332 { /* Compose request */ 333 char buf[1024]; 334 snprintf(buf, sizeof(buf), 335 "GET %s HTTP/1.0\r\nHost: %s%s%s\r\nUser-Agent: zsync/%s\r\n%s%s\r\n", 336 proxy ? url : p, hostn, !strcmp(port, 337 "http") ? "" : ":", 338 !strcmp(port, "http") ? "" : port, VERSION, 339 ifrange[0] ? ifrange : "", authhdr ? authhdr : ""); 340 341 /* Send request to remote */ 342 if (send(sfd, buf, strlen(buf), 0) == -1) { 343 perror("sendmsg"); 344 close(sfd); 345 break; 346 } 347 } 348 349 /* Wrap the socket in a stream for convenient line reading of the 350 * response. */ 351 f = http_get_stream(sfd, &code); 352 if (!f) 353 break; 354 355 /* Redirect - go around again with new URL. */ 356 if (code == 301 || code == 302 || code == 307) { 357 char *oldurl = url; 358 url = get_location_url(f, oldurl); 359 free(oldurl); 360 fclose(f); 361 f = NULL; 362 } 363 else if (code == 401) { /* Authorization required */ 364 authhdr = get_auth_hdr(hostn); 365 if (authhdr) { /* Go around again with auth header */ 366 fclose(f); 367 f = NULL; 368 } 369 else { /* No auth details available for this host - error out */ 370 fclose(f); 371 f = NULL; 372 break; 373 } 374 } 375 else if (code == 412) { // Precondition (i.e. 
if-unmodified-since) failed 376 ifrange[0] = 0; 377 fclose(f); 378 f = NULL; // and go round again without the conditional Range: 379 } 380 else if (code == 200) { // Downloading whole file 381 /* Write new file (plus allow reading once we finish) */ 382 g = fname ? fopen(fname, "w+") : tmpfile(); 383 } 384 else if (code == 206 && fname) { // Had partial content and server confirms not modified 385 /* Append to existing on-disk content (plus allow reading once we finish) */ 386 g = fopen(fname, "a+"); 387 } 388 else if (code == 304) { // Unchanged (if-modified-since was false) 389 /* No fetching, just reuse on-disk file */ 390 g = fopen(tfname, "r"); 391 } 392 else { /* Don't know - error */ 393 fclose(f); 394 f = NULL; 395 break; 396 } 397 } 398 } 399 400 /* Store the referrer - we'll supply this when retrieving any content 401 * referrer to by this file retrieved. */ 402 if (track_referer) 403 *track_referer = url; 404 else 405 free(url); 406 407 /* If we got a 304 Not Modified, return the existing content as-is */ 408 if (code == 304) { 409 fclose(f); 410 free(fname); 411 return g; 412 } 413 414 /* Return errors from the above loop */ 415 if (!f) { 416 fprintf(stderr, "failed on url %s\n", url ? url : "(missing redirect)"); 417 return NULL; 418 } 419 420 /* If our open of the output file failed, flag that error */ 421 if (!g) { 422 fclose(f); 423 perror("fopen"); 424 return NULL; 425 } 426 427 { /* Read data returned by the request above, writing to the output file */ 428 size_t len = 0; 429 { /* Skip headers. TODO support content-encodings, Content-Location etc */ 430 char buf[512]; 431 do { 432 if (fgets(buf, sizeof(buf), f) == NULL) { 433 perror("read"); 434 exit(1); 435 } 436 437 sscanf(buf, "Content-Length: " SIZE_T_PF, &len); 438 439 } while (buf[0] != '\r' && !feof(f)); 440 } 441 442 { /* Now the actual content. Show progress as we go. 
*/ 443 size_t got = 0; 444 struct progress p = { 0, 0, 0, 0 }; 445 size_t r; 446 if (!no_progress) 447 do_progress(&p, 0, got); 448 449 while (!feof(f)) { 450 /* Read from the network */ 451 char buf[1024]; 452 r = fread(buf, 1, sizeof(buf), f); 453 if (r == 0 && ferror(f)) { 454 perror("read"); 455 break; 456 } 457 458 /* And write anything received to the temp file */ 459 if (r > 0) { 460 if (r > fwrite(buf, 1, r, g)) { 461 fprintf(stderr, "short write on %s\n", fname); 462 break; 463 } 464 465 /* And maintain progress indication */ 466 got += r; 467 if (!no_progress) 468 do_progress(&p, len ? (100.0 * got / len) : 0, got); 469 } 470 } 471 if (!no_progress) 472 end_progress(&p, feof(f) ? 2 : 0); 473 } 474 fclose(f); 475 } 476 477 /* The caller wants the content we just downloaded; return the handle to 478 * the start of the file that we have just written. */ 479 rewind(g); 480 481 /* If we are keeping the download too, move it to the desired name. */ 482 if (fname) { 483 rename(fname, tfname); 484 free(fname); 485 } 486 487 return g; 488 } 489 490 /**************************************************************************** 491 * 492 * HTTP Range: / 206 response interface 493 * 494 * The state engine here is: 495 * If sd == -1, not connected; 496 * else, if block_left is 0 497 * if boundary is unset, we're reading HTTP headers 498 * if boundary is set, we're reading a MIME boundary 499 * else we're reading a block of actual data; block_left bytes still to read. 500 */ 501 502 struct range_fetch { 503 /* URL to retrieve from, host:port, auth header */ 504 char *url; 505 char hosth[256]; 506 char *authh; 507 508 /* Host and port to connect to (could be the same as the URL, or proxy) */ 509 char *chost; 510 char *cport; 511 512 int sd; /* Currently open socket to the server, or -1 */ 513 char *boundary; /* If we're in the middle of reading a mime/multipart 514 * response, this is the boundary string. 
*/ 515 516 /* State for block currently being read */ 517 size_t block_left; /* non-zero if we're in the middle of reading a block */ 518 off_t offset; /* and this is the offset of the start of the block we are reading */ 519 520 /* Buffering of data from the remote server */ 521 char buf[4096]; 522 int buf_start, buf_end; /* Bytes buf_start .. buf_end-1 in buf[] are valid */ 523 524 /* Keep count of total bytes retrieved */ 525 off_t bytes_down; 526 527 int server_close; /* 0: can send more, 1: cannot send more (but one set of headers still to read), 2: cannot send more and all existing headers read */ 528 529 /* Byte ranges to fetch */ 530 off_t *ranges_todo; /* Contains 2*nranges ranges, consisting of start and stop offset */ 531 int nranges; 532 int rangessent; /* We've requested the first rangessent ranges from the remote */ 533 int rangesdone; /* and received this many */ 534 }; 535 536 /* range_fetch methods */ 537 538 /* range_fetch_set_url(rf, url) 539 * Set up a range_fetch to fetch from a given URL. Private method. 540 * C is a nightmare for memory allocation here. At least the errors should be 541 * caught, but minor memory leaks may occur on some error paths. */ 542 static int range_fetch_set_url(struct range_fetch* rf, const char* orig_url) { 543 /* Get the host, port and path from the URL. */ 544 char hostn[sizeof(rf->hosth)]; 545 char* cport; 546 char* p = get_http_host_port(orig_url, hostn, sizeof(hostn), &cport); 547 if (!p) { 548 return 0; 549 } 550 551 free(rf->url); 552 if (rf->authh) free(rf->authh); 553 554 /* Get host:port for Host: header */ 555 if (strcmp(cport, "http") != 0) 556 snprintf(rf->hosth, sizeof(rf->hosth), "%s:%s", hostn, cport); 557 else 558 snprintf(rf->hosth, sizeof(rf->hosth), "%s", hostn); 559 560 if (proxy) { 561 /* URL must be absolute; don't need cport anymore, just need full URL 562 * to give to proxy. 
*/ 563 free(cport); 564 rf->url = strdup(orig_url); 565 } 566 else { 567 free(rf->cport); 568 free(rf->chost); 569 // Set url to relative part and chost, cport to the target 570 if ((rf->chost = strdup(hostn)) == NULL) { 571 free(cport); 572 return 0; 573 } 574 rf->cport = cport; 575 rf->url = strdup(p); 576 } 577 578 /* Get any auth header that we should use */ 579 rf->authh = get_auth_hdr(hostn); 580 581 return !!rf->url; 582 } 583 584 /* get_more_data - this is the method which owns all reads from the remote. 585 * Nothing else reads from the remote. This buffers data, so that the 586 * higher-level methods below can easily read whole lines from the remote. 587 * The higher-level methods call this function when they need more data: 588 * it refills the buffer with data from the network. Returns the bytes read. */ 589 static int get_more_data(struct range_fetch *rf) { 590 /* First, garbage collect - move the 'live' data in the buffer to the start 591 * of the buffer. */ 592 if (rf->buf_start) { 593 memmove(rf->buf, &(rf->buf[rf->buf_start]), 594 rf->buf_end - rf->buf_start); 595 rf->buf_end -= rf->buf_start; 596 rf->buf_start = 0; 597 } 598 599 { /* Read as much as the OS wants to give us, up to a limit of filling 600 * the rest of the buffer; ignore EINTR. 
*/ 601 int n; 602 do { 603 n = read(rf->sd, &(rf->buf[rf->buf_end]), 604 sizeof(rf->buf) - rf->buf_end); 605 } while (n == -1 && errno == EINTR); 606 if (n < 0) { 607 perror("read"); 608 } 609 else { 610 611 /* Add new bytes to buffer, and update total bytes count */ 612 rf->buf_end += n; 613 rf->bytes_down += n; 614 } 615 return n; 616 } 617 } 618 619 /* rfgets - get next line from the remote (terminated by LF or end-of-file) 620 * (using the buffer, fetching more data if there's no full line in the buffer 621 * yet) */ 622 static char *rfgets(char *buf, size_t len, struct range_fetch *rf) { 623 char *p; 624 while (1) { 625 /* Look for a line end in the in buffer */ 626 p = memchr(rf->buf + rf->buf_start, '\n', rf->buf_end - rf->buf_start); 627 628 /* If we don't have the end of the line yet, fetch more data into the 629 * buffer (and go around again) */ 630 if (!p) { 631 int n = get_more_data(rf); 632 if (n <= 0) { 633 /* EOF - just return all that we have left */ 634 p = &(rf->buf[rf->buf_end]); 635 } 636 } 637 else /* We have a \n; set p to point just past it */ 638 p++; 639 640 if (p) { 641 register char *bufstart = &(rf->buf[rf->buf_start]); 642 643 /* Work out how much data to return - the line, or at most 'len' bytes */ 644 len--; /* leave space for trailing \0 */ 645 if (len > (size_t) (p - bufstart)) 646 len = p - bufstart; 647 648 /* Copy from input buffer to return buffer, nul terminate, and advance 649 * current position in the input buffer */ 650 memcpy(buf, bufstart, len); 651 buf[len] = 0; 652 rf->buf_start += len; 653 return buf; 654 } 655 } 656 } 657 658 /* range_fetch_start(origin_url) 659 * Returns a new range fetch object, for the given URL. 
660 */ 661 struct range_fetch *range_fetch_start(const char *orig_url) { 662 struct range_fetch *rf = malloc(sizeof(struct range_fetch)); 663 if (!rf) 664 return NULL; 665 666 /* If going through a proxy, we can immediately set up the host and port to 667 * connect to */ 668 if (proxy) { 669 rf->cport = strdup(pport); 670 rf->chost = strdup(proxy); 671 } 672 else { 673 rf->cport = NULL; 674 rf->chost = NULL; 675 } 676 /* Blank initialisation for other fields before set_url call */ 677 rf->url = NULL; 678 rf->authh = NULL; 679 680 if (!range_fetch_set_url(rf, orig_url)) { 681 free(rf->cport); 682 free(rf->chost); 683 free(rf); 684 return NULL; 685 } 686 687 /* Initialise other state fields */ 688 rf->block_left = 0; 689 rf->bytes_down = 0; 690 rf->boundary = NULL; 691 rf->sd = -1; /* Socket not open */ 692 rf->ranges_todo = NULL; /* And no ranges given yet */ 693 rf->nranges = rf->rangesdone = 0; 694 695 return rf; 696 } 697 698 /* range_fetch_addranges(self, off_t[], nranges) 699 * Adds ranges to fetch, supplied as an array of 2*nranges offsets (start and 700 * stop for each range) */ 701 void range_fetch_addranges(struct range_fetch *rf, off_t * ranges, int nranges) { 702 int existing_ranges = rf->nranges - rf->rangesdone; 703 704 /* Allocate new memory, enough for valid existing entries and new entries */ 705 off_t *nr = malloc(2 * sizeof(*ranges) * (nranges + existing_ranges)); 706 if (!nr) 707 return; 708 709 /* Copy only still-valid entries from the existing queue over */ 710 memcpy(nr, &(rf->ranges_todo[2 * rf->rangesdone]), 711 2 * sizeof(*ranges) * existing_ranges); 712 713 /* And replace existing queue with new one */ 714 free(rf->ranges_todo); 715 rf->ranges_todo = nr; 716 rf->rangessent -= rf->rangesdone; 717 rf->rangesdone = 0; 718 rf->nranges = existing_ranges; 719 720 /* And append the new stuff */ 721 memcpy(&nr[2 * existing_ranges], ranges, 2 * sizeof(*ranges) * nranges); 722 rf->nranges += nranges; 723 } 724 725 /* range_fetch_connect 726 * Connect 
this rf to its remote server */ 727 static void range_fetch_connect(struct range_fetch *rf) { 728 rf->sd = connect_to(rf->chost, rf->cport); 729 rf->server_close = 0; 730 rf->rangessent = rf->rangesdone; 731 rf->buf_start = rf->buf_end = 0; /* Buffer initially empty */ 732 } 733 734 /* range_fetch_getmore 735 * On a connected range fetch, send another request to the remote */ 736 static void range_fetch_getmore(struct range_fetch *rf) { 737 char request[2048]; 738 int l; 739 int max_range_per_request = 20; 740 741 /* Only if there's stuff queued to get */ 742 if (rf->rangessent == rf->nranges) 743 return; 744 745 /* Build the base request, everything up to the Range: bytes= */ 746 snprintf(request, sizeof(request), 747 "GET %s HTTP/1.1\r\n" 748 "User-Agent: zsync/" VERSION "\r\n" 749 "Host: %s" 750 "%s%s\r\n" 751 "%s" 752 "Range: bytes=", 753 rf->url, rf->hosth, 754 referer ? "\r\nReferer: " : "", referer ? referer : "", 755 rf->authh ? rf->authh : ""); 756 757 /* The for loop here is just a sanity check, lastrange is the real loop control */ 758 for (; rf->rangessent < rf->nranges;) { 759 int i = rf->rangessent; 760 int lastrange = 0; 761 762 /* Add at least one byterange to the request; but is this the last one? 763 * That's decided based on whether there are any more to add, whether 764 * we've reached our self-imposed limit per request, and whether 765 * there's buffer space to add more. 766 */ 767 l = strlen(request); 768 if (l > 1200 || !(--max_range_per_request) || i == rf->nranges - 1) 769 lastrange = 1; 770 771 /* Append to the request */ 772 snprintf(request + l, sizeof(request) - l, OFF_T_PF "-" OFF_T_PF "%s", 773 rf->ranges_todo[2 * i], rf->ranges_todo[2 * i + 1], 774 lastrange ? 
"" : ","); 775 776 /* And record that we have sent this one */ 777 rf->rangessent++; 778 779 /* Exit loop if that is the last to add */ 780 if (lastrange) 781 break; 782 } 783 l = strlen(request); 784 785 /* Possibly close the connection (and record the fact, so we definitely 786 * don't send more stuff) if this is the last */ 787 snprintf(request + l, sizeof(request) - l, "\r\n%s\r\n", 788 rf->rangessent == rf->nranges ? (rf->server_close = 789 1, "Connection: close\r\n") : ""); 790 791 { /* Send the request */ 792 size_t len = strlen(request); 793 char *p = request; 794 int r = 0; 795 796 while (len > 0 797 && ((r = send(rf->sd, p, len, 0)) != -1 || errno == EINTR)) { 798 if (r >= 0) { 799 p += r; 800 len -= r; 801 } 802 } 803 if (r == -1) { 804 perror("send"); 805 } 806 } 807 } 808 809 /* buflwr(str) - in-place convert this string to lower case */ 810 static void buflwr(char *s) { 811 char c; 812 while ((c = *s) != 0) { 813 if (c >= 'A' && c <= 'Z') 814 *s = c - 'A' + 'a'; 815 s++; 816 } 817 } 818 819 /* range_fetch_read_http_headers - read a set of HTTP headers, updating state 820 * appropriately. 
821 * Returns: EOF returns 0, good returns 206 (reading a range block) or 30x 822 * (redirect), error returns <0 */ 823 int range_fetch_read_http_headers(struct range_fetch *rf) { 824 char buf[512]; 825 int status; 826 int seen_location = 0; 827 828 { /* read status line */ 829 char *p; 830 831 if (rfgets(buf, sizeof(buf), rf) == NULL) 832 return -1; 833 if (buf[0] == 0) 834 return 0; /* EOF, caller decides if that's an error */ 835 if (memcmp(buf, "HTTP/1", 6) != 0 || (p = strchr(buf, ' ')) == NULL) { 836 fprintf(stderr, "got non-HTTP response '%s'\n", buf); 837 return -1; 838 } 839 status = atoi(p + 1); 840 if (status != 206 && status != 301 && status != 302) { 841 if (status >= 300 && status < 400) { 842 fprintf(stderr, 843 "\nzsync received a redirect/further action required status code: %d\nzsync specifically refuses to proceed when a server requests further action. This is because zsync makes a very large number of requests per file retrieved, and so if zsync has to perform additional actions per request, it further increases the load on the target server. The person/entity who created this zsync file should change it to point directly to a URL where the target file can be retrieved without additional actions/redirects needing to be followed.\nSee http://zsync.moria.orc.uk/server-issues\n", 844 status); 845 } 846 else if (status == 200) { 847 fprintf(stderr, 848 "\nzsync received a data response (code %d) but this is not a partial content response\nzsync can only work with servers that support returning partial content from files. The person/entity creating this .zsync has tried to use a server that is not returning partial content. zsync cannot be used with this server.\nSee http://zsync.moria.orc.uk/server-issues\n", 849 status); 850 } 851 else { 852 /* generic error message otherwise */ 853 fprintf(stderr, "bad status code %d\n", status); 854 } 855 return -1; 856 } 857 if (*(p - 1) == '0') { /* HTTP/1.0 server? 
*/ 858 rf->server_close = 2; 859 } 860 } 861 862 /* Read other headers */ 863 while (1) { 864 char *p; 865 866 /* Get next line */ 867 if (rfgets(buf, sizeof(buf), rf) == NULL) 868 return -1; 869 870 /* If it's the end of the headers */ 871 if (buf[0] == '\r' || buf[0] == '\0') { 872 /* We are happy provided we got the block boundary, or an actual block is starting. */ 873 if (((rf->boundary || rf->block_left) 874 && !(rf->boundary && rf->block_left)) 875 || (status >= 300 && status < 400 && seen_location)) 876 return status; 877 break; 878 } 879 880 /* Parse header */ 881 p = strstr(buf, ": "); 882 if (!p) 883 break; 884 *p = 0; 885 p += 2; 886 buflwr(buf); 887 { /* Remove the trailing \r\n from the value */ 888 int len = strcspn(p, "\r\n"); 889 p[len] = 0; 890 } 891 /* buf is the header name (lower-cased), p the value */ 892 /* Switch based on header */ 893 894 /* If remote closes the connection on us, record that */ 895 if (!strcmp(buf, "connection") && !strcmp(p, "close")) { 896 rf->server_close = 2; 897 } 898 899 if (status == 206 && !strcmp(buf, "content-range")) { 900 /* Okay, we're getting a non-MIME block from the remote. Get the 901 * range and set our state appropriately */ 902 off_t from, to; 903 sscanf(p, "bytes " OFF_T_PF "-" OFF_T_PF "/", &from, &to); 904 if (from <= to) { 905 rf->block_left = to + 1 - from; 906 rf->offset = from; 907 } 908 909 /* Can only have got one range. */ 910 rf->rangesdone++; 911 rf->rangessent = rf->rangesdone; 912 } 913 914 /* If we're about to get a MIME multipart block set */ 915 if (status == 206 && !strcasecmp(buf, "content-type") 916 && !strncasecmp(p, "multipart/byteranges", 20)) { 917 918 /* Get the multipart boundary string */ 919 char *q = strstr(p, "boundary="); 920 if (!q) 921 break; 922 q += 9; 923 924 /* Gah, we could really use a regexp here. Could be quoted... 
*/ 925 if (*q == '"') { 926 rf->boundary = strdup(q + 1); 927 q = strchr(rf->boundary, '"'); 928 if (q) 929 *q = 0; 930 } 931 else { /* or unquoted */ 932 rf->boundary = strdup(q); 933 q = rf->boundary + strlen(rf->boundary) - 1; 934 935 while (*q == '\r' || *q == ' ' || *q == '\n') 936 *q-- = '\0'; 937 } 938 } 939 940 /* If remote is telling us to change URL */ 941 if ((status == 302 || status == 301) 942 && !strcmp(buf, "location")) { 943 if (seen_location++) { 944 fprintf(stderr, "Error: multiple Location headers on redirect\n"); 945 break; 946 } 947 948 /* Set new target URL 949 * NOTE: we are violating the "the client SHOULD continue to use 950 * the Request-URI for future requests" of RFC2616 10.3.3 for 302s. 951 * It's not practical given the number of requests we are making to 952 * follow the RFC here, and at least we're only remembering it for 953 * the duration of this transfer. */ 954 if (!no_progress) 955 fprintf(stderr, "followed redirect to %s\n", p); 956 range_fetch_set_url(rf, p); 957 958 /* Flag caller to reconnect; the new URL might be a new target. */ 959 rf->server_close = 2; 960 } 961 /* No other headers that we care about. In particular: 962 * 963 * FIXME: non-conformant to HTTP/1.1 because we ignore 964 * Transfer-Encoding: chunked. 965 */ 966 } 967 return -1; 968 } 969 970 /* get_range_block(self, &offset, buf[], buflen) 971 * 972 * This is where it all happens. This is a complex function to present a very 973 * simple read(2)-like interface to the caller over the top of all the HTTP 974 * going on. 975 * 976 * It returns blocks of actual data, retrieved from the origin URL, to the 977 * caller. Data is returned in the buffer, up to the specified length, and the 978 * offset in the file from which the data comes is written to the offset 979 * parameter. 980 * 981 * Like read(2), it returns the total bytes read, 0 for EOF, -1 for error. 
982 * 983 * The blocks that it returns are the ones previously registered by calls to 984 * range_fetch_addranges (although it doesn't guarantee that only those block 985 * are returned - that's just what it asks the remote for, but if the remote 986 * returns more then it'll pass more to the caller - which doesn't matter). 987 */ 988 int get_range_block(struct range_fetch *rf, off_t * offset, unsigned char *data, 989 size_t dlen) { 990 size_t bytes_to_caller = 0; 991 992 /* If we're not in the middle of reading a block of actual data */ 993 if (!rf->block_left) { 994 check_boundary: 995 /* And if not reading a MIME multipart boundary */ 996 if (!rf->boundary) { 997 998 /* Then we're reading the start of a new set of HTTP headers 999 * (possibly after connecting and sending a request first. */ 1000 int newconn = 0; 1001 int header_result; 1002 1003 /* If the server closed the connection on us, close our end. */ 1004 if (rf->sd != -1 && rf->server_close == 2) { 1005 close(rf->sd); 1006 rf->sd = -1; 1007 } 1008 1009 /* If not connected, connect and immediately request a block */ 1010 if (rf->sd == -1) { 1011 if (rf->rangesdone == rf->nranges) 1012 return 0; 1013 range_fetch_connect(rf); 1014 if (rf->sd == -1) 1015 return -1; 1016 newconn = 1; 1017 range_fetch_getmore(rf); 1018 } 1019 1020 /* read the response headers */ 1021 header_result = range_fetch_read_http_headers(rf); 1022 1023 /* Might be the last */ 1024 if (rf->server_close == 1) 1025 rf->server_close = 2; 1026 1027 /* EOF on first connect is fatal */ 1028 if (newconn && header_result == 0) { 1029 fprintf(stderr, "EOF from %s\n", rf->url); 1030 return -1; 1031 } 1032 1033 /* Return EOF or error to caller */ 1034 if (header_result <= 0) 1035 return header_result ? 
-1 : 0; 1036 1037 /* Reconnect for a redirect */ 1038 if (header_result >= 300 && header_result < 400) { 1039 rf->server_close = 2; 1040 goto check_boundary; 1041 } 1042 1043 /* HTTP Pipelining - send next request before reading current response */ 1044 if (!rf->server_close) 1045 range_fetch_getmore(rf); 1046 } 1047 1048 /* Okay, if we're (now) reading a MIME boundary */ 1049 if (rf->boundary) { 1050 /* Throw away blank line */ 1051 char buf[512]; 1052 int gotr = 0; 1053 if (!rfgets(buf, sizeof(buf), rf)) 1054 return 0; 1055 1056 /* Get, hopefully, boundary marker line */ 1057 if (!rfgets(buf, sizeof(buf), rf)) 1058 return 0; 1059 if (buf[0] != '-' || buf[1] != '-') 1060 return 0; 1061 1062 if (memcmp(&buf[2], rf->boundary, strlen(rf->boundary))) { 1063 fprintf(stderr, "got bad block boundary: %s != %s", 1064 rf->boundary, buf); 1065 return -1; /* This is an error now */ 1066 } 1067 1068 /* Last record marker has boundary followed by - */ 1069 if (buf[2 + strlen(rf->boundary)] == '-') { 1070 free(rf->boundary); 1071 rf->boundary = NULL; 1072 goto check_boundary; 1073 } 1074 1075 /* Otherwise, we're reading the MIME headers for this part until we get \r\n alone */ 1076 for (; buf[0] != '\r' && buf[0] != '\n' && buf[0] != '\0';) { 1077 off_t from, to; 1078 1079 /* Get next header */ 1080 if (!rfgets(buf, sizeof(buf), rf)) 1081 return 0; 1082 buflwr(buf); /* HTTP headers are case insensitive */ 1083 1084 /* We're looking for the Content-Range: header, to tell us how 1085 * many bytes and what part of the target file they represent. 1086 */ 1087 if (2 == 1088 sscanf(buf, 1089 "content-range: bytes " OFF_T_PF "-" OFF_T_PF "/", 1090 &from, &to)) { 1091 rf->offset = from; 1092 rf->block_left = to - from + 1; 1093 gotr = 1; 1094 } 1095 } 1096 1097 /* If we didn't get the byte range that this block represents, it's busted. 
*/ 1098 if (!gotr) { 1099 fprintf(stderr, 1100 "got multipart/byteranges but no Content-Range?"); 1101 return -1; 1102 } 1103 1104 /* Else, record that this range is (being) received */ 1105 rf->rangesdone++; 1106 } 1107 } 1108 1109 /* Now the easy bit - we are reading a block of actual data */ 1110 if (!rf->block_left) 1111 return 0; /* pass EOF back to caller */ 1112 *offset = rf->offset; /* caller wants to know what this data is */ 1113 1114 /* Loop until we've retrieved a whole block */ 1115 for (;;) { 1116 /* Calculate how much more we can return to the caller now. This is the 1117 * minimum of: 1118 * the amount left in this block from the remote 1119 * space left in the caller's buffer 1120 * the amount we have actually read from the remote 1121 */ 1122 size_t rl = rf->block_left; 1123 if (rl > dlen) 1124 rl = dlen; 1125 if ((size_t) (rf->buf_end - rf->buf_start) < rl) { 1126 rl = rf->buf_end - rf->buf_start; 1127 1128 /* There is more data in this block, and space for more in the 1129 * caller's buffer, but we don't have any more read from the remote 1130 * into our buffer yet. So read more now. 1131 * If we don't get data, drop through and return what we have got. 1132 * If we do, back to top of loop and try again. 1133 */ 1134 if (!rl && get_more_data(rf) > 0) 1135 continue; 1136 } 1137 1138 /* If the caller's buffer is full or there's no more data in this block 1139 * to give, we can now return. */ 1140 if (!rl) 1141 return bytes_to_caller; 1142 1143 /* Copy that amount to the caller's their buffer from our buffer */ 1144 memcpy(data, &(rf->buf[rf->buf_start]), rl); 1145 rf->buf_start += rl; /* Track pos in our buffer... 
*/ 1146 data += rl; 1147 dlen -= rl; /* ...and caller's */ 1148 bytes_to_caller += rl; /* ...and the return value */ 1149 1150 /* Keep track of how much of the current block is left to read */ 1151 rf->block_left -= rl; 1152 /* and what position we are up to in the whole source file */ 1153 rf->offset += rl; 1154 /* And go around again */ 1155 } 1156 } 1157 1158 /* range_fetch_bytes_down 1159 * Simple getter method, returns the total bytes retrieved */ 1160 off_t range_fetch_bytes_down(const struct range_fetch * rf) { 1161 return rf->bytes_down; 1162 } 1163 1164 /* Destructor */ 1165 void range_fetch_end(struct range_fetch *rf) { 1166 if (rf->sd != -1) 1167 close(rf->sd); 1168 free(rf->ranges_todo); 1169 free(rf->boundary); 1170 free(rf->url); 1171 free(rf->cport); 1172 free(rf->chost); 1173 free(rf); 1174 }