"Fossies" - the Fresh Open Source Software archive

Member "httpf-1-0-5/read.c" of archive httpf-1-0-5-src.tar.gz:


/*
    Copyright (C) 2000 DFN-FWL (http://www.fwl.dfn.de/fwl)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
/*
 *
 * $Id: read.c,v 1.15 2001/06/08 21:49:08 saeschba Exp $
 *
 * $Log: read.c,v $
 * Revision 1.15  2001/06/08 21:49:08  saeschba
 * Modified some #include's for compiling on FreeBSD. Changed Makefil slightly.
 * args.h still includes param.h even though we use types.h everywhere else (the
 * two should not be used together according to man style on FreeBSD)
 *
 * Revision 1.14  2001/03/01 16:03:37  glauschwuffel
 * The three result constants for recv() are gone and replaced by our beloved
 * OKAY and failure. This was done to ease the processing when we catch SIGHUP.
 *
 * A header can now be seperated from the body by 0d,0a,0d,0a (HTTP specification)
 * or 0a,0a (altavista).
 *
 * Revision 1.13  2000/06/19 21:41:59  glauschwuffel
 * includes log.h instead of misc.h
 *
 * Revision 1.12  2000/05/07 16:18:11  glauschwuffel
 * Input data is now terminated with a null character, therefore we read only HTTP_BUFZIE-1 bytes.
 *
 * Revision 1.11  2000/04/24 19:52:04  gellert
 * BUGFIX: if we had an incomplete tag in the buffer, we did a
 * recv and assumed, that the buffer then is full. this is not
 * true if the server has a small segment size and we did just
 * get one segment til the recv. than the tag may still be
 * incomplete and we throw it away. now we recv until the buffer
 * is full (or eof occured) explicitly.
 *
 * Revision 1.10  2000/03/07 15:18:01  gellert
 * some changes to port to linux!!!
 *
 * Revision 1.9  2000/03/07 15:06:56  goldbach
 * added GPL info
 *
 * Revision 1.8  2000/03/01 17:43:56  goldbach
 * introduced some constants for read and copy-pkts
 *
 * Revision 1.7  1999/11/30 16:57:53  gellert
 * skip flag: initialization moved to beginning of copy-pakets. read_header: find_empty_line is called with the length of the read block, not the full HTTP_BUFSIZE.
 *
 * Revision 1.6  1999/11/11 15:59:33  gellert
 * If there is no empty line in the first blockof the header,
 * a new read is done to get the rest of the header (until the
 * inbuf is filled).
 *
 * Revision 1.5  1999/09/21 09:56:28  gellert
 * just changed the log level of the "EOF" messages to debug.
 *
 * Revision 1.4  1999/07/28 14:23:40  gellert
 * some changes to the calls of logmsg
 *
 * Revision 1.3  1999/05/27 16:48:01  goldbach
 * minor cleanup for logmsg()
 *
 * Revision 1.2  1999/05/12 16:04:23  goldbach
 * major clean-up session
 *
 * Revision 1.1  1999/05/12 14:39:36  goldbach
 * Renamed read_header.c to read.c
 *
 * Revision 1.6  1999/05/05 11:58:52  goldbach
 * minor clean-ups
 *
 * Revision 1.5  1999/03/30 12:30:08  goldbach
 * 1st surfable version
 *
 * Revision 1.4  1999/03/02 11:42:13  goldbach
 * sets inlen: the inbuf is valid up to here.
 *
 * Revision 1.3  1999/02/18 15:10:27  goldbach
 * fixed return codes, cleaned up the source a bit
 *
 * Revision 1.2  1999/02/16 17:42:58  gellert
 * fixed the missing end of comment "* /" before the call to strstr.
 * replaced "strstr" with an own routine to search for the end of the header.
 * this new routine is given a maximum length to search.
 *
 * Revision 1.1  1999/02/15 16:14:07  goldbach
 * Erste Version
 *
 *
 */

#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include "httpf.h"
#include "log.h"
#include "mythreads.h"
#include "read.h"

/* Prototypes for the file-local (static) helpers defined below. */
static int find_empty_line_in_header(TCT*);
static int read_block(TCT*, int);
static int read_full_block(TCT*, int);
static int read_header(TCT*, int);


/*
 * read_block
 *
 * Read data from a socket into inbuf with a single recv() call. The
 * address to read data into is inbuf+inlen. At most
 * HTTP_BUFSIZE-inlen-1 bytes are requested, so that one byte is always
 * left for the terminating null character.
 *
 * To read as much data as possible do:
 *   - reset_buffers()
 *   - read_block(_)
 * To fill the inbuf from a specific position:
 *   - set inlen to the position
 *   - read_block(_) [tries to read HTTP_BUFSIZE-tct->inlen-1 bytes]
 *
 * Sets:
 *   - tct->inbuf contents with data read from the given socket
 *   - tct->inlen to new length of valid data in inbuf.
 *   - tct->inbuf[tct->inlen] to 0 (null termination)
 *   - tct->recv_time to time before recv(). See threads.c for the
 *     deadlock checker
 *   - tct->error to NO_ERROR, or to SOCKET if recv() failed
 *   - tct->more_input_data to NO if recv() returned 0 (EOF)
 *
 * Results:
 *   OKAY -- recv() succeeded (this includes EOF)
 *   FAILURE -- recv() failed, tct->error is SOCKET
 */
static int read_block(TCT *tct, int socket)
{
    int len;

    tct->error=NO_ERROR;

    /*
     * A read after EOF is reported but not refused; the recv() below
     * is still issued.
     * NOTE(review): the original comment said "abort immediately" --
     * confirm whether callers rely on the read being attempted.
     */
    if (tct->more_input_data==NO) {
        logmsg(LOG_ERR, "[%d] Read attempt on socket %d after EOF",
               tct->tid, socket);
    }

    /* store current time to be able to detect deadlocks */
    tct->recv_time = time(NULL);

    /* read block, leaving room for the terminating null character */
    len=recv(socket,
             tct->inbuf+tct->inlen,
             HTTP_BUFSIZE-tct->inlen-1, 0);

    /* error on socket */
    if (len < 0) {
        logmsg(LOG_ERR, "[%d] Read error on socket %d with errno %d",
               tct->tid, socket, errno);
        tct->error=SOCKET;
        return FAILURE;
    }

    /* add #of bytes read to valid buffer length (len is >= 0 here) */
    tct->inlen += len;

    /* terminate data with null character */
    tct->inbuf[tct->inlen]=0;

    /*
     * Only dump the last four bytes if the buffer really holds four
     * bytes; otherwise we would read in front of inbuf. The bytes are
     * converted to unsigned so that values >= 0x80 are not printed
     * sign-extended.
     */
    if (tct->inlen >= 4) {
        logmsg(LOG_DEBUG, "[%d] Terminated data after bytes '%x %x %x %x'",
               tct->tid,
               (unsigned int)(unsigned char) tct->inbuf[tct->inlen-4],
               (unsigned int)(unsigned char) tct->inbuf[tct->inlen-3],
               (unsigned int)(unsigned char) tct->inbuf[tct->inlen-2],
               (unsigned int)(unsigned char) tct->inbuf[tct->inlen-1]);
    }

    logmsg(LOG_DEBUG, "[%d] Read %d bytes from socket %d.",
           tct->tid, len, socket);
    logmsg(LOG_DEBUG, "[%d] Inbuf contains %d bytes.",
           tct->tid, tct->inlen);

    /* eof: recv() returned 0 */
    if (len==0) {
        logmsg(LOG_DEBUG, "[%d] EOF on socket %d.",
               tct->tid, socket);
        tct->more_input_data=NO;
    }

    return OKAY;
}


/*
 * read_header()
 *
 * Read an HTTP header from the given socket into inbuf, starting
 * with an empty buffer (inlen and inoffset are reset to 0).
 * read_block() is called repeatedly until an empty line is found,
 * the buffer is full or EOF is seen. On success TCT->inoffset is set
 * to the offset where the body begins.
 *
 * read_full_block() is not used here on purpose since it blocks
 * until the buffer is full.
 *
 * Results:
 *   OKAY -- header has been read successfully
 *   FAILURE -- there has been an error. Check the error attribute:
 *     NO_END_OF_HEADER: the blank line is missing.
 *     SOCKET: EOF on the client's socket after the first read
 *     other: set by read_block()
 */
static int read_header(TCT *tct, int socket) {
  int body_offset, result;

  tct->inoffset = 0;
  tct->inlen = 0;

  result = read_block(tct, socket);
  if (result != OKAY) return result;

  /* EOF is an error if we operate on the client's socket */
  if ((socket == tct->insocket) && (tct->more_input_data == NO)) {
    tct->error = SOCKET;
    return FAILURE;
  }

  for (;;) {
    /*
     * try to find the end of the header (an empty line);
     * 0 means "not found", a real offset is always > 0
     */
    body_offset = find_empty_line_in_header(tct);
    if (body_offset != 0) break;

    /* give up if the buffer is full or there is no data left */
    if ((tct->inlen >= (HTTP_BUFSIZE-1)) || (tct->more_input_data != YES)) {
      logmsg(LOG_ERR, "[%d] No end of HTTP header found",
             tct->tid);
      tct->error = NO_END_OF_HEADER;
      return FAILURE;
    }

    /* otherwise append the next block and search again */
    logmsg(LOG_DEBUG, "[%d] Reading next block of header", tct->tid);
    result = read_block(tct, socket);
    if (result != OKAY) return result;
  }

  /* the line was found and we can now tell where the body begins */
  tct->inoffset = body_offset;

  logmsg(LOG_DEBUG, "[%d] Body starts at offset %d",
         tct->tid, tct->inoffset);

  /* inbuf is null terminated by read_block(), so %s is bounded */
  logmsg(LOG_DEBUG, "[%d] First few bytes of header: '%s'",
         tct->tid, tct->inbuf);

  return OKAY;
}

/*
 * look_for_empty_line_in_header()
 *
 * Search the valid part of the input buffer (the first tct->inlen
 * octets of tct->inbuf) for the first occurrence of the octet
 * sequence newlines[0..len-1].
 *
 * Every start position is compared on its own. A matcher that simply
 * restarts after a mismatch would skip the octet that caused the
 * mismatch and therefore miss e.g. 0d,0d,0a,0d,0a when looking for
 * 0d,0a,0d,0a.
 *
 * Returns the offset of the first octet after the sequence, or 0 if
 * the sequence is not found (or len is not positive). 0 is never a
 * valid offset since a match ends at offset len or later.
 */
static int look_for_empty_line_in_header(TCT *tct, const char *newlines, int len) {
  int start, last_start;

  /* nothing to search for, or buffer shorter than the sequence */
  if ((len <= 0) || (tct->inlen < len)) return 0;

  last_start = tct->inlen - len;
  for (start=0; start<=last_start; start++) {
    /* memcmp: the buffer may contain null octets */
    if (memcmp(tct->inbuf+start, newlines, (size_t) len) == 0) {
      /* logged value is the offset of the last octet of the match */
      logmsg(LOG_DEBUG, "[%d] Header ends at offset %d",
             tct->tid, start+len-1);

      return start+len;
    }
  }
  return 0;
}

/*
 * find_empty_line_in_header()
 *
 * Locate the empty line that separates header and body in the input
 * buffer. Two forms of 'empty line' are accepted, tried in this order:
 *   a) 0d,0a,0d,0a -- what the HTTP specification says
 *   b) 0a,0a       -- what the real world does (e.g. Altavista)
 *
 * Returns the position after the empty line (that should be the
 * first octet of the body) or 0 if there's no empty line.
 */
static int find_empty_line_in_header(TCT *tct) {
  static const char crlf_crlf[] = {0x0d, 0x0a, 0x0d, 0x0a};
  static const char lf_lf[] = {0x0a, 0x0a};
  int body_start;

  /* first choice: the separator required by the specification */
  body_start = look_for_empty_line_in_header(tct, crlf_crlf,
                                             (int) sizeof crlf_crlf);
  if (body_start != 0) return body_start;

  /* fall back to the bare line feed variant */
  logmsg(LOG_INFO, "[%d] Looking for an empty line (real world)", tct->tid);
  return look_for_empty_line_in_header(tct, lf_lf, (int) sizeof lf_lf);
}

/*
 * read_client_header()
 *
 * Read an HTTP header on the client's socket (tct->insocket) via
 * read_header().
 * We assume that the user pressed 'Stop' if we get
 * an EOF on the socket and indicate FAILURE with
 * a SOCKET error.
 *
 * Note: reading data from the server *may* include an EOF.
 *
 * Results:
 *   OKAY -- header has been read, no EOF seen
 *   FAILURE -- read_header() failed or EOF was seen; check tct->error
 */
int read_client_header(TCT *tct) {
  int read_result;

  /* the format has a single conversion, so only tid is passed */
  logmsg(LOG_INFO, "[%d] Reading client header",
         tct->tid);
  read_result = read_header(tct, tct->insocket);

  /* if there has been an error we indicate FAILURE anyway */
  if (read_result == FAILURE) return FAILURE;

  /*
   * Unlike reading from the server we have an error if
   * we see an EOF. We get EOF iff the client closed the
   * connection ('orderly shutdown', see select(3c)).
   * We expect the client to close the connection iff
   * the user pressed 'Stop'.
   */
  if (tct->more_input_data == NO) {
    tct->error = SOCKET;
    return FAILURE;
  }

  return OKAY;
}

/*
 * read_server_header()
 *
 * Read an HTTP header on the server's socket (tct->outsocket).
 * Returns the result of read_header().
 */
int read_server_header(TCT *tct) {
    /* the format has a single conversion, so only tid is passed */
    logmsg(LOG_INFO, "[%d] Reading server header",
           tct->tid);
    return read_header(tct, tct->outsocket);
}

/*
 * read_client_body()
 *
 * Read one block of data on the client's socket (tct->insocket).
 * The data is appended at inbuf+inlen. Returns the result of
 * read_block().
 */
int read_client_body(TCT *tct) {
    /* the format has a single conversion, so only tid is passed */
    logmsg(LOG_INFO, "[%d] Reading client body",
           tct->tid);
    return read_block(tct, tct->insocket);
}

/*
 * read_server_body()
 *
 * Read one block of data on the server's socket (tct->outsocket).
 * The data is appended at inbuf+inlen. Returns the result of
 * read_block().
 */
int read_server_body(TCT *tct) {
    /* the format has a single conversion, so only tid is passed */
    logmsg(LOG_INFO, "[%d] Reading server body",
           tct->tid);
    return read_block(tct, tct->outsocket);
}

/*
 * read_full_block()
 *
 * Call read_block() on the given socket again and again until
 *   - the inbuf is full (inlen has reached HTTP_BUFSIZE-1), or
 *   - read_block() does not return OKAY, or
 *   - tct->more_input_data is NO after a read.
 *
 * Returns the result of the last read_block() call, or OKAY if the
 * buffer was already full and nothing was read.
 */
static int read_full_block(TCT *tct, int socket) {
  int status = OKAY;

  while ((status == OKAY) && (tct->inlen < HTTP_BUFSIZE-1)) {
    status = read_block(tct, socket);

    /* no point in trying again once there is no more input */
    if (tct->more_input_data == NO) {
      break;
    }
  }

  return status;
}

/*
 * read_full_server_block()
 *
 * Tries to fill up the inbuf completely with data from the server's
 * socket (tct->outsocket), see read_full_block().
 *
 * Results:
 *  OKAY -- data has been read; the buffer is full
 *          (inlen = HTTP_BUFSIZE-1) or there is no more input
 *  FAILURE -- there has been an error whilst reading from socket
 *
 * Note: HTTP_BUFSIZE-1 is the maximum size since the data in the
 * buffer is terminated with a null character.
 *
 */
int read_full_server_block(TCT *tct) {
  int status;

  status = read_full_block(tct, tct->outsocket);
  return status;
}

/*
 * read_full_client_block()
 *
 * Same as read_full_server_block(), but for the client's socket
 * (tct->insocket).
 *
 * Note: don't use this. It blocks. It is just here to tell you that
 * you shouldn't use it.
 */
int read_full_client_block(TCT *tct) {
  int status;

  status = read_full_block(tct, tct->insocket);
  return status;
}