/* "Fossies" - the Fresh Open Source Software archive */
/*
Copyright (C) 2000 DFN-FWL (http://www.fwl.dfn.de/fwl)
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/*
*
* $Id: read.c,v 1.15 2001/06/08 21:49:08 saeschba Exp $
*
* $Log: read.c,v $
* Revision 1.15 2001/06/08 21:49:08 saeschba
* Modified some #include's for compiling on FreeBSD. Changed Makefil slightly.
* args.h still includes param.h even though we use types.h everywhere else (the
* two should not be used together according to man style on FreeBSD)
*
* Revision 1.14 2001/03/01 16:03:37 glauschwuffel
* The three result constants for recv() are gone and replaced by our beloved
* OKAY and failure. This was done to ease the processing when we catch SIGHUP.
*
* A header can now be seperated from the body by 0d,0a,0d,0a (HTTP specification)
* or 0a,0a (altavista).
*
* Revision 1.13 2000/06/19 21:41:59 glauschwuffel
* includes log.h instead of misc.h
*
* Revision 1.12 2000/05/07 16:18:11 glauschwuffel
* Input data is now terminated with a null character, therefore we read only HTTP_BUFZIE-1 bytes.
*
* Revision 1.11 2000/04/24 19:52:04 gellert
* BUGFIX: if we had an incomplete tag in the buffer, we did a
* recv and assumed, that the buffer then is full. this is not
* true if the server has a small segment size and we did just
* get one segment til the recv. than the tag may still be
* incomplete and we throw it away. now we recv until the buffer
* is full (or eof occured) explicitly.
*
* Revision 1.10 2000/03/07 15:18:01 gellert
* some changes to port to linux!!!
*
* Revision 1.9 2000/03/07 15:06:56 goldbach
* added GPL info
*
* Revision 1.8 2000/03/01 17:43:56 goldbach
* introduced some constants for read and copy-pkts
*
* Revision 1.7 1999/11/30 16:57:53 gellert
* skip flag: initialization moved to beginning of copy-pakets. read_header: find_empty_line is called with the length of the read block, not the full HTTP_BUFSIZE.
*
* Revision 1.6 1999/11/11 15:59:33 gellert
* If there is no empty line in the first blockof the header,
* a new read is done to get the rest of the header (until the
* inbuf is filled).
*
* Revision 1.5 1999/09/21 09:56:28 gellert
* just changed the log level of the "EOF" messages to debug.
*
* Revision 1.4 1999/07/28 14:23:40 gellert
* some changes to the calls of logmsg
*
* Revision 1.3 1999/05/27 16:48:01 goldbach
* minor cleanup for logmsg()
*
* Revision 1.2 1999/05/12 16:04:23 goldbach
* major clean-up session
*
* Revision 1.1 1999/05/12 14:39:36 goldbach
* Renamed read_header.c to read.c
*
* Revision 1.6 1999/05/05 11:58:52 goldbach
* minor clean-ups
*
* Revision 1.5 1999/03/30 12:30:08 goldbach
* 1st surfable version
*
* Revision 1.4 1999/03/02 11:42:13 goldbach
* sets inlen: the inbuf is valid up to here.
*
* Revision 1.3 1999/02/18 15:10:27 goldbach
* fixed return codes, cleaned up the source a bit
*
* Revision 1.2 1999/02/16 17:42:58 gellert
* fixed the missing end of comment "* /" before the call to strstr.
* replaced "strstr" with an own routine to search for the end of the header.
* this new routine is given a maximum length to search.
*
* Revision 1.1 1999/02/15 16:14:07 goldbach
* Erste Version
*
*
*/
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "httpf.h"
#include "log.h"
#include "mythreads.h"
#include "read.h"
/* Forward declarations of the file-local helpers defined further down. */
static int find_empty_line_in_header(TCT *tct);
static int read_block(TCT *tct, int socket);
static int read_full_block(TCT *tct, int socket);
static int read_header(TCT *tct, int socket);
/*
 * read_block
 *
 * Read one chunk of data from a socket into the inbuf with a single
 * recv() call. The data is stored at inbuf+inlen and at most
 * HTTP_BUFSIZE-inlen-1 bytes are requested, so that one byte is left
 * for the terminating null character.
 *
 * To read as much data as possible do:
 * - reset_buffers()
 * - read_block(_)
 * To fill the inbuf from a specific position:
 * - set inlen to the position
 * - read_block(_) [tries to read HTTP_BUFSIZE-tct->inlen-1 bytes]
 *
 * Parameters:
 * - tct:    thread control structure owning the buffer
 * - socket: descriptor to receive from
 *
 * Sets:
 * - tct->error to NO_ERROR on entry and to SOCKET if recv() fails
 * - tct->inbuf contents with data read from the given socket
 * - tct->inlen to new length of valid data in inbuf.
 * - tct->recv_time to time before recv(). See threads.c for the
 *   deadlock checker
 * - tct->more_input_data to NO if recv() returned 0 (EOF)
 *
 * Results:
 * OKAY    -- recv() succeeded (this includes EOF)
 * FAILURE -- recv() returned an error; tct->error is SOCKET
 */
static int read_block(TCT *tct, int socket)
{
    int len;

    tct->error = NO_ERROR;

    /*
     * A read after EOF is only reported here; the recv() below is
     * still attempted.
     */
    if (tct->more_input_data == NO) {
        logmsg(LOG_ERR, "[%d] Read attempt on socket %d after EOF",
               tct->tid, socket);
    }

    /* store current time to be able to detect deadlocks */
    tct->recv_time = time(NULL);

    /*
     * read block
     * NOTE(review): if inlen already equals HTTP_BUFSIZE-1 the request
     * length is 0; recv() then presumably returns 0 and the call is
     * treated as EOF below -- callers should make sure there is room.
     */
    len = recv(socket,
               tct->inbuf + tct->inlen,
               HTTP_BUFSIZE - tct->inlen - 1, 0);

    /* error on socket */
    if (len < 0) {
        logmsg(LOG_ERR, "[%d] Read error on socket %d with errno %d",
               tct->tid, socket, errno);
        tct->error = SOCKET;
        return FAILURE;
    }

    /* add #of bytes read to valid buffer length (len is >= 0 here) */
    tct->inlen += len;

    /* terminate data with null character */
    tct->inbuf[tct->inlen] = 0;

    /*
     * Dump the last four bytes only if the buffer holds that many;
     * otherwise the indices below would point in front of inbuf.
     * The casts keep every value in the range 0..ff even if the
     * buffer elements are signed.
     */
    if (tct->inlen >= 4) {
        logmsg(LOG_DEBUG, "[%d] Terminated data after bytes '%x %x %x %x'",
               tct->tid,
               (unsigned char) tct->inbuf[tct->inlen - 4],
               (unsigned char) tct->inbuf[tct->inlen - 3],
               (unsigned char) tct->inbuf[tct->inlen - 2],
               (unsigned char) tct->inbuf[tct->inlen - 1]);
    }
    logmsg(LOG_DEBUG, "[%d] Read %d bytes from socket %d.",
           tct->tid, len, socket);
    logmsg(LOG_DEBUG, "[%d] Inbuf contains %d bytes.",
           tct->tid, tct->inlen);

    /* eof */
    if (len == 0) {
        logmsg(LOG_DEBUG, "[%d] EOF on socket %d.",
               tct->tid, socket);
        tct->more_input_data = NO;
    }

    return OKAY;
}
/*
 * read_header()
 *
 * Reset inoffset and inlen, then read blocks via read_block() until
 * an empty line (the end of the HTTP header) shows up in the inbuf.
 * On success tct->inoffset is set to the offset where the body begins.
 *
 * Parameters:
 * - tct:    thread control structure owning the buffer
 * - socket: descriptor to read the header from
 *
 * Results:
 * OKAY    -- header has been read successfully
 * FAILURE -- there has been an error. Check the error attribute:
 *            NO_END_OF_HEADER: the blank line is missing and either
 *                              the inbuf is full or EOF was seen.
 *            SOCKET: EOF on the first read from the client's socket
 *                    (tct->insocket).
 *            other: set by read_block()
 * Any other non-OKAY result of read_block() is passed through as is.
 */
static int read_header(TCT *tct, int socket) {
    int body_offset, result;

    tct->inoffset = 0;
    tct->inlen = 0;

    result = read_block(tct, socket);
    if (result != OKAY) return result;

    /* EOF is an error if we operate on the client's socket */
    if ((socket == tct->insocket) && (tct->more_input_data == NO)) {
        tct->error = SOCKET;
        return FAILURE;
    }

    for (;;) {
        /* try to find the end of the header (an empty line) */
        body_offset = find_empty_line_in_header(tct);
        if (body_offset != 0) break;  /* found: 0 means "no empty line" */

        /*
         * No empty line yet. Give up if the inbuf is full or the peer
         * has nothing more to send.
         */
        if ((tct->inlen >= (HTTP_BUFSIZE-1)) || (tct->more_input_data != YES)) {
            logmsg(LOG_ERR, "[%d] No end of HTTP header found",
                   tct->tid);
            tct->error = NO_END_OF_HEADER;
            return FAILURE;
        }

        /*
         * Re-read to get more of the header.
         * Don't use read_full_block() since this blocks (no pun intended)
         */
        logmsg(LOG_DEBUG, "[%d] Reading next block of header", tct->tid);
        result = read_block(tct, socket);
        if (result != OKAY) return result;
    }

    /* the line was found and we can now tell where the body begins */
    tct->inoffset = body_offset;
    logmsg(LOG_DEBUG, "[%d] Body starts at offset %d",
           tct->tid, tct->inoffset);
    logmsg(LOG_DEBUG, "[%d] First few bytes of header: '%s'",
           tct->tid, tct->inbuf);
    return OKAY;
}
/*
 * look_for_empty_line_in_header()
 *
 * Search the first tct->inlen bytes of tct->inbuf for the first
 * occurrence of the byte sequence newlines[0..len-1].
 *
 * Every start position is compared in full. A matcher that merely
 * resets its state on a mismatch would overlook a sequence that
 * begins on the mismatching byte, e.g. 0d,0a,0d,0a inside
 * 0d,0a,0d,0d,0a,0d,0a.
 *
 * Parameters:
 * - tct:      thread control structure owning the buffer
 * - newlines: byte sequence to look for (not null terminated)
 * - len:      number of bytes in newlines
 *
 * Returns the offset of the first byte after the sequence, or 0 if
 * the sequence does not occur (or len is not positive).
 */
static int look_for_empty_line_in_header(TCT *tct, const char *newlines, int len) {
    int start;

    if (len <= 0) return 0;

    for (start = 0; start + len <= tct->inlen; start++) {
        if (memcmp(tct->inbuf + start, newlines, (size_t) len) == 0) {
            int last = start + len - 1;  /* offset of the last matched byte */

            logmsg(LOG_DEBUG, "[%d] Header ends at offset %d",
                   tct->tid, last);
            return last + 1;
        }
    }
    return 0;
}
/*
 * find_empty_line_in_header()
 *
 * Try to find an empty line in the input buffer. An 'empty line' is
 * a) what the specification says: 0d,0a,0d,0a
 * b) what the real world does: 0a,0a
 *
 * Both forms are searched and the one that ends first wins. This
 * matters when a header is terminated by 0a,0a and 0d,0a,0d,0a shows
 * up later in the same buffer: the header ends at the first empty
 * line, not at the later one.
 *
 * Returns the position after the empty line (that should be the
 * first octet of the body) or 0 if there's no empty line.
 */
static int find_empty_line_in_header(TCT *tct) {
    static const char specification_nls[4] = {0x0d,0x0a,0x0d,0x0a}; /* HTTP specification */
    static const char real_world_nls[2] = {0x0a,0x0a}; /* real world, e.g. Altavista */
    int after_spec, after_real_world;

    after_spec = look_for_empty_line_in_header(tct, specification_nls, 4);
    if (after_spec == 0) {
        logmsg(LOG_INFO, "[%d] Looking for an empty line (real world)", tct->tid);
        return look_for_empty_line_in_header(tct, real_world_nls, 2);
    }

    /* a 0a,0a in front of the 0d,0a,0d,0a match ends the header earlier */
    after_real_world = look_for_empty_line_in_header(tct, real_world_nls, 2);
    if ((after_real_world != 0) && (after_real_world < after_spec)) {
        return after_real_world;
    }
    return after_spec;
}
/*
 * read_client_header()
 *
 * Read the HTTP header on the client's socket (tct->insocket) via
 * read_header().
 * We assume that the user pressed 'Stop' if we get
 * an EOF on the socket and indicate FAILURE with
 * a SOCKET error.
 *
 * Results:
 * OKAY    -- header has been read and no EOF was seen
 * FAILURE -- read_header() failed (tct->error set there) or EOF was
 *            seen (tct->error is SOCKET)
 *
 * Note: reading data from the server *may* include an EOF.
 */
int read_client_header(TCT *tct) {
    int read_result;

    /* the format has a single conversion, so only the tid is passed */
    logmsg(LOG_INFO, "[%d] Reading client header",
           tct->tid);
    read_result = read_header(tct, tct->insocket);

    /* if there has been an error we indicate FAILURE anyway */
    if (read_result == FAILURE) return FAILURE;

    /*
     * Unlike reading from the server we have an error if
     * we see an EOF. We get EOF iff the client closed the
     * connection ('orderly shutdown', see select(3c)).
     * We expect the client to close the connection iff
     * the user pressed 'Stop'.
     */
    if (tct->more_input_data == NO) {
        tct->error = SOCKET;
        return FAILURE;
    }
    return OKAY;
}
/*
 * read_server_header()
 *
 * Read the HTTP header on the server's socket (tct->outsocket).
 * Returns the result of read_header() unchanged.
 */
int read_server_header(TCT *tct) {
    /* the format has a single conversion, so only the tid is passed */
    logmsg(LOG_INFO, "[%d] Reading server header",
           tct->tid);
    return read_header(tct, tct->outsocket);
}
/*
 * read_client_body()
 *
 * Read one block of data from the client's socket (tct->insocket)
 * and append it to the inbuf at inlen.
 * Returns the result of read_block() unchanged.
 */
int read_client_body(TCT *tct) {
    /* the format has a single conversion, so only the tid is passed */
    logmsg(LOG_INFO, "[%d] Reading client body",
           tct->tid);
    return read_block(tct, tct->insocket);
}
/*
 * read_server_body()
 *
 * Read one block of data from the server's socket (tct->outsocket)
 * and append it to the inbuf at inlen.
 * Returns the result of read_block() unchanged.
 */
int read_server_body(TCT *tct) {
    /* the format has a single conversion, so only the tid is passed */
    logmsg(LOG_INFO, "[%d] Reading server body",
           tct->tid);
    return read_block(tct, tct->outsocket);
}
/*
 * read_full_block()
 *
 * Call read_block() repeatedly on the given socket. The loop ends as
 * soon as the inbuf holds HTTP_BUFSIZE-1 bytes, a read does not
 * return OKAY, or tct->more_input_data is NO after a read.
 *
 * Returns the result of the last read_block() call, or OKAY if no
 * read was necessary.
 */
static int read_full_block(TCT *tct, int socket) {
    int status = OKAY;

    for (;;) {
        /* buffer already filled up to the terminator slot */
        if (tct->inlen >= HTTP_BUFSIZE-1) break;
        /* previous read went wrong */
        if (status != OKAY) break;

        status = read_block(tct, socket);

        /* nothing more to come from this socket */
        if (tct->more_input_data == NO) break;
    }
    return status;
}
/*
 * read_full_server_block()
 *
 * Tries to fill up the inbuf completely with data from the server's
 * socket (tct->outsocket) by handing over to read_full_block().
 *
 * Results (as delivered by read_full_block()):
 * OKAY    -- reading went fine; the buffer is full
 *            (inlen = HTTP_BUFSIZE-1) unless EOF came first
 * FAILURE -- there has been an error whilst reading from socket
 *
 * Note: HTTP_BUFSIZE-1 is the maximum size since the data in the
 * buffer is terminated with a null character.
 */
int read_full_server_block(TCT *tct) {
    int status;

    status = read_full_block(tct, tct->outsocket);
    return status;
}
/*
 * read_full_client_block()
 *
 * Counterpart of read_full_server_block() for the client's socket
 * (tct->insocket); returns the result of read_full_block().
 *
 * Note: don't use this. It blocks. It only exists to tell you that
 * you shouldn't use it.
 */
int read_full_client_block(TCT *tct) {
    int status;

    status = read_full_block(tct, tct->insocket);
    return status;
}