tcpflow  1.6.1
About: tcpflow is a TCP/IP packet demultiplexer that captures data transmitted as part of TCP connections (flows), and stores the data in a way that is convenient for protocol analysis and debugging.
  Fossies Dox: tcpflow-1.6.1.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

scan_http.cpp
Go to the documentation of this file.
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /**
3  *
4  * scan_http:
5  * Decodes HTTP responses
6  */
7 
8 #include "config.h"
9 
10 #include "tcpflow.h"
11 #include "tcpip.h"
12 #include "tcpdemux.h"
13 
15 
16 #include "mime_map.h"
17 
18 #ifdef HAVE_SYS_WAIT_H
19 #include <sys/wait.h>
20 #endif
21 
22 
23 #ifdef HAVE_LIBZ
24 # define ZLIB_CONST
25 # ifdef GNUC_HAS_DIAGNOSTIC_PRAGMA
26 # pragma GCC diagnostic ignored "-Wundef"
27 # pragma GCC diagnostic ignored "-Wcast-qual"
28 # endif
29 # ifdef HAVE_ZLIB_H
30 # include <zlib.h>
31 # endif
32 #else
33 # define z_stream void * // prevents z_stream from generating an error
34 #endif
35 
36 #define MIN_HTTP_BUFSIZE 80 // don't bother parsing smaller than this
37 
38 #include <sys/types.h>
39 #include <iostream>
40 #include <algorithm>
41 #include <map>
42 #include <iomanip>
43 
44 #define HTTP_CMD "http_cmd"
45 #define HTTP_ALERT_FD "http_alert_fd"
46 
47 /* options (filled in from the scanner configuration by scan_http() at startup) */
48 std::string http_cmd; // shell command run on each saved HTTP body (body path is appended); empty = run nothing
49 int http_subproc_max = 10; // maximum number of http_cmd child processes allowed at once
50 int http_subproc = 0; // number of http_cmd child processes currently counted as running
51 int http_alert_fd = -1; // fd that receives "open\t<path>" / "close\t<path>" lines; negative = no alerts
52 
53 
54 /* define a callback object for sharing state between scan_http() and its callbacks
55  */
57 private:
59  scan_http_cbo(const scan_http_cbo& c); // not implemented
60  scan_http_cbo &operator=(const scan_http_cbo &c); // not implemented
61 
62 public:
63  virtual ~scan_http_cbo(){
64  on_message_complete(); // make sure message was ended
65  }
66  scan_http_cbo(const std::string& path_,const char *base_,std::stringstream *xmlstream_) :
67  path(path_), base(base_),xmlstream(xmlstream_),xml_fo(),request_no(0),
69  output_path(), fd(-1), first_body(true),bytes_written(0),unzip(false),zs(),zinit(false),zfail(false){};
70 private:
71 
72  const std::string path; // where data gets written
73  const char *base; // where data started in memory
74  std::stringstream *xmlstream; // if present, where to put the fileobject annotations
75  std::stringstream xml_fo; // xml stream for this file object
76  int request_no; // request number
77 
78  /* parsed headers */
79  std::map<std::string, std::string> headers;
80 
81  /* placeholders for possibly-incomplete header data */
83  std::string header_value, header_field;
84  std::string output_path;
85  int fd; // fd for writing
86  bool first_body; // first call to on_body after headers
87  uint64_t bytes_written;
88 
89  /* decompression for gzip-encoded streams. */
90  bool unzip; // should we be decompressing?
91  z_stream zs; // zstream (avoids casting and memory allocation)
92  bool zinit; // we have initialized the zstream
93  bool zfail; // zstream failed in some manner, so ignore the rest of this stream
94 
95  /* The static functions are callbacks; they wrap the method calls */
96 #define CBO (reinterpret_cast<scan_http_cbo*>(parser->data))
97 public:
98  static int scan_http_cb_on_message_begin(http_parser * parser) { return CBO->on_message_begin();}
99  static int scan_http_cb_on_url(http_parser * parser, const char *at, size_t length) { return 0;}
100  static int scan_http_cb_on_header_field(http_parser * parser, const char *at, size_t length) { return CBO->on_header_field(at,length);}
101  static int scan_http_cb_on_header_value(http_parser * parser, const char *at, size_t length) { return CBO->on_header_value(at,length); }
102  static int scan_http_cb_on_headers_complete(http_parser * parser) { return CBO->on_headers_complete();}
103  static int scan_http_cb_on_body(http_parser * parser, const char *at, size_t length) { return CBO->on_body(at,length);}
104  static int scan_http_cb_on_message_complete(http_parser * parser) {return CBO->on_message_complete();}
105 #undef CBO
106 private:
107  int on_message_begin();
108  int on_url(const char *at, size_t length);
109  int on_header_field(const char *at, size_t length);
110  int on_header_value(const char *at, size_t length);
111  int on_headers_complete();
112  int on_body(const char *at, size_t length);
113  int on_message_complete();
114 };
115 
116 
117 /**
118  * on_message_begin:
119  * Increment request nubmer. Note that the first request is request_no = 1
120  */
121 
123 {
124  request_no ++;
125  return 0;
126 }
127 
128 /**
129  * on_url currently not implemented.
130  */
131 
132 int scan_http_cbo::on_url(const char *at, size_t length)
133 {
134  return 0;
135 }
136 
137 
138 /* Note 1: The state machine is defined in http-parser/README.md
139  * Note 2: All header field names are converted to lowercase.
140  * This is consistent with the RFC.
141  */
142 
143 int scan_http_cbo::on_header_field(const char *at,size_t length)
144 {
145  std::string field(at,length);
146  std::transform(field.begin(), field.end(), field.begin(), ::tolower);
147 
148  switch(last_on_header){
149  case NOTHING:
150  // Allocate new buffer and copy callback data into it
151  header_field = field;
152  break;
153  case VALUE:
154  // New header started.
155  // Copy current name,value buffers to headers
156  // list and allocate new buffer for new name
158  header_field = field;
159  break;
160  case FIELD:
161  // Previous name continues. Reallocate name
162  // buffer and append callback data to it
163  header_field.append(field);
164  break;
165  }
167  return 0;
168 }
169 
170 int scan_http_cbo::on_header_value(const char *at, size_t length)
171 {
172  const std::string value(at,length);
173  switch(last_on_header){
174  case FIELD:
175  //Value for current header started. Allocate
176  //new buffer and copy callback data to it
177  header_value = value;
178  break;
179  case VALUE:
180  //Value continues. Reallocate value buffer
181  //and append callback data to it
182  header_value.append(value);
183  break;
184  case NOTHING:
185  // this shouldn't happen
186  DEBUG(10)("Internal error in http-parser");
187  break;
188  }
190 
191  return 0;
192 }
193 
194 /**
195  * called when last header is read.
196  * Determine the filename based on request_no and extension.
197  * Also see if decompressing is happening...
198  */
199 
201 {
202  tcpdemux *demux = tcpdemux::getInstance();
203 
204  /* Add the most recently read header to the map, if any */
205  if (last_on_header==VALUE) {
207  header_field="";
208  }
209 
210  /* Set output path to <path>-HTTPBODY-nnn.ext for each part.
211  * This is not consistent with tcpflow <= 1.3.0, which supported only one HTTPBODY,
212  * but it's correct...
213  */
214 
215  std::stringstream os;
216  os << path << "-HTTPBODY-" << std::setw(3) << std::setfill('0') << request_no << std::setw(0);
217 
218  /* See if we can guess a file extension */
219  std::string extension = get_extension_for_mime_type(headers["content-type"]);
220  if (extension.size()) {
221  os << "." << extension;
222  }
223 
224  output_path = os.str();
225 
226  /* Choose an output function based on the content encoding */
227  std::string content_encoding(headers["content-encoding"]);
228 
229  if ((content_encoding == "gzip" || content_encoding == "deflate") && (demux->opt.gzip_decompress)){
230 #ifdef HAVE_LIBZ
231  DEBUG(10) ( "%s: detected zlib content, decompressing", output_path.c_str());
232  unzip = true;
233 #else
234  /* We can't decompress, so just give it a .gz */
235  output_path.append(".gz");
236  DEBUG(5) ( "%s: refusing to decompress since zlib is unavailable", output_path.c_str() );
237 #endif
238  }
239 
240  /* Open the output path */
241  fd = demux->retrying_open(output_path.c_str(), O_WRONLY|O_CREAT|O_BINARY|O_TRUNC, 0644);
242  if (fd < 0) {
243  DEBUG(1) ("unable to open HTTP body file %s", output_path.c_str());
244  }
245  if(http_alert_fd>=0){
246  std::stringstream ss;
247  ss << "open\t" << output_path << "\n";
248  const std::string &sso = ss.str();
249  if(write(http_alert_fd,sso.c_str(),sso.size())!=(int)sso.size()){
250  perror("write");
251  }
252  }
253 
254  first_body = true; // next call to on_body will be the first one
255 
256  /* We can do something smart with the headers here.
257  *
258  * For example, we could:
259  * - Record all headers into the report.xml
260  * - Pick the intended filename if we see Content-Disposition: attachment; name="..."
261  * - Record headers into filesystem extended attributes on the body file
262  */
263  return 0;
264 }
265 
266 /* Write to fd, optionally decompressing as we go */
267 int scan_http_cbo::on_body(const char *at,size_t length)
268 {
269  if (fd < 0) return -1; // no open fd? (internal error)x
270  if (length==0) return 0; // nothing to write
271 
272  if(first_body){ // stuff for first time on_body is called
273  xml_fo << " <byte_run file_offset='" << (at-base) << "'><fileobject><filename>" << output_path << "</filename>";
274  first_body = false;
275  }
276 
277  /* If not decompressing, just write the data and return. */
278  if(unzip==false){
279  size_t offset = 0;
280  while (offset < length) {
281  int rv = write(fd, at + offset, length - offset);
282  if (rv < 0) return -1; // write error; that's bad
283  offset += rv;
284  }
285  bytes_written += offset;
286  return 0;
287  }
288 
289 #ifndef HAVE_LIBZ
290  assert(0); // shoudln't have gotten here
291 #endif
292  if(zfail) return 0; // stream was corrupt; ignore rest
293  /* set up this round of decompression, using a small local buffer */
294 
295  /* Call init if we are not initialized */
296  char decompressed[65536]; // where decompressed data goes
297  if (!zinit) {
298  memset(&zs,0,sizeof(zs));
299  zs.next_in = (Bytef*)at;
300  zs.avail_in = length;
301  zs.next_out = (Bytef*)decompressed;
302  zs.avail_out = sizeof(decompressed);
303 
304  int rv = inflateInit2(&zs, 32 + MAX_WBITS); /* 32 auto-detects gzip or deflate */
305  if (rv != Z_OK) {
306  /* fail! */
307  DEBUG(3) ("decompression failed at stream initialization; rv=%d bad Content-Encoding?",rv);
308  zfail = true;
309  return 0;
310  }
311  zinit = true; // successfully initted
312  } else {
313  zs.next_in = (Bytef*)at;
314  zs.avail_in = length;
315  zs.next_out = (Bytef*)decompressed;
316  zs.avail_out = sizeof(decompressed);
317  }
318 
319  /* iteratively decompress, writing each time */
320  while (zs.avail_in > 0) {
321  /* decompress as much as possible */
322  int rv = inflate(&zs, Z_SYNC_FLUSH);
323 
324  if (rv == Z_STREAM_END) {
325  /* are we done with the stream? */
326  if (zs.avail_in > 0) {
327  /* ...no. */
328  DEBUG(3) ("decompression completed, but with trailing garbage");
329  return 0;
330  }
331  } else if (rv != Z_OK) {
332  /* some other error */
333  DEBUG(3) ("decompression failed (corrupted stream?)");
334  zfail = true; // ignore the rest of this stream
335  return 0;
336  }
337 
338  /* successful decompression, at least partly */
339  /* write the result */
340  int bytes_decompressed = sizeof(decompressed) - zs.avail_out;
341  ssize_t written = write(fd, decompressed, bytes_decompressed);
342 
343  if (written < bytes_decompressed) {
344  DEBUG(3) ("writing decompressed data failed");
345  zfail= true;
346  return 0;
347  }
348  bytes_written += written;
349 
350  /* reset the buffer for the next iteration */
351  zs.next_out = (Bytef*)decompressed;
352  zs.avail_out = sizeof(decompressed);
353  }
354  return 0;
355 }
356 
357 
358 /**
359  * called at the conclusion of each HTTP body.
360  * Clean out all of the state for this HTTP header/body pair.
361  */
362 
364 {
365  /* Close the file */
366  headers.clear();
367  header_field = "";
368  header_value = "";
370  if(fd >= 0) {
371  if (::close(fd) != 0) {
372  perror("close() of http body");
373  }
374  fd = -1;
375  }
376 
377  /* Erase zero-length files and update the DFXML */
378  if(bytes_written>0){
379  /* Update DFXML */
380  if(xmlstream){
381  xml_fo << "<filesize>" << bytes_written << "</filesize></fileobject></byte_run>\n";
382  if(xmlstream) *xmlstream << xml_fo.str();
383  }
384  if(http_alert_fd>=0){
385  std::stringstream ss;
386  ss << "close\t" << output_path << "\n";
387  const std::string &sso = ss.str();
388  if(write(http_alert_fd,sso.c_str(),sso.size()) != (int)sso.size()){
389  perror("write");
390  }
391  }
392  if(http_cmd.size()>0 && output_path.size()>0){
393  /* If we are at maximum number of subprocesses, wait for one to exit */
394  std::string cmd = http_cmd + " " + output_path;
395 #ifdef HAVE_FORK
396  int status=0;
397  pid_t pid = 0;
398  while(http_subproc >= http_subproc_max){
399  pid = wait(&status);
400  http_subproc--;
401  }
402  /* Fork off a child */
403  pid = fork();
404  if(pid<0) die("Cannot fork child");
405  if(pid==0){
406  /* We are the child */
407  exit(system(cmd.c_str()));
408  }
409  http_subproc++;
410 #else
411  system(cmd.c_str());
412 #endif
413  }
414  } else {
415  /* Nothing written; erase the file */
416  if(output_path.size() > 0){
417  ::unlink(output_path.c_str());
418  }
419  }
420 
421  /* Erase the state variables for this part */
422  xml_fo.str("");
423  output_path = "";
424  bytes_written=0;
425  unzip = false;
426  if(zinit){
427  inflateEnd(&zs);
428  zinit = false;
429  }
430  zfail = false;
431  return 0;
432 }
433 
434 
435 /***
436  * the HTTP scanner plugin itself
437  */
438 
439 extern "C"
440 void scan_http(const class scanner_params &sp,const recursion_control_block &rcb)
441 {
443  std::cerr << "scan_http requires sp version " << scanner_params::CURRENT_SP_VERSION << "; "
444  << "got version " << sp.sp_version << "\n";
445  exit(1);
446  }
447 
449  sp.info->name = "http";
450  sp.info->flags = scanner_info::SCANNER_DISABLED; // default disabled
451  sp.info->get_config(HTTP_CMD,&http_cmd,"Command to execute on each HTTP attachment");
452  sp.info->get_config(HTTP_ALERT_FD,&http_alert_fd,"File descriptor to send information about completed HTTP attachments");
453  return; /* No feature files created */
454  }
455 
457  /* See if there is an HTTP response */
458  if(sp.sbuf.bufsize>=MIN_HTTP_BUFSIZE && sp.sbuf.memcmp(reinterpret_cast<const uint8_t *>("HTTP/1."),0,7)==0){
459  /* Smells enough like HTTP to try parsing */
460  /* Set up callbacks */
461  http_parser_settings scan_http_parser_settings;
462  memset(&scan_http_parser_settings,0,sizeof(scan_http_parser_settings)); // in the event that new callbacks get created
464  scan_http_parser_settings.on_url = scan_http_cbo::scan_http_cb_on_url;
468  scan_http_parser_settings.on_body = scan_http_cbo::scan_http_cb_on_body;
470 
471  if(sp.sxml) (*sp.sxml) << "\n <byte_runs>\n";
472  for(size_t offset=0;;){
473  /* Set up a parser instance for the next chunk of HTTP responses and data.
474  * This might be repeated several times due to connection re-use and multiple requests.
475  * Note that the parser is not a C++ library but it can pass a "data" to the
476  * callback. We put the address for the scan_http_cbo object in the data and
477  * recover it with a cast in each of the callbacks.
478  */
479 
480  /* Make an sbuf for the remaining data.
481  * Note that this may not be necessary, because in our test runs the parser
482  * processed all of the data the first time through...
483  */
484  sbuf_t sub_buf(sp.sbuf, offset);
485 
486  const char *base = reinterpret_cast<const char*>(sub_buf.buf);
487  http_parser parser;
489 
490  scan_http_cbo cbo(sp.sbuf.pos0.path,base,sp.sxml);
491  parser.data = &cbo;
492 
493  /* Parse */
494  size_t parsed = http_parser_execute(&parser, &scan_http_parser_settings,
495  base, sub_buf.size());
496  assert(parsed <= sub_buf.size());
497 
498  /* Indicate EOF (flushing callbacks) and terminate if we parsed the entire buffer.
499  */
500  if (parsed == sub_buf.size()) {
501  http_parser_execute(&parser, &scan_http_parser_settings, NULL, 0);
502  break;
503  }
504 
505  /* Stop parsing if we parsed nothing, as that indicates something header! */
506  if (parsed == 0) {
507  break;
508  }
509 
510  /* Stop parsing if we're a connection upgrade (e.g. WebSockets) */
511  if (parser.upgrade) {
512  DEBUG(9) ("upgrade connection detected (WebSockets?); cowardly refusing to dump further");
513  break;
514  }
515 
516  /* Bump the offset for next iteration */
517  offset += parsed;
518  }
519  if(sp.sxml) (*sp.sxml) << " </byte_runs>";
520  }
521  }
522 }
Definition: sbuf.h:221
static int scan_http_cb_on_header_field(http_parser *parser, const char *at, size_t length)
Definition: scan_http.cpp:100
std::string output_path
Definition: scan_http.cpp:84
virtual ~scan_http_cbo()
Definition: scan_http.cpp:63
std::stringstream * xmlstream
Definition: scan_http.cpp:74
std::string header_field
Definition: scan_http.cpp:83
int on_headers_complete()
Definition: scan_http.cpp:200
scan_http_cbo(const std::string &path_, const char *base_, std::stringstream *xmlstream_)
Definition: scan_http.cpp:66
const std::string path
Definition: scan_http.cpp:69
scan_http_cbo(const scan_http_cbo &c)
static int scan_http_cb_on_headers_complete(http_parser *parser)
Definition: scan_http.cpp:102
uint64_t bytes_written
Definition: scan_http.cpp:87
int on_message_complete()
Definition: scan_http.cpp:363
static int scan_http_cb_on_body(http_parser *parser, const char *at, size_t length)
Definition: scan_http.cpp:103
last_on_header_t last_on_header
Definition: scan_http.cpp:82
std::string header_value
Definition: scan_http.cpp:83
std::stringstream xml_fo
Definition: scan_http.cpp:75
int on_message_begin()
Definition: scan_http.cpp:122
const char * base
Definition: scan_http.cpp:73
static int scan_http_cb_on_url(http_parser *parser, const char *at, size_t length)
Definition: scan_http.cpp:99
int on_url(const char *at, size_t length)
Definition: scan_http.cpp:132
static int scan_http_cb_on_header_value(http_parser *parser, const char *at, size_t length)
Definition: scan_http.cpp:101
int on_header_value(const char *at, size_t length)
Definition: scan_http.cpp:170
static int scan_http_cb_on_message_complete(http_parser *parser)
Definition: scan_http.cpp:104
int on_header_field(const char *at, size_t length)
Definition: scan_http.cpp:143
scan_http_cbo & operator=(const scan_http_cbo &c)
static int scan_http_cb_on_message_begin(http_parser *parser)
Definition: scan_http.cpp:98
int on_body(const char *at, size_t length)
Definition: scan_http.cpp:267
std::map< std::string, std::string > headers
Definition: scan_http.cpp:79
bool gzip_decompress
Definition: tcpdemux.h:113
int retrying_open(const std::string &filename, int oflag, int mask)
Definition: tcpdemux.cpp:121
options opt
Definition: tcpdemux.h:146
static tcpdemux * getInstance()
Definition: tcpdemux.cpp:103
#define O_BINARY
size_t bufsize
Definition: sbuf.h:248
int memcmp(const uint8_t *cbuf, size_t at, size_t len) const
Definition: sbuf_private.h:11
pos0_t pos0
Definition: sbuf.h:235
const uint8_t * buf
Definition: sbuf.h:246
const std::string path
Definition: sbuf.h:72
size_t size() const
Definition: sbuf.h:378
std::string name
static const int CURRENT_SP_VERSION
virtual void get_config(const scanner_info::config_t &c, const std::string &name, std::string *val, const std::string &help)
Definition: plugin.cpp:415
std::stringstream * sxml
scanner_info * info
static const int SCANNER_DISABLED
const int sp_version
const phase_t phase
const sbuf_t & sbuf
size_t http_parser_execute(http_parser *parser, const http_parser_settings *settings, const char *data, size_t len)
Definition: http_parser.c:632
void http_parser_init(http_parser *parser, enum http_parser_type t)
Definition: http_parser.c:2084
@ HTTP_RESPONSE
Definition: http_parser.h:211
std::string get_extension_for_mime_type(const std::string &mime_type)
Definition: mime_map.cpp:719
#define MIN_HTTP_BUFSIZE
Definition: scan_http.cpp:36
void scan_http(const class scanner_params &sp, const recursion_control_block &rcb)
Definition: scan_http.cpp:440
#define HTTP_CMD
Definition: scan_http.cpp:44
#define CBO
Definition: scan_http.cpp:96
#define HTTP_ALERT_FD
Definition: scan_http.cpp:45
std::string http_cmd
Definition: scan_http.cpp:48
int http_subproc
Definition: scan_http.cpp:50
int http_subproc_max
Definition: scan_http.cpp:49
int http_alert_fd
Definition: scan_http.cpp:51
#define z_stream
Definition: scan_http.cpp:33
http_data_cb on_header_value
Definition: http_parser.h:324
http_cb on_headers_complete
Definition: http_parser.h:325
http_data_cb on_url
Definition: http_parser.h:321
http_data_cb on_body
Definition: http_parser.h:326
http_data_cb on_header_field
Definition: http_parser.h:323
http_cb on_message_complete
Definition: http_parser.h:327
unsigned int upgrade
Definition: http_parser.h:312
void * data
Definition: http_parser.h:315
int c
Definition: tcpdemux.cpp:366
void die(const char *fmt,...)
Definition: util.cpp:175
#define DEBUG(message_level)
Definition: tcpflow.h:273
unsigned char uint8_t
Definition: util.h:6