"Fossies" - the Fresh Open Source Software Archive

Member "tcpflow-1.6.1/src/scan_http.cpp" (19 Feb 2021, 18954 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "scan_http.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.5.0_vs_1.6.1.

    1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
    2 /**
    3  *
    4  * scan_http:
    5  * Decodes HTTP responses
    6  */
    7 
    8 #include "config.h"
    9 
   10 #include "tcpflow.h"
   11 #include "tcpip.h"
   12 #include "tcpdemux.h"
   13 
   14 #include "http-parser/http_parser.h"
   15 
   16 #include "mime_map.h"
   17 
   18 #ifdef HAVE_SYS_WAIT_H
   19 #include <sys/wait.h>
   20 #endif
   21 
   22 
   23 #ifdef HAVE_LIBZ
   24 #  define ZLIB_CONST
   25 #  ifdef GNUC_HAS_DIAGNOSTIC_PRAGMA
   26 #    pragma GCC diagnostic ignored "-Wundef"
   27 #    pragma GCC diagnostic ignored "-Wcast-qual"
   28 #  endif
   29 #  ifdef HAVE_ZLIB_H
   30 #    include <zlib.h>
   31 #  endif
   32 #else
   33 #  define z_stream void *               // prevents z_stream from generating an error
   34 #endif
   35 
   36 #define MIN_HTTP_BUFSIZE 80             // don't bother parsing smaller than this
   37 
   38 #include <sys/types.h>
   39 #include <iostream>
   40 #include <algorithm>
   41 #include <map>
   42 #include <iomanip>
   43 
   44 #define HTTP_CMD "http_cmd"
   45 #define HTTP_ALERT_FD "http_alert_fd"
   46 
   47 /* options */
   48 std::string http_cmd;                   // command to run on each http object
   49 int http_subproc_max = 10;              // how many subprocesses are we allowed?
   50 int http_subproc = 0;                   // how many do we currently have?
   51 int http_alert_fd = -1;                 // where should we send alerts?
   52 
   53 
   54 /* define a callback object for sharing state between scan_http() and its callbacks
   55  */
   56 class scan_http_cbo {
   57 private:
   58     typedef enum {NOTHING,FIELD,VALUE} last_on_header_t;
   59     scan_http_cbo(const scan_http_cbo& c); // not implemented
   60     scan_http_cbo &operator=(const scan_http_cbo &c); // not implemented
   61 
   62 public:
   63     virtual ~scan_http_cbo(){
   64         on_message_complete();          // make sure message was ended
   65     }
   66     scan_http_cbo(const std::string& path_,const char *base_,std::stringstream *xmlstream_) :
   67         path(path_), base(base_),xmlstream(xmlstream_),xml_fo(),request_no(0),
   68         headers(), last_on_header(NOTHING), header_value(), header_field(),
   69         output_path(), fd(-1), first_body(true),bytes_written(0),unzip(false),zs(),zinit(false),zfail(false){};
   70 private:        
   71         
   72     const std::string path;             // where data gets written
   73     const char *base;                   // where data started in memory
   74     std::stringstream *xmlstream;       // if present, where to put the fileobject annotations
   75     std::stringstream xml_fo;           // xml stream for this file object
   76     int request_no;                     // request number
   77         
   78     /* parsed headers */
   79     std::map<std::string, std::string> headers;
   80         
   81     /* placeholders for possibly-incomplete header data */
   82     last_on_header_t last_on_header;
   83     std::string header_value, header_field;
   84     std::string output_path;
   85     int         fd;                         // fd for writing
   86     bool        first_body;                 // first call to on_body after headers
   87     uint64_t    bytes_written;
   88 
   89     /* decompression for gzip-encoded streams. */
   90     bool     unzip;           // should we be decompressing?
   91     z_stream zs;              // zstream (avoids casting and memory allocation)
   92     bool     zinit;           // we have initialized the zstream 
   93     bool     zfail;           // zstream failed in some manner, so ignore the rest of this stream
   94 
   95     /* The static functions are callbacks; they wrap the method calls */
   96 #define CBO (reinterpret_cast<scan_http_cbo*>(parser->data))
   97 public:
   98     static int scan_http_cb_on_message_begin(http_parser * parser) { return CBO->on_message_begin();}
   99     static int scan_http_cb_on_url(http_parser * parser, const char *at, size_t length) { return 0;}
  100     static int scan_http_cb_on_header_field(http_parser * parser, const char *at, size_t length) { return CBO->on_header_field(at,length);}
  101     static int scan_http_cb_on_header_value(http_parser * parser, const char *at, size_t length) { return CBO->on_header_value(at,length); }
  102     static int scan_http_cb_on_headers_complete(http_parser * parser) { return CBO->on_headers_complete();}
  103     static int scan_http_cb_on_body(http_parser * parser, const char *at, size_t length) { return CBO->on_body(at,length);}
  104     static int scan_http_cb_on_message_complete(http_parser * parser) {return CBO->on_message_complete();}
  105 #undef CBO
  106 private:
  107     int on_message_begin();
  108     int on_url(const char *at, size_t length);
  109     int on_header_field(const char *at, size_t length);
  110     int on_header_value(const char *at, size_t length);
  111     int on_headers_complete();
  112     int on_body(const char *at, size_t length);
  113     int on_message_complete();          
  114 };
  115     
  116 
  117 /**
  118  * on_message_begin:
  119  * Increment the request number. Note that the first request is request_no = 1
  120  */
  121 
  122 int scan_http_cbo::on_message_begin()
  123 {
  124     request_no ++;
  125     return 0;
  126 }
  127 
  128 /**
  129  * on_url currently not implemented.
  130  */
  131 
  132 int scan_http_cbo::on_url(const char *at, size_t length)
  133 {
  134     return 0;
  135 }
  136 
  137 
  138 /* Note 1: The state machine is defined in http-parser/README.md
  139  * Note 2: All header field names are converted to lowercase.
  140  *         This is consistent with the RFC.
  141  */
  142 
  143 int scan_http_cbo::on_header_field(const char *at,size_t length)
  144 {
  145     std::string field(at,length);
  146     std::transform(field.begin(), field.end(), field.begin(), ::tolower);
  147     
  148     switch(last_on_header){
  149     case NOTHING:                       
  150         // Allocate new buffer and copy callback data into it
  151         header_field = field;
  152         break;
  153     case VALUE:
  154         // New header started.
  155         // Copy current name,value buffers to headers
  156         // list and allocate new buffer for new name
  157         headers[header_field] = header_value;
  158         header_field = field;
  159         break;
  160     case FIELD:
  161         // Previous name continues. Reallocate name
  162         // buffer and append callback data to it
  163         header_field.append(field);
  164         break;
  165     }
  166     last_on_header = FIELD;
  167     return 0;
  168 }
  169 
  170 int scan_http_cbo::on_header_value(const char *at, size_t length)
  171 {
  172     const std::string value(at,length);
  173     switch(last_on_header){
  174     case FIELD:
  175         //Value for current header started. Allocate
  176         //new buffer and copy callback data to it
  177         header_value = value;
  178         break;
  179     case VALUE:
  180         //Value continues. Reallocate value buffer
  181         //and append callback data to it
  182         header_value.append(value);
  183         break;
  184     case NOTHING:
  185         // this shouldn't happen
  186         DEBUG(10)("Internal error in http-parser");
  187         break;
  188     }
  189     last_on_header = VALUE;
  190 
  191     return 0;
  192 }
  193 
  194 /**
  195  * called when last header is read.
  196  * Determine the filename based on request_no and extension.
  197  * Also see if decompressing is happening...
  198  */
  199 
  200 int scan_http_cbo::on_headers_complete()
  201 {
  202     tcpdemux *demux = tcpdemux::getInstance();
  203 
  204     /* Add the most recently read header to the map, if any */
  205     if (last_on_header==VALUE) {
  206         headers[header_field] = header_value;
  207         header_field="";
  208     }
  209         
  210     /* Set output path to <path>-HTTPBODY-nnn.ext for each part.
  211      * This is not consistent with tcpflow <= 1.3.0, which supported only one HTTPBODY,
  212      * but it's correct...
  213      */
  214     
  215     std::stringstream os;
  216     os << path << "-HTTPBODY-" << std::setw(3) << std::setfill('0') << request_no << std::setw(0);
  217 
  218     /* See if we can guess a file extension */
  219     std::string extension = get_extension_for_mime_type(headers["content-type"]);
  220     if (extension.size()) {
  221         os << "." << extension;
  222     }
  223         
  224     output_path = os.str();
  225         
  226     /* Choose an output function based on the content encoding */
  227     std::string content_encoding(headers["content-encoding"]);
  228 
  229     if ((content_encoding == "gzip" || content_encoding == "deflate") && (demux->opt.gzip_decompress)){
  230 #ifdef HAVE_LIBZ
  231         DEBUG(10) ( "%s: detected zlib content, decompressing", output_path.c_str());
  232         unzip = true;
  233 #else
  234         /* We can't decompress, so just give it a .gz */
  235         output_path.append(".gz");
  236         DEBUG(5) ( "%s: refusing to decompress since zlib is unavailable", output_path.c_str() );
  237 #endif
  238     } 
  239         
  240     /* Open the output path */
  241     fd = demux->retrying_open(output_path.c_str(), O_WRONLY|O_CREAT|O_BINARY|O_TRUNC, 0644);
  242     if (fd < 0) {
  243         DEBUG(1) ("unable to open HTTP body file %s", output_path.c_str());
  244     }
  245     if(http_alert_fd>=0){
  246         std::stringstream ss;
  247         ss << "open\t" << output_path << "\n";
  248         const std::string &sso = ss.str();
  249         if(write(http_alert_fd,sso.c_str(),sso.size())!=(int)sso.size()){
  250             perror("write");
  251         }
  252     }
  253 
  254     first_body = true;                  // next call to on_body will be the first one
  255         
  256     /* We can do something smart with the headers here.
  257      *
  258      * For example, we could:
  259      *  - Record all headers into the report.xml
  260      *  - Pick the intended filename if we see Content-Disposition: attachment; name="..."
  261      *  - Record headers into filesystem extended attributes on the body file
  262      */
  263     return 0;
  264 }
  265 
  266 /* Write to fd, optionally decompressing as we go */
  267 int scan_http_cbo::on_body(const char *at,size_t length)
  268 {
  269     if (fd < 0)    return -1;              // no open fd? (internal error)x
  270     if (length==0) return 0;               // nothing to write
  271 
  272     if(first_body){                      // stuff for first time on_body is called
  273         xml_fo << "     <byte_run file_offset='" << (at-base) << "'><fileobject><filename>" << output_path << "</filename>";
  274         first_body = false;
  275     }
  276 
  277     /* If not decompressing, just write the data and return. */
  278     if(unzip==false){
  279         size_t offset = 0;
  280         while (offset < length) {
  281                 int rv = write(fd, at + offset, length - offset);
  282                 if (rv < 0) return -1;  // write error; that's bad
  283                 offset += rv;
  284         }
  285         bytes_written += offset;
  286         return 0;
  287     }
  288 
  289 #ifndef HAVE_LIBZ
  290     assert(0);                          // shoudln't have gotten here
  291 #endif    
  292     if(zfail) return 0;                 // stream was corrupt; ignore rest
  293     /* set up this round of decompression, using a small local buffer */
  294 
  295     /* Call init if we are not initialized */
  296     char decompressed[65536];           // where decompressed data goes
  297     if (!zinit) {
  298         memset(&zs,0,sizeof(zs));
  299         zs.next_in = (Bytef*)at;
  300         zs.avail_in = length;
  301         zs.next_out = (Bytef*)decompressed;
  302         zs.avail_out = sizeof(decompressed);
  303         
  304         int rv = inflateInit2(&zs, 32 + MAX_WBITS);      /* 32 auto-detects gzip or deflate */
  305         if (rv != Z_OK) {
  306             /* fail! */
  307             DEBUG(3) ("decompression failed at stream initialization; rv=%d bad Content-Encoding?",rv);
  308             zfail = true;
  309             return 0;
  310         }
  311         zinit = true;                   // successfully initted
  312     } else {
  313         zs.next_in = (Bytef*)at;
  314         zs.avail_in = length;
  315         zs.next_out = (Bytef*)decompressed;
  316         zs.avail_out = sizeof(decompressed);
  317     }
  318         
  319     /* iteratively decompress, writing each time */
  320     while (zs.avail_in > 0) {
  321         /* decompress as much as possible */
  322         int rv = inflate(&zs, Z_SYNC_FLUSH);
  323                 
  324         if (rv == Z_STREAM_END) {
  325             /* are we done with the stream? */
  326             if (zs.avail_in > 0) {
  327                 /* ...no. */
  328                 DEBUG(3) ("decompression completed, but with trailing garbage");
  329                 return 0;
  330             }
  331         } else if (rv != Z_OK) {
  332             /* some other error */
  333             DEBUG(3) ("decompression failed (corrupted stream?)");
  334             zfail = true;               // ignore the rest of this stream
  335             return 0;
  336         }
  337                 
  338         /* successful decompression, at least partly */
  339         /* write the result */
  340         int bytes_decompressed = sizeof(decompressed) - zs.avail_out;
  341         ssize_t written = write(fd, decompressed, bytes_decompressed);
  342 
  343         if (written < bytes_decompressed) {
  344             DEBUG(3) ("writing decompressed data failed");
  345             zfail= true;
  346             return 0;
  347         }
  348         bytes_written += written;
  349                 
  350         /* reset the buffer for the next iteration */
  351         zs.next_out = (Bytef*)decompressed;
  352         zs.avail_out = sizeof(decompressed);
  353     }
  354     return 0;
  355 }
  356 
  357 
  358 /**
  359  * called at the conclusion of each HTTP body.
  360  * Clean out all of the state for this HTTP header/body pair.
  361  */
  362 
  363 int scan_http_cbo::on_message_complete()
  364 {
  365     /* Close the file */
  366     headers.clear();
  367     header_field = "";
  368     header_value = "";
  369     last_on_header = NOTHING;
  370     if(fd >= 0) {
  371         if (::close(fd) != 0) {
  372             perror("close() of http body");
  373         }
  374         fd = -1;
  375     }
  376 
  377     /* Erase zero-length files and update the DFXML */
  378     if(bytes_written>0){
  379         /* Update DFXML */
  380         if(xmlstream){
  381             xml_fo << "<filesize>" << bytes_written << "</filesize></fileobject></byte_run>\n";
  382             if(xmlstream) *xmlstream << xml_fo.str();
  383         }
  384         if(http_alert_fd>=0){
  385             std::stringstream ss;
  386             ss << "close\t" << output_path << "\n";
  387             const std::string &sso = ss.str();
  388             if(write(http_alert_fd,sso.c_str(),sso.size()) != (int)sso.size()){
  389                 perror("write");
  390             }
  391         }
  392         if(http_cmd.size()>0 && output_path.size()>0){
  393             /* If we are at maximum number of subprocesses, wait for one to exit */
  394             std::string cmd = http_cmd + " " + output_path;
  395 #ifdef HAVE_FORK
  396             int status=0;
  397             pid_t pid = 0;
  398             while(http_subproc >= http_subproc_max){
  399                 pid = wait(&status);
  400                 http_subproc--;
  401             }
  402             /* Fork off a child */
  403             pid = fork();
  404             if(pid<0) die("Cannot fork child");
  405             if(pid==0){
  406                 /* We are the child */
  407                 exit(system(cmd.c_str()));
  408             }
  409             http_subproc++;
  410 #else
  411             system(cmd.c_str());
  412 #endif            
  413         }
  414     } else {
  415         /* Nothing written; erase the file */
  416         if(output_path.size() > 0){
  417             ::unlink(output_path.c_str());
  418         }
  419     }
  420 
  421     /* Erase the state variables for this part */
  422     xml_fo.str("");
  423     output_path = "";
  424     bytes_written=0;
  425     unzip = false;
  426     if(zinit){
  427         inflateEnd(&zs);
  428         zinit = false;
  429     }
  430     zfail = false;
  431     return 0;
  432 }
  433 
  434 
  435 /***
  436  * the HTTP scanner plugin itself
  437  */
  438 
  439 extern "C"
  440 void  scan_http(const class scanner_params &sp,const recursion_control_block &rcb)
  441 {
  442     if(sp.sp_version!=scanner_params::CURRENT_SP_VERSION){
  443         std::cerr << "scan_http requires sp version " << scanner_params::CURRENT_SP_VERSION << "; "
  444                   << "got version " << sp.sp_version << "\n";
  445         exit(1);
  446     }
  447 
  448     if(sp.phase==scanner_params::PHASE_STARTUP){
  449         sp.info->name  = "http";
  450         sp.info->flags = scanner_info::SCANNER_DISABLED; // default disabled
  451         sp.info->get_config(HTTP_CMD,&http_cmd,"Command to execute on each HTTP attachment");
  452         sp.info->get_config(HTTP_ALERT_FD,&http_alert_fd,"File descriptor to send information about completed HTTP attachments");
  453         return;         /* No feature files created */
  454     }
  455 
  456     if(sp.phase==scanner_params::PHASE_SCAN){
  457         /* See if there is an HTTP response */
  458         if(sp.sbuf.bufsize>=MIN_HTTP_BUFSIZE && sp.sbuf.memcmp(reinterpret_cast<const uint8_t *>("HTTP/1."),0,7)==0){
  459             /* Smells enough like HTTP to try parsing */
  460             /* Set up callbacks */
  461             http_parser_settings scan_http_parser_settings;
  462             memset(&scan_http_parser_settings,0,sizeof(scan_http_parser_settings)); // in the event that new callbacks get created
  463             scan_http_parser_settings.on_message_begin          = scan_http_cbo::scan_http_cb_on_message_begin;
  464             scan_http_parser_settings.on_url                    = scan_http_cbo::scan_http_cb_on_url;
  465             scan_http_parser_settings.on_header_field           = scan_http_cbo::scan_http_cb_on_header_field;
  466             scan_http_parser_settings.on_header_value           = scan_http_cbo::scan_http_cb_on_header_value;
  467             scan_http_parser_settings.on_headers_complete       = scan_http_cbo::scan_http_cb_on_headers_complete;
  468             scan_http_parser_settings.on_body                   = scan_http_cbo::scan_http_cb_on_body;
  469             scan_http_parser_settings.on_message_complete       = scan_http_cbo::scan_http_cb_on_message_complete;
  470                         
  471             if(sp.sxml) (*sp.sxml) << "\n    <byte_runs>\n";
  472             for(size_t offset=0;;){
  473                 /* Set up a parser instance for the next chunk of HTTP responses and data.
  474                  * This might be repeated several times due to connection re-use and multiple requests.
  475                  * Note that the parser is not a C++ library but it can pass a "data" to the
  476                  * callback. We put the address for the scan_http_cbo object in the data and
  477                  * recover it with a cast in each of the callbacks.
  478                  */
  479                 
  480                 /* Make an sbuf for the remaining data.
  481                  * Note that this may not be necessary, because in our test runs the parser
  482                  * processed all of the data the first time through...
  483                  */
  484                 sbuf_t sub_buf(sp.sbuf, offset);
  485                                 
  486                 const char *base = reinterpret_cast<const char*>(sub_buf.buf);
  487                 http_parser parser;
  488                 http_parser_init(&parser, HTTP_RESPONSE);
  489 
  490                 scan_http_cbo cbo(sp.sbuf.pos0.path,base,sp.sxml);
  491                 parser.data = &cbo;
  492 
  493                 /* Parse */
  494                 size_t parsed = http_parser_execute(&parser, &scan_http_parser_settings,
  495                                                     base, sub_buf.size());
  496                 assert(parsed <= sub_buf.size());
  497                                 
  498                 /* Indicate EOF (flushing callbacks) and terminate if we parsed the entire buffer.
  499                  */
  500                 if (parsed == sub_buf.size()) {
  501                     http_parser_execute(&parser, &scan_http_parser_settings, NULL, 0);
  502                     break;
  503                 }
  504                                 
  505                 /* Stop parsing if we parsed nothing, as that indicates something header! */
  506                 if (parsed == 0) {
  507                     break;
  508                 }
  509                                 
  510                 /* Stop parsing if we're a connection upgrade (e.g. WebSockets) */
  511                 if (parser.upgrade) {
  512                     DEBUG(9) ("upgrade connection detected (WebSockets?); cowardly refusing to dump further");
  513                     break;
  514                 }
  515                                 
  516                 /* Bump the offset for next iteration */
  517                 offset += parsed;
  518             }
  519             if(sp.sxml) (*sp.sxml) << "    </byte_runs>";
  520         }
  521     }
  522 }