"Fossies" - the Fresh Open Source Software Archive

Member "tcpflow-1.6.1/src/be13_api/sbuf.cpp" (19 Feb 2021, 10962 Bytes) of package /linux/misc/tcpflow-1.6.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "sbuf.cpp", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.4.5_vs_1.5.0.

    1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
    2 #include "config.h"
    3 #include <fcntl.h>
    4 #include <sys/stat.h>
    5 #include <stdio.h>
    6 #include "bulk_extractor_i.h"
    7 #include "unicode_escape.h"
    8 
    9 /****************************************************************
   10  *** SBUF_T
   11  ****************************************************************/
   12 
   13 #ifndef O_BINARY
   14 #define O_BINARY 0
   15 #endif
   16 
   17 /**
   18  *  Map a file; falls back to read if mmap is not available
   19  */
   20 const std::string sbuf_t::U10001C("\xf4\x80\x80\x9c");
   21 std::string sbuf_t::map_file_delimiter(sbuf_t::U10001C);
   22 sbuf_t *sbuf_t::map_file(const std::string &fname)
   23 {
   24     int fd = open(fname.c_str(),O_RDONLY|O_BINARY,0);
   25     if(fd<0) return 0;          /* cannot open file */
   26     sbuf_t *sbuf = sbuf_t::map_file(fname,fd);
   27     if(sbuf) {
   28         sbuf->should_close = true;          // be sure to close the file
   29     }
   30     return sbuf;
   31 }
   32 
   33 /* Map a file when we are given an open fd.
   34  * The fd is not closed when the file is unmapped.
   35  * If there is no mmap, just allocate space and read the file
   36  */
   37 
   38 sbuf_t *sbuf_t::map_file(const std::string &fname,int fd)
   39 {
   40     struct stat st;
   41     if(fstat(fd,&st)){
   42         close(fd);
   43         return 0; /* cannot stat */
   44     }
   45 
   46 #ifdef HAVE_MMAP
   47     uint8_t *buf = (uint8_t *)mmap(0,st.st_size,PROT_READ,MAP_FILE|MAP_SHARED,fd,0);
   48     bool should_free  = false;
   49     bool should_unmap = true;
   50 #else
   51     uint8_t *buf = (uint8_t *)malloc(st.st_size);
   52     if(buf==0){         /* malloc failed */
   53         return 0;
   54     }
   55     lseek(fd,0,SEEK_SET);               // go to beginning of file
   56     size_t r = (size_t)read(fd,(void *)buf,st.st_size);
   57     if(r!=(size_t)st.st_size){
   58         free((void *)buf);              /* read failed */
   59         return 0;
   60     }
   61     close(fd);
   62     fd = 0;
   63     bool should_free = true;
   64     bool should_unmap = false;
   65 #endif
   66     sbuf_t *sbuf = new sbuf_t(pos0_t(fname+sbuf_t::map_file_delimiter),
   67                               buf,
   68                               st.st_size,
   69                               st.st_size,
   70                               fd,
   71                               should_unmap,
   72                               should_free,
   73                               false);   // the caller's job is to close
   74     return sbuf;
   75 }
   76 
   77 /*
   78  * Returns self or the highest parent of self, whichever is higher
   79  */
   80 const sbuf_t *sbuf_t::highest_parent() const 
   81 {
   82     const sbuf_t *hp = this;
   83     while(hp->parent != 0){
   84         hp = hp->parent;
   85     }
   86     return hp;
   87 }
   88 
   89 /**
   90  * rawdump the sbuf to an ostream.
   91  */
   92 void sbuf_t::raw_dump(std::ostream &os,uint64_t start,uint64_t len) const
   93 {
   94     for(uint64_t i=start;i<start+len  && i<bufsize;i++){
   95         os << buf[i];
   96     }
   97 }
   98 
   99 /**
  100  * rawdump the sbuf to a file descriptor
  101  */
  102 void sbuf_t::raw_dump(int fd2,uint64_t start,uint64_t len) const
  103 {
  104     if(len>bufsize-start) len=bufsize-start; // maximum left
  105     uint64_t written = ::write(fd2,buf+start,len);
  106     if(written!=len){
  107         std::cerr << "write: cannot write sbuf.\n";
  108     }
  109 }
  110 
  111 static std::string hexch(unsigned char ch)
  112 {
  113     char buf[4];
  114     snprintf(buf,sizeof(buf),"%02x",ch);
  115     return std::string(buf);
  116 }
  117 
  118 /**
  119  * hexdump the sbuf.
  120  */
  121 void sbuf_t::hex_dump(std::ostream &os,uint64_t start,uint64_t len) const
  122 {
  123     const size_t bytes_per_line = 32;
  124     size_t max_spaces = 0;
  125     for(uint64_t i=start;i<start+len && i<bufsize;i+=bytes_per_line){
  126         size_t spaces=0;
  127 
  128         /* Print the offset */
  129         char b[64];
  130         snprintf(b,sizeof(b),"%04x: ",(int)i);
  131         os << b;
  132         spaces += strlen(b);
  133 
  134         for(size_t j=0;j<bytes_per_line && i+j<bufsize && i+j<start+len;j++){
  135             unsigned char ch = (*this)[i+j];
  136             os << hexch(ch);  spaces += 2;
  137             if(j%2==1){
  138                 os << " ";
  139                 spaces += 1;
  140             }
  141         }
  142         if(spaces>max_spaces) max_spaces=spaces;
  143         for(;spaces<max_spaces;spaces++){
  144             os << ' ';
  145         }
  146         for(size_t j=0;j<bytes_per_line && i+j<bufsize && i+j<start+len;j++){
  147             unsigned char ch = (*this)[i+j];
  148             if(ch>=' ' && ch<='~') os << ch;
  149             else os << '.';
  150         }
  151         os << "\n";
  152     }
  153 }
  154 
  155 /* Write to a file descriptor */
  156 ssize_t sbuf_t::write(int fd_,size_t loc,size_t len) const
  157 {
  158     if(loc>=bufsize) return 0;          // cannot write
  159     if(loc+len>bufsize) len=bufsize-loc; // clip at the end
  160     return ::write(fd_,buf+loc,len);
  161 }
  162 
  163 /* Write to a FILE */
  164 ssize_t sbuf_t::write(FILE *f,size_t loc,size_t len) const
  165 {
  166     if(loc>=bufsize) return 0;          // cannot write
  167     if(loc+len>bufsize) len=bufsize-loc; // clip at the end
  168     return ::fwrite(buf+loc,1,len,f);
  169 }
  170 
  171 /* Return a substring */
  172 std::string sbuf_t::substr(size_t loc,size_t len) const
  173 {
  174     if(loc>=bufsize) return std::string("");            // cannot write
  175     if(loc+len>bufsize) len=bufsize-loc; // clip at the end
  176     return std::string((const char *)buf+loc,len);
  177 }
  178 
  179 bool sbuf_t::is_constant(size_t off,size_t len,uint8_t ch) const // verify that it's constant
  180 {
  181     while(len>0){
  182         if(((*this)[off])!=ch) return false;
  183         off++;
  184         len--;
  185     }
  186     return true;
  187 }
  188 
  189 void sbuf_t::hex_dump(std::ostream &os) const 
  190 {
  191     hex_dump(os,0,bufsize);
  192 }
  193 
  194 /**
  195  * Convert a binary blob to a hex representation
  196  */
  197 
  198 #ifndef NSRL_HEXBUF_UPPERCASE
  199 #define NSRL_HEXBUF_UPPERCASE 0x01
  200 #define NSRL_HEXBUF_SPACE2    0x02
  201 #define NSRL_HEXBUF_SPACE4    0x04
  202 #endif
  203 
  204 
  205 static int hexcharvals[256] = {-1,0};
  206 static const char *hexbuf(char *dst,int dst_len,const unsigned char *bin,int bytes,int flag)
  207 {
  208     int charcount = 0;
  209     const char *start = dst;            // remember where the start of the string is
  210     const char *fmt = (flag & NSRL_HEXBUF_UPPERCASE) ? "%02X" : "%02x";
  211 
  212     if(hexcharvals[0]==-1){
  213         /* Need to initialize this */
  214         for(int i=0;i<256;i++){
  215             hexcharvals[i] = 0;
  216         }
  217         for(int i=0;i<10;i++){
  218             hexcharvals['0'+i] = i;
  219         }
  220         for(int i=10;i<16;i++){
  221             hexcharvals['A'+i-10] = i;
  222             hexcharvals['a'+i-10] = i;
  223         }
  224     }
  225 
  226     *dst = 0;                           // begin with null termination
  227     while(bytes>0 && dst_len > 3){
  228         sprintf(dst,fmt,*bin); // convert the next byte
  229         dst += 2;
  230         bin += 1;
  231         dst_len -= 2;
  232         bytes--;
  233         charcount++;                    // how many characters
  234         
  235         if((flag & NSRL_HEXBUF_SPACE2) || ((flag & NSRL_HEXBUF_SPACE4) && charcount%2==0)){
  236             *dst++ = ' ';
  237             *dst   = '\000';
  238             dst_len -= 1;
  239         }
  240     }
  241     return start;                       // return the start
  242 }
  243 
  244 
  245 std::ostream & operator <<(std::ostream &os,const sbuf_t &t){
  246         char hex[17];
  247         hexbuf(hex,sizeof(hex),t.buf,8,0);
  248         os << "sbuf[page_number="   << t.page_number
  249            << " pos0=" << t.pos0 << " " << "buf[0..8]=0x" << hex
  250            << " bufsize=" << t.bufsize << " pagesize=" << t.pagesize << "]";
  251         return os;
  252     }
  253 
  254 /**
  255  * Read the requested number of UTF-8 format string octets including any \0.
  256  */
  257 void sbuf_t::getUTF8(size_t i, size_t num_octets_requested, std::string &utf8_string) const {
  258     // clear any residual value
  259     utf8_string = "";
  260 
  261     if(i>=bufsize) {
  262         // past EOF
  263         return;
  264     }
  265     if(i+num_octets_requested>bufsize) {
  266         // clip at EOF
  267         num_octets_requested = bufsize - i;
  268     }
  269     utf8_string = std::string((const char *)buf+i,num_octets_requested);
  270 }
  271 
  272 /**
  273  * Read UTF-8 format code octets into string up to but not including \0.
  274  */
  275 void sbuf_t::getUTF8(size_t i, std::string &utf8_string) const {
  276     // clear any residual value
  277     utf8_string = "";
  278 
  279     // read octets
  280     for (size_t off=i; off<bufsize; off++) {
  281         uint8_t octet = get8u(off);
  282 
  283         // stop before \0
  284         if (octet == 0) {
  285             // at \0
  286             break;
  287         }
  288 
  289         // accept the octet
  290         utf8_string.push_back(octet);
  291     }
  292 }
  293 
  294 /**
  295  * Read the requested number of UTF-16 format code units into wstring including any \U0000.
  296  */
  297 void sbuf_t::getUTF16(size_t i, size_t num_code_units_requested, std::wstring &utf16_string) const {
  298     // clear any residual value
  299     utf16_string = std::wstring();
  300 
  301     if(i>=bufsize) {
  302         // past EOF
  303         return;
  304     }
  305     if(i+num_code_units_requested*2+1>bufsize) {
  306         // clip at EOF
  307         num_code_units_requested = ((bufsize-1)-i)/2;
  308     }
  309     // NOTE: we can't use wstring constructor because we require 16 bits,
  310     // not whatever sizeof(wchar_t) is.
  311     // utf16_string = std::wstring((const char *)buf+i,num_code_units_requested);
  312 
  313     // get code units individually
  314     for (size_t j = 0; j < num_code_units_requested; j++) {
  315         utf16_string.push_back(get16u(i + j*2));
  316     }
  317 }
  318 
  319 /**
  320  * Read UTF-16 format code units into wstring up to but not including \U0000.
  321  */
  322 void sbuf_t::getUTF16(size_t i, std::wstring &utf16_string) const {
  323     // clear any residual value
  324     utf16_string = std::wstring();
  325 
  326     // read the code units
  327     size_t off;
  328     for (off=i; off<bufsize-1; off += 2) {
  329         uint16_t code_unit = get16u(off);
  330         //cout << "sbuf.cpp getUTF16 i: " << i << " code unit: " << code_unit << "\n";
  331 
  332         // stop before \U0000
  333         if (code_unit == 0) {
  334             // at \U0000
  335             break;
  336         }
  337 
  338         // accept the code unit
  339         utf16_string.push_back(code_unit);
  340     }
  341 }
  342 
  343 /**
  344  * Read the requested number of UTF-16 format code units using the specified byte order into wstring including any \U0000.
  345  */
  346 void sbuf_t::getUTF16(size_t i, size_t num_code_units_requested, byte_order_t bo, std::wstring &utf16_string) const {
  347     // clear any residual value
  348     utf16_string = std::wstring();
  349 
  350     if(i>=bufsize) {
  351         // past EOF
  352         return;
  353     }
  354     if(i+num_code_units_requested*2+1>bufsize) {
  355         // clip at EOF
  356         num_code_units_requested = ((bufsize-1)-i)/2;
  357     }
  358     // NOTE: we can't use wstring constructor because we require 16 bits,
  359     // not whatever sizeof(wchar_t) is.
  360     // utf16_string = std::wstring((const char *)buf+i,num_code_units_requested);
  361 
  362     // get code units individually
  363     for (size_t j = 0; j < num_code_units_requested; j++) {
  364         utf16_string.push_back(get16u(i + j, bo));
  365     }
  366 }
  367 
  368 /**
  369  * Read UTF-16 format code units using the specified byte order into wstring up to but not including \U0000.
  370  */
  371 void sbuf_t::getUTF16(size_t i, byte_order_t bo, std::wstring &utf16_string) const {
  372     // clear any residual value
  373     utf16_string = std::wstring();
  374 
  375     // read the code units
  376     size_t off;
  377     for (off=i; off<bufsize-1; off += 2) {
  378         uint16_t code_unit = get16u(off, bo);
  379         //cout << "sbuf.cpp getUTF16 i: " << i << " code unit: " << code_unit << "\n";
  380 
  381         // stop before \U0000
  382         if (code_unit == 0) {
  383             // at \U0000
  384             break;
  385         }
  386 
  387         // accept the code unit
  388         utf16_string.push_back(code_unit);
  389     }
  390 }
  391