tcpflow  1.6.1
About: tcpflow is a TCP/IP packet demultiplexer that captures data transmitted as part of TCP connections (flows), and stores the data in a way that is convenient for protocol analysis and debugging.
  Fossies Dox: tcpflow-1.6.1.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

sbuf.cpp
Go to the documentation of this file.
1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 #include "config.h"
3 #include <fcntl.h>
4 #include <sys/stat.h>
5 #include <stdio.h>
6 #include "bulk_extractor_i.h"
7 #include "unicode_escape.h"
8 
9 /****************************************************************
10  *** SBUF_T
11  ****************************************************************/
12 
13 #ifndef O_BINARY
14 #define O_BINARY 0
15 #endif
16 
17 /**
18  * Map a file; falls back to read if mmap is not available
19  */
/* The four bytes F4 80 80 9C are the UTF-8 encoding of the code point U+10001C. */
20 const std::string sbuf_t::U10001C("\xf4\x80\x80\x9c");
22 sbuf_t *sbuf_t::map_file(const std::string &fname)
23 {
24  int fd = open(fname.c_str(),O_RDONLY|O_BINARY,0);
25  if(fd<0) return 0; /* cannot open file */
26  sbuf_t *sbuf = sbuf_t::map_file(fname,fd);
27  if(sbuf) {
28  sbuf->should_close = true; // be sure to close the file
29  }
30  return sbuf;
31 }
32 
33 /* Map a file when we are given an open fd.
34  * The fd is not closed when the file is unmapped.
35  * If there is no mmap, just allocate space and read the file
36  */
37 
/*
 * Build an sbuf over the contents of an already-open descriptor.
 *
 * The file size is taken from fstat(). With HAVE_MMAP the file is mapped
 * read-only; otherwise a buffer of st_size bytes is malloc'ed and filled
 * with a single read() from the start of the file.
 *
 * Returns 0 when fstat() fails and, in the non-mmap build, when malloc()
 * fails or read() returns fewer than st_size bytes.
 *
 * NOTE(review): this listing is missing original lines 66, 71 and 72, so
 * the start of the statement that creates `sbuf` and two of its arguments
 * are not visible here -- presumably a `new sbuf_t(...)` taking
 * should_unmap/should_free; confirm against the real source.
 */
38 sbuf_t *sbuf_t::map_file(const std::string &fname,int fd)
39 {
40  struct stat st;
41  if(fstat(fd,&st)){ // non-zero return means fstat failed
42  close(fd); // NOTE(review): the caller's fd is closed on this failure path only
43  return 0; /* cannot stat */
44  }
45 
46 #ifdef HAVE_MMAP
47  uint8_t *buf = (uint8_t *)mmap(0,st.st_size,PROT_READ,MAP_FILE|MAP_SHARED,fd,0); // NOTE(review): result is not compared against MAP_FAILED
48  bool should_free = false;
49  bool should_unmap = true;
50 #else
51  uint8_t *buf = (uint8_t *)malloc(st.st_size);
52  if(buf==0){ /* malloc failed */
53  return 0; // NOTE(review): fd is left open on this path
54  }
55  lseek(fd,0,SEEK_SET); // go to beginning of file
56  size_t r = (size_t)read(fd,(void *)buf,st.st_size); // a single read(); a short read is treated as failure below
57  if(r!=(size_t)st.st_size){
58  free((void *)buf); /* read failed */
59  return 0; // NOTE(review): fd is left open on this path
60  }
61  close(fd); // contents are now in memory, so the descriptor is closed here
62  fd = 0; // the value 0 (not -1) is what gets passed on as the fd argument below
63  bool should_free = true;
64  bool should_unmap = false;
65 #endif
67  buf,
68  st.st_size,
69  st.st_size,
70  fd,
73  false); // the caller's job is to close
74  return sbuf;
75 }
76 
77 /*
78  * Returns self or the highest parent of self, whichever is higher
79  */
/* NOTE(review): the signature line (original line 80) is missing from this
 * listing; the index at the end of the listing gives it as
 * `const sbuf_t * highest_parent() const`. */
81 {
82  const sbuf_t *hp = this; // start the walk at this sbuf
83  while(hp->parent != 0){ // follow parent links until an sbuf with no parent is reached
84  hp = hp->parent;
85  }
86  return hp; // this sbuf itself when it has no parent
87 }
88 
89 /**
90  * rawdump the sbuf to an ostream.
91  */
92 void sbuf_t::raw_dump(std::ostream &os,uint64_t start,uint64_t len) const
93 {
94  for(uint64_t i=start;i<start+len && i<bufsize;i++){
95  os << buf[i];
96  }
97 }
98 
99 /**
100  * rawdump the sbuf to a file descriptor
101  */
102 void sbuf_t::raw_dump(int fd2,uint64_t start,uint64_t len) const
103 {
104  if(len>bufsize-start) len=bufsize-start; // maximum left
105  uint64_t written = ::write(fd2,buf+start,len);
106  if(written!=len){
107  std::cerr << "write: cannot write sbuf.\n";
108  }
109 }
110 
/**
 * Render one byte as exactly two lowercase hexadecimal digits.
 *
 * @param ch  the byte to render.
 * @return a two-character string, e.g. "0a" for 0x0a.
 */
static std::string hexch(unsigned char ch)
{
    static const char digits[] = "0123456789abcdef";

    std::string text(2, '0');
    text[0] = digits[(ch >> 4) & 0x0f];     // high nibble
    text[1] = digits[ch & 0x0f];            // low nibble
    return text;
}
117 
118 /**
119  * hexdump the sbuf.
120  */
121 void sbuf_t::hex_dump(std::ostream &os,uint64_t start,uint64_t len) const
122 {
123  const size_t bytes_per_line = 32;
124  size_t max_spaces = 0;
125  for(uint64_t i=start;i<start+len && i<bufsize;i+=bytes_per_line){
126  size_t spaces=0;
127 
128  /* Print the offset */
129  char b[64];
130  snprintf(b,sizeof(b),"%04x: ",(int)i);
131  os << b;
132  spaces += strlen(b);
133 
134  for(size_t j=0;j<bytes_per_line && i+j<bufsize && i+j<start+len;j++){
135  unsigned char ch = (*this)[i+j];
136  os << hexch(ch); spaces += 2;
137  if(j%2==1){
138  os << " ";
139  spaces += 1;
140  }
141  }
142  if(spaces>max_spaces) max_spaces=spaces;
143  for(;spaces<max_spaces;spaces++){
144  os << ' ';
145  }
146  for(size_t j=0;j<bytes_per_line && i+j<bufsize && i+j<start+len;j++){
147  unsigned char ch = (*this)[i+j];
148  if(ch>=' ' && ch<='~') os << ch;
149  else os << '.';
150  }
151  os << "\n";
152  }
153 }
154 
155 /* Write to a file descriptor */
156 ssize_t sbuf_t::write(int fd_,size_t loc,size_t len) const
157 {
158  if(loc>=bufsize) return 0; // cannot write
159  if(loc+len>bufsize) len=bufsize-loc; // clip at the end
160  return ::write(fd_,buf+loc,len);
161 }
162 
163 /* Write to a FILE */
164 ssize_t sbuf_t::write(FILE *f,size_t loc,size_t len) const
165 {
166  if(loc>=bufsize) return 0; // cannot write
167  if(loc+len>bufsize) len=bufsize-loc; // clip at the end
168  return ::fwrite(buf+loc,1,len,f);
169 }
170 
171 /* Return a substring */
172 std::string sbuf_t::substr(size_t loc,size_t len) const
173 {
174  if(loc>=bufsize) return std::string(""); // cannot write
175  if(loc+len>bufsize) len=bufsize-loc; // clip at the end
176  return std::string((const char *)buf+loc,len);
177 }
178 
179 bool sbuf_t::is_constant(size_t off,size_t len,uint8_t ch) const // verify that it's constant
180 {
181  while(len>0){
182  if(((*this)[off])!=ch) return false;
183  off++;
184  len--;
185  }
186  return true;
187 }
188 
189 void sbuf_t::hex_dump(std::ostream &os) const
190 {
191  hex_dump(os,0,bufsize);
192 }
193 
/**
 * Convert a binary blob to a hex representation.
 *
 * Flags accepted by hexbuf():
 *   NSRL_HEXBUF_UPPERCASE  use A-F instead of a-f
 *   NSRL_HEXBUF_SPACE2     put a space after every byte
 *   NSRL_HEXBUF_SPACE4     put a space after every second byte
 */

#ifndef NSRL_HEXBUF_UPPERCASE
#define NSRL_HEXBUF_UPPERCASE 0x01
#define NSRL_HEXBUF_SPACE2    0x02
#define NSRL_HEXBUF_SPACE4    0x04
#endif


/**
 * Write bytes from bin into dst as NUL-terminated hexadecimal text.
 *
 * Conversion stops when all bytes have been converted or when the next
 * byte (two digits, an optional separator, and the terminating NUL) no
 * longer fits in what is left of dst. The output is therefore always
 * NUL-terminated and never overruns dst.
 *
 * @param dst      destination buffer.
 * @param dst_len  size of dst in bytes, including room for the NUL.
 * @param bin      bytes to convert.
 * @param bytes    number of bytes available at bin.
 * @param flag     bitwise OR of the NSRL_HEXBUF_* flags.
 * @return dst (the start of the string). Nothing is written when dst is
 *         null or dst_len is not positive.
 */
static const char *hexbuf(char *dst,int dst_len,const unsigned char *bin,int bytes,int flag)
{
    const char *start = dst;            // remember where the start of the string is
    if(dst==0 || dst_len<=0) return start;      // no room even for the NUL

    const char *digits = (flag & NSRL_HEXBUF_UPPERCASE) ? "0123456789ABCDEF"
                                                        : "0123456789abcdef";
    int bytecount = 0;                  // how many bytes have been converted

    *dst = 0;                           // begin with null termination
    while(bin!=0 && bytes>0){
        /* Will a separator follow this byte? */
        const bool add_space = (flag & NSRL_HEXBUF_SPACE2)
            || ((flag & NSRL_HEXBUF_SPACE4) && (bytecount+1)%2==0);

        /* Two digits, the optional separator, and the terminating NUL.
         * Checking the exact requirement (rather than a fixed margin)
         * lets the last byte that fits actually be written.
         */
        const int needed = 2 + (add_space ? 1 : 0) + 1;
        if(dst_len<needed) break;

        *dst++ = digits[(*bin >> 4) & 0x0f];
        *dst++ = digits[*bin & 0x0f];
        if(add_space) *dst++ = ' ';
        *dst = '\000';                  // keep the string terminated

        dst_len -= needed-1;            // the NUL slot is reused by the next byte
        bin += 1;
        bytes--;
        bytecount++;
    }
    return start;                       // return the start
}
243 
244 
245 std::ostream & operator <<(std::ostream &os,const sbuf_t &t){
246  char hex[17];
247  hexbuf(hex,sizeof(hex),t.buf,8,0);
248  os << "sbuf[page_number=" << t.page_number
249  << " pos0=" << t.pos0 << " " << "buf[0..8]=0x" << hex
250  << " bufsize=" << t.bufsize << " pagesize=" << t.pagesize << "]";
251  return os;
252  }
253 
254 /**
255  * Read the requested number of UTF-8 format string octets including any \0.
256  */
257 void sbuf_t::getUTF8(size_t i, size_t num_octets_requested, std::string &utf8_string) const {
258  // clear any residual value
259  utf8_string = "";
260 
261  if(i>=bufsize) {
262  // past EOF
263  return;
264  }
265  if(i+num_octets_requested>bufsize) {
266  // clip at EOF
267  num_octets_requested = bufsize - i;
268  }
269  utf8_string = std::string((const char *)buf+i,num_octets_requested);
270 }
271 
272 /**
273  * Read UTF-8 format code octets into string up to but not including \0.
274  */
275 void sbuf_t::getUTF8(size_t i, std::string &utf8_string) const {
276  // clear any residual value
277  utf8_string = "";
278 
279  // read octets
280  for (size_t off=i; off<bufsize; off++) {
281  uint8_t octet = get8u(off);
282 
283  // stop before \0
284  if (octet == 0) {
285  // at \0
286  break;
287  }
288 
289  // accept the octet
290  utf8_string.push_back(octet);
291  }
292 }
293 
294 /**
295  * Read the requested number of UTF-16 format code units into wstring including any \U0000.
296  */
297 void sbuf_t::getUTF16(size_t i, size_t num_code_units_requested, std::wstring &utf16_string) const {
298  // clear any residual value
299  utf16_string = std::wstring();
300 
301  if(i>=bufsize) {
302  // past EOF
303  return;
304  }
305  if(i+num_code_units_requested*2+1>bufsize) {
306  // clip at EOF
307  num_code_units_requested = ((bufsize-1)-i)/2;
308  }
309  // NOTE: we can't use wstring constructor because we require 16 bits,
310  // not whatever sizeof(wchar_t) is.
311  // utf16_string = std::wstring((const char *)buf+i,num_code_units_requested);
312 
313  // get code units individually
314  for (size_t j = 0; j < num_code_units_requested; j++) {
315  utf16_string.push_back(get16u(i + j*2));
316  }
317 }
318 
319 /**
320  * Read UTF-16 format code units into wstring up to but not including \U0000.
321  */
322 void sbuf_t::getUTF16(size_t i, std::wstring &utf16_string) const {
323  // clear any residual value
324  utf16_string = std::wstring();
325 
326  // read the code units
327  size_t off;
328  for (off=i; off<bufsize-1; off += 2) {
329  uint16_t code_unit = get16u(off);
330  //cout << "sbuf.cpp getUTF16 i: " << i << " code unit: " << code_unit << "\n";
331 
332  // stop before \U0000
333  if (code_unit == 0) {
334  // at \U0000
335  break;
336  }
337 
338  // accept the code unit
339  utf16_string.push_back(code_unit);
340  }
341 }
342 
343 /**
344  * Read the requested number of UTF-16 format code units using the specified byte order into wstring including any \U0000.
345  */
346 void sbuf_t::getUTF16(size_t i, size_t num_code_units_requested, byte_order_t bo, std::wstring &utf16_string) const {
347  // clear any residual value
348  utf16_string = std::wstring();
349 
350  if(i>=bufsize) {
351  // past EOF
352  return;
353  }
354  if(i+num_code_units_requested*2+1>bufsize) {
355  // clip at EOF
356  num_code_units_requested = ((bufsize-1)-i)/2;
357  }
358  // NOTE: we can't use wstring constructor because we require 16 bits,
359  // not whatever sizeof(wchar_t) is.
360  // utf16_string = std::wstring((const char *)buf+i,num_code_units_requested);
361 
362  // get code units individually
363  for (size_t j = 0; j < num_code_units_requested; j++) {
364  utf16_string.push_back(get16u(i + j, bo));
365  }
366 }
367 
368 /**
369  * Read UTF-16 format code units using the specified byte order into wstring up to but not including \U0000.
370  */
371 void sbuf_t::getUTF16(size_t i, byte_order_t bo, std::wstring &utf16_string) const {
372  // clear any residual value
373  utf16_string = std::wstring();
374 
375  // read the code units
376  size_t off;
377  for (off=i; off<bufsize-1; off += 2) {
378  uint16_t code_unit = get16u(off, bo);
379  //cout << "sbuf.cpp getUTF16 i: " << i << " code unit: " << code_unit << "\n";
380 
381  // stop before \U0000
382  if (code_unit == 0) {
383  // at \U0000
384  break;
385  }
386 
387  // accept the code unit
388  utf16_string.push_back(code_unit);
389  }
390 }
391 
Definition: sbuf.h:70
Definition: sbuf.h:221
bool should_close
Definition: sbuf.h:228
bool is_constant(size_t loc, size_t len, uint8_t ch) const
Definition: sbuf.cpp:179
const sbuf_t * highest_parent() const
Definition: sbuf.cpp:80
size_t bufsize
Definition: sbuf.h:248
std::string substr(size_t loc, size_t len) const
Definition: sbuf.cpp:172
void hex_dump(std::ostream &os, uint64_t start, uint64_t len) const
Definition: sbuf.cpp:121
void raw_dump(std::ostream &os, uint64_t start, uint64_t len) const
Definition: sbuf.cpp:92
sbuf_t()
Definition: sbuf.h:258
int fd
Definition: sbuf.h:224
bool should_unmap
Definition: sbuf.h:225
std::ostream & operator<<(std::ostream &os, const sbuf_t &t)
Definition: sbuf.cpp:245
byte_order_t
Definition: sbuf.h:482
uint8_t get8u(size_t i) const
Definition: sbuf_private.h:20
int page_number
Definition: sbuf.h:234
static std::string map_file_delimiter
Definition: sbuf.h:372
ssize_t write(int fd, size_t loc, size_t len) const
Definition: sbuf.cpp:156
void getUTF8(size_t i, size_t num_octets_requested, std::string &utf8_string) const
Definition: sbuf.cpp:257
static const std::string U10001C
Definition: sbuf.h:371
const sbuf_t * parent
Definition: sbuf.h:237
size_t pagesize
Definition: sbuf.h:249
void getUTF16(size_t i, size_t num_code_units_requested, std::wstring &utf16_string) const
Definition: sbuf.cpp:297
pos0_t pos0
Definition: sbuf.h:235
const uint8_t * buf
Definition: sbuf.h:246
bool should_free
Definition: sbuf.h:227
uint16_t get16u(size_t i) const
Definition: sbuf_private.h:25
static sbuf_t * map_file(const std::string &fname)
Definition: sbuf.cpp:22
#define NSRL_HEXBUF_SPACE2
Definition: sbuf.cpp:200
#define O_BINARY
Definition: sbuf.cpp:14
#define NSRL_HEXBUF_SPACE4
Definition: sbuf.cpp:201
static std::string hexch(unsigned char ch)
Definition: sbuf.cpp:111
static const char * hexbuf(char *dst, int dst_len, const unsigned char *bin, int bytes, int flag)
Definition: sbuf.cpp:206
static int hexcharvals[256]
Definition: sbuf.cpp:205
#define NSRL_HEXBUF_UPPERCASE
Definition: sbuf.cpp:199
#define SEEK_SET
Definition: tcpflow.h:202
#define PROT_READ
Definition: util.cpp:227
#define MAP_FILE
Definition: util.cpp:228
#define MAP_SHARED
Definition: util.cpp:229
void * mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
Definition: util.cpp:230
unsigned short uint16_t
Definition: util.h:7
unsigned char uint8_t
Definition: util.h:6