Hint: This file contains one or more very long lines, so it may be easier to read in the plain-text view mode, which wraps the contents within the browser window.
#!/bin/sh

# Copyright 2013 MediaMobil Communication GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# This script converts a binary .db file into a .csv file.
# The .db file was generated by darkstat with the --export option.
# The .csv file shall be read by any spreadsheet application.
#
# Usage:       <this script> FILE.db
# Output:      FILE.csv next to the input file (semicolon separated).
# Exit status: 0 on success; 1 on usage errors, missing tools, or when
#              the input could not be parsed completely.

SCRIPTNAME=${0##*/}

# "command -v" is the POSIX way to look a tool up in PATH. The bash-only
# "type -P" prints an error text under a plain /bin/sh, which made the
# former emptiness test succeed even when the tool was missing.
if ! command -v awk >/dev/null 2>&1; then
  printf '%s\n' "${SCRIPTNAME}: missing AWK interpreter, at least not found in PATH" >&2
  printf '%s\n' "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH" >&2
  exit 1
fi
if ! command -v od >/dev/null 2>&1; then
  printf '%s\n' "${SCRIPTNAME}: missing od file dump tool, at least not found in PATH" >&2
  printf '%s\n' "${SCRIPTNAME}: every POSIX compliant OS has one; add the location to PATH" >&2
  exit 1
fi
if [ "$#" -ne 1 ]; then
  printf '%s\n' "${SCRIPTNAME}: missing parameter; need file name of .db file" >&2
  exit 1
fi
DBFILENAME=$1
if [ -r "${DBFILENAME}" ]; then
  printf '%s\n' "${SCRIPTNAME}: Found file ${DBFILENAME}"
else
  printf '%s\n' "${SCRIPTNAME}: file ${DBFILENAME} does not exist" >&2
  exit 1
fi

# Replace only the last extension of the base name with .csv.
# Looking at the base name first keeps dots in directory names
# (e.g. "./x.db" or "/var/lib/darkstat.d/x") from being cut off.
case "${DBFILENAME##*/}" in
  ?*.*) CSVFILENAME="${DBFILENAME%.*}.csv" ;;
  *)    CSVFILENAME="${DBFILENAME}.csv" ;;
esac
printf '%s\n' "${SCRIPTNAME}: Writing output into ${CSVFILENAME}"

# The spec of the .db export format exists for different versions:
# https://github.com/emikulic/darkstat/blob/master/export-format.txt
# http://git.msquadrat.de/darkstat.git/blob_plain/master:/export-format.txt
# http://phil.lavin.me.uk/downloads/parse.phps
# Only file format version 1 is supported by us.
# Obviously, darkstat itself distinguishes 3 different host format versions.
# Only host format version 2 is supported by us.
# The darkstat database file is converted from binary format
# to ASCII by the standard Unix command od.

# Known limitations:
# - the host name and the "last seen" time stamp are parsed
#   but not written into the .csv file
# - we read the graphics section of the file but ignore it
# - numbers are held in AWK floating point variables, so counters
#   beyond 2^53 lose precision

# Let the od tool convert each binary byte into several textual formats.
# The AWK script reads all variants and later picks the format it needs.
# All diagnostics of the AWK script go to stderr so that they never end up
# inside the .csv file.
od -Ad -v -tx1 -tu1 -ta -w1 < "${DBFILENAME}" |
awk '
# od prints three lines per input byte: "<address> <hex value>", then the
# unsigned decimal value, then the character name. The very last line of
# the od output carries only the end address; it matches none of the rules
# below and is therefore ignored.
BEGIN { addr = -1 }
NF==2 { addr = 0 + $1; hex[addr] = $2; next }
NF==1 && addr >= 0 && ! (addr in dec)   { dec[addr] = $1; next }
NF==1 && addr >= 0 && ! (addr in ascii) { ascii[addr] = $1; next }
# Now all variants of the bytes are available in certain arrays.
# The array indices cover the range 0 .. addr.

# Concatenate count entries of array, starting at index address.
function read_bytes(array, address, count,    retval, c) {
    retval = ""
    for (c = 0; c < count; c++)
        retval = retval array[address+c]
    return retval
}
# Read count bytes at address as one big-endian unsigned number.
function read_number(address, count,    retval, c) {
    retval = 0
    for (c = 0; c < count; c++)
        retval = retval*256 + dec[address+c]
    return retval
}
# Concatenate the od character names of count bytes at address.
function read_text(address, count,    retval, c) {
    retval = ""
    for (c = 0; c < count; c++)
        retval = retval ascii[address+c]
    return retval
}
# Print reason (if not empty) to stderr. If terminate is not 0, dump all
# bytes from the current read position ai up to the end of the file to
# stderr and leave AWK with exit status retval.
function quit(reason, terminate, retval,    i) {
    if (length(reason) > 0)
        print reason | "cat 1>&2"
    if (terminate != 0) {
        # Any remaining bytes in the file shall be dumped.
        for (i = ai; i <= addr; i++)
            print i, hex[i], ascii[i] | "cat 1>&2"
        close("cat 1>&2")
        exit(retval)
    }
}
# Parse the IP protocol table of one host at position ai.
# Sets ip_proto_count, appends to IPprotos, adds to ip_proto_in/out.
function readIPsection(    n) {
    ip_protos_data = read_bytes(ascii, ai, 1)
    if (ip_protos_data != "P")
        quit("expected ip_protos_data P, found " ip_protos_data, 1, 1)
    ai += 1
    ip_proto_count = read_number(ai, 1)
    ai += 1
    for (n = 0; n < ip_proto_count; n++) {
        ip_proto_type = read_number(ai, 1)
        ai += 1
        IPprotos = IPprotos " " ip_proto_type
        ip_proto_in += read_number(ai, 8)
        ai += 8
        ip_proto_out += read_number(ai, 8)
        ai += 8
    }
}
# Parse the TCP port table of one host at position ai.
# Sets tcp_proto_count, appends to TCPports, adds to tcp_proto_in/out and
# to the ssh (port 22) and rdp (port 3389) counters.
function readTCPsection(    n, port_in, port_out) {
    tcp_protos_data = read_bytes(ascii, ai, 1)
    if (tcp_protos_data != "T")
        quit("expected tcp_protos_data T, found " tcp_protos_data, 1, 1)
    ai += 1
    tcp_proto_count = read_number(ai, 2)
    ai += 2
    for (n = 0; n < tcp_proto_count; n++) {
        tcp_proto_port = read_number(ai, 2)
        ai += 2
        TCPports = TCPports " " tcp_proto_port
        tcp_proto_syn = read_number(ai, 8)
        ai += 8
        port_in = read_number(ai, 8)
        ai += 8
        port_out = read_number(ai, 8)
        ai += 8
        tcp_proto_in += port_in
        tcp_proto_out += port_out
        # Add the counters of this port only, not the running totals.
        if (tcp_proto_port == 22) {
            ssh_in += port_in
            ssh_out += port_out
        }
        if (tcp_proto_port == 3389) {
            rdp_in += port_in
            rdp_out += port_out
        }
    }
}
# Parse the UDP port table of one host at position ai.
# Sets udp_proto_count, appends to UDPports, adds to udp_proto_in/out and
# to the ssh (port 22) and rdp (port 3389) counters.
function readUDPsection(    n, port_in, port_out) {
    udp_protos_data = read_bytes(ascii, ai, 1)
    if (udp_protos_data != "U")
        quit("expected udp_protos_data U, found " udp_protos_data, 1, 1)
    ai += 1
    udp_proto_count = read_number(ai, 2)
    ai += 2
    for (n = 0; n < udp_proto_count; n++) {
        udp_proto_port = read_number(ai, 2)
        ai += 2
        UDPports = UDPports " " udp_proto_port
        port_in = read_number(ai, 8)
        ai += 8
        port_out = read_number(ai, 8)
        ai += 8
        udp_proto_in += port_in
        udp_proto_out += port_out
        # Add the counters of this port only, not the running totals.
        if (udp_proto_port == 22) {
            ssh_in += port_in
            ssh_out += port_out
        }
        if (udp_proto_port == 3389) {
            rdp_in += port_in
            rdp_out += port_out
        }
    }
}
# Skip over one graph at position ai: a bar count, a bar index and
# 16 bytes per bar. The values are read but not used; the interval
# argument only documents which graph the caller expects.
function readGraphsection(interval,    n) {
    n_bars = read_number(ai++, 1)
    i_bars = read_number(ai++, 1)
    for (n = 0; n < n_bars; n++) {
        graph_bytes_in = read_number(ai, 8)
        ai += 8
        graph_bytes_out = read_number(ai, 8)
        ai += 8
    }
}

END {
    ai = 0
    file_header = read_bytes(hex, 0, 4)
    if (file_header != "da314159")
        quit("input data is not an exported darkstat .db file, wrong header: " file_header, 1, 1)
    section_header = read_bytes(hex, 4, 3)
    if (section_header != "da4853")
        quit("section header da4853 expected: " section_header, 1, 1)
    db_version = read_bytes(hex, 7, 1)
    if (db_version != "01")
        quit("file format supported only in version 01", 1, 1)
    host_count = read_number(8, 4)
    ai = 12
    # Print a header into the .csv file.
    printf("IP address;MAC address;host in bytes;host out bytes;IP protos;IP in bytes;IP out bytes;TCP port count;TCP in bytes;TCP out bytes;UDP port count;UDP in bytes;UDP out bytes;ssh in bytes;ssh out bytes;rdp in bytes;rdp out bytes;TCP ports;UDP ports\n")
    for (hi = 1; hi <= host_count; hi++) {
        # Make sure all variables to be printed are initially empty.
        ip_address = mac_address = ""
        host_bytes_in = host_bytes_out = ip_proto_in = ip_proto_out = 0
        tcp_proto_in = tcp_proto_out = udp_proto_in = udp_proto_out = 0
        ssh_in = ssh_out = rdp_in = rdp_out = 0
        IPprotos = TCPports = UDPports = ""
        tcp_proto_count = udp_proto_count = 0

        # Host tag: the characters H S T followed by the version byte.
        host_header = read_bytes(hex, ai, 3)
        if (host_header != "485354")
            quit("host header 485354 expected: " host_header, 1, 1)
        host_version = read_bytes(hex, ai+3, 1)
        ai += 4
        if (host_version != "02")
            quit("host format supported only in version 02: " host_version, 1, 1)

        ip_address = read_number(ai+0,1) "." read_number(ai+1,1) "." read_number(ai+2,1) "." read_number(ai+3,1)
        ai += 4
        # The "last seen" time stamp is a 64 bit value. Reading only
        # 4 bytes here returned the always-zero upper half and shifted
        # the MAC address and the host name by 4 bytes.
        last_seen = read_number(ai, 8)
        ai += 8
        mac_address = hex[ai+0] ":" hex[ai+1] ":" hex[ai+2] ":" hex[ai+3] ":" hex[ai+4] ":" hex[ai+5]
        ai += 6
        # The host name is stored as one length byte followed by that
        # many characters. The length is 0 if darkstat could not
        # resolve the name, e.g. without a DNS server.
        hostname_length = read_number(ai, 1)
        ai += 1
        hostname = read_text(ai, hostname_length)
        ai += hostname_length
        host_bytes_in = read_number(ai, 8)
        ai += 8
        host_bytes_out = read_number(ai, 8)
        ai += 8
        readIPsection()
        readTCPsection()
        readUDPsection()

        # Byte counters are printed with %.0f instead of %d because they
        # are 64 bit values and some AWK implementations limit %d to a
        # smaller integer range.
        printf("\"%s\";\"%s\";%.0f;%.0f;%s;%.0f;%.0f;%d;%.0f;%.0f;%d;%.0f;%.0f;%.0f;%.0f;%.0f;%.0f;%s;%s\n",
            ip_address, mac_address, host_bytes_in, host_bytes_out,
            IPprotos, ip_proto_in, ip_proto_out,
            tcp_proto_count, tcp_proto_in, tcp_proto_out,
            udp_proto_count, udp_proto_in, udp_proto_out,
            ssh_in, ssh_out, rdp_in, rdp_out,
            TCPports, UDPports)
    }
    section_header = read_bytes(hex, ai, 3)
    if (section_header != "da4752")
        quit("section header da4752 expected: " section_header, 1, 1)
    ai += 3
    db_version = read_bytes(hex, ai, 1)
    if (db_version != "01")
        quit("file format supported only in version 01", 1, 1)
    ai += 1
    last_time = read_number(ai, 8)
    ai += 8
    readGraphsection("60 seconds")
    readGraphsection("60 minutes")
    readGraphsection("24 hours")
    readGraphsection("31 days")
    # The complete file has been parsed, no bytes should be left over.
    # addr is the index of the last byte, so ai must now be addr + 1.
    # Terminate with return value 0 only if the byte numbers match.
    if (ai != addr + 1)
        quit("parsed " ai " bytes, but the file contains " (addr + 1) " bytes", 1, 1)
    quit("", 1, 0)
}
' > "${CSVFILENAME}"