A hint: This file contains one or more very long lines, so maybe it is better readable using the pure text view mode that shows the contents as wrapped lines within the browser window.
1 package SquidAnalyzer; 2 #------------------------------------------------------------------------------ 3 # Project : Squid Log Analyzer 4 # Name : SquidAnalyzer.pm 5 # Language : Perl 5 6 # OS : All 7 # Copyright: Copyright (c) 2001-2017 Gilles Darold - All rights reserved. 8 # Licence : This program is free software; you can redistribute it 9 # and/or modify it under the same terms as Perl itself. 10 # Author : Gilles Darold, gilles _AT_ darold _DOT_ net 11 # Function : Main perl module for Squid Log Analyzer 12 # Usage : See documentation. 13 #------------------------------------------------------------------------------ 14 use strict qw/vars/; 15 16 BEGIN { 17 use Exporter(); 18 use vars qw($VERSION $COPYRIGHT $AUTHOR @ISA @EXPORT $ZCAT_PROG $BZCAT_PROG $XZCAT_PROG $RM_PROG); 19 use POSIX qw/ strftime sys_wait_h /; 20 use IO::File; 21 use Socket (); 22 use Time::HiRes qw/ualarm/; 23 use Time::Local qw/timelocal_nocheck timegm_nocheck/; 24 use Fcntl qw(:flock); 25 use IO::Handle; 26 use FileHandle; 27 use POSIX qw(locale_h); 28 setlocale(LC_NUMERIC, ''); 29 setlocale(LC_ALL, 'C'); 30 31 # Set all internal variable 32 $VERSION = '6.6'; 33 $COPYRIGHT = 'Copyright (c) 2001-2017 Gilles Darold - All rights reserved.'; 34 $AUTHOR = "Gilles Darold - gilles _AT_ darold _DOT_ net"; 35 36 @ISA = qw(Exporter); 37 @EXPORT = qw//; 38 39 $| = 1; 40 41 } 42 43 $ZCAT_PROG = "/bin/zcat"; 44 $BZCAT_PROG = "/bin/bzcat"; 45 $RM_PROG = "/bin/rm"; 46 $XZCAT_PROG = "/usr/bin/xzcat"; 47 48 # DNS Cache 49 my %CACHE = (); 50 51 # Color used to draw grpahs 52 my @GRAPH_COLORS = ('#6e9dc9', '#f4ab3a', '#ac7fa8', '#8dbd0f'); 53 54 # Default translation srings 55 my %Translate = ( 56 'CharSet' => 'utf-8', 57 '01' => 'Jan', 58 '02' => 'Feb', 59 '03' => 'Mar', 60 '04' => 'Apr', 61 '05' => 'May', 62 '06' => 'Jun', 63 '07' => 'Jul', 64 '08' => 'Aug', 65 '09' => 'Sep', 66 '10' => 'Oct', 67 '11' => 'Nov', 68 '12' => 'Dec', 69 'KB' => 'Kilo bytes', 70 'MB' => 'Mega bytes', 71 'GB' => 'Giga bytes', 72 'Bytes' => 'Bytes', 73 'Total' => 'Total', 74 'Years' => 'Years', 75 'Users' => 'Users', 76 'Sites' => 'Sites', 77 'Cost' => 'Cost', 78 'Requests' => 'Requests', 79 'Megabytes' => 'Mega bytes', 80 'Months' => 'Months', 81 'Days' => 'Days', 82 'Hit' => 'Hit', 83 'Miss' => 'Miss', 84 'Denied' => 'Denied', 85 'Domains' => 'Domains', 86 'Requests_graph' => 'Requests', 87 'Megabytes_graph' => 'Mega bytes', 88 'Months_graph' => 'Months', 89 'Days_graph' => 'Days', 90 'Hit_graph' => 'Hit', 91 'Miss_graph' => 'Miss', 92 'Denied_graph' => 'Denied', 93 'Total_graph' => 'Total', 94 'Domains_graph' => 'Domains', 95 'Users_help' => 'Total number of different users for this period', 96 'Sites_help' => 'Total number of different visited sites for this period', 97 'Domains_help' => 'Total number of different second level visited domain for this period', 98 'Hit_help' => 'Objects found in cache', 99 'Miss_help' => 'Objects not found in cache', 100 'Denied_help' => 'Objects with denied access', 101 'Cost_help' => '1 Mega byte =', 102 'Generation' => 'Report generated on', 103 'Main_cache_title' => 'Cache Statistics', 104 'Cache_title' => 'Cache Statistics on', 105 'Stat_label' => 'Stat', 106 'Mime_link' => 'Mime Types', 107 'Network_link' => 'Networks', 108 'User_link' => 'Users', 109 'Top_url_link' => 'Top Urls', 110 'Top_domain_link' => 'Top Domains', 111 'Back_link' => 'Back', 112 'Graph_cache_hit_title' => '%s Requests statistics on', 113 'Graph_cache_byte_title' => '%s Mega Bytes statistics on', 114 'Hourly' => 'Hourly', 115 'Hours' => 'Hours', 116 'Daily' => 'Daily', 117 'Days' => 'Days', 118 'Monthly' => 'Monthly', 119 'Months' => 'Months', 120 'Mime_title' => 'Mime Type Statistics on', 121 'Mime_number' => 'Number of mime type', 122 'Network_title' => 'Network Statistics on', 123 'Network_number' => 'Number of network', 124 'Duration' => 'Duration', 125 'Time' => 'Time', 126 'Largest' => 'Largest', 127 'Url' => 'Url', 128 'User_title' => 'User Statistics on', 129 'User_number' => 'Number of user', 130 'Url_title' => 'Top %d site on', 131 'Url_Hits_title' => 'Top %d Url hits on', 132 'Url_Bytes_title' => 'Top %d Url bytes on', 133 'Url_Duration_title' => 'Top %d Url duration on', 134 'Url_number' => 'Number of Url', 135 'Domain_Hits_title' => 'Top %d Domain hits on', 136 'Domain_Bytes_title' => 'Top %d Domain bytes on', 137 'Domain_Duration_title' => 'Top %d Domain duration on', 138 'Domain_number' => 'Number of domain', 139 'Domain_graph_hits_title' => 'Domain Hits Statistics on', 140 'Domain_graph_bytes_title' => 'Domain Bytes Statistics on', 141 'Second_domain_graph_hits_title' => 'Second level Hits Statistics on', 142 'Second_domain_graph_bytes_title' => 'Second level Bytes Statistics on', 143 'First_visit' => 'First visit', 144 'Last_visit' => 'Last visit', 145 'Globals_Statistics' => 'Globals Statistics', 146 'Legend' => 'Legend', 147 'File_Generated' => 'File generated by', 148 'Up_link' => 'Up', 149 'Click_year_stat' => 'Click on year\'s statistics link for details', 150 'Mime_graph_hits_title' => 'Mime Type Hits Statistics on', 151 'Mime_graph_bytes_title' => 'Mime Type MBytes Statistics on', 152 'User' => 'User', 153 'Count' => 'Count', 154 'WeekDay' => 'Su Mo Tu We Th Fr Sa', 155 'Week' => 'Week', 156 'Top_denied_link' => 'Top Denied', 157 'Blocklist_acl_title' => 'Blocklist ACL use', 158 'Throughput' => 'Throughput', 159 'Graph_throughput_title' => '%s throughput on', 160 'Throughput_graph' => 'Bytes/sec', 161 ); 162 163 my @TLD1 = ( 164 '\.com\.ac','\.net\.ac','\.gov\.ac','\.org\.ac','\.mil\.ac','\.co\.ae', 165 '\.net\.ae','\.gov\.ae','\.ac\.ae','\.sch\.ae','\.org\.ae','\.mil\.ae','\.pro\.ae', 166 '\.name\.ae','\.com\.af','\.edu\.af','\.gov\.af','\.net\.af','\.org\.af','\.com\.al', 167 '\.edu\.al','\.gov\.al','\.mil\.al','\.net\.al','\.org\.al','\.ed\.ao','\.gv\.ao', 168 '\.og\.ao','\.co\.ao','\.pb\.ao','\.it\.ao','\.com\.ar','\.edu\.ar','\.gob\.ar', 169 '\.gov\.ar','\.gov\.ar','\.int\.ar','\.mil\.ar','\.net\.ar','\.org\.ar','\.tur\.ar', 170 '\.gv\.at','\.ac\.at','\.co\.at','\.or\.at','\.com\.au','\.net\.au','\.org\.au', 171 '\.edu\.au','\.gov\.au','\.csiro\.au','\.asn\.au','\.id\.au','\.org\.ba','\.net\.ba', 172 '\.edu\.ba','\.gov\.ba','\.mil\.ba','\.unsa\.ba','\.untz\.ba','\.unmo\.ba','\.unbi\.ba', 173 '\.unze\.ba','\.co\.ba','\.com\.ba','\.rs\.ba','\.co\.bb','\.com\.bb','\.net\.bb', 174 '\.org\.bb','\.gov\.bb','\.edu\.bb','\.info\.bb','\.store\.bb','\.tv\.bb','\.biz\.bb', 175 '\.com\.bh','\.info\.bh','\.cc\.bh','\.edu\.bh','\.biz\.bh','\.net\.bh','\.org\.bh', 176 '\.gov\.bh','\.com\.bn','\.edu\.bn','\.gov\.bn','\.net\.bn','\.org\.bn','\.com\.bo', 177 '\.net\.bo','\.org\.bo','\.tv\.bo','\.mil\.bo','\.int\.bo','\.gob\.bo','\.gov\.bo', 178 '\.edu\.bo','\.adm\.br','\.adv\.br','\.agr\.br','\.am\.br','\.arq\.br','\.art\.br', 179 '\.ato\.br','\.b\.br','\.bio\.br','\.blog\.br','\.bmd\.br','\.cim\.br','\.cng\.br', 180 '\.cnt\.br','\.com\.br','\.coop\.br','\.ecn\.br','\.edu\.br','\.eng\.br','\.esp\.br', 181 '\.etc\.br','\.eti\.br','\.far\.br','\.flog\.br','\.fm\.br','\.fnd\.br','\.fot\.br', 182 '\.fst\.br','\.g12\.br','\.ggf\.br','\.gov\.br','\.imb\.br','\.ind\.br','\.inf\.br', 183 '\.jor\.br','\.jus\.br','\.lel\.br','\.mat\.br','\.med\.br','\.mil\.br','\.mus\.br', 184 '\.net\.br','\.nom\.br','\.not\.br','\.ntr\.br','\.odo\.br','\.org\.br','\.ppg\.br', 185 '\.pro\.br','\.psc\.br','\.psi\.br','\.qsl\.br','\.rec\.br','\.slg\.br','\.srv\.br', 186 '\.tmp\.br','\.trd\.br','\.tur\.br','\.tv\.br','\.vet\.br','\.vlog\.br','\.wiki\.br', 187 '\.zlg\.br','\.com\.bs','\.net\.bs','\.org\.bs','\.edu\.bs','\.gov\.bs','com\.bz', 188 'edu\.bz','gov\.bz','net\.bz','org\.bz','\.ab\.ca','\.bc\.ca','\.mb\.ca','\.nb\.ca', 189 '\.nf\.ca','\.nl\.ca','\.ns\.ca','\.nt\.ca','\.nu\.ca','\.on\.ca','\.pe\.ca','\.qc\.ca', 190 '\.sk\.ca','\.yk\.ca','\.co\.ck','\.org\.ck','\.edu\.ck','\.gov\.ck','\.net\.ck', 191 '\.gen\.ck','\.biz\.ck','\.info\.ck','\.ac\.cn','\.com\.cn','\.edu\.cn','\.gov\.cn', 192 '\.mil\.cn','\.net\.cn','\.org\.cn','\.ah\.cn','\.bj\.cn','\.cq\.cn','\.fj\.cn','\.gd\.cn', 193 '\.gs\.cn','\.gz\.cn','\.gx\.cn','\.ha\.cn','\.hb\.cn','\.he\.cn','\.hi\.cn','\.hl\.cn', 194 '\.hn\.cn','\.jl\.cn','\.js\.cn','\.jx\.cn','\.ln\.cn','\.nm\.cn','\.nx\.cn','\.qh\.cn', 195 '\.sc\.cn','\.sd\.cn','\.sh\.cn','\.sn\.cn','\.sx\.cn','\.tj\.cn','\.tw\.cn','\.xj\.cn', 196 '\.xz\.cn','\.yn\.cn','\.zj\.cn','\.com\.co','\.org\.co','\.edu\.co','\.gov\.co', 197 '\.net\.co','\.mil\.co','\.nom\.co','\.ac\.cr','\.co\.cr','\.ed\.cr','\.fi\.cr','\.go\.cr', 198 '\.com\.cu','\.edu\.cu','\.gov\.cu','\.net\.cu','\.org\.cu', 199 '\.or\.cr','\.sa\.cr','\.cr','\.ac\.cy','\.net\.cy','\.gov\.cy','\.org\.cy', 200 '\.pro\.cy','\.name\.cy','\.ekloges\.cy','\.tm\.cy','\.ltd\.cy','\.biz\.cy','\.press\.cy', 201 '\.parliament\.cy','\.com\.cy','\.edu\.do','\.gob\.do','\.gov\.do','\.com\.do','\.sld\.do', 202 '\.org\.do','\.net\.do','\.web\.do','\.mil\.do','\.art\.do','\.com\.dz','\.org\.dz', 203 '\.net\.dz','\.gov\.dz','\.edu\.dz','\.asso\.dz','\.pol\.dz','\.art\.dz','\.com\.ec', 204 '\.info\.ec','\.net\.ec','\.fin\.ec','\.med\.ec','\.pro\.ec','\.org\.ec','\.edu\.ec', 205 '\.gov\.ec','\.mil\.ec','\.com\.eg','\.edu\.eg','\.eun\.eg','\.gov\.eg','\.mil\.eg', 206 '\.name\.eg','\.net\.eg','\.org\.eg','\.sci\.eg','\.com\.er','\.edu\.er','\.gov\.er', 207 '\.mil\.er','\.net\.er','\.org\.er','\.ind\.er','\.rochest\.er','\.w\.er','\.com\.es', 208 '\.nom\.es','\.org\.es','\.gob\.es','\.edu\.es','\.com\.et','\.gov\.et','\.org\.et', 209 '\.edu\.et','\.net\.et','\.biz\.et','\.name\.et','\.info\.et','\.ac\.fj','\.biz\.fj', 210 '\.com\.fj','\.info\.fj','\.mil\.fj','\.name\.fj','\.net\.fj','\.org\.fj','\.pro\.fj', 211 '\.co\.fk','\.org\.fk','\.gov\.fk','\.ac\.fk','\.nom\.fk','\.net\.fk','\.fr','\.tm\.fr', 212 '\.asso\.fr','\.nom\.fr','\.prd\.fr','\.presse\.fr','\.com\.fr','\.gouv\.fr','\.co\.gg', 213 '\.net\.gg','\.org\.gg','\.com\.gh','\.edu\.gh','\.gov\.gh','\.org\.gh','\.mil\.gh', 214 '\.com\.gn','\.ac\.gn','\.gov\.gn','\.org\.gn','\.net\.gn','\.com\.gr','\.edu\.gr','\.net\.gr', 215 '\.org\.gr','\.gov\.gr','\.mil\.gr','\.com\.gt','\.edu\.gt','\.net\.gt','\.gob\.gt', 216 '\.org\.gt','\.mil\.gt','\.ind\.gt','\.com\.gu','\.net\.gu','\.gov\.gu','\.org\.gu','\.edu\.gu', 217 '\.com\.hk','\.edu\.hk','\.gov\.hk','\.idv\.hk','\.net\.hk','\.org\.hk','\.ac\.id','\.co\.id', 218 '\.net\.id','\.or\.id','\.web\.id','\.sch\.id','\.mil\.id','\.go\.id','\.war\.net\.id','\.ac\.il', 219 '\.co\.il','\.org\.il','\.net\.il','\.k12\.il','\.gov\.il','\.muni\.il','\.idf\.il','\.in', 220 '\.co\.in','\.firm\.in','\.net\.in','\.org\.in','\.gen\.in','\.ind\.in','\.ac\.in','\.edu\.in', 221 '\.res\.in','\.ernet\.in','\.gov\.in','\.mil\.in','\.nic\.in','\.nic\.in','\.iq','\.gov\.iq', 222 '\.edu\.iq','\.com\.iq','\.mil\.iq','\.org\.iq','\.net\.iq','\.ir','\.ac\.ir','\.co\.ir', 223 '\.gov\.ir','\.id\.ir','\.net\.ir','\.org\.ir','\.sch\.ir','\.dnssec\.ir','\.gov\.it', 224 '\.edu\.it','\.co\.je','\.net\.je','\.org\.je','\.com\.jo','\.net\.jo','\.gov\.jo','\.edu\.jo', 225 '\.org\.jo','\.mil\.jo','\.name\.jo','\.sch\.jo','\.ac\.jp','\.ad\.jp','\.co\.jp','\.ed\.jp', 226 '\.go\.jp','\.gr\.jp','\.lg\.jp','\.ne\.jp','\.or\.jp','\.co\.ke','\.or\.ke','\.ne\.ke','\.go\.ke', 227 '\.ac\.ke','\.sc\.ke','\.me\.ke','\.mobi\.ke','\.info\.ke','\.per\.kh','\.com\.kh','\.edu\.kh', 228 '\.gov\.kh','\.mil\.kh','\.net\.kh','\.org\.kh','\.com\.ki','\.biz\.ki','\.de\.ki','\.net\.ki', 229 '\.info\.ki','\.org\.ki','\.gov\.ki','\.edu\.ki','\.mob\.ki','\.tel\.ki','\.km','\.com\.km', 230 '\.coop\.km','\.asso\.km','\.nom\.km','\.presse\.km','\.tm\.km','\.medecin\.km','\.notaires\.km', 231 '\.pharmaciens\.km','\.veterinaire\.km','\.edu\.km','\.gouv\.km','\.mil\.km','\.net\.kn', 232 '\.org\.kn','\.edu\.kn','\.gov\.kn','\.kr','\.co\.kr','\.ne\.kr','\.or\.kr','\.re\.kr','\.pe\.kr', 233 '\.go\.kr','\.mil\.kr','\.ac\.kr','\.hs\.kr','\.ms\.kr','\.es\.kr','\.sc\.kr','\.kg\.kr', 234 '\.seoul\.kr','\.busan\.kr','\.daegu\.kr','\.incheon\.kr','\.gwangju\.kr','\.daejeon\.kr', 235 '\.ulsan\.kr','\.gyeonggi\.kr','\.gangwon\.kr','\.chungbuk\.kr','\.chungnam\.kr','\.jeonbuk\.kr', 236 '\.jeonnam\.kr','\.gyeongbuk\.kr','\.gyeongnam\.kr','\.jeju\.kr','\.edu\.kw','\.com\.kw', 237 '\.net\.kw','\.org\.kw','\.gov\.kw','\.com\.ky','\.org\.ky','\.net\.ky','\.edu\.ky','\.gov\.ky', 238 '\.com\.kz','\.edu\.kz','\.gov\.kz','\.mil\.kz','\.net\.kz','\.org\.kz','\.com\.lb','\.edu\.lb', 239 '\.gov\.lb','\.net\.lb','\.org\.lb','\.gov\.lk','\.sch\.lk','\.net\.lk','\.int\.lk','\.com\.lk', 240 '\.org\.lk','\.edu\.lk','\.ngo\.lk','\.soc\.lk','\.web\.lk','\.ltd\.lk','\.assn\.lk','\.grp\.lk', 241 '\.hotel\.lk','\.com\.lr','\.edu\.lr','\.gov\.lr','\.org\.lr','\.net\.lr','\.com\.lv','\.edu\.lv', 242 '\.gov\.lv','\.org\.lv','\.mil\.lv','\.id\.lv','\.net\.lv','\.asn\.lv','\.conf\.lv','\.com\.ly', 243 '\.net\.ly','\.gov\.ly','\.plc\.ly','\.edu\.ly','\.sch\.ly','\.med\.ly','\.org\.ly','\.id\.ly', 244 '\.ma','\.net\.ma','\.ac\.ma','\.org\.ma','\.gov\.ma','\.press\.ma','\.co\.ma','\.tm\.mc', 245 '\.asso\.mc','\.co\.me','\.net\.me','\.org\.me','\.edu\.me','\.ac\.me','\.gov\.me','\.its\.me', 246 '\.priv\.me','\.org\.mg','\.nom\.mg','\.gov\.mg','\.prd\.mg','\.tm\.mg','\.edu\.mg','\.mil\.mg', 247 '\.com\.mg','\.com\.mk','\.org\.mk','\.net\.mk','\.edu\.mk','\.gov\.mk','\.inf\.mk','\.name\.mk', 248 '\.pro\.mk','\.com\.ml','\.net\.ml','\.org\.ml','\.edu\.ml','\.gov\.ml','\.presse\.ml','\.gov\.mn', 249 '\.edu\.mn','\.org\.mn','\.com\.mo','\.edu\.mo','\.gov\.mo','\.net\.mo','\.org\.mo','\.com\.mt', 250 '\.org\.mt','\.net\.mt','\.edu\.mt','\.gov\.mt','\.aero\.mv','\.biz\.mv','\.com\.mv','\.coop\.mv', 251 '\.edu\.mv','\.gov\.mv','\.info\.mv','\.int\.mv','\.mil\.mv','\.museum\.mv','\.name\.mv','\.net\.mv', 252 '\.org\.mv','\.pro\.mv','\.ac\.mw','\.co\.mw','\.com\.mw','\.coop\.mw','\.edu\.mw','\.gov\.mw', 253 '\.int\.mw','\.museum\.mw','\.net\.mw','\.org\.mw','\.com\.mx','\.net\.mx','\.org\.mx','\.edu\.mx', 254 '\.gob\.mx','\.com\.my','\.net\.my','\.org\.my','\.gov\.my','\.edu\.my','\.sch\.my','\.mil\.my', 255 '\.name\.my','\.com\.nf','\.net\.nf','\.arts\.nf','\.store\.nf','\.web\.nf','\.firm\.nf', 256 '\.info\.nf','\.other\.nf','\.per\.nf','\.rec\.nf','\.com\.ng','\.org\.ng','\.gov\.ng','\.edu\.ng', 257 '\.net\.ng','\.sch\.ng','\.name\.ng','\.mobi\.ng','\.biz\.ng','\.mil\.ng','\.gob\.ni','\.co\.ni', 258 '\.com\.ni','\.ac\.ni','\.edu\.ni','\.org\.ni','\.nom\.ni','\.net\.ni','\.mil\.ni','\.com\.np', 259 '\.edu\.np','\.gov\.np','\.org\.np','\.mil\.np','\.net\.np','\.edu\.nr','\.gov\.nr','\.biz\.nr', 260 '\.info\.nr','\.net\.nr','\.org\.nr','\.com\.nr','\.com\.om','\.co\.om','\.edu\.om','\.ac\.om', 261 '\.sch\.om','\.gov\.om','\.net\.om','\.org\.om','\.mil\.om','\.museum\.om','\.biz\.om','\.pro\.om', 262 '\.med\.om','\.edu\.pe','\.gob\.pe','\.nom\.pe','\.mil\.pe','\.sld\.pe','\.org\.pe','\.com\.pe', 263 '\.net\.pe','\.com\.ph','\.net\.ph','\.org\.ph','\.mil\.ph','\.ngo\.ph','\.i\.ph','\.gov\.ph', 264 '\.edu\.ph','\.com\.pk','\.net\.pk','\.edu\.pk','\.org\.pk','\.fam\.pk','\.biz\.pk','\.web\.pk', 265 '\.gov\.pk','\.gob\.pk','\.gok\.pk','\.gon\.pk','\.gop\.pk','\.gos\.pk','\.pwr\.pl','\.com\.pl', 266 '\.biz\.pl','\.net\.pl','\.art\.pl','\.edu\.pl','\.org\.pl','\.ngo\.pl','\.gov\.pl','\.info\.pl', 267 '\.mil\.pl','\.waw\.pl','\.warszawa\.pl','\.wroc\.pl','\.wroclaw\.pl','\.krakow\.pl','\.katowice\.pl', 268 '\.poznan\.pl','\.lodz\.pl','\.gda\.pl','\.gdansk\.pl','\.slupsk\.pl','\.radom\.pl','\.szczecin\.pl', 269 '\.lublin\.pl','\.bialystok\.pl','\.olsztyn\.pl','\.torun\.pl','\.gorzow\.pl','\.zgora\.pl', 270 '\.biz\.pr','\.com\.pr','\.edu\.pr','\.gov\.pr','\.info\.pr','\.isla\.pr','\.name\.pr','\.net\.pr', 271 '\.org\.pr','\.pro\.pr','\.est\.pr','\.prof\.pr','\.ac\.pr','\.com\.ps','\.net\.ps','\.org\.ps', 272 '\.edu\.ps','\.gov\.ps','\.plo\.ps','\.sec\.ps','\.co\.pw','\.ne\.pw','\.or\.pw','\.ed\.pw','\.go\.pw', 273 '\.belau\.pw','\.arts\.ro','\.com\.ro','\.firm\.ro','\.info\.ro','\.nom\.ro','\.nt\.ro','\.org\.ro', 274 '\.rec\.ro','\.store\.ro','\.tm\.ro','\.www\.ro','\.co\.rs','\.org\.rs','\.edu\.rs','\.ac\.rs', 275 '\.gov\.rs','\.in\.rs','\.com\.sb','\.net\.sb','\.edu\.sb','\.org\.sb','\.gov\.sb','\.com\.sc', 276 '\.net\.sc','\.edu\.sc','\.gov\.sc','\.org\.sc','\.co\.sh','\.com\.sh','\.org\.sh','\.gov\.sh', 277 '\.edu\.sh','\.net\.sh','\.nom\.sh','\.com\.sl','\.net\.sl','\.org\.sl','\.edu\.sl','\.gov\.sl', 278 '\.gov\.st','\.saotome\.st','\.principe\.st','\.consulado\.st','\.embaixada\.st','\.org\.st', 279 '\.edu\.st','\.net\.st','\.com\.st','\.store\.st','\.mil\.st','\.co\.st','\.edu\.sv','\.gob\.sv', 280 '\.com\.sv','\.org\.sv','\.red\.sv','\.co\.sz','\.ac\.sz','\.org\.sz','\.com\.tr','\.gen\.tr', 281 '\.org\.tr','\.biz\.tr','\.info\.tr','\.av\.tr','\.dr\.tr','\.pol\.tr','\.bel\.tr','\.tsk\.tr', 282 '\.bbs\.tr','\.k12\.tr','\.edu\.tr','\.name\.tr','\.net\.tr','\.gov\.tr','\.web\.tr','\.tel\.tr', 283 '\.tv\.tr','\.co\.tt','\.com\.tt','\.org\.tt','\.net\.tt','\.biz\.tt','\.info\.tt','\.pro\.tt', 284 '\.int\.tt','\.coop\.tt','\.jobs\.tt','\.mobi\.tt','\.travel\.tt','\.museum\.tt','\.aero\.tt', 285 '\.cat\.tt','\.tel\.tt','\.name\.tt','\.mil\.tt','\.edu\.tt','\.gov\.tt','\.edu\.tw','\.gov\.tw', 286 '\.mil\.tw','\.com\.tw','\.net\.tw','\.org\.tw','\.idv\.tw','\.game\.tw','\.ebiz\.tw','\.club\.tw', 287 '\.com\.mu','\.gov\.mu','\.net\.mu','\.org\.mu','\.ac\.mu','\.co\.mu','\.or\.mu','\.ac\.mz', 288 '\.co\.mz','\.edu\.mz','\.org\.mz','\.gov\.mz','\.com\.na','\.co\.na','\.ac\.nz','\.co\.nz', 289 '\.cri\.nz','\.geek\.nz','\.gen\.nz','\.govt\.nz','\.health\.nz','\.iwi\.nz','\.maori\.nz', 290 '\.mil\.nz','\.net\.nz','\.org\.nz','\.parliament\.nz','\.school\.nz','\.abo\.pa','\.ac\.pa', 291 '\.com\.pa','\.edu\.pa','\.gob\.pa','\.ing\.pa','\.med\.pa','\.net\.pa','\.nom\.pa','\.org\.pa', 292 '\.sld\.pa','\.com\.pt','\.edu\.pt','\.gov\.pt','\.int\.pt','\.net\.pt','\.nome\.pt','\.org\.pt', 293 '\.publ\.pt','\.com\.py','\.edu\.py','\.gov\.py','\.mil\.py','\.net\.py','\.org\.py','\.com\.qa', 294 '\.edu\.qa','\.gov\.qa','\.mil\.qa','\.net\.qa','\.org\.qa','\.asso\.re','\.com\.re','\.nom\.re', 295 '\.ac\.ru','\.adygeya\.ru','\.altai\.ru','\.amur\.ru','\.arkhangelsk\.ru','\.astrakhan\.ru', 296 '\.bashkiria\.ru','\.belgorod\.ru','\.bir\.ru','\.bryansk\.ru','\.buryatia\.ru','\.cbg\.ru', 297 '\.chel\.ru','\.chelyabinsk\.ru','\.chita\.ru','\.chita\.ru','\.chukotka\.ru','\.chuvashia\.ru', 298 '\.com\.ru','\.dagestan\.ru','\.e-burg\.ru','\.edu\.ru','\.gov\.ru','\.grozny\.ru','\.int\.ru', 299 '\.irkutsk\.ru','\.ivanovo\.ru','\.izhevsk\.ru','\.jar\.ru','\.joshkar-ola\.ru','\.kalmykia\.ru', 300 '\.kaluga\.ru','\.kamchatka\.ru','\.karelia\.ru','\.kazan\.ru','\.kchr\.ru','\.kemerovo\.ru', 301 '\.khabarovsk\.ru','\.khakassia\.ru','\.khv\.ru','\.kirov\.ru','\.koenig\.ru','\.komi\.ru', 302 '\.kostroma\.ru','\.kranoyarsk\.ru','\.kuban\.ru','\.kurgan\.ru','\.kursk\.ru','\.lipetsk\.ru', 303 '\.magadan\.ru','\.mari\.ru','\.mari-el\.ru','\.marine\.ru','\.mil\.ru','\.mordovia\.ru', 304 '\.mosreg\.ru','\.msk\.ru','\.murmansk\.ru','\.nalchik\.ru','\.net\.ru','\.nnov\.ru','\.nov\.ru', 305 '\.novosibirsk\.ru','\.nsk\.ru','\.omsk\.ru','\.orenburg\.ru','\.org\.ru','\.oryol\.ru','\.penza\.ru', 306 '\.perm\.ru','\.pp\.ru','\.pskov\.ru','\.ptz\.ru','\.rnd\.ru','\.ryazan\.ru','\.sakhalin\.ru','\.samara\.ru', 307 '\.saratov\.ru','\.simbirsk\.ru','\.smolensk\.ru','\.spb\.ru','\.stavropol\.ru','\.stv\.ru', 308 '\.surgut\.ru','\.tambov\.ru','\.tatarstan\.ru','\.tom\.ru','\.tomsk\.ru','\.tsaritsyn\.ru', 309 '\.tsk\.ru','\.tula\.ru','\.tuva\.ru','\.tver\.ru','\.tyumen\.ru','\.udm\.ru','\.udmurtia\.ru','\.ulan-ude\.ru', 310 '\.vladikavkaz\.ru','\.vladimir\.ru','\.vladivostok\.ru','\.volgograd\.ru','\.vologda\.ru', 311 '\.voronezh\.ru','\.vrn\.ru','\.vyatka\.ru','\.yakutia\.ru','\.yamal\.ru','\.yekaterinburg\.ru', 312 '\.yuzhno-sakhalinsk\.ru','\.ac\.rw','\.co\.rw','\.com\.rw','\.edu\.rw','\.gouv\.rw','\.gov\.rw', 313 '\.int\.rw','\.mil\.rw','\.net\.rw','\.com\.sa','\.edu\.sa','\.gov\.sa','\.med\.sa','\.net\.sa', 314 '\.org\.sa','\.pub\.sa','\.sch\.sa','\.com\.sd','\.edu\.sd','\.gov\.sd','\.info\.sd','\.med\.sd', 315 '\.net\.sd','\.org\.sd','\.tv\.sd','\.a\.se','\.ac\.se','\.b\.se','\.bd\.se','\.c\.se','\.d\.se', 316 '\.e\.se','\.f\.se','\.g\.se','\.h\.se','\.i\.se','\.k\.se','\.l\.se','\.m\.se','\.n\.se','\.o\.se', 317 '\.org\.se','\.p\.se','\.parti\.se','\.pp\.se','\.press\.se','\.r\.se','\.s\.se','\.t\.se','\.tm\.se', 318 '\.u\.se','\.w\.se','\.x\.se','\.y\.se','\.z\.se','\.com\.sg','\.edu\.sg','\.gov\.sg','\.idn\.sg', 319 '\.net\.sg','\.org\.sg','\.per\.sg','\.art\.sn','\.com\.sn','\.edu\.sn','\.gouv\.sn','\.org\.sn', 320 '\.perso\.sn','\.univ\.sn','\.com\.sy','\.edu\.sy','\.gov\.sy','\.mil\.sy','\.net\.sy','\.news\.sy', 321 '\.org\.sy','\.ac\.th','\.co\.th','\.go\.th','\.in\.th','\.mi\.th','\.net\.th','\.or\.th','\.ac\.tj', 322 '\.biz\.tj','\.co\.tj','\.com\.tj','\.edu\.tj','\.go\.tj','\.gov\.tj','\.info\.tj','\.int\.tj', 323 '\.mil\.tj','\.name\.tj','\.net\.tj','\.nic\.tj','\.org\.tj','\.test\.tj','\.web\.tj','\.agrinet\.tn', 324 '\.com\.tn','\.defense\.tn','\.edunet\.tn','\.ens\.tn','\.fin\.tn','\.gov\.tn','\.ind\.tn','\.info\.tn', 325 '\.intl\.tn','\.mincom\.tn','\.nat\.tn','\.net\.tn','\.org\.tn','\.perso\.tn','\.rnrt\.tn','\.rns\.tn', 326 '\.rnu\.tn','\.tourism\.tn','\.ac\.tz','\.co\.tz','\.go\.tz','\.ne\.tz','\.or\.tz','\.biz\.ua', 327 '\.cherkassy\.ua','\.chernigov\.ua','\.chernovtsy\.ua','\.ck\.ua','\.cn\.ua','\.co\.ua','\.com\.ua', 328 '\.crimea\.ua','\.cv\.ua','\.dn\.ua','\.dnepropetrovsk\.ua','\.donetsk\.ua','\.dp\.ua','\.edu\.ua', 329 '\.gov\.ua','\.if\.ua','\.in\.ua','\.ivano-frankivsk\.ua','\.kh\.ua','\.kharkov\.ua','\.kherson\.ua', 330 '\.khmelnitskiy\.ua','\.kiev\.ua','\.kirovograd\.ua','\.km\.ua','\.kr\.ua','\.ks\.ua','\.kv\.ua', 331 '\.lg\.ua','\.lugansk\.ua','\.lutsk\.ua','\.lviv\.ua','\.me\.ua','\.mk\.ua','\.net\.ua','\.nikolaev\.ua', 332 '\.od\.ua','\.odessa\.ua','\.org\.ua','\.pl\.ua','\.poltava\.ua','\.pp\.ua','\.rovno\.ua','\.rv\.ua', 333 '\.sebastopol\.ua','\.sumy\.ua','\.te\.ua','\.ternopil\.ua','\.uzhgorod\.ua','\.vinnica\.ua','\.vn\.ua', 334 '\.zaporizhzhe\.ua','\.zhitomir\.ua','\.zp\.ua','\.zt\.ua','\.ac\.ug','\.co\.ug','\.go\.ug','\.ne\.ug', 335 '\.or\.ug','\.org\.ug','\.sc\.ug','\.ac\.uk','\.bl\.uk','\.british-library\.uk','\.co\.uk','\.cym\.uk', 336 '\.gov\.uk','\.govt\.uk','\.icnet\.uk','\.jet\.uk','\.lea\.uk','\.ltd\.uk','\.me\.uk','\.mil\.uk', 337 '\.mod\.uk','\.mod\.uk','\.national-library-scotland\.uk','\.nel\.uk','\.net\.uk','\.nhs\.uk', 338 '\.nhs\.uk','\.nic\.uk','\.nls\.uk','\.org\.uk','\.orgn\.uk','\.parliament\.uk','\.parliament\.uk', 339 '\.plc\.uk','\.police\.uk','\.sch\.uk','\.scot\.uk','\.soc\.uk','\.dni\.us','\.fed\.us','\.isa\.us', 340 '\.kids\.us','\.nsn\.us','\.com\.uy','\.edu\.uy','\.gub\.uy','\.mil\.uy','\.net\.uy','\.org\.uy', 341 '\.co\.ve','\.com\.ve','\.edu\.ve','\.gob\.ve','\.info\.ve','\.mil\.ve','\.net\.ve','\.org\.ve', 342 '\.web\.ve','\.co\.vi','\.com\.vi','\.k12\.vi','\.net\.vi','\.org\.vi','\.ac\.vn','\.biz\.vn', 343 '\.com\.vn','\.edu\.vn','\.gov\.vn','\.health\.vn','\.info\.vn','\.int\.vn','\.name\.vn','\.net\.vn', 344 '\.org\.vn','\.pro\.vn','\.co\.ye','\.com\.ye','\.gov\.ye','\.ltd\.ye','\.me\.ye','\.net\.ye', 345 '\.org\.ye','\.plc\.ye','\.ac\.yu','\.co\.yu','\.edu\.yu','\.gov\.yu','\.org\.yu','\.ac\.za', 346 '\.agric\.za','\.alt\.za','\.bourse\.za','\.city\.za','\.co\.za','\.cybernet\.za','\.db\.za', 347 '\.ecape\.school\.za','\.edu\.za','\.fs\.school\.za','\.gov\.za','\.gp\.school\.za','\.grondar\.za', 348 '\.iaccess\.za','\.imt\.za','\.inca\.za','\.kzn\.school\.za','\.landesign\.za','\.law\.za', 349 '\.lp\.school\.za','\.mil\.za','\.mpm\.school\.za','\.ncape\.school\.za','\.net\.za','\.ngo\.za', 350 '\.nis\.za','\.nom\.za','\.nw\.school\.za','\.olivetti\.za','\.org\.za','\.pix\.za','\.school\.za', 351 '\.tm\.za','\.wcape\.school\.za','\.web\.za','\.ac\.zm','\.co\.zm','\.com\.zm','\.edu\.zm','\.gov\.zm', 352 '\.net\.zm','\.org\.zm','\.sch\.zm' 353 ); 354 355 my @TLD2 = ( 356 '\.ac','\.ad','\.ae','\.af','\.ag','\.ai','\.al','\.am','\.ao','\.aq', 357 '\.ar','\.as','\.at','\.au','\.aw','\.ax','\.az','\.ba','\.bb','\.bd', 358 '\.be','\.bf','\.bg','\.bh','\.bi','\.bj','\.bm','\.bn','\.bo','\.br', 359 '\.bs','\.bt','\.bw','\.by','\.bz','\.ca','\.cc','\.cd','\.cf','\.cg', 360 '\.ch','\.ci','\.ck','\.cl','\.cm','\.cn','\.co','\.cr','\.cu','\.cv', 361 '\.cw','\.cx','\.cy','\.cz','\.de','\.dj','\.dk','\.dm','\.do','\.dz', 362 '\.ec','\.ee','\.eg','\.er','\.es','\.et','\.eu','\.fi','\.fj','\.fk', 363 '\.fm','\.fo','\.fr','\.ga','\.gd','\.ge','\.gf','\.gg','\.gh','\.gi', 364 '\.gl','\.gm','\.gn','\.gp','\.gq','\.gr','\.gs','\.gt','\.gu','\.gw', 365 '\.gy','\.hk','\.hm','\.hn','\.hr','\.ht','\.hu','\.id','\.ie','\.il', 366 '\.im','\.in','\.io','\.iq','\.ir','\.is','\.it','\.je','\.jm','\.jo', 367 '\.jp','\.ke','\.kg','\.kh','\.ki','\.km','\.kn','\.kp','\.kr','\.kw', 368 '\.ky','\.kz','\.la','\.lb','\.lc','\.li','\.lk','\.lr','\.ls','\.lt', 369 '\.lu','\.lv','\.ly','\.ma','\.mc','\.md','\.me','\.mg','\.mh','\.mk', 370 '\.ml','\.mm','\.mn','\.mo','\.mp','\.mq','\.mr','\.ms','\.mt','\.mu', 371 '\.mv','\.mw','\.mx','\.my','\.mz','\.na','\.nc','\.ne','\.nf','\.ng', 372 '\.ni','\.nl','\.no','\.np','\.nr','\.nu','\.nz','\.om','\.pa','\.pe', 373 '\.pf','\.pg','\.ph','\.pk','\.pl','\.pm','\.pn','\.pr','\.ps','\.pt', 374 '\.pw','\.py','\.qa','\.re','\.ro','\.rs','\.ru','\.rw','\.sa','\.sb', 375 '\.sc','\.sd','\.se','\.sg','\.sh','\.si','\.sk','\.sl','\.sm','\.sn', 376 '\.so','\.sr','\.ss','\.st','\.su','\.sv','\.sx','\.sy','\.sz','\.tc', 377 '\.td','\.tf','\.tg','\.th','\.tj','\.tk','\.tl','\.tm','\.tn','\.to', 378 '\.tr','\.tt','\.tv','\.tw','\.tz','\.ua','\.ug','\.uk','\.us','\.uy', 379 '\.uz','\.va','\.vc','\.ve','\.vg','\.vi','\.vn','\.vu','\.wf','\.ws', 380 '\.ye','\.za','\.zm','\.zw','\.com','\.info','\.net','\.org','\.biz', 381 '\.name','\.pro','\.xxx','\.aero','\.asia','\.bzh','\.cat','\.coop', 382 '\.edu','\.gov','\.int','\.jobs','\.mil','\.mobi','\.museum','\.paris', 383 '\.sport','\.tel','\.travel','\.kids','\.mail','\.post','\.arpa','\.example', 384 '\.invalid','\.localhost','\.test','\.bitnet','\.csnet','\.lan','\.local', 385 '\.onion','\.root','\.uucp','\.tld','\.nato' 386 ); 387 388 my %month_number = ( 389 'Jan' => '01', 390 'Feb' => '02', 391 'Mar' => '03', 392 'Apr' => '04', 393 'May' => '05', 394 'Jun' => '06', 395 'Jul' => '07', 396 'Aug' => '08', 397 'Sep' => '09', 398 'Oct' => '10', 399 'Nov' => '11', 400 'Dec' => '12', 401 ); 402 403 # Regex to match ipv4 and ipv6 address 404 my $ip_regexp = qr/^([a-fA-F0-9\.\:]+)$/; 405 my $cidr_regex = qr/^[a-fA-F0-9\.\:]+\/\d+$/; 406 407 # Native log format squid %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %un %Sh/%<A %mt 408 my $native_format_regex1 = qr/^(\d+\.\d{3})\s+(\d+)\s+([^\s]+)\s+([^\s]+)\s+(\d+)\s+([^\s]+)\s+(.*)/; 409 my $native_format_regex2 = qr/^([^\s]+?)\s+([^\s]+)\s+([^\s]+\/[^\s]+)\s+([^\s]+)\s*/; 410 #logformat common %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh 411 #logformat combined %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh 412 my $common_format_regex1 = qr/([^\s]+)\s([^\s]+)\s([^\s]+)\s\[(\d+\/...\/\d+:\d+:\d+:\d+\s[\d\+\-]+)\]\s"([^\s]+)\s([^\s]+)\s([^\s]+)"\s(\d+)\s+(\d+)(.*)\s([^\s:]+:[^\s]+)\s*([^\/]+\/[^\s]+|-)?$/; 413 # Log format for SquidGuard logs 414 my $sg_format_regex1 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .* Request\(([^\/]+\/[^\/]+)\/[^\)]*\) ([^\s]+) ([^\s\\]+)\/[^\s]+ ([^\s]+) ([^\s]+) ([^\s]+)/; 415 my $sg_format_regex2 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .* (New setting|Added User|init domainlist|Going into emergency mode|ending emergency mode)/; 416 # Log format for ufdbGuard logs: BLOCK user clienthost aclname category url method 417 my $ug_format_regex1 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .* (BLOCK) ([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)$/; 418 419 sub new 420 { 421 my ($class, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone) = @_; 422 423 # Construct the class 424 my $self = {}; 425 bless $self, $class; 426 427 # Initialize all variables 428 $self->_init($conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone); 429 430 # Return the instance 431 return($self); 432 433 } 434 435 sub localdie 436 { 437 my ($self, $msg) = @_; 438 439 print STDERR "$msg"; 440 unlink("$self->{pidfile}"); 441 442 # Cleanup old temporary files 443 foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp', 'ug_last_parsed.tmp') { 444 unlink("$self->{pid_dir}/$tmp_file"); 445 } 446 447 exit 1; 448 } 449 450 #### 451 # method used to fork as many child as wanted 452 ## 453 sub spawn 454 { 455 my $self = shift; 456 my $coderef = shift; 457 458 unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') { 459 print "usage: spawn CODEREF"; 460 exit 0; 461 } 462 463 my $pid; 464 if (!defined($pid = fork)) { 465 print STDERR "Error: cannot fork: $!\n"; 466 return; 467 } elsif ($pid) { 468 $self->{running_pids}{$pid} = 1; 469 return; # the parent 470 } 471 # the child -- go spawn 472 $< = $>; 473 $( = $); # suid progs only 474 475 exit &$coderef(); 476 } 477 478 sub wait_all_childs 479 { 480 my $self = shift; 481 482 while (scalar keys %{$self->{running_pids}} > 0) { 483 my $kid = waitpid(-1, WNOHANG); 484 if ($kid > 0) { 485 delete $self->{running_pids}{$kid}; 486 } 487 sleep(1); 488 } 489 } 490 491 sub manage_queue_size 492 { 493 my ($self, $child_count) = @_; 494 495 while ($child_count >= $self->{queue_size}) { 496 my $kid = waitpid(-1, WNOHANG); 497 if ($kid > 0) { 498 $child_count--; 499 delete $self->{running_pids}{$kid}; 500 } 501 sleep(1); 502 } 503 504 return $child_count; 505 } 506 507 sub save_current_line 508 { 509 my $self = shift; 510 511 if ($self->{end_time}) { 512 my $current = new IO::File; 513 $current->open(">$self->{Output}/SquidAnalyzer.current") or $self->localdie("FATAL: Can't write to file $self->{Output}/SquidAnalyzer.current, $!\n"); 514 print $current "$self->{end_time}\t$self->{end_offset}"; 515 $current->close; 516 } 517 if ($self->{sg_end_time}) { 518 my $current = new IO::File; 519 $current->open(">$self->{Output}/SquidGuard.current") or $self->localdie("FATAL: Can't write to file $self->{Output}/SquidGuard.current, $!\n"); 520 print $current "$self->{sg_end_time}\t$self->{sg_end_offset}"; 521 $current->close; 522 } 523 if ($self->{ug_end_time}) { 524 my $current = new IO::File; 525 $current->open(">$self->{Output}/ufdbGuard.current") or $self->localdie("FATAL: Can't write to file $self->{Output}/ufdbGuard.current, $!\n"); 526 print $current "$self->{ug_end_time}\t$self->{ug_end_offset}"; 527 $current->close; 528 } 529 } 530 531 # Extract number of seconds since epoch from timestamp in log line 532 sub look_for_timestamp 533 { 534 my ($self, $line) = @_; 535 536 my $time = 0; 537 my $tz = ((0-$self->{TimeZone})*3600); 538 # Squid native format 539 if ( $line =~ $native_format_regex1 ) { 540 $time = $1; 541 $self->{is_squidguard_log} = 0; 542 $self->{is_ufdbguard_log} = 0; 543 # Squid common HTTP format 544 } elsif ( $line =~ $common_format_regex1 ) { 545 $time = $4; 546 $time =~ /(\d+)\/(...)\/(\d+):(\d+):(\d+):(\d+)\s/; 547 if (!$self->{TimeZone}) { 548 $time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900); 549 } else { 550 $time = timegm_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900) + $tz; 551 } 552 $self->{is_squidguard_log} = 0; 553 $self->{is_ufdbguard_log} = 0; 554 # SquidGuard log format 555 } elsif (( $line =~ $sg_format_regex1 ) || ( $line =~ $sg_format_regex2 )) { 556 $self->{is_squidguard_log} = 1; 557 $self->{is_ufdbguard_log} = 0; 558 if (!$self->{TimeZone}) { 559 $time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900); 560 } else { 561 $time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz; 562 } 563 # ufdbGuard log format 564 } elsif ( $line =~ $ug_format_regex1 ) { 565 $self->{is_ufdbguard_log} = 1; 566 $self->{is_squidguard_log} = 0; 567 if (!$self->{TimeZone}) { 568 $time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900); 569 } else { 570 $time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz; 571 } 572 } 573 574 return $time; 575 } 576 577 # Detect if log file is a squidGuard log or not 578 sub get_log_format 579 { 580 my ($self, $file) = @_; 581 582 my $logfile = new IO::File; 583 $logfile->open($file) || $self->localdie("ERROR: Unable to open log file $file. $!\n"); 584 my $max_line = 10000; 585 my $i = 0; 586 while (my $line = <$logfile>) { 587 chomp($line); 588 589 # SquidGuard log format 590 if (( $line =~ $sg_format_regex1 ) || ( $line =~ $sg_format_regex2 )) { 591 $self->{is_squidguard_log} = 1; 592 $self->{is_ufdbguard_log} = 0; 593 last; 594 # ufdbGuard log format 595 } elsif ( $line =~ $ug_format_regex1 ) { 596 $self->{is_ufdbguard_log} = 1; 597 $self->{is_squidguard_log} = 0; 598 last; 599 # Squid native format 600 } elsif ( $line =~ $native_format_regex1 ) { 601 $self->{is_squidguard_log} = 0; 602 $self->{is_ufdbguard_log} = 0; 603 last; 604 # Squid common HTTP format 605 } elsif ( $line =~ $common_format_regex1 ) { 606 $self->{is_squidguard_log} = 0; 607 $self->{is_ufdbguard_log} = 0; 608 last; 609 } else { 610 last if ($i > $max_line); 611 } 612 $i++; 613 } 614 $logfile->close(); 615 } 616 617 618 sub parseFile 619 { 620 my ($self) = @_; 621 622 my $line_count = 0; 623 my $line_processed_count = 0; 624 my $line_stored_count = 0; 625 my $saved_queue_size = $self->{queue_size}; 626 my $history_offset = $self->{end_offset}; 627 628 foreach my $lfile (@{$self->{LogFile}}) { 629 630 # Detect if log file is from squid or squidguard 631 $self->get_log_format($lfile); 632 if ($self->{is_ufdbguard_log}) { 633 $history_offset = $self->{ug_end_offset}; 634 } elsif ($self->{is_squidguard_log}) { 635 $history_offset = $self->{sg_end_offset}; 636 } else { 637 $history_offset = $self->{end_offset}; 638 } 639 640 print STDERR "Starting to parse logfile $lfile.\n" if (!$self->{QuietMode}); 641 if ((!-f $lfile) || (-z $lfile)) { 642 print STDERR "DEBUG: bad or empty log file $lfile.\n" if (!$self->{QuietMode}); 643 next; 644 } 645 # Restore the right multiprocess queue 646 $self->{queue_size} = $saved_queue_size; 647 648 # Compressed file do not allow multiprocess 649 if ($lfile =~ /\.(gz|bz2)$/) { 650 $self->{queue_size} = 1; 651 } 652 653 # Search the last position in logfile 654 if ($history_offset) { 655 656 # Initialize start offset for each file 657 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 658 $self->{end_offset} = $history_offset; 659 } elsif (!$self->{is_squidguard_log}) { 660 $self->{ug_end_offset} = $history_offset; 661 } else { 662 $self->{sg_end_offset} = $history_offset; 663 } 664 665 # Compressed file are always read from the begining 666 if ($lfile =~ /\.(gz|bz2)$/i) { 667 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 668 $self->{end_offset} = 0; 669 } elsif (!$self->{is_squidguard_log}) { 670 $self->{ug_end_offset} = 0; 671 } else { 672 $self->{sg_end_offset} = 0; 673 } 674 } else { 675 # Look at first line to see if the file should be parse from the begining. 676 my $logfile = new IO::File; 677 $logfile->open($lfile) || $self->localdie("ERROR: Unable to open log file $lfile. $!\n"); 678 my $line = <$logfile>; 679 chomp($line); 680 681 # Remove syslog header and mark the format 682 if ($line =~ s/^... \d+ \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ [^\s]+ (\d{10}\.\d{3}) /$1 /) { 683 print STDERR "DEBUG: log was generated through syslog, the header will be removed.\n" if (!$self->{QuietMode}); 684 $self->{Syslog} = 1; 685 } 686 687 my $curtime = $self->look_for_timestamp($line); 688 689 my $hist_time = $self->{history_time}; 690 if ($self->{is_squidguard_log}) { 691 $hist_time = $self->{sg_history_time}; 692 } elsif ($self->{is_ufdbguard_log}) { 693 $hist_time = $self->{ug_history_time}; 694 } 695 # if the first timestamp is higher that the history time, start from the beginning 696 if ($curtime > $hist_time) { 697 print STDERR "DEBUG: new file: $lfile, start from the beginning.\n" if (!$self->{QuietMode}); 698 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 699 $self->{end_offset} = 0; 700 } elsif (!$self->{is_squidguard_log}) { 701 $self->{ug_end_offset} = 0; 702 } else { 703 $self->{sg_end_offset} = 0; 704 } 705 # If the size of the file is lower than the history offset, parse this file from the beginning 706 } elsif ((lstat($lfile))[7] <= $history_offset) { 707 # move at begining of the file to see if this is a new one 708 $logfile->seek(0, 0); 709 for (my $i = 1; $i <= 10000; $i++) { 710 $line = <$logfile>; 711 chomp($line); 712 # Remove syslog header and mark the format 713 if ($self->{Syslog}) { 714 $line =~ s/^[A-Z][a-z]{2} \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ \[[^\]]+\] (\d{10}\.\d{3})/$1/; 715 } 716 $curtime = $self->look_for_timestamp($line); 717 if ($curtime) { 718 # If timestamp found at startup is lower than the history file, 719 # the file will not be parsed at all. 720 if ($hist_time > $curtime) { 721 print STDERR "DEBUG: this file will not be parsed: $lfile, size is lower than expected.\n" if (!$self->{QuietMode}); 722 print STDERR "DEBUG: exploring $lfile, timestamp found at startup, $curtime, is lower than history time $hist_time.\n" if (!$self->{QuietMode}); 723 $line = 'NOK'; 724 last; 725 } 726 } 727 } 728 $logfile->close; 729 # This file should be ommitted jump to the next file 730 next if ($line eq 'NOK'); 731 732 print STDERR "DEBUG: new file: $lfile, start from the beginning.\n" if (!$self->{QuietMode}); 733 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 734 $self->{end_offset} = 0; 735 } elsif (!$self->{is_squidguard_log}) { 736 $self->{ug_end_offset} = 0; 737 } else { 738 $self->{sg_end_offset} = 0; 739 } 740 } else { 741 # move at offset and see if next line is older than history time 742 $logfile->seek($history_offset, 0); 743 for (my $i = 1; $i <= 10; $i++) { 744 $line = <$logfile>; 745 chomp($line); 746 # Remove syslog header and mark the format 747 if ($self->{Syslog}) { 748 $line =~ s/^[A-Z][a-z]{2} \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ \[[^\]]+\] (\d{10}\.\d{3})/$1/; 749 } 750 $curtime = $self->look_for_timestamp($line); 751 if ($curtime) { 752 if ($curtime < $hist_time) { 753 my $tmp_time = CORE::localtime($curtime); 754 print STDERR "DEBUG: this file will not be parsed: $lfile, line after offset is older than expected: $curtime < $hist_time.\n" if (!$self->{QuietMode}); 755 $line = 'NOK'; 756 last; 757 } 758 } 759 } 760 $logfile->close; 761 # This file should be ommitted jump to the next file 762 next if ($line eq 'NOK'); 763 } 764 $logfile->close; 765 } 766 767 } else { 768 print STDERR "DEBUG: this file will be parsed, no history found.\n" if (!$self->{QuietMode}); 769 # Initialise start offset for each file 770 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 771 $self->{end_offset} = 0; 772 } elsif (!$self->{is_squidguard_log}) { 773 $self->{ug_end_offset} = 0; 774 } else { 775 $self->{sg_end_offset} = 0; 776 } 777 } 778 779 if ($self->{queue_size} <= 1) { 780 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 781 $self->_parse_file_part($lfile, $self->{end_offset}); 782 } elsif (!$self->{is_squidguard_log}) { 783 $self->_parse_file_part($lfile, $self->{ug_end_offset}); 784 } else { 785 $self->_parse_file_part($lfile, $self->{sg_end_offset}); 786 } 787 } else { 788 # Create multiple processes to parse one log file by chunks of data 789 my @chunks = $self->split_logfile($lfile); 790 my $child_count = 0; 791 for (my $i = 0; $i < $#chunks; $i++) { 792 if ($self->{interrupt}) { 793 print STDERR "FATAL: Abort signal received when processing to next chunk\n"; 794 return; 795 } 796 $self->spawn(sub { 797 $self->_parse_file_part($lfile, $chunks[$i], $chunks[$i+1], $i); 798 }); 799 $child_count = $self->manage_queue_size(++$child_count); 800 } 801 } 802 } 803 804 # Wait for last child stop 805 $self->wait_all_childs() if ($self->{queue_size} > 1); 806 807 # Get the last information parsed in this file part 808 foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp', 'sg_last_parsed.tmp') { 809 810 if (-e "$self->{pid_dir}/$tmp_file") { 811 812 if (open(IN, "$self->{pid_dir}/$tmp_file")) { 813 my %history_tmp = (); 814 while (my $l = <IN>) { 815 chomp($l); 816 my @data = split(/\s/, $l); 817 $history_tmp{"$data[0]$data[1]$data[2]"}{$data[4]} = join(' ', @data); 818 $line_stored_count += $data[5]; 819 $line_processed_count += $data[6]; 820 $line_count += $data[7]; 821 if (!$self->{first_year} || ("$data[8]$data[9]" lt "$self->{first_year}$self->{first_month}{$data[8]}}") ) { 822 $self->{first_year} = $data[8]; 823 $self->{first_month}{$data[8]} = $data[9]; 824 } 825 my @tmp = split(/,/, $data[10]); 826 foreach my $w (@tmp) { 827 if (!grep(/^$w$/, @{$self->{week_parsed}})) { 828 push(@{$self->{week_parsed}}, $w); 829 } 830 } 831 } 832 close(IN); 833 foreach my $date (sort {$b <=> $a} keys %history_tmp) { 834 foreach my $offset (sort {$b <=> $a} keys %{$history_tmp{$date}}) { 835 my @data = split(/\s/, $history_tmp{$date}{$offset}); 836 $self->{last_year} = $data[0]; 837 $self->{last_month}{$data[0]} = $data[1]; 838 $self->{last_day}{$data[0]} = $data[2]; 839 if ($tmp_file eq 'last_parsed.tmp') { 840 $self->{end_time} = $data[3]; 841 $self->{end_offset} = $data[4]; 842 } elsif ($tmp_file eq 'ug_last_parsed.tmp') { 843 $self->{ug_end_time} = $data[3]; 844 $self->{ug_end_offset} = $data[4]; 845 } elsif ($tmp_file eq 'sg_last_parsed.tmp') { 846 $self->{sg_end_time} = $data[3]; 847 $self->{sg_end_offset} = $data[4]; 848 } 849 last; 850 } 851 last; 852 } 853 } else { 854 print STDERR "ERROR: can't read last parsed line from $self->{pid_dir}/$tmp_file, $!\n"; 855 } 856 } 857 } 858 859 if (!$self->{last_year}) { 860 861 print STDERR "No new log registered...\n" if (!$self->{QuietMode}); 862 863 } else { 864 865 if (!$self->{QuietMode}) { 866 print STDERR "SQUID LOG END TIME : ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{end_time})), "\n" if ($self->{end_time}); 867 print STDERR "SQUIGUARD LOG END TIME : ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{sg_end_time})), "\n" if ($self->{sg_end_time}); 868 print STDERR "UFDBGUARD LOG END TIME : ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{ug_end_time})), "\n" if ($self->{ug_end_time}); 869 print STDERR "Read $line_count lines, matched $line_processed_count and found $line_stored_count new lines\n"; 870 } 871 872 # Set the current start time into history file 873 $self->save_current_line(); 874 875 # Force reordering and unique sorting of data files 876 my $child_count = 0; 877 if (!$self->{rebuild}) { 878 if (!$self->{QuietMode}) { 879 print STDERR "Reordering daily data files now...\n"; 880 } 881 for my $date ("$self->{first_year}$self->{first_month}{$self->{first_year}}" .. "$self->{last_year}$self->{last_month}{$self->{last_year}}") { 882 $date =~ /^(\d{4})(\d{2})$/; 883 my $y = $1; 884 my $m = $2; 885 next if (($m < 1) || ($m > 12)); 886 if ($self->{interrupt}) { 887 print STDERR "FATAL: Abort signal received\n"; 888 return; 889 } 890 if (-d "$self->{Output}/$y/$m") { 891 foreach my $d ("01" .. "31") { 892 if (-d "$self->{Output}/$y/$m/$d") { 893 if ($self->{queue_size} > 1) { 894 $self->spawn(sub { 895 $self->_save_stat($y, $m, $d); 896 }); 897 $child_count = $self->manage_queue_size(++$child_count); 898 } else { 899 $self->_save_stat($y, $m, $d); 900 } 901 $self->_clear_stats(); 902 } 903 } 904 } 905 } 906 # Wait for last child stop 907 $self->wait_all_childs() if ($self->{queue_size} > 1); 908 $child_count = 0; 909 } 910 911 # Compute week statistics 912 if (!$self->{no_week_stat}) { 913 if (!$self->{QuietMode}) { 914 print STDERR "Generating weekly data files now...\n"; 915 } 916 917 foreach my $week (@{$self->{week_parsed}}) { 918 my ($y, $m, $wn) = split(/\//, $week); 919 my @wd = &get_wdays_per_month($wn, "$y-$m"); 920 $wn++; 921 922 print STDERR "Compute and dump weekly statistics for week $wn on $y\n" if (!$self->{QuietMode}); 923 if ($self->{queue_size} > 1) { 924 $self->spawn(sub { 925 $self->_save_data($y, $m, undef, sprintf("%02d", $wn), @wd); 926 }); 927 $child_count = $self->manage_queue_size(++$child_count); 928 } else { 929 $self->_save_data($y, $m, undef, sprintf("%02d", $wn), @wd); 930 } 931 $self->_clear_stats(); 932 } 933 } 934 # Wait for last child stop 935 $self->wait_all_childs() if ($self->{queue_size} > 1); 936 $child_count = 0; 937 938 # Compute month statistics 939 if (!$self->{QuietMode}) { 940 print STDERR "Generating monthly data files now...\n"; 941 } 942 943 for my $date ("$self->{first_year}$self->{first_month}{$self->{first_year}}" .. "$self->{last_year}$self->{last_month}{$self->{last_year}}") { 944 $date =~ /^(\d{4})(\d{2})$/; 945 my $y = $1; 946 my $m = $2; 947 next if (($m < 1) || ($m > 12)); 948 if ($self->{interrupt}) { 949 print STDERR "FATAL: Abort signal received\n"; 950 return; 951 } 952 if (-d "$self->{Output}/$y/$m") { 953 print STDERR "Compute and dump month statistics for $y/$m\n" if (!$self->{QuietMode}); 954 if ($self->{queue_size} > 1) { 955 $self->spawn(sub { 956 $self->_save_data("$y", "$m"); 957 }); 958 $child_count = $self->manage_queue_size(++$child_count); 959 } else { 960 $self->_save_data("$y", "$m"); 961 } 962 $self->_clear_stats(); 963 } 964 } 965 966 # Wait for last child stop 967 $self->wait_all_childs() if ($self->{queue_size} > 1); 968 969 # Compute year statistics 970 $child_count = 0; 971 if (!$self->{QuietMode}) { 972 print STDERR "Generating yearly data files now...\n"; 973 } 974 for my $year ($self->{first_year} .. $self->{last_year}) { 975 if ($self->{interrupt}) { 976 print STDERR "FATAL: Abort signal received\n"; 977 return; 978 } 979 if (-d "$self->{Output}/$year") { 980 print STDERR "Compute and dump year statistics for $year\n" if (!$self->{QuietMode}); 981 if ($self->{queue_size} > 1) { 982 $self->spawn(sub { 983 $self->_save_data("$year"); 984 }); 985 $child_count = $self->manage_queue_size(++$child_count); 986 } else { 987 $self->_save_data("$year"); 988 } 989 $self->_clear_stats(); 990 } 991 } 992 993 # Wait for last child stop 994 $self->wait_all_childs() if ($self->{queue_size} > 1); 995 996 } 997 998 } 999 1000 sub split_logfile 1001 { 1002 my ($self, $logf) = @_; 1003 1004 my @chunks = (0); 1005 1006 # get file size 1007 my $totalsize = (stat("$logf"))[7] || 0; 1008 1009 my $offsplit = $self->{end_offset}; 1010 if ($self->{is_squidguard_log}) { 1011 $offsplit = $self->{sg_end_offset}; 1012 } elsif ($self->{is_ufdbguard_log}) { 1013 $offsplit = $self->{ug_end_offset}; 1014 } 1015 1016 # If the file is very small, many jobs actually make the parsing take longer 1017 if ( ($totalsize <= 16777216) || ($totalsize <= $offsplit)) { #16MB 1018 push(@chunks, $totalsize); 1019 return @chunks; 1020 } 1021 1022 # Split and search the right position in file corresponding to the number of jobs 1023 my $i = 1; 1024 if ($offsplit && ($offsplit < $totalsize)) { 1025 $chunks[0] = $offsplit; 1026 } 1027 my $lfile = undef; 1028 open($lfile, $logf) || die "FATAL: cannot read log file $logf. $!\n"; 1029 while ($i < $self->{queue_size}) { 1030 my $pos = int(($totalsize/$self->{queue_size}) * $i); 1031 if ($pos > $chunks[0]) { 1032 $lfile->seek($pos, 0); 1033 #Move the offset to the BEGINNING of each line, because the logic in process_file requires so 1034 $pos = $pos + length(<$lfile>) - 1; 1035 push(@chunks, $pos) if ($pos < $totalsize); 1036 } 1037 last if ($pos >= $totalsize); 1038 $i++; 1039 } 1040 $lfile->close(); 1041 1042 push(@chunks, $totalsize); 1043 1044 return @chunks; 1045 } 1046 1047 sub check_exclusions 1048 { 1049 my ($self, $login, $client_ip, $url) = @_; 1050 1051 return 0 if (!exists $self->{Exclude}{users} && !exists $self->{Exclude}{clients} && !exists $self->{Exclude}{networks} && !exists $self->{Exclude}{uris}); 1052 1053 # check for user exclusion 1054 if (exists $self->{Exclude}{users} && $login) { 1055 foreach my $e (@{$self->{Exclude}{users}}) { 1056 # look for users using the following format: user@domain.tld, domain\user and user 1057 if ( ($login =~ m#^$e$#i) || ($login =~ m#^$e\@#i) || ($login =~ m#\\$e$#i) ) { 1058 return 1; 1059 } 1060 } 1061 } 1062 1063 # check for client exclusion 1064 if (exists $self->{Exclude}{clients} && $client_ip) { 1065 foreach my $e (@{$self->{Exclude}{clients}}) { 1066 if ($client_ip =~ m#^$e$#i) { 1067 return 1; 1068 } 1069 } 1070 } 1071 1072 # check for Network exclusion 1073 if (exists $self->{Exclude}{networks} && $client_ip) { 1074 foreach my $e (@{$self->{Exclude}{networks}}) { 1075 if (&check_ip($client_ip, $e)) { 1076 return 1; 1077 } 1078 } 1079 } 1080 1081 # check for URL exclusion 1082 if (exists $self->{Exclude}{uris} && $url) { 1083 foreach my $e (@{$self->{Exclude}{uris}}) { 1084 if ($url =~ m#^$e$#i) { 1085 return 1; 1086 } 1087 } 1088 } 1089 1090 return 0; 1091 } 1092 1093 sub check_inclusions 1094 { 1095 my ($self, $login, $client_ip) = @_; 1096 1097 return 1 if (!exists $self->{Include}{users} && !exists $self->{Include}{clients} && !exists $self->{Include}{networks}); 1098 1099 # check for user inclusion 1100 if (exists $self->{Include}{users} && $login) { 1101 foreach my $e (@{$self->{Include}{users}}) { 1102 # look for users using the following format: user@domain.tld, domain\user and user 1103 if ( ($login =~ m#^$e$#i) || ($login =~ m#^$e\@#i) || ($login =~ m#\\$e$#i) ) { 1104 return 1; 1105 } 1106 } 1107 } 1108 1109 # If login is a client ip, checked login against clients and networks filters 1110 if (!$client_ip && ($login =~ /^\d+\.\d+\.\d+\.\d+$/)) { 1111 $client_ip = $login; 1112 } 1113 1114 # check for client inclusion 1115 if (exists $self->{Include}{clients} && $client_ip) { 1116 foreach my $e (@{$self->{Include}{clients}}) { 1117 if ($client_ip =~ m#^$e$#i) { 1118 return 1; 1119 } 1120 } 1121 } 1122 1123 # check for Network inclusion 1124 if (exists $self->{Include}{networks} && $client_ip) { 1125 foreach my $e (@{$self->{Include}{networks}}) { 1126 if (&check_ip($client_ip, $e)) { 1127 return 1; 1128 } 1129 } 1130 } 1131 1132 return 0; 1133 } 1134 1135 sub _parse_file_part 1136 { 1137 my ($self, $file, $start_offset, $stop_offset) = @_; 1138 1139 print STDERR "Reading file $file from offset $start_offset to ", ($stop_offset||'end'), ".\n" if (!$self->{QuietMode}); 1140 1141 # Open logfile 1142 my $logfile = new IO::File; 1143 if ($file =~ /\.gz/) { 1144 # Open a pipe to zcat program for compressed log 1145 $logfile->open("$ZCAT_PROG $file |") || $self->localdie("ERROR: cannot read from pipe to $ZCAT_PROG $file. $!\n"); 1146 } elsif ($file =~ /\.bz2/) { 1147 # Open a pipe to bzcat program for compressed log 1148 $logfile->open("$BZCAT_PROG $file |") || $self->localdie("ERROR: cannot read from pipe to $BZCAT_PROG $file. $!\n"); 1149 } elsif ($file =~ /\.xz/) { 1150 # Open a pipe to xzcat program for compressed log 1151 $logfile->open("$XZCAT_PROG $file |") || $self->localdie("ERROR: cannot read from pipe to $XZCAT_PROG $file. $!\n"); 1152 } else { 1153 $logfile->open($file) || $self->localdie("ERROR: Unable to open Squid access.log file $file. $!\n"); 1154 } 1155 1156 my $line = ''; 1157 my $time = 0; 1158 my $elapsed = 0; 1159 my $client_ip = ''; 1160 my $client_name = ''; 1161 my $code = ''; 1162 my $bytes = 0; 1163 my $method = ''; 1164 my $url = ''; 1165 my $login = ''; 1166 my $status = ''; 1167 my $mime_type = ''; 1168 $self->{Syslog} = 0; 1169 1170 my $acl = ''; 1171 1172 my $line_count = 0; 1173 my $line_processed_count = 0; 1174 my $line_stored_count = 0; 1175 1176 # Move directly to the start position 1177 if ($start_offset) { 1178 $logfile->seek($start_offset, 0); 1179 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 1180 $self->{end_offset} = $start_offset; 1181 } elsif (!$self->{is_squidguard_log}) { 1182 $self->{ug_end_offset} = $start_offset; 1183 } else { 1184 $self->{sg_end_offset} = $start_offset; 1185 } 1186 } 1187 1188 # Set timezone in seconds 1189 my $tz = ((0-$self->{TimeZone})*3600); 1190 1191 # The log file format must be : 1192 # time elapsed client code/status bytes method URL rfc931 peerstatus/peerhost type 1193 # This is the default format of squid access log file. 1194 1195 # Read and parse each line of the access log file 1196 while ($line = <$logfile>) { 1197 1198 # quit this log if we reach the ending offset 1199 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 1200 last if ($stop_offset && ($self->{end_offset}>= $stop_offset)); 1201 # Store the current position in logfile 1202 $self->{end_offset} += length($line); 1203 } elsif (!$self->{is_squidguard_log}) { 1204 last if ($stop_offset && ($self->{ug_end_offset}>= $stop_offset)); 1205 # Store the current position in logfile 1206 $self->{ug_end_offset} += length($line); 1207 } else { 1208 last if ($stop_offset && ($self->{sg_end_offset}>= $stop_offset)); 1209 # Store the current position in logfile 1210 $self->{sg_end_offset} += length($line); 1211 } 1212 1213 chomp($line); 1214 next if (!$line); 1215 1216 # skip immediately lines that squid is not able to tag. 1217 next if ($line =~ / TAG_NONE(_ABORTED)?\//); 1218 1219 # Remove syslog header and mark the format 1220 if ($self->{Syslog} == 1) { 1221 $line =~ s/^... \d+ \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ [^\s]+ (\d{10}\.\d{3}) /$1 /; 1222 # Remove syslog header and mark the format 1223 } elsif (!$self->{Syslog} && ($line =~ s/^... \d+ \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ [^\s]+ (\d{10}\.\d{3}) /$1 /)) { 1224 print STDERR "DEBUG: log was generated through syslog, the header will be removed.\n" if (!$self->{QuietMode}); 1225 $self->{Syslog} = 1; 1226 } else { 1227 $self->{Syslog} = 2; 1228 } 1229 1230 # Number of log lines parsed 1231 $line_count++; 1232 1233 # SquidAnalyzer supports the following squid log format: 1234 #logformat squid %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %un %Sh/%<A %mt 1235 #logformat squidmime %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %un %Sh/%<A %mt [%>h] [%<h] 1236 #logformat common %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh 1237 #logformat combined %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh 1238 # Parse log with format: time elapsed client code/status bytes method URL rfc931 peerstatus/peerhost mime_type 1239 my $format = 'native'; 1240 if ( !$self->{is_squidguard_log} && !$self->{is_ufdbguard_log} && ($line =~ $native_format_regex1) ) { 1241 $time = $1; 1242 $time += $tz; 1243 $elapsed = abs($2); 1244 $client_ip = $3; 1245 $code = $4; 1246 $bytes = $5; 1247 $method = $6; 1248 $line = $7; 1249 if ($self->{TimeStart} || $self->{TimeStop}) { 1250 my $hour = strftime("%H:%M", CORE::localtime($time)); 1251 next if ($self->{TimeStart} && $hour lt $self->{TimeStart}); 1252 last if ($self->{TimeStop} && $hour gt $self->{TimeStop}); 1253 } 1254 } elsif ( !$self->{is_squidguard_log} && !$self->{is_ufdbguard_log} && ($line =~ $common_format_regex1) ) { 1255 $format = 'http'; 1256 $client_ip = $1; 1257 $elapsed = abs($2); 1258 $login = lc($3); 1259 $time = $4; 1260 $method = $5; 1261 $url = lc($6); 1262 $status = $8; 1263 $bytes = $9; 1264 $line = $10; 1265 $code = $11; 1266 $mime_type = $12; 1267 $time =~ /(\d+)\/(...)\/(\d+):(\d+):(\d+):(\d+)\s/; 1268 next if ($self->{TimeStart} && "$4:$5" lt $self->{TimeStart}); 1269 last if ($self->{TimeStop} && "$4:$5" gt $self->{TimeStop}); 1270 if (!$self->{TimeZone}) { 1271 $time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900); 1272 } else { 1273 $time = timegm_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900) + $tz; 1274 } 1275 # Some site has corrupted mime_type, try to remove nasty characters 1276 if ($mime_type =~ s/[^\-\/\.\(\)\+\_,\=a-z0-9]+//igs) { 1277 $mime_type = 'invalid/type'; 1278 } 1279 } elsif ( !$self->{is_ufdbguard_log} && ($line =~ $sg_format_regex1) ) { 1280 $format = 'squidguard'; 1281 $self->{is_squidguard_log} = 1; 1282 $acl = $7; 1283 $client_ip = $9; 1284 $elapsed = 0; 1285 $login = lc($10); 1286 $method = $11; 1287 $url = lc($8); 1288 $status = 301; 1289 $bytes = 0; 1290 $code = $12 . ':'; 1291 $mime_type = ''; 1292 next if ($self->{TimeStart} && "$4:$5" lt $self->{TimeStart}); 1293 last if ($self->{TimeStop} && "$4:$5" gt $self->{TimeStop}); 1294 if (!$self->{TimeZone}) { 1295 $time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900); 1296 } else { 1297 $time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz; 1298 } 1299 # Log format for ufdbGuard logs: timestamp [pid] BLOCK user clienthost aclname category url method 1300 } elsif ($line =~ $ug_format_regex1) { 1301 $format = 'ufdbguard'; 1302 $self->{is_ufdbguard_log} = 1; 1303 $acl = "$10/$11"; 1304 $client_ip = $9; 1305 $elapsed = 0; 1306 $login = lc($8); 1307 $method = $13; 1308 $url = lc($12); 1309 $status = 301; 1310 $bytes = 0; 1311 $code = 'REDIRECT:'; 1312 $mime_type = ''; 1313 next if ($self->{TimeStart} && "$4:$5" lt $self->{TimeStart}); 1314 last if ($self->{TimeStop} && "$4:$5" gt $self->{TimeStop}); 1315 if (!$self->{TimeZone}) { 1316 $time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900); 1317 } else { 1318 $time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz; 1319 } 1320 } else { 1321 next; 1322 } 1323 1324 if ($time) { 1325 # Do not parse some unwanted method 1326 my $qm_method = quotemeta($method) || ''; 1327 next if (($#{$self->{ExcludedMethods}} >= 0) && grep(/^$qm_method$/, @{$self->{ExcludedMethods}})); 1328 1329 # Do not parse some unwanted code; e.g. TCP_DENIED/403 1330 my $qm_code = quotemeta($code) || ''; 1331 next if (($#{$self->{ExcludedCodes}} >= 0) && grep(m#^$code$#, @{$self->{ExcludedCodes}})); 1332 1333 # Go to last parsed date (incremental mode) 1334 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 1335 next if ($self->{history_time} && ($time <= $self->{history_time})); 1336 } elsif (!$self->{is_squidguard_log}) { 1337 next if ($self->{ug_history_time} && ($time <= $self->{ug_history_time})); 1338 } else { 1339 next if ($self->{sg_history_time} && ($time <= $self->{sg_history_time})); 1340 } 1341 1342 # Register the last parsing time and last offset position in logfile 1343 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 1344 $self->{end_time} = $time if (!$time || ($self->{end_time} < $time)); 1345 # Register the first parsing time 1346 if (!$self->{begin_time} || ($self->{begin_time} > $time)) { 1347 $self->{begin_time} = $time; 1348 print STDERR "SQUID LOG SET START TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($time)), "\n" if (!$self->{QuietMode}); 1349 } 1350 } elsif (!$self->{is_squidguard_log}) { 1351 $self->{ug_end_time} = $time if (!$time || ($self->{ug_end_time} < $time)); 1352 # Register the first parsing time 1353 if (!$self->{ug_begin_time} || ($self->{ug_begin_time} > $time)) { 1354 $self->{ug_begin_time} = $time; 1355 print STDERR "UFDBGUARD LOG SET START TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($time)), "\n" if (!$self->{QuietMode}); 1356 } 1357 } else { 1358 $self->{sg_end_time} = $time if (!$time || ($self->{sg_end_time} < $time)); 1359 # Register the first parsing time 1360 if (!$self->{sg_begin_time} || ($self->{sg_begin_time} > $time)) { 1361 $self->{sg_begin_time} = $time; 1362 print STDERR "SQUIDGUARD LOG SET START TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($time)), "\n" if (!$self->{QuietMode}); 1363 } 1364 } 1365 1366 # Only store (HIT|UNMODIFIED)/(MISS|MODIFIED|TUNNEL)/(DENIED|REDIRECT) status 1367 # and peer CD_SIBLING_HIT/ aswell as peer SIBLING_HIT/... 1368 if ( ($code =~ m#(HIT|UNMODIFIED)[:/]#) || ($self->{SiblingHit} && ($line =~ / (CD_)?SIBLING_HIT/)) ) { 1369 $code = 'HIT'; 1370 } elsif ($code =~ m#(MISS|MODIFIED|TUNNEL)[:/]#) { 1371 $code = 'MISS'; 1372 } elsif ($code =~ m#(DENIED|DENIED_REPLY|REDIRECT)[:/]#) { 1373 $code = 'DENIED'; 1374 } else { 1375 next; 1376 } 1377 1378 # With common and combined log format those fields have already been parsed 1379 if (($format eq 'native') && ($line =~ $native_format_regex2) ) { 1380 $url = lc($1); 1381 $login = lc($2); 1382 $status = lc($3); 1383 $mime_type = lc($4); 1384 # Some site has corrupted mime_type, try to remove nasty characters 1385 if ($mime_type =~ s/[^\-\/\.\(\)\+\_,\=a-z0-9]+//igs) { 1386 $mime_type = 'invalid/type'; 1387 } 1388 } 1389 1390 if ($url) { 1391 if (!$mime_type || ($mime_type eq '-')) { 1392 $mime_type = 'none'; 1393 } 1394 1395 # Do not parse some unwanted method 1396 next if (($#{$self->{ExcludedMimes}} >= 0) && map {$mime_type =~ m#^$_$#} @{$self->{ExcludedMimes}}); 1397 1398 # Remove extra space character in username 1399 $login =~ s/\%20//g; 1400 1401 my $id = $client_ip || ''; 1402 if ($login ne '-') { 1403 $id = $login; 1404 } 1405 next if (!$id || (!$bytes && ($code ne 'DENIED'))); 1406 1407 ##### 1408 # If there's some mandatory inclusion, check the entry against the definitions 1409 # The entry is skipped directly if it is not in an inclusion list 1410 ##### 1411 next if (!$self->check_inclusions($login, $client_ip)); 1412 1413 ##### 1414 # Check the entry against the exclusion definitions. The entry 1415 # is skipped directly when it match an exclusion definition. 1416 ##### 1417 next if ($self->check_exclusions($login, $client_ip, $url)); 1418 1419 # Set default user login to client ip address 1420 # Anonymize all users 1421 if ($self->{AnonymizeLogin} && ($client_ip ne $id)) { 1422 if (!exists $self->{AnonymizedId}{$id}) { 1423 $self->{AnonymizedId}{$id} = &anonymize_id(); 1424 } 1425 $id = $self->{AnonymizedId}{$id}; 1426 } 1427 1428 # Now parse data and generate statistics 1429 $self->_parseData($time, $elapsed, $client_ip, $code, $bytes, $url, $id, $mime_type, $acl, $method); 1430 $line_stored_count++; 1431 1432 } 1433 $line_processed_count++; 1434 } 1435 } 1436 $logfile->close(); 1437 1438 if ($self->{cur_year}) { 1439 # Save last parsed data 1440 $self->_append_data($self->{cur_year}, $self->{cur_month}, $self->{cur_day}); 1441 # Stats can be cleared 1442 $self->_clear_stats(); 1443 1444 # Stores last week to process 1445 my $wn = &get_week_number($self->{cur_year}, $self->{cur_month}, $self->{cur_day}); 1446 if (!grep(/^$self->{cur_year}\/$self->{cur_month}\/$wn$/, @{$self->{week_parsed}})) { 1447 push(@{$self->{week_parsed}}, "$self->{cur_year}/$self->{cur_month}/$wn"); 1448 } 1449 1450 # Save the last information parsed in this file part 1451 my $tmp_file = 'last_parsed.tmp'; 1452 if ($self->{is_squidguard_log}) { 1453 $tmp_file = 'sg_last_parsed.tmp'; 1454 } elsif ($self->{is_ufdbguard_log}) { 1455 $tmp_file = 'ug_last_parsed.tmp'; 1456 } 1457 if (open(OUT, ">>$self->{pid_dir}/$tmp_file")) { 1458 flock(OUT, 2) || die "FATAL: can't acquire lock on file $tmp_file, $!\n"; 1459 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) { 1460 print OUT "$self->{last_year} $self->{last_month}{$self->{last_year}} $self->{last_day}{$self->{last_year}} $self->{end_time} $self->{end_offset} $line_stored_count $line_processed_count $line_count $self->{first_year} $self->{first_month}{$self->{first_year}} ", join(',', @{$self->{week_parsed}}), "\n"; 1461 } elsif (!$self->{is_squidguard_log}) { 1462 print OUT "$self->{last_year} $self->{last_month}{$self->{last_year}} $self->{last_day}{$self->{last_year}} $self->{ug_end_time} $self->{ug_end_offset} $line_stored_count $line_processed_count $line_count $self->{first_year} $self->{first_month}{$self->{first_year}} ", join(',', @{$self->{week_parsed}}), "\n"; 1463 } else { 1464 print OUT "$self->{last_year} $self->{last_month}{$self->{last_year}} $self->{last_day}{$self->{last_year}} $self->{sg_end_time} $self->{sg_end_offset} $line_stored_count $line_processed_count $line_count $self->{first_year} $self->{first_month}{$self->{first_year}} ", join(',', @{$self->{week_parsed}}), "\n"; 1465 } 1466 close(OUT); 1467 } else { 1468 print STDERR "ERROR: can't save last parsed line into $self->{pid_dir}/$tmp_file, $!\n"; 1469 } 1470 } 1471 1472 } 1473 1474 sub _clear_stats 1475 { 1476 my $self = shift; 1477 1478 # Hashes to store user statistics 1479 $self->{stat_user_hour} = (); 1480 $self->{stat_user_day} = (); 1481 $self->{stat_user_month} = (); 1482 $self->{stat_usermax_hour} = (); 1483 $self->{stat_usermax_day} = (); 1484 $self->{stat_usermax_month} = (); 1485 $self->{stat_user_url_hour} = (); 1486 $self->{stat_user_url_day} = (); 1487 $self->{stat_user_url_month} = (); 1488 1489 # Hashes to store network statistics 1490 $self->{stat_network_hour} = (); 1491 $self->{stat_network_day} = (); 1492 $self->{stat_network_month} = (); 1493 $self->{stat_netmax_hour} = (); 1494 $self->{stat_netmax_day} = (); 1495 $self->{stat_netmax_month} = (); 1496 1497 # Hashes to store user / network statistics 1498 $self->{stat_netuser_hour} = (); 1499 $self->{stat_netuser_day} = (); 1500 $self->{stat_netuser_month} = (); 1501 1502 # Hashes to store cache status (hit/miss) 1503 $self->{stat_code_hour} = (); 1504 $self->{stat_code_day} = (); 1505 $self->{stat_code_month} = (); 1506 1507 # Hashes to store throughput statsœ 1508 $self->{stat_throughput_hour} = (); 1509 $self->{stat_throughput_day} = (); 1510 $self->{stat_throughput_month} = (); 1511 1512 # Hashes to store mime type 1513 $self->{stat_mime_type_hour} = (); 1514 $self->{stat_mime_type_day} = (); 1515 $self->{stat_mime_type_month} = (); 1516 1517 } 1518 1519 sub _init 1520 { 1521 my ($self, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone) = @_; 1522 1523 # Set path to pid file 1524 $pidfile = $pid_dir . '/' . $pidfile; 1525 1526 # Prevent for a call without instance 1527 if (!ref($self)) { 1528 print STDERR "ERROR - init : Unable to call init without an object instance.\n"; 1529 unlink("$pidfile"); 1530 exit(0); 1531 } 1532 $self->{pidfile} = $pidfile || '/tmp/squid-analyzer.pid'; 1533 1534 # Load configuration information 1535 if (!$conf_file) { 1536 if (-f '/etc/squidanalyzer/squidanalyzer.conf') { 1537 $conf_file = '/etc/squidanalyzer/squidanalyzer.conf'; 1538 } elsif (-f '/etc/squidanalyzer.conf') { 1539 $conf_file = '/etc/squidanalyzer.conf'; 1540 } elsif (-f 'squidanalyzer.conf') { 1541 $conf_file = 'squidanalyzer.conf'; 1542 } 1543 } 1544 my %options = $self->parse_config($conf_file, $log_file, $rebuild); 1545 1546 # Configuration options 1547 $self->{MinPie} = $options{MinPie} || 2; 1548 $self->{QuietMode} = $options{QuietMode} || 0; 1549 $self->{UrlReport} = $options{UrlReport} || 0; 1550 $self->{UrlHitsOnly} = $options{UrlHitsOnly} || 0; 1551 $self->{MaxFormatError} = $options{MaxFormatError} || 0; 1552 if (defined $options{UserReport}) { 1553 $self->{UserReport} = $options{UserReport}; 1554 } else { 1555 # Assure backward compatibility after update otherwize 1556 # data files will lost users information if directive 1557 # is not found in the configuration file 1558 $self->{UserReport} = 1; 1559 } 1560 $self->{Output} = $options{Output} || ''; 1561 $self->{WebUrl} = $options{WebUrl} || ''; 1562 $self->{WebUrl} .= '/' if ($self->{WebUrl} && ($self->{WebUrl} !~ /\/$/)); 1563 $self->{DateFormat} = $options{DateFormat} || '%y-%m-%d'; 1564 $self->{Lang} = $options{Lang} || ''; 1565 $self->{AnonymizeLogin} = $options{AnonymizeLogin} || 0; 1566 $self->{SiblingHit} = $options{SiblingHit} || 1; 1567 $self->{ImgFormat} = $options{ImgFormat} || 'png'; 1568 $self->{Locale} = $options{Locale} || ''; 1569 $self->{TopUrlUser} = $options{TopUrlUser} || 0; 1570 $self->{no_year_stat} = 0; 1571 $self->{no_week_stat} = 0; 1572 $self->{UseClientDNSName} = $options{UseClientDNSName} || 0; 1573 $self->{DNSLookupTimeout} = $options{DNSLookupTimeout} || 0.0001; 1574 $self->{DNSLookupTimeout} = int($self->{DNSLookupTimeout} * 1000000); 1575 $self->{LogFile} = (); 1576 $self->{queue_size} = 1; 1577 $self->{running_pids} = (); 1578 $self->{pid_dir} = $pid_dir || '/tmp'; 1579 $self->{child_count} = 0; 1580 $self->{rebuild} = $rebuild || 0; 1581 $self->{is_squidguard_log} = 0; 1582 $self->{TimeZone} = $options{TimeZone} || $timezone || 0; 1583 $self->{Syslog} = 0; 1584 $self->{UseUrlPort} = 1; 1585 $self->{TimeStart} = $options{TimeStart} || ''; 1586 $self->{TimeStop} = $options{TimeStop} || ''; 1587 1588 # Cleanup old temporary files 1589 foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp', 'ug_last_parsed.tmp') { 1590 unlink("$self->{pid_dir}/$tmp_file"); 1591 } 1592 1593 $self->{CustomHeader} = $options{CustomHeader} || qq{<a href="$self->{WebUrl}"><img src="$self->{WebUrl}images/logo-squidanalyzer.png" title="SquidAnalyzer $VERSION" border="0"></a> SquidAnalyzer}; 1594 $self->{ExcludedMethods} = (); 1595 if ($options{ExcludedMethods}) { 1596 push(@{$self->{ExcludedMethods}}, split(/\s*,\s*/, $options{ExcludedMethods})); 1597 } 1598 $self->{ExcludedCodes} = (); 1599 if ($options{ExcludedCodes}) { 1600 push(@{$self->{ExcludedCodes}}, split(/\s*,\s*/, $options{ExcludedCodes})); 1601 } 1602 $self->{ExcludedMimes} = (); 1603 if ($options{ExcludedMimes}) { 1604 push(@{$self->{ExcludedMimes}}, split(/\s*,\s*/, $options{ExcludedMimes})); 1605 } 1606 1607 if ($self->{Lang}) { 1608 open(IN, "$self->{Lang}") or die "ERROR: can't open translation file $self->{Lang}, $!\n"; 1609 while (my $l = <IN>) { 1610 chomp($l); 1611 $l =~ s/\r//gs; 1612 next if ($l =~ /^\s*#/); 1613 next if (!$l); 1614 my ($key, $str) = split(/\t+/, $l); 1615 $Translate{$key} = $str; 1616 } 1617 close(IN); 1618 } 1619 if (!$self->{Output}) { 1620 die "ERROR: 'Output' configuration option must be set.\n"; 1621 } 1622 if (! -d $self->{Output}) { 1623 die "ERROR: 'Output' directory $self->{Output} doesn't exists.\n"; 1624 } 1625 if (!$self->{rebuild}) { 1626 push(@{$self->{LogFile}}, @{$options{LogFile}}); 1627 if ($#{$self->{LogFile}} < 0) { 1628 die "ERROR: 'LogFile' configuration directive must be set or a log file given at command line.\n"; 1629 } 1630 } 1631 $self->{OrderUser} = lc($options{OrderUser}) || 'bytes'; 1632 $self->{OrderNetwork} = lc($options{OrderNetwork}) || 'bytes'; 1633 $self->{OrderUrl} = lc($options{OrderUrl}) || 'bytes'; 1634 $self->{OrderMime} = lc($options{OrderMime}) || 'bytes'; 1635 if ($self->{OrderUser} !~ /^(hits|bytes|duration)$/) { 1636 die "ERROR: OrderUser must be one of these values: hits, bytes or duration\n"; 1637 } 1638 if ($self->{OrderNetwork} !~ /^(hits|bytes|duration)$/) { 1639 die "ERROR: OrderNetwork must be one of these values: hits, bytes or duration\n"; 1640 } 1641 if ($self->{OrderUrl} !~ /^(hits|bytes|duration)$/) { 1642 die "ERROR: OrderUrl must be one of these values: hits, bytes or duration\n"; 1643 } 1644 if ($self->{OrderMime} !~ /^(hits|bytes)$/) { 1645 die "ERROR: OrderMime must be one of these values: hits or bytes\n"; 1646 } 1647 %{$self->{NetworkAlias}} = $self->parse_network_aliases($options{NetworkAlias} || ''); 1648 %{$self->{UserAlias}} = $self->parse_user_aliases($options{UserAlias} || ''); 1649 %{$self->{Exclude}} = $self->parse_exclusion($options{Exclude} || ''); 1650 %{$self->{Include}} = $self->parse_inclusion($options{Include} || ''); 1651 %{$self->{NetworkAliasCache}} = (); 1652 %{$self->{UserAliasCache}} = (); 1653 $self->{has_nework_alias} = scalar keys %{$self->{NetworkAlias}}; 1654 $self->{has_user_alias} = scalar keys %{$self->{UserAlias}}; 1655 1656 $self->{CostPrice} = $options{CostPrice} || 0; 1657 $self->{Currency} = $options{Currency} || '€'; 1658 $self->{TopNumber} = $options{TopNumber} || 10; 1659 $self->{TopStorage} = $options{TopStorage} || 0; 1660 $self->{TransfertUnit} = $options{TransfertUnit} || 'BYTES'; 1661 if (!grep(/^$self->{TransfertUnit}$/i, 'BYTES', 'KB', 'MB', 'GB')) { 1662 die "ERROR: TransfertUnit must be one of these values: KB, MB or GB\n"; 1663 } else { 1664 if (uc($self->{TransfertUnit}) eq 'BYTES') { 1665 $self->{TransfertUnitValue} = 1; 1666 $self->{TransfertUnit} = 'Bytes'; 1667 } elsif (uc($self->{TransfertUnit}) eq 'KB') { 1668 $self->{TransfertUnitValue} = 1024; 1669 } elsif (uc($self->{TransfertUnit}) eq 'MB') { 1670 $self->{TransfertUnitValue} = 1024*1024; 1671 } elsif (uc($self->{TransfertUnit}) eq 'GB') { 1672 $self->{TransfertUnitValue} = 1024*1024*1024; 1673 } 1674 } 1675 1676 # Init statistics storage hashes 1677 $self->_clear_stats(); 1678 1679 # Used to store the first and last date parsed 1680 $self->{last_year} = 0; 1681 $self->{last_month} = (); 1682 $self->{last_day} = (); 1683 $self->{cur_year} = 0; 1684 $self->{cur_month} = 0; 1685 $self->{cur_day} = 0; 1686 $self->{first_year} = 0; 1687 $self->{first_month} = (); 1688 $self->{begin_time} = 0; 1689 $self->{end_time} = 0; 1690 $self->{end_offset} = 0; 1691 $self->{week_parsed} = (); 1692 # Used to stored command line parameters from squid-analyzer 1693 $self->{history_time} = 0; 1694 $self->{preserve} = 0; 1695 $self->{sg_end_time} = 0; 1696 $self->{sg_end_offset} = 0; 1697 $self->{ug_end_time} = 0; 1698 $self->{ug_end_offset} = 0; 1699 1700 # Override verbose mode 1701 $self->{QuietMode} = 0 if ($debug); 1702 1703 # Enable local date format if defined, else strftime will be used. The limitation 1704 # this behavior is that all dates in HTML files will be the same for performences reasons. 1705 if ($self->{Locale}) { 1706 my $lang = 'LANG=' . $self->{Locale}; 1707 $self->{start_date} = `$lang date | iconv -t $Translate{CharSet} 2>/dev/null`; 1708 chomp($self->{start_date}); 1709 } 1710 1711 # Get the last parsing date for Squid log incremental parsing 1712 if (!$rebuild && -e "$self->{Output}/SquidAnalyzer.current") { 1713 my $current = new IO::File; 1714 unless($current->open("$self->{Output}/SquidAnalyzer.current")) { 1715 print STDERR "ERROR: Can't read file $self->{Output}/SquidAnalyzer.current, $!\n" if (!$self->{QuietMode}); 1716 print STDERR "Starting at the first line of Squid access log file.\n" if (!$self->{QuietMode}); 1717 } else { 1718 my $tmp = <$current>; 1719 chomp($tmp); 1720 ($self->{history_time}, $self->{end_offset}) = split(/[\t]/, $tmp); 1721 $self->{begin_time} = $self->{history_time}; 1722 $current->close(); 1723 if ($self->{history_time}) { 1724 print STDERR "SQUID LOG HISTORY TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{history_time})), " - HISTORY OFFSET: $self->{end_offset}\n" if (!$self->{QuietMode}); 1725 } 1726 } 1727 } 1728 1729 # Get the last parsing date for SquidGuard log incremental parsing 1730 if (!$rebuild && -e "$self->{Output}/SquidGuard.current") { 1731 my $current = new IO::File; 1732 unless($current->open("$self->{Output}/SquidGuard.current")) { 1733 print STDERR "ERROR: Can't read file $self->{Output}/SquidGuard.current, $!\n" if (!$self->{QuietMode}); 1734 print STDERR "Starting at the first line of SquidGuard log file.\n" if (!$self->{QuietMode}); 1735 } else { 1736 my $tmp = <$current>; 1737 chomp($tmp); 1738 ($self->{sg_history_time}, $self->{sg_end_offset}) = split(/[\t]/, $tmp); 1739 $self->{sg_begin_time} = $self->{sg_history_time}; 1740 $current->close(); 1741 if ($self->{sg_history_time}) { 1742 print STDERR "SQUIDGUARD LOG HISTORY TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{sg_history_time})), " - HISTORY OFFSET: $self->{sg_end_offset}\n" if (!$self->{QuietMode}); 1743 } 1744 } 1745 } 1746 1747 # Get the last parsing date for ufdbGuard log incremental parsing 1748 if (!$rebuild && -e "$self->{Output}/ufdbGuard.current") { 1749 my $current = new IO::File; 1750 unless($current->open("$self->{Output}/ufdbGuard.current")) { 1751 print STDERR "ERROR: Can't read file $self->{Output}/ufdbGuard.current, $!\n" if (!$self->{QuietMode}); 1752 print STDERR "Starting at the first line of ufdbGuard log file.\n" if (!$self->{QuietMode}); 1753 } else { 1754 my $tmp = <$current>; 1755 chomp($tmp); 1756 ($self->{ug_history_time}, $self->{ug_end_offset}) = split(/[\t]/, $tmp); 1757 $self->{ug_begin_time} = $self->{ug_history_time}; 1758 $current->close(); 1759 if ($self->{ug_history_time}) { 1760 print STDERR "UFDBGUARD LOG HISTORY TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{ug_history_time})), " - HISTORY OFFSET: $self->{ug_end_offset}\n" if (!$self->{QuietMode}); 1761 } 1762 } 1763 } 1764 1765 $self->{menu} = qq{ 1766 <div id="menu"> 1767 <ul> 1768 <li><a href="../index.html"><span class="iconArrow">$Translate{'Back_link'}</span></a></li> 1769 }; 1770 if ($self->{UrlReport}) { 1771 $self->{menu} .= qq{ 1772 <li><a href="domain.html"><span class="iconDomain">$Translate{'Top_domain_link'}</span></a></li> 1773 <li><a href="url.html"><span class="iconUrl">$Translate{'Top_url_link'}</span></a></li> 1774 <li><a href="denied.html"><span class="iconUrl">$Translate{'Top_denied_link'}</span></a></li> 1775 }; 1776 } 1777 if ($self->{UserReport}) { 1778 $self->{menu} .= qq{ 1779 <li><a href="user.html"><span class="iconUser">$Translate{'User_link'}</span></a></li> 1780 }; 1781 } 1782 $self->{menu} .= qq{ 1783 <li><a href="network.html"><span class="iconNetwork">$Translate{'Network_link'}</span></a></li> 1784 <li><a href="mime_type.html"><span class="iconMime">$Translate{'Mime_link'}</span></a></li> 1785 </ul> 1786 </div> 1787 }; 1788 1789 $self->{menu2} = qq{ 1790 <div id="menu"> 1791 <ul> 1792 <li><a href="../../index.html"><span class="iconArrow">$Translate{'Back_link'}</span></a></li> 1793 }; 1794 if ($self->{UrlReport}) { 1795 $self->{menu2} .= qq{ 1796 <li><a href="../../domain.html"><span class="iconDomain">$Translate{'Top_domain_link'}</span></a></li> 1797 <li><a href="../../url.html"><span class="iconUrl">$Translate{'Top_url_link'}</span></a></li>A 1798 <li><a href="../../denied.html"><span class="iconUrl">$Translate{'Top_denied_link'}</span></a></li>A 1799 }; 1800 } 1801 if ($self->{UserReport}) { 1802 $self->{menu2} .= qq{ 1803 <li><a href="../../user.html"><span class="iconUser">$Translate{'User_link'}</span></a></li> 1804 }; 1805 } 1806 $self->{menu2} .= qq{ 1807 <li><a href="../../network.html"><span class="iconNetwork">$Translate{'Network_link'}</span></a></li> 1808 <li><a href="../../mime_type.html"><span class="iconMime">$Translate{'Mime_link'}</span></a></li> 1809 </ul> 1810 </div> 1811 }; 1812 1813 $self->{menu3} = qq{ 1814 <div id="menu"> 1815 <ul> 1816 <li><a href="../index.html"><span class="iconArrow">$Translate{'Back_link'}</span></a></li> 1817 </ul> 1818 </div> 1819 }; 1820 1821 } 1822 1823 sub _gethostbyaddr 1824 { 1825 my ($self, $ip) = @_; 1826 1827 1828 my $host = undef; 1829 my $err = ''; 1830 unless(exists $CACHE{$ip}) { 1831 eval { 1832 local $SIG{ALRM} = sub { die "DNS lookup timeout.\n"; }; 1833 ualarm $self->{DNSLookupTimeout}; 1834 my @addrs = (); 1835 if ($] < 5.014) { 1836 $host = gethostbyaddr(inet_aton($ip), AF_INET); 1837 } else { 1838 # We also need to resolve IPV6 addresses 1839 if ($ip =~ /^\d+\.\d+\.\d+\.\d+$/) { 1840 ($err, @addrs) = Socket::getaddrinfo( $ip, 0, { 'protocol' => Socket::IPPROTO_TCP, 'family' => Socket::AF_INET } ); 1841 } else { 1842 ($err, @addrs) = Socket::getaddrinfo( $ip, 0, { 'protocol' => Socket::IPPROTO_TCP, 'family' => Socket::AF_INET6 } ); 1843 } 1844 } 1845 for my $addr (@addrs) { 1846 ($err, $host) = Socket::getnameinfo( $addr->{addr}); 1847 last; 1848 } 1849 ualarm 0; 1850 }; 1851 if ($@) { 1852 delete $CACHE{$ip}; 1853 if (!$self->{QuietMode}) { 1854 warn "_gethostbyaddr timeout reach for ip: $ip, timeout can be adjusted with directive DNSLookupTimeout\n"; 1855 } 1856 } elsif ($err) { 1857 delete $CACHE{$ip}; 1858 if (!$self->{QuietMode}) { 1859 warn "_gethostbyaddr error resolving ip: $ip, $err\n"; 1860 } 1861 } 1862 else { 1863 $CACHE{$ip} = $host; 1864 #printf "_gethostbyaddr success : %s (%s)\n", $ip, $host; 1865 } 1866 } 1867 return $CACHE{$ip} || $ip; 1868 } 1869 1870 sub apply_network_alias 1871 { 1872 my ($self, $ip) = @_; 1873 1874 return $self->{NetworkAliasCache}{$ip} if (exists $self->{NetworkAliasCache}{$ip}); 1875 1876 my $found = 0; 1877 foreach my $r (keys %{$self->{NetworkAlias}}) { 1878 if ($r =~ $cidr_regex) { 1879 if (&check_ip($ip, $r)) { 1880 $self->{NetworkAliasCache}{$ip} = $self->{NetworkAlias}->{$r}; 1881 $ip = $self->{NetworkAlias}->{$r}; 1882 $found = 1; 1883 last; 1884 } 1885 } elsif ($ip =~ /^$r/) { 1886 $self->{NetworkAliasCache}{$ip} = $self->{NetworkAlias}->{$r}; 1887 $ip = $self->{NetworkAlias}->{$r}; 1888 $found = 1; 1889 last; 1890 } 1891 } 1892 $self->{NetworkAliasCache}{$ip} = $ip if (!$found); 1893 1894 return $ip; 1895 } 1896 1897 sub apply_user_alias 1898 { 1899 my ($self, $id) = @_; 1900 1901 return $self->{UserAliasCache}{$id} if (exists $self->{UserAliasCache}{$id}); 1902 1903 my $found = 0; 1904 foreach my $u (keys %{$self->{UserAlias}}) { 1905 if ( $id =~ /^$u$/i ) { 1906 $self->{UserAliasCache}{$id} = $self->{UserAlias}->{$u}; 1907 $id = $self->{UserAlias}->{$u}; 1908 $found = 1; 1909 last; 1910 } 1911 } 1912 $self->{UserAliasCache}{$id} = $id if (!$found); 1913 1914 return $id; 1915 } 1916 1917 sub _parseData 1918 { 1919 my ($self, $time, $elapsed, $client, $code, $bytes, $url, $id, $type, $acl, $method) = @_; 1920 1921 # Save original IP address for dns resolving 1922 my $client_ip_addr = $client; 1923 1924 # Get the current year and month 1925 my ($sec,$min,$hour,$day,$month,$year,$wday,$yday,$isdst) = CORE::localtime($time); 1926 $year += 1900; 1927 $month = sprintf("%02d", $month + 1); 1928 $day = sprintf("%02d", $day); 1929 1930 # Store data when hour change to save memory 1931 if ($self->{cur_year} && ($self->{cur_hour} ne '') && ($hour != $self->{cur_hour}) ) { 1932 # If the day has changed then we want to save stats of the previous one 1933 $self->_append_data($self->{cur_year}, $self->{cur_month}, $self->{cur_day}); 1934 # Stats can be cleared 1935 print STDERR "Clearing statistics storage hashes, for $self->{cur_year}-$self->{cur_month}-$self->{cur_day} ", sprintf("%02d", $self->{cur_hour}), ":00:00.\n" if (!$self->{QuietMode}); 1936 $self->_clear_stats(); 1937 } 1938 1939 # Stores weeks to process 1940 if (!$self->{no_week_stat}) { 1941 if ("$year$month$day" ne "$self->{cur_year}$self->{cur_month}$self->{cur_day}") { 1942 my $wn = &get_week_number($year, $month, $day); 1943 if (!grep(/^$year\/$month\/$wn$/, @{$self->{week_parsed}})) { 1944 push(@{$self->{week_parsed}}, "$year/$month/$wn"); 1945 } 1946 } 1947 } 1948 1949 # Extract the domainname part of the URL 1950 $url =~ s/:\d+.*// if (!$self->{UseUrlPort}); 1951 $url =~ m/^[^\/]+\/\/([^\/]+)/; 1952 my $dest = $1 || $url; 1953 1954 # Replace username by his dnsname if there's no username 1955 # (login is equal to ip) and if client is an ip address 1956 if ( ($id eq $client) && $self->{UseClientDNSName}) { 1957 if ($client =~ $ip_regexp) { 1958 my $dnsname = $self->_gethostbyaddr($client); 1959 if ($dnsname) { 1960 $id = $dnsname; 1961 } 1962 } 1963 } 1964 1965 # Replace network by his aliases if any 1966 my $network = (!$self->{has_nework_alias}) ? $client : $self->apply_network_alias($client); 1967 1968 # Set default to a class C network 1969 if (!$network) { 1970 $client =~ /^(.*)([:\.]+)\d+$/; 1971 $network = "$1$2". "0"; 1972 } 1973 1974 # Replace username by his alias if any 1975 $id = (!$self->{has_user_alias}) ? $id : $self->apply_user_alias($id); 1976 1977 # Stores last parsed date part 1978 if (!$self->{last_year} || ("$year$month$day" gt "$self->{last_year}$self->{last_month}{$self->{last_year}}$self->{last_day}{$self->{last_year}}")) { 1979 $self->{last_year} = $year; 1980 $self->{last_month}{$self->{last_year}} = $month; 1981 $self->{last_day}{$self->{last_year}} = $day; 1982 } 1983 1984 # Stores first parsed date part 1985 if (!$self->{first_year} || ("$self->{first_year}$self->{first_month}{$self->{first_year}}" gt "$year$month")) { 1986 $self->{first_year} = $year; 1987 $self->{first_month}{$self->{first_year}} = $month; 1988 } 1989 1990 # Stores current processed values 1991 $self->{cur_year} = $year; 1992 $self->{cur_month} = $month; 1993 $self->{cur_day} = $day; 1994 $self->{cur_hour} = $hour; 1995 $hour = sprintf("%02d", $hour); 1996 1997 #### Store access denied statistics 1998 if ($code eq 'DENIED') { 1999 $self->{stat_code_hour}{$code}{$hour}{hits}++; 2000 $self->{stat_code_hour}{$code}{$hour}{bytes} += $bytes; 2001 $self->{stat_code_day}{$code}{$self->{last_day}}{hits}++; 2002 $self->{stat_code_day}{$code}{$self->{last_day}}{bytes} += $bytes; 2003 2004 $self->{stat_throughput_hour}{$code}{$hour}{bytes} += $bytes; 2005 $self->{stat_throughput_day}{$code}{$self->{last_day}}{bytes} += $bytes; 2006 $self->{stat_throughput_hour}{$code}{$hour}{elapsed} += $elapsed; 2007 $self->{stat_throughput_day}{$code}{$self->{last_day}}{elapsed} += $elapsed; 2008 2009 #### Store url statistics 2010 if ($self->{UrlReport}) { 2011 $self->{stat_denied_url_hour}{$id}{$dest}{hits}++; 2012 $self->{stat_denied_url_hour}{$id}{$dest}{firsthit} = $time if (!$self->{stat_denied_url_hour}{$id}{$dest}{firsthit} || ($time < $self->{stat_denied_url_hour}{$id}{$dest}{firsthit})); 2013 $self->{stat_denied_url_hour}{$id}{$dest}{lasthit} = $time if (!$self->{stat_denied_url_hour}{$id}{$dest}{lasthit} || ($time > $self->{stat_denied_url_hour}{$id}{$dest}{lasthit})); 2014 $self->{stat_denied_url_hour}{$id}{$dest}{blacklist}{$acl}++ if ($acl); 2015 $self->{stat_denied_url_day}{$id}{$dest}{hits}++; 2016 $self->{stat_denied_url_day}{$id}{$dest}{firsthit} = $time if (!$self->{stat_denied_url_day}{$id}{$dest}{firsthit} || ($time < $self->{stat_denied_url_day}{$id}{$dest}{firsthit})); 2017 $self->{stat_denied_url_day}{$id}{$dest}{lasthit} = $time if (!$self->{stat_denied_url_day}{$id}{$dest}{lasthit} || ($time > $self->{stat_denied_url_day}{$id}{$dest}{lasthit})); 2018 $self->{stat_denied_url_day}{$id}{$dest}{blacklist}{$acl}++ if ($acl); 2019 $self->{stat_user_hour}{$id}{$hour}{hits} += 0; 2020 $self->{stat_user_hour}{$id}{$hour}{bytes} += 0; 2021 $self->{stat_user_hour}{$id}{$hour}{duration} += 0; 2022 $self->{stat_user_day}{$id}{$self->{last_day}}{hits} += 0; 2023 $self->{stat_user_day}{$id}{$self->{last_day}}{bytes} += 0; 2024 $self->{stat_user_day}{$id}{$self->{last_day}}{duration} += 0; 2025 } 2026 return; 2027 } 2028 2029 #### Store client statistics 2030 if ($self->{UserReport}) { 2031 $self->{stat_user_hour}{$id}{$hour}{hits}++; 2032 $self->{stat_user_hour}{$id}{$hour}{bytes} += $bytes; 2033 $self->{stat_user_hour}{$id}{$hour}{duration} += $elapsed; 2034 $self->{stat_user_day}{$id}{$self->{last_day}}{hits}++; 2035 $self->{stat_user_day}{$id}{$self->{last_day}}{bytes} += $bytes; 2036 $self->{stat_user_day}{$id}{$self->{last_day}}{duration} += $elapsed; 2037 if ($bytes > $self->{stat_usermax_hour}{$id}{largest_file_size}) { 2038 $self->{stat_usermax_hour}{$id}{largest_file_size} = $bytes; 2039 $self->{stat_usermax_hour}{$id}{largest_file_url} = $url; 2040 } 2041 if ($bytes > $self->{stat_usermax_day}{$id}{largest_file_size}) { 2042 $self->{stat_usermax_day}{$id}{largest_file_size} = $bytes; 2043 $self->{stat_usermax_day}{$id}{largest_file_url} = $url; 2044 } 2045 } 2046 2047 #### Store networks statistics 2048 $self->{stat_network_hour}{$network}{$hour}{hits}++; 2049 $self->{stat_network_hour}{$network}{$hour}{bytes} += $bytes; 2050 $self->{stat_network_hour}{$network}{$hour}{duration} += $elapsed; 2051 $self->{stat_network_day}{$network}{$self->{last_day}}{hits}++; 2052 $self->{stat_network_day}{$network}{$self->{last_day}}{bytes} += $bytes; 2053 $self->{stat_network_day}{$network}{$self->{last_day}}{duration} += $elapsed; 2054 if ($bytes > $self->{stat_netmax_hour}{$network}{largest_file_size}) { 2055 $self->{stat_netmax_hour}{$network}{largest_file_size} = $bytes; 2056 $self->{stat_netmax_hour}{$network}{largest_file_url} = $url; 2057 } 2058 if ($bytes > $self->{stat_netmax_day}{$network}{largest_file_size}) { 2059 $self->{stat_netmax_day}{$network}{largest_file_size} = $bytes; 2060 $self->{stat_netmax_day}{$network}{largest_file_url} = $url; 2061 } 2062 2063 #### Store HIT/MISS/DENIED statistics 2064 $self->{stat_code_hour}{$code}{$hour}{hits}++; 2065 $self->{stat_code_hour}{$code}{$hour}{bytes} += $bytes; 2066 $self->{stat_code_hour}{$code}{$hour}{elapsed} += $elapsed; 2067 $self->{stat_code_day}{$code}{$self->{last_day}}{hits}++; 2068 $self->{stat_code_day}{$code}{$self->{last_day}}{bytes} += $bytes; 2069 $self->{stat_code_day}{$code}{$self->{last_day}}{elapsed} += $elapsed; 2070 2071 $self->{stat_throughput_hour}{$code}{$hour}{bytes} += $bytes; 2072 $self->{stat_throughput_day}{$code}{$self->{last_day}}{bytes} += $bytes; 2073 $self->{stat_throughput_hour}{$code}{$hour}{elapsed} += $elapsed; 2074 $self->{stat_throughput_day}{$code}{$self->{last_day}}{elapsed} += $elapsed; 2075 2076 #### Store url statistics 2077 if ($self->{UrlReport}) { 2078 $self->{stat_user_url_hour}{$id}{$dest}{duration} += $elapsed; 2079 $self->{stat_user_url_hour}{$id}{$dest}{hits}++; 2080 $self->{stat_user_url_hour}{$id}{$dest}{bytes} += $bytes; 2081 $self->{stat_user_url_hour}{$id}{$dest}{firsthit} = $time if (!$self->{stat_user_url_hour}{$id}{$dest}{firsthit} || ($time < $self->{stat_user_url_hour}{$id}{$dest}{firsthit})); 2082 $self->{stat_user_url_hour}{$id}{$dest}{lasthit} = $time if (!$self->{stat_user_url_hour}{$id}{$dest}{lasthit} || ($time > $self->{stat_user_url_hour}{$id}{$dest}{lasthit})); 2083 if (!exists $self->{stat_user_url_hour}{$id}{$dest}{arr_last} || ($#{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}} < 9) || ($time > ($self->{stat_user_url_hour}{$id}{$dest}{arr_last}[-1]+300))) { 2084 push(@{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}}, $time); 2085 shift(@{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}}) if ($#{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}} > 9); 2086 } 2087 $self->{stat_user_url_day}{$id}{$dest}{duration} += $elapsed; 2088 $self->{stat_user_url_day}{$id}{$dest}{hits}++; 2089 $self->{stat_user_url_day}{$id}{$dest}{firsthit} = $time if (!$self->{stat_user_url_day}{$id}{$dest}{firsthit} || ($time < $self->{stat_user_url_day}{$id}{$dest}{firsthit})); 2090 $self->{stat_user_url_day}{$id}{$dest}{lasthit} = $time if (!$self->{stat_user_url_day}{$id}{$dest}{lasthit} || ($time > $self->{stat_user_url_day}{$id}{$dest}{lasthit})); 2091 $self->{stat_user_url_day}{$id}{$dest}{bytes} += $bytes; 2092 if ($code eq 'HIT') { 2093 $self->{stat_user_url_day}{$id}{$dest}{cache_hit}++; 2094 $self->{stat_user_url_day}{$id}{$dest}{cache_bytes} += $bytes; 2095 } 2096 if (!exists $self->{stat_user_url_day}{$id}{$dest}{arr_last} || ($#{$self->{stat_user_url_day}{$id}{$dest}{arr_last}} < 9) || ($time > ($self->{stat_user_url_day}{$id}{$dest}{arr_last}[-1]+1800))) { 2097 push(@{$self->{stat_user_url_day}{$id}{$dest}{arr_last}}, $time); 2098 shift(@{$self->{stat_user_url_day}{$id}{$dest}{arr_last}}) if ($#{$self->{stat_user_url_day}{$id}{$dest}{arr_last}} > 9); 2099 } 2100 } 2101 2102 #### Store user per networks statistics 2103 if ($self->{UserReport}) { 2104 $self->{stat_netuser_hour}{$network}{$id}{duration} += $elapsed; 2105 $self->{stat_netuser_hour}{$network}{$id}{bytes} += $bytes; 2106 $self->{stat_netuser_hour}{$network}{$id}{hits}++; 2107 if ($bytes > $self->{stat_netuser_hour}{$network}{$id}{largest_file_size}) { 2108 $self->{stat_netuser_hour}{$network}{$id}{largest_file_size} = $bytes; 2109 $self->{stat_netuser_hour}{$network}{$id}{largest_file_url} = $url; 2110 } 2111 $self->{stat_netuser_day}{$network}{$id}{duration} += $elapsed; 2112 $self->{stat_netuser_day}{$network}{$id}{bytes} += $bytes; 2113 $self->{stat_netuser_day}{$network}{$id}{hits}++; 2114 if ($bytes > $self->{stat_netuser_day}{$network}{$id}{largest_file_size}) { 2115 $self->{stat_netuser_day}{$network}{$id}{largest_file_size} = $bytes; 2116 $self->{stat_netuser_day}{$network}{$id}{largest_file_url} = $url; 2117 } 2118 } 2119 2120 #### Store mime type statistics 2121 $self->{stat_mime_type_hour}{"$type"}{hits}++; 2122 $self->{stat_mime_type_hour}{"$type"}{bytes} += $bytes; 2123 $self->{stat_mime_type_day}{"$type"}{hits}++; 2124 $self->{stat_mime_type_day}{"$type"}{bytes} += $bytes; 2125 } 2126 2127 sub _load_history 2128 { 2129 my ($self, $type, $year, $month, $day, $path, $kind, $wn, @wd) = @_; 2130 2131 #### Load history 2132 if ($type eq 'day') { 2133 foreach my $d ("01" .. "31") { 2134 $self->_read_stat($year, $month, $d, 'day', $kind); 2135 } 2136 } elsif ($type eq 'week') { 2137 $path = "$year/week$wn"; 2138 foreach my $wdate (@wd) { 2139 $wdate =~ /^(\d+)-(\d+)-(\d+)$/; 2140 $self->_read_stat($1, $2, $3, 'day', $kind, $wn); 2141 } 2142 $type = 'day'; 2143 } elsif ($type eq 'month') { 2144 foreach my $m ("01" .. "12") { 2145 $self->_read_stat($year, $m, $day, 'month', $kind); 2146 } 2147 } else { 2148 $self->_read_stat($year, $month, $day, '', $kind); 2149 } 2150 2151 } 2152 2153 sub _append_stat 2154 { 2155 my ($self, $year, $month, $day) = @_; 2156 2157 my $read_type = ''; 2158 my $type = 'hour'; 2159 if (!$day) { 2160 $type = 'day'; 2161 } 2162 if (!$month) { 2163 $type = 'month'; 2164 } 2165 $read_type = $type; 2166 2167 my $path = join('/', $year, $month, $day); 2168 $path =~ s/[\/]+$//; 2169 2170 print STDERR "Appending data into $self->{Output}/$path\n" if (!$self->{QuietMode}); 2171 2172 #### Save cache statistics 2173 my $dat_file_code = new IO::File; 2174 $dat_file_code->open(">>$self->{Output}/$path/stat_code.dat") 2175 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_code.dat, $!\n"); 2176 flock($dat_file_code, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2177 $self->_write_stat_data($dat_file_code, $type, 'stat_code'); 2178 $dat_file_code->close(); 2179 2180 #### With huge log file we only store global statistics in year and month views 2181 return if ( $self->{no_year_stat} && ($type ne 'hour') ); 2182 2183 #### Save url statistics per user 2184 if ($self->{UrlReport}) { 2185 my $dat_file_user_url = new IO::File; 2186 $dat_file_user_url->open(">>$self->{Output}/$path/stat_user_url.dat") 2187 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user_url.dat, $!\n"); 2188 flock($dat_file_user_url, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2189 $self->_write_stat_data($dat_file_user_url, $type, 'stat_user_url'); 2190 $dat_file_user_url->close(); 2191 # Denied URL 2192 my $dat_file_denied_url = new IO::File; 2193 $dat_file_denied_url->open(">>$self->{Output}/$path/stat_denied_url.dat") 2194 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_denied_url.dat, $!\n"); 2195 flock($dat_file_denied_url, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2196 $self->_write_stat_data($dat_file_denied_url, $type, 'stat_denied_url'); 2197 $dat_file_denied_url->close(); 2198 } 2199 2200 #### Save user statistics 2201 if ($self->{UserReport}) { 2202 my $dat_file_user = new IO::File; 2203 $dat_file_user->open(">>$self->{Output}/$path/stat_user.dat") 2204 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user.dat, $!\n"); 2205 flock($dat_file_user, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2206 $self->_write_stat_data($dat_file_user, $type, 'stat_user'); 2207 $dat_file_user->close(); 2208 } 2209 2210 #### Save network statistics 2211 my $dat_file_network = new IO::File; 2212 $dat_file_network->open(">>$self->{Output}/$path/stat_network.dat") 2213 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_network.dat, $!\n"); 2214 flock($dat_file_network, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2215 $self->_write_stat_data($dat_file_network, $type, 'stat_network'); 2216 $dat_file_network->close(); 2217 2218 #### Save user per network statistics 2219 if ($self->{UserReport}) { 2220 my $dat_file_netuser = new IO::File; 2221 $dat_file_netuser->open(">>$self->{Output}/$path/stat_netuser.dat") 2222 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_netuser.dat, $!\n"); 2223 flock($dat_file_netuser, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2224 $self->_write_stat_data($dat_file_netuser, $type, 'stat_netuser'); 2225 $dat_file_netuser->close(); 2226 } 2227 2228 #### Save mime statistics 2229 my $dat_file_mime_type = new IO::File; 2230 $dat_file_mime_type->open(">>$self->{Output}/$path/stat_mime_type.dat") 2231 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_mime_type.dat, $!\n"); 2232 flock($dat_file_mime_type, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2233 $self->_write_stat_data($dat_file_mime_type, $type, 'stat_mime_type'); 2234 $dat_file_mime_type->close(); 2235 2236 } 2237 2238 sub _save_stat 2239 { 2240 my ($self, $year, $month, $day, $wn, @wd) = @_; 2241 2242 my $path = join('/', $year, $month, $day); 2243 $path =~ s/[\/]+$//; 2244 2245 my $read_type = ''; 2246 my $type = 'hour'; 2247 if (!$day) { 2248 $type = 'day'; 2249 } 2250 if ($wn) { 2251 $type = 'week'; 2252 $path = "$year/week$wn"; 2253 } 2254 if (!$month) { 2255 $type = 'month'; 2256 } 2257 $read_type = $type; 2258 2259 print STDERR "Saving data into $self->{Output}/$path\n" if (!$self->{QuietMode}); 2260 2261 #### Save cache statistics 2262 my $dat_file_code = new IO::File; 2263 $self->_load_history($read_type, $year, $month, $day, $path, 'stat_code', $wn, @wd); 2264 $dat_file_code->open(">$self->{Output}/$path/stat_code.dat") 2265 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_code.dat, $!\n"); 2266 flock($dat_file_code, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2267 $self->_write_stat_data($dat_file_code, $type, 'stat_code'); 2268 $dat_file_code->close(); 2269 2270 #### With huge log file we only store global statistics in year and month views 2271 return if ( $self->{no_year_stat} && (($type ne 'hour') && !$wn) ); 2272 2273 #### Save url statistics per user 2274 if ($self->{UrlReport}) { 2275 my $dat_file_user_url = new IO::File; 2276 $self->_load_history($read_type, $year, $month, $day, $path, 'stat_user_url', $wn, @wd); 2277 $dat_file_user_url->open(">$self->{Output}/$path/stat_user_url.dat") 2278 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user_url.dat, $!\n"); 2279 flock($dat_file_user_url, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2280 $self->_write_stat_data($dat_file_user_url, $type, 'stat_user_url'); 2281 $dat_file_user_url->close(); 2282 # Denied URL 2283 my $dat_file_denied_url = new IO::File; 2284 $self->_load_history($read_type, $year, $month, $day, $path, 'stat_denied_url', $wn, @wd); 2285 $dat_file_denied_url->open(">$self->{Output}/$path/stat_denied_url.dat") 2286 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_denied_url.dat, $!\n"); 2287 flock($dat_file_denied_url, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2288 $self->_write_stat_data($dat_file_denied_url, $type, 'stat_denied_url'); 2289 $dat_file_denied_url->close(); 2290 } 2291 2292 #### Save user statistics 2293 if ($self->{UserReport}) { 2294 my $dat_file_user = new IO::File; 2295 $self->_load_history($read_type, $year, $month, $day, $path, 'stat_user', $wn, @wd); 2296 $dat_file_user->open(">$self->{Output}/$path/stat_user.dat") 2297 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user.dat, $!\n"); 2298 flock($dat_file_user, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2299 $self->_write_stat_data($dat_file_user, $type, 'stat_user'); 2300 $dat_file_user->close(); 2301 } 2302 2303 #### Save network statistics 2304 my $dat_file_network = new IO::File; 2305 $self->_load_history($read_type, $year, $month, $day, $path, 'stat_network', $wn, @wd); 2306 $dat_file_network->open(">$self->{Output}/$path/stat_network.dat") 2307 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_network.dat, $!\n"); 2308 flock($dat_file_network, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2309 $self->_write_stat_data($dat_file_network, $type, 'stat_network'); 2310 $dat_file_network->close(); 2311 2312 #### Save user per network statistics 2313 if ($self->{UserReport}) { 2314 my $dat_file_netuser = new IO::File; 2315 $self->_load_history($read_type, $year, $month, $day, $path, 'stat_netuser', $wn, @wd); 2316 $dat_file_netuser->open(">$self->{Output}/$path/stat_netuser.dat") 2317 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_netuser.dat, $!\n"); 2318 flock($dat_file_netuser, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2319 $self->_write_stat_data($dat_file_netuser, $type, 'stat_netuser'); 2320 $dat_file_netuser->close(); 2321 } 2322 2323 #### Save mime statistics 2324 my $dat_file_mime_type = new IO::File; 2325 $self->_load_history($read_type, $year, $month, $day, $path, 'stat_mime_type', $wn, @wd); 2326 $dat_file_mime_type->open(">$self->{Output}/$path/stat_mime_type.dat") 2327 or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_mime_type.dat, $!\n"); 2328 flock($dat_file_mime_type, 2) || die "FATAL: can't acquire lock on file, $!\n"; 2329 $self->_write_stat_data($dat_file_mime_type, $type, 'stat_mime_type'); 2330 $dat_file_mime_type->close(); 2331 2332 } 2333 2334 sub _write_stat_data 2335 { 2336 my ($self, $fh, $type, $kind) = @_; 2337 2338 $type = 'day' if ($type eq 'week'); 2339 2340 #### Save cache statistics 2341 if ($kind eq 'stat_code') { 2342 foreach my $code (sort {$a cmp $b} keys %{$self->{"stat_code_$type"}}) { 2343 $fh->print("$code " . "hits_$type="); 2344 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_code_$type"}{$code}}) { 2345 $fh->print("$tmp:" . $self->{"stat_code_$type"}{$code}{$tmp}{hits} . ","); 2346 } 2347 $fh->print(";bytes_$type="); 2348 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_code_$type"}{$code}}) { 2349 $fh->print("$tmp:" . $self->{"stat_code_$type"}{$code}{$tmp}{bytes} . ","); 2350 } 2351 $fh->print(";thp_bytes_$type="); 2352 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_throughput_$type"}{$code}}) { 2353 $fh->print("$tmp:" . $self->{"stat_throughput_$type"}{$code}{$tmp}{bytes} . ","); 2354 } 2355 $fh->print(";thp_duration_$type="); 2356 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_throughput_$type"}{$code}}) { 2357 $fh->print("$tmp:" . $self->{"stat_throughput_$type"}{$code}{$tmp}{elapsed} . ","); 2358 } 2359 $fh->print("\n"); 2360 } 2361 $self->{"stat_code_$type"} = (); 2362 } 2363 2364 #### Save denied url statistics per user 2365 if ($kind eq 'stat_denied_url') { 2366 foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_denied_url_$type"}}) { 2367 foreach my $dest (keys %{$self->{"stat_denied_url_$type"}{$id}}) { 2368 next if (!$dest); 2369 my $u = $id; 2370 $u = '-' if (!$self->{UserReport}); 2371 my $bl = ''; 2372 if (exists $self->{"stat_denied_url_$type"}{$id}{$dest}{blacklist}) { 2373 foreach my $b (keys %{$self->{"stat_denied_url_$type"}{$id}{$dest}{blacklist}}) { 2374 $bl .= $b . ',' . $self->{"stat_denied_url_$type"}{$id}{$dest}{blacklist}{$b} . ','; 2375 } 2376 $bl =~ s/,$//; 2377 } 2378 $fh->print( 2379 "$id hits=" . $self->{"stat_denied_url_$type"}{$id}{$dest}{hits} . ";" . 2380 "first=" . $self->{"stat_denied_url_$type"}{$id}{$dest}{firsthit} . ";" . 2381 "last=" . $self->{"stat_denied_url_$type"}{$id}{$dest}{lasthit} . ";" . 2382 "url=$dest" . ";" . 2383 "blacklist=" . $bl . 2384 "\n"); 2385 } 2386 } 2387 $self->{"stat_denied_url_$type"} = (); 2388 } 2389 2390 #### Save url statistics per user 2391 if ($kind eq 'stat_user_url') { 2392 foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_user_url_$type"}}) { 2393 my $i = 0; 2394 foreach my $dest (sort { 2395 $self->{"stat_user_url_$type"}{$id}{$b}{$self->{OrderUrl}} <=> $self->{"stat_user_url_$type"}{$id}{$a}{$self->{OrderUrl}} 2396 } keys %{$self->{"stat_user_url_$type"}{$id}}) { 2397 last if ($self->{TopStorage} && ($i > $self->{TopStorage})); 2398 my $u = $id; 2399 $u = '-' if (!$self->{UserReport}); 2400 $i++; 2401 $fh->print( 2402 "$id hits=" . $self->{"stat_user_url_$type"}{$id}{$dest}{hits} . ";" . 2403 "bytes=" . $self->{"stat_user_url_$type"}{$id}{$dest}{bytes} . ";" . 2404 "duration=" . $self->{"stat_user_url_$type"}{$id}{$dest}{duration} . ";" . 2405 "first=" . $self->{"stat_user_url_$type"}{$id}{$dest}{firsthit} . ";" . 2406 "last=" . $self->{"stat_user_url_$type"}{$id}{$dest}{lasthit} . ";" . 2407 "url=$dest;" . 2408 "cache_hit=" . ($self->{"stat_user_url_$type"}{$id}{$dest}{cache_hit}||0) . ";" . 2409 "cache_bytes=" . ($self->{"stat_user_url_$type"}{$id}{$dest}{cache_bytes}||0) . ";" . 2410 "arr_last=" . join(',', @{$self->{"stat_user_url_$type"}{$id}{$dest}{arr_last}}) . "\n"); 2411 } 2412 } 2413 $self->{"stat_user_url_$type"} = (); 2414 } 2415 2416 #### Save user statistics 2417 if ($kind eq 'stat_user') { 2418 foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_user_$type"}}) { 2419 my $name = $id; 2420 $name =~ s/\s+//g; 2421 $fh->print("$name hits_$type="); 2422 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_user_$type"}{$id}}) { 2423 $fh->print("$tmp:" . $self->{"stat_user_$type"}{$id}{$tmp}{hits} . ","); 2424 } 2425 $fh->print(";bytes_$type="); 2426 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_user_$type"}{$id}}) { 2427 $fh->print("$tmp:" . $self->{"stat_user_$type"}{$id}{$tmp}{bytes} . ","); 2428 } 2429 $fh->print(";duration_$type="); 2430 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_user_$type"}{$id}}) { 2431 $fh->print("$tmp:" . $self->{"stat_user_$type"}{$id}{$tmp}{duration} . ","); 2432 } 2433 $fh->print(";largest_file_size=" . $self->{"stat_usermax_$type"}{$id}{largest_file_size}); 2434 $fh->print(";largest_file_url=" . $self->{"stat_usermax_$type"}{$id}{largest_file_url} . "\n"); 2435 } 2436 $self->{"stat_user_$type"} = (); 2437 $self->{"stat_usermax_$type"} = (); 2438 } 2439 2440 #### Save network statistics 2441 if ($kind eq 'stat_network') { 2442 foreach my $net (sort {$a cmp $b} keys %{$self->{"stat_network_$type"}}) { 2443 $fh->print("$net\thits_$type="); 2444 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_network_$type"}{$net}}) { 2445 $fh->print("$tmp:" . $self->{"stat_network_$type"}{$net}{$tmp}{hits} . ","); 2446 } 2447 $fh->print(";bytes_$type="); 2448 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_network_$type"}{$net}}) { 2449 $fh->print("$tmp:" . $self->{"stat_network_$type"}{$net}{$tmp}{bytes} . ","); 2450 } 2451 $fh->print(";duration_$type="); 2452 foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_network_$type"}{$net}}) { 2453 $fh->print("$tmp:" . $self->{"stat_network_$type"}{$net}{$tmp}{duration} . ","); 2454 } 2455 $fh->print(";largest_file_size=" . $self->{"stat_netmax_$type"}{$net}{largest_file_size}); 2456 $fh->print(";largest_file_url=" . $self->{"stat_netmax_$type"}{$net}{largest_file_url} . "\n"); 2457 } 2458 $self->{"stat_network_$type"} = (); 2459 $self->{"stat_netmax_$type"} = (); 2460 } 2461 2462 #### Save user per network statistics 2463 if ($kind eq 'stat_netuser') { 2464 foreach my $net (sort {$a cmp $b} keys %{$self->{"stat_netuser_$type"}}) { 2465 foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_netuser_$type"}{$net}}) { 2466 $fh->print("$net\t$id\thits=" . $self->{"stat_netuser_$type"}{$net}{$id}{hits} . ";" . 2467 "bytes=" . $self->{"stat_netuser_$type"}{$net}{$id}{bytes} . ";" . 2468 "duration=" . $self->{"stat_netuser_$type"}{$net}{$id}{duration} . ";"); 2469 $fh->print("largest_file_size=" . 2470 $self->{"stat_netuser_$type"}{$net}{$id}{largest_file_size} . ";" . 2471 "largest_file_url=" . $self->{"stat_netuser_$type"}{$net}{$id}{largest_file_url} . "\n"); 2472 } 2473 } 2474 $self->{"stat_netuser_$type"} = (); 2475 } 2476 2477 #### Save mime statistics 2478 if ($kind eq 'stat_mime_type') { 2479 foreach my $mime (sort {$a cmp $b} keys %{$self->{"stat_mime_type_$type"}}) { 2480 $fh->print("$mime hits=" . $self->{"stat_mime_type_$type"}{$mime}{hits} . ";" . 2481 "bytes=" . $self->{"stat_mime_type_$type"}{$mime}{bytes} . "\n"); 2482 } 2483 $self->{"stat_mime_type_$type"} = (); 2484 } 2485 2486 } 2487 2488 sub _read_stat 2489 { 2490 my ($self, $year, $month, $day, $sum_type, $kind, $wn) = @_; 2491 2492 my $type = 'hour'; 2493 if (!$day) { 2494 $type = 'day'; 2495 } 2496 if (!$month) { 2497 $type = 'month'; 2498 } 2499 2500 my $path = join('/', $year, $month, $day); 2501 $path =~ s/[\/]+$//; 2502 2503 return if (! -d "$self->{Output}/$path"); 2504 2505 my $k = ''; 2506 my $key = ''; 2507 $key = $day if ($sum_type eq 'day'); 2508 $key = $month if ($sum_type eq 'month'); 2509 $sum_type ||= $type; 2510 2511 #### Read previous cache statistics 2512 if (!$kind || ($kind eq 'stat_code')) { 2513 my $dat_file_code = new IO::File; 2514 if ($dat_file_code->open("$self->{Output}/$path/stat_code.dat")) { 2515 my $i = 1; 2516 my $error = 0; 2517 while (my $l = <$dat_file_code>) { 2518 chomp($l); 2519 if ($l =~ s/^([^\s]+)\s+hits_$type=([^;]+);bytes_$type=([^;]+)//) { 2520 my $code = $1; 2521 my $hits = $2 || ''; 2522 my $bytes = $3 || ''; 2523 $hits =~ s/,$//; 2524 $bytes =~ s/,$//; 2525 my %hits_tmp = split(/[:,]/, $hits); 2526 foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) { 2527 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2528 $self->{"stat_code_$sum_type"}{$code}{$k}{hits} += $hits_tmp{$tmp}; 2529 } 2530 my %bytes_tmp = split(/[:,]/, $bytes); 2531 foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) { 2532 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2533 $self->{"stat_code_$sum_type"}{$code}{$k}{bytes} += $bytes_tmp{$tmp}; 2534 } 2535 if ($l =~ s/thp_bytes_$type=([^;]+);thp_duration_$type=([^;]+)//) { 2536 $bytes = $1 || ''; 2537 my $elapsed = $2 || ''; 2538 $elapsed =~ s/,$//; 2539 my %bytes_tmp = split(/[:,]/, $bytes); 2540 foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) { 2541 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2542 $self->{"stat_throughput_$sum_type"}{$code}{$k}{bytes} += $bytes_tmp{$tmp}; 2543 } 2544 my %elapsed_tmp = split(/[:,]/, $elapsed); 2545 foreach my $tmp (sort {$a <=> $b} keys %elapsed_tmp) { 2546 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2547 $self->{"stat_throughput_$sum_type"}{$code}{$k}{elapsed} += $elapsed_tmp{$tmp}; 2548 } 2549 } 2550 } else { 2551 print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_code.dat\n"; 2552 print STDERR "$l\n"; 2553 if ($error > $self->{MaxFormatError}) { 2554 unlink($self->{pidfile}); 2555 exit 0; 2556 } 2557 $error++; 2558 } 2559 $i++; 2560 } 2561 $dat_file_code->close(); 2562 } 2563 } 2564 2565 #### With huge log file we only store global statistics in year and month views 2566 return if ($self->{no_year_stat} && ($type ne 'hour')); 2567 2568 #### Read previous client statistics 2569 if (!$kind || ($kind eq 'stat_user')) { 2570 my $dat_file_user = new IO::File; 2571 if ($dat_file_user->open("$self->{Output}/$path/stat_user.dat")) { 2572 my $i = 1; 2573 my $error = 0; 2574 while (my $l = <$dat_file_user>) { 2575 chomp($l); 2576 if ($l =~ s/^([^\s]+)\s+hits_$type=([^;]+);bytes_$type=([^;]+);duration_$type=([^;]+);largest_file_size=([^;]*);largest_file_url=(.*)$//) { 2577 my $id = $1; 2578 my $hits = $2 || ''; 2579 my $bytes = $3 || ''; 2580 my $duration = $4 || ''; 2581 my $lsize = $5 || 0; 2582 my $lurl = $6 || 0; 2583 2584 if ($self->{rebuild}) { 2585 next if (!$self->check_inclusions($id)); 2586 next if ($self->check_exclusions($id)); 2587 } 2588 2589 # Anonymize all users 2590 if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) { 2591 if (!exists $self->{AnonymizedId}{$id}) { 2592 $self->{AnonymizedId}{$id} = &anonymize_id(); 2593 } 2594 $id = $self->{AnonymizedId}{$id}; 2595 } 2596 2597 if ($lsize > $self->{"stat_usermax_$sum_type"}{$id}{largest_file_size}) { 2598 $self->{"stat_usermax_$sum_type"}{$id}{largest_file_size} = $lsize; 2599 $self->{"stat_usermax_$sum_type"}{$id}{largest_file_url} = $lurl; 2600 } 2601 $hits =~ s/,$//; 2602 $bytes =~ s/,$//; 2603 $duration =~ s/,$//; 2604 my %hits_tmp = split(/[:,]/, $hits); 2605 foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) { 2606 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2607 $self->{"stat_user_$sum_type"}{$id}{$k}{hits} += $hits_tmp{$tmp}; 2608 } 2609 my %bytes_tmp = split(/[:,]/, $bytes); 2610 foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) { 2611 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2612 $self->{"stat_user_$sum_type"}{$id}{$k}{bytes} += $bytes_tmp{$tmp}; 2613 } 2614 my %duration_tmp = split(/[:,]/, $duration); 2615 foreach my $tmp (sort {$a <=> $b} keys %duration_tmp) { 2616 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2617 $self->{"stat_user_$sum_type"}{$id}{$k}{duration} += $duration_tmp{$tmp}; 2618 } 2619 } else { 2620 print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_user.dat:\n"; 2621 print STDERR "$l\n"; 2622 if ($error > $self->{MaxFormatError}) { 2623 unlink($self->{pidfile}); 2624 exit 0; 2625 } 2626 $error++; 2627 } 2628 $i++; 2629 } 2630 $dat_file_user->close(); 2631 } 2632 } 2633 2634 #### Read previous url statistics 2635 if ($self->{UrlReport}) { 2636 2637 if (!$kind || ($kind eq 'stat_user_url')) { 2638 my $dat_file_user_url = new IO::File; 2639 if ($dat_file_user_url->open("$self->{Output}/$path/stat_user_url.dat")) { 2640 my $i = 1; 2641 my $error = 0; 2642 while (my $l = <$dat_file_user_url>) { 2643 chomp($l); 2644 my $id = ''; 2645 if ($l =~ /^([^\s]+)\s+hits=/) { 2646 $id = $1; 2647 } 2648 $id = '-' if (!$self->{UserReport}); 2649 2650 if ($self->{rebuild}) { 2651 next if (!$self->check_inclusions($id)); 2652 next if ($self->check_exclusions($id)); 2653 } 2654 2655 # Anonymize all users 2656 if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) { 2657 if (!exists $self->{AnonymizedId}{$id}) { 2658 $self->{AnonymizedId}{$id} = &anonymize_id(); 2659 } 2660 $id = $self->{AnonymizedId}{$id}; 2661 } 2662 2663 if ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*?);cache_hit=(\d*);cache_bytes=(\d*)//) { 2664 my $url = $7; 2665 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{hits} += $2; 2666 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{bytes} += $3; 2667 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{duration} += abs($4); 2668 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} = $5 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} || ($5 < $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit})); 2669 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} = $6 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} || ($6 > $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit})); 2670 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{cache_hit} += $8; 2671 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{cache_bytes} += $9; 2672 if ($self->{rebuild}) { 2673 if ($self->check_exclusions('', '', $url)) { 2674 delete $self->{"stat_user_url_$sum_type"}{$id}{"$url"}; 2675 next; 2676 } 2677 } 2678 if ($l =~ s/^;arr_last=(.*)//) { 2679 my $incr = 1800; 2680 $incr = 300 if ($sum_type eq 'hour'); 2681 $incr = 86400 if ($sum_type eq 'month'); 2682 foreach my $tm (split(/,/, $1)) { 2683 if (!exists $self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last} || ($#{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}} < 9) || ($tm > ${$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}}[-1] + $incr)) { 2684 push(@{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}}, $tm); 2685 shift(@{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}}) if ($#{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}} > 9); 2686 } 2687 } 2688 } 2689 } elsif ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*)$//) { 2690 my $url = $7; 2691 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{hits} += $2; 2692 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{bytes} += $3; 2693 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{duration} += abs($4); 2694 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} = $5 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} || ($5 < $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit})); 2695 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} = $6 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} || ($6 > $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit})); 2696 if ($self->{rebuild}) { 2697 if ($self->check_exclusions('', '', $url)) { 2698 delete $self->{"stat_user_url_$sum_type"}{$id}{"$url"}; 2699 next; 2700 } 2701 } 2702 } elsif ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+);duration=([\-\d]+);url=(.*)$//) { 2703 my $url = $5; 2704 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{hits} += $2; 2705 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{bytes} += $3; 2706 $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{duration} += abs($4); 2707 if ($self->{rebuild}) { 2708 if ($self->check_exclusions('', '', $url)) { 2709 delete $self->{"stat_user_url_$sum_type"}{$id}{"$url"}; 2710 next; 2711 } 2712 } 2713 } else { 2714 print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_user_url.dat\n"; 2715 print STDERR "$l\n"; 2716 if ($error > $self->{MaxFormatError}) { 2717 unlink($self->{pidfile}); 2718 exit 0; 2719 } 2720 $error++; 2721 } 2722 $i++; 2723 } 2724 $dat_file_user_url->close(); 2725 } 2726 } 2727 2728 if (!$kind || ($kind eq 'stat_denied_url')) { 2729 2730 my $dat_file_denied_url = new IO::File; 2731 if ($dat_file_denied_url->open("$self->{Output}/$path/stat_denied_url.dat")) { 2732 my $i = 1; 2733 my $error = 0; 2734 while (my $l = <$dat_file_denied_url>) { 2735 chomp($l); 2736 my $id = ''; 2737 if ($l =~ /^([^\s]+)\s+hits=/) { 2738 $id = $1; 2739 } 2740 $id = '-' if (!$self->{UserReport}); 2741 2742 if ($self->{rebuild}) { 2743 next if (!$self->check_inclusions($id)); 2744 next if ($self->check_exclusions($id)); 2745 } 2746 2747 # Anonymize all denieds 2748 if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) { 2749 if (!exists $self->{AnonymizedId}{$id}) { 2750 $self->{AnonymizedId}{$id} = &anonymize_id(); 2751 } 2752 $id = $self->{AnonymizedId}{$id}; 2753 } 2754 2755 if ($l =~ s/^([^\s]+)\s+hits=(\d+);first=([^;]*);last=([^;]*);url=(.*);blacklist=(.*)//) { 2756 if ($self->{rebuild}) { 2757 next if ($self->check_exclusions('', '', $5)); 2758 } 2759 $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{hits} += $2; 2760 $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} = $3 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} || ($3 < $self->{"stat_denied_url_$sum_type"}{$id}{"$7"}{firsthit})); 2761 $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} = $4 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} || ($4 > $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit})); 2762 if ($6) { 2763 my %tmp = split(/,/, $6); 2764 foreach my $k (keys %tmp) { 2765 $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{blacklist}{$k} += $tmp{$k}; 2766 } 2767 } 2768 } elsif ($l =~ s/^([^\s]+)\s+hits=(\d+);first=([^;]*);last=([^;]*);url=(.*)//) { 2769 if ($self->{rebuild}) { 2770 next if ($self->check_exclusions('', '', $5)); 2771 } 2772 $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{hits} += $2; 2773 $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} = $3 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} || ($3 < $self->{"stat_denied_url_$sum_type"}{$id}{"$7"}{firsthit})); 2774 $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} = $4 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} || ($4 > $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit})); 2775 } elsif ($l =~ /^([^\s]+)\s+hits=;first=;last=;url=/) { 2776 # do nothing, this should not appears, but fixes issue #81 2777 } else { 2778 print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_denied_url.dat\n"; 2779 print STDERR "$l\n"; 2780 if ($error > $self->{MaxFormatError}) { 2781 unlink($self->{pidfile}); 2782 exit 0; 2783 } 2784 $error++; 2785 } 2786 $i++; 2787 } 2788 $dat_file_denied_url->close(); 2789 } 2790 } 2791 2792 } 2793 2794 #### Read previous network statistics 2795 if (!$kind || ($kind eq 'stat_network')) { 2796 my $dat_file_network = new IO::File; 2797 if ($dat_file_network->open("$self->{Output}/$path/stat_network.dat")) { 2798 my $i = 1; 2799 my $error = 0; 2800 while (my $l = <$dat_file_network>) { 2801 chomp($l); 2802 my ($net, $data) = split(/\t/, $l); 2803 if (!$data) { 2804 # Assume backward compatibility 2805 $l =~ s/^(.*)\shits_$type=/hits_$type=/; 2806 $net = $1; 2807 $data = $l; 2808 } 2809 2810 if ($self->{rebuild} && !exists $self->{NetworkAlias}->{$net}) { 2811 next if (!$self->check_inclusions('', $net)); 2812 next if ($self->check_exclusions('', $net)); 2813 2814 } 2815 2816 if ($self->{UpdateAlias}) { 2817 # Replace network by his aliases if any 2818 $net = (!$self->{has_nework_alias}) ? $net : $self->apply_network_alias($net) 2819 } 2820 2821 if ($data =~ s/^hits_$type=([^;]+);bytes_$type=([^;]+);duration_$type=([^;]+);largest_file_size=([^;]*);largest_file_url=(.*)$//) { 2822 my $hits = $1 || ''; 2823 my $bytes = $2 || ''; 2824 my $duration = $3 || ''; 2825 2826 if ($4 > $self->{"stat_netmax_$sum_type"}{$net}{largest_file_size}) { 2827 $self->{"stat_netmax_$sum_type"}{$net}{largest_file_size} = $4; 2828 $self->{"stat_netmax_$sum_type"}{$net}{largest_file_url} = $5; 2829 } 2830 $hits =~ s/,$//; 2831 $bytes =~ s/,$//; 2832 $duration =~ s/,$//; 2833 my %hits_tmp = split(/[:,]/, $hits); 2834 foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) { 2835 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2836 $self->{"stat_network_$sum_type"}{$net}{$k}{hits} += $hits_tmp{$tmp}; 2837 } 2838 my %bytes_tmp = split(/[:,]/, $bytes); 2839 foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) { 2840 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2841 $self->{"stat_network_$sum_type"}{$net}{$k}{bytes} += $bytes_tmp{$tmp}; 2842 } 2843 my %duration_tmp = split(/[:,]/, $duration); 2844 foreach my $tmp (sort {$a <=> $b} keys %duration_tmp) { 2845 if ($key ne '') { $k = $key; } else { $k = $tmp; } 2846 $self->{"stat_network_$sum_type"}{$net}{$k}{duration} += $duration_tmp{$tmp}; 2847 } 2848 } else { 2849 print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_network.dat\n"; 2850 print STDERR "$l\n"; 2851 if ($error > $self->{MaxFormatError}) { 2852 unlink($self->{pidfile}); 2853 exit 0; 2854 } 2855 $error++; 2856 } 2857 $i++; 2858 } 2859 $dat_file_network->close(); 2860 } 2861 } 2862 2863 #### Read previous user per network statistics 2864 if ($self->{UserReport}) { 2865 if (!$kind || ($kind eq 'stat_netuser')) { 2866 my $dat_file_netuser = new IO::File; 2867 if ($dat_file_netuser->open("$self->{Output}/$path/stat_netuser.dat")) { 2868 my $i = 1; 2869 my $error = 0; 2870 while (my $l = <$dat_file_netuser>) { 2871 chomp($l); 2872 my ($net, $id, $data) = split(/\t/, $l); 2873 if (!$data) { 2874 # Assume backward compatibility 2875 $l =~ s/^(.*)\s([^\s]+)\shits=/hits=/; 2876 $net = $1; 2877 $id = $2; 2878 $data = $l; 2879 } 2880 2881 if ($self->{rebuild}) { 2882 next if (!$self->check_inclusions($id, $net)); 2883 next if ($self->check_exclusions($id, $net)); 2884 } 2885 2886 # Replace network by his aliases if any 2887 $net = (!$self->{has_nework_alias}) ? $net : $self->apply_network_alias($net); 2888 2889 # Anonymize all users 2890 if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) { 2891 if (!exists $self->{AnonymizedId}{$id}) { 2892 $self->{AnonymizedId}{$id} = &anonymize_id(); 2893 } 2894 $id = $self->{AnonymizedId}{$id}; 2895 } 2896 2897 if ($data =~ s/^hits=(\d+);bytes=(\d+);duration=([\-\d]+);largest_file_size=([^;]*);largest_file_url=(.*)$//) { 2898 $self->{"stat_netuser_$sum_type"}{$net}{$id}{hits} += $1; 2899 $self->{"stat_netuser_$sum_type"}{$net}{$id}{bytes} += $2; 2900 $self->{"stat_netuser_$sum_type"}{$net}{$id}{duration} += abs($3); 2901 if ($4 > $self->{"stat_netuser_$sum_type"}{$net}{$id}{largest_file_size}) { 2902 $self->{"stat_netuser_$sum_type"}{$net}{$id}{largest_file_size} = $4; 2903 $self->{"stat_netuser_$sum_type"}{$net}{$id}{largest_file_url} = $5; 2904