"Fossies" - the Fresh Open Source Software Archive

Member "squidanalyzer-6.6/SquidAnalyzer.pm" (7 May 2017, 239262 Bytes) of package /linux/privat/squidanalyzer-6.6.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "SquidAnalyzer.pm" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 6.5_vs_6.6.

A hint: This file contains one or more very long lines, so maybe it is better readable using the pure text view mode that shows the contents as wrapped lines within the browser window.


    1 package SquidAnalyzer;
    2 #------------------------------------------------------------------------------
    3 # Project  : Squid Log Analyzer
    4 # Name     : SquidAnalyzer.pm
    5 # Language : Perl 5
    6 # OS       : All
    7 # Copyright: Copyright (c) 2001-2017 Gilles Darold - All rights reserved.
    8 # Licence  : This program is free software; you can redistribute it
    9 #            and/or modify it under the same terms as Perl itself.
   10 # Author   : Gilles Darold, gilles _AT_ darold _DOT_ net
   11 # Function : Main perl module for Squid Log Analyzer
   12 # Usage    : See documentation.
   13 #------------------------------------------------------------------------------
   14 use strict qw/vars/;
   15 
   16 BEGIN {
   17     use Exporter();
   18     use vars qw($VERSION $COPYRIGHT $AUTHOR @ISA @EXPORT $ZCAT_PROG $BZCAT_PROG $XZCAT_PROG $RM_PROG);
   19     use POSIX qw/ strftime sys_wait_h /;
   20     use IO::File;
   21     use Socket ();
   22     use Time::HiRes qw/ualarm/;
   23     use Time::Local qw/timelocal_nocheck timegm_nocheck/;
   24     use Fcntl qw(:flock);
   25     use IO::Handle;
   26     use FileHandle;
   27     use POSIX qw(locale_h);
   28     setlocale(LC_NUMERIC, '');
   29     setlocale(LC_ALL,     'C');
   30 
   31     # Set all internal variable
   32     $VERSION = '6.6';
   33     $COPYRIGHT = 'Copyright (c) 2001-2017 Gilles Darold - All rights reserved.';
   34     $AUTHOR = "Gilles Darold - gilles _AT_ darold _DOT_ net";
   35 
   36     @ISA = qw(Exporter);
   37     @EXPORT = qw//;
   38 
   39     $| = 1;
   40 
   41 }
   42 
   43 $ZCAT_PROG = "/bin/zcat";
   44 $BZCAT_PROG = "/bin/bzcat";
   45 $RM_PROG    = "/bin/rm";
   46 $XZCAT_PROG = "/usr/bin/xzcat";
   47 
   48 # DNS Cache
   49 my %CACHE = ();
   50 
   51 # Color used to draw grpahs
   52 my @GRAPH_COLORS = ('#6e9dc9', '#f4ab3a', '#ac7fa8', '#8dbd0f');
   53 
   54 # Default translation srings
   55 my %Translate = (
   56     'CharSet' => 'utf-8',
   57     '01' => 'Jan',
   58     '02' => 'Feb',
   59     '03' => 'Mar',
   60     '04' => 'Apr',
   61     '05' => 'May',
   62     '06' => 'Jun',
   63     '07' => 'Jul',
   64     '08' => 'Aug',
   65     '09' => 'Sep',
   66     '10' => 'Oct',
   67     '11' => 'Nov',
   68     '12' => 'Dec',
   69     'KB' => 'Kilo bytes',
   70     'MB' => 'Mega bytes',
   71     'GB' => 'Giga bytes',
   72     'Bytes' => 'Bytes',
   73     'Total' => 'Total',
   74     'Years' => 'Years',
   75     'Users' => 'Users',
   76     'Sites' => 'Sites',
   77     'Cost' => 'Cost',
   78     'Requests' => 'Requests',
   79     'Megabytes' => 'Mega bytes',
   80     'Months' => 'Months',
   81     'Days' => 'Days',
   82     'Hit' => 'Hit',
   83     'Miss' => 'Miss',
   84     'Denied' => 'Denied',
   85     'Domains' => 'Domains',
   86     'Requests_graph' => 'Requests',
   87     'Megabytes_graph' => 'Mega bytes',
   88     'Months_graph' => 'Months',
   89     'Days_graph' => 'Days',
   90     'Hit_graph' => 'Hit',
   91     'Miss_graph' => 'Miss',
   92     'Denied_graph' => 'Denied',
   93     'Total_graph' => 'Total',
   94     'Domains_graph' => 'Domains',
   95     'Users_help' => 'Total number of different users for this period',
   96     'Sites_help' => 'Total number of different visited sites for this period',
   97     'Domains_help' => 'Total number of different second level visited domain for this period',
   98     'Hit_help' => 'Objects found in cache',
   99     'Miss_help' => 'Objects not found in cache',
  100     'Denied_help' => 'Objects with denied access',
  101     'Cost_help' => '1 Mega byte =',
  102     'Generation' => 'Report generated on',
  103     'Main_cache_title' => 'Cache Statistics',
  104     'Cache_title' => 'Cache Statistics on',
  105     'Stat_label' => 'Stat',
  106     'Mime_link' => 'Mime Types',
  107     'Network_link' => 'Networks',
  108     'User_link' => 'Users',
  109     'Top_url_link' => 'Top Urls',
  110     'Top_domain_link' => 'Top Domains',
  111     'Back_link' => 'Back',
  112     'Graph_cache_hit_title' => '%s Requests statistics on',
  113     'Graph_cache_byte_title' => '%s Mega Bytes statistics on',
  114     'Hourly' => 'Hourly',
  115     'Hours' => 'Hours',
  116     'Daily' => 'Daily',
  117     'Days' => 'Days',
  118     'Monthly' => 'Monthly',
  119     'Months' => 'Months',
  120     'Mime_title' => 'Mime Type Statistics on',
  121     'Mime_number' => 'Number of mime type',
  122     'Network_title' => 'Network Statistics on',
  123     'Network_number' => 'Number of network',
  124     'Duration' => 'Duration',
  125     'Time' => 'Time',
  126     'Largest' => 'Largest',
  127     'Url' => 'Url',
  128     'User_title' => 'User Statistics on',
  129     'User_number' => 'Number of user',
  130     'Url_title' => 'Top %d site on',
  131     'Url_Hits_title' => 'Top %d Url hits on',
  132     'Url_Bytes_title' => 'Top %d Url bytes on',
  133     'Url_Duration_title' => 'Top %d Url duration on',
  134     'Url_number' => 'Number of Url',
  135     'Domain_Hits_title' => 'Top %d Domain hits on',
  136     'Domain_Bytes_title' => 'Top %d Domain bytes on',
  137     'Domain_Duration_title' => 'Top %d Domain duration on',
  138     'Domain_number' => 'Number of domain',
  139     'Domain_graph_hits_title' => 'Domain Hits Statistics on',
  140     'Domain_graph_bytes_title' => 'Domain Bytes Statistics on',
  141     'Second_domain_graph_hits_title' => 'Second level Hits Statistics on',
  142     'Second_domain_graph_bytes_title' => 'Second level Bytes Statistics on',
  143     'First_visit' => 'First visit',
  144     'Last_visit' => 'Last visit',
  145     'Globals_Statistics' => 'Globals Statistics',
  146     'Legend' => 'Legend',
  147     'File_Generated' => 'File generated by',
  148     'Up_link' => 'Up',
  149     'Click_year_stat' => 'Click on year\'s statistics link for details',
  150     'Mime_graph_hits_title' => 'Mime Type Hits Statistics on',
  151     'Mime_graph_bytes_title' => 'Mime Type MBytes Statistics on',
  152     'User' => 'User',
  153     'Count' => 'Count',
  154     'WeekDay' => 'Su Mo Tu We Th Fr Sa',
  155     'Week' => 'Week',
  156     'Top_denied_link' => 'Top Denied',
  157     'Blocklist_acl_title' => 'Blocklist ACL use',
  158     'Throughput' => 'Throughput',
  159     'Graph_throughput_title' => '%s throughput on',
  160     'Throughput_graph' => 'Bytes/sec',
  161 );
  162 
  163 my @TLD1 = (
  164     '\.com\.ac','\.net\.ac','\.gov\.ac','\.org\.ac','\.mil\.ac','\.co\.ae',
  165     '\.net\.ae','\.gov\.ae','\.ac\.ae','\.sch\.ae','\.org\.ae','\.mil\.ae','\.pro\.ae',
  166     '\.name\.ae','\.com\.af','\.edu\.af','\.gov\.af','\.net\.af','\.org\.af','\.com\.al',
  167     '\.edu\.al','\.gov\.al','\.mil\.al','\.net\.al','\.org\.al','\.ed\.ao','\.gv\.ao',
  168     '\.og\.ao','\.co\.ao','\.pb\.ao','\.it\.ao','\.com\.ar','\.edu\.ar','\.gob\.ar',
  169     '\.gov\.ar','\.gov\.ar','\.int\.ar','\.mil\.ar','\.net\.ar','\.org\.ar','\.tur\.ar',
  170     '\.gv\.at','\.ac\.at','\.co\.at','\.or\.at','\.com\.au','\.net\.au','\.org\.au',
  171     '\.edu\.au','\.gov\.au','\.csiro\.au','\.asn\.au','\.id\.au','\.org\.ba','\.net\.ba',
  172     '\.edu\.ba','\.gov\.ba','\.mil\.ba','\.unsa\.ba','\.untz\.ba','\.unmo\.ba','\.unbi\.ba',
  173     '\.unze\.ba','\.co\.ba','\.com\.ba','\.rs\.ba','\.co\.bb','\.com\.bb','\.net\.bb',
  174     '\.org\.bb','\.gov\.bb','\.edu\.bb','\.info\.bb','\.store\.bb','\.tv\.bb','\.biz\.bb',
  175     '\.com\.bh','\.info\.bh','\.cc\.bh','\.edu\.bh','\.biz\.bh','\.net\.bh','\.org\.bh',
  176     '\.gov\.bh','\.com\.bn','\.edu\.bn','\.gov\.bn','\.net\.bn','\.org\.bn','\.com\.bo',
  177     '\.net\.bo','\.org\.bo','\.tv\.bo','\.mil\.bo','\.int\.bo','\.gob\.bo','\.gov\.bo',
  178     '\.edu\.bo','\.adm\.br','\.adv\.br','\.agr\.br','\.am\.br','\.arq\.br','\.art\.br',
  179     '\.ato\.br','\.b\.br','\.bio\.br','\.blog\.br','\.bmd\.br','\.cim\.br','\.cng\.br',
  180     '\.cnt\.br','\.com\.br','\.coop\.br','\.ecn\.br','\.edu\.br','\.eng\.br','\.esp\.br',
  181     '\.etc\.br','\.eti\.br','\.far\.br','\.flog\.br','\.fm\.br','\.fnd\.br','\.fot\.br',
  182     '\.fst\.br','\.g12\.br','\.ggf\.br','\.gov\.br','\.imb\.br','\.ind\.br','\.inf\.br',
  183     '\.jor\.br','\.jus\.br','\.lel\.br','\.mat\.br','\.med\.br','\.mil\.br','\.mus\.br',
  184     '\.net\.br','\.nom\.br','\.not\.br','\.ntr\.br','\.odo\.br','\.org\.br','\.ppg\.br',
  185     '\.pro\.br','\.psc\.br','\.psi\.br','\.qsl\.br','\.rec\.br','\.slg\.br','\.srv\.br',
  186     '\.tmp\.br','\.trd\.br','\.tur\.br','\.tv\.br','\.vet\.br','\.vlog\.br','\.wiki\.br',
  187     '\.zlg\.br','\.com\.bs','\.net\.bs','\.org\.bs','\.edu\.bs','\.gov\.bs','com\.bz',
  188     'edu\.bz','gov\.bz','net\.bz','org\.bz','\.ab\.ca','\.bc\.ca','\.mb\.ca','\.nb\.ca',
  189     '\.nf\.ca','\.nl\.ca','\.ns\.ca','\.nt\.ca','\.nu\.ca','\.on\.ca','\.pe\.ca','\.qc\.ca',
  190     '\.sk\.ca','\.yk\.ca','\.co\.ck','\.org\.ck','\.edu\.ck','\.gov\.ck','\.net\.ck',
  191     '\.gen\.ck','\.biz\.ck','\.info\.ck','\.ac\.cn','\.com\.cn','\.edu\.cn','\.gov\.cn',
  192     '\.mil\.cn','\.net\.cn','\.org\.cn','\.ah\.cn','\.bj\.cn','\.cq\.cn','\.fj\.cn','\.gd\.cn',
  193     '\.gs\.cn','\.gz\.cn','\.gx\.cn','\.ha\.cn','\.hb\.cn','\.he\.cn','\.hi\.cn','\.hl\.cn',
  194     '\.hn\.cn','\.jl\.cn','\.js\.cn','\.jx\.cn','\.ln\.cn','\.nm\.cn','\.nx\.cn','\.qh\.cn',
  195     '\.sc\.cn','\.sd\.cn','\.sh\.cn','\.sn\.cn','\.sx\.cn','\.tj\.cn','\.tw\.cn','\.xj\.cn',
  196     '\.xz\.cn','\.yn\.cn','\.zj\.cn','\.com\.co','\.org\.co','\.edu\.co','\.gov\.co',
  197     '\.net\.co','\.mil\.co','\.nom\.co','\.ac\.cr','\.co\.cr','\.ed\.cr','\.fi\.cr','\.go\.cr',
  198     '\.com\.cu','\.edu\.cu','\.gov\.cu','\.net\.cu','\.org\.cu',
  199     '\.or\.cr','\.sa\.cr','\.cr','\.ac\.cy','\.net\.cy','\.gov\.cy','\.org\.cy',
  200     '\.pro\.cy','\.name\.cy','\.ekloges\.cy','\.tm\.cy','\.ltd\.cy','\.biz\.cy','\.press\.cy',
  201     '\.parliament\.cy','\.com\.cy','\.edu\.do','\.gob\.do','\.gov\.do','\.com\.do','\.sld\.do',
  202     '\.org\.do','\.net\.do','\.web\.do','\.mil\.do','\.art\.do','\.com\.dz','\.org\.dz',
  203     '\.net\.dz','\.gov\.dz','\.edu\.dz','\.asso\.dz','\.pol\.dz','\.art\.dz','\.com\.ec',
  204     '\.info\.ec','\.net\.ec','\.fin\.ec','\.med\.ec','\.pro\.ec','\.org\.ec','\.edu\.ec',
  205     '\.gov\.ec','\.mil\.ec','\.com\.eg','\.edu\.eg','\.eun\.eg','\.gov\.eg','\.mil\.eg',
  206     '\.name\.eg','\.net\.eg','\.org\.eg','\.sci\.eg','\.com\.er','\.edu\.er','\.gov\.er',
  207     '\.mil\.er','\.net\.er','\.org\.er','\.ind\.er','\.rochest\.er','\.w\.er','\.com\.es',
  208     '\.nom\.es','\.org\.es','\.gob\.es','\.edu\.es','\.com\.et','\.gov\.et','\.org\.et',
  209     '\.edu\.et','\.net\.et','\.biz\.et','\.name\.et','\.info\.et','\.ac\.fj','\.biz\.fj',
  210     '\.com\.fj','\.info\.fj','\.mil\.fj','\.name\.fj','\.net\.fj','\.org\.fj','\.pro\.fj',
  211     '\.co\.fk','\.org\.fk','\.gov\.fk','\.ac\.fk','\.nom\.fk','\.net\.fk','\.fr','\.tm\.fr',
  212     '\.asso\.fr','\.nom\.fr','\.prd\.fr','\.presse\.fr','\.com\.fr','\.gouv\.fr','\.co\.gg',
  213     '\.net\.gg','\.org\.gg','\.com\.gh','\.edu\.gh','\.gov\.gh','\.org\.gh','\.mil\.gh',
  214     '\.com\.gn','\.ac\.gn','\.gov\.gn','\.org\.gn','\.net\.gn','\.com\.gr','\.edu\.gr','\.net\.gr',
  215     '\.org\.gr','\.gov\.gr','\.mil\.gr','\.com\.gt','\.edu\.gt','\.net\.gt','\.gob\.gt',
  216     '\.org\.gt','\.mil\.gt','\.ind\.gt','\.com\.gu','\.net\.gu','\.gov\.gu','\.org\.gu','\.edu\.gu',
  217     '\.com\.hk','\.edu\.hk','\.gov\.hk','\.idv\.hk','\.net\.hk','\.org\.hk','\.ac\.id','\.co\.id',
  218     '\.net\.id','\.or\.id','\.web\.id','\.sch\.id','\.mil\.id','\.go\.id','\.war\.net\.id','\.ac\.il',
  219     '\.co\.il','\.org\.il','\.net\.il','\.k12\.il','\.gov\.il','\.muni\.il','\.idf\.il','\.in',
  220     '\.co\.in','\.firm\.in','\.net\.in','\.org\.in','\.gen\.in','\.ind\.in','\.ac\.in','\.edu\.in',
  221     '\.res\.in','\.ernet\.in','\.gov\.in','\.mil\.in','\.nic\.in','\.nic\.in','\.iq','\.gov\.iq',
  222     '\.edu\.iq','\.com\.iq','\.mil\.iq','\.org\.iq','\.net\.iq','\.ir','\.ac\.ir','\.co\.ir',
  223     '\.gov\.ir','\.id\.ir','\.net\.ir','\.org\.ir','\.sch\.ir','\.dnssec\.ir','\.gov\.it',
  224     '\.edu\.it','\.co\.je','\.net\.je','\.org\.je','\.com\.jo','\.net\.jo','\.gov\.jo','\.edu\.jo',
  225     '\.org\.jo','\.mil\.jo','\.name\.jo','\.sch\.jo','\.ac\.jp','\.ad\.jp','\.co\.jp','\.ed\.jp',
  226     '\.go\.jp','\.gr\.jp','\.lg\.jp','\.ne\.jp','\.or\.jp','\.co\.ke','\.or\.ke','\.ne\.ke','\.go\.ke',
  227     '\.ac\.ke','\.sc\.ke','\.me\.ke','\.mobi\.ke','\.info\.ke','\.per\.kh','\.com\.kh','\.edu\.kh',
  228     '\.gov\.kh','\.mil\.kh','\.net\.kh','\.org\.kh','\.com\.ki','\.biz\.ki','\.de\.ki','\.net\.ki',
  229     '\.info\.ki','\.org\.ki','\.gov\.ki','\.edu\.ki','\.mob\.ki','\.tel\.ki','\.km','\.com\.km',
  230     '\.coop\.km','\.asso\.km','\.nom\.km','\.presse\.km','\.tm\.km','\.medecin\.km','\.notaires\.km',
  231     '\.pharmaciens\.km','\.veterinaire\.km','\.edu\.km','\.gouv\.km','\.mil\.km','\.net\.kn',
  232     '\.org\.kn','\.edu\.kn','\.gov\.kn','\.kr','\.co\.kr','\.ne\.kr','\.or\.kr','\.re\.kr','\.pe\.kr',
  233     '\.go\.kr','\.mil\.kr','\.ac\.kr','\.hs\.kr','\.ms\.kr','\.es\.kr','\.sc\.kr','\.kg\.kr',
  234     '\.seoul\.kr','\.busan\.kr','\.daegu\.kr','\.incheon\.kr','\.gwangju\.kr','\.daejeon\.kr',
  235     '\.ulsan\.kr','\.gyeonggi\.kr','\.gangwon\.kr','\.chungbuk\.kr','\.chungnam\.kr','\.jeonbuk\.kr',
  236     '\.jeonnam\.kr','\.gyeongbuk\.kr','\.gyeongnam\.kr','\.jeju\.kr','\.edu\.kw','\.com\.kw',
  237     '\.net\.kw','\.org\.kw','\.gov\.kw','\.com\.ky','\.org\.ky','\.net\.ky','\.edu\.ky','\.gov\.ky',
  238     '\.com\.kz','\.edu\.kz','\.gov\.kz','\.mil\.kz','\.net\.kz','\.org\.kz','\.com\.lb','\.edu\.lb',
  239     '\.gov\.lb','\.net\.lb','\.org\.lb','\.gov\.lk','\.sch\.lk','\.net\.lk','\.int\.lk','\.com\.lk',
  240     '\.org\.lk','\.edu\.lk','\.ngo\.lk','\.soc\.lk','\.web\.lk','\.ltd\.lk','\.assn\.lk','\.grp\.lk',
  241     '\.hotel\.lk','\.com\.lr','\.edu\.lr','\.gov\.lr','\.org\.lr','\.net\.lr','\.com\.lv','\.edu\.lv',
  242     '\.gov\.lv','\.org\.lv','\.mil\.lv','\.id\.lv','\.net\.lv','\.asn\.lv','\.conf\.lv','\.com\.ly',
  243     '\.net\.ly','\.gov\.ly','\.plc\.ly','\.edu\.ly','\.sch\.ly','\.med\.ly','\.org\.ly','\.id\.ly',
  244     '\.ma','\.net\.ma','\.ac\.ma','\.org\.ma','\.gov\.ma','\.press\.ma','\.co\.ma','\.tm\.mc',
  245     '\.asso\.mc','\.co\.me','\.net\.me','\.org\.me','\.edu\.me','\.ac\.me','\.gov\.me','\.its\.me',
  246     '\.priv\.me','\.org\.mg','\.nom\.mg','\.gov\.mg','\.prd\.mg','\.tm\.mg','\.edu\.mg','\.mil\.mg',
  247     '\.com\.mg','\.com\.mk','\.org\.mk','\.net\.mk','\.edu\.mk','\.gov\.mk','\.inf\.mk','\.name\.mk',
  248     '\.pro\.mk','\.com\.ml','\.net\.ml','\.org\.ml','\.edu\.ml','\.gov\.ml','\.presse\.ml','\.gov\.mn',
  249     '\.edu\.mn','\.org\.mn','\.com\.mo','\.edu\.mo','\.gov\.mo','\.net\.mo','\.org\.mo','\.com\.mt',
  250     '\.org\.mt','\.net\.mt','\.edu\.mt','\.gov\.mt','\.aero\.mv','\.biz\.mv','\.com\.mv','\.coop\.mv',
  251     '\.edu\.mv','\.gov\.mv','\.info\.mv','\.int\.mv','\.mil\.mv','\.museum\.mv','\.name\.mv','\.net\.mv',
  252     '\.org\.mv','\.pro\.mv','\.ac\.mw','\.co\.mw','\.com\.mw','\.coop\.mw','\.edu\.mw','\.gov\.mw',
  253     '\.int\.mw','\.museum\.mw','\.net\.mw','\.org\.mw','\.com\.mx','\.net\.mx','\.org\.mx','\.edu\.mx',
  254     '\.gob\.mx','\.com\.my','\.net\.my','\.org\.my','\.gov\.my','\.edu\.my','\.sch\.my','\.mil\.my',
  255     '\.name\.my','\.com\.nf','\.net\.nf','\.arts\.nf','\.store\.nf','\.web\.nf','\.firm\.nf',
  256     '\.info\.nf','\.other\.nf','\.per\.nf','\.rec\.nf','\.com\.ng','\.org\.ng','\.gov\.ng','\.edu\.ng',
  257     '\.net\.ng','\.sch\.ng','\.name\.ng','\.mobi\.ng','\.biz\.ng','\.mil\.ng','\.gob\.ni','\.co\.ni',
  258     '\.com\.ni','\.ac\.ni','\.edu\.ni','\.org\.ni','\.nom\.ni','\.net\.ni','\.mil\.ni','\.com\.np',
  259     '\.edu\.np','\.gov\.np','\.org\.np','\.mil\.np','\.net\.np','\.edu\.nr','\.gov\.nr','\.biz\.nr',
  260     '\.info\.nr','\.net\.nr','\.org\.nr','\.com\.nr','\.com\.om','\.co\.om','\.edu\.om','\.ac\.om',
  261     '\.sch\.om','\.gov\.om','\.net\.om','\.org\.om','\.mil\.om','\.museum\.om','\.biz\.om','\.pro\.om',
  262     '\.med\.om','\.edu\.pe','\.gob\.pe','\.nom\.pe','\.mil\.pe','\.sld\.pe','\.org\.pe','\.com\.pe',
  263     '\.net\.pe','\.com\.ph','\.net\.ph','\.org\.ph','\.mil\.ph','\.ngo\.ph','\.i\.ph','\.gov\.ph',
  264     '\.edu\.ph','\.com\.pk','\.net\.pk','\.edu\.pk','\.org\.pk','\.fam\.pk','\.biz\.pk','\.web\.pk',
  265     '\.gov\.pk','\.gob\.pk','\.gok\.pk','\.gon\.pk','\.gop\.pk','\.gos\.pk','\.pwr\.pl','\.com\.pl',
  266     '\.biz\.pl','\.net\.pl','\.art\.pl','\.edu\.pl','\.org\.pl','\.ngo\.pl','\.gov\.pl','\.info\.pl',
  267     '\.mil\.pl','\.waw\.pl','\.warszawa\.pl','\.wroc\.pl','\.wroclaw\.pl','\.krakow\.pl','\.katowice\.pl',
  268     '\.poznan\.pl','\.lodz\.pl','\.gda\.pl','\.gdansk\.pl','\.slupsk\.pl','\.radom\.pl','\.szczecin\.pl',
  269     '\.lublin\.pl','\.bialystok\.pl','\.olsztyn\.pl','\.torun\.pl','\.gorzow\.pl','\.zgora\.pl',
  270     '\.biz\.pr','\.com\.pr','\.edu\.pr','\.gov\.pr','\.info\.pr','\.isla\.pr','\.name\.pr','\.net\.pr',
  271     '\.org\.pr','\.pro\.pr','\.est\.pr','\.prof\.pr','\.ac\.pr','\.com\.ps','\.net\.ps','\.org\.ps',
  272     '\.edu\.ps','\.gov\.ps','\.plo\.ps','\.sec\.ps','\.co\.pw','\.ne\.pw','\.or\.pw','\.ed\.pw','\.go\.pw',
  273     '\.belau\.pw','\.arts\.ro','\.com\.ro','\.firm\.ro','\.info\.ro','\.nom\.ro','\.nt\.ro','\.org\.ro',
  274     '\.rec\.ro','\.store\.ro','\.tm\.ro','\.www\.ro','\.co\.rs','\.org\.rs','\.edu\.rs','\.ac\.rs',
  275     '\.gov\.rs','\.in\.rs','\.com\.sb','\.net\.sb','\.edu\.sb','\.org\.sb','\.gov\.sb','\.com\.sc',
  276     '\.net\.sc','\.edu\.sc','\.gov\.sc','\.org\.sc','\.co\.sh','\.com\.sh','\.org\.sh','\.gov\.sh',
  277     '\.edu\.sh','\.net\.sh','\.nom\.sh','\.com\.sl','\.net\.sl','\.org\.sl','\.edu\.sl','\.gov\.sl',
  278     '\.gov\.st','\.saotome\.st','\.principe\.st','\.consulado\.st','\.embaixada\.st','\.org\.st',
  279     '\.edu\.st','\.net\.st','\.com\.st','\.store\.st','\.mil\.st','\.co\.st','\.edu\.sv','\.gob\.sv',
  280     '\.com\.sv','\.org\.sv','\.red\.sv','\.co\.sz','\.ac\.sz','\.org\.sz','\.com\.tr','\.gen\.tr',
  281     '\.org\.tr','\.biz\.tr','\.info\.tr','\.av\.tr','\.dr\.tr','\.pol\.tr','\.bel\.tr','\.tsk\.tr',
  282     '\.bbs\.tr','\.k12\.tr','\.edu\.tr','\.name\.tr','\.net\.tr','\.gov\.tr','\.web\.tr','\.tel\.tr',
  283     '\.tv\.tr','\.co\.tt','\.com\.tt','\.org\.tt','\.net\.tt','\.biz\.tt','\.info\.tt','\.pro\.tt',
  284     '\.int\.tt','\.coop\.tt','\.jobs\.tt','\.mobi\.tt','\.travel\.tt','\.museum\.tt','\.aero\.tt',
  285     '\.cat\.tt','\.tel\.tt','\.name\.tt','\.mil\.tt','\.edu\.tt','\.gov\.tt','\.edu\.tw','\.gov\.tw',
  286     '\.mil\.tw','\.com\.tw','\.net\.tw','\.org\.tw','\.idv\.tw','\.game\.tw','\.ebiz\.tw','\.club\.tw',
  287     '\.com\.mu','\.gov\.mu','\.net\.mu','\.org\.mu','\.ac\.mu','\.co\.mu','\.or\.mu','\.ac\.mz',
  288     '\.co\.mz','\.edu\.mz','\.org\.mz','\.gov\.mz','\.com\.na','\.co\.na','\.ac\.nz','\.co\.nz',
  289     '\.cri\.nz','\.geek\.nz','\.gen\.nz','\.govt\.nz','\.health\.nz','\.iwi\.nz','\.maori\.nz',
  290     '\.mil\.nz','\.net\.nz','\.org\.nz','\.parliament\.nz','\.school\.nz','\.abo\.pa','\.ac\.pa',
  291     '\.com\.pa','\.edu\.pa','\.gob\.pa','\.ing\.pa','\.med\.pa','\.net\.pa','\.nom\.pa','\.org\.pa',
  292     '\.sld\.pa','\.com\.pt','\.edu\.pt','\.gov\.pt','\.int\.pt','\.net\.pt','\.nome\.pt','\.org\.pt',
  293     '\.publ\.pt','\.com\.py','\.edu\.py','\.gov\.py','\.mil\.py','\.net\.py','\.org\.py','\.com\.qa',
  294     '\.edu\.qa','\.gov\.qa','\.mil\.qa','\.net\.qa','\.org\.qa','\.asso\.re','\.com\.re','\.nom\.re',
  295     '\.ac\.ru','\.adygeya\.ru','\.altai\.ru','\.amur\.ru','\.arkhangelsk\.ru','\.astrakhan\.ru',
  296     '\.bashkiria\.ru','\.belgorod\.ru','\.bir\.ru','\.bryansk\.ru','\.buryatia\.ru','\.cbg\.ru',
  297     '\.chel\.ru','\.chelyabinsk\.ru','\.chita\.ru','\.chita\.ru','\.chukotka\.ru','\.chuvashia\.ru',
  298     '\.com\.ru','\.dagestan\.ru','\.e-burg\.ru','\.edu\.ru','\.gov\.ru','\.grozny\.ru','\.int\.ru',
  299     '\.irkutsk\.ru','\.ivanovo\.ru','\.izhevsk\.ru','\.jar\.ru','\.joshkar-ola\.ru','\.kalmykia\.ru',
  300     '\.kaluga\.ru','\.kamchatka\.ru','\.karelia\.ru','\.kazan\.ru','\.kchr\.ru','\.kemerovo\.ru',
  301     '\.khabarovsk\.ru','\.khakassia\.ru','\.khv\.ru','\.kirov\.ru','\.koenig\.ru','\.komi\.ru',
  302     '\.kostroma\.ru','\.kranoyarsk\.ru','\.kuban\.ru','\.kurgan\.ru','\.kursk\.ru','\.lipetsk\.ru',
  303     '\.magadan\.ru','\.mari\.ru','\.mari-el\.ru','\.marine\.ru','\.mil\.ru','\.mordovia\.ru',
  304     '\.mosreg\.ru','\.msk\.ru','\.murmansk\.ru','\.nalchik\.ru','\.net\.ru','\.nnov\.ru','\.nov\.ru',
  305     '\.novosibirsk\.ru','\.nsk\.ru','\.omsk\.ru','\.orenburg\.ru','\.org\.ru','\.oryol\.ru','\.penza\.ru',
  306     '\.perm\.ru','\.pp\.ru','\.pskov\.ru','\.ptz\.ru','\.rnd\.ru','\.ryazan\.ru','\.sakhalin\.ru','\.samara\.ru',
  307     '\.saratov\.ru','\.simbirsk\.ru','\.smolensk\.ru','\.spb\.ru','\.stavropol\.ru','\.stv\.ru',
  308     '\.surgut\.ru','\.tambov\.ru','\.tatarstan\.ru','\.tom\.ru','\.tomsk\.ru','\.tsaritsyn\.ru',
  309     '\.tsk\.ru','\.tula\.ru','\.tuva\.ru','\.tver\.ru','\.tyumen\.ru','\.udm\.ru','\.udmurtia\.ru','\.ulan-ude\.ru',
  310     '\.vladikavkaz\.ru','\.vladimir\.ru','\.vladivostok\.ru','\.volgograd\.ru','\.vologda\.ru',
  311     '\.voronezh\.ru','\.vrn\.ru','\.vyatka\.ru','\.yakutia\.ru','\.yamal\.ru','\.yekaterinburg\.ru',
  312     '\.yuzhno-sakhalinsk\.ru','\.ac\.rw','\.co\.rw','\.com\.rw','\.edu\.rw','\.gouv\.rw','\.gov\.rw',
  313     '\.int\.rw','\.mil\.rw','\.net\.rw','\.com\.sa','\.edu\.sa','\.gov\.sa','\.med\.sa','\.net\.sa',
  314     '\.org\.sa','\.pub\.sa','\.sch\.sa','\.com\.sd','\.edu\.sd','\.gov\.sd','\.info\.sd','\.med\.sd',
  315     '\.net\.sd','\.org\.sd','\.tv\.sd','\.a\.se','\.ac\.se','\.b\.se','\.bd\.se','\.c\.se','\.d\.se',
  316     '\.e\.se','\.f\.se','\.g\.se','\.h\.se','\.i\.se','\.k\.se','\.l\.se','\.m\.se','\.n\.se','\.o\.se',
  317     '\.org\.se','\.p\.se','\.parti\.se','\.pp\.se','\.press\.se','\.r\.se','\.s\.se','\.t\.se','\.tm\.se',
  318     '\.u\.se','\.w\.se','\.x\.se','\.y\.se','\.z\.se','\.com\.sg','\.edu\.sg','\.gov\.sg','\.idn\.sg',
  319     '\.net\.sg','\.org\.sg','\.per\.sg','\.art\.sn','\.com\.sn','\.edu\.sn','\.gouv\.sn','\.org\.sn',
  320     '\.perso\.sn','\.univ\.sn','\.com\.sy','\.edu\.sy','\.gov\.sy','\.mil\.sy','\.net\.sy','\.news\.sy',
  321     '\.org\.sy','\.ac\.th','\.co\.th','\.go\.th','\.in\.th','\.mi\.th','\.net\.th','\.or\.th','\.ac\.tj',
  322     '\.biz\.tj','\.co\.tj','\.com\.tj','\.edu\.tj','\.go\.tj','\.gov\.tj','\.info\.tj','\.int\.tj',
  323     '\.mil\.tj','\.name\.tj','\.net\.tj','\.nic\.tj','\.org\.tj','\.test\.tj','\.web\.tj','\.agrinet\.tn',
  324     '\.com\.tn','\.defense\.tn','\.edunet\.tn','\.ens\.tn','\.fin\.tn','\.gov\.tn','\.ind\.tn','\.info\.tn',
  325     '\.intl\.tn','\.mincom\.tn','\.nat\.tn','\.net\.tn','\.org\.tn','\.perso\.tn','\.rnrt\.tn','\.rns\.tn',
  326     '\.rnu\.tn','\.tourism\.tn','\.ac\.tz','\.co\.tz','\.go\.tz','\.ne\.tz','\.or\.tz','\.biz\.ua',
  327     '\.cherkassy\.ua','\.chernigov\.ua','\.chernovtsy\.ua','\.ck\.ua','\.cn\.ua','\.co\.ua','\.com\.ua',
  328     '\.crimea\.ua','\.cv\.ua','\.dn\.ua','\.dnepropetrovsk\.ua','\.donetsk\.ua','\.dp\.ua','\.edu\.ua',
  329     '\.gov\.ua','\.if\.ua','\.in\.ua','\.ivano-frankivsk\.ua','\.kh\.ua','\.kharkov\.ua','\.kherson\.ua',
  330     '\.khmelnitskiy\.ua','\.kiev\.ua','\.kirovograd\.ua','\.km\.ua','\.kr\.ua','\.ks\.ua','\.kv\.ua',
  331     '\.lg\.ua','\.lugansk\.ua','\.lutsk\.ua','\.lviv\.ua','\.me\.ua','\.mk\.ua','\.net\.ua','\.nikolaev\.ua',
  332     '\.od\.ua','\.odessa\.ua','\.org\.ua','\.pl\.ua','\.poltava\.ua','\.pp\.ua','\.rovno\.ua','\.rv\.ua',
  333     '\.sebastopol\.ua','\.sumy\.ua','\.te\.ua','\.ternopil\.ua','\.uzhgorod\.ua','\.vinnica\.ua','\.vn\.ua',
  334     '\.zaporizhzhe\.ua','\.zhitomir\.ua','\.zp\.ua','\.zt\.ua','\.ac\.ug','\.co\.ug','\.go\.ug','\.ne\.ug',
  335     '\.or\.ug','\.org\.ug','\.sc\.ug','\.ac\.uk','\.bl\.uk','\.british-library\.uk','\.co\.uk','\.cym\.uk',
  336     '\.gov\.uk','\.govt\.uk','\.icnet\.uk','\.jet\.uk','\.lea\.uk','\.ltd\.uk','\.me\.uk','\.mil\.uk',
  337     '\.mod\.uk','\.mod\.uk','\.national-library-scotland\.uk','\.nel\.uk','\.net\.uk','\.nhs\.uk',
  338     '\.nhs\.uk','\.nic\.uk','\.nls\.uk','\.org\.uk','\.orgn\.uk','\.parliament\.uk','\.parliament\.uk',
  339     '\.plc\.uk','\.police\.uk','\.sch\.uk','\.scot\.uk','\.soc\.uk','\.dni\.us','\.fed\.us','\.isa\.us',
  340     '\.kids\.us','\.nsn\.us','\.com\.uy','\.edu\.uy','\.gub\.uy','\.mil\.uy','\.net\.uy','\.org\.uy',
  341     '\.co\.ve','\.com\.ve','\.edu\.ve','\.gob\.ve','\.info\.ve','\.mil\.ve','\.net\.ve','\.org\.ve',
  342     '\.web\.ve','\.co\.vi','\.com\.vi','\.k12\.vi','\.net\.vi','\.org\.vi','\.ac\.vn','\.biz\.vn',
  343     '\.com\.vn','\.edu\.vn','\.gov\.vn','\.health\.vn','\.info\.vn','\.int\.vn','\.name\.vn','\.net\.vn',
  344     '\.org\.vn','\.pro\.vn','\.co\.ye','\.com\.ye','\.gov\.ye','\.ltd\.ye','\.me\.ye','\.net\.ye',
  345     '\.org\.ye','\.plc\.ye','\.ac\.yu','\.co\.yu','\.edu\.yu','\.gov\.yu','\.org\.yu','\.ac\.za',
  346     '\.agric\.za','\.alt\.za','\.bourse\.za','\.city\.za','\.co\.za','\.cybernet\.za','\.db\.za',
  347     '\.ecape\.school\.za','\.edu\.za','\.fs\.school\.za','\.gov\.za','\.gp\.school\.za','\.grondar\.za',
  348     '\.iaccess\.za','\.imt\.za','\.inca\.za','\.kzn\.school\.za','\.landesign\.za','\.law\.za',
  349     '\.lp\.school\.za','\.mil\.za','\.mpm\.school\.za','\.ncape\.school\.za','\.net\.za','\.ngo\.za',
  350     '\.nis\.za','\.nom\.za','\.nw\.school\.za','\.olivetti\.za','\.org\.za','\.pix\.za','\.school\.za',
  351     '\.tm\.za','\.wcape\.school\.za','\.web\.za','\.ac\.zm','\.co\.zm','\.com\.zm','\.edu\.zm','\.gov\.zm',
  352     '\.net\.zm','\.org\.zm','\.sch\.zm'
  353 );
  354 
  355 my @TLD2 = (
  356     '\.ac','\.ad','\.ae','\.af','\.ag','\.ai','\.al','\.am','\.ao','\.aq',
  357     '\.ar','\.as','\.at','\.au','\.aw','\.ax','\.az','\.ba','\.bb','\.bd',
  358     '\.be','\.bf','\.bg','\.bh','\.bi','\.bj','\.bm','\.bn','\.bo','\.br',
  359     '\.bs','\.bt','\.bw','\.by','\.bz','\.ca','\.cc','\.cd','\.cf','\.cg',
  360     '\.ch','\.ci','\.ck','\.cl','\.cm','\.cn','\.co','\.cr','\.cu','\.cv',
  361     '\.cw','\.cx','\.cy','\.cz','\.de','\.dj','\.dk','\.dm','\.do','\.dz',
  362     '\.ec','\.ee','\.eg','\.er','\.es','\.et','\.eu','\.fi','\.fj','\.fk',
  363     '\.fm','\.fo','\.fr','\.ga','\.gd','\.ge','\.gf','\.gg','\.gh','\.gi',
  364     '\.gl','\.gm','\.gn','\.gp','\.gq','\.gr','\.gs','\.gt','\.gu','\.gw',
  365     '\.gy','\.hk','\.hm','\.hn','\.hr','\.ht','\.hu','\.id','\.ie','\.il',
  366     '\.im','\.in','\.io','\.iq','\.ir','\.is','\.it','\.je','\.jm','\.jo',
  367     '\.jp','\.ke','\.kg','\.kh','\.ki','\.km','\.kn','\.kp','\.kr','\.kw',
  368     '\.ky','\.kz','\.la','\.lb','\.lc','\.li','\.lk','\.lr','\.ls','\.lt',
  369     '\.lu','\.lv','\.ly','\.ma','\.mc','\.md','\.me','\.mg','\.mh','\.mk',
  370     '\.ml','\.mm','\.mn','\.mo','\.mp','\.mq','\.mr','\.ms','\.mt','\.mu',
  371     '\.mv','\.mw','\.mx','\.my','\.mz','\.na','\.nc','\.ne','\.nf','\.ng',
  372     '\.ni','\.nl','\.no','\.np','\.nr','\.nu','\.nz','\.om','\.pa','\.pe',
  373     '\.pf','\.pg','\.ph','\.pk','\.pl','\.pm','\.pn','\.pr','\.ps','\.pt',
  374     '\.pw','\.py','\.qa','\.re','\.ro','\.rs','\.ru','\.rw','\.sa','\.sb',
  375     '\.sc','\.sd','\.se','\.sg','\.sh','\.si','\.sk','\.sl','\.sm','\.sn',
  376     '\.so','\.sr','\.ss','\.st','\.su','\.sv','\.sx','\.sy','\.sz','\.tc',
  377     '\.td','\.tf','\.tg','\.th','\.tj','\.tk','\.tl','\.tm','\.tn','\.to',
  378     '\.tr','\.tt','\.tv','\.tw','\.tz','\.ua','\.ug','\.uk','\.us','\.uy',
  379     '\.uz','\.va','\.vc','\.ve','\.vg','\.vi','\.vn','\.vu','\.wf','\.ws',
  380     '\.ye','\.za','\.zm','\.zw','\.com','\.info','\.net','\.org','\.biz',
  381     '\.name','\.pro','\.xxx','\.aero','\.asia','\.bzh','\.cat','\.coop',
  382     '\.edu','\.gov','\.int','\.jobs','\.mil','\.mobi','\.museum','\.paris',
  383     '\.sport','\.tel','\.travel','\.kids','\.mail','\.post','\.arpa','\.example',
  384     '\.invalid','\.localhost','\.test','\.bitnet','\.csnet','\.lan','\.local',
  385     '\.onion','\.root','\.uucp','\.tld','\.nato'
  386 );
  387 
  388 my %month_number = (
  389     'Jan' => '01',
  390     'Feb' => '02',
  391     'Mar' => '03',
  392     'Apr' => '04',
  393     'May' => '05',
  394     'Jun' => '06',
  395     'Jul' => '07',
  396     'Aug' => '08',
  397     'Sep' => '09',
  398     'Oct' => '10',
  399     'Nov' => '11',
  400     'Dec' => '12',
  401 );
  402 
  403 # Regex to match ipv4 and ipv6 address
  404 my $ip_regexp = qr/^([a-fA-F0-9\.\:]+)$/;
  405 my $cidr_regex = qr/^[a-fA-F0-9\.\:]+\/\d+$/;
  406 
  407 # Native log format squid %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %un %Sh/%<A %mt
  408 my $native_format_regex1 = qr/^(\d+\.\d{3})\s+(\d+)\s+([^\s]+)\s+([^\s]+)\s+(\d+)\s+([^\s]+)\s+(.*)/;
  409 my $native_format_regex2 = qr/^([^\s]+?)\s+([^\s]+)\s+([^\s]+\/[^\s]+)\s+([^\s]+)\s*/;
  410 #logformat common     %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh
  411 #logformat combined   %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh
  412 my $common_format_regex1 = qr/([^\s]+)\s([^\s]+)\s([^\s]+)\s\[(\d+\/...\/\d+:\d+:\d+:\d+\s[\d\+\-]+)\]\s"([^\s]+)\s([^\s]+)\s([^\s]+)"\s(\d+)\s+(\d+)(.*)\s([^\s:]+:[^\s]+)\s*([^\/]+\/[^\s]+|-)?$/;
  413 # Log format for SquidGuard logs
  414 my $sg_format_regex1 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .* Request\(([^\/]+\/[^\/]+)\/[^\)]*\) ([^\s]+) ([^\s\\]+)\/[^\s]+ ([^\s]+) ([^\s]+) ([^\s]+)/;
  415 my $sg_format_regex2 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .* (New setting|Added User|init domainlist|Going into emergency mode|ending emergency mode)/;
  416 # Log format for ufdbGuard logs: BLOCK user clienthost aclname category url method
  417 my $ug_format_regex1 = qr/^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) .* (BLOCK) ([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)$/;
  418 
  419 sub new
  420 {
  421     my ($class, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone) = @_;
  422 
  423     # Construct the class
  424     my $self = {};
  425     bless $self, $class;
  426 
  427     # Initialize all variables
  428     $self->_init($conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone);
  429 
  430     # Return the instance
  431     return($self);
  432 
  433 }
  434 
  435 sub localdie
  436 {
  437     my ($self, $msg) = @_;
  438 
  439     print STDERR "$msg";
  440     unlink("$self->{pidfile}");
  441 
  442     # Cleanup old temporary files
  443     foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp', 'ug_last_parsed.tmp') {
  444         unlink("$self->{pid_dir}/$tmp_file");
  445     }
  446 
  447     exit 1;
  448 }
  449 
  450 ####
  451 # method used to fork as many child as wanted
  452 ##
  453 sub spawn
  454 {
  455     my $self = shift;
  456     my $coderef = shift;
  457 
  458     unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') {
  459         print "usage: spawn CODEREF";
  460         exit 0;
  461     }
  462 
  463     my $pid;
  464     if (!defined($pid = fork)) {
  465         print STDERR "Error: cannot fork: $!\n";
  466         return;
  467     } elsif ($pid) {
  468         $self->{running_pids}{$pid} = 1;
  469         return; # the parent
  470     }
  471     # the child -- go spawn
  472     $< = $>;
  473     $( = $); # suid progs only
  474 
  475     exit &$coderef();
  476 }
  477 
  478 sub wait_all_childs
  479 {
  480     my $self = shift;
  481 
  482     while (scalar keys %{$self->{running_pids}} > 0) {
  483         my $kid = waitpid(-1, WNOHANG);
  484         if ($kid > 0) {
  485             delete $self->{running_pids}{$kid};
  486         }
  487         sleep(1);
  488     }
  489 }
  490 
  491 sub manage_queue_size
  492 {
  493     my ($self, $child_count) = @_;
  494 
  495     while ($child_count >= $self->{queue_size}) {
  496         my $kid = waitpid(-1, WNOHANG);
  497         if ($kid > 0) {
  498             $child_count--;
  499             delete $self->{running_pids}{$kid};
  500         }
  501         sleep(1);
  502     }
  503 
  504     return $child_count;
  505 }
  506 
  507 sub save_current_line
  508 {
  509     my $self = shift;
  510 
  511     if ($self->{end_time}) {
  512         my $current = new IO::File;
  513         $current->open(">$self->{Output}/SquidAnalyzer.current") or $self->localdie("FATAL: Can't write to file $self->{Output}/SquidAnalyzer.current, $!\n");
  514         print $current "$self->{end_time}\t$self->{end_offset}";
  515         $current->close;
  516     }
  517     if ($self->{sg_end_time}) {
  518         my $current = new IO::File;
  519         $current->open(">$self->{Output}/SquidGuard.current") or $self->localdie("FATAL: Can't write to file $self->{Output}/SquidGuard.current, $!\n");
  520         print $current "$self->{sg_end_time}\t$self->{sg_end_offset}";
  521         $current->close;
  522     }
  523     if ($self->{ug_end_time}) {
  524         my $current = new IO::File;
  525         $current->open(">$self->{Output}/ufdbGuard.current") or $self->localdie("FATAL: Can't write to file $self->{Output}/ufdbGuard.current, $!\n");
  526         print $current "$self->{ug_end_time}\t$self->{ug_end_offset}";
  527         $current->close;
  528     }
  529 }
  530 
  531 # Extract number of seconds since epoch from timestamp in log line
  532 sub look_for_timestamp
  533 {
  534     my ($self, $line) = @_;
  535 
  536     my $time = 0;
  537     my $tz = ((0-$self->{TimeZone})*3600);
  538     # Squid native format
  539     if ( $line =~ $native_format_regex1 ) {
  540         $time = $1;
  541         $self->{is_squidguard_log} = 0;
  542         $self->{is_ufdbguard_log} = 0;
  543     # Squid common HTTP format
  544     } elsif ( $line =~ $common_format_regex1 ) {
  545         $time = $4;
  546         $time =~ /(\d+)\/(...)\/(\d+):(\d+):(\d+):(\d+)\s/;
  547         if (!$self->{TimeZone}) {
  548             $time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900);
  549         } else {
  550             $time = timegm_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900) + $tz;
  551         }
  552         $self->{is_squidguard_log} = 0;
  553         $self->{is_ufdbguard_log} = 0;
  554     # SquidGuard log format
  555     } elsif (( $line =~ $sg_format_regex1 ) || ( $line =~ $sg_format_regex2 )) {
  556         $self->{is_squidguard_log} = 1;
  557         $self->{is_ufdbguard_log} = 0;
  558         if (!$self->{TimeZone}) {
  559             $time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
  560         } else {
  561             $time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz;
  562         }
  563     # ufdbGuard log format
  564     } elsif ( $line =~ $ug_format_regex1 ) {
  565         $self->{is_ufdbguard_log} = 1;
  566         $self->{is_squidguard_log} = 0;
  567         if (!$self->{TimeZone}) {
  568             $time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
  569         } else {
  570             $time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz;
  571         }
  572     }
  573 
  574     return $time;
  575 }
  576 
  577 # Detect if log file is a squidGuard log or not
  578 sub get_log_format
  579 {
  580     my ($self, $file) = @_;
  581 
  582     my $logfile = new IO::File;
  583     $logfile->open($file) || $self->localdie("ERROR: Unable to open log file $file. $!\n");
  584     my $max_line = 10000;
  585     my $i = 0;
  586     while (my $line = <$logfile>) {
  587         chomp($line);
  588 
  589         # SquidGuard log format
  590         if (( $line =~ $sg_format_regex1 ) || ( $line =~ $sg_format_regex2 )) {
  591             $self->{is_squidguard_log} = 1;
  592             $self->{is_ufdbguard_log} = 0;
  593             last;
  594         # ufdbGuard log format
  595         } elsif ( $line =~ $ug_format_regex1 ) {
  596             $self->{is_ufdbguard_log} = 1;
  597             $self->{is_squidguard_log} = 0;
  598             last;
  599         # Squid native format
  600         } elsif ( $line =~ $native_format_regex1 ) {
  601             $self->{is_squidguard_log} = 0;
  602             $self->{is_ufdbguard_log} = 0;
  603             last;
  604         # Squid common HTTP format
  605         } elsif ( $line =~ $common_format_regex1 ) {
  606             $self->{is_squidguard_log} = 0;
  607             $self->{is_ufdbguard_log} = 0;
  608             last;
  609         } else {
  610             last if ($i > $max_line);
  611         }
  612         $i++;
  613     }
  614     $logfile->close();
  615 }
  616 
  617 
  618 sub parseFile
  619 {
  620     my ($self) = @_;
  621 
  622     my $line_count = 0;
  623     my $line_processed_count = 0;
  624     my $line_stored_count = 0;
  625     my $saved_queue_size = $self->{queue_size};
  626     my $history_offset = $self->{end_offset};
  627 
  628     foreach my $lfile (@{$self->{LogFile}}) {
  629 
  630         # Detect if log file is from squid or squidguard
  631         $self->get_log_format($lfile);
  632         if ($self->{is_ufdbguard_log}) {
  633             $history_offset = $self->{ug_end_offset};
  634         } elsif ($self->{is_squidguard_log}) {
  635             $history_offset = $self->{sg_end_offset};
  636         } else {
  637             $history_offset = $self->{end_offset};
  638         }
  639 
  640         print STDERR "Starting to parse logfile $lfile.\n" if (!$self->{QuietMode});
  641         if ((!-f $lfile) || (-z $lfile)) {
  642             print STDERR "DEBUG: bad or empty log file $lfile.\n" if (!$self->{QuietMode});
  643             next;
  644         }
  645         # Restore the right multiprocess queue
  646         $self->{queue_size} = $saved_queue_size;
  647 
  648         # Compressed file do not allow multiprocess
  649         if ($lfile =~ /\.(gz|bz2)$/) {
  650             $self->{queue_size} = 1;
  651         }
  652 
  653         # Search the last position in logfile
  654         if ($history_offset) {
  655 
  656             # Initialize start offset for each file
  657             if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
  658                 $self->{end_offset} = $history_offset;
  659             } elsif (!$self->{is_squidguard_log}) {
  660                 $self->{ug_end_offset} = $history_offset;
  661             } else {
  662                 $self->{sg_end_offset} = $history_offset;
  663             }
  664 
  665             # Compressed file are always read from the begining
  666             if ($lfile =~ /\.(gz|bz2)$/i) {
  667                 if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
  668                     $self->{end_offset} = 0;
  669                 } elsif (!$self->{is_squidguard_log}) {
  670                     $self->{ug_end_offset} = 0;
  671                 } else {
  672                     $self->{sg_end_offset} = 0;
  673                 }
  674             } else {
  675                 # Look at first line to see if the file should be parse from the begining.
  676                 my $logfile = new IO::File;
  677                 $logfile->open($lfile) || $self->localdie("ERROR: Unable to open log file $lfile. $!\n");
  678                 my $line = <$logfile>;
  679                 chomp($line);
  680 
  681                 # Remove syslog header and mark the format
  682                 if ($line =~ s/^... \d+ \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ [^\s]+ (\d{10}\.\d{3}) /$1 /) {
  683                     print STDERR "DEBUG: log was generated through syslog, the header will be removed.\n" if (!$self->{QuietMode});
  684                     $self->{Syslog} = 1;
  685                 }
  686 
  687                 my $curtime = $self->look_for_timestamp($line);
  688 
  689                 my $hist_time = $self->{history_time};
  690                 if ($self->{is_squidguard_log}) {
  691                     $hist_time = $self->{sg_history_time};
  692                 } elsif ($self->{is_ufdbguard_log}) {
  693                     $hist_time = $self->{ug_history_time};
  694                 }
  695                 # if the first timestamp is higher that the history time, start from the beginning
  696                 if ($curtime > $hist_time) {
  697                     print STDERR "DEBUG: new file: $lfile, start from the beginning.\n" if (!$self->{QuietMode});
  698                     if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
  699                         $self->{end_offset} = 0;
  700                     } elsif (!$self->{is_squidguard_log}) {
  701                         $self->{ug_end_offset} = 0;
  702                     } else {
  703                         $self->{sg_end_offset} = 0;
  704                     }
  705                 # If the size of the file is lower than the history offset, parse this file from the beginning
  706                 } elsif ((lstat($lfile))[7] <= $history_offset) {
  707                     # move at begining of the file to see if this is a new one
  708                     $logfile->seek(0, 0);
  709                     for (my $i = 1; $i <= 10000; $i++) {
  710                         $line = <$logfile>;
  711                         chomp($line);
  712                         # Remove syslog header and mark the format
  713                         if ($self->{Syslog}) {
  714                             $line =~ s/^[A-Z][a-z]{2} \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ \[[^\]]+\] (\d{10}\.\d{3})/$1/;
  715                         }
  716                         $curtime = $self->look_for_timestamp($line);
  717                         if ($curtime) {
  718                             # If timestamp found at startup is lower than the history file,
  719                             # the file will not be parsed at all.
  720                             if ($hist_time > $curtime) {
  721                                 print STDERR "DEBUG: this file will not be parsed: $lfile, size is lower than expected.\n" if (!$self->{QuietMode});
  722                                 print STDERR "DEBUG: exploring $lfile, timestamp found at startup, $curtime, is lower than history time $hist_time.\n" if (!$self->{QuietMode});
  723                                 $line = 'NOK';
  724                                 last;
  725                             }
  726                         }
  727                     }
  728                     $logfile->close;
  729                     # This file should be ommitted jump to the next file
  730                     next if ($line eq 'NOK');
  731                     
  732                     print STDERR "DEBUG: new file: $lfile, start from the beginning.\n" if (!$self->{QuietMode});
  733                     if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
  734                         $self->{end_offset} = 0;
  735                     } elsif (!$self->{is_squidguard_log}) {
  736                         $self->{ug_end_offset} = 0;
  737                     } else {
  738                         $self->{sg_end_offset} = 0;
  739                     }
  740                 } else {
  741                     # move at offset and see if next line is older than history time
  742                     $logfile->seek($history_offset, 0);
  743                     for (my $i = 1; $i <= 10; $i++) {
  744                         $line = <$logfile>;
  745                         chomp($line);
  746                         # Remove syslog header and mark the format
  747                         if ($self->{Syslog}) {
  748                             $line =~ s/^[A-Z][a-z]{2} \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ \[[^\]]+\] (\d{10}\.\d{3})/$1/;
  749                         }
  750                         $curtime = $self->look_for_timestamp($line);
  751                         if ($curtime) {
  752                             if ($curtime < $hist_time) {
  753                                 my $tmp_time = CORE::localtime($curtime);
  754                                 print STDERR "DEBUG: this file will not be parsed: $lfile, line after offset is older than expected: $curtime < $hist_time.\n" if (!$self->{QuietMode});
  755                                 $line = 'NOK';
  756                                 last;
  757                             }
  758                         }
  759                     }
  760                     $logfile->close;
  761                     # This file should be ommitted jump to the next file
  762                     next if ($line eq 'NOK');
  763                 }
  764                 $logfile->close;
  765             }
  766 
  767         } else {
  768             print STDERR "DEBUG: this file will be parsed, no history found.\n" if (!$self->{QuietMode});
  769             # Initialise start offset for each file
  770             if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
  771                 $self->{end_offset} = 0;
  772             } elsif (!$self->{is_squidguard_log}) {
  773                 $self->{ug_end_offset} = 0;
  774             } else {
  775                 $self->{sg_end_offset} = 0;
  776             }
  777         }
  778 
  779         if ($self->{queue_size} <= 1) {
  780             if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
  781                 $self->_parse_file_part($lfile, $self->{end_offset});
  782             } elsif (!$self->{is_squidguard_log}) {
  783                 $self->_parse_file_part($lfile, $self->{ug_end_offset});
  784             } else {
  785                 $self->_parse_file_part($lfile, $self->{sg_end_offset});
  786             }
  787         } else {
  788             # Create multiple processes to parse one log file by chunks of data
  789             my @chunks = $self->split_logfile($lfile);
  790             my $child_count = 0;
  791             for (my $i = 0; $i < $#chunks; $i++) {
  792                 if ($self->{interrupt}) {
  793                     print STDERR "FATAL: Abort signal received when processing to next chunk\n";
  794                     return;
  795                 }
  796                 $self->spawn(sub {
  797                     $self->_parse_file_part($lfile, $chunks[$i], $chunks[$i+1], $i);
  798                 });
  799                 $child_count = $self->manage_queue_size(++$child_count);
  800             }
  801         }
  802     }
  803 
  804     # Wait for last child stop
  805     $self->wait_all_childs() if ($self->{queue_size} > 1);
  806 
  807     # Get the last information parsed in this file part
  808     foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp', 'sg_last_parsed.tmp') {
  809 
  810         if (-e "$self->{pid_dir}/$tmp_file") {
  811 
  812             if (open(IN, "$self->{pid_dir}/$tmp_file")) {
  813                 my %history_tmp = ();
  814                 while (my $l = <IN>) {
  815                     chomp($l);
  816                     my @data = split(/\s/, $l);
  817                     $history_tmp{"$data[0]$data[1]$data[2]"}{$data[4]} = join(' ', @data);
  818                     $line_stored_count += $data[5];
  819                     $line_processed_count += $data[6];
  820                     $line_count += $data[7];
  821                     if (!$self->{first_year} || ("$data[8]$data[9]" lt "$self->{first_year}$self->{first_month}{$data[8]}}") ) {
  822                         $self->{first_year} = $data[8];
  823                         $self->{first_month}{$data[8]} = $data[9];
  824                     }
  825                     my @tmp = split(/,/, $data[10]);
  826                     foreach my $w (@tmp) {
  827                         if (!grep(/^$w$/, @{$self->{week_parsed}})) {
  828                             push(@{$self->{week_parsed}}, $w);
  829                         }
  830                     }
  831                 }
  832                 close(IN);
  833                 foreach my $date (sort {$b <=> $a} keys %history_tmp) {
  834                     foreach my $offset (sort {$b <=> $a} keys %{$history_tmp{$date}}) {
  835                         my @data = split(/\s/, $history_tmp{$date}{$offset});
  836                         $self->{last_year} = $data[0];
  837                         $self->{last_month}{$data[0]} = $data[1];
  838                         $self->{last_day}{$data[0]} = $data[2];
  839                         if ($tmp_file eq 'last_parsed.tmp') {
  840                             $self->{end_time} = $data[3];
  841                             $self->{end_offset} = $data[4];
  842                         } elsif ($tmp_file eq 'ug_last_parsed.tmp') {
  843                             $self->{ug_end_time} = $data[3];
  844                             $self->{ug_end_offset} = $data[4];
  845                         } elsif ($tmp_file eq 'sg_last_parsed.tmp') {
  846                             $self->{sg_end_time} = $data[3];
  847                             $self->{sg_end_offset} = $data[4];
  848                         }
  849                         last;
  850                     }
  851                     last;
  852                 }
  853             } else {
  854                 print STDERR "ERROR: can't read last parsed line from $self->{pid_dir}/$tmp_file, $!\n";
  855             }
  856         }
  857     }
  858 
  859     if (!$self->{last_year}) {
  860 
  861         print STDERR "No new log registered...\n" if (!$self->{QuietMode});
  862 
  863     } else {
  864 
  865         if (!$self->{QuietMode}) {
  866             print STDERR "SQUID LOG END TIME  : ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{end_time})), "\n" if ($self->{end_time});
  867             print STDERR "SQUIGUARD LOG END TIME  : ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{sg_end_time})), "\n" if ($self->{sg_end_time});
  868             print STDERR "UFDBGUARD LOG END TIME  : ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{ug_end_time})), "\n" if ($self->{ug_end_time});
  869             print STDERR "Read $line_count lines, matched $line_processed_count and found $line_stored_count new lines\n";
  870         }
  871 
  872         # Set the current start time into history file
  873         $self->save_current_line();
  874 
  875         # Force reordering and unique sorting of data files
  876         my $child_count = 0;
  877         if (!$self->{rebuild}) {
  878             if (!$self->{QuietMode}) {
  879                 print STDERR "Reordering daily data files now...\n";
  880             }
  881             for my $date ("$self->{first_year}$self->{first_month}{$self->{first_year}}" .. "$self->{last_year}$self->{last_month}{$self->{last_year}}") {
  882                 $date =~ /^(\d{4})(\d{2})$/;
  883                 my $y = $1;
  884                 my $m = $2;
  885                 next if (($m < 1) || ($m > 12));
  886                 if ($self->{interrupt}) {
  887                     print STDERR "FATAL: Abort signal received\n";
  888                     return;
  889                 }
  890                 if (-d "$self->{Output}/$y/$m") {
  891                     foreach my $d ("01" .. "31") {
  892                         if (-d "$self->{Output}/$y/$m/$d") {
  893                             if ($self->{queue_size} > 1) {
  894                                 $self->spawn(sub {
  895                                     $self->_save_stat($y, $m, $d);
  896                                 });
  897                                 $child_count = $self->manage_queue_size(++$child_count);
  898                             } else {
  899                                 $self->_save_stat($y, $m, $d);
  900                             }
  901                             $self->_clear_stats();
  902                         }
  903                     }
  904                 }
  905             }
  906             # Wait for last child stop
  907             $self->wait_all_childs() if ($self->{queue_size} > 1);
  908             $child_count = 0;
  909         }
  910 
  911         # Compute week statistics
  912         if (!$self->{no_week_stat}) {
  913             if (!$self->{QuietMode}) {
  914                 print STDERR "Generating weekly data files now...\n";
  915             }
  916 
  917             foreach my $week (@{$self->{week_parsed}}) {
  918                 my ($y, $m, $wn) = split(/\//, $week);
  919                 my @wd = &get_wdays_per_month($wn, "$y-$m");
  920                 $wn++;
  921 
  922                 print STDERR "Compute and dump weekly statistics for week $wn on $y\n" if (!$self->{QuietMode});
  923                 if ($self->{queue_size} > 1) {
  924                     $self->spawn(sub {
  925                         $self->_save_data($y, $m, undef, sprintf("%02d", $wn), @wd);
  926                     });
  927                     $child_count = $self->manage_queue_size(++$child_count);
  928                 } else {
  929                     $self->_save_data($y, $m, undef, sprintf("%02d", $wn), @wd);
  930                 }
  931                 $self->_clear_stats();
  932             }
  933         }
  934         # Wait for last child stop
  935         $self->wait_all_childs() if ($self->{queue_size} > 1);
  936         $child_count = 0;
  937 
  938         # Compute month statistics
  939         if (!$self->{QuietMode}) {
  940             print STDERR "Generating monthly data files now...\n";
  941         }
  942 
  943         for my $date ("$self->{first_year}$self->{first_month}{$self->{first_year}}" .. "$self->{last_year}$self->{last_month}{$self->{last_year}}") {
  944             $date =~ /^(\d{4})(\d{2})$/;
  945             my $y = $1;
  946             my $m = $2;
  947             next if (($m < 1) || ($m > 12));
  948             if ($self->{interrupt}) {
  949                 print STDERR "FATAL: Abort signal received\n";
  950                 return;
  951             }
  952             if (-d "$self->{Output}/$y/$m") {
  953                 print STDERR "Compute and dump month statistics for $y/$m\n" if (!$self->{QuietMode});
  954                 if ($self->{queue_size} > 1) {
  955                     $self->spawn(sub {
  956                         $self->_save_data("$y", "$m");
  957                     });
  958                     $child_count = $self->manage_queue_size(++$child_count);
  959                 } else {
  960                     $self->_save_data("$y", "$m");
  961                 }
  962                 $self->_clear_stats();
  963             }
  964         }
  965 
  966         # Wait for last child stop
  967         $self->wait_all_childs() if ($self->{queue_size} > 1);
  968 
  969         # Compute year statistics
  970         $child_count = 0;
  971         if (!$self->{QuietMode}) {
  972             print STDERR "Generating yearly data files now...\n";
  973         }
  974         for my $year ($self->{first_year} .. $self->{last_year}) {
  975             if ($self->{interrupt}) {
  976                 print STDERR "FATAL: Abort signal received\n";
  977                 return;
  978             }
  979             if (-d "$self->{Output}/$year") {
  980                 print STDERR "Compute and dump year statistics for $year\n" if (!$self->{QuietMode});
  981                 if ($self->{queue_size} > 1) {
  982                     $self->spawn(sub {
  983                         $self->_save_data("$year");
  984                     });
  985                     $child_count = $self->manage_queue_size(++$child_count);
  986                 } else {
  987                     $self->_save_data("$year");
  988                 }
  989                 $self->_clear_stats();
  990             }
  991         }
  992 
  993         # Wait for last child stop
  994         $self->wait_all_childs() if ($self->{queue_size} > 1);
  995 
  996     }
  997 
  998 }
  999 
 1000 sub split_logfile
 1001 {
 1002     my ($self, $logf) = @_;
 1003 
 1004     my @chunks = (0);
 1005 
 1006     # get file size
 1007     my $totalsize = (stat("$logf"))[7] || 0;
 1008 
 1009     my $offsplit = $self->{end_offset};
 1010     if ($self->{is_squidguard_log}) {
 1011         $offsplit = $self->{sg_end_offset};
 1012     } elsif ($self->{is_ufdbguard_log}) {
 1013         $offsplit = $self->{ug_end_offset};
 1014     }
 1015 
 1016     # If the file is very small, many jobs actually make the parsing take longer
 1017     if ( ($totalsize <= 16777216) || ($totalsize <= $offsplit)) { #16MB
 1018         push(@chunks, $totalsize);
 1019         return @chunks;
 1020     }
 1021 
 1022     # Split and search the right position in file corresponding to the number of jobs
 1023     my $i = 1;
 1024     if ($offsplit && ($offsplit < $totalsize)) {
 1025         $chunks[0] = $offsplit;
 1026     }
 1027     my $lfile = undef;
 1028     open($lfile, $logf) || die "FATAL: cannot read log file $logf. $!\n";
 1029     while ($i < $self->{queue_size}) {
 1030         my $pos = int(($totalsize/$self->{queue_size}) * $i);
 1031         if ($pos > $chunks[0]) {
 1032             $lfile->seek($pos, 0);
 1033             #Move the offset to the BEGINNING of each line, because the logic in process_file requires so
 1034             $pos = $pos + length(<$lfile>) - 1;
 1035             push(@chunks, $pos) if ($pos < $totalsize);
 1036         }
 1037         last if ($pos >= $totalsize);
 1038         $i++;
 1039     }
 1040     $lfile->close();
 1041 
 1042     push(@chunks, $totalsize);
 1043 
 1044     return @chunks;
 1045 }
 1046 
 1047 sub check_exclusions
 1048 {
 1049     my ($self, $login, $client_ip, $url) = @_;
 1050 
 1051     return 0 if (!exists $self->{Exclude}{users} && !exists $self->{Exclude}{clients} && !exists $self->{Exclude}{networks} && !exists $self->{Exclude}{uris});
 1052 
 1053     # check for user exclusion
 1054     if (exists $self->{Exclude}{users} && $login) {
 1055         foreach my $e (@{$self->{Exclude}{users}}) {
 1056             # look for users using the following format: user@domain.tld, domain\user and user 
 1057             if ( ($login =~ m#^$e$#i) || ($login =~ m#^$e\@#i) || ($login =~ m#\\$e$#i) ) {
 1058                 return 1;
 1059             }
 1060         }
 1061     }
 1062 
 1063     # check for client exclusion
 1064     if (exists $self->{Exclude}{clients} && $client_ip) {
 1065         foreach my $e (@{$self->{Exclude}{clients}}) {
 1066             if ($client_ip =~ m#^$e$#i) {
 1067                 return 1;
 1068             }
 1069         }
 1070     }
 1071 
 1072     # check for Network exclusion
 1073     if (exists $self->{Exclude}{networks} && $client_ip) {
 1074         foreach my $e (@{$self->{Exclude}{networks}}) {
 1075             if (&check_ip($client_ip, $e)) {
 1076                 return 1;
 1077             }
 1078         }
 1079     }
 1080 
 1081     # check for URL exclusion
 1082     if (exists $self->{Exclude}{uris} && $url) {
 1083         foreach my $e (@{$self->{Exclude}{uris}}) {
 1084             if ($url =~ m#^$e$#i) {
 1085                 return 1;
 1086             }
 1087         }
 1088     }
 1089 
 1090     return 0;
 1091 }
 1092 
 1093 sub check_inclusions
 1094 {
 1095     my ($self, $login, $client_ip) = @_;
 1096 
 1097     return 1 if (!exists $self->{Include}{users} && !exists $self->{Include}{clients} && !exists $self->{Include}{networks});
 1098 
 1099     # check for user inclusion
 1100     if (exists $self->{Include}{users} && $login) {
 1101         foreach my $e (@{$self->{Include}{users}}) {
 1102             # look for users using the following format: user@domain.tld, domain\user and user 
 1103             if ( ($login =~ m#^$e$#i) || ($login =~ m#^$e\@#i) || ($login =~ m#\\$e$#i) ) {
 1104                 return 1;
 1105             }
 1106         }
 1107     }
 1108 
 1109     # If login is a client ip, checked login against clients and networks filters
 1110     if (!$client_ip && ($login =~ /^\d+\.\d+\.\d+\.\d+$/)) {
 1111         $client_ip = $login;
 1112     }
 1113 
 1114     # check for client inclusion
 1115     if (exists $self->{Include}{clients} && $client_ip) {
 1116         foreach my $e (@{$self->{Include}{clients}}) {
 1117             if ($client_ip =~ m#^$e$#i) {
 1118                 return 1;
 1119             }
 1120         }
 1121     }
 1122 
 1123     # check for Network inclusion
 1124     if (exists $self->{Include}{networks} && $client_ip) {
 1125         foreach my $e (@{$self->{Include}{networks}}) {
 1126             if (&check_ip($client_ip, $e)) {
 1127                 return 1;
 1128             }
 1129         }
 1130     }
 1131 
 1132     return 0;
 1133 }
 1134 
 1135 sub _parse_file_part
 1136 {
 1137     my ($self, $file, $start_offset, $stop_offset) = @_;
 1138 
 1139     print STDERR "Reading file $file from offset $start_offset to ", ($stop_offset||'end'), ".\n" if (!$self->{QuietMode});
 1140 
 1141     # Open logfile
 1142     my $logfile = new IO::File;
 1143     if ($file =~ /\.gz/) {
 1144         # Open a pipe to zcat program for compressed log
 1145         $logfile->open("$ZCAT_PROG $file |") || $self->localdie("ERROR: cannot read from pipe to $ZCAT_PROG $file. $!\n");
 1146     } elsif ($file =~ /\.bz2/) {
 1147         # Open a pipe to bzcat program for compressed log
 1148         $logfile->open("$BZCAT_PROG $file |") || $self->localdie("ERROR: cannot read from pipe to $BZCAT_PROG $file. $!\n");
 1149     } elsif ($file =~ /\.xz/) {
 1150         # Open a pipe to xzcat program for compressed log
 1151         $logfile->open("$XZCAT_PROG $file |") || $self->localdie("ERROR: cannot read from pipe to $XZCAT_PROG $file. $!\n");
 1152     } else {
 1153         $logfile->open($file) || $self->localdie("ERROR: Unable to open Squid access.log file $file. $!\n");
 1154     }
 1155 
 1156     my $line = '';
 1157     my $time = 0;
 1158     my $elapsed = 0;
 1159     my $client_ip = '';
 1160     my $client_name = '';
 1161     my $code = '';
 1162     my $bytes = 0;
 1163     my $method = '';
 1164     my $url = '';
 1165     my $login = '';
 1166     my $status = '';
 1167     my $mime_type = '';
 1168     $self->{Syslog} = 0;
 1169 
 1170     my $acl = '';
 1171 
 1172     my $line_count = 0;
 1173     my $line_processed_count = 0;
 1174     my $line_stored_count = 0;
 1175 
 1176     # Move directly to the start position
 1177     if ($start_offset) {
 1178         $logfile->seek($start_offset, 0);
 1179         if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
 1180             $self->{end_offset} = $start_offset;
 1181         } elsif (!$self->{is_squidguard_log}) {
 1182             $self->{ug_end_offset} = $start_offset;
 1183         } else {
 1184             $self->{sg_end_offset} = $start_offset;
 1185         }
 1186     }
 1187 
 1188     # Set timezone in seconds
 1189     my $tz = ((0-$self->{TimeZone})*3600);
 1190 
 1191     # The log file format must be :
 1192     #   time elapsed client code/status bytes method URL rfc931 peerstatus/peerhost type
 1193     # This is the default format of squid access log file.
 1194 
 1195     # Read and parse each line of the access log file
 1196     while ($line = <$logfile>) {
 1197 
 1198         # quit this log if we reach the ending offset
 1199         if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
 1200             last if ($stop_offset && ($self->{end_offset}>= $stop_offset));
 1201             # Store the current position in logfile
 1202             $self->{end_offset} += length($line);
 1203         } elsif (!$self->{is_squidguard_log}) {
 1204             last if ($stop_offset && ($self->{ug_end_offset}>= $stop_offset));
 1205             # Store the current position in logfile
 1206             $self->{ug_end_offset} += length($line);
 1207         } else {
 1208             last if ($stop_offset && ($self->{sg_end_offset}>= $stop_offset));
 1209             # Store the current position in logfile
 1210             $self->{sg_end_offset} += length($line);
 1211         }
 1212 
 1213         chomp($line);
 1214         next if (!$line);
 1215 
 1216         # skip immediately lines that squid is not able to tag.
 1217         next if ($line =~ / TAG_NONE(_ABORTED)?\//);
 1218 
 1219         # Remove syslog header and mark the format
 1220         if ($self->{Syslog} == 1) {
 1221             $line =~ s/^... \d+ \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ [^\s]+ (\d{10}\.\d{3}) /$1 /;
 1222         # Remove syslog header and mark the format
 1223         } elsif (!$self->{Syslog} && ($line =~ s/^... \d+ \d{2}:\d{2}:\d{2} [^\s]+ [^\s]+ \d+ [^\s]+ (\d{10}\.\d{3}) /$1 /)) {
 1224             print STDERR "DEBUG: log was generated through syslog, the header will be removed.\n" if (!$self->{QuietMode});
 1225             $self->{Syslog} = 1;
 1226         } else {
 1227             $self->{Syslog} = 2;
 1228         }
 1229 
 1230         # Number of log lines parsed
 1231         $line_count++;
 1232 
 1233         # SquidAnalyzer supports the following squid log format:
 1234         #logformat squid      %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %un %Sh/%<A %mt
 1235         #logformat squidmime  %ts.%03tu %6tr %>a %Ss/%03>Hs %<st %rm %ru %un %Sh/%<A %mt [%>h] [%<h]
 1236         #logformat common     %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st %Ss:%Sh
 1237         #logformat combined   %>a %[ui %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %<st "%{Referer}>h" "%{User-Agent}>h" %Ss:%Sh
 1238         # Parse log with format: time elapsed client code/status bytes method URL rfc931 peerstatus/peerhost mime_type
 1239         my $format = 'native';
 1240         if ( !$self->{is_squidguard_log} && !$self->{is_ufdbguard_log} && ($line =~ $native_format_regex1) ) {
 1241             $time = $1;
 1242             $time += $tz;
 1243             $elapsed = abs($2);
 1244             $client_ip = $3;
 1245             $code = $4;
 1246             $bytes = $5;
 1247             $method = $6;
 1248             $line = $7;
 1249             if ($self->{TimeStart} || $self->{TimeStop}) {
 1250                 my $hour = strftime("%H:%M", CORE::localtime($time));
 1251                 next if ($self->{TimeStart} && $hour lt $self->{TimeStart});
 1252                 last if ($self->{TimeStop} && $hour gt $self->{TimeStop});
 1253             }
 1254         } elsif ( !$self->{is_squidguard_log} && !$self->{is_ufdbguard_log} && ($line =~ $common_format_regex1) ) {
 1255             $format = 'http';
 1256             $client_ip = $1;
 1257             $elapsed = abs($2);
 1258             $login = lc($3);
 1259             $time = $4;
 1260             $method = $5;
 1261             $url = lc($6);
 1262             $status = $8;
 1263             $bytes = $9;
 1264             $line = $10;
 1265             $code = $11;
 1266             $mime_type = $12;
 1267             $time =~ /(\d+)\/(...)\/(\d+):(\d+):(\d+):(\d+)\s/;
 1268             next if ($self->{TimeStart} && "$4:$5" lt $self->{TimeStart});
 1269             last if ($self->{TimeStop} && "$4:$5" gt $self->{TimeStop});
 1270             if (!$self->{TimeZone}) {
 1271                 $time = timelocal_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900);
 1272             } else {
 1273                 $time = timegm_nocheck($6, $5, $4, $1, $month_number{$2} - 1, $3 - 1900) + $tz;
 1274             }
 1275             # Some site has corrupted mime_type, try to remove nasty characters
 1276             if ($mime_type =~ s/[^\-\/\.\(\)\+\_,\=a-z0-9]+//igs) {
 1277                 $mime_type = 'invalid/type';
 1278             }
 1279                 } elsif ( !$self->{is_ufdbguard_log} && ($line =~ $sg_format_regex1) ) {
 1280                         $format = 'squidguard';
 1281             $self->{is_squidguard_log} = 1;
 1282             $acl = $7;
 1283                         $client_ip = $9;
 1284                         $elapsed = 0;
 1285                         $login = lc($10);
 1286                         $method = $11;
 1287                         $url = lc($8);
 1288                         $status = 301;
 1289                         $bytes = 0;
 1290                         $code = $12 . ':';
 1291                         $mime_type = '';
 1292             next if ($self->{TimeStart} && "$4:$5" lt $self->{TimeStart});
 1293             last if ($self->{TimeStop} && "$4:$5" gt $self->{TimeStop});
 1294             if (!$self->{TimeZone}) {
 1295                 $time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
 1296             } else {
 1297                 $time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz;
 1298             }
 1299         # Log format for ufdbGuard logs: timestamp [pid] BLOCK user clienthost aclname category url method
 1300         } elsif ($line =~ $ug_format_regex1) {
 1301                         $format = 'ufdbguard';
 1302             $self->{is_ufdbguard_log} = 1;
 1303             $acl = "$10/$11";
 1304                         $client_ip = $9;
 1305                         $elapsed = 0;
 1306                         $login = lc($8);
 1307                         $method = $13;
 1308                         $url = lc($12);
 1309                         $status = 301;
 1310                         $bytes = 0;
 1311                         $code = 'REDIRECT:';
 1312                         $mime_type = '';
 1313             next if ($self->{TimeStart} && "$4:$5" lt $self->{TimeStart});
 1314             last if ($self->{TimeStop} && "$4:$5" gt $self->{TimeStop});
 1315             if (!$self->{TimeZone}) {
 1316                 $time = timelocal_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900);
 1317             } else {
 1318                 $time = timegm_nocheck($6, $5, $4, $3, $2 - 1, $1 - 1900) + $tz;
 1319             }
 1320         } else {
 1321             next;
 1322         }
 1323         
 1324         if ($time) {
 1325             # Do not parse some unwanted method
 1326             my $qm_method = quotemeta($method) || '';
 1327             next if (($#{$self->{ExcludedMethods}} >= 0) && grep(/^$qm_method$/, @{$self->{ExcludedMethods}}));
 1328 
 1329             # Do not parse some unwanted code; e.g. TCP_DENIED/403
 1330             my $qm_code = quotemeta($code) || '';
 1331             next if (($#{$self->{ExcludedCodes}} >= 0) && grep(m#^$code$#, @{$self->{ExcludedCodes}}));
 1332 
 1333             # Go to last parsed date (incremental mode)
 1334             if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
 1335                 next if ($self->{history_time} && ($time <= $self->{history_time}));
 1336             } elsif (!$self->{is_squidguard_log}) {
 1337                 next if ($self->{ug_history_time} && ($time <= $self->{ug_history_time}));
 1338             } else {
 1339                 next if ($self->{sg_history_time} && ($time <= $self->{sg_history_time}));
 1340             }
 1341 
 1342             # Register the last parsing time and last offset position in logfile
 1343             if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
 1344                 $self->{end_time} = $time if (!$time || ($self->{end_time} < $time));
 1345                 # Register the first parsing time
 1346                 if (!$self->{begin_time} || ($self->{begin_time} > $time)) {
 1347                     $self->{begin_time} = $time;
 1348                     print STDERR "SQUID LOG SET START TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($time)), "\n" if (!$self->{QuietMode});
 1349                 }
 1350             } elsif (!$self->{is_squidguard_log}) {
 1351                 $self->{ug_end_time} = $time if (!$time || ($self->{ug_end_time} < $time));
 1352                 # Register the first parsing time
 1353                 if (!$self->{ug_begin_time} || ($self->{ug_begin_time} > $time)) {
 1354                     $self->{ug_begin_time} = $time;
 1355                     print STDERR "UFDBGUARD LOG SET START TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($time)), "\n" if (!$self->{QuietMode});
 1356                 }
 1357             } else {
 1358                 $self->{sg_end_time} = $time if (!$time || ($self->{sg_end_time} < $time));
 1359                 # Register the first parsing time
 1360                 if (!$self->{sg_begin_time} || ($self->{sg_begin_time} > $time)) {
 1361                     $self->{sg_begin_time} = $time;
 1362                     print STDERR "SQUIDGUARD LOG SET START TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($time)), "\n" if (!$self->{QuietMode});
 1363                 }
 1364             }
 1365 
 1366             # Only store (HIT|UNMODIFIED)/(MISS|MODIFIED|TUNNEL)/(DENIED|REDIRECT) status
 1367             # and peer CD_SIBLING_HIT/ aswell as peer SIBLING_HIT/...
 1368             if ( ($code =~ m#(HIT|UNMODIFIED)[:/]#) || ($self->{SiblingHit} && ($line =~ / (CD_)?SIBLING_HIT/)) ) {
 1369                 $code = 'HIT';
 1370             } elsif ($code =~ m#(MISS|MODIFIED|TUNNEL)[:/]#) {
 1371                 $code = 'MISS';
 1372             } elsif ($code =~ m#(DENIED|DENIED_REPLY|REDIRECT)[:/]#) {
 1373                 $code = 'DENIED';
 1374             } else {
 1375                 next;
 1376             }
 1377 
 1378             # With common and combined log format those fields have already been parsed
 1379             if (($format eq 'native') && ($line =~ $native_format_regex2) ) {
 1380                 $url = lc($1);
 1381                 $login = lc($2);
 1382                 $status = lc($3);
 1383                 $mime_type = lc($4);
 1384                 # Some site has corrupted mime_type, try to remove nasty characters
 1385                 if ($mime_type =~ s/[^\-\/\.\(\)\+\_,\=a-z0-9]+//igs) {
 1386                     $mime_type = 'invalid/type';
 1387                 }
 1388             }
 1389 
 1390             if ($url) {
 1391                 if (!$mime_type || ($mime_type eq '-')) {
 1392                     $mime_type = 'none';
 1393                 }
 1394 
 1395                 # Do not parse some unwanted method
 1396                 next if (($#{$self->{ExcludedMimes}} >= 0) && map {$mime_type =~ m#^$_$#} @{$self->{ExcludedMimes}});
 1397 
 1398                 # Remove extra space character in username
 1399                 $login =~ s/\%20//g;
 1400 
 1401                 my $id = $client_ip || '';
 1402                 if ($login ne '-') {
 1403                     $id = $login;
 1404                 }
 1405                 next if (!$id || (!$bytes && ($code ne 'DENIED')));
 1406 
 1407                 #####
 1408                 # If there's some mandatory inclusion, check the entry against the definitions
 1409                 # The entry is skipped directly if it is not in an inclusion list
 1410                 #####
 1411                 next if (!$self->check_inclusions($login, $client_ip));
 1412 
 1413                 #####
 1414                 # Check the entry against the exclusion definitions. The entry
 1415                 # is skipped directly when it match an exclusion definition.
 1416                 #####
 1417                 next if ($self->check_exclusions($login, $client_ip, $url));
 1418 
 1419                 # Set default user login to client ip address
 1420                 # Anonymize all users
 1421                 if ($self->{AnonymizeLogin} && ($client_ip ne $id)) {
 1422                     if (!exists $self->{AnonymizedId}{$id}) {
 1423                         $self->{AnonymizedId}{$id} = &anonymize_id();
 1424                     }
 1425                     $id = $self->{AnonymizedId}{$id};
 1426                 }
 1427 
 1428                 # Now parse data and generate statistics
 1429                 $self->_parseData($time, $elapsed, $client_ip, $code, $bytes, $url, $id, $mime_type, $acl, $method);
 1430                 $line_stored_count++;
 1431 
 1432             }
 1433             $line_processed_count++;
 1434         }
 1435     }
 1436     $logfile->close();
 1437 
 1438     if ($self->{cur_year}) {
 1439         # Save last parsed data
 1440         $self->_append_data($self->{cur_year}, $self->{cur_month}, $self->{cur_day});
 1441         # Stats can be cleared
 1442         $self->_clear_stats();
 1443 
 1444         # Stores last week to process
 1445         my $wn = &get_week_number($self->{cur_year}, $self->{cur_month}, $self->{cur_day});
 1446         if (!grep(/^$self->{cur_year}\/$self->{cur_month}\/$wn$/, @{$self->{week_parsed}})) {
 1447             push(@{$self->{week_parsed}}, "$self->{cur_year}/$self->{cur_month}/$wn");
 1448         }
 1449 
 1450         # Save the last information parsed in this file part
 1451         my $tmp_file = 'last_parsed.tmp';
 1452         if ($self->{is_squidguard_log}) {
 1453             $tmp_file = 'sg_last_parsed.tmp';
 1454         } elsif ($self->{is_ufdbguard_log}) {
 1455             $tmp_file = 'ug_last_parsed.tmp';
 1456         }
 1457         if (open(OUT, ">>$self->{pid_dir}/$tmp_file")) {
 1458             flock(OUT, 2) || die "FATAL: can't acquire lock on file $tmp_file, $!\n";
 1459             if (!$self->{is_squidguard_log} && !$self->{is_ufdbguard_log}) {
 1460                 print OUT "$self->{last_year} $self->{last_month}{$self->{last_year}} $self->{last_day}{$self->{last_year}} $self->{end_time} $self->{end_offset} $line_stored_count $line_processed_count $line_count $self->{first_year} $self->{first_month}{$self->{first_year}} ", join(',', @{$self->{week_parsed}}), "\n";
 1461             } elsif (!$self->{is_squidguard_log}) {
 1462                 print OUT "$self->{last_year} $self->{last_month}{$self->{last_year}} $self->{last_day}{$self->{last_year}} $self->{ug_end_time} $self->{ug_end_offset} $line_stored_count $line_processed_count $line_count $self->{first_year} $self->{first_month}{$self->{first_year}} ", join(',', @{$self->{week_parsed}}), "\n";
 1463             } else {
 1464                 print OUT "$self->{last_year} $self->{last_month}{$self->{last_year}} $self->{last_day}{$self->{last_year}} $self->{sg_end_time} $self->{sg_end_offset} $line_stored_count $line_processed_count $line_count $self->{first_year} $self->{first_month}{$self->{first_year}} ", join(',', @{$self->{week_parsed}}), "\n";
 1465             }
 1466             close(OUT);
 1467         } else {
 1468             print STDERR "ERROR: can't save last parsed line into $self->{pid_dir}/$tmp_file, $!\n";
 1469         }
 1470     }
 1471 
 1472 }
 1473 
 1474 sub _clear_stats
 1475 {
 1476     my $self = shift;
 1477 
 1478     # Hashes to store user statistics
 1479     $self->{stat_user_hour} = ();
 1480     $self->{stat_user_day} = ();
 1481     $self->{stat_user_month} = ();
 1482     $self->{stat_usermax_hour} = ();
 1483     $self->{stat_usermax_day} = ();
 1484     $self->{stat_usermax_month} = ();
 1485     $self->{stat_user_url_hour} = ();
 1486     $self->{stat_user_url_day} = ();
 1487     $self->{stat_user_url_month} = ();
 1488 
 1489     # Hashes to store network statistics
 1490     $self->{stat_network_hour} = ();
 1491     $self->{stat_network_day} = ();
 1492     $self->{stat_network_month} = ();
 1493     $self->{stat_netmax_hour} = ();
 1494     $self->{stat_netmax_day} = ();
 1495     $self->{stat_netmax_month} = ();
 1496 
 1497     # Hashes to store user / network statistics
 1498     $self->{stat_netuser_hour} = ();
 1499     $self->{stat_netuser_day} = ();
 1500     $self->{stat_netuser_month} = ();
 1501 
 1502     # Hashes to store cache status (hit/miss)
 1503     $self->{stat_code_hour} = ();
 1504     $self->{stat_code_day} = ();
 1505     $self->{stat_code_month} = ();
 1506 
 1507     # Hashes to store throughput statsœ
 1508     $self->{stat_throughput_hour} = ();
 1509     $self->{stat_throughput_day} = ();
 1510     $self->{stat_throughput_month} = ();
 1511 
 1512     # Hashes to store mime type
 1513     $self->{stat_mime_type_hour} = ();
 1514     $self->{stat_mime_type_day} = ();
 1515     $self->{stat_mime_type_month} = ();
 1516 
 1517 }
 1518 
 1519 sub _init
 1520 {
 1521     my ($self, $conf_file, $log_file, $debug, $rebuild, $pid_dir, $pidfile, $timezone) = @_;
 1522 
 1523     # Set path to pid file
 1524     $pidfile = $pid_dir . '/' . $pidfile;
 1525 
 1526     # Prevent for a call without instance
 1527     if (!ref($self)) {
 1528         print STDERR "ERROR - init : Unable to call init without an object instance.\n";
 1529         unlink("$pidfile");
 1530         exit(0);
 1531     }
 1532     $self->{pidfile} = $pidfile || '/tmp/squid-analyzer.pid';
 1533 
 1534     # Load configuration information
 1535     if (!$conf_file) {
 1536         if (-f '/etc/squidanalyzer/squidanalyzer.conf') {
 1537             $conf_file = '/etc/squidanalyzer/squidanalyzer.conf';
 1538         } elsif (-f '/etc/squidanalyzer.conf') {
 1539             $conf_file = '/etc/squidanalyzer.conf';
 1540         } elsif (-f 'squidanalyzer.conf') {
 1541             $conf_file = 'squidanalyzer.conf';
 1542         }
 1543     }
 1544     my %options = $self->parse_config($conf_file, $log_file, $rebuild);
 1545 
 1546     # Configuration options
 1547     $self->{MinPie} = $options{MinPie} || 2;
 1548     $self->{QuietMode} = $options{QuietMode} || 0;
 1549     $self->{UrlReport} = $options{UrlReport} || 0;
 1550     $self->{UrlHitsOnly} = $options{UrlHitsOnly} || 0;
 1551     $self->{MaxFormatError} = $options{MaxFormatError} || 0;
 1552     if (defined $options{UserReport}) {
 1553         $self->{UserReport} = $options{UserReport};
 1554     } else {
 1555         # Assure backward compatibility after update otherwize
 1556         # data files will lost users information if directive
 1557         # is not found in the configuration file
 1558         $self->{UserReport} = 1;
 1559     }
 1560     $self->{Output} = $options{Output} || '';
 1561     $self->{WebUrl} = $options{WebUrl} || '';
 1562     $self->{WebUrl} .= '/' if ($self->{WebUrl} && ($self->{WebUrl} !~ /\/$/));
 1563     $self->{DateFormat} = $options{DateFormat} || '%y-%m-%d';
 1564     $self->{Lang} = $options{Lang} || '';
 1565     $self->{AnonymizeLogin} = $options{AnonymizeLogin} || 0;
 1566     $self->{SiblingHit} = $options{SiblingHit} || 1;
 1567     $self->{ImgFormat} = $options{ImgFormat} || 'png';
 1568     $self->{Locale} = $options{Locale} || '';
 1569     $self->{TopUrlUser} = $options{TopUrlUser} || 0;
 1570     $self->{no_year_stat} = 0;
 1571     $self->{no_week_stat} = 0;
 1572     $self->{UseClientDNSName} = $options{UseClientDNSName} || 0;
 1573     $self->{DNSLookupTimeout} = $options{DNSLookupTimeout} || 0.0001;
 1574     $self->{DNSLookupTimeout} = int($self->{DNSLookupTimeout} * 1000000);
 1575     $self->{LogFile} = ();
 1576     $self->{queue_size} = 1;
 1577     $self->{running_pids} = ();
 1578     $self->{pid_dir} = $pid_dir || '/tmp';
 1579     $self->{child_count} = 0;
 1580     $self->{rebuild} = $rebuild || 0;
 1581     $self->{is_squidguard_log} = 0;
 1582     $self->{TimeZone} = $options{TimeZone} || $timezone || 0;
 1583     $self->{Syslog} = 0;
 1584     $self->{UseUrlPort} = 1;
 1585     $self->{TimeStart} = $options{TimeStart} || '';
 1586     $self->{TimeStop} = $options{TimeStop} || '';
 1587 
 1588     # Cleanup old temporary files
 1589     foreach my $tmp_file ('last_parsed.tmp', 'sg_last_parsed.tmp', 'ug_last_parsed.tmp') {
 1590         unlink("$self->{pid_dir}/$tmp_file");
 1591     }
 1592 
 1593     $self->{CustomHeader} = $options{CustomHeader} || qq{<a href="$self->{WebUrl}"><img src="$self->{WebUrl}images/logo-squidanalyzer.png" title="SquidAnalyzer $VERSION" border="0"></a> SquidAnalyzer};
 1594     $self->{ExcludedMethods} = ();
 1595     if ($options{ExcludedMethods}) {
 1596         push(@{$self->{ExcludedMethods}}, split(/\s*,\s*/, $options{ExcludedMethods}));
 1597     }
 1598     $self->{ExcludedCodes} = ();
 1599     if ($options{ExcludedCodes}) {
 1600         push(@{$self->{ExcludedCodes}}, split(/\s*,\s*/, $options{ExcludedCodes}));
 1601     }
 1602     $self->{ExcludedMimes} = ();
 1603     if ($options{ExcludedMimes}) {
 1604         push(@{$self->{ExcludedMimes}}, split(/\s*,\s*/, $options{ExcludedMimes}));
 1605     }
 1606 
 1607     if ($self->{Lang}) {
 1608         open(IN, "$self->{Lang}") or die "ERROR: can't open translation file $self->{Lang}, $!\n";
 1609         while (my $l = <IN>) {
 1610             chomp($l);
 1611             $l =~ s/\r//gs;
 1612             next if ($l =~ /^\s*#/);
 1613             next if (!$l);
 1614             my ($key, $str) = split(/\t+/, $l);
 1615             $Translate{$key} = $str;
 1616         }
 1617         close(IN);
 1618     }
 1619     if (!$self->{Output}) {
 1620         die "ERROR: 'Output' configuration option must be set.\n";
 1621     }
 1622     if (! -d $self->{Output}) {
 1623         die "ERROR: 'Output' directory $self->{Output} doesn't exists.\n";
 1624     }
 1625     if (!$self->{rebuild}) {
 1626         push(@{$self->{LogFile}}, @{$options{LogFile}});
 1627         if ($#{$self->{LogFile}} < 0) {
 1628             die "ERROR: 'LogFile' configuration directive must be set or a log file given at command line.\n";
 1629         }
 1630     }
 1631     $self->{OrderUser} = lc($options{OrderUser}) || 'bytes';
 1632     $self->{OrderNetwork} = lc($options{OrderNetwork}) || 'bytes';
 1633     $self->{OrderUrl} = lc($options{OrderUrl}) || 'bytes';
 1634     $self->{OrderMime} = lc($options{OrderMime}) || 'bytes';
 1635     if ($self->{OrderUser} !~ /^(hits|bytes|duration)$/) {
 1636         die "ERROR: OrderUser must be one of these values: hits, bytes or duration\n";
 1637     }
 1638     if ($self->{OrderNetwork} !~ /^(hits|bytes|duration)$/) {
 1639         die "ERROR: OrderNetwork must be one of these values: hits, bytes or duration\n";
 1640     }
 1641     if ($self->{OrderUrl} !~ /^(hits|bytes|duration)$/) {
 1642         die "ERROR: OrderUrl must be one of these values: hits, bytes or duration\n";
 1643     }
 1644     if ($self->{OrderMime} !~ /^(hits|bytes)$/) {
 1645         die "ERROR: OrderMime must be one of these values: hits or bytes\n";
 1646     }
 1647     %{$self->{NetworkAlias}} = $self->parse_network_aliases($options{NetworkAlias} || '');
 1648     %{$self->{UserAlias}}    = $self->parse_user_aliases($options{UserAlias} || '');
 1649     %{$self->{Exclude}}      = $self->parse_exclusion($options{Exclude} || '');
 1650     %{$self->{Include}}      = $self->parse_inclusion($options{Include} || '');
 1651     %{$self->{NetworkAliasCache}} = ();
 1652     %{$self->{UserAliasCache}} = ();
 1653     $self->{has_nework_alias} = scalar keys %{$self->{NetworkAlias}};
 1654     $self->{has_user_alias} = scalar keys %{$self->{UserAlias}};
 1655 
 1656     $self->{CostPrice} = $options{CostPrice} || 0;
 1657     $self->{Currency} = $options{Currency} || '&euro;';
 1658     $self->{TopNumber} = $options{TopNumber} || 10;
 1659     $self->{TopStorage} = $options{TopStorage} || 0;
 1660     $self->{TransfertUnit} = $options{TransfertUnit} || 'BYTES';
 1661     if (!grep(/^$self->{TransfertUnit}$/i, 'BYTES', 'KB', 'MB', 'GB')) {
 1662         die "ERROR: TransfertUnit must be one of these values: KB, MB or GB\n";
 1663     } else {
 1664         if (uc($self->{TransfertUnit}) eq 'BYTES') {
 1665             $self->{TransfertUnitValue} = 1;
 1666             $self->{TransfertUnit} = 'Bytes';
 1667         } elsif (uc($self->{TransfertUnit}) eq 'KB') {
 1668             $self->{TransfertUnitValue} = 1024;
 1669         } elsif (uc($self->{TransfertUnit}) eq 'MB') {
 1670             $self->{TransfertUnitValue} = 1024*1024;
 1671         } elsif (uc($self->{TransfertUnit}) eq 'GB') {
 1672             $self->{TransfertUnitValue} = 1024*1024*1024;
 1673         }
 1674     }
 1675 
 1676     # Init statistics storage hashes
 1677     $self->_clear_stats();
 1678 
 1679     # Used to store the first and last date parsed
 1680         $self->{last_year} = 0;
 1681         $self->{last_month} = ();
 1682         $self->{last_day} = ();
 1683         $self->{cur_year} = 0;
 1684         $self->{cur_month} = 0;
 1685         $self->{cur_day} = 0;
 1686         $self->{first_year} = 0;
 1687         $self->{first_month} = ();
 1688     $self->{begin_time} = 0;
 1689     $self->{end_time} = 0;
 1690     $self->{end_offset} = 0;
 1691     $self->{week_parsed} = ();
 1692     # Used to stored command line parameters from squid-analyzer
 1693     $self->{history_time} = 0;
 1694     $self->{preserve} = 0;
 1695     $self->{sg_end_time} = 0;
 1696     $self->{sg_end_offset} = 0;
 1697     $self->{ug_end_time} = 0;
 1698     $self->{ug_end_offset} = 0;
 1699 
 1700     # Override verbose mode
 1701     $self->{QuietMode} = 0 if ($debug);
 1702 
 1703     # Enable local date format if defined, else strftime will be used. The limitation
 1704     # this behavior is that all dates in HTML files will be the same for performences reasons.
 1705     if ($self->{Locale}) {
 1706         my $lang = 'LANG=' . $self->{Locale};
 1707         $self->{start_date} = `$lang date | iconv -t $Translate{CharSet} 2>/dev/null`;
 1708         chomp($self->{start_date});
 1709     }
 1710 
 1711     # Get the last parsing date for Squid log incremental parsing
 1712     if (!$rebuild && -e "$self->{Output}/SquidAnalyzer.current") {
 1713         my $current = new IO::File;
 1714         unless($current->open("$self->{Output}/SquidAnalyzer.current")) {
 1715             print STDERR "ERROR: Can't read file $self->{Output}/SquidAnalyzer.current, $!\n" if (!$self->{QuietMode});
 1716             print STDERR "Starting at the first line of Squid access log file.\n" if (!$self->{QuietMode});
 1717         } else {
 1718             my $tmp = <$current>;
 1719             chomp($tmp);
 1720             ($self->{history_time}, $self->{end_offset}) = split(/[\t]/, $tmp);
 1721             $self->{begin_time} = $self->{history_time};
 1722             $current->close();
 1723             if ($self->{history_time}) {
 1724                 print STDERR "SQUID LOG HISTORY TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{history_time})), " - HISTORY OFFSET: $self->{end_offset}\n" if (!$self->{QuietMode});
 1725             }
 1726         }
 1727     }
 1728 
 1729     # Get the last parsing date for SquidGuard log incremental parsing
 1730     if (!$rebuild && -e "$self->{Output}/SquidGuard.current") {
 1731         my $current = new IO::File;
 1732         unless($current->open("$self->{Output}/SquidGuard.current")) {
 1733             print STDERR "ERROR: Can't read file $self->{Output}/SquidGuard.current, $!\n" if (!$self->{QuietMode});
 1734             print STDERR "Starting at the first line of SquidGuard log file.\n" if (!$self->{QuietMode});
 1735         } else {
 1736             my $tmp = <$current>;
 1737             chomp($tmp);
 1738             ($self->{sg_history_time}, $self->{sg_end_offset}) = split(/[\t]/, $tmp);
 1739             $self->{sg_begin_time} = $self->{sg_history_time};
 1740             $current->close();
 1741             if ($self->{sg_history_time}) {
 1742                 print STDERR "SQUIDGUARD LOG HISTORY TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{sg_history_time})), " - HISTORY OFFSET: $self->{sg_end_offset}\n" if (!$self->{QuietMode});
 1743             }
 1744         }
 1745     }
 1746 
 1747     # Get the last parsing date for ufdbGuard log incremental parsing
 1748     if (!$rebuild && -e "$self->{Output}/ufdbGuard.current") {
 1749         my $current = new IO::File;
 1750         unless($current->open("$self->{Output}/ufdbGuard.current")) {
 1751             print STDERR "ERROR: Can't read file $self->{Output}/ufdbGuard.current, $!\n" if (!$self->{QuietMode});
 1752             print STDERR "Starting at the first line of ufdbGuard log file.\n" if (!$self->{QuietMode});
 1753         } else {
 1754             my $tmp = <$current>;
 1755             chomp($tmp);
 1756             ($self->{ug_history_time}, $self->{ug_end_offset}) = split(/[\t]/, $tmp);
 1757             $self->{ug_begin_time} = $self->{ug_history_time};
 1758             $current->close();
 1759             if ($self->{ug_history_time}) {
 1760                 print STDERR "UFDBGUARD LOG HISTORY TIME: ", strftime("%a %b %e %H:%M:%S %Y", CORE::localtime($self->{ug_history_time})), " - HISTORY OFFSET: $self->{ug_end_offset}\n" if (!$self->{QuietMode});
 1761             }
 1762         }
 1763     }
 1764 
 1765     $self->{menu} = qq{
 1766 <div id="menu">
 1767 <ul>
 1768 <li><a href="../index.html"><span class="iconArrow">$Translate{'Back_link'}</span></a></li>
 1769 };
 1770     if ($self->{UrlReport}) {
 1771         $self->{menu} .= qq{
 1772 <li><a href="domain.html"><span class="iconDomain">$Translate{'Top_domain_link'}</span></a></li>
 1773 <li><a href="url.html"><span class="iconUrl">$Translate{'Top_url_link'}</span></a></li>
 1774 <li><a href="denied.html"><span class="iconUrl">$Translate{'Top_denied_link'}</span></a></li>
 1775 };
 1776     }
 1777     if ($self->{UserReport}) {
 1778         $self->{menu} .= qq{
 1779 <li><a href="user.html"><span class="iconUser">$Translate{'User_link'}</span></a></li>
 1780 };
 1781     }
 1782     $self->{menu} .= qq{
 1783 <li><a href="network.html"><span class="iconNetwork">$Translate{'Network_link'}</span></a></li>
 1784 <li><a href="mime_type.html"><span class="iconMime">$Translate{'Mime_link'}</span></a></li>
 1785 </ul>
 1786 </div>
 1787 };
 1788 
 1789     $self->{menu2} = qq{
 1790 <div id="menu">
 1791 <ul>
 1792 <li><a href="../../index.html"><span class="iconArrow">$Translate{'Back_link'}</span></a></li>
 1793 };
 1794     if ($self->{UrlReport}) {
 1795         $self->{menu2} .= qq{
 1796 <li><a href="../../domain.html"><span class="iconDomain">$Translate{'Top_domain_link'}</span></a></li>
 1797 <li><a href="../../url.html"><span class="iconUrl">$Translate{'Top_url_link'}</span></a></li>A
 1798 <li><a href="../../denied.html"><span class="iconUrl">$Translate{'Top_denied_link'}</span></a></li>A
 1799 };
 1800     }
 1801     if ($self->{UserReport}) {
 1802         $self->{menu2} .= qq{
 1803 <li><a href="../../user.html"><span class="iconUser">$Translate{'User_link'}</span></a></li>
 1804 };
 1805     }
 1806     $self->{menu2} .= qq{
 1807 <li><a href="../../network.html"><span class="iconNetwork">$Translate{'Network_link'}</span></a></li>
 1808 <li><a href="../../mime_type.html"><span class="iconMime">$Translate{'Mime_link'}</span></a></li>
 1809 </ul>
 1810 </div>
 1811 };
 1812 
 1813     $self->{menu3} = qq{
 1814 <div id="menu">
 1815 <ul>
 1816 <li><a href="../index.html"><span class="iconArrow">$Translate{'Back_link'}</span></a></li>
 1817 </ul>
 1818 </div>
 1819 };
 1820 
 1821 }
 1822 
 1823 sub _gethostbyaddr
 1824 {
 1825     my ($self, $ip) = @_;
 1826 
 1827 
 1828     my $host = undef;
 1829     my $err = '';
 1830     unless(exists $CACHE{$ip}) {
 1831         eval {
 1832             local $SIG{ALRM} = sub { die "DNS lookup timeout.\n"; };
 1833             ualarm $self->{DNSLookupTimeout};
 1834             my @addrs = ();
 1835             if ($] < 5.014) {
 1836                 $host = gethostbyaddr(inet_aton($ip), AF_INET);
 1837             } else {
 1838                 # We also need to resolve IPV6 addresses
 1839                 if ($ip =~ /^\d+\.\d+\.\d+\.\d+$/) {
 1840                     ($err, @addrs) = Socket::getaddrinfo( $ip, 0, { 'protocol' => Socket::IPPROTO_TCP, 'family' => Socket::AF_INET } );
 1841                 } else {
 1842                     ($err, @addrs) = Socket::getaddrinfo( $ip, 0, { 'protocol' => Socket::IPPROTO_TCP, 'family' => Socket::AF_INET6 } );
 1843                 }
 1844             }
 1845             for my $addr (@addrs) {
 1846                 ($err, $host) = Socket::getnameinfo( $addr->{addr});
 1847                 last;
 1848             }
 1849             ualarm 0;
 1850         };
 1851         if ($@) {
 1852             delete $CACHE{$ip};
 1853             if (!$self->{QuietMode}) {
 1854                 warn "_gethostbyaddr timeout reach for ip: $ip, timeout can be adjusted with directive DNSLookupTimeout\n";
 1855             }
 1856         } elsif ($err) {
 1857             delete $CACHE{$ip};
 1858             if (!$self->{QuietMode}) {
 1859                 warn "_gethostbyaddr error resolving ip: $ip, $err\n";
 1860             }
 1861         }
 1862         else {
 1863             $CACHE{$ip} = $host;
 1864             #printf "_gethostbyaddr success : %s (%s)\n", $ip, $host;
 1865         }
 1866     }
 1867     return $CACHE{$ip} || $ip;
 1868 }
 1869 
 1870 sub apply_network_alias
 1871 {
 1872     my ($self, $ip) = @_;
 1873 
 1874     return $self->{NetworkAliasCache}{$ip} if (exists $self->{NetworkAliasCache}{$ip});
 1875 
 1876     my $found = 0;
 1877     foreach my $r (keys %{$self->{NetworkAlias}}) {
 1878         if ($r =~ $cidr_regex) {
 1879             if (&check_ip($ip, $r)) {
 1880                 $self->{NetworkAliasCache}{$ip} = $self->{NetworkAlias}->{$r};
 1881                 $ip = $self->{NetworkAlias}->{$r};
 1882                 $found = 1;
 1883                 last;
 1884             }
 1885         } elsif ($ip =~ /^$r/) {
 1886             $self->{NetworkAliasCache}{$ip} = $self->{NetworkAlias}->{$r};
 1887             $ip = $self->{NetworkAlias}->{$r};
 1888             $found = 1;
 1889             last;
 1890         }
 1891     }
 1892     $self->{NetworkAliasCache}{$ip} = $ip if (!$found);
 1893 
 1894     return $ip;
 1895 }
 1896 
 1897 sub apply_user_alias
 1898 {
 1899     my ($self, $id) = @_;
 1900 
 1901     return $self->{UserAliasCache}{$id} if (exists $self->{UserAliasCache}{$id});
 1902 
 1903     my $found = 0;
 1904     foreach my $u (keys %{$self->{UserAlias}}) {
 1905         if ( $id =~ /^$u$/i ) {
 1906             $self->{UserAliasCache}{$id} = $self->{UserAlias}->{$u};
 1907             $id = $self->{UserAlias}->{$u};
 1908             $found = 1;
 1909             last;
 1910         }
 1911     }
 1912     $self->{UserAliasCache}{$id} = $id if (!$found);
 1913 
 1914     return $id;
 1915 }
 1916 
 1917 sub _parseData
 1918 {
 1919     my ($self, $time, $elapsed, $client, $code, $bytes, $url, $id, $type, $acl, $method) = @_;
 1920 
 1921     # Save original IP address for dns resolving
 1922     my $client_ip_addr = $client;
 1923 
 1924     # Get the current year and month
 1925     my ($sec,$min,$hour,$day,$month,$year,$wday,$yday,$isdst) = CORE::localtime($time);
 1926     $year += 1900;
 1927     $month = sprintf("%02d", $month + 1);
 1928     $day = sprintf("%02d", $day);
 1929 
 1930     # Store data when hour change to save memory
 1931     if ($self->{cur_year} && ($self->{cur_hour} ne '') && ($hour != $self->{cur_hour}) ) {
 1932         # If the day has changed then we want to save stats of the previous one
 1933         $self->_append_data($self->{cur_year}, $self->{cur_month}, $self->{cur_day});
 1934         # Stats can be cleared
 1935         print STDERR "Clearing statistics storage hashes, for $self->{cur_year}-$self->{cur_month}-$self->{cur_day} ", sprintf("%02d", $self->{cur_hour}), ":00:00.\n" if (!$self->{QuietMode});
 1936         $self->_clear_stats();
 1937     }
 1938 
 1939     # Stores weeks to process
 1940     if (!$self->{no_week_stat}) {
 1941         if ("$year$month$day" ne "$self->{cur_year}$self->{cur_month}$self->{cur_day}") {
 1942             my $wn = &get_week_number($year, $month, $day);
 1943             if (!grep(/^$year\/$month\/$wn$/, @{$self->{week_parsed}})) {
 1944                 push(@{$self->{week_parsed}}, "$year/$month/$wn");
 1945             }
 1946         }
 1947     }
 1948 
 1949     # Extract the domainname part of the URL
 1950     $url =~ s/:\d+.*// if (!$self->{UseUrlPort});
 1951     $url =~ m/^[^\/]+\/\/([^\/]+)/;
 1952     my $dest = $1 || $url;
 1953 
 1954     # Replace username by his dnsname if there's no username
 1955     # (login is equal to ip) and if client is an ip address
 1956     if ( ($id eq $client) && $self->{UseClientDNSName}) {
 1957         if ($client =~ $ip_regexp) {
 1958             my $dnsname = $self->_gethostbyaddr($client);
 1959             if ($dnsname) {
 1960                 $id = $dnsname;
 1961             }
 1962         }
 1963     }
 1964 
 1965     # Replace network by his aliases if any
 1966     my $network = (!$self->{has_nework_alias}) ? $client : $self->apply_network_alias($client);
 1967 
 1968     # Set default to a class C network
 1969     if (!$network) {
 1970         $client =~ /^(.*)([:\.]+)\d+$/;
 1971         $network = "$1$2". "0";
 1972     }
 1973 
 1974     # Replace username by his alias if any
 1975     $id = (!$self->{has_user_alias}) ? $id : $self->apply_user_alias($id);
 1976 
 1977     # Stores last parsed date part
 1978     if (!$self->{last_year} || ("$year$month$day" gt "$self->{last_year}$self->{last_month}{$self->{last_year}}$self->{last_day}{$self->{last_year}}")) {
 1979         $self->{last_year} = $year;
 1980         $self->{last_month}{$self->{last_year}} = $month;
 1981         $self->{last_day}{$self->{last_year}} = $day;
 1982     }
 1983 
 1984     # Stores first parsed date part
 1985     if (!$self->{first_year} || ("$self->{first_year}$self->{first_month}{$self->{first_year}}" gt "$year$month")) {
 1986         $self->{first_year} = $year;
 1987         $self->{first_month}{$self->{first_year}} = $month;
 1988     }
 1989 
 1990     # Stores current processed values
 1991     $self->{cur_year} = $year;
 1992     $self->{cur_month} = $month;
 1993     $self->{cur_day} = $day;
 1994     $self->{cur_hour} = $hour;
 1995     $hour = sprintf("%02d", $hour);
 1996 
 1997     #### Store access denied statistics
 1998     if ($code eq 'DENIED') {
 1999         $self->{stat_code_hour}{$code}{$hour}{hits}++;
 2000         $self->{stat_code_hour}{$code}{$hour}{bytes} += $bytes;
 2001         $self->{stat_code_day}{$code}{$self->{last_day}}{hits}++;
 2002         $self->{stat_code_day}{$code}{$self->{last_day}}{bytes} += $bytes;
 2003 
 2004         $self->{stat_throughput_hour}{$code}{$hour}{bytes} += $bytes;
 2005         $self->{stat_throughput_day}{$code}{$self->{last_day}}{bytes} += $bytes;
 2006         $self->{stat_throughput_hour}{$code}{$hour}{elapsed} += $elapsed;
 2007         $self->{stat_throughput_day}{$code}{$self->{last_day}}{elapsed} += $elapsed;
 2008 
 2009         #### Store url statistics
 2010         if ($self->{UrlReport}) {
 2011             $self->{stat_denied_url_hour}{$id}{$dest}{hits}++;
 2012             $self->{stat_denied_url_hour}{$id}{$dest}{firsthit} = $time if (!$self->{stat_denied_url_hour}{$id}{$dest}{firsthit} || ($time < $self->{stat_denied_url_hour}{$id}{$dest}{firsthit}));
 2013             $self->{stat_denied_url_hour}{$id}{$dest}{lasthit} = $time if (!$self->{stat_denied_url_hour}{$id}{$dest}{lasthit} || ($time > $self->{stat_denied_url_hour}{$id}{$dest}{lasthit}));
 2014             $self->{stat_denied_url_hour}{$id}{$dest}{blacklist}{$acl}++ if ($acl);
 2015             $self->{stat_denied_url_day}{$id}{$dest}{hits}++;
 2016             $self->{stat_denied_url_day}{$id}{$dest}{firsthit} = $time if (!$self->{stat_denied_url_day}{$id}{$dest}{firsthit} || ($time < $self->{stat_denied_url_day}{$id}{$dest}{firsthit}));
 2017             $self->{stat_denied_url_day}{$id}{$dest}{lasthit} = $time if (!$self->{stat_denied_url_day}{$id}{$dest}{lasthit} || ($time > $self->{stat_denied_url_day}{$id}{$dest}{lasthit}));
 2018             $self->{stat_denied_url_day}{$id}{$dest}{blacklist}{$acl}++ if ($acl);
 2019             $self->{stat_user_hour}{$id}{$hour}{hits} += 0;
 2020             $self->{stat_user_hour}{$id}{$hour}{bytes} += 0;
 2021             $self->{stat_user_hour}{$id}{$hour}{duration} += 0;
 2022             $self->{stat_user_day}{$id}{$self->{last_day}}{hits} += 0;
 2023             $self->{stat_user_day}{$id}{$self->{last_day}}{bytes} += 0;
 2024             $self->{stat_user_day}{$id}{$self->{last_day}}{duration} += 0;
 2025         }
 2026         return;
 2027     }
 2028     
 2029     #### Store client statistics
 2030     if ($self->{UserReport}) {
 2031         $self->{stat_user_hour}{$id}{$hour}{hits}++;
 2032         $self->{stat_user_hour}{$id}{$hour}{bytes} += $bytes;
 2033         $self->{stat_user_hour}{$id}{$hour}{duration} += $elapsed;
 2034         $self->{stat_user_day}{$id}{$self->{last_day}}{hits}++;
 2035         $self->{stat_user_day}{$id}{$self->{last_day}}{bytes} += $bytes;
 2036         $self->{stat_user_day}{$id}{$self->{last_day}}{duration} += $elapsed;
 2037         if ($bytes > $self->{stat_usermax_hour}{$id}{largest_file_size}) {
 2038             $self->{stat_usermax_hour}{$id}{largest_file_size} = $bytes;
 2039             $self->{stat_usermax_hour}{$id}{largest_file_url} = $url;
 2040         }
 2041         if ($bytes > $self->{stat_usermax_day}{$id}{largest_file_size}) {
 2042             $self->{stat_usermax_day}{$id}{largest_file_size} = $bytes;
 2043             $self->{stat_usermax_day}{$id}{largest_file_url} = $url;
 2044         }
 2045     }
 2046 
 2047     #### Store networks statistics
 2048     $self->{stat_network_hour}{$network}{$hour}{hits}++;
 2049     $self->{stat_network_hour}{$network}{$hour}{bytes} += $bytes;
 2050     $self->{stat_network_hour}{$network}{$hour}{duration} += $elapsed;
 2051     $self->{stat_network_day}{$network}{$self->{last_day}}{hits}++;
 2052     $self->{stat_network_day}{$network}{$self->{last_day}}{bytes} += $bytes;
 2053     $self->{stat_network_day}{$network}{$self->{last_day}}{duration} += $elapsed;
 2054     if ($bytes > $self->{stat_netmax_hour}{$network}{largest_file_size}) {
 2055         $self->{stat_netmax_hour}{$network}{largest_file_size} = $bytes;
 2056         $self->{stat_netmax_hour}{$network}{largest_file_url} = $url;
 2057     }
 2058     if ($bytes > $self->{stat_netmax_day}{$network}{largest_file_size}) {
 2059         $self->{stat_netmax_day}{$network}{largest_file_size} = $bytes;
 2060         $self->{stat_netmax_day}{$network}{largest_file_url} = $url;
 2061     }
 2062 
 2063     #### Store HIT/MISS/DENIED statistics
 2064     $self->{stat_code_hour}{$code}{$hour}{hits}++;
 2065     $self->{stat_code_hour}{$code}{$hour}{bytes} += $bytes;
 2066     $self->{stat_code_hour}{$code}{$hour}{elapsed} += $elapsed;
 2067     $self->{stat_code_day}{$code}{$self->{last_day}}{hits}++;
 2068     $self->{stat_code_day}{$code}{$self->{last_day}}{bytes} += $bytes;
 2069     $self->{stat_code_day}{$code}{$self->{last_day}}{elapsed} += $elapsed;
 2070 
 2071     $self->{stat_throughput_hour}{$code}{$hour}{bytes} += $bytes;
 2072     $self->{stat_throughput_day}{$code}{$self->{last_day}}{bytes} += $bytes;
 2073     $self->{stat_throughput_hour}{$code}{$hour}{elapsed} += $elapsed;
 2074     $self->{stat_throughput_day}{$code}{$self->{last_day}}{elapsed} += $elapsed;
 2075 
 2076     #### Store url statistics
 2077     if ($self->{UrlReport}) {
 2078         $self->{stat_user_url_hour}{$id}{$dest}{duration} += $elapsed;
 2079         $self->{stat_user_url_hour}{$id}{$dest}{hits}++;
 2080         $self->{stat_user_url_hour}{$id}{$dest}{bytes} += $bytes;
 2081         $self->{stat_user_url_hour}{$id}{$dest}{firsthit} = $time if (!$self->{stat_user_url_hour}{$id}{$dest}{firsthit} || ($time < $self->{stat_user_url_hour}{$id}{$dest}{firsthit}));
 2082         $self->{stat_user_url_hour}{$id}{$dest}{lasthit} = $time if (!$self->{stat_user_url_hour}{$id}{$dest}{lasthit} || ($time > $self->{stat_user_url_hour}{$id}{$dest}{lasthit}));
 2083         if (!exists $self->{stat_user_url_hour}{$id}{$dest}{arr_last} || ($#{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}} < 9) || ($time > ($self->{stat_user_url_hour}{$id}{$dest}{arr_last}[-1]+300))) {
 2084             push(@{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}}, $time);
 2085             shift(@{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}}) if ($#{$self->{stat_user_url_hour}{$id}{$dest}{arr_last}} > 9);
 2086         }
 2087         $self->{stat_user_url_day}{$id}{$dest}{duration} += $elapsed;
 2088         $self->{stat_user_url_day}{$id}{$dest}{hits}++;
 2089         $self->{stat_user_url_day}{$id}{$dest}{firsthit} = $time if (!$self->{stat_user_url_day}{$id}{$dest}{firsthit} || ($time < $self->{stat_user_url_day}{$id}{$dest}{firsthit}));
 2090         $self->{stat_user_url_day}{$id}{$dest}{lasthit} = $time if (!$self->{stat_user_url_day}{$id}{$dest}{lasthit} || ($time > $self->{stat_user_url_day}{$id}{$dest}{lasthit}));
 2091         $self->{stat_user_url_day}{$id}{$dest}{bytes} += $bytes;
 2092         if ($code eq 'HIT') {
 2093             $self->{stat_user_url_day}{$id}{$dest}{cache_hit}++;
 2094             $self->{stat_user_url_day}{$id}{$dest}{cache_bytes} += $bytes;
 2095         }
 2096         if (!exists $self->{stat_user_url_day}{$id}{$dest}{arr_last} || ($#{$self->{stat_user_url_day}{$id}{$dest}{arr_last}} < 9) || ($time > ($self->{stat_user_url_day}{$id}{$dest}{arr_last}[-1]+1800))) {
 2097             push(@{$self->{stat_user_url_day}{$id}{$dest}{arr_last}}, $time);
 2098             shift(@{$self->{stat_user_url_day}{$id}{$dest}{arr_last}}) if ($#{$self->{stat_user_url_day}{$id}{$dest}{arr_last}} > 9);
 2099         }
 2100     }
 2101 
 2102     #### Store user per networks statistics
 2103     if ($self->{UserReport}) {
 2104         $self->{stat_netuser_hour}{$network}{$id}{duration} += $elapsed;
 2105         $self->{stat_netuser_hour}{$network}{$id}{bytes} += $bytes;
 2106         $self->{stat_netuser_hour}{$network}{$id}{hits}++;
 2107         if ($bytes > $self->{stat_netuser_hour}{$network}{$id}{largest_file_size}) {
 2108             $self->{stat_netuser_hour}{$network}{$id}{largest_file_size} = $bytes;
 2109             $self->{stat_netuser_hour}{$network}{$id}{largest_file_url} = $url;
 2110         }
 2111         $self->{stat_netuser_day}{$network}{$id}{duration} += $elapsed;
 2112         $self->{stat_netuser_day}{$network}{$id}{bytes} += $bytes;
 2113         $self->{stat_netuser_day}{$network}{$id}{hits}++;
 2114         if ($bytes > $self->{stat_netuser_day}{$network}{$id}{largest_file_size}) {
 2115             $self->{stat_netuser_day}{$network}{$id}{largest_file_size} = $bytes;
 2116             $self->{stat_netuser_day}{$network}{$id}{largest_file_url} = $url;
 2117         }
 2118     }
 2119 
 2120     #### Store mime type statistics
 2121     $self->{stat_mime_type_hour}{"$type"}{hits}++;
 2122     $self->{stat_mime_type_hour}{"$type"}{bytes} += $bytes;
 2123     $self->{stat_mime_type_day}{"$type"}{hits}++;
 2124     $self->{stat_mime_type_day}{"$type"}{bytes} += $bytes;
 2125 }
 2126 
 2127 sub _load_history
 2128 {
 2129     my ($self, $type, $year, $month, $day, $path, $kind, $wn, @wd) = @_;
 2130 
 2131     #### Load history
 2132     if ($type eq 'day') {
 2133         foreach my $d ("01" .. "31") {
 2134             $self->_read_stat($year, $month, $d, 'day', $kind);
 2135         }
 2136     } elsif ($type eq 'week') {
 2137         $path = "$year/week$wn";
 2138         foreach my $wdate (@wd) {
 2139             $wdate =~ /^(\d+)-(\d+)-(\d+)$/;
 2140             $self->_read_stat($1, $2, $3, 'day', $kind, $wn);
 2141         }
 2142         $type = 'day';
 2143     } elsif ($type eq 'month') {
 2144         foreach my $m ("01" .. "12") {
 2145             $self->_read_stat($year, $m, $day, 'month', $kind);
 2146         }
 2147     } else {
 2148         $self->_read_stat($year, $month, $day, '', $kind);
 2149     }
 2150 
 2151 }
 2152 
 2153 sub _append_stat
 2154 {
 2155     my ($self, $year, $month, $day) = @_;
 2156 
 2157     my $read_type = '';
 2158     my $type = 'hour';
 2159     if (!$day) {
 2160         $type = 'day';
 2161     }
 2162     if (!$month) {
 2163         $type = 'month';
 2164     }
 2165     $read_type = $type;
 2166 
 2167     my $path = join('/', $year, $month, $day);
 2168     $path =~ s/[\/]+$//;
 2169 
 2170     print STDERR "Appending data into $self->{Output}/$path\n" if (!$self->{QuietMode});
 2171 
 2172     #### Save cache statistics
 2173     my $dat_file_code = new IO::File;
 2174     $dat_file_code->open(">>$self->{Output}/$path/stat_code.dat")
 2175         or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_code.dat, $!\n");
 2176     flock($dat_file_code, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2177     $self->_write_stat_data($dat_file_code, $type, 'stat_code');
 2178     $dat_file_code->close();
 2179 
 2180     #### With huge log file we only store global statistics in year and month views
 2181     return if ( $self->{no_year_stat} && ($type ne 'hour') );
 2182 
 2183     #### Save url statistics per user
 2184     if ($self->{UrlReport}) {
 2185         my $dat_file_user_url = new IO::File;
 2186         $dat_file_user_url->open(">>$self->{Output}/$path/stat_user_url.dat")
 2187             or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user_url.dat, $!\n");
 2188         flock($dat_file_user_url, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2189         $self->_write_stat_data($dat_file_user_url, $type, 'stat_user_url');
 2190         $dat_file_user_url->close();
 2191         # Denied URL
 2192         my $dat_file_denied_url = new IO::File;
 2193         $dat_file_denied_url->open(">>$self->{Output}/$path/stat_denied_url.dat")
 2194             or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_denied_url.dat, $!\n");
 2195         flock($dat_file_denied_url, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2196         $self->_write_stat_data($dat_file_denied_url, $type, 'stat_denied_url');
 2197         $dat_file_denied_url->close();
 2198     }
 2199 
 2200     #### Save user statistics
 2201     if ($self->{UserReport}) {
 2202         my $dat_file_user = new IO::File;
 2203         $dat_file_user->open(">>$self->{Output}/$path/stat_user.dat")
 2204             or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user.dat, $!\n");
 2205         flock($dat_file_user, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2206         $self->_write_stat_data($dat_file_user, $type, 'stat_user');
 2207         $dat_file_user->close();
 2208     }
 2209 
 2210     #### Save network statistics
 2211     my $dat_file_network = new IO::File;
 2212     $dat_file_network->open(">>$self->{Output}/$path/stat_network.dat")
 2213         or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_network.dat, $!\n");
 2214     flock($dat_file_network, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2215     $self->_write_stat_data($dat_file_network, $type, 'stat_network');
 2216     $dat_file_network->close();
 2217 
 2218     #### Save user per network statistics
 2219     if ($self->{UserReport}) {
 2220         my $dat_file_netuser = new IO::File;
 2221         $dat_file_netuser->open(">>$self->{Output}/$path/stat_netuser.dat")
 2222             or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_netuser.dat, $!\n");
 2223         flock($dat_file_netuser, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2224         $self->_write_stat_data($dat_file_netuser, $type, 'stat_netuser');
 2225         $dat_file_netuser->close();
 2226     }
 2227 
 2228     #### Save mime statistics
 2229     my $dat_file_mime_type = new IO::File;
 2230     $dat_file_mime_type->open(">>$self->{Output}/$path/stat_mime_type.dat")
 2231         or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_mime_type.dat, $!\n");
 2232     flock($dat_file_mime_type, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2233     $self->_write_stat_data($dat_file_mime_type, $type, 'stat_mime_type');
 2234     $dat_file_mime_type->close();
 2235     
 2236 }
 2237 
 2238 sub _save_stat
 2239 {
 2240     my ($self, $year, $month, $day, $wn, @wd) = @_;
 2241 
 2242     my $path = join('/', $year, $month, $day);
 2243     $path =~ s/[\/]+$//;
 2244 
 2245     my $read_type = '';
 2246     my $type = 'hour';
 2247     if (!$day) {
 2248         $type = 'day';
 2249     }
 2250     if ($wn) {
 2251         $type = 'week';
 2252         $path = "$year/week$wn";
 2253     }
 2254     if (!$month) {
 2255         $type = 'month';
 2256     }
 2257     $read_type = $type;
 2258 
 2259     print STDERR "Saving data into $self->{Output}/$path\n" if (!$self->{QuietMode});
 2260 
 2261     #### Save cache statistics
 2262     my $dat_file_code = new IO::File;
 2263     $self->_load_history($read_type, $year, $month, $day, $path, 'stat_code', $wn, @wd);
 2264     $dat_file_code->open(">$self->{Output}/$path/stat_code.dat")
 2265         or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_code.dat, $!\n");
 2266     flock($dat_file_code, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2267     $self->_write_stat_data($dat_file_code, $type, 'stat_code');
 2268     $dat_file_code->close();
 2269 
 2270     #### With huge log file we only store global statistics in year and month views
 2271     return if ( $self->{no_year_stat} && (($type ne 'hour') && !$wn) );
 2272 
 2273     #### Save url statistics per user
 2274     if ($self->{UrlReport}) {
 2275         my $dat_file_user_url = new IO::File;
 2276         $self->_load_history($read_type, $year, $month, $day, $path, 'stat_user_url', $wn, @wd);
 2277         $dat_file_user_url->open(">$self->{Output}/$path/stat_user_url.dat")
 2278             or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user_url.dat, $!\n");
 2279         flock($dat_file_user_url, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2280         $self->_write_stat_data($dat_file_user_url, $type, 'stat_user_url');
 2281         $dat_file_user_url->close();
 2282         # Denied URL
 2283         my $dat_file_denied_url = new IO::File;
 2284         $self->_load_history($read_type, $year, $month, $day, $path, 'stat_denied_url', $wn, @wd);
 2285         $dat_file_denied_url->open(">$self->{Output}/$path/stat_denied_url.dat")
 2286             or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_denied_url.dat, $!\n");
 2287         flock($dat_file_denied_url, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2288         $self->_write_stat_data($dat_file_denied_url, $type, 'stat_denied_url');
 2289         $dat_file_denied_url->close();
 2290     }
 2291 
 2292     #### Save user statistics
 2293     if ($self->{UserReport}) {
 2294         my $dat_file_user = new IO::File;
 2295         $self->_load_history($read_type, $year, $month, $day, $path, 'stat_user', $wn, @wd);
 2296         $dat_file_user->open(">$self->{Output}/$path/stat_user.dat")
 2297             or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_user.dat, $!\n");
 2298         flock($dat_file_user, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2299         $self->_write_stat_data($dat_file_user, $type, 'stat_user');
 2300         $dat_file_user->close();
 2301     }
 2302 
 2303     #### Save network statistics
 2304     my $dat_file_network = new IO::File;
 2305     $self->_load_history($read_type, $year, $month, $day, $path, 'stat_network', $wn, @wd);
 2306     $dat_file_network->open(">$self->{Output}/$path/stat_network.dat")
 2307         or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_network.dat, $!\n");
 2308     flock($dat_file_network, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2309     $self->_write_stat_data($dat_file_network, $type, 'stat_network');
 2310     $dat_file_network->close();
 2311 
 2312     #### Save user per network statistics
 2313     if ($self->{UserReport}) {
 2314         my $dat_file_netuser = new IO::File;
 2315         $self->_load_history($read_type, $year, $month, $day, $path, 'stat_netuser', $wn, @wd);
 2316         $dat_file_netuser->open(">$self->{Output}/$path/stat_netuser.dat")
 2317             or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_netuser.dat, $!\n");
 2318         flock($dat_file_netuser, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2319         $self->_write_stat_data($dat_file_netuser, $type, 'stat_netuser');
 2320         $dat_file_netuser->close();
 2321     }
 2322 
 2323     #### Save mime statistics
 2324     my $dat_file_mime_type = new IO::File;
 2325     $self->_load_history($read_type, $year, $month, $day, $path, 'stat_mime_type', $wn, @wd);
 2326     $dat_file_mime_type->open(">$self->{Output}/$path/stat_mime_type.dat")
 2327         or $self->localdie("ERROR: Can't write to file $self->{Output}/$path/stat_mime_type.dat, $!\n");
 2328     flock($dat_file_mime_type, 2) || die "FATAL: can't acquire lock on file, $!\n";
 2329     $self->_write_stat_data($dat_file_mime_type, $type, 'stat_mime_type');
 2330     $dat_file_mime_type->close();
 2331 
 2332 }
 2333 
 2334 sub _write_stat_data
 2335 {
 2336     my ($self, $fh, $type, $kind) = @_;
 2337 
 2338     $type = 'day' if ($type eq 'week');
 2339 
 2340     #### Save cache statistics
 2341     if ($kind eq 'stat_code') {
 2342         foreach my $code (sort {$a cmp $b} keys %{$self->{"stat_code_$type"}}) {
 2343             $fh->print("$code " .  "hits_$type=");
 2344             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_code_$type"}{$code}}) {
 2345                 $fh->print("$tmp:" . $self->{"stat_code_$type"}{$code}{$tmp}{hits} . ",");
 2346             }
 2347             $fh->print(";bytes_$type=");
 2348             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_code_$type"}{$code}}) {
 2349                 $fh->print("$tmp:" . $self->{"stat_code_$type"}{$code}{$tmp}{bytes} . ",");
 2350             }
 2351             $fh->print(";thp_bytes_$type=");
 2352             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_throughput_$type"}{$code}}) {
 2353                 $fh->print("$tmp:" . $self->{"stat_throughput_$type"}{$code}{$tmp}{bytes} . ",");
 2354             }
 2355             $fh->print(";thp_duration_$type=");
 2356             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_throughput_$type"}{$code}}) {
 2357                 $fh->print("$tmp:" . $self->{"stat_throughput_$type"}{$code}{$tmp}{elapsed} . ",");
 2358             }
 2359             $fh->print("\n");
 2360         }
 2361         $self->{"stat_code_$type"} = ();
 2362     }
 2363 
 2364     #### Save denied url statistics per user
 2365     if ($kind eq 'stat_denied_url') {
 2366         foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_denied_url_$type"}}) {
 2367             foreach my $dest (keys %{$self->{"stat_denied_url_$type"}{$id}}) {
 2368                 next if (!$dest);
 2369                 my $u = $id;
 2370                 $u = '-' if (!$self->{UserReport});
 2371                 my $bl = '';
 2372                 if (exists $self->{"stat_denied_url_$type"}{$id}{$dest}{blacklist}) {
 2373                     foreach my $b (keys %{$self->{"stat_denied_url_$type"}{$id}{$dest}{blacklist}}) {
 2374                         $bl .= $b . ',' . $self->{"stat_denied_url_$type"}{$id}{$dest}{blacklist}{$b} . ',';
 2375                     }
 2376                     $bl =~ s/,$//;
 2377                 }
 2378                 $fh->print(
 2379                     "$id hits=" . $self->{"stat_denied_url_$type"}{$id}{$dest}{hits} . ";" .
 2380                     "first=" . $self->{"stat_denied_url_$type"}{$id}{$dest}{firsthit} . ";" .
 2381                     "last=" . $self->{"stat_denied_url_$type"}{$id}{$dest}{lasthit} . ";" .
 2382                     "url=$dest" . ";" .
 2383                     "blacklist=" . $bl .
 2384                     "\n");
 2385             }
 2386         }
 2387         $self->{"stat_denied_url_$type"} = ();
 2388     }
 2389 
 2390     #### Save url statistics per user
 2391     if ($kind eq 'stat_user_url') {
 2392         foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_user_url_$type"}}) {
 2393             my $i = 0;
 2394             foreach my $dest (sort  {
 2395                 $self->{"stat_user_url_$type"}{$id}{$b}{$self->{OrderUrl}} <=>  $self->{"stat_user_url_$type"}{$id}{$a}{$self->{OrderUrl}}
 2396                         } keys %{$self->{"stat_user_url_$type"}{$id}}) {
 2397                 last if ($self->{TopStorage} && ($i > $self->{TopStorage}));
 2398                 my $u = $id;
 2399                 $u = '-' if (!$self->{UserReport});
 2400                 $i++;
 2401                 $fh->print(
 2402                     "$id hits=" . $self->{"stat_user_url_$type"}{$id}{$dest}{hits} . ";" .
 2403                     "bytes=" . $self->{"stat_user_url_$type"}{$id}{$dest}{bytes} . ";" .
 2404                     "duration=" . $self->{"stat_user_url_$type"}{$id}{$dest}{duration} . ";" .
 2405                     "first=" . $self->{"stat_user_url_$type"}{$id}{$dest}{firsthit} . ";" .
 2406                     "last=" . $self->{"stat_user_url_$type"}{$id}{$dest}{lasthit} . ";" .
 2407                     "url=$dest;" .
 2408                     "cache_hit=" . ($self->{"stat_user_url_$type"}{$id}{$dest}{cache_hit}||0) . ";" .
 2409                     "cache_bytes=" . ($self->{"stat_user_url_$type"}{$id}{$dest}{cache_bytes}||0) . ";" .
 2410                     "arr_last=" . join(',', @{$self->{"stat_user_url_$type"}{$id}{$dest}{arr_last}}) . "\n");
 2411             }
 2412         }
 2413         $self->{"stat_user_url_$type"} = ();
 2414     }
 2415 
 2416     #### Save user statistics
 2417     if ($kind eq 'stat_user') {
 2418         foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_user_$type"}}) {
 2419             my $name = $id;
 2420             $name =~ s/\s+//g;
 2421             $fh->print("$name hits_$type=");
 2422             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_user_$type"}{$id}}) {
 2423                 $fh->print("$tmp:" . $self->{"stat_user_$type"}{$id}{$tmp}{hits} . ",");
 2424             }
 2425             $fh->print(";bytes_$type=");
 2426             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_user_$type"}{$id}}) {
 2427                 $fh->print("$tmp:" . $self->{"stat_user_$type"}{$id}{$tmp}{bytes} . ",");
 2428             }
 2429             $fh->print(";duration_$type=");
 2430             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_user_$type"}{$id}}) {
 2431                 $fh->print("$tmp:" . $self->{"stat_user_$type"}{$id}{$tmp}{duration} . ",");
 2432             }
 2433             $fh->print(";largest_file_size=" . $self->{"stat_usermax_$type"}{$id}{largest_file_size});
 2434             $fh->print(";largest_file_url=" . $self->{"stat_usermax_$type"}{$id}{largest_file_url} . "\n");
 2435         }
 2436         $self->{"stat_user_$type"} = ();
 2437         $self->{"stat_usermax_$type"} = ();
 2438     }
 2439 
 2440     #### Save network statistics
 2441     if ($kind eq 'stat_network') {
 2442         foreach my $net (sort {$a cmp $b} keys %{$self->{"stat_network_$type"}}) {
 2443             $fh->print("$net\thits_$type=");
 2444             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_network_$type"}{$net}}) {
 2445                 $fh->print("$tmp:" . $self->{"stat_network_$type"}{$net}{$tmp}{hits} . ",");
 2446             }
 2447             $fh->print(";bytes_$type=");
 2448             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_network_$type"}{$net}}) {
 2449                 $fh->print("$tmp:" . $self->{"stat_network_$type"}{$net}{$tmp}{bytes} . ",");
 2450             }
 2451             $fh->print(";duration_$type=");
 2452             foreach my $tmp (sort {$a <=> $b} keys %{$self->{"stat_network_$type"}{$net}}) {
 2453                 $fh->print("$tmp:" . $self->{"stat_network_$type"}{$net}{$tmp}{duration} . ",");
 2454             }
 2455             $fh->print(";largest_file_size=" . $self->{"stat_netmax_$type"}{$net}{largest_file_size});
 2456             $fh->print(";largest_file_url=" . $self->{"stat_netmax_$type"}{$net}{largest_file_url} . "\n");
 2457         }
 2458         $self->{"stat_network_$type"} = ();
 2459         $self->{"stat_netmax_$type"} = ();
 2460     }
 2461 
 2462     #### Save user per network statistics
 2463     if ($kind eq 'stat_netuser') {
 2464         foreach my $net (sort {$a cmp $b} keys %{$self->{"stat_netuser_$type"}}) {
 2465             foreach my $id (sort {$a cmp $b} keys %{$self->{"stat_netuser_$type"}{$net}}) {
 2466                 $fh->print("$net\t$id\thits=" . $self->{"stat_netuser_$type"}{$net}{$id}{hits} . ";" .
 2467                     "bytes=" . $self->{"stat_netuser_$type"}{$net}{$id}{bytes} . ";" .
 2468                     "duration=" . $self->{"stat_netuser_$type"}{$net}{$id}{duration} . ";");
 2469                 $fh->print("largest_file_size=" .
 2470                     $self->{"stat_netuser_$type"}{$net}{$id}{largest_file_size} . ";" .
 2471                     "largest_file_url=" . $self->{"stat_netuser_$type"}{$net}{$id}{largest_file_url} . "\n");
 2472             }
 2473         }
 2474         $self->{"stat_netuser_$type"} = ();
 2475     }
 2476 
 2477     #### Save mime statistics
 2478     if ($kind eq 'stat_mime_type') {
 2479         foreach my $mime (sort {$a cmp $b} keys %{$self->{"stat_mime_type_$type"}}) {
 2480             $fh->print("$mime hits=" . $self->{"stat_mime_type_$type"}{$mime}{hits} . ";" .
 2481                 "bytes=" . $self->{"stat_mime_type_$type"}{$mime}{bytes} .  "\n");
 2482         }
 2483         $self->{"stat_mime_type_$type"} = ();
 2484     }
 2485     
 2486 }
 2487 
 2488 sub _read_stat
 2489 {
 2490     my ($self, $year, $month, $day, $sum_type, $kind, $wn) = @_;
 2491 
 2492     my $type = 'hour';
 2493     if (!$day) {
 2494         $type = 'day';
 2495     }
 2496     if (!$month) {
 2497         $type = 'month';
 2498     }
 2499 
 2500     my $path = join('/',  $year, $month, $day);
 2501     $path =~ s/[\/]+$//;
 2502 
 2503     return if (! -d "$self->{Output}/$path");
 2504 
 2505     my $k = '';
 2506     my $key = '';
 2507     $key = $day if ($sum_type eq 'day');
 2508     $key = $month if ($sum_type eq 'month');
 2509     $sum_type ||= $type;
 2510 
 2511     #### Read previous cache statistics
 2512     if (!$kind || ($kind eq 'stat_code')) {
 2513         my $dat_file_code = new IO::File;
 2514         if ($dat_file_code->open("$self->{Output}/$path/stat_code.dat")) {
 2515             my $i = 1;
 2516             my $error = 0;
 2517             while (my $l = <$dat_file_code>) {
 2518                 chomp($l);
 2519                 if ($l =~ s/^([^\s]+)\s+hits_$type=([^;]+);bytes_$type=([^;]+)//) {
 2520                     my $code = $1;
 2521                     my $hits = $2 || '';
 2522                     my $bytes = $3 || '';
 2523                     $hits =~ s/,$//;
 2524                     $bytes =~ s/,$//;
 2525                     my %hits_tmp = split(/[:,]/, $hits);
 2526                     foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) {
 2527                         if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2528                         $self->{"stat_code_$sum_type"}{$code}{$k}{hits} += $hits_tmp{$tmp};
 2529                     }
 2530                     my %bytes_tmp = split(/[:,]/, $bytes);
 2531                     foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
 2532                         if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2533                         $self->{"stat_code_$sum_type"}{$code}{$k}{bytes} += $bytes_tmp{$tmp};
 2534                     }
 2535                     if ($l =~ s/thp_bytes_$type=([^;]+);thp_duration_$type=([^;]+)//) {
 2536                         $bytes = $1 || '';
 2537                         my $elapsed = $2 || '';
 2538                         $elapsed =~ s/,$//;
 2539                         my %bytes_tmp = split(/[:,]/, $bytes);
 2540                         foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
 2541                             if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2542                             $self->{"stat_throughput_$sum_type"}{$code}{$k}{bytes} += $bytes_tmp{$tmp};
 2543                         }
 2544                         my %elapsed_tmp = split(/[:,]/, $elapsed);
 2545                         foreach my $tmp (sort {$a <=> $b} keys %elapsed_tmp) {
 2546                             if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2547                             $self->{"stat_throughput_$sum_type"}{$code}{$k}{elapsed} += $elapsed_tmp{$tmp};
 2548                         }
 2549                     }
 2550                 } else {
 2551                     print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_code.dat\n";
 2552                     print STDERR "$l\n";
 2553                     if ($error > $self->{MaxFormatError}) {
 2554                         unlink($self->{pidfile});
 2555                         exit 0;
 2556                     }
 2557                     $error++;
 2558                 }
 2559                 $i++;
 2560             }
 2561             $dat_file_code->close();
 2562         }
 2563     }
 2564 
 2565     #### With huge log file we only store global statistics in year and month views
 2566     return if ($self->{no_year_stat} && ($type ne 'hour'));
 2567 
 2568     #### Read previous client statistics
 2569     if (!$kind || ($kind eq 'stat_user')) {
 2570         my $dat_file_user = new IO::File;
 2571         if ($dat_file_user->open("$self->{Output}/$path/stat_user.dat")) {
 2572             my $i = 1;
 2573             my $error = 0;
 2574             while (my $l = <$dat_file_user>) {
 2575                 chomp($l);
 2576                 if ($l =~ s/^([^\s]+)\s+hits_$type=([^;]+);bytes_$type=([^;]+);duration_$type=([^;]+);largest_file_size=([^;]*);largest_file_url=(.*)$//) {
 2577                     my $id = $1;
 2578                     my $hits = $2 || '';
 2579                     my $bytes = $3 || '';
 2580                     my $duration = $4 || '';
 2581                     my $lsize = $5 || 0;
 2582                     my $lurl = $6 || 0;
 2583 
 2584                     if ($self->{rebuild}) {
 2585                         next if (!$self->check_inclusions($id));
 2586                         next if ($self->check_exclusions($id));
 2587                     }
 2588 
 2589                     # Anonymize all users
 2590                     if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) {
 2591                         if (!exists $self->{AnonymizedId}{$id}) {
 2592                             $self->{AnonymizedId}{$id} = &anonymize_id();
 2593                         }
 2594                         $id = $self->{AnonymizedId}{$id};
 2595                     }
 2596 
 2597                     if ($lsize > $self->{"stat_usermax_$sum_type"}{$id}{largest_file_size}) {
 2598                         $self->{"stat_usermax_$sum_type"}{$id}{largest_file_size} = $lsize;
 2599                         $self->{"stat_usermax_$sum_type"}{$id}{largest_file_url} = $lurl;
 2600                     }
 2601                     $hits =~ s/,$//;
 2602                     $bytes =~ s/,$//;
 2603                     $duration =~ s/,$//;
 2604                     my %hits_tmp = split(/[:,]/, $hits);
 2605                     foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) {
 2606                         if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2607                         $self->{"stat_user_$sum_type"}{$id}{$k}{hits} += $hits_tmp{$tmp};
 2608                     }
 2609                     my %bytes_tmp = split(/[:,]/, $bytes);
 2610                     foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
 2611                         if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2612                         $self->{"stat_user_$sum_type"}{$id}{$k}{bytes} += $bytes_tmp{$tmp};
 2613                     }
 2614                     my %duration_tmp = split(/[:,]/, $duration);
 2615                     foreach my $tmp (sort {$a <=> $b} keys %duration_tmp) {
 2616                         if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2617                         $self->{"stat_user_$sum_type"}{$id}{$k}{duration} += $duration_tmp{$tmp};
 2618                     }
 2619                 } else {
 2620                     print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_user.dat:\n";
 2621                     print STDERR "$l\n";
 2622                     if ($error > $self->{MaxFormatError}) {
 2623                         unlink($self->{pidfile});
 2624                         exit 0;
 2625                     }
 2626                     $error++;
 2627                 }
 2628                 $i++;
 2629             }
 2630             $dat_file_user->close();
 2631         }
 2632     }
 2633 
 2634     #### Read previous url statistics
 2635     if ($self->{UrlReport}) {
 2636 
 2637         if (!$kind || ($kind eq 'stat_user_url')) {
 2638             my $dat_file_user_url = new IO::File;
 2639             if ($dat_file_user_url->open("$self->{Output}/$path/stat_user_url.dat")) {
 2640                 my $i = 1;
 2641                 my $error = 0;
 2642                 while (my $l = <$dat_file_user_url>) {
 2643                     chomp($l);
 2644                     my $id = '';
 2645                     if ($l =~ /^([^\s]+)\s+hits=/) {
 2646                         $id = $1;
 2647                     }
 2648                     $id = '-' if (!$self->{UserReport});
 2649 
 2650                     if ($self->{rebuild}) {
 2651                         next if (!$self->check_inclusions($id));
 2652                         next if ($self->check_exclusions($id));
 2653                     }
 2654 
 2655                     # Anonymize all users
 2656                     if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) {
 2657                         if (!exists $self->{AnonymizedId}{$id}) {
 2658                             $self->{AnonymizedId}{$id} = &anonymize_id();
 2659                         }
 2660                         $id = $self->{AnonymizedId}{$id};
 2661                     }
 2662 
 2663                     if ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*?);cache_hit=(\d*);cache_bytes=(\d*)//) {
 2664                         my $url = $7;
 2665                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{hits} += $2;
 2666                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{bytes} += $3;
 2667                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{duration} += abs($4);
 2668                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} = $5 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} || ($5 < $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit}));
 2669                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} = $6 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} || ($6 > $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit}));
 2670                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{cache_hit} += $8;
 2671                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{cache_bytes} += $9;
 2672                         if ($self->{rebuild}) {
 2673                             if ($self->check_exclusions('', '', $url)) {
 2674                                 delete $self->{"stat_user_url_$sum_type"}{$id}{"$url"};
 2675                                 next;
 2676                             }
 2677                         }
 2678                         if ($l =~ s/^;arr_last=(.*)//) {
 2679                             my $incr = 1800;
 2680                             $incr = 300 if ($sum_type eq 'hour');
 2681                             $incr = 86400 if ($sum_type eq 'month');
 2682                             foreach my $tm (split(/,/, $1)) {
 2683                                 if (!exists $self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last} || ($#{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}} < 9) || ($tm > ${$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}}[-1] + $incr)) { 
 2684                                     push(@{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}}, $tm);
 2685                                     shift(@{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}}) if ($#{$self->{"stat_user_url_$sum_type"}{$id}{$url}{arr_last}} > 9);
 2686                                 }
 2687                             }
 2688                         }
 2689                     } elsif ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+);duration=([\-\d]+);first=([^;]*);last=([^;]*);url=(.*)$//) {
 2690                         my $url = $7;
 2691                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{hits} += $2;
 2692                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{bytes} += $3;
 2693                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{duration} += abs($4);
 2694                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} = $5 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit} || ($5 < $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{firsthit}));
 2695                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} = $6 if (!$self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit} || ($6 > $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{lasthit}));
 2696                         if ($self->{rebuild}) {
 2697                             if ($self->check_exclusions('', '', $url)) {
 2698                                 delete $self->{"stat_user_url_$sum_type"}{$id}{"$url"};
 2699                                 next;
 2700                             }
 2701                         }
 2702                     } elsif ($l =~ s/^([^\s]+)\s+hits=(\d+);bytes=(\d+);duration=([\-\d]+);url=(.*)$//) {
 2703                         my $url = $5;
 2704                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{hits} += $2;
 2705                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{bytes} += $3;
 2706                         $self->{"stat_user_url_$sum_type"}{$id}{"$url"}{duration} += abs($4);
 2707                         if ($self->{rebuild}) {
 2708                             if ($self->check_exclusions('', '', $url)) {
 2709                                 delete $self->{"stat_user_url_$sum_type"}{$id}{"$url"};
 2710                                 next;
 2711                             }
 2712                         }
 2713                     } else {
 2714                         print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_user_url.dat\n";
 2715                         print STDERR "$l\n";
 2716                         if ($error > $self->{MaxFormatError}) {
 2717                             unlink($self->{pidfile});
 2718                             exit 0;
 2719                         }
 2720                         $error++;
 2721                     }
 2722                     $i++;
 2723                 }
 2724                 $dat_file_user_url->close();
 2725             }
 2726         }
 2727 
 2728         if (!$kind || ($kind eq 'stat_denied_url')) {
 2729 
 2730             my $dat_file_denied_url = new IO::File;
 2731             if ($dat_file_denied_url->open("$self->{Output}/$path/stat_denied_url.dat")) {
 2732                 my $i = 1;
 2733                 my $error = 0;
 2734                 while (my $l = <$dat_file_denied_url>) {
 2735                     chomp($l);
 2736                     my $id = '';
 2737                     if ($l =~ /^([^\s]+)\s+hits=/) {
 2738                         $id = $1;
 2739                     }
 2740                     $id = '-' if (!$self->{UserReport});
 2741 
 2742                     if ($self->{rebuild}) {
 2743                         next if (!$self->check_inclusions($id));
 2744                         next if ($self->check_exclusions($id));
 2745                     }
 2746 
 2747                     # Anonymize all denieds
 2748                     if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) {
 2749                         if (!exists $self->{AnonymizedId}{$id}) {
 2750                             $self->{AnonymizedId}{$id} = &anonymize_id();
 2751                         }
 2752                         $id = $self->{AnonymizedId}{$id};
 2753                     }
 2754 
 2755                     if ($l =~ s/^([^\s]+)\s+hits=(\d+);first=([^;]*);last=([^;]*);url=(.*);blacklist=(.*)//) {
 2756                         if ($self->{rebuild}) {
 2757                             next if ($self->check_exclusions('', '', $5));
 2758                         }
 2759                         $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{hits} += $2;
 2760                         $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} = $3 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} || ($3 < $self->{"stat_denied_url_$sum_type"}{$id}{"$7"}{firsthit}));
 2761                         $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} = $4 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} || ($4 > $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit}));
 2762                         if ($6) {
 2763                             my %tmp = split(/,/, $6);
 2764                             foreach my $k (keys %tmp) {
 2765                                 $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{blacklist}{$k} += $tmp{$k};
 2766                             }
 2767                         }
 2768                     } elsif ($l =~ s/^([^\s]+)\s+hits=(\d+);first=([^;]*);last=([^;]*);url=(.*)//) {
 2769                         if ($self->{rebuild}) {
 2770                             next if ($self->check_exclusions('', '', $5));
 2771                         }
 2772                         $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{hits} += $2;
 2773                         $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} = $3 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{firsthit} || ($3 < $self->{"stat_denied_url_$sum_type"}{$id}{"$7"}{firsthit}));
 2774                         $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} = $4 if (!$self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit} || ($4 > $self->{"stat_denied_url_$sum_type"}{$id}{"$5"}{lasthit}));
 2775                     } elsif ($l =~ /^([^\s]+)\s+hits=;first=;last=;url=/) {
 2776                         # do nothing, this should not appears, but fixes issue #81
 2777                     } else {
 2778                         print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_denied_url.dat\n";
 2779                         print STDERR "$l\n";
 2780                         if ($error > $self->{MaxFormatError}) {
 2781                             unlink($self->{pidfile});
 2782                             exit 0;
 2783                         }
 2784                         $error++;
 2785                     }
 2786                     $i++;
 2787                 }
 2788                 $dat_file_denied_url->close();
 2789             }
 2790         }
 2791 
 2792     }
 2793 
 2794     #### Read previous network statistics
 2795     if (!$kind || ($kind eq 'stat_network')) {
 2796         my $dat_file_network = new IO::File;
 2797         if ($dat_file_network->open("$self->{Output}/$path/stat_network.dat")) {
 2798             my $i = 1;
 2799             my $error = 0;
 2800             while (my $l = <$dat_file_network>) {
 2801                 chomp($l);
 2802                 my ($net, $data) = split(/\t/, $l);
 2803                 if (!$data) {
 2804                     # Assume backward compatibility
 2805                     $l =~ s/^(.*)\shits_$type=/hits_$type=/;
 2806                     $net = $1;
 2807                     $data = $l;
 2808                 }
 2809 
 2810                 if ($self->{rebuild} && !exists $self->{NetworkAlias}->{$net}) {
 2811                     next if (!$self->check_inclusions('', $net));
 2812                     next if ($self->check_exclusions('', $net));
 2813 
 2814                 }
 2815 
 2816                 if ($self->{UpdateAlias}) {
 2817                     # Replace network by his aliases if any
 2818                     $net = (!$self->{has_nework_alias}) ? $net : $self->apply_network_alias($net)
 2819                 }
 2820 
 2821                 if ($data =~ s/^hits_$type=([^;]+);bytes_$type=([^;]+);duration_$type=([^;]+);largest_file_size=([^;]*);largest_file_url=(.*)$//) {
 2822                     my $hits = $1 || '';
 2823                     my $bytes = $2 || '';
 2824                     my $duration = $3 || '';
 2825 
 2826                     if ($4 > $self->{"stat_netmax_$sum_type"}{$net}{largest_file_size}) {
 2827                         $self->{"stat_netmax_$sum_type"}{$net}{largest_file_size} = $4;
 2828                         $self->{"stat_netmax_$sum_type"}{$net}{largest_file_url} = $5;
 2829                     }
 2830                     $hits =~ s/,$//;
 2831                     $bytes =~ s/,$//;
 2832                     $duration =~ s/,$//;
 2833                     my %hits_tmp = split(/[:,]/, $hits);
 2834                     foreach my $tmp (sort {$a <=> $b} keys %hits_tmp) {
 2835                         if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2836                         $self->{"stat_network_$sum_type"}{$net}{$k}{hits} += $hits_tmp{$tmp};
 2837                     }
 2838                     my %bytes_tmp = split(/[:,]/, $bytes);
 2839                     foreach my $tmp (sort {$a <=> $b} keys %bytes_tmp) {
 2840                         if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2841                         $self->{"stat_network_$sum_type"}{$net}{$k}{bytes} += $bytes_tmp{$tmp};
 2842                     }
 2843                     my %duration_tmp = split(/[:,]/, $duration);
 2844                     foreach my $tmp (sort {$a <=> $b} keys %duration_tmp) {
 2845                         if ($key ne '') { $k = $key; } else { $k = $tmp; }
 2846                         $self->{"stat_network_$sum_type"}{$net}{$k}{duration} += $duration_tmp{$tmp};
 2847                     }
 2848                 } else {
 2849                     print STDERR "ERROR: bad format at line $i into $self->{Output}/$path/stat_network.dat\n";
 2850                     print STDERR "$l\n";
 2851                     if ($error > $self->{MaxFormatError}) {
 2852                         unlink($self->{pidfile});
 2853                         exit 0;
 2854                     }
 2855                     $error++;
 2856                 }
 2857                 $i++;
 2858             }
 2859             $dat_file_network->close();
 2860         }
 2861     }
 2862 
 2863     #### Read previous user per network statistics
 2864     if ($self->{UserReport}) {
 2865         if (!$kind || ($kind eq 'stat_netuser')) {
 2866             my $dat_file_netuser = new IO::File;
 2867             if ($dat_file_netuser->open("$self->{Output}/$path/stat_netuser.dat")) {
 2868                 my $i = 1;
 2869                 my $error = 0;
 2870                 while (my $l = <$dat_file_netuser>) {
 2871                     chomp($l);
 2872                     my ($net, $id, $data) = split(/\t/, $l);
 2873                     if (!$data) {
 2874                         # Assume backward compatibility
 2875                         $l =~ s/^(.*)\s([^\s]+)\shits=/hits=/;
 2876                         $net = $1;
 2877                         $id = $2;
 2878                         $data = $l;
 2879                     }
 2880 
 2881                     if ($self->{rebuild}) {
 2882                         next if (!$self->check_inclusions($id, $net));
 2883                         next if ($self->check_exclusions($id, $net));
 2884                     }
 2885 
 2886                     # Replace network by his aliases if any
 2887                     $net = (!$self->{has_nework_alias}) ? $net : $self->apply_network_alias($net);
 2888 
 2889                     # Anonymize all users
 2890                     if ($self->{AnonymizeLogin} && ($id !~ /^Anon[a-zA-Z0-9]{16}$/)) {
 2891                         if (!exists $self->{AnonymizedId}{$id}) {
 2892                             $self->{AnonymizedId}{$id} = &anonymize_id();
 2893                         }
 2894                         $id = $self->{AnonymizedId}{$id};
 2895                     }
 2896 
 2897                     if ($data =~ s/^hits=(\d+);bytes=(\d+);duration=([\-\d]+);largest_file_size=([^;]*);largest_file_url=(.*)$//) {
 2898                         $self->{"stat_netuser_$sum_type"}{$net}{$id}{hits} += $1;
 2899                         $self->{"stat_netuser_$sum_type"}{$net}{$id}{bytes} += $2;
 2900                         $self->{"stat_netuser_$sum_type"}{$net}{$id}{duration} += abs($3);
 2901                         if ($4 > $self->{"stat_netuser_$sum_type"}{$net}{$id}{largest_file_size}) {
 2902                             $self->{"stat_netuser_$sum_type"}{$net}{$id}{largest_file_size} = $4;
 2903                             $self->{"stat_netuser_$sum_type"}{$net}{$id}{largest_file_url} = $5;
 2904