"Fossies" - the Fresh Open Source Software Archive

Member "sarg-2.4.0/readlog.c" (24 Dec 2019, 28902 Bytes) of package /linux/privat/sarg-2.4.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "readlog.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * SARG Squid Analysis Report Generator      http://sarg.sourceforge.net
    3  *                                                            1998, 2015
    4  *
    5  * SARG donations:
    6  *      please look at http://sarg.sourceforge.net/donations.php
    7  * Support:
    8  *     http://sourceforge.net/projects/sarg/forums/forum/363374
    9  * ---------------------------------------------------------------------
   10  *
   11  *  This program is free software; you can redistribute it and/or modify
   12  *  it under the terms of the GNU General Public License as published by
   13  *  the Free Software Foundation; either version 2 of the License, or
   14  *  (at your option) any later version.
   15  *
   16  *  This program is distributed in the hope that it will be useful,
   17  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   19  *  GNU General Public License for more details.
   20  *
   21  *  You should have received a copy of the GNU General Public License
   22  *  along with this program; if not, write to the Free Software
   23  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
   24  *
   25  */
   26 
   27 #include "include/conf.h"
   28 #include "include/defs.h"
   29 #include "include/readlog.h"
   30 #include "include/filelist.h"
   31 
//! Number of input lines read between two refreshes of the reading statistics.
#define REPORT_EVERY_X_LINES 5000
//! Initial value of the limit of user files kept open while reading a log file.
#define MAX_OPEN_USER_FILES 10
   34 
/*!
 * Node of the linked list associating a user with the temporary file
 * receiving the entries of that user.
 */
struct userfilestruct
{
    //! Next node in the list or NULL for the last node.
    struct userfilestruct *next;
    //! The user this node refers to.
    struct userinfostruct *user;
    //! The temporary file of the user or NULL if it is not currently open.
    FILE *file;
};
   41 
/*!
 * The reasons why an entry read from an input log file is not kept.
 * Each value is an index into the excluded_count array.
 */
enum ExcludeReasonEnum
{
    //! User name too long.
    ER_UserNameTooLong,
    //! Squid logged an incomplete query received from the client.
    ER_IncompleteQuery,
    //! Log file turned over.
    ER_LogfileTurnedOver,
    //! Excluded by exclude_string from sarg.conf.
    ER_ExcludeString,
    //! Unknown input log file format.
    ER_UnknownFormat,
    //! Line to be ignored from the input log file.
    ER_FormatData,
    //! Entry not within the requested date range.
    ER_OutOfDateRange,
    //! Ignored week day.
    ER_OutOfWDayRange,
    //! Ignored hour.
    ER_OutOfHourRange,
    //! User is not in the include_users list.
    ER_User,
    //! HTTP code excluded by exclude_code file.
    ER_HttpCode,
    //! Invalid character found in user name.
    ER_InvalidUserChar,
    //! No URL in entry.
    ER_NoUrl,
    //! Not the IP address requested with -a.
    ER_UntrackedIpAddr,
    //! URL excluded by -c or exclude_hosts.
    ER_Url,
    //! Entry time outside of requested hour range.
    ER_OutOfTimeRange,
    //! Not the URL requested by -s.
    ER_UntrackedUrl,
    //! No user in entry.
    ER_NoUser,
    //! Not the user requested by -u.
    ER_UntrackedUser,
    //! System user.
    ER_SysUser,
    //! User ignored by exclude_users.
    ER_IgnoredUser,

    ER_Last //!< last entry of the list
};
   89 
//! Week days to keep. Each entry's tm_wday is looked up in this list.
// NOTE(review): the defaults are 1..7 whereas tm_wday ranges over 0..6 - presumably overwritten when the configuration is loaded; confirm.
int weekdays[7] = { 1, 2, 3, 4, 5, 6, 7};
//! Hours to keep. Each entry's tm_hour is looked up in this list.
// NOTE(review): the defaults are 1..24 whereas tm_hour ranges over 0..23 - presumably overwritten when the configuration is loaded; confirm.
int hours[24] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
//! Domain suffix to strip from the user name.
char StripUserSuffix[MAX_USER_LEN]="";
//! Length of the suffix to strip from the user name.
int StripSuffixLen=0;
   96 
//! The list of the input log files to read.
extern FileListObject AccessLog;

// The parsers of the supported log formats. They are defined in other modules.
extern const struct ReadLogProcessStruct ReadSquidLog;
extern const struct ReadLogProcessStruct ReadCommonLog;
extern const struct ReadLogProcessStruct ReadSargLog;
extern const struct ReadLogProcessStruct ReadExtLog;

//! The list of the supported log formats. LogLine_Parse() tries them in this order.
static const struct ReadLogProcessStruct * const LogFormats[]=
{
    &ReadSquidLog,
    &ReadCommonLog,
    &ReadSargLog,
    &ReadExtLog
};
  112 
//! The path to the sarg log file.
static char SargLogFile[4096]="";
//! Handle to the sarg log file. NULL if not created.
static FILE *fp_log=NULL;
//! The number of records read from the input logs.
static long int totregsl=0;
//! The number of records kept.
static long int totregsg=0;
//! The number of records excluded.
static long int totregsx=0;
//! The beginning of a linked list of user's file.
static struct userfilestruct *first_user_file=NULL;
//! Count the number of occurrences of each input log format.
static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)];
//! The minimum date found in the input logs.
static int mindate=0;
//! The maximum date found in the input logs.
static int maxdate=0;
//! Count the number of excluded records.
static unsigned long int excluded_count[ER_Last];
//! Earliest date found in the log.
static int EarliestDate=-1;
//! The earliest date in time format.
static struct tm EarliestDateTime;
//! Latest date found in the log.
static int LatestDate=-1;
//! The latest date in time format.
static struct tm LatestDateTime;
  140 
  141 /*!
  142  * Read from standard input.
  143  *
  144  * \param Data The file object.
  145  * \param Buffer The boffer to store the data read.
  146  * \param Size How many bytes to read.
  147  *
  148  * \return The number of bytes read.
  149  */
  150 static int Stdin_Read(void *Data,void *Buffer,int Size)
  151 {
  152     return(fread(Buffer,1,Size,(FILE *)Data));
  153 }
  154 
  155 /*!
  156  * Check if end of file is reached.
  157  *
  158  * \param Data The file object.
  159  *
  160  * \return \c True if end of file is reached.
  161  */
  162 static int Stdin_Eof(void *Data)
  163 {
  164     return(feof((FILE *)Data));
  165 }
  166 
  167 /*!
  168  * Mimic a close of standard input but do nothing
  169  *
  170  * \param Data File to close.
  171  *
  172  * \return EOF on error.
  173  */
  174 static int Stdin_Close(void *Data)
  175 {
  176     return(0);
  177 }
  178 
  179 /*!
  180  * Open a file object to read from standard input.
  181  *
  182  * \return The object to pass to other function in this module.
  183  */
  184 static FileObject *Stdin_Open(void)
  185 {
  186     FileObject *File;
  187 
  188     FileObject_SetLastOpenError(NULL);
  189     File=calloc(1,sizeof(*File));
  190     if (!File)
  191     {
  192         FileObject_SetLastOpenError(_("Not enough memory"));
  193         return(NULL);
  194     }
  195     File->Data=stdin;
  196     File->Read=Stdin_Read;
  197     File->Eof=Stdin_Eof;
  198     File->Rewind=NULL;
  199     File->Close=Stdin_Close;
  200     return(File);
  201 }
  202 
  203 /*!
  204  * Initialize the memory structure needed by LogLine_Parse() to parse
  205  * a log line.
  206  *
  207  * \param log_line The structure to initialize.
  208  */
  209 void LogLine_Init(struct LogLineStruct *log_line)
  210 {
  211     log_line->current_format=NULL;
  212     log_line->current_format_idx=-1;
  213     log_line->file_name="";
  214     log_line->successive_errors=0;
  215     log_line->total_errors=0;
  216 }
  217 
  218 /*!
  219  * Set the name of the log file being parsed.
  220  *
  221  * \param log_line Data structure to parse the log line.
  222  * \param file_name The name of the log file being read.
  223  */
  224 void LogLine_File(struct LogLineStruct *log_line,const char *file_name)
  225 {
  226     log_line->file_name=file_name;
  227 }
  228 
  229 /*!
  230  * Parse the next line from a log file.
  231  *
  232  * \param log_line A buffer to store the data about the current parsing.
  233  * \param log_entry The variable to store the parsed data.
  234  * \param linebuf The text line read from the log file.
  235  *
  236  * \return
  237  */
  238 enum ReadLogReturnCodeEnum LogLine_Parse(struct LogLineStruct *log_line,struct ReadLogStruct *log_entry,char *linebuf)
  239 {
  240     enum ReadLogReturnCodeEnum log_entry_status=RLRC_Unknown;
  241     int x;
  242 
  243     if (log_line->current_format)
  244     {
  245         memset(log_entry,0,sizeof(*log_entry));
  246         log_entry_status=log_line->current_format->ReadEntry(linebuf,log_entry);
  247     }
  248 
  249     // find out what line format to use
  250     if (log_entry_status==RLRC_Unknown)
  251     {
  252         for (x=0 ; x<(int)(sizeof(LogFormats)/sizeof(*LogFormats)) ; x++)
  253         {
  254             if (LogFormats[x]==log_line->current_format) continue;
  255             memset(log_entry,0,sizeof(*log_entry));
  256             log_entry_status=LogFormats[x]->ReadEntry(linebuf,log_entry);
  257             if (log_entry_status!=RLRC_Unknown)
  258             {
  259                 log_line->current_format=LogFormats[x];
  260                 log_line->current_format_idx=x;
  261                 if (debugz>=LogLevel_Process)
  262                 {
  263                     /* TRANSLATORS: The argument is the log format name as translated by you. */
  264                     debuga(__FILE__,__LINE__,_("Log format identified as \"%s\" for %s\n"),_(log_line->current_format->Name),log_line->file_name);
  265                 }
  266                 break;
  267             }
  268         }
  269         if (x>=(int)(sizeof(LogFormats)/sizeof(*LogFormats)))
  270         {
  271             if (++log_line->successive_errors>NumLogSuccessiveErrors) {
  272                 debuga(__FILE__,__LINE__,ngettext("%d consecutive error found in the input log file %s\n",
  273                                                 "%d consecutive errors found in the input log file %s\n",log_line->successive_errors),log_line->successive_errors,log_line->file_name);
  274                 exit(EXIT_FAILURE);
  275             }
  276             if (NumLogTotalErrors>=0 && ++log_line->total_errors>NumLogTotalErrors) {
  277                 debuga(__FILE__,__LINE__,ngettext("%d error found in the input log file (last in %s)\n",
  278                                                 "%d errors found in the input log file (last in %s)\n",log_line->total_errors),log_line->total_errors,log_line->file_name);
  279                 exit(EXIT_FAILURE);
  280             }
  281             debuga(__FILE__,__LINE__,_("The following line read from %s could not be parsed and is ignored\n%s\n"),log_line->file_name,linebuf);
  282         }
  283         else
  284             log_line->successive_errors=0;
  285     }
  286 
  287     if (log_line->current_format_idx<0 || log_line->current_format==NULL) {
  288         debuga(__FILE__,__LINE__,_("Sarg failed to determine the format of the input log file %s\n"),log_line->file_name);
  289         exit(EXIT_FAILURE);
  290     }
  291     if (log_entry_status==RLRC_InternalError) {
  292         debuga(__FILE__,__LINE__,_("Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),log_line->file_name);
  293         exit(EXIT_FAILURE);
  294     }
  295     return(log_entry_status);
  296 }
  297 
  298 /*!
  299 Read a single log file.
  300 
  301 \param arq The log file name to read.
  302 */
  303 static void ReadOneLogFile(struct ReadLogDataStruct *Filter,const char *arq)
  304 {
  305     longline line;
  306     char *linebuf;
  307     char *str;
  308     char hora[30];
  309     char dia[128]="";
  310     char tmp3[MAXLEN]="";
  311     char download_url[MAXLEN];
  312     char smartfilter[MAXLEN];
  313     const char *url;
  314     int OutputNonZero = REPORT_EVERY_X_LINES ;
  315     int idata=0;
  316     int x;
  317     int hmr;
  318     int nopen;
  319     int maxopenfiles=MAX_OPEN_USER_FILES;
  320     unsigned long int recs1=0UL;
  321     unsigned long int recs2=0UL;
  322     FileObject *fp_in=NULL;
  323     bool download_flag=false;
  324     bool id_is_ip;
  325     enum ReadLogReturnCodeEnum log_entry_status;
  326     enum UserProcessError PUser;
  327     struct stat logstat;
  328     struct getwordstruct gwarea;
  329     struct userfilestruct *prev_ufile;
  330     struct userinfostruct *uinfo;
  331     struct userfilestruct *ufile;
  332     struct userfilestruct *ufile1;
  333     struct ReadLogStruct log_entry;
  334     struct LogLineStruct log_line;
  335     FILE *UseragentLog=NULL;
  336 
  337     LogLine_Init(&log_line);
  338     LogLine_File(&log_line,arq);
  339     for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++)
  340         if (LogFormats[x]->NewFile)
  341             LogFormats[x]->NewFile(arq);
  342 
  343     if (arq[0]=='-' && arq[1]=='\0') {
  344         fp_in=Stdin_Open();
  345         if (debug)
  346             debuga(__FILE__,__LINE__,_("Reading access log file: from stdin\n"));
  347     } else {
  348         if (Filter->DateRange[0]!='\0') {
  349             if (stat(arq,&logstat)!=0) {
  350                 debuga(__FILE__,__LINE__,_("Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
  351             } else {
  352                 struct tm *logtime=localtime(&logstat.st_mtime);
  353                 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<Filter->StartDate) {
  354                     debuga(__FILE__,__LINE__,_("Ignoring old log file %s\n"),arq);
  355                     return;
  356                 }
  357             }
  358         }
  359         fp_in=decomp(arq);
  360         if (fp_in==NULL) {
  361             debuga(__FILE__,__LINE__,_("Cannot open input log file \"%s\": %s\n"),arq,FileObject_GetLastOpenError());
  362             exit(EXIT_FAILURE);
  363         }
  364         if (debug) debuga(__FILE__,__LINE__,_("Reading access log file: %s\n"),arq);
  365     }
  366 
  367     download_flag=false;
  368 
  369     recs1=0UL;
  370     recs2=0UL;
  371 
  372     // pre-read the file only if we have to show stats
  373     if (ShowReadStatistics && ShowReadPercent && fp_in->Rewind) {
  374         int nread,i;
  375         bool skipcr=false;
  376         char tmp4[MAXLEN];
  377 
  378         while ((nread=FileObject_Read(fp_in,tmp4,sizeof(tmp4)))>0) {
  379             for (i=0 ; i<nread ; i++)
  380                 if (skipcr) {
  381                     if (tmp4[i]!='\n' && tmp4[i]!='\r') {
  382                         skipcr=false;
  383                     }
  384                 } else {
  385                     if (tmp4[i]=='\n' || tmp4[i]=='\r') {
  386                         skipcr=true;
  387                         recs1++;
  388                     }
  389                 }
  390         }
  391         FileObject_Rewind(fp_in);
  392         printf(_("SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(float) 0);
  393         putchar('\r');
  394         fflush( stdout ) ;
  395     }
  396 
  397     if ((line=longline_create())==NULL) {
  398         debuga(__FILE__,__LINE__,_("Not enough memory to read file \"%s\"\n"),arq);
  399         exit(EXIT_FAILURE);
  400     }
  401 
  402     while ((linebuf=longline_read(fp_in,line))!=NULL) {
  403         lines_read++;
  404 
  405         recs2++;
  406         if (ShowReadStatistics && --OutputNonZero<=0) {
  407             if (recs1>0) {
  408                 double perc = recs2 * 100. / recs1 ;
  409                 printf(_("SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
  410             } else {
  411                 printf(_("SARG: Records in file: %lu"),recs2);
  412             }
  413             putchar('\r');
  414             fflush (stdout);
  415             OutputNonZero = REPORT_EVERY_X_LINES ;
  416         }
  417 
  418         /*
  419         The following checks are retained here as I don't know to
  420         what format they apply. They date back to pre 2.4 versions.
  421         */
  422         //if (blen < 58) continue; //this test conflict with the reading of the sarg log header line
  423         if (strstr(linebuf,"HTTP/0.0") != 0) {//recorded by squid when encountering an incomplete query
  424             excluded_count[ER_IncompleteQuery]++;
  425             continue;
  426         }
  427         if (strstr(linebuf,"logfile turned over") != 0) {//reported by newsyslog
  428             excluded_count[ER_LogfileTurnedOver]++;
  429             continue;
  430         }
  431 
  432         // exclude_string
  433         if (ExcludeString[0] != '\0') {
  434             bool exstring=false;
  435             getword_start(&gwarea,ExcludeString);
  436             while(strchr(gwarea.current,':') != 0) {
  437                 if (getword_multisep(val1,sizeof(val1),&gwarea,':')<0) {
  438                     debuga(__FILE__,__LINE__,_("Invalid record in exclusion string\n"));
  439                     exit(EXIT_FAILURE);
  440                 }
  441                 if ((str=(char *) strstr(linebuf,val1)) != (char *) NULL ) {
  442                     exstring=true;
  443                     break;
  444                 }
  445             }
  446             if (!exstring && (str=(char *) strstr(linebuf,gwarea.current)) != (char *) NULL )
  447                 exstring=true;
  448             if (exstring) {
  449                 excluded_count[ER_ExcludeString]++;
  450                 continue;
  451             }
  452         }
  453 
  454         totregsl++;
  455         if (debugz>=LogLevel_Data)
  456             printf("BUF=%s\n",linebuf);
  457 
  458         // process the line
  459         log_entry_status=LogLine_Parse(&log_line,&log_entry,linebuf);
  460         if (log_entry_status==RLRC_Unknown)
  461         {
  462             excluded_count[ER_UnknownFormat]++;
  463             continue;
  464         }
  465         if (log_entry_status==RLRC_Ignore) {
  466             excluded_count[ER_FormatData]++;
  467             continue;
  468         }
  469         format_count[log_line.current_format_idx]++;
  470 
  471         if (!fp_log && ParsedOutputLog[0] && log_line.current_format!=&ReadSargLog) {
  472             if (access(ParsedOutputLog,R_OK) != 0) {
  473                 my_mkdir(ParsedOutputLog);
  474             }
  475             if (snprintf(SargLogFile,sizeof(SargLogFile),"%s/sarg_temp.log",ParsedOutputLog)>=sizeof(SargLogFile)) {
  476                 debuga(__FILE__,__LINE__,_("Path too long: "));
  477                 debuga_more("%s/sarg_temp.log\n",ParsedOutputLog);
  478                 exit(EXIT_FAILURE);
  479             }
  480             if ((fp_log=MY_FOPEN(SargLogFile,"w"))==NULL) {
  481                 debuga(__FILE__,__LINE__,_("Cannot open file \"%s\": %s\n"),SargLogFile,strerror(errno));
  482                 exit(EXIT_FAILURE);
  483             }
  484             fputs("*** SARG Log ***\n",fp_log);
  485         }
  486 
  487         if (log_entry.Ip==NULL) {
  488             debuga(__FILE__,__LINE__,_("Unknown input log file format: no IP addresses\n"));
  489             break;
  490         }
  491         if (log_entry.User==NULL) {
  492             debuga(__FILE__,__LINE__,_("Unknown input log file format: no user\n"));
  493             break;
  494         }
  495         if (log_entry.Url==NULL) {
  496             debuga(__FILE__,__LINE__,_("Unknown input log file format: no URL\n"));
  497             break;
  498         }
  499 
  500         idata=builddia(log_entry.EntryTime.tm_mday,log_entry.EntryTime.tm_mon+1,log_entry.EntryTime.tm_year+1900);
  501         if (debugz>=LogLevel_Data)
  502             printf("DATE=%s IDATA=%d DFROM=%d DUNTIL=%d\n",Filter->DateRange,idata,Filter->StartDate,Filter->EndDate);
  503 
  504         if (EarliestDate<0 || idata<EarliestDate) {
  505             EarliestDate=idata;
  506             memcpy(&EarliestDateTime,&log_entry.EntryTime,sizeof(struct tm));
  507         }
  508         if (LatestDate<0 || idata>LatestDate) {
  509             LatestDate=idata;
  510             memcpy(&LatestDateTime,&log_entry.EntryTime,sizeof(struct tm));
  511         }
  512         if (Filter->DateRange[0] != '\0'){
  513             if (idata<Filter->StartDate || idata>Filter->EndDate) {
  514                 excluded_count[ER_OutOfDateRange]++;
  515                 continue;
  516             }
  517         }
  518 
  519         // Record only hours usage which is required
  520         if (!numlistcontains(weekdays, 7, log_entry.EntryTime.tm_wday))
  521         {
  522             excluded_count[ER_OutOfWDayRange]++;
  523             continue;
  524         }
  525 
  526         if (!numlistcontains(hours, 24, log_entry.EntryTime.tm_hour))
  527         {
  528             excluded_count[ER_OutOfHourRange]++;
  529             continue;
  530         }
  531 
  532         PUser=process_user(&log_entry.User,log_entry.Ip,&id_is_ip);
  533         switch (PUser)
  534         {
  535             case USERERR_NoError:
  536                 break;
  537             case USERERR_NameTooLong:
  538                 if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("User ID too long: %s\n"),log_entry.User);
  539                 excluded_count[ER_UserNameTooLong]++;
  540                 totregsx++;
  541                 continue;
  542             case USERERR_Excluded:
  543                 excluded_count[ER_User]++;
  544                 continue;
  545             case USERERR_InvalidChar:
  546                 excluded_count[ER_InvalidUserChar]++;
  547                 continue;
  548             case USERERR_EmptyUser:
  549                 excluded_count[ER_NoUser]++;
  550                 continue;
  551             case USERERR_SysUser:
  552                 excluded_count[ER_SysUser]++;
  553                 continue;
  554             case USERERR_Ignored:
  555                 excluded_count[ER_IgnoredUser]++;
  556                 totregsx++;
  557                 continue;
  558             case USERERR_Untracked:
  559                 excluded_count[ER_UntrackedUser]++;
  560                 continue;
  561         }
  562 
  563         if (vercode(log_entry.HttpCode)) {
  564             if (debugz>=LogLevel_Process) debuga(__FILE__,__LINE__,_("Excluded code: %s\n"),log_entry.HttpCode);
  565             excluded_count[ER_HttpCode]++;
  566             totregsx++;
  567             continue;
  568         }
  569 
  570         // replace any tab by a single space
  571         for (str=log_entry.Url ; *str ; str++)
  572             if (*str=='\t') *str=' ';
  573         for (str=log_entry.HttpCode ; *str ; str++)
  574             if (*str=='\t') *str=' ';
  575 
  576         if (log_line.current_format!=&ReadSargLog) {
  577             /*
  578             The full URL is not saved in sarg log. There is no point in testing the URL to detect
  579             a downloaded file.
  580             */
  581             download_flag=is_download_suffix(log_entry.Url);
  582             if (download_flag) {
  583                 safe_strcpy(download_url,log_entry.Url,sizeof(download_url));
  584             }
  585         } else
  586             download_flag=false;
  587 
  588         url=process_url(log_entry.Url,LongUrl);
  589         if (!url || url[0] == '\0') {
  590             excluded_count[ER_NoUrl]++;
  591             continue;
  592         }
  593 
  594         if (addr[0] != '\0'){
  595             if (strcmp(addr,log_entry.Ip)!=0) {
  596                 excluded_count[ER_UntrackedIpAddr]++;
  597                 continue;
  598             }
  599         }
  600         if (Filter->HostFilter) {
  601             if (!vhexclude(url)) {
  602                 if (debugz>=LogLevel_Data) debuga(__FILE__,__LINE__,_("Excluded site: %s\n"),url);
  603                 excluded_count[ER_Url]++;
  604                 totregsx++;
  605                 continue;
  606             }
  607         }
  608 
  609         if (Filter->StartTime >= 0 && Filter->EndTime >= 0) {
  610             hmr=log_entry.EntryTime.tm_hour*100+log_entry.EntryTime.tm_min;
  611             if (hmr < Filter->StartTime || hmr >= Filter->EndTime) {
  612                 excluded_count[ER_OutOfTimeRange]++;
  613                 continue;
  614             }
  615         }
  616 
  617         if (site[0] != '\0'){
  618             if (strstr(url,site)==0) {
  619                 excluded_count[ER_UntrackedUrl]++;
  620                 continue;
  621             }
  622         }
  623 
  624         if (log_entry.DataSize<0) log_entry.DataSize=0;
  625 
  626         if (log_entry.ElapsedTime<0) log_entry.ElapsedTime=0;
  627         if (Filter->max_elapsed>0 && log_entry.ElapsedTime>Filter->max_elapsed) {
  628             log_entry.ElapsedTime=0;
  629         }
  630 
  631         if ((str=(char *) strstr(linebuf, "[SmartFilter:")) != (char *) NULL ) {
  632             fixendofline(str);
  633             snprintf(smartfilter,sizeof(smartfilter),"\"%s\"",str+1);
  634         } else strcpy(smartfilter,"\"\"");
  635 
  636         nopen=0;
  637         prev_ufile=NULL;
  638         for (ufile=first_user_file ; ufile && strcmp(log_entry.User,ufile->user->id)!=0 ; ufile=ufile->next) {
  639             prev_ufile=ufile;
  640             if (ufile->file) nopen++;
  641         }
  642         if (!ufile) {
  643             ufile=malloc(sizeof(*ufile));
  644             if (!ufile) {
  645                 debuga(__FILE__,__LINE__,_("Not enough memory to store the user %s\n"),log_entry.User);
  646                 exit(EXIT_FAILURE);
  647             }
  648             memset(ufile,0,sizeof(*ufile));
  649             ufile->next=first_user_file;
  650             first_user_file=ufile;
  651             /*
  652              * This id_is_ip stuff is just to store the string only once if the user is
  653              * identified by its IP address instead of a distinct ID and IP address.
  654              */
  655             uinfo=userinfo_create(log_entry.User,(id_is_ip) ? NULL : log_entry.Ip);
  656             ufile->user=uinfo;
  657             nusers++;
  658         } else {
  659             if (prev_ufile) {
  660                 prev_ufile->next=ufile->next;
  661                 ufile->next=first_user_file;
  662                 first_user_file=ufile;
  663             }
  664         }
  665 #ifdef ENABLE_DOUBLE_CHECK_DATA
  666         if (strcmp(log_entry.HttpCode,"TCP_DENIED/407")!=0) {
  667             ufile->user->nbytes+=log_entry.DataSize;
  668             ufile->user->elap+=log_entry.ElapsedTime;
  669         }
  670 #endif
  671 
  672         if (ufile->file==NULL) {
  673             if (nopen>=maxopenfiles) {
  674                 x=0;
  675                 for (ufile1=first_user_file ; ufile1 ; ufile1=ufile1->next) {
  676                     if (ufile1->file!=NULL) {
  677                         if (x>=maxopenfiles) {
  678                             if (fclose(ufile1->file)==EOF) {
  679                                 debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile1->user->id,strerror(errno));
  680                                 exit(EXIT_FAILURE);
  681                             }
  682                             ufile1->file=NULL;
  683                         }
  684                         x++;
  685                     }
  686                 }
  687             }
  688             if (snprintf (tmp3, sizeof(tmp3), "%s/%s.user_unsort", tmp, ufile->user->filename)>=sizeof(tmp3)) {
  689                 debuga(__FILE__,__LINE__,_("Temporary user file name too long: %s/%s.user_unsort\n"), tmp, ufile->user->filename);
  690                 exit(EXIT_FAILURE);
  691             }
  692             if ((ufile->file = MY_FOPEN (tmp3, "a")) == NULL) {
  693                 debuga(__FILE__,__LINE__,_("(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno));
  694                 exit(EXIT_FAILURE);
  695             }
  696         }
  697 
  698         strftime(dia, sizeof(dia), "%d/%m/%Y",&log_entry.EntryTime);
  699         strftime(hora,sizeof(hora),"%H:%M:%S",&log_entry.EntryTime);
  700 
  701         if (fprintf(ufile->file, "%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
  702                                 log_entry.Ip,url,(uint64_t)log_entry.DataSize,
  703                                 log_entry.HttpCode,log_entry.ElapsedTime,smartfilter)<=0) {
  704             debuga(__FILE__,__LINE__,_("Write error in the log file of user %s\n"),log_entry.User);
  705             exit(EXIT_FAILURE);
  706         }
  707         records_kept++;
  708 
  709         if (fp_log && log_line.current_format!=&ReadSargLog) {
  710             fprintf(fp_log, "%s\t%s\t%s\t%s\t%s\t%"PRIu64"\t%s\t%ld\t%s\n",dia,hora,
  711                             log_entry.User,log_entry.Ip,url,(uint64_t)log_entry.DataSize,
  712                             log_entry.HttpCode,log_entry.ElapsedTime,smartfilter);
  713         }
  714 
  715         totregsg++;
  716 
  717         denied_write(&log_entry);
  718         authfail_write(&log_entry);
  719         if (download_flag) download_write(&log_entry,download_url);
  720         if (log_entry.UserAgent)
  721         {
  722             if (!UseragentLog)
  723                 UseragentLog=UserAgent_Open();
  724             UserAgent_Write(UseragentLog,&log_entry.EntryTime,log_entry.Ip,log_entry.User,log_entry.UserAgent);
  725         }
  726 
  727         if (log_line.current_format!=&ReadSargLog) {
  728             if (period.start.tm_year==0 || idata<mindate || compare_date(&period.start,&log_entry.EntryTime)>0){
  729                 mindate=idata;
  730                 memcpy(&period.start,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
  731             }
  732             if (period.end.tm_year==0 || idata>maxdate || compare_date(&period.end,&log_entry.EntryTime)<0) {
  733                 maxdate=idata;
  734                 memcpy(&period.end,&log_entry.EntryTime,sizeof(log_entry.EntryTime));
  735             }
  736         }
  737 
  738         if (debugz>=LogLevel_Data){
  739             printf("IP=\t%s\n",log_entry.Ip);
  740             printf("USER=\t%s\n",log_entry.User);
  741             printf("ELAP=\t%ld\n",log_entry.ElapsedTime);
  742             printf("DATE=\t%s\n",dia);
  743             printf("TIME=\t%s\n",hora);
  744             //printf("FUNC=\t%s\n",fun);
  745             printf("URL=\t%s\n",url);
  746             printf("CODE=\t%s\n",log_entry.HttpCode);
  747             printf("LEN=\t%"PRIu64"\n",(uint64_t)log_entry.DataSize);
  748         }
  749     }
  750     longline_destroy(&line);
  751 
  752     if (FileObject_Close(fp_in)) {
  753         debuga(__FILE__,__LINE__,_("Read error in \"%s\": %s\n"),arq,FileObject_GetLastCloseError());
  754         exit(EXIT_FAILURE);
  755     }
  756     if (UseragentLog) fclose(UseragentLog);
  757     if (ShowReadStatistics) {
  758         if (ShowReadPercent)
  759             printf(_("SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (float) 100 );
  760         else
  761             printf(_("SARG: Records in file: %lu\n"),recs2);
  762     }
  763 }
  764 
  765 /*!
  766  * Display a line with the excluded entries count.
  767  *
  768  * \param Explain A translated string explaining the exluded count.
  769  * \param Reason The reason number.
  770  */
  771 static void DisplayExcludeCount(const char *Explain,enum ExcludeReasonEnum Reason)
  772 {
  773     if (excluded_count[Reason]>0) {
  774         debuga(__FILE__,__LINE__,"   %s: %lu\n",Explain,excluded_count[Reason]);
  775     }
  776 }
  777 
  778 /*!
  779 Read the log files.
  780 
  781 \param Filter The filtering parameters for the file to load.
  782 
  783 \retval 1 Records found.
  784 \retval 0 No record found.
  785 */
  786 int ReadLogFile(struct ReadLogDataStruct *Filter)
  787 {
  788     int x;
  789     int cstatus;
  790     struct userfilestruct *ufile;
  791     struct userfilestruct *ufile1;
  792     FileListIterator FIter;
  793     const char *file;
  794 
  795     for (x=0 ; x<sizeof(format_count)/sizeof(*format_count) ; x++) format_count[x]=0;
  796     for (x=0 ; x<sizeof(excluded_count)/sizeof(*excluded_count) ; x++) excluded_count[x]=0;
  797     first_user_file=NULL;
  798 
  799     if (!dataonly) {
  800         denied_open();
  801         authfail_open();
  802         download_open();
  803     }
  804 
  805     FIter=FileListIter_Open(AccessLog);
  806     while ((file=FileListIter_Next(FIter))!=NULL)
  807         ReadOneLogFile(Filter,file);
  808     FileListIter_Close(FIter);
  809 
  810     if (fp_log != NULL) {
  811         char val2[40];
  812         char val4[4096];//val4 must not be bigger than SargLogFile without fixing the strcpy below
  813 
  814         if (fclose(fp_log)==EOF) {
  815             debuga(__FILE__,__LINE__,_("Write error in \"%s\": %s\n"),SargLogFile,strerror(errno));
  816             exit(EXIT_FAILURE);
  817         }
  818         strftime(val2,sizeof(val2),"%d%m%Y_%H%M",&period.start);
  819         strftime(val1,sizeof(val1),"%d%m%Y_%H%M",&period.end);
  820         if (snprintf(val4,sizeof(val4),"%s/sarg-%s-%s.log",ParsedOutputLog,val2,val1)>=sizeof(val4)) {
  821             debuga(__FILE__,__LINE__,_("Path too long: "));
  822             debuga_more("%s/sarg-%s-%s.log\n",ParsedOutputLog,val2,val1);
  823             exit(EXIT_FAILURE);
  824         }
  825         if (rename(SargLogFile,val4)) {
  826             debuga(__FILE__,__LINE__,_("failed to rename %s to %s - %s\n"),SargLogFile,val4,strerror(errno));
  827         } else {
  828             strcpy(SargLogFile,val4);
  829 
  830             if (strcmp(ParsedOutputLogCompress,"nocompress") != 0 && ParsedOutputLogCompress[0] != '\0') {
  831                 /*
  832                 No double quotes around ParsedOutputLogCompress because it may contain command line options. If double quotes are
  833                 necessary around the command name, put them in the configuration file.
  834                 */
  835                 if (snprintf(val1,sizeof(val1),"%s \"%s\"",ParsedOutputLogCompress,SargLogFile)>=sizeof(val1)) {
  836                     debuga(__FILE__,__LINE__,_("Command too long: %s \"%s\"\n"),ParsedOutputLogCompress,SargLogFile);
  837                     exit(EXIT_FAILURE);
  838                 }
  839                 cstatus=system(val1);
  840                 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
  841                     debuga(__FILE__,__LINE__,_("command return status %d\n"),WEXITSTATUS(cstatus));
  842                     debuga(__FILE__,__LINE__,_("command: %s\n"),val1);
  843                     exit(EXIT_FAILURE);
  844                 }
  845             }
  846         }
  847         if (debug)
  848             debuga(__FILE__,__LINE__,_("Sarg parsed log saved as %s\n"),SargLogFile);
  849     }
  850 
  851     denied_close();
  852     authfail_close();
  853     download_close();
  854 
  855     for (ufile=first_user_file ; ufile ; ufile=ufile1) {
  856         ufile1=ufile->next;
  857         if (ufile->file!=NULL && fclose(ufile->file)==EOF) {
  858             debuga(__FILE__,__LINE__,_("Write error in log file of user %s: %s\n"),ufile->user->id,strerror(errno));
  859             exit(EXIT_FAILURE);
  860         }
  861         free(ufile);
  862     }
  863 
  864     if (debug) {
  865         unsigned long int totalcount=0;
  866 
  867         debuga(__FILE__,__LINE__,_("   Records read: %ld, written: %ld, excluded: %ld\n"),totregsl,totregsg,totregsx);
  868 
  869         for (x=sizeof(excluded_count)/sizeof(*excluded_count)-1 ; x>=0 && excluded_count[x]>0 ; x--);
  870         if (x>=0) {
  871             debuga(__FILE__,__LINE__,_("Reasons for excluded entries:\n"));
  872             DisplayExcludeCount(_("User name too long"),ER_UserNameTooLong);
  873             DisplayExcludeCount(_("Squid logged an incomplete query received from the client"),ER_IncompleteQuery);
  874             DisplayExcludeCount(_("Log file turned over"),ER_LogfileTurnedOver);
  875             DisplayExcludeCount(_("Excluded by \"exclude_string\" in sarg.conf"),ER_ExcludeString);
  876             DisplayExcludeCount(_("Unknown input log file format"),ER_UnknownFormat);
  877             DisplayExcludeCount(_("Line ignored by the input log format"),ER_FormatData);
  878             DisplayExcludeCount(_("Time outside the requested date range (-d)"),ER_OutOfDateRange);
  879             DisplayExcludeCount(_("Ignored week day (\"weekdays\" parameter in sarg.conf)"),ER_OutOfWDayRange);
  880             DisplayExcludeCount(_("Ignored hour (\"hours\" parameter in sarg.conf)"),ER_OutOfHourRange);
  881             DisplayExcludeCount(_("User is not in the \"include_users\" list"),ER_User);
  882             DisplayExcludeCount(_("HTTP code excluded by \"exclude_code\" file"),ER_HttpCode);
  883             DisplayExcludeCount(_("Invalid character found in user name"),ER_InvalidUserChar);
  884             DisplayExcludeCount(_("No URL in entry"),ER_NoUrl);
  885             DisplayExcludeCount(_("Not the IP address requested with -a"),ER_UntrackedIpAddr);
  886             DisplayExcludeCount(_("URL excluded by -c or \"exclude_hosts\""),ER_Url);
  887             DisplayExcludeCount(_("Entry time outside of requested hour range (-t)"),ER_OutOfTimeRange);
  888             DisplayExcludeCount(_("Not the URL requested by -s"),ER_UntrackedUrl);
  889             DisplayExcludeCount(_("No user in entry"),ER_NoUser);
  890             DisplayExcludeCount(_("Not the user requested by -u"),ER_UntrackedUser);
  891             DisplayExcludeCount(_("System user as defined by \"password\" in sarg.conf"),ER_SysUser);
  892             DisplayExcludeCount(_("User ignored by \"exclude_users\""),ER_IgnoredUser);
  893         }
  894 
  895         for (x=0 ; x<sizeof(LogFormats)/sizeof(*LogFormats) ; x++) {
  896             if (format_count[x]>0) {
  897                 /* TRANSLATORS: It displays the number of lines found in the input log files
  898                 * for each supported log format. The log format name is the %s and is a string
  899                 * you translate somewhere else. */
  900                 debuga(__FILE__,__LINE__,_("%s: %lu entries\n"),_(LogFormats[x]->Name),format_count[x]);
  901                 totalcount+=format_count[x];
  902             }
  903         }
  904 
  905         if (totalcount==0 && totregsg)
  906             debuga(__FILE__,__LINE__,_("Log with invalid format\n"));
  907     }
  908 
  909     return((totregsg!=0) ? 1 : 0);
  910 }
  911 
  912 /*!
  913  * Get the start and end date of the period covered by the log files.
  914  */
  915 bool GetLogPeriod(struct tm *Start,struct tm *End)
  916 {
  917     bool Valid=false;
  918 
  919     if (EarliestDate>=0) {
  920         memcpy(Start,&EarliestDateTime,sizeof(struct tm));
  921         Valid=true;
  922     } else {
  923         memset(Start,0,sizeof(struct tm));
  924     }
  925     if (LatestDate>=0) {
  926         memcpy(End,&LatestDateTime,sizeof(struct tm));
  927         Valid=true;
  928     } else {
  929         memset(End,0,sizeof(struct tm));
  930     }
  931     return(Valid);
  932 }