sarg
2.4.0
About: SARG is a Squid Analysis Report Generator. Fossies Dox: sarg-2.4.0.tar.gz ("unofficial" and yet experimental doxygen-generated source code documentation) 
|
Go to the documentation of this file.
32 #define REPORT_EVERY_X_LINES 5000
33 #define MAX_OPEN_USER_FILES 10
91 int hours[24] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
152 return(fread(Buffer,1,Size,(FILE *)Data));
164 return(feof((FILE *)Data));
189 File=calloc(1,
sizeof(*File));
245 memset(log_entry,0,
sizeof(*log_entry));
255 memset(log_entry,0,
sizeof(*log_entry));
272 debuga(__FILE__,__LINE__,
ngettext(
"%d consecutive error found in the input log file %s\n",
277 debuga(__FILE__,__LINE__,
ngettext(
"%d error found in the input log file (last in %s)\n",
281 debuga(__FILE__,__LINE__,
_(
"The following line read from %s could not be parsed and is ignored\n%s\n"),log_line->
file_name,linebuf);
288 debuga(__FILE__,__LINE__,
_(
"Sarg failed to determine the format of the input log file %s\n"),log_line->
file_name);
292 debuga(__FILE__,__LINE__,
_(
"Internal error encountered while processing %s\nSee previous message to know the reason for that error.\n"),log_line->
file_name);
295 return(log_entry_status);
311 char download_url[
MAXLEN];
320 unsigned long int recs1=0UL;
321 unsigned long int recs2=0UL;
323 bool download_flag=
false;
335 FILE *UseragentLog=NULL;
343 if (arq[0]==
'-' && arq[1]==
'\0') {
346 debuga(__FILE__,__LINE__,
_(
"Reading access log file: from stdin\n"));
349 if (stat(arq,&logstat)!=0) {
350 debuga(__FILE__,__LINE__,
_(
"Cannot get the modification time of input log file %s (%s). Processing it anyway\n"),arq,strerror(errno));
352 struct tm *logtime=localtime(&logstat.st_mtime);
353 if ((logtime->tm_year+1900)*10000+(logtime->tm_mon+1)*100+logtime->tm_mday<Filter->
StartDate) {
354 debuga(__FILE__,__LINE__,
_(
"Ignoring old log file %s\n"),arq);
364 if (
debug)
debuga(__FILE__,__LINE__,
_(
"Reading access log file: %s\n"),arq);
379 for (i=0 ; i<nread ; i++)
381 if (tmp4[i]!=
'\n' && tmp4[i]!=
'\r') {
385 if (tmp4[i]==
'\n' || tmp4[i]==
'\r') {
392 printf(
_(
"SARG: Records in file: %lu, reading: %3.2f%%"),recs1,(
float) 0);
398 debuga(__FILE__,__LINE__,
_(
"Not enough memory to read file \"%s\"\n"),arq);
408 double perc = recs2 * 100. / recs1 ;
409 printf(
_(
"SARG: Records in file: %lu, reading: %3.2lf%%"),recs2,perc);
411 printf(
_(
"SARG: Records in file: %lu"),recs2);
423 if (strstr(linebuf,
"HTTP/0.0") != 0) {
427 if (strstr(linebuf,
"logfile turned over") != 0) {
436 while(strchr(gwarea.
current,
':') != 0) {
438 debuga(__FILE__,__LINE__,
_(
"Invalid record in exclusion string\n"));
441 if ((str=(
char *) strstr(linebuf,
val1)) != (
char *) NULL ) {
446 if (!exstring && (str=(
char *) strstr(linebuf,gwarea.
current)) != (
char *) NULL )
456 printf(
"BUF=%s\n",linebuf);
459 log_entry_status=
LogLine_Parse(&log_line,&log_entry,linebuf);
476 debuga(__FILE__,__LINE__,
_(
"Path too long: "));
481 debuga(__FILE__,__LINE__,
_(
"Cannot open file \"%s\": %s\n"),
SargLogFile,strerror(errno));
484 fputs(
"*** SARG Log ***\n",
fp_log);
487 if (log_entry.
Ip==NULL) {
488 debuga(__FILE__,__LINE__,
_(
"Unknown input log file format: no IP addresses\n"));
491 if (log_entry.
User==NULL) {
492 debuga(__FILE__,__LINE__,
_(
"Unknown input log file format: no user\n"));
495 if (log_entry.
Url==NULL) {
496 debuga(__FILE__,__LINE__,
_(
"Unknown input log file format: no URL\n"));
513 if (idata<Filter->StartDate || idata>Filter->
EndDate) {
571 for (str=log_entry.
Url ; *str ; str++)
572 if (*str==
'\t') *str=
' ';
573 for (str=log_entry.
HttpCode ; *str ; str++)
574 if (*str==
'\t') *str=
' ';
589 if (!url || url[0] ==
'\0') {
594 if (
addr[0] !=
'\0'){
595 if (strcmp(
addr,log_entry.
Ip)!=0) {
611 if (hmr < Filter->StartTime || hmr >= Filter->
EndTime) {
617 if (
site[0] !=
'\0'){
618 if (strstr(url,
site)==0) {
631 if ((str=(
char *) strstr(linebuf,
"[SmartFilter:")) != (
char *) NULL ) {
640 if (ufile->
file) nopen++;
643 ufile=malloc(
sizeof(*ufile));
645 debuga(__FILE__,__LINE__,
_(
"Not enough memory to store the user %s\n"),log_entry.
User);
648 memset(ufile,0,
sizeof(*ufile));
665 #ifdef ENABLE_DOUBLE_CHECK_DATA
666 if (strcmp(log_entry.
HttpCode,
"TCP_DENIED/407")!=0) {
672 if (ufile->
file==NULL) {
673 if (nopen>=maxopenfiles) {
676 if (ufile1->
file!=NULL) {
677 if (x>=maxopenfiles) {
678 if (fclose(ufile1->
file)==EOF) {
679 debuga(__FILE__,__LINE__,
_(
"Write error in log file of user %s: %s\n"),ufile1->
user->
id,strerror(errno));
688 if (snprintf (tmp3,
sizeof(tmp3),
"%s/%s.user_unsort",
tmp, ufile->
user->
filename)>=
sizeof(tmp3)) {
689 debuga(__FILE__,__LINE__,
_(
"Temporary user file name too long: %s/%s.user_unsort\n"),
tmp, ufile->
user->
filename);
693 debuga(__FILE__,__LINE__,
_(
"(log) Cannot open temporary file %s: %s\n"), tmp3, strerror(errno));
698 strftime(dia,
sizeof(dia),
"%d/%m/%Y",&log_entry.
EntryTime);
699 strftime(hora,
sizeof(hora),
"%H:%M:%S",&log_entry.
EntryTime);
701 if (fprintf(ufile->
file,
"%s\t%s\t%s\t%s\t%"PRIu64
"\t%s\t%ld\t%s\n",dia,hora,
702 log_entry.
Ip,url,(uint64_t)log_entry.
DataSize,
704 debuga(__FILE__,__LINE__,
_(
"Write error in the log file of user %s\n"),log_entry.
User);
710 fprintf(
fp_log,
"%s\t%s\t%s\t%s\t%s\t%"PRIu64
"\t%s\t%ld\t%s\n",dia,hora,
739 printf(
"IP=\t%s\n",log_entry.
Ip);
740 printf(
"USER=\t%s\n",log_entry.
User);
742 printf(
"DATE=\t%s\n",dia);
743 printf(
"TIME=\t%s\n",hora);
745 printf(
"URL=\t%s\n",url);
746 printf(
"CODE=\t%s\n",log_entry.
HttpCode);
747 printf(
"LEN=\t%"PRIu64
"\n",(uint64_t)log_entry.
DataSize);
756 if (UseragentLog) fclose(UseragentLog);
759 printf(
_(
"SARG: Records in file: %lu, reading: %3.2f%%\n"),recs2, (
float) 100 );
761 printf(
_(
"SARG: Records in file: %lu\n"),recs2);
814 if (fclose(
fp_log)==EOF) {
815 debuga(__FILE__,__LINE__,
_(
"Write error in \"%s\": %s\n"),
SargLogFile,strerror(errno));
818 strftime(val2,
sizeof(val2),
"%d%m%Y_%H%M",&
period.
start);
820 if (snprintf(val4,
sizeof(val4),
"%s/sarg-%s-%s.log",
ParsedOutputLog,val2,
val1)>=
sizeof(val4)) {
821 debuga(__FILE__,__LINE__,
_(
"Path too long: "));
826 debuga(__FILE__,__LINE__,
_(
"failed to rename %s to %s - %s\n"),
SargLogFile,val4,strerror(errno));
839 cstatus=system(
val1);
840 if (!WIFEXITED(cstatus) || WEXITSTATUS(cstatus)) {
841 debuga(__FILE__,__LINE__,
_(
"command return status %d\n"),WEXITSTATUS(cstatus));
857 if (ufile->
file!=NULL && fclose(ufile->
file)==EOF) {
858 debuga(__FILE__,__LINE__,
_(
"Write error in log file of user %s: %s\n"),ufile->
user->
id,strerror(errno));
865 unsigned long int totalcount=0;
871 debuga(__FILE__,__LINE__,
_(
"Reasons for excluded entries:\n"));
906 debuga(__FILE__,__LINE__,
_(
"Log with invalid format\n"));
923 memset(Start,0,
sizeof(
struct tm));
929 memset(End,0,
sizeof(
struct tm));
void authfail_write(const struct ReadLogStruct *log_entry)
@ LogLevel_Data
Display data about what is processed.
static int EarliestDate
Earliest date found in the log.
struct tm EntryTime
The time corresponding to the entry.
static struct tm EarliestDateTime
The earliest date in time format.
@ ER_User
User is not in the include_users list.
@ RLRC_Ignore
Line is known and should be ignored.
void getword_start(struct getwordstruct *gwarea, const char *line)
void debuga(const char *File, int Line, const char *msg,...)
char * HttpCode
HTTP code returned to the user for the entry.
#define REPORT_EVERY_X_LINES
static int Stdin_Read(void *Data, void *Buffer, int Size)
What is known about a user.
FILE * UserAgent_Open(void)
char * longline_read(FileObject *fp_in, longline line)
const char * FileObject_GetLastCloseError(void)
const char * FileListIter_Next(struct _FileListIterator *FIter)
FileObject * decomp(const char *arq)
char StripUserSuffix[256]
Domain suffix to strip from the user name.
@ ER_OutOfWDayRange
Ignored week day.
const char * FileObject_GetLastOpenError(void)
const char * id
The ID of the user as found in the input file.
char ParsedOutputLog[20000]
int(* Read)(void *Data, void *Buffer, int Size)
@ ER_NoUser
No user in entry.
bool my_mkdir(const char *name)
const struct ReadLogProcessStruct ReadSargLog
Object to read a sarg log format.
static FileObject * Stdin_Open(void)
int builddia(int day, int month, int year)
static int Stdin_Close(void *Data)
void FileListIter_Close(struct _FileListIterator *FIter)
void UserAgent_Write(FILE *fp, const struct tm *Time, const char *Ip, const char *User, const char *Agent)
@ ER_IncompleteQuery
Squid logged an incomplete query received from the client.
@ ER_OutOfHourRange
Ignored hour.
struct tm start
The first date of the period.
const struct ReadLogProcessStruct ReadSquidLog
Object to read a standard squid log format.
long int ElapsedTime
Time necessary to process the user's request.
bool numlistcontains(const int *list, int maxvalue, int value)
Iterator of the file list.
@ ER_OutOfDateRange
Entry not within the requested date range.
static void DisplayExcludeCount(const char *Explain, enum ExcludeReasonEnum Reason)
bool GetLogPeriod(struct tm *Start, struct tm *End)
@ ER_NoUrl
No URL in entry.
@ ER_UntrackedUser
Not the user requested by -u.
unsigned long int lines_read
Count the number of lines read from the input log files.
const char * Ip
The IP address connecting to internet.
static int mindate
The minimum date found in the input logs.
@ ER_LogfileTurnedOver
Log file turned over.
const char * Name
The name of the log file processed by this object.
enum ReadLogReturnCodeEnum LogLine_Parse(struct LogLineStruct *log_line, struct ReadLogStruct *log_entry, char *linebuf)
int StripSuffixLen
Length of the suffix to strip from the user name.
void(* NewFile)(const char *FileName)
Inform the module about the reading of a new file.
void FileObject_Rewind(FileObject *File)
int EndDate
Last date to include in the report. The format is year*10000+month*100+day.
static long int totregsg
The number of records kept.
int vhexclude(const char *url)
int vercode(const char *code)
static long int totregsl
The number of records read from the input logs.
Data read from an input log file.
bool HostFilter
True to filter on hosts.
int compare_date(const struct tm *date1, const struct tm *date2)
int StartTime
The start time to include in the report(H*100+M). Set to -1 to disable.
UserProcessError
Error codes returned by process_user.
const char * User
The user's name.
const char * process_url(const char *url, bool full_url)
enum ReadLogReturnCodeEnum(* ReadEntry)(char *Line, struct ReadLogStruct *Entry)
Function to read one entry from the log.
@ ER_IgnoredUser
User ignored by exclude_users.
const struct ReadLogProcessStruct ReadExtLog
Object to read an extended log.
void LogLine_Init(struct LogLineStruct *log_line)
bool is_download_suffix(const char *url)
Include headers and define global variables.
char ParsedOutputLogCompress[512]
Persistent data to parse a log line.
@ ER_UntrackedIpAddr
Not the IP address requested with -a.
void download_write(const struct ReadLogStruct *log_entry, const char *url)
char ExcludeString[20000]
static int LatestDate
Latest date found in the log.
@ RLRC_Unknown
Unknown line format.
static unsigned long int format_count[sizeof(LogFormats)/sizeof(*LogFormats)]
Count the number of occurrences of each input log format.
struct userinfostruct * userinfo_create(const char *userid, const char *ip)
@ ER_FormatData
Line to be ignored from the input log file.
longline longline_create(void)
static struct userfilestruct * first_user_file
The beginning of a linked list of user's file.
const char * filename
The mangled name to use in file names of that user.
FileListIterator FileListIter_Open(FileListObject FObj)
const struct ReadLogProcessStruct ReadCommonLog
Object to read a standard common log format.
int FileObject_Read(FileObject *File, void *Buffer, int Size)
void authfail_close(void)
int getword_multisep(char *word, int limit, struct getwordstruct *gwarea, char stop)
int ReadLogFile(struct ReadLogDataStruct *Filter)
void(* Rewind)(void *Data)
void longline_destroy(longline *line_ptr)
@ ER_HttpCode
HTTP code excluded by exclude_code file.
struct userfilestruct * next
@ ER_UserNameTooLong
User name too long.
@ ER_ExcludeString
Excluded by exclude_string from sarg.conf.
const struct ReadLogProcessStruct * current_format
static char SargLogFile[4096]
The path to the sarg log file.
static long int totregsx
The number of records excluded.
static FILE * fp_log
Handle to the sarg log file. NULL if not created.
#define MAX_OPEN_USER_FILES
Functions to read a log file.
@ RLRC_InternalError
Error encountered during the parsing of the file.
long long int DataSize
Number of transferred bytes.
void download_close(void)
void safe_strcpy(char *dest, const char *src, int length)
int NumLogSuccessiveErrors
The number of consecutive errors allowed in an input log file before the process is interrupted.
int EndTime
The end time to include in the report(H*100+M). Set to -1 to disable.
static const struct ReadLogProcessStruct *const LogFormats[]
The list of the supported log formats.
FileListObject AccessLog
List of the input log files to process.
bool smartfilter
True to enable the smart filter.
Declaration of the structures and functions.
#define ngettext(Msgid1, Msgid2, N)
const char * UserAgent
Useragent string or NULL if it isn't available.
@ ER_InvalidUserChar
Invalid character found in user name.
enum UserProcessError process_user(const char **UserPtr, const char *IpAddress, bool *IsIp)
@ ER_OutOfTimeRange
Entry time outside of requested hour range.
char DateRange[255]
The filtering date range.
struct periodstruct period
ReadLogReturnCodeEnum
Possible return codes for the functions parsing the input log.
void denied_write(const struct ReadLogStruct *log_entry)
@ ER_Last
last entry of the list
static int Stdin_Eof(void *Data)
bool ShowReadStatistics
If true, show the number of lines read from the input log file during the reading of the file.
struct userinfostruct * user
int StartDate
First date to include in the report. The format is year*10000+month*100+day.
unsigned long int nusers
Count the number of users.
static unsigned long int excluded_count[ER_Last]
Count the number of excluded records.
struct tm end
The last date of the period.
int FileObject_Close(FileObject *File)
unsigned long int records_kept
Count the number of records kept for the processing.
@ ER_UntrackedUrl
Not the URL requested by -s.
void fixendofline(char *str)
void debuga_more(const char *msg,...)
static struct tm LatestDateTime
The latest date in time format.
@ ER_Url
URL excluded by -c or exclude_hosts.
void LogLine_File(struct LogLineStruct *log_line, const char *file_name)
void FileObject_SetLastOpenError(const char *Message)
long int max_elapsed
Maximum elapsed time allowed. Any time greater than this value is set to zero.
@ ER_UnknownFormat
Unknown input log file format.
static void ReadOneLogFile(struct ReadLogDataStruct *Filter, const char *arq)
@ LogLevel_Process
Process informational messages.