"Fossies" - the Fresh Open Source Software Archive

Member "monit-5.28.0/src/validate.c" (28 Mar 2021, 130295 Bytes) of package /linux/privat/monit-5.28.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "validate.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 5.27.2_vs_5.28.0.

    1 
    2 /*
    3  * Copyright (C) Tildeslash Ltd. All rights reserved.
    4  *
    5  * This program is free software: you can redistribute it and/or modify
    6  * it under the terms of the GNU Affero General Public License version 3.
    7  *
    8  * This program is distributed in the hope that it will be useful,
    9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11  * GNU General Public License for more details.
   12  *
   13  * You should have received a copy of the GNU Affero General Public License
   14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
   15  *
   16  * In addition, as a special exception, the copyright holders give
   17  * permission to link the code of portions of this program with the
   18  * OpenSSL library under certain conditions as described in each
   19  * individual source file, and distribute linked combinations
   20  * including the two.
   21  *
   22  * You must obey the GNU Affero General Public License in all respects
   23  * for all of the code used other than OpenSSL.
   24  */
   25 
   26 #include "config.h"
   27 
   28 #ifdef HAVE_STDIO_H
   29 #include <stdio.h>
   30 #endif
   31 
   32 #ifdef HAVE_STDARG_H
   33 #include <stdarg.h>
   34 #endif
   35 
   36 #ifdef HAVE_ERRNO_H
   37 #include <errno.h>
   38 #endif
   39 
   40 #ifdef HAVE_STDLIB_H
   41 #include <stdlib.h>
   42 #endif
   43 
   44 #ifdef HAVE_SIGNAL_H
   45 #include <signal.h>
   46 #endif
   47 
   48 #ifdef HAVE_SETJMP_H
   49 #include <setjmp.h>
   50 #endif
   51 
   52 #ifdef HAVE_SYS_TYPES_H
   53 #include <sys/types.h>
   54 #endif
   55 
   56 #ifdef HAVE_SYS_SOCKET_H
   57 #include <sys/socket.h>
   58 #endif
   59 
   60 #ifdef HAVE_IFADDRS_H
   61 #include <ifaddrs.h>
   62 #endif
   63 
   64 #ifdef HAVE_STRING_H
   65 #include <string.h>
   66 #endif
   67 
   68 #ifdef HAVE_UNISTD_H
   69 #include <unistd.h>
   70 #endif
   71 
   72 #ifdef HAVE_SYS_TIME_H
   73 #include <sys/time.h>
   74 #endif
   75 
   76 #ifdef HAVE_TIME_H
   77 #include <time.h>
   78 #endif
   79 
   80 #ifdef HAVE_NETINET_IN_SYSTM_H
   81 #include <netinet/in_systm.h>
   82 #endif
   83 
   84 #ifdef HAVE_NETINET_IN_H
   85 #include <netinet/in.h>
   86 #endif
   87 
   88 #ifdef HAVE_NETINET_IP_H
   89 #include <netinet/ip.h>
   90 #endif
   91 
   92 #ifdef HAVE_NETINET_IP_ICMP_H
   93 #include <netinet/ip_icmp.h>
   94 #endif
   95 
   96 #include "monit.h"
   97 #include "alert.h"
   98 #include "event.h"
   99 #include "device.h"
  100 #include "net/net.h"
  101 #include "ProcessTree.h"
  102 #include "protocol.h"
  103 #include "md5.h"
  104 #include "sha1.h"
  105 #include "checksum.h"
  106 
  107 // libmonit
  108 #include "system/Time.h"
  109 #include "util/Convert.h"
  110 #include "io/File.h"
  111 #include "io/InputStream.h"
  112 #include "exceptions/AssertException.h"
  113 
  114 /**
  115  *  Implementation of validation engine
  116  *
  117  *  @file
  118  */
  119 
  120 
  121 /* ----------------------------------------------------------------- Private */
  122 
  123 
  124 /**
  125  * Read program output. The output is saved to StringBuffer up to Run.limits.programOutput,
  126  * remaining bytes are dropped (must read whole output so the program doesn't hang on full
  127  * stdout / stderr pipe).
  128  */
  129 static void _programOutput(InputStream_T I, StringBuffer_T S) {
  130         int n;
  131         char buf[STRLEN];
  132         InputStream_setTimeout(I, 0);
  133         do {
  134                 n = InputStream_readBytes(I, buf, sizeof(buf) - 1);
  135                 if (n > 0 && StringBuffer_length(S) < Run.limits.programOutput) {
  136                         buf[n] = 0;
  137                         StringBuffer_append(S, "%s", buf);
  138                 }
  139         } while (n > 0);
  140 }
  141 
  142 
  143 /**
  144  * Test the connection and protocol
  145  */
  146 static State_Type _checkConnection(Service_T s, Port_T p) {
  147         ASSERT(s);
  148         ASSERT(p);
  149         volatile int retry_count = p->retry;
  150         volatile State_Type rv = State_Succeeded;
  151         char buf[STRLEN];
  152         char report[1024] = {};
  153 retry:
  154         TRY
  155         {
  156                 Socket_test(p);
  157                 rv = p->check_invers ? State_Failed : State_Succeeded;
  158                 DEBUG("'%s' succeeded testing protocol [%s] at %s [response time %s]\n", s->name, p->protocol->name, Util_portDescription(p, buf, sizeof(buf)), Convert_time2str(p->responsetime.current, (char[11]){}));
  159         }
  160         ELSE
  161         {
  162                 rv = p->check_invers ? State_Succeeded : State_Failed;
  163                 snprintf(report, sizeof(report), "failed protocol test [%s] at %s -- %s", p->protocol->name, Util_portDescription(p, buf, sizeof(buf)), Exception_frame.message);
  164         }
  165         END_TRY;
  166         if ((rv == State_Failed && ! p->check_invers) || (rv == State_Succeeded && p->check_invers)) {
  167                 if (retry_count-- > 1) {
  168                         Log_warning("'%s' %s (attempt %d/%d)\n", s->name, report, p->retry - retry_count, p->retry);
  169                         goto retry;
  170                 }
  171                 Event_post(s, Event_Connection, p->check_invers ? State_Succeeded : State_Failed, p->action, "%s", report);
  172         } else {
  173                 Event_post(s, Event_Connection, p->check_invers ? State_Failed : State_Succeeded, p->action, "connection succeeded to %s", Util_portDescription(p, buf, sizeof(buf)));
  174         }
  175         if (p->responsetime.limit > -1.) {
  176                 if (Util_evalDoubleQExpression(p->responsetime.operator, p->responsetime.current, p->responsetime.limit)) {
  177                         rv = State_Failed;
  178                         Event_post(s, Event_Speed, State_Failed, p->action, "response time %s doesn't match limit [time %s %s]", Convert_time2str(p->responsetime.current, (char[11]){}), operatorshortnames[p->responsetime.operator], Convert_time2str(p->responsetime.limit, (char[11]){}));
  179                 } else {
  180                         Event_post(s, Event_Speed, State_Failed, p->action, "response time %s matches limit [time %s %s]", Convert_time2str(p->responsetime.current, (char[11]){}), operatorshortnames[p->responsetime.operator], Convert_time2str(p->responsetime.limit, (char[11]){}));
  181                 }
  182         }
  183         if (p->target.net.ssl.options.flags && p->target.net.ssl.certificate.validDays >= 0 && p->target.net.ssl.certificate.minimumDays > 0) {
  184                 if (p->target.net.ssl.certificate.validDays < p->target.net.ssl.certificate.minimumDays) {
  185                         Event_post(s, Event_Timestamp, State_Failed, p->action, "certificate expiry in %d days matches check limit [valid > %d days]", p->target.net.ssl.certificate.validDays, p->target.net.ssl.certificate.minimumDays);
  186                         rv = State_Failed;
  187                 } else {
  188                         Event_post(s, Event_Timestamp, State_Succeeded, p->action, "certificate valid days test succeeded [valid for %d days]", p->target.net.ssl.certificate.validDays);
  189                 }
  190         }
  191         return rv;
  192 }
  193 
  194 
  195 /**
  196  * Test process state (e.g. Zombie)
  197  */
  198 static State_Type _checkProcessState(Service_T s) {
  199         ASSERT(s);
  200         if (s->inf.process->zombie) {
  201                 Event_post(s, Event_Data, State_Failed, s->action_DATA, "process with pid %d is a zombie", s->inf.process->pid);
  202                 return State_Failed;
  203         }
  204         Event_post(s, Event_Data, State_Succeeded, s->action_DATA, "zombie check succeeded");
  205         return State_Succeeded;
  206 }
  207 
  208 
  209 /**
  210  * Test process pid for possible change since last cycle
  211  */
  212 static State_Type _checkProcessPid(Service_T s) {
  213         ASSERT(s);
  214         if (s->inf.process->_pid < 0 || s->inf.process->pid < 0) // process pid was not initialized yet
  215                 return State_Init;
  216         if (s->inf.process->_pid != s->inf.process->pid) {
  217                 for (Pid_T l = s->pidlist; l; l = l->next)
  218                         Event_post(s, Event_Pid, State_Changed, l->action, "process PID changed from %d to %d", s->inf.process->_pid, s->inf.process->pid);
  219                 return State_Changed;
  220         }
  221         for (Pid_T l = s->pidlist; l; l = l->next)
  222                 Event_post(s, Event_Pid, State_ChangedNot, l->action, "process PID has not changed since last cycle");
  223         return State_ChangedNot;
  224 }
  225 
  226 
  227 /**
  228  * Test process ppid for possible change since last cycle
  229  */
  230 static State_Type _checkProcessPpid(Service_T s) {
  231         ASSERT(s);
  232         if (s->inf.process->_ppid < 0 || s->inf.process->ppid < 0) // process ppid was not initialized yet
  233                 return State_Init;
  234         if (s->inf.process->_ppid != s->inf.process->ppid) {
  235                 for (Pid_T l = s->ppidlist; l; l = l->next)
  236                         Event_post(s, Event_PPid, State_Changed, l->action, "process PPID changed from %d to %d", s->inf.process->_ppid, s->inf.process->ppid);
  237                 return State_Changed;
  238         }
  239         for (Pid_T l = s->ppidlist; l; l = l->next)
  240                 Event_post(s, Event_PPid, State_ChangedNot, l->action, "process PPID has not changed since last cycle");
  241         return State_ChangedNot;
  242 }
  243 
  244 
  245 /**
  246  * Check process resources
  247  */
  248 static State_Type _checkProcessResources(Service_T s, Resource_T r) {
  249         ASSERT(s);
  250         ASSERT(r);
  251         State_Type rv = State_Succeeded;
  252         char report[STRLEN] = {}, buf1[10], buf2[10];
  253         switch (r->resource_id) {
  254                 case Resource_CpuPercent:
  255                         if (s->inf.process->cpu_percent < 0.) {
  256                                 DEBUG("'%s' cpu usage check skipped (initializing)\n", s->name);
  257                                 return State_Init;
  258                         } else if (Util_evalDoubleQExpression(r->operator, s->inf.process->cpu_percent, r->limit)) {
  259                                 rv = State_Failed;
  260                                 snprintf(report, STRLEN, "cpu usage of %.1f%% matches resource limit [cpu usage %s %.1f%%]", s->inf.process->cpu_percent, operatorshortnames[r->operator], r->limit);
  261                         } else {
  262                                 snprintf(report, STRLEN, "cpu usage check succeeded [current cpu usage = %.1f%%]", s->inf.process->cpu_percent);
  263                         }
  264                         break;
  265 
  266                 case Resource_CpuPercentTotal:
  267                         if (s->inf.process->total_cpu_percent < 0.) {
  268                                 DEBUG("'%s' total cpu usage check skipped (initializing)\n", s->name);
  269                                 return State_Init;
  270                         } else if (Util_evalDoubleQExpression(r->operator, s->inf.process->total_cpu_percent, r->limit)) {
  271                                 rv = State_Failed;
  272                                 snprintf(report, STRLEN, "total cpu usage of %.1f%% matches resource limit [cpu usage %s %.1f%%]", s->inf.process->total_cpu_percent, operatorshortnames[r->operator], r->limit);
  273                         } else {
  274                                 snprintf(report, STRLEN, "total cpu usage check succeeded [current cpu usage = %.1f%%]", s->inf.process->total_cpu_percent);
  275                         }
  276                         break;
  277 
  278                 case Resource_MemoryPercent:
  279                         if (s->inf.process->mem_percent < 0.) {
  280                                 DEBUG("'%s' memory usage check skipped (initializing)\n", s->name);
  281                                 return State_Init;
  282                         } else if (Util_evalDoubleQExpression(r->operator, s->inf.process->mem_percent, r->limit)) {
  283                                 rv = State_Failed;
  284                                 snprintf(report, STRLEN, "mem usage of %.1f%% matches resource limit [mem usage %s %.1f%%]", s->inf.process->mem_percent, operatorshortnames[r->operator], r->limit);
  285                         } else {
  286                                 snprintf(report, STRLEN, "mem usage check succeeded [current mem usage = %.1f%%]", s->inf.process->mem_percent);
  287                         }
  288                         break;
  289 
  290                 case Resource_MemoryKbyte:
  291                         if (s->inf.process->mem == 0) {
  292                                 DEBUG("'%s' process memory usage check skipped (initializing)\n", s->name);
  293                                 return State_Init;
  294                         } else if (Util_evalDoubleQExpression(r->operator, s->inf.process->mem, r->limit)) {
  295                                 rv = State_Failed;
  296                                 snprintf(report, STRLEN, "mem amount of %s matches resource limit [mem amount %s %s]", Convert_bytes2str(s->inf.process->mem, buf1), operatorshortnames[r->operator], Convert_bytes2str(r->limit, buf2));
  297                         } else {
  298                                 snprintf(report, STRLEN, "mem amount check succeeded [current mem amount = %s]", Convert_bytes2str(s->inf.process->mem, buf1));
  299                         }
  300                         break;
  301 
  302                 case Resource_Threads:
  303                         if (s->inf.process->threads < 0) {
  304                                 DEBUG("'%s' process threads count check skipped (initializing)\n", s->name);
  305                                 return State_Init;
  306                         } else if (Util_evalDoubleQExpression(r->operator, s->inf.process->threads, r->limit)) {
  307                                 rv = State_Failed;
  308                                 snprintf(report, STRLEN, "threads count %i matches resource limit [threads %s %.0f]", s->inf.process->threads, operatorshortnames[r->operator], r->limit);
  309                         } else {
  310                                 snprintf(report, STRLEN, "threads check succeeded [current threads = %i]", s->inf.process->threads);
  311                         }
  312                         break;
  313 
  314                 case Resource_Children:
  315                         if (s->inf.process->children < 0) {
  316                                 DEBUG("'%s' process children count check skipped (initializing)\n", s->name);
  317                                 return State_Init;
  318                         } else if (Util_evalDoubleQExpression(r->operator, s->inf.process->children, r->limit)) {
  319                                 rv = State_Failed;
  320                                 snprintf(report, STRLEN, "children count %i matches resource limit [children %s %.0f]", s->inf.process->children, operatorshortnames[r->operator], r->limit);
  321                         } else {
  322                                 snprintf(report, STRLEN, "children check succeeded [current children = %i]", s->inf.process->children);
  323                         }
  324                         break;
  325 
  326                 case Resource_MemoryKbyteTotal:
  327                         if (s->inf.process->total_mem == 0) {
  328                                 DEBUG("'%s' process total memory usage check skipped (initializing)\n", s->name);
  329                                 return State_Init;
  330                         } else if (Util_evalDoubleQExpression(r->operator, s->inf.process->total_mem, r->limit)) {
  331                                 rv = State_Failed;
  332                                 snprintf(report, STRLEN, "total mem amount of %s matches resource limit [total mem amount %s %s]", Convert_bytes2str(s->inf.process->total_mem, buf1), operatorshortnames[r->operator], Convert_bytes2str(r->limit, buf2));
  333                         } else {
  334                                 snprintf(report, STRLEN, "total mem amount check succeeded [current total mem amount = %s]", Convert_bytes2str(s->inf.process->total_mem, buf1));
  335                         }
  336                         break;
  337 
  338                 case Resource_MemoryPercentTotal:
  339                         if (s->inf.process->total_mem_percent < 0.) {
  340                                 DEBUG("'%s' total memory usage check skipped (initializing)\n", s->name);
  341                                 return State_Init;
  342                         } else if (Util_evalDoubleQExpression(r->operator, s->inf.process->total_mem_percent, r->limit)) {
  343                                 rv = State_Failed;
  344                                 snprintf(report, STRLEN, "total mem amount of %.1f%% matches resource limit [total mem amount %s %.1f%%]", (float)s->inf.process->total_mem_percent, operatorshortnames[r->operator], (float)r->limit);
  345                         } else {
  346                                 snprintf(report, STRLEN, "total mem amount check succeeded [current total mem amount = %.1f%%]", s->inf.process->total_mem_percent);
  347                         }
  348                         break;
  349 
  350                 case Resource_ReadBytes:
  351                         if (Statistics_initialized(&(s->inf.process->read.bytes))) {
  352                                 double value = Statistics_deltaNormalize(&(s->inf.process->read.bytes));
  353                                 if (Util_evalDoubleQExpression(r->operator, value, r->limit)) {
  354                                         rv = State_Failed;
  355                                         snprintf(report, STRLEN, "read rate %s/s matches resource limit [read %s %s/s]", Convert_bytes2str(value, (char[10]){}), operatorshortnames[r->operator], Convert_bytes2str(r->limit, (char[10]){}));
  356                                 } else {
  357                                         snprintf(report, STRLEN, "read rate test succeeded [current read = %s/s]", Convert_bytes2str(value, (char[10]){}));
  358                                 }
  359                         } else {
  360                                 DEBUG("'%s' warning -- no data are available for bytes read rate test\n", s->name);
  361                                 return State_Init;
  362                         }
  363                         break;
  364 
  365                 case Resource_ReadBytesPhysical:
  366                         if (Statistics_initialized(&(s->inf.process->read.bytesPhysical))) {
  367                                 double value = Statistics_deltaNormalize(&(s->inf.process->read.bytesPhysical));
  368                                 if (Util_evalDoubleQExpression(r->operator, value, r->limit)) {
  369                                         rv = State_Failed;
  370                                         snprintf(report, STRLEN, "physical read activity %s/s matches resource limit [read %s %s/s]", Convert_bytes2str(value, (char[10]){}), operatorshortnames[r->operator], Convert_bytes2str(r->limit, (char[10]){}));
  371                                 } else {
  372                                         snprintf(report, STRLEN, "physical read activity test succeeded [current read = %s/s]", Convert_bytes2str(value, (char[10]){}));
  373                                 }
  374                         } else {
  375                                 DEBUG("'%s' warning -- no data are available for physical read activity test\n", s->name);
  376                                 return State_Init;
  377                         }
  378                         break;
  379 
  380                 case Resource_ReadOperations:
  381                         if (Statistics_initialized(&(s->inf.process->read.operations))) {
  382                                 double value = Statistics_deltaNormalize(&(s->inf.process->read.operations));
  383                                 if (Util_evalDoubleQExpression(r->operator, value, r->limit)) {
  384                                         rv = State_Failed;
  385                                         snprintf(report, STRLEN, "read rate %.1f operations/s matches resource limit [read %s %.0f operations/s]", value, operatorshortnames[r->operator], r->limit);
  386                                 } else {
  387                                         snprintf(report, STRLEN, "read rate test succeeded [current read = %.1f operations/s]", value);
  388                                 }
  389                         } else {
  390                                 DEBUG("'%s' warning -- no data are available for read rate test\n", s->name);
  391                                 return State_Init;
  392                         }
  393                         break;
  394 
  395                 case Resource_WriteBytes:
  396                         if (Statistics_initialized(&(s->inf.process->write.bytes))) {
  397                                 double value = Statistics_deltaNormalize(&(s->inf.process->write.bytes));
  398                                 if (Util_evalDoubleQExpression(r->operator, value, r->limit)) {
  399                                         rv = State_Failed;
  400                                         snprintf(report, STRLEN, "write rate %s/s matches resource limit [write %s %s/s]", Convert_bytes2str(value, (char[10]){}), operatorshortnames[r->operator], Convert_bytes2str(r->limit, (char[10]){}));
  401                                 } else {
  402                                         snprintf(report, STRLEN, "write rate test succeeded [current write = %s/s]", Convert_bytes2str(value, (char[10]){}));
  403                                 }
  404                         } else {
  405                                 DEBUG("'%s' warning -- no data are available for bytes write rate test\n", s->name);
  406                                 return State_Init;
  407                         }
  408                         break;
  409 
  410                 case Resource_WriteBytesPhysical:
  411                         if (Statistics_initialized(&(s->inf.process->write.bytesPhysical))) {
  412                                 double value = Statistics_deltaNormalize(&(s->inf.process->write.bytesPhysical));
  413                                 if (Util_evalDoubleQExpression(r->operator, value, r->limit)) {
  414                                         rv = State_Failed;
  415                                         snprintf(report, STRLEN, "physical write activity %s/s matches resource limit [write %s %s/s]", Convert_bytes2str(value, (char[10]){}), operatorshortnames[r->operator], Convert_bytes2str(r->limit, (char[10]){}));
  416                                 } else {
  417                                         snprintf(report, STRLEN, "physical write activity test succeeded [current write = %s/s]", Convert_bytes2str(value, (char[10]){}));
  418                                 }
  419                         } else {
  420                                 DEBUG("'%s' warning -- no data are available for physical write activity test\n", s->name);
  421                                 return State_Init;
  422                         }
  423                         break;
  424 
  425                 case Resource_WriteOperations:
  426                         if (Statistics_initialized(&(s->inf.process->write.operations))) {
  427                                 double value = Statistics_deltaNormalize(&(s->inf.process->write.operations));
  428                                 if (Util_evalDoubleQExpression(r->operator, value, r->limit)) {
  429                                         rv = State_Failed;
  430                                         snprintf(report, STRLEN, "write rate %.1f operations/s matches resource limit [write %s %.0f operations/s]", value, operatorshortnames[r->operator], r->limit);
  431                                 } else {
  432                                         snprintf(report, STRLEN, "write rate test succeeded [current write = %.1f operations/s]", value);
  433                                 }
  434                         } else {
  435                                 DEBUG("'%s' warning -- no data are available for write rate test\n", s->name);
  436                                 return State_Init;
  437                         }
  438                         break;
  439 
  440                 default:
  441                         Log_error("'%s' error -- unknown resource ID: [%d]\n", s->name, r->resource_id);
  442                         return State_Failed;
  443         }
  444         Event_post(s, Event_Resource, rv, r->action, "%s", report);
  445         return rv;
  446 }
  447 
  448 
  449 static State_Type _checkLoadAverage(Resource_T r, double loadavg, const char *name, char report[STRLEN]) {
  450         if (Util_evalDoubleQExpression(r->operator, loadavg, r->limit)) {
  451                 snprintf(report, STRLEN, "%s of %.1f matches resource limit [%s %s %.1f]", name, loadavg, name, operatorshortnames[r->operator], r->limit);
  452                 return State_Failed;
  453         }
  454         snprintf(report, STRLEN, "%s check succeeded [current %s = %.1f]", name, name, loadavg);
  455         return State_Succeeded;
  456 }
  457 
  458 
  459 static State_Type _checkSystemResources(Service_T s, Resource_T r) {
  460         ASSERT(s);
  461         ASSERT(r);
  462         State_Type rv = State_Succeeded;
  463         char report[STRLEN] = {}, buf1[10], buf2[10];
  464         switch (r->resource_id) {
  465                 case Resource_CpuPercent:
  466                         {
  467                                 float cpu =
  468                                         (systeminfo.cpu.usage.system > 0. ? systeminfo.cpu.usage.system : 0.) +
  469                                         (systeminfo.cpu.usage.user > 0. ? systeminfo.cpu.usage.user : 0.);
  470                                 if (cpu < 0.) {
  471                                         DEBUG("'%s' cpu usage check skipped (initializing)\n", s->name);
  472                                         return State_Init;
  473                                 } else if (Util_evalDoubleQExpression(r->operator, cpu, r->limit)) {
  474                                         rv = State_Failed;
  475                                         snprintf(report, STRLEN, "cpu usage of %.1f%% matches resource limit [cpu usage %s %.1f%%]", cpu, operatorshortnames[r->operator], r->limit);
  476                                 } else {
  477                                         snprintf(report, STRLEN, "cpu usage check succeeded [current cpu usage = %.1f%%]", cpu);
  478                                 }
  479                         }
  480                         break;
  481 
  482                 case Resource_CpuUser:
  483                         if (systeminfo.cpu.usage.user < 0.) {
  484                                 DEBUG("'%s' cpu user usage check skipped (initializing)\n", s->name);
  485                                 return State_Init;
  486                         } else if (Util_evalDoubleQExpression(r->operator, systeminfo.cpu.usage.user, r->limit)) {
  487                                 rv = State_Failed;
  488                                 snprintf(report, STRLEN, "cpu user usage of %.1f%% matches resource limit [cpu user usage %s %.1f%%]", systeminfo.cpu.usage.user, operatorshortnames[r->operator], r->limit);
  489                         } else {
  490                                 snprintf(report, STRLEN, "cpu user usage check succeeded [current cpu user usage = %.1f%%]", systeminfo.cpu.usage.user);
  491                         }
  492                         break;
  493 
  494                 case Resource_CpuSystem:
  495                         if (systeminfo.cpu.usage.system < 0.) {
  496                                 DEBUG("'%s' cpu system usage check skipped (initializing)\n", s->name);
  497                                 return State_Init;
  498                         } else if (Util_evalDoubleQExpression(r->operator, systeminfo.cpu.usage.system, r->limit)) {
  499                                 rv = State_Failed;
  500                                 snprintf(report, STRLEN, "cpu system usage of %.1f%% matches resource limit [cpu system usage %s %.1f%%]", systeminfo.cpu.usage.system, operatorshortnames[r->operator], r->limit);
  501                         } else {
  502                                 snprintf(report, STRLEN, "cpu system usage check succeeded [current cpu system usage = %.1f%%]", systeminfo.cpu.usage.system);
  503                         }
  504                         break;
  505 
  506                 case Resource_CpuWait:
  507                         if (systeminfo.statisticsAvailable & Statistics_CpuIOWait) {
  508                                 if (systeminfo.cpu.usage.iowait < 0.) {
  509                                         DEBUG("'%s' cpu I/O wait check skipped (initializing)\n", s->name);
  510                                         return State_Init;
  511                                 } else if (Util_evalDoubleQExpression(r->operator, systeminfo.cpu.usage.iowait, r->limit)) {
  512                                         rv = State_Failed;
  513                                         snprintf(report, STRLEN, "cpu I/O wait of %.1f%% matches resource limit [cpu I/O wait %s %.1f%%]", systeminfo.cpu.usage.iowait, operatorshortnames[r->operator], r->limit);
  514                                 } else {
  515                                         snprintf(report, STRLEN, "cpu I/O wait check succeeded [current cpu I/O wait = %.1f%%]", systeminfo.cpu.usage.iowait);
  516                                 }
  517                         } else {
  518                                 Log_warning("Cannot test cpu I/O wait usage as the statistics is not available on this system\n");
  519                         }
  520                         break;
  521 
  522                 case Resource_CpuNice:
  523                         if (systeminfo.statisticsAvailable & Statistics_CpuNice) {
  524                                 if (systeminfo.cpu.usage.nice < 0.) {
  525                                         DEBUG("'%s' cpu nice usage check skipped (initializing)\n", s->name);
  526                                         return State_Init;
  527                                 } else if (Util_evalDoubleQExpression(r->operator, systeminfo.cpu.usage.nice, r->limit)) {
  528                                         rv = State_Failed;
  529                                         snprintf(report, STRLEN, "cpu nice usage of %.1f%% matches resource limit [cpu nice usage %s %.1f%%]", systeminfo.cpu.usage.nice, operatorshortnames[r->operator], r->limit);
  530                                 } else {
  531                                         snprintf(report, STRLEN, "cpu nice usage check succeeded [current cpu nice usage = %.1f%%]", systeminfo.cpu.usage.nice);
  532                                 }
  533                         } else {
  534                                 Log_warning("Cannot test cpu nice usage as the statistics is not available on this system\n");
  535                         }
  536                         break;
  537 
  538                 case Resource_CpuHardIRQ:
  539                         if (systeminfo.statisticsAvailable & Statistics_CpuHardIRQ) {
  540                                 if (systeminfo.cpu.usage.hardirq < 0.) {
  541                                         DEBUG("'%s' cpu hardware IRQ usage check skipped (initializing)\n", s->name);
  542                                         return State_Init;
  543                                 } else if (Util_evalDoubleQExpression(r->operator, systeminfo.cpu.usage.hardirq, r->limit)) {
  544                                         rv = State_Failed;
  545                                         snprintf(report, STRLEN, "cpu hardware IRQ usage of %.1f%% matches resource limit [cpu hardware IRQ usage %s %.1f%%]", systeminfo.cpu.usage.hardirq, operatorshortnames[r->operator], r->limit);
  546                                 } else {
  547                                         snprintf(report, STRLEN, "cpu hardware IRQ usage check succeeded [current cpu hardware IRQ usage = %.1f%%]", systeminfo.cpu.usage.hardirq);
  548                                 }
  549                         } else {
  550                                 Log_warning("Cannot test cpu hardware IRQ usage as the statistics is not available on this system\n");
  551                         }
  552                         break;
  553 
  554                 case Resource_CpuSoftIRQ:
  555                         if (systeminfo.statisticsAvailable & Statistics_CpuSoftIRQ) {
  556                                 if (systeminfo.cpu.usage.softirq < 0.) {
  557                                         DEBUG("'%s' cpu software IRQ usage check skipped (initializing)\n", s->name);
  558                                         return State_Init;
  559                                 } else if (Util_evalDoubleQExpression(r->operator, systeminfo.cpu.usage.softirq, r->limit)) {
  560                                         rv = State_Failed;
  561                                         snprintf(report, STRLEN, "cpu software IRQ usage of %.1f%% matches resource limit [cpu software IRQ usage %s %.1f%%]", systeminfo.cpu.usage.softirq, operatorshortnames[r->operator], r->limit);
  562                                 } else {
  563                                         snprintf(report, STRLEN, "cpu software IRQ usage check succeeded [current cpu software IRQ usage = %.1f%%]", systeminfo.cpu.usage.softirq);
  564                                 }
  565                         } else {
  566                                 Log_warning("Cannot test cpu software IRQ usage as the statistics is not available on this system\n");
  567                         }
  568                         break;
  569 
  570                 case Resource_CpuSteal:
  571                         if (systeminfo.statisticsAvailable & Statistics_CpuSteal) {
  572                                 if (systeminfo.cpu.usage.steal < 0.) {
  573                                         DEBUG("'%s' cpu steal usage check skipped (initializing)\n", s->name);
  574                                         return State_Init;
  575                                 } else if (Util_evalDoubleQExpression(r->operator, systeminfo.cpu.usage.steal, r->limit)) {
  576                                         rv = State_Failed;
  577                                         snprintf(report, STRLEN, "cpu steal usage of %.1f%% matches resource limit [cpu steal usage %s %.1f%%]", systeminfo.cpu.usage.steal, operatorshortnames[r->operator], r->limit);
  578                                 } else {
  579                                         snprintf(report, STRLEN, "cpu steal usage check succeeded [current cpu steal usage = %.1f%%]", systeminfo.cpu.usage.steal);
  580                                 }
  581                         } else {
  582                                 Log_warning("Cannot test cpu steal usage as the statistics is not available on this system\n");
  583                         }
  584                         break;
  585 
  586                 case Resource_CpuGuest:
  587                         if (systeminfo.statisticsAvailable & Statistics_CpuGuest) {
  588                                 if (systeminfo.cpu.usage.guest < 0.) {
  589                                         DEBUG("'%s' cpu guest usage check skipped (initializing)\n", s->name);
  590                                         return State_Init;
  591                                 } else if (Util_evalDoubleQExpression(r->operator, systeminfo.cpu.usage.guest, r->limit)) {
  592                                         rv = State_Failed;
  593                                         snprintf(report, STRLEN, "cpu guest usage of %.1f%% matches resource limit [cpu guest usage %s %.1f%%]", systeminfo.cpu.usage.guest, operatorshortnames[r->operator], r->limit);
  594                                 } else {
  595                                         snprintf(report, STRLEN, "cpu guest usage check succeeded [current cpu guest usage = %.1f%%]", systeminfo.cpu.usage.guest);
  596                                 }
  597                         } else {
  598                                 Log_warning("Cannot test cpu guest usage as the statistics is not available on this system\n");
  599                         }
  600                         break;
  601 
  602                 case Resource_CpuGuestNice:
  603                         if (systeminfo.statisticsAvailable & Statistics_CpuGuestNice) {
  604                                 if (systeminfo.cpu.usage.guest_nice < 0.) {
  605                                         DEBUG("'%s' cpu guest nice usage check skipped (initializing)\n", s->name);
  606                                         return State_Init;
  607                                 } else if (Util_evalDoubleQExpression(r->operator, systeminfo.cpu.usage.guest_nice, r->limit)) {
  608                                         rv = State_Failed;
  609                                         snprintf(report, STRLEN, "cpu guest nice usage of %.1f%% matches resource limit [cpu guest nice usage %s %.1f%%]", systeminfo.cpu.usage.guest_nice, operatorshortnames[r->operator], r->limit);
  610                                 } else {
  611                                         snprintf(report, STRLEN, "cpu guest nice usage check succeeded [current cpu guest nice usage = %.1f%%]", systeminfo.cpu.usage.guest_nice);
  612                                 }
  613                         } else {
  614                                 Log_warning("Cannot test cpu guestnice usage as the statistics is not available on this system\n");
  615                         }
  616                         break;
  617 
  618                 case Resource_MemoryPercent:
  619                         if (Util_evalDoubleQExpression(r->operator, systeminfo.memory.usage.percent, r->limit)) {
  620                                 rv = State_Failed;
  621                                 snprintf(report, STRLEN, "mem usage of %.1f%% matches resource limit [mem usage %s %.1f%%]", systeminfo.memory.usage.percent, operatorshortnames[r->operator], r->limit);
  622                         } else {
  623                                 snprintf(report, STRLEN, "mem usage check succeeded [current mem usage = %.1f%%]", systeminfo.memory.usage.percent);
  624                         }
  625                         break;
  626 
  627                 case Resource_MemoryKbyte:
  628                         if (Util_evalDoubleQExpression(r->operator, systeminfo.memory.usage.bytes, r->limit)) {
  629                                 rv = State_Failed;
  630                                 snprintf(report, STRLEN, "mem amount of %s matches resource limit [mem amount %s %s]", Convert_bytes2str(systeminfo.memory.usage.bytes, buf1), operatorshortnames[r->operator], Convert_bytes2str(r->limit, buf2));
  631                         } else {
  632                                 snprintf(report, STRLEN, "mem amount check succeeded [current mem amount = %s]", Convert_bytes2str(systeminfo.memory.usage.bytes, buf1));
  633                         }
  634                         break;
  635 
  636                 case Resource_SwapPercent:
  637                         if (Util_evalDoubleQExpression(r->operator, systeminfo.swap.usage.percent, r->limit)) {
  638                                 rv = State_Failed;
  639                                 snprintf(report, STRLEN, "swap usage of %.1f%% matches resource limit [swap usage %s %.1f%%]", systeminfo.swap.usage.percent, operatorshortnames[r->operator], r->limit);
  640                         } else {
  641                                 snprintf(report, STRLEN, "swap usage check succeeded [current swap usage = %.1f%%]", systeminfo.swap.usage.percent);
  642                         }
  643                         break;
  644 
  645                 case Resource_SwapKbyte:
  646                         if (s->type == Service_System) {
  647                                 if (Util_evalDoubleQExpression(r->operator, systeminfo.swap.usage.bytes, r->limit)) {
  648                                         rv = State_Failed;
  649                                         snprintf(report, STRLEN, "swap amount of %s matches resource limit [swap amount %s %s]", Convert_bytes2str(systeminfo.swap.usage.bytes, buf1), operatorshortnames[r->operator], Convert_bytes2str(r->limit, buf2));
  650                                 } else {
  651                                         snprintf(report, STRLEN, "swap amount check succeeded [current swap amount = %s]", Convert_bytes2str(systeminfo.swap.usage.bytes, buf1));
  652                                 }
  653                         }
  654                         break;
  655 
  656                 case Resource_LoadAverage1m:
  657                         rv = _checkLoadAverage(r, systeminfo.loadavg[0], "loadavg (1min)", report);
  658                         break;
  659 
  660                 case Resource_LoadAverage5m:
  661                         rv = _checkLoadAverage(r, systeminfo.loadavg[1], "loadavg (5min)", report);
  662                         break;
  663 
  664                 case Resource_LoadAverage15m:
  665                         rv = _checkLoadAverage(r, systeminfo.loadavg[2], "loadavg (15min)", report);
  666                         break;
  667 
  668                 case Resource_LoadAveragePerCore1m:
  669                         rv = _checkLoadAverage(r, systeminfo.loadavg[0] / (double)systeminfo.cpu.count, "loadavg per core (1min)", report);
  670                         break;
  671 
  672                 case Resource_LoadAveragePerCore5m:
  673                         rv = _checkLoadAverage(r, systeminfo.loadavg[1] / (double)systeminfo.cpu.count, "loadavg per core (5min)", report);
  674                         break;
  675 
  676                 case Resource_LoadAveragePerCore15m:
  677                         rv = _checkLoadAverage(r, systeminfo.loadavg[2] / (double)systeminfo.cpu.count, "loadavg per core (15min)", report);
  678                         break;
  679 
  680                 default:
  681                         Log_error("'%s' error -- unknown resource ID: [%d]\n", s->name, r->resource_id);
  682                         return State_Failed;
  683         }
  684         Event_post(s, Event_Resource, rv, r->action, "%s", report);
  685         return rv;
  686 }
  687 
  688 
  689 /**
  690  * Test for associated path checksum change
  691  */
  692 static State_Type _checkChecksum(Service_T s) {
  693         ASSERT(s);
  694         ASSERT(s->path);
  695         State_Type rv = State_Succeeded;
  696         if (s->checksum) {
  697                 Checksum_T cs = s->checksum;
  698                 if (Checksum_getChecksum(s->path, cs->type, s->inf.file->cs_sum, sizeof(s->inf.file->cs_sum))) {
  699                         Event_post(s, Event_Data, State_Succeeded, s->action_DATA, "checksum %s", s->inf.file->cs_sum);
  700                         if (! cs->initialized) {
  701                                 cs->initialized = true;
  702                                 snprintf(cs->hash, sizeof(cs->hash), "%s", s->inf.file->cs_sum);
  703                         }
  704                         int changed;
  705                         switch (cs->type) {
  706                                 case Hash_Md5:
  707                                         changed = strncmp(cs->hash, s->inf.file->cs_sum, 32);
  708                                         break;
  709                                 case Hash_Sha1:
  710                                         changed = strncmp(cs->hash, s->inf.file->cs_sum, 40);
  711                                         break;
  712                                 default:
  713                                         Log_error("'%s' unknown hash type (%d)\n", s->name, cs->type);
  714                                         *s->inf.file->cs_sum = 0;
  715                                         return State_Failed;
  716                         }
  717                         if (changed) {
  718                                 if (cs->test_changes) {
  719                                         rv = State_Changed;
  720                                         /* reset expected value for next cycle */
  721                                         snprintf(cs->hash, sizeof(cs->hash), "%s", s->inf.file->cs_sum);
  722                                         /* if we are testing for changes only, the value is variable */
  723                                         Event_post(s, Event_Checksum, State_Changed, cs->action, "checksum changed to %s", s->inf.file->cs_sum);
  724                                 } else {
  725                                         /* we are testing constant value for failed or succeeded state */
  726                                         rv = State_Failed;
  727                                         Event_post(s, Event_Checksum, State_Failed, cs->action, "checksum failed, expected %s got %s", cs->hash, s->inf.file->cs_sum);
  728                                 }
  729                         } else if (cs->test_changes) {
  730                                 rv = State_ChangedNot;
  731                                 Event_post(s, Event_Checksum, State_ChangedNot, cs->action, "checksum has not changed");
  732                         } else {
  733                                 Event_post(s, Event_Checksum, State_Succeeded, cs->action, "checksum is valid");
  734                         }
  735                         return rv;
  736                 }
  737                 Event_post(s, Event_Data, State_Failed, s->action_DATA, "cannot compute checksum for %s", s->path);
  738                 return State_Failed;
  739         }
  740         return rv;
  741 }
  742 
  743 
  744 /**
  745  * Test for associated path permission change
  746  */
  747 static State_Type _checkPerm(Service_T s, int mode) {
  748         ASSERT(s);
  749         if (s->perm) {
  750                 if (mode >= 0) {
  751                         mode_t m = mode & 07777;
  752                         if (m != (mode_t)s->perm->perm) {
  753                                 if (s->perm->test_changes) {
  754                                         Event_post(s, Event_Permission, State_Changed, s->perm->action, "permission for %s changed from %04o to %04o", s->path, s->perm->perm, m);
  755                                         s->perm->perm = m;
  756                                         return State_Changed;
  757                                 } else {
  758                                         Event_post(s, Event_Permission, State_Failed, s->perm->action, "permission test failed for %s [current permission %04o]", s->path, m);
  759                                         return State_Failed;
  760                                 }
  761                         } else {
  762                                 if (s->perm->test_changes) {
  763                                         Event_post(s, Event_Permission, State_ChangedNot, s->perm->action, "permission not changed for %s", s->path);
  764                                         return State_ChangedNot;
  765                                 } else {
  766                                         Event_post(s, Event_Permission, State_Succeeded, s->perm->action, "permission test succeeded [current permission %04o]", m);
  767                                         return State_Succeeded;
  768                                 }
  769                         }
  770                 }
  771                 return State_Init;
  772         }
  773         return State_Succeeded;
  774 }
  775 
  776 
  777 /**
  778  * Test UID of file or process
  779  */
  780 static State_Type _checkUid(Service_T s, int uid) {
  781         ASSERT(s);
  782         if (s->uid) {
  783                 if (uid >= 0) {
  784                         if ((uid_t)uid != s->uid->uid) {
  785                                 Event_post(s, Event_Uid, State_Failed, s->uid->action, "uid test failed for %s -- current uid is %d", s->name, uid);
  786                                 return State_Failed;
  787                         } else {
  788                                 Event_post(s, Event_Uid, State_Succeeded, s->uid->action, "uid test succeeded [current uid = %d]", uid);
  789                                 return State_Succeeded;
  790                         }
  791                 }
  792                 return State_Init;
  793         }
  794         return State_Succeeded;
  795 }
  796 
  797 
  798 /**
  799  * Test effective UID of process
  800  */
  801 static State_Type _checkEuid(Service_T s, int euid) {
  802         ASSERT(s);
  803         if (s->euid) {
  804                 if (euid >= 0) {
  805                         if ((uid_t)euid != s->euid->uid) {
  806                                 Event_post(s, Event_Uid, State_Failed, s->euid->action, "euid test failed for %s -- current euid is %d", s->name, euid);
  807                                 return State_Failed;
  808                         } else {
  809                                 Event_post(s, Event_Uid, State_Succeeded, s->euid->action, "euid test succeeded [current euid = %d]", euid);
  810                                 return State_Succeeded;
  811                         }
  812                 }
  813                 return State_Init;
  814         }
  815         return State_Succeeded;
  816 }
  817 
  818 
  819 static State_Type _checkSecurityAttribute(Service_T s, char *attribute) {
  820         ASSERT(s);
  821         State_Type rv = State_Succeeded;
  822         const char *attr = NVLSTR(attribute);
  823         for (SecurityAttribute_T a = s->secattrlist; a; a = a->next) {
  824                 if (IS(attr, a->attribute)) {
  825                         Event_post(s, Event_Invalid, State_Succeeded, a->action, "Security attribute test succeeded [current attribute = '%s']", attr);
  826                 } else {
  827                         rv = State_Failed;
  828                         Event_post(s, Event_Invalid, State_Failed, a->action, "Security attribute test failed for %s -- current attribute is '%s'", s->name, attr);
  829                 }
  830         }
  831         return rv;
  832 }
  833 
  834 
  835 static State_Type _checkSystemFiledescriptors(Service_T s) {
  836         ASSERT(s);
  837         State_Type rv = State_Succeeded;
  838         if (systeminfo.statisticsAvailable & Statistics_FiledescriptorsPerSystem) {
  839                 for (Filedescriptors_T o = s->filedescriptorslist; o; o = o->next) {
  840                         if (o->limit_absolute > -1LL) {
  841                                 if (Util_evalQExpression(o->operator, systeminfo.filedescriptors.allocated, o->limit_absolute)) {
  842                                         rv = State_Failed;
  843                                         Event_post(s, Event_Resource, State_Failed, o->action, "filedescriptors usage of %lld matches limit [filedescriptors %s %lld]", systeminfo.filedescriptors.allocated, operatorshortnames[o->operator], o->limit_absolute);
  844                                 } else {
  845                                         Event_post(s, Event_Resource, State_Succeeded, o->action, "filedescriptors test succeeded [current filedescriptors usage = %lld]", systeminfo.filedescriptors.allocated);
  846                                 }
  847                         } else {
  848                                 float usage = systeminfo.filedescriptors.maximum > 0 ? ((float)100 * (float)systeminfo.filedescriptors.allocated / (float)systeminfo.filedescriptors.maximum) : 0;
  849                                 if (Util_evalDoubleQExpression(o->operator, usage, o->limit_percent)) {
  850                                         rv = State_Failed;
  851                                         Event_post(s, Event_Resource, State_Failed, o->action, "filedescriptors usage of %.1f%% matches limit [filedescriptors %s %.1f%%]", usage, operatorshortnames[o->operator], o->limit_percent);
  852                                 } else {
  853                                         Event_post(s, Event_Resource, State_Succeeded, o->action, "filedescriptors usage test succeeded [current filedescriptors usage = %.1f%%]", usage);
  854                                 }
  855                         }
  856                 }
  857         } else if (s->filedescriptorslist) {
  858                 Log_warning("Cannot test filesdescriptors usage as the statistics is not available on this system\n");
  859         }
  860         return rv;
  861 }
  862 
  863 
  864 static State_Type _checkProcessFiledescriptors(Service_T s) {
  865         ASSERT(s);
  866         State_Type rv = State_Succeeded;
  867         for (Filedescriptors_T o = s->filedescriptorslist; o; o = o->next) {
  868                 if (o->total) {
  869                         if (Util_evalQExpression(o->operator, s->inf.process->filedescriptors.openTotal, o->limit_absolute)) {
  870                                 rv = State_Failed;
  871                                 Event_post(s, Event_Resource, State_Failed, o->action, "total  filedescriptors usage of %lld matches limit [filedescriptors %s %lld]", s->inf.process->filedescriptors.openTotal, operatorshortnames[o->operator], o->limit_absolute);
  872                         } else {
  873                                 Event_post(s, Event_Resource, State_Succeeded, o->action, "total filedescriptors usage test succeeded [current filedescriptors usage = %lld]", s->inf.process->filedescriptors.openTotal);
  874                         }
  875                 } else {
  876                         if (o->limit_absolute > -1LL) {
  877                                 if (Util_evalQExpression(o->operator, s->inf.process->filedescriptors.open, o->limit_absolute)) {
  878                                         rv = State_Failed;
  879                                         Event_post(s, Event_Resource, State_Failed, o->action, "filedescriptors usage of %lld matches limit [filedescriptors %s %lld]", s->inf.process->filedescriptors.open, operatorshortnames[o->operator], o->limit_absolute);
  880                                 } else {
  881                                         Event_post(s, Event_Resource, State_Succeeded, o->action, "filedescriptors test succeeded [current filedescriptors usage = %lld]", s->inf.process->filedescriptors.open);
  882                                 }
  883                         } else {
  884                                 if (systeminfo.statisticsAvailable & Statistics_FiledescriptorsPerProcessMax) {
  885                                         long long limit = s->inf.process->filedescriptors.limit.soft < s->inf.process->filedescriptors.limit.hard ? s->inf.process->filedescriptors.limit.soft : s->inf.process->filedescriptors.limit.hard;
  886                                         float usage = limit > 0 ? (float)100 * (float)s->inf.process->filedescriptors.open / (float)limit : 0;
  887                                         if (Util_evalDoubleQExpression(o->operator, usage, o->limit_percent)) {
  888                                                 rv = State_Failed;
  889                                                 Event_post(s, Event_Resource, State_Failed, o->action, "filedescriptors usage of %.1f%% matches limit [filedescriptors %s %.1f%%]", usage, operatorshortnames[o->operator], o->limit_percent);
  890                                         } else {
  891                                                 Event_post(s, Event_Resource, State_Succeeded, o->action, "filedescriptors usage test succeeded [current filedescriptors usage = %.1f%%]", usage);
  892                                         }
  893                                 } else {
  894                                         Log_warning("Cannot compute filesdescriptors usage %% as per-process maximum is not exposed on this system -- filesdecriptors usage test skipped, please switch to testing absolute value\n");
  895                                 }
  896                         }
  897                 }
  898         }
  899         return rv;
  900 }
  901 
  902 
  903 /**
  904  * Test GID of file or process
  905  */
  906 static State_Type _checkGid(Service_T s, int gid) {
  907         ASSERT(s);
  908         if (s->gid) {
  909                 if (gid >= 0) {
  910                         if ((gid_t)gid != s->gid->gid) {
  911                                 Event_post(s, Event_Gid, State_Failed, s->gid->action, "gid test failed for %s -- current gid is %d", s->name, gid);
  912                                 return State_Failed;
  913                         } else {
  914                                 Event_post(s, Event_Gid, State_Succeeded, s->gid->action, "gid test succeeded [current gid = %d]", gid);
  915                                 return State_Succeeded;
  916                         }
  917                 }
  918                 return State_Init;
  919         }
  920         return State_Succeeded;
  921 }
  922 
  923 
  924 static State_Type _checkTimestamp(Service_T s, Timestamp_T t, time_t timestamp) {
  925         State_Type rv = State_Succeeded;
  926         if (t->test_changes) {
  927                 if (! t->initialized) {
  928                         t->initialized = true;
  929                         t->lastTimestamp = timestamp;
  930                 } else {
  931                         if (t->lastTimestamp != timestamp) {
  932                                 rv = State_Changed;
  933                                 Event_post(s, Event_Timestamp, State_Changed, t->action, "%s for %s changed from %s to %s", timestampnames[t->type], s->path, t->lastTimestamp ? Time_string(t->lastTimestamp, (char[26]){}) : "N/A", Time_string(timestamp, (char[26]){}));
  934                                 t->lastTimestamp = timestamp; // reset expected value for next cycle
  935                         } else {
  936                                 Event_post(s, Event_Timestamp, State_ChangedNot, t->action, "%s was not changed for %s", timestampnames[t->type], s->path);
  937                         }
  938                 }
  939         } else {
  940                 /* we are testing constant value for failed or succeeded state */
  941                 if (Util_evalQExpression(t->operator, Time_now() - timestamp, t->time)) {
  942                         rv = State_Failed;
  943                         Event_post(s, Event_Timestamp, State_Failed, t->action, "%s for %s failed -- current %s is %s", timestampnames[t->type], s->path, timestampnames[t->type], Time_string(timestamp, (char[26]){}));
  944                 } else {
  945                         Event_post(s, Event_Timestamp, State_Succeeded, t->action, "%s test succeeded for %s [current %s is %s]", timestampnames[t->type], s->path, timestampnames[t->type], Time_string(timestamp, (char[26]){}));
  946                 }
  947         }
  948         return rv;
  949 }
  950 
  951 
  952 /**
  953  * Validate timestamps of a service s
  954  */
  955 static State_Type _checkTimestamps(Service_T s, time_t atime, time_t ctime, time_t mtime) {
  956         ASSERT(s);
  957         if (atime > 0 && ctime > 0 && mtime > 0) {
  958                 State_Type rv;
  959                 int failed = 0, changed = 0;
  960                 for (Timestamp_T t = s->timestamplist; t; t = t->next) {
  961                         switch (t->type) {
  962                                 case Timestamp_Access:
  963                                         rv = _checkTimestamp(s, t, atime);
  964                                         break;
  965                                 case Timestamp_Change:
  966                                         rv = _checkTimestamp(s, t, ctime);
  967                                         break;
  968                                 case Timestamp_Modification:
  969                                         rv = _checkTimestamp(s, t, mtime);
  970                                         break;
  971                                 default:
  972                                         rv = _checkTimestamp(s, t, MAX(mtime, ctime));
  973                                         break;
  974                         }
  975                         if (rv == State_Failed) {
  976                                 failed++;
  977                         } else if (rv == State_Changed) {
  978                                 changed++;
  979                         }
  980                 }
  981                 return failed ? State_Failed : (changed ? State_Changed : State_Succeeded);
  982         }
  983         return State_Init;
  984 }
  985 
  986 
  987 /**
  988  * Test size
  989  */
  990 static State_Type _checkSize(Service_T s, off_t size) {
  991         ASSERT(s);
  992         if (size >= 0) {
  993                 State_Type rv = State_Succeeded;
  994                 if (s->sizelist) {
  995                         char buf[10];
  996                         for (Size_T sl = s->sizelist; sl; sl = sl->next) {
  997                                 /* if we are testing for changes only, the value is variable */
  998                                 if (sl->test_changes) {
  999                                         if (! sl->initialized) {
 1000                                                 /* the size was not initialized during monit start, so set the size now
 1001                                                  * and allow further size change testing */
 1002                                                 sl->initialized = true;
 1003                                                 sl->size = size;
 1004                                         } else {
 1005                                                 if ((off_t)sl->size != size) {
 1006                                                         rv = State_Changed;
 1007                                                         Event_post(s, Event_Size, State_Changed, sl->action, "size for %s changed to %s", s->path, Convert_bytes2str(size, buf));
 1008                                                         /* reset expected value for next cycle */
 1009                                                         sl->size = size;
 1010                                                 } else {
 1011                                                         Event_post(s, Event_Size, State_ChangedNot, sl->action, "size has not changed [current size = %s]", Convert_bytes2str(size, buf));
 1012                                                 }
 1013                                         }
 1014                                 } else {
 1015                                         /* we are testing constant value for failed or succeeded state */
 1016                                         if (Util_evalQExpression(sl->operator, size, sl->size)) {
 1017                                                 rv = State_Failed;
 1018                                                 Event_post(s, Event_Size, State_Failed, sl->action, "size test failed for %s -- current size is %s", s->path, Convert_bytes2str(size, buf));
 1019                                         } else {
 1020                                                 Event_post(s, Event_Size, State_Succeeded, sl->action, "size check succeeded [current size = %s]", Convert_bytes2str(size, buf));
 1021                                         }
 1022                                 }
 1023                         }
 1024                 }
 1025                 return rv;
 1026         } else {
 1027                 return State_Init;
 1028         }
 1029 }
 1030 
 1031 
 1032 /**
 1033  * Test uptime
 1034  */
 1035 static State_Type _checkUptime(Service_T s, long long uptime) {
 1036         ASSERT(s);
 1037         State_Type rv = State_Succeeded;
 1038         if (uptime < 0)
 1039                 return State_Init;
 1040         for (Uptime_T ul = s->uptimelist; ul; ul = ul->next) {
 1041                 if (Util_evalQExpression(ul->operator, uptime, ul->uptime)) {
 1042                         rv = State_Failed;
 1043                         Event_post(s, Event_Uptime, State_Failed, ul->action, "uptime test failed for %s -- current uptime is %llu seconds", s->path, (unsigned long long)uptime);
 1044                 } else {
 1045                         Event_post(s, Event_Uptime, State_Succeeded, ul->action, "uptime test succeeded [current uptime = %llu seconds]", (unsigned long long)uptime);
 1046                 }
 1047         }
 1048         return rv;
 1049 }
 1050 
 1051 
 1052 static int _checkPattern(Match_T pattern, const char *line) {
 1053         return regexec(pattern->regex_comp, line, 0, NULL, 0);
 1054 }
 1055 
 1056 
 1057 /**
 1058  * Match content.
 1059  *
 1060  * The test compares only the lines terminated with \n.
 1061  *
 1062  * In the case that line with missing \n is read, the test stops, as we suppose that the file contains only partial line and the rest of it is yet stored in the buffer of the application which writes to the file.
 1063  * The test will resume at the beginning of the incomplete line during the next cycle, allowing the writer to finish the write.
 1064  *
 1065  * We test only Run.limits.fileContentBuffer at maximum - in the case that the line is bigger, we read the rest of the line (till '\n') but ignore the characters past the maximum
 1066  */
 1067 static State_Type _checkMatch(Service_T s) {
 1068         ASSERT(s);
 1069         /* TODO: https://bitbucket.org/tildeslash/monit/issues/401 Refactor and use mmap instead of naive std file io.
 1070          mmap can make code simpler, more efficient and support multi-line matching as there is no line-buffer, but the
 1071          whole file is in the buffer.
 1072          */
 1073         State_Type rv = State_Succeeded;
 1074         if (s->matchlist) {
 1075                 FILE *file = fopen(s->path, "r");
 1076                 if (! file) {
 1077                         Log_error("'%s' cannot open file %s: %s\n", s->name, s->path, STRERROR);
 1078                         return State_Failed;
 1079                 }
 1080                 /* FIXME: Refactor: Initialize the filesystems table ahead of file and filesystems test and index it by device id + replace the Str_startsWith() with lookup to the table by device id (obtained via file's stat()).
 1081                  The central filesystems initialization will allow to reduce the statfs() calls in the case that there will be multiple file and/or filesystems tests for the same fs. Temporarily we go with
 1082                  dummy Str_startsWith() as quick fix which will cover 99.9% of use cases without rising the statfs overhead if statfs call would be inlined here.
 1083                  */
 1084                 if (Str_startsWith(s->path, "/proc")) {
 1085                         s->inf.file->readpos = 0;
 1086                 } else {
 1087                         /* If inode changed or size shrunk -> set read position = 0 */
 1088                         if (s->inf.file->inode != s->inf.file->inode_prev || s->inf.file->readpos > s->inf.file->size)
 1089                                 s->inf.file->readpos = 0;
 1090                         /* Do we need to match? Even if not, go to final, so we can reset the content match error flags in this cycle */
 1091                         if (s->inf.file->readpos == s->inf.file->size) {
 1092                                 DEBUG("'%s' content match skipped - file size nor inode has not changed since last test\n", s->name);
 1093                                 goto final1;
 1094                         }
 1095                 }
 1096                 char *line = CALLOC(sizeof(unsigned char), Run.limits.fileContentBuffer);
 1097                 while (true) {
 1098 next:
 1099                         /* Seek to the read position */
 1100                         if (fseek(file, (long)s->inf.file->readpos, SEEK_SET)) {
 1101                                 rv = State_Failed;
 1102                                 Log_error("'%s' cannot seek file %s: %s\n", s->name, s->path, STRERROR);
 1103                                 goto final2;
 1104                         }
 1105                         if (! fgets(line, (int)Run.limits.fileContentBuffer, file)) {
 1106                                 if (! feof(file)) {
 1107                                         rv = State_Failed;
 1108                                         Log_error("'%s' cannot read file %s: %s\n", s->name, s->path, STRERROR);
 1109                                 }
 1110                                 goto final2;
 1111                         }
 1112                         size_t length = strlen(line);
 1113                         if (length == 0) {
 1114                                 /* No content: shouldn't happen - empty line will contain at least '\n' */
 1115                                 goto final2;
 1116                         } else if (line[length - 1] != '\n') {
 1117                                 if (length < (size_t)(Run.limits.fileContentBuffer - 1)) {
 1118                                         /* Incomplete line: we gonna read it next time again, allowing the writer to complete the write */
 1119                                         DEBUG("'%s' content match: incomplete line read - no new line at end. (retrying next cycle)\n", s->name);
 1120                                         goto final2;
 1121                                 } else if (length >= Run.limits.fileContentBuffer - 1) {
 1122                                         /* Our read buffer is full: ignore the content past the Run.limits.fileContentBuffer */
 1123                                         int _rv;
 1124                                         do {
 1125                                                 if ((_rv = fgetc(file)) == EOF)
 1126                                                         goto final2;
 1127                                                 length++;
 1128                                         } while (_rv != '\n');
 1129                                 }
 1130                         } else {
 1131                                 /* Remove trailing newline */
 1132                                 line[length - 1] = 0;
 1133                         }
 1134                         /* Set read position to the end of last read */
 1135                         s->inf.file->readpos += length;
 1136                         /* Check ignores */
 1137                         for (Match_T ml = s->matchignorelist; ml; ml = ml->next) {
 1138                                 if ((_checkPattern(ml, line) == 0) ^ (ml->not)) {
 1139                                         /* We match! -> line is ignored! */
 1140                                         DEBUG("'%s' Ignore pattern %s'%s' match on content line\n", s->name, ml->not ? "not " : "", ml->match_string);
 1141                                         goto next;
 1142                                 }
 1143                         }
 1144                         /* Check non ignores */
 1145                         for (Match_T ml = s->matchlist; ml; ml = ml->next) {
 1146                                 if ((_checkPattern(ml, line) == 0) ^ (ml->not)) {
 1147                                         DEBUG("'%s' Pattern %s'%s' match on content line [%s]\n", s->name, ml->not ? "not " : "", ml->match_string, line);
 1148                                         /* Save the line for Event_post */
 1149                                         if (! ml->log)
 1150                                                 ml->log = StringBuffer_create((int)Run.limits.fileContentBuffer);
 1151                                         if ((size_t)StringBuffer_length(ml->log) < Run.limits.fileContentBuffer) {
 1152                                                 StringBuffer_append(ml->log, "%s\n", line);
 1153                                                 if ((size_t)StringBuffer_length(ml->log) >= Run.limits.fileContentBuffer)
 1154                                                         StringBuffer_append(ml->log, "...\n");
 1155                                         }
 1156                                 } else {
 1157                                         DEBUG("'%s' Pattern %s'%s' doesn't match on content line [%s]\n", s->name, ml->not ? "not " : "", ml->match_string, line);
 1158                                 }
 1159                         }
 1160                 }
 1161 final2:
 1162                 FREE(line);
 1163 final1:
 1164                 if (fclose(file)) {
 1165                         rv = State_Failed;
 1166                         Log_error("'%s' cannot close file %s: %s\n", s->name, s->path, STRERROR);
 1167                 }
 1168                 /* Post process the matches: generate events for particular patterns */
 1169                 for (Match_T ml = s->matchlist; ml; ml = ml->next) {
 1170                         if (ml->log) {
 1171                                 rv = State_Changed;
 1172                                 Event_post(s, Event_Content, State_Changed, ml->action, "content match:\n%s", StringBuffer_toString(ml->log));
 1173                                 if (ml->log) {
 1174                                         // If the service has dependants, the dependant tests if the parent service (file) is running with no errors before it'll be allowed to start. That recursive check_file() call will
 1175                                         // enter the _checkMatch() too and will free the SringBuffer as part of Event_post => must check ml->log here again before free
 1176                                         StringBuffer_free(&ml->log);
 1177                                 }
 1178                         } else {
 1179                                 Event_post(s, Event_Content, State_ChangedNot, ml->action, "content doesn't match");
 1180                         }
 1181                 }
 1182         }
 1183         return rv;
 1184 }
 1185 
 1186 
 1187 /**
 1188  * Test filesystem flags for possible change since last cycle
 1189  */
 1190 static State_Type _checkFilesystemFlags(Service_T s) {
 1191         ASSERT(s);
 1192         if (*(s->inf.filesystem->flags)) {
 1193                 if (s->inf.filesystem->flagsChanged) {
 1194                         s->inf.filesystem->flagsChanged = false;
 1195                         for (FsFlag_T l = s->fsflaglist; l; l = l->next)
 1196                                 Event_post(s, Event_FsFlag, State_Changed, l->action, "filesystem flags changed to %s", s->inf.filesystem->flags);
 1197                         return State_Changed;
 1198                 }
 1199                 for (FsFlag_T l = s->fsflaglist; l; l = l->next)
 1200                         Event_post(s, Event_FsFlag, State_ChangedNot, l->action, "filesystem flags has not changed [current flags %s]", s->inf.filesystem->flags);
 1201                 return State_ChangedNot;
 1202         }
 1203         return State_Init;
 1204 }
 1205 
 1206 
 1207 /**
 1208  * Filesystem test
 1209  */
 1210 static State_Type _checkFilesystemResources(Service_T s, FileSystem_T td) {
 1211         ASSERT(s);
 1212         ASSERT(td);
 1213         if ((td->limit_percent < 0) && (td->limit_absolute < 0)) {
 1214                 Log_error("'%s' error: filesystem limit not set\n", s->name);
 1215                 return State_Failed;
 1216         }
 1217         switch (td->resource) {
 1218 
 1219                 case Resource_Inode:
 1220                         if (s->inf.filesystem->f_files <= 0) {
 1221                                 DEBUG("'%s' filesystem doesn't support inodes\n", s->name);
 1222                                 return State_Succeeded;
 1223                         }
 1224                         if (td->limit_percent >= 0.) {
 1225                                 if (Util_evalDoubleQExpression(td->operator, s->inf.filesystem->inode_percent, td->limit_percent)) {
 1226                                         Event_post(s, Event_Resource, State_Failed, td->action, "inode usage %.1f%% matches resource limit [inode usage %s %.1f%%]", s->inf.filesystem->inode_percent, operatorshortnames[td->operator], td->limit_percent);
 1227                                         return State_Failed;
 1228                                 }
 1229                         } else {
 1230                                 if (Util_evalQExpression(td->operator, s->inf.filesystem->f_filesused, td->limit_absolute)) {
 1231                                         Event_post(s, Event_Resource, State_Failed, td->action, "inode usage %lld matches resource limit [inode usage %s %lld]", s->inf.filesystem->f_filesused, operatorshortnames[td->operator], td->limit_absolute);
 1232                                         return State_Failed;
 1233                                 }
 1234                         }
 1235                         Event_post(s, Event_Resource, State_Succeeded, td->action, "inode usage test succeeded [current inode usage = %.1f%%]", s->inf.filesystem->inode_percent);
 1236                         return State_Succeeded;
 1237 
 1238                 case Resource_InodeFree:
 1239                         if (s->inf.filesystem->f_files <= 0) {
 1240                                 DEBUG("'%s' filesystem doesn't support inodes\n", s->name);
 1241                                 return State_Succeeded;
 1242                         }
 1243                         if (td->limit_percent >= 0.) {
 1244                                 if (Util_evalDoubleQExpression(td->operator, 100. - s->inf.filesystem->inode_percent, td->limit_percent)) {
 1245                                         Event_post(s, Event_Resource, State_Failed, td->action, "inode free %.1f%% matches resource limit [inode free %s %.1f%%]", 100. - s->inf.filesystem->inode_percent, operatorshortnames[td->operator], td->limit_percent);
 1246                                         return State_Failed;
 1247                                 }
 1248                         } else {
 1249                                 if (Util_evalQExpression(td->operator, s->inf.filesystem->f_filesfree, td->limit_absolute)) {
 1250                                         Event_post(s, Event_Resource, State_Failed, td->action, "inode free %lld matches resource limit [inode free %s %lld]", s->inf.filesystem->f_filesfree, operatorshortnames[td->operator], td->limit_absolute);
 1251                                         return State_Failed;
 1252                                 }
 1253                         }
 1254                         Event_post(s, Event_Resource, State_Succeeded, td->action, "inode free test succeeded [current inode free = %.1f%%]", 100. - s->inf.filesystem->inode_percent);
 1255                         return State_Succeeded;
 1256 
 1257                 case Resource_Space:
 1258                         if (td->limit_percent >= 0.) {
 1259                                 if (Util_evalDoubleQExpression(td->operator, s->inf.filesystem->space_percent, td->limit_percent)) {
 1260                                         Event_post(s, Event_Resource, State_Failed, td->action, "space usage %.1f%% matches resource limit [space usage %s %.1f%%]", s->inf.filesystem->space_percent, operatorshortnames[td->operator], td->limit_percent);
 1261                                         return State_Failed;
 1262                                 }
 1263                         } else {
 1264                                 long long bytesUsed = s->inf.filesystem->f_blocksused * (s->inf.filesystem->f_bsize > 0 ? s->inf.filesystem->f_bsize : 1);
 1265                                 if (Util_evalQExpression(td->operator, bytesUsed, td->limit_absolute)) {
 1266                                         char buf1[10];
 1267                                         char buf2[10];
 1268                                         Convert_bytes2str(bytesUsed, buf1);
 1269                                         Convert_bytes2str(td->limit_absolute, buf2);
 1270                                         Event_post(s, Event_Resource, State_Failed, td->action, "space usage %s matches resource limit [space usage %s %s]", buf1, operatorshortnames[td->operator], buf2);
 1271                                         return State_Failed;
 1272                                 }
 1273                         }
 1274                         Event_post(s, Event_Resource, State_Succeeded, td->action, "space usage test succeeded [current space usage = %.1f%%]", s->inf.filesystem->space_percent);
 1275                         return State_Succeeded;
 1276 
 1277                 case Resource_SpaceFree:
 1278                         if (td->limit_percent >= 0.) {
 1279                                 if (Util_evalDoubleQExpression(td->operator, 100. - s->inf.filesystem->space_percent, td->limit_percent)) {
 1280                                         Event_post(s, Event_Resource, State_Failed, td->action, "space free %.1f%% matches resource limit [space free %s %.1f%%]", 100. - s->inf.filesystem->space_percent, operatorshortnames[td->operator], td->limit_percent);
 1281                                         return State_Failed;
 1282                                 }
 1283                         } else {
 1284                 long long bytesFreeTotal = s->inf.filesystem->f_blocksfreetotal * (s->inf.filesystem->f_bsize > 0 ? s->inf.filesystem->f_bsize : 1);
 1285                                 if (Util_evalQExpression(td->operator, bytesFreeTotal, td->limit_absolute)) {
 1286                                         char buf1[10];
 1287                                         char buf2[10];
 1288                                         Convert_bytes2str(bytesFreeTotal, buf1);
 1289                                         Convert_bytes2str(td->limit_absolute, buf2);
 1290                                         Event_post(s, Event_Resource, State_Failed, td->action, "space free %s matches resource limit [space free %s %s]", buf1, operatorshortnames[td->operator], buf2);
 1291                                         return State_Failed;
 1292                                 }
 1293                         }
 1294                         Event_post(s, Event_Resource, State_Succeeded, td->action, "space free test succeeded [current space free = %.1f%%]", 100. - s->inf.filesystem->space_percent);
 1295                         return State_Succeeded;
 1296 
 1297                 case Resource_ReadBytes:
 1298                         if (Statistics_initialized(&(s->inf.filesystem->read.bytes))) {
 1299                                 double value = Statistics_deltaNormalize(&(s->inf.filesystem->read.bytes));
 1300                                 if (Util_evalDoubleQExpression(td->operator, value, td->limit_absolute)) {
 1301                                         Event_post(s, Event_Resource, State_Failed, td->action, "read rate %s/s matches resource limit [read %s %s/s]", Convert_bytes2str(value, (char[10]){}), operatorshortnames[td->operator], Convert_bytes2str(td->limit_absolute, (char[10]){}));
 1302                                         return State_Failed;
 1303                                 }
 1304                                 Event_post(s, Event_Resource, State_Succeeded, td->action, "read rate test succeeded [current read = %s/s]", Convert_bytes2str(value, (char[10]){}));
 1305                         } else {
 1306                                 DEBUG("'%s' warning -- no data are available for bytes read rate test\n", s->name);
 1307                         }
 1308                         return State_Succeeded;
 1309 
 1310                 case Resource_ReadOperations:
 1311                         if (Statistics_initialized(&(s->inf.filesystem->read.operations))) {
 1312                                 double value = Statistics_deltaNormalize(&(s->inf.filesystem->read.operations));
 1313                                 if (Util_evalDoubleQExpression(td->operator, value, td->limit_absolute)) {
 1314                                         Event_post(s, Event_Resource, State_Failed, td->action, "read rate %.1f operations/s matches resource limit [read %s %llu operations/s]", value, operatorshortnames[td->operator], td->limit_absolute);
 1315                                         return State_Failed;
 1316                                 }
 1317                                 Event_post(s, Event_Resource, State_Succeeded, td->action, "read rate test succeeded [current read = %.1f operations/s]", value);
 1318                         } else {
 1319                                 DEBUG("'%s' warning -- no data are available for read rate test\n", s->name);
 1320                         }
 1321                         return State_Succeeded;
 1322 
 1323                 case Resource_WriteBytes:
 1324                         if (Statistics_initialized(&(s->inf.filesystem->write.bytes))) {
 1325                                 double value = Statistics_deltaNormalize(&(s->inf.filesystem->write.bytes));
 1326                                 if (Util_evalDoubleQExpression(td->operator, value, td->limit_absolute)) {
 1327                                         Event_post(s, Event_Resource, State_Failed, td->action, "write rate %s/s matches resource limit [write %s %s/s]", Convert_bytes2str(value, (char[10]){}), operatorshortnames[td->operator], Convert_bytes2str(td->limit_absolute, (char[10]){}));
 1328                                         return State_Failed;
 1329                                 }
 1330                                 Event_post(s, Event_Resource, State_Succeeded, td->action, "write rate test succeeded [current write = %s/s]", Convert_bytes2str(value, (char[10]){}));
 1331                         } else {
 1332                                 DEBUG("'%s' warning -- no data are available for bytes write rate test\n", s->name);
 1333                         }
 1334                         return State_Succeeded;
 1335 
 1336                 case Resource_WriteOperations:
 1337                         if (Statistics_initialized(&(s->inf.filesystem->write.operations))) {
 1338                                 double value = Statistics_deltaNormalize(&(s->inf.filesystem->write.operations));
 1339                                 if (Util_evalDoubleQExpression(td->operator, value, td->limit_absolute)) {
 1340                                         Event_post(s, Event_Resource, State_Failed, td->action, "write rate %.1f operations/s matches resource limit [write %s %llu operations/s]", value, operatorshortnames[td->operator], td->limit_absolute);
 1341                                         return State_Failed;
 1342                                 }
 1343                                 Event_post(s, Event_Resource, State_Succeeded, td->action, "write rate test succeeded [current write = %.1f operations/s]", value);
 1344                         } else {
 1345                                 DEBUG("'%s' warning -- no data are available for write rate test\n", s->name);
 1346                         }
 1347                         return State_Succeeded;
 1348 
 1349                 case Resource_ServiceTime:
 1350                         {
 1351                                 double deltaTime = 0.;
 1352                                 bool hasReadTime = Statistics_initialized(&(s->inf.filesystem->time.read));
 1353                                 bool hasWriteTime = Statistics_initialized(&(s->inf.filesystem->time.write));
 1354                                 bool hasWaitTime = Statistics_initialized(&(s->inf.filesystem->time.wait));
 1355                                 bool hasRunTime = Statistics_initialized(&(s->inf.filesystem->time.run));
 1356                                 // Some platforms have detailed R/W time (Linux, MacOS), other just total R/W time (*BSD), Solaris has total R/W time with wait/run granularity. To make the test cross-platform and simple, we operate on sum
 1357                                 if (! hasReadTime && ! hasWriteTime && ! hasWaitTime && ! hasRunTime) {
 1358                                         DEBUG("'%s' warning -- no data are available for service time test\n", s->name);
 1359                                         return State_Succeeded;
 1360                                 }
 1361                                 if (hasReadTime) {
 1362                                         deltaTime += Statistics_delta(&(s->inf.filesystem->time.read));
 1363                                 }
 1364                                 if (hasWriteTime) {
 1365                                         deltaTime += Statistics_delta(&(s->inf.filesystem->time.write));
 1366                                 }
 1367                                 if (hasWaitTime) {
 1368                                         deltaTime += Statistics_delta(&(s->inf.filesystem->time.wait));
 1369                                 }
 1370                                 if (hasRunTime) {
 1371                                         deltaTime += Statistics_delta(&(s->inf.filesystem->time.run));
 1372                                 }
 1373                                 double deltaOperations = Statistics_delta(&(s->inf.filesystem->read.operations)) + Statistics_delta(&(s->inf.filesystem->write.operations));
 1374                                 double serviceTime = deltaOperations > 0. ? deltaTime / deltaOperations : 0.;
 1375                                 if (Util_evalDoubleQExpression(td->operator, serviceTime, td->limit_absolute)) {
 1376                                         Event_post(s, Event_Resource, State_Failed, td->action, "service time %.3fms/operation matches resource limit [service time %s %s/operation]", serviceTime, operatorshortnames[td->operator], Convert_time2str(td->limit_absolute, (char[11]){}));
 1377                                         return State_Failed;
 1378                                 }
 1379                                 Event_post(s, Event_Resource, State_Succeeded, td->action, "service time test succeeded [current service time = %.3f ms/operations]", serviceTime);
 1380                         }
 1381                         return State_Succeeded;
 1382 
 1383                 default:
 1384                         Log_error("'%s' error -- unknown resource type: [%d]\n", s->name, td->resource);
 1385                         return State_Failed;
 1386         }
 1387 }
 1388 
 1389 
 1390 static void _checkTimeout(Service_T s) {
 1391         if (s->actionratelist) {
 1392                 /* Start counting cycles */
 1393                 if (s->nstart > 0)
 1394                         s->ncycle++;
 1395                 int max = 0;
 1396                 for (ActionRate_T ar = s->actionratelist; ar; ar = ar->next) {
 1397                         if (max < ar->cycle)
 1398                                 max = ar->cycle;
 1399                         if (s->nstart >= ar->count && s->ncycle <= ar->cycle)
 1400                                 Event_post(s, Event_Timeout, State_Failed, ar->action, "service restarted %d times within %d cycles(s) - %s", s->nstart, s->ncycle, actionnames[ar->action->failed->id]);
 1401                 }
 1402                 /* Stop counting and reset if the cycle interval is succeeded */
 1403                 if (s->ncycle > max) {
 1404                         s->ncycle = 0;
 1405                         s->nstart = 0;
 1406                 }
 1407         }
 1408 }
 1409 
 1410 
 1411 static bool _incron(Service_T s, time_t now) {
 1412         if ((now - s->every.last_run) > 59) { // Minute is the lowest resolution, so only run once per minute
 1413                 if (Time_incron(s->every.spec.cron, now)) {
 1414                         s->every.last_run = now;
 1415                         return true;
 1416                 }
 1417         }
 1418         return false;
 1419 }
 1420 
 1421 
 1422 /**
 1423  * Returns true if validation should be skipped for this service in this cycle, otherwise false. Handle every statement
 1424  */
 1425 static bool _checkSkip(Service_T s) {
 1426         ASSERT(s);
 1427         time_t now = Time_now();
 1428         if (! s->onrebootRestored) {
 1429                 // If the service state was not restored (e.g. new service or state file is missing), handle the onreboot flag
 1430                 if (s->onreboot == Onreboot_Nostart)
 1431                         s->monitor = Monitor_Not;
 1432                 s->onrebootRestored = true;
 1433         }
 1434         if (s->every.type == Every_SkipCycles) {
 1435                 s->every.spec.cycle.counter++;
 1436                 if (s->every.spec.cycle.counter < s->every.spec.cycle.number) {
 1437                         s->monitor |= Monitor_Waiting;
 1438                         DEBUG("'%s' test skipped as current cycle (%d) < every cycle (%d) \n", s->name, s->every.spec.cycle.counter, s->every.spec.cycle.number);
 1439                         return true;
 1440                 }
 1441                 s->every.spec.cycle.counter = 0;
 1442         } else if (s->every.type == Every_Cron && ! _incron(s, now)) {
 1443                 s->monitor |= Monitor_Waiting;
 1444                 DEBUG("'%s' test skipped as current time (%lld) does not match every's cron spec \"%s\"\n", s->name, (long long)now, s->every.spec.cron);
 1445                 return true;
 1446         } else if (s->every.type == Every_NotInCron && Time_incron(s->every.spec.cron, now)) {
 1447                 s->monitor |= Monitor_Waiting;
 1448                 DEBUG("'%s' test skipped as current time (%lld) matches every's cron spec \"not %s\"\n", s->name, (long long)now, s->every.spec.cron);
 1449                 return true;
 1450         }
 1451         s->monitor &= ~Monitor_Waiting;
 1452         // Skip if parent is not initialized
 1453         for (Dependant_T d = s->dependantlist; d; d = d->next ) {
 1454                 Service_T parent = Util_getService(d->dependant);
 1455                 if (parent) {
 1456                         if (parent->monitor != Monitor_Yes) {
 1457                                 DEBUG("'%s' test skipped as required service '%s' is %s\n", s->name, parent->name, parent->monitor == Monitor_Init ? "initializing" : "not monitored");
 1458                                 return true;
 1459                         } else if (parent->error) {
 1460                                 DEBUG("'%s' test skipped as required service '%s' has errors\n", s->name, parent->name);
 1461                                 return true;
 1462                         }
 1463                 }
 1464         }
 1465         return false;
 1466 }
 1467 
 1468 
 1469 /**
 1470  * Returns true if scheduled action was performed
 1471  */
 1472 static bool _doScheduledAction(Service_T s) {
 1473         int rv = false;
 1474         Action_Type action = s->doaction;
 1475         if (action != Action_Ignored) {
 1476                 rv = control_service(s->name, action);
 1477                 Event_post(s, Event_Action, State_Changed, s->action_ACTION, "%s action %s", actionnames[action], rv ? "done" : "failed");
 1478         }
 1479         return rv;
 1480 }
 1481 
 1482 
 1483 /* ---------------------------------------------------------------- Public */
 1484 
 1485 
 1486 /**
 1487  *  This function contains the main check machinery for  monit. The
 1488  *  validate function check services in the service list to see if
 1489  *  they will pass all defined tests.
 1490  */
 1491 int validate() {
 1492         Run.handler_flag = Handler_Succeeded;
 1493         Event_queue_process();
 1494 
 1495         update_system_info();
 1496         ProcessTree_init(ProcessEngine_None);
 1497         gettimeofday(&systeminfo.collected, NULL);
 1498 
 1499         /* In the case that at least one action is pending, perform quick loop to handle the actions ASAP */
 1500         if (Run.flags & Run_ActionPending) {
 1501                 Run.flags &= ~Run_ActionPending;
 1502                 for (Service_T s = servicelist; s; s = s->next)
 1503                         _doScheduledAction(s);
 1504         }
 1505 
 1506         int errors = 0;
 1507         /* Check the services */
 1508         for (Service_T s = servicelist; s && ! interrupt(); s = s->next) {
 1509                 // FIXME: The Service_Program must collect the exit value from last run, even if the program start should be skipped in this cycle => let check program always run the test (to be refactored with new scheduler)
 1510                 if (! _doScheduledAction(s) && s->monitor && (s->type == Service_Program || ! _checkSkip(s))) {
 1511                         _checkTimeout(s); // Can disable monitoring => need to check s->monitor again
 1512                         if (s->monitor) {
 1513                                 State_Type state = s->check(s);
 1514                                 if (state != State_Init && s->monitor != Monitor_Not) // The monitoring can be disabled by some matching rule in s->check so we have to check again before setting to Monitor_Yes
 1515                                         s->monitor = Monitor_Yes;
 1516                                 if (state == State_Failed)
 1517                                         errors++;
 1518                         }
 1519                         gettimeofday(&s->collected, NULL);
 1520                 }
 1521         }
 1522         return errors;
 1523 }
 1524 
 1525 
 1526 /**
 1527  * Validate a given process service s. Events are posted according to
 1528  * its configuration. In case of a fatal event false is returned.
 1529  */
 1530 State_Type check_process(Service_T s) {
 1531         ASSERT(s);
 1532         State_Type rv = State_Succeeded;
 1533         bool checkResources = false;
 1534         pid_t pid = ProcessTree_findProcess(s);
 1535         if (! pid) {
 1536                 for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1537                         rv = State_Failed;
 1538                         Event_post(s, Event_NonExist, State_Failed, l->action, "process is not running");
 1539                 }
 1540                 for (Exist_T l = s->existlist; l; l = l->next) {
 1541                         Event_post(s, Event_Exist, State_Succeeded, l->action, "process is not running");
 1542                 }
 1543                 return rv;
 1544         }
 1545         if (Run.flags & Run_ProcessEngineEnabled) {
 1546                 // Update statistics (event can execute a program and set environment like MONIT_PROCESS_PID)
 1547                 if (! (checkResources = ProcessTree_updateProcess(s, pid))) {
 1548                         Log_error("'%s' failed to get process data\n", s->name);
 1549                         rv = State_Failed;
 1550                 }
 1551         }
 1552         for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1553                 Event_post(s, Event_NonExist, State_Succeeded, l->action, "process is running with pid %d", (int)pid);
 1554         }
 1555         for (Exist_T l = s->existlist; l; l = l->next) {
 1556                 rv = State_Failed;
 1557                 Event_post(s, Event_Exist, State_Failed, l->action, "process is running with pid %d", (int)pid);
 1558         }
 1559         // Double-check the monitoring state: the "if does exist" may call unmonitor/stop, which resets the service object
 1560         if (s->monitor == Monitor_Not)
 1561                 return rv;
 1562         /* Reset the exec and timeout errors if active ... the process is running (most probably after manual intervention) */
 1563         if (IS_EVENT_SET(s->error, Event_Exec))
 1564                 Event_post(s, Event_Exec, State_Succeeded, s->action_EXEC, "process is running after previous exec error (slow starting or manually recovered?)");
 1565         if (IS_EVENT_SET(s->error, Event_Timeout))
 1566                 for (ActionRate_T ar = s->actionratelist; ar; ar = ar->next)
 1567                         Event_post(s, Event_Timeout, State_Succeeded, ar->action, "process is running after previous restart timeout (manually recovered?)");
 1568         if (checkResources) {
 1569                 if (_checkProcessState(s) == State_Failed)
 1570                         rv = State_Failed;
 1571                 if (_checkProcessPid(s) == State_Failed)
 1572                         rv = State_Failed;
 1573                 if (_checkProcessPpid(s) == State_Failed)
 1574                         rv = State_Failed;
 1575                 if (_checkUid(s, s->inf.process->uid) == State_Failed)
 1576                         rv = State_Failed;
 1577                 if (_checkEuid(s, s->inf.process->euid) == State_Failed)
 1578                         rv = State_Failed;
 1579                 if (_checkGid(s, s->inf.process->gid) == State_Failed)
 1580                         rv = State_Failed;
 1581                 if (_checkUptime(s, s->inf.process->uptime) == State_Failed)
 1582                         rv = State_Failed;
 1583                 if (_checkSecurityAttribute(s, s->inf.process->secattr) == State_Failed)
 1584                         rv = State_Failed;
 1585                 if (_checkProcessFiledescriptors(s) == State_Failed)
 1586                         rv = State_Failed;
 1587                 for (Resource_T pr = s->resourcelist; pr; pr = pr->next)
 1588                         if (_checkProcessResources(s, pr) == State_Failed)
 1589                                 rv = State_Failed;
 1590         }
 1591         long long uptimeMilli = (long long)(s->inf.process->uptime) * 1000LL;
 1592         for (Port_T pp = s->portlist; pp; pp = pp->next) {
 1593                 //FIXME: instead of pause, try to test, but ignore any errors in the start timeout timeframe ... will allow to display the port response time as soon as available, instead of waiting for 30+ seconds
 1594                 /* pause port tests in the start timeout timeframe while the process is starting (it may take some time to the process before it starts accepting connections) */
 1595                 if (! s->start || uptimeMilli > s->start->timeout) {
 1596                         if (_checkConnection(s, pp) == State_Failed)
 1597                                 rv = State_Failed;
 1598                 } else {
 1599                         pp->is_available = Connection_Init;
 1600                         DEBUG("'%s' connection test paused for %s while the process is starting\n", s->name, Convert_time2str(s->start->timeout - (uptimeMilli < 0 ? 0 : uptimeMilli), (char[11]){}));
 1601                 }
 1602         }
 1603         for (Port_T pp = s->socketlist; pp; pp = pp->next) {
 1604                 //FIXME: instead of pause, try to test, but ignore any errors in the start timeout timeframe ... will allow to display the port response time as soon as available, instead of waiting for 30+ seconds
 1605                 /* pause socket tests in the start timeout timeframe while the process is starting (it may take some time to the process before it starts accepting connections) */
 1606                 if (! s->start || uptimeMilli > s->start->timeout) {
 1607                         if (_checkConnection(s, pp) == State_Failed)
 1608                                 rv = State_Failed;
 1609                 } else {
 1610                         pp->is_available = Connection_Init;
 1611                         DEBUG("'%s' connection test paused for %s while the process is starting\n", s->name, Convert_time2str(s->start->timeout - (uptimeMilli < 0 ? 0 : uptimeMilli), (char[11]){}));
 1612                 }
 1613         }
 1614         return rv;
 1615 }
 1616 
 1617 
 1618 /**
 1619  * Validate a given filesystem service s. Events are posted according to
 1620  * its configuration. In case of a fatal event false is returned.
 1621  */
 1622 State_Type check_filesystem(Service_T s) {
 1623         ASSERT(s);
 1624         State_Type rv = State_Succeeded;
 1625         if (! filesystem_usage(s)) {
 1626                 for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1627                         rv = State_Failed;
 1628                         Event_post(s, Event_NonExist, State_Failed, l->action, "unable to read filesystem '%s' state", s->path);
 1629                 }
 1630                 for (Exist_T l = s->existlist; l; l = l->next) {
 1631                         Event_post(s, Event_Exist, State_Succeeded, l->action, "filesystem '%s' doesn't exist", s->path);
 1632                 }
 1633                 return rv;
 1634         }
 1635         for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1636                 Event_post(s, Event_NonExist, State_Succeeded, l->action, "succeeded getting filesystem statistics for '%s'", s->path);
 1637         }
 1638         for (Exist_T l = s->existlist; l; l = l->next) {
 1639                 rv = State_Failed;
 1640                 Event_post(s, Event_Exist, State_Failed, l->action, "filesystem '%s' exists", s->path);
 1641         }
 1642         // Double-check the monitoring state: the "if does exist" may call unmonitor/stop, which resets the service object
 1643         if (s->monitor == Monitor_Not)
 1644                 return rv;
 1645         if (_checkPerm(s, s->inf.filesystem->mode) == State_Failed)
 1646                 rv = State_Failed;
 1647         if (_checkUid(s, s->inf.filesystem->uid) == State_Failed)
 1648                 rv = State_Failed;
 1649         if (_checkGid(s, s->inf.filesystem->gid) == State_Failed)
 1650                 rv = State_Failed;
 1651         if (_checkFilesystemFlags(s) == State_Failed)
 1652                 rv = State_Failed;
 1653         for (FileSystem_T fs = s->filesystemlist; fs; fs = fs->next)
 1654                 if (_checkFilesystemResources(s, fs) == State_Failed)
 1655                         rv = State_Failed;
 1656         return rv;
 1657 }
 1658 
 1659 
 1660 /**
 1661  * Validate a given file service s. Events are posted according to
 1662  * its configuration. In case of a fatal event false is returned.
 1663  */
 1664 State_Type check_file(Service_T s) {
 1665         ASSERT(s);
 1666         struct stat stat_buf;
 1667         State_Type rv = State_Succeeded;
 1668         if (stat(s->path, &stat_buf) != 0) {
 1669                 for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1670                         rv = State_Failed;
 1671                         Event_post(s, Event_NonExist, State_Failed, l->action, "file doesn't exist");
 1672                 }
 1673                 for (Exist_T l = s->existlist; l; l = l->next) {
 1674                         Event_post(s, Event_Exist, State_Succeeded, l->action, "file doesn't exist");
 1675                 }
 1676                 return rv;
 1677         } else {
 1678                 s->inf.file->mode = stat_buf.st_mode;
 1679                 if (s->inf.file->inode) {
 1680                         s->inf.file->inode_prev = s->inf.file->inode;
 1681                 } else {
 1682                         // Seek to the end of the file the first time we see it => skip existing content (files which passed the test at least once have inode always set via state file)
 1683                         DEBUG("'%s' seeking to the end of the file\n", s->name);
 1684                         s->inf.file->readpos = stat_buf.st_size;
 1685                         s->inf.file->inode_prev = stat_buf.st_ino;
 1686                 }
 1687                 s->inf.file->inode = stat_buf.st_ino;
 1688                 s->inf.file->uid = stat_buf.st_uid;
 1689                 s->inf.file->gid = stat_buf.st_gid;
 1690                 s->inf.file->size = stat_buf.st_size;
 1691                 s->inf.file->timestamp.access = stat_buf.st_atime;
 1692                 s->inf.file->timestamp.change = stat_buf.st_ctime;
 1693                 s->inf.file->timestamp.modify = stat_buf.st_mtime;
 1694                 for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1695                         Event_post(s, Event_NonExist, State_Succeeded, l->action, "file exists");
 1696                 }
 1697                 for (Exist_T l = s->existlist; l; l = l->next) {
 1698                         rv = State_Failed;
 1699                         Event_post(s, Event_Exist, State_Failed, l->action, "file exists");
 1700                 }
 1701         }
 1702         // Double-check the monitoring state: the "if does exist" may call unmonitor/stop, which resets the service object
 1703         if (s->monitor == Monitor_Not)
 1704                 return rv;
 1705         if (! S_ISREG(s->inf.file->mode) && ! S_ISSOCK(s->inf.file->mode)) {
 1706                 Event_post(s, Event_Invalid, State_Failed, s->action_INVALID, "is neither a regular file nor a socket");
 1707                 return State_Failed;
 1708         } else {
 1709                 Event_post(s, Event_Invalid, State_Succeeded, s->action_INVALID, "is a regular %s",
 1710                            S_ISSOCK(s->inf.file->mode) ? "socket" : "file");
 1711         }
 1712         if (_checkChecksum(s) == State_Failed)
 1713                 rv = State_Failed;
 1714         if (_checkPerm(s, s->inf.file->mode) == State_Failed)
 1715                 rv = State_Failed;
 1716         if (_checkUid(s, s->inf.file->uid) == State_Failed)
 1717                 rv = State_Failed;
 1718         if (_checkGid(s, s->inf.file->gid) == State_Failed)
 1719                 rv = State_Failed;
 1720         if (_checkSize(s, s->inf.file->size) == State_Failed)
 1721                 rv = State_Failed;
 1722         if (_checkTimestamps(s, s->inf.file->timestamp.access, s->inf.file->timestamp.change, s->inf.file->timestamp.modify) == State_Failed)
 1723                 rv = State_Failed;
 1724         if (_checkMatch(s) == State_Failed)
 1725                 rv = State_Failed;
 1726         return rv;
 1727 }
 1728 
 1729 
 1730 /**
 1731  * Validate a given directory service s. Events are posted according to
 1732  * its configuration. In case of a fatal event false is returned.
 1733  */
 1734 State_Type check_directory(Service_T s) {
 1735         ASSERT(s);
 1736         struct stat stat_buf;
 1737         State_Type rv = State_Succeeded;
 1738         if (stat(s->path, &stat_buf) != 0) {
 1739                 for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1740                         rv = State_Failed;
 1741                         Event_post(s, Event_NonExist, State_Failed, l->action, "directory doesn't exist");
 1742                 }
 1743                 for (Exist_T l = s->existlist; l; l = l->next) {
 1744                         Event_post(s, Event_Exist, State_Succeeded, l->action, "directory doesn't exist");
 1745                 }
 1746                 return rv;
 1747         } else {
 1748                 s->inf.directory->mode = stat_buf.st_mode;
 1749                 s->inf.directory->uid = stat_buf.st_uid;
 1750                 s->inf.directory->gid = stat_buf.st_gid;
 1751                 s->inf.directory->timestamp.access = stat_buf.st_atime;
 1752                 s->inf.directory->timestamp.change = stat_buf.st_ctime;
 1753                 s->inf.directory->timestamp.modify = stat_buf.st_mtime;
 1754                 for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1755                         Event_post(s, Event_NonExist, State_Succeeded, l->action, "directory exists");
 1756                 }
 1757                 for (Exist_T l = s->existlist; l; l = l->next) {
 1758                         rv = State_Failed;
 1759                         Event_post(s, Event_Exist, State_Failed, l->action, "directory exists");
 1760                 }
 1761         }
 1762         // Double-check the monitoring state: the "if does exist" may call unmonitor/stop, which resets the service object
 1763         if (s->monitor == Monitor_Not)
 1764                 return rv;
 1765         if (! S_ISDIR(s->inf.directory->mode)) {
 1766                 Event_post(s, Event_Invalid, State_Failed, s->action_INVALID, "is not directory");
 1767                 return State_Failed;
 1768         } else {
 1769                 Event_post(s, Event_Invalid, State_Succeeded, s->action_INVALID, "is directory");
 1770         }
 1771         if (_checkPerm(s, s->inf.directory->mode) == State_Failed)
 1772                 rv = State_Failed;
 1773         if (_checkUid(s, s->inf.directory->uid) == State_Failed)
 1774                 rv = State_Failed;
 1775         if (_checkGid(s, s->inf.directory->gid) == State_Failed)
 1776                 rv = State_Failed;
 1777         if (_checkTimestamps(s, s->inf.directory->timestamp.access, s->inf.directory->timestamp.change, s->inf.directory->timestamp.modify) == State_Failed)
 1778                 rv = State_Failed;
 1779         return rv;
 1780 }
 1781 
 1782 
 1783 /**
 1784  * Validate a given fifo service s. Events are posted according to
 1785  * its configuration. In case of a fatal event false is returned.
 1786  */
 1787 State_Type check_fifo(Service_T s) {
 1788         ASSERT(s);
 1789         struct stat stat_buf;
 1790         State_Type rv = State_Succeeded;
 1791         if (stat(s->path, &stat_buf) != 0) {
 1792                 for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1793                         rv = State_Failed;
 1794                         Event_post(s, Event_NonExist, State_Failed, l->action, "fifo doesn't exist");
 1795                 }
 1796                 for (Exist_T l = s->existlist; l; l = l->next) {
 1797                         Event_post(s, Event_Exist, State_Succeeded, l->action, "fifo doesn't exist");
 1798                 }
 1799                 return rv;
 1800         } else {
 1801                 s->inf.fifo->mode = stat_buf.st_mode;
 1802                 s->inf.fifo->uid = stat_buf.st_uid;
 1803                 s->inf.fifo->gid = stat_buf.st_gid;
 1804                 s->inf.fifo->timestamp.access = stat_buf.st_atime;
 1805                 s->inf.fifo->timestamp.change = stat_buf.st_ctime;
 1806                 s->inf.fifo->timestamp.modify = stat_buf.st_mtime;
 1807                 for (NonExist_T l = s->nonexistlist; l; l = l->next) {
 1808                         Event_post(s, Event_NonExist, State_Succeeded, l->action, "fifo exists");
 1809                 }
 1810                 for (Exist_T l = s->existlist; l; l = l->next) {
 1811                         rv = State_Failed;
 1812                         Event_post(s, Event_Exist, State_Failed, l->action, "fifo exists");
 1813                 }
 1814         }
 1815         // Double-check the monitoring state: the "if does exist" may call unmonitor/stop, which resets the service object
 1816         if (s->monitor == Monitor_Not)
 1817                 return rv;
 1818         if (! S_ISFIFO(s->inf.fifo->mode)) {
 1819                 Event_post(s, Event_Invalid, State_Failed, s->action_INVALID, "is not fifo");
 1820                 return State_Failed;
 1821         } else {
 1822                 Event_post(s, Event_Invalid, State_Succeeded, s->action_INVALID, "is fifo");
 1823         }
 1824         if (_checkPerm(s, s->inf.fifo->mode) == State_Failed)
 1825                 rv = State_Failed;
 1826         if (_checkUid(s, s->inf.fifo->uid) == State_Failed)
 1827                 rv = State_Failed;
 1828         if (_checkGid(s, s->inf.fifo->gid) == State_Failed)
 1829                 rv = State_Failed;
 1830         if (_checkTimestamps(s, s->inf.fifo->timestamp.access, s->inf.fifo->timestamp.change, s->inf.fifo->timestamp.modify) == State_Failed)
 1831                 rv = State_Failed;
 1832         return rv;
 1833 }
 1834 
 1835 
 1836 /**
 1837  * Validate a program status. Events are posted according to
 1838  * its configuration. In case of a fatal event false is returned.
 1839  */
 1840 State_Type check_program(Service_T s) {
 1841         ASSERT(s);
 1842         ASSERT(s->program);
 1843         State_Type rv = State_Succeeded;
 1844         time_t now = Time_now();
 1845         Process_T P = s->program->P;
 1846         if (P) {
 1847                 // Process program output
 1848                 _programOutput(Process_getErrorStream(P), s->program->inprogressOutput);
 1849                 _programOutput(Process_getInputStream(P), s->program->inprogressOutput);
 1850                 // Is the program still running?
 1851                 if (Process_exitStatus(P) < 0) {
 1852                         long long execution_time = (now - s->program->started) * 1000;
 1853                         if (execution_time > s->program->timeout) { // Program timed out
 1854                                 rv = State_Failed;
 1855                                 Log_error("'%s' program timed out after %s. Killing program with pid %ld\n", s->name, Convert_time2str(execution_time, (char[11]){}), (long)Process_getPid(P));
 1856                                 Process_kill(P);
 1857                                 Process_waitFor(P); // Wait for child to exit to get correct exit value
 1858                                 // Fall-through with P and evaluate exit value below.
 1859                         } else {
 1860                                 // Defer test of exit value until program exit or timeout
 1861                                 DEBUG("'%s' status check deferred - waiting on program to exit\n", s->name);
 1862                                 return State_Init;
 1863                         }
 1864                 }
 1865                 s->program->exitStatus = Process_exitStatus(P); // Save exit status for web-view display
 1866                 StringBuffer_trim(s->program->inprogressOutput);
 1867                 // Swap program output (instance finished)
 1868                 StringBuffer_clear(s->program->lastOutput);
 1869                 StringBuffer_append(s->program->lastOutput, "%s", StringBuffer_toString(s->program->inprogressOutput));
 1870                 // Evaluate program's exit status against our status checks.
 1871                 const char *output = StringBuffer_length(s->program->inprogressOutput) ? StringBuffer_toString(s->program->inprogressOutput) : "no output";
 1872                 for (Status_T status = s->statuslist; status; status = status->next) {
 1873                         if (status->operator == Operator_Changed) {
 1874                                 if (status->initialized) {
 1875                                         if (Util_evalQExpression(status->operator, s->program->exitStatus, status->return_value)) {
 1876                                                 Event_post(s, Event_Status, State_Changed, status->action, "status changed (%d -> %d) -- %s", status->return_value, s->program->exitStatus, output);
 1877                                                 status->return_value = s->program->exitStatus;
 1878                                         } else {
 1879                                                 Event_post(s, Event_Status, State_ChangedNot, status->action, "status didn't change (%d) -- %s", s->program->exitStatus, output);
 1880                                         }
 1881                                 } else {
 1882                                         status->initialized = true;
 1883                                         status->return_value = s->program->exitStatus;
 1884                                 }
 1885                         } else {
 1886                                 if (Util_evalQExpression(status->operator, s->program->exitStatus, status->return_value)) {
 1887                                         rv = State_Failed;
 1888                                         Event_post(s, Event_Status, State_Failed, status->action, "status failed (%d) -- %s", s->program->exitStatus, output);
 1889                                 } else {
 1890                                         Event_post(s, Event_Status, State_Succeeded, status->action, "status succeeded (%d) -- %s", s->program->exitStatus, output);
 1891                                 }
 1892                         }
 1893                 }
 1894                 Process_free(&s->program->P);
 1895         } else {
 1896                 rv = State_Init;
 1897         }
 1898         //FIXME: the current off-by-one-cycle based design requires that the check program will collect the exit value next cycle even if program startup should be skipped in the given cycle => must test skip here (new scheduler will obsolete this deferred skip checking)
 1899         if (s->monitor != Monitor_Not && ! _checkSkip(s)) { // The status evaluation may disable service monitoring
 1900                 // Start program
 1901                 StringBuffer_clear(s->program->inprogressOutput);
 1902                 s->program->P = Command_execute(s->program->C);
 1903                 if (! s->program->P) {
 1904                         rv = State_Failed;
 1905                         Event_post(s, Event_Status, State_Failed, s->action_EXEC, "failed to execute '%s' -- %s", s->path, STRERROR);
 1906                 } else {
 1907                         Event_post(s, Event_Status, State_Succeeded, s->action_EXEC, "program started");
 1908                         s->program->started = now;
 1909                 }
 1910         }
 1911         return rv;
 1912 }
 1913 
 1914 
 1915 /**
 1916  * Validate a remote service.
 1917  * @param s The remote service to validate
 1918  * @return false if there was an error otherwise true
 1919  */
 1920 State_Type check_remote_host(Service_T s) {
 1921         ASSERT(s);
 1922         State_Type rv = State_Succeeded;
 1923         Icmp_T last_ping = NULL;
 1924         /* Test each icmp type in the service's icmplist */
 1925         for (Icmp_T icmp = s->icmplist; icmp; icmp = icmp->next) {
 1926                 switch (icmp->type) {
 1927                         case ICMP_ECHO:
 1928                                 icmp->responsetime.current = icmp_echo(s->path, icmp->family, &(icmp->outgoing), icmp->size, icmp->timeout, icmp->count);
 1929                                 if (icmp->responsetime.current == -2) {
 1930                                         icmp->is_available = Connection_Init;
 1931 #ifdef SOLARIS
 1932                                         DEBUG("'%s' ping test skipped -- the monit user has no permission to create raw socket, please add net_icmpaccess privilege or run monit as root\n", s->name);
 1933 #elif defined LINUX
 1934                                         DEBUG("'%s' ping test skipped -- the monit user has no permission to create raw socket, please add CAP_NET_RAW capability or run monit as root\n", s->name);
 1935 #else
 1936                                         DEBUG("'%s' ping test skipped -- the monit user has no permission to create raw socket, please run monit as root\n", s->name);
 1937 #endif
 1938                                 } else if (icmp->responsetime.current == -1) {
 1939                                         rv = icmp->check_invers ? State_Succeeded : State_Failed;
 1940                                         icmp->is_available = Connection_Failed;
 1941                                         Event_post(s, Event_Icmp, rv, icmp->action, "ping test failed");
 1942                                 } else {
 1943                                         rv = icmp->check_invers ? State_Failed : State_Succeeded;
 1944                                         icmp->is_available = Connection_Ok;
 1945                                         Event_post(s, Event_Icmp, rv, icmp->action, "ping test succeeded [response time %s]", Convert_time2str(icmp->responsetime.current, (char[11]){}));
 1946 
 1947                                         // Check response time
 1948                                         if (icmp->responsetime.limit > -1.) {
 1949                                                 if (Util_evalDoubleQExpression(icmp->responsetime.operator, icmp->responsetime.current, icmp->responsetime.limit)) {
 1950                                                         rv = State_Failed;
 1951                                                         Event_post(s, Event_Speed, State_Failed, icmp->action, "response time %s doesn't match limit [time %s %s]", Convert_time2str(icmp->responsetime.current, (char[11]){}), operatorshortnames[icmp->responsetime.operator], Convert_time2str(icmp->responsetime.limit, (char[11]){}));
 1952                                                 } else {
 1953                                                         Event_post(s, Event_Speed, State_Failed, icmp->action, "response time %s matches limit [time %s %s]", Convert_time2str(icmp->responsetime.current, (char[11]){}), operatorshortnames[icmp->responsetime.operator], Convert_time2str(icmp->responsetime.limit, (char[11]){}));
 1954                                                 }
 1955                                         }
 1956                                 }
 1957                                 last_ping = icmp;
 1958                                 break;
 1959                         default:
 1960                                 Log_error("'%s' error -- unknown ICMP type: [%d]\n", s->name, icmp->type);
 1961                                 return State_Failed;
 1962                 }
 1963         }
 1964         /* If we could not ping the host we assume it's down and do not continue to check any port connections  */
 1965         if (last_ping && last_ping->is_available == Connection_Failed && s->portlist) {
 1966                 DEBUG("'%s' icmp ping failed, skipping any port connection tests\n", s->name);
 1967                 return State_Failed;
 1968         }
 1969         /* Test each host:port and protocol in the service's portlist */
 1970         for (Port_T p = s->portlist; p; p = p->next)
 1971                 if (_checkConnection(s, p) == State_Failed)
 1972                         rv = State_Failed;
 1973         return rv;
 1974 }
 1975 
 1976 
 1977 /**
 1978  * Validate the general system indicators. In case of a fatal event
 1979  * false is returned.
 1980  */
 1981 State_Type check_system(Service_T s) {
 1982         ASSERT(s);
 1983         State_Type rv = State_Succeeded;
 1984         for (Resource_T r = s->resourcelist; r; r = r->next)
 1985                 if (_checkSystemResources(s, r) == State_Failed)
 1986                         rv = State_Failed;
 1987         if (_checkUptime(s, Time_now() - systeminfo.booted) == State_Failed)
 1988                 rv = State_Failed;
 1989         if ( systeminfo.statisticsAvailable & Statistics_FiledescriptorsPerSystem ) {
 1990                 if (_checkSystemFiledescriptors(s) == State_Failed)
 1991                         rv = State_Failed;
 1992         }
 1993         return rv;
 1994 }
 1995 
 1996 
 1997 State_Type check_net(Service_T s) {
 1998         volatile State_Type rv = State_Succeeded;
 1999         volatile bool havedata = true;
 2000         // Get link statistics
 2001         TRY
 2002         {
 2003                 Link_update(s->inf.net->stats);
 2004         }
 2005         ELSE
 2006         {
 2007                 havedata = false;
 2008                 for (LinkStatus_T link = s->linkstatuslist; link; link = link->next) {
 2009                         rv = link->check_invers ? State_Succeeded : State_Failed;
 2010                         Event_post(s, Event_Link, link->check_invers ? State_Succeeded : State_Failed, link->action, "link data collection failed -- %s", Exception_frame.message);
 2011                 }
 2012         }
 2013         END_TRY;
 2014         // State
 2015         if (! havedata) {
 2016                 return s->inverseStatus ? State_Succeeded : State_Failed; // No data, event handled in the TRY-ELSE loop already, terminate remaining tests
 2017         } else if (! Link_getState(s->inf.net->stats)) {
 2018                 for (LinkStatus_T link = s->linkstatuslist; link; link = link->next) {
 2019                         Event_post(s, Event_Link, link->check_invers ? State_Succeeded : State_Failed, link->action, "link down");
 2020                 }
 2021                 return s->inverseStatus ? State_Succeeded : State_Failed; // Link is down, terminate remaining tests
 2022         } else {
 2023                 for (LinkStatus_T link = s->linkstatuslist; link; link = link->next)
 2024                         Event_post(s, Event_Link, link->check_invers ? State_Failed : State_Succeeded, link->action, "link up");
 2025         }
 2026         if (! s->inverseStatus) {
 2027                 //FIXME: these tests share the same class (Event_Link), so if "link up" test is set, it would set the state to failure, but these tests will reset it back to success. When we'll add more event types,
 2028                 //       we shoud assign a new type for link in/out errors and then we can perform these tests even if "link up" is set
 2029 
 2030                 // Link errors
 2031                 long long oerrors = Link_getErrorsOutPerSecond(s->inf.net->stats);
 2032                 for (LinkStatus_T link = s->linkstatuslist; link; link = link->next) {
 2033                         if (oerrors > 0) {
 2034                                 rv = State_Failed;
 2035                                 Event_post(s, Event_Link, State_Failed, link->action, "%lld upload errors detected", oerrors);
 2036                         } else {
 2037                                 Event_post(s, Event_Link, State_Succeeded, link->action, "upload errors check succeeded");
 2038                         }
 2039                 }
 2040                 long long ierrors = Link_getErrorsInPerSecond(s->inf.net->stats);
 2041                 for (LinkStatus_T link = s->linkstatuslist; link; link = link->next) {
 2042                         if (ierrors > 0) {
 2043                                 rv = State_Failed;
 2044                                 Event_post(s, Event_Link, State_Failed, link->action, "%lld download errors detected", ierrors);
 2045                         } else {
 2046                                 Event_post(s, Event_Link, State_Succeeded, link->action, "download errors check succeeded");
 2047                         }
 2048                 }
 2049         }
 2050         // Link speed
 2051         int duplex = Link_getDuplex(s->inf.net->stats);
 2052         long long speed = Link_getSpeed(s->inf.net->stats);
 2053         for (LinkSpeed_T link = s->linkspeedlist; link; link = link->next) {
 2054                 if (speed > 0 && link->speed) {
 2055                         if (duplex > -1 && duplex != link->duplex)
 2056                                 Event_post(s, Event_Speed, State_Changed, link->action, "link mode is now %s-duplex", duplex ? "full" : "half");
 2057                         else
 2058                                 Event_post(s, Event_Speed, State_ChangedNot, link->action, "link mode has not changed since last cycle [current mode is %s-duplex]", duplex ? "full" : "half");
 2059                         if (speed != link->speed)
 2060                                 Event_post(s, Event_Speed, State_Changed, link->action, "link speed changed to %.0lf Mb/s", (double)speed / 1000000.);
 2061                         else
 2062                                 Event_post(s, Event_Speed, State_ChangedNot, link->action, "link speed has not changed since last cycle [current speed = %.0lf Mb/s]", (double)speed / 1000000.);
 2063                 }
 2064                 link->duplex = duplex;
 2065                 link->speed = speed;
 2066         }
 2067         // Link saturation
 2068         double osaturation = Link_getSaturationOutPerSecond(s->inf.net->stats);
 2069         double isaturation = Link_getSaturationInPerSecond(s->inf.net->stats);
 2070         if (osaturation >= 0. && isaturation >= 0.) {
 2071                 for (LinkSaturation_T link = s->linksaturationlist; link; link = link->next) {
 2072                         if (duplex) {
 2073                                 if (Util_evalDoubleQExpression(link->operator, osaturation, link->limit))
 2074                                         Event_post(s, Event_Saturation, State_Failed, link->action, "link upload saturation of %.1f%% matches limit [saturation %s %.1f%%]", osaturation, operatorshortnames[link->operator], link->limit);
 2075                                 else
 2076                                         Event_post(s, Event_Saturation, State_Succeeded, link->action, "link upload saturation check succeeded [current upload saturation %.1f%%]", osaturation);
 2077                                 if (Util_evalDoubleQExpression(link->operator, isaturation, link->limit))
 2078                                         Event_post(s, Event_Saturation, State_Failed, link->action, "link download saturation of %.1f%% matches limit [saturation %s %.1f%%]", isaturation, operatorshortnames[link->operator], link->limit);
 2079                                 else
 2080                                         Event_post(s, Event_Saturation, State_Succeeded, link->action, "link download saturation check succeeded [current download saturation %.1f%%]", isaturation);
 2081                         } else {
 2082                                 double iosaturation = osaturation + isaturation;
 2083                                 if (Util_evalDoubleQExpression(link->operator, iosaturation, link->limit))
 2084                                         Event_post(s, Event_Saturation, State_Failed, link->action, "link saturation of %.1f%% matches limit [saturation %s %.1f%%]", iosaturation, operatorshortnames[link->operator], link->limit);
 2085                                 else
 2086                                         Event_post(s, Event_Saturation, State_Succeeded, link->action, "link saturation check succeeded [current saturation %.1f%%]", iosaturation);
 2087                         }
 2088                 }
 2089         }
 2090         // Upload
 2091         char buf1[10], buf2[10];
 2092         for (Bandwidth_T upload = s->uploadbyteslist; upload; upload = upload->next) {
 2093                 long long obytes;
 2094                 switch (upload->range) {
 2095                         case Time_Minute:
 2096                                 obytes = Link_getBytesOutPerMinute(s->inf.net->stats, upload->rangecount);
 2097                                 break;
 2098                         case Time_Hour:
 2099                                 if (upload->rangecount == 1) // Use precise minutes range for "last hour"
 2100                                         obytes = Link_getBytesOutPerMinute(s->inf.net->stats, 60);
 2101                                 else
 2102                                         obytes = Link_getBytesOutPerHour(s->inf.net->stats, upload->rangecount);
 2103                                 break;
 2104                         default:
 2105                                 obytes = Link_getBytesOutPerSecond(s->inf.net->stats);
 2106                                 break;
 2107                 }
 2108                 if (obytes >= 0 && Util_evalQExpression(upload->operator, obytes, upload->limit))
 2109                         Event_post(s, Event_ByteOut, State_Failed, upload->action, "%supload %s matches limit [upload rate %s %s in last %d %s]", upload->range != Time_Second ? "total " : "", Convert_bytes2str(obytes, buf1), operatorshortnames[upload->operator], Convert_bytes2str(upload->limit, buf2), upload->rangecount, Util_timestr(upload->range));
 2110                 else
 2111                         Event_post(s, Event_ByteOut, State_Succeeded, upload->action, "%supload check succeeded [current upload rate %s in last %d %s]", upload->range != Time_Second ? "total " : "", Convert_bytes2str(obytes, buf1), upload->rangecount, Util_timestr(upload->range));
 2112         }
 2113         for (Bandwidth_T upload = s->uploadpacketslist; upload; upload = upload->next) {
 2114                 long long opackets;
 2115                 switch (upload->range) {
 2116                         case Time_Minute:
 2117                                 opackets = Link_getPacketsOutPerMinute(s->inf.net->stats, upload->rangecount);
 2118                                 break;
 2119                         case Time_Hour:
 2120                                 if (upload->rangecount == 1) // Use precise minutes range for "last hour"
 2121                                         opackets = Link_getPacketsOutPerMinute(s->inf.net->stats, 60);
 2122                                 else
 2123                                         opackets = Link_getPacketsOutPerHour(s->inf.net->stats, upload->rangecount);
 2124                                 break;
 2125                         default:
 2126                                 opackets = Link_getPacketsOutPerSecond(s->inf.net->stats);
 2127                                 break;
 2128                 }
 2129                 if (opackets >= 0 && Util_evalQExpression(upload->operator, opackets, upload->limit))
 2130                         Event_post(s, Event_PacketOut, State_Failed, upload->action, "%supload packets %lld matches limit [upload packets %s %lld in last %d %s]", upload->range != Time_Second ? "total " : "", opackets, operatorshortnames[upload->operator], upload->limit, upload->rangecount, Util_timestr(upload->range));
 2131                 else
 2132                         Event_post(s, Event_PacketOut, State_Succeeded, upload->action, "%supload packets check succeeded [current upload packets %lld in last %d %s]", upload->range != Time_Second ? "total " : "", opackets, upload->rangecount, Util_timestr(upload->range));
 2133         }
 2134         // Download
 2135         for (Bandwidth_T download = s->downloadbyteslist; download; download = download->next) {
 2136                 long long ibytes;
 2137                 switch (download->range) {
 2138                         case Time_Minute:
 2139                                 ibytes = Link_getBytesInPerMinute(s->inf.net->stats, download->rangecount);
 2140                                 break;
 2141                         case Time_Hour:
 2142                                 if (download->rangecount == 1) // Use precise minutes range for "last hour"
 2143                                         ibytes = Link_getBytesInPerMinute(s->inf.net->stats, 60);
 2144                                 else
 2145                                         ibytes = Link_getBytesInPerHour(s->inf.net->stats, download->rangecount);
 2146                                 break;
 2147                         default:
 2148                                 ibytes = Link_getBytesInPerSecond(s->inf.net->stats);
 2149                                 break;
 2150                 }
 2151                 if (ibytes >= 0 && Util_evalQExpression(download->operator, ibytes, download->limit))
 2152                         Event_post(s, Event_ByteIn, State_Failed, download->action, "%sdownload %s matches limit [download rate %s %s in last %d %s]", download->range != Time_Second ? "total " : "", Convert_bytes2str(ibytes, buf1), operatorshortnames[download->operator], Convert_bytes2str(download->limit, buf2), download->rangecount, Util_timestr(download->range));
 2153                 else
 2154                         Event_post(s, Event_ByteIn, State_Succeeded, download->action, "%sdownload check succeeded [current download rate %s in last %d %s]", download->range != Time_Second ? "total " : "", Convert_bytes2str(ibytes, buf1), download->rangecount, Util_timestr(download->range));
 2155         }
 2156         for (Bandwidth_T download = s->downloadpacketslist; download; download = download->next) {
 2157                 long long ipackets;
 2158                 switch (download->range) {
 2159                         case Time_Minute:
 2160                                 ipackets = Link_getPacketsInPerMinute(s->inf.net->stats, download->rangecount);
 2161                                 break;
 2162                         case Time_Hour:
 2163                                 if (download->rangecount == 1) // Use precise minutes range for "last hour"
 2164                                         ipackets = Link_getPacketsInPerMinute(s->inf.net->stats, 60);
 2165                                 else
 2166                                         ipackets = Link_getPacketsInPerHour(s->inf.net->stats, download->rangecount);
 2167                                 break;
 2168                         default:
 2169                                 ipackets = Link_getPacketsInPerSecond(s->inf.net->stats);
 2170                                 break;
 2171                 }
 2172                 if (ipackets >= 0 && Util_evalQExpression(download->operator, ipackets, download->limit))
 2173                         Event_post(s, Event_PacketIn, State_Failed, download->action, "%sdownload packets %lld matches limit [download packets %s %lld in last %d %s]", download->range != Time_Second ? "total " : "", ipackets, operatorshortnames[download->operator], download->limit, download->rangecount, Util_timestr(download->range));
 2174                 else
 2175                         Event_post(s, Event_PacketIn, State_Succeeded, download->action, "%sdownload packets check succeeded [current download packets %lld in last %d %s]", download->range != Time_Second ? "total " : "", ipackets, download->rangecount, Util_timestr(download->range));
 2176         }
 2177         return rv;
 2178 }
 2179 
 2180