"Fossies" - the Fresh Open Source Software Archive

Member "libs/PHPCrawler/ProcessCommunication/PHPCrawlerProcessCommunication.class.php" (8 Jan 2013, 6572 Bytes) of package /linux/www/SitemapCreator.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) PHP source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 <?php
    2 /**
    3  * Class containing methods for process handling and communication
    4  *
    5  * @package phpcrawl
    6  * @internal
    7  */
    class PHPCrawlerProcessCommunication
    {
      /**
       * UID of the crawler. It is also handed to sem_get() as the semaphore-key.
       *
       * @var string
       */
      protected $crawler_uniqid;

      /**
       * Multiprocess-mode the crawler is running in (one of the PHPCrawlerMultiProcessModes-constants).
       *
       * @var int
       */
      protected $multiprocess_mode;

      /**
       * Working-directory of the crawler. File names ("crawlerstatus.tmp", "pids") are appended
       * to this string directly, without inserting a directory-separator.
       *
       * @var string
       */
      protected $working_directory;

      /**
       * The last crawler-status that was set or read by this object.
       *
       * @var PHPCrawlerStatus
       */
      protected $crawlerStatus;

      /**
       * Flag indicating whether resumption is activated
       *
       * @var bool
       */
      protected $resumtion_enabled = false;

      /**
       * Initiates a new PHPCrawlerProcessCommunication-object.
       *
       * @param string $crawler_uniqid     UID of the crawler
       * @param int    $multiprocess_mode  Multiprocess-mode the crawler is running (one of the PHPCrawlerMultiProcessModes-constants)
       * @param string $working_directory  Working-dir of the crawler
       * @param bool   $enable_resumtion   TRUE if resumption of crawling-processes should be possible
       */
      public function __construct($crawler_uniqid, $multiprocess_mode, $working_directory, $enable_resumtion)
      {
        $this->crawler_uniqid = $crawler_uniqid;
        $this->multiprocess_mode = $multiprocess_mode;
        $this->working_directory = $working_directory;
        $this->resumtion_enabled = $enable_resumtion;

        $this->crawlerStatus = new PHPCrawlerStatus();
      }

      /**
       * Tells whether the crawler-status has to be kept in the status-file
       * instead of only in this object.
       *
       * That is the case if the child-processes execute the usercode or if resumption is enabled.
       *
       * @return bool
       */
      protected function usesSharedStatusFile()
      {
        return $this->multiprocess_mode == PHPCrawlerMultiProcessModes::MPMODE_CHILDS_EXECUTES_USERCODE
               || $this->resumtion_enabled == true;
      }

      /**
       * Sets/writes the current crawler-status
       *
       * The status is always stored in this object; it is additionally serialized to
       * "crawlerstatus.tmp" in the working-directory if the status-file is in use.
       *
       * @param PHPCrawlerStatus $crawler_status The status to set
       */
      public function setCrawlerStatus(PHPCrawlerStatus $crawler_status)
      {
        $this->crawlerStatus = $crawler_status;

        // Write crawler-status back to file if crawler is multiprocessed
        if ($this->usesSharedStatusFile())
        {
          PHPCrawlerUtils::serializeToFile($this->working_directory."crawlerstatus.tmp", $crawler_status);
        }
      }

      /**
       * Returns/reads the current crawler-status
       *
       * If the status-file is in use, the status gets re-read from "crawlerstatus.tmp" on every call.
       *
       * @return PHPCrawlerStatus The current crawlerstatus as a PHPCrawlerStatus-object
       */
      public function getCrawlerStatus()
      {
        // Get crawler-status from file if crawler is multiprocessed
        if ($this->usesSharedStatusFile())
        {
          $this->crawlerStatus = PHPCrawlerUtils::deserializeFromFile($this->working_directory."crawlerstatus.tmp");

          // Loose comparison on purpose: any "empty" result of the deserialization
          // gets replaced by a fresh status-object.
          if ($this->crawlerStatus == null) $this->crawlerStatus = new PHPCrawlerStatus();
        }

        return $this->crawlerStatus;
      }

      /**
       * Updates the status of the crawler
       *
       * If the status-file is in use, the read-modify-write cycle is guarded by a semaphore
       * obtained for the crawler-UID.
       *
       * @param PHPCrawlerDocumentInfo $PageInfo          The PHPCrawlerDocumentInfo-object of the last received document
       *                                                  or NULL if no document was received.
       * @param int                    $abort_reason      One of the PHPCrawlerAbortReasons::ABORTREASON-constants if the crawling-process
       *                                                  should get aborted, otherwise NULL
       * @param string                 $first_content_url The first URL some content was found in (or NULL if no content was found so far).
       */
      public function updateCrawlerStatus($PageInfo, $abort_reason = null, $first_content_url = null)
      {
        PHPCrawlerBenchmark::start("updating_crawler_status");

        $use_semaphore = $this->usesSharedStatusFile();
        $sem_key = null;

        // Set semaphore if crawler is multiprocessed
        if ($use_semaphore)
        {
          $sem_key = sem_get($this->crawler_uniqid);
          sem_acquire($sem_key);
        }

        // Get current Status
        $crawler_status = $this->getCrawlerStatus();

        // Update status
        if ($PageInfo != null)
        {
          // Every processed PageInfo counts as a followed link
          $crawler_status->links_followed++;

          // Only documents that actually were received increase the documents_received-counter
          if ($PageInfo->received == true) $crawler_status->documents_received++;

          // Increase bytes-counter
          $crawler_status->bytes_received += $PageInfo->bytes_received;
        }

        // Set abortreason
        if ($abort_reason !== null) $crawler_status->abort_reason = $abort_reason;

        // Set first_content_url
        if ($first_content_url !== null) $crawler_status->first_content_url = $first_content_url;

        // Write crawler-status back
        $this->setCrawlerStatus($crawler_status);

        // Remove semaphore if crawler is multiprocessed
        if ($use_semaphore)
        {
          sem_release($sem_key);
        }

        PHPCrawlerBenchmark::stop("updating_crawler_status");
      }

      /**
       * Registers the PID of a child-process
       *
       * The PID gets appended as a separate line to the file "pids" in the working-directory.
       * The write is guarded by the semaphore obtained for the crawler-UID.
       *
       * @param int $pid The PID
       */
      public function registerChildPID($pid)
      {
        $sem_key = sem_get($this->crawler_uniqid);
        sem_acquire($sem_key);

        file_put_contents($this->working_directory."pids", $pid."\n", FILE_APPEND);

        sem_release($sem_key);
      }

      /**
       * Returns all PIDs of all registered child-processes
       *
       * The method polls the file "pids" in the working-directory. It does NOT return before
       * that file exists, and, if $process_count is given, not before the file contains
       * exactly that number of PIDs.
       *
       * @param int $process_count If set, this function tries to get the child-PIDs until the given number of PIDs
       *                           was determined.
       * @return array Numeric array containing the PIDs (as strings, the way they were read from the file)
       */
      public function getChildPIDs($process_count = null)
      {
        $pid_file = $this->working_directory."pids";

        while (true)
        {
          if (file_exists($pid_file))
          {
            $contents = file_get_contents($pid_file);

            // An unreadable file is treated like an empty one
            if ($contents === false) $contents = "";

            $child_pids = preg_split("#\n#", $contents, -1, PREG_SPLIT_NO_EMPTY);

            // Loose comparison on purpose: a $process_count of NULL (or 0) means "take whatever is there".
            if ($process_count == null || count($child_pids) == $process_count)
            {
              return $child_pids;
            }
          }

          // Wait 0.2 seconds before polling again. usleep() is required here since sleep()
          // only takes whole seconds (a value of 0.2 would get truncated to 0, resulting in a busy-loop).
          usleep(200000);
        }
      }

      /**
       * Kills all registered child-processes by sending them a SIGKILL
       *
       * Relies on getChildPIDs(), so it waits until the "pids"-file exists.
       */
      public function killChildProcesses()
      {
        foreach ($this->getChildPIDs() as $child_pid)
        {
          posix_kill($child_pid, SIGKILL);
        }
      }

      /**
       * Checks whether any child-processes are (still) running.
       *
       * A process is regarded as running if posix_getsid() is able to determine a session-ID for its PID.
       * Relies on getChildPIDs(), so it waits until the "pids"-file exists.
       *
       * @return bool TRUE if at least one of the registered child-processes is alive
       */
      public function childProcessAlive()
      {
        foreach ($this->getChildPIDs() as $child_pid)
        {
          if (posix_getsid($child_pid) !== false)
          {
            return true;
          }
        }

        return false;
      }
    }
  208 ?>