"Fossies" - the Fresh Open Source Software Archive

Member "libs/PHPCrawler/UrlCache/PHPCrawlerMemoryURLCache.class.php" (11 Jan 2013, 3855 Bytes) of package /linux/www/SitemapCreator.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) PHP source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 <?php
    /**
     * Class for caching/storing URLs/links in memory.
     *
     * URLs are kept in per-priority-level queues. Within one priority level they
     * are handed out in the order they were added; across levels the numerically
     * highest level is served first.
     *
     * @package phpcrawl
     * @internal
     */
    class PHPCrawlerMemoryURLCache extends PHPCrawlerURLCacheBase
    {
      /**
       * Cached URLs, grouped by priority level.
       *
       * Structure: array(priority_level => array(key => PHPCrawlerURLDescriptor)).
       * A priority level is removed as soon as its last URL was handed out, so
       * every level present here holds at least one URL. The inner keys are NOT
       * guaranteed to start at 0, because getNextUrl() removes entries from the
       * front without reindexing.
       *
       * @var array
       */
      protected $urls = array();

      /**
       * Lookup map of the hashes of all URLs that were added so far
       * (hash => true), used to reject duplicates.
       *
       * @var array
       */
      protected $url_map = array();

      /**
       * Returns the next URL from the cache that should be crawled and removes
       * it from the cache.
       *
       * @return PHPCrawlerURLDescriptor|null The oldest URL of the highest priority level,
       *                                      or NULL if the cache contains no URLs.
       */
      public function getNextUrl()
      {
        $max_pri_lvl = $this->getMaxPriorityLevel();

        // Empty cache -> nothing to hand out
        if ($max_pri_lvl === null) return null;

        // Take the first remaining entry of that level. reset()/key() is used
        // instead of array_shift() so the remaining entries are not reindexed.
        reset($this->urls[$max_pri_lvl]);
        $first_key = key($this->urls[$max_pri_lvl]);

        $UrlDescriptor_next = $this->urls[$max_pri_lvl][$first_key];
        unset($this->urls[$max_pri_lvl][$first_key]);

        // If there's no URL in the priority-level-array left -> unset
        if (count($this->urls[$max_pri_lvl]) == 0) unset($this->urls[$max_pri_lvl]);

        return $UrlDescriptor_next;
      }

      /**
       * Returns all URLs currently cached in the URL-cache.
       *
       * The cache itself is not modified by this call.
       *
       * @return array Numeric array containing all URLs as PHPCrawlerURLDescriptor-objects
       */
      public function getAllURLs()
      {
        $URLs = array();

        // Iterate over the stored values instead of indexes 0..count-1: the inner
        // keys have gaps at the front once getNextUrl() was called.
        foreach ($this->urls as $descriptors)
        {
          foreach ($descriptors as $descriptor)
          {
            $URLs[] = $descriptor;
          }
        }

        return $URLs;
      }

      /**
       * Removes all URLs and all priority-rules from the URL-cache.
       */
      public function clear()
      {
        $this->urls = array();
        $this->url_map = array();

        // Not declared in this class; presumably the priority-rules of the base class
        $this->url_priorities = array();
      }

      /**
       * Adds an URL to the url-cache
       *
       * The URL is ignored if its hash is already known to the cache.
       *
       * @param PHPCrawlerURLDescriptor $UrlDescriptor
       */
      public function addURL(PHPCrawlerURLDescriptor $UrlDescriptor)
      {
        if ($UrlDescriptor === null) return;

        // Hash of the URL
        $map_key = $this->getDistinctURLHash($UrlDescriptor);

        // If URL already in cache -> abort.
        // The loose comparison is kept on purpose: the value getDistinctURLHash()
        // returns for "no hash" is defined outside of this class.
        if ($map_key != null && isset($this->url_map[$map_key])) return;

        // Retrieve priority-level
        $priority_level = $this->getUrlPriority($UrlDescriptor->url_rebuild);

        // Add URL to URL-Array (appended to the end of its priority level)
        $this->urls[$priority_level][] = $UrlDescriptor;

        // Add URL to URL-Map
        if ($this->url_distinct_property != self::URLHASH_NONE)
          $this->url_map[$map_key] = true;
      }

      /**
       * Adds an bunch of URLs to the url-cache
       *
       * Empty entries of the given array are skipped.
       *
       * @param array $urls  An array containing the URLs as PHPCrawlerURLDescriptor-objects
       */
      public function addURLs($urls)
      {
        // foreach instead of an index-loop, so arrays with non-sequential keys
        // are processed completely as well.
        foreach ($urls as $UrlDescriptor)
        {
          if ($UrlDescriptor != null)
          {
            $this->addURL($UrlDescriptor);
          }
        }
      }

      /**
       * Checks whether there are URLs left in the cache or not.
       *
       * @return bool
       */
      public function containsURLs()
      {
        return count($this->urls) > 0;
      }

      /**
       * Has no function in this class.
       */
      public function cleanup()
      {
      }

      /**
       * Has no function in this class.
       */
      public function purgeCache()
      {
      }

      /**
       * Has no function in this memory-cache.
       */
      public function markUrlAsFollowed(PHPCrawlerURLDescriptor $UrlDescriptor)
      {
      }

      /**
       * Returns the highest priority-level an URL exists in cache for.
       *
       * @return int|null The highest priority-level, or NULL if the cache contains no URLs.
       */
      protected function getMaxPriorityLevel()
      {
        if (count($this->urls) == 0) return null;

        return max(array_keys($this->urls));
      }
    }
  163 ?>