"Fossies" - the Fresh Open Source Software Archive

Member "SitemapCreatorCrawler.class.php" (20 Jan 2013, 2969 Bytes) of package /linux/www/SitemapCreator.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML, using (guessed) PHP source code syntax highlighting (style: standard) with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 <?php
    2 
    3 /**
    4  * @file 
    5  * Sitemap Creator Crawler class
    6  *  
    7  * extends PHPCrawler class 
    8  * @link http://phpcrawl.cuab.de/classreferences/index.html
    9  * 
   10  * @package SitemapCreator
   11  * @category Crawler
   12  * @author Waleed Gadelkareem <gadelkareem@gmail.com>
   13  * @link http://gadelkareem.com/
   14  * @license  GPLv2 
   15  */
   16 
   17 /**
   18  * Loading external PHPCrawler-class
   19  * 
   20  * Uncomment for standalone
   21  */
   22 //if (!class_exists("PHPCrawler"))
   23 //    require_once(dirname(__FILE__) . "/libs/PHPCrawler/PHPCrawler.class.php");
   24 
   25 class SMCCrawler extends PHPCrawler {
   26 
   27     /**
   28      *  get Last Modified header
   29      * @see enableLastModifiedCount()
   30      * @var bool 
   31      */
   32     var $LastModifiedCount = true;
   33 
   34     /**
   35      * Array contianing the entries.
   36      *
   37      * @var array
   38      */
   39     var $entries = array();
   40 
   41     /**
   42      *  get access to all information about a page or file the crawler found and received.
   43      *
   44      * @param PHPCrawlerDocumentInfo A PHPCrawlerDocumentInfo-object containing all information about the currently received document.
   45      * @section 3 Crawler
   46      */
   47     //@todo crawl reporting for ajax getCrawlerStatus()
   48     public function handleDocumentInfo(PHPCrawlerDocumentInfo $PageInfo) {
   49         $entry = array(
   50             'URL' => $PageInfo->url,
   51         );
   52         //set 'Last-Modified'
   53         $this->getLastModified($PageInfo, $entry);
   54         //add new entry
   55         $this->addURL_Entry($entry); //unset($PageInfo);
   56         //if ($this->checkForAbort())
   57         //    echo 'aborted';
   58     }
   59 
   60     /**
   61      *  get Last-Modified header
   62      *
   63      * @param PHPCrawlerDocumentInfo A PHPCrawlerDocumentInfo-object containing all information about the currently received document.
   64      * @section 3 Crawler
   65      */
   66     protected function getLastModified(PHPCrawlerDocumentInfo $PageInfo, &$entry) {
   67         //check if enabled
   68         if (!$this->LastModifiedCount)
   69             return;
   70 
   71         //get 'Last-Modified' header from the Document Info
   72         $last_modified = strtotime(PHPCrawlerUtils::getHeaderValue($PageInfo->header, 'last-modified'));
   73         //if 'Last-Modified' header not found then get 'Date' header
   74         if (!$last_modified)
   75             $last_modified = strtotime(PHPCrawlerUtils::getHeaderValue($PageInfo->header, 'date'));
   76         //set last modified
   77         $entry['Last-Modified'] = $last_modified;
   78     }
   79 
   80     /**
   81      *  add URL entry {@link $entries}
   82      * 
   83      * @param array $entry  URL set to be added to sitemap
   84      * @section 3 Crawler
   85      */
   86     protected function addURL_Entry($entry) {
   87         $this->entries[] = $entry;
   88     }
   89 
   90     /**
   91      * Enable or diable last-Modified calculation {@link $LastModifiedCount}
   92      * 
   93      * @param bool $mode trure to enable, false otherwise
   94      * @section 3 Crawler
   95      */
   96     public function enableLastModifiedCount($mode) {
   97         $this->LastModifiedCount = ($mode);
   98     }
   99 
  100 }