"Fossies" - the Fresh Open Source Software Archive

Member "docs/phpcrawl/PHPCrawler.html" (20 Jan 2013, 149370 Bytes) of package /linux/www/SitemapCreator.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) HTML source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 <?xml version="1.0" encoding="iso-8859-1"?>
    2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    3   <html xmlns="http://www.w3.org/1999/xhtml">
    4         <head>
    5             <!-- template designed by Marco Von Ballmoos -->
    6             <title>Docs For Class PHPCrawler</title>
    7             <link rel="stylesheet" href="../media/stylesheet.css" />
    8             <meta http-equiv='Content-Type' content='text/html; charset=iso-8859-1'/>
    9         </head>
   10         <body>
   11             <div class="page-body">         
   12 <h2 class="class-name">Class PHPCrawler</h2>
   13 
   14 <a name="sec-description"></a>
   15 <div class="info-box">
   16     <div class="info-box-title">Description</div>
   17     <div class="nav-bar">
   18                     <span class="disabled">Description</span> |
   19                             <a href="#sec-descendents">Descendents</a>
   20             |                                           <a href="#sec-var-summary">Vars</a> (<a href="#sec-vars">details</a>)
   21                         |                                           <a href="#sec-method-summary">Methods</a> (<a href="#sec-methods">details</a>)
   22                         
   23                     </div>
   24     <div class="info-box-body">
   25                 <!-- ========== Info from phpDoc block ========= -->
   26 <p class="short-description">PHPCrawl mainclass</p>
   27     <ul class="tags">
   28                 <li><span class="field">author:</span> Uwe Hunfeld (phpcrawl@cuab.de)</li>
   29                 <li><span class="field">version:</span> 0.81</li>
   30             </ul>
   31         <p class="notes">
   32             Located in <a class="field" href="_libs---PHPCrawler---PHPCrawler.class.php.html">/libs/PHPCrawler/PHPCrawler.class.php</a> (line <span class="field">10</span>)
   33         </p>
   34         
   35                 
   36         <pre></pre>
   37     
   38             </div>
   39 </div>
   40 
   41     <a name="sec-descendents"></a>
   42     <div class="info-box">
   43         <div class="info-box-title">Direct descendents</div>
   44         <div class="nav-bar">
   45             <a href="#sec-description">Description</a> |
   46             <span class="disabled">Descendents</span>
   47             |                                               <a href="#sec-var-summary">Vars</a> (<a href="#sec-vars">details</a>)
   48                                 |                                                           <a href="#sec-method-summary">Methods</a> (<a href="#sec-methods">details</a>)
   49                             
   50                                 </div>
   51         <div class="info-box-body">
   52             <table cellpadding="2" cellspacing="0" class="class-table">
   53                 <tr>
   54                     <th class="class-table-header">Class</th>
   55                     <th class="class-table-header">Description</th>
   56                 </tr>
   57                                 <tr>
   58                     <td style="padding-right: 2em"><a href="../phpcrawl/SMCCrawler.html">SMCCrawler</a></td>
   59                     <td>
   60                                             Loading external PHPCrawler-class
   61                                         </td>
   62                 </tr>
   63                             </table>
   64         </div>
   65     </div>
   66 
   67 
   68     <a name="sec-var-summary"></a>
   69     <div class="info-box">
   70         <div class="info-box-title">Variable Summary</div>
   71         <div class="nav-bar">
   72             <a href="#sec-description">Description</a> |
   73                             <a href="#sec-descendents">Descendents</a> |
   74                         <span class="disabled">Vars</span> (<a href="#sec-vars">details</a>)
   75                             | 
   76                                     <a href="#sec-method-summary">Methods</a> (<a href="#sec-methods">details</a>)
   77                             
   78                                 </div>
   79         <div class="info-box-body">
   80             <div class="var-summary">
   81                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 <div class="var-title">
   82                     <span class="var-type">int</span>
   83                     <a href="#$child_process_number" title="details" class="var-name">$child_process_number</a>
   84                 </div>
   85                                                                 <div class="var-title">
   86                     <span class="var-type">mixed</span>
   87                     <a href="#$class_version" title="details" class="var-name">$class_version</a>
   88                 </div>
   89                                                                 <div class="var-title">
   90                     <span class="var-type">PHPCrawlerCookieCache</span>
   91                     <a href="#$CookieCache" title="details" class="var-name">$CookieCache</a>
   92                 </div>
   93                                                                 <div class="var-title">
   94                     <span class="var-type">bool</span>
   95                     <a href="#$cookie_handling_enabled" title="details" class="var-name">$cookie_handling_enabled</a>
   96                 </div>
   97                                                                 <div class="var-title">
   98                     <span class="var-type">string</span>
   99                     <a href="#$crawler_uniqid" title="details" class="var-name">$crawler_uniqid</a>
  100                 </div>
  101                                                                 <div class="var-title">
  102                     <span class="var-type"><a href="../phpcrawl/PHPCrawlerDocumentInfoQueue.html">PHPCrawlerDocumentInfoQueue</a></span>
  103                     <a href="#$DocumentInfoQueue" title="details" class="var-name">$DocumentInfoQueue</a>
  104                 </div>
  105                                                                 <div class="var-title">
  106                     <span class="var-type">int</span>
  107                     <a href="#$document_limit" title="details" class="var-name">$document_limit</a>
  108                 </div>
  109                                                                 <div class="var-title">
  110                     <span class="var-type">mixed</span>
  111                     <a href="#$follow_redirects_till_content" title="details" class="var-name">$follow_redirects_till_content</a>
  112                 </div>
  113                                                                 <div class="var-title">
  114                     <span class="var-type">mixed</span>
  115                     <a href="#$is_chlid_process" title="details" class="var-name">$is_chlid_process</a>
  116                 </div>
  117                                                                 <div class="var-title">
  118                     <span class="var-type">mixed</span>
  119                     <a href="#$is_parent_process" title="details" class="var-name">$is_parent_process</a>
  120                 </div>
  121                                                                 <div class="var-title">
  122                     <span class="var-type">PHPCrawlerURLCache</span>
  123                     <a href="#$LinkCache" title="details" class="var-name">$LinkCache</a>
  124                 </div>
  125                                                                 <div class="var-title">
  126                     <span class="var-type">mixed</span>
  127                     <a href="#$link_priority_array" title="details" class="var-name">$link_priority_array</a>
  128                 </div>
  129                                                                 <div class="var-title">
  130                     <span class="var-type">int</span>
  131                     <a href="#$multiprocess_mode" title="details" class="var-name">$multiprocess_mode</a>
  132                 </div>
  133                                                                 <div class="var-title">
  134                     <span class="var-type">mixed</span>
  135                     <a href="#$obey_robots_txt" title="details" class="var-name">$obey_robots_txt</a>
  136                 </div>
  137                                                                 <div class="var-title">
  138                     <span class="var-type">bool</span>
  139                     <a href="#$only_count_received_documents" title="details" class="var-name">$only_count_received_documents</a>
  140                 </div>
  141                                                                 <div class="var-title">
  142                     <span class="var-type"><a href="../phpcrawl/PHPCrawlerHTTPRequest.html">PHPCrawlerHTTPRequest</a></span>
  143                     <a href="#$PageRequest" title="details" class="var-name">$PageRequest</a>
  144                 </div>
  145                                                                 <div class="var-title">
  146                     <span class="var-type">int</span>
  147                     <a href="#$porcess_abort_reason" title="details" class="var-name">$porcess_abort_reason</a>
  148                 </div>
  149                                                                 <div class="var-title">
  150                     <span class="var-type"><a href="../phpcrawl/PHPCrawlerProcessCommunication.html">PHPCrawlerProcessCommunication</a></span>
  151                     <a href="#$ProcessCommunication" title="details" class="var-name">$ProcessCommunication</a>
  152                 </div>
  153                                                                 <div class="var-title">
  154                     <span class="var-type"><a href="../phpcrawl/PHPCrawlerDocumentInfoQueue.html">PHPCrawlerDocumentInfoQueue</a></span>
  155                     <a href="#$resumtion_enabled" title="details" class="var-name">$resumtion_enabled</a>
  156                 </div>
  157                                                                 <div class="var-title">
  158                     <span class="var-type"><a href="../phpcrawl/PHPCrawlerRobotsTxtParser.html">PHPCrawlerRobotsTxtParser</a></span>
  159                     <a href="#$RobotsTxtParser" title="details" class="var-name">$RobotsTxtParser</a>
  160                 </div>
  161                                                                 <div class="var-title">
  162                     <span class="var-type">string</span>
  163                     <a href="#$starting_url" title="details" class="var-name">$starting_url</a>
  164                 </div>
  165                                                                 <div class="var-title">
  166                     <span class="var-type">int</span>
  167                     <a href="#$traffic_limit" title="details" class="var-name">$traffic_limit</a>
  168                 </div>
  169                                                                 <div class="var-title">
  170                     <span class="var-type">mixed</span>
  171                     <a href="#$urlcache_purged" title="details" class="var-name">$urlcache_purged</a>
  172                 </div>
  173                                                                 <div class="var-title">
  174                     <span class="var-type"><a href="../phpcrawl/PHPCrawlerURLFilter.html">PHPCrawlerURLFilter</a></span>
  175                     <a href="#$UrlFilter" title="details" class="var-name">$UrlFilter</a>
  176                 </div>
  177                                                                 <div class="var-title">
  178                     <span class="var-type">int</span>
  179                     <a href="#$url_cache_type" title="details" class="var-name">$url_cache_type</a>
  180                 </div>
  181                                                                 <div class="var-title">
  182                     <span class="var-type"><a href="../phpcrawl/PHPCrawlerUserSendDataCache.html">PHPCrawlerUserSendDataCache</a></span>
  183                     <a href="#$UserSendDataCache" title="details" class="var-name">$UserSendDataCache</a>
  184                 </div>
  185                                                                 <div class="var-title">
  186                     <span class="var-type">string</span>
  187                     <a href="#$working_base_directory" title="details" class="var-name">$working_base_directory</a>
  188                 </div>
  189                                                                 <div class="var-title">
  190                     <span class="var-type">string</span>
  191                     <a href="#$working_directory" title="details" class="var-name">$working_directory</a>
  192                 </div>
  193                                             </div>
  194         </div>
  195     </div>
  196 
  197     <a name="sec-method-summary"></a>
  198     <div class="info-box">
  199         <div class="info-box-title">Method Summary</div>
  200         <div class="nav-bar">
  201             <a href="#sec-description">Description</a> |
  202                             <a href="#sec-descendents">Descendents</a> |
  203                                                                         <a href="#sec-var-summary">Vars</a> (<a href="#sec-vars">details</a>)
  204                  
  205                 |
  206                         <span class="disabled">Methods</span> (<a href="#sec-methods">details</a>)
  207         </div>
  208         <div class="info-box-body">         
  209             <div class="method-summary">
  210                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
  211                                                 <div class="method-definition">
  212                                             <span class="method-result">PHPCrawler</span>
  213                                         <a href="#__construct" title="details" class="method-name">__construct</a>
  214                                         ()
  215                                     </div>
  216                                                                 <div class="method-definition">
  217                                             <span class="method-result">bool</span>
  218                                         <a href="#addBasicAuthentication" title="details" class="method-name">addBasicAuthentication</a>
  219                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$url_regex</span>, <span class="var-type">string</span>&nbsp;<span class="var-name">$username</span>, <span class="var-type">string</span>&nbsp;<span class="var-name">$password</span>)
  220                                     </div>
  221                                                                 <div class="method-definition">
  222                                             <span class="method-result">bool</span>
  223                                         <a href="#addContentTypeReceiveRule" title="details" class="method-name">addContentTypeReceiveRule</a>
  224                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
  225                                     </div>
  226                                                                 <div class="method-definition">
  227                                             <span class="method-result">void</span>
  228                                         <a href="#addFollowMatch" title="details" class="method-name">addFollowMatch</a>
  229                                             (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
  230                                     </div>
  231                                                                 <div class="method-definition">
  232                                             <span class="method-result">void</span>
  233                                         <a href="#addLinkExtractionTags" title="details" class="method-name">addLinkExtractionTags</a>
  234                                         ()
  235                                     </div>
  236                                                                 <div class="method-definition">
  237                                             <span class="method-result">bool</span>
  238                                         <a href="#addLinkPriority" title="details" class="method-name">addLinkPriority</a>
  239                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>, <span class="var-type">int</span>&nbsp;<span class="var-name">$level</span>)
  240                                     </div>
  241                                                                 <div class="method-definition">
  242                                             <span class="method-result">bool</span>
  243                                         <a href="#addLinkSearchContentType" title="details" class="method-name">addLinkSearchContentType</a>
  244                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
  245                                     </div>
  246                                                                 <div class="method-definition">
  247                                             <span class="method-result">void</span>
  248                                         <a href="#addNonFollowMatch" title="details" class="method-name">addNonFollowMatch</a>
  249                                             (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
  250                                     </div>
  251                                                                 <div class="method-definition">
  252                                             <span class="method-result">bool</span>
  253                                         <a href="#addPostData" title="details" class="method-name">addPostData</a>
  254                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$url_regex</span>, <span class="var-type">array</span>&nbsp;<span class="var-name">$post_data_array</span>)
  255                                     </div>
  256                                                                 <div class="method-definition">
  257                                             <span class="method-result">void</span>
  258                                         <a href="#addReceiveContentType" title="details" class="method-name">addReceiveContentType</a>
  259                                             (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
  260                                     </div>
  261                                                                 <div class="method-definition">
  262                                             <span class="method-result">void</span>
  263                                         <a href="#addReceiveToMemoryMatch" title="details" class="method-name">addReceiveToMemoryMatch</a>
  264                                             (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
  265                                     </div>
  266                                                                 <div class="method-definition">
  267                                             <span class="method-result">void</span>
  268                                         <a href="#addReceiveToTmpFileMatch" title="details" class="method-name">addReceiveToTmpFileMatch</a>
  269                                             (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
  270                                     </div>
  271                                                                 <div class="method-definition">
  272                                             <span class="method-result">bool</span>
  273                                         <a href="#addStreamToFileContentType" title="details" class="method-name">addStreamToFileContentType</a>
  274                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
  275                                     </div>
  276                                                                 <div class="method-definition">
  277                                             <span class="method-result">bool</span>
  278                                         <a href="#addURLFilterRule" title="details" class="method-name">addURLFilterRule</a>
  279                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
  280                                     </div>
  281                                                                 <div class="method-definition">
  282                                             <span class="method-result">bool</span>
  283                                         <a href="#addURLFollowRule" title="details" class="method-name">addURLFollowRule</a>
  284                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
  285                                     </div>
  286                                                                 <div class="method-definition">
  287                                             <span class="method-result">int</span>
  288                                         <a href="#checkForAbort" title="details" class="method-name">checkForAbort</a>
  289                                         ()
  290                                     </div>
  291                                                                 <div class="method-definition">
  292                                             <span class="method-result">void</span>
  293                                         <a href="#cleanup" title="details" class="method-name">cleanup</a>
  294                                         ()
  295                                     </div>
  296                                                                 <div class="method-definition">
  297                                             <span class="method-result">void</span>
  298                                         <a href="#createWorkingDirectory" title="details" class="method-name">createWorkingDirectory</a>
  299                                         ()
  300                                     </div>
  301                                                                 <div class="method-definition">
  302                                             <span class="method-result">void</span>
  303                                         <a href="#disableExtendedLinkInfo" title="details" class="method-name">disableExtendedLinkInfo</a>
  304                                             (<span class="var-type"></span>&nbsp;<span class="var-name">$mode</span>)
  305                                     </div>
  306                                                                 <div class="method-definition">
  307                                             <span class="method-result">bool</span>
  308                                         <a href="#enableAggressiveLinkSearch" title="details" class="method-name">enableAggressiveLinkSearch</a>
  309                                             (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
  310                                     </div>
  311                                                                 <div class="method-definition">
  312                                             <span class="method-result">bool</span>
  313                                         <a href="#enableCookieHandling" title="details" class="method-name">enableCookieHandling</a>
  314                                             (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
  315                                     </div>
  316                                                                 <div class="method-definition">
  317                                             <span class="method-result">void</span>
  318                                         <a href="#enableResumption" title="details" class="method-name">enableResumption</a>
  319                                         ()
  320                                     </div>
  321                                                                 <div class="method-definition">
  322                                             <span class="method-result">int</span>
  323                                         <a href="#getCrawlerId" title="details" class="method-name">getCrawlerId</a>
  324                                         ()
  325                                     </div>
  326                                                                 <div class="method-definition">
  327                                             <span class="method-result"><a href="../phpcrawl/PHPCrawlerProcessReport.html">PHPCrawlerProcessReport</a></span>
  328                                         <a href="#getProcessReport" title="details" class="method-name">getProcessReport</a>
  329                                         ()
  330                                     </div>
  331                                                                 <div class="method-definition">
  332                                             <span class="method-result">void</span>
  333                                         <a href="#getReport" title="details" class="method-name">getReport</a>
  334                                         ()
  335                                     </div>
  336                                                                 <div class="method-definition">
  337                                             <span class="method-result">void</span>
  338                                         <a href="#go" title="details" class="method-name">go</a>
  339                                         ()
  340                                     </div>
  341                                                                 <div class="method-definition">
  342                                             <span class="method-result">void</span>
  343                                         <a href="#goMultiProcessed" title="details" class="method-name">goMultiProcessed</a>
  344                                             ([<span class="var-type">int</span>&nbsp;<span class="var-name">$process_count</span> = <span class="var-default">3</span>], [<span class="var-type">int</span>&nbsp;<span class="var-name">$multiprocess_mode</span> = <span class="var-default">1</span>])
  345                                     </div>
  346                                                                 <div class="method-definition">
  347                                             <span class="method-result">int</span>
  348                                         <a href="#handleDocumentInfo" title="details" class="method-name">handleDocumentInfo</a>
  349                                             (<span class="var-type"><a href="../phpcrawl/PHPCrawlerDocumentInfo.html">PHPCrawlerDocumentInfo</a></span>&nbsp;<span class="var-name">$PageInfo</span>)
  350                                     </div>
  351                                                                 <div class="method-definition">
  352                                             <span class="method-result">int</span>
  353                                         <a href="#handleHeaderInfo" title="details" class="method-name">handleHeaderInfo</a>
  354                                             (<span class="var-type"><a href="../phpcrawl/PHPCrawlerResponseHeader.html">PHPCrawlerResponseHeader</a></span>&nbsp;<span class="var-name">$header</span>)
  355                                     </div>
  356                                                                 <div class="method-definition">
  357                                             <span class="method-result">int</span>
  358                                         <a href="#handlePageData" title="details" class="method-name">handlePageData</a>
  359                                             (<span class="var-type">array</span>&nbsp;<span class="var-name">&amp;$page_data</span>)
  360                                     </div>
  361                                                                 <div class="method-definition">
  362                                             <span class="method-result">void</span>
  363                                         <a href="#initChildProcess" title="details" class="method-name">initChildProcess</a>
  364                                         ()
  365                                     </div>
  366                                                                 <div class="method-definition">
  367                                             <span class="method-result">void</span>
  368                                         <a href="#initCrawlerProcess" title="details" class="method-name">initCrawlerProcess</a>
  369                                         ()
  370                                     </div>
  371                                                                 <div class="method-definition">
  372                                             <span class="method-result">void</span>
  373                                         <a href="#obeyNoFollowTags" title="details" class="method-name">obeyNoFollowTags</a>
  374                                             (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
  375                                     </div>
  376                                                                 <div class="method-definition">
  377                                             <span class="method-result">bool</span>
  378                                         <a href="#obeyRobotsTxt" title="details" class="method-name">obeyRobotsTxt</a>
  379                                             (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
  380                                     </div>
  381                                                                 <div class="method-definition">
  382                                             <span class="method-result">void</span>
  383                                         <a href="#processRobotsTxt" title="details" class="method-name">processRobotsTxt</a>
  384                                         ()
  385                                     </div>
  386                                                                 <div class="method-definition">
  387                                             <span class="method-result">bool</span>
  388                                         <a href="#processUrl" title="details" class="method-name">processUrl</a>
  389                                             (<span class="var-type"><a href="../phpcrawl/PHPCrawlerURLDescriptor.html">PHPCrawlerURLDescriptor</a></span>&nbsp;<span class="var-name">$UrlDescriptor</span>)
  390                                     </div>
  391                                                                 <div class="method-definition">
  392                                             <span class="method-result">void</span>
  393                                         <a href="#resume" title="details" class="method-name">resume</a>
  394                                             (<span class="var-type">int</span>&nbsp;<span class="var-name">$crawler_id</span>)
  395                                     </div>
  396                                                                 <div class="method-definition">
  397                                             <span class="method-result">void</span>
  398                                         <a href="#setAggressiveLinkExtraction" title="details" class="method-name">setAggressiveLinkExtraction</a>
  399                                             (<span class="var-type"></span>&nbsp;<span class="var-name">$mode</span>)
  400                                     </div>
  401                                                                 <div class="method-definition">
  402                                             <span class="method-result">bool</span>
  403                                         <a href="#setConnectionTimeout" title="details" class="method-name">setConnectionTimeout</a>
  404                                             (<span class="var-type">int</span>&nbsp;<span class="var-name">$timeout</span>)
  405                                     </div>
  406                                                                 <div class="method-definition">
  407                                             <span class="method-result">bool</span>
  408                                         <a href="#setContentSizeLimit" title="details" class="method-name">setContentSizeLimit</a>
  409                                             (<span class="var-type">int</span>&nbsp;<span class="var-name">$bytes</span>)
  410                                     </div>
  411                                                                 <div class="method-definition">
  412                                             <span class="method-result">void</span>
  413                                         <a href="#setCookieHandling" title="details" class="method-name">setCookieHandling</a>
  414                                             (<span class="var-type"></span>&nbsp;<span class="var-name">$mode</span>)
  415                                     </div>
  416                                                                 <div class="method-definition">
  417                                             <span class="method-result">bool</span>
  418                                         <a href="#setFollowMode" title="details" class="method-name">setFollowMode</a>
  419                                             (<span class="var-type">int</span>&nbsp;<span class="var-name">$follow_mode</span>)
  420                                     </div>
  421                                                                 <div class="method-definition">
  422                                             <span class="method-result">bool</span>
  423                                         <a href="#setFollowRedirects" title="details" class="method-name">setFollowRedirects</a>
  424                                             (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
  425                                     </div>
  426                                                                 <div class="method-definition">
  427                                             <span class="method-result">void</span>
  428                                         <a href="#setFollowRedirectsTillContent" title="details" class="method-name">setFollowRedirectsTillContent</a>
  429                                             (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
  430                                     </div>
  431                                                                 <div class="method-definition">
  432                                             <span class="method-result">void</span>
  433                                         <a href="#setLinkExtractionTags" title="details" class="method-name">setLinkExtractionTags</a>
  434                                             (<span class="var-type">array</span>&nbsp;<span class="var-name">$tag_array</span>)
  435                                     </div>
  436                                                                 <div class="method-definition">
  437                                             <span class="method-result">void</span>
  438                                         <a href="#setPageLimit" title="details" class="method-name">setPageLimit</a>
  439                                             (<span class="var-type">int</span>&nbsp;<span class="var-name">$limit</span>, [<span class="var-type">bool</span>&nbsp;<span class="var-name">$only_count_received_documents</span> = <span class="var-default">false</span>])
  440                                     </div>
  441                                                                 <div class="method-definition">
  442                                             <span class="method-result">bool</span>
  443                                         <a href="#setPort" title="details" class="method-name">setPort</a>
  444                                             (<span class="var-type">int</span>&nbsp;<span class="var-name">$port</span>)
  445                                     </div>
  446                                                                 <div class="method-definition">
  447                                             <span class="method-result">void</span>
  448                                         <a href="#setProxy" title="details" class="method-name">setProxy</a>
  449                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$proxy_host</span>, <span class="var-type">int</span>&nbsp;<span class="var-name">$proxy_port</span>, [<span class="var-type">string</span>&nbsp;<span class="var-name">$proxy_username</span> = <span class="var-default">null</span>], [<span class="var-type">string</span>&nbsp;<span class="var-name">$proxy_password</span> = <span class="var-default">null</span>])
  450                                     </div>
  451                                                                 <div class="method-definition">
  452                                             <span class="method-result">bool</span>
  453                                         <a href="#setStreamTimeout" title="details" class="method-name">setStreamTimeout</a>
  454                                             (<span class="var-type">int</span>&nbsp;<span class="var-name">$timeout</span>)
  455                                     </div>
  456                                                                 <div class="method-definition">
  457                                             <span class="method-result">void</span>
  458                                         <a href="#setTmpFile" title="details" class="method-name">setTmpFile</a>
  459                                             (<span class="var-type"></span>&nbsp;<span class="var-name">$tmp_file</span>)
  460                                     </div>
  461                                                                 <div class="method-definition">
  462                                             <span class="method-result">bool</span>
  463                                         <a href="#setTrafficLimit" title="details" class="method-name">setTrafficLimit</a>
  464                                             (<span class="var-type">int</span>&nbsp;<span class="var-name">$bytes</span>, [<span class="var-type">bool</span>&nbsp;<span class="var-name">$complete_requested_files</span> = <span class="var-default">true</span>])
  465                                     </div>
  466                                                                 <div class="method-definition">
  467                                             <span class="method-result">bool</span>
  468                                         <a href="#setURL" title="details" class="method-name">setURL</a>
  469                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$url</span>)
  470                                     </div>
  471                                                                 <div class="method-definition">
  472                                             <span class="method-result">bool</span>
  473                                         <a href="#setUrlCacheType" title="details" class="method-name">setUrlCacheType</a>
  474                                             (<span class="var-type">int</span>&nbsp;<span class="var-name">$url_cache_type</span>)
  475                                     </div>
  476                                                                 <div class="method-definition">
  477                                             <span class="method-result">void</span>
  478                                         <a href="#setUserAgentString" title="details" class="method-name">setUserAgentString</a>
  479                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$user_agent</span>)
  480                                     </div>
  481                                                                 <div class="method-definition">
  482                                             <span class="method-result">bool</span>
  483                                         <a href="#setWorkingDirectory" title="details" class="method-name">setWorkingDirectory</a>
  484                                             (<span class="var-type">string</span>&nbsp;<span class="var-name">$directory</span>)
  485                                     </div>
  486                                                                 <div class="method-definition">
  487                                             <span class="method-result">void</span>
  488                                         <a href="#starControllerProcessLoop" title="details" class="method-name">starControllerProcessLoop</a>
  489                                         ()
  490                                     </div>
  491                                                                 <div class="method-definition">
  492                                             <span class="method-result">void</span>
  493                                         <a href="#startChildProcessLoop" title="details" class="method-name">startChildProcessLoop</a>
  494                                         ()
  495                                     </div>
  496                                 </div>
  497         </div>
  498     </div>      
  499 
  500     <a name="sec-vars"></a>
  501     <div class="info-box">
  502         <div class="info-box-title">Variables</div>
  503         <div class="nav-bar">
  504             <a href="#sec-description">Description</a> |
  505                             <a href="#sec-descendents">Descendents</a> |
  506                                         <a href="#sec-var-summary">Vars</a> (<span class="disabled">details</span>)
  507                         
  508             
  509                                         | 
  510                                     <a href="#sec-method-summary">Methods</a> (<a href="#sec-methods">details</a>)
  511                             
  512                     </div>
  513         <div class="info-box-body">
  514             <a name="var$child_process_number" id="$child_process_number"><!-- --></a>
  515 <div class="oddrow">
  516 
  517     <div class="var-header">
  518         <span class="var-title">
  519             <span class="var-type">int</span>
  520             <span class="var-name">$child_process_number</span>
  521              = <span class="var-default"> null</span>           (line <span class="line-number">152</span>)
  522         </span>
  523     </div>
  524 
  525     <!-- ========== Info from phpDoc block ========= -->
  526 <p class="short-description">Number of child-process (NOT the PID!)</p>
  527     <ul class="tags">
  528                 <li><span class="field">access:</span> protected</li>
  529             </ul>
  530     
  531     
  532         
  533         
  534 
  535 </div>
  536 <a name="var$class_version" id="$class_version"><!-- --></a>
  537 <div class="evenrow">
  538 
  539     <div class="var-header">
  540         <span class="var-title">
  541             <span class="var-type">mixed</span>
  542             <span class="var-name">$class_version</span>
  543              = <span class="var-default"> &quot;0.81&quot;</span>           (line <span class="line-number">12</span>)
  544         </span>
  545     </div>
  546 
  547     <!-- ========== Info from phpDoc block ========= -->
  548     <ul class="tags">
  549                 <li><span class="field">access:</span> public</li>
  550             </ul>
  551     
  552     
  553         
  554         
  555 
  556 </div>
  557 <a name="var$CookieCache" id="$CookieCache"><!-- --></a>
  558 <div class="oddrow">
  559 
  560     <div class="var-header">
  561         <span class="var-title">
  562             <span class="var-type">PHPCrawlerCookieCache</span>
  563             <span class="var-name">$CookieCache</span>
  564                         (line <span class="line-number">33</span>)
  565         </span>
  566     </div>
  567 
  568     <!-- ========== Info from phpDoc block ========= -->
  569 <p class="short-description">The PHPCrawlerCookieCache-Object</p>
  570     <ul class="tags">
  571                 <li><span class="field">access:</span> protected</li>
  572             </ul>
  573     
  574     
  575         
  576         
  577 
  578 </div>
  579 <a name="var$cookie_handling_enabled" id="$cookie_handling_enabled"><!-- --></a>
  580 <div class="evenrow">
  581 
  582     <div class="var-header">
  583         <span class="var-title">
  584             <span class="var-type">bool</span>
  585             <span class="var-name">$cookie_handling_enabled</span>
  586              = <span class="var-default"> true</span>           (line <span class="line-number">98</span>)
  587         </span>
  588     </div>
  589 
  590     <!-- ========== Info from phpDoc block ========= -->
  591 <p class="short-description">Flag cookie-handling enabled/disabled</p>
  592     <ul class="tags">
  593                 <li><span class="field">access:</span> protected</li>
  594             </ul>
  595     
  596     
  597         
  598         
  599 
  600 </div>
  601 <a name="var$crawler_uniqid" id="$crawler_uniqid"><!-- --></a>
  602 <div class="oddrow">
  603 
  604     <div class="var-header">
  605         <span class="var-title">
  606             <span class="var-type">string</span>
  607             <span class="var-name">$crawler_uniqid</span>
  608              = <span class="var-default"> null</span>           (line <span class="line-number">129</span>)
  609         </span>
  610     </div>
  611 
  612     <!-- ========== Info from phpDoc block ========= -->
  613 <p class="short-description">UID of this instance of the crawler</p>
  614     <ul class="tags">
  615                 <li><span class="field">access:</span> protected</li>
  616             </ul>
  617     
  618     
  619         
  620         
  621 
  622 </div>
  623 <a name="var$DocumentInfoQueue" id="$DocumentInfoQueue"><!-- --></a>
  624 <div class="evenrow">
  625 
  626     <div class="var-header">
  627         <span class="var-title">
  628             <span class="var-type"><a href="../phpcrawl/PHPCrawlerDocumentInfoQueue.html">PHPCrawlerDocumentInfoQueue</a></span>
  629             <span class="var-name">$DocumentInfoQueue</span>
  630              = <span class="var-default"> null</span>           (line <span class="line-number">173</span>)
  631         </span>
  632     </div>
  633 
  634     <!-- ========== Info from phpDoc block ========= -->
  635 <p class="short-description">DocumentInfoQueue-object</p>
  636     <ul class="tags">
  637                 <li><span class="field">access:</span> protected</li>
  638             </ul>
  639     
  640     
  641         
  642         
  643 
  644 </div>
  645 <a name="var$document_limit" id="$document_limit"><!-- --></a>
  646 <div class="oddrow">
  647 
  648     <div class="var-header">
  649         <span class="var-title">
  650             <span class="var-type">int</span>
  651             <span class="var-name">$document_limit</span>
  652              = <span class="var-default"> 0</span>          (line <span class="line-number">77</span>)
  653         </span>
  654     </div>
  655 
  656     <!-- ========== Info from phpDoc block ========= -->
  657 <p class="short-description">Limit of documents to receive</p>
  658     <ul class="tags">
  659                 <li><span class="field">access:</span> protected</li>
  660             </ul>
  661     
  662     
  663         
  664         
  665 
  666 </div>
  667 <a name="var$follow_redirects_till_content" id="$follow_redirects_till_content"><!-- --></a>
  668 <div class="evenrow">
  669 
  670     <div class="var-header">
  671         <span class="var-title">
  672             <span class="var-type">mixed</span>
  673             <span class="var-name">$follow_redirects_till_content</span>
  674              = <span class="var-default"> true</span>           (line <span class="line-number">175</span>)
  675         </span>
  676     </div>
  677 
  678     <!-- ========== Info from phpDoc block ========= -->
  679     <ul class="tags">
  680                 <li><span class="field">access:</span> protected</li>
  681             </ul>
  682     
  683     
  684         
  685         
  686 
  687 </div>
  688 <a name="var$is_chlid_process" id="$is_chlid_process"><!-- --></a>
  689 <div class="oddrow">
  690 
  691     <div class="var-header">
  692         <span class="var-title">
  693             <span class="var-type">mixed</span>
  694             <span class="var-name">$is_chlid_process</span>
  695              = <span class="var-default"> false</span>          (line <span class="line-number">110</span>)
  696         </span>
  697     </div>
  698 
  699     <!-- ========== Info from phpDoc block ========= -->
  700 <p class="short-description">Flag indicating whether this instance is running in a child-process (if crawler runs multi-processed)</p>
  701     <ul class="tags">
  702                 <li><span class="field">access:</span> protected</li>
  703             </ul>
  704     
  705     
  706         
  707         
  708 
  709 </div>
  710 <a name="var$is_parent_process" id="$is_parent_process"><!-- --></a>
  711 <div class="evenrow">
  712 
  713     <div class="var-header">
  714         <span class="var-title">
  715             <span class="var-type">mixed</span>
  716             <span class="var-name">$is_parent_process</span>
  717              = <span class="var-default"> false</span>          (line <span class="line-number">115</span>)
  718         </span>
  719     </div>
  720 
  721     <!-- ========== Info from phpDoc block ========= -->
  722 <p class="short-description">Flag indicating whether this instance is running in the parent-process (if crawler runs multi-processed)</p>
  723     <ul class="tags">
  724                 <li><span class="field">access:</span> protected</li>
  725             </ul>
  726     
  727     
  728         
  729         
  730 
  731 </div>
  732 <a name="var$LinkCache" id="$LinkCache"><!-- --></a>
  733 <div class="oddrow">
  734 
  735     <div class="var-header">
  736         <span class="var-title">
  737             <span class="var-type">PHPCrawlerURLCache</span>
  738             <span class="var-name">$LinkCache</span>
  739                         (line <span class="line-number">26</span>)
  740         </span>
  741     </div>
  742 
  743     <!-- ========== Info from phpDoc block ========= -->
  744 <p class="short-description">The PHPCrawlerLinkCache-Object</p>
  745     <ul class="tags">
  746                 <li><span class="field">access:</span> public</li>
  747             </ul>
  748     
  749     
  750         
  751         
  752 
  753 </div>
  754 <a name="var$link_priority_array" id="$link_priority_array"><!-- --></a>
  755 <div class="evenrow">
  756 
  757     <div class="var-header">
  758         <span class="var-title">
  759             <span class="var-type">mixed</span>
  760             <span class="var-name">$link_priority_array</span>
  761              = <span class="var-default">array()</span>         (line <span class="line-number">145</span>)
  762         </span>
  763     </div>
  764 
  765     <!-- ========== Info from phpDoc block ========= -->
  766     <ul class="tags">
  767                 <li><span class="field">access:</span> protected</li>
  768             </ul>
  769     
  770     
  771         
  772         
  773 
  774 </div>
  775 <a name="var$multiprocess_mode" id="$multiprocess_mode"><!-- --></a>
  776 <div class="oddrow">
  777 
  778     <div class="var-header">
  779         <span class="var-title">
  780             <span class="var-type">int</span>
  781             <span class="var-name">$multiprocess_mode</span>
  782              = <span class="var-default"> 0</span>          (line <span class="line-number">166</span>)
  783         </span>
  784     </div>
  785 
  786     <!-- ========== Info from phpDoc block ========= -->
  787 <p class="short-description">Multiprocess-mode the crawler is running in.</p>
  788     <ul class="tags">
  789                 <li><span class="field">var:</span> One of the PHPCrawlerMultiProcessModes-constants</li>
  790                 <li><span class="field">access:</span> protected</li>
  791             </ul>
  792     
  793     
  794         
  795         
  796 
  797 </div>
  798 <a name="var$obey_robots_txt" id="$obey_robots_txt"><!-- --></a>
  799 <div class="evenrow">
  800 
  801     <div class="var-header">
  802         <span class="var-title">
  803             <span class="var-type">mixed</span>
  804             <span class="var-name">$obey_robots_txt</span>
  805              = <span class="var-default"> false</span>          (line <span class="line-number">70</span>)
  806         </span>
  807     </div>
  808 
  809     <!-- ========== Info from phpDoc block ========= -->
  810 <p class="short-description">Defines whether robots.txt-file should be obeyed</p>
  811     <ul class="tags">
  812                 <li><span class="field">access:</span> protected</li>
  813             </ul>
  814     
  815     
  816         
  817         
  818 
  819 </div>
  820 <a name="var$only_count_received_documents" id="$only_count_received_documents"><!-- --></a>
  821 <div class="oddrow">
  822 
  823     <div class="var-header">
  824         <span class="var-title">
  825             <span class="var-type">bool</span>
  826             <span class="var-name">$only_count_received_documents</span>
  827              = <span class="var-default"> true</span>           (line <span class="line-number">91</span>)
  828         </span>
  829     </div>
  830 
  831     <!-- ========== Info from phpDoc block ========= -->
  832 <p class="short-description">Defines if only documents that were received will be counted.</p>
  833     <ul class="tags">
  834                 <li><span class="field">access:</span> protected</li>
  835             </ul>
  836     
  837     
  838         
  839         
  840 
  841 </div>
  842 <a name="var$PageRequest" id="$PageRequest"><!-- --></a>
  843 <div class="evenrow">
  844 
  845     <div class="var-header">
  846         <span class="var-title">
  847             <span class="var-type"><a href="../phpcrawl/PHPCrawlerHTTPRequest.html">PHPCrawlerHTTPRequest</a></span>
  848             <span class="var-name">$PageRequest</span>
  849                         (line <span class="line-number">19</span>)
  850         </span>
  851     </div>
  852 
  853     <!-- ========== Info from phpDoc block ========= -->
  854 <p class="short-description">The PHPCrawlerHTTPRequest-Object</p>
  855     <ul class="tags">
  856                 <li><span class="field">access:</span> protected</li>
  857             </ul>
  858     
  859     
  860         
  861         
  862 
  863 </div>
  864 <a name="var$porcess_abort_reason" id="$porcess_abort_reason"><!-- --></a>
  865 <div class="oddrow">
  866 
  867     <div class="var-header">
  868         <span class="var-title">
  869             <span class="var-type">int</span>
  870             <span class="var-name">$porcess_abort_reason</span>
  871              = <span class="var-default"> null</span>           (line <span class="line-number">105</span>)
  872         </span>
  873     </div>
  874 
  875     <!-- ========== Info from phpDoc block ========= -->
  876 <p class="short-description">The reason why the process was aborted/finished.</p>
  877     <ul class="tags">
  878                 <li><span class="field">var:</span> One of the PHPCrawlerAbortReasons::ABORTREASON-constants.</li>
  879                 <li><span class="field">access:</span> protected</li>
  880             </ul>
  881     
  882     
  883         
  884         
  885 
  886 </div>
  887 <a name="var$ProcessCommunication" id="$ProcessCommunication"><!-- --></a>
  888 <div class="evenrow">
  889 
  890     <div class="var-header">
  891         <span class="var-title">
  892             <span class="var-type"><a href="../phpcrawl/PHPCrawlerProcessCommunication.html">PHPCrawlerProcessCommunication</a></span>
  893             <span class="var-name">$ProcessCommunication</span>
  894              = <span class="var-default"> null</span>           (line <span class="line-number">159</span>)
  895         </span>
  896     </div>
  897 
  898     <!-- ========== Info from phpDoc block ========= -->
  899 <p class="short-description">ProcessCommunication-object</p>
  900     <ul class="tags">
  901                 <li><span class="field">access:</span> protected</li>
  902             </ul>
  903     
  904     
  905         
  906         
  907 
  908 </div>
  909 <a name="var$resumtion_enabled" id="$resumtion_enabled"><!-- --></a>
  910 <div class="oddrow">
  911 
  912     <div class="var-header">
  913         <span class="var-title">
  914             <span class="var-type">bool</span>
  915             <span class="var-name">$resumtion_enabled</span>
  916              = <span class="var-default"> false</span>          (line <span class="line-number">182</span>)
  917         </span>
  918     </div>
  919 
  920     <!-- ========== Info from phpDoc block ========= -->
  921 <p class="short-description">Flag indicating whether resumption is activated</p>
  922     <ul class="tags">
  923                 <li><span class="field">access:</span> protected</li>
  924             </ul>
  925     
  926     
  927         
  928         
  929 
  930 </div>
  931 <a name="var$RobotsTxtParser" id="$RobotsTxtParser"><!-- --></a>
  932 <div class="evenrow">
  933 
  934     <div class="var-header">
  935         <span class="var-title">
  936             <span class="var-type"><a href="../phpcrawl/PHPCrawlerRobotsTxtParser.html">PHPCrawlerRobotsTxtParser</a></span>
  937             <span class="var-name">$RobotsTxtParser</span>
  938                         (line <span class="line-number">47</span>)
  939         </span>
  940     </div>
  941 
  942     <!-- ========== Info from phpDoc block ========= -->
  943 <p class="short-description">The RobotsTxtParser-Object</p>
  944     <ul class="tags">
  945                 <li><span class="field">access:</span> protected</li>
  946             </ul>
  947     
  948     
  949         
  950         
  951 
  952 </div>
  953 <a name="var$starting_url" id="$starting_url"><!-- --></a>
  954 <div class="oddrow">
  955 
  956     <div class="var-header">
  957         <span class="var-title">
  958             <span class="var-type">string</span>
  959             <span class="var-name">$starting_url</span>
  960              = <span class="var-default"> &quot;&quot;</span>           (line <span class="line-number">63</span>)
  961         </span>
  962     </div>
  963 
  964     <!-- ========== Info from phpDoc block ========= -->
  965 <p class="short-description">The URL the crawler should start with.</p>
  966 <p class="description">The URL is fully qualified and normalized.</p>
  967     <ul class="tags">
  968                 <li><span class="field">access:</span> protected</li>
  969             </ul>
  970     
  971     
  972         
  973         
  974 
  975 </div>
  976 <a name="var$traffic_limit" id="$traffic_limit"><!-- --></a>
  977 <div class="evenrow">
  978 
  979     <div class="var-header">
  980         <span class="var-title">
  981             <span class="var-type">int</span>
  982             <span class="var-name">$traffic_limit</span>
  983              = <span class="var-default"> 0</span>          (line <span class="line-number">84</span>)
  984         </span>
  985     </div>
  986 
  987     <!-- ========== Info from phpDoc block ========= -->
  988 <p class="short-description">Limit of bytes to receive</p>
  989     <ul class="tags">
  990                 <li><span class="field">var:</span> The limit in bytes</li>
  991                 <li><span class="field">access:</span> protected</li>
  992             </ul>
  993     
  994     
  995         
  996         
  997 
  998 </div>
  999 <a name="var$urlcache_purged" id="$urlcache_purged"><!-- --></a>
 1000 <div class="oddrow">
 1001 
 1002     <div class="var-header">
 1003         <span class="var-title">
 1004             <span class="var-type">mixed</span>
 1005             <span class="var-name">$urlcache_purged</span>
 1006              = <span class="var-default"> false</span>          (line <span class="line-number">187</span>)
 1007         </span>
 1008     </div>
 1009 
 1010     <!-- ========== Info from phpDoc block ========= -->
 1011 <p class="short-description">Flag indicating whether the URL-cache was purged at the beginning of a crawling-process</p>
 1012     <ul class="tags">
 1013                 <li><span class="field">access:</span> protected</li>
 1014             </ul>
 1015     
 1016     
 1017         
 1018         
 1019 
 1020 </div>
 1021 <a name="var$UrlFilter" id="$UrlFilter"><!-- --></a>
 1022 <div class="evenrow">
 1023 
 1024     <div class="var-header">
 1025         <span class="var-title">
 1026             <span class="var-type"><a href="../phpcrawl/PHPCrawlerURLFilter.html">PHPCrawlerURLFilter</a></span>
 1027             <span class="var-name">$UrlFilter</span>
 1028                         (line <span class="line-number">40</span>)
 1029         </span>
 1030     </div>
 1031 
 1032     <!-- ========== Info from phpDoc block ========= -->
 1033 <p class="short-description">The UrlFilter-Object</p>
 1034     <ul class="tags">
 1035                 <li><span class="field">access:</span> protected</li>
 1036             </ul>
 1037     
 1038     
 1039         
 1040         
 1041 
 1042 </div>
 1043 <a name="var$url_cache_type" id="$url_cache_type"><!-- --></a>
 1044 <div class="oddrow">
 1045 
 1046     <div class="var-header">
 1047         <span class="var-title">
 1048             <span class="var-type">int</span>
 1049             <span class="var-name">$url_cache_type</span>
 1050              = <span class="var-default"> 1</span>          (line <span class="line-number">122</span>)
 1051         </span>
 1052     </div>
 1053 
 1054     <!-- ========== Info from phpDoc block ========= -->
 1055 <p class="short-description">URL cache-type.</p>
 1056     <ul class="tags">
 1057                 <li><span class="field">var:</span> One of the PHPCrawlerUrlCacheTypes::URLCACHE..-constants.</li>
 1058                 <li><span class="field">access:</span> protected</li>
 1059             </ul>
 1060     
 1061     
 1062         
 1063         
 1064 
 1065 </div>
 1066 <a name="var$UserSendDataCache" id="$UserSendDataCache"><!-- --></a>
 1067 <div class="evenrow">
 1068 
 1069     <div class="var-header">
 1070         <span class="var-title">
 1071             <span class="var-type"><a href="../phpcrawl/PHPCrawlerUserSendDataCache.html">PHPCrawlerUserSendDataCache</a></span>
 1072             <span class="var-name">$UserSendDataCache</span>
 1073                         (line <span class="line-number">54</span>)
 1074         </span>
 1075     </div>
 1076 
 1077     <!-- ========== Info from phpDoc block ========= -->
 1078 <p class="short-description">UserSendDataCache-object.</p>
 1079     <ul class="tags">
 1080                 <li><span class="field">access:</span> protected</li>
 1081             </ul>
 1082     
 1083     
 1084         
 1085         
 1086 
 1087 </div>
 1088 <a name="var$working_base_directory" id="$working_base_directory"><!-- --></a>
 1089 <div class="oddrow">
 1090 
 1091     <div class="var-header">
 1092         <span class="var-title">
 1093             <span class="var-type">string</span>
 1094             <span class="var-name">$working_base_directory</span>
 1095                         (line <span class="line-number">136</span>)
 1096         </span>
 1097     </div>
 1098 
 1099     <!-- ========== Info from phpDoc block ========= -->
 1100 <p class="short-description">Base-directory for temporary directories</p>
 1101     <ul class="tags">
 1102                 <li><span class="field">access:</span> protected</li>
 1103             </ul>
 1104     
 1105     
 1106         
 1107         
 1108 
 1109 </div>
 1110 <a name="var$working_directory" id="$working_directory"><!-- --></a>
 1111 <div class="evenrow">
 1112 
 1113     <div class="var-header">
 1114         <span class="var-title">
 1115             <span class="var-type">string</span>
 1116             <span class="var-name">$working_directory</span>
 1117              = <span class="var-default"> null</span>           (line <span class="line-number">143</span>)
 1118         </span>
 1119     </div>
 1120 
 1121     <!-- ========== Info from phpDoc block ========= -->
 1122 <p class="short-description">Complete path to the temporary directory</p>
 1123     <ul class="tags">
 1124                 <li><span class="field">access:</span> protected</li>
 1125             </ul>
 1126     
 1127     
 1128         
 1129         
 1130 
 1131 </div>
 1132                         
 1133         </div>
 1134     </div>
 1135     
 1136     <a name="sec-methods"></a>
 1137     <div class="info-box">
 1138         <div class="info-box-title">Methods</div>
 1139         <div class="nav-bar">
 1140             <a href="#sec-description">Description</a> |
 1141                             <a href="#sec-descendents">Descendents</a> |
 1142                                                             <a href="#sec-var-summary">Vars</a> (<a href="#sec-vars">details</a>) |
 1143                                                                     <a href="#sec-method-summary">Methods</a> (<span class="disabled">details</span>)
 1144                         
 1145         </div>
 1146         <div class="info-box-body">
 1147             <a name="method_detail"></a>
 1148 
 1149 <a name="method__construct" id="__construct"><!-- --></a>
 1150 <div class="oddrow">
 1151     
 1152     <div class="method-header">
 1153         <span class="method-title">Constructor __construct</span> (line <span class="line-number">192</span>)
 1154     </div> 
 1155     
 1156     <!-- ========== Info from phpDoc block ========= -->
 1157 <p class="short-description">Initiates a new crawler.</p>
 1158     <ul class="tags">
 1159                 <li><span class="field">access:</span> public</li>
 1160             </ul>
 1161     
 1162     <div class="method-signature">
 1163         <span class="method-result">PHPCrawler</span>
 1164         <span class="method-name">
 1165             __construct
 1166         </span>
 1167                 ()
 1168             </div>
 1169     
 1170         
 1171             
 1172     </div>
 1173 <a name="methodaddBasicAuthentication" id="addBasicAuthentication"><!-- --></a>
 1174 <div class="evenrow">
 1175     
 1176     <div class="method-header">
 1177         <span class="method-title">addBasicAuthentication</span> (line <span class="line-number">1549</span>)
 1178     </div> 
 1179     
 1180     <!-- ========== Info from phpDoc block ========= -->
 1181 <p class="short-description">Adds a basic-authentication (username and password) to the list of basic authentications that will be sent with requests.</p>
 1182 <p class="description"><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddBasicAuthentication">addBasicAuthentication</a><span class="src-sym">(</span><span class="src-str">&quot;#http://www\.foo\.com/protected_path/#&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-str">&quot;myusername&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-str">&quot;mypasswd&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1183 </ol></div>  This lets the crawler send the authentication &quot;myusername/mypasswd&quot; with every request for content placed  in the path &quot;protected_path&quot; on the host &quot;www.foo.com&quot;.</p></p>
 1184     <ul class="tags">
 1185                 <li><span class="field">section:</span> 10 Other settings</li>
 1186                 <li><span class="field">access:</span> public</li>
 1187             </ul>
 1188     
 1189     <div class="method-signature">
 1190         <span class="method-result">bool</span>
 1191         <span class="method-name">
 1192             addBasicAuthentication
 1193         </span>
 1194                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$url_regex</span>, <span class="var-type">string</span>&nbsp;<span class="var-name">$username</span>, <span class="var-type">string</span>&nbsp;<span class="var-name">$password</span>)
 1195             </div>
 1196     
 1197             <ul class="parameters">
 1198                     <li>
 1199                 <span class="var-type">string</span>
 1200                 <span class="var-name">$url_regex</span><span class="var-description">: Regular-expression defining the URL(s) the authentication should be sent to.</span>         </li>
 1201                     <li>
 1202                 <span class="var-type">string</span>
 1203                 <span class="var-name">$username</span><span class="var-description">: The username</span>          </li>
 1204                     <li>
 1205                 <span class="var-type">string</span>
 1206                 <span class="var-name">$password</span><span class="var-description">: The password</span>          </li>
 1207                 </ul>
 1208         
 1209             
 1210     </div>
 1211 <a name="methodaddContentTypeReceiveRule" id="addContentTypeReceiveRule"><!-- --></a>
 1212 <div class="oddrow">
 1213     
 1214     <div class="method-header">
 1215         <span class="method-title">addContentTypeReceiveRule</span> (line <span class="line-number">1182</span>)
 1216     </div> 
 1217     
 1218     <!-- ========== Info from phpDoc block ========= -->
 1219 <p class="short-description">Adds a rule to the list of rules that decides which pages or files - regarding their content-type - should be received</p>
 1220 <p class="description"><p>After receiving the HTTP-header of a followed URL, the crawler checks - based on the given rules - whether the content of that URL  should be received.  If no rule matches with the content-type of the document, the content won't be received.</p><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddContentTypeReceiveRule">addContentTypeReceiveRule</a><span class="src-sym">(</span><span class="src-str">&quot;#text/html#&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1221 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddContentTypeReceiveRule">addContentTypeReceiveRule</a><span class="src-sym">(</span><span class="src-str">&quot;#text/css#&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1222 </ol></div>  These rules let the crawler receive the content/source of pages with the Content-Type &quot;text/html&quot; AND &quot;text/css&quot;.  Other pages or files with different content-types (e.g. &quot;image/gif&quot;) won't be received (if these are the only rules added to the list).</p><p><strong>IMPORTANT:</strong> By default, if no rule was added to the list, the crawler receives every content.</p><p>Note: To reduce the traffic the crawler will cause, you should only add content-types of pages/files you really want to receive.  But at least you should add the content-type &quot;text/html&quot; to this list, otherwise the crawler can't find any links.</p></p>
 1223     <ul class="tags">
 1224                 <li><span class="field">return:</span> TRUE if the rule was added to the list.               FALSE if the given regex is not valid.</li>
 1225                 <li><span class="field">section:</span> 2 Filter-settings</li>
 1226                 <li><span class="field">access:</span> public</li>
 1227             </ul>
 1228     
 1229     <div class="method-signature">
 1230         <span class="method-result">bool</span>
 1231         <span class="method-name">
 1232             addContentTypeReceiveRule
 1233         </span>
 1234                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
 1235             </div>
 1236     
 1237             <ul class="parameters">
 1238                     <li>
 1239                 <span class="var-type">string</span>
 1240                 <span class="var-name">$regex</span><span class="var-description">: The rule as a regular-expression</span>         </li>
 1241                 </ul>
 1242         
 1243             
 1244     </div>
 1245 <a name="methodaddFollowMatch" id="addFollowMatch"><!-- --></a>
 1246 <div class="evenrow">
 1247     
 1248     <div class="method-header">
 1249         <span class="method-title">addFollowMatch</span> (line <span class="line-number">1255</span>)
 1250     </div> 
 1251     
 1252     <!-- ========== Info from phpDoc block ========= -->
 1253 <p class="short-description">Alias for addURLFollowRule().</p>
 1254     <ul class="tags">
 1255                 <li><span class="field">deprecated:</span> </li>
 1256                 <li><span class="field">section:</span> 11 Deprecated</li>
 1257                 <li><span class="field">access:</span> public</li>
 1258             </ul>
 1259     
 1260     <div class="method-signature">
 1261         <span class="method-result">void</span>
 1262         <span class="method-name">
 1263             addFollowMatch
 1264         </span>
 1265                     (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
 1266             </div>
 1267     
 1268             <ul class="parameters">
 1269                     <li>
 1270                 <span class="var-type"></span>
 1271                 <span class="var-name">$regex</span>            </li>
 1272                 </ul>
 1273         
 1274             
 1275     </div>
 1276 <a name="methodaddLinkExtractionTags" id="addLinkExtractionTags"><!-- --></a>
 1277 <div class="oddrow">
 1278     
 1279     <div class="method-header">
 1280         <span class="method-title">addLinkExtractionTags</span> (line <span class="line-number">1525</span>)
 1281     </div> 
 1282     
 1283     <!-- ========== Info from phpDoc block ========= -->
 1284 <p class="short-description">Sets the list of html-tags from which links should be extracted.</p>
 1285 <p class="description"><p>This method was named wrong in previous versions of phpcrawl.  It does not ADD tags, it SETS the tags from which links should be extracted.</p><p>Example  <div class="src-code"><ol><li><div class="src-line"><span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddLinkExtractionTags">addLinkExtractionTags</a><span class="src-sym">(</span><span class="src-str">&quot;href&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-str">&quot;src&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1286 </ol></div></p></p>
 1287     <ul class="tags">
 1288                 <li><span class="field">deprecated:</span> Please use setLinkExtractionTags()</li>
 1289                 <li><span class="field">section:</span> 11 Deprecated</li>
 1290                 <li><span class="field">access:</span> public</li>
 1291             </ul>
 1292     
 1293     <div class="method-signature">
 1294         <span class="method-result">void</span>
 1295         <span class="method-name">
 1296             addLinkExtractionTags
 1297         </span>
 1298                 ()
 1299             </div>
 1300     
 1301         
 1302             
 1303     </div>
 1304 <a name="methodaddLinkPriority" id="addLinkPriority"><!-- --></a>
 1305 <div class="evenrow">
 1306     
 1307     <div class="method-header">
 1308         <span class="method-title">addLinkPriority</span> (line <span class="line-number">1073</span>)
 1309     </div> 
 1310     
 1311     <!-- ========== Info from phpDoc block ========= -->
 1312 <p class="short-description">Adds a regular expression together with a priority-level to the list of rules that decide what links should be preferred.</p>
 1313 <p class="description"><p>Links/URLs that match an expression with a high priority-level will be followed before links with a lower level.  All links that don't match with any of the given rules will get the level 0 (lowest level) automatically.</p><p>The level can be any positive integer.</p><p><strong>Example:</strong></p><p>Telling the crawler to follow links that contain the string &quot;forum&quot; before links that contain &quot;.gif&quot; before all other found links.  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddLinkPriority">addLinkPriority</a><span class="src-sym">(</span><span class="src-str">&quot;/forum/&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-num">10</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1314 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddLinkPriority">addLinkPriority</a><span class="src-sym">(</span><span class="src-str">&quot;/\.gif/&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-num">5</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1315 </ol></div></p></p>
 1316     <ul class="tags">
 1317                 <li><span class="field">return:</span> TRUE if a valid preg-pattern is given as argument and was successfully added, otherwise it returns FALSE.</li>
 1318                 <li><span class="field">section:</span> 10 Other settings</li>
 1319             </ul>
 1320     
 1321     <div class="method-signature">
 1322         <span class="method-result">bool</span>
 1323         <span class="method-name">
 1324             addLinkPriority
 1325         </span>
 1326                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>, <span class="var-type">int</span>&nbsp;<span class="var-name">$level</span>)
 1327             </div>
 1328     
 1329             <ul class="parameters">
 1330                     <li>
 1331                 <span class="var-type">string</span>
 1332                 <span class="var-name">$regex</span><span class="var-description">: Regular expression defining the rule</span>          </li>
 1333                     <li>
 1334                 <span class="var-type">int</span>
 1335                 <span class="var-name">$level</span><span class="var-description">: The priority-level</span>           </li>
 1336                 </ul>
 1337         
 1338             
 1339     </div>
 1340 <a name="methodaddLinkSearchContentType" id="addLinkSearchContentType"><!-- --></a>
 1341 <div class="oddrow">
 1342     
 1343     <div class="method-header">
 1344         <span class="method-title">addLinkSearchContentType</span> (line <span class="line-number">1700</span>)
 1345     </div> 
 1346     
 1347     <!-- ========== Info from phpDoc block ========= -->
 1348 <p class="short-description">Adds a rule to the list of rules that decide in what kind of documents the crawler  should search for links (regarding their content-type)</p>
 1349 <p class="description"><p>By default the crawler ONLY searches for links in documents of type &quot;text/html&quot;.  Use this method to add one or more other content-types the crawler should check for links.</p><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddLinkSearchContentType">addLinkSearchContentType</a><span class="src-sym">(</span><span class="src-str">&quot;#text/css#&nbsp;i&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1350 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddLinkSearchContentType">addLinkSearchContentType</a><span class="src-sym">(</span><span class="src-str">&quot;#text/xml#&nbsp;i&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1351 </ol></div>  These rules let the crawler search for links in HTML-, CSS- and XML-documents.</p><p><strong>Please note:</strong> It is NOT recommended to let the crawler check for links in EVERY document-type! This could slow down the crawling-process dramatically (e.g. if the crawler receives large  binary-files like images and tries to find links in them).</p></p>
 1352     <ul class="tags">
 1353                 <li><span class="field">return:</span> TRUE if the rule was successfully added</li>
 1354                 <li><span class="field">section:</span> 6 Linkfinding settings</li>
 1355                 <li><span class="field">access:</span> public</li>
 1356             </ul>
 1357     
 1358     <div class="method-signature">
 1359         <span class="method-result">bool</span>
 1360         <span class="method-name">
 1361             addLinkSearchContentType
 1362         </span>
 1363                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
 1364             </div>
 1365     
 1366             <ul class="parameters">
 1367                     <li>
 1368                 <span class="var-type">string</span>
 1369                 <span class="var-name">$regex</span><span class="var-description">: Regular-expression defining the rule</span>         </li>
 1370                 </ul>
 1371         
 1372             
 1373     </div>
 1374 <a name="methodaddNonFollowMatch" id="addNonFollowMatch"><!-- --></a>
 1375 <div class="evenrow">
 1376     
 1377     <div class="method-header">
 1378         <span class="method-title">addNonFollowMatch</span> (line <span class="line-number">1267</span>)
 1379     </div> 
 1380     
 1381     <!-- ========== Info from phpDoc block ========= -->
 1382 <p class="short-description">Alias for addURLFilterRule().</p>
 1383     <ul class="tags">
 1384                 <li><span class="field">deprecated:</span> </li>
 1385                 <li><span class="field">section:</span> 11 Deprecated</li>
 1386                 <li><span class="field">access:</span> public</li>
 1387             </ul>
 1388     
 1389     <div class="method-signature">
 1390         <span class="method-result">void</span>
 1391         <span class="method-name">
 1392             addNonFollowMatch
 1393         </span>
 1394                     (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
 1395             </div>
 1396     
 1397             <ul class="parameters">
 1398                     <li>
 1399                 <span class="var-type"></span>
 1400                 <span class="var-name">$regex</span>            </li>
 1401                 </ul>
 1402         
 1403             
 1404     </div>
 1405 <a name="methodaddPostData" id="addPostData"><!-- --></a>
 1406 <div class="oddrow">
 1407     
 1408     <div class="method-header">
 1409         <span class="method-title">addPostData</span> (line <span class="line-number">1781</span>)
 1410     </div> 
 1411     
 1412     <!-- ========== Info from phpDoc block ========= -->
 1413 <p class="short-description">Adds post-data together with a URL-rule to the list of post-data to send with requests.</p>
 1414 <p class="description"><p>Example  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$post_data&nbsp;</span>=&nbsp;<span class="src-key">array</span><span class="src-sym">(</span><span class="src-str">&quot;username&quot;&nbsp;</span>=&gt;&nbsp;<span class="src-str">&quot;me&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-str">&quot;password&quot;&nbsp;</span>=&gt;&nbsp;<span class="src-str">&quot;my_password&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-str">&quot;action&quot;&nbsp;</span>=&gt;&nbsp;<span class="src-str">&quot;do_login&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1415 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddPostData">addPostData</a><span class="src-sym">(</span><span class="src-str">&quot;#http://www\.foo\.com/login.php#&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-var">$post_data</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1416 </ol></div>  This example sends the post-values &quot;username=me&quot;, &quot;password=my_password&quot; and &quot;action=do_login&quot; to the URL  http://www.foo.com/login.php</p></p>
 1417     <ul class="tags">
 1418                 <li><span class="field">section:</span> 10 Other settings</li>
 1419                 <li><span class="field">access:</span> public</li>
 1420             </ul>
 1421     
 1422     <div class="method-signature">
 1423         <span class="method-result">bool</span>
 1424         <span class="method-name">
 1425             addPostData
 1426         </span>
 1427                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$url_regex</span>, <span class="var-type">array</span>&nbsp;<span class="var-name">$post_data_array</span>)
 1428             </div>
 1429     
 1430             <ul class="parameters">
 1431                     <li>
 1432                 <span class="var-type">string</span>
 1433                 <span class="var-name">$url_regex</span><span class="var-description">: Regular expression defining the URL(s) the post-data should be sent to.</span>          </li>
 1434                     <li>
 1435                 <span class="var-type">array</span>
 1436                 <span class="var-name">$post_data_array</span><span class="var-description">: Post-data-array, the array-keys are the post-data-keys, the array-values the post-values.                                 (like array(&quot;post_key1&quot; =&gt; &quot;post_value1&quot;, &quot;post_key2&quot; =&gt; &quot;post_value2&quot;))</span>            </li>
 1437                 </ul>
 1438         
 1439             
 1440     </div>
 1441 <a name="methodaddReceiveContentType" id="addReceiveContentType"><!-- --></a>
 1442 <div class="evenrow">
 1443     
 1444     <div class="method-header">
 1445         <span class="method-title">addReceiveContentType</span> (line <span class="line-number">1194</span>)
 1446     </div> 
 1447     
 1448     <!-- ========== Info from phpDoc block ========= -->
 1449 <p class="short-description">Alias for addContentTypeReceiveRule().</p>
 1450     <ul class="tags">
 1451                 <li><span class="field">deprecated:</span> </li>
 1452                 <li><span class="field">section:</span> 11 Deprecated</li>
 1453                 <li><span class="field">access:</span> public</li>
 1454             </ul>
 1455     
 1456     <div class="method-signature">
 1457         <span class="method-result">void</span>
 1458         <span class="method-name">
 1459             addReceiveContentType
 1460         </span>
 1461                     (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
 1462             </div>
 1463     
 1464             <ul class="parameters">
 1465                     <li>
 1466                 <span class="var-type"></span>
 1467                 <span class="var-name">$regex</span>            </li>
 1468                 </ul>
 1469         
 1470             
 1471     </div>
 1472 <a name="methodaddReceiveToMemoryMatch" id="addReceiveToMemoryMatch"><!-- --></a>
 1473 <div class="oddrow">
 1474     
 1475     <div class="method-header">
 1476         <span class="method-title">addReceiveToMemoryMatch</span> (line <span class="line-number">1363</span>)
 1477     </div> 
 1478     
 1479     <!-- ========== Info from phpDoc block ========= -->
 1480 <p class="short-description">Has no function anymore!</p>
 1481 <p class="description"><p>This method was redundant, please use addStreamToFileContentType().  It just still exists because of compatibility-reasons.</p></p>
 1482     <ul class="tags">
 1483                 <li><span class="field">deprecated:</span> This method has no function anymore since v 0.8.</li>
 1484                 <li><span class="field">section:</span> 11 Deprecated</li>
 1485                 <li><span class="field">access:</span> public</li>
 1486             </ul>
 1487     
 1488     <div class="method-signature">
 1489         <span class="method-result">void</span>
 1490         <span class="method-name">
 1491             addReceiveToMemoryMatch
 1492         </span>
 1493                     (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
 1494             </div>
 1495     
 1496             <ul class="parameters">
 1497                     <li>
 1498                 <span class="var-type"></span>
 1499                 <span class="var-name">$regex</span>            </li>
 1500                 </ul>
 1501         
 1502             
 1503     </div>
 1504 <a name="methodaddReceiveToTmpFileMatch" id="addReceiveToTmpFileMatch"><!-- --></a>
 1505 <div class="evenrow">
 1506     
 1507     <div class="method-header">
 1508         <span class="method-title">addReceiveToTmpFileMatch</span> (line <span class="line-number">1349</span>)
 1509     </div> 
 1510     
 1511     <!-- ========== Info from phpDoc block ========= -->
 1512 <p class="short-description">Alias for addStreamToFileContentType().</p>
 1513     <ul class="tags">
 1514                 <li><span class="field">deprecated:</span> </li>
 1515                 <li><span class="field">section:</span> 11 Deprecated</li>
 1516                 <li><span class="field">access:</span> public</li>
 1517             </ul>
 1518     
 1519     <div class="method-signature">
 1520         <span class="method-result">void</span>
 1521         <span class="method-name">
 1522             addReceiveToTmpFileMatch
 1523         </span>
 1524                     (<span class="var-type"></span>&nbsp;<span class="var-name">$regex</span>)
 1525             </div>
 1526     
 1527             <ul class="parameters">
 1528                     <li>
 1529                 <span class="var-type"></span>
 1530                 <span class="var-name">$regex</span>            </li>
 1531                 </ul>
 1532         
 1533             
 1534     </div>
 1535 <a name="methodaddStreamToFileContentType" id="addStreamToFileContentType"><!-- --></a>
 1536 <div class="oddrow">
 1537     
 1538     <div class="method-header">
 1539         <span class="method-title">addStreamToFileContentType</span> (line <span class="line-number">1300</span>)
 1540     </div> 
 1541     
 1542     <!-- ========== Info from phpDoc block ========= -->
 1543 <p class="short-description">Adds a rule to the list of rules that decide what types of content should be streamed directly to a temporary file.</p>
 1544 <p class="description"><p>If a content-type of a page or file matches with one of these rules, the content will be streamed directly into a  temporary file without claiming local RAM.</p><p>It's recommended to add all content-types of files that may be of bigger size to prevent memory-overflows.  By default the crawler will receive every content to memory!</p><p>The content/source of pages and files that were streamed to file are not accessible directly within the overridden method  <a href="../phpcrawl/PHPCrawler.html#methodhandleDocumentInfo">handleDocumentInfo()</a>, instead you get information about the file the content was stored in.  (see properties PHPCrawlerDocumentInfo::received_to_file and PHPCrawlerDocumentInfo::content_tmp_file).</p><p>Please note that this setting doesn't affect the link-finding results, also file-streams will be checked for links.</p><p>A common setup may look like this example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;Basically&nbsp;let&nbsp;the&nbsp;crawler&nbsp;receive&nbsp;every&nbsp;content&nbsp;(default-setting)</span></div></li>
 1545 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddReceiveContentType">addReceiveContentType</a><span class="src-sym">(</span><span class="src-str">&quot;##&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1546 <li><div class="src-line">&nbsp;</div></li>
 1547 <li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;Tell&nbsp;the&nbsp;crawler&nbsp;to&nbsp;stream&nbsp;everything&nbsp;but&nbsp;&quot;text/html&quot;-documents&nbsp;to&nbsp;a&nbsp;tmp-file</span></div></li>
 1548 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddStreamToFileContentType">addStreamToFileContentType</a><span class="src-sym">(</span><span class="src-str">&quot;#^((?!text/html).)*$#&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1549 </ol></div></p></p>
 1550     <ul class="tags">
 1551                 <li><span class="field">return:</span> TRUE if the rule was added to the list and the regex is valid.</li>
 1552                 <li><span class="field">section:</span> 10 Other settings</li>
 1553                 <li><span class="field">access:</span> public</li>
 1554             </ul>
 1555     
 1556     <div class="method-signature">
 1557         <span class="method-result">bool</span>
 1558         <span class="method-name">
 1559             addStreamToFileContentType
 1560         </span>
 1561                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
 1562             </div>
 1563     
 1564             <ul class="parameters">
 1565                     <li>
 1566                 <span class="var-type">string</span>
 1567                 <span class="var-name">$regex</span><span class="var-description">: The rule as a regular-expression</span>         </li>
 1568                 </ul>
 1569         
 1570             
 1571     </div>
 1572 <a name="methodaddURLFilterRule" id="addURLFilterRule"><!-- --></a>
 1573 <div class="evenrow">
 1574     
 1575     <div class="method-header">
 1576         <span class="method-title">addURLFilterRule</span> (line <span class="line-number">1243</span>)
 1577     </div> 
 1578     
 1579     <!-- ========== Info from phpDoc block ========= -->
 1580 <p class="short-description">Adds a rule to the list of rules that decide which URLs found on a page should be ignored by the crawler.</p>
 1581 <p class="description"><p>If the crawler finds an URL and this URL matches with one of the given regular-expressions, the crawler  will ignore this URL and won't follow it.</p><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddURLFilterRule">addURLFilterRule</a><span class="src-sym">(</span><span class="src-str">&quot;#(jpg|jpeg|gif|png|bmp)$#&nbsp;i&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1582 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddURLFilterRule">addURLFilterRule</a><span class="src-sym">(</span><span class="src-str">&quot;#(css|js)$#&nbsp;i&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1583 </ol></div>  These rules let the crawler ignore URLs that end with &quot;jpg&quot;, &quot;jpeg&quot;, &quot;gif&quot;, ..., &quot;css&quot;  and &quot;js&quot;.</p></p>
 1584     <ul class="tags">
 1585                 <li><span class="field">return:</span> TRUE if the regex is valid and the rule was added to the list, otherwise FALSE.</li>
 1586                 <li><span class="field">section:</span> 2 Filter-settings</li>
 1587                 <li><span class="field">access:</span> public</li>
 1588             </ul>
 1589     
 1590     <div class="method-signature">
 1591         <span class="method-result">bool</span>
 1592         <span class="method-name">
 1593             addURLFilterRule
 1594         </span>
 1595                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
 1596             </div>
 1597     
 1598             <ul class="parameters">
 1599                     <li>
 1600                 <span class="var-type">string</span>
 1601                 <span class="var-name">$regex</span><span class="var-description">: Regular-expression defining the rule</span>         </li>
 1602                 </ul>
 1603         
 1604             
 1605     </div>
 1606 <a name="methodaddURLFollowRule" id="addURLFollowRule"><!-- --></a>
 1607 <div class="oddrow">
 1608     
 1609     <div class="method-header">
 1610         <span class="method-title">addURLFollowRule</span> (line <span class="line-number">1220</span>)
 1611     </div> 
 1612     
 1613     <!-- ========== Info from phpDoc block ========= -->
 1614 <p class="short-description">Adds a rule to the list of rules that decide which URLs found on a page should be followed explicitly.</p>
 1615 <p class="description"><p>If the crawler finds an URL and this URL doesn't match with any of the given regular-expressions, the crawler  will ignore this URL and won't follow it.</p><p>NOTE: By default and if no rule was added to this list, the crawler will NOT filter ANY URLs, every URL the crawler finds  will be followed (except the ones &quot;excluded&quot; by other options of course).</p><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddURLFollowRule">addURLFollowRule</a><span class="src-sym">(</span><span class="src-str">&quot;#(htm|html)$#&nbsp;i&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1616 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodaddURLFollowRule">addURLFollowRule</a><span class="src-sym">(</span><span class="src-str">&quot;#(php|php3|php4|php5)$#&nbsp;i&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1617 </ol></div>  These rules let the crawler ONLY follow URLs/links that end with &quot;html&quot;, &quot;htm&quot;, &quot;php&quot;, &quot;php3&quot; etc.</p></p>
 1618     <ul class="tags">
 1619                 <li><span class="field">return:</span> TRUE if the regex is valid and the rule was added to the list, otherwise FALSE.</li>
 1620                 <li><span class="field">section:</span> 2 Filter-settings</li>
 1621                 <li><span class="field">access:</span> public</li>
 1622             </ul>
 1623     
 1624     <div class="method-signature">
 1625         <span class="method-result">bool</span>
 1626         <span class="method-name">
 1627             addURLFollowRule
 1628         </span>
 1629                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$regex</span>)
 1630             </div>
 1631     
 1632             <ul class="parameters">
 1633                     <li>
 1634                 <span class="var-type">string</span>
 1635                 <span class="var-name">$regex</span><span class="var-description">: Regular-expression defining the rule</span>         </li>
 1636                 </ul>
 1637         
 1638             
 1639     </div>
 1640 <a name="methodcheckForAbort" id="checkForAbort"><!-- --></a>
 1641 <div class="evenrow">
 1642     
 1643     <div class="method-header">
 1644         <span class="method-title">checkForAbort</span> (line <span class="line-number">730</span>)
 1645     </div> 
 1646     
 1647     <!-- ========== Info from phpDoc block ========= -->
 1648 <p class="short-description">Checks if the crawling-process should be aborted.</p>
 1649     <ul class="tags">
 1650                 <li><span class="field">return:</span> NULL if the process shouldn't be aborted yet, otherwise one of the PHPCrawlerAbortReasons::ABORTREASON-constants.</li>
 1651                 <li><span class="field">access:</span> protected</li>
 1652             </ul>
 1653     
 1654     <div class="method-signature">
 1655         <span class="method-result">int</span>
 1656         <span class="method-name">
 1657             checkForAbort
 1658         </span>
 1659                 ()
 1660             </div>
 1661     
 1662         
 1663             
 1664     </div>
 1665 <a name="methodcleanup" id="cleanup"><!-- --></a>
 1666 <div class="oddrow">
 1667     
 1668     <div class="method-header">
 1669         <span class="method-title">cleanup</span> (line <span class="line-number">795</span>)
 1670     </div> 
 1671     
 1672     <!-- ========== Info from phpDoc block ========= -->
 1673 <p class="short-description">Cleans up the crawler after it has finished.</p>
 1674     <ul class="tags">
 1675                 <li><span class="field">access:</span> protected</li>
 1676             </ul>
 1677     
 1678     <div class="method-signature">
 1679         <span class="method-result">void</span>
 1680         <span class="method-name">
 1681             cleanup
 1682         </span>
 1683                 ()
 1684             </div>
 1685     
 1686         
 1687             
 1688     </div>
 1689 <a name="methodcreateWorkingDirectory" id="createWorkingDirectory"><!-- --></a>
 1690 <div class="evenrow">
 1691     
 1692     <div class="method-header">
 1693         <span class="method-title">createWorkingDirectory</span> (line <span class="line-number">775</span>)
 1694     </div> 
 1695     
 1696     <!-- ========== Info from phpDoc block ========= -->
 1697 <p class="short-description">Creates the working-directory for this instance of the crawler.</p>
 1698     <ul class="tags">
 1699                 <li><span class="field">access:</span> protected</li>
 1700             </ul>
 1701     
 1702     <div class="method-signature">
 1703         <span class="method-result">void</span>
 1704         <span class="method-name">
 1705             createWorkingDirectory
 1706         </span>
 1707                 ()
 1708             </div>
 1709     
 1710         
 1711             
 1712     </div>
 1713 <a name="methoddisableExtendedLinkInfo" id="disableExtendedLinkInfo"><!-- --></a>
 1714 <div class="oddrow">
 1715     
 1716     <div class="method-header">
 1717         <span class="method-title">disableExtendedLinkInfo</span> (line <span class="line-number">1574</span>)
 1718     </div> 
 1719     
 1720     <!-- ========== Info from phpDoc block ========= -->
 1721 <p class="short-description">Has no function anymore.</p>
 1722 <p class="description"><p>This method has no function anymore, it just still exists for compatibility-reasons.</p></p>
 1723     <ul class="tags">
 1724                 <li><span class="field">deprecated:</span> </li>
 1725                 <li><span class="field">section:</span> 11 Deprecated</li>
 1726                 <li><span class="field">access:</span> public</li>
 1727             </ul>
 1728     
 1729     <div class="method-signature">
 1730         <span class="method-result">void</span>
 1731         <span class="method-name">
 1732             disableExtendedLinkInfo
 1733         </span>
 1734                     (<span class="var-type"></span>&nbsp;<span class="var-name">$mode</span>)
 1735             </div>
 1736     
 1737             <ul class="parameters">
 1738                     <li>
 1739                 <span class="var-type"></span>
 1740                 <span class="var-name">$mode</span>         </li>
 1741                 </ul>
 1742         
 1743             
 1744     </div>
 1745 <a name="methodenableAggressiveLinkSearch" id="enableAggressiveLinkSearch"><!-- --></a>
 1746 <div class="evenrow">
 1747     
 1748     <div class="method-header">
 1749         <span class="method-title">enableAggressiveLinkSearch</span> (line <span class="line-number">1477</span>)
 1750     </div> 
 1751     
 1752     <!-- ========== Info from phpDoc block ========= -->
 1753 <p class="short-description">Enables or disables aggressive link-searching.</p>
 1754 <p class="description"><p>If this is set to FALSE, the crawler tries to find links only inside html-tags (&lt; and &gt;).  If this is set to TRUE, the crawler tries to find links everywhere in an html-page, even outside of html-tags.  The default value is TRUE.</p><p>Please note that if aggressive-link-searching is enabled, it happens that the crawler will find links that are not meant as links and it also happens that it  finds links in script-parts of pages that can't be rebuilt correctly - since there is no javascript-parser/interpreter implemented.  (E.g. javascript-code like document.location.href= a_var + &quot;.html&quot;).</p><p>Disabling aggressive-link-searching results in a better crawling-performance.</p></p>
 1755     <ul class="tags">
 1756                 <li><span class="field">section:</span> 6 Linkfinding settings</li>
 1757                 <li><span class="field">access:</span> public</li>
 1758             </ul>
 1759     
 1760     <div class="method-signature">
 1761         <span class="method-result">bool</span>
 1762         <span class="method-name">
 1763             enableAggressiveLinkSearch
 1764         </span>
 1765                     (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
 1766             </div>
 1767     
 1768             <ul class="parameters">
 1769                     <li>
 1770                 <span class="var-type">bool</span>
 1771                 <span class="var-name">$mode</span>         </li>
 1772                 </ul>
 1773         
 1774             
 1775     </div>
 1776 <a name="methodenableCookieHandling" id="enableCookieHandling"><!-- --></a>
 1777 <div class="oddrow">
 1778     
 1779     <div class="method-header">
 1780         <span class="method-title">enableCookieHandling</span> (line <span class="line-number">1441</span>)
 1781     </div> 
 1782     
 1783     <!-- ========== Info from phpDoc block ========= -->
 1784 <p class="short-description">Enables or disables cookie-handling.</p>
 1785 <p class="description"><p>If cookie-handling is set to TRUE, the crawler will handle all cookies sent by webservers just like a common browser does.  The default-value is TRUE.</p><p>It's strongly recommended to set or leave the cookie-handling enabled!</p></p>
 1786     <ul class="tags">
 1787                 <li><span class="field">section:</span> 10 Other settings</li>
 1788                 <li><span class="field">access:</span> public</li>
 1789             </ul>
 1790     
 1791     <div class="method-signature">
 1792         <span class="method-result">bool</span>
 1793         <span class="method-name">
 1794             enableCookieHandling
 1795         </span>
 1796                     (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
 1797             </div>
 1798     
 1799             <ul class="parameters">
 1800                     <li>
 1801                 <span class="var-type">bool</span>
 1802                 <span class="var-name">$mode</span>         </li>
 1803                 </ul>
 1804         
 1805             
 1806     </div>
 1807 <a name="methodenableResumption" id="enableResumption"><!-- --></a>
 1808 <div class="evenrow">
 1809     
 1810     <div class="method-header">
 1811         <span class="method-title">enableResumption</span> (line <span class="line-number">1875</span>)
 1812     </div> 
 1813     
 1814     <!-- ========== Info from phpDoc block ========= -->
 1815 <p class="short-description">Prepares the crawler for process-resumption.</p>
 1816 <p class="description"><p>In order to be able to resume an aborted/terminated crawling-process, it is necessary to  initially call the enableResumption() method in your script/project.</p><p>For further details on how to resume aborted processes please see the documentation of the  <a href="../phpcrawl/PHPCrawler.html#methodresume">resume()</a> method.</p></p>
 1817     <ul class="tags">
 1818                 <li><span class="field">section:</span> 9 Process resumption</li>
 1819                 <li><span class="field">access:</span> public</li>
 1820             </ul>
 1821     
 1822     <div class="method-signature">
 1823         <span class="method-result">void</span>
 1824         <span class="method-name">
 1825             enableResumption
 1826         </span>
 1827                 ()
 1828             </div>
 1829     
 1830         
 1831             
 1832     </div>
 1833 <a name="methodgetCrawlerId" id="getCrawlerId"><!-- --></a>
 1834 <div class="oddrow">
 1835     
 1836     <div class="method-header">
 1837         <span class="method-title">getCrawlerId</span> (line <span class="line-number">1792</span>)
 1838     </div> 
 1839     
 1840     <!-- ========== Info from phpDoc block ========= -->
 1841 <p class="short-description">Returns the unique ID of the instance of the crawler</p>
 1842     <ul class="tags">
 1843                 <li><span class="field">section:</span> 9 Process resumption</li>
 1844                 <li><span class="field">access:</span> public</li>
 1845             </ul>
 1846     
 1847     <div class="method-signature">
 1848         <span class="method-result">int</span>
 1849         <span class="method-name">
 1850             getCrawlerId
 1851         </span>
 1852                 ()
 1853             </div>
 1854     
 1855         
 1856             
 1857     </div>
 1858 <a name="methodgetProcessReport" id="getProcessReport"><!-- --></a>
 1859 <div class="evenrow">
 1860     
 1861     <div class="method-header">
 1862         <span class="method-title">getProcessReport</span> (line <span class="line-number">814</span>)
 1863     </div> 
 1864     
 1865     <!-- ========== Info from phpDoc block ========= -->
 1866 <p class="short-description">Returns summarizing report-information about the crawling-process after it has finished.</p>
 1867     <ul class="tags">
 1868                 <li><span class="field">return:</span> PHPCrawlerProcessReport-object containing process-summary-information</li>
 1869                 <li><span class="field">section:</span> 1 Basic settings</li>
 1870                 <li><span class="field">access:</span> public</li>
 1871             </ul>
 1872     
 1873     <div class="method-signature">
 1874         <span class="method-result"><a href="../phpcrawl/PHPCrawlerProcessReport.html">PHPCrawlerProcessReport</a></span>
 1875         <span class="method-name">
 1876             getProcessReport
 1877         </span>
 1878                 ()
 1879             </div>
 1880     
 1881         
 1882             
 1883     </div>
 1884 <a name="methodgetReport" id="getReport"><!-- --></a>
 1885 <div class="oddrow">
 1886     
 1887     <div class="method-header">
 1888         <span class="method-title">getReport</span> (line <span class="line-number">857</span>)
 1889     </div> 
 1890     
 1891     <!-- ========== Info from phpDoc block ========= -->
 1892 <p class="short-description">Returns an array with summarizing report-information after the crawling-process has finished</p>
 1893 <p class="description"><p>For detailed information on the containing array-keys see PHPCrawlerProcessReport-class.</p></p>
 1894     <ul class="tags">
 1895                 <li><span class="field">deprecated:</span> Please use getProcessReport() instead.</li>
 1896                 <li><span class="field">section:</span> 11 Deprecated</li>
 1897                 <li><span class="field">access:</span> public</li>
 1898             </ul>
 1899     
 1900     <div class="method-signature">
 1901         <span class="method-result">void</span>
 1902         <span class="method-name">
 1903             getReport
 1904         </span>
 1905                 ()
 1906             </div>
 1907     
 1908         
 1909             
 1910     </div>
 1911 <a name="methodgo" id="go"><!-- --></a>
 1912 <div class="evenrow">
 1913     
 1914     <div class="method-header">
 1915         <span class="method-title">go</span> (line <span class="line-number">324</span>)
 1916     </div> 
 1917     
 1918     <!-- ========== Info from phpDoc block ========= -->
 1919 <p class="short-description">Starts the crawling process in single-process-mode.</p>
 1920 <p class="description"><p>Be sure you did override the <a href="../phpcrawl/PHPCrawler.html#methodhandleDocumentInfo">handleDocumentInfo()</a>- or <a href="../phpcrawl/PHPCrawler.html#methodhandlePageData">handlePageData()</a>-method before calling the go()-method  to process the documents the crawler finds.</p></p>
 1921     <ul class="tags">
 1922                 <li><span class="field">section:</span> 1 Basic settings</li>
 1923                 <li><span class="field">access:</span> public</li>
 1924             </ul>
 1925     
 1926     <div class="method-signature">
 1927         <span class="method-result">void</span>
 1928         <span class="method-name">
 1929             go
 1930         </span>
 1931                 ()
 1932             </div>
 1933     
 1934         
 1935             
 1936     </div>
 1937 <a name="methodgoMultiProcessed" id="goMultiProcessed"><!-- --></a>
 1938 <div class="oddrow">
 1939     
 1940     <div class="method-header">
 1941         <span class="method-title">goMultiProcessed</span> (line <span class="line-number">387</span>)
 1942     </div> 
 1943     
 1944     <!-- ========== Info from phpDoc block ========= -->
 1945 <p class="short-description">Starts the crawler by using multi processes.</p>
 1946 <p class="description"><p>When using this method instead of the <a href="../phpcrawl/PHPCrawler.html#methodgo">go()</a>-method to start the crawler, phpcrawl will use the given  number of processes simultaneously for spidering the target-url.  Using multi processes will speed up the crawling-progress dramatically in most cases.</p><p>There are some requirements though to successfully run the crawler in multi-process mode:  <ul><li>The multi-process mode only works on unix-based systems (linux)</li><li>Scripts using the crawler have to be run from the commandline (cli)</li><li>The <a href="http://php.net/manual/en/pcntl.installation.php">PCNTL-extension</a> for php (process control) has to be installed and activated.</li><li>The <a href="http://php.net/manual/en/sem.installation.php">SEMAPHORE-extension</a> for php has to be installed and activated.</li><li>The <a href="http://de.php.net/manual/en/posix.installation.php">POSIX-extension</a> for php has to be installed and activated.</li><li>The <a href="http://de2.php.net/manual/en/pdo.installation.php">PDO-extension</a> together with the SQLite-driver (PDO_SQLITE) has to be installed and activated.</li></ul></p><p>PHPCrawl supports two different modes of multiprocessing:  <ol><li><strong><a href="../phpcrawl-enums/PHPCrawlerMultiProcessModes.html">PHPCrawlerMultiProcessModes</a>::MPMODE_PARENT_EXECUTES_USERCODE</strong>
 1947 
 1948  The crawler uses multi processes simultaneously for spidering the target URL, but the usercode provided to
 1949  the overridable function <a href="../phpcrawl/PHPCrawler.html#methodhandleDocumentInfo">handleDocumentInfo()</a> gets always executed on the same main-process. This
 1950  means that the <strong>usercode never gets executed simultaneously</strong> and so you don't have to care about
 1951  concurrent file/database/handle-accesses or similar things.
 1952  But on the other side the usercode may slow down the crawling-procedure because every child-process has to
 1953  wait until the usercode got executed on the main-process. <strong>This is the recommended multiprocess-mode!</strong></li><li><strong><a href="../phpcrawl-enums/PHPCrawlerMultiProcessModes.html">PHPCrawlerMultiProcessModes</a>::MPMODE_CHILDS_EXECUTES_USERCODE</strong>
 1954 
 1955  The crawler uses multi processes simultaneously for spidering the target URL, and every child-process executes
 1956  the usercode provided to the overridable function <a href="../phpcrawl/PHPCrawler.html#methodhandleDocumentInfo">handleDocumentInfo()</a> directly from its process. This
 1957  means that the <strong>usercode gets executed simultaneously</strong> by the different child-processes and you should
 1958  take care of concurrent file/data/handle-accesses properly (if used).
 1959 
 1960  When using this mode and you use any handles like database-connections or filestreams in your extended
 1961  crawler-class, you should open them within the overridden method <a href="../phpcrawl/PHPCrawler.html#methodinitChildProcess">initChildProcess()</a> instead of opening
 1962  them from the constructor. For more details see the documentation of the <a href="../phpcrawl/PHPCrawler.html#methodinitChildProcess">initChildProcess()</a>-method.</li></ol></p><p>Example for starting the crawler with 5 processes using the recommended MPMODE_PARENT_EXECUTES_USERCODE-mode:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodgoMultiProcessed">goMultiProcessed</a><span class="src-sym">(</span><span class="src-num">5</span><span class="src-sym">,&nbsp;</span><span class="src-id"><a href="../phpcrawl-enums/PHPCrawlerMultiProcessModes.html">PHPCrawlerMultiProcessModes</a></span><span class="src-sym">::</span><span class="src-id">MPMODE_PARENT_EXECUTES_USERCODE</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 1963 </ol></div></p><p>Please note that increasing the number of processes to high values doesn't automatically mean that the crawling-process  will go off faster! Using 3 to 5 processes should be good values to start from.</p></p>
 1964     <ul class="tags">
 1965                 <li><span class="field">section:</span> 1 Basic settings</li>
 1966                 <li><span class="field">access:</span> public</li>
 1967             </ul>
 1968     
 1969     <div class="method-signature">
 1970         <span class="method-result">void</span>
 1971         <span class="method-name">
 1972             goMultiProcessed
 1973         </span>
 1974                     ([<span class="var-type">int</span>&nbsp;<span class="var-name">$process_count</span> = <span class="var-default">3</span>], [<span class="var-type">int</span>&nbsp;<span class="var-name">$multiprocess_mode</span> = <span class="var-default">1</span>])
 1975             </div>
 1976     
 1977             <ul class="parameters">
 1978                     <li>
 1979                 <span class="var-type">int</span>
 1980                 <span class="var-name">$process_count</span><span class="var-description">: Number of processes to use</span>           </li>
 1981                     <li>
 1982                 <span class="var-type">int</span>
 1983                 <span class="var-name">$multiprocess_mode</span><span class="var-description">: The multiprocess-mode to use.                                One of the <a href="../phpcrawl-enums/PHPCrawlerMultiProcessModes.html">PHPCrawlerMultiProcessModes</a>-constants</span>           </li>
 1984                 </ul>
 1985         
 1986             
 1987     </div>
 1988 <a name="methodhandleDocumentInfo" id="handleDocumentInfo"><!-- --></a>
 1989 <div class="evenrow">
 1990     
 1991     <div class="method-header">
 1992         <span class="method-title">handleDocumentInfo</span> (line <span class="line-number">990</span>)
 1993     </div> 
 1994     
 1995     <!-- ========== Info from phpDoc block ========= -->
 1996 <p class="short-description">Override this method to get access to all information about a page or file the crawler found and received.</p>
 1997 <p class="description"><p>Every time the crawler found and received a document on its way this method will be called.  The crawler passes all information about the currently received page or file to this method  by a PHPCrawlerDocumentInfo-object.</p><p>Please see the <a href="../phpcrawl/PHPCrawlerDocumentInfo.html">PHPCrawlerDocumentInfo</a> documentation for a list of all properties describing the  html-document.</p><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-key">class&nbsp;</span><span class="src-id">MyCrawler&nbsp;</span><span class="src-key">extends&nbsp;</span><a href="../phpcrawl/PHPCrawler.html">PHPCrawler</a></div></li>
 1998 <li><div class="src-line">&nbsp;<span class="src-sym">{</span></div></li>
 1999 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-key">function&nbsp;</span><span class="src-id">handleDocumentInfo</span><span class="src-sym">(</span><span class="src-var">$PageInfo</span><span class="src-sym">)</span></div></li>
 2000 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">{</span></div></li>
 2001 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-comm">//&nbsp;Print&nbsp;the&nbsp;URL&nbsp;of&nbsp;the&nbsp;document</span></div></li>
 2002 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;echo&nbsp;<span class="src-str">&quot;URL:&nbsp;&quot;</span>.<span class="src-var">$PageInfo</span><span class="src-sym">-&gt;</span><span class="src-id">url</span>.<span class="src-str">&quot;&lt;br&nbsp;/&gt;&quot;</span><span class="src-sym">;</span></div></li>
 2003 <li><div class="src-line">&nbsp;</div></li>
 2004 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-comm">//&nbsp;Print&nbsp;the&nbsp;http-status-code</span></div></li>
 2005 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;echo&nbsp;<span class="src-str">&quot;HTTP-statuscode:&nbsp;&quot;</span>.<span class="src-var">$PageInfo</span><span class="src-sym">-&gt;</span><span class="src-id">http_status_code</span>.<span class="src-str">&quot;&lt;br&nbsp;/&gt;&quot;</span><span class="src-sym">;</span></div></li>
 2006 <li><div class="src-line">&nbsp;</div></li>
 2007 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-comm">//&nbsp;Print&nbsp;the&nbsp;number&nbsp;of&nbsp;found&nbsp;links&nbsp;in&nbsp;this&nbsp;document</span></div></li>
 2008 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;echo&nbsp;<span class="src-str">&quot;Links&nbsp;found:&nbsp;&quot;</span>.<a href="http://www.php.net/count">count</a><span class="src-sym">(</span><span class="src-var">$PageInfo</span><span class="src-sym">-&gt;</span><span class="src-id">links_found_url_descriptors</span><span class="src-sym">)</span>.<span class="src-str">&quot;&lt;br&nbsp;/&gt;&quot;</span><span class="src-sym">;</span></div></li>
 2009 <li><div class="src-line">&nbsp;</div></li>
 2010 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-comm">//&nbsp;..</span></div></li>
 2011 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">}</span></div></li>
 2012 <li><div class="src-line">&nbsp;<span class="src-sym">}</span></div></li>
 2013 </ol></div></p></p>
 2014     <ul class="tags">
 2015                 <li><span class="field">return:</span> The crawling-process will stop immediately if you let this method return any negative value.</li>
 2016                 <li><span class="field">section:</span> 3 Overridable methods / User data-processing</li>
 2017                 <li><span class="field">access:</span> public</li>
 2018             </ul>
 2019     
 2020     <div class="method-signature">
 2021         <span class="method-result">int</span>
 2022         <span class="method-name">
 2023             handleDocumentInfo
 2024         </span>
 2025                     (<span class="var-type"><a href="../phpcrawl/PHPCrawlerDocumentInfo.html">PHPCrawlerDocumentInfo</a></span>&nbsp;<span class="var-name">$PageInfo</span>)
 2026             </div>
 2027     
 2028             <ul class="parameters">
 2029                     <li>
 2030                 <span class="var-type"><a href="../phpcrawl/PHPCrawlerDocumentInfo.html">PHPCrawlerDocumentInfo</a></span>
 2031                 <span class="var-name">$PageInfo</span><span class="var-description">: A PHPCrawlerDocumentInfo-object containing all information about the currently received document.                                          Please see the reference of the <a href="../phpcrawl/PHPCrawlerDocumentInfo.html">PHPCrawlerDocumentInfo</a>-class for detailed information.</span>           </li>
 2032                 </ul>
 2033         
 2034             
 2035             <hr class="separator" />
 2036         <div class="notes">Redefined in descendants as:</div>
 2037         <ul class="redefinitions">
 2038                     <li>
 2039                 <a href="../phpcrawl/SMCCrawler.html#methodhandleDocumentInfo">SMCCrawler::handleDocumentInfo()</a>
 2040                                 : get access to all information about a page or file the crawler found and received.
 2041                             </li>
 2042                 </ul>
 2043     </div>
 2044 <a name="methodhandleHeaderInfo" id="handleHeaderInfo"><!-- --></a>
 2045 <div class="oddrow">
 2046     
 2047     <div class="method-header">
 2048         <span class="method-title">handleHeaderInfo</span> (line <span class="line-number">893</span>)
 2049     </div> 
 2050     
 2051     <!-- ========== Info from phpDoc block ========= -->
 2052 <p class="short-description">Overridable method that will be called after the header of a document was received and BEFORE the content  will be received.</p>
 2053 <p class="description"><p>Every time a header of a document was received, the crawler will call this method.  If this method returns any negative integer, the crawler will NOT receive the content of the particular page or file.</p><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-key">class&nbsp;</span><span class="src-id">MyCrawler&nbsp;</span><span class="src-key">extends&nbsp;</span><a href="../phpcrawl/PHPCrawler.html">PHPCrawler</a></div></li>
 2054 <li><div class="src-line">&nbsp;<span class="src-sym">{</span></div></li>
 2055 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-key">function&nbsp;</span><span class="src-id">handleHeaderInfo</span><span class="src-sym">(</span><span class="src-id">PHPCrawlerResponseHeader&nbsp;</span><span class="src-var">$header</span><span class="src-sym">)</span></div></li>
 2056 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">{</span></div></li>
 2057 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-comm">//&nbsp;If&nbsp;the&nbsp;content-type&nbsp;of&nbsp;the&nbsp;document&nbsp;isn't&nbsp;&quot;text/html&quot;&nbsp;-&gt;&nbsp;don't&nbsp;receive&nbsp;it.</span></div></li>
 2058 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-key">if&nbsp;</span><span class="src-sym">(</span><span class="src-var">$header</span><span class="src-sym">-&gt;</span><span class="src-id">content_type&nbsp;</span>!=&nbsp;<span class="src-str">&quot;text/html&quot;</span><span class="src-sym">)</span></div></li>
 2059 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-sym">{</span></div></li>
 2060 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-key">return&nbsp;</span>-<span class="src-num">1</span><span class="src-sym">;</span></div></li>
 2061 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-sym">}</span></div></li>
 2062 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">}</span></div></li>
 2063 <li><div class="src-line">&nbsp;</div></li>
 2064 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-key">function&nbsp;</span><span class="src-id">handleDocumentInfo</span><span class="src-sym">(</span><span class="src-var">$PageInfo</span><span class="src-sym">)</span></div></li>
 2065 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">{</span></div></li>
 2066 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-comm">//&nbsp;...</span></div></li>
 2067 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">}</span></div></li>
 2068 <li><div class="src-line">&nbsp;<span class="src-sym">}</span></div></li>
 2069 </ol></div></p></p>
 2070     <ul class="tags">
 2071                 <li><span class="field">return:</span> The document won't be received if you let this method return any negative value.</li>
 2072                 <li><span class="field">section:</span> 3 Overridable methods / User data-processing</li>
 2073                 <li><span class="field">access:</span> public</li>
 2074             </ul>
 2075     
 2076     <div class="method-signature">
 2077         <span class="method-result">int</span>
 2078         <span class="method-name">
 2079             handleHeaderInfo
 2080         </span>
 2081                     (<span class="var-type"><a href="../phpcrawl/PHPCrawlerResponseHeader.html">PHPCrawlerResponseHeader</a></span>&nbsp;<span class="var-name">$header</span>)
 2082             </div>
 2083     
 2084             <ul class="parameters">
 2085                     <li>
 2086                 <span class="var-type"><a href="../phpcrawl/PHPCrawlerResponseHeader.html">PHPCrawlerResponseHeader</a></span>
 2087                 <span class="var-name">$header</span><span class="var-description">: The header as PHPCrawlerResponseHeader-object</span>           </li>
 2088                 </ul>
 2089         
 2090             
 2091     </div>
 2092 <a name="methodhandlePageData" id="handlePageData"><!-- --></a>
 2093 <div class="evenrow">
 2094     
 2095     <div class="method-header">
 2096         <span class="method-title">handlePageData</span> (line <span class="line-number">952</span>)
 2097     </div> 
 2098     
 2099     <!-- ========== Info from phpDoc block ========= -->
 2100 <p class="short-description">Override this method to get access to all information about a page or file the crawler found and received.</p>
 2101 <p class="description"><p>Every time the crawler found and received a document on its way this method will be called.  The crawler passes all information about the currently received page or file to this method  by the array $page_data.</p></p>
 2102     <ul class="tags">
 2103                 <li><span class="field">return:</span> The crawling-process will stop immediately if you let this method return any negative value.</li>
 2104                 <li><span class="field">deprecated:</span> Please use and override the <a href="../phpcrawl/PHPCrawler.html#methodhandleDocumentInfo">handleDocumentInfo</a>-method to access document-information instead.</li>
 2105                 <li><span class="field">section:</span> 3 Overridable methods / User data-processing</li>
 2106                 <li><span class="field">access:</span> public</li>
 2107             </ul>
 2108     
 2109     <div class="method-signature">
 2110         <span class="method-result">int</span>
 2111         <span class="method-name">
 2112             handlePageData
 2113         </span>
 2114                     (<span class="var-type">array</span>&nbsp;<span class="var-name">&amp;$page_data</span>)
 2115             </div>
 2116     
 2117             <ul class="parameters">
 2118                     <li>
 2119                 <span class="var-type">array</span>
 2120                 <span class="var-name">&amp;$page_data</span><span class="var-description">: Array containing all information about the currently received document.                           For detailed information on the contained keys see <a href="../phpcrawl/PHPCrawlerDocumentInfo.html">PHPCrawlerDocumentInfo</a>-class.</span>           </li>
 2121                 </ul>
 2122         
 2123             
 2124     </div>
 2125 <a name="methodinitChildProcess" id="initChildProcess"><!-- --></a>
 2126 <div class="oddrow">
 2127     
 2128     <div class="method-header">
 2129         <span class="method-title">initChildProcess</span> (line <span class="line-number">935</span>)
 2130     </div> 
 2131     
 2132     <!-- ========== Info from phpDoc block ========= -->
 2133 <p class="short-description">Overridable method that will be called by every used child-process just before it starts the crawling-procedure.</p>
 2134 <p class="description"><p>Every child-process of the crawler will call this method just before it starts its crawling-loop from within its  process-context.</p><p>So when using the multi-process mode &quot;<a href="../phpcrawl-enums/PHPCrawlerMultiProcessModes.html#constMPMODE_CHILDS_EXECUTES_USERCODE">PHPCrawlerMultiProcessModes::MPMODE_CHILDS_EXECUTES_USERCODE</a>&quot;, this method  should be overridden and used to open any needed database-connections, file streams or other similar handles to ensure  that they will get opened and accessible for every used child-process.</p><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-key">class&nbsp;</span><span class="src-id">MyCrawler&nbsp;</span><span class="src-key">extends&nbsp;</span><a href="../phpcrawl/PHPCrawler.html">PHPCrawler</a></div></li>
 2135 <li><div class="src-line">&nbsp;<span class="src-sym">{</span></div></li>
 2136 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-key">protected&nbsp;</span><span class="src-var">$mysql_link</span><span class="src-sym">;</span></div></li>
 2137 <li><div class="src-line">&nbsp;</div></li>
 2138 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-key">function&nbsp;</span><span class="src-id">initChildProcess</span><span class="src-sym">(</span><span class="src-sym">)</span></div></li>
 2139 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">{</span></div></li>
 2140 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-comm">//&nbsp;Open&nbsp;a&nbsp;database-connection&nbsp;for&nbsp;every&nbsp;used&nbsp;process</span></div></li>
 2141 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<span class="src-var">$this</span><span class="src-sym">-&gt;</span><span class="src-var">mysql_link&nbsp;</span>=&nbsp;<a href="http://www.php.net/mysql_connect">mysql_connect</a><span class="src-sym">(</span><span class="src-str">&quot;myhost&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-str">&quot;myusername&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-str">&quot;mypassword&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2142 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href="http://www.php.net/mysql_select_db">mysql_select_db</a>&nbsp;<span class="src-sym">(</span><span class="src-str">&quot;mydatabasename&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-var">$this</span><span class="src-sym">-&gt;</span><span class="src-var">mysql_link</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2143 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">}</span></div></li>
 2144 <li><div class="src-line">&nbsp;</div></li>
 2145 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-key">function&nbsp;</span><span class="src-id">handleDocumentInfo</span><span class="src-sym">(</span><span class="src-var">$PageInfo</span><span class="src-sym">)</span></div></li>
 2146 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">{</span></div></li>
 2147 <li><div class="src-line">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href="http://www.php.net/mysql_query">mysql_query</a><span class="src-sym">(</span><span class="src-str">&quot;INSERT&nbsp;INTO&nbsp;urls&nbsp;SET&nbsp;url&nbsp;=&nbsp;'&quot;</span>.<span class="src-var">$PageInfo</span><span class="src-sym">-&gt;</span><span class="src-id">url</span>.<span class="src-str">&quot;';&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-var">$this</span><span class="src-sym">-&gt;</span><span class="src-var">mysql_link</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2148 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-sym">}</span></div></li>
 2149 <li><div class="src-line">&nbsp;<span class="src-sym">}</span></div></li>
 2150 <li><div class="src-line">&nbsp;</div></li>
 2151 <li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;Start&nbsp;crawler&nbsp;with&nbsp;5&nbsp;processes</span></div></li>
 2152 <li><div class="src-line">&nbsp;<span class="src-var">$crawler&nbsp;</span>=&nbsp;<span class="src-key">new&nbsp;</span><span class="src-id">MyCrawler</span><span class="src-sym">(</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2153 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><span class="src-id">setURL</span><span class="src-sym">(</span><span class="src-str">&quot;http://www.any-url.com&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2154 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><span class="src-id">goMultiProcessed</span><span class="src-sym">(</span><span class="src-num">5</span><span class="src-sym">,&nbsp;</span><span class="src-id"><a href="../phpcrawl-enums/PHPCrawlerMultiProcessModes.html">PHPCrawlerMultiProcessModes</a></span><span class="src-sym">::</span><span class="src-id">MPMODE_CHILDS_EXECUTES_USERCODE</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2155 </ol></div></p></p>
 2156     <ul class="tags">
 2157                 <li><span class="field">section:</span> 3 Overridable methods / User data-processing</li>
 2158                 <li><span class="field">access:</span> public</li>
 2159             </ul>
 2160     
 2161     <div class="method-signature">
 2162         <span class="method-result">void</span>
 2163         <span class="method-name">
 2164             initChildProcess
 2165         </span>
 2166                 ()
 2167             </div>
 2168     
 2169         
 2170             
 2171     </div>
 2172 <a name="methodinitCrawlerProcess" id="initCrawlerProcess"><!-- --></a>
 2173 <div class="evenrow">
 2174     
 2175     <div class="method-header">
 2176         <span class="method-title">initCrawlerProcess</span> (line <span class="line-number">273</span>)
 2177     </div> 
 2178     
 2179     <!-- ========== Info from phpDoc block ========= -->
 2180 <p class="short-description">Initiates a crawler-process</p>
 2181     <ul class="tags">
 2182                 <li><span class="field">access:</span> protected</li>
 2183             </ul>
 2184     
 2185     <div class="method-signature">
 2186         <span class="method-result">void</span>
 2187         <span class="method-name">
 2188             initCrawlerProcess
 2189         </span>
 2190                 ()
 2191             </div>
 2192     
 2193         
 2194             
 2195     </div>
 2196 <a name="methodobeyNoFollowTags" id="obeyNoFollowTags"><!-- --></a>
 2197 <div class="oddrow">
 2198     
 2199     <div class="method-header">
 2200         <span class="method-title">obeyNoFollowTags</span> (line <span class="line-number">1758</span>)
 2201     </div> 
 2202     
 2203     <!-- ========== Info from phpDoc block ========= -->
 2204 <p class="short-description">Decides whether the crawler should obey &quot;nofollow&quot;-tags</p>
 2205 <p class="description"><p>If set to TRUE, the crawler will not follow links that are marked with rel=&quot;nofollow&quot;  (like &lt;a href=&quot;page.html&quot; rel=&quot;nofollow&quot;&gt;) nor links from pages containing the meta-tag  &lt;meta name=&quot;robots&quot; content=&quot;nofollow&quot;&gt;.</p><p>By default, the crawler will NOT obey nofollow-tags.</p></p>
 2206     <ul class="tags">
 2207                 <li><span class="field">section:</span> 2 Filter-settings</li>
 2208                 <li><span class="field">access:</span> public</li>
 2209             </ul>
 2210     
 2211     <div class="method-signature">
 2212         <span class="method-result">void</span>
 2213         <span class="method-name">
 2214             obeyNoFollowTags
 2215         </span>
 2216                     (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
 2217             </div>
 2218     
 2219             <ul class="parameters">
 2220                     <li>
 2221                 <span class="var-type">bool</span>
 2222                 <span class="var-name">$mode</span><span class="var-description">: If set to TRUE, the crawler will obey &quot;nofollow&quot;-tags</span>           </li>
 2223                 </ul>
 2224         
 2225             
 2226     </div>
 2227 <a name="methodobeyRobotsTxt" id="obeyRobotsTxt"><!-- --></a>
 2228 <div class="evenrow">
 2229     
 2230     <div class="method-header">
 2231         <span class="method-title">obeyRobotsTxt</span> (line <span class="line-number">1335</span>)
 2232     </div> 
 2233     
 2234     <!-- ========== Info from phpDoc block ========= -->
 2235 <p class="short-description">Decides whether the crawler should parse and obey robots.txt-files.</p>
 2236 <p class="description"><p>If this is set to TRUE, the crawler looks for a robots.txt-file for every host that sites or files should be received  from during the crawling process. If a robots.txt-file for a host was found, the contained directives applying to the  useragent-identification of the crawler  (&quot;PHPCrawl&quot; or manually set by calling <a href="../phpcrawl/PHPCrawler.html#methodsetUserAgentString">setUserAgentString()</a>) will be obeyed.</p><p>The default-value is FALSE (for compatibility reasons).</p><p>Please note that the directives found in a robots.txt-file have a higher priority than other settings made by the user.  If e.g. <a href="../phpcrawl/PHPCrawler.html#methodaddFollowMatch">addFollowMatch</a>(&quot;#http://foo\.com/path/file\.html#&quot;) was set, but a directive in the robots.txt-file of the host  foo.com says &quot;Disallow: /path/&quot;, the URL http://foo.com/path/file.html will be ignored by the crawler anyway.</p></p>
 2237     <ul class="tags">
 2238                 <li><span class="field">section:</span> 2 Filter-settings</li>
 2239                 <li><span class="field">access:</span> public</li>
 2240             </ul>
 2241     
 2242     <div class="method-signature">
 2243         <span class="method-result">bool</span>
 2244         <span class="method-name">
 2245             obeyRobotsTxt
 2246         </span>
 2247                     (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
 2248             </div>
 2249     
 2250             <ul class="parameters">
 2251                     <li>
 2252                 <span class="var-type">bool</span>
 2253                 <span class="var-name">$mode</span><span class="var-description">: Set to TRUE if you want the crawler to obey robots.txt-files.</span>         </li>
 2254                 </ul>
 2255         
 2256             
 2257     </div>
 2258 <a name="methodprocessRobotsTxt" id="processRobotsTxt"><!-- --></a>
 2259 <div class="oddrow">
 2260     
 2261     <div class="method-header">
 2262         <span class="method-title">processRobotsTxt</span> (line <span class="line-number">717</span>)
 2263     </div> 
 2264     
 2265     <!-- ========== Info from phpDoc block ========= -->
 2266     <ul class="tags">
 2267                 <li><span class="field">access:</span> protected</li>
 2268             </ul>
 2269     
 2270     <div class="method-signature">
 2271         <span class="method-result">void</span>
 2272         <span class="method-name">
 2273             processRobotsTxt
 2274         </span>
 2275                 ()
 2276             </div>
 2277     
 2278         
 2279             
 2280     </div>
 2281 <a name="methodprocessUrl" id="processUrl"><!-- --></a>
 2282 <div class="evenrow">
 2283     
 2284     <div class="method-header">
 2285         <span class="method-title">processUrl</span> (line <span class="line-number">601</span>)
 2286     </div> 
 2287     
 2288     <!-- ========== Info from phpDoc block ========= -->
 2289 <p class="short-description">Receives and processes the given URL</p>
 2290     <ul class="tags">
 2291                 <li><span class="field">return:</span> TRUE if the crawling-process should be aborted after processing the URL, otherwise FALSE.</li>
 2292                 <li><span class="field">access:</span> protected</li>
 2293             </ul>
 2294     
 2295     <div class="method-signature">
 2296         <span class="method-result">bool</span>
 2297         <span class="method-name">
 2298             processUrl
 2299         </span>
 2300                     (<span class="var-type"><a href="../phpcrawl/PHPCrawlerURLDescriptor.html">PHPCrawlerURLDescriptor</a></span>&nbsp;<span class="var-name">$UrlDescriptor</span>)
 2301             </div>
 2302     
 2303             <ul class="parameters">
 2304                     <li>
 2305                 <span class="var-type"><a href="../phpcrawl/PHPCrawlerURLDescriptor.html">PHPCrawlerURLDescriptor</a></span>
 2306                 <span class="var-name">$UrlDescriptor</span><span class="var-description">: The URL as PHPCrawlerURLDescriptor-object</span>            </li>
 2307                 </ul>
 2308         
 2309             
 2310     </div>
 2311 <a name="methodresume" id="resume"><!-- --></a>
 2312 <div class="oddrow">
 2313     
 2314     <div class="method-header">
 2315         <span class="method-title">resume</span> (line <span class="line-number">1846</span>)
 2316     </div> 
 2317     
 2318     <!-- ========== Info from phpDoc block ========= -->
 2319 <p class="short-description">Resumes the crawling-process with the given crawler-ID</p>
 2320 <p class="description"><p>If a crawling-process was aborted (for whatever reasons), it is possible  to resume it by calling the resume()-method before calling the go() or goMultiProcessed() method  and passing the crawler-ID of the aborted process to it (as returned by <a href="../phpcrawl/PHPCrawler.html#methodgetCrawlerId">getCrawlerId()</a>).</p><p>In order to be able to resume a process, it is necessary that it was initially  started with resumption enabled (by calling the <a href="../phpcrawl/PHPCrawler.html#methodenableResumption">enableResumption()</a> method).</p><p>This method throws an exception if resuming of a crawling-process failed.</p><p>Example of a resumable crawler-script:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;...</span></div></li>
 2321 <li><div class="src-line">&nbsp;<span class="src-var">$crawler&nbsp;</span>=&nbsp;<span class="src-key">new&nbsp;</span><span class="src-id">MyCrawler</span><span class="src-sym">(</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2322 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodenableResumption">enableResumption</a><span class="src-sym">(</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2323 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodsetURL">setURL</a><span class="src-sym">(</span><span class="src-str">&quot;www.url123.com&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2324 <li><div class="src-line">&nbsp;</div></li>
 2325 <li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;If&nbsp;process&nbsp;was&nbsp;started&nbsp;the&nbsp;first&nbsp;time:</span></div></li>
 2326 <li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;Get&nbsp;the&nbsp;crawler-ID&nbsp;and&nbsp;store&nbsp;it&nbsp;somewhere&nbsp;in&nbsp;order&nbsp;to&nbsp;be&nbsp;able&nbsp;to&nbsp;resume&nbsp;the&nbsp;process&nbsp;later&nbsp;on</span></div></li>
 2327 <li><div class="src-line">&nbsp;<span class="src-key">if&nbsp;</span><span class="src-sym">(</span><span class="src-sym">!</span><a href="http://www.php.net/file_exists">file_exists</a><span class="src-sym">(</span><span class="src-str">&quot;/tmp/crawlerid_for_url123.tmp&quot;</span><span class="src-sym">))</span></div></li>
 2328 <li><div class="src-line">&nbsp;<span class="src-sym">{</span></div></li>
 2329 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-var">$crawler_id&nbsp;</span>=&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodgetCrawlerId">getCrawlerId</a><span class="src-sym">(</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2330 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<a href="http://www.php.net/file_put_contents">file_put_contents</a><span class="src-sym">(</span><span class="src-str">&quot;/tmp/crawlerid_for_url123.tmp&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-var">$crawler_id</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2331 <li><div class="src-line">&nbsp;<span class="src-sym">}</span></div></li>
 2332 <li><div class="src-line">&nbsp;</div></li>
 2333 <li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;If&nbsp;process&nbsp;was&nbsp;restarted&nbsp;again&nbsp;(after&nbsp;a&nbsp;termination):</span></div></li>
 2334 <li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;Read&nbsp;the&nbsp;crawler-id&nbsp;and&nbsp;resume&nbsp;the&nbsp;process</span></div></li>
 2335 <li><div class="src-line">&nbsp;<span class="src-key">else</span></div></li>
 2336 <li><div class="src-line">&nbsp;<span class="src-sym">{</span></div></li>
 2337 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-var">$crawler_id&nbsp;</span>=&nbsp;<a href="http://www.php.net/file_get_contents">file_get_contents</a><span class="src-sym">(</span><span class="src-str">&quot;/tmp/crawlerid_for_url123.tmp&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2338 <li><div class="src-line">&nbsp;&nbsp;&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodresume">resume</a><span class="src-sym">(</span><span class="src-var">$crawler_id</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2339 <li><div class="src-line">&nbsp;<span class="src-sym">}</span></div></li>
 2340 <li><div class="src-line">&nbsp;</div></li>
 2341 <li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;...</span></div></li>
 2342 <li><div class="src-line">&nbsp;</div></li>
 2343 <li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;Start&nbsp;your&nbsp;crawling&nbsp;process</span></div></li>
 2344 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodgoMultiProcessed">goMultiProcessed</a><span class="src-sym">(</span><span class="src-num">5</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2345 <li><div class="src-line">&nbsp;</div></li>
 2346 <li><div class="src-line">&nbsp;<span class="src-comm">//&nbsp;After&nbsp;the&nbsp;process&nbsp;is&nbsp;finished&nbsp;completely:&nbsp;Delete&nbsp;the&nbsp;crawler-ID</span></div></li>
 2347 <li><div class="src-line">&nbsp;<a href="http://www.php.net/unlink">unlink</a><span class="src-sym">(</span><span class="src-str">&quot;/tmp/crawlerid_for_url123.tmp&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2348 </ol></div></p></p>
 2349     <ul class="tags">
 2350                 <li><span class="field">section:</span> 9 Process resumption</li>
 2351                 <li><span class="field">access:</span> public</li>
 2352             </ul>
 2353     
 2354     <div class="method-signature">
 2355         <span class="method-result">void</span>
 2356         <span class="method-name">
 2357             resume
 2358         </span>
 2359                     (<span class="var-type">int</span>&nbsp;<span class="var-name">$crawler_id</span>)
 2360             </div>
 2361     
 2362             <ul class="parameters">
 2363                     <li>
 2364                 <span class="var-type">int</span>
 2365                 <span class="var-name">$crawler_id</span><span class="var-description">: The crawler-ID of the crawling-process that should be resumed.                         (see <a href="../phpcrawl/PHPCrawler.html#methodgetCrawlerId">getCrawlerId()</a>)</span>            </li>
 2366                 </ul>
 2367         
 2368             
 2369     </div>
 2370 <a name="methodsetAggressiveLinkExtraction" id="setAggressiveLinkExtraction"><!-- --></a>
 2371 <div class="evenrow">
 2372     
 2373     <div class="method-header">
 2374         <span class="method-title">setAggressiveLinkExtraction</span> (line <span class="line-number">1488</span>)
 2375     </div> 
 2376     
 2377     <!-- ========== Info from phpDoc block ========= -->
 2378 <p class="short-description">Alias for enableAggressiveLinkSearch()</p>
 2379     <ul class="tags">
 2380                 <li><span class="field">deprecated:</span> Please use enableAggressiveLinkSearch()</li>
 2381                 <li><span class="field">section:</span> 11 Deprecated</li>
 2382                 <li><span class="field">access:</span> public</li>
 2383             </ul>
 2384     
 2385     <div class="method-signature">
 2386         <span class="method-result">void</span>
 2387         <span class="method-name">
 2388             setAggressiveLinkExtraction
 2389         </span>
 2390                     (<span class="var-type"></span>&nbsp;<span class="var-name">$mode</span>)
 2391             </div>
 2392     
 2393             <ul class="parameters">
 2394                     <li>
 2395                 <span class="var-type"></span>
 2396                 <span class="var-name">$mode</span>         </li>
 2397                 </ul>
 2398         
 2399             
 2400     </div>
 2401 <a name="methodsetConnectionTimeout" id="setConnectionTimeout"><!-- --></a>
 2402 <div class="oddrow">
 2403     
 2404     <div class="method-header">
 2405         <span class="method-title">setConnectionTimeout</span> (line <span class="line-number">1640</span>)
 2406     </div> 
 2407     
 2408     <!-- ========== Info from phpDoc block ========= -->
 2409 <p class="short-description">Sets the timeout in seconds for connection tries to hosting webservers.</p>
 2410 <p class="description"><p>If the connection to a host can't be established within the given time, the  request will be aborted.</p></p>
 2411     <ul class="tags">
 2412                 <li><span class="field">section:</span> 10 Other settings</li>
 2413                 <li><span class="field">access:</span> public</li>
 2414             </ul>
 2415     
 2416     <div class="method-signature">
 2417         <span class="method-result">bool</span>
 2418         <span class="method-name">
 2419             setConnectionTimeout
 2420         </span>
 2421                     (<span class="var-type">int</span>&nbsp;<span class="var-name">$timeout</span>)
 2422             </div>
 2423     
 2424             <ul class="parameters">
 2425                     <li>
 2426                 <span class="var-type">int</span>
 2427                 <span class="var-name">$timeout</span><span class="var-description">: The timeout in seconds, the default-value is 5 seconds.</span>            </li>
 2428                 </ul>
 2429         
 2430             
 2431     </div>
 2432 <a name="methodsetContentSizeLimit" id="setContentSizeLimit"><!-- --></a>
 2433 <div class="evenrow">
 2434     
 2435     <div class="method-header">
 2436         <span class="method-title">setContentSizeLimit</span> (line <span class="line-number">1402</span>)
 2437     </div> 
 2438     
 2439     <!-- ========== Info from phpDoc block ========= -->
 2440 <p class="short-description">Sets the content-size-limit for content the crawler should receive from documents.</p>
 2441 <p class="description"><p>If the crawler is receiving the content of a page or file and the contentsize-limit is reached, the crawler stops receiving content  from this page or file.</p><p>Please note that the crawler can only find links in the received portion of a document.</p><p>The default-value is 0 (no limit).</p></p>
 2442     <ul class="tags">
 2443                 <li><span class="field">section:</span> 5 Limit-settings</li>
 2444                 <li><span class="field">access:</span> public</li>
 2445             </ul>
 2446     
 2447     <div class="method-signature">
 2448         <span class="method-result">bool</span>
 2449         <span class="method-name">
 2450             setContentSizeLimit
 2451         </span>
 2452                     (<span class="var-type">int</span>&nbsp;<span class="var-name">$bytes</span>)
 2453             </div>
 2454     
 2455             <ul class="parameters">
 2456                     <li>
 2457                 <span class="var-type">int</span>
 2458                 <span class="var-name">$bytes</span><span class="var-description">: The limit in bytes.</span>          </li>
 2459                 </ul>
 2460         
 2461             
 2462     </div>
 2463 <a name="methodsetCookieHandling" id="setCookieHandling"><!-- --></a>
 2464 <div class="oddrow">
 2465     
 2466     <div class="method-header">
 2467         <span class="method-title">setCookieHandling</span> (line <span class="line-number">1455</span>)
 2468     </div> 
 2469     
 2470     <!-- ========== Info from phpDoc block ========= -->
 2471 <p class="short-description">Alias for enableCookieHandling()</p>
 2472     <ul class="tags">
 2473                 <li><span class="field">deprecated:</span> Please use enableCookieHandling()</li>
 2474                 <li><span class="field">section:</span> 11 Deprecated</li>
 2475                 <li><span class="field">access:</span> public</li>
 2476             </ul>
 2477     
 2478     <div class="method-signature">
 2479         <span class="method-result">void</span>
 2480         <span class="method-name">
 2481             setCookieHandling
 2482         </span>
 2483                     (<span class="var-type"></span>&nbsp;<span class="var-name">$mode</span>)
 2484             </div>
 2485     
 2486             <ul class="parameters">
 2487                     <li>
 2488                 <span class="var-type"></span>
 2489                 <span class="var-name">$mode</span>         </li>
 2490                 </ul>
 2491         
 2492             
 2493     </div>
 2494 <a name="methodsetFollowMode" id="setFollowMode"><!-- --></a>
 2495 <div class="evenrow">
 2496     
 2497     <div class="method-header">
 2498         <span class="method-title">setFollowMode</span> (line <span class="line-number">1148</span>)
 2499     </div> 
 2500     
 2501     <!-- ========== Info from phpDoc block ========= -->
 2502 <p class="short-description">Sets the basic follow-mode of the crawler.</p>
 2503 <p class="description"><p>The following list explains the supported follow-modes:</p><p><strong>0 - The crawler will follow EVERY link, even if the link leads to a different host or domain.</strong>  If you choose this mode, you really should set a limit to the crawling-process (see limit-options),  otherwise the crawler may crawl the whole WWW!</p><p><strong>1 - The crawler will only follow links that lead to the same domain as the one in the root-url.</strong>  E.g. if the root-url (setURL()) is &quot;http://www.foo.com&quot;, the crawler will follow links to &quot;http://www.foo.com/...&quot;  and &quot;http://bar.foo.com/...&quot;, but not to &quot;http://www.another-domain.com/...&quot;.</p><p><strong>2 - The crawler will only follow links that lead to the same host as the one in the root-url.</strong>  E.g. if the root-url (setURL()) is &quot;http://www.foo.com&quot;, the crawler will ONLY follow links to &quot;http://www.foo.com/...&quot;, but not  to &quot;http://bar.foo.com/...&quot; and &quot;http://www.another-domain.com/...&quot;. <strong>This is the default mode.</strong></p><p><strong>3 - The crawler only follows links to pages or files located in or under the same path as the one of the root-url.</strong>  E.g. if the root-url is &quot;http://www.foo.com/bar/index.html&quot;, the crawler will follow links to &quot;http://www.foo.com/bar/page.html&quot; and  &quot;http://www.foo.com/bar/path/index.html&quot;, but not links to &quot;http://www.foo.com/page.html&quot;.</p></p>
 2504     <ul class="tags">
 2505                 <li><span class="field">section:</span> 1 Basic settings</li>
 2506                 <li><span class="field">access:</span> public</li>
 2507             </ul>
 2508     
 2509     <div class="method-signature">
 2510         <span class="method-result">bool</span>
 2511         <span class="method-name">
 2512             setFollowMode
 2513         </span>
 2514                     (<span class="var-type">int</span>&nbsp;<span class="var-name">$follow_mode</span>)
 2515             </div>
 2516     
 2517             <ul class="parameters">
 2518                     <li>
 2519                 <span class="var-type">int</span>
 2520                 <span class="var-name">$follow_mode</span><span class="var-description">: The basic follow-mode for the crawling-process (0, 1, 2 or 3).</span>         </li>
 2521                 </ul>
 2522         
 2523             
 2524     </div>
 2525 <a name="methodsetFollowRedirects" id="setFollowRedirects"><!-- --></a>
 2526 <div class="oddrow">
 2527     
 2528     <div class="method-header">
 2529         <span class="method-title">setFollowRedirects</span> (line <span class="line-number">1095</span>)
 2530     </div> 
 2531     
 2532     <!-- ========== Info from phpDoc block ========= -->
 2533 <p class="short-description">Defines whether the crawler should follow redirects sent with headers by a webserver or not.</p>
 2534     <ul class="tags">
 2535                 <li><span class="field">section:</span> 10 Other settings</li>
 2536                 <li><span class="field">access:</span> public</li>
 2537             </ul>
 2538     
 2539     <div class="method-signature">
 2540         <span class="method-result">bool</span>
 2541         <span class="method-name">
 2542             setFollowRedirects
 2543         </span>
 2544                     (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
 2545             </div>
 2546     
 2547             <ul class="parameters">
 2548                     <li>
 2549                 <span class="var-type">bool</span>
 2550                 <span class="var-name">$mode</span><span class="var-description">: If TRUE, the crawler will follow header-redirects.                     The default-value is TRUE.</span>         </li>
 2551                 </ul>
 2552         
 2553             
 2554     </div>
 2555 <a name="methodsetFollowRedirectsTillContent" id="setFollowRedirectsTillContent"><!-- --></a>
 2556 <div class="evenrow">
 2557     
 2558     <div class="method-header">
 2559         <span class="method-title">setFollowRedirectsTillContent</span> (line <span class="line-number">1117</span>)
 2560     </div> 
 2561     
 2562     <!-- ========== Info from phpDoc block ========= -->
 2563 <p class="short-description">Defines whether the crawler should follow HTTP-redirects until first content was found, regardless of defined filter-rules and follow-modes.</p>
 2564 <p class="description"><p>Sometimes, when requesting an URL, the first thing the webserver does is sending a redirect to  another location, and sometimes the server of this new location is sending a redirect again  (and so on).  So in the end it's possible that you find the expected content on a totally different host  than expected.</p><p>If you set this option to TRUE, the crawler will follow all these redirects until it finds some content.  If content finally was found, the root-url of the crawling-process will be set to this url and all  defined options (follow-mode, filter-rules etc.) will relate to it from now on.</p></p>
 2565     <ul class="tags">
 2566                 <li><span class="field">section:</span> 10 Other settings</li>
 2567                 <li><span class="field">access:</span> public</li>
 2568             </ul>
 2569     
 2570     <div class="method-signature">
 2571         <span class="method-result">void</span>
 2572         <span class="method-name">
 2573             setFollowRedirectsTillContent
 2574         </span>
 2575                     (<span class="var-type">bool</span>&nbsp;<span class="var-name">$mode</span>)
 2576             </div>
 2577     
 2578             <ul class="parameters">
 2579                     <li>
 2580                 <span class="var-type">bool</span>
 2581                 <span class="var-name">$mode</span><span class="var-description">: If TRUE, the crawler will follow redirects until content was finally found.                    Defaults to TRUE.</span>          </li>
 2582                 </ul>
 2583         
 2584             
 2585     </div>
 2586 <a name="methodsetLinkExtractionTags" id="setLinkExtractionTags"><!-- --></a>
 2587 <div class="oddrow">
 2588     
 2589     <div class="method-header">
 2590         <span class="method-title">setLinkExtractionTags</span> (line <span class="line-number">1508</span>)
 2591     </div> 
 2592     
 2593     <!-- ========== Info from phpDoc block ========= -->
 2594 <p class="short-description">Sets the list of html-tags the crawler should search for links in.</p>
 2595 <p class="description"><p>By default the crawler searches for links in the following html-tags: href, src, url, location, codebase, background, data, profile, action and open.  As soon as the list is set manually, this default list will be overwritten completely.</p><p>Example:  <div class="src-code"><ol><li><div class="src-line"><span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodsetLinkExtractionTags">setLinkExtractionTags</a><span class="src-sym">(</span><span class="src-key">array</span><span class="src-sym">(</span><span class="src-str">&quot;href&quot;</span><span class="src-sym">,&nbsp;</span><span class="src-str">&quot;src&quot;</span><span class="src-sym">))</span><span class="src-sym">;</span></div></li>
 2596 </ol></div>  This setting lets the crawler search for links (only) in &quot;href&quot; and &quot;src&quot;-tags.</p><p>Note: Reducing the number of tags in this list will improve the crawling-performance (a little).</p></p>
 2597     <ul class="tags">
 2598                 <li><span class="field">section:</span> 6 Linkfinding settings</li>
 2599                 <li><span class="field">access:</span> public</li>
 2600             </ul>
 2601     
 2602     <div class="method-signature">
 2603         <span class="method-result">void</span>
 2604         <span class="method-name">
 2605             setLinkExtractionTags
 2606         </span>
 2607                     (<span class="var-type">array</span>&nbsp;<span class="var-name">$tag_array</span>)
 2608             </div>
 2609     
 2610             <ul class="parameters">
 2611                     <li>
 2612                 <span class="var-type">array</span>
 2613                 <span class="var-name">$tag_array</span><span class="var-description">: Numeric array containing the tags.</span>           </li>
 2614                 </ul>
 2615         
 2616             
 2617     </div>
 2618 <a name="methodsetPageLimit" id="setPageLimit"><!-- --></a>
 2619 <div class="evenrow">
 2620     
 2621     <div class="method-header">
 2622         <span class="method-title">setPageLimit</span> (line <span class="line-number">1379</span>)
 2623     </div> 
 2624     
 2625     <!-- ========== Info from phpDoc block ========= -->
 2626 <p class="short-description">Sets a limit to the number of pages/files the crawler should follow.</p>
 2627 <p class="description"><p>If the limit is reached, the crawler stops the crawling-process. The default-value is 0 (no limit).</p></p>
 2628     <ul class="tags">
 2629                 <li><span class="field">section:</span> 5 Limit-settings</li>
 2630                 <li><span class="field">access:</span> public</li>
 2631             </ul>
 2632     
 2633     <div class="method-signature">
 2634         <span class="method-result">void</span>
 2635         <span class="method-name">
 2636             setPageLimit
 2637         </span>
 2638                     (<span class="var-type">int</span>&nbsp;<span class="var-name">$limit</span>, [<span class="var-type">bool</span>&nbsp;<span class="var-name">$only_count_received_documents</span> = <span class="var-default">false</span>])
 2639             </div>
 2640     
 2641             <ul class="parameters">
 2642                     <li>
 2643                 <span class="var-type">int</span>
 2644                 <span class="var-name">$limit</span><span class="var-description">: The limit, set to 0 for no limit (default value).</span>            </li>
 2645                     <li>
 2646                 <span class="var-type">bool</span>
 2647                 <span class="var-name">$only_count_received_documents</span><span class="var-description">: OPTIONAL.                                             TRUE means that only documents the crawler received will be counted.                                             FALSE means that ALL followed and requested pages/files will be counted, even if the content wasn't received.</span>          </li>
 2648                 </ul>
 2649         
 2650             
 2651     </div>
 2652 <a name="methodsetPort" id="setPort"><!-- --></a>
 2653 <div class="oddrow">
 2654     
 2655     <div class="method-header">
 2656         <span class="method-title">setPort</span> (line <span class="line-number">1038</span>)
 2657     </div> 
 2658     
 2659     <!-- ========== Info from phpDoc block ========= -->
 2660 <p class="short-description">Sets the port to connect to for crawling the starting-url set in setUrl().</p>
 2661 <p class="description"><p>The default port is 80.</p><p>Note:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodsetURL">setURL</a><span class="src-sym">(</span><span class="src-str">&quot;http://www.foo.com&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2662 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodsetPort">setPort</a><span class="src-sym">(</span><span class="src-num">443</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2663 </ol></div>  has the same effect as</p><p><div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodsetURL">setURL</a><span class="src-sym">(</span><span class="src-str">&quot;http://www.foo.com:443&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2664 </ol></div></p></p>
 2665     <ul class="tags">
 2666                 <li><span class="field">section:</span> 1 Basic settings</li>
 2667                 <li><span class="field">access:</span> public</li>
 2668             </ul>
 2669     
 2670     <div class="method-signature">
 2671         <span class="method-result">bool</span>
 2672         <span class="method-name">
 2673             setPort
 2674         </span>
 2675                     (<span class="var-type">int</span>&nbsp;<span class="var-name">$port</span>)
 2676             </div>
 2677     
 2678             <ul class="parameters">
 2679                     <li>
 2680                 <span class="var-type">int</span>
 2681                 <span class="var-name">$port</span><span class="var-description">: The port</span>          </li>
 2682                 </ul>
 2683         
 2684             
 2685     </div>
 2686 <a name="methodsetProxy" id="setProxy"><!-- --></a>
 2687 <div class="evenrow">
 2688     
 2689     <div class="method-header">
 2690         <span class="method-title">setProxy</span> (line <span class="line-number">1624</span>)
 2691     </div> 
 2692     
 2693     <!-- ========== Info from phpDoc block ========= -->
 2694 <p class="short-description">Assigns a proxy-server the crawler should use for all HTTP-Requests.</p>
 2695     <ul class="tags">
 2696                 <li><span class="field">section:</span> 10 Other settings</li>
 2697                 <li><span class="field">access:</span> public</li>
 2698             </ul>
 2699     
 2700     <div class="method-signature">
 2701         <span class="method-result">void</span>
 2702         <span class="method-name">
 2703             setProxy
 2704         </span>
 2705                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$proxy_host</span>, <span class="var-type">int</span>&nbsp;<span class="var-name">$proxy_port</span>, [<span class="var-type">string</span>&nbsp;<span class="var-name">$proxy_username</span> = <span class="var-default">null</span>], [<span class="var-type">string</span>&nbsp;<span class="var-name">$proxy_password</span> = <span class="var-default">null</span>])
 2706             </div>
 2707     
 2708             <ul class="parameters">
 2709                     <li>
 2710                 <span class="var-type">string</span>
 2711                 <span class="var-name">$proxy_host</span><span class="var-description">: Hostname or IP of the proxy-server</span>          </li>
 2712                     <li>
 2713                 <span class="var-type">int</span>
 2714                 <span class="var-name">$proxy_port</span><span class="var-description">: Port of the proxy-server</span>            </li>
 2715                     <li>
 2716                 <span class="var-type">string</span>
 2717                 <span class="var-name">$proxy_username</span><span class="var-description">: Optional. The username for proxy-authentication or NULL if no authentication is required.</span>           </li>
 2718                     <li>
 2719                 <span class="var-type">string</span>
 2720                 <span class="var-name">$proxy_password</span><span class="var-description">: Optional. The password for proxy-authentication or NULL if no authentication is required.</span>           </li>
 2721                 </ul>
 2722         
 2723             
 2724     </div>
 2725 <a name="methodsetStreamTimeout" id="setStreamTimeout"><!-- --></a>
 2726 <div class="oddrow">
 2727     
 2728     <div class="method-header">
 2729         <span class="method-title">setStreamTimeout</span> (line <span class="line-number">1664</span>)
 2730     </div> 
 2731     
 2732     <!-- ========== Info from phpDoc block ========= -->
 2733 <p class="short-description">Sets the timeout in seconds for waiting for data on an established server-connection.</p>
 2734 <p class="description"><p>If the connection to a server was established but the server doesn't send data anymore without  closing the connection, the crawler will wait the time given in timeout and then close the connection.</p></p>
 2735     <ul class="tags">
 2736                 <li><span class="field">section:</span> 10 Other settings</li>
 2737                 <li><span class="field">access:</span> public</li>
 2738             </ul>
 2739     
 2740     <div class="method-signature">
 2741         <span class="method-result">bool</span>
 2742         <span class="method-name">
 2743             setStreamTimeout
 2744         </span>
 2745                     (<span class="var-type">int</span>&nbsp;<span class="var-name">$timeout</span>)
 2746             </div>
 2747     
 2748             <ul class="parameters">
 2749                     <li>
 2750                 <span class="var-type">int</span>
 2751                 <span class="var-name">$timeout</span><span class="var-description">: The timeout in seconds, the default-value is 2 seconds.</span>            </li>
 2752                 </ul>
 2753         
 2754             
 2755     </div>
 2756 <a name="methodsetTmpFile" id="setTmpFile"><!-- --></a>
 2757 <div class="evenrow">
 2758     
 2759     <div class="method-header">
 2760         <span class="method-title">setTmpFile</span> (line <span class="line-number">1313</span>)
 2761     </div> 
 2762     
 2763     <!-- ========== Info from phpDoc block ========= -->
 2764 <p class="short-description">Has no function anymore.</p>
 2765 <p class="description"><p>Please use setWorkingDirectory()</p></p>
 2766     <ul class="tags">
 2767                 <li><span class="field">deprecated:</span> This method has no function anymore since v 0.8.</li>
 2768                 <li><span class="field">section:</span> 11 Deprecated</li>
 2769                 <li><span class="field">access:</span> public</li>
 2770             </ul>
 2771     
 2772     <div class="method-signature">
 2773         <span class="method-result">void</span>
 2774         <span class="method-name">
 2775             setTmpFile
 2776         </span>
 2777                     (<span class="var-type"></span>&nbsp;<span class="var-name">$tmp_file</span>)
 2778             </div>
 2779     
 2780             <ul class="parameters">
 2781                     <li>
 2782                 <span class="var-type"></span>
 2783                 <span class="var-name">$tmp_file</span>         </li>
 2784                 </ul>
 2785         
 2786             
 2787     </div>
 2788 <a name="methodsetTrafficLimit" id="setTrafficLimit"><!-- --></a>
 2789 <div class="oddrow">
 2790     
 2791     <div class="method-header">
 2792         <span class="method-title">setTrafficLimit</span> (line <span class="line-number">1419</span>)
 2793     </div> 
 2794     
 2795     <!-- ========== Info from phpDoc block ========= -->
 2796 <p class="short-description">Sets a limit to the number of bytes the crawler should receive altogether during the crawling-process.</p>
 2797 <p class="description"><p>If the limit is reached, the crawler stops the crawling-process.  The default-value is 0 (no limit).</p></p>
 2798     <ul class="tags">
 2799                 <li><span class="field">section:</span> 5 Limit-settings</li>
 2800                 <li><span class="field">access:</span> public</li>
 2801             </ul>
 2802     
 2803     <div class="method-signature">
 2804         <span class="method-result">bool</span>
 2805         <span class="method-name">
 2806             setTrafficLimit
 2807         </span>
 2808                     (<span class="var-type">int</span>&nbsp;<span class="var-name">$bytes</span>, [<span class="var-type">bool</span>&nbsp;<span class="var-name">$complete_requested_files</span> = <span class="var-default">true</span>])
 2809             </div>
 2810     
 2811             <ul class="parameters">
 2812                     <li>
 2813                 <span class="var-type">int</span>
 2814                 <span class="var-name">$bytes</span><span class="var-description">: Maximum number of bytes</span>          </li>
 2815                     <li>
 2816                 <span class="var-type">bool</span>
 2817                 <span class="var-name">$complete_requested_files</span><span class="var-description">: This parameter has no function anymore!</span>           </li>
 2818                 </ul>
 2819         
 2820             
 2821     </div>
 2822 <a name="methodsetURL" id="setURL"><!-- --></a>
 2823 <div class="evenrow">
 2824     
 2825     <div class="method-header">
 2826         <span class="method-title">setURL</span> (line <span class="line-number">1006</span>)
 2827     </div> 
 2828     
 2829     <!-- ========== Info from phpDoc block ========= -->
 2830 <p class="short-description">Sets the URL of the first page the crawler should crawl (root-page).</p>
 2831 <p class="description"><p>The given url may contain the protocol (http://www.foo.com or https://www.foo.com), the port (http://www.foo.com:4500/index.php)  and/or basic-authentication-data (http://loginname:passwd@www.foo.com)</p><p>This url has to be set before calling the <a href="../phpcrawl/PHPCrawler.html#methodgo">go()</a>-method (of course)!  If this root-page doesn't contain any further links, the crawling-process will stop immediately.</p></p>
 2832     <ul class="tags">
 2833                 <li><span class="field">section:</span> 1 Basic settings</li>
 2834                 <li><span class="field">access:</span> public</li>
 2835             </ul>
 2836     
 2837     <div class="method-signature">
 2838         <span class="method-result">bool</span>
 2839         <span class="method-name">
 2840             setURL
 2841         </span>
 2842                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$url</span>)
 2843             </div>
 2844     
 2845             <ul class="parameters">
 2846                     <li>
 2847                 <span class="var-type">string</span>
 2848                 <span class="var-name">$url</span><span class="var-description">: The URL</span>            </li>
 2849                 </ul>
 2850         
 2851             
 2852     </div>
 2853 <a name="methodsetUrlCacheType" id="setUrlCacheType"><!-- --></a>
 2854 <div class="oddrow">
 2855     
 2856     <div class="method-header">
 2857         <span class="method-title">setUrlCacheType</span> (line <span class="line-number">1736</span>)
 2858     </div> 
 2859     
 2860     <!-- ========== Info from phpDoc block ========= -->
 2861 <p class="short-description">Defines what type of cache will be internally used for caching URLs.</p>
 2862 <p class="description"><p>Currently phpcrawl is able to use an in-memory-cache or a SQLite-database-cache for  caching/storing found URLs internally.</p><p>The memory-cache (<a href="../phpcrawl-enums/PHPCrawlerUrlCacheTypes.html">PHPCrawlerUrlCacheTypes</a>::URLCACHE_MEMORY) is recommended for spidering small to medium websites.  It provides better performance, but the php-memory-limit may be hit when too many URLs get added to the cache.  This is the default-setting.</p><p>The SQLite-cache (<a href="../phpcrawl-enums/PHPCrawlerUrlCacheTypes.html">PHPCrawlerUrlCacheTypes</a>::URLCACHE_SQLITE) is recommended for spidering huge websites.  URLs get cached in a SQLite-database-file, so the cache is only limited by available harddisk-space.  To increase performance of the SQLite-cache you may set its location to a shared-memory device like &quot;/dev/shm/&quot;  by using the <a href="../phpcrawl/PHPCrawler.html#methodsetWorkingDirectory">setWorkingDirectory()</a>-method.</p><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodsetUrlCacheType">setUrlCacheType</a><span class="src-sym">(</span><span class="src-id"><a href="../phpcrawl-enums/PHPCrawlerUrlCacheTypes.html">PHPCrawlerUrlCacheTypes</a></span><span class="src-sym">::</span><span class="src-id">URLCACHE_SQLITE</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2863 <li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodsetWorkingDirectory">setWorkingDirectory</a><span class="src-sym">(</span><span class="src-str">&quot;/dev/shm/&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2864 </ol></div></p><p><strong>NOTE:</strong> When using phpcrawl in multi-process-mode (<a href="../phpcrawl/PHPCrawler.html#methodgoMultiProcessed">goMultiProcessed()</a>), the cache-type is automatically set  to PHPCrawlerUrlCacheTypes::URLCACHE_SQLITE.</p></p>
 2865     <ul class="tags">
 2866                 <li><span class="field">section:</span> 1 Basic settings</li>
 2867                 <li><span class="field">access:</span> public</li>
 2868             </ul>
 2869     
 2870     <div class="method-signature">
 2871         <span class="method-result">bool</span>
 2872         <span class="method-name">
 2873             setUrlCacheType
 2874         </span>
 2875                     (<span class="var-type">int</span>&nbsp;<span class="var-name">$url_cache_type</span>)
 2876             </div>
 2877     
 2878             <ul class="parameters">
 2879                     <li>
 2880                 <span class="var-type">int</span>
 2881                 <span class="var-name">$url_cache_type</span><span class="var-description">: <p>1 -&gt; in-memory-cache (default setting)                             2 -&gt; SQLite-database-cache</p><p>Or one of the <a href="../phpcrawl-enums/PHPCrawlerUrlCacheTypes.html">PHPCrawlerUrlCacheTypes</a>::URLCACHE..-constants.</p></span>          </li>
 2882                 </ul>
 2883         
 2884             
 2885     </div>
 2886 <a name="methodsetUserAgentString" id="setUserAgentString"><!-- --></a>
 2887 <div class="evenrow">
 2888     
 2889     <div class="method-header">
 2890         <span class="method-title">setUserAgentString</span> (line <span class="line-number">1560</span>)
 2891     </div> 
 2892     
 2893     <!-- ========== Info from phpDoc block ========= -->
 2894 <p class="short-description">Sets the &quot;User-Agent&quot; identification-string that will be sent with HTTP-requests.</p>
 2895     <ul class="tags">
 2896                 <li><span class="field">section:</span> 10 Other settings</li>
 2897                 <li><span class="field">access:</span> public</li>
 2898             </ul>
 2899     
 2900     <div class="method-signature">
 2901         <span class="method-result">void</span>
 2902         <span class="method-name">
 2903             setUserAgentString
 2904         </span>
 2905                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$user_agent</span>)
 2906             </div>
 2907     
 2908             <ul class="parameters">
 2909                     <li>
 2910                 <span class="var-type">string</span>
 2911                 <span class="var-name">$user_agent</span><span class="var-description">: The user-agent-string. The default-value is &quot;PHPCrawl&quot;.</span>           </li>
 2912                 </ul>
 2913         
 2914             
 2915     </div>
 2916 <a name="methodsetWorkingDirectory" id="setWorkingDirectory"><!-- --></a>
 2917 <div class="oddrow">
 2918     
 2919     <div class="method-header">
 2920         <span class="method-title">setWorkingDirectory</span> (line <span class="line-number">1604</span>)
 2921     </div> 
 2922     
 2923     <!-- ========== Info from phpDoc block ========= -->
 2924 <p class="short-description">Sets the working-directory the crawler should use for storing temporary data.</p>
 2925 <p class="description"><p>Every instance of the crawler needs and creates a temporary directory for storing some  internal data.</p><p>This setting defines which base-directory the crawler will use to store the temporary  directories in. By default, the crawler uses the system's temp-directory as working-directory.  (e.g. &quot;/tmp/&quot; on Linux-systems)</p><p>All temporary directories created in the working-directory will be deleted automatically  after a crawling-process has finished.</p><p>NOTE: To speed up the performance of a crawling-process (especially when using the  SQLite-urlcache), try to set a mounted shared-memory device as working-directory  (e.g. &quot;/dev/shm/&quot; on Debian/Ubuntu-systems).</p><p>Example:  <div class="src-code"><ol><li><div class="src-line">&nbsp;<span class="src-var">$crawler</span><span class="src-sym">-&gt;</span><a href="../phpcrawl/PHPCrawler.html#methodsetWorkingDirectory">setWorkingDirectory</a><span class="src-sym">(</span><span class="src-str">&quot;/tmp/&quot;</span><span class="src-sym">)</span><span class="src-sym">;</span></div></li>
 2926 </ol></div></p></p>
 2927     <ul class="tags">
 2928                 <li><span class="field">return:</span> TRUE on success, otherwise FALSE.</li>
 2929                 <li><span class="field">section:</span> 1 Basic settings</li>
 2930                 <li><span class="field">access:</span> public</li>
 2931             </ul>
 2932     
 2933     <div class="method-signature">
 2934         <span class="method-result">bool</span>
 2935         <span class="method-name">
 2936             setWorkingDirectory
 2937         </span>
 2938                     (<span class="var-type">string</span>&nbsp;<span class="var-name">$directory</span>)
 2939             </div>
 2940     
 2941             <ul class="parameters">
 2942                     <li>
 2943                 <span class="var-type">string</span>
 2944                 <span class="var-name">$directory</span><span class="var-description">: The working-directory</span>            </li>
 2945                 </ul>
 2946         
 2947             
 2948     </div>
 2949 <a name="methodstarControllerProcessLoop" id="starControllerProcessLoop"><!-- --></a>
 2950 <div class="evenrow">
 2951     
 2952     <div class="method-header">
 2953         <span class="method-title">starControllerProcessLoop</span> (line <span class="line-number">475</span>)
 2954     </div> 
 2955     
 2956     <!-- ========== Info from phpDoc block ========= -->
 2957 <p class="short-description">Starts the loop of the controller-process (main-process).</p>
 2958     <ul class="tags">
 2959                 <li><span class="field">access:</span> protected</li>
 2960             </ul>
 2961     
 2962     <div class="method-signature">
 2963         <span class="method-result">void</span>
 2964         <span class="method-name">
 2965             starControllerProcessLoop
 2966         </span>
 2967                 ()
 2968             </div>
 2969     
 2970         
 2971             
 2972     </div>
 2973 <a name="methodstartChildProcessLoop" id="startChildProcessLoop"><!-- --></a>
 2974 <div class="oddrow">
 2975     
 2976     <div class="method-header">
 2977         <span class="method-title">startChildProcessLoop</span> (line <span class="line-number">530</span>)
 2978     </div> 
 2979     
 2980     <!-- ========== Info from phpDoc block ========= -->
 2981 <p class="short-description">Starts the loop of a child-process.</p>
 2982     <ul class="tags">
 2983                 <li><span class="field">access:</span> protected</li>
 2984             </ul>
 2985     
 2986     <div class="method-signature">
 2987         <span class="method-result">void</span>
 2988         <span class="method-name">
 2989             startChildProcessLoop
 2990         </span>
 2991                 ()
 2992             </div>
 2993     
 2994         
 2995             
 2996     </div>
 2997                         
 2998         </div>
 2999     </div>
 3000 
 3001 
 3002     <p class="notes" id="credit">
 3003         Documentation generated on Sun, 20 Jan 2013 21:18:49 +0200 by <a href="http://www.phpdoc.org" target="_blank">phpDocumentor 1.4.4</a>
 3004     </p>
 3005     </div></body>
 3006 </html>