"Fossies" - the Fresh Open Source Software Archive

Member "papayacms-core-6.12.5/src/system/Papaya/Utility/Server/Agent.php" (7 Jul 2020, 8352 Bytes) of package /linux/www/papayacms-core-6.12.5.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) PHP source code syntax highlighting (style: standard) with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "Agent.php", see the Fossies "Dox" file reference documentation.

    1 <?php
    2 /**
    3  * papaya CMS
    4  *
    5  * @copyright 2000-2018 by papayaCMS project - All rights reserved.
    6  * @link http://www.papaya-cms.com/
    7  * @license http://www.gnu.org/licenses/old-licenses/gpl-2.0.html GNU General Public License, version 2
    8  *
    9  *  You can redistribute and/or modify this script under the terms of the GNU General Public
   10  *  License (GPL) version 2, provided that the copyright and license notes, including these
   11  *  lines, remain unmodified. papaya is distributed in the hope that it will be useful, but
   12  *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   13  *  FOR A PARTICULAR PURPOSE.
   14  */
   15 namespace Papaya\Utility\Server;
   16 
   17 /**
   18  * Static utility Class to identify robots by useragent strings
   19  *
   20  * @package Papaya-Library
   21  * @subpackage Util
   22  */
   23 class Agent {
   24   /**
   25    * If it contains one of these substrings it is not an robot
   26    *
   27    * @var array
   28    */
   29   private static $_agents = [
   30     'Lynx', 'FirePHP'
   31   ];
   32 
   33   /**
   34    * If it contains one of these substrings it is an robot
   35    *
   36    * @var array
   37    */
   38   private static $_robots = [
   39     ':robot', 'AOLpress', 'ASPSeek', 'ASPseek', 'Anonymouse.org', 'Ask Jeeves',
   40     'AvantGo', 'BSDSeek', 'BilgiBot', 'Bimbot', 'BladeRunner', 'Blaiz-Bee',
   41     'BlitzBOT', 'BlogBot', 'Bloglines', 'Bookmark-Manager', 'Bot42', 'CCC-178_8',
   42     'CFNetwork', 'COAST WebMaster', 'COAST scan engine', 'Charybdis', 'Checkbot',
   43     'Chilkat', 'CoMaSYSTEM', 'ColdFusion', 'Combine', 'Crawl', 'Cynthia',
   44     'DISCo Pump', 'DLFileWI', 'DLMAN', 'DataparkSearch', 'DeleGate',
   45     'Desktop Sidebar', 'DiaGem', 'DoCoMo', 'Drupal', 'EasyDL', 'EmailSiphon',
   46     'EuripBot', 'Exabot', 'FDM', 'FOTOCHECKER', 'FairAd', 'FavIconizer', 'FavOrg',
   47     'FeedValidator', 'FindoFix', 'Firefly', 'Francis', 'GOFORITBOT', 'GetRight',
   48     'Getweb', 'Gigabot', 'GobbleGobble', 'Goldfire', 'Google', 'Gozilla', 'Gulliver',
   49     'GurujiBot', 'GurujiBot', 'HBZ-Digbib', 'HBZ-Digibib', 'HSE', 'HTMLParser',
   50     'HTTrack', 'HuckleberryBot', 'ISC Systems', 'Ideare', 'IlTrovatore', 'IlseBot',
   51     'ImagesHere', 'Indexer', 'Indy Library', 'Ineta', 'Infoseek', 'InsurancoBot',
   52     'InternetLinkAgent', 'ItsyBitsy', 'JSpindel', 'Jakarta Commons', 'Java',
   53     'Jigsaw', 'KRetrieve', 'Kopernikus', 'LBot', 'LeechGet', 'LiSEn', 'Link Checker',
   54     'Link Sleuth', 'LinkAlarm', 'LinkControl', 'LinkLint', 'LinkMan', 'LinkWalker',
   55     'Live.Com', 'Lorkyll', 'MEGAUPLOAD', 'MELBOT', 'MFC_Tear_Sample', 'MJ12bot',
   56     'MMHttp', 'MS Search', 'MSRBOT', 'MVAClient', 'MaSagool', 'Maoch', 'Mercator',
   57     'MetaGer_PreChecker', 'MetagerBot', 'Microsoft Data Access',
   58     'Microsoft Office Protocol Discovery', 'Microsoft URL Control', 'Missigua',
   59     'MnoGoSearch', 'Mo College', 'Mozzarella', 'My-Bot', 'MyEngines-Bot', 'MySource',
   60     'NECBot', 'NG-Search', 'NG/1.0', 'NG/2.0', 'NPBot', 'NaverBot', 'NetObjects',
   61     'NetResearchServer', 'NetSprint', 'Netcraft', 'Netluchs', 'Netprospector',
   62     'NextGenSearchBot', 'Norton', 'NuSpidr', 'Nutch', 'OPen sourfce retriver',
   63     'OctBot', 'Octora', 'Offline Explorer', 'OmniExplorer_Bot', 'Openfind', 'Optimizer',
   64     'Oracle Ultra Search', 'Oracle Ultra Search', 'PEAR', 'PHOTO CHECK', 'PHP',
   65     'POE-Component-Client', 'Pagebull', 'Perl', 'PictureOfInternet',
   66     'Pluck Soap Client', 'Plucker', 'Plumtree', 'Poirot', 'Pompos', 'PostFavorites',
   67     'Powermarks', 'PuxaRapido', 'PycURL', 'Python-urllib', 'QMina', 'RAMPyBot',
   68     'RPT-HTTPClient', 'RSSOwl', 'RealDownload', 'Renderer', 'Robot', 'Robozilla',
   69     'SAcc', 'SEOsearch', 'SMBot', 'STEROID', 'ScanWebBot', 'Scivias', 'Scooter',
   70     'ScoutAbout', 'Search', 'SearchTone', 'Searcher', 'Seekbot', 'Semager', 'shelob',
   71     'Shrook', 'SignSite', 'SiteBar', 'SiteSucker', 'SiteXpert', 'Skywalker', 'Slurp',
   72     'SlySearch', 'SmartDownload', 'Snapbot', 'Snappy', 'Snoopy', 'Spider', 'Spinne',
   73     'SquidClamAV_Redirector', 'Star Downloader', 'Steeler', 'Steganos', 'SumeetBot',
   74     'SuperBot', 'SurferX', 'SurveyBot', 'SygolBot', 'SynooBot', 'Szook', 'T-Online',
   75     'TCF', 'Tagyu', 'Tcl http client', 'TeamSoft', 'Teleport', 'Teradex Mapper',
   76     'Tkensaku', 'Touche', 'Twiceler', '\URI::Fetch', 'URL Validator', 'URLBase',
   77     'Ultraseek', 'UniversalFeedParser', 'Vagabondo', 'Validome XML-Validator',
   78     'Vayala', 'Verity-URL-Gateway', 'Viking', 'W3C_Validator', 'WWW-Mechanize',
   79     'WWWC', 'WWWOFFLE', 'Wapsilon', 'Watchfire WebXM', 'Web Downloader',
   80     'WebCapture', 'WebCopier', 'WebCorp', 'WebDAV', 'WebDownloader', 'WebImages',
   81     'WebReaper', 'WebRepository', 'WebSnatcher', 'WebStripper', 'WebTrends',
   82     'WebWasher', 'WebarooBot', 'Webshuttle', 'Webster Pro', 'Webverzeichnis.de',
   83     'West Wind Internet Protocols', 'Wget', 'WhizBang', 'Whizbang',
   84     'Wildsoft Surfer', 'WinSysClean', 'WinSysClean', 'WordPress', 'XenTarY',
   85     'Xenu Link Sleuth', 'XoviBot', 'Yahoo Pipes', 'Yahoo! Mindset', 'YooW!', 'ZyBorg',
   86     'agadine', 'aipbot', 'appie', 'asterias', 'bigfoot.com', 'blogchecker',
   87     'blogsear.ch', 'bot', 'bumblebee@relevare.com', 'cHAINsAW massacre', 'cfetch',
   88     'cometrics-bot', 'cosmos', 'crawl', 'csci', 'curiosity', 'curl', 'db/0.2; spc',
   89     'eCatch', 'eagle', 'ejupiter', 'eltopi', 'facebookexternalhit', 'findlinks', 'flunky',
   90     'fmII URL validator', 'gazz', 'genieBot', 'gnome-vfs', 'gonzo',
   91     'headbangers.info', 'ht://check', 'htdig', 'http://putf.info/', 'httpclient',
   92     'httpunit', 'iOpus', 'iSiloX', 'ia_archiver', 'icerocket', 'kykapeky',
   93     'lanshanbot', 'larbin', 'libcurl', 'libwww', 'linkchecker.sourceforge',
   94     'lithopssoft.com', 'lwp', 'medical-info.de', 'miniRank', 'mnogo',
   95     'mnogosearch-dimensional', 'moget', 'msnbot', 'mylinkcheck', 'mysmutsearch',
   96     'nagios-plugins',
   97     'nestListener', 'nestReader', 'netforex.org', 'noyona', 'oegp', 'ozelot',
   98     'page_verifier', 'panscient.com', 'papaya-Benchmarking-Tool', 'penthesila',
   99     'penthesilea', 'petitsage.fr', 'playstarmusic.com', 'pmafind',
  100     'pressemitteilung.ws', 'psbot', 'puf', 'redax', 'reifier', 'riba-it.de',
  101     'seo.ag', 'sitecheck.internetseer.com', 'spider', 'suchbaer.de', 'sun4u',
  102     'szukaj', 'teoma', 'thumbshots.de', 'topicblogs', 'troovziBot',
  103     'vias.ncsa.uiuc.edu', 'voyager', 'w3development', 'w3mir', 'webbot',
  104     'webcollage', 'webmeasurement-bot', 'www.adressendeutschland.de',
  105     'www.anonymous.com', 'www.walhello.com', 'wwwster', 'yacy.net', 'yahoo.com',
  106     'zero-knowledge', 'flash mediaserver'
  107   ];
  108 
  109   /**
  110    * Cache for robot identifications (true/false)
  111    */
  112   private static $_cache = [];
  113 
  114   /**
  115    * Fetch the user agent from $_SERVER['HTTP_USER_AGENT'].
  116    *
  117    * @param string
  118    *
  119    * @return string
  120    */
  121   public static function get() {
  122     return !empty($_SERVER['HTTP_USER_AGENT'])
  123       ? $_SERVER['HTTP_USER_AGENT'] : '';
  124   }
  125 
  126   /**
  127    * Check if the given user agent string is an robot.
  128    *
  129    * If the user agent string is empty, $_SERVER['HTTP_USER_AGENT'] is used.
  130    *
  131    * The result is cached in the class, it is possible to call this method without to much overhead
  132    * for the checks.
  133    *
  134    * @param string $userAgent
  135    * @param bool $useCache
  136    * @param bool
  137    *
  138    * @return bool
  139    */
  140   public static function isRobot($userAgent = '', $useCache = TRUE) {
  141     if (empty($userAgent)) {
  142       $userAgent = self::get();
  143     }
  144     if (!empty($userAgent)) {
  145       if ($useCache && isset(self::$_cache[$userAgent])) {
  146         return self::$_cache[$userAgent];
  147       }
  148       return self::$_cache[$userAgent] = self::_checkAgentIsRobot($userAgent);
  149     }
  150     return FALSE;
  151   }
  152 
  153   /**
  154    * Check if the given user agent string is an robot.
  155    *
  156    * This method checks the user agent string agains the two internal lists of user agents and
  157    * robots.
  158    *
  159    * @param string $userAgent
  160    * @param bool
  161    *
  162    * @return bool
  163    */
  164   private static function _checkAgentIsRobot($userAgent) {
  165     if (self::_checkAgainstList($userAgent, self::$_agents)) {
  166       return FALSE;
  167     }
  168     return self::_checkAgainstList($userAgent, self::$_robots);
  169   }
  170 
  171   /**
  172    * Check if the user agent contains one of the identifier strings in the list.
  173    *
  174    * @param string $userAgent
  175    * @param array $list
  176    *
  177    * @return bool
  178    */
  179   private static function _checkAgainstList($userAgent, $list) {
  180     foreach ($list as $pattern) {
  181       if (FALSE !== \strpos($userAgent, $pattern)) {
  182         return TRUE;
  183       }
  184     }
  185     return FALSE;
  186   }
  187 }