"Fossies" - the Fresh Open Source Software Archive

Member "eric6-20.8/eric/eric6/WebBrowser/AdBlock/AdBlockMatcher.py" (1 Jan 2020, 7741 Bytes) of package /linux/misc/eric6-20.8.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "AdBlockMatcher.py", see the Fossies "Dox" file reference documentation.

    1 # -*- coding: utf-8 -*-
    2 
    3 # Copyright (c) 2017 - 2020 Detlev Offenbach <detlev@die-offenbachs.de>
    4 #
    5 
    6 """
    7 Module implementing the AdBlock matcher.
    8 """
    9 
   10 
   11 from PyQt5.QtCore import QObject
   12 
   13 from .AdBlockSearchTree import AdBlockSearchTree
   14 from .AdBlockRule import AdBlockRule, AdBlockRuleOption
   15 
   16 
   17 class AdBlockMatcher(QObject):
   18     """
   19     Class implementing the AdBlock matcher.
   20     """
   21     def __init__(self, manager):
   22         """
   23         Constructor
   24         
   25         @param manager reference to the AdBlock manager object
   26         @type AdBlockManager
   27         """
   28         super(AdBlockMatcher, self).__init__(manager)
   29         
   30         self.__manager = manager
   31         
   32         self.__createdRules = []
   33         self.__networkExceptionRules = []
   34         self.__networkBlockRules = []
   35         self.__domainRestrictedCssRules = []
   36         self.__documentRules = []
   37         self.__elemhideRules = []
   38         
   39         self.__elementHidingRules = ""
   40         self.__networkBlockTree = AdBlockSearchTree()
   41         self.__networkExceptionTree = AdBlockSearchTree()
   42     
   43     def match(self, request, urlDomain, urlString):
   44         """
   45         Public method to match a request.
   46         
   47         @param request URL request to be matched
   48         @type QWebEngineUrlRequestInfo
   49         @param urlDomain domain of the URL
   50         @type str
   51         @param urlString requested URL as a lowercase string
   52         @type str
   53         @return reference to the matched rule
   54         @rtype AdBlockRule
   55         """
   56         # exception rules
   57         if self.__networkExceptionTree.find(request, urlDomain, urlString):
   58             return None
   59         
   60         for rule in self.__networkExceptionRules:
   61             if rule.networkMatch(request, urlDomain, urlString):
   62                 return None
   63         
   64         # block rules
   65         rule = self.__networkBlockTree.find(request, urlDomain, urlString)
   66         if rule:
   67             return rule
   68         
   69         for rule in self.__networkBlockRules:
   70             if rule.networkMatch(request, urlDomain, urlString):
   71                 return rule
   72         
   73         return None
   74     
   75     def adBlockDisabledForUrl(self, url):
   76         """
   77         Public method to check, if AdBlock is disabled for the given URL.
   78         
   79         @param url URL to check
   80         @type QUrl
   81         @return flag indicating disabled state
   82         @rtype bool
   83         """
   84         for rule in self.__documentRules:
   85             if rule.urlMatch(url):
   86                 return True
   87         
   88         return False
   89     
   90     def elemHideDisabledForUrl(self, url):
   91         """
   92         Public method to check, if element hiding is disabled for the given
   93         URL.
   94         
   95         @param url URL to check
   96         @type QUrl
   97         @return flag indicating disabled state
   98         @rtype bool
   99         """
  100         if self.adBlockDisabledForUrl(url):
  101             return True
  102         
  103         for rule in self.__elemhideRules:
  104             if rule.urlMatch(url):
  105                 return True
  106         
  107         return False
  108     
  109     def elementHidingRules(self):
  110         """
  111         Public method to get the element hiding rules.
  112         
  113         @return element hiding rules
  114         @rtype str
  115         """
  116         return self.__elementHidingRules
  117     
  118     def elementHidingRulesForDomain(self, domain):
  119         """
  120         Public method to get the element hiding rules for the given domain.
  121         
  122         @param domain domain name
  123         @type str
  124         @return element hiding rules
  125         @rtype str
  126         """
  127         rules = ""
  128         addedRulesCount = 0
  129         
  130         for rule in self.__domainRestrictedCssRules:
  131             if not rule.matchDomain(domain):
  132                 continue
  133             
  134             if addedRulesCount == 1000:
  135                 rules += rule.cssSelector()
  136                 rules += "{display:none !important;}\n"
  137                 addedRulesCount = 0
  138             else:
  139                 rules += rule.cssSelector() + ","
  140                 addedRulesCount += 1
  141         
  142         if addedRulesCount != 0:
  143             rules = rules[:-1]
  144             rules += "{display:none !important;}\n"
  145         
  146         return rules
  147     
  148     def update(self):
  149         """
  150         Public slot to update the internal state.
  151         """
  152         self.clear()
  153         
  154         cssRulesDict = {}
  155         exceptionCssRules = []
  156         
  157         for subscription in self.__manager.subscriptions():
  158             if subscription.isEnabled():
  159                 for rule in subscription.allRules():
  160                     # Don't add internally disabled rules to the cache
  161                     if rule.isInternalDisabled():
  162                         continue
  163                     
  164                     if rule.isCSSRule():
  165                         # Only enabled CSS rules are added to the cache because
  166                         # there is no enabled/disabled check on match. They are
  167                         # directly embedded to pages.
  168                         if not rule.isEnabled():
  169                             continue
  170                         
  171                         if rule.isException():
  172                             exceptionCssRules.append(rule)
  173                         else:
  174                             cssRulesDict[rule.cssSelector()] = rule
  175                     elif rule.isDocument():
  176                         self.__documentRules.append(rule)
  177                     elif rule.isElementHiding():
  178                         self.__elemhideRules.append(rule)
  179                     elif rule.isException():
  180                         if not self.__networkExceptionTree.add(rule):
  181                             self.__networkBlockRules.append(rule)
  182                     else:
  183                         if not self.__networkBlockTree.add(rule):
  184                             self.__networkBlockRules.append(rule)
  185         
  186         for rule in exceptionCssRules:
  187             try:
  188                 originalRule = cssRulesDict[rule.cssSelector()]
  189             except KeyError:
  190                 # If there is no such selector, the exception does nothing.
  191                 continue
  192             
  193             copiedRule = AdBlockRule()
  194             copiedRule.copyFrom(originalRule)
  195             copiedRule.setOption(AdBlockRuleOption.DomainRestrictedOption)
  196             copiedRule.addBlockedDomains(rule.allowedDomains())
  197             
  198             cssRulesDict[rule.cssSelector()] = copiedRule
  199             self.__createdRules.append(copiedRule)
  200         
  201         # Excessive amount of selectors for one CSS rule is not what the
  202         # rendering engine likes. So split them up by 1.000 selectors.
  203         hidingRulesCount = 0
  204         for key in cssRulesDict:
  205             rule = cssRulesDict[key]
  206             
  207             if rule.isDomainRestricted():
  208                 self.__domainRestrictedCssRules.append(rule)
  209             elif hidingRulesCount == 1000:
  210                 self.__elementHidingRules += rule.cssSelector()
  211                 self.__elementHidingRules += "{display:none !important;} "
  212                 hidingRulesCount = 0
  213             else:
  214                 self.__elementHidingRules += rule.cssSelector() + ","
  215                 hidingRulesCount += 1
  216         
  217         if hidingRulesCount != 0:
  218             self.__elementHidingRules = self.__elementHidingRules[:-1]
  219             self.__elementHidingRules += "{display:none !important;} "
  220     
  221     def clear(self):
  222         """
  223         Public slot to clear the internal structures.
  224         """
  225         self.__createdRules = []
  226         self.__networkExceptionRules = []
  227         self.__networkBlockRules = []
  228         self.__domainRestrictedCssRules = []
  229         self.__documentRules = []
  230         self.__elemhideRules = []
  231         
  232         self.__elementHidingRules = ""
  233         self.__networkBlockTree.clear()
  234         self.__networkExceptionTree.clear()