"Fossies" - the Fresh Open Source Software Archive

Member "xhtml2pdf-0.2.5/xhtml2pdf/w3c/cssParser.py" (8 Oct 2020, 42918 Bytes) of package /linux/www/xhtml2pdf-0.2.5.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file. For more information about "cssParser.py" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.2.4_vs_0.2.5.

    1 #!/usr/bin/env python
    2 
    3 ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    4 ##~ Copyright (C) 2002-2004  TechGame Networks, LLC.
    5 ##~
    6 ##~ This library is free software; you can redistribute it and/or
    7 ##~ modify it under the terms of the BSD style License as found in the
    8 ##~ LICENSE file included with this distribution.
    9 ##
   10 ##  Modified by Dirk Holtwick <holtwick@web.de>, 2007-2008
   11 ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   12 from __future__ import absolute_import
   13 
   14 
   15 # Added by benjaoming to fix python3 tests
   16 from __future__ import unicode_literals
   17 
   18 import xhtml2pdf.default
   19 from xhtml2pdf.util import getSize
   20 from reportlab.lib.pagesizes import landscape
   21 
   22 try:
   23     from future_builtins import filter
   24 except ImportError:
   25     pass
   26 
   27 """CSS-2.1 parser.
   28 
   29 The CSS 2.1 Specification this parser was derived from can be found at http://www.w3.org/TR/CSS21/
   30 
   31 Primary Classes:
   32     * CSSParser
   33         Parses CSS source forms into results using a Builder Pattern.  Must
   34         be given a concrete implementation of CSSBuilderAbstract.
   35 
   36     * CSSBuilderAbstract
   37         Outlines the interface between CSSParser and its rule-builder.
   38         Compose CSSParser with a concrete implementation of the builder to get
   39         usable results from the CSS parser.
   40 
   41 Dependencies:
   42     python 2.3 (or greater)
   43     re
   44 """
   45 
   46 import re
   47 import six
   48 
   49 from . import cssSpecial
   50 
   51 
   52 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   53 #~ Definitions
   54 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   55 
   56 def isAtRuleIdent(src, ident):
   57     return re.match(r'^@' + ident + r'\s*', src)
   58 
   59 
   60 def stripAtRuleIdent(src):
   61     return re.sub(r'^@[a-z\-]+\s*', '', src)
   62 
   63 
   64 class CSSSelectorAbstract(object):
   65     """Outlines the interface between CSSParser and it's rule-builder for selectors.
   66 
   67     CSSBuilderAbstract.selector and CSSBuilderAbstract.combineSelectors must
   68     return concrete implementations of this abstract.
   69 
   70     See css.CSSMutableSelector for an example implementation.
   71     """
   72 
   73 
   74     def addHashId(self, hashId):
   75         raise NotImplementedError('Subclass responsibility')
   76 
   77 
   78     def addClass(self, class_):
   79         raise NotImplementedError('Subclass responsibility')
   80 
   81 
   82     def addAttribute(self, attrName):
   83         raise NotImplementedError('Subclass responsibility')
   84 
   85 
   86     def addAttributeOperation(self, attrName, op, attrValue):
   87         raise NotImplementedError('Subclass responsibility')
   88 
   89 
   90     def addPseudo(self, name):
   91         raise NotImplementedError('Subclass responsibility')
   92 
   93 
   94     def addPseudoFunction(self, name, value):
   95         raise NotImplementedError('Subclass responsibility')
   96 
   97 
   98 class CSSBuilderAbstract(object):
   99     """Outlines the interface between CSSParser and it's rule-builder.  Compose
  100     CSSParser with a concrete implementation of the builder to get usable
  101     results from the CSS parser.
  102 
  103     See css.CSSBuilder for an example implementation
  104     """
  105 
  106 
  107     def setCharset(self, charset):
  108         raise NotImplementedError('Subclass responsibility')
  109 
  110 
  111     #~ css results ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  112 
  113     def beginStylesheet(self):
  114         raise NotImplementedError('Subclass responsibility')
  115 
  116 
  117     def stylesheet(self, elements):
  118         raise NotImplementedError('Subclass responsibility')
  119 
  120 
  121     def endStylesheet(self):
  122         raise NotImplementedError('Subclass responsibility')
  123 
  124 
  125     def beginInline(self):
  126         raise NotImplementedError('Subclass responsibility')
  127 
  128 
  129     def inline(self, declarations):
  130         raise NotImplementedError('Subclass responsibility')
  131 
  132 
  133     def endInline(self):
  134         raise NotImplementedError('Subclass responsibility')
  135 
  136 
  137     def ruleset(self, selectors, declarations):
  138         raise NotImplementedError('Subclass responsibility')
  139 
  140 
  141     #~ css namespaces ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  142 
  143     def resolveNamespacePrefix(self, nsPrefix, name):
  144         raise NotImplementedError('Subclass responsibility')
  145 
  146 
  147     #~ css @ directives ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  148 
  149     def atCharset(self, charset):
  150         raise NotImplementedError('Subclass responsibility')
  151 
  152 
  153     def atImport(self, import_, mediums, cssParser):
  154         raise NotImplementedError('Subclass responsibility')
  155 
  156 
  157     def atNamespace(self, nsPrefix, uri):
  158         raise NotImplementedError('Subclass responsibility')
  159 
  160 
  161     def atMedia(self, mediums, ruleset):
  162         raise NotImplementedError('Subclass responsibility')
  163 
  164 
  165     def atPage(self, page, pseudopage, declarations):
  166         raise NotImplementedError('Subclass responsibility')
  167 
  168 
  169     def atFontFace(self, declarations):
  170         raise NotImplementedError('Subclass responsibility')
  171 
  172 
  173     def atIdent(self, atIdent, cssParser, src):
  174         return src, NotImplemented
  175 
  176 
  177     #~ css selectors ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  178 
  179     def combineSelectors(self, selectorA, combiner, selectorB):
  180         """Return value must implement CSSSelectorAbstract"""
  181         raise NotImplementedError('Subclass responsibility')
  182 
  183 
  184     def selector(self, name):
  185         """Return value must implement CSSSelectorAbstract"""
  186         raise NotImplementedError('Subclass responsibility')
  187 
  188 
  189     #~ css declarations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  190 
  191     def property(self, name, value, important=False):
  192         raise NotImplementedError('Subclass responsibility')
  193 
  194 
  195     def combineTerms(self, termA, combiner, termB):
  196         raise NotImplementedError('Subclass responsibility')
  197 
  198 
  199     def termIdent(self, value):
  200         raise NotImplementedError('Subclass responsibility')
  201 
  202 
  203     def termNumber(self, value, units=None):
  204         raise NotImplementedError('Subclass responsibility')
  205 
  206 
  207     def termRGB(self, value):
  208         raise NotImplementedError('Subclass responsibility')
  209 
  210 
  211     def termURI(self, value):
  212         raise NotImplementedError('Subclass responsibility')
  213 
  214 
  215     def termString(self, value):
  216         raise NotImplementedError('Subclass responsibility')
  217 
  218 
  219     def termUnicodeRange(self, value):
  220         raise NotImplementedError('Subclass responsibility')
  221 
  222 
  223     def termFunction(self, name, value):
  224         raise NotImplementedError('Subclass responsibility')
  225 
  226 
  227     def termUnknown(self, src):
  228         raise NotImplementedError('Subclass responsibility')
  229 
  230 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  231 #~ CSS Parser
  232 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  233 
  234 class CSSParseError(Exception):
  235     src = None
  236     ctxsrc = None
  237     fullsrc = None
  238     inline = False
  239     srcCtxIdx = None
  240     srcFullIdx = None
  241     ctxsrcFullIdx = None
  242 
  243 
  244     def __init__(self, msg, src, ctxsrc=None):
  245         Exception.__init__(self, msg)
  246         self.src = src
  247         self.ctxsrc = ctxsrc or src
  248         if self.ctxsrc:
  249             self.srcCtxIdx = self.ctxsrc.find(self.src)
  250             if self.srcCtxIdx < 0:
  251                 del self.srcCtxIdx
  252 
  253 
  254     def __str__(self):
  255         if self.ctxsrc:
  256             return Exception.__str__(self) + ':: (' + repr(self.ctxsrc[:self.srcCtxIdx]) + ', ' + repr(
  257                 self.ctxsrc[self.srcCtxIdx:self.srcCtxIdx + 20]) + ')'
  258         else:
  259             return Exception.__str__(self) + ':: ' + repr(self.src[:40])
  260 
  261 
  262     def setFullCSSSource(self, fullsrc, inline=False):
  263         self.fullsrc = fullsrc
  264         if type(self.fullsrc) == six.binary_type:
  265             self.fullsrc = six.text_type(self.fullsrc, 'utf-8')
  266         if inline:
  267             self.inline = inline
  268         if self.fullsrc:
  269             self.srcFullIdx = self.fullsrc.find(self.src)
  270             if self.srcFullIdx < 0:
  271                 del self.srcFullIdx
  272             self.ctxsrcFullIdx = self.fullsrc.find(self.ctxsrc)
  273             if self.ctxsrcFullIdx < 0:
  274                 del self.ctxsrcFullIdx
  275 
  276 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  277 
  278 class CSSParser(object):
  279     """CSS-2.1 parser dependent only upon the re module.
  280 
  281     Implemented directly from http://www.w3.org/TR/CSS21/grammar.html
  282     Tested with some existing CSS stylesheets for portability.
  283 
  284     CSS Parsing API:
  285         * setCSSBuilder()
  286             To set your concrete implementation of CSSBuilderAbstract
  287 
  288         * parseFile()
  289             Use to parse external stylesheets using a file-like object
  290 
  291             >>> cssFile = open('test.css', 'r')
  292             >>> stylesheets = myCSSParser.parseFile(cssFile)
  293 
  294         * parse()
  295             Use to parse embedded stylesheets using source string
  296 
  297             >>> cssSrc = '''
  298                 body,body.body {
  299                     font: 110%, "Times New Roman", Arial, Verdana, Helvetica, serif;
  300                     background: White;
  301                     color: Black;
  302                 }
  303                 a {text-decoration: underline;}
  304             '''
  305             >>> stylesheets = myCSSParser.parse(cssSrc)
  306 
  307         * parseInline()
  308             Use to parse inline stylesheets using attribute source string
  309 
  310             >>> style = 'font: 110%, "Times New Roman", Arial, Verdana, Helvetica, serif; background: White; color: Black'
  311             >>> stylesheets = myCSSParser.parseInline(style)
  312 
  313         * parseAttributes()
  314             Use to parse attribute string values into inline stylesheets
  315 
  316             >>> stylesheets = myCSSParser.parseAttributes(
  317                     font='110%, "Times New Roman", Arial, Verdana, Helvetica, serif',
  318                     background='White',
  319                     color='Black')
  320 
  321         * parseSingleAttr()
  322             Use to parse a single string value into a CSS expression
  323 
  324             >>> fontValue = myCSSParser.parseSingleAttr('110%, "Times New Roman", Arial, Verdana, Helvetica, serif')
  325     """
  326 
  327     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  328     #~ Constants / Variables / Etc.
  329     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  330 
  331     ParseError = CSSParseError
  332 
  333     AttributeOperators = ['=', '~=', '|=', '&=', '^=', '!=', '<>']
  334     SelectorQualifiers = ('#', '.', '[', ':')
  335     SelectorCombiners = ['+', '>']
  336     ExpressionOperators = ('/', '+', ',')
  337 
  338     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  339     #~ Regular expressions
  340     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  341 
  342     if True: # makes the following code foldable
  343         _orRule = lambda *args: '|'.join(args)
  344         _reflags = re.I | re.M | re.U
  345         i_hex = '[0-9a-fA-F]'
  346         i_nonascii = '[\200-\377]'
  347         i_unicode = '\\\\(?:%s){1,6}\s?' % i_hex
  348         i_escape = _orRule(i_unicode, '\\\\[ -~\200-\377]')
  349         # i_nmstart = _orRule('[A-Za-z_]', i_nonascii, i_escape)
  350         i_nmstart = _orRule('\-[^0-9]|[A-Za-z_]', i_nonascii,
  351                             i_escape) # XXX Added hyphen, http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier
  352         i_nmchar = _orRule('[-0-9A-Za-z_]', i_nonascii, i_escape)
  353         i_ident = '((?:%s)(?:%s)*)' % (i_nmstart, i_nmchar)
  354         re_ident = re.compile(i_ident, _reflags)
  355         # Caution: treats all characters above 0x7f as legal for an identifier.
  356         i_unicodeid = r'([^\u0000-\u007f]+)'
  357         re_unicodeid = re.compile(i_unicodeid, _reflags)
  358         i_unicodestr1 = r'(\'[^\u0000-\u007f]+\')'
  359         i_unicodestr2 = r'(\"[^\u0000-\u007f]+\")'
  360         i_unicodestr = _orRule(i_unicodestr1, i_unicodestr2)
  361         re_unicodestr = re.compile(i_unicodestr, _reflags)
  362         i_element_name = '((?:%s)|\*)' % (i_ident[1:-1],)
  363         re_element_name = re.compile(i_element_name, _reflags)
  364         i_namespace_selector = '((?:%s)|\*|)\|(?!=)' % (i_ident[1:-1],)
  365         re_namespace_selector = re.compile(i_namespace_selector, _reflags)
  366         i_class = '\\.' + i_ident
  367         re_class = re.compile(i_class, _reflags)
  368         i_hash = '#((?:%s)+)' % i_nmchar
  369         re_hash = re.compile(i_hash, _reflags)
  370         i_rgbcolor = '(#%s{8}|#%s{6}|#%s{3})' % (i_hex, i_hex, i_hex)
  371         re_rgbcolor = re.compile(i_rgbcolor, _reflags)
  372         i_nl = '\n|\r\n|\r|\f'
  373         i_escape_nl = '\\\\(?:%s)' % i_nl
  374         i_string_content = _orRule('[\t !#$%&(-~]', i_escape_nl, i_nonascii, i_escape)
  375         i_string1 = '\"((?:%s|\')*)\"' % i_string_content
  376         i_string2 = '\'((?:%s|\")*)\'' % i_string_content
  377         i_string = _orRule(i_string1, i_string2)
  378         re_string = re.compile(i_string, _reflags)
  379         i_uri = ('url\\(\s*(?:(?:%s)|((?:%s)+))\s*\\)'
  380                  % (i_string, _orRule('[!#$%&*-~]', i_nonascii, i_escape)))
  381         # XXX For now
  382         # i_uri = '(url\\(.*?\\))'
  383         re_uri = re.compile(i_uri, _reflags)
  384         i_num = '(([-+]?[0-9]+(?:\\.[0-9]+)?)|([-+]?\\.[0-9]+))' # XXX Added out paranthesis, because e.g. .5em was not parsed correctly
  385         re_num = re.compile(i_num, _reflags)
  386         i_unit = '(%%|%s)?' % i_ident
  387         re_unit = re.compile(i_unit, _reflags)
  388         i_function = i_ident + '\\('
  389         re_function = re.compile(i_function, _reflags)
  390         i_functionterm = '[-+]?' + i_function
  391         re_functionterm = re.compile(i_functionterm, _reflags)
  392         i_unicoderange1 = "(?:U\\+%s{1,6}-%s{1,6})" % (i_hex, i_hex)
  393         i_unicoderange2 = "(?:U\\+\?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}(\?{0,3}|{h}(\?{0,2}|{h}(\??|{h}))))))"
  394         i_unicoderange = i_unicoderange1 # '(%s|%s)' % (i_unicoderange1, i_unicoderange2)
  395         re_unicoderange = re.compile(i_unicoderange, _reflags)
  396 
  397         # i_comment = '(?:\/\*[^*]*\*+([^/*][^*]*\*+)*\/)|(?://.*)'
  398         # gabriel: only C convention for comments is allowed in CSS
  399         i_comment = '(?:\/\*[^*]*\*+([^/*][^*]*\*+)*\/)'
  400         re_comment = re.compile(i_comment, _reflags)
  401         i_important = '!\s*(important)'
  402         re_important = re.compile(i_important, _reflags)
  403         del _orRule
  404 
  405     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  406     #~ Public
  407     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  408 
  409     def __init__(self, cssBuilder=None):
  410         self.setCSSBuilder(cssBuilder)
  411 
  412 
  413     #~ CSS Builder to delegate to ~~~~~~~~~~~~~~~~~~~~~~~~
  414 
  415     def getCSSBuilder(self):
  416         """A concrete instance implementing CSSBuilderAbstract"""
  417         return self._cssBuilder
  418 
  419 
  420     def setCSSBuilder(self, cssBuilder):
  421         """A concrete instance implementing CSSBuilderAbstract"""
  422         self._cssBuilder = cssBuilder
  423 
  424 
  425     cssBuilder = property(getCSSBuilder, setCSSBuilder)
  426 
  427     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  428     #~ Public CSS Parsing API
  429     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  430 
  431     def parseFile(self, srcFile, closeFile=False):
  432         """Parses CSS file-like objects using the current cssBuilder.
  433         Use for external stylesheets."""
  434 
  435         try:
  436             result = self.parse(srcFile.read())
  437         finally:
  438             if closeFile:
  439                 srcFile.close()
  440         return result
  441 
  442 
  443     def parse(self, src):
  444         """Parses CSS string source using the current cssBuilder.
  445         Use for embedded stylesheets."""
  446 
  447         self.cssBuilder.beginStylesheet()
  448         try:
  449 
  450             # XXX Some simple preprocessing
  451             src = cssSpecial.cleanupCSS(src)
  452 
  453             try:
  454                 src, stylesheet = self._parseStylesheet(src)
  455             except self.ParseError as err:
  456                 err.setFullCSSSource(src)
  457                 raise
  458         finally:
  459             self.cssBuilder.endStylesheet()
  460         return stylesheet
  461 
  462 
  463     def parseInline(self, src):
  464         """Parses CSS inline source string using the current cssBuilder.
  465         Use to parse a tag's 'sytle'-like attribute."""
  466 
  467         self.cssBuilder.beginInline()
  468         try:
  469             try:
  470                 src, properties = self._parseDeclarationGroup(src.strip(), braces=False)
  471             except self.ParseError as err:
  472                 err.setFullCSSSource(src, inline=True)
  473                 raise
  474 
  475             result = self.cssBuilder.inline(properties)
  476         finally:
  477             self.cssBuilder.endInline()
  478         return result
  479 
  480     def parseAttributes(self, attributes=None, **kwAttributes):
  481         """Parses CSS attribute source strings, and return as an inline stylesheet.
  482         Use to parse a tag's highly CSS-based attributes like 'font'.
  483 
  484         See also: parseSingleAttr
  485         """
  486         attributes = attributes if attributes is not None else {}
  487         if attributes:
  488             kwAttributes.update(attributes)
  489 
  490         self.cssBuilder.beginInline()
  491         try:
  492             properties = []
  493             try:
  494                 for propertyName, src in six.iteritems(kwAttributes):
  495                     src, property = self._parseDeclarationProperty(src.strip(), propertyName)
  496                     properties.append(property)
  497 
  498             except self.ParseError as err:
  499                 err.setFullCSSSource(src, inline=True)
  500                 raise
  501 
  502             result = self.cssBuilder.inline(properties)
  503         finally:
  504             self.cssBuilder.endInline()
  505         return result
  506 
  507 
  508     def parseSingleAttr(self, attrValue):
  509         """Parse a single CSS attribute source string, and returns the built CSS expression.
  510         Use to parse a tag's highly CSS-based attributes like 'font'.
  511 
  512         See also: parseAttributes
  513         """
  514 
  515         results = self.parseAttributes(temp=attrValue)
  516         if 'temp' in results[1]:
  517             return results[1]['temp']
  518         else:
  519             return results[0]['temp']
  520 
  521 
  522     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  523     #~ Internal _parse methods
  524     #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  525 
  526     def _parseStylesheet(self, src):
  527         """stylesheet
  528         : [ CHARSET_SYM S* STRING S* ';' ]?
  529             [S|CDO|CDC]* [ import [S|CDO|CDC]* ]*
  530             [ [ ruleset | media | page | font_face ] [S|CDO|CDC]* ]*
  531         ;
  532         """
  533         # FIXME: BYTES to STR 
  534         if type(src) == six.binary_type:
  535             src=six.text_type(src)
  536         # Get rid of the comments
  537         src = self.re_comment.sub('', src)
  538 
  539         # [ CHARSET_SYM S* STRING S* ';' ]?
  540         src = self._parseAtCharset(src)
  541 
  542         # [S|CDO|CDC]*
  543         src = self._parseSCDOCDC(src)
  544         #  [ import [S|CDO|CDC]* ]*
  545         src, stylesheetImports = self._parseAtImports(src)
  546 
  547         # [ namespace [S|CDO|CDC]* ]*
  548         src = self._parseAtNamespace(src)
  549 
  550         stylesheetElements = []
  551 
  552         # [ [ ruleset | atkeywords ] [S|CDO|CDC]* ]*
  553         while src: # due to ending with ]*
  554             if src.startswith('@'):
  555                 # @media, @page, @font-face
  556                 src, atResults = self._parseAtKeyword(src)
  557                 if atResults is not None and atResults != NotImplemented:
  558                     stylesheetElements.extend(atResults)
  559             else:
  560                 # ruleset
  561                 src, ruleset = self._parseRuleset(src)
  562                 stylesheetElements.append(ruleset)
  563 
  564             # [S|CDO|CDC]*
  565             src = self._parseSCDOCDC(src)
  566 
  567         stylesheet = self.cssBuilder.stylesheet(stylesheetElements, stylesheetImports)
  568         return src, stylesheet
  569 
  570 
  571     def _parseSCDOCDC(self, src):
  572         """[S|CDO|CDC]*"""
  573         while 1:
  574             src = src.lstrip()
  575             if src.startswith('<!--'):
  576                 src = src[4:]
  577             elif src.startswith('-->'):
  578                 src = src[3:]
  579             else:
  580                 break
  581         return src
  582 
  583 
  584     #~ CSS @ directives ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  585 
  586     def _parseAtCharset(self, src):
  587         """[ CHARSET_SYM S* STRING S* ';' ]?"""
  588         if isAtRuleIdent(src, 'charset'):
  589             src = stripAtRuleIdent(src)
  590             charset, src = self._getString(src)
  591             src = src.lstrip()
  592             if src[:1] != ';':
  593                 raise self.ParseError('@charset expected a terminating \';\'', src, self.ctxsrc)
  594             src = src[1:].lstrip()
  595 
  596             self.cssBuilder.atCharset(charset)
  597         return src
  598 
  599 
  600     def _parseAtImports(self, src):
  601         """[ import [S|CDO|CDC]* ]*"""
  602         result = []
  603         while isAtRuleIdent(src, 'import'):
  604             ctxsrc = src
  605             src = stripAtRuleIdent(src)
  606 
  607             import_, src = self._getStringOrURI(src)
  608             if import_ is None:
  609                 raise self.ParseError('Import expecting string or url', src, ctxsrc)
  610 
  611             mediums = []
  612             medium, src = self._getIdent(src.lstrip())
  613             while medium is not None:
  614                 mediums.append(medium)
  615                 if src[:1] == ',':
  616                     src = src[1:].lstrip()
  617                     medium, src = self._getIdent(src)
  618                 else:
  619                     break
  620 
  621             # XXX No medium inherits and then "all" is appropriate
  622             if not mediums:
  623                 mediums = ["all"]
  624 
  625             if src[:1] != ';':
  626                 raise self.ParseError('@import expected a terminating \';\'', src, ctxsrc)
  627             src = src[1:].lstrip()
  628 
  629             stylesheet = self.cssBuilder.atImport(import_, mediums, self)
  630             if stylesheet is not None:
  631                 result.append(stylesheet)
  632 
  633             src = self._parseSCDOCDC(src)
  634         return src, result
  635 
  636 
  637     def _parseAtNamespace(self, src):
  638         """namespace :
  639 
  640         @namespace S* [IDENT S*]? [STRING|URI] S* ';' S*
  641         """
  642 
  643         src = self._parseSCDOCDC(src)
  644         while isAtRuleIdent(src, 'namespace'):
  645             ctxsrc = src
  646             src = stripAtRuleIdent(src)
  647 
  648             namespace, src = self._getStringOrURI(src)
  649             if namespace is None:
  650                 nsPrefix, src = self._getIdent(src)
  651                 if nsPrefix is None:
  652                     raise self.ParseError('@namespace expected an identifier or a URI', src, ctxsrc)
  653                 namespace, src = self._getStringOrURI(src.lstrip())
  654                 if namespace is None:
  655                     raise self.ParseError('@namespace expected a URI', src, ctxsrc)
  656             else:
  657                 nsPrefix = None
  658 
  659             src = src.lstrip()
  660             if src[:1] != ';':
  661                 raise self.ParseError('@namespace expected a terminating \';\'', src, ctxsrc)
  662             src = src[1:].lstrip()
  663 
  664             self.cssBuilder.atNamespace(nsPrefix, namespace)
  665 
  666             src = self._parseSCDOCDC(src)
  667         return src
  668 
  669 
  670     def _parseAtKeyword(self, src):
  671         """[media | page | font_face | unknown_keyword]"""
  672         ctxsrc = src
  673         if isAtRuleIdent(src, 'media'):
  674             src, result = self._parseAtMedia(src)
  675         elif isAtRuleIdent(src, 'page'):
  676             src, result = self._parseAtPage(src)
  677         elif isAtRuleIdent(src, 'font-face'):
  678             src, result = self._parseAtFontFace(src)
  679         # XXX added @import, was missing!
  680         elif isAtRuleIdent(src, 'import'):
  681             src, result = self._parseAtImports(src)
  682         elif isAtRuleIdent(src, 'frame'):
  683             src, result = self._parseAtFrame(src)
  684         elif src.startswith('@'):
  685             src, result = self._parseAtIdent(src)
  686         else:
  687             raise self.ParseError('Unknown state in atKeyword', src, ctxsrc)
  688         return src, result
  689 
  690 
  691     def _parseAtMedia(self, src):
  692         """media
  693         : MEDIA_SYM S* medium [ ',' S* medium ]* '{' S* ruleset* '}' S*
  694         ;
  695         """
  696         ctxsrc = src
  697         src = src[len('@media '):].lstrip()
  698         mediums = []
  699         while src and src[0] != '{':
  700             medium, src = self._getIdent(src)
  701             if medium is None:
  702                 raise self.ParseError('@media rule expected media identifier', src, ctxsrc)
  703             # make "and ... {" work
  704             if medium == 'and':
  705                 # strip up to curly bracket
  706                 pattern = re.compile('.*({.*)')
  707                 match = re.match(pattern, src)
  708                 src = src[match.end()-1:]
  709                 break
  710             mediums.append(medium)
  711             if src[0] == ',':
  712                 src = src[1:].lstrip()
  713             else:
  714                 src = src.lstrip()
  715 
  716         if not src.startswith('{'):
  717             raise self.ParseError('Ruleset opening \'{\' not found', src, ctxsrc)
  718         src = src[1:].lstrip()
  719 
  720         stylesheetElements = []
  721         #while src and not src.startswith('}'):
  722         #    src, ruleset = self._parseRuleset(src)
  723         #    stylesheetElements.append(ruleset)
  724         #    src = src.lstrip()
  725 
  726         # Containing @ where not found and parsed
  727         while src and not src.startswith('}'):
  728             if src.startswith('@'):
  729                 # @media, @page, @font-face
  730                 src, atResults = self._parseAtKeyword(src)
  731                 if atResults is not None:
  732                     stylesheetElements.extend(atResults)
  733             else:
  734                 # ruleset
  735                 src, ruleset = self._parseRuleset(src)
  736                 stylesheetElements.append(ruleset)
  737             src = src.lstrip()
  738 
  739         if not src.startswith('}'):
  740             raise self.ParseError('Ruleset closing \'}\' not found', src, ctxsrc)
  741         else:
  742             src = src[1:].lstrip()
  743 
  744         result = self.cssBuilder.atMedia(mediums, stylesheetElements)
  745         return src, result
  746 
  747 
    def _parseAtPage(self, src):
        """page
        : PAGE_SYM S* IDENT? pseudo_page? S*
            '{' S* declaration [ ';' S* declaration ]* '}' S*
        ;

        Parse an @page rule at the start of ``src``.  Besides handing the
        rule to the builder's atPage(), the declarations ``-pdf-page-size``
        and ``size`` update ``self.c.pageSize`` as a side effect.

        Returns ``(remaining_src, [atPage_result])``.

        Raises ParseError when the opening '{' is missing and RuntimeError
        for an unrecognised ``size`` value.
        """

        data = {}
        pageBorder = None
        isLandscape = False

        ctxsrc = src
        src = src[len('@page'):].lstrip()
        page, src = self._getIdent(src)
        if src[:1] == ':':
            pseudopage, src = self._getIdent(src[1:])
            # NOTE(review): if _getIdent returned None for ``page`` (e.g.
            # "@page :first"), this concatenation would raise TypeError --
            # confirm how _getIdent behaves and what the builder expects.
            page = page + '_' + pseudopage
        else:
            pseudopage = None

        #src, properties = self._parseDeclarationGroup(src.lstrip())

        # Containing @ where not found and parsed
        # NOTE(review): stylesheetElements is filled below but is not passed
        # to atPage() in this method.
        stylesheetElements = []
        src = src.lstrip()
        properties = []

        # XXX Extended for PDF use
        if not src.startswith('{'):
            raise self.ParseError('Ruleset opening \'{\' not found', src, ctxsrc)
        else:
            src = src[1:].lstrip()

        while src and not src.startswith('}'):
            if src.startswith('@'):
                # @media, @page, @font-face
                src, atResults = self._parseAtKeyword(src)
                if atResults is not None:
                    stylesheetElements.extend(atResults)
            else:
                # Plain declarations: accumulate them across iterations.
                src, nproperties = self._parseDeclarationGroup(src.lstrip(), braces=False)
                properties += nproperties

                # Set pagesize, orientation (landscape, portrait)
                data = {}
                pageBorder = None

                if properties:
                    # Build a throw-away '*' ruleset to get the declarations
                    # as a mapping.
                    result = self.cssBuilder.ruleset([self.cssBuilder.selector('*')], properties)
                    try:
                        # Works where values() returns a list (Python 2).
                        data = result[0].values()[0]
                    except Exception:
                        # Fallback: popitem() also removes the entry from
                        # result[0].
                        data = result[0].popitem()[1]
                    pageBorder = data.get("-pdf-frame-border", None)

                if "-pdf-page-size" in data:
                    # Unknown names keep the current page size.
                    # NOTE(review): ``self.c`` is not set anywhere in the
                    # visible part of this class -- presumably provided by a
                    # subclass or the caller; confirm.
                    self.c.pageSize = xhtml2pdf.default.PML_PAGESIZES.get(
                        str(data["-pdf-page-size"]).lower(), self.c.pageSize)

                isLandscape = False
                if "size" in data:
                    size = data["size"]
                    if not isinstance(size, list):
                        size = [size]
                    sizeList = []
                    for value in size:
                        valueStr = str(value).lower()
                        if isinstance(value, tuple):
                            # Tuples are converted with getSize and collected
                            # as explicit dimensions.
                            sizeList.append(getSize(value))
                        elif valueStr == "landscape":
                            isLandscape = True
                        elif valueStr == "portrait":
                            isLandscape = False
                        elif valueStr in xhtml2pdf.default.PML_PAGESIZES:
                            self.c.pageSize = xhtml2pdf.default.PML_PAGESIZES[valueStr]
                        else:
                            raise RuntimeError("Unknown size value for @page")

                    # Exactly two explicit dimensions replace the page size.
                    if len(sizeList) == 2:
                        self.c.pageSize = tuple(sizeList)

                    if isLandscape:
                        self.c.pageSize = landscape(self.c.pageSize)

            src = src.lstrip()

        result = [self.cssBuilder.atPage(page, pseudopage, data, isLandscape, pageBorder)]

        # Drops the first remaining character without checking that it is
        # the closing '}' (src may be empty here if the rule was unterminated).
        return src[1:].lstrip(), result
  837 
  838 
  839     def _parseAtFrame(self, src):
  840         """
  841         XXX Proprietary for PDF
  842         """
  843         src = src[len('@frame '):].lstrip()
  844         box, src = self._getIdent(src)
  845         src, properties = self._parseDeclarationGroup(src.lstrip())
  846         result = [self.cssBuilder.atFrame(box, properties)]
  847         return src.lstrip(), result
  848 
  849 
  850     def _parseAtFontFace(self, src):
  851         src = src[len('@font-face '):].lstrip()
  852         src, properties = self._parseDeclarationGroup(src)
  853         result = [self.cssBuilder.atFontFace(properties)]
  854         return src, result
  855 
  856 
  857     def _parseAtIdent(self, src):
  858         ctxsrc = src
  859         atIdent, src = self._getIdent(src[1:])
  860         if atIdent is None:
  861             raise self.ParseError('At-rule expected an identifier for the rule', src, ctxsrc)
  862 
  863         src, result = self.cssBuilder.atIdent(atIdent, self, src)
  864 
  865         if result is NotImplemented:
  866             # An at-rule consists of everything up to and including the next semicolon (;) or the next block, whichever comes first
  867 
  868             semiIdx = src.find(';')
  869             if semiIdx < 0:
  870                 semiIdx = None
  871             blockIdx = src[:semiIdx].find('{')
  872             if blockIdx < 0:
  873                 blockIdx = None
  874 
  875             if semiIdx is not None and semiIdx < blockIdx:
  876                 src = src[semiIdx + 1:].lstrip()
  877             elif blockIdx is None:
  878                 # consume the rest of the content since we didn't find a block or a semicolon
  879                 src = src[-1:-1]
  880             elif blockIdx is not None:
  881                 # expecing a block...
  882                 src = src[blockIdx:]
  883                 try:
  884                     # try to parse it as a declarations block
  885                     src, declarations = self._parseDeclarationGroup(src)
  886                 except self.ParseError:
  887                     # try to parse it as a stylesheet block
  888                     src, stylesheet = self._parseStylesheet(src)
  889             else:
  890                 raise self.ParserError('Unable to ignore @-rule block', src, ctxsrc)
  891 
  892         return src.lstrip(), result
  893 
  894 
  895     #~ ruleset - see selector and declaration groups ~~~~
  896 
  897     def _parseRuleset(self, src):
  898         """ruleset
  899         : selector [ ',' S* selector ]*
  900             '{' S* declaration [ ';' S* declaration ]* '}' S*
  901         ;
  902         """
  903         src, selectors = self._parseSelectorGroup(src)
  904         src, properties = self._parseDeclarationGroup(src.lstrip())
  905         result = self.cssBuilder.ruleset(selectors, properties)
  906         return src, result
  907 
  908 
  909     #~ selector parsing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  910 
  911     def _parseSelectorGroup(self, src):
  912         selectors = []
  913         while src[:1] not in ('{', '}', ']', '(', ')', ';', ''):
  914             src, selector = self._parseSelector(src)
  915             if selector is None:
  916                 break
  917             selectors.append(selector)
  918             if src.startswith(','):
  919                 src = src[1:].lstrip()
  920         return src, selectors
  921 
  922 
  923     def _parseSelector(self, src):
  924         """selector
  925         : simple_selector [ combinator simple_selector ]*
  926         ;
  927         """
  928         src, selector = self._parseSimpleSelector(src)
  929         srcLen = len(src) # XXX
  930         while src[:1] not in ('', ',', ';', '{', '}', '[', ']', '(', ')'):
  931             for combiner in self.SelectorCombiners:
  932                 if src.startswith(combiner):
  933                     src = src[len(combiner):].lstrip()
  934                     break
  935             else:
  936                 combiner = ' '
  937             src, selectorB = self._parseSimpleSelector(src)
  938 
  939             # XXX Fix a bug that occured here e.g. : .1 {...}
  940             if len(src) >= srcLen:
  941                 src = src[1:]
  942                 while src and (src[:1] not in ('', ',', ';', '{', '}', '[', ']', '(', ')')):
  943                     src = src[1:]
  944                 return src.lstrip(), None
  945 
  946             selector = self.cssBuilder.combineSelectors(selector, combiner, selectorB)
  947 
  948         return src.lstrip(), selector
  949 
  950 
  951     def _parseSimpleSelector(self, src):
  952         """simple_selector
  953         : [ namespace_selector ]? element_name? [ HASH | class | attrib | pseudo ]* S*
  954         ;
  955         """
  956         ctxsrc = src.lstrip()
  957         nsPrefix, src = self._getMatchResult(self.re_namespace_selector, src)
  958         name, src = self._getMatchResult(self.re_element_name, src)
  959         if name:
  960             pass # already *successfully* assigned
  961         elif src[:1] in self.SelectorQualifiers:
  962             name = '*'
  963         else:
  964             raise self.ParseError('Selector name or qualifier expected', src, ctxsrc)
  965 
  966         name = self.cssBuilder.resolveNamespacePrefix(nsPrefix, name)
  967         selector = self.cssBuilder.selector(name)
  968         while src and src[:1] in self.SelectorQualifiers:
  969             hash_, src = self._getMatchResult(self.re_hash, src)
  970             if hash_ is not None:
  971                 selector.addHashId(hash_)
  972                 continue
  973 
  974             class_, src = self._getMatchResult(self.re_class, src)
  975             if class_ is not None:
  976                 selector.addClass(class_)
  977                 continue
  978 
  979             if src.startswith('['):
  980                 src, selector = self._parseSelectorAttribute(src, selector)
  981             elif src.startswith(':'):
  982                 src, selector = self._parseSelectorPseudo(src, selector)
  983             else:
  984                 break
  985 
  986         return src.lstrip(), selector
  987 
  988 
  989     def _parseSelectorAttribute(self, src, selector):
  990         """attrib
  991         : '[' S* [ namespace_selector ]? IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
  992             [ IDENT | STRING ] S* ]? ']'
  993         ;
  994         """
  995         ctxsrc = src
  996         if not src.startswith('['):
  997             raise self.ParseError('Selector Attribute opening \'[\' not found', src, ctxsrc)
  998         src = src[1:].lstrip()
  999 
 1000         nsPrefix, src = self._getMatchResult(self.re_namespace_selector, src)
 1001         attrName, src = self._getIdent(src)
 1002 
 1003         src = src.lstrip()
 1004 
 1005         if attrName is None:
 1006             raise self.ParseError('Expected a selector attribute name', src, ctxsrc)
 1007         if nsPrefix is not None:
 1008             attrName = self.cssBuilder.resolveNamespacePrefix(nsPrefix, attrName)
 1009 
 1010         for op in self.AttributeOperators:
 1011             if src.startswith(op):
 1012                 break
 1013         else:
 1014             op = ''
 1015         src = src[len(op):].lstrip()
 1016 
 1017         if op:
 1018             attrValue, src = self._getIdent(src)
 1019             if attrValue is None:
 1020                 attrValue, src = self._getString(src)
 1021                 if attrValue is None:
 1022                     raise self.ParseError('Expected a selector attribute value', src, ctxsrc)
 1023         else:
 1024             attrValue = None
 1025 
 1026         if not src.startswith(']'):
 1027             raise self.ParseError('Selector Attribute closing \']\' not found', src, ctxsrc)
 1028         else:
 1029             src = src[1:]
 1030 
 1031         if op:
 1032             selector.addAttributeOperation(attrName, op, attrValue)
 1033         else:
 1034             selector.addAttribute(attrName)
 1035         return src, selector
 1036 
 1037 
 1038     def _parseSelectorPseudo(self, src, selector):
 1039         """pseudo
 1040         : ':' [ IDENT | function ]
 1041         ;
 1042         """
 1043         ctxsrc = src
 1044         if not src.startswith(':'):
 1045             raise self.ParseError('Selector Pseudo \':\' not found', src, ctxsrc)
 1046         src = re.search('^:{1,2}(.*)', src, re.M | re.S).group(1)
 1047 
 1048         name, src = self._getIdent(src)
 1049         if not name:
 1050             raise self.ParseError('Selector Pseudo identifier not found', src, ctxsrc)
 1051 
 1052         if src.startswith('('):
 1053             # function
 1054             src = src[1:].lstrip()
 1055             src, term = self._parseExpression(src, True)
 1056             if not src.startswith(')'):
 1057                 raise self.ParseError('Selector Pseudo Function closing \')\' not found', src, ctxsrc)
 1058             src = src[1:]
 1059             selector.addPseudoFunction(name, term)
 1060         else:
 1061             selector.addPseudo(name)
 1062 
 1063         return src, selector
 1064 
 1065 
 1066     #~ declaration and expression parsing ~~~~~~~~~~~~~~~
 1067 
 1068     def _parseDeclarationGroup(self, src, braces=True):
 1069         ctxsrc = src
 1070         if src.startswith('{'):
 1071             src, braces = src[1:], True
 1072         elif braces:
 1073             raise self.ParseError('Declaration group opening \'{\' not found', src, ctxsrc)
 1074 
 1075         properties = []
 1076         src = src.lstrip()
 1077         while src[:1] not in ('', ',', '{', '}', '[', ']', '(', ')', '@'): # XXX @?
 1078             src, property = self._parseDeclaration(src)
 1079 
 1080             # XXX Workaround for styles like "*font: smaller"
 1081             if src.startswith("*"):
 1082                 src = "-nothing-" + src[1:]
 1083                 continue
 1084 
 1085             if property is None:
 1086                 src = src[1:].lstrip()
 1087                 break
 1088             properties.append(property)
 1089             if src.startswith(';'):
 1090                 src = src[1:].lstrip()
 1091             else:
 1092                 break
 1093 
 1094         if braces:
 1095             if not src.startswith('}'):
 1096                 raise self.ParseError('Declaration group closing \'}\' not found', src, ctxsrc)
 1097             src = src[1:]
 1098 
 1099         return src.lstrip(), properties
 1100 
 1101 
 1102     def _parseDeclaration(self, src):
 1103         """declaration
 1104         : ident S* ':' S* expr prio?
 1105         | /* empty */
 1106         ;
 1107         """
 1108         # property
 1109         propertyName, src = self._getIdent(src)
 1110 
 1111         if propertyName is not None:
 1112             src = src.lstrip()
 1113             # S* : S*
 1114             if src[:1] in (':', '='):
 1115                 # Note: we are being fairly flexable here...  technically, the
 1116                 # ":" is *required*, but in the name of flexibility we
 1117                 # suppor a null transition, as well as an "=" transition
 1118                 src = src[1:].lstrip()
 1119 
 1120             src, property = self._parseDeclarationProperty(src, propertyName)
 1121         else:
 1122             property = None
 1123 
 1124         return src, property
 1125 
 1126 
 1127     def _parseDeclarationProperty(self, src, propertyName):
 1128         # expr
 1129         src, expr = self._parseExpression(src)
 1130 
 1131         # prio?
 1132         important, src = self._getMatchResult(self.re_important, src)
 1133         src = src.lstrip()
 1134 
 1135         property = self.cssBuilder.property(propertyName, expr, important)
 1136         return src, property
 1137 
 1138 
 1139     def _parseExpression(self, src, returnList=False):
 1140         """
 1141         expr
 1142         : term [ operator term ]*
 1143         ;
 1144         """
 1145         src, term = self._parseExpressionTerm(src)
 1146         operator = None
 1147         while src[:1] not in ('', ';', '{', '}', '[', ']', ')'):
 1148             for operator in self.ExpressionOperators:
 1149                 if src.startswith(operator):
 1150                     src = src[len(operator):]
 1151                     break
 1152             else:
 1153                 operator = ' '
 1154             src, term2 = self._parseExpressionTerm(src.lstrip())
 1155             if term2 is NotImplemented:
 1156                 break
 1157             else:
 1158                 term = self.cssBuilder.combineTerms(term, operator, term2)
 1159 
 1160         if operator is None and returnList:
 1161             term = self.cssBuilder.combineTerms(term, None, None)
 1162             return src, term
 1163         else:
 1164             return src, term
 1165 
 1166 
 1167     def _parseExpressionTerm(self, src):
 1168         """term
 1169         : unary_operator?
 1170             [ NUMBER S* | PERCENTAGE S* | LENGTH S* | EMS S* | EXS S* | ANGLE S* |
 1171             TIME S* | FREQ S* | function ]
 1172         | STRING S* | IDENT S* | URI S* | RGB S* | UNICODERANGE S* | hexcolor
 1173         ;
 1174         """
 1175         ctxsrc = src
 1176 
 1177         result, src = self._getMatchResult(self.re_num, src)
 1178         if result is not None:
 1179             units, src = self._getMatchResult(self.re_unit, src)
 1180             term = self.cssBuilder.termNumber(result, units)
 1181             return src.lstrip(), term
 1182 
 1183         result, src = self._getString(src, self.re_uri)
 1184         if result is not None:
 1185             # XXX URL!!!!
 1186             term = self.cssBuilder.termURI(result)
 1187             return src.lstrip(), term
 1188 
 1189         result, src = self._getString(src)
 1190         if result is not None:
 1191             term = self.cssBuilder.termString(result)
 1192             return src.lstrip(), term
 1193 
 1194         result, src = self._getMatchResult(self.re_functionterm, src)
 1195         if result is not None:
 1196             src, params = self._parseExpression(src, True)
 1197             if src[0] != ')':
 1198                 raise self.ParseError('Terminal function expression expected closing \')\'', src, ctxsrc)
 1199             src = src[1:].lstrip()
 1200             term = self.cssBuilder.termFunction(result, params)
 1201             return src, term
 1202 
 1203         result, src = self._getMatchResult(self.re_rgbcolor, src)
 1204         if result is not None:
 1205             term = self.cssBuilder.termRGB(result)
 1206             return src.lstrip(), term
 1207 
 1208         result, src = self._getMatchResult(self.re_unicoderange, src)
 1209         if result is not None:
 1210             term = self.cssBuilder.termUnicodeRange(result)
 1211             return src.lstrip(), term
 1212 
 1213         nsPrefix, src = self._getMatchResult(self.re_namespace_selector, src)
 1214         result, src = self._getIdent(src)
 1215         if result is not None:
 1216             if nsPrefix is not None:
 1217                 result = self.cssBuilder.resolveNamespacePrefix(nsPrefix, result)
 1218             term = self.cssBuilder.termIdent(result)
 1219             return src.lstrip(), term
 1220 
 1221         result, src = self._getMatchResult(self.re_unicodeid, src)
 1222         if result is not None:
 1223             term = self.cssBuilder.termIdent(result)
 1224             return src.lstrip(), term
 1225 
 1226         result, src = self._getMatchResult(self.re_unicodestr, src)
 1227         if result is not None:
 1228             term = self.cssBuilder.termString(result)
 1229             return src.lstrip(), term
 1230 
 1231         return self.cssBuilder.termUnknown(src)
 1232 
 1233 
 1234     #~ utility methods ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 1235 
 1236     def _getIdent(self, src, default=None):
 1237         return self._getMatchResult(self.re_ident, src, default)
 1238 
 1239 
 1240     def _getString(self, src, rexpression=None, default=None):
 1241         if rexpression is None:
 1242             rexpression = self.re_string
 1243         result = rexpression.match(src)
 1244         if result:
 1245             strres = tuple(filter(None, result.groups()))
 1246             if strres:
 1247                 try:
 1248                     strres = strres[0]
 1249                 except Exception:
 1250                     strres = result.groups()[0]
 1251             else:
 1252                 strres = ''
 1253             return strres, src[result.end():]
 1254         else:
 1255             return default, src
 1256 
 1257 
 1258     def _getStringOrURI(self, src):
 1259         result, src = self._getString(src, self.re_uri)
 1260         if result is None:
 1261             result, src = self._getString(src)
 1262         return result, src
 1263 
 1264 
 1265     def _getMatchResult(self, rexpression, src, default=None, group=1):
 1266         result = rexpression.match(src)
 1267         if result:
 1268             return result.group(group), src[result.end():]
 1269         else:
 1270             return default, src
 1271