"Fossies" - the Fresh Open Source Software Archive

Member "Tahchee-1.0.0/Sources/tahchee/plugins/_kiwi/blocks.py" (22 Oct 2009, 49635 Bytes) of package /linux/privat/old/tahchee-1.0.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "blocks.py", see the Fossies "Dox" file reference documentation.

    1 #!/usr/bin/env python
    2 # Encoding: iso-8859-1
    3 # vim: tw=80 ts=4 sw=4 noet
    4 # -----------------------------------------------------------------------------
    5 # Project           :   Kiwi
    6 # Module            :   Block parsers
    7 # -----------------------------------------------------------------------------
    8 # Author            :   Sebastien Pierre                 <sebastien@type-z.org>
    9 # License           :   Revised BSD License
   10 # -----------------------------------------------------------------------------
   11 # Creation date     :   19-Nov-2003
   12 # Last mod.         :   07-Oct-2009
   13 # -----------------------------------------------------------------------------
   14 
   15 import re, string
   16 from formatting import *
   17 
# Module documentation placeholder, assigned explicitly to __doc__.
__doc__       = """Write module doc here"""
# PyChecker directive string (presumably silences "unused name" reports for
# the listed parameter names -- confirm against the PyChecker documentation).
__pychecker__ = "unusednames=recogniseInfo,content"

# Warning text used by ListItemBlockParser when a list item marker is
# immediately followed by the end of the block.
EMPTY_LIST_ITEM = "Empty list item."

# Node names considered block-level containers; ParagraphBlockParser and
# DefinitionBlockParser test `nodeName` against this tuple.
BLOCK_ELEMENTS = ("Block", "ListItem", "Definition", "Content", "Chapter", "Section", "Appendix")

# Identifiers for the kind of list a list item belongs to.
STANDARD_LIST    = 1
DEFINITION_LIST  = 2
TODO_LIST        = 3
ORDERED_LIST     = 4

# Identifiers for the kind of an individual list item.
STANDARD_ITEM    = 100
TODO_ITEM        = 101
TODO_DONE_ITEM   = 102
   33 
   34 #------------------------------------------------------------------------------
   35 #
   36 #  Regular expressions
   37 #
   38 #------------------------------------------------------------------------------
   39 
# Matches a (possibly empty) run of whitespace.
RE_BLANK          = re.compile(u"\s*", re.LOCALE|re.MULTILINE)

# Document title line: optional whitespace, "==", then the title text
# (group 2), whose first character must not be "=".
TITLE             = u"^\s*(==)([^=].+)$"
RE_TITLE          = re.compile(TITLE, re.LOCALE|re.MULTILINE)
# Document header line: "--", a name without ":" and an optional value
# after the colon.
TITLE_HEADER      = u"^\s*(--)([^\:]+):(.+)?$"
# Alternation of the two patterns above. Groups 1-2 come from TITLE and
# groups 3-5 from TITLE_HEADER (marker, name, optional value).
RE_TITLES         = re.compile(u"%s|%s" % (TITLE, TITLE_HEADER), re.LOCALE|re.MULTILINE)

# Numbered section prefix such as "1.", "1.2" or "A.1.".
# NOTE(review): the range [A-z] also covers the ASCII characters between
# "Z" and "a" ("[", "\", "]", "^", "_", "`"); [A-Za-z] was probably
# intended -- confirm before changing.
SECTION_HEADING   = u"^\s*((([0-9]+|[A-z])\.)+([0-9]+|[A-z])?\.?)"
RE_SECTION_HEADING= re.compile(SECTION_HEADING, re.LOCALE)
# Section heading introduced by one or more "=" characters; group 1 holds
# the "=" run together with the whitespace that follows it.
SECTION_HEADING_ALT = u"^(\=+\s*).+$"
RE_SECTION_HEADING_ALT= re.compile(SECTION_HEADING_ALT, re.LOCALE)
# Underline made of at least three characters taken from "*", "-", "=", "#".
SECTION_UNDERLINE = u"^\s*[\*\-\=#][\*\-\=#][\*\-\=#]+\s*$"
RE_SECTION_UNDERLINE = re.compile(SECTION_UNDERLINE, re.LOCALE|re.MULTILINE)

# Definition term: text (group 1) terminated by "::" or more colons, plus
# the optional line breaks (or "|" continuation) that follow.
DEFINITION_ITEM   = u"^(\s*(\:[^\:]|[^\:])+)\:\:+\s*(\n+\s*|\s*\|\s*\n)*"
RE_DEFINITION_ITEM = re.compile(DEFINITION_ITEM, re.LOCALE|re.MULTILINE)

# Tagged block delimiter: optional "name" (group 2) and ":title" (group 3)
# followed by four or more underscores (group 4).
# NOTE(review): [^_]+ in group 2 also accepts ":", so group 3 looks unable
# to capture anything -- confirm.
TAGGED_BLOCK      = u"^\s*(([^_]+\s*)(\:[^_]+)?)?(____+)\s*$"
RE_TAGGED_BLOCK   = re.compile(TAGGED_BLOCK, re.MULTILINE | re.LOCALE)
# List item marker: indentation (group 1) then "-", "*)", a digit or letter
# followed by ")" or "/", or a bracketed todo marker such as "[ ]" / "[x]".
# NOTE(review): same [A-z] range remark as for SECTION_HEADING.
LIST_ITEM         = u"^(\s*)(-|\*\)|[0-9A-z][\)/]|\[[ \-\~xX]\])\s*"
RE_LIST_ITEM      = re.compile(LIST_ITEM, re.MULTILINE | re.LOCALE)
# Text ending with ":" that contains none of the characters ":{().<".
LIST_HEADING      = u"(^\s*[^:{().<]*:)"
RE_LIST_HEADING   = re.compile(LIST_HEADING, re.MULTILINE | re.LOCALE)
# Heading inside a list item: either text ending with ":" + newline or "::"
# (group 1), or text ending with "/" or "\" followed by a newline (group 3).
LIST_ITEM_HEADING = u"^([^:]+(:\s*\n\s*|::\s*))|([^/\\\]+[/\\\]\s*\n\s*)"
RE_LIST_ITEM_HEADING =  re.compile(LIST_ITEM_HEADING, re.MULTILINE|re.LOCALE)
# Digits followed by ")" or "."; used to detect ordered list markers.
RE_NUMBER          = re.compile("\d+[\)\.]")

# Preformatted line: ">" followed by a tab or a run of spaces, then the
# content (group 3).
PREFORMATTED      = u"^(\s*\>(\t|   ))(.*)$"
RE_PREFORMATTED   = re.compile(PREFORMATTED, re.LOCALE)

# Custom markup declaration of the form: - "text" = name (option)
CUSTOM_MARKUP = u"\s*-\s*\"([^\"]+)\"\s*[=:]\s*([\w\-_]+)(\s*\(\s*(\w+)\s*\))?"
RE_CUSTOM_MARKUP = re.compile(CUSTOM_MARKUP, re.LOCALE|re.MULTILINE)

# A word optionally followed by a parenthesised word: "type (subtype)".
META_TYPE        = u"\s*(\w+)\s*(\((\w+)\))?"
RE_META_TYPE     = re.compile(META_TYPE, re.LOCALE|re.MULTILINE)

# Field name (group 2) at the start of the text or of a line, followed by ":".
META_FIELD = u'(^|\n)\s*([\w\-]+)\s*:\s*'
RE_META_FIELD= re.compile(META_FIELD, re.LOCALE)
# Text enclosed in angle brackets, e.g. an e-mail address.
RE_META_AUTHOR_EMAIL = re.compile("\<([^>]+)\>", re.LOCALE)

# Reference entry label of the form " [label]:".
REFERENCE_ENTRY    = u"\s+\[([^\]]+)]:"
RE_REFERENCE_ENTRY = re.compile(REFERENCE_ENTRY, re.LOCALE|re.MULTILINE)

# Table row separator: a line made only of "-"/"+" or only of "="/"+".
TABLE_ROW_SEPARATOR    = "^\s*([\-\+]+|[\=\+]+)\s*$"
RE_TABLE_ROW_SEPARATOR = re.compile(TABLE_ROW_SEPARATOR)

# Upper-case language codes known to this module.
LANGUAGE_CODES = ("EN", "FR", "DE", "UK" )

#------------------------------------------------------------------------------
#
#  Error messages
#
#------------------------------------------------------------------------------

# Message text for titles nested too deeply (not referenced in this part of
# the file).
ERROR_TITLE_TOO_DEEPLY_NESTED = "Title too deeply nested"
   95 
   96 #------------------------------------------------------------------------------
   97 #
   98 #  BlockParser
   99 #
  100 #------------------------------------------------------------------------------
  101 
  102 class BlockParser:
  103 
  104     def __init__( self, name ):
  105         self.name = name
  106 
  107     def recognises( self, context ):
  108         """Tells wether the given block is recognised or not. This returns
  109         this block recognition information, or False (or None) if the block was
  110         not recongised."""
  111         return False
  112 
  113     def process( self, context, recogniseInfo ):
  114         return None
  115 
  116     def processText( self, context, text ):
  117         assert context, text
  118         return text
  119 
  120 #------------------------------------------------------------------------------
  121 #
  122 #  ParagraphBlockParser
  123 #
  124 #------------------------------------------------------------------------------
  125 
  126 class ParagraphBlockParser(BlockParser):
  127     """Parses a paragraph block. This parser always recognised the given block,
  128     so it should not appear in the block parsers."""
  129 
  130     def __init__( self ):
  131         BlockParser.__init__(self, "Paragraph")
  132 
  133     def recognises( self, context ):
  134         return True
  135 
  136     def process( self, context, recogniseInfo ):
  137         # We make sure that the current node is a block element
  138         paragraph_depth = context.getBlockIndentation()
  139         # Here we move to the first block element that has an indentation that
  140         # is lower or equal to this paragraph
  141         while context.currentNode.nodeName not in BLOCK_ELEMENTS \
  142         or context.currentNode.getAttribute("_indent") \
  143         and int(context.currentNode.getAttribute("_indent"))>paragraph_depth:
  144             context.currentNode = context.currentNode.parentNode
  145         # If the currentNode last element is a paragraph with a higher
  146         # indentation than the current one, then we create a block, and set it
  147         # as current node (this allows to create "indented paragraphs" - the
  148         # equivalent of blockquotes).
  149         if context.currentNode.childNodes \
  150         and context.currentNode.childNodes[-1].nodeName == "Paragraph" \
  151         and context.currentNode.childNodes[-1].getAttribute("_indent") \
  152         and int(context.currentNode.childNodes[-1].getAttribute("_indent"))<paragraph_depth:
  153             block_node = context.document.createElementNS(None, "Block")
  154             block_node.setAttributeNS(None, "_indent", str(paragraph_depth))
  155             context.currentNode.appendChild(block_node)
  156             context.currentNode = block_node
  157         # Now we can process the document
  158         para_node = context.document.createElementNS(None, self.name)
  159         para_node.setAttributeNS(None, "_indent", str(paragraph_depth))
  160         para_node.setAttributeNS(None, "_start", str(context.blockStartOffset))
  161         para_node.setAttributeNS(None, "_end", str(context.blockEndOffset))
  162         context.parser.parseBlock(context, para_node, self.processText)
  163         # Now we suppress leading and trailing whitespaces
  164         first_text_node = para_node.childNodes[0]
  165         last_text_node  = para_node.childNodes[-1]
  166         if first_text_node.nodeType != para_node.TEXT_NODE: first_text_node = None
  167         if last_text_node.nodeType  != para_node.TEXT_NODE: last_text_node  = None
  168         # Removed first and last text nodes if empty
  169         if first_text_node!=None and first_text_node.data.strip()=="":
  170             para_node.removeChild(first_text_node)
  171             first_text_node = None
  172         if last_text_node!=None and last_text_node.data.strip()=="":
  173             para_node.removeChild(last_text_node)
  174             last_text_node = None
  175         # We strip the leading whitespace
  176         if first_text_node!=None and len(first_text_node.data)>0 and \
  177             first_text_node.data[0] == " ":
  178             first_text_node.data = first_text_node.data[1:]
  179         if last_text_node!=None and len(last_text_node.data)>0 and \
  180             last_text_node.data[-1] == " ":
  181             last_text_node.data = last_text_node.data[:-1]
  182         # FIXME: Maybe the paragraph contains text nodes with only spaces ?
  183         if len(para_node.childNodes)>0:
  184             context.currentNode.appendChild(para_node)
  185         else:
  186             context.parser.warning("Empty paragraph removed", context)
  187 
  188     def processText( self, context, text ):
  189         assert text
  190         text = context.parser.expandTabs(text)
  191         text =  context.parser.normaliseText(text)
  192         return text
  193 
  194 #------------------------------------------------------------------------------
  195 #
  196 #  TaggedBlockParser
  197 #
  198 #------------------------------------------------------------------------------
  199 
  200 class TaggedBlockParser(BlockParser):
  201     """Parses a tagged block. Notes are the common example of tagged
  202     block."""
  203 
  204     def __init__( self ):
  205         BlockParser.__init__(self, "TaggedBlock")
  206 
  207     def recognises( self, context ):
  208         lines = filter(lambda l:l.strip(), context.currentFragment().split("\n"))
  209         if not lines: return
  210         return RE_TAGGED_BLOCK.match(lines[0])
  211 
  212     def _goToParent( self, thisblock, parent ):
  213         if not parent: return parent
  214         if parent.nodeName == "Block":
  215             return parent.parentNode
  216         else:
  217             return parent
  218 
  219     def process( self, context, recogniseInfo ):
  220         tagname  = recogniseInfo.group(2)
  221         tagtitle = recogniseInfo.group(3)
  222         # This is an opening tag
  223         if tagname and tagname[0] != "_":
  224             # TODO: Asserts we are not already in a sepcific block
  225             block_depth = context.getBlockIndentation()
  226             block_node = context.document.createElementNS(None, "Block")
  227             block_node.setAttributeNS(None, "type", tagname.strip().lower())
  228             block_node.setAttributeNS(None, "_indent",str(block_depth))
  229             if tagtitle:
  230                 block_node.setAttributeNS(None, "title", tagtitle[1:].strip())
  231             # We get to a content node
  232             # Now we can process the document
  233             context.increaseOffset(len(recogniseInfo.group()))
  234             context.parser.parseBlock(context, block_node, self.processText)
  235             context.currentNode = self._goToParent( block_node, context.currentNode)
  236             context.currentNode.appendChild(block_node)
  237             context.currentNode = block_node
  238             assert context.currentNode
  239         # This is a closing tag
  240         elif tagname and tagname[0] == "_":
  241             while context.currentNode.nodeName != "Block":
  242                 context.currentNode = context.currentNode.parentNode
  243             context.currentNode = context.currentNode.parentNode
  244 
  245 #------------------------------------------------------------------------------
  246 #
  247 #  CommentBlockParser
  248 #
  249 #------------------------------------------------------------------------------
  250 
  251 class CommentBlockParser(BlockParser):
  252     """Parses a comment markup block."""
  253 
  254     def __init__( self ):
  255         BlockParser.__init__(self, "CommentBlock")
  256 
  257     def recognises( self, context ):
  258         assert context and context.parser.commentParser
  259         lines = context.currentFragment().split("\n")
  260         for line in lines:
  261             line = line.strip()
  262             if line and line.strip()[0]!= "#": return False
  263         return True
  264 
  265     def process( self, context, recogniseInfo ):
  266         context.currentNode.appendChild( context.document.createComment(
  267         self.processText(context, context.currentFragment())))
  268         context.setOffset(context.blockEndOffset)
  269 
  270 
  271 #------------------------------------------------------------------------------
  272 #
  273 #  MarkupBlockParser
  274 #
  275 #------------------------------------------------------------------------------
  276 
  277 class MarkupBlockParser(BlockParser):
  278     """Parses a custom markup block."""
  279 
  280     def __init__( self ):
  281         BlockParser.__init__(self, "MarkupBlock")
  282 
  283     def recognises( self, context ):
  284         assert context and context.parser.markupParser
  285         offset, match = context.parser.markupParser.recognises(context)
  286         # We make sure that the recognised markup is a block markup which has
  287         # only whitespaces at the beginning
  288         if match and context.parser.markupParser.isStartTag(match) \
  289         and len(context.currentFragment()[:match.start()].strip())==0:
  290             # We parse the tag to see if it is a block tag and that it spans
  291             # the whole context current fragment.
  292             dummy_node = context.document.createElementNS(None, "Dummy")
  293             match_end = context.parser.markupParser.parse(context, dummy_node, match)
  294             # The returned matched end MUST BE GREATER than the start tag match
  295             # end, and there MUST BE ONLY SPACES after the match end for this
  296             # tag to represent a standalone block, and not a block inlined into
  297             # a paragraph.
  298             if match_end > match.end() and \
  299             len(context.currentFragment()[match_end:].strip())==0:
  300                 # If there is a child node, we return it
  301                 if len(dummy_node.childNodes)>=1:
  302                     result_node = dummy_node.childNodes[0]
  303                     # We take care of the attributes
  304                     for key, value \
  305                     in context.parseAttributes(match.group(2)).items():
  306                         result_node.setAttributeNS(None, key, value)
  307                     return result_node
  308                 # Otherwise this means that the block is empty
  309                 else: return True
  310             else:
  311                 return False
  312         else:
  313             return False
  314 
  315     def process( self, context, recogniseInfo ):
  316         if recogniseInfo!=True:
  317             context.currentNode.appendChild(recogniseInfo)
  318         context.setOffset(context.blockEndOffset)
  319 
  320 
  321 #------------------------------------------------------------------------------
  322 #
  323 #  TitleBlockParser
  324 #
  325 #------------------------------------------------------------------------------
  326 
  327 class TitleBlockParser(BlockParser):
  328     """Parses a title object"""
  329 
  330     def __init__( self ):
  331         BlockParser.__init__(self, "title")
  332 
  333     def recognises( self, context ):
  334         matches = []
  335         if context.content.childNodes: return None
  336         while not context.blockEndReached():
  337             match = RE_TITLES.match(context.currentFragment())
  338             if match!=None:
  339                 context.increaseOffset(match.end())
  340                 matches.append(match)
  341             else:
  342                 return matches or False
  343         return matches
  344 
  345     def _processLine( self, line ):
  346         pass
  347 
  348     def process( self, context, recogniseInfo ):
  349         assert recogniseInfo
  350         for match in recogniseInfo:
  351             if match.group(1):
  352                 titleNode = context.ensureElement( context.header, "Title" )
  353                 # We get the content of the title
  354                 titleText = Upper(match.group(2) or match.group(4))
  355                 # We prefix with 'sub' or 'subsub' depending on the number of
  356                 # preceding titles
  357                 titleType  = u"sub" * len(filter(lambda n:n.nodeName.endswith("title"), titleNode.childNodes))
  358                 titleType += u"title"
  359                 #We add the node to the document tree
  360                 resultNode = context.ensureElement(titleNode, titleType)
  361                 titleNode.appendChild(resultNode)
  362                 resultNode.appendChild(context.document.createTextNode(self.processText(context, titleText)))
  363             elif match.group(3):
  364                 metaNode  = context.ensureElement( context.header, "Meta" )
  365                 # We get the header name
  366                 header_name = match.group(4).strip()
  367                 header_text = match.group(5).strip()
  368                 # We prepare the header node
  369                 node = context.document.createElementNS(None, "meta")
  370                 node.setAttributeNS(None, "name", header_name)
  371                 node.appendChild(context.document.createTextNode(self.processText(context,
  372                 header_text)))
  373                 # And we add it to the document header
  374                 metaNode.appendChild(node)
  375             else:
  376                 raise Exception("We should not be here ! " + match.group())
  377         context.setOffset(context.blockEndOffset)
  378 
  379     def processText( self, context, text ):
  380         return context.parser.normaliseText(text.strip())
  381 
  382 #------------------------------------------------------------------------------
  383 #
  384 #  SectionBlockParser
  385 #
  386 #------------------------------------------------------------------------------
  387 
  388 class SectionBlockParser(BlockParser):
  389     """Parses a section markup element."""
  390 
  391     def __init__( self ):
  392         BlockParser.__init__(self, "Section")
  393 
  394     def recognises( self, context ):
  395         # We look for the number prefix
  396         match     = RE_SECTION_HEADING.match(context.currentFragment())
  397         # We return directly if there are at least two section numbers (2.3)
  398         if match:
  399             match_underline = RE_SECTION_UNDERLINE.search(context.currentFragment())
  400             if match_underline: return (RE_SECTION_UNDERLINE, match_underline)
  401             else: return (RE_SECTION_HEADING, match) 
  402         # We return directly for a section prefixed by '=='
  403         match_alt = RE_SECTION_HEADING_ALT.match(context.currentFragment())
  404         if match_alt:
  405             return (RE_SECTION_HEADING_ALT, match_alt)
  406         # Or a separator followed by blank space
  407         match = RE_SECTION_UNDERLINE.search(context.currentFragment())
  408         if  match:
  409             # If we reached the end of the block, and that there is something
  410             # before, this OK
  411             if match.end() == context.blockEndOffset and \
  412             context.currentFragment()[:match.start()].strip():
  413                 return (RE_SECTION_UNDERLINE, match)
  414             # Otherwise the rest must be blank
  415             else:
  416                 blank_match = RE_BLANK.match(context.currentFragment()[match.end():])
  417                 # The rest is blank, it's OK
  418                 if blank_match.end()+match.end()+context.getOffset()\
  419                     ==context.blockEndOffset:
  420                     return (RE_SECTION_UNDERLINE, match)
  421                 # Otherwise there is a trailing text
  422                 else:
  423                     return None
  424         # Nothing matched
  425         else:
  426             return None
  427 
  428     def process( self, context, recogniseInfo ):
  429         context.ensureParent( ("Content", "Appendix", "Chapter", "Section") )
  430         matched_type, match = recogniseInfo
  431         section_indent = context.getBlockIndentation()
  432         trail = match.group().strip()
  433         # RULE:
  434         # A section underlined with '==' weights more than a section
  435         # underlined with '--', which weights more than a section 
  436         # underline with nothing. This means that if you have
  437         #
  438         #  1. One
  439         #  ======
  440         #
  441         #  2. Two
  442         #  ------
  443         #
  444         #  3. Three
  445         #
  446         # These sections will all be children of the previous section
  447         section_weight = trail.endswith("==") and 2 or trail.endswith("--") and 1 or 0
  448         #
  449         # FIRST STEP - We detect section text bounds
  450         #
  451         block_start  = context.blockStartOffset
  452         block_end    = context.blockEndOffset
  453         section_type = "Section"
  454         # We have an underlined section
  455         if matched_type == RE_SECTION_UNDERLINE:
  456             block_end = context.getOffset() + match.start()
  457         if matched_type == RE_SECTION_HEADING_ALT:
  458             block_start = context.getOffset() + match.start() + len(match.group(1))
  459             block_end   = context.getOffset() + match.end()
  460         
  461         # We look for a number prefix
  462         heading_text = context.fragment(block_start, block_end)
  463         prefix_match = RE_SECTION_HEADING.match(heading_text)
  464         dots_count   = 0
  465         if prefix_match:
  466             res         = prefix_match.group()
  467             dots_count  = len( filter(lambda x:x, res.split(".")) )
  468             block_start = context.getOffset() + prefix_match.end()
  469         if matched_type == RE_SECTION_HEADING_ALT:
  470             dots_count += len(match.group(1))
  471         # We make sure that we end the section before the block delimiter
  472         delim_match = RE_SECTION_UNDERLINE.search(context.currentFragment())
  473         if delim_match:
  474             block_end = context.getOffset() + delim_match.start()
  475         context.currentNode = context.getParentSection(dots_count-section_weight, section_indent)
  476         section_depth       = context.getDepthInSection(context.currentNode) + 1
  477         #
  478         # SECOND STEP - We create the section
  479         #
  480         section_node = context.document.createElementNS(None, section_type)
  481         section_node.setAttributeNS(None, "_indent", str(section_indent ))
  482         section_node.setAttributeNS(None, "_depth", str(section_depth))
  483         section_node.setAttributeNS(None, "_start", str(block_start))
  484         section_node.setAttributeNS(None, "_sstart", str(block_start))
  485         heading_node = context.document.createElementNS(None, "Heading")
  486         section_node.appendChild(heading_node)
  487         offsets = context.saveOffsets()
  488         context.blockEndOffset = block_end
  489         context.setOffset(block_start)
  490         context.parser.parseBlock(context, heading_node, self.processText)
  491         context.restoreOffsets(offsets)
  492         # Now we create a Content node
  493         content_node = context.document.createElementNS(None, "Content")
  494         content_node.setAttributeNS(None, "_indent", str(section_indent ))
  495         section_node.appendChild(content_node)
  496         # We append the section node and assign it as current node
  497         context.currentNode.appendChild(section_node)
  498         context.currentNode = content_node
  499         context.declareSection(section_node, content_node, dots_count-section_weight)
  500 
  501     def processText( self, context, text ):
  502         return context.parser.normaliseText(text.strip())
  503 
  504 #------------------------------------------------------------------------------
  505 #
  506 #  DefinitionBlockParser
  507 #
  508 #------------------------------------------------------------------------------
  509 
  510 class DefinitionBlockParser(BlockParser):
  511     """Parses a definition markup element."""
  512 
  513     def __init__( self ):
  514         BlockParser.__init__(self, "Definition")
  515 
  516     def recognises( self, context ):
  517         return RE_DEFINITION_ITEM.match(context.currentFragment())
  518 
  519     def _getParentDefinition( self, node ):
  520         while node and node.nodeName != "Definition":
  521             node = node.parentNode
  522         return node
  523 
  524     def process( self, context, match ):
  525         parent_node = self._getParentDefinition(context.currentNode)
  526         _indent = context.getBlockIndentation()
  527         # Ensures that the parent Definition node exists
  528         if not parent_node:
  529             parent_node = context.currentNode
  530             while True:
  531                 if parent_node.parentNode == None: break
  532                 if parent_node.parentNode.nodeType == parent_node.DOCUMENT_NODE: break
  533                 if not parent_node.getAttributeNS(None, "_indent"): break
  534                 if int(parent_node.getAttributeNS(None, "_indent")) <= _indent: break
  535                 parent_node = parent_node.parentNode
  536                 if parent_node.nodeName not in BLOCK_ELEMENTS: continue
  537             context.currentNode = parent_node
  538             definition_node = context.document.createElementNS(None, "Definition")
  539             definition_node.setAttributeNS(None, "_indent", str(_indent))
  540             context.currentNode.appendChild(definition_node)
  541             parent_node = definition_node
  542         # Creates the defintion item
  543         definition_item = context.document.createElementNS(None, "DefinitionItem")
  544         definition_item.setAttributeNS(None, "_indent", str(_indent + 1))
  545         definition_title = context.document.createElementNS(None, "Title")
  546         definition_title.setAttributeNS(None, "_start", str(context.blockStartOffset))
  547         definition_title.setAttributeNS(None, "_end", str(context.blockStartOffset + len(match.group())))
  548         # Parse the content of the definition title
  549         offsets = context.saveOffsets()
  550         context.setCurrentBlock(context.blockStartOffset, context.blockStartOffset + len(match.group(1)))
  551         context.parser.parseBlock(context, definition_title, self.processText)
  552         context.restoreOffsets(offsets)
  553         # And continue the processing
  554         definition_content = context.document.createElementNS(None, "Content")
  555         definition_content.setAttributeNS(None, "_indent", str(_indent + 1))
  556         definition_content.setAttributeNS(None, "_start", str(context.blockStartOffset + match.end()))
  557         definition_content.setAttributeNS(None, "_end", str(context.blockEndOffset))
  558         definition_item.appendChild(definition_title)
  559         definition_item.appendChild(definition_content)
  560         parent_node.appendChild(definition_item)
  561         context.currentNode = definition_content
  562         # We check if there is a rest after the definition name
  563         rest = context.documentText[context.blockStartOffset + match.end():context.blockEndOffset]
  564         if not context.parser.normaliseText(rest).strip(): rest = ""
  565         if rest:
  566             offsets = context.saveOffsets()
  567             context.setCurrentBlock(context.blockStartOffset + match.end(), context.blockEndOffset)
  568             context.parser.parseBlock(context, definition_content, self.processText)
  569             context.restoreOffsets(offsets)
  570 
  571     def processText( self, context, text ):
  572         return context.parser.normaliseText(text)
  573 
  574 #------------------------------------------------------------------------------
  575 #
  576 #  ListItemBlockParser
  577 #
  578 #------------------------------------------------------------------------------
  579 
  580 class ListItemBlockParser(BlockParser):
  581     """Parses a list item. A list item is an element within a list."""
  582 
  583     def __init__( self ):
  584         BlockParser.__init__(self, "ListItem")
  585 
  586     def recognises( self, context ):
  587         return RE_LIST_ITEM.match(context.currentFragment())
  588 
  589     def process( self, context, itemMatch ):
  590 
  591         context.ensureParent( ("Content", "Appendix", "Chapter", "Section", "List") )
  592         start_offset = context.getOffset()
  593 
  594         # Step 1: Determine the range of the current line item in the current
  595         # block. There may be more than one line item as in the following:
  596         # "- blah blah\n - blah blah"
  597         # So we have to look for another line item in the current block
  598 
  599         # To do so, we move the offset after the recognised list item, ie.
  600         # after the leading "1)", "*)", etc
  601         context.increaseOffset(itemMatch.end())
  602 
  603         # Next item match will indicate where in the current fragment the next
  604         # item starts.
  605         next_item_match = None
  606         if context.blockEndReached():
  607             context.parser.warning(EMPTY_LIST_ITEM, context)
  608             return
  609 
  610         # We search a possible next list item after the first eol
  611         next_eol = context.currentFragment().find("\n")
  612         if next_eol!=-1:
  613             next_item_match = RE_LIST_ITEM.search(
  614                 context.currentFragment(), next_eol)
  615         else:
  616             next_item_match = None
  617 
  618         # We assign to current_item_text the text of the current item
  619         current_item_text = context.currentFragment()
  620         if next_item_match:
  621             current_item_text = current_item_text[:next_item_match.start()]
  622 
  623         # We get the list item identation
  624         indent = context.parser.getIndentation(
  625             context.parser.charactersToSpaces(itemMatch.group()))
  626         
  627         # We look for the optional list heading
  628         heading = RE_LIST_ITEM_HEADING.match(current_item_text)
  629         heading_offset = 0
  630         list_type   = STANDARD_LIST
  631         item_type   = STANDARD_ITEM
  632         if heading:
  633             # We remove the heading from the item text
  634             heading_offset = heading.end()
  635             # And update the heading variable with the heading text
  636             if heading.group(1):
  637                 list_type = STANDARD_LIST
  638                 heading_end = heading.group().rfind(":")
  639             else:
  640                 list_type = DEFINITION_LIST
  641                 heading_end = heading.group().rfind("/")
  642 
  643         head = itemMatch.group(2)
  644         if head:
  645             head = head.upper()
  646             if  head == "[ ]":
  647                 item_type = TODO_ITEM
  648                 list_type = TODO_LIST
  649             elif head == "[X]":
  650                 item_type = TODO_DONE_ITEM
  651                 list_type = TODO_LIST
  652             elif RE_NUMBER.match(head):
  653                 list_type = ORDERED_LIST
  654 
  655         # The current_item_text is no longer used in the following code
  656 
  657         # Step 2: Now that we have the item body, and that we know if there is
  658         # a next item (next_item_match), we can create the list item node. To
  659         # do so, we first have to look for a parent "List" node in which the
  660         # "ListItem" node we wish to create will be inserted.
  661 
  662         # We want either a "List" with a LOWER OR EQUAL indent, or a "ListItem"
  663         # with a STRICTLY LOWER indentation, or a node which is neither a List
  664         # nor a ListItem.
  665         while context.currentNode.nodeName == "List" and \
  666         int(context.currentNode.getAttributeNS(None, "_indent"))>indent or \
  667         context.currentNode.nodeName == "ListItem" and \
  668         int(context.currentNode.getAttributeNS(None, "_indent"))>=indent:
  669             context.currentNode = context.currentNode.parentNode
  670 
  671         # If the current node is a list, then we have to create a nested list.
  672         # A List ALWAYS have at least one child ListItem. If the last ListItem
  673         # has the same indentation as our current list item, then it is a
  674         # sibling, otherwise it is a parent.
  675         if context.currentNode.nodeName == "List":
  676             # A List should always have a least one ListItem
  677             items = context._getElementsByTagName( context.currentNode, "ListItem")
  678             assert len(items)>0
  679             if int(items[-1].getAttributeNS(None, "_indent")) < indent:
  680                 context.currentNode = items[-1]
  681 
  682         # We may need to create a new "List" node to hold our list items
  683         list_node = context.currentNode
  684         # If the current node is not a list, then we must create a new list
  685         if context.currentNode.nodeName != "List":
  686             list_node = context.document.createElementNS(None, "List")
  687             list_node.setAttributeNS(None, "_indent", str(indent))
  688             context.currentNode.appendChild(list_node)
  689             context.currentNode = list_node
  690         # We create the list item
  691         list_item_node = context.document.createElementNS(None, "ListItem")
  692         list_item_node.setAttributeNS(None, "_indent", str(indent))
  693         if item_type == TODO_ITEM:
  694             list_item_node.setAttributeNS(None, "todo", "true")
  695         elif item_type == TODO_DONE_ITEM:
  696             list_item_node.setAttributeNS(None, "todo", "done")
  697         #list_item_node.setAttributeNS(None, "_start", str(start_offset))
  698         if next_item_match:
  699             list_item_node.setAttributeNS(None, "_end", str(context.getOffset() + next_item_match.start() -1))
  700         else:
  701             list_item_node.setAttributeNS(None, "_end", str(context.blockEndOffset))
  702         # and the optional heading
  703         if heading:
  704             offsets = context.saveOffsets()
  705             heading_node = context.document.createElementNS(None, "heading")
  706             context.setCurrentBlock(context.getOffset(), context.getOffset()+heading_end)
  707             context.parser.parseBlock(context, heading_node, self.processText)
  708             # heading_text = context.document.createTextNode(heading)
  709             # heading_node.appendChild(heading_text)
  710             list_item_node.appendChild(heading_node)
  711             context.restoreOffsets(offsets)
  712         # and the content
  713         offsets = context.saveOffsets()
  714         if next_item_match:
  715             context.setCurrentBlock(heading_offset+context.getOffset() ,
  716                 context.getOffset()+next_item_match.start())
  717         else:
  718             context.increaseOffset(heading_offset)
  719         # We parse the content of the list item
  720         old_node = context.currentNode
  721         # FIXME: This is necessary to assign the current node, but I do not
  722         # quite understand why... this needs some code review.
  723         context.currentNode = list_item_node
  724         context.parser.parseBlock(context, list_item_node, self.processText)
  725         context.currentNode = old_node
  726         context.restoreOffsets(offsets)
  727         # We eventually append the created list item node to the parent list
  728         # node
  729         list_node.appendChild(list_item_node)
  730         # We set the type attribute of the list if necessary
  731         if list_type == DEFINITION_LIST:
  732             list_node.setAttributeNS(None, "type", "definition")
  733         elif list_type == TODO_LIST:
  734             list_node.setAttributeNS(None, "type", "todo")
  735         elif list_type == ORDERED_LIST:
  736             list_node.setAttributeNS(None, "type", "ordered")
  737 
  738         # And recurse with other line items
  739         if next_item_match:
  740             # We set the offset in which the next_item Match object was
  741             # created, because match object start and end are relative
  742             # to the context offset at pattern matching time.
  743             list_item_node = self.process(context, next_item_match)
  744         # Or we have reached the block end
  745         else:
  746             context.setOffset(context.blockEndOffset)
  747 
  748         # We set the current node to be the list item node
  749         context.currentNode = list_item_node
  750         return list_item_node
  751 
  752     def processText( self, context, text ):
  753         text = context.parser.expandTabs(text)
  754         text = context.parser.normaliseText(text)
  755         return text
  756 
  757 #------------------------------------------------------------------------------
  758 #
  759 #  PreBlockParser
  760 #
  761 #------------------------------------------------------------------------------
  762 
  763 class PreBlockParser( BlockParser ):
  764     """Parses the content of a preformatted block, where every line is
  765     prefixed by '>   '."""
  766 
  767     def __init__( self ):
  768         BlockParser.__init__(self, "pre")
  769 
  770     def recognises( self, context ):
  771         for line in context.currentFragment().split("\n"):
  772             if line and not RE_PREFORMATTED.match(line):
  773                 return False
  774         return True
  775 
  776     def process( self, context, recogniseInfo ):
  777         text = ""
  778         for line in context.currentFragment().split("\n"):
  779             match = RE_PREFORMATTED.match(line)
  780             if match:
  781                 text += match.group(3) + "\n"
  782             else:
  783                 text += line + "\n"
  784         if text[-1] == "\n": text = text[:-1]
  785         pre_node = context.document.createElementNS(None, self.name)
  786         pre_node.appendChild(context.document.createTextNode(text))
  787         pre_node.setAttributeNS(None, "_start", str(context.getOffset()))
  788         pre_node.setAttributeNS(None, "_end", str(context.blockEndOffset))
  789         context.currentNode.appendChild(pre_node)
  790 
  791 class PreBlockParser2( BlockParser ):
  792     """Parses the content of a preformatted block which is delimited with
  793     '<<<' and '>>>' characters."""
  794 
  795     def __init__( self ):
  796         BlockParser.__init__(self, "pre")
  797 
  798     def recognises( self, context ):
  799         head_lines =  context.currentFragment().split("\n")
  800         if not head_lines: return False
  801         if self.isStartLine(context, head_lines[0]):
  802             indent = context.parser.getIndentation(head_lines[0])
  803             for line in head_lines[1:]:
  804                 if not line.replace("\t", " ").strip(): continue
  805                 if context.parser.getIndentation(line) < indent:
  806                     return False
  807         else:
  808             return False
  809         return True, indent
  810 
  811     def isStartLine( self, context, line ):
  812         line_indent = context.parser.getIndentation(line)
  813         if line.replace("\t", " ").strip() == "---":
  814             return True, line_indent
  815         else:
  816             return None
  817 
  818     def isEndLine( self, context, line, indent ):
  819         line_indent = context.parser.getIndentation(line)
  820         if line_indent != indent: return False
  821         line = line.replace("\t", " ").strip()
  822         return  line == "---"
  823 
  824     def findBlockEnd( self, context, indent ):
  825         # FIXME: Issue a warning if no end is found
  826         cur_offset = context.blockEndOffset + 1
  827         block_end = context.blockEndOffset
  828         lines = context.currentFragment().split("\n")
  829         if self.isEndLine(context, lines[-1], indent):
  830             return block_end
  831         while True:
  832             next_eol = context.documentText.find("\n", cur_offset)
  833             if next_eol == -1:
  834                 break
  835             line = context.documentText[cur_offset:next_eol]
  836             if self.isEndLine(context, line, indent):
  837                 block_end = next_eol + 1
  838                 break
  839             if line.strip() and context.parser.getIndentation(line) < indent:
  840                 break
  841             block_end = next_eol + 1
  842             cur_offset = block_end
  843         return block_end - 1
  844 
  845     def getCommonPrefix( self, linea, lineb ):
  846         if not lineb.replace("\t", " ").strip():
  847             return linea
  848         else:
  849             limit = 0
  850             max_limit = min(len(linea), len(lineb))
  851             while limit < max_limit and linea[limit] in "\t " and linea[limit] == lineb[limit]:
  852                 limit += 1
  853             assert linea[:limit] == lineb[:limit]
  854             return linea[:limit]
  855 
  856     def process( self, context, recogniseInfo ):
  857         result = []
  858         indent = recogniseInfo[1]
  859         context.setCurrentBlockEnd(self.findBlockEnd(context, indent))
  860         lines = context.currentFragment().split("\n")
  861         lines = lines[1:-1]
  862         prefix   = lines[0]
  863         for line in lines:
  864             prefix = self.getCommonPrefix(prefix, line)
  865         for line in lines:
  866             line = line[len(prefix):]
  867             result.append(line)
  868         text = "\n".join(result)
  869         pre_node = context.document.createElementNS(None, self.name)
  870         pre_node.appendChild(context.document.createTextNode(text))
  871         pre_node.setAttributeNS(None, "_start", str(context.getOffset()))
  872         pre_node.setAttributeNS(None, "_end", str(context.blockEndOffset))
  873         context.currentNode.appendChild(pre_node)
  874 
  875 #------------------------------------------------------------------------------
  876 #
  877 #  TableBlockParser
  878 #
  879 #------------------------------------------------------------------------------
  880 
  881 class Table:
  882     """The table class allows to easily create tables and then generate the
  883     XML objects from them."""
  884     
  885     def __init__( self ):
  886         # Table is an array of array of (char, string) where char is either
  887         # 'H' for header, or 'T' for text.
  888         self._table = []
  889         self._rows  = 0
  890         self._cols  = 0
  891         self._title = None
  892         self._id    = None
  893     
  894     def dimension( self ):
  895         return len(self._table[0]), len(self._table) 
  896 
  897     def getRow( self, y):
  898         return self._table[y]
  899 
  900     def _ensureCell( self, x, y ):
  901         """Ensures that the cell at the given position exists and returns its
  902         pair value."""
  903         while y >= len(self._table): self._table.append([])
  904         row = self._table[y]
  905         while x >= len(row): row.append(["T", None])
  906         self._cols = max(self._cols, x+1)
  907         self._rows = max(self._rows, y+1)
  908         return row[x]
  909         
  910     def setTitle( self, title ):
  911         """Sets the title for this table."""
  912         self._title = title.strip()
  913 
  914     def setID( self, id ):
  915         """Sets the id for this table."""
  916         self._id = id.strip()
  917 
  918     def appendCellContent( self, x, y, text ):
  919         cell_type, cell_text = self._ensureCell(x,y)
  920         if cell_text == None:
  921             self._table[y][x] = [cell_type, text]
  922         else:
  923             self._table[y][x] = [cell_type, cell_text + "\n" + text]
  924 
  925     def headerCell( self, x, y ):
  926         self._table[y][x] = ["H", self._ensureCell(x,y)[1]]
  927 
  928     def dataCell( self, x, y ):
  929         self._table[y][x] = ["T", self._ensureCell(x,y)[1]]
  930 
  931     def isHeader( self, x, y ):
  932         if len(self._table) < y or len(self._table[y]) < x: return False
  933         row = self._table[y]
  934         if x>=len(row): return False
  935         return self._table[y][x][0] == "H"
  936 
  937     def getNode( self, context, processText ):
  938         """Renders the table as a Kiwi XML document node."""
  939         table_node   = context.document.createElementNS(None, "Table")
  940         content_node = context.document.createElementNS(None, "Content")
  941         # We set the id
  942         if self._id:
  943             table_node.setAttributeNS(None, "id", self._id)
  944         # We take care of the title
  945         if self._title:
  946             caption_node = context.document.createElementNS(None, "Caption")
  947             caption_text = context.document.createTextNode(self._title)
  948             caption_node.appendChild(caption_text)
  949             table_node.appendChild(caption_node)
  950         # And now of the table
  951         for row in self._table:
  952             row_node = context.document.createElementNS(None, "Row")
  953             i = 0
  954             for cell_type, cell_text in row:
  955                 is_first = i == 0
  956                 is_last  = i == len(row) - 1
  957                 cell_node = context.document.createElementNS(None, "Cell")
  958                 if cell_type == "H":
  959                     cell_node.setAttributeNS(None, "type", "header")
  960                 # We create a temporary Content node that will stop the nodes
  961                 # from seeking a parent content
  962                 cell_content_node = context.document.createElementNS(None, "Content")
  963                 if is_last and len(row) != self._cols:
  964                     cell_node.setAttributeNS(None, "colspan", "%s" % (len(row) + 2 - i))
  965                 new_context = context.clone()
  966                 new_context.setDocumentText(cell_text)
  967                 new_context.currentNode = cell_content_node
  968                 new_context.parser.parseContext(new_context)
  969                 # This is slightly hackish, but we simply move the nodes there
  970                 for child in cell_content_node.childNodes:
  971                     cell_node.appendChild(child)
  972                 row_node.appendChild(cell_node)
  973                 i += 1
  974             content_node.appendChild(row_node)
  975         table_node.appendChild(content_node)
  976         return table_node
  977 
  978     def __repr__( self ):
  979         s = ""
  980         i = 0
  981         for row in self._table:
  982             s += "%2d: %s\n" % (i,row)
  983             i += 1
  984         return s
  985 
  986 class TableBlockParser( BlockParser ):
  987     """Parses the content of a tables"""
  988 
  989     def __init__( self ):
  990         BlockParser.__init__(self, "table")
  991 
  992     def recognises( self, context ):
  993         lines = context.currentFragment().strip().split("\n")
  994         if not len(lines)>1: return False
  995         title_match = RE_TITLE.match(lines[0])
  996         if title_match:
  997             if not len(lines) >= 3: return False
  998             start_match = RE_TABLE_ROW_SEPARATOR.match(lines[1])
  999         else:
 1000             start_match = RE_TABLE_ROW_SEPARATOR.match(lines[0])
 1001         end_match = RE_TABLE_ROW_SEPARATOR.match(lines[-1])
 1002         return start_match and end_match
 1003 
 1004     def process( self, context, recogniseInfo ):
 1005         y = 0
 1006         table = Table()
 1007         # For each cell in a row
 1008         rows = context.currentFragment().strip().split("\n")[:-1]
 1009         # We take care of the title
 1010         title_match = RE_TITLE.match(rows[0])
 1011         if title_match:
 1012             title_name = title_match.group(2).split("#",1)
 1013             title_id   = None
 1014             if len(title_name) == 2:
 1015                 title_name, title_id = title_name
 1016             table.setTitle(title_name)
 1017             table.setID(title_id)
 1018             rows = rows[2:]
 1019         else:
 1020             rows = rows[1:]
 1021         # The cells are separated by pipes (||)
 1022         for row in rows:
 1023             cells = []
 1024             x = 0
 1025             # Empty rows are simply ignored
 1026             if not row.strip(): continue
 1027             separator = RE_TABLE_ROW_SEPARATOR.match(row)
 1028             # If we have not found a separator yet, we simply ensure that the
 1029             # cell exists and appends content to it
 1030             if not separator:
 1031                 # If the separtor is not '||' it is '|'
 1032                 if  row.find("||") == -1:
 1033                     row = row.replace("|", "||")
 1034                 for cell in row.split("||"):
 1035                     cells.append(cell)
 1036                     # We remove leading or trailing borders (|)
 1037                     if cell and cell[0]  == "|": cell = cell[1:]
 1038                     if cell and cell[-1] == "|": cell = cell[:-1]
 1039                     table.appendCellContent(x,y,cell)
 1040                     # FIXME: Weird rule
 1041                     # The default cell type is the same as the above
 1042                     # cell, if any.
 1043                     #if y>0 and table.isHeader(x,y-1):
 1044                     #   table.headerCell(x,y)
 1045                     x += 1
 1046             # We move to the next row only when we encounter a separator. The
 1047             # analysis of the separtor will tell you if the above cell is a
 1048             # header or a data cell
 1049             else:
 1050                 # FIXME: This is wrong, see below
 1051                 if separator.group(1)[0] == "=":
 1052                     row_count = table.dimension()[1]
 1053                     if row_count > 0:
 1054                         for cell in table.getRow(row_count - 1):
 1055                             cell[0] = "H"
 1056                 if separator.group(1)[0] == "-":
 1057                     row_count = table.dimension()[1]
 1058                     if row_count > 0:
 1059                         for cell in table.getRow(row_count - 1):
 1060                             cell[0] = "T"
 1061                 # FIXME: Should handle vertical tables also
 1062                 # ==================================
 1063                 # HEADER || DATA
 1064                 # =======++-------------------------
 1065                 # ....
 1066                 offset = 0
 1067                 x      = 0
 1068                 # FIXME: Here cells is always empty
 1069                 for cell in cells:
 1070                     assert None, "Should not be here"
 1071                     if separator.group(1)[offset] == "=": table.headerCell(x,y)
 1072                     else: table.dataCell(x,y)
 1073                     offset += len(cell)
 1074                     x      += 1
 1075                 y += 1
 1076         context.currentNode.appendChild(table.getNode(context, self.processText))
 1077 
 1078 #------------------------------------------------------------------------------
 1079 #
 1080 #  MetaBlockParser
 1081 #
 1082 #------------------------------------------------------------------------------
 1083 
 1084 class MetaBlockParser( BlockParser ):
 1085     """Parses the content of a Meta block"""
 1086 
 1087     def __init__( self ):
 1088         BlockParser.__init__(self, "Meta")
 1089         #This is a binding from meta block section names to meta content
 1090         #parsers
 1091         self.field_parsers = {
 1092             u'abstract':        self.p_abstract,
 1093             u'acknowledgements':    self.p_ack,
 1094             u'author':      self.p_author,
 1095             u'authors':     self.p_author,
 1096             u'creation':        self.p_creation,
 1097             u'keywords':        self.p_keywords,
 1098             u'language':        self.p_language,
 1099             u'last-mod':        self.p_last_mod,
 1100             u'markup':      self.p_markup,
 1101             u'organisation':    self.p_organisation,
 1102             u'organization':    self.p_organisation,
 1103             u'revision':        self.p_revision,
 1104             u'type':        self.p_type,
 1105             u'reference':       self.p_reference
 1106         }
 1107 
 1108     def process( self, context, recogniseInfo ):
 1109         # Parses a particular field, with the given content
 1110         def parse_field( field ):
 1111             field = field.lower()
 1112             if self.field_parsers.get(field):
 1113                 self.field_parsers.get(field)(context, context.currentFragment())
 1114             else:
 1115                 context.parser.warning("Unknown Meta field: " + last_field,
 1116                 context)
 1117 
 1118         match  = True
 1119         offset = 0
 1120         last_field = None
 1121         # Iterates through the fields
 1122         while match != None:
 1123             match = RE_META_FIELD.search(context.currentFragment(), offset)
 1124             if match:
 1125                 if last_field != None:
 1126                     offsets = context.saveOffsets()
 1127                     # We set the current fragment to be the field value
 1128                     context.setCurrentBlock( context.getOffset() + offset,
 1129                     context.getOffset() + match.start() )
 1130                     parse_field(last_field)
 1131                     context.restoreOffsets(offsets)
 1132                 last_field = match.group(2)
 1133                 offset = match.end()
 1134 
 1135         # And parse the last field
 1136         if last_field != None:
 1137             offsets = context.saveOffsets()
 1138             context.setCurrentBlock( context.getOffset() + offset,
 1139             context.blockEndOffset )
 1140             parse_field(last_field)
 1141             context.restoreOffsets(offsets)
 1142         else:
 1143             context.parser.warning("Empty Meta block.", context)
 1144 
 1145     # Field parsers __________________________________________________________
 1146 
 1147     def p_abstract( self, context, content ):
 1148         old_node = context.currentNode 
 1149         abstract_node = context.document.createElementNS(None, "Abstract")
 1150         context.currentNode = abstract_node
 1151         context.parser.parseBlock(context, abstract_node, self.processText)
 1152         context.currentNode  = old_node
 1153         context.header.appendChild(abstract_node)
 1154 
 1155     def p_ack( self, context, content ):
 1156         old_node = context.currentNode 
 1157         ack_node = context.document.createElementNS(None, "Acknowledgement")
 1158         context.currentNode = ack_node
 1159         context.parser.parseBlock(context, ack_node, self.processText)
 1160         context.currentNode  = old_node
 1161         context.header.appendChild(ack_node)
 1162 
 1163     def p_author( self, context, content ):
 1164         authors_node = context.document.createElementNS(None, "Authors")
 1165         text = self._flatify(content).strip()
 1166         # Cuts the trailing dot if present
 1167         if text[-1]==u'.': text=text[:-1]
 1168         for author in text.split(','):
 1169             author_node = context.document.createElementNS(None, "person")
 1170             # We take care of email
 1171             email_match = RE_META_AUTHOR_EMAIL.search(author)
 1172             if email_match:
 1173                 author = author[:email_match.start()]
 1174                 author_node.setAttributeNS(None, "email", email_match.group(1))
 1175             text_node   = context.document.createTextNode(author.strip())
 1176             author_node.appendChild(text_node)
 1177             authors_node.appendChild(author_node)
 1178         context.header.appendChild(authors_node)
 1179     
 1180     def p_creation( self, context, content ):
 1181         creation_node = context.document.createElementNS(None, "creation")
 1182         if self._parseDateToNode( context, content, creation_node ):
 1183             context.header.appendChild(creation_node)
 1184     
 1185     def _parseDateToNode( self, context, content, node ):
 1186         content = content.strip()
 1187         date = content.split("-")
 1188         for elem in date:
 1189             format = None
 1190             try:
 1191                 format = "%0" + str(len(elem)) + "d"
 1192                 format = format % (int(elem))
 1193             except:
 1194                 pass
 1195             if len(date)!=3 or format != elem:
 1196                 context.parser.error("Malformed date meta field: " + content,
 1197                 context)
 1198                 context.parser.tip("Should be YYYY-MM-DD", context)
 1199                 return False
 1200         date = map(lambda x:int(x), date)
 1201         if date[1] < 1 or date[1] > 12:
 1202             context.parser.warning("Bad month number: " + str(date[1]),
 1203             context)
 1204         if date[2] < 1 or date[2] > 31:
 1205             context.parser.warning("Bad day number: " + str(date[2]),
 1206             context)
 1207         node.setAttributeNS(None, "year",  str(date[0]))
 1208         node.setAttributeNS(None, "month", str(date[1]))
 1209         node.setAttributeNS(None, "day",   str(date[2]))
 1210         return True
 1211 
 1212     def p_keywords( self, context, content ):
 1213         keywords_node = context.document.createElementNS(None, "Keywords")
 1214         text = self._flatify(content).strip()
 1215         # Cuts the trailing dot if present
 1216         if text[-1]==u'.': text=text[:-1]
 1217         for keyword in text.split(','):
 1218             keyword_node = context.document.createElementNS(None, "keyword")
 1219             text_node   = context.document.createTextNode(keyword.strip())
 1220             keyword_node.appendChild(text_node)
 1221             keywords_node.appendChild(keyword_node)
 1222         context.header.appendChild(keywords_node)
 1223 
 1224     def p_last_mod( self, context, content ):
 1225         lastmod_node = context.document.createElementNS(None, "modification")
 1226         if self._parseDateToNode( context, content, lastmod_node ):
 1227             context.header.appendChild(lastmod_node)
 1228 
 1229     def p_revision( self, context, content ):
 1230         revision_node = context.document.createElementNS(None, "revision")
 1231         text_node   = context.document.createTextNode(content.strip())
 1232         revision_node.appendChild(text_node)
 1233         context.header.appendChild(revision_node)
 1234 
 1235     def p_type( self, context, content ):
 1236         match = RE_META_TYPE.match(content)
 1237         if match:
 1238             style_node = context.document.createElementNS(None, "type")
 1239             style_node.setAttributeNS(None, "name", match.group(1).lower())
 1240             if match.group(3):
 1241                 style_node.setAttributeNS(None, "style", match.group(3).lower())
 1242             context.header.appendChild(style_node)
 1243         else:
 1244             context.parser.warning("Malformed meta type field: " + content,
 1245             context)
 1246 
 1247     def p_reference( self, context, content ):
 1248         ref_node = context.document.createElementNS(None, "reference")
 1249         ref_node.setAttributeNS(None, "id", content)
 1250         context.header.appendChild(ref_node)
 1251 
 1252     def p_language( self, context, content ):
 1253         lang = content.strip()[0:2].upper()
 1254         lang_node = context.document.createElementNS(None, "language")
 1255         #We assign the language code
 1256         if len(lang)>=2 and lang.upper()[0:2] in LANGUAGE_CODES:
 1257             lang_code = unicode(lang.upper()[0:2])
 1258         else:
 1259             lang_code = "UK"
 1260         lang_node.setAttributeNS(None, "code", lang_code)
 1261         context.header.appendChild(lang_node)
 1262 
 1263     def p_organisation( self, context, content ):
 1264         old_node = context.currentNode 
 1265         org_node = context.document.createElementNS(None, "Organisation")
 1266         context.currentNode = org_node
 1267         context.parser.parseBlock(context, org_node, self.processText)
 1268         context.currentNode  = old_node
 1269         context.header.appendChild(org_node)
 1270 
 1271     def p_markup( self, context, content ):
 1272         """Parses custom markup and registers the new parsers in the current
 1273         Kiwi parser"""
 1274         # TODO
 1275         match = 1
 1276         start = 0
 1277         end   = len(content)
 1278         custom_markup = RE_CUSTOM_MARKUP
 1279         while match!=None and start<end:
 1280             match = custom_markup.search(content,start)
 1281             if match:
 1282                 regexp  = match.group(1)
 1283                 element = match.group(2)
 1284                 option  = match.group(4)
 1285                 if option == None:
 1286                     self.parser.txt_parsers.append(InlineParser(self.parser,
 1287                     element, regexp))
 1288                 elif option.lower() == u"empty":
 1289                     self.parser.txt_parsers.append(EmptyInlineParser(self.parser,
 1290                     element, regexp))
 1291                 else:
 1292                     #FIXME: OUTPUT ERROR FOR UNKNOWN OPTION
 1293                     pass
 1294                 start = match.end()
 1295 
 1296     def _flatify( self, text ):
 1297         new_text = u""
 1298         for line in text.split(): new_text += line+u" "
 1299         return new_text
 1300 
 1301     def processText( self, context, text ):
 1302         assert text
 1303         text = context.parser.expandTabs(text)
 1304         text =  context.parser.normaliseText(text)
 1305         return text
 1306 
 1307 #------------------------------------------------------------------------------
 1308 #
 1309 # ReferenceEntryBlockParser
 1310 #
 1311 #------------------------------------------------------------------------------
 1312 
 1313 class ReferenceEntryBlockParser( BlockParser ):
 1314     """Parses the content of a Reference entry"""
 1315 
 1316     def __init__( self ):
 1317         BlockParser.__init__(self, "Entry")
 1318 
 1319     def recognises( self, context ):
 1320         assert context
 1321         return RE_REFERENCE_ENTRY.match(context.currentFragment())
 1322 
 1323     def process( self, context, match ):
 1324         offsets = context.saveOffsets()
 1325         ranges  = []
 1326         offset  = 0
 1327         # We get the start and end offsets of entry blocks
 1328         while True:
 1329             m = RE_REFERENCE_ENTRY.search(context.currentFragment(), offset)
 1330             if not m: break
 1331             ranges.append((m, m.start()))
 1332             offset = m.end()
 1333         ranges.append((None, len(context.currentFragment())))
 1334         new_ranges = []
 1335         for i in range(0, len(ranges)-1):
 1336             new_ranges.append((ranges[i][0], ranges[i][1], ranges[i+1][1]))
 1337         ranges = new_ranges
 1338         # We loop for each found reference entry
 1339         for match, start_offset, end_offset in ranges:
 1340             entry_name  = match.group(1)
 1341             # We set the current block and process it
 1342             sub_offsets = context.saveOffsets()
 1343             context.setCurrentBlock(context.getOffset() + match.end(), context.getOffset() + end_offset)
 1344             entry_node = context.document.createElementNS(None, "Entry")
 1345             entry_node.setAttributeNS(None, "id", entry_name)
 1346             context.parser.parseBlock(context, entry_node, self.processText)
 1347             context.references.appendChild(entry_node)
 1348             context.restoreOffsets(sub_offsets)
 1349         context.restoreOffsets(offsets)
 1350 
 1351 # EOF