"Fossies" - the Fresh Open Source Software Archive 
Member "Tahchee-1.0.0/Sources/tahchee/plugins/_kiwi/blocks.py" (22 Oct 2009, 49635 Bytes) of package /linux/privat/old/tahchee-1.0.0.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style:
standard) with prefixed line numbers.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "blocks.py" see the
Fossies "Dox" file reference documentation.
1 #!/usr/bin/env python
2 # Encoding: iso-8859-1
3 # vim: tw=80 ts=4 sw=4 noet
4 # -----------------------------------------------------------------------------
5 # Project : Kiwi
6 # Module : Block parsers
7 # -----------------------------------------------------------------------------
8 # Author : Sebastien Pierre <sebastien@type-z.org>
9 # License : Revised BSD License
10 # -----------------------------------------------------------------------------
11 # Creation date : 19-Nov-2003
12 # Last mod. : 07-Oct-2009
13 # -----------------------------------------------------------------------------
14
15 import re, string
16 from formatting import *
17
# Placeholder module documentation, and pychecker configuration listing the
# parameter names that should not be reported as unused.
__doc__ = """Write module doc here"""
__pychecker__ = "unusednames=recogniseInfo,content"

# Warning text reported by ListItemBlockParser when nothing follows a list
# item marker in the current block.
EMPTY_LIST_ITEM = "Empty list item."

# Node names that the paragraph and definition parsers accept as containers
# when they climb the tree looking for a parent node.
BLOCK_ELEMENTS = (
    "Block",
    "ListItem",
    "Definition",
    "Content",
    "Chapter",
    "Section",
    "Appendix",
)

# Kinds of list (see ListItemBlockParser, which maps them to the "type"
# attribute of List nodes).
STANDARD_LIST, DEFINITION_LIST, TODO_LIST, ORDERED_LIST = 1, 2, 3, 4

# Kinds of list item (see ListItemBlockParser, which maps the todo kinds to
# the "todo" attribute of ListItem nodes).
STANDARD_ITEM, TODO_ITEM, TODO_DONE_ITEM = 100, 101, 102
33
34 #------------------------------------------------------------------------------
35 #
36 # Regular expressions
37 #
38 #------------------------------------------------------------------------------
39
# re.LOCALE may only be combined with byte-string patterns on Python 3 (str
# patterns raise ValueError), so the flag is applied only where the native
# string type is the byte string, i.e. on Python 2. Behaviour on Python 2 is
# unchanged.
if isinstance(u"", str):
    _RE_LOCALE = 0
else:
    _RE_LOCALE = re.LOCALE

# Any (possibly empty) run of whitespace.
RE_BLANK = re.compile(r"\s*", _RE_LOCALE|re.MULTILINE)

# Document title line: '== Title' (group 1 is the marker, group 2 the text).
TITLE = r"^\s*(==)([^=].+)$"
RE_TITLE = re.compile(TITLE, _RE_LOCALE|re.MULTILINE)
# Document header field: '-- Name: value' (the value is optional).
TITLE_HEADER = r"^\s*(--)([^\:]+):(.+)?$"
# Either of the above; the TITLE_HEADER groups are numbered 3, 4 and 5 here.
RE_TITLES = re.compile(u"%s|%s" % (TITLE, TITLE_HEADER), _RE_LOCALE|re.MULTILINE)

# Section numbering prefix such as '1.', '1.2', 'A.' or '2.b.'.
# The letter class used to be written [A-z], which also matched the
# characters [ \ ] ^ _ ` lying between 'Z' and 'a' in ASCII.
SECTION_HEADING = r"^\s*((([0-9]+|[A-Za-z])\.)+([0-9]+|[A-Za-z])?\.?)"
RE_SECTION_HEADING= re.compile(SECTION_HEADING, _RE_LOCALE)
# Section heading introduced by one or more '=' (group 1 includes the
# whitespace following the '=' characters).
SECTION_HEADING_ALT = r"^(\=+\s*).+$"
RE_SECTION_HEADING_ALT= re.compile(SECTION_HEADING_ALT, _RE_LOCALE)
# A line made of at least three characters among '*', '-', '=' and '#'.
SECTION_UNDERLINE = r"^\s*[\*\-\=#][\*\-\=#][\*\-\=#]+\s*$"
RE_SECTION_UNDERLINE = re.compile(SECTION_UNDERLINE, _RE_LOCALE|re.MULTILINE)

# Definition term followed by '::' (group 1 is the term).
DEFINITION_ITEM = r"^(\s*(\:[^\:]|[^\:])+)\:\:+\s*(\n+\s*|\s*\|\s*\n)*"
RE_DEFINITION_ITEM = re.compile(DEFINITION_ITEM, _RE_LOCALE|re.MULTILINE)

# Optional 'name' and ':title' followed by at least four underscores.
TAGGED_BLOCK = r"^\s*(([^_]+\s*)(\:[^_]+)?)?(____+)\s*$"
RE_TAGGED_BLOCK = re.compile(TAGGED_BLOCK, re.MULTILINE | _RE_LOCALE)
# List item marker: '-', '*)', a digit or letter followed by ')' or '/', or
# a todo box such as '[ ]' or '[x]'. Group 1 is the leading whitespace and
# group 2 the marker. The alphanumeric class used to be [0-9A-z], which also
# matched [ \ ] ^ _ ` as markers.
LIST_ITEM = r"^(\s*)(-|\*\)|[0-9A-Za-z][\)/]|\[[ \-\~xX]\])\s*"
RE_LIST_ITEM = re.compile(LIST_ITEM, re.MULTILINE | _RE_LOCALE)
LIST_HEADING = r"(^\s*[^:{().<]*:)"
RE_LIST_HEADING = re.compile(LIST_HEADING, re.MULTILINE | _RE_LOCALE)
# List item heading: text ended by ':' + newline or by '::' (group 1), or
# text ended by '/' or '\' + newline (group 3).
LIST_ITEM_HEADING = r"^([^:]+(:\s*\n\s*|::\s*))|([^/\\]+[/\\]\s*\n\s*)"
RE_LIST_ITEM_HEADING = re.compile(LIST_ITEM_HEADING, re.MULTILINE|_RE_LOCALE)
# Digits followed by ')' or '.'.
RE_NUMBER = re.compile(r"\d+[\)\.]")

# Preformatted line: '>' followed by a tab or a space (group 3 is the text).
PREFORMATTED = r"^(\s*\>(\t| ))(.*)$"
RE_PREFORMATTED = re.compile(PREFORMATTED, _RE_LOCALE)

# Custom markup declaration: - "text" = name (option)
CUSTOM_MARKUP = r'\s*-\s*"([^"]+)"\s*[=:]\s*([\w\-_]+)(\s*\(\s*(\w+)\s*\))?'
RE_CUSTOM_MARKUP = re.compile(CUSTOM_MARKUP, _RE_LOCALE|re.MULTILINE)

META_TYPE = r"\s*(\w+)\s*(\((\w+)\))?"
RE_META_TYPE = re.compile(META_TYPE, _RE_LOCALE|re.MULTILINE)

META_FIELD = r'(^|\n)\s*([\w\-]+)\s*:\s*'
RE_META_FIELD= re.compile(META_FIELD, _RE_LOCALE)
RE_META_AUTHOR_EMAIL = re.compile(r"\<([^>]+)\>", _RE_LOCALE)

# Reference entry: whitespace followed by '[name]:'.
REFERENCE_ENTRY = r"\s+\[([^\]]+)]:"
RE_REFERENCE_ENTRY = re.compile(REFERENCE_ENTRY, _RE_LOCALE|re.MULTILINE)

# A line made only of '-' and '+', or only of '=' and '+'.
TABLE_ROW_SEPARATOR = r"^\s*([\-\+]+|[\=\+]+)\s*$"
RE_TABLE_ROW_SEPARATOR = re.compile(TABLE_ROW_SEPARATOR)

LANGUAGE_CODES = ("EN", "FR", "DE", "UK" )
87
88 #------------------------------------------------------------------------------
89 #
90 # Error messages
91 #
92 #------------------------------------------------------------------------------
93
# Error message text; presumably reported when a title is nested deeper than
# supported. NOTE(review): not referenced in the visible part of this module;
# confirm where it is used.
94 ERROR_TITLE_TOO_DEEPLY_NESTED = "Title too deeply nested"
95
96 #------------------------------------------------------------------------------
97 #
98 # BlockParser
99 #
100 #------------------------------------------------------------------------------
101
class BlockParser:
    """Base class of the block parsers.

    A block parser is asked whether it recognises the current block
    (`recognises`), and if so is asked to process it (`process`), being
    handed back whatever `recognises` returned.
    """

    def __init__( self, name ):
        """Stores the parser name in `self.name`."""
        self.name = name

    def recognises( self, context ):
        """Tells whether the current block is recognised. Subclasses return
        their recognition information when it is, and False (or None) when
        it is not. The base implementation recognises nothing."""
        return False

    def process( self, context, recogniseInfo ):
        """Processes a recognised block. The base implementation does
        nothing and returns None."""
        pass

    def processText( self, context, text ):
        """Returns the given text unchanged. The context must be truthy,
        otherwise an AssertionError carrying the text is raised."""
        assert context, text
        return text
119
120 #------------------------------------------------------------------------------
121 #
122 # ParagraphBlockParser
123 #
124 #------------------------------------------------------------------------------
125
class ParagraphBlockParser(BlockParser):
    """Parses a paragraph block. This parser recognises every block it is
    given, so it should not appear in the list of block parsers."""

    def __init__( self ):
        BlockParser.__init__(self, "Paragraph")

    def recognises( self, context ):
        """Always returns True."""
        return True

    def process( self, context, recogniseInfo ):
        """Creates a Paragraph node for the current block and appends it to
        the proper parent node.

        The paragraph content is parsed with `context.parser.parseBlock`.
        Whitespace-only text nodes at both ends of the paragraph are
        removed, and one leading and one trailing space are stripped. A
        paragraph left without any child is not appended: a warning is
        issued instead. Returns None."""
        paragraph_depth = context.getBlockIndentation()
        # We move up to the first block element whose indentation is lower
        # than or equal to the indentation of this paragraph
        while context.currentNode.nodeName not in BLOCK_ELEMENTS \
        or (context.currentNode.getAttribute("_indent") \
        and int(context.currentNode.getAttribute("_indent")) > paragraph_depth):
            context.currentNode = context.currentNode.parentNode
        # If the last child of the current node is a paragraph that is less
        # indented than this one, we wrap this paragraph into a new Block
        # (this is how "indented paragraphs", the equivalent of blockquotes,
        # are created).
        siblings = context.currentNode.childNodes
        if siblings \
        and siblings[-1].nodeName == "Paragraph" \
        and siblings[-1].getAttribute("_indent") \
        and int(siblings[-1].getAttribute("_indent")) < paragraph_depth:
            block_node = context.document.createElementNS(None, "Block")
            block_node.setAttributeNS(None, "_indent", str(paragraph_depth))
            context.currentNode.appendChild(block_node)
            context.currentNode = block_node
        # Now we can create and parse the paragraph itself
        para_node = context.document.createElementNS(None, self.name)
        para_node.setAttributeNS(None, "_indent", str(paragraph_depth))
        para_node.setAttributeNS(None, "_start", str(context.blockStartOffset))
        para_node.setAttributeNS(None, "_end", str(context.blockEndOffset))
        context.parser.parseBlock(context, para_node, self.processText)
        # We look for the leading and trailing text nodes. The paragraph may
        # have no child at all, in which case there is nothing to trim.
        first_text_node = None
        last_text_node = None
        children = para_node.childNodes
        if len(children) > 0:
            if children[0].nodeType == para_node.TEXT_NODE:
                first_text_node = children[0]
            if children[-1].nodeType == para_node.TEXT_NODE:
                last_text_node = children[-1]
        # With a single child, both variables designate the same node, which
        # must not be removed twice.
        only_child = len(children) == 1
        # We remove the first and last text nodes when they are blank
        if first_text_node is not None and first_text_node.data.strip() == "":
            para_node.removeChild(first_text_node)
            first_text_node = None
            if only_child:
                last_text_node = None
        if last_text_node is not None and last_text_node.data.strip() == "":
            para_node.removeChild(last_text_node)
            last_text_node = None
        # We strip one leading and one trailing space
        if first_text_node is not None and len(first_text_node.data) > 0 \
        and first_text_node.data[0] == " ":
            first_text_node.data = first_text_node.data[1:]
        if last_text_node is not None and len(last_text_node.data) > 0 \
        and last_text_node.data[-1] == " ":
            last_text_node.data = last_text_node.data[:-1]
        # FIXME: Maybe the paragraph contains text nodes with only spaces ?
        if len(para_node.childNodes) > 0:
            context.currentNode.appendChild(para_node)
        else:
            context.parser.warning("Empty paragraph removed", context)

    def processText( self, context, text ):
        """Expands the tabs of the given text and normalises it, using the
        context parser."""
        assert text
        text = context.parser.expandTabs(text)
        text = context.parser.normaliseText(text)
        return text
193
194 #------------------------------------------------------------------------------
195 #
196 # TaggedBlockParser
197 #
198 #------------------------------------------------------------------------------
199
class TaggedBlockParser(BlockParser):
    """Parses a tagged block, that is a block introduced by a line made of
    an optional name, an optional ':title' and a run of underscores. Notes
    are the common example of tagged blocks."""

    def __init__( self ):
        BlockParser.__init__(self, "TaggedBlock")

    def recognises( self, context ):
        """Matches the first non-blank line of the current fragment against
        RE_TAGGED_BLOCK. Returns the match object, or None when the line
        does not match or when the fragment has no non-blank line."""
        non_blank = [l for l in context.currentFragment().split("\n") if l.strip()]
        if non_blank:
            return RE_TAGGED_BLOCK.match(non_blank[0])
        return None

    def _goToParent( self, thisblock, parent ):
        """Returns the parent of the given `parent` node when it is a
        'Block' node, and the `parent` node itself otherwise (including
        when it is empty)."""
        if parent and parent.nodeName == "Block":
            return parent.parentNode
        return parent

    def process( self, context, recogniseInfo ):
        """Creates a Block node for an opening tag, parses the block content
        into it, and makes it the current node. Nothing is done when the
        match has no tag name. Returns None."""
        tagname = recogniseInfo.group(2)
        tagtitle = recogniseInfo.group(3)
        if not tagname:
            return
        if tagname[0] != "_":
            # This is an opening tag
            # TODO: Asserts we are not already in a specific block
            block_depth = context.getBlockIndentation()
            block_node = context.document.createElementNS(None, "Block")
            block_node.setAttributeNS(None, "type", tagname.strip().lower())
            block_node.setAttributeNS(None, "_indent",str(block_depth))
            if tagtitle:
                # The title group starts with the ':' separator
                block_node.setAttributeNS(None, "title", tagtitle[1:].strip())
            # We skip the tag line and parse what follows into the block
            context.increaseOffset(len(recogniseInfo.group()))
            context.parser.parseBlock(context, block_node, self.processText)
            # A tagged block is not nested into the Block that precedes it
            context.currentNode = self._goToParent( block_node, context.currentNode)
            context.currentNode.appendChild(block_node)
            context.currentNode = block_node
            assert context.currentNode
        else:
            # This is a closing tag: we leave the enclosing Block
            # NOTE(review): RE_TAGGED_BLOCK captures the tag name with
            # '[^_]+', so the name cannot start with '_' and this branch
            # looks unreachable -- confirm how closing tags should be
            # detected.
            while context.currentNode.nodeName != "Block":
                context.currentNode = context.currentNode.parentNode
            context.currentNode = context.currentNode.parentNode
244
245 #------------------------------------------------------------------------------
246 #
247 # CommentBlockParser
248 #
249 #------------------------------------------------------------------------------
250
class CommentBlockParser(BlockParser):
    """Parses a comment block, that is a block where every non-blank line
    starts with '#'."""

    def __init__( self ):
        BlockParser.__init__(self, "CommentBlock")

    def recognises( self, context ):
        """Returns False as soon as a non-blank line of the current fragment
        does not start with '#' (leading whitespace ignored), and True
        otherwise."""
        assert context and context.parser.commentParser
        for raw_line in context.currentFragment().split("\n"):
            stripped = raw_line.strip()
            if stripped and stripped[0] != "#":
                return False
        return True

    def process( self, context, recogniseInfo ):
        """Appends the current fragment as a comment node to the current
        node, and moves the offset to the end of the block."""
        comment_node = context.document.createComment(
            self.processText(context, context.currentFragment()))
        context.currentNode.appendChild(comment_node)
        context.setOffset(context.blockEndOffset)
269
270
271 #------------------------------------------------------------------------------
272 #
273 # MarkupBlockParser
274 #
275 #------------------------------------------------------------------------------
276
class MarkupBlockParser(BlockParser):
    """Parses a custom markup block, that is a markup element which makes
    up the whole block."""

    def __init__( self ):
        BlockParser.__init__(self, "MarkupBlock")

    def recognises( self, context ):
        """Tells whether the current fragment is a standalone markup
        element.

        Returns the parsed node (with the attributes of the start tag set on
        it) when the markup produced a node, True when the markup is empty,
        and False when the fragment is not a standalone markup element."""
        assert context and context.parser.markupParser
        markup_parser = context.parser.markupParser
        offset, match = markup_parser.recognises(context)
        # The markup must be a start tag...
        if not (match and markup_parser.isStartTag(match)):
            return False
        # ...preceded by whitespace only
        if len(context.currentFragment()[:match.start()].strip()) != 0:
            return False
        # We parse the tag into a scratch node to know where it ends
        dummy_node = context.document.createElementNS(None, "Dummy")
        match_end = markup_parser.parse(context, dummy_node, match)
        # The markup MUST END AFTER the end of its start tag, and MUST BE
        # FOLLOWED BY SPACES ONLY, otherwise this is a markup inlined into a
        # paragraph and not a standalone block.
        if not (match_end > match.end()):
            return False
        if len(context.currentFragment()[match_end:].strip()) != 0:
            return False
        # No child node means that the block is empty
        if len(dummy_node.childNodes) < 1:
            return True
        result_node = dummy_node.childNodes[0]
        # We take care of the attributes
        attributes = context.parseAttributes(match.group(2))
        for key, value in attributes.items():
            result_node.setAttributeNS(None, key, value)
        return result_node

    def process( self, context, recogniseInfo ):
        """Appends the recognised node (if any) to the current node, and
        moves the offset to the end of the block."""
        if recogniseInfo!=True:
            context.currentNode.appendChild(recogniseInfo)
        context.setOffset(context.blockEndOffset)
319
320
321 #------------------------------------------------------------------------------
322 #
323 # TitleBlockParser
324 #
325 #------------------------------------------------------------------------------
326
class TitleBlockParser(BlockParser):
    """Parses the document title lines ('== Title') and the document header
    fields ('-- Name: value')."""

    def __init__( self ):
        BlockParser.__init__(self, "title")

    def recognises( self, context ):
        """Collects the consecutive title and header lines found at the
        current offset, advancing the context offset past each of them.

        Returns None when the document content already has children, False
        when the first line is not a title or header line, and the list of
        match objects otherwise."""
        matches = []
        if context.content.childNodes: return None
        while not context.blockEndReached():
            match = RE_TITLES.match(context.currentFragment())
            if match is None:
                return matches or False
            context.increaseOffset(match.end())
            matches.append(match)
        return matches

    def _processLine( self, line ):
        pass

    def process( self, context, recogniseInfo ):
        """Adds the recognised titles to the 'Title' element of the document
        header, and the recognised header fields to its 'Meta' element, then
        moves the offset to the end of the block.

        `recogniseInfo` is the non-empty list of matches returned by
        `recognises`. A header field without a value ('-- Name:') produces
        a meta node with an empty text."""
        assert recogniseInfo
        for match in recogniseInfo:
            if match.group(1):
                # This is a '== Title' line
                titleNode = context.ensureElement( context.header, "Title" )
                # We get the content of the title
                # NOTE(review): `Upper` is not defined in this module; it is
                # presumably provided by `from formatting import *`.
                titleText = Upper(match.group(2) or match.group(4))
                # We prefix with 'sub' or 'subsub' depending on the number
                # of preceding titles
                previous_titles = [n for n in titleNode.childNodes
                    if n.nodeName.endswith("title")]
                titleType = u"sub" * len(previous_titles) + u"title"
                # We add the node to the document tree
                resultNode = context.ensureElement(titleNode, titleType)
                titleNode.appendChild(resultNode)
                resultNode.appendChild(context.document.createTextNode(
                    self.processText(context, titleText)))
            elif match.group(3):
                # This is a '-- Name: value' line
                metaNode = context.ensureElement( context.header, "Meta" )
                header_name = match.group(4).strip()
                # The value group is optional in the pattern, so it is None
                # for a header such as '-- Name:'
                header_text = (match.group(5) or "").strip()
                # We prepare the header node
                node = context.document.createElementNS(None, "meta")
                node.setAttributeNS(None, "name", header_name)
                node.appendChild(context.document.createTextNode(
                    self.processText(context, header_text)))
                # And we add it to the document header
                metaNode.appendChild(node)
            else:
                raise Exception("We should not be here ! " + match.group())
        context.setOffset(context.blockEndOffset)

    def processText( self, context, text ):
        """Strips the given text and normalises it with the context
        parser."""
        return context.parser.normaliseText(text.strip())
381
382 #------------------------------------------------------------------------------
383 #
384 # SectionBlockParser
385 #
386 #------------------------------------------------------------------------------
387
class SectionBlockParser(BlockParser):
    """Parses a section heading: a heading prefixed by a numbering ('1.2'),
    a heading prefixed by '=' characters, or a heading underlined by a
    separator line."""

    def __init__( self ):
        BlockParser.__init__(self, "Section")

    def recognises( self, context ):
        """Tells whether the current fragment is a section heading.

        Returns a `(pattern, match)` couple, where `pattern` is the compiled
        regular expression that produced `match` (one of RE_SECTION_HEADING,
        RE_SECTION_HEADING_ALT and RE_SECTION_UNDERLINE), or None when the
        fragment is not a section heading."""
        fragment = context.currentFragment()
        # We look for the number prefix; when there is one, the underline
        # (if any) takes precedence over the prefix
        numbered = RE_SECTION_HEADING.match(fragment)
        if numbered:
            underline = RE_SECTION_UNDERLINE.search(fragment)
            if underline:
                return (RE_SECTION_UNDERLINE, underline)
            return (RE_SECTION_HEADING, numbered)
        # We return directly for a section prefixed by '=='
        match_alt = RE_SECTION_HEADING_ALT.match(fragment)
        if match_alt:
            return (RE_SECTION_HEADING_ALT, match_alt)
        # Or a separator followed by blank space
        underline = RE_SECTION_UNDERLINE.search(fragment)
        if not underline:
            return None
        # If we reached the end of the block, and there is something before
        # the separator, this is OK
        # NOTE(review): underline.end() is relative to the fragment while
        # blockEndOffset is compared below as an absolute offset (the offset
        # is added there) -- confirm whether this test is as intended.
        if underline.end() == context.blockEndOffset \
        and fragment[:underline.start()].strip():
            return (RE_SECTION_UNDERLINE, underline)
        # Otherwise the rest of the block must be blank
        blank_match = RE_BLANK.match(fragment[underline.end():])
        if blank_match.end() + underline.end() + context.getOffset() \
        == context.blockEndOffset:
            return (RE_SECTION_UNDERLINE, underline)
        # There is a trailing text
        return None

    def process( self, context, recogniseInfo ):
        """Creates a Section node (with its Heading and Content children)
        for the recognised heading, appends it to its parent section, and
        makes the new Content node the current node.

        `recogniseInfo` is the `(pattern, match)` couple returned by
        `recognises`. Returns None."""
        context.ensureParent( ("Content", "Appendix", "Chapter", "Section") )
        matched_type, match = recogniseInfo
        section_indent = context.getBlockIndentation()
        trail = match.group().strip()
        # RULE:
        # A section underlined with '==' weights more than a section
        # underlined with '--', which weights more than a section
        # underlined with nothing. This means that if you have
        #
        # 1. One
        # ======
        #
        # 2. Two
        # ------
        #
        # 3. Three
        #
        # These sections will all be children of the previous section
        if trail.endswith("=="):
            section_weight = 2
        elif trail.endswith("--"):
            section_weight = 1
        else:
            section_weight = 0
        #
        # FIRST STEP - We detect section text bounds
        #
        block_start = context.blockStartOffset
        block_end = context.blockEndOffset
        # An underlined section ends where the underline starts
        if matched_type == RE_SECTION_UNDERLINE:
            block_end = context.getOffset() + match.start()
        # A '=='-prefixed section starts after the prefix
        if matched_type == RE_SECTION_HEADING_ALT:
            block_start = context.getOffset() + match.start() + len(match.group(1))
            block_end = context.getOffset() + match.end()
        # We look for a number prefix
        heading_text = context.fragment(block_start, block_end)
        prefix_match = RE_SECTION_HEADING.match(heading_text)
        dots_count = 0
        if prefix_match:
            numbering = prefix_match.group()
            dots_count = len([part for part in numbering.split(".") if part])
            # The prefix was matched against heading_text, which starts at
            # block_start: its end is therefore relative to block_start
            # (which differs from the current offset for '=='-prefixed
            # headings).
            block_start = block_start + prefix_match.end()
        if matched_type == RE_SECTION_HEADING_ALT:
            # NOTE(review): group(1) includes the whitespace that follows
            # the '=' characters, so that whitespace counts too.
            dots_count += len(match.group(1))
        # We make sure that we end the section before the block delimiter
        delim_match = RE_SECTION_UNDERLINE.search(context.currentFragment())
        if delim_match:
            block_end = context.getOffset() + delim_match.start()
        section_level = dots_count - section_weight
        context.currentNode = context.getParentSection(section_level, section_indent)
        section_depth = context.getDepthInSection(context.currentNode) + 1
        #
        # SECOND STEP - We create the section
        #
        section_node = context.document.createElementNS(None, "Section")
        section_node.setAttributeNS(None, "_indent", str(section_indent ))
        section_node.setAttributeNS(None, "_depth", str(section_depth))
        section_node.setAttributeNS(None, "_start", str(block_start))
        section_node.setAttributeNS(None, "_sstart", str(block_start))
        heading_node = context.document.createElementNS(None, "Heading")
        section_node.appendChild(heading_node)
        # The heading is parsed within its own bounds, after which the
        # offsets of the context are restored
        offsets = context.saveOffsets()
        context.blockEndOffset = block_end
        context.setOffset(block_start)
        context.parser.parseBlock(context, heading_node, self.processText)
        context.restoreOffsets(offsets)
        # Now we create a Content node
        content_node = context.document.createElementNS(None, "Content")
        content_node.setAttributeNS(None, "_indent", str(section_indent ))
        section_node.appendChild(content_node)
        # We append the section node and assign its content as current node
        context.currentNode.appendChild(section_node)
        context.currentNode = content_node
        context.declareSection(section_node, content_node, section_level)

    def processText( self, context, text ):
        """Strips the given text and normalises it with the context
        parser."""
        return context.parser.normaliseText(text.strip())
503
504 #------------------------------------------------------------------------------
505 #
506 # DefinitionBlockParser
507 #
508 #------------------------------------------------------------------------------
509
# Block parser for definitions: a block is recognised when it starts with a
# term followed by '::' (RE_DEFINITION_ITEM). Each recognised block becomes a
# DefinitionItem node (holding a Title and a Content node) inside a
# Definition node.
510 class DefinitionBlockParser(BlockParser):
511 """Parses a definition markup element."""
512
513 def __init__( self ):
514 BlockParser.__init__(self, "Definition")
515
# Returns the match object of RE_DEFINITION_ITEM against the current
# fragment, or None when the fragment does not start with a definition term.
516 def recognises( self, context ):
517 return RE_DEFINITION_ITEM.match(context.currentFragment())
518
# Walks up the parentNode chain, starting from the given node, and returns
# the first node named "Definition", or the falsy value that ended the chain
# when there is none.
519 def _getParentDefinition( self, node ):
520 while node and node.nodeName != "Definition":
521 node = node.parentNode
522 return node
523
# Creates the DefinitionItem for the recognised block. `match` is the match
# object returned by recognises(). On exit the current node is the Content
# node of the new DefinitionItem.
524 def process( self, context, match ):
525 parent_node = self._getParentDefinition(context.currentNode)
526 _indent = context.getBlockIndentation()
527 # Ensures that the parent Definition node exists
528 if not parent_node:
# No enclosing Definition: we climb from the current node, stopping at a
# node without parent, at a child of the document, at a node without
# "_indent" attribute, or at a node whose indentation is lower than or equal
# to the indentation of this block.
529 parent_node = context.currentNode
530 while True:
531 if parent_node.parentNode == None: break
532 if parent_node.parentNode.nodeType == parent_node.DOCUMENT_NODE: break
533 if not parent_node.getAttributeNS(None, "_indent"): break
534 if int(parent_node.getAttributeNS(None, "_indent")) <= _indent: break
535 parent_node = parent_node.parentNode
# NOTE(review): the indentation of this listing is lost. If the `continue`
# below is the last statement of the loop it has no effect; if the following
# assignment belongs to the loop, it is skipped for nodes that are not block
# elements. Confirm against the original file before restructuring.
536 if parent_node.nodeName not in BLOCK_ELEMENTS: continue
537 context.currentNode = parent_node
# A new Definition node is created and becomes the parent of the item
538 definition_node = context.document.createElementNS(None, "Definition")
539 definition_node.setAttributeNS(None, "_indent", str(_indent))
540 context.currentNode.appendChild(definition_node)
541 parent_node = definition_node
542 # Creates the definition item
543 definition_item = context.document.createElementNS(None, "DefinitionItem")
544 definition_item.setAttributeNS(None, "_indent", str(_indent + 1))
545 definition_title = context.document.createElementNS(None, "Title")
# The recorded title range covers the whole match (term, '::' and what
# follows it in the pattern)...
546 definition_title.setAttributeNS(None, "_start", str(context.blockStartOffset))
547 definition_title.setAttributeNS(None, "_end", str(context.blockStartOffset + len(match.group())))
548 # Parse the content of the definition title
# ...while only the term itself (group 1) is parsed as the title content.
# The offsets are saved before and restored after this nested parsing.
549 offsets = context.saveOffsets()
550 context.setCurrentBlock(context.blockStartOffset, context.blockStartOffset + len(match.group(1)))
551 context.parser.parseBlock(context, definition_title, self.processText)
552 context.restoreOffsets(offsets)
553 # And continue the processing
# The content range starts right after the match and runs to the block end
554 definition_content = context.document.createElementNS(None, "Content")
555 definition_content.setAttributeNS(None, "_indent", str(_indent + 1))
556 definition_content.setAttributeNS(None, "_start", str(context.blockStartOffset + match.end()))
557 definition_content.setAttributeNS(None, "_end", str(context.blockEndOffset))
558 definition_item.appendChild(definition_title)
559 definition_item.appendChild(definition_content)
560 parent_node.appendChild(definition_item)
561 context.currentNode = definition_content
562 # We check if there is a rest after the definition name
# A rest that is blank once normalised is ignored; otherwise it is parsed
# into the Content node, the offsets being saved and restored around it.
563 rest = context.documentText[context.blockStartOffset + match.end():context.blockEndOffset]
564 if not context.parser.normaliseText(rest).strip(): rest = ""
565 if rest:
566 offsets = context.saveOffsets()
567 context.setCurrentBlock(context.blockStartOffset + match.end(), context.blockEndOffset)
568 context.parser.parseBlock(context, definition_content, self.processText)
569 context.restoreOffsets(offsets)
570
# Normalises the given text with the context parser (no stripping, no tab
# expansion).
571 def processText( self, context, text ):
572 return context.parser.normaliseText(text)
573
574 #------------------------------------------------------------------------------
575 #
576 # ListItemBlockParser
577 #
578 #------------------------------------------------------------------------------
579
# Block parser for list items: a block is recognised when it starts with a
# list item marker (RE_LIST_ITEM). A block may hold several items, which are
# processed one after the other by recursive calls to process().
580 class ListItemBlockParser(BlockParser):
581 """Parses a list item. A list item is an element within a list."""
582
583 def __init__( self ):
584 BlockParser.__init__(self, "ListItem")
585
# Returns the match object of RE_LIST_ITEM against the current fragment, or
# None when the fragment does not start with a list item marker.
586 def recognises( self, context ):
587 return RE_LIST_ITEM.match(context.currentFragment())
588
# Creates the ListItem node for the item designated by `itemMatch` (a
# RE_LIST_ITEM match relative to the current offset), inserting it into an
# existing or a new List node, then processes the next item of the block if
# there is one. Returns the value assigned to `list_item_node` at the end
# (the result of the recursive call when there is a next item), or None when
# the item is empty.
589 def process( self, context, itemMatch ):
590
591 context.ensureParent( ("Content", "Appendix", "Chapter", "Section", "List") )
# NOTE(review): start_offset is only referenced by commented-out code below.
592 start_offset = context.getOffset()
593
594 # Step 1: Determine the range of the current line item in the current
595 # block. There may be more than one line item as in the following:
596 # "- blah blah\n - blah blah"
597 # So we have to look for another line item in the current block
598
599 # To do so, we move the offset after the recognised list item, ie.
600 # after the leading "1)", "*)", etc
601 context.increaseOffset(itemMatch.end())
602
603 # Next item match will indicate where in the current fragment the next
604 # item starts.
605 next_item_match = None
# Nothing follows the marker: a warning is issued and None is returned
606 if context.blockEndReached():
607 context.parser.warning(EMPTY_LIST_ITEM, context)
608 return
609
610 # We search a possible next list item after the first eol
611 next_eol = context.currentFragment().find("\n")
612 if next_eol!=-1:
613 next_item_match = RE_LIST_ITEM.search(
614 context.currentFragment(), next_eol)
615 else:
616 next_item_match = None
617
618 # We assign to current_item_text the text of the current item
619 current_item_text = context.currentFragment()
620 if next_item_match:
621 current_item_text = current_item_text[:next_item_match.start()]
622
623 # We get the list item indentation
# It is computed from the whole matched marker (leading whitespace, marker
# and the whitespace that follows it), converted to spaces.
624 indent = context.parser.getIndentation(
625 context.parser.charactersToSpaces(itemMatch.group()))
626
627 # We look for the optional list heading
628 heading = RE_LIST_ITEM_HEADING.match(current_item_text)
629 heading_offset = 0
630 list_type = STANDARD_LIST
631 item_type = STANDARD_ITEM
632 if heading:
633 # We remove the heading from the item text
634 heading_offset = heading.end()
635 # And update the heading variable with the heading text
# Group 1 is a heading ended by ':' or '::'; otherwise the heading was ended
# by '/' or '\' and the list is a definition list. heading_end is relative to
# the start of current_item_text.
636 if heading.group(1):
637 list_type = STANDARD_LIST
638 heading_end = heading.group().rfind(":")
639 else:
640 list_type = DEFINITION_LIST
# NOTE(review): LIST_ITEM_HEADING also accepts a heading ended by a
# backslash, in which case rfind("/") yields -1 -- confirm whether this case
# needs handling.
641 heading_end = heading.group().rfind("/")
642
# The marker itself tells todo items ('[ ]', '[x]', '[X]') and numbered
# items apart; this overrides the list type deduced from the heading.
643 head = itemMatch.group(2)
644 if head:
645 head = head.upper()
646 if head == "[ ]":
647 item_type = TODO_ITEM
648 list_type = TODO_LIST
649 elif head == "[X]":
650 item_type = TODO_DONE_ITEM
651 list_type = TODO_LIST
652 elif RE_NUMBER.match(head):
653 list_type = ORDERED_LIST
654
655 # The current_item_text is no longer used in the following code
656
657 # Step 2: Now that we have the item body, and that we know if there is
658 # a next item (next_item_match), we can create the list item node. To
659 # do so, we first have to look for a parent "List" node in which the
660 # "ListItem" node we wish to create will be inserted.
661
662 # We want either a "List" with a LOWER OR EQUAL indent, or a "ListItem"
663 # with a STRICTLY LOWER indentation, or a node which is neither a List
664 # or a ListItem.
665 while context.currentNode.nodeName == "List" and \
666 int(context.currentNode.getAttributeNS(None, "_indent"))>indent or \
667 context.currentNode.nodeName == "ListItem" and \
668 int(context.currentNode.getAttributeNS(None, "_indent"))>=indent:
669 context.currentNode = context.currentNode.parentNode
670
671 # If the current node is a list, then we have to create a nested list.
672 # A List ALWAYS have at least one child ListItem. If the last ListItem
673 # has the same indentation as our current list item, then it is a
674 # sibling, otherwise it is a parent.
675 if context.currentNode.nodeName == "List":
676 # A List should always have at least one ListItem
677 items = context._getElementsByTagName( context.currentNode, "ListItem")
678 assert len(items)>0
679 if int(items[-1].getAttributeNS(None, "_indent")) < indent:
680 context.currentNode = items[-1]
681
682 # We may need to create a new "List" node to hold our list items
683 list_node = context.currentNode
684 # If the current node is not a list, then we must create a new list
685 if context.currentNode.nodeName != "List":
686 list_node = context.document.createElementNS(None, "List")
687 list_node.setAttributeNS(None, "_indent", str(indent))
688 context.currentNode.appendChild(list_node)
689 context.currentNode = list_node
690 # We create the list item
691 list_item_node = context.document.createElementNS(None, "ListItem")
692 list_item_node.setAttributeNS(None, "_indent", str(indent))
693 if item_type == TODO_ITEM:
694 list_item_node.setAttributeNS(None, "todo", "true")
695 elif item_type == TODO_DONE_ITEM:
696 list_item_node.setAttributeNS(None, "todo", "done")
697 #list_item_node.setAttributeNS(None, "_start", str(start_offset))
# The item ends one character before the next item, or at the block end
698 if next_item_match:
699 list_item_node.setAttributeNS(None, "_end", str(context.getOffset() + next_item_match.start() -1))
700 else:
701 list_item_node.setAttributeNS(None, "_end", str(context.blockEndOffset))
702 # and the optional heading
# The heading text (up to heading_end) is parsed into a "heading" node, the
# offsets being saved before and restored after.
703 if heading:
704 offsets = context.saveOffsets()
705 heading_node = context.document.createElementNS(None, "heading")
706 context.setCurrentBlock(context.getOffset(), context.getOffset()+heading_end)
707 context.parser.parseBlock(context, heading_node, self.processText)
708 # heading_text = context.document.createTextNode(heading)
709 # heading_node.appendChild(heading_text)
710 list_item_node.appendChild(heading_node)
711 context.restoreOffsets(offsets)
712 # and the content
# The content starts after the heading (if any) and stops where the next
# item starts, or at the block end when this is the last item.
713 offsets = context.saveOffsets()
714 if next_item_match:
715 context.setCurrentBlock(heading_offset+context.getOffset() ,
716 context.getOffset()+next_item_match.start())
717 else:
718 context.increaseOffset(heading_offset)
719 # We parse the content of the list item
720 old_node = context.currentNode
721 # FIXME: This is necessary to assign the current node, but I do not
722 # quite understand why... this needs some code review.
723 context.currentNode = list_item_node
724 context.parser.parseBlock(context, list_item_node, self.processText)
725 context.currentNode = old_node
726 context.restoreOffsets(offsets)
727 # We eventually append the created list item node to the parent list
728 # node
729 list_node.appendChild(list_item_node)
730 # We set the type attribute of the list if necessary
731 if list_type == DEFINITION_LIST:
732 list_node.setAttributeNS(None, "type", "definition")
733 elif list_type == TODO_LIST:
734 list_node.setAttributeNS(None, "type", "todo")
735 elif list_type == ORDERED_LIST:
736 list_node.setAttributeNS(None, "type", "ordered")
737
738 # And recurse with other line items
739 if next_item_match:
740 # We set the offset in which the next_item Match object was
741 # created, because match object start and end are relative
742 # to the context offset at pattern matching time.
# The offset is not modified here: restoreOffsets() above already brought it
# back to the value it had when next_item_match was searched.
# NOTE(review): the recursive call returns None for an empty item, in which
# case the current node is set to None below -- confirm this is acceptable.
743 list_item_node = self.process(context, next_item_match)
744 # Or we have reached the block end
745 else:
746 context.setOffset(context.blockEndOffset)
747
748 # We set the current node to be the list item node
749 context.currentNode = list_item_node
750 return list_item_node
751
# Expands the tabs of the given text and normalises it, using the context
# parser.
752 def processText( self, context, text ):
753 text = context.parser.expandTabs(text)
754 text = context.parser.normaliseText(text)
755 return text
756
757 #------------------------------------------------------------------------------
758 #
759 # PreBlockParser
760 #
761 #------------------------------------------------------------------------------
762
class PreBlockParser( BlockParser ):
    """Parses a preformatted block, that is a block where every non-empty
    line is prefixed by '>' followed by a space or a tab."""

    def __init__( self ):
        BlockParser.__init__(self, "pre")

    def recognises( self, context ):
        """Returns False as soon as a non-empty line of the current fragment
        does not match RE_PREFORMATTED, and True otherwise."""
        for candidate in context.currentFragment().split("\n"):
            if not candidate:
                continue
            if RE_PREFORMATTED.match(candidate) is None:
                return False
        return True

    def process( self, context, recogniseInfo ):
        """Appends a 'pre' node to the current node. Its text is made of the
        lines of the current fragment, with their '> ' prefix removed when
        they have one, joined by newlines."""
        unprefixed = []
        for raw_line in context.currentFragment().split("\n"):
            prefixed = RE_PREFORMATTED.match(raw_line)
            if prefixed:
                # Group 3 is what follows the prefix
                unprefixed.append(prefixed.group(3))
            else:
                unprefixed.append(raw_line)
        text = "\n".join(unprefixed)
        pre_node = context.document.createElementNS(None, self.name)
        pre_node.appendChild(context.document.createTextNode(text))
        pre_node.setAttributeNS(None, "_start", str(context.getOffset()))
        pre_node.setAttributeNS(None, "_end", str(context.blockEndOffset))
        context.currentNode.appendChild(pre_node)
790
class PreBlockParser2( BlockParser ):
    """Parses the content of a preformatted block which is delimited by a
    start line and an end line that both only contain '---'."""

    def __init__( self ):
        BlockParser.__init__(self, "pre")

    def recognises( self, context ):
        """Recognises a fragment whose first line is a start line and whose
        following non-blank lines are indented at least as much as it.

        Returns False when the fragment is not recognised, and the
        `(True, indent)` couple otherwise, where `indent` is the indentation
        of the start line."""
        head_lines = context.currentFragment().split("\n")
        if not head_lines: return False
        if not self.isStartLine(context, head_lines[0]):
            return False
        indent = context.parser.getIndentation(head_lines[0])
        for line in head_lines[1:]:
            # Blank lines do not take part in the indentation check
            if not line.replace("\t", " ").strip(): continue
            if context.parser.getIndentation(line) < indent:
                return False
        return True, indent

    def isStartLine( self, context, line ):
        """Returns `(True, indentation)` when the line only contains '---',
        and None otherwise."""
        line_indent = context.parser.getIndentation(line)
        if line.replace("\t", " ").strip() == "---":
            return True, line_indent
        else:
            return None

    def isEndLine( self, context, line, indent ):
        """Tells if the line only contains '---' and has exactly the given
        indentation."""
        line_indent = context.parser.getIndentation(line)
        if line_indent != indent: return False
        line = line.replace("\t", " ").strip()
        return line == "---"

    def findBlockEnd( self, context, indent ):
        """Returns the offset at which the preformatted block ends. When
        the current fragment does not already end with the end line, the
        document text following the fragment is scanned line by line until
        an end line, or a non-blank line that is less indented than
        `indent`, is found."""
        # FIXME: Issue a warning if no end is found
        cur_offset = context.blockEndOffset + 1
        block_end = context.blockEndOffset
        lines = context.currentFragment().split("\n")
        # A fragment made of a single line only holds the start line, which
        # must not be mistaken for the end line (this would later make
        # 'process' work on an empty block).
        if len(lines) > 1 and self.isEndLine(context, lines[-1], indent):
            return block_end
        # NOTE(review): a last document line that is not terminated by a
        # newline is never examined by this loop -- confirm whether the
        # document text is guaranteed to end with a newline.
        while True:
            next_eol = context.documentText.find("\n", cur_offset)
            if next_eol == -1:
                break
            line = context.documentText[cur_offset:next_eol]
            if self.isEndLine(context, line, indent):
                block_end = next_eol + 1
                break
            # A non-blank line which is less indented interrupts the block
            if line.strip() and context.parser.getIndentation(line) < indent:
                break
            block_end = next_eol + 1
            cur_offset = block_end
        return block_end - 1

    def getCommonPrefix( self, linea, lineb ):
        """Returns the leading run of tabs and spaces that `linea` and
        `lineb` have in common. When `lineb` is blank, `linea` is returned
        unchanged."""
        if not lineb.replace("\t", " ").strip():
            return linea
        limit = 0
        max_limit = min(len(linea), len(lineb))
        while limit < max_limit and linea[limit] in "\t " and linea[limit] == lineb[limit]:
            limit += 1
        assert linea[:limit] == lineb[:limit]
        return linea[:limit]

    def process( self, context, recogniseInfo ):
        """Creates a 'pre' element from the lines found between the start
        and end lines, with their common indentation removed, and appends
        it to the current node of the context.

        `recogniseInfo` is the `(True, indent)` couple returned by
        `recognises`."""
        indent = recogniseInfo[1]
        context.setCurrentBlockEnd(self.findBlockEnd(context, indent))
        # The first and last lines are the delimiters
        lines = context.currentFragment().split("\n")[1:-1]
        # An empty block has no line to take the prefix from: it simply
        # results in an empty text.
        prefix = ""
        if lines:
            prefix = lines[0]
        for line in lines:
            prefix = self.getCommonPrefix(prefix, line)
        text = "\n".join([line[len(prefix):] for line in lines])
        pre_node = context.document.createElementNS(None, self.name)
        pre_node.appendChild(context.document.createTextNode(text))
        pre_node.setAttributeNS(None, "_start", str(context.getOffset()))
        pre_node.setAttributeNS(None, "_end", str(context.blockEndOffset))
        context.currentNode.appendChild(pre_node)
874
875 #------------------------------------------------------------------------------
876 #
877 # TableBlockParser
878 #
879 #------------------------------------------------------------------------------
880
class Table:
    """The table class allows to easily create tables and then generate the
    XML objects from them."""

    def __init__( self ):
        # Table is an array of array of [char, string] where char is either
        # 'H' for header, or 'T' for text. Cells are lists (not tuples), as
        # their type is updated in place.
        self._table = []
        # Number of rows and columns, as maintained by '_ensureCell'
        self._rows = 0
        self._cols = 0
        self._title = None
        self._id = None

    def dimension( self ):
        """Returns the `(length of the first row, number of rows)` couple,
        which is `(0, 0)` for an empty table."""
        if not self._table:
            return 0, 0
        return len(self._table[0]), len(self._table)

    def getRow( self, y):
        """Returns the list of cells of the row at the given index."""
        return self._table[y]

    def _ensureCell( self, x, y ):
        """Ensures that the cell at the given position exists and returns its
        pair value."""
        while y >= len(self._table): self._table.append([])
        row = self._table[y]
        # Missing cells are created as text cells without content
        while x >= len(row): row.append(["T", None])
        self._cols = max(self._cols, x+1)
        self._rows = max(self._rows, y+1)
        return row[x]

    def setTitle( self, title ):
        """Sets the title for this table."""
        self._title = title.strip()

    def setID( self, id ):
        """Sets the id for this table."""
        self._id = id.strip()

    def appendCellContent( self, x, y, text ):
        """Sets the text of the cell at the given position, or appends the
        text as a new line when the cell already has some text. The cell is
        created if necessary."""
        cell_type, cell_text = self._ensureCell(x,y)
        if cell_text is None:
            self._table[y][x] = [cell_type, text]
        else:
            self._table[y][x] = [cell_type, cell_text + "\n" + text]

    def headerCell( self, x, y ):
        """Makes the cell at the given position a header cell, creating it
        if necessary."""
        cell_text = self._ensureCell(x,y)[1]
        self._table[y][x] = ["H", cell_text]

    def dataCell( self, x, y ):
        """Makes the cell at the given position a text cell, creating it if
        necessary."""
        cell_text = self._ensureCell(x,y)[1]
        self._table[y][x] = ["T", cell_text]

    def isHeader( self, x, y ):
        """Tells if the cell at the given position is a header cell. False is
        returned when there is no such row or no such cell in the row."""
        # The bound has to be inclusive: a row index equal to the number of
        # rows does not designate an existing row.
        if y >= len(self._table): return False
        row = self._table[y]
        if x >= len(row): return False
        return row[x][0] == "H"

    def getNode( self, context, processText ):
        """Renders the table as a Kiwi XML document node.

        The text of every cell is parsed with a clone of the given context,
        and the resulting nodes become the children of the 'Cell' node. The
        `processText` argument is not used."""
        document = context.document
        table_node = document.createElementNS(None, "Table")
        content_node = document.createElementNS(None, "Content")
        # We set the id
        if self._id:
            table_node.setAttributeNS(None, "id", self._id)
        # We take care of the title
        if self._title:
            caption_node = document.createElementNS(None, "Caption")
            caption_node.appendChild(document.createTextNode(self._title))
            table_node.appendChild(caption_node)
        # And now of the table
        for row in self._table:
            row_node = document.createElementNS(None, "Row")
            last_index = len(row) - 1
            for i, (cell_type, cell_text) in enumerate(row):
                cell_node = document.createElementNS(None, "Cell")
                if cell_type == "H":
                    cell_node.setAttributeNS(None, "type", "header")
                # We create a temporary Content node that will stop the nodes
                # from seeking a parent content
                cell_content_node = document.createElementNS(None, "Content")
                # The last cell of a row that is shorter than the table spans
                # over all the remaining columns.
                if i == last_index and len(row) != self._cols:
                    cell_node.setAttributeNS(None, "colspan", "%s" % (self._cols - i))
                new_context = context.clone()
                new_context.setDocumentText(cell_text)
                new_context.currentNode = cell_content_node
                new_context.parser.parseContext(new_context)
                # This is slightly hackish, but we simply move the nodes
                # there. We iterate on a copy of the child list, because
                # appending a node to its new parent removes it from the old
                # one, and would then make the iteration skip nodes.
                for child in list(cell_content_node.childNodes):
                    cell_node.appendChild(child)
                row_node.appendChild(cell_node)
            content_node.appendChild(row_node)
        table_node.appendChild(content_node)
        return table_node

    def __repr__( self ):
        """Returns one 'index: row' line per row of the table."""
        return "".join(["%2d: %s\n" % (i, row) for i, row in enumerate(self._table)])
985
class TableBlockParser( BlockParser ):
    """Parses the content of a tables"""

    def __init__( self ):
        BlockParser.__init__(self, "table")

    def recognises( self, context ):
        """Recognises a fragment of at least two lines which starts (after an
        optional title line) and ends with a line matched by
        RE_TABLE_ROW_SEPARATOR. Returns False or None when not recognised,
        and the match of the last line otherwise."""
        lines = context.currentFragment().strip().split("\n")
        if not len(lines)>1: return False
        title_match = RE_TITLE.match(lines[0])
        if title_match:
            # With a title, we need at least the title and two separators
            if not len(lines) >= 3: return False
            start_match = RE_TABLE_ROW_SEPARATOR.match(lines[1])
        else:
            start_match = RE_TABLE_ROW_SEPARATOR.match(lines[0])
        end_match = RE_TABLE_ROW_SEPARATOR.match(lines[-1])
        return start_match and end_match

    def process( self, context, recogniseInfo ):
        """Builds a Table from the lines of the current fragment, and appends
        the node it renders to the current node of the context.

        A title line may hold the table id after a '#'. The lines found
        between two separators are the content of one table row, and the
        first character of a separator tells the type of the row above it:
        '=' for header cells, '-' for text cells."""
        y = 0
        table = Table()
        # Becomes true once a cell was added to the table
        has_content = False
        # The last line is the closing separator
        rows = context.currentFragment().strip().split("\n")[:-1]
        # We take care of the title
        title_match = RE_TITLE.match(rows[0])
        if title_match:
            # The id is optional, so it is only set when the title has one
            title_parts = title_match.group(2).split("#",1)
            table.setTitle(title_parts[0])
            if len(title_parts) == 2:
                table.setID(title_parts[1])
            # We skip the title and the opening separator
            rows = rows[2:]
        else:
            rows = rows[1:]
        # The cells are separated by pipes (||)
        for row in rows:
            # Empty rows are simply ignored
            if not row.strip(): continue
            separator = RE_TABLE_ROW_SEPARATOR.match(row)
            # If we have not found a separator yet, we simply ensure that the
            # cell exists and appends content to it
            if not separator:
                # If the separator is not '||' it is '|'
                if row.find("||") == -1:
                    row = row.replace("|", "||")
                x = 0
                for cell in row.split("||"):
                    # We remove leading or trailing borders (|)
                    if cell and cell[0] == "|": cell = cell[1:]
                    if cell and cell[-1] == "|": cell = cell[:-1]
                    table.appendCellContent(x,y,cell)
                    has_content = True
                    # FIXME: Weird rule
                    # The default cell type is the same as the above
                    # cell, if any.
                    #if y>0 and table.isHeader(x,y-1):
                    #    table.headerCell(x,y)
                    x += 1
            # We move to the next row only when we encounter a separator. The
            # analysis of the separator will tell you if the above cell is a
            # header or a data cell
            else:
                # FIXME: This is wrong, see below
                marker = separator.group(1)[0]
                if marker == "=":
                    row_type = "H"
                elif marker == "-":
                    row_type = "T"
                else:
                    row_type = None
                # There is no row to update as long as the table is empty
                if row_type is not None and has_content:
                    row_count = table.dimension()[1]
                    if row_count > 0:
                        for cell in table.getRow(row_count - 1):
                            cell[0] = row_type
                # FIXME: Should handle vertical tables also
                # ==================================
                # HEADER || DATA
                # =======++-------------------------
                # ....
                y += 1
        context.currentNode.appendChild(table.getNode(context, self.processText))
1077
1078 #------------------------------------------------------------------------------
1079 #
1080 # MetaBlockParser
1081 #
1082 #------------------------------------------------------------------------------
1083
class MetaBlockParser( BlockParser ):
    """Parses the content of a Meta block"""

    def __init__( self ):
        BlockParser.__init__(self, "Meta")
        #This is a binding from meta block section names to meta content
        #parsers
        self.field_parsers = {
            u'abstract': self.p_abstract,
            u'acknowledgements': self.p_ack,
            u'author': self.p_author,
            u'authors': self.p_author,
            u'creation': self.p_creation,
            u'keywords': self.p_keywords,
            u'language': self.p_language,
            u'last-mod': self.p_last_mod,
            u'markup': self.p_markup,
            u'organisation': self.p_organisation,
            u'organization': self.p_organisation,
            u'revision': self.p_revision,
            u'type': self.p_type,
            u'reference': self.p_reference
        }

    def process( self, context, recogniseInfo ):
        """Looks for the fields (as matched by RE_META_FIELD) of the current
        fragment, and gives the text that follows each of them to the field
        parser bound to the field name. A warning is issued for unknown
        fields, and when no field is found at all."""
        # Parses a particular field, the current fragment being its content
        def parse_field( field ):
            field_parser = self.field_parsers.get(field.lower())
            if field_parser:
                field_parser(context, context.currentFragment())
            else:
                context.parser.warning("Unknown Meta field: " + field,
                context)

        offset = 0
        last_field = None
        # Iterates through the fields
        while True:
            match = RE_META_FIELD.search(context.currentFragment(), offset)
            if match is None: break
            # The value of a field ends where the next field starts, so a
            # field is only parsed once the following one was found.
            if last_field is not None:
                offsets = context.saveOffsets()
                # We set the current fragment to be the field value
                context.setCurrentBlock( context.getOffset() + offset,
                    context.getOffset() + match.start() )
                parse_field(last_field)
                context.restoreOffsets(offsets)
            last_field = match.group(2)
            offset = match.end()

        # And parse the last field
        if last_field is not None:
            offsets = context.saveOffsets()
            context.setCurrentBlock( context.getOffset() + offset,
                context.blockEndOffset )
            parse_field(last_field)
            context.restoreOffsets(offsets)
        else:
            context.parser.warning("Empty Meta block.", context)

    # Field parsers __________________________________________________________

    def _parseBlockToHeader( self, context, name ):
        """Parses the current block into a new element with the given name,
        and appends this element to the header of the context."""
        old_node = context.currentNode
        node = context.document.createElementNS(None, name)
        context.currentNode = node
        context.parser.parseBlock(context, node, self.processText)
        context.currentNode = old_node
        context.header.appendChild(node)

    def _listItems( self, content ):
        """Returns the comma-separated items of the given content, once
        flattened and relieved from its trailing dot. The list is empty when
        there is no text left."""
        text = self._flatify(content).strip()
        # Cuts the trailing dot if present
        if text.endswith(u'.'): text = text[:-1]
        if not text: return []
        return text.split(',')

    def p_abstract( self, context, content ):
        """Parses the current block as the 'Abstract' header element."""
        self._parseBlockToHeader(context, "Abstract")

    def p_ack( self, context, content ):
        """Parses the current block as the 'Acknowledgement' header
        element."""
        self._parseBlockToHeader(context, "Acknowledgement")

    def p_author( self, context, content ):
        """Adds an 'Authors' element to the header, with one 'person' per
        comma-separated author. The text matched by RE_META_AUTHOR_EMAIL is
        removed from the author name, and its first group becomes the
        'email' attribute."""
        authors = self._listItems(content)
        if not authors:
            context.parser.warning("Empty author meta field.", context)
            return
        authors_node = context.document.createElementNS(None, "Authors")
        for author in authors:
            author_node = context.document.createElementNS(None, "person")
            # We take care of email
            email_match = RE_META_AUTHOR_EMAIL.search(author)
            if email_match:
                author = author[:email_match.start()]
                author_node.setAttributeNS(None, "email", email_match.group(1))
            text_node = context.document.createTextNode(author.strip())
            author_node.appendChild(text_node)
            authors_node.appendChild(author_node)
        context.header.appendChild(authors_node)

    def p_creation( self, context, content ):
        """Adds a 'creation' element to the header when the content is a
        well-formed date."""
        creation_node = context.document.createElementNS(None, "creation")
        if self._parseDateToNode( context, content, creation_node ):
            context.header.appendChild(creation_node)

    def _isDateNumber( self, elem ):
        """Tells if the given text is a plain (zero-padded) decimal number,
        ie. if it is left unchanged when converted to an integer and
        formatted back with the same width."""
        try:
            return ("%0" + str(len(elem)) + "d") % (int(elem)) == elem
        except ValueError:
            return False

    def _parseDateToNode( self, context, content, node ):
        """Parses the content as three numbers separated by dashes, and sets
        them as the 'year', 'month' and 'day' attributes of the node.

        Returns False (after reporting an error) when the content is
        malformed, and True otherwise. Out of range months and days only
        result in a warning."""
        content = content.strip()
        date = content.split("-")
        well_formed = len(date) == 3
        for elem in date:
            if not self._isDateNumber(elem): well_formed = False
        if not well_formed:
            context.parser.error("Malformed date meta field: " + content,
            context)
            context.parser.tip("Should be YYYY-MM-DD", context)
            return False
        date = [int(elem) for elem in date]
        if date[1] < 1 or date[1] > 12:
            context.parser.warning("Bad month number: " + str(date[1]),
            context)
        if date[2] < 1 or date[2] > 31:
            context.parser.warning("Bad day number: " + str(date[2]),
            context)
        node.setAttributeNS(None, "year", str(date[0]))
        node.setAttributeNS(None, "month", str(date[1]))
        node.setAttributeNS(None, "day", str(date[2]))
        return True

    def p_keywords( self, context, content ):
        """Adds a 'Keywords' element to the header, with one 'keyword' per
        comma-separated keyword."""
        keywords = self._listItems(content)
        if not keywords:
            context.parser.warning("Empty keywords meta field.", context)
            return
        keywords_node = context.document.createElementNS(None, "Keywords")
        for keyword in keywords:
            keyword_node = context.document.createElementNS(None, "keyword")
            text_node = context.document.createTextNode(keyword.strip())
            keyword_node.appendChild(text_node)
            keywords_node.appendChild(keyword_node)
        context.header.appendChild(keywords_node)

    def p_last_mod( self, context, content ):
        """Adds a 'modification' element to the header when the content is a
        well-formed date."""
        lastmod_node = context.document.createElementNS(None, "modification")
        if self._parseDateToNode( context, content, lastmod_node ):
            context.header.appendChild(lastmod_node)

    def p_revision( self, context, content ):
        """Adds a 'revision' element holding the stripped content to the
        header."""
        revision_node = context.document.createElementNS(None, "revision")
        text_node = context.document.createTextNode(content.strip())
        revision_node.appendChild(text_node)
        context.header.appendChild(revision_node)

    def p_type( self, context, content ):
        """Adds a 'type' element to the header when the content is matched
        by RE_META_TYPE: the first group is its 'name', and the third one (if
        any) is its 'style'. A warning is issued otherwise."""
        match = RE_META_TYPE.match(content)
        if match:
            style_node = context.document.createElementNS(None, "type")
            style_node.setAttributeNS(None, "name", match.group(1).lower())
            if match.group(3):
                style_node.setAttributeNS(None, "style", match.group(3).lower())
            context.header.appendChild(style_node)
        else:
            context.parser.warning("Malformed meta type field: " + content,
            context)

    def p_reference( self, context, content ):
        """Adds a 'reference' element to the header, the content being its
        'id' attribute."""
        # NOTE(review): the content is used as is, while other fields strip
        # it first -- confirm that the id is meant to keep its whitespace.
        ref_node = context.document.createElementNS(None, "reference")
        ref_node.setAttributeNS(None, "id", content)
        context.header.appendChild(ref_node)

    def p_language( self, context, content ):
        """Adds a 'language' element to the header. Its 'code' is made of the
        two first characters of the content in upper case when they are one
        of the LANGUAGE_CODES, and is "UK" otherwise."""
        lang = content.strip()[0:2].upper()
        lang_node = context.document.createElementNS(None, "language")
        #We assign the language code
        if len(lang)>=2 and lang in LANGUAGE_CODES:
            lang_code = unicode(lang)
        else:
            lang_code = "UK"
        lang_node.setAttributeNS(None, "code", lang_code)
        context.header.appendChild(lang_node)

    def p_organisation( self, context, content ):
        """Parses the current block as the 'Organisation' header element."""
        self._parseBlockToHeader(context, "Organisation")

    def p_markup( self, context, content ):
        """Parses custom markup and registers the new parsers in the current
        Kiwi parser"""
        # TODO
        # NOTE(review): this method relies on a 'self.parser' attribute that
        # is not set by this class -- confirm that BlockParser provides it,
        # or whether 'context.parser' was intended.
        start = 0
        end = len(content)
        while start<end:
            match = RE_CUSTOM_MARKUP.search(content,start)
            if match is None: break
            regexp = match.group(1)
            element = match.group(2)
            option = match.group(4)
            if option is None:
                self.parser.txt_parsers.append(InlineParser(self.parser,
                element, regexp))
            elif option.lower() == u"empty":
                self.parser.txt_parsers.append(EmptyInlineParser(self.parser,
                element, regexp))
            else:
                #FIXME: OUTPUT ERROR FOR UNKNOWN OPTION
                pass
            start = match.end()

    def _flatify( self, text ):
        """Returns the whitespace-separated words of the text, each of them
        being followed by a single space."""
        return u"".join([word + u" " for word in text.split()])

    def processText( self, context, text ):
        """Returns the given (non-empty) text after it has been passed
        through the `expandTabs` and `normaliseText` methods of the context
        parser."""
        assert text
        text = context.parser.expandTabs(text)
        text = context.parser.normaliseText(text)
        return text
1306
1307 #------------------------------------------------------------------------------
1308 #
1309 # ReferenceEntryBlockParser
1310 #
1311 #------------------------------------------------------------------------------
1312
class ReferenceEntryBlockParser( BlockParser ):
    """Parses the content of a Reference entry"""

    def __init__( self ):
        BlockParser.__init__(self, "Entry")

    def recognises( self, context ):
        """Returns the match of RE_REFERENCE_ENTRY at the start of the
        current fragment, or None."""
        assert context
        fragment = context.currentFragment()
        return RE_REFERENCE_ENTRY.match(fragment)

    def process( self, context, match ):
        """Creates one 'Entry' element per reference entry found in the
        current fragment, and appends it to the references of the context.
        The text of an entry runs from the end of its match up to the start
        of the next entry, or to the end of the fragment."""
        saved_offsets = context.saveOffsets()
        # We collect the match of every entry of the fragment
        entries = []
        cursor = 0
        while True:
            found = RE_REFERENCE_ENTRY.search(context.currentFragment(), cursor)
            if not found: break
            entries.append(found)
            cursor = found.end()
        # An entry ends where the following one starts, the last entry
        # ending with the fragment
        boundaries = [entry.start() for entry in entries]
        boundaries.append(len(context.currentFragment()))
        # We loop for each found reference entry
        for index, entry in enumerate(entries):
            entry_end = boundaries[index + 1]
            entry_name = entry.group(1)
            # We set the current block and process it
            entry_offsets = context.saveOffsets()
            context.setCurrentBlock(context.getOffset() + entry.end(), context.getOffset() + entry_end)
            entry_node = context.document.createElementNS(None, "Entry")
            entry_node.setAttributeNS(None, "id", entry_name)
            context.parser.parseBlock(context, entry_node, self.processText)
            context.references.appendChild(entry_node)
            context.restoreOffsets(entry_offsets)
        context.restoreOffsets(saved_offsets)
1350
1351 # EOF