"Fossies" - the Fresh Open Source Software Archive 
Member "Tahchee-1.0.0/Sources/tahchee/plugins/_kiwi/inlines.py" (22 Oct 2009, 21437 Bytes) of package /linux/privat/old/tahchee-1.0.0.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style:
standard) with prefixed line numbers.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "inlines.py" see the
Fossies "Dox" file reference documentation.
1 #!/usr/bin/env python
2 # Encoding: iso-8859-1
3 # vim: ts=4 sw=4 tw=80 noet
4 # -----------------------------------------------------------------------------
5 # Project : Kiwi
6 # -----------------------------------------------------------------------------
7 # Author : Sebastien Pierre (SPE) <sebastien@type-z.org>
8 # -----------------------------------------------------------------------------
9 # Creation date : 19-Nov-2003
10 # Last mod. : 05-Aug-2008
11 # -----------------------------------------------------------------------------
12
13 import re
14
15 __pychecker__ = "unusednames=y"
16
17 #------------------------------------------------------------------------------
18 #
19 # Error messages
20 #
21 #------------------------------------------------------------------------------
22
23 END_WITHOUT_START = "Markup `%s' end found without previous markup start"
24 START_WITHOUT_END = "Markup `%s' start found without following markup end"
25 MUST_BE_START_OR_END = \
26 "Unrecognised markup specifier: 'start' or 'end' would be expected"
27
28 #------------------------------------------------------------------------------
29 #
30 # Regular expressions
31 #
32 #------------------------------------------------------------------------------
33
34 #The regular expressions listed below are ordered conforming to their order
35 #of insertion into the parser.
36
37 # Kiwi core
38
39 COMMENT = u"^\s*#.*$"
40 RE_COMMENT = re.compile(COMMENT, re.LOCALE | re.MULTILINE )
41
42 ESCAPED_START = u"<\["
43 RE_ESCAPED_START = re.compile(ESCAPED_START, re.LOCALE)
44 ESCAPED_END = u"\]>"
45 RE_ESCAPED_END = re.compile(ESCAPED_END, re.LOCALE)
46 ESCAPED_REPLACE = u'\\"'
47 RE_ESCAPED_REPLACE=re.compile(ESCAPED_REPLACE, re.LOCALE)
48
49 ESCAPED_STRING = u'\\\\"([^"]+)"'
50 RE_ESCAPED_STRING = re.compile(ESCAPED_STRING, re.MULTILINE|re.LOCALE)
51
52 # Text style
53
54 CODE = u"`([^\`]+)`"
55 RE_CODE = re.compile(CODE, re.LOCALE|re.MULTILINE)
56 CODE_2 = u"``((`?[^`])+)``"
57 RE_CODE_2 = re.compile(CODE_2, re.LOCALE|re.MULTILINE)
58 CODE_3 = u"'([^']+)'"
59 RE_CODE_3 = re.compile(CODE_3, re.LOCALE|re.MULTILINE)
60 PRE = u"^((\s*\>(\t| ))(.*)\n?)+"
61 RE_PRE = re.compile(PRE, re.LOCALE|re.MULTILINE)
62 EMPHASIS = u"\*([^*]+)\*"
63 RE_EMPHASIS = re.compile(EMPHASIS, re.LOCALE|re.MULTILINE)
64 STRONG = u"\*\*([^*]+)\*\*"
65 RE_STRONG = re.compile(STRONG, re.LOCALE|re.MULTILINE)
66 TERM = u"\_([^_]+)_"
67 RE_TERM = re.compile(TERM, re.LOCALE|re.MULTILINE)
68 QUOTED = u"''(('?[^'])+)''"
69 RE_QUOTED = re.compile(QUOTED, re.LOCALE|re.MULTILINE)
70 CITATION = u"«([^»]+)»"
71 RE_CITATION = re.compile(CITATION,re.LOCALE|re.MULTILINE)
72
73 # Special Characters
74
75 BREAK = u"\s*\n\s*\|\s*\n()"
76 RE_BREAK = re.compile(BREAK)
77 SWALLOW_BREAK = u"\s*\|\s*\n()"
78 RE_SWALLOW_BREAK = re.compile(SWALLOW_BREAK)
79 NEWLINE = u"\s*\\\\n\s*()"
80 RE_NEWLINE = re.compile(NEWLINE)
81 LONGDASH = u" -- ()"
82 RE_LONGDASH = re.compile(LONGDASH)
83 LONGLONGDASH = u" --- ()"
84 RE_LONGLONGDASH = re.compile(LONGLONGDASH)
85 ARROW = u"<-+>|-+->|<-+"
86 RE_ARROW = re.compile(ARROW,)
87 DOTS = u"\.\.\.()"
88 RE_DOTS = re.compile(DOTS,)
89 ENTITIES = u"(&(\w+|#[0-9]+);)"
90 RE_ENTITIES = re.compile(ENTITIES,)
91
92 # Linking content
93
94 EMAIL = u"\<([\w.\-_]+@[\w.\-_]+)\>"
95 RE_EMAIL = re.compile(EMAIL, re.LOCALE|re.MULTILINE)
96 URL = u"\<([A-z]+://[^\>]+)\>"
97 RE_URL = re.compile(URL, re.LOCALE|re.MULTILINE)
98 URL_2 = u"([A-z]+://[^\>]+)"
99 RE_URL_2 = re.compile(URL_2, re.LOCALE|re.MULTILINE)
100 LINK = u"""\[([^\]]+)\]\s*((\(([^ \)]+)(\s+"([^"]+)"\s*)?\))|\[([\w\s]+)\])?"""
101 RE_LINK = re.compile(LINK, re.LOCALE|re.MULTILINE)
102 TARGET = u"\|([\w\s]+(:[^\|]*)?)\|"
103 RE_TARGET = re.compile(TARGET, re.LOCALE)
104
105 # Custom markup
106 MARKUP_ATTR = u"""\w+\s*=\s*('[^']*'|"[^"]*")"""
107 MARKUP = u"\<(\w+)(\s*%s)*\s*/?>|\</(\w+)\s*>" % (MARKUP_ATTR)
108 RE_MARKUP = re.compile(MARKUP, re.LOCALE|re.MULTILINE)
109
110 def _processText( context, text ):
111 """Common operation for expanding tabs and normalising text. Use by
112 acronyms, citations and quotes."""
113 if not text: return text
114 text = context.parser.expandTabs(text)
115 text = context.parser.normaliseText(text)
116 return text
117
118 #------------------------------------------------------------------------------
119 #
120 # InlineParser
121 #
122 #------------------------------------------------------------------------------
123
124 class InlineParser:
125
126 def __init__( self, name, regexp, result=lambda x,y: x.group(1),
127 requiresLeadingSpace=False):
128 """Creates a new InlineParser.
129
130 Name is the name of the parser, *regexp* is the string expression
131 of the regular expression that will match the element that the
132 InlineParser is looking for, or regexp can also be a precompiled
133 regular expression object.
134
135 Result is a lambda expression that will return the content of the
136 Inline generated by the *parse* method. The lambda takes two
137 arguments : the match object and the string in which the match object
138 has been found."""
139 self.name = name
140 #Checks if regexp is a string or a precompiled regular expression
141 if type(regexp) in (type(u""), type("")):
142 self.regexp = re.compile(regexp, re.LOCALE|re.MULTILINE)
143 else:
144 self.regexp = regexp
145 self.result = result
146 self.requiresLeadingSpace = requiresLeadingSpace
147
148 def _recognisesBefore( self, context, match ):
149 """A function that is called to check if the text before the current
150 offset is recognized by this parser. This is used by
151 'requiresLeadingSpace'."""
152 if match.start() == 0: return True
153 previous_char = context.currentFragment()[match.start()-1]
154 return previous_char in u' \t();:-!?'
155
156 def recognises( self, context ):
157 """Recognises this inlines in the given context, within the current
158 context block. It returns (None, None) when the inline was not recognised,
159 otherwise it returns the offset of the matching element in the current
160 context, plus information that will be given as argument to the parse
161 method. This means that the returned offset is RELATIVE TO THE CURRENT
162 CONTEXT OFFSET."""
163 match = self.regexp.search(context.currentFragment())
164 fragment = context.currentFragment()
165 if match:
166 match_start = max(0,match.start()-1)
167 if self.requiresLeadingSpace and not self._recognisesBefore(context, match):
168 return (None, None)
169 return (match.start(), match)
170 else:
171 return (None, None)
172
173 def endOf( self, recogniseInfo ):
174 """Returns the end of this inline using the given recogniseInfo."""
175 return recogniseInfo.end()
176
177 def parse( self, context, node, recogniseInfo ):
178 """Parses the given context within the current block range, returning
179 the new offset (relative to the block start offset, ie. the start of
180 context.currentFragment). Note that the context offsets are the same
181 as those given to the recognise method call which created
182 recogniseInfo.
183
184 The given context starts at the same offset as when recognises was
185 called. Modifications are made to the given node."""
186 match = recogniseInfo
187 assert match!=None
188 text = self.result(match, context.documentText)
189 if self.name:
190 inline_node = context.document.createElementNS(None, self.name)
191 if text:
192 text_node = context.document.createTextNode(text)
193 inline_node.appendChild(text_node)
194 node.appendChild(inline_node)
195 elif not self.name is None:
196 inline_node = context.document.createTextNode(text)
197 node.appendChild(inline_node)
198 return self.endOf(recogniseInfo)
199
200 #------------------------------------------------------------------------------
201 #
202 # Arrow parsers
203 #
204 #------------------------------------------------------------------------------
205
206 class ArrowInlineParser( InlineParser ):
207
208 def __init__( self ):
209 InlineParser.__init__( self, "arrow", RE_ARROW )
210
211 def parse( self, context, node, match ):
212 assert match
213 text = match.group()
214 arrow_type = None
215 if text[0] == "<":
216 if text[-1] == ">": arrow_type = "double"
217 else: arrow_type = "left"
218 else:
219 arrow_type = "right"
220 arrow_node = context.document.createElementNS(None, "arrow")
221 arrow_node.setAttributeNS(None, "type", arrow_type)
222 node.appendChild(arrow_node)
223 return match.end()
224
225 #------------------------------------------------------------------------------
226 #
227 # Entity parsers
228 #
229 #------------------------------------------------------------------------------
230
231 class EntityInlineParser( InlineParser ):
232
233 def __init__( self ):
234 InlineParser.__init__( self, "entity", RE_ENTITIES )
235
236 def parse( self, context, node, match ):
237 assert match
238 text = match.group(2)
239 entity_node = context.document.createElementNS(None, "entity")
240 entity_node.setAttributeNS(None, "num", text)
241 node.appendChild(entity_node)
242 return match.end()
243
244 #------------------------------------------------------------------------------
245 #
246 # Pre parsers
247 #
248 #------------------------------------------------------------------------------
249
250 class PreInlineParser( InlineParser ):
251
252 def __init__( self ):
253 InlineParser.__init__( self, "pre", PRE )
254
255 def recognises( self, context ):
256 return InlineParser.recognises(self,context)
257
258 def parse( self, context, node, match ):
259 lines = []
260 for text in match.group().split("\n"):
261 submatch = RE_PRE.match(text + "\n")
262 if text: text = context.parser.expandTabs(submatch.group(4))
263 lines.append(text)
264 pre_node = context.document.createElementNS(None, 'pre')
265 pre_node.appendChild(context.document.createTextNode("\n".join(lines)))
266 node.appendChild(pre_node)
267 return match.end()
268
269 #------------------------------------------------------------------------------
270 #
271 # CommentInlineParser
272 #
273 #------------------------------------------------------------------------------
274
275 class CommentInlineParser( InlineParser ):
276
277 def __init__( self ):
278 InlineParser.__init__( self, "comment", RE_COMMENT )
279
280 def processText( self, text ):
281 new_text = ""
282 for line in text.split("\n"):
283 line = line.strip()
284 if len(line)>1:
285 line = line[1:]
286 new_text += line + "\n"
287 if new_text: new_text = new_text[:-1]
288 new_text = " "+new_text+" "
289 return new_text
290
291 def parse( self, context, node, recogniseInfo ):
292 match = recogniseInfo
293 assert match!=None
294 node.appendChild(context.document.createComment(
295 self.processText(match.group())))
296 return match.end()
297
298 #------------------------------------------------------------------------------
299 #
300 # Escape inline parser
301 #
302 #------------------------------------------------------------------------------
303
304 class EscapedInlineParser( InlineParser ):
305
306 def __init__( self ):
307 InlineParser.__init__( self, "escaped", None )
308
309 def recognises( self, context ):
310 start_match = RE_ESCAPED_START.search(context.currentFragment())
311 if start_match:
312 # And search the escape starting from the end of the escaped
313 end_match = RE_ESCAPED_END.search(
314 context.currentFragment(), start_match.end())
315 if end_match:
316 return (start_match.start(), (start_match, end_match))
317 else:
318 return (None, None)
319 return (None, None)
320
321 def endOf( self, recogniseInfo ):
322 return recogniseInfo[1].end()
323
324 def parse( self, context, node, recogniseInfo ):
325 # We get start and end match, end being relative to the start
326 start_match, end_match = recogniseInfo
327 assert start_match!=None and end_match!=None
328
329 # Create a text node with the escaped text
330 escaped_node = context.document.createTextNode(
331 context.currentFragment()[start_match.end():end_match.start()])
332 node.appendChild(escaped_node)
333 # And increase the offset
334 return self.endOf(recogniseInfo)
335
336 #------------------------------------------------------------------------------
337 #
338 # Escaped String Inline Parser
339 #
340 #------------------------------------------------------------------------------
341
342 class EscapedStringInlineParser( InlineParser ):
343
344 def __init__( self ):
345 InlineParser.__init__( self, None, RE_ESCAPED_STRING )
346
347 def parse( self, context, node, match ):
348 res = context.document.createTextNode(match.group(1))
349 node.appendChild(res)
350 return match.end()
351
352 #------------------------------------------------------------------------------
353 #
354 # Link/Reference parser
355 #
356 #------------------------------------------------------------------------------
357
358 class LinkInlineParser( InlineParser ):
359
360 def __init__( self ):
361 InlineParser.__init__( self, "link", RE_LINK )
362
363 def recognises( self, context ):
364 result = InlineParser.recognises(self, context)
365 # We avoid conflict with the tasks list. This may not be
366 # always necessary, but it's safer to have it.
367 if result[1]:
368 r = result[1].group().strip()
369 if len(r) == 3 and r[0] == "[" and r[2]=="]":
370 return (None, None)
371 return result
372
373 def parse( self, context, node, match ):
374 assert match
375 # We detect wether the link is an URL or Ref link
376 link_node = context.document.createElementNS(None, "link")
377 if match.group(7):
378 ref_entry = match.group(7)
379 link_node.setAttributeNS(None, "type", "ref")
380 link_node.setAttributeNS(None, "target", ref_entry)
381 else:
382 ref_url = match.group(4)
383 ref_title = match.group(5)
384 if not ref_url:
385 link_node.setAttributeNS(None, "type", "ref")
386 link_node.setAttributeNS(None, "target", match.group(1))
387 else:
388 link_node.setAttributeNS(None, "type", "url")
389 link_node.setAttributeNS(None, "target", ref_url)
390 context._links.append(link_node)
391 #Now we parse the content of the link
392 offsets = context.saveOffsets()
393 context.setCurrentBlock(context.getOffset() + match.start() + 1,
394 context.getOffset() + match.start() + 1 + len(match.group(1)))
395 context.parser.parseBlock(context, link_node, _processText)
396 context.restoreOffsets(offsets)
397 node.appendChild(link_node)
398 return match.end()
399
400 #------------------------------------------------------------------------------
401 #
402 # Target parser
403 #
404 #------------------------------------------------------------------------------
405
406 class TargetInlineParser( InlineParser ):
407
408 def __init__( self ):
409 InlineParser.__init__( self, "target", RE_TARGET )
410
411 def parse( self, context, node, match ):
412 assert match
413 # We detect wether the link is an URL or Ref link
414 target_node = context.document.createElementNS(None, "target")
415 name_and_text = match.group(1).split(":", 1)
416 if len(name_and_text) == 1:
417 name = name_and_text[0]
418 text = None
419 else:
420 name = name_and_text[0]
421 text = name_and_text[1]
422 if not text: text = name
423 target_node.setAttributeNS(None, "name", name.replace(" ", " ").strip().lower())
424 if text:
425 text_node = context.document.createTextNode(text)
426 target_node.appendChild(text_node)
427 context._targets.append(target_node)
428 node.appendChild(target_node)
429 return match.end()
430
431 #------------------------------------------------------------------------------
432 #
433 # MarkupInlineParser
434 #
435 #------------------------------------------------------------------------------
436
437 def Markup_isStartTag( match ):
438 return not Markup_isEndTag(match) and not match.group().endswith("/>")
439
440 def Markup_isEndTag( match ):
441 return match.group().startswith("</")
442
443 def Markup_attributes( match ):
444 """Returns the attribute string of this markup stat element."""
445 text = match.group()[1 + len(match.group(1))+1:-1]
446 if text and text[-1] == "/": text = text[:-1]
447 text = text.strip()
448 return text
449
450 class MarkupInlineParser( InlineParser ):
451 """Parses Kiwi generic markup elements."""
452
453 def __init__( self ):
454 InlineParser.__init__(self, None, RE_MARKUP)
455
456 def parse( self, context, node, recogniseInfo ):
457 """Parses the given tag, and returns the offset where the parsed tag
458 ends. If the tag is an "inline block" tag, then its content is also
459 parsed."""
460 match = recogniseInfo
461 # Is it an inline ?
462 if match.group().endswith("/>"):
463 # TODO: Check if element name is recognised or not
464 markup_name = match.group(1)
465 markup_node = context.document.createElementNS(None, markup_name.strip())
466 markup_node.setAttributeNS(None, "_html", "true")
467 for key, value in context.parseAttributes(Markup_attributes(match)).items():
468 markup_node.setAttributeNS(None, key, value)
469 node.appendChild(markup_node)
470 return match.end()
471 # Or is it a block start ?
472 elif self.isStartTag(match):
473 # We search for an end, taking care of setting the offset after the
474 # recognised inline.
475 markup_name = match.group(1).strip()
476 markup_range = self.findEnd( markup_name, context, match.end())
477 if not markup_range:
478 context.parser.error( START_WITHOUT_END % (markup_name), context )
479 return match.end()
480 else:
481 markup_end = markup_range[0] + context.getOffset()
482 # We do not want the context to be altered by block parsing
483 offsets = context.saveOffsets()
484 context.setCurrentBlock(context.getOffset()+match.end(),
485 context.getOffset()+markup_range[0])
486 # We check if there is a specific block parser for this markup
487 custom_parser = context.parser.customParsers.get(markup_name)
488 # Here we have found a custom parser, which is in charge for
489 # creating nodes
490 if custom_parser:
491 custom_parser.process(context, None)
492 # Otherwise we create the node for the markup and continue
493 # parsing
494 else:
495 markup_node = context.document.createElementNS(None, "Content")
496 markup_node.setAttributeNS(None, "_html", "true")
497 node.appendChild(markup_node)
498 # We add the attributes to this tag
499 for key, value in context.parseAttributes(Markup_attributes(match)).items():
500 markup_node.setAttributeNS(None, key, value)
501 # FIXME: This should not be necessary
502 old_node = context.currentNode
503 context.currentNode = markup_node
504 context.currentNode = markup_node
505 before_offset = context.getOffset()
506 next_block = context.parser._findNextBlockSeparator(context)
507 # There may be many blocks contained in the markup delimited
508 # by the node. Here we try to parse all the blocks until be
509 # reach the end of the markup minus 1 (that is the last
510 # separator before the block end)
511 if context.offsetInBlock(next_block[0]) or context.offsetInBlock(next_block[1]):
512 end_offset = context.blockEndOffset
513 context.setOffset(context.blockStartOffset)
514 while context.getOffset() < markup_end :
515 context.parser._parseNextBlock(context, end=markup_end)
516 # If there was no block contained, we parse the text as a
517 # single block
518 else:
519 context.parser.parseBlock(context, markup_node, self.processText)
520 markup_node.nodeName = markup_name
521 markup_node.tagName = markup_name
522 context.currentNode = old_node
523 context.restoreOffsets(offsets)
524 return markup_range[1]
525 # Or is a a closing element ?
526 elif self.isEndTag(match):
527 context.parser.error( END_WITHOUT_START % (match.group(4).strip()),
528 context )
529 return match.end()
530 else:
531 context.parser.error( MUST_BE_START_OR_END, context )
532 return match.end()
533
534 def _searchMarkup( self, context ):
535 """Looks for the next markup inline in the current context. This also
536 takes care of markups that are contained into an escaped text tag.
537
538 WARNING : this operation mutates the context offsets, so this should
539 always be enclosed in offset store and restore. The context offset is
540 set BEFORE the searched markup, so that the returned recognition info
541 is relative to the context offset.
542
543 Returns the result of this parser `recognise' method, or null."""
544 inline_parsers = ( context.parser.escapedParser, self )
545 # We look for a block inline
546 while not context.blockEndReached():
547 result = context.findNextInline(inline_parsers)
548 if result:
549 if result[2] == self:
550 return result[1]
551 else:
552 context.increaseOffset(result[2].endOf(result[1]))
553 else:
554 break
555 return None
556
557 def isStartTag( self, match ):
558 return Markup_isStartTag(match)
559
560 def isEndTag( self, match ):
561 return Markup_isEndTag(match)
562
563 def findEnd( self, blockName, context, offsetIncr=0 ):
564 """Finds the end of the given markup end in the current block. Returns
565 a couple (start, end) indicating the start and end offsets of the found
566 end block, relative to the context offset. The given 'offsetIncr'
567 parameter tells the number of characters to skip before searching for
568 the end markup. This has no impact on the result.
569
570 The context offsets are left unchanged."""
571 depth = markup_match = 1
572 block_name = None
573 offsets = context.saveOffsets()
574 original_offset = context.getOffset()
575 context.increaseOffset(offsetIncr)
576 # We look for start and end markups
577 while depth>0 and markup_match and not context.blockEndReached():
578 markup_match = self._searchMarkup(context)
579 if markup_match:
580 if self.isStartTag(markup_match):
581 depth += 1
582 elif self.isEndTag(markup_match):
583 depth -= 1
584 block_name = markup_match.group(4).strip()
585 if depth > 0:
586 context.increaseOffset(markup_match.end())
587 # We have found at least one matching block
588 end_markup_range = None
589 if depth==0 and block_name and block_name==blockName:
590 # The match is relative to the current context offset
591 match_start = context.getOffset() - original_offset + markup_match.start()
592 match_end = context.getOffset() - original_offset + markup_match.end()
593 end_markup_range = ( match_start, match_end )
594 context.restoreOffsets(offsets)
595 return end_markup_range
596
597 def processText( self, context, text ):
598 return context.parser.normaliseText(text)
599
600 # EOF