"Fossies" - the Fresh Open Source Software Archive

Member "txt2tags-3.7/txt2tags.py" (10 Feb 2020, 169840 Bytes) of package /linux/privat/txt2tags-3.7.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file. For more information about "txt2tags.py" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.6_vs_3.7.

    1 #!/usr/bin/env python
    2 # txt2tags - generic text conversion tool
    3 # https://txt2tags.org/
    4 # https://github.com/jendrikseipp/txt2tags
    5 #
    6 # Copyright 2001-2010 Aurelio Jargas
    7 # Copyright 2010-2019 Jendrik Seipp
    8 #
    9 # License: GPL2+ (http://www.gnu.org/licenses/gpl-2.0.txt)
   10 #
   11 ########################################################################
   12 #
   13 # The code that [1] parses the marked text is separated from the
   14 # code that [2] inserts the target tags.
   15 #
   16 #   [1] made by: def convert()
   17 #   [2] made by: class BlockMaster
   18 #
   19 # The structures of the marked text are identified and their contents are
   20 # extracted into a data holder (Python lists and dictionaries).
   21 #
   22 # When parsing the source file, the blocks (para, lists, quote, table)
   23 # are opened with BlockMaster, right when found. Then their contents,
   24 # which span several lines, are fed into a special holder on the
   25 # BlockMaster instance. Only when the block is closed are the target tags
   26 # inserted for the full block as a whole, in one pass. This way, we
   27 # have better control over blocks. Much better than the previous line by
   28 # line approach.
   29 #
   30 # In other words, whenever inside a block, the parser *holds* the tag
   31 # insertion process, waiting until the full block is read. That was
   32 # needed primarily to close paragraphs for the XHTML target, but
   33 # proved to be a very good addition, improving much other processing.
   34 #
   35 # -------------------------------------------------------------------
   36 #
   37 # These important classes are all documented:
   38 # CommandLine, SourceDocument, ConfigMaster, ConfigLines.
   39 #
   40 # There is a RAW Config format and all kind of configuration is first
   41 # converted to this format. Then a generic method parses it.
   42 #
   43 # These functions get information about the input file(s) and take
   44 # care of the init processing:
   45 # process_source_file() and convert_file()
   46 #
   47 ########################################################################
   48 
   49 # XXX Smart Image Align doesn't work if the image is a link
   50 # Can't fix that because the image is expanded together with the
   51 # link, at the linkbank filling moment. Only the image is passed
   52 # to parse_images(), not the full line, so it is always 'middle'.
   53 
   54 # XXX Paragraph separation not valid inside Quote
   55 # Quote will not have <p></p> inside, instead will close and open
   56 # again the <blockquote>. This really sux in CSS, when defining a
   57 # different background color. Still don't know how to fix it.
   58 
   59 # XXX TODO (maybe)
   60 # New mark which expands to an anchor full title.
   61 # It is necessary to parse the full document in this order:
   62 #  DONE  1st scan: HEAD: get all settings, including %!includeconf
   63 #  DONE  2nd scan: BODY: expand includes & apply %!preproc
   64 #        3rd scan: BODY: read titles and compose TOC info
   65 #        4th scan: BODY: full parsing, expanding [#anchor] 1st
   66 # Steps 2 and 3 can be made together, with no tag adding.
   67 # Two complete body scans will be *slow*; not sure it is worth it.
   68 # One solution may be to add the titles as postproc rules.
   69 
   70 from __future__ import print_function
   71 
   72 import collections
   73 import getopt
   74 import io
   75 import os
   76 import re
   77 import sys
   78 
   79 ##############################################################################
   80 
   81 # Program information
   82 my_url = "https://txt2tags.org"
   83 my_name = "txt2tags"
   84 my_email = "jendrikseipp@gmail.com"
   85 __version__ = "3.7"
   86 
   87 # FLAGS   : the conversion related flags  , may be used in %!options
   88 # OPTIONS : the conversion related options, may be used in %!options
   89 # ACTIONS : the other behavior modifiers, valid on command line only
   90 # NO_TARGET: actions that don't require a target specification
   91 # NO_MULTI_INPUT: actions that don't accept more than one input file
   92 # CONFIG_KEYWORDS: the valid %!key:val keywords
   93 #
   94 # FLAGS and OPTIONS are configs that affect the converted document.
   95 # They usually have also a --no-<option> to turn them OFF.
   96 #
   97 # ACTIONS are needed because when handling multiple input files, strange
   98 # behavior may occur. There is no --no-<action>.
   99 # Options --version and --help inside %!options are odd.
  100 
  101 FLAGS = {  # on/off conversion switches; values look like the defaults (1 = on) — confirm
  102     "headers": 1,
  103     "enum-title": 0,
  104     "toc": 0,
  105     "rc": 1,
  106     "quiet": 0,
  107     "slides": 0,
  108 }
  109 OPTIONS = {  # conversion settings that carry a string value; "" presumably means "not set"
  110     "target": "",
  111     "style": "",
  112     "infile": "",
  113     "outfile": "",
  114     "config-file": "",
  115     "lang": "",
  116 }
  117 ACTIONS = {  # command-line-only behavior modifiers; these have no --no-<action> form
  118     "help": 0,
  119     "version": 0,
  120     "verbose": 0,
  121     "debug": 0,
  122     "targets": 0,
  123 }
  124 NO_TARGET = ["help", "version", "targets"]  # actions that do not require a target specification
  125 CONFIG_KEYWORDS = ["target", "style", "options", "preproc", "postproc"]  # valid %!key:val keywords
  126 
  127 TARGET_NAMES = {  # target identifier -> human-readable description of the output format
  128     "html": "HTML page",
  129     "sgml": "SGML document",
  130     "dbk": "DocBook document",
  131     "tex": "LaTeX document",
  132     "lout": "Lout document",
  133     "man": "UNIX Manual page",
  134     "mgp": "MagicPoint presentation",
  135     "wiki": "Wikipedia page",
  136     "gwiki": "Google Wiki page",
  137     "doku": "DokuWiki page",
  138     "pmw": "PmWiki page",
  139     "moin": "MoinMoin page",
  140     "txt": "Plain Text",
  141     "adoc": "AsciiDoc document",
  142     "creole": "Creole 1.0 document",
  143     "md": "Markdown document",
  144 }  # the key set must equal HEADER_TEMPLATE's keys (asserted right after that dict)
  145 
  146 TARGETS = sorted(TARGET_NAMES)  # alphabetically sorted list of the target identifiers (the dict's keys)
  147 
  148 DEBUG = 0  # do not edit here, please use --debug
  149 VERBOSE = 0  # do not edit here, please use -v, -vv or -vvv
  150 QUIET = 0  # do not edit here, please use --quiet
  151 
  152 ENCODING = "utf-8"  # presumably the value %(ENCODING)s expands to in the header templates — confirm
  153 DFT_TEXT_WIDTH = 72  # default text width, going by the name; its consumer is not visible here — confirm
  154 
  155 RC_RAW = []  # this and the seven names below are module-level state, empty at import time;
  156 CMDLINE_RAW = []  # presumably filled in at runtime — the writers are not visible here, confirm
  157 CONF = {}
  158 BLOCK = None
  159 TITLE = None
  160 regex = {}
  161 TAGS = {}
  162 rules = {}
  163 
  164 TARGET = ""
  165 
  166 STDIN = STDOUT = "-"  # "-" as a file name selects standard input/output (see the USAGE text)
  167 MODULEIN = MODULEOUT = "-module-"  # sentinel file name; presumably for use as an imported module — confirm
  168 ESCCHAR = "\x00"  # NUL control character
  169 SEPARATOR = "\x01"  # SOH control character
  170 LISTNAMES = {"-": "list", "+": "numlist", ":": "deflist"}  # presumably list mark char -> tag-name prefix (listOpen, numlistOpen, deflistOpen, ...)
  171 
  172 VERSIONSTR = "{} version {} <{}>".format(my_name, __version__, my_url)
  173 
  174 USAGE = "\n".join(  # usage/help text, built once at import from my_name, TARGETS and my_url
  175     [
  176         "",
  177         "Usage: %s [OPTIONS] infile.t2t" % my_name,
  178         "",
  179         "      --targets       list available targets and exit",
  180         "  -t, --target=TYPE   set target document type. currently supported:",
  181         "                      %s" % ", ".join(TARGETS),
  182         "  -i, --infile=FILE   set FILE as the input file name ('-' for STDIN)",
  183         "  -o, --outfile=FILE  set FILE as the output file name ('-' for STDOUT)",
  184         "      --toc           add a table of contents to the output",
  185         "  -n, --enum-title    enumerate all titles as 1, 1.1, 1.1.1, etc.",
  186         "      --style=FILE    use FILE as the document style (e.g., a CSS file)",
  187         "  -H, --no-headers    omit header and footer from output",
  188         "  -C, --config-file=F read configuration from file F",
  189         "  -q, --quiet         suppress all output (except errors)",
  190         "  -v, --verbose       print informative messages during conversion",
  191         "  -h, --help          print this help text and exit",
  192         "  -V, --version       print program version and exit",
  193         "",
  194         "Turn off options:",
  195         "     --no-enum-title, --headers, --no-quiet,",  # --headers is presumably the inverse of -H/--no-headers
  196         "     --no-rc, --no-style, --no-toc",
  197         "",
  198         "Example:",
  199         "     {} -t html --toc {}".format(my_name, "file.t2t"),
  200         "",
  201         "By default, converted output is saved to 'infile.<target>'.",
  202         "Use --outfile to force an output file name.",
  203         "If  input file is '-', read from STDIN.",
  204         "If output file is '-', dump output to STDOUT.",
  205         "",
  206         my_url,
  207         "",
  208     ]
  209 )
  210 
  211 
  212 ##############################################################################
  213 
  214 
  215 # Here are all the targets' templates
  216 # You may edit them to fit your needs
  217 #  - the %(HEADERn)s strings represent the Header lines
  218 #  - the %(STYLE)s string is changed by --style contents
  219 #  - the %(ENCODING)s string is changed to "utf-8"
  220 #  - if any of the above is empty, the full line is removed
  221 #  - use %% to represent a literal %
  222 #
  223 HEADER_TEMPLATE = {
  224     "txt": """\
  225 %(HEADER1)s
  226 %(HEADER2)s
  227 %(HEADER3)s
  228 """,
  229     "sgml": """\
  230 <!doctype linuxdoc system>
  231 <article>
  232 <title>%(HEADER1)s
  233 <author>%(HEADER2)s
  234 <date>%(HEADER3)s
  235 """,
  236     "html": """\
  237 <!DOCTYPE html>
  238 <html>
  239 <head>
  240 <meta charset="%(ENCODING)s">
  241 <title>%(HEADER1)s</title>
  242 <meta name="generator" content="https://txt2tags.org">
  243 <link rel="stylesheet" href="%(STYLE)s">
  244 <style type="text/css">
  245 blockquote{margin: 1em 2em; border-left: 2px solid #999;
  246   font-style: oblique; padding-left: 1em;}
  247 blockquote:first-letter{margin: .2em .1em .1em 0; font-size: 160%%; font-weight: bold;}
  248 blockquote:first-line{font-weight: bold;}
  249 body{font-family: sans-serif;}
  250 hr{background-color:#000;border:0;color:#000;}
  251 hr.heavy{height:2px;}
  252 hr.light{height:1px;}
  253 img{border:0;display:block;}
  254 img.right{margin:0 0 0 auto;}
  255 img.center{border:0;margin:0 auto;}
  256 table{border-collapse: collapse;}
  257 table th,table td{padding: 3px 7px 2px 7px;}
  258 table th{background-color: lightgrey;}
  259 table.center{margin-left:auto; margin-right:auto;}
  260 .center{text-align:center;}
  261 .right{text-align:right;}
  262 .left{text-align:left;}
  263 .tableborder,.tableborder td,.tableborder th{border:1px solid #000;}
  264 .underline{text-decoration:underline;}
  265 </style>
  266 </head>
  267 <body>
  268 <header>
  269 <hgroup>
  270 <h1>%(HEADER1)s</h1>
  271 <h2>%(HEADER2)s</h2>
  272 <h3>%(HEADER3)s</h3>
  273 </hgroup>
  274 </header>
  275 <article>
  276 """,
  277     "dbk": """\
  278 <?xml version="1.0"
  279       encoding="%(ENCODING)s"
  280 ?>
  281 <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"\
  282  "docbook/dtd/xml/4.5/docbookx.dtd">
  283 <article lang="en">
  284   <articleinfo>
  285     <title>%(HEADER1)s</title>
  286     <authorgroup>
  287       <author><othername>%(HEADER2)s</othername></author>
  288     </authorgroup>
  289     <date>%(HEADER3)s</date>
  290   </articleinfo>
  291 """,
  292     "man": """\
  293 .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s"
  294 """,
  295     "mgp": """\
  296 #!/usr/X11R6/bin/mgp -t 90
  297 %%deffont "normal"    xfont  "utopia-medium-r", charset "iso8859-1"
  298 %%deffont "normal-i"  xfont  "utopia-medium-i", charset "iso8859-1"
  299 %%deffont "normal-b"  xfont  "utopia-bold-r"  , charset "iso8859-1"
  300 %%deffont "normal-bi" xfont  "utopia-bold-i"  , charset "iso8859-1"
  301 %%deffont "mono"      xfont "courier-medium-r", charset "iso8859-1"
  302 %%default 1 size 5
  303 %%default 2 size 8, fore "yellow", font "normal-b", center
  304 %%default 3 size 5, fore "white",  font "normal", left, prefix "  "
  305 %%tab 1 size 4, vgap 30, prefix "     ", icon arc "red" 40, leftfill
  306 %%tab 2 prefix "            ", icon arc "orange" 40, leftfill
  307 %%tab 3 prefix "                   ", icon arc "brown" 40, leftfill
  308 %%tab 4 prefix "                          ", icon arc "darkmagenta" 40, leftfill
  309 %%tab 5 prefix "                                ", icon arc "magenta" 40, leftfill
  310 %%%%------------------------- end of headers -----------------------------
  311 %%page
  312 
  313 
  314 
  315 
  316 
  317 %%size 10, center, fore "yellow"
  318 %(HEADER1)s
  319 
  320 %%font "normal-i", size 6, fore "white", center
  321 %(HEADER2)s
  322 
  323 %%font "mono", size 7, center
  324 %(HEADER3)s
  325 """,
  326     "moin": """\
  327 '''%(HEADER1)s'''
  328 
  329 ''%(HEADER2)s''
  330 
  331 %(HEADER3)s
  332 """,
  333     "gwiki": """\
  334 *%(HEADER1)s*
  335 
  336 %(HEADER2)s
  337 
  338 _%(HEADER3)s_
  339 """,
  340     "adoc": """\
  341 = %(HEADER1)s
  342 %(HEADER2)s
  343 %(HEADER3)s
  344 """,
  345     "doku": """\
  346 ===== %(HEADER1)s =====
  347 
  348 **//%(HEADER2)s//**
  349 
  350 //%(HEADER3)s//
  351 """,
  352     "pmw": """\
  353 (:Title %(HEADER1)s:)
  354 
  355 (:Description %(HEADER2)s:)
  356 
  357 (:Summary %(HEADER3)s:)
  358 """,
  359     "wiki": """\
  360 '''%(HEADER1)s'''
  361 
  362 %(HEADER2)s
  363 
  364 ''%(HEADER3)s''
  365 """,
  366     "tex": r"""\documentclass{article}
  367 \usepackage{booktabs} %% needed for tables
  368 \usepackage{graphicx}
  369 \usepackage{paralist} %% needed for compact lists
  370 \usepackage[normalem]{ulem} %% needed by strike
  371 \usepackage[urlcolor=blue,colorlinks=true]{hyperref}
  372 \usepackage[%(ENCODING)s]{inputenc}  %% char encoding
  373 \usepackage{%(STYLE)s}  %% user defined
  374 
  375 \title{%(HEADER1)s}
  376 \author{%(HEADER2)s}
  377 \begin{document}
  378 \date{%(HEADER3)s}
  379 \maketitle
  380 \clearpage
  381 """,
  382     "lout": """\
  383 @SysInclude { doc }
  384 @Document
  385   @InitialFont { Times Base 12p }  # Times, Courier, Helvetica, ...
  386   @PageOrientation { Portrait }    # Portrait, Landscape
  387   @ColumnNumber { 1 }              # Number of columns (2, 3, ...)
  388   @PageHeaders { Simple }          # None, Simple, Titles, NoTitles
  389   @InitialLanguage { English }     # German, French, Portuguese, ...
  390   @OptimizePages { Yes }           # Yes/No smart page break feature
  391 //
  392 @Text @Begin
  393 @Display @Heading { %(HEADER1)s }
  394 @Display @I { %(HEADER2)s }
  395 @Display { %(HEADER3)s }
  396 #@NP                               # Break page after Headers
  397 """,
  398     "creole": """\
  399 %(HEADER1)s
  400 %(HEADER2)s
  401 %(HEADER3)s
  402 """,
  403     "md": """\
  404 %(HEADER1)s
  405 %(HEADER2)s
  406 %(HEADER3)s
  407 """
  408     # @SysInclude { tbl }                   # Tables support
  409     # setup: @MakeContents { Yes }          # show TOC
  410     # setup: @SectionGap                    # break page at each section
  411 }
  412 assert set(HEADER_TEMPLATE) == set(TARGETS)
  413 
  414 
  415 ##############################################################################
  416 
  417 
  418 def getTags(config):
  419     "Returns all the known tags for the specified target"
  420 
  421     keys = """
  422     title1              numtitle1
  423     title2              numtitle2
  424     title3              numtitle3
  425     title4              numtitle4
  426     title5              numtitle5
  427     title1Open          title1Close
  428     title2Open          title2Close
  429     title3Open          title3Close
  430     title4Open          title4Close
  431     title5Open          title5Close
  432     blockTitle1Open     blockTitle1Close
  433     blockTitle2Open     blockTitle2Close
  434     blockTitle3Open     blockTitle3Close
  435 
  436     paragraphOpen       paragraphClose
  437     blockVerbOpen       blockVerbClose  blockVerbLine
  438     blockQuoteOpen      blockQuoteClose blockQuoteLine
  439     blockCommentOpen    blockCommentClose
  440 
  441     fontMonoOpen        fontMonoClose
  442     fontBoldOpen        fontBoldClose
  443     fontItalicOpen      fontItalicClose
  444     fontUnderlineOpen   fontUnderlineClose
  445     fontStrikeOpen      fontStrikeClose
  446 
  447     listOpen            listClose
  448     listOpenCompact     listCloseCompact
  449     listItemOpen        listItemClose     listItemLine
  450     numlistOpen         numlistClose
  451     numlistOpenCompact  numlistCloseCompact
  452     numlistItemOpen     numlistItemClose  numlistItemLine
  453     deflistOpen         deflistClose
  454     deflistOpenCompact  deflistCloseCompact
  455     deflistItem1Open    deflistItem1Close
  456     deflistItem2Open    deflistItem2Close deflistItem2LinePrefix
  457 
  458     bar1                bar2
  459     url                 urlMark
  460     email               emailMark
  461     img                 imgAlignLeft  imgAlignRight  imgAlignCenter
  462                        _imgAlignLeft _imgAlignRight _imgAlignCenter
  463 
  464     tableOpen           tableClose
  465     _tableBorder        _tableAlignLeft      _tableAlignCenter
  466     tableRowOpen        tableRowClose        tableRowSep
  467     tableTitleRowOpen   tableTitleRowClose
  468     tableCellOpen       tableCellClose       tableCellSep
  469     tableTitleCellOpen  tableTitleCellClose  tableTitleCellSep
  470     _tableColAlignLeft  _tableColAlignRight  _tableColAlignCenter
  471     _tableCellAlignLeft _tableCellAlignRight _tableCellAlignCenter
  472     _tableCellColSpan   tableColAlignSep
  473     _tableCellMulticolOpen
  474     _tableCellMulticolClose
  475 
  476     bodyOpen            bodyClose
  477     cssOpen             cssClose
  478     tocOpen             tocClose             TOC
  479     anchor
  480     comment
  481     pageBreak
  482     EOD
  483     """.split()
  484 
  485     # TIP: \a represents the current text inside the mark
  486     # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts
  487     alltags = {
  488         "txt": {
  489             "title1": "  \a",
  490             "title2": "\t\a",
  491             "title3": "\t\t\a",
  492             "title4": "\t\t\t\a",
  493             "title5": "\t\t\t\t\a",
  494             "blockQuoteLine": "\t",
  495             "listItemOpen": "- ",
  496             "numlistItemOpen": "\a. ",
  497             "bar1": "\a",
  498             "url": "\a",
  499             "urlMark": "\a (\a)",
  500             "email": "\a",
  501             "emailMark": "\a (\a)",
  502             "img": "[\a]",
  503         },
  504         "html": {
  505             "anchor": ' id="\a"',
  506             "bar1": '<hr class="light">',
  507             "bar2": '<hr class="heavy">',
  508             "blockQuoteClose": "</blockquote>",
  509             "blockQuoteOpen": "<blockquote>",
  510             "blockVerbClose": "</pre>",
  511             "blockVerbOpen": "<pre>",
  512             "bodyClose": "</div>",
  513             "bodyOpen": '<div class="body" id="body">',
  514             "comment": "<!-- \a -->",
  515             "cssClose": "</style>",
  516             "cssOpen": "<style>",
  517             "deflistClose": "</dl>",
  518             "deflistItem1Close": "</dt>",
  519             "deflistItem1Open": "<dt>",
  520             "deflistItem2Close": "</dd>",
  521             "deflistItem2Open": "<dd>",
  522             "deflistOpen": "<dl>",
  523             "email": '<a href="mailto:\a">\a</a>',
  524             "emailMark": '<a href="mailto:\a">\a</a>',
  525             "EOD": "</article></body></html>",
  526             "fontBoldClose": "</strong>",
  527             "fontBoldOpen": "<strong>",
  528             "fontItalicClose": "</em>",
  529             "fontItalicOpen": "<em>",
  530             "fontMonoClose": "</code>",
  531             "fontMonoOpen": "<code>",
  532             "fontStrikeClose": "</del>",
  533             "fontStrikeOpen": "<del>",
  534             "fontUnderlineClose": "</span>",
  535             "fontUnderlineOpen": '<span class="underline">',
  536             "_imgAlignCenter": ' class="center"',
  537             "_imgAlignLeft": ' class="left"',
  538             "_imgAlignRight": ' class="right"',
  539             "img": '<img~a~ src="\a" alt="">',
  540             "listClose": "</ul>",
  541             "listItemClose": "</li>",
  542             "listItemOpen": "<li>",
  543             "listOpen": "<ul>",
  544             "numlistClose": "</ol>",
  545             "numlistItemClose": "</li>",
  546             "numlistItemOpen": "<li>",
  547             "numlistOpen": "<ol>",
  548             "paragraphClose": "</p>",
  549             "paragraphOpen": "<p>",
  550             "_tableAlignCenter": ' style="margin-left: auto; margin-right: auto;"',
  551             "_tableBorder": ' class="tableborder"',
  552             "_tableCellAlignCenter": ' class="center"',
  553             "_tableCellAlignRight": ' class="right"',
  554             "tableCellClose": "</td>",
  555             "_tableCellColSpan": ' colspan="\a"',
  556             "tableCellOpen": "<td~a~~s~>",
  557             "tableClose": "</table>",
  558             "tableOpen": "<table~a~~b~>",
  559             "tableRowClose": "</tr>",
  560             "tableRowOpen": "<tr>",
  561             "tableTitleCellClose": "</th>",
  562             "tableTitleCellOpen": "<th~s~>",
  563             "title1Close": "</section>",
  564             "title1Open": "<section~A~>\n<h1>\a</h1>",
  565             "title2Close": "</section>",
  566             "title2Open": "<section~A~>\n<h2>\a</h2>",
  567             "title3Close": "</section>",
  568             "title3Open": "<section~A~>\n<h3>\a</h3>",
  569             "title4Close": "</section>",
  570             "title4Open": "<section~A~>\n<h4>\a</h4>",
  571             "title5Close": "</section>",
  572             "title5Open": "<section~A~>\n<h5>\a</h5>",
  573             "tocClose": "</nav>",
  574             "tocOpen": "<nav>",
  575             "url": '<a href="\a">\a</a>',
  576             "urlMark": '<a href="\a">\a</a>',
  577         },
  578         "sgml": {
  579             "paragraphOpen": "<p>",
  580             "title1": "<sect>\a~A~<p>",
  581             "title2": "<sect1>\a~A~<p>",
  582             "title3": "<sect2>\a~A~<p>",
  583             "title4": "<sect3>\a~A~<p>",
  584             "title5": "<sect4>\a~A~<p>",
  585             "anchor": '<label id="\a">',
  586             "blockVerbOpen": "<tscreen><verb>",
  587             "blockVerbClose": "</verb></tscreen>",
  588             "blockQuoteOpen": "<quote>",
  589             "blockQuoteClose": "</quote>",
  590             "fontMonoOpen": "<tt>",
  591             "fontMonoClose": "</tt>",
  592             "fontBoldOpen": "<bf>",
  593             "fontBoldClose": "</bf>",
  594             "fontItalicOpen": "<em>",
  595             "fontItalicClose": "</em>",
  596             "fontUnderlineOpen": "<bf><em>",
  597             "fontUnderlineClose": "</em></bf>",
  598             "listOpen": "<itemize>",
  599             "listClose": "</itemize>",
  600             "listItemOpen": "<item>",
  601             "numlistOpen": "<enum>",
  602             "numlistClose": "</enum>",
  603             "numlistItemOpen": "<item>",
  604             "deflistOpen": "<descrip>",
  605             "deflistClose": "</descrip>",
  606             "deflistItem1Open": "<tag>",
  607             "deflistItem1Close": "</tag>",
  608             "bar1": "<!-- \a -->",
  609             "url": '<htmlurl url="\a" name="\a">',
  610             "urlMark": '<htmlurl url="\a" name="\a">',
  611             "email": '<htmlurl url="mailto:\a" name="\a">',
  612             "emailMark": '<htmlurl url="mailto:\a" name="\a">',
  613             "img": '<figure><ph vspace=""><img src="\a"></figure>',
  614             "tableOpen": '<table><tabular ca="~C~">',
  615             "tableClose": "</tabular></table>",
  616             "tableRowSep": "<rowsep>",
  617             "tableCellSep": "<colsep>",
  618             "_tableColAlignLeft": "l",
  619             "_tableColAlignRight": "r",
  620             "_tableColAlignCenter": "c",
  621             "comment": "<!-- \a -->",
  622             "TOC": "<toc>",
  623             "EOD": "</article>",
  624         },
  625         "dbk": {
  626             "paragraphOpen": "<para>",
  627             "paragraphClose": "</para>",
  628             "title1Open": "~A~<sect1><title>\a</title>",
  629             "title1Close": "</sect1>",
  630             "title2Open": "~A~  <sect2><title>\a</title>",
  631             "title2Close": "  </sect2>",
  632             "title3Open": "~A~    <sect3><title>\a</title>",
  633             "title3Close": "    </sect3>",
  634             "title4Open": "~A~      <sect4><title>\a</title>",
  635             "title4Close": "      </sect4>",
  636             "title5Open": "~A~        <sect5><title>\a</title>",
  637             "title5Close": "        </sect5>",
  638             "anchor": '<anchor id="\a"/>\n',
  639             "blockVerbOpen": "<programlisting>",
  640             "blockVerbClose": "</programlisting>",
  641             "blockQuoteOpen": "<blockquote><para>",
  642             "blockQuoteClose": "</para></blockquote>",
  643             "fontMonoOpen": "<code>",
  644             "fontMonoClose": "</code>",
  645             "fontBoldOpen": '<emphasis role="bold">',
  646             "fontBoldClose": "</emphasis>",
  647             "fontItalicOpen": "<emphasis>",
  648             "fontItalicClose": "</emphasis>",
  649             "fontUnderlineOpen": '<emphasis role="underline">',
  650             "fontUnderlineClose": "</emphasis>",
  651             "fontStrikeOpen": None,  # Maybe <emphasis role="strikethrough">
  652             "fontStrikeClose": None,  # Maybe </emphasis>
  653             "listOpen": "<itemizedlist>",
  654             "listClose": "</itemizedlist>",
  655             "listItemOpen": "<listitem><para>",
  656             "listItemClose": "</para></listitem>",
  657             "numlistOpen": '<orderedlist numeration="arabic">',
  658             "numlistClose": "</orderedlist>",
  659             "numlistItemOpen": "<listitem><para>",
  660             "numlistItemClose": "</para></listitem>",
  661             "deflistOpen": "<variablelist>",
  662             "deflistClose": "</variablelist>",
  663             "deflistItem1Open": "<varlistentry><term>",
  664             "deflistItem1Close": "</term>",
  665             "deflistItem2Open": "<listitem><para>",
  666             "deflistItem2Close": "</para></listitem></varlistentry>",
  667             "bar1": None,
  668             "bar2": None,
  669             "url": '<ulink url="\a">\a</ulink>',
  670             "urlMark": '<ulink url="\a">\a</ulink>',
  671             "email": "<email>\a</email>",
  672             "emailMark": "<email>\a</email>",
  673             "img": (
  674                 '<mediaobject><imageobject><imagedata fileref="\a"/>'
  675                 "</imageobject></mediaobject>"
  676             ),
  677             # Tables not supported, need to know number of columns.
  678             # 'tableOpen'            : '<informaltable><tgroup cols=""><tbody>',
  679             # 'tableClose'           : '</tbody></tgroup></informaltable>' ,
  680             # 'tableRowOpen'         : '<row>'                             ,
  681             # 'tableRowClose'        : '</row>'                            ,
  682             # 'tableCellOpen'        : '<entry>'                           ,
  683             # 'tableCellClose'       : '</entry>'                          ,
  684             # 'tableTitleRowOpen'    : '<thead>'                           ,
  685             # 'tableTitleRowClose'   : '</thead>'                          ,
  686             # '_tableBorder'         : ' frame="all"'                      ,
  687             # '_tableAlignCenter'    : ' align="center"'                   ,
  688             # '_tableCellAlignRight' : ' align="right"'                    ,
  689             # '_tableCellAlignCenter': ' align="center"'                   ,
  690             # '_tableCellColSpan'    : ' COLSPAN="\a"'                     ,
  691             "TOC": "<index/>",
  692             "comment": "<!-- \a -->",
  693             "EOD": "</article>",
  694         },
  695         "tex": {
  696             "title1": "~A~\\section*{\a}",
  697             "title2": "~A~\\subsection*{\a}",
  698             "title3": "~A~\\subsubsection*{\a}",
  699             # title 4/5: DIRTY: para+BF+\\+\n
  700             "title4": "~A~\\paragraph{}\\textbf{\a}\\\\\n",
  701             "title5": "~A~\\paragraph{}\\textbf{\a}\\\\\n",
  702             "numtitle1": "\n~A~\\section{\a}",
  703             "numtitle2": "~A~\\subsection{\a}",
  704             "numtitle3": "~A~\\subsubsection{\a}",
  705             "anchor": "\\hypertarget{\a}{}\n",
  706             "blockVerbOpen": "\\begin{verbatim}",
  707             "blockVerbClose": "\\end{verbatim}",
  708             "blockQuoteOpen": "\\begin{quotation}",
  709             "blockQuoteClose": "\\end{quotation}",
  710             "fontMonoOpen": "\\texttt{",
  711             "fontMonoClose": "}",
  712             "fontBoldOpen": "\\textbf{",
  713             "fontBoldClose": "}",
  714             "fontItalicOpen": "\\textit{",
  715             "fontItalicClose": "}",
  716             "fontUnderlineOpen": "\\underline{",
  717             "fontUnderlineClose": "}",
  718             "fontStrikeOpen": "\\sout{",
  719             "fontStrikeClose": "}",
  720             "listOpen": "\\begin{itemize}",
  721             "listClose": "\\end{itemize}",
  722             "listOpenCompact": "\\begin{compactitem}",
  723             "listCloseCompact": "\\end{compactitem}",
  724             "listItemOpen": "\\item ",
  725             "numlistOpen": "\\begin{enumerate}",
  726             "numlistClose": "\\end{enumerate}",
  727             "numlistOpenCompact": "\\begin{compactenum}",
  728             "numlistCloseCompact": "\\end{compactenum}",
  729             "numlistItemOpen": "\\item ",
  730             "deflistOpen": "\\begin{description}",
  731             "deflistClose": "\\end{description}",
  732             "deflistOpenCompact": "\\begin{compactdesc}",
  733             "deflistCloseCompact": "\\end{compactdesc}",
  734             "deflistItem1Open": "\\item[",
  735             "deflistItem1Close": "]",
  736             "bar1": "\\hrulefill{}",
  737             "bar2": "\\rule{\\linewidth}{1mm}",
  738             "url": "\\htmladdnormallink{\a}{\a}",
  739             "urlMark": "\\htmladdnormallink{\a}{\a}",
  740             "email": "\\htmladdnormallink{\a}{mailto:\a}",
  741             "emailMark": "\\htmladdnormallink{\a}{mailto:\a}",
  742             "img": "\\includegraphics{\a}",
  743             "tableOpen": "\\begin{tabular}{@{}~C~@{}}",
  744             "tableClose": "\\end{tabular}",
  745             "tableRowOpen": None,
  746             "tableRowClose": " \\\\",
  747             "tableTitleRowClose": " \\\\\n\\midrule",
  748             "tableCellSep": " & ",
  749             "_tableColAlignLeft": "l",
  750             "_tableColAlignRight": "r",
  751             "_tableColAlignCenter": "c",
  752             "_tableCellAlignLeft": "l",
  753             "_tableCellAlignRight": "r",
  754             "_tableCellAlignCenter": "c",
  755             "_tableCellColSpan": "\a",
  756             "_tableCellMulticolOpen": "\\multicolumn{\a}{|~C~|}{",
  757             "_tableCellMulticolClose": "}",
  758             "tableColAlignSep": None,
  759             "comment": "% \a",
  760             "TOC": "\\tableofcontents",
  761             "pageBreak": "\\clearpage",
  762             "EOD": "\\end{document}",
  763         },
  764         "lout": {
  765             "paragraphOpen": "@LP",
  766             "blockTitle1Open": "@BeginSections",
  767             "blockTitle1Close": "@EndSections",
  768             "blockTitle2Open": " @BeginSubSections",
  769             "blockTitle2Close": " @EndSubSections",
  770             "blockTitle3Open": "  @BeginSubSubSections",
  771             "blockTitle3Close": "  @EndSubSubSections",
  772             "title1Open": "~A~@Section @Title { \a } @Begin",
  773             "title1Close": "@End @Section",
  774             "title2Open": "~A~ @SubSection @Title { \a } @Begin",
  775             "title2Close": " @End @SubSection",
  776             "title3Open": "~A~  @SubSubSection @Title { \a } @Begin",
  777             "title3Close": "  @End @SubSubSection",
  778             "title4Open": "~A~@LP @LeftDisplay @B { \a }",
  779             "title5Open": "~A~@LP @LeftDisplay @B { \a }",
  780             "anchor": "@Tag { \a }\n",
  781             "blockVerbOpen": "@LP @ID @F @RawVerbatim @Begin",
  782             "blockVerbClose": "@End @RawVerbatim",
  783             "blockQuoteOpen": "@QD {",
  784             "blockQuoteClose": "}",
  785             # enclosed inside {} to deal with joined**words**
  786             "fontMonoOpen": "{@F {",
  787             "fontMonoClose": "}}",
  788             "fontBoldOpen": "{@B {",
  789             "fontBoldClose": "}}",
  790             "fontItalicOpen": "{@II {",
  791             "fontItalicClose": "}}",
  792             "fontUnderlineOpen": "{@Underline{",
  793             "fontUnderlineClose": "}}",
  794             # the full form is more readable, but could be BL EL LI NL TL DTI
  795             "listOpen": "@BulletList",
  796             "listClose": "@EndList",
  797             "listItemOpen": "@ListItem{",
  798             "listItemClose": "}",
  799             "numlistOpen": "@NumberedList",
  800             "numlistClose": "@EndList",
  801             "numlistItemOpen": "@ListItem{",
  802             "numlistItemClose": "}",
  803             "deflistOpen": "@TaggedList",
  804             "deflistClose": "@EndList",
  805             "deflistItem1Open": "@DropTagItem {",
  806             "deflistItem1Close": "}",
  807             "deflistItem2Open": "{",
  808             "deflistItem2Close": "}",
  809             "bar1": "@DP @FullWidthRule",
  810             "url": "{blue @Colour { \a }}",
  811             "urlMark": "\a ({blue @Colour { \a }})",
  812             "email": "{blue @Colour { \a }}",
  813             "emailMark": "\a ({blue @Colour{ \a }})",
  814             "img": "~A~@IncludeGraphic { \a }",  # eps only!
  815             "_imgAlignLeft": "@LeftDisplay ",
  816             "_imgAlignRight": "@RightDisplay ",
  817             "_imgAlignCenter": "@CentredDisplay ",
  818             # lout tables are *way* too complicated, no support for now
  819             # 'tableOpen'            : '~A~@Tbl~B~\naformat{ @Cell A | @Cell B } {',
  820             # 'tableClose'           : '}'     ,
  821             # 'tableRowOpen'         : '@Rowa\n'       ,
  822             # 'tableTitleRowOpen'    : '@HeaderRowa'       ,
  823             # 'tableCenterAlign'     : '@CentredDisplay '         ,
  824             # 'tableCellOpen'        : '\a {'                     ,  # A, B, ...
  825             # 'tableCellClose'       : '}'                        ,
  826             # '_tableBorder'         : '\nrule {yes}'             ,
  827             "comment": "# \a",
  828             # @MakeContents must be on the config file
  829             "TOC": "@DP @ContentsGoesHere @DP",
  830             "pageBreak": "@NP",
  831             "EOD": "@End @Text",
  832         },
  833         # https://moinmo.in/HelpOnMoinWikiSyntax
  834         "moin": {
  835             "title1": "= \a =",
  836             "title2": "== \a ==",
  837             "title3": "=== \a ===",
  838             "title4": "==== \a ====",
  839             "title5": "===== \a =====",
  840             "blockVerbOpen": "{{{",
  841             "blockVerbClose": "}}}",
  842             "blockQuoteLine": "  ",
  843             "fontMonoOpen": "{{{",
  844             "fontMonoClose": "}}}",
  845             "fontBoldOpen": "'''",
  846             "fontBoldClose": "'''",
  847             "fontItalicOpen": "''",
  848             "fontItalicClose": "''",
  849             "fontUnderlineOpen": "__",
  850             "fontUnderlineClose": "__",
  851             "fontStrikeOpen": "--(",
  852             "fontStrikeClose": ")--",
  853             "listItemOpen": " * ",
  854             "numlistItemOpen": " \a. ",
  855             "deflistItem1Open": " ",
  856             "deflistItem1Close": "::",
  857             "deflistItem2LinePrefix": " :: ",
  858             "bar1": "----",
  859             "bar2": "--------",
  860             "url": "[[\a]]",
  861             "urlMark": "[[\a|\a]]",
  862             "email": "\a",
  863             "emailMark": "[[mailto:\a|\a]]",
  864             "img": "[\a]",
  865             "tableRowOpen": "||",
  866             "tableCellOpen": "~A~",
  867             "tableCellClose": "||",
  868             "tableTitleCellClose": "||",
  869             "_tableCellAlignRight": "<)>",
  870             "_tableCellAlignCenter": "<:>",
  871             "comment": "/* \a */",
  872             "TOC": "[[TableOfContents]]",
  873         },
  874         # http://code.google.com/p/support/wiki/WikiSyntax
  875         "gwiki": {
  876             "title1": "= \a =",
  877             "title2": "== \a ==",
  878             "title3": "=== \a ===",
  879             "title4": "==== \a ====",
  880             "title5": "===== \a =====",
  881             "blockVerbOpen": "{{{",
  882             "blockVerbClose": "}}}",
  883             "blockQuoteLine": "  ",
  884             "fontMonoOpen": "{{{",
  885             "fontMonoClose": "}}}",
  886             "fontBoldOpen": "*",
  887             "fontBoldClose": "*",
  888             "fontItalicOpen": "_",  # underline == italic
  889             "fontItalicClose": "_",
  890             "fontStrikeOpen": "~~",
  891             "fontStrikeClose": "~~",
  892             "listItemOpen": " * ",
  893             "numlistItemOpen": " # ",
  894             "url": "\a",
  895             "urlMark": "[\a \a]",
  896             "email": "mailto:\a",
  897             "emailMark": "[mailto:\a \a]",
  898             "img": "[\a]",
  899             "tableRowOpen": "|| ",
  900             "tableRowClose": " ||",
  901             "tableCellSep": " || ",
  902         },
  903         # http://powerman.name/doc/asciidoc
  904         "adoc": {
  905             "title1": "== \a",
  906             "title2": "=== \a",
  907             "title3": "==== \a",
  908             "title4": "===== \a",
  909             "title5": "===== \a",
  910             "blockVerbOpen": "----",
  911             "blockVerbClose": "----",
  912             "fontMonoOpen": "+",
  913             "fontMonoClose": "+",
  914             "fontBoldOpen": "*",
  915             "fontBoldClose": "*",
  916             "fontItalicOpen": "_",
  917             "fontItalicClose": "_",
  918             "listItemOpen": "- ",
  919             "listItemLine": "\t",
  920             "numlistItemOpen": ". ",
  921             "url": "\a",
  922             "urlMark": "\a[\a]",
  923             "email": "mailto:\a",
  924             "emailMark": "mailto:\a[\a]",
  925             "img": "image::\a[]",
  926         },
  927         # http://wiki.splitbrain.org/wiki:syntax
  928         # Hint: <br> is \\ $
  929         # Hint: You can add footnotes ((This is a footnote))
  930         "doku": {
  931             "title1": "===== \a =====",
  932             "title2": "==== \a ====",
  933             "title3": "=== \a ===",
  934             "title4": "== \a ==",
  935             "title5": "= \a =",
  936             # DokuWiki uses '  ' indentation to mark verb blocks (see indentverbblock)
  937             "blockQuoteLine": ">",
  938             "fontMonoOpen": "''",
  939             "fontMonoClose": "''",
  940             "fontBoldOpen": "**",
  941             "fontBoldClose": "**",
  942             "fontItalicOpen": "//",
  943             "fontItalicClose": "//",
  944             "fontUnderlineOpen": "__",
  945             "fontUnderlineClose": "__",
  946             "fontStrikeOpen": "<del>",
  947             "fontStrikeClose": "</del>",
  948             "listItemOpen": "  * ",
  949             "numlistItemOpen": "  - ",
  950             "bar1": "----",
  951             "url": "[[\a]]",
  952             "urlMark": "[[\a|\a]]",
  953             "email": "[[\a]]",
  954             "emailMark": "[[\a|\a]]",
  955             "img": "{{\a}}",
  956             "imgAlignLeft": "{{\a }}",
  957             "imgAlignRight": "{{ \a}}",
  958             "imgAlignCenter": "{{ \a }}",
  959             "tableTitleRowOpen": "^ ",
  960             "tableTitleRowClose": " ^",
  961             "tableTitleCellSep": " ^ ",
  962             "tableRowOpen": "| ",
  963             "tableRowClose": " |",
  964             "tableCellSep": " | ",
  965             # DokuWiki has no attributes. The content must be aligned!
  966             # '_tableCellAlignRight' : '<)>'           , # ??
  967             # '_tableCellAlignCenter': '<:>'           , # ??
  968             # DokuWiki colspan is the same as txt2tags' with multiple |||
  969             # 'comment'             : '## \a'         , # ??
  970             # TOC is automatic
  971         },
  972         # http://www.pmwiki.org/wiki/PmWiki/TextFormattingRules
  973         "pmw": {
  974             "title1": "~A~! \a ",
  975             "title2": "~A~!! \a ",
  976             "title3": "~A~!!! \a ",
  977             "title4": "~A~!!!! \a ",
  978             "title5": "~A~!!!!! \a ",
  979             "blockQuoteOpen": "->",
  980             "blockQuoteClose": "\n",
  981             # In-text font
  982             "fontMonoOpen": "@@",
  983             "fontMonoClose": "@@",
  984             "fontBoldOpen": "'''",
  985             "fontBoldClose": "'''",
  986             "fontItalicOpen": "''",
  987             "fontItalicClose": "''",
  988             "fontUnderlineOpen": "{+",
  989             "fontUnderlineClose": "+}",
  990             "fontStrikeOpen": "{-",
  991             "fontStrikeClose": "-}",
  992             # Lists
  993             "listItemLine": "*",
  994             "numlistItemLine": "#",
  995             "deflistItem1Open": ": ",
  996             "deflistItem1Close": ":",
  997             # Verbatim block
  998             "blockVerbOpen": "[@",
  999             "blockVerbClose": "@]",
 1000             "bar1": "----",
 1001             # URL, email and anchor
 1002             "url": "\a",
 1003             "urlMark": "[[\a -> \a]]",
 1004             "email": "\a",
 1005             "emailMark": "[[\a -> mailto:\a]]",
 1006             "anchor": "[[#\a]]\n",
 1007             # Image markup
 1008             "img": "\a",
 1009             # Table attributes
 1010             "tableTitleRowOpen": "||! ",
 1011             "tableTitleRowClose": "||",
 1012             "tableTitleCellSep": " ||!",
 1013             "tableRowOpen": "||",
 1014             "tableRowClose": "||",
 1015             "tableCellSep": " ||",
 1016         },
 1017         # http://en.wikipedia.org/wiki/Help:Editing
 1018         "wiki": {
 1019             "title1": "== \a ==",
 1020             "title2": "=== \a ===",
 1021             "title3": "==== \a ====",
 1022             "title4": "===== \a =====",
 1023             "title5": "====== \a ======",
 1024             "blockVerbOpen": "<pre>",
 1025             "blockVerbClose": "</pre>",
 1026             "blockQuoteOpen": "<blockquote>",
 1027             "blockQuoteClose": "</blockquote>",
 1028             "fontMonoOpen": "<tt>",
 1029             "fontMonoClose": "</tt>",
 1030             "fontBoldOpen": "'''",
 1031             "fontBoldClose": "'''",
 1032             "fontItalicOpen": "''",
 1033             "fontItalicClose": "''",
 1034             "fontUnderlineOpen": "<u>",
 1035             "fontUnderlineClose": "</u>",
 1036             "fontStrikeOpen": "<s>",
 1037             "fontStrikeClose": "</s>",
 1038             # XXX Mixed lists not working: *#* list inside numlist inside list
 1039             "listItemLine": "*",
 1040             "numlistItemLine": "#",
 1041             "deflistItem1Open": "; ",
 1042             "deflistItem2LinePrefix": ": ",
 1043             "bar1": "----",
 1044             "url": "[\a]",
 1045             "urlMark": "[\a \a]",
 1046             "email": "mailto:\a",
 1047             "emailMark": "[mailto:\a \a]",
 1048             # [[Image:foo.png|right|Optional alt/caption text]]
 1049             # (right, left, center, none)
 1050             "img": "[[Image:\a~A~]]",
 1051             "_imgAlignLeft": "|left",
 1052             "_imgAlignCenter": "|center",
 1053             "_imgAlignRight": "|right",
 1054             # {| border="1" cellspacing="0" cellpadding="4" align="center"
 1055             "tableOpen": '{|~A~~B~ cellpadding="4"',
 1056             "tableClose": "|}",
 1057             "tableRowOpen": "|-\n| ",
 1058             "tableTitleRowOpen": "|-\n! ",
 1059             "tableCellSep": " || ",
 1060             "tableTitleCellSep": " !! ",
 1061             "_tableBorder": ' border="1"',
 1062             "_tableAlignCenter": ' align="center"',
 1063             "comment": "<!-- \a -->",
 1064             "TOC": "__TOC__",
 1065         },
 1066         # http://www.inference.phy.cam.ac.uk/mackay/mgp/SYNTAX
 1067         # http://en.wikipedia.org/wiki/MagicPoint
 1068         "mgp": {
 1069             "paragraphOpen": '%font "normal", size 5',
 1070             "title1": "%page\n\n\a\n",
 1071             "title2": "%page\n\n\a\n",
 1072             "title3": "%page\n\n\a\n",
 1073             "title4": "%page\n\n\a\n",
 1074             "title5": "%page\n\n\a\n",
 1075             "blockVerbOpen": '%font "mono"',
 1076             "blockVerbClose": '%font "normal"',
 1077             "blockQuoteOpen": '%prefix "       "',
 1078             "blockQuoteClose": '%prefix "  "',
 1079             "fontMonoOpen": '\n%cont, font "mono"\n',
 1080             "fontMonoClose": '\n%cont, font "normal"\n',
 1081             "fontBoldOpen": '\n%cont, font "normal-b"\n',
 1082             "fontBoldClose": '\n%cont, font "normal"\n',
 1083             "fontItalicOpen": '\n%cont, font "normal-i"\n',
 1084             "fontItalicClose": '\n%cont, font "normal"\n',
 1085             "fontUnderlineOpen": '\n%cont, fore "cyan"\n',
 1086             "fontUnderlineClose": '\n%cont, fore "white"\n',
 1087             "listItemLine": "\t",
 1088             "numlistItemLine": "\t",
 1089             "numlistItemOpen": "\a. ",
 1090             "deflistItem1Open": '\t\n%cont, font "normal-b"\n',
 1091             "deflistItem1Close": '\n%cont, font "normal"\n',
 1092             "bar1": '%bar "white" 5',
 1093             "bar2": "%pause",
 1094             "url": '\n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n',
 1095             "urlMark": '\a \n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n',
 1096             "email": '\n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n',
 1097             "emailMark": '\a \n%cont, fore "cyan"\n\a' + '\n%cont, fore "white"\n',
 1098             "img": '~A~\n%newimage "\a"\n%left\n',
 1099             "_imgAlignLeft": "\n%left",
 1100             "_imgAlignRight": "\n%right",
 1101             "_imgAlignCenter": "\n%center",
 1102             "comment": "%% \a",
 1103             "pageBreak": "%page\n\n\n",
 1104             "EOD": "%%EOD",
 1105         },
 1106         # man groff_man ; man 7 groff
 1107         "man": {
 1108             "paragraphOpen": ".P",
 1109             "title1": ".SH \a",
 1110             "title2": ".SS \a",
 1111             "title3": ".SS \a",
 1112             "title4": ".SS \a",
 1113             "title5": ".SS \a",
 1114             "blockVerbOpen": ".nf",
 1115             "blockVerbClose": ".fi\n",
 1116             "blockQuoteOpen": ".RS",
 1117             "blockQuoteClose": ".RE",
 1118             "fontBoldOpen": "\\fB",
 1119             "fontBoldClose": "\\fR",
 1120             "fontItalicOpen": "\\fI",
 1121             "fontItalicClose": "\\fR",
 1122             "listOpen": ".RS",
 1123             "listItemOpen": ".IP \\(bu 3\n",
 1124             "listClose": ".RE\n.IP",
 1125             "numlistOpen": ".RS",
 1126             "numlistItemOpen": ".IP \a. 3\n",
 1127             "numlistClose": ".RE\n.IP",
 1128             "deflistItem1Open": ".TP\n",
 1129             "bar1": "\n\n",
 1130             "url": "\a",
 1131             "urlMark": "\a (\a)",
 1132             "email": "\a",
 1133             "emailMark": "\a (\a)",
 1134             "img": "\a",
 1135             "tableOpen": ".TS\n~A~~B~tab(^); ~C~.",
 1136             "tableClose": ".TE",
 1137             "tableRowOpen": " ",
 1138             "tableCellSep": "^",
 1139             "_tableAlignCenter": "center, ",
 1140             "_tableBorder": "allbox, ",
 1141             "_tableColAlignLeft": "l",
 1142             "_tableColAlignRight": "r",
 1143             "_tableColAlignCenter": "c",
 1144             "comment": '.\\" \a',
 1145         },
 1146         # http://www.wikicreole.org/wiki/AllMarkup
 1147         "creole": {
 1148             "title1": "= \a =",
 1149             "title2": "== \a ==",
 1150             "title3": "=== \a ===",
 1151             "title4": "==== \a ====",
 1152             "title5": "===== \a =====",
 1153             "blockVerbOpen": "{{{",
 1154             "blockVerbClose": "}}}",
 1155             "blockQuoteLine": "  ",
 1156             "fontMonoOpen": None,  # planned for 2.0,
 1157             "fontMonoClose": None,  # meanwhile we disable it
 1158             "fontBoldOpen": "**",
 1159             "fontBoldClose": "**",
 1160             "fontItalicOpen": "//",
 1161             "fontItalicClose": "//",
 1162             "fontUnderlineOpen": "//",  # no underline in 1.0, planned for 2.0,
 1163             "fontUnderlineClose": "//",  # meanwhile we use italic (emphasized)
 1164             "fontStrikeOpen": None,  # planned for 2.0,
 1165             "fontStrikeClose": None,  # meanwhile we disable it
 1166             "listItemLine": "*",
 1167             "numlistItemLine": "#",
 1168             "deflistItem2LinePrefix": ":",
 1169             "bar1": "----",
 1170             "url": "[[\a]]",
 1171             "urlMark": "[[\a|\a]]",
 1172             "img": "{{\a}}",
 1173             "tableTitleRowOpen": "|= ",
 1174             "tableTitleRowClose": "|",
 1175             "tableTitleCellSep": " |= ",
 1176             "tableRowOpen": "| ",
 1177             "tableRowClose": " |",
 1178             "tableCellSep": " | ",
 1179             # TODO: placeholder (mark for unknown syntax)
 1180             # if possible: http://www.wikicreole.org/wiki/Placeholder
 1181         },
 1182         # regular markdown: http://daringfireball.net/projects/markdown/syntax
 1183         # markdown extra:   http://michelf.com/projects/php-markdown/extra/
 1184         "md": {
 1185             "title1": "# \a ",
 1186             "title2": "## \a ",
 1187             "title3": "### \a ",
 1188             "title4": "#### \a ",
 1189             "title5": "##### \a ",
 1190             "blockVerbLine": "    ",
 1191             "blockQuoteLine": "> ",
 1192             "fontMonoOpen": "`",
 1193             "fontMonoClose": "`",
 1194             "fontBoldOpen": "**",
 1195             "fontBoldClose": "**",
 1196             "fontItalicOpen": "*",
 1197             "fontItalicClose": "*",
 1198             "fontUnderlineOpen": None,
 1199             "fontUnderlineClose": None,
 1200             "fontStrikeOpen": "~~",
 1201             "fontStrikeClose": "~~",
 1202             # Lists
 1203             "listOpenCompact": None,
 1204             "listItemLine": " ",
 1205             "listItemOpen": "*",
 1206             "numlistItemLine": None,
 1207             "numlistItemOpen": "1.",
 1208             "deflistItem1Open": ": ",
 1209             "deflistItem1Close": None,
 1210             "deflistItem2Open": None,
 1211             "deflistItem2Close": None,
 1212             # Verbatim block
 1213             "blockVerbOpen": None,
 1214             "blockVerbClose": None,
 1215             "bar1": "---",
 1216             "bar2": "---",
 1217             # URL, email and anchor
 1218             "url": "\a",
 1219             "urlMark": "[\a](\a)",
 1220             "email": "<\a>",
 1221             "emailMark": "[\a](mailto:\a)",
 1222             "anchor": None,
 1223             # Image markup
 1224             "img": "![](\a)",
 1225             "imgAlignLeft": None,
 1226             "imgAlignRight": None,
 1227             "imgAlignCenter": None,
 1228             # Table attributes
 1229             "tableTitleRowOpen": "| ",
 1230             "tableTitleRowClose": "|\n|---------------|",
 1231             "tableTitleCellSep": " |",
 1232             "tableRowOpen": "|",
 1233             "tableRowClose": "|",
 1234             "tableCellSep": " |",
 1235         },
 1236     }
 1237     assert set(alltags) == set(TARGETS)
 1238 
 1239     for target, tags in alltags.items():
 1240         for key, value in tags.items():
 1241             if key not in keys:
 1242                 raise AssertionError("{} target has invalid key {}".format(target, key))
 1243             if value is not None and not value:
 1244                 raise AssertionError("{} target drops {}".format(target, key))
 1245 
 1246     # Compose the target tags dictionary.
 1247     tags = collections.defaultdict(str)
 1248     for key, value in alltags[config["target"]].items():
 1249         if value:  # Skip unsupported markup.
 1250             tags[key] = maskEscapeChar(value)
 1251 
 1252     # Map strong line to pagebreak
 1253     if rules["mapbar2pagebreak"] and tags["pageBreak"]:
 1254         tags["bar2"] = tags["pageBreak"]
 1255 
 1256     # Map strong line to separator if not defined
 1257     if not tags["bar2"] and tags["bar1"]:
 1258         tags["bar2"] = tags["bar1"]
 1259 
 1260     return tags
 1261 
 1262 
 1263 ##############################################################################
 1264 
 1265 
 1266 def getRules(config):
 1267     """Return all the target-specific syntax rules."""
 1268     allrules = [
 1269         # target rules (ON/OFF)
 1270         "linkable",  # target supports external links
 1271         "tableable",  # target supports tables
 1272         "imglinkable",  # target supports images as links
 1273         "imgalignable",  # target supports image alignment
 1274         "imgasdefterm",  # target supports image as definition term
 1275         "autonumberlist",  # target supports numbered lists natively
 1276         "autonumbertitle",  # target supports numbered titles natively
 1277         "stylable",  # target supports external style files
 1278         "parainsidelist",  # list items support paragraphs
 1279         "compactlist",  # separate enclosing tags for compact lists
 1280         "spacedlistitem",  # lists support blank lines between items
 1281         "listnotnested",  # lists cannot be nested
 1282         "quotenotnested",  # quotes cannot be nested
 1283         "verbblocknotescaped",  # don't escape specials in verb block
 1284         "verbblockfinalescape",  # do final escapes in verb block
 1285         "escapeurl",  # escape specials in link URL
 1286         "labelbeforelink",  # label comes before the link on the tag
 1287         "onelinepara",  # dump paragraph as a single long line
 1288         "tabletitlerowinbold",  # manually bold any cell on table titles
 1289         "tablecellstrip",  # strip extra spaces from each table cell
 1290         "tablecellspannable",  # the table cells can have span attribute
 1291         "tablecellmulticol",  # separate open+close tags for multicol cells
 1292         "barinsidequote",  # bars are allowed inside quote blocks
 1293         "finalescapetitle",  # perform final escapes on title lines
 1294         "autotocnewpagebefore",  # break page before automatic TOC
 1295         "autotocnewpageafter",  # break page after automatic TOC
 1296         "autotocwithbars",  # automatic TOC surrounded by bars
 1297         "mapbar2pagebreak",  # map the strong bar to a page break
 1298         "titleblocks",  # titles must be on open/close section blocks
 1299         # Target code beautify (ON/OFF)
 1300         "indentverbblock",  # add leading spaces to verb block lines
 1301         "breaktablecell",  # break lines after any table cell
 1302         "breaktablelineopen",  # break line after opening table line
 1303         "notbreaklistopen",  # don't break line after opening a new list
 1304         "keepquoteindent",  # don't remove the leading TABs on quotes
 1305         "keeplistindent",  # don't remove the leading spaces on lists
 1306         "blankendautotoc",  # append a blank line at the auto TOC end
 1307         "tagnotindentable",  # tags must be placed at the line beginning
 1308         "spacedlistitemopen",  # append a space after the list item open tag
 1309         "spacednumlistitemopen",  # append a space after the numlist item open tag
 1310         "deflisttextstrip",  # strip the contents of the deflist text
 1311         "blanksaroundpara",  # put a blank line before and after paragraphs
 1312         "blanksaroundverb",  # put a blank line before and after verb blocks
 1313         "blanksaroundquote",  # put a blank line before and after quotes
 1314         "blanksaroundlist",  # put a blank line before and after lists
 1315         "blanksaroundnumlist",  # put a blank line before and after numlists
 1316         "blanksarounddeflist",  # put a blank line before and after deflists
 1317         "blanksaroundtable",  # put a blank line before and after tables
 1318         "blanksaroundbar",  # put a blank line before and after bars
 1319         "blanksaroundtitle",  # put a blank line before and after titles
 1320         "blanksaroundnumtitle",  # put a blank line before and after numtitles
 1321         # Value settings
 1322         "listmaxdepth",  # maximum depth for lists
 1323         "quotemaxdepth",  # maximum depth for quotes
 1324         "tablecellaligntype",  # type of table cell align: cell, column
 1325     ]
 1326 
 1327     rules_bank = {
 1328         "txt": {
 1329             "indentverbblock": 1,
 1330             "spacedlistitem": 1,
 1331             "parainsidelist": 1,
 1332             "keeplistindent": 1,
 1333             "barinsidequote": 1,
 1334             "autotocwithbars": 1,
 1335             "blanksaroundpara": 1,
 1336             "blanksaroundverb": 1,
 1337             "blanksaroundquote": 1,
 1338             "blanksaroundlist": 1,
 1339             "blanksaroundnumlist": 1,
 1340             "blanksarounddeflist": 1,
 1341             "blanksaroundtable": 1,
 1342             "blanksaroundbar": 1,
 1343             "blanksaroundtitle": 1,
 1344             "blanksaroundnumtitle": 1,
 1345         },
 1346         "html": {
 1347             "indentverbblock": 0,
 1348             "linkable": 1,
 1349             "stylable": 1,
 1350             "escapeurl": 1,
 1351             "imglinkable": 1,
 1352             "imgalignable": 1,
 1353             "imgasdefterm": 1,
 1354             "autonumberlist": 1,
 1355             "spacedlistitem": 1,
 1356             "parainsidelist": 1,
 1357             "tableable": 1,
 1358             "tablecellstrip": 1,
 1359             "breaktablecell": 1,
 1360             "breaktablelineopen": 1,
 1361             "keeplistindent": 1,
 1362             "keepquoteindent": 1,
 1363             "barinsidequote": 1,
 1364             "autotocwithbars": 0,
 1365             "tablecellspannable": 1,
 1366             "tablecellaligntype": "cell",
 1367             # 'blanksaroundpara':1,
 1368             "blanksaroundverb": 1,
 1369             # 'blanksaroundquote':1,
 1370             "blanksaroundlist": 1,
 1371             "blanksaroundnumlist": 1,
 1372             "blanksarounddeflist": 1,
 1373             "blanksaroundtable": 1,
 1374             "blanksaroundbar": 1,
 1375             "blanksaroundtitle": 1,
 1376             "blanksaroundnumtitle": 1,
 1377             "titleblocks": 1,
 1378         },
 1379         "sgml": {
 1380             "linkable": 1,
 1381             "escapeurl": 1,
 1382             "autonumberlist": 1,
 1383             "spacedlistitem": 1,
 1384             "tableable": 1,
 1385             "tablecellstrip": 1,
 1386             "blankendautotoc": 1,
 1387             "quotenotnested": 1,
 1388             "keeplistindent": 1,
 1389             "keepquoteindent": 1,
 1390             "barinsidequote": 1,
 1391             "finalescapetitle": 1,
 1392             "tablecellaligntype": "column",
 1393             "blanksaroundpara": 1,
 1394             "blanksaroundverb": 1,
 1395             "blanksaroundquote": 1,
 1396             "blanksaroundlist": 1,
 1397             "blanksaroundnumlist": 1,
 1398             "blanksarounddeflist": 1,
 1399             "blanksaroundtable": 1,
 1400             "blanksaroundbar": 1,
 1401             "blanksaroundtitle": 1,
 1402             "blanksaroundnumtitle": 1,
 1403         },
 1404         "dbk": {
 1405             "linkable": 1,
 1406             "tableable": 0,  # activate when table tags are ready
 1407             "imglinkable": 1,
 1408             "imgalignable": 1,
 1409             "imgasdefterm": 1,
 1410             "autonumberlist": 1,
 1411             "autonumbertitle": 1,
 1412             "parainsidelist": 1,
 1413             "spacedlistitem": 1,
 1414             "titleblocks": 1,
 1415         },
 1416         "mgp": {
 1417             "tagnotindentable": 1,
 1418             "spacedlistitem": 1,
 1419             "imgalignable": 1,
 1420             "autotocnewpagebefore": 1,
 1421             "blanksaroundpara": 1,
 1422             "blanksaroundverb": 1,
 1423             # 'blanksaroundquote':1,
 1424             "blanksaroundlist": 1,
 1425             "blanksaroundnumlist": 1,
 1426             "blanksarounddeflist": 1,
 1427             "blanksaroundtable": 1,
 1428             "blanksaroundbar": 1,
 1429             # 'blanksaroundtitle':1,
 1430             # 'blanksaroundnumtitle':1,
 1431         },
 1432         "tex": {
 1433             "stylable": 1,
 1434             "escapeurl": 1,
 1435             "autonumberlist": 1,
 1436             "autonumbertitle": 1,
 1437             "spacedlistitem": 1,
 1438             "compactlist": 1,
 1439             "parainsidelist": 1,
 1440             "tableable": 1,
 1441             "tablecellstrip": 1,
 1442             "tabletitlerowinbold": 0,
 1443             "verbblocknotescaped": 1,
 1444             "keeplistindent": 1,
 1445             "listmaxdepth": 4,  # deflist is 6
 1446             "quotemaxdepth": 6,
 1447             "barinsidequote": 1,
 1448             "finalescapetitle": 1,
 1449             "autotocnewpageafter": 1,
 1450             "mapbar2pagebreak": 1,
 1451             "tablecellaligntype": "column",
 1452             "tablecellmulticol": 1,
 1453             "blanksaroundpara": 1,
 1454             "blanksaroundverb": 1,
 1455             # 'blanksaroundquote':1,
 1456             "blanksaroundlist": 1,
 1457             "blanksaroundnumlist": 1,
 1458             "blanksarounddeflist": 1,
 1459             "blanksaroundtable": 1,
 1460             "blanksaroundbar": 1,
 1461             "blanksaroundtitle": 1,
 1462             "blanksaroundnumtitle": 1,
 1463         },
 1464         "lout": {
 1465             "keepquoteindent": 1,
 1466             "deflisttextstrip": 1,
 1467             "escapeurl": 1,
 1468             "verbblocknotescaped": 1,
 1469             "imgalignable": 1,
 1470             "mapbar2pagebreak": 1,
 1471             "titleblocks": 1,
 1472             "autonumberlist": 1,
 1473             "parainsidelist": 1,
 1474             "blanksaroundpara": 1,
 1475             "blanksaroundverb": 1,
 1476             # 'blanksaroundquote':1,
 1477             "blanksaroundlist": 1,
 1478             "blanksaroundnumlist": 1,
 1479             "blanksarounddeflist": 1,
 1480             "blanksaroundtable": 1,
 1481             "blanksaroundbar": 1,
 1482             "blanksaroundtitle": 1,
 1483             "blanksaroundnumtitle": 1,
 1484         },
 1485         "moin": {
 1486             "spacedlistitem": 1,
 1487             "linkable": 1,
 1488             "keeplistindent": 1,
 1489             "tableable": 1,
 1490             "barinsidequote": 1,
 1491             "tabletitlerowinbold": 1,
 1492             "tablecellstrip": 1,
 1493             "autotocwithbars": 1,
 1494             "tablecellaligntype": "cell",
 1495             "deflisttextstrip": 1,
 1496             "blanksaroundpara": 1,
 1497             "blanksaroundverb": 1,
 1498             # 'blanksaroundquote':1,
 1499             "blanksaroundlist": 1,
 1500             "blanksaroundnumlist": 1,
 1501             "blanksarounddeflist": 1,
 1502             "blanksaroundtable": 1,
 1503             # 'blanksaroundbar':1,
 1504             "blanksaroundtitle": 1,
 1505             "blanksaroundnumtitle": 1,
 1506         },
 1507         "gwiki": {
 1508             "spacedlistitem": 1,
 1509             "linkable": 1,
 1510             "keeplistindent": 1,
 1511             "tableable": 1,
 1512             "tabletitlerowinbold": 1,
 1513             "tablecellstrip": 1,
 1514             "autonumberlist": 1,
 1515             "blanksaroundpara": 1,
 1516             "blanksaroundverb": 1,
 1517             # 'blanksaroundquote':1,
 1518             "blanksaroundlist": 1,
 1519             "blanksaroundnumlist": 1,
 1520             "blanksarounddeflist": 1,
 1521             "blanksaroundtable": 1,
 1522             # 'blanksaroundbar':1,
 1523             "blanksaroundtitle": 1,
 1524             "blanksaroundnumtitle": 1,
 1525         },
 1526         "adoc": {
 1527             "spacedlistitem": 1,
 1528             "linkable": 1,
 1529             "keeplistindent": 1,
 1530             "autonumberlist": 1,
 1531             "autonumbertitle": 1,
 1532             "listnotnested": 1,
 1533             "blanksaroundpara": 1,
 1534             "blanksaroundverb": 1,
 1535             "blanksaroundlist": 1,
 1536             "blanksaroundnumlist": 1,
 1537             "blanksarounddeflist": 1,
 1538             "blanksaroundtable": 1,
 1539             "blanksaroundtitle": 1,
 1540             "blanksaroundnumtitle": 1,
 1541         },
 1542         "doku": {
 1543             "indentverbblock": 1,  # DokuWiki uses '  ' to mark verb blocks
 1544             "spacedlistitem": 1,
 1545             "linkable": 1,
 1546             "keeplistindent": 1,
 1547             "tableable": 1,
 1548             "barinsidequote": 1,
 1549             "tablecellstrip": 1,
 1550             "autotocwithbars": 1,
 1551             "autonumberlist": 1,
 1552             "imgalignable": 1,
 1553             "tablecellaligntype": "cell",
 1554             "blanksaroundpara": 1,
 1555             "blanksaroundverb": 1,
 1556             # 'blanksaroundquote':1,
 1557             "blanksaroundlist": 1,
 1558             "blanksaroundnumlist": 1,
 1559             "blanksarounddeflist": 1,
 1560             "blanksaroundtable": 1,
 1561             "blanksaroundbar": 1,
 1562             "blanksaroundtitle": 1,
 1563             "blanksaroundnumtitle": 1,
 1564         },
 1565         "pmw": {
 1566             "indentverbblock": 1,
 1567             "spacedlistitem": 1,
 1568             "linkable": 1,
 1569             "labelbeforelink": 1,
 1570             # 'keeplistindent':1,
 1571             "tableable": 1,
 1572             "barinsidequote": 1,
 1573             "tablecellstrip": 1,
 1574             "autotocwithbars": 1,
 1575             "autonumberlist": 1,
 1576             "spacedlistitemopen": 1,
 1577             "spacednumlistitemopen": 1,
 1578             "imgalignable": 1,
 1579             "tabletitlerowinbold": 1,
 1580             "tablecellaligntype": "cell",
 1581             "blanksaroundpara": 1,
 1582             "blanksaroundverb": 1,
 1583             "blanksaroundquote": 1,
 1584             "blanksaroundlist": 1,
 1585             "blanksaroundnumlist": 1,
 1586             "blanksarounddeflist": 1,
 1587             "blanksaroundtable": 1,
 1588             "blanksaroundbar": 1,
 1589             "blanksaroundtitle": 1,
 1590             "blanksaroundnumtitle": 1,
 1591         },
 1592         "wiki": {
 1593             "linkable": 1,
 1594             "tableable": 1,
 1595             "tablecellstrip": 1,
 1596             "autotocwithbars": 1,
 1597             "spacedlistitemopen": 1,
 1598             "spacednumlistitemopen": 1,
 1599             "deflisttextstrip": 1,
 1600             "autonumberlist": 1,
 1601             "imgalignable": 1,
 1602             "blanksaroundpara": 1,
 1603             "blanksaroundverb": 1,
 1604             # 'blanksaroundquote':1,
 1605             "blanksaroundlist": 1,
 1606             "blanksaroundnumlist": 1,
 1607             "blanksarounddeflist": 1,
 1608             "blanksaroundtable": 1,
 1609             "blanksaroundbar": 1,
 1610             "blanksaroundtitle": 1,
 1611             "blanksaroundnumtitle": 1,
 1612         },
 1613         "man": {
 1614             "spacedlistitem": 1,
 1615             "tagnotindentable": 1,
 1616             "tableable": 1,
 1617             "tablecellaligntype": "column",
 1618             "tabletitlerowinbold": 1,
 1619             "tablecellstrip": 1,
 1620             "barinsidequote": 1,
 1621             "parainsidelist": 0,
 1622             "blanksaroundpara": 0,
 1623             "blanksaroundverb": 1,
 1624             # 'blanksaroundquote':1,
 1625             "blanksaroundlist": 1,
 1626             "blanksaroundnumlist": 1,
 1627             "blanksarounddeflist": 1,
 1628             "blanksaroundtable": 1,
 1629             # 'blanksaroundbar':1,
 1630             "blanksaroundtitle": 0,
 1631             "blanksaroundnumtitle": 1,
 1632         },
 1633         "creole": {
 1634             "linkable": 1,
 1635             "tableable": 1,
 1636             "imglinkable": 1,
 1637             "tablecellstrip": 1,
 1638             "autotocwithbars": 1,
 1639             "spacedlistitemopen": 1,
 1640             "spacednumlistitemopen": 1,
 1641             "deflisttextstrip": 1,
 1642             "verbblocknotescaped": 1,
 1643             "blanksaroundpara": 1,
 1644             "blanksaroundverb": 1,
 1645             "blanksaroundquote": 1,
 1646             "blanksaroundlist": 1,
 1647             "blanksaroundnumlist": 1,
 1648             "blanksarounddeflist": 1,
 1649             "blanksaroundtable": 1,
 1650             "blanksaroundbar": 1,
 1651             "blanksaroundtitle": 1,
 1652         },
 1653         "md": {
 1654             # "keeplistindent": 1,
 1655             "linkable": 1,
 1656             "labelbeforelink": 1,
 1657             "tableable": 1,
 1658             "imglinkable": 1,
 1659             "tablecellstrip": 1,
 1660             "autonumberlist": 1,
 1661             "spacedlistitemopen": 1,
 1662             "spacednumlistitemopen": 1,
 1663             "deflisttextstrip": 1,
 1664             "blanksaroundpara": 1,
 1665             "blanksaroundlist": 1,
 1666             "blanksaroundnumlist": 1,
 1667             # "blanksarounddeflist": 1,
 1668             "blanksaroundtable": 1,
 1669             "blanksaroundbar": 1,
 1670             "blanksaroundtitle": 1,
 1671         },
 1672     }
 1673     assert set(rules_bank) == set(TARGETS)
 1674 
 1675     for target, rules in rules_bank.items():
 1676         for rule in rules:
 1677             if rule not in allrules:
 1678                 raise AssertionError(
 1679                     "{} target has invalid rule {}".format(target, rule)
 1680                 )
 1681 
 1682     ret = collections.defaultdict(int)
 1683     ret.update(rules_bank[config["target"]])
 1684     return ret
 1685 
 1686 
 1687 ##############################################################################
 1688 
 1689 
 1690 def getRegexes():
 1691     "Returns all the regexes used to find the t2t marks"
 1692 
 1693     bank = {
 1694         "blockVerbOpen": re.compile(r"^```\s*$"),
 1695         "blockVerbClose": re.compile(r"^```\s*$"),
 1696         "blockRawOpen": re.compile(r'^"""\s*$'),
 1697         "blockRawClose": re.compile(r'^"""\s*$'),
 1698         "blockTaggedOpen": re.compile(r"^'''\s*$"),
 1699         "blockTaggedClose": re.compile(r"^'''\s*$"),
 1700         "blockCommentOpen": re.compile(r"^%%%\s*$"),
 1701         "blockCommentClose": re.compile(r"^%%%\s*$"),
 1702         "quote": re.compile(r"^\t+"),
 1703         "1lineVerb": re.compile(r"^``` (?=.)"),
 1704         "1lineRaw": re.compile(r'^""" (?=.)'),
 1705         "1lineTagged": re.compile(r"^''' (?=.)"),
 1706         # mono, raw, bold, italic, underline:
 1707         # - marks must be glued with the contents, no boundary spaces
 1708         # - they are greedy, so in ****bold****, turns to <b>**bold**</b>
 1709         "fontMono": re.compile(r"``([^\s](|.*?[^\s])`*)``"),
 1710         "raw": re.compile(r'""([^\s](|.*?[^\s])"*)""'),
 1711         "tagged": re.compile(r"''([^\s](|.*?[^\s])'*)''"),
 1712         "fontBold": re.compile(r"\*\*([^\s](|.*?[^\s])\**)\*\*"),
 1713         "fontItalic": re.compile(r"//([^\s](|.*?[^\s])/*)//"),
 1714         "fontUnderline": re.compile(r"__([^\s](|.*?[^\s])_*)__"),
 1715         "fontStrike": re.compile(r"--([^\s](|.*?[^\s])-*)--"),
 1716         "list": re.compile(r"^( *)(-) (?=[^ ])"),
 1717         "numlist": re.compile(r"^( *)(\+) (?=[^ ])"),
 1718         "deflist": re.compile(r"^( *)(:) (.*)$"),
 1719         "listclose": re.compile(r"^( *)([-+:])\s*$"),
 1720         "bar": re.compile(r"^(\s*)([_=-]{20,})\s*$"),
 1721         "table": re.compile(r"^ *\|([|_/])? "),
 1722         "blankline": re.compile(r"^\s*$"),
 1723         "comment": re.compile(r"^%"),
 1724         # Auxiliary tag regexes
 1725         "_imgAlign": re.compile(r"~A~", re.I),
 1726         "_tableAlign": re.compile(r"~A~", re.I),
 1727         "_anchor": re.compile(r"~A~", re.I),
 1728         "_tableBorder": re.compile(r"~B~", re.I),
 1729         "_tableColAlign": re.compile(r"~C~", re.I),
 1730         "_tableCellColSpan": re.compile(r"~S~", re.I),
 1731         "_tableCellAlign": re.compile(r"~A~", re.I),
 1732     }
 1733 
 1734     # Special char to place data on TAGs contents  (\a == bell)
 1735     bank["x"] = re.compile("\a")
 1736 
 1737     # Almost complicated title regexes ;)
 1738     titskel = r"^ *(?P<id>%s)(?P<txt>%s)\1(\[(?P<label>[\w-]*)\])?\s*$"
 1739     bank["title"] = re.compile(titskel % ("[=]{1,5}", "[^=](|.*[^=])"))
 1740     bank["numtitle"] = re.compile(titskel % ("[+]{1,5}", "[^+](|.*[^+])"))
 1741 
 1742     # Complicated regexes begin here ;)
 1743     #
 1744     # Textual descriptions on --help's style: [...] is optional, | is OR
 1745 
 1746     # First, some auxiliary variables
 1747     #
 1748 
 1749     # [image.EXT]
 1750     patt_img = r"\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp|svg))\]"
 1751 
 1752     # Link things
 1753     # http://www.gbiv.com/protocols/uri/rfc/rfc3986.html
 1754     # pchar: A-Za-z._~- / %FF / !$&'()*+,;= / :@
 1755     # Recomended order: scheme://user:pass@domain/path?query=foo#anchor
 1756     # Also works      : scheme://user:pass@domain/path#anchor?query=foo
 1757     # TODO form: !'():
 1758     urlskel = {
 1759         "proto": r"(https?|ftp|news|telnet|gopher|wais)://",
 1760         "guess": r"(www[23]?|ftp)\.",  # w/out proto, try to guess
 1761         "login": r"A-Za-z0-9_.-",  # for ftp://login@domain.com
 1762         "pass": r"[^ @]*",  # for ftp://login:pass@dom.com
 1763         "chars": r"A-Za-z0-9%._/~:,=$@&+-",  # %20(space), :80(port), D&D
 1764         "anchor": r"A-Za-z0-9%._-",  # %nn(encoded)
 1765         "form": r"A-Za-z0-9/%&=+:;.,$@*_-",  # .,@*_-(as is)
 1766         "punct": r".,;:!?",
 1767     }
 1768 
 1769     # username [ :password ] @
 1770     patt_url_login = r"([{}]+(:{})?@)?".format(urlskel["login"], urlskel["pass"])
 1771 
 1772     # [ http:// ] [ username:password@ ] domain.com [ / ]
 1773     #     [ #anchor | ?form=data ]
 1774     retxt_url = r"\b({}{}|{})[{}]+\b/*(\?[{}]+)?(#[{}]*)?".format(
 1775         urlskel["proto"],
 1776         patt_url_login,
 1777         urlskel["guess"],
 1778         urlskel["chars"],
 1779         urlskel["form"],
 1780         urlskel["anchor"],
 1781     )
 1782 
 1783     # filename | [ filename ] #anchor
 1784     retxt_url_local = r"[{}]+|[{}]*(#[{}]*)".format(
 1785         urlskel["chars"], urlskel["chars"], urlskel["anchor"]
 1786     )
 1787 
 1788     # user@domain [ ?form=data ]
 1789     patt_email = r"\b[{}]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{{2,4}}\b(\?[{}]+)?".format(
 1790         urlskel["login"], urlskel["form"]
 1791     )
 1792 
 1793     # Saving for future use
 1794     bank["_urlskel"] = urlskel
 1795 
 1796     # And now the real regexes
 1797 
 1798     bank["email"] = re.compile(patt_email, re.I)
 1799 
 1800     # email | url
 1801     bank["link"] = re.compile(r"{}|{}".format(retxt_url, patt_email), re.I)
 1802 
 1803     # \[ label | imagetag    url | email | filename \]
 1804     bank["linkmark"] = re.compile(
 1805         r"\[(?P<label>%s|[^]]+) (?P<link>%s|%s|%s)\]"
 1806         % (patt_img, retxt_url, patt_email, retxt_url_local),
 1807         re.I,
 1808     )
 1809 
 1810     # Image
 1811     bank["img"] = re.compile(patt_img, re.I)
 1812 
 1813     # Special things
 1814     bank["special"] = re.compile(r"^%!\s*")
 1815     return bank
 1816 
 1817 
 1818 # END OF regex nightmares
 1819 
 1820 
 1821 class error(Exception):
 1822     pass
 1823 
 1824 
 1825 def Quit(msg=""):
 1826     if msg:
 1827         print(msg)
 1828     sys.exit(0)
 1829 
 1830 
 1831 def Error(msg):
 1832     msg = "%s: Error: " % my_name + msg
 1833     raise error(msg)
 1834 
 1835 
 1836 def getTraceback():
 1837     try:
 1838         from traceback import format_exception
 1839 
 1840         etype, value, tb = sys.exc_info()
 1841         return "".join(format_exception(etype, value, tb))
 1842     except Exception:
 1843         pass
 1844 
 1845 
 1846 def getUnknownErrorMessage():
 1847     msg = "{}\n{} ({}):\n\n{}".format(
 1848         "Sorry! Txt2tags aborted by an unknown error.",
 1849         "Please send the following Error Traceback to the author",
 1850         my_email,
 1851         getTraceback(),
 1852     )
 1853     return msg
 1854 
 1855 
 1856 def Message(msg, level):
 1857     if level <= VERBOSE and not QUIET:
 1858         prefix = "-" * 5
 1859         print("{} {}".format(prefix * level, msg))
 1860 
 1861 
 1862 def Debug(msg, id_=0, linenr=None):
 1863     """Show debug messages, categorized."""
 1864     if QUIET or not DEBUG:
 1865         return
 1866     ids = ["INI", "CFG", "SRC", "BLK", "HLD", "GUI", "OUT", "DET"]
 1867     if linenr is not None:
 1868         msg = "LINE %04d: %s" % (linenr, msg)
 1869     print("++ {}: {}".format(ids[id_], msg))
 1870 
 1871 
 1872 def Readfile(file_path):
 1873     if file_path == "-":
 1874         try:
 1875             contents = sys.stdin.read()
 1876         except KeyboardInterrupt:
 1877             Error("You must feed me with data on STDIN!")
 1878     else:
 1879         try:
 1880             with io.open(file_path, encoding=ENCODING) as f:
 1881                 contents = f.read()
 1882         except IOError as exception:
 1883             Error("Cannot read file: {}\n{}".format(file_path, exception))
 1884     lines = contents.splitlines()
 1885     Message("File read (%d lines): %s" % (len(lines), file_path), 2)
 1886     return lines
 1887 
 1888 
 1889 def Savefile(file_path, lines):
 1890     contents = "\n".join(lines) + "\n"
 1891     try:
 1892         with io.open(file_path, "w", encoding=ENCODING) as f:
 1893             try:
 1894                 f.write(contents)
 1895             except TypeError:
 1896                 f.write(contents.decode(ENCODING))
 1897     except IOError as exception:
 1898         Error("Cannot open file for writing: {}\n{}".format(file_path, exception))
 1899 
 1900 
 1901 def dotted_spaces(txt=""):
 1902     return txt.replace(" ", ".")
 1903 
 1904 
 1905 # TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html
 1906 def get_rc_path():
 1907     "Return the full path for the users' RC file"
 1908     # Try to get the path from an env var. if yes, we're done
 1909     user_defined = os.environ.get("T2TCONFIG")
 1910     if user_defined:
 1911         return user_defined
 1912     # Env var not found, so perform automatic path composing
 1913     # Set default filename according system platform
 1914     rc_names = {"default": ".txt2tagsrc", "win": "_t2trc"}
 1915     rc_file = rc_names.get(sys.platform[:3]) or rc_names["default"]
 1916     # The file must be on the user directory, but where is this dir?
 1917     rc_dir_search = ["HOME", "HOMEPATH"]
 1918     for var in rc_dir_search:
 1919         rc_dir = os.environ.get(var)
 1920         if rc_dir:
 1921             break
 1922     # rc dir found, now we must join dir+file to compose the full path
 1923     if rc_dir:
 1924         # Compose path and return it if the file exists
 1925         rc_path = os.path.join(rc_dir, rc_file)
 1926         # On windows, prefix with the drive (%homedrive%: 2k/XP/NT)
 1927         if sys.platform.startswith("win"):
 1928             rc_drive = os.environ.get("HOMEDRIVE")
 1929             rc_path = os.path.join(rc_drive, rc_path)
 1930         return rc_path
 1931     # Sorry, not found
 1932     return ""
 1933 
 1934 
 1935 ##############################################################################
 1936 
 1937 
 1938 class CommandLine:
 1939     """
 1940     Command Line class - Masters command line
 1941 
 1942     This class checks and extract data from the provided command line.
 1943     The --long options and flags are taken from the global OPTIONS,
 1944     FLAGS and ACTIONS dictionaries. The short options are registered
 1945     here, and also their equivalence to the long ones.
 1946 
 1947     _compose_short_opts() -> str
 1948     _compose_long_opts() -> list
 1949             Compose the valid short and long options list, on the
 1950             'getopt' format.
 1951 
 1952     parse() -> (opts, args)
 1953             Call getopt to check and parse the command line.
 1954             It expects to receive the command line as a list, and
 1955             without the program name (sys.argv[1:]).
 1956 
 1957     get_raw_config() -> [RAW config]
 1958             Scans command line and convert the data to the RAW config
 1959             format. See ConfigMaster class to the RAW format description.
 1960             Optional 'ignore' and 'filter_' arguments are used to filter
 1961             in or out specified keys.
 1962 
 1963     The get_raw_config() calls parse(), so the typical use of this
 1964     class is:
 1965 
 1966         raw = CommandLine().get_raw_config(sys.argv[1:])
 1967     """
 1968 
 1969     def __init__(self):
 1970         self.all_options = list(OPTIONS.keys())
 1971         self.all_flags = list(FLAGS.keys())
 1972         self.all_actions = list(ACTIONS.keys())
 1973 
 1974         # short:long options equivalence
 1975         self.short_long = {
 1976             "C": "config-file",
 1977             "h": "help",
 1978             "H": "no-headers",
 1979             "i": "infile",
 1980             "n": "enum-title",
 1981             "o": "outfile",
 1982             "q": "quiet",
 1983             "t": "target",
 1984             "v": "verbose",
 1985             "V": "version",
 1986         }
 1987 
 1988         # Compose valid short and long options data for getopt
 1989         self.short_opts = self._compose_short_opts()
 1990         self.long_opts = self._compose_long_opts()
 1991 
 1992     def _compose_short_opts(self):
 1993         "Returns a string like 'hVt:o' with all short options/flags"
 1994         ret = []
 1995         for opt in self.short_long.keys():
 1996             long_ = self.short_long[opt]
 1997             if long_ in self.all_options:  # is flag or option?
 1998                 opt = opt + ":"  # option: have param
 1999             ret.append(opt)
 2000         # Debug('Valid SHORT options: %s'%ret)
 2001         return "".join(ret)
 2002 
 2003     def _compose_long_opts(self):
 2004         "Returns a list with all the valid long options/flags"
 2005         ret = [x + "=" for x in self.all_options]  # add =
 2006         ret.extend(self.all_flags)  # flag ON
 2007         ret.extend(self.all_actions)  # actions
 2008         ret.extend(["no-" + x for x in self.all_flags])  # add no-*
 2009         ret.extend(["no-style"])  # turn OFF
 2010         ret.extend(["no-outfile", "no-infile"])  # turn OFF
 2011         ret.extend(["no-targets"])  # turn OFF
 2012         # Debug('Valid LONG options: %s'%ret)
 2013         return ret
 2014 
 2015     def _tokenize(self, cmd_string=""):
 2016         "Convert a command line string to a list"
 2017         # TODO protect quotes contents -- Don't use it, pass cmdline as list
 2018         return cmd_string.split()
 2019 
 2020     def parse(self, cmdline):
 2021         "Check/Parse a command line list     TIP: no program name!"
 2022         # Get the valid options
 2023         short, long_ = self.short_opts, self.long_opts
 2024         # Parse it!
 2025         try:
 2026             opts, args = getopt.getopt(cmdline, short, long_)
 2027         except getopt.error as errmsg:
 2028             Error("%s (try --help)" % errmsg)
 2029         return (opts, args)
 2030 
 2031     def get_raw_config(self, cmdline=None, ignore=None, filter_=None, relative=False):
 2032         "Returns the options/arguments found as RAW config"
 2033 
 2034         if not cmdline:
 2035             return []
 2036         ignore = ignore or []
 2037         filter_ = filter_ or []
 2038 
 2039         ret = []
 2040 
 2041         # We need lists, not strings (such as from %!options)
 2042         if not isinstance(cmdline, list):
 2043             cmdline = self._tokenize(cmdline)
 2044 
 2045         # Extract name/value pair of all configs, check for invalid names
 2046         options, arguments = self.parse(cmdline[:])
 2047 
 2048         # Some cleanup on the raw config
 2049         for name, value in options:
 2050 
 2051             # Remove leading - and --
 2052             name = re.sub("^--?", "", name)
 2053 
 2054             # Translate short option to long
 2055             if len(name) == 1:
 2056                 name = self.short_long[name]
 2057 
 2058             # Outfile exception: path relative to PWD
 2059             if name == "outfile" and relative and value not in [STDOUT, MODULEOUT]:
 2060                 value = os.path.abspath(value)
 2061 
 2062             # -C, --config-file inclusion, path relative to PWD
 2063             if name == "config-file":
 2064                 ret.extend(ConfigLines().include_config_file(value))
 2065                 continue
 2066 
 2067             # Save this config
 2068             ret.append(["all", name, value])
 2069 
 2070         # All configuration was read and saved
 2071 
 2072         # Get infile, if any
 2073         while arguments:
 2074             infile = arguments.pop(0)
 2075             ret.append(["all", "infile", infile])
 2076 
 2077         # Apply 'ignore' and 'filter_' rules (filter_ is stronger)
 2078         if ignore or filter_:
 2079             filtered = []
 2080             for target, name, value in ret:
 2081                 if (filter_ and name in filter_) or (ignore and name not in ignore):
 2082                     filtered.append([target, name, value])
 2083             ret = filtered[:]
 2084 
 2085         return ret
 2086 
 2087 
 2088 ##############################################################################
 2089 
 2090 
 2091 class SourceDocument:
 2092     """
 2093     SourceDocument class - scan document structure, extract data
 2094 
 2095     It knows about full files. It reads a file and identify all
 2096     the areas beginning (Head,Conf,Body). With this info it can
 2097     extract each area contents.
 2098     Note: the original line break is removed.
 2099 
 2100     DATA:
 2101       self.arearef - Save Head, Conf, Body init line number
 2102       self.areas   - Store the area names which are not empty
 2103       self.buffer  - The full file contents (with NO \\r, \\n)
 2104 
 2105     METHODS:
 2106       get()   - Access the contents of an Area. Example:
 2107                 config = SourceDocument(file).get('conf')
 2108 
 2109       split() - Get all the document Areas at once. Example:
 2110                 head, conf, body = SourceDocument(file).split()
 2111 
 2112     RULES:
 2113         * The document parts are sequential: Head, Conf and Body.
 2114         * One ends when the next begins.
 2115         * The Conf Area is optional, so a document can have just
 2116           Head and Body Areas.
 2117 
 2118         These are the Areas limits:
 2119           - Head Area: the first three lines
 2120           - Body Area: from the first valid text line to the end
 2121           - Conf Area: the comments between Head and Body Areas
 2122 
 2123         Exception: If the first line is blank, this means no
 2124         header info, so the Head Area is just the first line.
 2125     """
 2126 
 2127     def __init__(self, filename="", contents=None):
 2128         self.areas = ["head", "conf", "body"]
 2129         self.arearef = []
 2130         self.areas_fancy = ""
 2131         self.filename = filename
 2132         self.buffer = []
 2133         if filename:
 2134             self.scan_file(filename)
 2135         elif contents:
 2136             self.scan(contents)
 2137 
 2138     def split(self):
 2139         "Returns all document parts, splitted into lists."
 2140         return self.get("head"), self.get("conf"), self.get("body")
 2141 
 2142     def get(self, areaname):
 2143         "Returns head|conf|body contents from self.buffer"
 2144         # Sanity
 2145         if areaname not in self.areas:
 2146             return []
 2147         if not self.buffer:
 2148             return []
 2149         # Go get it
 2150         bufini = 1
 2151         bufend = len(self.buffer)
 2152         if areaname == "head":
 2153             ini = bufini
 2154             end = self.arearef[1] or self.arearef[2] or bufend
 2155         elif areaname == "conf":
 2156             ini = self.arearef[1]
 2157             end = self.arearef[2] or bufend
 2158         elif areaname == "body":
 2159             ini = self.arearef[2]
 2160             end = bufend
 2161         else:
 2162             Error("Unknown Area name '%s'" % areaname)
 2163         lines = self.buffer[ini:end]
 2164         # Make sure head will always have 3 lines
 2165         while areaname == "head" and len(lines) < 3:
 2166             lines.append("")
 2167         return lines
 2168 
 2169     def scan_file(self, filename):
 2170         Debug("source file: %s" % filename)
 2171         Message("Loading source document", 1)
 2172         buf = Readfile(filename)
 2173         self.scan(buf)
 2174 
 2175     def scan(self, lines):
 2176         "Run through source file and identify head/conf/body areas"
 2177         buf = lines
 2178         if len(buf) == 0:
 2179             Error("The input file is empty: %s" % self.filename)
 2180         cfg_parser = ConfigLines().parse_line
 2181         buf.insert(0, "")  # text start at pos 1
 2182         ref = [1, 4, 0]
 2183         if not buf[1].strip():  # no header
 2184             ref[0] = 0
 2185             ref[1] = 2
 2186         rgx = getRegexes()
 2187         on_comment_block = 0
 2188         for i in range(ref[1], len(buf)):  # find body init:
 2189             # Handle comment blocks inside config area
 2190             if not on_comment_block and rgx["blockCommentOpen"].search(buf[i]):
 2191                 on_comment_block = 1
 2192                 continue
 2193             if on_comment_block and rgx["blockCommentOpen"].search(buf[i]):
 2194                 on_comment_block = 0
 2195                 continue
 2196             if on_comment_block:
 2197                 continue
 2198 
 2199             if buf[i].strip() and (
 2200                 buf[i][0] != "%" or cfg_parser(buf[i], "include")[1]
 2201             ):
 2202                 ref[2] = i
 2203                 break
 2204         if ref[1] == ref[2]:
 2205             ref[1] = 0  # no conf area
 2206         for i in 0, 1, 2:  # del !existent
 2207             if ref[i] >= len(buf):
 2208                 ref[i] = 0  # title-only
 2209             if not ref[i]:
 2210                 self.areas[i] = ""
 2211         Debug("Head,Conf,Body start line: %s" % ref)
 2212         self.arearef = ref  # save results
 2213         self.buffer = buf
 2214         # Fancyness sample: head conf body (1 4 8)
 2215         self.areas_fancy = "{} ({})".format(
 2216             " ".join(self.areas), " ".join(str(x or "") for x in ref)
 2217         )
 2218         Message("Areas found: %s" % self.areas_fancy, 2)
 2219 
 2220     def get_raw_config(self):
 2221         "Handy method to get the CONF area RAW config (if any)"
 2222         if not self.areas.count("conf"):
 2223             return []
 2224         Message("Scanning source document CONF area", 1)
 2225         raw = ConfigLines(
 2226             file_=self.filename, lines=self.get("conf"), first_line=self.arearef[1]
 2227         ).get_raw_config()
 2228         Debug("document raw config: %s" % raw, 1)
 2229         return raw
 2230 
 2231 
 2232 ##############################################################################
 2233 
 2234 
 2235 class ConfigMaster:
 2236     """
 2237     ConfigMaster class - the configuration wizard
 2238 
 2239     This class is the configuration master. It knows how to handle
 2240     the RAW and PARSED config format. It also performs the sanity
 2241     checking for a given configuration.
 2242 
 2243     DATA:
 2244       self.raw         - Stores the config on the RAW format
 2245       self.parsed      - Stores the config on the PARSED format
 2246       self.defaults    - Stores the default values for all keys
 2247       self.off         - Stores the OFF values for all keys
 2248       self.multi       - List of keys which can have multiple values
 2249       self.incremental - List of keys which are incremental
 2250 
 2251     RAW FORMAT:
 2252       The RAW format is a list of lists, being each mother list item
 2253       a full configuration entry. Any entry is a 3 item list, on
 2254       the following format: [ TARGET, KEY, VALUE ]
 2255       Being a list, the order is preserved, so it's easy to use
 2256       different kinds of configs, as CONF area and command line,
 2257       respecting the precedence.
 2258       The special target 'all' is used when no specific target was
 2259       defined on the original config.
 2260 
 2261     PARSED FORMAT:
 2262       The PARSED format is a dictionary, with all the 'key : value'
 2263       found by reading the RAW config. The self.target contents
 2264       matters, so this dictionary only contains the target's
 2265       config. The configs of other targets are ignored.
 2266 
 2267     The CommandLine and ConfigLines classes have the get_raw_config()
 2268     method which convert the configuration found to the RAW format.
 2269     Just feed it to parse() and get a brand-new ready-to-use config
 2270     dictionary. Example:
 2271 
 2272         >>> raw = CommandLine().get_raw_config(['-n', '-H'])
 2273         >>> print raw
 2274         [['all', 'enum-title', ''], ['all', 'no-headers', '']]
 2275         >>> parsed = ConfigMaster(raw).parse()
 2276         >>> print parsed
 2277         {'enum-title': 1, 'headers': 0}
 2278     """
 2279 
 2280     def __init__(self, raw=None, target=""):
 2281         self.raw = raw or []
 2282         self.target = target
 2283         self.parsed = {}
 2284         self.dft_options = OPTIONS.copy()
 2285         self.dft_flags = FLAGS.copy()
 2286         self.dft_actions = ACTIONS.copy()
 2287         self.defaults = self._get_defaults()
 2288         self.off = self._get_off()
 2289         self.incremental = ["verbose"]
 2290         self.multi = ["infile", "preproc", "postproc", "options", "style"]
 2291 
 2292     def _get_defaults(self):
 2293         "Get the default values for all config/options/flags"
 2294         empty = {}
 2295         for kw in CONFIG_KEYWORDS:
 2296             empty[kw] = ""
 2297         empty.update(self.dft_options)
 2298         empty.update(self.dft_flags)
 2299         empty.update(self.dft_actions)
 2300         empty["sourcefile"] = ""  # internal use only
 2301         return empty
 2302 
 2303     def _get_off(self):
 2304         "Turns OFF all the config/options/flags"
 2305         off = {}
 2306         for key in self.defaults.keys():
 2307             kind = type(self.defaults[key])
 2308             if kind == int:
 2309                 off[key] = 0
 2310             elif kind == str:
 2311                 off[key] = ""
 2312             elif kind == list:
 2313                 off[key] = []
 2314             else:
 2315                 Error("ConfigMaster: %s: Unknown type" % key)
 2316         return off
 2317 
 2318     def _check_target(self):
 2319         "Checks if the target is already defined. If not, do it"
 2320         if not self.target:
 2321             self.target = self.find_value("target")
 2322 
 2323     def get_target_raw(self):
 2324         "Returns the raw config for self.target or 'all'"
 2325         ret = []
 2326         self._check_target()
 2327         for entry in self.raw:
 2328             if entry[0] == self.target or entry[0] == "all":
 2329                 ret.append(entry)
 2330         return ret
 2331 
 2332     def add(self, key, val):
 2333         "Adds the key:value pair to the config dictionary (if needed)"
 2334         # %!options
 2335         if key == "options":
 2336             ignoreme = list(self.dft_actions.keys()) + ["target"]
 2337             ignoreme.remove("targets")
 2338             raw_opts = CommandLine().get_raw_config(val, ignore=ignoreme)
 2339             for _target, key, val in raw_opts:
 2340                 self.add(key, val)
 2341             return
 2342         # The no- prefix turns OFF this key
 2343         if key.startswith("no-"):
 2344             key = key[3:]  # remove prefix
 2345             val = self.off.get(key)  # turn key OFF
 2346         # Is this key valid?
 2347         if key not in self.defaults.keys():
 2348             Debug("Bogus Config {}:{}".format(key, val), 1)
 2349             return
 2350         # Is this value the default one?
 2351         if val == self.defaults.get(key):
 2352             # If default value, remove previous key:val
 2353             if key in self.parsed:
 2354                 del self.parsed[key]
 2355             # Nothing more to do
 2356             return
 2357         # Flags ON comes empty. we'll add the 1 value now
 2358         if val == "" and (
 2359             key in self.dft_flags.keys() or key in self.dft_actions.keys()
 2360         ):
 2361             val = 1
 2362         # Multi value or single?
 2363         if key in self.multi:
 2364             # First one? start new list
 2365             if key not in self.parsed:
 2366                 self.parsed[key] = []
 2367             self.parsed[key].append(val)
 2368         # Incremental value? so let's add it
 2369         elif key in self.incremental:
 2370             self.parsed[key] = (self.parsed.get(key) or 0) + val
 2371         else:
 2372             self.parsed[key] = val
 2373         fancykey = dotted_spaces("%12s" % key)
 2374         Message("Added config {} : {}".format(fancykey, val), 3)
 2375 
 2376     def get_outfile_name(self, config):
 2377         "Dirname is the same for {in,out}file"
 2378         infile, outfile = config["sourcefile"], config["outfile"]
 2379         if (
 2380             outfile
 2381             and outfile not in (STDOUT, MODULEOUT)
 2382             and not os.path.isabs(outfile)
 2383         ):
 2384             outfile = os.path.join(os.path.dirname(infile), outfile)
 2385         if infile == STDIN and not outfile:
 2386             outfile = STDOUT
 2387         if infile == MODULEIN and not outfile:
 2388             outfile = MODULEOUT
 2389         if not outfile and (infile and config.get("target")):
 2390             basename = re.sub(r"\.(txt|t2t)$", "", infile)
 2391             outfile = "{}.{}".format(basename, config["target"])
 2392         Debug(" infile: '%s'" % infile, 1)
 2393         Debug("outfile: '%s'" % outfile, 1)
 2394         return outfile
 2395 
 2396     def sanity(self, config):
 2397         "Basic config sanity checking"
 2398         if not config:
 2399             return {}
 2400         target = config.get("target")
 2401         # Some actions don't require target specification
 2402         if not target:
 2403             for action in NO_TARGET:
 2404                 if config.get(action):
 2405                     target = "txt"
 2406                     break
 2407 
 2408         # We *need* a target
 2409         if not target:
 2410             Error(
 2411                 "No target specified (try --help)."
 2412                 + "\n\n"
 2413                 + "Please select a target using the -t option or the %!target command."
 2414                 + "\n"
 2415                 + "Example:"
 2416                 + " {} -t html {}".format(my_name, "file.t2t")
 2417                 + "\n\n"
 2418                 + "Run 'txt2tags --targets' to see all available targets."
 2419             )
 2420         # And of course, an infile also
 2421         if "infile" not in config:
 2422             Error("Missing input file (try --help)")
 2423         # Is the target valid?
 2424         if not TARGETS.count(target):
 2425             Error(
 2426                 "Invalid target '%s'" % target
 2427                 + "\n\n"
 2428                 + "Run 'txt2tags --targets' to see all the available targets."
 2429             )
 2430         # Ensure all keys are present
 2431         empty = self.defaults.copy()
 2432         empty.update(config)
 2433         config = empty.copy()
 2434         # Restore target
 2435         config["target"] = target
 2436         # Set output file name
 2437         config["outfile"] = self.get_outfile_name(config)
 2438         # Checking suicide
 2439         if os.path.abspath(config["sourcefile"]) == os.path.abspath(
 2440             config["outfile"]
 2441         ) and config["outfile"] not in [STDOUT, MODULEOUT]:
 2442             Error("Input and Output files are the same: %s" % config["outfile"])
 2443         return config
 2444 
 2445     def parse(self):
 2446         "Returns the parsed config for the current target"
 2447         raw = self.get_target_raw()
 2448         for _target, key, value in raw:
 2449             self.add(key, value)
 2450         Message("Added the following keys: %s" % ", ".join(sorted(self.parsed)), 2)
 2451         return self.parsed.copy()
 2452 
 2453     def find_value(self, key="", target=""):
 2454         "Scans ALL raw config to find the desired key"
 2455         ret = []
 2456         # Scan and save all values found
 2457         for targ, k, val in self.raw:
 2458             if k == key and (targ == target or targ == "all"):
 2459                 ret.append(val)
 2460         if not ret:
 2461             return ""
 2462         # If not multi value, return only the last found
 2463         if key in self.multi:
 2464             return ret
 2465         else:
 2466             return ret[-1]
 2467 
 2468 
 2469 ########################################################################
 2470 
 2471 
 2472 class ConfigLines:
 2473     """
 2474     ConfigLines class - the config file data extractor
 2475 
 2476     This class reads and parse the config lines on the %!key:val
 2477     format, converting it to RAW config. It deals with user
 2478     config file (RC file), source document CONF area and
 2479     %!includeconf directives.
 2480 
 2481     Call it passing a file name or feed the desired config lines.
 2482     Then just call the get_raw_config() method and wait to
 2483     receive the full config data on the RAW format. This method
 2484     also follows the possible %!includeconf directives found on
 2485     the config lines. Example:
 2486 
 2487             raw = ConfigLines(file=".txt2tagsrc").get_raw_config()
 2488 
 2489     The parse_line() method is also useful to be used alone,
 2490     to identify and tokenize a single config line. For example,
 2491     to get the %!include command components, on the source
 2492     document BODY:
 2493 
 2494             target, key, value = ConfigLines().parse_line(body_line)
 2495     """
 2496 
 2497     def __init__(self, file_="", lines=None, first_line=1):
 2498         self.file = file_ or "NOFILE"
 2499         self.lines = lines or []
 2500         self.first_line = first_line
 2501 
 2502     def load_lines(self):
 2503         "Make sure we've loaded the file contents into buffer"
 2504         if not self.lines and not self.file:
 2505             Error("ConfigLines: No file or lines provided")
 2506         if not self.lines:
 2507             self.lines = self.read_config_file(self.file)
 2508 
 2509     def read_config_file(self, filename=""):
 2510         "Read a Config File contents, aborting on invalid line"
 2511         if not filename:
 2512             return []
 2513         errormsg = "Invalid CONFIG line on %s" + "\n%03d:%s"
 2514         lines = Readfile(filename)
 2515         # Sanity: try to find invalid config lines
 2516         for i in range(len(lines)):
 2517             line = lines[i].rstrip()
 2518             if not line:
 2519                 continue  # empty
 2520             if line[0] != "%":
 2521                 Error(errormsg % (filename, i + 1, line))
 2522         return lines
 2523 
 2524     def include_config_file(self, file_=""):
 2525         "Perform the %!includeconf action, returning RAW config"
 2526         if not file_:
 2527             return []
 2528         # Current dir relative to the current file (self.file)
 2529         current_dir = os.path.dirname(self.file)
 2530         file_ = os.path.join(current_dir, file_)
 2531         # Read and parse included config file contents
 2532         lines = self.read_config_file(file_)
 2533         return ConfigLines(file_=file_, lines=lines).get_raw_config()
 2534 
 2535     def get_raw_config(self):
 2536         "Scan buffer and extract all config as RAW (including includes)"
 2537         ret = []
 2538         self.load_lines()
 2539         first = self.first_line
 2540         for i in range(len(self.lines)):
 2541             line = self.lines[i]
 2542             Message("Processing line %03d: %s" % (first + i, line), 2)
 2543             target, key, val = self.parse_line(line)
 2544             if not key:
 2545                 continue  # no config on this line
 2546             if key == "includeconf":
 2547                 err = "A file cannot include itself (loop!)"
 2548                 if val == self.file:
 2549                     Error("{}: %!includeconf: {}".format(err, self.file))
 2550                 more_raw = self.include_config_file(val)
 2551                 ret.extend(more_raw)
 2552                 Message("Finished Config file inclusion: %s" % val, 2)
 2553             else:
 2554                 ret.append([target, key, val])
 2555                 Message("Added %s" % key, 3)
 2556         return ret
 2557 
 2558     def parse_line(self, line="", keyname="", target=""):
 2559         "Detects %!key:val config lines and extract data from it"
 2560         empty = ["", "", ""]
 2561         if not line:
 2562             return empty
 2563         no_target = ["target", "includeconf"]
 2564         re_name = keyname or "[a-z]+"
 2565         re_target = target or "[a-z]*"
 2566         # XXX TODO <value>\S.+?  requires TWO chars, breaks %!include:a
 2567         cfgregex = re.compile(
 2568             r"""
 2569                 ^%%!\s*               # leading id with opt spaces
 2570                 (?P<name>%s)\s*       # config name
 2571                 (\((?P<target>%s)\))? # optional target spec inside ()
 2572                 \s*:\s*               # key:value delimiter with opt spaces
 2573                 (?P<value>\S.+?)      # config value
 2574                 \s*$                  # rstrip() spaces and hit EOL
 2575                 """
 2576             % (re_name, re_target),
 2577             re.I + re.VERBOSE,
 2578         )
 2579         prepostregex = re.compile(
 2580             r"""
 2581                                       # ---[ PATTERN ]---
 2582                 ^( "([^"]*)"          # "double quoted" or
 2583                 | '([^']*)'           # 'single quoted' or
 2584                 | ([^\s]+)            # single_word
 2585                 )
 2586                 \s+                   # separated by spaces
 2587 
 2588                                       # ---[ REPLACE ]---
 2589                 ( "([^"]*)"           # "double quoted" or
 2590                 | '([^']*)'           # 'single quoted' or
 2591                 | (.*)                # anything
 2592                 )
 2593                 \s*$
 2594                 """,
 2595             re.VERBOSE,
 2596         )
 2597 
 2598         # Give me a match or get out
 2599         match = cfgregex.match(line)
 2600         if not match:
 2601             return empty
 2602 
 2603         # Save information about this config
 2604         name = (match.group("name") or "").lower()
 2605         target = (match.group("target") or "all").lower()
 2606         value = match.group("value")
 2607 
 2608         # %!keyword(target) not allowed for these
 2609         if name in no_target and match.group("target"):
 2610             Error("You can't use (target) with %s" % ("%!" + name) + "\n%s" % line)
 2611 
 2612         # Force no_target keywords to be valid for all targets
 2613         if name in no_target:
 2614             target = "all"
 2615 
 2616         # Special config with two quoted values (%!preproc: "foo" 'bar')
 2617         if name == "preproc" or name == "postproc":
 2618             valmatch = prepostregex.search(value)
 2619             if not valmatch:
 2620                 return empty
 2621             getval = valmatch.group
 2622             patt = getval(2) or getval(3) or getval(4) or ""
 2623             repl = getval(6) or getval(7) or getval(8) or ""
 2624             value = (patt, repl)
 2625         return [target, name, value]
 2626 
 2627 
 2628 ##############################################################################
 2629 
 2630 
 2631 class MaskMaster:
 2632     "(Un)Protect important structures from escaping and formatting"
 2633 
 2634     def __init__(self):
 2635         self.linkmask = "vvvLINKvvv"
 2636         self.monomask = "vvvMONOvvv"
 2637         self.rawmask = "vvvRAWvvv"
 2638         self.taggedmask = "vvvTAGGEDvvv"
 2639         self.reset()
 2640 
 2641     def reset(self):
 2642         self.linkbank = []
 2643         self.monobank = []
 2644         self.rawbank = []
 2645         self.taggedbank = []
 2646 
 2647     def mask(self, line=""):
 2648         # The verbatim, raw and tagged inline marks are mutually exclusive.
 2649         # This means that one can't appear inside the other.
 2650         # If found, the inner marks must be ignored.
 2651         # Example: ``foo ""bar"" ''baz''``
 2652         # In HTML: <code>foo ""bar"" ''baz''</code>
 2653         #
 2654         # The trick here is to protect the mark who appears first on the line.
 2655         # The three regexes are tried and the one with the lowest index wins.
 2656         # If none is found (else), we get out of the loop.
 2657         #
 2658         while True:
 2659             try:
 2660                 t = regex["tagged"].search(line).start()
 2661             except Exception:
 2662                 t = -1
 2663 
 2664             try:
 2665                 r = regex["raw"].search(line).start()
 2666             except Exception:
 2667                 r = -1
 2668 
 2669             try:
 2670                 v = regex["fontMono"].search(line).start()
 2671             except Exception:
 2672                 v = -1
 2673 
 2674             # Protect tagged text
 2675             if t >= 0 and (r == -1 or t < r) and (v == -1 or t < v):
 2676                 txt = regex["tagged"].search(line).group(1)
 2677                 if TARGET == "tex":
 2678                     txt = txt.replace("_", "vvvUnderscoreInTaggedTextvvv")
 2679                 self.taggedbank.append(txt)
 2680                 line = regex["tagged"].sub(self.taggedmask, line, 1)
 2681 
 2682             # Protect raw text
 2683             elif r >= 0 and (t == -1 or r < t) and (v == -1 or r < v):
 2684                 txt = regex["raw"].search(line).group(1)
 2685                 txt = doEscape(TARGET, txt)
 2686                 if TARGET == "tex":
 2687                     txt = txt.replace("_", "vvvUnderscoreInRawTextvvv")
 2688                 self.rawbank.append(txt)
 2689                 line = regex["raw"].sub(self.rawmask, line, 1)
 2690 
 2691             # Protect verbatim text
 2692             elif v >= 0 and (t == -1 or v < t) and (r == -1 or v < r):
 2693                 txt = regex["fontMono"].search(line).group(1)
 2694                 txt = doEscape(TARGET, txt)
 2695                 self.monobank.append(txt)
 2696                 line = regex["fontMono"].sub(self.monomask, line, 1)
 2697             else:
 2698                 break
 2699 
 2700         # Protect URLs and emails
 2701         while regex["linkmark"].search(line) or regex["link"].search(line):
 2702 
 2703             # Try to match plain or named links
 2704             match_link = regex["link"].search(line)
 2705             match_named = regex["linkmark"].search(line)
 2706 
 2707             # Define the current match
 2708             if match_link and match_named:
 2709                 # Both types found, which is the first?
 2710                 m = match_link
 2711                 if match_named.start() < match_link.start():
 2712                     m = match_named
 2713             else:
 2714                 # Just one type found, we're fine
 2715                 m = match_link or match_named
 2716 
 2717             # Extract link data and apply mask
 2718             if m == match_link:  # plain link
 2719                 link = m.group()
 2720                 label = ""
 2721                 link_re = regex["link"]
 2722             else:  # named link
 2723                 link = m.group("link")
 2724                 label = m.group("label").rstrip()
 2725                 link_re = regex["linkmark"]
 2726             line = link_re.sub(self.linkmask, line, 1)
 2727 
 2728             # Save link data to the link bank
 2729             self.linkbank.append((label, link))
 2730         return line
 2731 
 2732     def undo(self, line):
 2733         # url & email
 2734         for label, url in self.linkbank:
 2735             link = get_tagged_link(label, url)
 2736             line = line.replace(self.linkmask, link, 1)
 2737 
 2738         # Expand verb
 2739         for mono in self.monobank:
 2740             open_, close = TAGS["fontMonoOpen"], TAGS["fontMonoClose"]
 2741             line = line.replace(self.monomask, open_ + mono + close, 1)
 2742 
 2743         # Expand raw
 2744         for raw in self.rawbank:
 2745             line = line.replace(self.rawmask, raw, 1)
 2746 
 2747         # Expand tagged
 2748         for tagged in self.taggedbank:
 2749             line = line.replace(self.taggedmask, tagged, 1)
 2750 
 2751         return line
 2752 
 2753 
 2754 ##############################################################################
 2755 
 2756 
 2757 class TitleMaster:
 2758     "Title things"
 2759 
 2760     def __init__(self):
 2761         self.count = ["", 0, 0, 0, 0, 0]
 2762         self.toc = []
 2763         self.level = 0
 2764         self.kind = ""
 2765         self.txt = ""
 2766         self.label = ""
 2767         self.tag = ""
 2768         self.tag_hold = []
 2769         self.last_level = 0
 2770         self.count_id = ""
 2771         self.anchor_count = 0
 2772         self.anchor_prefix = "toc"
 2773 
 2774     def _open_close_blocks(self):
 2775         "Open new title blocks, closing the previous (if any)"
 2776         if not rules["titleblocks"]:
 2777             return
 2778         tag = ""
 2779         last = self.last_level
 2780         curr = self.level
 2781 
 2782         # Same level, just close the previous
 2783         if curr == last:
 2784             tag = TAGS.get("title%dClose" % last)
 2785             if tag:
 2786                 self.tag_hold.append(tag)
 2787 
 2788         # Section -> subsection, more depth
 2789         while curr > last:
 2790             last += 1
 2791 
 2792             # Open the new block of subsections
 2793             tag = TAGS.get("blockTitle%dOpen" % last)
 2794             if tag:
 2795                 self.tag_hold.append(tag)
 2796 
 2797             # Jump from title1 to title3 or more
 2798             # Fill the gap with an empty section
 2799             if curr - last > 0:
 2800                 tag = TAGS.get("title%dOpen" % last)
 2801                 tag = regex["x"].sub("", tag)  # del \a
 2802                 if tag:
 2803                     self.tag_hold.append(tag)
 2804 
 2805         # Section <- subsection, less depth
 2806         while curr < last:
 2807             # Close the current opened subsection
 2808             tag = TAGS.get("title%dClose" % last)
 2809             if tag:
 2810                 self.tag_hold.append(tag)
 2811 
 2812             # Close the current opened block of subsections
 2813             tag = TAGS.get("blockTitle%dClose" % last)
 2814             if tag:
 2815                 self.tag_hold.append(tag)
 2816 
 2817             last -= 1
 2818 
 2819             # Close the previous section of the same level
 2820             # The subsections were under it
 2821             if curr == last:
 2822                 tag = TAGS.get("title%dClose" % last)
 2823                 if tag:
 2824                     self.tag_hold.append(tag)
 2825 
 2826     def add(self, line):
 2827         "Parses a new title line."
 2828         if not line:
 2829             return
 2830         self._set_prop(line)
 2831         self._open_close_blocks()
 2832         self._set_count_id()
 2833         self._set_label()
 2834         self._save_toc_info()
 2835 
 2836     def close_all(self):
 2837         "Closes all opened title blocks"
 2838         ret = []
 2839         ret.extend(self.tag_hold)
 2840         while self.level:
 2841             tag = TAGS.get("title%dClose" % self.level)
 2842             if tag:
 2843                 ret.append(tag)
 2844             tag = TAGS.get("blockTitle%dClose" % self.level)
 2845             if tag:
 2846                 ret.append(tag)
 2847             self.level -= 1
 2848         return ret
 2849 
 2850     def _save_toc_info(self):
 2851         "Save TOC info, used by self.dump_marked_toc()"
 2852         self.toc.append((self.level, self.count_id, self.txt, self.label))
 2853 
 2854     def _set_prop(self, line=""):
 2855         "Extract info from original line and set data holders."
 2856         # Detect title type (numbered or not)
 2857         id_ = line.lstrip()[0]
 2858         if id_ == "=":
 2859             kind = "title"
 2860         elif id_ == "+":
 2861             kind = "numtitle"
 2862         else:
 2863             Error("Unknown Title ID '%s'" % id_)
 2864         # Extract line info
 2865         match = regex[kind].search(line)
 2866         level = len(match.group("id"))
 2867         txt = match.group("txt").strip()
 2868         label = match.group("label")
 2869         # Parse info & save
 2870         if CONF["enum-title"]:
 2871             kind = "numtitle"  # force
 2872         if rules["titleblocks"]:
 2873             self.tag = TAGS.get("%s%dOpen" % (kind, level)) or TAGS.get(
 2874                 "title%dOpen" % level
 2875             )
 2876         else:
 2877             self.tag = TAGS.get(kind + repr(level)) or TAGS.get("title" + repr(level))
 2878         self.last_level = self.level
 2879         self.kind = kind
 2880         self.level = level
 2881         self.txt = txt
 2882         self.label = label
 2883 
 2884     def _set_count_id(self):
 2885         "Compose and save the title count identifier (if needed)."
 2886         count_id = ""
 2887         if self.kind == "numtitle" and not rules["autonumbertitle"]:
 2888             # Manually increase title count
 2889             self.count[self.level] += 1
 2890             # Reset sublevels count (if any)
 2891             max_levels = len(self.count)
 2892             if self.level < max_levels - 1:
 2893                 for i in range(self.level + 1, max_levels):
 2894                     self.count[i] = 0
 2895             # Compose count id from hierarchy
 2896             for i in range(self.level):
 2897                 count_id = "%s%d." % (count_id, self.count[i + 1])
 2898         self.count_id = count_id
 2899 
 2900     def _set_label(self):
 2901         "Compose and save title label, used by anchors."
 2902         # Remove invalid chars from label set by user
 2903         self.label = re.sub("[^A-Za-z0-9_-]", "", self.label or "")
 2904 
 2905     def _get_tagged_anchor(self):
 2906         "Return anchor if user defined a label, or TOC is on."
 2907         ret = ""
 2908         label = self.label
 2909         if CONF["toc"]:
 2910             self.anchor_count += 1
 2911             # Autonumber label (if needed)
 2912             label = label or "{}{}".format(self.anchor_prefix, self.anchor_count)
 2913         if label and TAGS["anchor"]:
 2914             ret = regex["x"].sub(label, TAGS["anchor"])
 2915         return ret
 2916 
 2917     def _get_full_title_text(self):
 2918         "Returns the full title contents, already escaped."
 2919         ret = self.txt
 2920         # Insert count_id (if any) before text
 2921         if self.count_id:
 2922             ret = "{} {}".format(self.count_id, ret)
 2923         # Escape specials
 2924         ret = doEscape(TARGET, ret)
 2925         # Same targets needs final escapes on title lines
 2926         # It's here because there is a 'continue' after title
 2927         if rules["finalescapetitle"]:
 2928             ret = doFinalEscape(TARGET, ret)
 2929         return ret
 2930 
 2931     def get(self):
 2932         "Returns the tagged title as a list."
 2933         ret = []
 2934 
 2935         # Maybe some anchoring before?
 2936         anchor = self._get_tagged_anchor()
 2937         self.tag = regex["_anchor"].sub(anchor, self.tag)
 2938 
 2939         # Compose & escape title text (TOC uses unescaped)
 2940         full_title = self._get_full_title_text()
 2941 
 2942         # Close previous section area
 2943         ret.extend(self.tag_hold)
 2944         self.tag_hold = []
 2945 
 2946         tagged = regex["x"].sub(full_title, self.tag)
 2947 
 2948         # Adds "underline" on TXT target
 2949         if TARGET == "txt":
 2950             if BLOCK.count > 1:
 2951                 ret.append("")  # blank line before
 2952             ret.append(tagged)
 2953             # Get the right letter count for UTF
 2954             if isinstance(full_title, bytes):
 2955                 full_title = full_title.decode(ENCODING)
 2956             ret.append(regex["x"].sub("=" * len(full_title), self.tag))
 2957         else:
 2958             ret.append(tagged)
 2959         return ret
 2960 
 2961     def dump_marked_toc(self):
 2962         "Dumps all toc itens as a valid t2t-marked list"
 2963         ret = []
 2964         toc_count = 1
 2965         for level, count_id, txt, label in self.toc:
 2966             indent = "  " * level
 2967             id_txt = ("{} {}".format(count_id, txt)).lstrip()
 2968             label = label or self.anchor_prefix + repr(toc_count)
 2969             toc_count += 1
 2970 
 2971             # TOC will have crosslinks to anchors
 2972             if TAGS["anchor"]:
 2973                 if CONF["enum-title"] and level == 1:
 2974                     # 1. [Foo #anchor] is more readable than [1. Foo #anchor] in level 1.
 2975                     # This is an idea stolen from Windows .CHM help files.
 2976                     tocitem = '{}+ [""{}"" #{}]'.format(indent, txt, label)
 2977                 else:
 2978                     tocitem = '{}- [""{}"" #{}]'.format(indent, id_txt, label)
 2979 
 2980             # TOC will be plain text (no links)
 2981             else:
 2982                 if TARGET in ["txt", "man"]:
 2983                     # For these, the list is not necessary, just dump the text
 2984                     tocitem = '{}""{}""'.format(indent, id_txt)
 2985                 else:
 2986                     tocitem = '{}- ""{}""'.format(indent, id_txt)
 2987             ret.append(tocitem)
 2988         return ret
 2989 
 2990 
 2991 ##############################################################################
 2992 
 2993 # TODO check all this table mess
 2994 # It uses parse_row properties for table lines
 2995 # BLOCK.table() replaces the cells by the parsed content
 2996 class TableMaster:
 2997     def __init__(self, line=""):
 2998         self.rows = []
 2999         self.border = False
 3000         self.align = "Left"
 3001         self.cellalign = []
 3002         self.colalign = []
 3003         self.cellspan = []
 3004         if line:
 3005             prop = self.parse_row(line)
 3006             self.border = prop["border"]
 3007             self.align = prop["align"]
 3008             self.cellalign = prop["cellalign"]
 3009             self.cellspan = prop["cellspan"]
 3010             self.colalign = self._get_col_align()
 3011 
 3012     def _get_col_align(self):
 3013         colalign = []
 3014         for cell in range(len(self.cellalign)):
 3015             align = self.cellalign[cell]
 3016             span = self.cellspan[cell]
 3017             colalign.extend([align] * span)
 3018         return colalign
 3019 
 3020     def _get_open_tag(self):
 3021         topen = TAGS["tableOpen"]
 3022         tborder = TAGS["_tableBorder"]
 3023         talign = TAGS["_tableAlign" + self.align]
 3024         calignsep = TAGS["tableColAlignSep"]
 3025         calign = ""
 3026 
 3027         # The first line defines if table has border or not
 3028         if not self.border:
 3029             tborder = ""
 3030         # Set the columns alignment
 3031         if rules["tablecellaligntype"] == "column":
 3032             calign = [TAGS["_tableColAlign%s" % x] for x in self.colalign]
 3033             calign = calignsep.join(calign)
 3034         # Align full table, set border and Column align (if any)
 3035         topen = regex["_tableAlign"].sub(talign, topen)
 3036         topen = regex["_tableBorder"].sub(tborder, topen)
 3037         topen = regex["_tableColAlign"].sub(calign, topen)
 3038         # Tex table spec, border or not: {|l|c|r|} , {lcr}
 3039         if calignsep and not self.border:
 3040             # Remove cell align separator
 3041             topen = topen.replace(calignsep, "")
 3042         return topen
 3043 
 3044     def _get_cell_align(self, cells):
 3045         ret = []
 3046         for cell in cells:
 3047             align = "Left"
 3048             if cell.strip():
 3049                 if cell[0] == " " and cell[-1] == " ":
 3050                     align = "Center"
 3051                 elif cell[0] == " ":
 3052                     align = "Right"
 3053             ret.append(align)
 3054         return ret
 3055 
 3056     def _get_cell_span(self, cells):
 3057         ret = []
 3058         for cell in cells:
 3059             span = 1
 3060             m = re.search(r"\a(\|+)$", cell)
 3061             if m:
 3062                 span = len(m.group(1)) + 1
 3063             ret.append(span)
 3064         return ret
 3065 
 3066     def _tag_cells(self, rowdata):
 3067         row = []
 3068         cells = rowdata["cells"]
 3069         open_ = TAGS["tableCellOpen"]
 3070         close = TAGS["tableCellClose"]
 3071         sep = TAGS["tableCellSep"]
 3072         calign = [TAGS["_tableCellAlign" + x] for x in rowdata["cellalign"]]
 3073         calignsep = TAGS["tableColAlignSep"]
 3074         ncolumns = len(self.colalign)
 3075 
 3076         # Populate the span and multicol open tags
 3077         cspan = []
 3078         multicol = []
 3079         colindex = 0
 3080         for cellindex in range(0, len(rowdata["cellspan"])):
 3081 
 3082             span = rowdata["cellspan"][cellindex]
 3083             align = rowdata["cellalign"][cellindex]
 3084 
 3085             if span > 1:
 3086                 cspan.append(regex["x"].sub(str(span), TAGS["_tableCellColSpan"]))
 3087 
 3088                 mcopen = regex["x"].sub(str(span), TAGS["_tableCellMulticolOpen"])
 3089                 multicol.append(mcopen)
 3090             else:
 3091                 cspan.append("")
 3092 
 3093                 if colindex < ncolumns and align != self.colalign[colindex]:
 3094                     mcopen = regex["x"].sub("1", TAGS["_tableCellMulticolOpen"])
 3095                     multicol.append(mcopen)
 3096                 else:
 3097                     multicol.append("")
 3098 
 3099             if not self.border:
 3100                 multicol[-1] = multicol[-1].replace(calignsep, "")
 3101 
 3102             colindex += span
 3103 
 3104         # Maybe is it a title row?
 3105         if rowdata["title"]:
 3106             open_ = TAGS["tableTitleCellOpen"] or open_
 3107             close = TAGS["tableTitleCellClose"] or close
 3108             sep = TAGS["tableTitleCellSep"] or sep
 3109 
 3110         # Should we break the line on *each* table cell?
 3111         if rules["breaktablecell"]:
 3112             close = close + "\n"
 3113 
 3114         # Cells pre processing
 3115         if rules["tablecellstrip"]:
 3116             cells = [x.strip() for x in cells]
 3117         if rowdata["title"] and rules["tabletitlerowinbold"]:
 3118             cells = [enclose_me("fontBold", x) for x in cells]
 3119 
 3120         # Add cell BEGIN/END tags
 3121         for cell in cells:
 3122             copen = open_
 3123             cclose = close
 3124             # Make sure we will pop from some filled lists
 3125             # Fixes empty line bug '| |'
 3126             this_align = this_span = this_mcopen = ""
 3127             if calign:
 3128                 this_align = calign.pop(0)
 3129             if cspan:
 3130                 this_span = cspan.pop(0)
 3131             if multicol:
 3132                 this_mcopen = multicol.pop(0)
 3133 
 3134             # Insert cell align into open tag (if cell is alignable)
 3135             if rules["tablecellaligntype"] == "cell":
 3136                 copen = regex["_tableCellAlign"].sub(this_align, copen)
 3137 
 3138             # Insert cell span into open tag (if cell is spannable)
 3139             if rules["tablecellspannable"]:
 3140                 copen = regex["_tableCellColSpan"].sub(this_span, copen)
 3141 
 3142             # Use multicol tags instead (if multicol supported, and if
 3143             # cell has a span or is aligned differently to column)
 3144             if rules["tablecellmulticol"]:
 3145                 if this_mcopen:
 3146                     copen = regex["_tableColAlign"].sub(this_align, this_mcopen)
 3147                     cclose = TAGS["_tableCellMulticolClose"]
 3148 
 3149             row.append(copen + cell + cclose)
 3150 
 3151         # Maybe there are cell separators?
 3152         return sep.join(row)
 3153 
 3154     def add_row(self, cells):
 3155         self.rows.append(cells)
 3156 
 3157     def parse_row(self, line):
 3158         # Default table properties
 3159         ret = {
 3160             "border": False,
 3161             "title": False,
 3162             "align": "Left",
 3163             "cells": [],
 3164             "cellalign": [],
 3165             "cellspan": [],
 3166         }
 3167         # Detect table align (and remove spaces mark)
 3168         if line[0] == " ":
 3169             ret["align"] = "Center"
 3170         line = line.lstrip()
 3171         # Detect title mark
 3172         if line[1] == "|":
 3173             ret["title"] = True
 3174         # Detect border mark and normalize the EOL
 3175         m = re.search(r" (\|+) *$", line)
 3176         if m:
 3177             line += " "
 3178             ret["border"] = True
 3179         else:
 3180             line += " | "
 3181         # Delete table mark
 3182         line = regex["table"].sub("", line)
 3183         # Detect colspan  | foo | bar baz |||
 3184         line = re.sub(r" (\|+)\| ", "\a\\1 | ", line)
 3185         # Split cells (the last is fake)
 3186         ret["cells"] = line.split(" | ")[:-1]
 3187         # Find cells span
 3188         ret["cellspan"] = self._get_cell_span(ret["cells"])
 3189         # Remove span ID
 3190         ret["cells"] = [re.sub(r"\a\|+$", "", x) for x in ret["cells"]]
 3191         # Find cells align
 3192         ret["cellalign"] = self._get_cell_align(ret["cells"])
 3193         # Hooray!
 3194         Debug("Table Prop: %s" % ret, 7)
 3195         return ret
 3196 
 3197     def dump(self):
 3198         open_ = self._get_open_tag()
 3199         rows = self.rows
 3200         close = TAGS["tableClose"]
 3201 
 3202         rowopen = TAGS["tableRowOpen"]
 3203         rowclose = TAGS["tableRowClose"]
 3204         rowsep = TAGS["tableRowSep"]
 3205         titrowopen = TAGS["tableTitleRowOpen"] or rowopen
 3206         titrowclose = TAGS["tableTitleRowClose"] or rowclose
 3207 
 3208         if rules["breaktablelineopen"]:
 3209             rowopen = rowopen + "\n"
 3210             titrowopen = titrowopen + "\n"
 3211 
 3212         # Tex gotchas
 3213         if TARGET == "tex":
 3214             if not self.border:
 3215                 rowopen = titrowopen = ""
 3216             else:
 3217                 close = rowopen + close
 3218 
 3219         # Now we tag all the table cells on each row
 3220         tagged_cells = [self._tag_cells(cell) for cell in rows]
 3221 
 3222         # Add row separator tags between lines
 3223         tagged_rows = []
 3224         if rowsep:
 3225             tagged_rows = [cell + rowsep for cell in tagged_cells]
 3226             # Remove last rowsep, because the table is over
 3227             tagged_rows[-1] = tagged_rows[-1].replace(rowsep, "")
 3228         # Add row BEGIN/END tags for each line
 3229         else:
 3230             for rowdata in rows:
 3231                 if rowdata["title"]:
 3232                     o, c = titrowopen, titrowclose
 3233                 else:
 3234                     o, c = rowopen, rowclose
 3235                 row = tagged_cells.pop(0)
 3236                 tagged_rows.append(o + row + c)
 3237 
 3238         # Join the pieces together
 3239         fulltable = []
 3240         if open_:
 3241             fulltable.append(open_)
 3242         fulltable.extend(tagged_rows)
 3243         if close:
 3244             fulltable.append(close)
 3245 
 3246         return fulltable
 3247 
 3248 
 3249 ##############################################################################
 3250 
 3251 
 3252 class BlockMaster:
 3253     "TIP: use blockin/out to add/del holders"
 3254 
 3255     def __init__(self):
 3256         self.BLK = []
 3257         self.HLD = []
 3258         self.PRP = []
 3259         self.depth = 0
 3260         self.count = 0
 3261         self.last = ""
 3262         self.tableparser = None
 3263         self.contains = {
 3264             "para": ["comment", "raw", "tagged"],
 3265             "verb": [],
 3266             "table": ["comment"],
 3267             "raw": [],
 3268             "tagged": [],
 3269             "comment": [],
 3270             "quote": ["quote", "comment", "raw", "tagged"],
 3271             "list": [
 3272                 "list",
 3273                 "numlist",
 3274                 "deflist",
 3275                 "para",
 3276                 "verb",
 3277                 "comment",
 3278                 "raw",
 3279                 "tagged",
 3280             ],
 3281             "numlist": [
 3282                 "list",
 3283                 "numlist",
 3284                 "deflist",
 3285                 "para",
 3286                 "verb",
 3287                 "comment",
 3288                 "raw",
 3289                 "tagged",
 3290             ],
 3291             "deflist": [
 3292                 "list",
 3293                 "numlist",
 3294                 "deflist",
 3295                 "para",
 3296                 "verb",
 3297                 "comment",
 3298                 "raw",
 3299                 "tagged",
 3300             ],
 3301             "bar": [],
 3302             "title": [],
 3303             "numtitle": [],
 3304         }
 3305         self.allblocks = list(self.contains.keys())
 3306 
 3307         # If one is found inside another, ignore the marks
 3308         self.exclusive = ["comment", "verb", "raw", "tagged"]
 3309 
 3310         # May we include bars inside quotes?
 3311         if rules["barinsidequote"]:
 3312             self.contains["quote"].append("bar")
 3313 
 3314     def block(self):
 3315         if not self.BLK:
 3316             return ""
 3317         return self.BLK[-1]
 3318 
 3319     def isblock(self, name=""):
 3320         return self.block() == name
 3321 
 3322     def prop(self, key):
 3323         if not self.PRP:
 3324             return ""
 3325         return self.PRP[-1].get(key) or ""
 3326 
 3327     def propset(self, key, val):
 3328         self.PRP[-1][key] = val
 3329         # Debug('BLOCK prop ++: %s->%s'%(key,repr(val)), 1)
 3330         # Debug('BLOCK props: %s'%(repr(self.PRP)), 1)
 3331 
 3332     def hold(self):
 3333         if not self.HLD:
 3334             return []
 3335         return self.HLD[-1]
 3336 
 3337     def holdadd(self, line):
 3338         if self.block().endswith("list"):
 3339             line = [line]
 3340         self.HLD[-1].append(line)
 3341         Debug("HOLD add: %s" % repr(line), 4)
 3342         Debug("FULL HOLD: %s" % self.HLD, 4)
 3343 
 3344     def holdaddsub(self, line):
 3345         self.HLD[-1][-1].append(line)
 3346         Debug("HOLD addsub: %s" % repr(line), 4)
 3347         Debug("FULL HOLD: %s" % self.HLD, 4)
 3348 
 3349     def holdextend(self, lines):
 3350         if self.block().endswith("list"):
 3351             lines = [lines]
 3352         self.HLD[-1].extend(lines)
 3353         Debug("HOLD extend: %s" % repr(lines), 4)
 3354         Debug("FULL HOLD: %s" % self.HLD, 4)
 3355 
 3356     def blockin(self, block):
 3357         ret = []
 3358         if block not in self.allblocks:
 3359             Error("Invalid block '%s'" % block)
 3360 
 3361         # First, let's close other possible open blocks
 3362         while self.block() and block not in self.contains[self.block()]:
 3363             ret.extend(self.blockout())
 3364 
 3365         # Now we can gladly add this new one
 3366         self.BLK.append(block)
 3367         self.HLD.append([])
 3368         self.PRP.append({})
 3369         self.count += 1
 3370         if block == "table":
 3371             self.tableparser = TableMaster()
 3372         # Deeper and deeper
 3373         self.depth = len(self.BLK)
 3374         Debug("block ++ ({}): {}".format(block, self.BLK), 3)
 3375         return ret
 3376 
 3377     def blockout(self):
 3378         if not self.BLK:
 3379             Error("No block to pop")
 3380         blockname = self.BLK.pop()
 3381         result = getattr(self, blockname)()
 3382         parsed = self.HLD.pop()
 3383         self.PRP.pop()
 3384         self.depth = len(self.BLK)
 3385         if blockname == "table":
 3386             del self.tableparser
 3387 
 3388         # Inserting a nested block into mother
 3389         if self.block():
 3390             if blockname != "comment":  # ignore comment blocks
 3391                 if self.block().endswith("list"):
 3392                     self.HLD[-1][-1].append(result)
 3393                 else:
 3394                     self.HLD[-1].append(result)
 3395             # Reset now. Mother block will have it all
 3396             result = []
 3397 
 3398         Debug("block -- ({}): {}".format(blockname, self.BLK), 3)
 3399         Debug("RELEASED ({}): {}".format(blockname, parsed), 3)
 3400 
 3401         # Save this top level block name (produced output)
 3402         # The next block will use it
 3403         if result:
 3404             self.last = blockname
 3405             Debug("BLOCK: %s" % result, 6)
 3406 
 3407         return result
 3408 
 3409     def _last_escapes(self, line):
 3410         return doFinalEscape(TARGET, line)
 3411 
 3412     def _get_escaped_hold(self):
 3413         ret = []
 3414         for line in self.hold():
 3415             if isinstance(line, list):
 3416                 ret.extend(line)
 3417             else:
 3418                 ret.append(self._last_escapes(line))
 3419         return ret
 3420 
 3421     def _remove_twoblanks(self, lastitem):
 3422         if len(lastitem) > 1 and lastitem[-2:] == ["", ""]:
 3423             return lastitem[:-2]
 3424         return lastitem
 3425 
 3426     def _should_add_blank_line(self, where, blockname):
 3427         "Validates the blanksaround* rules"
 3428 
 3429         # Nestable blocks: only mother blocks (level 1) are spaced
 3430         if blockname.endswith("list") and self.depth > 1:
 3431             return False
 3432 
 3433         # The blank line after the block is always added
 3434         if where == "after" and rules["blanksaround" + blockname]:
 3435             return True
 3436 
 3437         # The blank line before the block is only added if
 3438         # the previous block haven't added a blank line
 3439         # (to avoid consecutive blanks)
 3440         elif (
 3441             where == "before"
 3442             and rules["blanksaround" + blockname]
 3443             and not rules.get("blanksaround" + self.last)
 3444         ):
 3445             return True
 3446 
 3447         # Nested quotes are handled here,
 3448         # because the mother quote isn't closed yet
 3449         elif (
 3450             where == "before"
 3451             and blockname == "quote"
 3452             and rules["blanksaround" + blockname]
 3453             and self.depth > 1
 3454         ):
 3455             return True
 3456 
 3457         return False
 3458 
 3459     def comment(self):
 3460         return ""
 3461 
 3462     def raw(self):
 3463         lines = self.hold()
 3464         return [doEscape(TARGET, x) for x in lines]
 3465 
 3466     def tagged(self):
 3467         return self.hold()
 3468 
 3469     def para(self):
 3470         result = []
 3471         open_ = TAGS["paragraphOpen"]
 3472         close = TAGS["paragraphClose"]
 3473         lines = self._get_escaped_hold()
 3474 
 3475         # Blank line before?
 3476         if self._should_add_blank_line("before", "para"):
 3477             result.append("")
 3478 
 3479         # Open tag
 3480         if open_:
 3481             result.append(open_)
 3482 
 3483         # Pagemaker likes a paragraph as a single long line
 3484         if rules["onelinepara"]:
 3485             result.append(" ".join(lines))
 3486         # Others are normal :)
 3487         else:
 3488             result.extend(lines)
 3489 
 3490         # Close tag
 3491         if close:
 3492             result.append(close)
 3493 
 3494         # Blank line after?
 3495         if self._should_add_blank_line("after", "para"):
 3496             result.append("")
 3497 
 3498         return result
 3499 
 3500     def verb(self):
 3501         "Verbatim lines are not masked, so there's no need to unmask"
 3502         result = []
 3503         open_ = TAGS["blockVerbOpen"]
 3504         close = TAGS["blockVerbClose"]
 3505 
 3506         # Blank line before?
 3507         if self._should_add_blank_line("before", "verb"):
 3508             result.append("")
 3509 
 3510         # Open tag
 3511         if open_:
 3512             result.append(open_)
 3513 
 3514         # Get contents
 3515         for line in self.hold():
 3516             if not rules["verbblocknotescaped"]:
 3517                 line = doEscape(TARGET, line)
 3518             if TAGS["blockVerbLine"]:
 3519                 line = TAGS["blockVerbLine"] + line
 3520             if rules["indentverbblock"]:
 3521                 line = "  " + line
 3522             if rules["verbblockfinalescape"]:
 3523                 line = doFinalEscape(TARGET, line)
 3524             result.append(line)
 3525 
 3526         # Close tag
 3527         if close:
 3528             result.append(close)
 3529 
 3530         # Blank line after?
 3531         if self._should_add_blank_line("after", "verb"):
 3532             result.append("")
 3533 
 3534         return result
 3535 
 3536     def numtitle(self):
 3537         return self.title("numtitle")
 3538 
 3539     def title(self, name="title"):
 3540         result = []
 3541 
 3542         # Blank line before?
 3543         if self._should_add_blank_line("before", name):
 3544             result.append("")
 3545 
 3546         # Get contents
 3547         result.extend(TITLE.get())
 3548 
 3549         # Blank line after?
 3550         if self._should_add_blank_line("after", name):
 3551             result.append("")
 3552 
 3553         return result
 3554 
 3555     def table(self):
 3556         result = []
 3557 
 3558         # Blank line before?
 3559         if self._should_add_blank_line("before", "table"):
 3560             result.append("")
 3561 
 3562         # Rewrite all table cells by the unmasked and escaped data
 3563         lines = self._get_escaped_hold()
 3564         for i in range(len(lines)):
 3565             cells = lines[i].split(SEPARATOR)
 3566             self.tableparser.rows[i]["cells"] = cells
 3567         result.extend(self.tableparser.dump())
 3568 
 3569         # Blank line after?
 3570         if self._should_add_blank_line("after", "table"):
 3571             result.append("")
 3572 
 3573         return result
 3574 
 3575     def quote(self):
 3576         result = []
 3577         open_ = TAGS["blockQuoteOpen"]  # block based
 3578         close = TAGS["blockQuoteClose"]
 3579         qline = TAGS["blockQuoteLine"]  # line based
 3580         indent = tagindent = "\t" * self.depth
 3581 
 3582         # Apply rules
 3583         if rules["tagnotindentable"]:
 3584             tagindent = ""
 3585         if not rules["keepquoteindent"]:
 3586             indent = ""
 3587 
 3588         # Blank line before?
 3589         if self._should_add_blank_line("before", "quote"):
 3590             result.append("")
 3591 
 3592         # Open tag
 3593         if open_:
 3594             result.append(tagindent + open_)
 3595 
 3596         # Get contents
 3597         for item in self.hold():
 3598             if isinstance(item, list):
 3599                 result.extend(item)  # subquotes
 3600             else:
 3601                 item = regex["quote"].sub("", item)  # del TABs
 3602                 item = self._last_escapes(item)
 3603                 item = qline * self.depth + item
 3604                 result.append(indent + item)  # quote line
 3605 
 3606         # Close tag
 3607         if close:
 3608             result.append(tagindent + close)
 3609 
 3610         # Blank line after?
 3611         if self._should_add_blank_line("after", "quote"):
 3612             result.append("")
 3613 
 3614         return result
 3615 
 3616     def bar(self):
 3617         result = []
 3618         bar_tag = ""
 3619 
 3620         # Blank line before?
 3621         if self._should_add_blank_line("before", "bar"):
 3622             result.append("")
 3623 
 3624         # Get the original bar chars
 3625         bar_chars = self.hold()[0].strip()
 3626 
 3627         # Set bar type
 3628         if bar_chars.startswith("="):
 3629             bar_tag = TAGS["bar2"]
 3630         else:
 3631             bar_tag = TAGS["bar1"]
 3632 
 3633         # To avoid comment tag confusion like <!-- ------ --> (sgml)
 3634         if TAGS["comment"].count("--"):
 3635             bar_chars = bar_chars.replace("--", "__")
 3636 
 3637         # Get the bar tag (may contain \a)
 3638         result.append(regex["x"].sub(bar_chars, bar_tag))
 3639 
 3640         # Blank line after?
 3641         if self._should_add_blank_line("after", "bar"):
 3642             result.append("")
 3643 
 3644         return result
 3645 
 3646     def deflist(self):
 3647         return self.list("deflist")
 3648 
 3649     def numlist(self):
 3650         return self.list("numlist")
 3651 
 3652     def list(self, name="list"):
 3653         result = []
 3654         items = self.hold()
 3655         indent = self.prop("indent")
 3656         tagindent = indent
 3657         listline = TAGS.get(name + "ItemLine")
 3658         itemcount = 0
 3659 
 3660         if name == "deflist":
 3661             itemopen = TAGS[name + "Item1Open"]
 3662             itemclose = TAGS[name + "Item2Close"]
 3663             itemsep = TAGS[name + "Item1Close"] + TAGS[name + "Item2Open"]
 3664         else:
 3665             itemopen = TAGS[name + "ItemOpen"]
 3666             itemclose = TAGS[name + "ItemClose"]
 3667             itemsep = ""
 3668 
 3669         # Apply rules
 3670         if rules["tagnotindentable"]:
 3671             tagindent = ""
 3672         if not rules["keeplistindent"]:
 3673             indent = tagindent = ""
 3674 
 3675         # ItemLine: number of leading chars identifies list depth
 3676         if listline:
 3677             itemopen = listline * self.depth + itemopen
 3678 
 3679         # Adds trailing space on opening tags
 3680         if (name == "list" and rules["spacedlistitemopen"]) or (
 3681             name == "numlist" and rules["spacednumlistitemopen"]
 3682         ):
 3683             itemopen = itemopen + " "
 3684 
 3685         # Remove two-blanks from list ending mark, to avoid <p>
 3686         items[-1] = self._remove_twoblanks(items[-1])
 3687 
 3688         # Blank line before?
 3689         if self._should_add_blank_line("before", name):
 3690             result.append("")
 3691 
 3692         # Tag each list item (multiline items), store in listbody
 3693         itemopenorig = itemopen
 3694         listbody = []
 3695         widelist = 0
 3696         for item in items:
 3697 
 3698             # Add "manual" item count for noautonum targets
 3699             itemcount += 1
 3700             if name == "numlist" and not rules["autonumberlist"]:
 3701                 n = str(itemcount)
 3702                 itemopen = regex["x"].sub(n, itemopenorig)
 3703                 del n
 3704 
 3705             # Tag it
 3706             item[0] = self._last_escapes(item[0])
 3707             if name == "deflist":
 3708                 _, term, rest = item[0].split(SEPARATOR, 2)
 3709                 item[0] = rest
 3710                 if not item[0]:
 3711                     del item[0]  # to avoid <p>
 3712                 listbody.append(tagindent + itemopen + term + itemsep)
 3713             else:
 3714                 fullitem = tagindent + itemopen
 3715                 listbody.append(item[0].replace(SEPARATOR, fullitem))
 3716                 del item[0]
 3717 
 3718             # Process next lines for this item (if any)
 3719             for line in item:
 3720                 if isinstance(line, list):  # sublist inside
 3721                     listbody.extend(line)
 3722                 else:
 3723                     line = self._last_escapes(line)
 3724 
 3725                     # Blank lines turns to <p>
 3726                     if not line and rules["parainsidelist"]:
 3727                         line = indent + TAGS["paragraphOpen"] + TAGS["paragraphClose"]
 3728                         line = line.rstrip()
 3729                         widelist = 1
 3730 
 3731                     # Some targets don't like identation here (wiki)
 3732                     if not rules["keeplistindent"] or (
 3733                         name == "deflist" and rules["deflisttextstrip"]
 3734                     ):
 3735                         line = line.lstrip()
 3736 
 3737                     # Maybe we have a line prefix to add? (wiki)
 3738                     if name == "deflist" and TAGS["deflistItem2LinePrefix"]:
 3739                         line = TAGS["deflistItem2LinePrefix"] + line
 3740 
 3741                     listbody.append(line)
 3742 
 3743             # Close item (if needed)
 3744             if itemclose:
 3745                 listbody.append(tagindent + itemclose)
 3746 
 3747         if not widelist and rules["compactlist"]:
 3748             listopen = TAGS.get(name + "OpenCompact")
 3749             listclose = TAGS.get(name + "CloseCompact")
 3750         else:
 3751             listopen = TAGS.get(name + "Open")
 3752             listclose = TAGS.get(name + "Close")
 3753 
 3754         # Open list (not nestable lists are only opened at mother)
 3755         if listopen and not (rules["listnotnested"] and BLOCK.depth != 1):
 3756             result.append(tagindent + listopen)
 3757 
 3758         result.extend(listbody)
 3759 
 3760         # Close list (not nestable lists are only closed at mother)
 3761         if listclose and not (rules["listnotnested"] and self.depth != 1):
 3762             result.append(tagindent + listclose)
 3763 
 3764         # Blank line after?
 3765         if self._should_add_blank_line("after", name):
 3766             result.append("")
 3767 
 3768         return result
 3769 
 3770 
 3771 ##############################################################################
 3772 
 3773 
 3774 def listTargets():
 3775     """List available targets."""
 3776     for target, name in sorted(TARGET_NAMES.items()):
 3777         print("{:8}{}".format(target, name))
 3778 
 3779 
 3780 def get_file_body(file_):
 3781     "Returns all the document BODY lines"
 3782     return process_source_file(file_, noconf=1)[1][2]
 3783 
 3784 
 3785 def finish_him(outlist, config):
 3786     "Writing output to screen or file"
 3787     outfile = config["outfile"]
 3788     outlist = unmaskEscapeChar(outlist)
 3789     outlist = expandLineBreaks(outlist)
 3790 
 3791     # Apply PostProc filters
 3792     if config["postproc"]:
 3793         filters = compile_filters(config["postproc"], "Invalid PostProc filter regex")
 3794         postoutlist = []
 3795         errmsg = "Invalid PostProc filter replacement"
 3796         for line in outlist:
 3797             for rgx, repl in filters:
 3798                 try:
 3799                     line = rgx.sub(repl, line)
 3800                 except Exception:
 3801                     Error("{}: '{}'".format(errmsg, repl))
 3802             postoutlist.append(line)
 3803         outlist = postoutlist[:]
 3804 
 3805     if outfile == MODULEOUT:
 3806         return outlist
 3807     elif outfile == STDOUT:
 3808         Message("Saving results to the output file", 1)
 3809         for line in outlist:
 3810             print(line)
 3811     else:
 3812         Message("Saving results to the output file", 1)
 3813         Savefile(outfile, outlist)
 3814         if not QUIET:
 3815             print("{} wrote {}".format(my_name, outfile))
 3816 
 3817 
 3818 def toc_tagger(toc, config):
 3819     "Returns the tagged TOC, as a single tag or a tagged list"
 3820     if not config["toc"]:
 3821         return []
 3822     elif TAGS["TOC"]:
 3823         # Our TOC list is not needed, the target already knows how to do a TOC
 3824         ret = [TAGS["TOC"]]
 3825     # Convert the TOC list (t2t-marked) to the target's list format
 3826     else:
 3827         fakeconf = config.copy()
 3828         fakeconf["headers"] = 0
 3829         fakeconf["preproc"] = []
 3830         fakeconf["postproc"] = []
 3831         ret, _ = convert(toc, fakeconf)
 3832         set_global_config(config)  # restore config
 3833     return ret
 3834 
 3835 
 3836 def toc_formatter(toc, config):
 3837     "Formats TOC for automatic placement between headers and body"
 3838 
 3839     if not config["toc"]:
 3840         return []  # TOC disabled
 3841     ret = toc
 3842 
 3843     # TOC open/close tags (if any)
 3844     if TAGS["tocOpen"]:
 3845         ret.insert(0, TAGS["tocOpen"])
 3846     if TAGS["tocClose"]:
 3847         ret.append(TAGS["tocClose"])
 3848 
 3849     # Autotoc specific formatting
 3850     if rules["autotocwithbars"]:  # TOC between bars
 3851         para = TAGS["paragraphOpen"] + TAGS["paragraphClose"]
 3852         bar = regex["x"].sub("-" * DFT_TEXT_WIDTH, TAGS["bar1"])
 3853         tocbar = [para, bar, para]
 3854         ret = tocbar + ret + tocbar
 3855     if rules["blankendautotoc"]:  # blank line after TOC
 3856         ret.append("")
 3857     if rules["autotocnewpagebefore"]:  # page break before TOC
 3858         ret.insert(0, TAGS["pageBreak"])
 3859     if rules["autotocnewpageafter"]:  # page break after TOC
 3860         ret.append(TAGS["pageBreak"])
 3861     return ret
 3862 
 3863 
 3864 def doHeader(headers, config):
 3865     if not config["headers"]:
 3866         return []
 3867     if not headers:
 3868         headers = ["", "", ""]
 3869     target = config["target"]
 3870 
 3871     template = HEADER_TEMPLATE[target].split("\n")
 3872 
 3873     style = config.get("style")
 3874     # Tex: strip .sty extension from each style filename.
 3875     if target == "tex":
 3876         style = [os.path.splitext(x)[0] for x in style]
 3877 
 3878     head_data = {"STYLE": style, "ENCODING": get_encoding_string(target)}
 3879 
 3880     # Parse header contents
 3881     for i in 0, 1, 2:
 3882         contents = headers[i]
 3883         # Escapes - on tex, just do it if any \tag{} present
 3884         if target != "tex" or (target == "tex" and re.search(r"\\\w+{", contents)):
 3885             contents = doEscape(target, contents)
 3886         if target in ["lout", "tex"]:
 3887             contents = doFinalEscape(target, contents)
 3888 
 3889         head_data["HEADER%d" % (i + 1)] = contents
 3890 
 3891     Debug("Header Data: %s" % head_data, 1)
 3892 
 3893     # Scan for empty dictionary keys
 3894     # If found, scan template lines for that key reference
 3895     # If found, remove the reference
 3896     # If there isn't any other key reference on the same line, remove it
 3897     # TODO loop by template line > key
 3898     for key, value in head_data.items():
 3899         if value:
 3900             continue
 3901         for line in template:
 3902             if line.count("%%(%s)s" % key):
 3903                 sline = line.replace("%%(%s)s" % key, "")
 3904                 if not re.search(r"%\([A-Z0-9]+\)s", sline):
 3905                     template.remove(line)
 3906     # Style is a multiple tag.
 3907     # - If none or just one, use default template
 3908     # - If two or more, insert extra lines in a loop (and remove original)
 3909     styles = head_data["STYLE"]
 3910     if len(styles) == 1:
 3911         head_data["STYLE"] = styles[0]
 3912     elif len(styles) > 1:
 3913         style_mark = "%(STYLE)s"
 3914         for i in range(len(template)):
 3915             if template[i].count(style_mark):
 3916                 while styles:
 3917                     template.insert(
 3918                         i + 1, template[i].replace(style_mark, styles.pop())
 3919                     )
 3920                 del template[i]
 3921                 break
 3922     # Populate template with data (dict expansion)
 3923     template = "\n".join(template) % head_data
 3924 
 3925     return template.split("\n")
 3926 
 3927 
 3928 def doFooter(config):
 3929     ret = []
 3930 
 3931     # No footer. The --no-headers option hides header AND footer
 3932     if not config["headers"]:
 3933         return []
 3934 
 3935     # Only add blank line before footer if last block doesn't added by itself
 3936     if not rules.get("blanksaround" + BLOCK.last):
 3937         ret.append("")
 3938 
 3939     # Maybe we have a specific tag to close the document?
 3940     if TAGS["EOD"]:
 3941         ret.append(TAGS["EOD"])
 3942 
 3943     return ret
 3944 
 3945 
 3946 def doEscape(target, txt):
 3947     "Target-specific special escapes. Apply *before* insert any tag."
 3948     tmpmask = "vvvvThisEscapingSuxvvvv"
 3949     if target in ("html", "sgml", "dbk"):
 3950         txt = re.sub("&", "&amp;", txt)
 3951         txt = re.sub("<", "&lt;", txt)
 3952         txt = re.sub(">", "&gt;", txt)
 3953         if target == "sgml":
 3954             txt = re.sub("\xff", "&yuml;", txt)  # "+y
 3955     elif target == "mgp":
 3956         txt = re.sub("^%", " %", txt)  # add leading blank to avoid parse
 3957     elif target == "man":
 3958         txt = re.sub("^([.'])", "\\&\\1", txt)  # command ID
 3959         txt = txt.replace(ESCCHAR, ESCCHAR + "e")  # \e
 3960     elif target == "lout":
 3961         # TIP: / moved to FinalEscape to avoid //italic//
 3962         # TIP: these are also converted by lout:  ...  ---  --
 3963         txt = txt.replace(ESCCHAR, tmpmask)  # \
 3964         txt = txt.replace('"', '"%s""' % ESCCHAR)  # "\""
 3965         txt = re.sub("([|&{}@#^~])", '"\\1"', txt)  # "@"
 3966         txt = txt.replace(tmpmask, '"%s"' % (ESCCHAR * 2))  # "\\"
 3967     elif target == "tex":
 3968         # Mark literal \ to be changed to $\backslash$ later
 3969         txt = txt.replace(ESCCHAR, tmpmask)
 3970         txt = re.sub("([#$&%{}])", ESCCHAR + r"\1", txt)  # \%
 3971         txt = re.sub("([~^])", ESCCHAR + r"\1{}", txt)  # \~{}
 3972         txt = re.sub("([<|>])", r"$\1$", txt)  # $>$
 3973         txt = txt.replace(tmpmask, maskEscapeChar(r"$\backslash$"))
 3974         # TIP the _ is escaped at the end
 3975     return txt
 3976 
 3977 
 3978 # TODO man: where - really needs to be escaped?
 3979 def doFinalEscape(target, txt):
 3980     "Last escapes of each line"
 3981     if target == "man":
 3982         txt = txt.replace("-", r"\-")
 3983     elif target == "sgml":
 3984         txt = txt.replace("[", "&lsqb;")
 3985     elif target == "lout":
 3986         txt = txt.replace("/", '"/"')
 3987     elif target == "tex":
 3988         txt = txt.replace("_", r"\_")
 3989         txt = txt.replace("vvvvTexUndervvvv", "_")  # shame!
 3990         txt = txt.replace("vvvUnderscoreInRawTextvvv", "_")
 3991         txt = txt.replace("vvvUnderscoreInTaggedTextvvv", "_")
 3992     return txt
 3993 
 3994 
 3995 def EscapeCharHandler(action, data):
 3996     "Mask/Unmask the Escape Char on the given string"
 3997     if not data.strip():
 3998         return data
 3999     if action not in ("mask", "unmask"):
 4000         Error("EscapeCharHandler: Invalid action '%s'" % action)
 4001     if action == "mask":
 4002         return data.replace("\\", ESCCHAR)
 4003     else:
 4004         return data.replace(ESCCHAR, "\\")
 4005 
 4006 
 4007 def maskEscapeChar(data):
 4008     "Replace any escape char with a text mask (Input: str or list)"
 4009     if isinstance(data, list):
 4010         return [EscapeCharHandler("mask", x) for x in data]
 4011     return EscapeCharHandler("mask", data)
 4012 
 4013 
 4014 def unmaskEscapeChar(data):
 4015     "Undo the escape char masking (Input: str or list)"
 4016     if isinstance(data, list):
 4017         return [EscapeCharHandler("unmask", x) for x in data]
 4018     return EscapeCharHandler("unmask", data)
 4019 
 4020 
 4021 # Convert ['foo\nbar'] to ['foo', 'bar']
 4022 def expandLineBreaks(mylist):
 4023     ret = []
 4024     for line in mylist:
 4025         ret.extend(line.split("\n"))
 4026     return ret
 4027 
 4028 
 4029 def compile_filters(filters, errmsg="Filter"):
 4030     if filters:
 4031         for i in range(len(filters)):
 4032             patt, repl = filters[i]
 4033             try:
 4034                 rgx = re.compile(patt)
 4035             except Exception:
 4036                 Error("{}: '{}'".format(errmsg, patt))
 4037             filters[i] = (rgx, repl)
 4038     return filters
 4039 
 4040 
 4041 def enclose_me(tagname, txt):
 4042     return TAGS.get(tagname + "Open") + txt + TAGS.get(tagname + "Close")
 4043 
 4044 
 4045 def beautify_me(name, font, line):
 4046     "where name is: bold, italic, underline or strike"
 4047 
 4048     # Exception: Doesn't parse an horizontal bar as strike
 4049     if name == "strike" and regex["bar"].search(line):
 4050         return line
 4051 
 4052     open_ = TAGS["%sOpen" % font]
 4053     close = TAGS["%sClose" % font]
 4054     txt = r"{}\1{}".format(open_, close)
 4055     line = regex[font].sub(txt, line)
 4056     return line
 4057 
 4058 
 4059 def get_tagged_link(label, url):
 4060     ret = ""
 4061     target = CONF["target"]
 4062     image_re = regex["img"]
 4063 
 4064     # Set link type
 4065     if regex["email"].match(url):
 4066         linktype = "email"
 4067     else:
 4068         linktype = "url"
 4069 
 4070     # Escape specials from TEXT parts
 4071     label = doEscape(target, label)
 4072 
 4073     # Escape specials from link URL
 4074     if not rules["linkable"] or rules["escapeurl"]:
 4075         url = doEscape(target, url)
 4076 
 4077     # Adding protocol to guessed link
 4078     guessurl = ""
 4079     if linktype == "url" and re.match("(?i)" + regex["_urlskel"]["guess"], url):
 4080         if url[0] in "Ww":
 4081             guessurl = "http://" + url
 4082         else:
 4083             guessurl = "ftp://" + url
 4084 
 4085         # Not link aware targets -> protocol is useless
 4086         if not rules["linkable"]:
 4087             guessurl = ""
 4088 
 4089     # Simple link (not guessed)
 4090     if not label and not guessurl:
 4091         # Just add link data to tag
 4092         tag = TAGS[linktype]
 4093         ret = regex["x"].sub(url, tag)
 4094 
 4095     # Named link or guessed simple link
 4096     else:
 4097         # Adjusts for guessed link
 4098         if not label:
 4099             label = url  # no protocol
 4100         if guessurl:
 4101             url = guessurl  # with protocol
 4102 
 4103         # Image inside link!
 4104         if image_re.match(label):
 4105             if rules["imglinkable"]:  # get image tag
 4106                 label = parse_images(label)
 4107             else:
 4108                 # img@link !supported
 4109                 label = "(%s)" % image_re.match(label).group(1)
 4110 
 4111         # Putting data on the right appearance order
 4112         if rules["labelbeforelink"] or not rules["linkable"]:
 4113             urlorder = [label, url]  # label before link
 4114         else:
 4115             urlorder = [url, label]  # link before label
 4116 
 4117         # Add link data to tag (replace \a's)
 4118         ret = TAGS["%sMark" % linktype]
 4119         for data in urlorder:
 4120             ret = regex["x"].sub(data, ret, 1)
 4121 
 4122     return ret
 4123 
 4124 
 4125 def parse_deflist_term(line):
 4126     "Extract and parse definition list term contents"
 4127     img_re = regex["img"]
 4128     term = regex["deflist"].search(line).group(3)
 4129 
 4130     # Mask image inside term as (image.jpg), where not supported
 4131     if not rules["imgasdefterm"] and img_re.search(term):
 4132         while img_re.search(term):
 4133             imgfile = img_re.search(term).group(1)
 4134             term = img_re.sub("(%s)" % imgfile, term, 1)
 4135 
 4136     # TODO tex: escape ] on term. \], \rbrack{} and \verb!]! don't work :(
 4137     return term
 4138 
 4139 
 4140 def get_image_align(line):
 4141     "Return the image (first found) align for the given line"
 4142 
 4143     # First clear marks that can mess align detection
 4144     line = re.sub(SEPARATOR + "$", "", line)  # remove deflist sep
 4145     line = re.sub("^" + SEPARATOR, "", line)  # remove list sep
 4146     line = re.sub("^[\t]+", "", line)  # remove quote mark
 4147 
 4148     # Get image position on the line
 4149     m = regex["img"].search(line)
 4150     ini = m.start()
 4151     head = 0
 4152     end = m.end()
 4153     tail = len(line)
 4154 
 4155     # The align detection algorithm
 4156     if ini == head and end != tail:
 4157         align = "left"  # ^img + text$
 4158     elif ini != head and end == tail:
 4159         align = "right"  # ^text + img$
 4160     else:
 4161         align = "center"  # default align
 4162 
 4163     # Some special cases
 4164     if BLOCK.isblock("table"):
 4165         align = "center"  # ignore when table
 4166 
 4167     return align
 4168 
 4169 
 4170 def get_encoding_string(target):
 4171     return "utf8" if target == "tex" else "utf-8"
 4172 
 4173 
 4174 def process_source_file(file_="", noconf=0, contents=None):
 4175     """
 4176     Find and Join all the configuration available for a source file.
 4177     No sanity checking is done on this step.
 4178     It also extracts the source document parts into separate holders.
 4179 
 4180     The config scan order is:
 4181             1. The user configuration file (i.e. $HOME/.txt2tagsrc)
 4182             2. The source document's CONF area
 4183             3. The command line options
 4184 
 4185     The return data is a tuple of two items:
 4186             1. The parsed config dictionary
 4187             2. The document's parts, as a (head, conf, body) tuple
 4188 
 4189     All the conversion process will be based on the data and
 4190     configuration returned by this function.
 4191     The source file is read in this step only.
 4192     """
 4193     if contents:
 4194         source = SourceDocument(contents=contents)
 4195     else:
 4196         source = SourceDocument(file_)
 4197     head, conf, body = source.split()
 4198     Message("Source document contents stored", 2)
 4199     if not noconf:
 4200         # Read document config
 4201         source_raw = source.get_raw_config()
 4202         # Join all the config directives found, then parse it
 4203         full_raw = RC_RAW + source_raw + CMDLINE_RAW
 4204         Message("Parsing and saving all config found (%03d items)" % (len(full_raw)), 1)
 4205         full_parsed = ConfigMaster(full_raw).parse()
 4206         # Add manually the filename to the conf dic
 4207         if contents:
 4208             full_parsed["sourcefile"] = MODULEIN
 4209             full_parsed["infile"] = MODULEIN
 4210             full_parsed["outfile"] = MODULEOUT
 4211         else:
 4212             full_parsed["sourcefile"] = file_
 4213         Debug("Complete config: %s" % full_parsed, 1)
 4214     else:
 4215         full_parsed = {}
 4216     return full_parsed, (head, conf, body)
 4217 
 4218 
 4219 def convert_file(headers, body, config, first_body_lineno=1):
 4220     config = ConfigMaster().sanity(config)
 4221     # Compose the target file Headers
 4222     # TODO escape line before?
 4223     # TODO see exceptions by tex and mgp
 4224     Message("Composing target Headers", 1)
 4225     target_head = doHeader(headers, config)
 4226     # Parse the full marked body into tagged target
 4227 
 4228     Message("Composing target Body", 1)
 4229     target_body, marked_toc = convert(body, config, firstlinenr=first_body_lineno)
 4230 
 4231     # Compose the target file Footer
 4232     Message("Composing target Footer", 1)
 4233     target_foot = doFooter(config)
 4234 
 4235     # Make TOC (if needed)
 4236     Message("Composing target TOC", 1)
 4237     tagged_toc = toc_tagger(marked_toc, config)
 4238     target_toc = toc_formatter(tagged_toc, config)
 4239 
 4240     # Finally, we have our document
 4241     outlist = target_head + target_toc + target_body + target_foot
 4242     return finish_him(outlist, config)
 4243 
 4244 
 4245 def parse_images(line):
 4246     "Tag all images found"
 4247     while regex["img"].search(line) and TAGS["img"] != "[\a]":
 4248         txt = regex["img"].search(line).group(1)
 4249         tag = TAGS["img"]
 4250 
 4251         # If target supports image alignment, here we go
 4252         if rules["imgalignable"]:
 4253 
 4254             align = get_image_align(line)  # right
 4255             align_name = align.capitalize()  # Right
 4256 
 4257             # The align is a full tag, or part of the image tag (~A~)
 4258             if TAGS["imgAlign" + align_name]:
 4259                 tag = TAGS["imgAlign" + align_name]
 4260             else:
 4261                 align_tag = TAGS["_imgAlign" + align_name]
 4262                 tag = regex["_imgAlign"].sub(align_tag, tag, 1)
 4263 
 4264         if TARGET == "tex":
 4265             tag = re.sub(r"\\b", r"\\\\b", tag)
 4266             txt = txt.replace("_", "vvvvTexUndervvvv")
 4267 
 4268         # Ugly hack to avoid infinite loop when target's image tag contains []
 4269         tag = tag.replace("[", "vvvvEscapeSquareBracketvvvv")
 4270 
 4271         line = regex["img"].sub(tag, line, 1)
 4272         line = regex["x"].sub(txt, line, 1)
 4273     return line.replace("vvvvEscapeSquareBracketvvvv", "[")
 4274 
 4275 
 4276 def add_inline_tags(line):
 4277     # Beautifiers
 4278     for beauti, font in [
 4279         ("bold", "fontBold"),
 4280         ("italic", "fontItalic"),
 4281         ("underline", "fontUnderline"),
 4282         ("strike", "fontStrike"),
 4283     ]:
 4284         if regex[font].search(line):
 4285             line = beautify_me(beauti, font, line)
 4286 
 4287     line = parse_images(line)
 4288     return line
 4289 
 4290 
 4291 def get_include_contents(file_, path=""):
 4292     "Parses %!include: value and extract file contents"
 4293     ids = {"`": "verb", '"': "raw", "'": "tagged"}
 4294     id_ = "t2t"
 4295     # Set include type and remove identifier marks
 4296     mark = file_[0]
 4297     if mark in ids.keys():
 4298         if file_[:2] == file_[-2:] == mark * 2:
 4299             id_ = ids[mark]  # set type
 4300             file_ = file_[2:-2]  # remove marks
 4301     # Handle remote dir execution
 4302     filepath = os.path.join(path, file_)
 4303     # Read included file contents
 4304     lines = Readfile(filepath)
 4305     # Default txt2tags marked text, just BODY matters
 4306     if id_ == "t2t":
 4307         lines = get_file_body(filepath)
 4308         # TODO fix images relative path if file has a path, ie.:
 4309         # chapter1/index.t2t (wait until tree parsing)
 4310         # TODO for the images path fix, also respect outfile path,
 4311         # if different from infile (wait until tree parsing)
 4312         lines.insert(0, "%INCLUDED({}) starts here: {}".format(id_, file_))
 4313         # This appears when included hit EOF with verbatim area open
 4314         # lines.append('%%INCLUDED(%s) ends here: %s'%(id_,file_))
 4315     return id_, lines
 4316 
 4317 
 4318 def set_global_config(config):
 4319     global CONF, TAGS, regex, rules, TARGET
 4320     CONF = config
 4321     rules = getRules(CONF)
 4322     TAGS = getTags(CONF)
 4323     regex = getRegexes()
 4324     TARGET = config["target"]  # save for buggy functions that need global
 4325 
 4326 
 4327 def convert(bodylines, config, firstlinenr=1):
 4328     global BLOCK, TITLE
 4329 
 4330     set_global_config(config)
 4331 
 4332     target = config["target"]
 4333     BLOCK = BlockMaster()
 4334     MASK = MaskMaster()
 4335     TITLE = TitleMaster()
 4336 
 4337     ret = []
 4338     f_lastwasblank = 0
 4339 
 4340     # Compiling all PreProc regexes
 4341     pre_filter = compile_filters(CONF["preproc"], "Invalid PreProc filter regex")
 4342 
 4343     # Let's mark it up!
 4344     linenr = firstlinenr - 1
 4345     lineref = 0
 4346     while lineref < len(bodylines):
 4347         # Defaults
 4348         MASK.reset()
 4349         results_box = ""
 4350 
 4351         untouchedline = bodylines[lineref]
 4352 
 4353         line = re.sub("[\n\r]+$", "", untouchedline)  # del line break
 4354 
 4355         # Apply PreProc filters
 4356         if pre_filter:
 4357             errmsg = "Invalid PreProc filter replacement"
 4358             for rgx, repl in pre_filter:
 4359                 try:
 4360                     line = rgx.sub(repl, line)
 4361                 except Exception:
 4362                     Error("{}: '{}'".format(errmsg, repl))
 4363 
 4364         line = maskEscapeChar(line)  # protect \ char
 4365         linenr += 1
 4366         lineref += 1
 4367 
 4368         Debug(repr(line), 2, linenr)  # heavy debug: show each line
 4369 
 4370         # ------------------[ Comment Block ]------------------------
 4371 
 4372         # We're already on a comment block
 4373         if BLOCK.block() == "comment":
 4374 
 4375             # Closing comment
 4376             if regex["blockCommentClose"].search(line):
 4377                 ret.extend(BLOCK.blockout() or [])
 4378                 continue
 4379 
 4380             # Normal comment-inside line. Ignore it.
 4381             continue
 4382 
 4383         # Detecting comment block init
 4384         if (
 4385             regex["blockCommentOpen"].search(line)
 4386             and BLOCK.block() not in BLOCK.exclusive
 4387         ):
 4388             ret.extend(BLOCK.blockin("comment"))
 4389             continue
 4390 
 4391         # -------------------------[ Tagged Text ]----------------------
 4392 
 4393         # We're already on a tagged block
 4394         if BLOCK.block() == "tagged":
 4395 
 4396             # Closing tagged
 4397             if regex["blockTaggedClose"].search(line):
 4398                 ret.extend(BLOCK.blockout())
 4399                 continue
 4400 
 4401             # Normal tagged-inside line
 4402             BLOCK.holdadd(line)
 4403             continue
 4404 
 4405         # Detecting tagged block init
 4406         if (
 4407             regex["blockTaggedOpen"].search(line)
 4408             and BLOCK.block() not in BLOCK.exclusive
 4409         ):
 4410             ret.extend(BLOCK.blockin("tagged"))
 4411             continue
 4412 
 4413         # One line tagged text
 4414         if regex["1lineTagged"].search(line) and BLOCK.block() not in BLOCK.exclusive:
 4415             ret.extend(BLOCK.blockin("tagged"))
 4416             line = regex["1lineTagged"].sub("", line)
 4417             BLOCK.holdadd(line)
 4418             ret.extend(BLOCK.blockout())
 4419             continue
 4420 
 4421         # -------------------------[ Raw Text ]----------------------
 4422 
 4423         # We're already on a raw block
 4424         if BLOCK.block() == "raw":
 4425 
 4426             # Closing raw
 4427             if regex["blockRawClose"].search(line):
 4428                 ret.extend(BLOCK.blockout())
 4429                 continue
 4430 
 4431             # Normal raw-inside line
 4432             BLOCK.holdadd(line)
 4433             continue
 4434 
 4435         # Detecting raw block init
 4436         if regex["blockRawOpen"].search(line) and BLOCK.block() not in BLOCK.exclusive:
 4437             ret.extend(BLOCK.blockin("raw"))
 4438             continue
 4439 
 4440         # One line raw text
 4441         if regex["1lineRaw"].search(line) and BLOCK.block() not in BLOCK.exclusive:
 4442             ret.extend(BLOCK.blockin("raw"))
 4443             line = regex["1lineRaw"].sub("", line)
 4444             BLOCK.holdadd(line)
 4445             ret.extend(BLOCK.blockout())
 4446             continue
 4447 
 4448         # ------------------------[ Verbatim  ]----------------------
 4449 
 4450         # TIP We'll never support beautifiers inside verbatim
 4451 
 4452         # Closing table mapped to verb
 4453         if (
 4454             BLOCK.block() == "verb"
 4455             and BLOCK.prop("mapped") == "table"
 4456             and not regex["table"].search(line)
 4457         ):
 4458             ret.extend(BLOCK.blockout())
 4459 
 4460         # We're already on a verb block
 4461         if BLOCK.block() == "verb":
 4462 
 4463             # Closing verb
 4464             if regex["blockVerbClose"].search(line):
 4465                 ret.extend(BLOCK.blockout())
 4466                 continue
 4467 
 4468             # Normal verb-inside line
 4469             BLOCK.holdadd(line)
 4470             continue
 4471 
 4472         # Detecting verb block init
 4473         if regex["blockVerbOpen"].search(line) and BLOCK.block() not in BLOCK.exclusive:
 4474             ret.extend(BLOCK.blockin("verb"))
 4475             f_lastwasblank = 0
 4476             continue
 4477 
 4478         # One line verb-formatted text
 4479         if regex["1lineVerb"].search(line) and BLOCK.block() not in BLOCK.exclusive:
 4480             ret.extend(BLOCK.blockin("verb"))
 4481             line = regex["1lineVerb"].sub("", line)
 4482             BLOCK.holdadd(line)
 4483             ret.extend(BLOCK.blockout())
 4484             f_lastwasblank = 0
 4485             continue
 4486 
 4487         # Tables are mapped to verb when target is not table-aware
 4488         if not rules["tableable"] and regex["table"].search(line):
 4489             if not BLOCK.isblock("verb"):
 4490                 ret.extend(BLOCK.blockin("verb"))
 4491                 BLOCK.propset("mapped", "table")
 4492                 BLOCK.holdadd(line)
 4493                 continue
 4494 
 4495         # ---------------------[ blank lines ]-----------------------
 4496 
 4497         if regex["blankline"].search(line):
 4498 
 4499             # Close open paragraph
 4500             if BLOCK.isblock("para"):
 4501                 ret.extend(BLOCK.blockout())
 4502                 f_lastwasblank = 1
 4503                 continue
 4504 
 4505             # Close all open tables
 4506             if BLOCK.isblock("table"):
 4507                 ret.extend(BLOCK.blockout())
 4508                 f_lastwasblank = 1
 4509                 continue
 4510 
 4511             # Close all open quotes
 4512             while BLOCK.isblock("quote"):
 4513                 ret.extend(BLOCK.blockout())
 4514 
 4515             # Closing all open lists
 4516             if f_lastwasblank:  # 2nd consecutive blank
 4517                 if BLOCK.block().endswith("list"):
 4518                     BLOCK.holdaddsub("")  # helps parser
 4519                 while BLOCK.depth:  # closes list (if any)
 4520                     ret.extend(BLOCK.blockout())
 4521                 continue  # ignore consecutive blanks
 4522 
 4523             # Paragraph (if any) is wanted inside lists also
 4524             if BLOCK.block().endswith("list"):
 4525                 BLOCK.holdaddsub("")
 4526 
 4527             f_lastwasblank = 1
 4528             continue
 4529 
 4530         # ---------------------[ special ]---------------------------
 4531 
 4532         if regex["special"].search(line):
 4533 
 4534             targ, key, val = ConfigLines().parse_line(line, None, target)
 4535 
 4536             if key:
 4537                 Debug("Found config '{}', value '{}'".format(key, val), 1, linenr)
 4538             else:
 4539                 Debug("Bogus Special Line", 1, linenr)
 4540 
 4541             # %!include command
 4542             if key == "include":
 4543                 incpath = os.path.dirname(CONF["sourcefile"])
 4544                 incfile = val
 4545                 err = "A file cannot include itself (loop!)"
 4546                 if CONF["sourcefile"] == incfile:
 4547                     Error("{}: {}".format(err, incfile))
 4548                 inctype, inclines = get_include_contents(incfile, incpath)
 4549 
 4550                 # Verb, raw and tagged are easy
 4551                 if inctype != "t2t":
 4552                     ret.extend(BLOCK.blockin(inctype))
 4553                     BLOCK.holdextend(inclines)
 4554                     ret.extend(BLOCK.blockout())
 4555                 else:
 4556                     # Insert include lines into body
 4557                     # TODO include maxdepth limit
 4558                     bodylines = bodylines[:lineref] + inclines + bodylines[lineref:]
 4559 
 4560                 # This line is done, go to next
 4561                 continue
 4562 
 4563         # ---------------------[ Comments ]--------------------------
 4564 
 4565         # Just skip them
 4566         if regex["comment"].search(line):
 4567             continue
 4568 
 4569         # ---------------------[ Triggers ]--------------------------
 4570 
 4571         # Valid line, reset blank status
 4572         f_lastwasblank = 0
 4573 
 4574         # Any NOT quote line closes all open quotes
 4575         if BLOCK.isblock("quote") and not regex["quote"].search(line):
 4576             while BLOCK.isblock("quote"):
 4577                 ret.extend(BLOCK.blockout())
 4578 
 4579         # Any NOT table line closes an open table
 4580         if BLOCK.isblock("table") and not regex["table"].search(line):
 4581             ret.extend(BLOCK.blockout())
 4582 
 4583         # ---------------------[ Horizontal Bar ]--------------------
 4584 
 4585         if regex["bar"].search(line):
 4586 
 4587             # Bars inside quotes are handled on the Quote processing
 4588             # Otherwise we parse the bars right here
 4589             #
 4590             if not (BLOCK.isblock("quote") or regex["quote"].search(line)) or (
 4591                 BLOCK.isblock("quote") and not rules["barinsidequote"]
 4592             ):
 4593 
 4594                 # Close all the opened blocks
 4595                 ret.extend(BLOCK.blockin("bar"))
 4596 
 4597                 # Extract the bar chars (- or =)
 4598                 m = regex["bar"].search(line)
 4599                 bar_chars = m.group(2)
 4600 
 4601                 # Process and dump the tagged bar
 4602                 BLOCK.holdadd(bar_chars)
 4603                 ret.extend(BLOCK.blockout())
 4604                 Debug("BAR: %s" % line, 6)
 4605 
 4606                 # We're done, nothing more to process
 4607                 continue
 4608 
 4609         # ---------------------[ Title ]-----------------------------
 4610 
 4611         if (
 4612             regex["title"].search(line) or regex["numtitle"].search(line)
 4613         ) and not BLOCK.block().endswith("list"):
 4614 
 4615             if regex["title"].search(line):
 4616                 name = "title"
 4617             else:
 4618                 name = "numtitle"
 4619 
 4620             # Close all the opened blocks
 4621             ret.extend(BLOCK.blockin(name))
 4622 
 4623             # Process title
 4624             TITLE.add(line)
 4625             ret.extend(BLOCK.blockout())
 4626 
 4627             # We're done, nothing more to process
 4628             continue
 4629 
 4630         # ---------------------[ apply masks ]-----------------------
 4631 
 4632         line = MASK.mask(line)
 4633 
 4634         # XXX from here, only block-inside lines will pass
 4635 
 4636         # ---------------------[ Quote ]-----------------------------
 4637 
 4638         if regex["quote"].search(line):
 4639 
 4640             # Store number of leading TABS
 4641             quotedepth = len(regex["quote"].search(line).group(0))
 4642 
 4643             # SGML doesn't support nested quotes
 4644             if rules["quotenotnested"]:
 4645                 quotedepth = 1
 4646 
 4647             # Don't cross depth limit
 4648             maxdepth = rules["quotemaxdepth"]
 4649             if maxdepth and quotedepth > maxdepth:
 4650                 quotedepth = maxdepth
 4651 
 4652             # New quote
 4653             if not BLOCK.isblock("quote"):
 4654                 ret.extend(BLOCK.blockin("quote"))
 4655 
 4656             # New subquotes
 4657             while BLOCK.depth < quotedepth:
 4658                 BLOCK.blockin("quote")
 4659 
 4660             # Closing quotes
 4661             while quotedepth < BLOCK.depth:
 4662                 ret.extend(BLOCK.blockout())
 4663 
 4664             # Bar inside quote
 4665             if regex["bar"].search(line) and rules["barinsidequote"]:
 4666                 tempBlock = BlockMaster()
 4667                 tagged_bar = []
 4668                 tagged_bar.extend(tempBlock.blockin("bar"))
 4669                 tempBlock.holdadd(line)
 4670                 tagged_bar.extend(tempBlock.blockout())
 4671                 BLOCK.holdextend(tagged_bar)
 4672                 continue
 4673 
 4674         # ---------------------[ Lists ]-----------------------------
 4675 
 4676         # An empty item also closes the current list
 4677         if BLOCK.block().endswith("list"):
 4678             m = regex["listclose"].match(line)
 4679             if m:
 4680                 listindent = m.group(1)
 4681                 listtype = m.group(2)
 4682                 currlisttype = BLOCK.prop("type")
 4683                 currlistindent = BLOCK.prop("indent")
 4684                 if listindent == currlistindent and listtype == currlisttype:
 4685                     ret.extend(BLOCK.blockout())
 4686                     continue
 4687 
 4688         if (
 4689             regex["list"].search(line)
 4690             or regex["numlist"].search(line)
 4691             or regex["deflist"].search(line)
 4692         ):
 4693 
 4694             listindent = BLOCK.prop("indent")
 4695             listids = "".join(LISTNAMES.keys())
 4696             m = re.match("^( *)([%s]) " % re.escape(listids), line)
 4697             listitemindent = m.group(1)
 4698             listtype = m.group(2)
 4699             listname = LISTNAMES[listtype]
 4700             results_box = BLOCK.holdadd
 4701 
 4702             # Del list ID (and separate term from definition)
 4703             if listname == "deflist":
 4704                 term = parse_deflist_term(line)
 4705                 line = regex["deflist"].sub(SEPARATOR + term + SEPARATOR, line)
 4706             else:
 4707                 line = regex[listname].sub(SEPARATOR, line)
 4708 
 4709             # Don't cross depth limit
 4710             maxdepth = rules["listmaxdepth"]
 4711             if maxdepth and BLOCK.depth == maxdepth:
 4712                 if len(listitemindent) > len(listindent):
 4713                     listitemindent = listindent
 4714 
 4715             # List bumping (same indent, diff mark)
 4716             # Close the currently open list to clear the mess
 4717             if (
 4718                 BLOCK.block().endswith("list")
 4719                 and listname != BLOCK.block()
 4720                 and len(listitemindent) == len(listindent)
 4721             ):
 4722                 ret.extend(BLOCK.blockout())
 4723                 listindent = BLOCK.prop("indent")
 4724 
 4725             # Open mother list or sublist
 4726             if not BLOCK.block().endswith("list") or len(listitemindent) > len(
 4727                 listindent
 4728             ):
 4729                 ret.extend(BLOCK.blockin(listname))
 4730                 BLOCK.propset("indent", listitemindent)
 4731                 BLOCK.propset("type", listtype)
 4732 
 4733             # Closing sublists
 4734             while len(listitemindent) < len(BLOCK.prop("indent")):
 4735                 ret.extend(BLOCK.blockout())
 4736 
 4737             # O-oh, sublist before list ("\n\n  - foo\n- foo")
 4738             # Fix: close sublist (as mother), open another list
 4739             if not BLOCK.block().endswith("list"):
 4740                 ret.extend(BLOCK.blockin(listname))
 4741                 BLOCK.propset("indent", listitemindent)
 4742                 BLOCK.propset("type", listtype)
 4743 
 4744         # ---------------------[ Table ]-----------------------------
 4745 
 4746         # TODO escape undesired format inside table
 4747         if regex["table"].search(line):
 4748 
 4749             if not BLOCK.isblock("table"):  # first table line!
 4750                 ret.extend(BLOCK.blockin("table"))
 4751                 BLOCK.tableparser.__init__(line)
 4752 
 4753             tablerow = TableMaster().parse_row(line)
 4754             BLOCK.tableparser.add_row(tablerow)  # save config
 4755 
 4756             # Maintain line to unmask and inlines
 4757             # XXX Bug: | **bo | ld** | turns **bo\x01ld** and gets converted :(
 4758             # TODO isolate unmask+inlines parsing to use here
 4759             line = SEPARATOR.join(tablerow["cells"])
 4760 
 4761         # ---------------------[ Paragraph ]-------------------------
 4762 
 4763         if not BLOCK.block():  # new para!
 4764             ret.extend(BLOCK.blockin("para"))
 4765 
 4766         ############################################################
 4767         ############################################################
 4768         ############################################################
 4769 
 4770         # ---------------------[ Final Parses ]----------------------
 4771 
 4772         # The target-specific special char escapes for body lines
 4773         line = doEscape(target, line)
 4774 
 4775         line = add_inline_tags(line)
 4776         line = MASK.undo(line)
 4777 
 4778         # ---------------------[ Hold or Return? ]-------------------
 4779 
 4780         # Now we must choose where to put the parsed line
 4781         #
 4782         if not results_box:
 4783             # List item extra lines
 4784             if BLOCK.block().endswith("list"):
 4785                 results_box = BLOCK.holdaddsub
 4786             # Other blocks
 4787             elif BLOCK.block():
 4788                 results_box = BLOCK.holdadd
 4789             # No blocks
 4790             else:
 4791                 line = doFinalEscape(target, line)
 4792                 results_box = ret.append
 4793 
 4794         results_box(line)
 4795 
 4796     # EOF: close any open para/verb/lists/table/quotes
 4797     Debug("EOF", 7)
 4798     while BLOCK.block():
 4799         ret.extend(BLOCK.blockout())
 4800 
 4801     # Maybe close some opened title area?
 4802     if rules["titleblocks"]:
 4803         ret.extend(TITLE.close_all())
 4804 
 4805     # Maybe a major tag to enclose body? (like DIV for CSS)
 4806     if TAGS["bodyOpen"]:
 4807         ret.insert(0, TAGS["bodyOpen"])
 4808     if TAGS["bodyClose"]:
 4809         ret.append(TAGS["bodyClose"])
 4810 
 4811     marked_toc = TITLE.dump_marked_toc()
 4812 
 4813     return ret, marked_toc
 4814 
 4815 
 4816 def exec_command_line(user_cmdline=None):
 4817     global CMDLINE_RAW, RC_RAW, DEBUG, VERBOSE, QUIET, Error
 4818 
 4819     # Extract command line data
 4820     cmdline_data = user_cmdline or sys.argv[1:]
 4821     CMDLINE_RAW = CommandLine().get_raw_config(cmdline_data, relative=True)
 4822     cmdline_parsed = ConfigMaster(CMDLINE_RAW).parse()
 4823     DEBUG = cmdline_parsed.get("debug") or 0
 4824     VERBOSE = cmdline_parsed.get("verbose") or 0
 4825     QUIET = cmdline_parsed.get("quiet") or 0
 4826     infiles = cmdline_parsed.get("infile") or []
 4827 
 4828     Message("Processing begins", 1)
 4829 
 4830     # The easy ones
 4831     if cmdline_parsed.get("help"):
 4832         Quit(USAGE)
 4833     if cmdline_parsed.get("version"):
 4834         Quit(VERSIONSTR)
 4835     if cmdline_parsed.get("targets"):
 4836         listTargets()
 4837         Quit()
 4838 
 4839     Debug("system platform: %s" % sys.platform)
 4840     Debug("python version: %s" % (sys.version.split("(")[0]))
 4841     Debug("command line: %s" % sys.argv)
 4842     Debug("command line raw config: %s" % CMDLINE_RAW, 1)
 4843 
 4844     # Extract RC file config
 4845     if cmdline_parsed.get("rc") == 0:
 4846         Message("Ignoring user configuration file", 1)
 4847     else:
 4848         rc_file = get_rc_path()
 4849         if os.path.isfile(rc_file):
 4850             Message("Loading user configuration file", 1)
 4851             RC_RAW = ConfigLines(file_=rc_file).get_raw_config()
 4852 
 4853         Debug("rc file: %s" % rc_file)
 4854         Debug("rc file raw config: %s" % RC_RAW, 1)
 4855 
 4856     # TODO#1: this checking should be only in ConfigMaster.sanity()
 4857     if len(infiles) == 1:
 4858         infile = infiles[0]
 4859     else:
 4860         Error(
 4861             "Pass exactly one input file (see --help). "
 4862             "Example: {} -t html file.t2t".format(my_name)
 4863         )
 4864 
 4865     config, doc = process_source_file(infile)
 4866     headers, config_source, body = doc
 4867 
 4868     first_body_lineno = (len(headers) or 1) + len(config_source) + 1
 4869     convert_file(headers, body, config, first_body_lineno=first_body_lineno)
 4870 
 4871     Message("Txt2tags finished successfully", 1)
 4872 
 4873 
 4874 if __name__ == "__main__":
 4875     try:
 4876         exec_command_line()
 4877     except error as msg:
 4878         sys.exit(msg)
 4879     except Exception:
 4880         sys.exit(getUnknownErrorMessage())
 4881     else:
 4882         Quit()