"Fossies" - the Fresh Open Source Software Archive

Member "asciidoctor-2.0.10/lib/asciidoctor/parser.rb" (1 Jun 2019, 116069 Bytes) of package /linux/www/asciidoctor-2.0.10.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Ruby source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "parser.rb": 2.0.8_vs_2.0.9.

    1 # frozen_string_literal: true
    2 module Asciidoctor
    3 # Internal: Methods to parse lines of AsciiDoc into an object hierarchy
    4 # representing the structure of the document. All methods are class methods and
    5 # should be invoked from the Parser class. The main entry point is ::next_block.
    6 # No Parser instances shall be discovered running around. (Any attempt to
    7 # instantiate a Parser will be futile).
    8 #
    9 # The object hierarchy created by the Parser consists of zero or more Section
   10 # and Block objects. Section objects may be nested and a Section object
   11 # contains zero or more Block objects. Block objects may be nested, but may
   12 # only contain other Block objects. Block objects which represent lists may
   13 # contain zero or more ListItem objects.
   14 #
   15 # Examples
   16 #
   17 #   # Create a Reader for the AsciiDoc lines and retrieve the next block from it.
   18 #   # Parser.next_block requires a parent, so we begin by instantiating an empty Document.
   19 #
   20 #   doc = Document.new
   21 #   reader = Reader.new lines
   22 #   block = Parser.next_block(reader, doc)
   23 #   block.class
   24 #   # => Asciidoctor::Block
   25 class Parser
   26   include Logging
   27 
   28   BlockMatchData = Struct.new :context, :masq, :tip, :terminator
   29 
   30   # String for matching tab character
   31   TAB = ?\t
   32 
   33   # Regexp for leading tab indentation
   34   TabIndentRx = /^\t+/
   35 
   36   StartOfBlockProc = proc {|l| ((l.start_with? '[') && (BlockAttributeLineRx.match? l)) || (is_delimited_block? l) }
   37 
   38   StartOfListProc = proc {|l| AnyListRx.match? l }
   39 
   40   StartOfBlockOrListProc = proc {|l| (is_delimited_block? l) || ((l.start_with? '[') && (BlockAttributeLineRx.match? l)) || (AnyListRx.match? l) }
   41 
   42   NoOp = nil
   43 
   44   AuthorKeys = ['author', 'authorinitials', 'firstname', 'middlename', 'lastname', 'email']
   45 
   46   # Internal: A Hash mapping horizontal alignment abbreviations to alignments
   47   # that can be applied to a table cell (or to all cells in a column)
   48   TableCellHorzAlignments = {
   49     '<' => 'left',
   50     '>' => 'right',
   51     '^' => 'center'
   52   }
   53 
   54   # Internal: A Hash mapping vertical alignment abbreviations to alignments
   55   # that can be applied to a table cell (or to all cells in a column)
   56   TableCellVertAlignments = {
   57     '<' => 'top',
   58     '>' => 'bottom',
   59     '^' => 'middle'
   60   }
   61 
   62   # Internal: A Hash mapping styles abbreviations to styles that can be applied
   63   # to a table cell (or to all cells in a column)
   64   TableCellStyles = {
   65     'd' => :none,
   66     's' => :strong,
   67     'e' => :emphasis,
   68     'm' => :monospaced,
   69     'h' => :header,
   70     'l' => :literal,
   71     'a' => :asciidoc
   72   }
   73 
   74   # Hide the default constructor to make sure this class doesn't get instantiated.
   75   #
   76   # Raises NoMethodError if an attempt is made to invoke the constructor.
   77   private_class_method :new
   78 
   79   # Public: Parses AsciiDoc source read from the Reader into the Document
   80   #
   81   # This method is the main entry-point into the Parser when parsing a full document.
   82   # It first looks for and, if found, processes the document title. It then
   83   # proceeds to iterate through the lines in the Reader, parsing the document
   84   # into nested Sections and Blocks.
   85   #
   86   # reader   - the Reader holding the source lines of the document
   87   # document - the empty Document into which the lines will be parsed
   88   # options  - a Hash of options to control processing
   89   #
   90   # returns the Document object
   91   def self.parse(reader, document, options = {})
   92     block_attributes = parse_document_header(reader, document)
   93 
   94     # NOTE don't use a postfix conditional here as it's known to confuse JRuby in certain circumstances
   95     unless options[:header_only]
   96       while reader.has_more_lines?
   97         new_section, block_attributes = next_section(reader, document, block_attributes)
   98         if new_section
   99           document.assign_numeral new_section
  100           document.blocks << new_section
  101         end
  102       end
  103     end
  104 
  105     document
  106   end
  107 
  108   # Public: Parses the document header of the AsciiDoc source read from the Reader
  109   #
  110   # Reads the AsciiDoc source from the Reader until the end of the document
  111   # header is reached. The Document object is populated with information from
  112   # the header (document title, document attributes, etc). The document
  113   # attributes are then saved to establish a save point to which to rollback
  114   # after parsing is complete.
  115   #
  116   # This method assumes that there are no blank lines at the start of the document,
  117   # which are automatically removed by the reader.
  118   #
  119   # returns the Hash of orphan block attributes captured above the header
  def self.parse_document_header(reader, document)
    # collect the block-level metadata lines that precede the first block; comment lines are discarded along the way
    orphan_attrs = parse_block_metadata_lines reader, document
    doc_attrs = document.attributes
    has_implicit_doctitle = is_next_line_doctitle? reader, orphan_attrs, doc_attrs['leveloffset']

    # special case: a block title is not allowed above the document title, so
    # finalize the header right away and carry the attributes over to the document body
    return document.finalize_header(orphan_attrs, false) if has_implicit_doctitle && orphan_attrs['title']

    # the document title logic in AsciiDoc is convoluted; definitely an area for spec refinement

    # a non-empty doctitle attribute that is already set becomes the document title
    doctitle_value = nil
    attr_doctitle = doc_attrs['doctitle']
    unless attr_doctitle.nil_or_empty?
      doctitle_value = attr_doctitle
      document.title = attr_doctitle
    end

    # if the first line is the document title, add a header to the document and parse the header metadata
    if has_implicit_doctitle
      title_cursor = document.sourcemap ? reader.cursor : nil
      document.id, _, implicit_title, _, is_atx = parse_section_title reader, document
      if doctitle_value
        # NOTE doctitle attribute (set above or below implicit doctitle) overrides implicit doctitle
        implicit_title = nil
      else
        document.title = implicit_title
        doctitle_value = document.apply_header_subs implicit_title
        doc_attrs['doctitle'] = doctitle_value
      end
      document.header.source_location = title_cursor if title_cursor

      # default to compat-mode if document has setext doctitle
      unless is_atx || (document.attribute_locked? 'compat-mode')
        doc_attrs['compat-mode'] = ''
      end

      separator = orphan_attrs['separator']
      doc_attrs['title-separator'] = separator if separator && !(document.attribute_locked? 'title-separator')

      # an id in the block attributes wins over the id that came from the title line
      doc_id = orphan_attrs['id']
      if doc_id
        document.id = doc_id
      else
        doc_id = document.id
      end

      # role and reftext from the block attributes become document attributes
      ['role', 'reftext'].each do |name|
        inherited = orphan_attrs[name]
        doc_attrs[name] = inherited if inherited
      end
      orphan_attrs.clear

      # forget any earlier modification of doctitle so that a change made by the header metadata can be detected
      modified_attrs = document.instance_variable_get :@attributes_modified
      modified_attrs.delete 'doctitle'
      parse_header_metadata reader, document
      if modified_attrs.include? 'doctitle'
        header_doctitle = doc_attrs['doctitle']
        if header_doctitle.nil_or_empty? || header_doctitle == doctitle_value
          doc_attrs['doctitle'] = doctitle_value
        else
          document.title = header_doctitle
        end
      elsif !implicit_title
        modified_attrs << 'doctitle'
      end
      document.register :refs, [doc_id, document] if doc_id
    end

    # parse title and consume name section of manpage document
    parse_manpage_header reader, document, orphan_attrs if document.doctype == 'manpage'

    # NOTE orphan_attrs are the block-level attributes (not document attributes) that
    # precede the first line of content (document title, first section or first block)
    document.finalize_header orphan_attrs
  end
  188 
  189   # Public: Parses the manpage header of the AsciiDoc source read from the Reader
  190   #
  191   # returns Nothing
  def self.parse_manpage_header(reader, document, block_attributes)
    doc_attrs = document.attributes

    # the doctitle is matched against ManpageTitleVolnumRx, which captures the title and the volume number
    if ManpageTitleVolnumRx =~ doc_attrs['doctitle']
      mantitle, manvolnum = $1, $2
      doc_attrs['manvolnum'] = manvolnum
      mantitle = document.sub_attributes mantitle if mantitle.include? ATTR_REF_HEAD
      doc_attrs['mantitle'] = mantitle.downcase
    else
      logger.error message_with_context 'non-conforming manpage title', source_location: (reader.cursor_at_line 1)
      # provide sensible fallbacks
      doc_attrs['mantitle'] = doc_attrs['doctitle'] || doc_attrs['docname'] || 'command'
      manvolnum = '1'
      doc_attrs['manvolnum'] = manvolnum
    end

    # when the backend is manpage, docname becomes the man name and
    # outfilesuffix becomes the volume number prefixed with a dot
    assign_output_name = lambda do |name|
      if document.backend == 'manpage'
        doc_attrs['docname'] = name
        doc_attrs['outfilesuffix'] = %(.#{manvolnum})
      end
    end

    # the name section does not need to be parsed if both manname and manpurpose are already set
    manname = doc_attrs['manname']
    if manname && doc_attrs['manpurpose']
      doc_attrs['manname-title'] ||= 'Name'
      doc_attrs['mannames'] = [manname]
      assign_output_name.call manname
      return
    end

    # otherwise, read the man name(s) and purpose from the name section; the reader
    # position is saved so it can be restored if the section turns out to be unusable
    reader.skip_blank_lines
    reader.save
    block_attributes.update parse_block_metadata_lines reader, document
    name_section_level = is_next_line_section? reader, {}
    if !name_section_level
      error_msg = 'name section expected'
    elsif name_section_level != 1
      error_msg = 'name section must be at level 1'
    else
      name_section = initialize_section reader, document, {}
      name_section_lines = reader.read_lines_until break_on_blank_lines: true, skip_line_comments: true
      if ManpageNamePurposeRx =~ (name_section_lines.map(&:lstrip).join ' ')
        manname, manpurpose = $1, $2
        doc_attrs['manname-title'] ||= name_section.title
        doc_attrs['manname-id'] = name_section.id if name_section.id
        doc_attrs['manpurpose'] = manpurpose
        manname = document.sub_attributes manname if manname.include? ATTR_REF_HEAD
        # a comma-separated list declares several names; the first one is the primary name
        if manname.include? ','
          mannames = (manname.split ',').map(&:lstrip)
          manname = mannames[0]
        else
          mannames = [manname]
        end
        doc_attrs['manname'] = manname
        doc_attrs['mannames'] = mannames
        assign_output_name.call manname
      else
        error_msg = 'non-conforming name section body'
      end
    end

    if error_msg
      # rewind the reader, report the problem and fall back to the docname (or 'command')
      reader.restore_save
      logger.error message_with_context error_msg, source_location: reader.cursor
      manname = doc_attrs['docname'] || 'command'
      doc_attrs['manname'] = manname
      doc_attrs['mannames'] = [manname]
      assign_output_name.call manname
    else
      reader.discard_save
    end
    nil
  end
  259 
  260   # Public: Return the next section from the Reader.
  261   #
  262   # This method processes block metadata, content and subsections for this
  263   # section and returns the Section object and any orphaned attributes.
  264   #
  265   # If the parent is a Document and has a header (document title), then
  266   # this method will put any non-section blocks at the start of document
  267   # into a preamble Block. If there are no such blocks, the preamble is
  268   # dropped.
  269   #
  270   # Since we are reading line-by-line, there's a chance that metadata
  271   # that should be associated with the following block gets consumed.
  272   # To deal with this case, the method returns a running Hash of
  273   # "orphaned" attributes that get passed to the next Section or Block.
  274   #
  275   # reader     - the source Reader
  276   # parent     - the parent Section or Document of this new section
  277   # attributes - a Hash of metadata that was left orphaned from the
  278   #              previous Section.
  279   #
  280   # Examples
  281   #
  282   #   source
  283   #   # => "= Greetings\n\nThis is my doc.\n\n== Salutations\n\nIt is awesome."
  284   #
  285   #   reader = Reader.new source, nil, normalize: true
  286   #   # create empty document to parent the section
  287   #   # and hold attributes extracted from header
  288   #   doc = Document.new
  289   #
  290   #   Parser.next_section(reader, doc)[0].title
  291   #   # => "Greetings"
  292   #
  293   #   Parser.next_section(reader, doc)[0].title
  294   #   # => "Salutations"
  295   #
  296   # returns a two-element Array containing the Section and Hash of orphaned attributes
  def self.next_section reader, parent, attributes = {}
    preamble = intro = part = false

    # check if we are at the start of processing the document, in which case the
    # document itself is the "section" that gets populated
    # NOTE we could drop a hint in the attributes to indicate
    # that we are at a section title (so we don't have to check)
    at_document_start = parent.context == :document && parent.blocks.empty? &&
        ((has_header = parent.header?) || (attributes.delete 'invalid-header') || !(is_next_line_section? reader, attributes))

    if at_document_start
      document = parent
      book = document.doctype == 'book'
      if has_header || (book && attributes[1] != 'abstract')
        preamble = intro = Block.new parent, :preamble, content_model: :compound
        if book && (parent.attr? 'preface-title')
          preamble.title = parent.attr 'preface-title'
        end
        parent.blocks << preamble
      end
      section = parent
      current_level = 0
      if parent.attributes.key? 'fragment'
        # a negative expected level turns off the out of sequence warning below
        expected_next_level = -1
      else
        expected_next_level = 1
        # small tweak to allow subsequent level-0 sections for book doctype
        expected_next_level_alt = 0 if book
      end
    else
      document = parent.document
      book = document.doctype == 'book'
      section = initialize_section reader, parent, attributes
      # clear attributes except for title attribute, which must be carried over to next content block
      title = attributes['title']
      attributes = title ? { 'title' => title } : {}
      current_level = section.level
      expected_next_level = current_level + 1
      if current_level == 0
        part = book
      elsif current_level == 1 && section.special
        # NOTE technically preface and abstract sections are only permitted in the book doctype
        sectname = section.sectname
        # a nil expected level marks a special section in which nested sections are reported as an error
        expected_next_level = nil unless ['appendix', 'preface', 'abstract'].include? sectname
      end
    end

    reader.skip_blank_lines

    # Parse lines belonging to this section and its subsections until we
    # reach the end of this section level
    #
    # 1. first look for metadata thingies (anchor, attribute list, block title line, etc)
    # 2. then look for a section, recurse if found
    # 3. then process blocks
    #
    # We have to parse all the metadata lines before continuing with the loop,
    # otherwise subsequent metadata lines get interpreted as block content
    while reader.has_more_lines?
      parse_block_metadata_lines reader, document, attributes
      next_level = is_next_line_section? reader, attributes
      if next_level
        if document.attr? 'leveloffset'
          next_level += (document.attr 'leveloffset').to_i
          next_level = 0 if next_level < 0
        end
        if next_level > current_level
          if !expected_next_level
            logger.error message_with_context %(#{sectname} sections do not support nested sections), source_location: reader.cursor
          elsif !(next_level == expected_next_level || (expected_next_level_alt && next_level == expected_next_level_alt) || expected_next_level < 0)
            if expected_next_level_alt
              expected_condition = %(expected levels #{expected_next_level_alt} or #{expected_next_level})
            else
              expected_condition = %(expected level #{expected_next_level})
            end
            logger.warn message_with_context %(section title out of sequence: #{expected_condition}, got level #{next_level}), source_location: reader.cursor
          end
        elsif next_level == 0 && section == document
          logger.error message_with_context 'level 0 sections can only be used when doctype is book', source_location: reader.cursor unless book
        else
          # close this section (and break out of the nesting) to begin a new one
          break
        end
        # in both remaining cases, the upcoming section is parsed as a child of this one
        child_section, attributes = next_section reader, section, attributes
        section.assign_numeral child_section
        section.blocks << child_section
      else
        # just take one block or else we run the risk of overrunning section boundaries
        block_cursor = reader.cursor
        new_block = next_block reader, intro || section, attributes, parse_metadata: false
        if new_block
          # REVIEW this may be doing too much
          if part
            if !section.blocks?
              # if this block wasn't marked as [partintro], emulate behavior as if it had
              unless new_block.style == 'partintro'
                if new_block.context == :paragraph
                  # emulate [partintro] paragraph
                  new_block.context = :open
                  new_block.style = 'partintro'
                else
                  # emulate [partintro] open block
                  intro = Block.new section, :open, content_model: :compound
                  new_block.parent = intro
                  intro.style = 'partintro'
                  section.blocks << intro
                end
              end
            elsif section.blocks.size == 1
              first_block = section.blocks[0]
              if first_block.content_model == :compound
                # the compound first block can only be appended to while it's the open intro block
                logger.error message_with_context 'illegal block content outside of partintro block', source_location: block_cursor unless intro
              else
                # rebuild [partintro] paragraph as an open block
                intro = Block.new section, :open, content_model: :compound
                new_block.parent = intro
                intro.style = 'partintro'
                section.blocks.shift
                if first_block.style == 'partintro'
                  first_block.context = :paragraph
                  first_block.style = nil
                end
                intro << first_block
                section.blocks << intro
              end
            end
          end

          (intro || section).blocks << new_block
          attributes.clear
        # NOTE attributes are not cleared when no block is found because they may
        # be trailing attributes that didn't get associated with a block
        end
      end

      reader.skip_blank_lines || break
    end

    if part
      unless section.blocks? && section.blocks[-1].context == :section
        logger.error message_with_context 'invalid part, must have at least one section (e.g., chapter, appendix, etc.)', source_location: reader.cursor
      end
    # NOTE we could try to avoid creating a preamble in the first place, though
    # that would require reworking assumptions in next_section since the preamble
    # is treated like an untitled section
    elsif preamble # implies parent == document
      if preamble.blocks?
        # unwrap standalone preamble (i.e., document has no sections) except for books, if permissible
        if !book && !document.blocks[1] && Compliance.unwrap_standalone_preamble
          document.blocks.shift
          while (preamble_child = preamble.blocks.shift)
            document << preamble_child
          end
        end
      else
        # drop the preamble if it has no content
        document.blocks.shift
      end
    end

    # The attributes returned here are orphaned attributes that fall at the end
    # of a section that need to get transferred to the next section
    # see "trailing block attributes transfer to the following section" in
    # test/attributes_test.rb for an example
    [section == parent ? nil : section, attributes.merge]
  end
  455 
  456   # Public: Parse and return the next Block at the Reader's current location
  457   #
  458   # This method begins by skipping over blank lines to find the start of the
  459   # next block (paragraph, block macro, or delimited block). If a block is
  460   # found, that block is parsed, initialized as a Block object, and returned.
  461   # Otherwise, the method returns nothing.
  462   #
  463   # Regular expressions from the Asciidoctor module are used to match block
  464   # boundaries. The ensuing lines are then processed according to the content
  465   # model.
  466   #
  467   # reader     - The Reader from which to retrieve the next Block.
  468   # parent     - The Document, Section or Block to which the next Block belongs.
  469   # attributes - A Hash of attributes that will become the attributes
  470   #              associated with the parsed Block (default: {}).
  471   # options    - An options Hash to control parsing (default: {}):
  472   #              * :text_only indicates that the parser is only looking for text content
  473   #              * :list_type indicates this block will be attached to a list item in a list of the specified type
  474   #
  475   # Returns a Block object built from the parsed content of the processed
  476   # lines, or nothing if no block is found.
  477   def self.next_block(reader, parent, attributes = {}, options = {})
  478     # skip ahead to the block content; bail if we've reached the end of the reader
  479     return unless (skipped = reader.skip_blank_lines)
  480 
  481     # check for option to find list item text only
  482     # if skipped a line, assume a list continuation was
  483     # used and block content is acceptable
  484     if (text_only = options[:text_only]) && skipped > 0
  485       options.delete :text_only
  486       text_only = nil
  487     end
  488 
  489     document = parent.document
  490 
  491     if options.fetch :parse_metadata, true
  492       # read lines until there are no more metadata lines to read; note that :text_only option impacts parsing rules
  493       while parse_block_metadata_line reader, document, attributes, options
  494         # discard the line just processed
  495         reader.shift
  496         # QUESTION should we clear the attributes? no known cases when it's necessary
  497         reader.skip_blank_lines || return
  498       end
  499     end
  500 
  501     if (extensions = document.extensions)
  502       block_extensions, block_macro_extensions = extensions.blocks?, extensions.block_macros?
  503     end
  504 
  505     # QUESTION should we introduce a parsing context object?
  506     reader.mark
  507     this_line, doc_attrs, style = reader.read_line, document.attributes, attributes[1]
  508     block = block_context = cloaked_context = terminator = nil
  509 
  510     if (delimited_block = is_delimited_block? this_line, true)
  511       block_context = cloaked_context = delimited_block.context
  512       terminator = delimited_block.terminator
  513       if style
  514         unless style == block_context.to_s
  515           if delimited_block.masq.include? style
  516             block_context = style.to_sym
  517           elsif delimited_block.masq.include?('admonition') && ADMONITION_STYLES.include?(style)
  518             block_context = :admonition
  519           elsif block_extensions && extensions.registered_for_block?(style, block_context)
  520             block_context = style.to_sym
  521           else
  522             logger.debug message_with_context %(unknown style for #{block_context} block: #{style}), source_location: reader.cursor_at_mark if logger.debug?
  523             style = block_context.to_s
  524           end
  525         end
  526       else
  527         style = attributes['style'] = block_context.to_s
  528       end
  529     end
  530 
  531     # this loop is used for flow control; it only executes once, and only when delimited_block is not set
  532     # break once a block is found or at end of loop
  533     # returns nil if the line should be dropped
  534     while true
  535       # process lines verbatim
  536       if style && Compliance.strict_verbatim_paragraphs && (VERBATIM_STYLES.include? style)
  537         block_context = style.to_sym
  538         reader.unshift_line this_line
  539         # advance to block parsing =>
  540         break
  541       end
  542 
  543       # process lines normally
  544       if text_only
  545         indented = this_line.start_with? ' ', TAB
  546       else
  547         # NOTE move this declaration up if we need it when text_only is false
  548         md_syntax = Compliance.markdown_syntax
  549         if this_line.start_with? ' '
  550           indented, ch0 = true, ' '
  551           # QUESTION should we test line length?
  552           if md_syntax && this_line.lstrip.start_with?(*MARKDOWN_THEMATIC_BREAK_CHARS.keys) &&
  553               #!(this_line.start_with? '    ') &&
  554               (MarkdownThematicBreakRx.match? this_line)
  555             # NOTE we're letting break lines (horizontal rule, page_break, etc) have attributes
  556             block = Block.new(parent, :thematic_break, content_model: :empty)
  557             break
  558           end
  559         elsif this_line.start_with? TAB
  560           indented, ch0 = true, TAB
  561         else
  562           indented, ch0 = false, this_line.chr
  563           layout_break_chars = md_syntax ? HYBRID_LAYOUT_BREAK_CHARS : LAYOUT_BREAK_CHARS
  564           if (layout_break_chars.key? ch0) &&
  565               (md_syntax ? (ExtLayoutBreakRx.match? this_line) : (uniform? this_line, ch0, (ll = this_line.length)) && ll > 2)
  566             # NOTE we're letting break lines (horizontal rule, page_break, etc) have attributes
  567             block = Block.new(parent, layout_break_chars[ch0], content_model: :empty)
  568             break
  569           # NOTE very rare that a text-only line will end in ] (e.g., inline macro), so check that first
  570           elsif (this_line.end_with? ']') && (this_line.include? '::')
  571             #if (this_line.start_with? 'image', 'video', 'audio') && BlockMediaMacroRx =~ this_line
  572             if (ch0 == 'i' || (this_line.start_with? 'video:', 'audio:')) && BlockMediaMacroRx =~ this_line
  573               blk_ctx, target, blk_attrs = $1.to_sym, $2, $3
  574               block = Block.new parent, blk_ctx, content_model: :empty
  575               if blk_attrs
  576                 case blk_ctx
  577                 when :video
  578                   posattrs = ['poster', 'width', 'height']
  579                 when :audio
  580                   posattrs = []
  581                 else # :image
  582                   posattrs = ['alt', 'width', 'height']
  583                 end
  584                 block.parse_attributes blk_attrs, posattrs, sub_input: true, into: attributes
  585               end
  586               # style doesn't have special meaning for media macros
  587               attributes.delete 'style' if attributes.key? 'style'
  588               if target.include? ATTR_REF_HEAD
  589                 if (expanded_target = block.sub_attributes target).empty? &&
  590                     (doc_attrs['attribute-missing'] || Compliance.attribute_missing) == 'drop-line' &&
  591                     (block.sub_attributes target + ' ', attribute_missing: 'drop-line', drop_line_severity: :ignore).empty?
  592                   attributes.clear
  593                   return
  594                 else
  595                   target = expanded_target
  596                 end
  597               end
  598               if blk_ctx == :image
  599                 document.register :images, target
  600                 attributes['imagesdir'] = doc_attrs['imagesdir']
  601                 # NOTE style is the value of the first positional attribute in the block attribute line
  602                 attributes['alt'] ||= style || (attributes['default-alt'] = Helpers.basename(target, true).tr('_-', ' '))
  603                 unless (scaledwidth = attributes.delete 'scaledwidth').nil_or_empty?
  604                   # NOTE assume % units if not specified
  605                   attributes['scaledwidth'] = (TrailingDigitsRx.match? scaledwidth) ? %(#{scaledwidth}%) : scaledwidth
  606                 end
  607                 if attributes['title']
  608                   block.title = block_title = attributes.delete 'title'
  609                   block.assign_caption (attributes.delete 'caption'), 'figure'
  610                 end
  611               end
  612               attributes['target'] = target
  613               break
  614 
  615             elsif ch0 == 't' && (this_line.start_with? 'toc:') && BlockTocMacroRx =~ this_line
  616               block = Block.new parent, :toc, content_model: :empty
  617               block.parse_attributes $1, [], into: attributes if $1
  618               break
  619 
  620             elsif block_macro_extensions ? (CustomBlockMacroRx =~ this_line &&
  621                 (extension = extensions.registered_for_block_macro? $1) || (report_unknown_block_macro = logger.debug?)) :
  622                 (logger.debug? && (report_unknown_block_macro = CustomBlockMacroRx =~ this_line))
  623               if report_unknown_block_macro
  624                 logger.debug message_with_context %(unknown name for block macro: #{$1}), source_location: reader.cursor_at_mark
  625               else
  626                 content = $3
  627                 if (target = $2).include? ATTR_REF_HEAD
  628                   if (expanded_target = parent.sub_attributes target).empty? &&
  629                       (doc_attrs['attribute-missing'] || Compliance.attribute_missing) == 'drop-line' &&
  630                       (parent.sub_attributes target + ' ', attribute_missing: 'drop-line', drop_line_severity: :ignore).empty?
  631                     attributes.clear
  632                     return
  633                   else
  634                     target = expanded_target
  635                   end
  636                 end
  637                 if (ext_config = extension.config)[:content_model] == :attributes
  638                   document.parse_attributes content, ext_config[:positional_attrs] || ext_config[:pos_attrs] || [], sub_input: true, into: attributes if content
  639                 else
  640                   attributes['text'] = content || ''
  641                 end
  642                 if (default_attrs = ext_config[:default_attrs])
  643                   attributes.update(default_attrs) {|_, old_v| old_v }
  644                 end
  645                 if (block = extension.process_method[parent, target, attributes])
  646                   attributes.replace block.attributes
  647                   break
  648                 else
  649                   attributes.clear
  650                   return
  651                 end
  652               end
  653             end
  654           end
  655         end
  656       end
  657 
  658       # haven't found anything yet, continue
  659       if !indented && (ch0 ||= this_line.chr) == '<' && CalloutListRx =~ this_line
  660         reader.unshift_line this_line
  661         block = parse_callout_list(reader, $~, parent, document.callouts)
  662         attributes['style'] = 'arabic'
  663         break
  664 
  665       elsif UnorderedListRx.match? this_line
  666         reader.unshift_line this_line
  667         attributes['style'] = style = 'bibliography' if !style && Section === parent && parent.sectname == 'bibliography'
  668         block = parse_list(reader, :ulist, parent, style)
  669         break
  670 
  671       elsif OrderedListRx.match? this_line
  672         reader.unshift_line this_line
  673         block = parse_list(reader, :olist, parent, style)
  674         attributes['style'] = block.style if block.style
  675         break
  676 
  677       elsif ((this_line.include? '::') || (this_line.include? ';;')) && DescriptionListRx =~ this_line
  678         reader.unshift_line this_line
  679         block = parse_description_list(reader, $~, parent)
  680         break
  681 
  682       elsif (style == 'float' || style == 'discrete') && (Compliance.underline_style_section_titles ?
  683           (is_section_title? this_line, reader.peek_line) : !indented && (atx_section_title? this_line))
  684         reader.unshift_line this_line
  685         float_id, float_reftext, block_title, float_level = parse_section_title reader, document, attributes['id']
  686         attributes['reftext'] = float_reftext if float_reftext
  687         block = Block.new(parent, :floating_title, content_model: :empty)
  688         block.title = block_title
  689         attributes.delete 'title'
  690         block.id = float_id || ((doc_attrs.key? 'sectids') ? (Section.generate_id block.title, document) : nil)
  691         block.level = float_level
  692         break
  693 
  694       # FIXME create another set for "passthrough" styles
  695       # FIXME make this more DRY!
  696       elsif style && style != 'normal'
  697         if PARAGRAPH_STYLES.include?(style)
  698           block_context = style.to_sym
  699           cloaked_context = :paragraph
  700           reader.unshift_line this_line
  701           # advance to block parsing =>
  702           break
  703         elsif ADMONITION_STYLES.include?(style)
  704           block_context = :admonition
  705           cloaked_context = :paragraph
  706           reader.unshift_line this_line
  707           # advance to block parsing =>
  708           break
  709         elsif block_extensions && extensions.registered_for_block?(style, :paragraph)
  710           block_context = style.to_sym
  711           cloaked_context = :paragraph
  712           reader.unshift_line this_line
  713           # advance to block parsing =>
  714           break
  715         else
  716           logger.debug message_with_context %(unknown style for paragraph: #{style}), source_location: reader.cursor_at_mark if logger.debug?
  717           style = nil
  718           # continue to process paragraph
  719         end
  720       end
  721 
  722       reader.unshift_line this_line
  723 
  724       # a literal paragraph: contiguous lines starting with at least one whitespace character
  725       # NOTE style can only be nil or "normal" at this point
  726       if indented && !style
  727         lines = read_paragraph_lines reader, (content_adjacent = skipped == 0 ? options[:list_type] : nil), skip_line_comments: text_only
  728         adjust_indentation! lines
  729         if text_only || content_adjacent == :dlist
  730           # this block gets folded into the list item text
  731           block = Block.new(parent, :paragraph, content_model: :simple, source: lines, attributes: attributes)
  732         else
  733           block = Block.new(parent, :literal, content_model: :verbatim, source: lines, attributes: attributes)
  734         end
  735       # a normal paragraph: contiguous non-blank/non-continuation lines (left-indented or normal style)
  736       else
  737         lines = read_paragraph_lines reader, skipped == 0 && options[:list_type], skip_line_comments: true
  738         # NOTE don't check indented here since it's extremely rare
  739         #if text_only || indented
  740         if text_only
  741           # if [normal] is used over an indented paragraph, shift content to left margin
  742           # QUESTION do we even need to shift since whitespace is normalized by XML in this case?
  743           adjust_indentation! lines if indented && style == 'normal'
  744           block = Block.new(parent, :paragraph, content_model: :simple, source: lines, attributes: attributes)
  745         elsif (ADMONITION_STYLE_HEADS.include? ch0) && (this_line.include? ':') && (AdmonitionParagraphRx =~ this_line)
  746           lines[0] = $' # string after match
  747           attributes['name'] = admonition_name = (attributes['style'] = $1).downcase
  748           attributes['textlabel'] = (attributes.delete 'caption') || doc_attrs[%(#{admonition_name}-caption)]
  749           block = Block.new(parent, :admonition, content_model: :simple, source: lines, attributes: attributes)
  750         elsif md_syntax && ch0 == '>' && this_line.start_with?('> ')
  751           lines.map! {|line| line == '>' ? (line.slice 1, line.length) : ((line.start_with? '> ') ? (line.slice 2, line.length) : line) }
  752           if lines[-1].start_with? '-- '
  753             credit_line = (credit_line = lines.pop).slice 3, credit_line.length
  754             unless lines.empty?
  755               lines.pop while lines[-1].empty?
  756             end
  757           end
  758           attributes['style'] = 'quote'
  759           # NOTE will only detect discrete (aka free-floating) headings
  760           # TODO could assume a discrete heading when inside a block context
  761           # FIXME Reader needs to be created w/ line info
  762           block = build_block(:quote, :compound, false, parent, Reader.new(lines), attributes)
  763           if credit_line
  764             attribution, citetitle = (block.apply_subs credit_line).split ', ', 2
  765             attributes['attribution'] = attribution if attribution
  766             attributes['citetitle'] = citetitle if citetitle
  767           end
  768         elsif ch0 == '"' && lines.size > 1 && (lines[-1].start_with? '-- ') && (lines[-2].end_with? '"')
  769           lines[0] = this_line.slice 1, this_line.length # strip leading quote
  770           credit_line = (credit_line = lines.pop).slice 3, credit_line.length
  771           lines.pop while lines[-1].empty?
  772           lines << lines.pop.chop # strip trailing quote
  773           attributes['style'] = 'quote'
  774           block = Block.new(parent, :quote, content_model: :simple, source: lines, attributes: attributes)
  775           attribution, citetitle = (block.apply_subs credit_line).split ', ', 2
  776           attributes['attribution'] = attribution if attribution
  777           attributes['citetitle'] = citetitle if citetitle
  778         else
  779           # if [normal] is used over an indented paragraph, shift content to left margin
  780           # QUESTION do we even need to shift since whitespace is normalized by XML in this case?
  781           adjust_indentation! lines if indented && style == 'normal'
  782           block = Block.new(parent, :paragraph, content_model: :simple, source: lines, attributes: attributes)
  783         end
  784 
  785         catalog_inline_anchors((lines.join LF), block, document, reader)
  786       end
  787 
  788       break # forbid loop from executing more than once
  789     end unless delimited_block
  790 
  791     # either delimited block or styled paragraph
  792     unless block
  793       case block_context
  794       when :listing, :source
  795         if block_context == :source || (!attributes[1] && (language = attributes[2] || doc_attrs['source-language']))
  796           if language
  797             attributes['style'] = 'source'
  798             attributes['language'] = language
  799             AttributeList.rekey attributes, [nil, nil, 'linenums']
  800           else
  801             AttributeList.rekey attributes, [nil, 'language', 'linenums']
  802             if doc_attrs.key? 'source-language'
  803               attributes['language'] = doc_attrs['source-language']
  804             end unless attributes.key? 'language'
  805           end
  806           if attributes['linenums-option'] || doc_attrs['source-linenums-option']
  807             attributes['linenums'] = ''
  808           end unless attributes.key? 'linenums'
  809           if doc_attrs.key? 'source-indent'
  810             attributes['indent'] = doc_attrs['source-indent']
  811           end unless attributes.key? 'indent'
  812         end
  813         block = build_block(:listing, :verbatim, terminator, parent, reader, attributes)
  814       when :fenced_code
  815         attributes['style'] = 'source'
  816         if (ll = this_line.length) > 3
  817           if (comma_idx = (language = this_line.slice 3, ll).index ',')
  818             if comma_idx > 0
  819               language = (language.slice 0, comma_idx).strip
  820               attributes['linenums'] = '' if comma_idx < ll - 4
  821             else
  822               attributes['linenums'] = '' if ll > 4
  823             end
  824           else
  825             language = language.lstrip
  826           end
  827         end
  828         if language.nil_or_empty?
  829           attributes['language'] = doc_attrs['source-language'] if doc_attrs.key? 'source-language'
  830         else
  831           attributes['language'] = language
  832         end
  833         if attributes['linenums-option'] || doc_attrs['source-linenums-option']
  834           attributes['linenums'] = ''
  835         end unless attributes.key? 'linenums'
  836         if doc_attrs.key? 'source-indent'
  837           attributes['indent'] = doc_attrs['source-indent']
  838         end unless attributes.key? 'indent'
  839         terminator = terminator.slice 0, 3
  840         block = build_block(:listing, :verbatim, terminator, parent, reader, attributes)
  841       when :table
  842         block_cursor = reader.cursor
  843         block_reader = Reader.new reader.read_lines_until(terminator: terminator, skip_line_comments: true, context: :table, cursor: :at_mark), block_cursor
  844         # NOTE it's very rare that format is set when using a format hint char, so short-circuit
  845         unless terminator.start_with? '|', '!'
  846           # NOTE infer dsv once all other format hint chars are ruled out
  847           attributes['format'] ||= (terminator.start_with? ',') ? 'csv' : 'dsv'
  848         end
  849         block = parse_table(block_reader, parent, attributes)
  850       when :sidebar
  851         block = build_block(block_context, :compound, terminator, parent, reader, attributes)
  852       when :admonition
  853         attributes['name'] = admonition_name = style.downcase
  854         attributes['textlabel'] = (attributes.delete 'caption') || doc_attrs[%(#{admonition_name}-caption)]
  855         block = build_block(block_context, :compound, terminator, parent, reader, attributes)
  856       when :open, :abstract, :partintro
  857         block = build_block(:open, :compound, terminator, parent, reader, attributes)
  858       when :literal
  859         block = build_block(block_context, :verbatim, terminator, parent, reader, attributes)
  860       when :example
  861         block = build_block(block_context, :compound, terminator, parent, reader, attributes)
  862       when :quote, :verse
  863         AttributeList.rekey(attributes, [nil, 'attribution', 'citetitle'])
  864         block = build_block(block_context, (block_context == :verse ? :verbatim : :compound), terminator, parent, reader, attributes)
  865       when :stem, :latexmath, :asciimath
  866         attributes['style'] = STEM_TYPE_ALIASES[attributes[2] || doc_attrs['stem']] if block_context == :stem
  867         block = build_block(:stem, :raw, terminator, parent, reader, attributes)
  868       when :pass
  869         block = build_block(block_context, :raw, terminator, parent, reader, attributes)
  870       when :comment
  871         build_block(block_context, :skip, terminator, parent, reader, attributes)
  872         attributes.clear
  873         return
  874       else
  875         if block_extensions && (extension = extensions.registered_for_block? block_context, cloaked_context)
  876           unless (content_model = (ext_config = extension.config)[:content_model]) == :skip
  877             unless (positional_attrs = ext_config[:positional_attrs] || ext_config[:pos_attrs]).nil_or_empty?
  878               AttributeList.rekey(attributes, [nil] + positional_attrs)
  879             end
  880             if (default_attrs = ext_config[:default_attrs])
  881               default_attrs.each {|k, v| attributes[k] ||= v }
  882             end
  883             # QUESTION should we clone the extension for each cloaked context and set in config?
  884             attributes['cloaked-context'] = cloaked_context
  885           end
  886           unless (block = build_block block_context, content_model, terminator, parent, reader, attributes, extension: extension)
  887             attributes.clear
  888             return
  889           end
  890         else
  891           # this should only happen if there's a misconfiguration
  892           raise %(Unsupported block type #{block_context} at #{reader.cursor})
  893         end
  894       end
  895     end
  896 
  897     # FIXME we've got to clean this up, it's horrible!
  898     block.source_location = reader.cursor_at_mark if document.sourcemap
  899     # FIXME title and caption should be assigned when block is constructed (though we need to handle all cases)
  900     if attributes['title']
  901       block.title = block_title = attributes.delete 'title'
  902       if (caption_attr_name = CAPTION_ATTR_NAMES[block.context]) && document.attributes[caption_attr_name]
  903         block.assign_caption (attributes.delete 'caption')
  904       end
  905     end
  906     # TODO eventually remove the style attribute from the attributes hash
  907     #block.style = attributes.delete 'style'
  908     block.style = attributes['style']
  909     if (block_id = block.id || (block.id = attributes['id']))
  910       # convert title to resolve attributes while in scope
  911       block.title if block_title ? (block_title.include? ATTR_REF_HEAD) : block.title?
  912       unless document.register :refs, [block_id, block]
  913         logger.warn message_with_context %(id assigned to block already in use: #{block_id}), source_location: reader.cursor_at_mark
  914       end
  915     end
  916     # FIXME remove the need for this update!
  917     block.update_attributes attributes unless attributes.empty?
  918     block.commit_subs
  919 
  920     #if doc_attrs.key? :pending_attribute_entries
  921     #  doc_attrs.delete(:pending_attribute_entries).each do |entry|
  922     #    entry.save_to block.attributes
  923     #  end
  924     #end
  925 
  926     if block.sub? :callouts
  927       # No need to sub callouts if none are found when cataloging
  928       block.remove_sub :callouts unless catalog_callouts block.source, document
  929     end
  930 
  931     block
  932   end
  933 
  # Internal: Read the lines of a paragraph from the reader.
  #
  # The break_on_blank_lines, break_on_list_continuation and preserve_last_line options are
  # always forced to true before the options are handed to the reader.
  #
  # reader        - the object to read from; must respond to read_lines_until
  # break_at_list - when truthy, the break condition also recognizes the start of a list
  # opts          - a Hash of additional options for read_lines_until (modified in place)
  #
  # Returns the result of reader.read_lines_until.
  def self.read_paragraph_lines reader, break_at_list, opts = {}
    opts.update break_on_blank_lines: true, break_on_list_continuation: true, preserve_last_line: true
    # choose the proc that decides where the paragraph stops
    stop_when =
      if Compliance.block_terminates_paragraph
        break_at_list ? StartOfBlockOrListProc : StartOfBlockProc
      else
        break_at_list ? StartOfListProc : NoOp
      end
    reader.read_lines_until opts, &stop_when
  end
  943 
  944   # Public: Determines whether this line is the start of a known delimited block.
  945   #
  946   # Returns the BlockMatchData (if return_match_data is true) or true (if return_match_data is false) if this line is
  947   # the start of a delimited block, otherwise nothing.
  def self.is_delimited_block? line, return_match_data = nil
    # cheap rejection: need at least two characters and a known two-character head
    line_len = line.length
    return unless line_len > 1 && DELIMITED_BLOCK_HEADS[line.slice 0, 2]
    if line_len == 2
      # a two-character line is used as its own tip (the open block delimiter)
      tip, tip_len = line, 2
    else
      # the tip is at most the first four characters of the line
      if line_len < 5
        tip, tip_len = line, line_len
      else
        tip_len = 4
        tip = line.slice 0, tip_len
      end
      if Compliance.markdown_syntax && (tip.start_with? '`')
        # fenced code: the tip must be exactly three backticks; anything after it is not part of the delimiter
        if tip_len == 4
          return if tip == '````' || (tip = tip.chop) != '```'
          line = tip
          line_len = tip_len = 3
        elsif tip != '```'
          return
        end
      elsif tip_len == 3
        # outside of fenced code, a three-character line is never a delimiter
        return
      end
    end
    # NOTE line matches the tip when delimiter is minimum length or fenced code
    context, masq = DELIMITED_BLOCKS[tip]
    return unless context
    # a longer delimiter must repeat the tail character through to the end of the line
    return unless line_len == tip_len || uniform?((line.slice 1, line_len), DELIMITED_BLOCK_TAILS[tip], line_len - 1)
    return_match_data ? (BlockMatchData.new context, masq, tip, line) : true
  end
  989 
  990   # whether a block supports compound content should be a config setting
  # if terminator is false, that means all the lines in the reader should be parsed
  992   # NOTE could invoke filter in here, before and after parsing
  # Internal: Read the source of a block from the reader and build the node that represents it.
  #
  # block_context - the Symbol context for the new block; also passed to the reader as :context
  # content_model - the Symbol content model; :skip reads the lines and returns nothing, :raw is
  #                 read like :simple, :compound causes child blocks to be parsed
  # terminator    - nil to read up to the end of the paragraph, false when the reader already
  #                 contains only the lines of this block, otherwise the value passed to the
  #                 reader as :terminator
  # parent        - the parent node for the new block
  # reader        - the Reader to pull lines from
  # attributes    - the Hash of block attributes; when an extension builds the block, this Hash
  #                 is replaced in place with the attributes of the returned node
  # options       - a Hash of options; only :extension (an object that responds to
  #                 process_method) is read here
  #
  # Returns the new block, or nothing if the content model is :skip or if the extension's
  # process method does not return a node.
  def self.build_block(block_context, content_model, terminator, parent, reader, attributes, options = {})
    if content_model == :skip
      skip_processing, parse_as_content_model = true, :simple
    elsif content_model == :raw
      skip_processing, parse_as_content_model = false, :simple
    else
      skip_processing, parse_as_content_model = false, content_model
    end

    if terminator.nil?
      # no terminator: the block is a (styled) paragraph
      if parse_as_content_model == :verbatim
        lines = reader.read_lines_until break_on_blank_lines: true, break_on_list_continuation: true
      else
        # a paragraph cannot hold child blocks, so compound content is downgraded to simple
        content_model = :simple if content_model == :compound
        # TODO we could also skip processing if we're able to detect reader is a BlockReader
        lines = read_paragraph_lines reader, false, skip_line_comments: true, skip_processing: skip_processing
        # QUESTION check for empty lines after grabbing lines for simple content model?
      end
      block_reader = nil
    elsif parse_as_content_model != :compound
      lines = reader.read_lines_until terminator: terminator, skip_processing: skip_processing, context: block_context, cursor: :at_mark
      block_reader = nil
    # terminator is false when reader has already been prepared
    elsif terminator == false
      lines = nil
      block_reader = reader
    else
      lines = nil
      # capture the cursor before the lines are consumed so the nested reader starts from it
      block_cursor = reader.cursor
      block_reader = Reader.new reader.read_lines_until(terminator: terminator, skip_processing: skip_processing, context: block_context, cursor: :at_mark), block_cursor
    end

    if content_model == :verbatim
      tab_size = (attributes['tabsize'] || parent.document.attributes['tabsize']).to_i
      if (indent = attributes['indent'])
        adjust_indentation! lines, indent.to_i, tab_size
      elsif tab_size > 0
        adjust_indentation! lines, -1, tab_size
      end
    elsif content_model == :skip
      # QUESTION should we still invoke process method if extension is specified?
      return
    end

    if (extension = options[:extension])
      # QUESTION do we want to delete the style?
      attributes.delete('style')
      # the extension receives a copy of the attributes (attributes.merge)
      if (block = extension.process_method[parent, block_reader || (Reader.new lines), attributes.merge])
        attributes.replace block.attributes
        # FIXME if the content model is set to compound, but we only have simple in this context, then
        # forcefully set the content_model to simple to prevent parsing blocks from children
        # TODO document this behavior!!
        # NOTE only a Block is asked for its lines; a process method may return another kind of
        # compound node (such as a list), and calling lines on it would raise NoMethodError
        if block.content_model == :compound && Block === block && !(lines = block.lines).empty?
          content_model = :compound
          block_reader = Reader.new lines
        end
      else
        return
      end
    else
      block = Block.new(parent, block_context, content_model: content_model, source: lines, attributes: attributes)
    end

    # reader is confined within boundaries of a delimited block, so look for
    # blocks until there are no more lines
    parse_blocks block_reader, block if content_model == :compound

    block
  end
 1062 
 1063   # Public: Parse blocks from this reader until there are no more lines.
 1064   #
 1065   # This method calls Parser#next_block until there are no more lines in the
 1066   # Reader. It does not consider sections because it's assumed the Reader only
 1067   # has lines which are within a delimited block region.
 1068   #
 1069   # reader - The Reader containing the lines to process
 1070   # parent - The parent Block to which to attach the parsed blocks
 1071   #
 1072   # Returns nothing.
 1073   def self.parse_blocks(reader, parent, attributes = nil)
 1074     if attributes
 1075       while ((block = next_block reader, parent, attributes.merge) && parent.blocks << block) || reader.has_more_lines?; end
 1076     else
 1077       while ((block = next_block reader, parent) && parent.blocks << block) || reader.has_more_lines?; end
 1078     end
 1079     nil
 1080   end
 1081 
 1082   # Internal: Parse and construct an ordered or unordered list at the current position of the Reader
 1083   #
 1084   # reader    - The Reader from which to retrieve the list
 1085   # list_type - A Symbol representing the list type (:olist for ordered, :ulist for unordered)
 1086   # parent    - The parent Block to which this list belongs
 1087   # style     - The block style assigned to this list (optional, default: nil)
 1088   #
 1089   # Returns the Block encapsulating the parsed unordered or ordered list
 1090   def self.parse_list reader, list_type, parent, style
 1091     list_block = List.new parent, list_type
 1092     list_rx = ListRxMap[list_type]
 1093 
 1094     while reader.has_more_lines? && list_rx =~ reader.peek_line
 1095       # NOTE parse_list_item will stop at sibling item or end of list; never sees ancestor items
 1096       if (list_item = parse_list_item reader, list_block, $~, $1, style)
 1097         list_block.items << list_item
 1098       end
 1099 
 1100       reader.skip_blank_lines || break
 1101     end
 1102 
 1103     list_block
 1104   end
 1105 
 1106   # Internal: Catalog any callouts found in the text, but don't process them
 1107   #
 1108   # text     - The String of text in which to look for callouts
 1109   # document - The current document in which the callouts are stored
 1110   #
 1111   # Returns A Boolean indicating whether callouts were found
 1112   def self.catalog_callouts(text, document)
 1113     found = false
 1114     autonum = 0
 1115     text.scan CalloutScanRx do
 1116       document.callouts.register $2 == '.' ? (autonum += 1).to_s : $2 unless $&.start_with? '\\'
 1117       # we have to mark as found even if it's escaped so it can be unescaped
 1118       found = true
 1119     end if text.include? '<'
 1120     found
 1121   end
 1122 
 1123   # Internal: Catalog a matched inline anchor.
 1124   #
 1125   # id       - The String id of the anchor
 1126   # reftext  - The optional String reference text of the anchor
 1127   # node     - The AbstractNode parent node of the anchor node
 1128   # location - The source location (file and line) where the anchor was found
 1129   # doc      - The document to which the node belongs; computed from node if not specified
 1130   #
 1131   # Returns nothing
 1132   def self.catalog_inline_anchor id, reftext, node, location, doc = node.document
 1133     reftext = doc.sub_attributes reftext if reftext && (reftext.include? ATTR_REF_HEAD)
 1134     unless doc.register :refs, [id, (Inline.new node, :anchor, reftext, type: :ref, id: id)]
 1135       location = location.cursor if Reader === location
 1136       logger.warn message_with_context %(id assigned to anchor already in use: #{id}), source_location: location
 1137     end
 1138     nil
 1139   end
 1140 
 1141   # Internal: Catalog any inline anchors found in the text (but don't convert)
 1142   #
 1143   # text     - The String text in which to look for inline anchors
 1144   # block    - The block in which the references should be searched
 1145   # document - The current Document on which the references are stored
 1146   #
 1147   # Returns nothing
 1148   def self.catalog_inline_anchors text, block, document, reader
 1149     text.scan InlineAnchorScanRx do
 1150       if (id = $1)
 1151         if (reftext = $2)
 1152           next if (reftext.include? ATTR_REF_HEAD) && (reftext = document.sub_attributes reftext).empty?
 1153         end
 1154       else
 1155         id = $3
 1156         if (reftext = $4)
 1157           reftext = reftext.gsub '\]', ']' if reftext.include? ']'
 1158           next if (reftext.include? ATTR_REF_HEAD) && (reftext = document.sub_attributes reftext).empty?
 1159         end
 1160       end
 1161       unless document.register :refs, [id, (Inline.new block, :anchor, reftext, type: :ref, id: id)]
 1162         location = reader.cursor_at_mark
 1163         if (offset = ($`.count LF) + (($&.start_with? LF) ? 1 : 0)) > 0
 1164           (location = location.dup).advance offset
 1165         end
 1166         logger.warn message_with_context %(id assigned to anchor already in use: #{id}), source_location: location
 1167       end
 1168     end if (text.include? '[[') || (text.include? 'or:')
 1169     nil
 1170   end
 1171 
 1172   # Internal: Catalog the bibliography inline anchor found in the start of the list item (but don't convert)
 1173   #
 1174   # id      - The String id of the anchor
 1175   # reftext - The optional String reference text of the anchor
 1176   # node    - The AbstractNode parent node of the anchor node
 1177   # reader  - The source Reader for the current Document, positioned at the current list item
 1178   #
 1179   # Returns nothing
 1180   def self.catalog_inline_biblio_anchor id, reftext, node, reader
 1181     # QUESTION should we sub attributes in reftext (like with regular anchors)?
 1182     unless node.document.register :refs, [id, (Inline.new node, :anchor, reftext && %([#{reftext}]), type: :bibref, id: id)]
 1183       logger.warn message_with_context %(id assigned to bibliography anchor already in use: #{id}), source_location: reader.cursor
 1184     end
 1185     nil
 1186   end
 1187 
 1188   # Internal: Parse and construct a description list Block from the current position of the Reader
 1189   #
 1190   # reader    - The Reader from which to retrieve the description list
 1191   # match     - The Regexp match for the head of the list
 1192   # parent    - The parent Block to which this description list belongs
 1193   #
 1194   # Returns the Block encapsulating the parsed description list
 1195   def self.parse_description_list reader, match, parent
 1196     list_block = List.new parent, :dlist
 1197     # detects a description list item that uses the same delimiter (::, :::, :::: or ;;)
 1198     sibling_pattern = DescriptionListSiblingRx[match[2]]
 1199     list_block.items << (current_pair = parse_list_item reader, list_block, match, sibling_pattern)
 1200 
 1201     while reader.has_more_lines? && sibling_pattern =~ reader.peek_line
 1202       next_pair = parse_list_item reader, list_block, $~, sibling_pattern
 1203       if current_pair[1]
 1204         list_block.items << (current_pair = next_pair)
 1205       else
 1206         current_pair[0] << next_pair[0][0]
 1207         current_pair[1] = next_pair[1]
 1208       end
 1209     end
 1210 
 1211     list_block
 1212   end
 1213 
 1214   # Internal: Parse and construct a callout list Block from the current position of the Reader and
 1215   # advance the document callouts catalog to the next list.
 1216   #
 1217   # reader   - The Reader from which to retrieve the callout list.
 1218   # match    - The Regexp match containing the head of the list.
 1219   # parent   - The parent Block to which this callout list belongs.
 1220   # callouts - The document callouts catalog.
 1221   #
 1222   # Returns the Block that represents the parsed callout list.
 1223   def self.parse_callout_list reader, match, parent, callouts
 1224     list_block = List.new(parent, :colist)
 1225     next_index = 1
 1226     autonum = 0
 1227     # NOTE skip the match on the first time through as we've already done it (emulates begin...while)
 1228     while match || ((match = CalloutListRx.match reader.peek_line) && reader.mark)
 1229       if (num = match[1]) == '.'
 1230         num = (autonum += 1).to_s
 1231       end
 1232       # might want to move this check to a validate method
 1233       unless num == next_index.to_s
 1234         logger.warn message_with_context %(callout list item index: expected #{next_index}, got #{num}), source_location: reader.cursor_at_mark
 1235       end
 1236       if (list_item = parse_list_item reader, list_block, match, '<1>')
 1237         list_block.items << list_item
 1238         if (coids = callouts.callout_ids list_block.items.size).empty?
 1239           logger.warn message_with_context %(no callout found for <#{list_block.items.size}>), source_location: reader.cursor_at_mark
 1240         else
 1241           list_item.attributes['coids'] = coids
 1242         end
 1243       end
 1244       next_index += 1
 1245       match = nil
 1246     end
 1247 
 1248     callouts.next_list
 1249     list_block
 1250   end
 1251 
 1252   # Internal: Parse and construct the next ListItem (unordered, ordered, or callout list) or next
 1253   # term ListItem and description ListItem pair (description list) for the specified list Block.
 1254   #
 1255   # First, collect and process all the lines that constitute the next list item for the specified
 1256   # list (according to its type). Next, create a ListItem (in the case of a description list, a
 1257   # description ListItem), parse the lines into blocks, and associate those blocks with that
 1258   # ListItem. Finally, fold the first block into the item's text attribute according to rules
 1259   # described in ListItem.
 1260   #
 1261   # reader        - The Reader from which to retrieve the next list item
 1262   # list_block    - The parent list Block for this ListItem. Also provides access to the list type.
 1263   # match         - The MatchData that contains the list item marker and first line text of the ListItem
 1264   # sibling_trait - The trait to match a sibling list item. For ordered and unordered lists, this is
 1265   #                 a String marker (e.g., '**' or 'ii)'). For description lists, this is a Regexp
 1266   #                 marker pattern.
 1267   # style         - The block style assigned to this list (optional, default: nil)
 1268   #
 1269   # Returns the next ListItem or [[ListItem], ListItem] pair (description list) for the parent list Block.
 1270   def self.parse_list_item(reader, list_block, match, sibling_trait, style = nil)
 1271     if (list_type = list_block.context) == :dlist
 1272       dlist = true
 1273       list_term = ListItem.new(list_block, (term_text = match[1]))
 1274       if term_text.start_with?('[[') && LeadingInlineAnchorRx =~ term_text
 1275         catalog_inline_anchor $1, ($2 || $'.lstrip), list_term, reader
 1276       end
 1277       has_text = true if (item_text = match[3])
 1278       list_item = ListItem.new(list_block, item_text)
 1279       if list_block.document.sourcemap
 1280         list_term.source_location = reader.cursor
 1281         if has_text
 1282           list_item.source_location = list_term.source_location
 1283         else
 1284           sourcemap_assignment_deferred = true
 1285         end
 1286       end
 1287     else
 1288       has_text = true
 1289       list_item = ListItem.new(list_block, (item_text = match[2]))
 1290       list_item.source_location = reader.cursor if list_block.document.sourcemap
 1291       if list_type == :ulist
 1292         list_item.marker = sibling_trait
 1293         if item_text.start_with?('[')
 1294           if style && style == 'bibliography'
 1295             if InlineBiblioAnchorRx =~ item_text
 1296               catalog_inline_biblio_anchor $1, $2, list_item, reader
 1297             end
 1298           elsif item_text.start_with?('[[')
 1299             if LeadingInlineAnchorRx =~ item_text
 1300               catalog_inline_anchor $1, $2, list_item, reader
 1301             end
 1302           elsif item_text.start_with?('[ ] ', '[x] ', '[*] ')
 1303             list_block.set_option 'checklist'
 1304             list_item.attributes['checkbox'] = ''
 1305             list_item.attributes['checked'] = '' unless item_text.start_with? '[ '
 1306             list_item.text = item_text.slice(4, item_text.length)
 1307           end
 1308         end
 1309       elsif list_type == :olist
 1310         sibling_trait, implicit_style = resolve_ordered_list_marker(sibling_trait, (ordinal = list_block.items.size), true, reader)
 1311         list_item.marker = sibling_trait
 1312         if ordinal == 0 && !style
 1313           # using list level makes more sense, but we don't track it
 1314           # basing style on marker level is compliant with AsciiDoc Python
 1315           list_block.style = implicit_style || ((ORDERED_LIST_STYLES[sibling_trait.length - 1] || 'arabic').to_s)
 1316         end
 1317         if item_text.start_with?('[[') && LeadingInlineAnchorRx =~ item_text
 1318           catalog_inline_anchor $1, $2, list_item, reader
 1319         end
 1320       else # :colist
 1321         list_item.marker = sibling_trait
 1322         if item_text.start_with?('[[') && LeadingInlineAnchorRx =~ item_text
 1323           catalog_inline_anchor $1, $2, list_item, reader
 1324         end
 1325       end
 1326     end
 1327 
 1328     # first skip the line with the marker / term (it gets put back onto the reader by next_block)
 1329     reader.shift
 1330     block_cursor = reader.cursor
 1331     list_item_reader = Reader.new read_lines_for_list_item(reader, list_type, sibling_trait, has_text), block_cursor
 1332     if list_item_reader.has_more_lines?
 1333       list_item.source_location = block_cursor if sourcemap_assignment_deferred
 1334       # NOTE peek on the other side of any comment lines
 1335       comment_lines = list_item_reader.skip_line_comments
 1336       if (subsequent_line = list_item_reader.peek_line)
 1337         list_item_reader.unshift_lines comment_lines unless comment_lines.empty?
 1338         unless subsequent_line.empty?
 1339           content_adjacent = true
 1340           # treat lines as paragraph text if continuation does not connect first block (i.e., has_text = nil)
 1341           has_text = nil unless dlist
 1342         end
 1343       end
 1344 
 1345       # reader is confined to boundaries of list, which means only blocks will be found (no sections)
 1346       if (block = next_block(list_item_reader, list_item, {}, text_only: has_text ? nil : true, list_type: list_type))
 1347         list_item.blocks << block
 1348       end
 1349 
 1350       while list_item_reader.has_more_lines?
 1351         if (block = next_block(list_item_reader, list_item, {}, list_type: list_type))
 1352           list_item.blocks << block
 1353         end
 1354       end
 1355 
 1356       list_item.fold_first if content_adjacent && (first_block = list_item.blocks[0]) && first_block.context == :paragraph
 1357     end
 1358 
 1359     dlist ? [[list_term], (list_item.text? || list_item.blocks? ? list_item : nil)] : list_item
 1360   end
 1361 
 1362   # Internal: Collect the lines belonging to the current list item, navigating
 1363   # through all the rules that determine what comprises a list item.
 1364   #
 1365   # Grab lines until a sibling list item is found, or the block is broken by a
 1366   # terminator (such as a line comment). Description lists are more greedy if
 1367   # they don't have optional inline item text...they want that text
 1368   #
 1369   # reader          - The Reader from which to retrieve the lines.
 1370   # list_type       - The Symbol context of the list (:ulist, :olist, :colist or :dlist)
 1371   # sibling_trait   - A Regexp that matches a sibling of this list item or String list marker
 1372   #                   of the items in this list (default: nil)
 1373   # has_text        - Whether the list item has text defined inline (always true except for description lists)
 1374   #
 1375   # Returns an Array of lines belonging to the current list item.
  def self.read_lines_for_list_item(reader, list_type, sibling_trait = nil, has_text = true)
    # Overview: lines are read one at a time and appended to buffer until a sibling
    # item or a list-breaking construct is found. The line that stopped the loop is
    # pushed back onto the reader, and trailing blank lines (plus one trailing
    # list continuation) are trimmed from buffer before it is returned.
    buffer = []

    # three states for continuation: :inactive, :active & :frozen
    # :frozen signifies we've detected sequential continuation lines &
    # continuation is not permitted until reset
    continuation = :inactive

    # if we are within a nested list, we don't throw away the list
    # continuation marks because they will be processed when grabbing
    # the lines for those nested lists
    within_nested_list = false

    # a detached continuation is a list continuation that follows a blank line
    # it gets associated with the outermost block
    # (holds the buffer index of that continuation line, or nil if none was seen)
    detached_continuation = nil

    dlist = list_type == :dlist

    while reader.has_more_lines?
      # NOTE this_line is reset to nil at the end of every iteration that consumes the line;
      # when the loop is left via break, this_line still holds the unconsumed line
      this_line = reader.read_line

      # if we've arrived at a sibling item in this list, we've captured
      # the complete list item and can begin processing it
      # the remainder of the method determines whether we've reached
      # the termination of the list
      break if is_sibling_list_item?(this_line, list_type, sibling_trait)

      prev_line = buffer.empty? ? nil : buffer[-1]

      if prev_line == LIST_CONTINUATION
        if continuation == :inactive
          continuation = :active
          has_text = true
          # outside of a nested list, the continuation marker is blanked out in the buffer
          buffer[-1] = '' unless within_nested_list
        end

        # dealing with adjacent list continuations (which is really a syntax error)
        if this_line == LIST_CONTINUATION
          # only the first repeated continuation is kept; further ones are dropped
          if continuation != :frozen
            continuation = :frozen
            buffer << this_line
          end
          this_line = nil
          next
        end
      end

      # a delimited block immediately breaks the list unless preceded
      # by a list continuation (they are harsh like that ;0)
      if (match = is_delimited_block?(this_line, true))
        if continuation == :active
          buffer << this_line
          # grab all the lines in the block, leaving the delimiters in place
          # we're being more strict here about the terminator, but I think that's a good thing
          buffer.concat reader.read_lines_until(terminator: match.terminator, read_last_line: true, context: nil)
          continuation = :inactive
        else
          break
        end
      # technically BlockAttributeLineRx only breaks if ensuing line is not a list item
      # which really means BlockAttributeLineRx only breaks if it's acting as a block delimiter
      # FIXME to be AsciiDoc compliant, we shouldn't break if style in attribute line is "literal" (i.e., [literal])
      elsif dlist && continuation != :active && (BlockAttributeLineRx.match? this_line)
        break
      else
        if continuation == :active && !this_line.empty?
          # literal paragraphs have special considerations (and this is one of
          # two entry points into one)
          # if we don't process it as a whole, then a line in it that looks like a
          # list item will throw off the exit from it
          if LiteralParagraphRx.match? this_line
            # put the line back so the whole paragraph is read in a single pass
            reader.unshift_line this_line
            if dlist
              # we may be in an indented list disguised as a literal paragraph
              # so we need to make sure we don't slurp up a legitimate sibling
              buffer.concat reader.read_lines_until(preserve_last_line: true, break_on_blank_lines: true, break_on_list_continuation: true) {|line| is_sibling_list_item? line, list_type, sibling_trait }
            else
              buffer.concat reader.read_lines_until(preserve_last_line: true, break_on_blank_lines: true, break_on_list_continuation: true)
            end
            continuation = :inactive
          # let block metadata play out until we find the block
          # (continuation stays :active so the block that follows is still attached)
          elsif (BlockTitleRx.match? this_line) || (BlockAttributeLineRx.match? this_line) || (AttributeEntryRx.match? this_line)
            buffer << this_line
          else
            # once inside a nested list, only a description list entry is checked for here
            if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).find {|ctx| ListRxMap[ctx].match? this_line }
              within_nested_list = true
              # NOTE(review): match? does not update the regexp globals, so $3 here does not
              # come from the ListRxMap check above — confirm which match it is meant to read
              if nested_list_type == :dlist && $3.nil_or_empty?
                # get greedy again
                has_text = false
              end
            end
            buffer << this_line
            continuation = :inactive
          end
        elsif prev_line && prev_line.empty?
          # advance to the next line of content
          if this_line.empty?
            # stop reading if we reach eof
            # (this_line is assigned the result of the && chain, so it is falsy if either call is)
            break unless (this_line = reader.skip_blank_lines && reader.read_line)
            # stop reading if we hit a sibling list item
            break if is_sibling_list_item? this_line, list_type, sibling_trait
          end

          if this_line == LIST_CONTINUATION
            # remember where the detached continuation sits so it can be blanked out after the loop
            detached_continuation = buffer.size
            buffer << this_line
          else
            # has_text is only relevant for dlist, which is more greedy until it has text for an item
            # for all other lists, has_text is always true
            # in this block, we have to see whether we stay in the list
            if has_text
              # TODO any way to combine this with the check after skipping blank lines?
              if is_sibling_list_item?(this_line, list_type, sibling_trait)
                break
              elsif nested_list_type = NESTABLE_LIST_CONTEXTS.find {|ctx| ListRxMap[ctx] =~ this_line }
                buffer << this_line
                within_nested_list = true
                # $3 is the third capture group of the ListRxMap pattern that just matched
                if nested_list_type == :dlist && $3.nil_or_empty?
                  # get greedy again
                  has_text = false
                end
              # slurp up any literal paragraph offset by blank lines
              # NOTE we have to check for indented list items first
              elsif LiteralParagraphRx.match? this_line
                reader.unshift_line this_line
                if dlist
                  # we may be in an indented list disguised as a literal paragraph
                  # so we need to make sure we don't slurp up a legitimate sibling
                  buffer.concat reader.read_lines_until(preserve_last_line: true, break_on_blank_lines: true, break_on_list_continuation: true) {|line| is_sibling_list_item? line, list_type, sibling_trait }
                else
                  buffer.concat reader.read_lines_until(preserve_last_line: true, break_on_blank_lines: true, break_on_list_continuation: true)
                end
              else
                # any other content after a blank line ends the list item
                break
              end
            else # only dlist in need of item text, so slurp it up!
              # pop the blank line so it's not interpreted as a list continuation
              buffer.pop unless within_nested_list
              buffer << this_line
              has_text = true
            end
          end
        else
          # the line directly follows other content (no blank line, no active continuation)
          has_text = true unless this_line.empty?
          if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).find {|ctx| ListRxMap[ctx] =~ this_line }
            within_nested_list = true
            if nested_list_type == :dlist && $3.nil_or_empty?
              # get greedy again
              has_text = false
            end
          end
          buffer << this_line
        end
      end
      this_line = nil
    end

    # hand the line that terminated the item back to the reader
    reader.unshift_line this_line if this_line

    # neutralize the detached continuation by turning it into a blank line
    buffer[detached_continuation] = '' if detached_continuation

    until buffer.empty?
      # strip trailing blank lines to prevent empty blocks
      if (last_line = buffer[-1]).empty?
        buffer.pop
      else
        # drop optional trailing continuation
        # (a blank line would have served the same purpose in the document)
        buffer.pop if last_line == LIST_CONTINUATION
        break
      end
    end

    buffer
  end
 1552 
 1553   # Internal: Initialize a new Section object and assign any attributes provided
 1554   #
 1555   # The information for this section is retrieved by parsing the lines at the
 1556   # current position of the reader.
 1557   #
 1558   # reader     - the source reader
 1559   # parent     - the parent Section or Document of this Section
 1560   # attributes - a Hash of attributes to assign to this section (default: {})
 1561   #
 1562   # Returns the section [Block]
 1563   def self.initialize_section reader, parent, attributes = {}
 1564     document = parent.document
 1565     book = (doctype = document.doctype) == 'book'
 1566     source_location = reader.cursor if document.sourcemap
 1567     sect_style = attributes[1]
 1568     sect_id, sect_reftext, sect_title, sect_level, sect_atx = parse_section_title reader, document, attributes['id']
 1569 
 1570     if sect_reftext
 1571       attributes['reftext'] = sect_reftext
 1572     else
 1573       sect_reftext = attributes['reftext']
 1574     end
 1575 
 1576     if sect_style
 1577       if book && sect_style == 'abstract'
 1578         sect_name, sect_level = 'chapter', 1
 1579       elsif (sect_style.start_with? 'sect') && (SectionLevelStyleRx.match? sect_style)
 1580         sect_name = 'section'
 1581       else
 1582         sect_name, sect_special = sect_style, true
 1583         sect_level = 1 if sect_level == 0
 1584         sect_numbered = sect_name == 'appendix'
 1585       end
 1586     elsif book
 1587       sect_name = sect_level == 0 ? 'part' : (sect_level > 1 ? 'section' : 'chapter')
 1588     elsif doctype == 'manpage' && (sect_title.casecmp 'synopsis') == 0
 1589       sect_name, sect_special = 'synopsis', true
 1590     else
 1591       sect_name = 'section'
 1592     end
 1593 
 1594     section = Section.new parent, sect_level
 1595     section.id, section.title, section.sectname, section.source_location = sect_id, sect_title, sect_name, source_location
 1596     if sect_special
 1597       section.special = true
 1598       if sect_numbered
 1599         section.numbered = true
 1600       elsif document.attributes['sectnums'] == 'all'
 1601         section.numbered = book && sect_level == 1 ? :chapter : true
 1602       end
 1603     elsif document.attributes['sectnums'] && sect_level > 0
 1604       # NOTE a special section here is guaranteed to be nested in another section
 1605       section.numbered = section.special ? parent.numbered && true : true
 1606     elsif book && sect_level == 0 && document.attributes['partnums']
 1607       section.numbered = true
 1608     end
 1609 
 1610     # generate an ID if one was not embedded or specified as anchor above section title
 1611     if (id = section.id || (section.id = (document.attributes.key? 'sectids') ? (generated_id = Section.generate_id section.title, document) : nil))
 1612       # convert title to resolve attributes while in scope
 1613       section.title if sect_title.include? ATTR_REF_HEAD unless generated_id
 1614       unless document.register :refs, [id, section]
 1615         logger.warn message_with_context %(id assigned to section already in use: #{id}), source_location: (reader.cursor_at_line reader.lineno - (sect_atx ? 1 : 2))
 1616       end
 1617     end
 1618 
 1619     section.update_attributes(attributes)
 1620     reader.skip_blank_lines
 1621 
 1622     section
 1623   end
 1624 
 1625   # Internal: Checks if the next line on the Reader is a section title
 1626   #
 1627   # reader     - the source Reader
 1628   # attributes - a Hash of attributes collected above the current line
 1629   #
 1630   # Returns the Integer section level if the Reader is positioned at a section title or nil otherwise
 1631   def self.is_next_line_section?(reader, attributes)
 1632     if (style = attributes[1]) && (style == 'discrete' || style == 'float')
 1633       return
 1634     elsif Compliance.underline_style_section_titles
 1635       next_lines = reader.peek_lines 2, style && style == 'comment'
 1636       is_section_title?(next_lines[0] || '', next_lines[1])
 1637     else
 1638       atx_section_title?(reader.peek_line || '')
 1639     end
 1640   end
 1641 
 1642   # Internal: Convenience API for checking if the next line on the Reader is the document title
 1643   #
 1644   # reader      - the source Reader
 1645   # attributes  - a Hash of attributes collected above the current line
 1646   # leveloffset - an Integer (or integer String value) that represents the current leveloffset
 1647   #
 1648   # returns true if the Reader is positioned at the document title, false otherwise
 1649   def self.is_next_line_doctitle? reader, attributes, leveloffset
 1650     if leveloffset
 1651       (sect_level = is_next_line_section? reader, attributes) && (sect_level + leveloffset.to_i == 0)
 1652     else
 1653       (is_next_line_section? reader, attributes) == 0
 1654     end
 1655   end
 1656 
 1657   # Public: Checks whether the lines given are an atx or setext section title.
 1658   #
 1659   # line1 - [String] candidate title.
 1660   # line2 - [String] candidate underline (default: nil).
 1661   #
 1662   # Returns the [Integer] section level if these lines are a section title, otherwise nothing.
 1663   def self.is_section_title?(line1, line2 = nil)
 1664     atx_section_title?(line1) || (line2.nil_or_empty? ? nil : setext_section_title?(line1, line2))
 1665   end
 1666 
 1667   # Checks whether the line given is an atx section title.
 1668   #
 1669   # The level returned is 1 less than number of leading markers.
 1670   #
 1671   # line - [String] candidate title with leading atx marker.
 1672   #
 1673   # Returns the [Integer] section level if this line is an atx section title, otherwise nothing.
 1674   def self.atx_section_title? line
 1675     if Compliance.markdown_syntax ? ((line.start_with? '=', '#') && ExtAtxSectionTitleRx =~ line) :
 1676         ((line.start_with? '=') && AtxSectionTitleRx =~ line)
 1677       $1.length - 1
 1678     end
 1679   end
 1680 
 1681   # Checks whether the lines given are a setext section title.
 1682   #
 1683   # line1 - [String] candidate title
 1684   # line2 - [String] candidate underline
 1685   #
 1686   # Returns the [Integer] section level if these lines are a setext section title, otherwise nothing.
 1687   def self.setext_section_title? line1, line2
 1688     if (level = SETEXT_SECTION_LEVELS[line2_ch0 = line2.chr]) && (uniform? line2, line2_ch0, (line2_len = line2.length)) &&
 1689         (SetextSectionTitleRx.match? line1) && (line1.length - line2_len).abs < 2
 1690       level
 1691     end
 1692   end
 1693 
 1694   # Internal: Parse the section title from the current position of the reader
 1695   #
 1696   # Parse an atx (single-line) or setext (underlined) section title. After this method is called,
 1697   # the Reader will be positioned at the line after the section title.
 1698   #
 1699   # For efficiency, we don't reuse methods internally that check for a section title.
 1700   #
 1701   # reader   - the source [Reader], positioned at a section title.
 1702   # document - the current [Document].
 1703   #
 1704   # Examples
 1705   #
 1706   #   reader.lines
 1707   #   # => ["Foo", "~~~"]
 1708   #
 1709   #   id, reftext, title, level, atx = parse_section_title(reader, document)
 1710   #
 1711   #   title
 1712   #   # => "Foo"
 1713   #   level
 1714   #   # => 2
 1715   #   id
 1716   #   # => nil
 1717   #   atx
 1718   #   # => false
 1719   #
 1720   #   line1
 1721   #   # => "==== Foo"
 1722   #
 1723   #   id, reftext, title, level, atx = parse_section_title(reader, document)
 1724   #
 1725   #   title
 1726   #   # => "Foo"
 1727   #   level
 1728   #   # => 3
 1729   #   id
 1730   #   # => nil
 1731   #   atx
 1732   #   # => true
 1733   #
 1734   # Returns a 5-element [Array] containing the id (String), reftext (String),
 1735   # title (String), level (Integer), and flag (Boolean) indicating whether an
 1736   # atx section title was matched. Raises a RuntimeError if no section title is recognized.
 1737   def self.parse_section_title(reader, document, sect_id = nil)
 1738     sect_reftext = nil
 1739     line1 = reader.read_line
 1740 
 1741     if Compliance.markdown_syntax ? ((line1.start_with? '=', '#') && ExtAtxSectionTitleRx =~ line1) :
 1742         ((line1.start_with? '=') && AtxSectionTitleRx =~ line1)
 1743       # NOTE level is 1 less than number of line markers
 1744       sect_level, sect_title, atx = $1.length - 1, $2, true
 1745       if sect_title.end_with?(']]') && InlineSectionAnchorRx =~ sect_title && !$1 # escaped
 1746         sect_title, sect_id, sect_reftext = (sect_title.slice 0, sect_title.length - $&.length), $2, $3
 1747       end unless sect_id
 1748     elsif Compliance.underline_style_section_titles && (line2 = reader.peek_line(true)) &&
 1749         (sect_level = SETEXT_SECTION_LEVELS[line2_ch0 = line2.chr]) && (uniform? line2, line2_ch0, (line2_len = line2.length)) &&
 1750         (sect_title = SetextSectionTitleRx =~ line1 && $1) && (line1.length - line2_len).abs < 2
 1751       atx = false
 1752       if sect_title.end_with?(']]') && InlineSectionAnchorRx =~ sect_title && !$1 # escaped
 1753         sect_title, sect_id, sect_reftext = (sect_title.slice 0, sect_title.length - $&.length), $2, $3
 1754       end unless sect_id
 1755       reader.shift
 1756     else
 1757       raise %(Unrecognized section at #{reader.cursor_at_prev_line})
 1758     end
 1759     if document.attr? 'leveloffset'
 1760       sect_level += (document.attr 'leveloffset').to_i
 1761       sect_level = 0 if sect_level < 0
 1762     end
 1763     [sect_id, sect_reftext, sect_title, sect_level, atx]
 1764   end
 1765 
 1766   # Public: Consume and parse the two header lines (line 1 = author info, line 2 = revision info).
 1767   #
 1768   # Returns the Hash of header metadata. If a Document object is supplied, the metadata
 1769   # is applied directly to the attributes of the Document.
 1770   #
 1771   # reader   - the Reader holding the source lines of the document
 1772   # document - the Document we are building (default: nil)
 1773   #
 1774   # Examples
 1775   #
 1776   #  data = ["Author Name <author@example.org>\n", "v1.0, 2012-12-21: Coincide w/ end of world.\n"]
 1777   #  parse_header_metadata(Reader.new data, nil, normalize: true)
 1778   #  # => { 'author' => 'Author Name', 'firstname' => 'Author', 'lastname' => 'Name', 'email' => 'author@example.org',
 1779   #  #       'revnumber' => '1.0', 'revdate' => '2012-12-21', 'revremark' => 'Coincide w/ end of world.' }
  def self.parse_header_metadata(reader, document = nil)
    # Overview: reads the optional author line and the optional revision line that follow,
    # processing attribute entries before, between and after them. When a document is
    # given, the parsed values are also stored in its attributes, but only for keys that
    # are not already set; author attributes set explicitly are then reconciled with the
    # implicit author line.
    doc_attrs = document && document.attributes
    # NOTE this will discard any comment lines, but not skip blank lines
    process_attribute_entries reader, document

    # NOTE(review): implicit_authors is seeded through this chained assignment rather than
    # with nil; it is only reassigned below when an author line is parsed for a document
    metadata, implicit_author, implicit_authorinitials = implicit_authors = {}, nil, nil

    # the author line must immediately follow (a blank line means there is no author line)
    if reader.has_more_lines? && !reader.next_line_empty?
      unless (author_metadata = process_authors reader.read_line).empty?
        if document
          # apply header subs and assign to document
          author_metadata.each do |key, val|
            # NOTE the attributes substitution only applies for the email record
            # (non-String values are stored as is; keys already set on the document are left alone)
            doc_attrs[key] = ::String === val ? (document.apply_header_subs val) : val unless doc_attrs.key? key
          end

          # remember the values now in effect so explicit overrides can be detected below
          implicit_author = doc_attrs['author']
          implicit_authorinitials = doc_attrs['authorinitials']
          implicit_authors = doc_attrs['authors']
        end

        metadata = author_metadata
      end

      # NOTE this will discard any comment lines, but not skip blank lines
      process_attribute_entries reader, document

      rev_metadata = {}

      if reader.has_more_lines? && !reader.next_line_empty?
        rev_line = reader.read_line
        if (match = RevisionInfoLineRx.match(rev_line))
          rev_metadata['revnumber'] = match[1].rstrip if match[1]
          unless (component = match[2].strip).empty?
            # version must begin with 'v' if date is absent
            if !match[1] && (component.start_with? 'v')
              # drop the leading 'v' from the revision number
              rev_metadata['revnumber'] = component.slice 1, component.length
            else
              rev_metadata['revdate'] = component
            end
          end
          rev_metadata['revremark'] = match[3].rstrip if match[3]
        else
          # throw it back
          reader.unshift_line rev_line
        end
      end

      unless rev_metadata.empty?
        if document
          # apply header subs and assign to document
          rev_metadata.each do |key, val|
            unless doc_attrs.key? key
              doc_attrs[key] = document.apply_header_subs val
            end
          end
        end

        metadata.update rev_metadata
      end

      # NOTE this will discard any comment lines, but not skip blank lines
      process_attribute_entries reader, document

      reader.skip_blank_lines
    else
      author_metadata = {}
    end

    # process author attribute entries that override (or stand in for) the implicit author line
    if document
      if doc_attrs.key?('author') && (author_line = doc_attrs['author']) != implicit_author
        # do not allow multiple, process as names only
        author_metadata = process_authors author_line, true, false
        # keep an authorinitials value that no longer matches the one recorded from the author line
        author_metadata.delete 'authorinitials' if doc_attrs['authorinitials'] != implicit_authorinitials
      elsif doc_attrs.key?('authors') && (author_line = doc_attrs['authors']) != implicit_authors
        # allow multiple, process as names only
        author_metadata = process_authors author_line, true
      else
        # walk the indexed author attributes (author_1, author_2, ...) until one is missing
        authors, author_idx, author_key, explicit, sparse = [], 1, 'author_1', false, false
        while doc_attrs.key? author_key
          # only use indexed author attribute if value is different
          # leaves corner case if line matches with underscores converted to spaces; use double space to force
          if (author_override = doc_attrs[author_key]) == author_metadata[author_key]
            # unchanged entry; nil is a placeholder that is filled in below if needed
            authors << nil
            sparse = true
          else
            authors << author_override
            explicit = true
          end
          author_key = %(author_#{author_idx += 1})
        end
        if explicit
          # rebuild implicit author names to reparse
          authors.each_with_index do |author, idx|
            unless author
              # spaces inside each name part are replaced with underscores before the parts are joined
              authors[idx] = [
                author_metadata[%(firstname_#{name_idx = idx + 1})],
                author_metadata[%(middlename_#{name_idx})],
                author_metadata[%(lastname_#{name_idx})]
              ].compact.map {|it| it.tr ' ', '_' }.join ' '
            end
          end if sparse
          # process as names only
          author_metadata = process_authors authors, true, false
        else
          author_metadata = {}
        end
      end

      if author_metadata.empty?
        # no explicit author metadata to apply; authorcount is set to 0 on the document and,
        # unless already present, in the returned metadata
        metadata['authorcount'] ||= (doc_attrs['authorcount'] = 0)
      else
        doc_attrs.update author_metadata

        # special case
        # (mirror email_1 as email when email is not set)
        if !doc_attrs.key?('email') && doc_attrs.key?('email_1')
          doc_attrs['email'] = doc_attrs['email_1']
        end
      end
    end

    metadata
  end
 1904 
 1905   # Internal: Parse the author line into a Hash of author metadata
 1906   #
 1907   # author_line  - the String author line
 1908   # names_only   - a Boolean flag that indicates whether to process line as
 1909   #                names only or names with emails (default: false)
 1910   # multiple     - a Boolean flag that indicates whether to process multiple
 1911   #                semicolon-separated entries in the author line (default: true)
 1912   #
 1913   # returns a Hash of author metadata
 1914   def self.process_authors author_line, names_only = false, multiple = true
 1915     author_metadata = {}
 1916     author_idx = 0
 1917     (multiple && (author_line.include? ';') ? (author_line.split AuthorDelimiterRx) : [*author_line]).each do |author_entry|
 1918       next if author_entry.empty?
 1919       key_map = {}
 1920       if (author_idx += 1) == 1
 1921         AuthorKeys.each {|key| key_map[key.to_sym] = key }
 1922       else
 1923         AuthorKeys.each {|key| key_map[key.to_sym] = %(#{key}_#{author_idx}) }
 1924       end
 1925 
 1926       if names_only # when parsing an attribute value
 1927         # QUESTION should we rstrip author_entry?
 1928         if author_entry.include? '<'
 1929           author_metadata[key_map[:author]] = author_entry.tr('_', ' ')
 1930           author_entry = author_entry.gsub XmlSanitizeRx, ''
 1931         end
 1932         # NOTE split names and collapse repeating whitespace (split drops any leading whitespace)
 1933         if (segments = author_entry.split nil, 3).size == 3
 1934           segments << (segments.pop.squeeze ' ')
 1935         end
 1936       elsif (match = AuthorInfoLineRx.match(author_entry))
 1937         (segments = match.to_a).shift
 1938       end
 1939 
 1940       if segments
 1941         author = author_metadata[key_map[:firstname]] = fname = segments[0].tr('_', ' ')
 1942         author_metadata[key_map[:authorinitials]] = fname.chr
 1943         if segments[1]
 1944           if segments[2]
 1945             author_metadata[key_map[:middlename]] = mname = segments[1].tr('_', ' ')
 1946             author_metadata[key_map[:lastname]] = lname = segments[2].tr('_', ' ')
 1947             author = fname + ' ' + mname + ' ' + lname
 1948             author_metadata[key_map[:authorinitials]] = %(#{fname.chr}#{mname.chr}#{lname.chr})
 1949           else
 1950             author_metadata[key_map[:lastname]] = lname = segments[1].tr('_', ' ')
 1951             author = fname + ' ' + lname
 1952             author_metadata[key_map[:authorinitials]] = %(#{fname.chr}#{lname.chr})
 1953           end
 1954         end
 1955         author_metadata[key_map[:author]] ||= author
 1956         author_metadata[key_map[:email]] = segments[3] unless names_only || !segments[3]
 1957       else
 1958         author_metadata[key_map[:author]] = author_metadata[key_map[:firstname]] = fname = author_entry.squeeze(' ').strip
 1959         author_metadata[key_map[:authorinitials]] = fname.chr
 1960       end
 1961 
 1962       if author_idx == 1
 1963         author_metadata['authors'] = author_metadata[key_map[:author]]
 1964       else
 1965         # only assign the _1 attributes once we see the second author
 1966         if author_idx == 2
 1967           AuthorKeys.each {|key| author_metadata[%(#{key}_1)] = author_metadata[key] if author_metadata.key? key }
 1968         end
 1969         author_metadata['authors'] = %(#{author_metadata['authors']}, #{author_metadata[key_map[:author]]})
 1970       end
 1971     end
 1972 
 1973     author_metadata['authorcount'] = author_idx
 1974     author_metadata
 1975   end
 1976 
 1977   # Internal: Parse lines of metadata until a line of metadata is not found.
 1978   #
 1979   # This method processes sequential lines containing block metadata, ignoring
 1980   # blank lines and comments.
 1981   #
 1982   # reader     - the source reader
 1983   # document   - the current Document
 1984   # attributes - a Hash of attributes in which any metadata found will be stored (default: {})
 1985   # options    - a Hash of options to control processing: (default: {})
 1986   #              *  :text_only indicates that parser is only looking for text content
 1987   #                   and thus the block title should not be captured
 1988   #
 1989   # returns the Hash of attributes including any metadata found
 1990   def self.parse_block_metadata_lines reader, document, attributes = {}, options = {}
 1991     while parse_block_metadata_line reader, document, attributes, options
 1992       # discard the line just processed
 1993       reader.shift
 1994       reader.skip_blank_lines || break
 1995     end
 1996     attributes
 1997   end
 1998 
 1999   # Internal: Parse the next line if it contains metadata for the following block
 2000   #
 2001   # This method handles lines with the following content:
 2002   #
 2003   # * line or block comment
 2004   # * anchor
 2005   # * attribute list
 2006   # * block title
 2007   #
 2008   # Any attributes found will be inserted into the attributes argument.
 2009   # If the line contains block metadata, the method returns true, otherwise false.
 2010   #
 2011   # reader     - the source reader
 2012   # document   - the current Document
 2013   # attributes - a Hash of attributes in which any metadata found will be stored
 2014   # options    - a Hash of options to control processing: (default: {})
 2015   #              *  :text_only indicates the parser is only looking for text content,
 2016   #                   thus neither a block title nor an attribute entry should be captured
 2017   #
 2018   # returns true if the line contains metadata, otherwise falsy
 2019   def self.parse_block_metadata_line reader, document, attributes, options = {}
 2020     if (next_line = reader.peek_line) &&
 2021         (options[:text_only] ? (next_line.start_with? '[', '/') : (normal = next_line.start_with? '[', '.', '/', ':'))
 2022       if next_line.start_with? '['
 2023         if next_line.start_with? '[['
 2024           if (next_line.end_with? ']]') && BlockAnchorRx =~ next_line
 2025             # NOTE registration of id and reftext is deferred until block is processed
 2026             attributes['id'] = $1
 2027             if (reftext = $2)
 2028               attributes['reftext'] = (reftext.include? ATTR_REF_HEAD) ? (document.sub_attributes reftext) : reftext
 2029             end
 2030             return true
 2031           end
 2032         elsif (next_line.end_with? ']') && BlockAttributeListRx =~ next_line
 2033           current_style = attributes[1]
 2034           # extract id, role, and options from first positional attribute and remove, if present
 2035           if (document.parse_attributes $1, [], sub_input: true, sub_result: true, into: attributes)[1]
 2036             attributes[1] = (parse_style_attribute attributes, reader) || current_style
 2037           end
 2038           return true
 2039         end
 2040       elsif normal && (next_line.start_with? '.')
 2041         if BlockTitleRx =~ next_line
 2042           # NOTE title doesn't apply to section, but we need to stash it for the first block
 2043           # TODO should issue an error if this is found above the document title
 2044           attributes['title'] = $1
 2045           return true
 2046         end
 2047       elsif !normal || (next_line.start_with? '/')
 2048         if next_line == '//'
 2049           return true
 2050         elsif normal && (uniform? next_line, '/', (ll = next_line.length))
 2051           unless ll == 3
 2052             reader.read_lines_until terminator: next_line, skip_first_line: true, preserve_last_line: true, skip_processing: true, context: :comment
 2053             return true
 2054           end
 2055         else
 2056           return true unless next_line.start_with? '///'
 2057         end if next_line.start_with? '//'
 2058       # NOTE the final condition can be consolidated into single line
 2059       elsif normal && (next_line.start_with? ':') && AttributeEntryRx =~ next_line
 2060         process_attribute_entry reader, document, attributes, $~
 2061         return true
 2062       end
 2063     end
 2064   end
 2065 
 2066   # Process consecutive attribute entry lines, ignoring adjacent line comments and comment blocks.
 2067   #
 2068   # Returns nothing
 2069   def self.process_attribute_entries reader, document, attributes = nil
 2070     reader.skip_comment_lines
 2071     while process_attribute_entry reader, document, attributes
 2072       # discard line just processed
 2073       reader.shift
 2074       reader.skip_comment_lines
 2075     end
 2076   end
 2077 
 2078   def self.process_attribute_entry reader, document, attributes = nil, match = nil
 2079     if match || (match = reader.has_more_lines? ? (AttributeEntryRx.match reader.peek_line) : nil)
 2080       if (value = match[2]).nil_or_empty?
 2081         value = ''
 2082       elsif value.end_with? LINE_CONTINUATION, LINE_CONTINUATION_LEGACY
 2083         con, value = (value.slice value.length - 2, 2), (value.slice 0, value.length - 2).rstrip
 2084         while reader.advance && !(next_line = reader.peek_line || '').empty?
 2085           next_line = next_line.lstrip
 2086           next_line = (next_line.slice 0, next_line.length - 2).rstrip if (keep_open = next_line.end_with? con)
 2087           value = %(#{value}#{(value.end_with? HARD_LINE_BREAK) ? LF : ' '}#{next_line})
 2088           break unless keep_open
 2089         end
 2090       end
 2091 
 2092       store_attribute match[1], value, document, attributes
 2093       true
 2094     end
 2095   end
 2096 
 2097   # Public: Store the attribute in the document and register attribute entry if accessible
 2098   #
 2099   # name  - the String name of the attribute to store;
 2100   #         if name begins or ends with !, it signals to remove the attribute with that root name
 2101   # value - the String value of the attribute to store
 2102   # doc   - the Document being parsed
 2103   # attrs - the attributes for the current context
 2104   #
 2105   # returns a 2-element array containing the resolved attribute name (minus the ! indicator) and value
 2106   def self.store_attribute name, value, doc = nil, attrs = nil
 2107     # TODO move processing of attribute value to utility method
 2108     if name.end_with? '!'
 2109       # a nil value signals the attribute should be deleted (unset)
 2110       name = name.chop
 2111       value = nil
 2112     elsif name.start_with? '!'
 2113       # a nil value signals the attribute should be deleted (unset)
 2114       name = (name.slice 1, name.length)
 2115       value = nil
 2116     end
 2117 
 2118     if (name = sanitize_attribute_name name) == 'numbered'
 2119       name = 'sectnums'
 2120     elsif name == 'hardbreaks'
 2121       name = 'hardbreaks-option'
 2122     end
 2123 
 2124     if doc
 2125       if value
 2126         if name == 'leveloffset'
 2127           # support relative leveloffset values
 2128           if value.start_with? '+'
 2129             value = ((doc.attr 'leveloffset', 0).to_i + (value.slice 1, value.length).to_i).to_s
 2130           elsif value.start_with? '-'
 2131             value = ((doc.attr 'leveloffset', 0).to_i - (value.slice 1, value.length).to_i).to_s
 2132           end
 2133         end
 2134         # QUESTION should we set value to locked value if set_attribute returns false?
 2135         if (resolved_value = doc.set_attribute name, value)
 2136           value = resolved_value
 2137           (Document::AttributeEntry.new name, value).save_to attrs if attrs
 2138         end
 2139       elsif (doc.delete_attribute name) && attrs
 2140         (Document::AttributeEntry.new name, value).save_to attrs
 2141       end
 2142     elsif attrs
 2143       (Document::AttributeEntry.new name, value).save_to attrs
 2144     end
 2145 
 2146     [name, value]
 2147   end
 2148 
 2149   # Internal: Resolve the 0-index marker for this list item
 2150   #
 2151   # For ordered lists, match the marker used for this list item against the
 2152   # known list markers and determine which marker is the first (0-index) marker
 2153   # in its number series.
 2154   #
 2155   # For callout lists, return <1>.
 2156   #
 2157   # For bulleted lists, return the marker as passed to this method.
 2158   #
 2159   # list_type  - The Symbol context of the list
 2160   # marker     - The String marker for this list item
 2161   # ordinal    - The position of this list item in the list
 2162   # validate   - Whether to validate the value of the marker
 2163   #
 2164   # Returns the String 0-index marker for this list item
 2165   def self.resolve_list_marker(list_type, marker, ordinal = 0, validate = false, reader = nil)
 2166     if list_type == :ulist
 2167       marker
 2168     elsif list_type == :olist
 2169       resolve_ordered_list_marker(marker, ordinal, validate, reader)[0]
 2170     else # :colist
 2171       '<1>'
 2172     end
 2173   end
 2174 
 2175   # Internal: Resolve the 0-index marker for this ordered list item
 2176   #
 2177   # Match the marker used for this ordered list item against the
 2178   # known ordered list markers and determine which marker is
 2179   # the first (0-index) marker in its number series.
 2180   #
 2181   # The purpose of this method is to normalize the implicit numbered markers
 2182   # so that they can be compared against other list items.
 2183   #
 2184   # marker   - The marker used for this list item
 2185   # ordinal  - The 0-based index of the list item (default: 0)
 2186   # validate - Perform validation that the marker provided is the proper
 2187   #            marker in the sequence (default: false)
 2188   #
 2189   # Examples
 2190   #
 2191   #  marker = 'B.'
 2192   #  Parser.resolve_ordered_list_marker(marker, 1, true, reader)
 2193   #  # => ['A.', :upperalpha]
 2194   #
 2195   #  marker = '.'
 2196   #  Parser.resolve_ordered_list_marker(marker, 1, true, reader)
 2197   #  # => ['.']
 2198   #
 2199   # Returns a tuple that contains the String of the first marker in this number
 2200   # series and the implicit list style, if applicable
 2201   def self.resolve_ordered_list_marker(marker, ordinal = 0, validate = false, reader = nil)
 2202     return [marker] if marker.start_with? '.'
 2203     # NOTE case statement is guaranteed to match one of the conditions
 2204     case (style = ORDERED_LIST_STYLES.find {|s| OrderedListMarkerRxMap[s].match? marker })
 2205     when :arabic
 2206       if validate
 2207         expected = ordinal + 1
 2208         actual = marker.to_i # remove trailing . and coerce to int
 2209       end
 2210       marker = '1.'
 2211     when :loweralpha
 2212       if validate
 2213         expected = ('a'[0].ord + ordinal).chr
 2214         actual = marker.chop # remove trailing .
 2215       end
 2216       marker = 'a.'
 2217     when :upperalpha
 2218       if validate
 2219         expected = ('A'[0].ord + ordinal).chr
 2220         actual = marker.chop # remove trailing .
 2221       end
 2222       marker = 'A.'
 2223     when :lowerroman
 2224       if validate
 2225         expected = Helpers.int_to_roman(ordinal + 1).downcase
 2226         actual = marker.chop # remove trailing )
 2227       end
 2228       marker = 'i)'
 2229     when :upperroman
 2230       if validate
 2231         expected = Helpers.int_to_roman(ordinal + 1)
 2232         actual = marker.chop # remove trailing )
 2233       end
 2234       marker = 'I)'
 2235     end
 2236 
 2237     if validate && expected != actual
 2238       logger.warn message_with_context %(list item index: expected #{expected}, got #{actual}), source_location: reader.cursor
 2239     end
 2240 
 2241     [marker, style]
 2242   end
 2243 
 2244   # Internal: Determine whether the this line is a sibling list item
 2245   # according to the list type and trait (marker) provided.
 2246   #
 2247   # line          - The String line to check
 2248   # list_type     - The context of the list (:olist, :ulist, :colist, :dlist)
 2249   # sibling_trait - The String marker for the list or the Regexp to match a sibling
 2250   #
 2251   # Returns a Boolean indicating whether this line is a sibling list item given the criteria provided
 2252   def self.is_sibling_list_item? line, list_type, sibling_trait
 2253     if ::Regexp === sibling_trait
 2254       sibling_trait.match? line
 2255     else
 2256       ListRxMap[list_type] =~ line && sibling_trait == (resolve_list_marker list_type, $1)
 2257     end
 2258   end
 2259 
 2260   # Internal: Parse the table contained in the provided Reader
 2261   #
 2262   # table_reader - a Reader containing the source lines of an AsciiDoc table
 2263   # parent       - the parent Block of this Asciidoctor::Table
 2264   # attributes   - attributes captured from above this Block
 2265   #
 2266   # returns an instance of Asciidoctor::Table parsed from the provided reader
  def self.parse_table(table_reader, parent, attributes)
    table = Table.new(parent, attributes)

    # create the columns up front if the cols attribute yields at least one column spec
    if (attributes.key? 'cols') && !(colspecs = parse_colspecs attributes['cols']).empty?
      table.create_columns colspecs
      explicit_colspecs = true
    end

    # a falsy result from skip_blank_lines is treated as no blank lines skipped
    skipped = table_reader.skip_blank_lines || 0
    parser_ctx = Table::ParserContext.new table_reader, table, attributes
    # loop_idx is the 0-based index of the line being processed; it starts at -1 because it is
    # incremented at the top of each iteration
    format, loop_idx, implicit_header_boundary = parser_ctx.format, -1, nil
    # an implicit header is only a possibility if no blank lines preceded the first line and
    # neither the header nor the noheader option is set
    implicit_header = true unless skipped > 0 || attributes['header-option'] || attributes['noheader-option']

    while (line = table_reader.read_line)
      # a blank line after the first line carries no cell boundary; it widens the gap being
      # tracked after the candidate header row
      if (beyond_first = (loop_idx += 1) > 0) && line.empty?
        line = nil
        implicit_header_boundary += 1 if implicit_header_boundary
      elsif format == 'psv'
        if parser_ctx.starts_with_delimiter? line
          # drop the leading delimiter character
          line = line.slice 1, line.length
          # push empty cell spec if cell boundary appears at start of line
          parser_ctx.close_open_cell
          implicit_header_boundary = nil if implicit_header_boundary
        else
          next_cellspec, line = parse_cellspec line, :start, parser_ctx.delimiter
          # if cellspec is not nil, we're at a cell boundary
          if next_cellspec
            parser_ctx.close_open_cell next_cellspec
            implicit_header_boundary = nil if implicit_header_boundary
          # otherwise, the cell continues from previous line
          elsif implicit_header_boundary && implicit_header_boundary == loop_idx
            # the line that follows the gap continues a cell, so the first row is not a header
            implicit_header, implicit_header_boundary = false, nil
          end
        end
      end

      # first line only: mark the reader position and decide whether an implicit header is still possible
      unless beyond_first
        table_reader.mark
        # NOTE implicit header is offset by at least one blank line; implicit_header_boundary tracks size of gap
        if implicit_header
          if table_reader.has_more_lines? && table_reader.peek_line.empty?
            implicit_header_boundary = 1
          else
            implicit_header = false
          end
        end
      end

      # this loop is used for flow control; internal logic controls how many times it executes
      while true
        if line && (m = parser_ctx.match_delimiter line)
          # split the line around the delimiter; pre_match is the candidate cell text
          pre_match, post_match = m.pre_match, m.post_match
          case format
          when 'csv'
            # a delimiter found while quotes are unclosed is not treated as a cell boundary;
            # skip past it and rescan the remainder of the line, if any
            if parser_ctx.buffer_has_unclosed_quotes? pre_match
              parser_ctx.skip_past_delimiter pre_match
              break if (line = post_match).empty?
              redo
            end
            parser_ctx.buffer = %(#{parser_ctx.buffer}#{pre_match})
          when 'dsv'
            # a delimiter preceded by a backslash is escaped; skip past it and rescan the
            # remainder of the line, keeping the cell open if nothing remains
            if pre_match.end_with? '\\'
              parser_ctx.skip_past_escaped_delimiter pre_match
              if (line = post_match).empty?
                parser_ctx.buffer = %(#{parser_ctx.buffer}#{LF})
                parser_ctx.keep_cell_open
                break
              end
              redo
            end
            parser_ctx.buffer = %(#{parser_ctx.buffer}#{pre_match})
          else # psv
            # same handling of an escaped delimiter as for dsv
            if pre_match.end_with? '\\'
              parser_ctx.skip_past_escaped_delimiter pre_match
              if (line = post_match).empty?
                parser_ctx.buffer = %(#{parser_ctx.buffer}#{LF})
                parser_ctx.keep_cell_open
                break
              end
              redo
            end
            # the text before the delimiter may end with the cellspec for the next cell;
            # separate it from the text of the current cell
            next_cellspec, cell_text = parse_cellspec pre_match
            parser_ctx.push_cellspec next_cellspec
            parser_ctx.buffer = %(#{parser_ctx.buffer}#{cell_text})
          end
          # don't break if empty to preserve empty cell found at end of line (see issue #1106)
          line = nil if (line = post_match).empty?
          parser_ctx.close_cell
        else
          # no other delimiters to see here; suck up this line into the buffer and move on
          parser_ctx.buffer = %(#{parser_ctx.buffer}#{line}#{LF})
          case format
          when 'csv'
            if parser_ctx.buffer_has_unclosed_quotes?
              # a quoted value left open on the first line rules out an implicit header
              implicit_header, implicit_header_boundary = false, nil if implicit_header_boundary && loop_idx == 0
              parser_ctx.keep_cell_open
            else
              parser_ctx.close_cell true
            end
          when 'dsv'
            parser_ctx.close_cell true
          else # psv
            parser_ctx.keep_cell_open
          end
          break
        end
      end

      # NOTE cell may already be closed if table format is csv or dsv
      if parser_ctx.cell_open?
        # close the last cell if the input is exhausted
        parser_ctx.close_cell true unless table_reader.has_more_lines?
      else
        # stop once skip_blank_lines returns a falsy value
        table_reader.skip_blank_lines || break
      end
    end

    # record the column count (if not already set) and, unless the columns were specified
    # explicitly or there are none, assign the column widths
    unless (table.attributes['colcount'] ||= table.columns.size) == 0 || explicit_colspecs
      table.assign_column_widths
    end

    # the implicit header survived parsing; turn on the header option
    if implicit_header
      table.has_header_option = true
      attributes['header-option'] = ''
    end

    table.partition_header_footer attributes

    table
  end
 2396 
 2397   # Internal: Parse the column specs for this table.
 2398   #
 2399   # The column specs dictate the number of columns, relative
 2400   # width of columns, default alignments for cells in each
 2401   # column, and/or default styles or filters applied to the cells in
 2402   # the column.
 2403   #
 2404   # Every column spec is guaranteed to have a width
 2405   #
 2406   # returns a Hash of attributes that specify how to format
 2407   # and layout the cells in the table.
 2408   def self.parse_colspecs records
 2409     records = records.delete ' ' if records.include? ' '
 2410     # check for deprecated syntax: single number, equal column spread
 2411     if records == records.to_i.to_s
 2412       return ::Array.new(records.to_i) { { 'width' => 1 } }
 2413     end
 2414 
 2415     specs = []
 2416     # NOTE -1 argument ensures we don't drop empty records
 2417     ((records.include? ',') ? (records.split ',', -1) : (records.split ';', -1)).each do |record|
 2418       if record.empty?
 2419         specs << { 'width' => 1 }
 2420       # TODO might want to use scan rather than this mega-regexp
 2421       elsif (m = ColumnSpecRx.match(record))
 2422         spec = {}
 2423         if m[2]
 2424           # make this an operation
 2425           colspec, rowspec = m[2].split '.'
 2426           if !colspec.nil_or_empty? && TableCellHorzAlignments.key?(colspec)
 2427             spec['halign'] = TableCellHorzAlignments[colspec]
 2428           end
 2429           if !rowspec.nil_or_empty? && TableCellVertAlignments.key?(rowspec)
 2430             spec['valign'] = TableCellVertAlignments[rowspec]
 2431           end
 2432         end
 2433 
 2434         if (width = m[3])
 2435           # to_i will strip the optional %
 2436           spec['width'] = width == '~' ? -1 : width.to_i
 2437         else
 2438           spec['width'] = 1
 2439         end
 2440 
 2441         # make this an operation
 2442         if m[4] && TableCellStyles.key?(m[4])
 2443           spec['style'] = TableCellStyles[m[4]]
 2444         end
 2445 
 2446         if m[1]
 2447           1.upto(m[1].to_i) { specs << spec.merge }
 2448         else
 2449           specs << spec
 2450         end
 2451       end
 2452     end
 2453     specs
 2454   end
 2455 
 2456   # Internal: Parse the cell specs for the current cell.
 2457   #
 2458   # The cell specs dictate the cell's alignments, styles or filters,
 2459   # colspan, rowspan and/or repeating content.
 2460   #
 2461   # The default spec when pos == :end is {} since we already know we're at a
 2462   # delimiter. When pos == :start, we *may* be at a delimiter, nil indicates
 2463   # we're not.
 2464   #
 2465   # returns the Hash of attributes that indicate how to layout
 2466   # and style this cell in the table.
 2467   def self.parse_cellspec(line, pos = :end, delimiter = nil)
 2468     m, rest = nil, ''
 2469 
 2470     if pos == :start
 2471       if line.include? delimiter
 2472         spec_part, delimiter, rest = line.partition delimiter
 2473         if (m = CellSpecStartRx.match spec_part)
 2474           return [{}, rest] if m[0].empty?
 2475         else
 2476           return [nil, line]
 2477         end
 2478       else
 2479         return [nil, line]
 2480       end
 2481     else # pos == :end
 2482       if (m = CellSpecEndRx.match line)
 2483         # NOTE return the line stripped of trailing whitespace if no cellspec is found in this case
 2484         return [{}, line.rstrip] if m[0].lstrip.empty?
 2485         rest = m.pre_match
 2486       else
 2487         return [{}, line]
 2488       end
 2489     end
 2490 
 2491     spec = {}
 2492     if m[1]
 2493       colspec, rowspec = m[1].split '.'
 2494       colspec = colspec.nil_or_empty? ? 1 : colspec.to_i
 2495       rowspec = rowspec.nil_or_empty? ? 1 : rowspec.to_i
 2496       if m[2] == '+'
 2497         spec['colspan'] = colspec unless colspec == 1
 2498         spec['rowspan'] = rowspec unless rowspec == 1
 2499       elsif m[2] == '*'
 2500         spec['repeatcol'] = colspec unless colspec == 1
 2501       end
 2502     end
 2503 
 2504     if m[3]
 2505       colspec, rowspec = m[3].split '.'
 2506       if !colspec.nil_or_empty? && TableCellHorzAlignments.key?(colspec)
 2507         spec['halign'] = TableCellHorzAlignments[colspec]
 2508       end
 2509       if !rowspec.nil_or_empty? && TableCellVertAlignments.key?(rowspec)
 2510         spec['valign'] = TableCellVertAlignments[rowspec]
 2511       end
 2512     end
 2513 
 2514     if m[4] && TableCellStyles.key?(m[4])
 2515       spec['style'] = TableCellStyles[m[4]]
 2516     end
 2517 
 2518     [spec, rest]
 2519   end
 2520 
 2521   # Public: Parse the first positional attribute and assign named attributes
 2522   #
 2523   # Parse the first positional attribute to extract the style, role and id
 2524   # parts, assign the values to their corresponding attribute keys and return
 2525   # the parsed style from the first positional attribute.
 2526   #
 2527   # attributes - The Hash of attributes to process and update
 2528   #
 2529   # Examples
 2530   #
 2531   #   puts attributes
 2532   #   => { 1 => "abstract#intro.lead%fragment", "style" => "preamble" }
 2533   #
 2534   #   parse_style_attribute(attributes)
 2535   #   => "abstract"
 2536   #
 2537   #   puts attributes
 2538   #   => { 1 => "abstract#intro.lead%fragment", "style" => "abstract", "id" => "intro",
 2539   #         "role" => "lead", "options" => "fragment", "fragment-option" => '' }
 2540   #
 2541   # Returns the String style parsed from the first positional attribute
 2542   def self.parse_style_attribute attributes, reader = nil
 2543     # NOTE spaces are not allowed in shorthand, so if we detect one, this ain't no shorthand
 2544     if (raw_style = attributes[1]) && !raw_style.include?(' ') && Compliance.shorthand_property_syntax
 2545       name = nil
 2546       accum = ''
 2547       parsed_attrs = {}
 2548 
 2549       raw_style.each_char do |c|
 2550         case c
 2551         when '.'
 2552           yield_buffered_attribute parsed_attrs, name, accum, reader
 2553           accum = ''
 2554           name = :role
 2555         when '#'
 2556           yield_buffered_attribute parsed_attrs, name, accum, reader
 2557           accum = ''
 2558           name = :id
 2559         when '%'
 2560           yield_buffered_attribute parsed_attrs, name, accum, reader
 2561           accum = ''
 2562           name = :option
 2563         else
 2564           accum = accum + c
 2565         end
 2566       end
 2567 
 2568       # small optimization if no shorthand is found
 2569       if name
 2570         yield_buffered_attribute parsed_attrs, name, accum, reader
 2571 
 2572         if (parsed_style = parsed_attrs[:style])
 2573           attributes['style'] = parsed_style
 2574         end
 2575 
 2576         attributes['id'] = parsed_attrs[:id] if parsed_attrs.key? :id
 2577 
 2578         if parsed_attrs.key? :role
 2579           attributes['role'] = (existing_role = attributes['role']).nil_or_empty? ? (parsed_attrs[:role].join ' ') : %(#{existing_role} #{parsed_attrs[:role].join ' '})
 2580         end
 2581 
 2582         if parsed_attrs.key? :option
 2583           (opts = parsed_attrs[:option]).each {|opt| attributes[%(#{opt}-option)] = '' }
 2584         end
 2585 
 2586         parsed_style
 2587       else
 2588         attributes['style'] = raw_style
 2589       end
 2590     else
 2591       attributes['style'] = raw_style
 2592     end
 2593   end
 2594 
 2595   # Internal: Save the collected attribute (:id, :option, :role, or nil for :style) in the attribute Hash.
 2596   def self.yield_buffered_attribute attrs, name, value, reader
 2597     if name
 2598       if value.empty?
 2599         if reader
 2600           logger.warn message_with_context %(invalid empty #{name} detected in style attribute), source_location: reader.cursor_at_prev_line
 2601         else
 2602           logger.warn %(invalid empty #{name} detected in style attribute)
 2603         end
 2604       elsif name == :id
 2605         if attrs.key? :id
 2606           if reader
 2607             logger.warn message_with_context 'multiple ids detected in style attribute', source_location: reader.cursor_at_prev_line
 2608           else
 2609             logger.warn 'multiple ids detected in style attribute'
 2610           end
 2611         end
 2612         attrs[name] = value
 2613       else
 2614         (attrs[name] ||= []) << value
 2615       end
 2616     else
 2617       attrs[:style] = value unless value.empty?
 2618     end
 2619     nil
 2620   end
 2621 
 2622   # Remove the block indentation (the amount of whitespace of the least indented line), replace tabs with spaces (using
 2623   # proper tab expansion logic) and, finally, indent the lines by the margin width. Modifies the input Array directly.
 2624   #
 2625   # This method preserves the significant indentation (that exceeding the block indent) on each line.
 2626   #
 2627   # lines       - The Array of String lines to process (no trailing newlines)
 2628   # indent_size - The Integer number of spaces to readd to the start of non-empty lines after removing the indentation.
 2629   #               If this value is < 0, the existing indentation is preserved (optional, default: 0)
 2630   # tab_size    - the Integer number of spaces to use in place of a tab. A value of <= 0 disables the replacement
 2631   #               (optional, default: 0)
 2632   #
 2633   # Examples
 2634   #
 2635   #   source = <<EOS
 2636   #       def names
 2637   #         @name.split
 2638   #       end
 2639   #   EOS
 2640   #
 2641   #   source.split ?\n
 2642   #   # => ["    def names", "      @names.split", "    end"]
 2643   #
 2644   #   puts (Parser.adjust_indentation! source.split ?\n).join ?\n
 2645   #   # => def names
 2646   #   # =>   @names.split
 2647   #   # => end
 2648   #
 2649   # returns Nothing
 2650   def self.adjust_indentation! lines, indent_size = 0, tab_size = 0
 2651     return if lines.empty?
 2652 
 2653     # expand tabs if a tab character is detected and tab_size > 0
 2654     if tab_size > 0 && lines.any? {|line| line.include? TAB }
 2655       full_tab_space = ' ' * tab_size
 2656       lines.map! do |line|
 2657         if line.empty?
 2658           line
 2659         elsif (tab_idx = line.index TAB)
 2660           if tab_idx == 0
 2661             leading_tabs = 0
 2662             line.each_byte do |b|
 2663               break unless b == 9
 2664               leading_tabs += 1
 2665             end
 2666             line = %(#{full_tab_space * leading_tabs}#{line.slice leading_tabs, line.length})
 2667             next line unless line.include? TAB
 2668           end
 2669           # keeps track of how many spaces were added to adjust offset in match data
 2670           spaces_added = 0
 2671           idx = 0
 2672           result = ''
 2673           line.each_char do |c|
 2674             if c == TAB
 2675               # calculate how many spaces this tab represents, then replace tab with spaces
 2676               if (offset = idx + spaces_added) % tab_size == 0
 2677                 spaces_added += (tab_size - 1)
 2678                 result = result + full_tab_space
 2679               else
 2680                 unless (spaces = tab_size - offset % tab_size) == 1
 2681                   spaces_added += (spaces - 1)
 2682                 end
 2683                 result = result + (' ' * spaces)
 2684               end
 2685             else
 2686               result = result + c
 2687             end
 2688             idx += 1
 2689           end
 2690           result
 2691         else
 2692           line
 2693         end
 2694       end
 2695     end
 2696 
 2697     # skip block indent adjustment if indent_size is < 0
 2698     return if indent_size < 0
 2699 
 2700     # determine block indent (assumes no whitespace-only lines are present)
 2701     block_indent = nil
 2702     lines.each do |line|
 2703       next if line.empty?
 2704       if (line_indent = line.length - line.lstrip.length) == 0
 2705         block_indent = nil
 2706         break
 2707       end
 2708       block_indent = line_indent unless block_indent && block_indent < line_indent
 2709     end
 2710 
 2711     # remove block indent then apply indent_size if specified
 2712     # NOTE block_indent is > 0 if not nil
 2713     if indent_size == 0
 2714       lines.map! {|line| line.empty? ? line : (line.slice block_indent, line.length) } if block_indent
 2715     else
 2716       new_block_indent = ' ' * indent_size
 2717       if block_indent
 2718         lines.map! {|line| line.empty? ? line : new_block_indent + (line.slice block_indent, line.length) }
 2719       else
 2720         lines.map! {|line| line.empty? ? line : new_block_indent + line }
 2721       end
 2722     end
 2723 
 2724     nil
 2725   end
 2726 
 2727   def self.uniform? str, chr, len
 2728     (str.count chr) == len
 2729   end
 2730 
 2731   # Internal: Convert a string to a legal attribute name.
 2732   #
 2733   # name  - the String name of the attribute
 2734   #
 2735   # Returns a String with the legal AsciiDoc attribute name.
 2736   #
 2737   # Examples
 2738   #
 2739   #   sanitize_attribute_name('Foo Bar')
 2740   #   => 'foobar'
 2741   #
 2742   #   sanitize_attribute_name('foo')
 2743   #   => 'foo'
 2744   #
 2745   #   sanitize_attribute_name('Foo 3 #-Billy')
 2746   #   => 'foo3-billy'
 2747   def self.sanitize_attribute_name(name)
 2748     name.gsub(InvalidAttributeNameCharsRx, '').downcase
 2749   end
 2750 end
 2751 end