"Fossies" - the Fresh Open Source Software Archive

Member "pandoc-2.7.3/src/Text/Pandoc/Readers/Org/Blocks.hs" (12 Jun 2019, 29289 Bytes) of package /linux/www/pandoc-2.7.3.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Haskell source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file. See also the last Fossies "Diffs" side-by-side code changes report for "Blocks.hs": 2.6_vs_2.7.

    1 {-# LANGUAGE NoImplicitPrelude #-}
    2 {-# LANGUAGE FlexibleContexts #-}
    3 {-# LANGUAGE RecordWildCards  #-}
    4 {- |
    5    Module      : Text.Pandoc.Readers.Org.Blocks
    6    Copyright   : Copyright (C) 2014-2019 Albert Krewinkel
    7    License     : GNU GPL, version 2 or above
    8 
    9    Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
   10 
   11 Parsers for Org-mode block elements.
   12 -}
   13 module Text.Pandoc.Readers.Org.Blocks
   14   ( blockList
   15   , meta
   16   ) where
   17 
   18 import Prelude
   19 import Text.Pandoc.Readers.Org.BlockStarts
   20 import Text.Pandoc.Readers.Org.DocumentTree (documentTree,
   21                                              unprunedHeadlineToBlocks)
   22 import Text.Pandoc.Readers.Org.Inlines
   23 import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
   24 import Text.Pandoc.Readers.Org.ParserState
   25 import Text.Pandoc.Readers.Org.Parsing
   26 import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
   27                                        originalLang, translateLang, exportsCode)
   28 
   29 import Text.Pandoc.Builder (Blocks, Inlines)
   30 import Text.Pandoc.Class (PandocMonad)
   31 import Text.Pandoc.Definition
   32 import Text.Pandoc.Options
   33 import Text.Pandoc.Shared (compactify, compactifyDL, safeRead)
   34 
   35 import Control.Monad (foldM, guard, mzero, void)
   36 import Data.Char (isSpace, toLower, toUpper)
   37 import Data.Default (Default)
   38 import Data.List (foldl', isPrefixOf)
   39 import Data.Maybe (fromMaybe, isJust, isNothing)
   40 
   41 import qualified Text.Pandoc.Builder as B
   42 import qualified Text.Pandoc.Walk as Walk
   43 
   44 --
   45 -- parsing blocks
   46 --
   47 
   48 -- | Get a list of blocks.
   49 blockList :: PandocMonad m => OrgParser m [Block]
   50 blockList = do
   51   fHeadlineTree  <- documentTree blocks inline
   52   st             <- getState
   53   let headlineTree = runF fHeadlineTree st
   54   unprunedHeadlineToBlocks headlineTree st
   55 
   56 -- | Get the meta information saved in the state.
   57 meta :: Monad m => OrgParser m Meta
   58 meta = do
   59   meta' <- metaExport
   60   runF meta' <$> getState
   61 
   62 blocks :: PandocMonad m => OrgParser m (F Blocks)
   63 blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof)
   64 
   65 block :: PandocMonad m => OrgParser m (F Blocks)
   66 block = choice [ mempty <$ blanklines
   67                , table
   68                , orgBlock
   69                , figure
   70                , example
   71                , genericDrawer
   72                , include
   73                , specialLine
   74                , horizontalRule
   75                , list
   76                , latexFragment
   77                , noteBlock
   78                , paraOrPlain
   79                ] <?> "block"
   80 
   81 
   82 -- | Parse a horizontal rule into a block element
   83 horizontalRule :: Monad m => OrgParser m (F Blocks)
   84 horizontalRule = return B.horizontalRule <$ try hline
   85 
   86 
   87 --
   88 -- Block Attributes
   89 --
   90 
   91 -- | Attributes that may be added to figures (like a name or caption).
   92 data BlockAttributes = BlockAttributes
   93   { blockAttrName      :: Maybe String
   94   , blockAttrLabel     :: Maybe String
   95   , blockAttrCaption   :: Maybe (F Inlines)
   96   , blockAttrKeyValues :: [(String, String)]
   97   }
   98 
   99 -- | Convert BlockAttributes into pandoc Attr
  100 attrFromBlockAttributes :: BlockAttributes -> Attr
  101 attrFromBlockAttributes BlockAttributes{..} =
  102   let
  103     ident   = fromMaybe mempty $ lookup "id" blockAttrKeyValues
  104     classes = case lookup "class" blockAttrKeyValues of
  105                 Nothing     -> []
  106                 Just clsStr -> words clsStr
  107     kv      = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
  108   in (ident, classes, kv)
  109 
  110 stringyMetaAttribute :: Monad m => OrgParser m (String, String)
  111 stringyMetaAttribute = try $ do
  112   metaLineStart
  113   attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
  114   skipSpaces
  115   attrValue <- anyLine <|> ("" <$ newline)
  116   return (attrName, attrValue)
  117 
  118 blockAttributes :: PandocMonad m => OrgParser m BlockAttributes
  119 blockAttributes = try $ do
  120   kv <- many stringyMetaAttribute
  121   guard $ all (attrCheck . fst) kv
  122   let caption = foldl' (appendValues "CAPTION") Nothing kv
  123   let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv
  124   let name    = lookup "NAME" kv
  125   let label   = lookup "LABEL" kv
  126   caption' <- case caption of
  127                    Nothing -> return Nothing
  128                    Just s  -> Just <$> parseFromString inlines (s ++ "\n")
  129   kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
  130   return BlockAttributes
  131            { blockAttrName = name
  132            , blockAttrLabel = label
  133            , blockAttrCaption = caption'
  134            , blockAttrKeyValues = kvAttrs'
  135            }
  136  where
  137    attrCheck :: String -> Bool
  138    attrCheck x = x `elem` ["NAME", "LABEL", "CAPTION", "ATTR_HTML", "RESULTS"]
  139 
  140    appendValues :: String -> Maybe String -> (String, String) -> Maybe String
  141    appendValues attrName accValue (key, value) =
  142      if key /= attrName
  143      then accValue
  144      else case accValue of
  145             Just acc -> Just $ acc ++ ' ':value
  146             Nothing  -> Just value
  147 
  148 -- | Parse key-value pairs for HTML attributes
  149 keyValues :: Monad m => OrgParser m [(String, String)]
  150 keyValues = try $
  151   manyTill ((,) <$> key <*> value) newline
  152  where
  153    key :: Monad m => OrgParser m String
  154    key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
  155 
  156    value :: Monad m => OrgParser m String
  157    value = skipSpaces *> manyTill anyChar endOfValue
  158 
  159    endOfValue :: Monad m => OrgParser m ()
  160    endOfValue =
  161      lookAhead $ (() <$ try (many1 spaceChar <* key))
  162               <|> () <$ newline
  163 
  164 
  165 --
  166 -- Org Blocks (#+BEGIN_... / #+END_...)
  167 --
  168 
  169 -- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE.
  170 orgBlock :: PandocMonad m => OrgParser m (F Blocks)
  171 orgBlock = try $ do
  172   blockAttrs <- blockAttributes
  173   blkType <- blockHeaderStart
  174   ($ blkType) $
  175     case map toLower blkType of
  176       "export"  -> exportBlock
  177       "comment" -> rawBlockLines (const mempty)
  178       "html"    -> rawBlockLines (return . B.rawBlock (lowercase blkType))
  179       "latex"   -> rawBlockLines (return . B.rawBlock (lowercase blkType))
  180       "ascii"   -> rawBlockLines (return . B.rawBlock (lowercase blkType))
  181       "example" -> rawBlockLines (return . exampleCode)
  182       "quote"   -> parseBlockLines (fmap B.blockQuote)
  183       "verse"   -> verseBlock
  184       "src"     -> codeBlock blockAttrs
  185       _         -> parseBlockLines $
  186                    let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
  187                    in fmap $ B.divWith (ident, classes ++ [blkType], kv)
  188  where
  189    blockHeaderStart :: Monad m => OrgParser m String
  190    blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
  191 
  192    lowercase :: String -> String
  193    lowercase = map toLower
  194 
  195 rawBlockLines :: Monad m => (String   -> F Blocks) -> String -> OrgParser m (F Blocks)
  196 rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType)
  197 
  198 parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
  199 parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent)
  200  where
  201    parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
  202    parsedBlockContent = try $ do
  203      raw <- rawBlockContent blockType
  204      parseFromString blocks (raw ++ "\n")
  205 
  206 -- | Read the raw string content of a block
  207 rawBlockContent :: Monad m => String -> OrgParser m String
  208 rawBlockContent blockType = try $ do
  209   blkLines <- manyTill rawLine blockEnder
  210   tabLen <- getOption readerTabStop
  211   return
  212     . unlines
  213     . stripIndent
  214     . map (tabsToSpaces tabLen . commaEscaped)
  215     $ blkLines
  216  where
  217    rawLine :: Monad m => OrgParser m String
  218    rawLine = try $ ("" <$ blankline) <|> anyLine
  219 
  220    blockEnder :: Monad m => OrgParser m ()
  221    blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
  222 
  223    stripIndent :: [String] -> [String]
  224    stripIndent strs = map (drop (shortestIndent strs)) strs
  225 
  226    shortestIndent :: [String] -> Int
  227    shortestIndent = foldr (min . length . takeWhile isSpace) maxBound
  228                     . filter (not . null)
  229 
  230    tabsToSpaces :: Int -> String -> String
  231    tabsToSpaces _      []         = []
  232    tabsToSpaces tabLen cs'@(c:cs) =
  233        case c of
  234          ' '  -> ' ':tabsToSpaces tabLen cs
  235          '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs
  236          _    -> cs'
  237 
  238    commaEscaped :: String -> String
  239    commaEscaped (',':cs@('*':_))     = cs
  240    commaEscaped (',':cs@('#':'+':_)) = cs
  241    commaEscaped (' ':cs)             = ' ':commaEscaped cs
  242    commaEscaped ('\t':cs)            = '\t':commaEscaped cs
  243    commaEscaped cs                   = cs
  244 
  245 -- | Read but ignore all remaining block headers.
  246 ignHeaders :: Monad m => OrgParser m ()
  247 ignHeaders = (() <$ newline) <|> (() <$ anyLine)
  248 
  249 -- | Read a block containing code intended for export in specific backends
  250 -- only.
  251 exportBlock :: Monad m => String -> OrgParser m (F Blocks)
  252 exportBlock blockType = try $ do
  253   exportType <- skipSpaces *> orgArgWord <* ignHeaders
  254   contents   <- rawBlockContent blockType
  255   returnF (B.rawBlock (map toLower exportType) contents)
  256 
  257 verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
  258 verseBlock blockType = try $ do
  259   ignHeaders
  260   content <- rawBlockContent blockType
  261   fmap B.lineBlock . sequence
  262     <$> mapM parseVerseLine (lines content)
  263  where
  264    -- replace initial spaces with nonbreaking spaces to preserve
  265    -- indentation, parse the rest as normal inline
  266    parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
  267    parseVerseLine cs = do
  268      let (initialSpaces, indentedLine) = span isSpace cs
  269      let nbspIndent = if null initialSpaces
  270                       then mempty
  271                       else B.str $ map (const '\160') initialSpaces
  272      line <- parseFromString inlines (indentedLine ++ "\n")
  273      return (trimInlinesF $ pure nbspIndent <> line)
  274 
  275 -- | Read a code block and the associated results block if present.  Which of
  276 -- boths blocks is included in the output is determined using the "exports"
  277 -- argument in the block header.
  278 codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
  279 codeBlock blockAttrs blockType = do
  280   skipSpaces
  281   (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders)
  282   content           <- rawBlockContent blockType
  283   resultsContent    <- option mempty babelResultsBlock
  284   let id'            = fromMaybe mempty $ blockAttrName blockAttrs
  285   let codeBlck       = B.codeBlockWith ( id', classes, kv ) content
  286   let labelledBlck   = maybe (pure codeBlck)
  287                              (labelDiv codeBlck)
  288                              (blockAttrCaption blockAttrs)
  289   return $
  290     (if exportsCode kv    then labelledBlck   else mempty) <>
  291     (if exportsResults kv then resultsContent else mempty)
  292  where
  293    labelDiv :: Blocks -> F Inlines -> F Blocks
  294    labelDiv blk value =
  295      B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
  296 
  297    labelledBlock :: F Inlines -> F Blocks
  298    labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
  299 
  300    exportsResults :: [(String, String)] -> Bool
  301    exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
  302 
  303 -- | Parse the result of an evaluated babel code block.
  304 babelResultsBlock :: PandocMonad m => OrgParser m (F Blocks)
  305 babelResultsBlock = try $ do
  306   blanklines
  307   resultsMarker <|>
  308     (lookAhead . void . try $
  309       manyTill (metaLineStart *> anyLineNewline) resultsMarker)
  310   block
  311  where
  312   resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline
  313 
  314 -- | Parse code block arguments
  315 codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
  316 codeHeaderArgs = try $ do
  317   language   <- skipSpaces *> orgArgWord
  318   (switchClasses, switchKv) <- switchesAsAttributes
  319   parameters <- manyTill blockOption newline
  320   return ( translateLang language : switchClasses
  321          , originalLang language <> switchKv <> parameters
  322          )
  323 
  324 switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)])
  325 switchesAsAttributes = try $ do
  326   switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar)
  327   return $ foldr addToAttr ([], []) switches
  328  where
  329   addToAttr :: (Char, Maybe String, SwitchPolarity)
  330             -> ([String], [(String, String)])
  331             -> ([String], [(String, String)])
  332   addToAttr ('n', lineNum, pol) (cls, kv) =
  333     let kv' = case lineNum of
  334                 Just num -> ("startFrom", num):kv
  335                 Nothing  -> kv
  336         cls' = case pol of
  337                  SwitchPlus  -> "continuedSourceBlock":cls
  338                  SwitchMinus -> cls
  339     in ("numberLines":cls', kv')
  340   addToAttr _ x = x
  341 
  342 -- | Whether a switch flag is specified with @+@ or @-@.
  343 data SwitchPolarity = SwitchPlus | SwitchMinus
  344   deriving (Show, Eq)
  345 
  346 -- | Parses a switch's polarity.
  347 switchPolarity :: Monad m => OrgParser m SwitchPolarity
  348 switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+')
  349 
  350 -- | Parses a source block switch option.
  351 switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
  352 switch = try $ lineNumberSwitch <|> labelSwitch <|> simpleSwitch
  353  where
  354    simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter
  355    labelSwitch = genericSwitch 'l' $
  356      char '"' *> many1Till nonspaceChar (char '"')
  357 
  358 -- | Generic source block switch-option parser.
  359 genericSwitch :: Monad m
  360               => Char
  361               -> OrgParser m String
  362               -> OrgParser m (Char, Maybe String, SwitchPolarity)
  363 genericSwitch c p = try $ do
  364   polarity <- switchPolarity <* char c <* skipSpaces
  365   arg <- optionMaybe p
  366   return (c, arg, polarity)
  367 
  368 -- | Reads a line number switch option. The line number switch can be used with
  369 -- example and source blocks.
  370 lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
  371 lineNumberSwitch = genericSwitch 'n' (many digit)
  372 
  373 blockOption :: Monad m => OrgParser m (String, String)
  374 blockOption = try $ do
  375   argKey <- orgArgKey
  376   paramValue <- option "yes" orgParamValue
  377   return (argKey, paramValue)
  378 
  379 orgParamValue :: Monad m => OrgParser m String
  380 orgParamValue = try $
  381   skipSpaces
  382     *> notFollowedBy orgArgKey
  383     *> noneOf "\n\r" `many1Till` endOfValue
  384     <* skipSpaces
  385  where
  386   endOfValue = lookAhead $  try (skipSpaces <* oneOf "\n\r")
  387                         <|> try (skipSpaces1 <* orgArgKey)
  388 
  389 
  390 --
  391 -- Drawers
  392 --
  393 
  394 -- | A generic drawer which has no special meaning for org-mode.
  395 -- Whether or not this drawer is included in the output depends on the drawers
  396 -- export setting.
  397 genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
  398 genericDrawer = try $ do
  399   name    <- map toUpper <$> drawerStart
  400   content <- manyTill drawerLine (try drawerEnd)
  401   state   <- getState
  402   -- Include drawer if it is explicitly included in or not explicitly excluded
  403   -- from the list of drawers that should be exported.  PROPERTIES drawers are
  404   -- never exported.
  405   case exportDrawers . orgStateExportSettings $ state of
  406     _           | name == "PROPERTIES" -> return mempty
  407     Left  names | name `elem`    names -> return mempty
  408     Right names | name `notElem` names -> return mempty
  409     _           -> drawerDiv name <$> parseLines content
  410  where
  411   parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
  412   parseLines = parseFromString blocks . (++ "\n") . unlines
  413 
  414   drawerDiv :: String -> F Blocks -> F Blocks
  415   drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
  416 
  417 drawerLine :: Monad m => OrgParser m String
  418 drawerLine = anyLine
  419 
  420 drawerEnd :: Monad m => OrgParser m String
  421 drawerEnd = try $
  422   skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
  423 
  424 
  425 --
  426 -- Figures
  427 --
  428 
  429 -- | Figures or an image paragraph (i.e. an image on a line by itself). Only
  430 -- images with a caption attribute are interpreted as figures.
  431 figure :: PandocMonad m => OrgParser m (F Blocks)
  432 figure = try $ do
  433   figAttrs <- blockAttributes
  434   src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
  435   case cleanLinkString src of
  436     Nothing     -> mzero
  437     Just imgSrc -> do
  438       guard (isImageFilename imgSrc)
  439       let isFigure = isJust $ blockAttrCaption figAttrs
  440       return $ imageBlock isFigure figAttrs imgSrc
  441  where
  442    selfTarget :: PandocMonad m => OrgParser m String
  443    selfTarget = try $ char '[' *> linkTarget <* char ']'
  444 
  445    imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
  446    imageBlock isFigure figAttrs imgSrc =
  447      let
  448        figName    = fromMaybe mempty $ blockAttrName figAttrs
  449        figLabel   = fromMaybe mempty $ blockAttrLabel figAttrs
  450        figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
  451        figKeyVals = blockAttrKeyValues figAttrs
  452        attr       = (figLabel, mempty, figKeyVals)
  453        figTitle   = (if isFigure then withFigPrefix else id) figName
  454      in
  455        B.para . B.imageWith attr imgSrc figTitle <$> figCaption
  456 
  457    withFigPrefix :: String -> String
  458    withFigPrefix cs =
  459      if "fig:" `isPrefixOf` cs
  460      then cs
  461      else "fig:" ++ cs
  462 
  463 -- | Succeeds if looking at the end of the current paragraph
  464 endOfParagraph :: Monad m => OrgParser m ()
  465 endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
  466 
  467 
  468 --
  469 -- Examples
  470 --
  471 
  472 -- | Example code marked up by a leading colon.
  473 example :: Monad m => OrgParser m (F Blocks)
  474 example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine
  475  where
  476    exampleLine :: Monad m => OrgParser m String
  477    exampleLine = try $ exampleLineStart *> anyLine
  478 
  479 exampleCode :: String -> Blocks
  480 exampleCode = B.codeBlockWith ("", ["example"], [])
  481 
  482 
  483 --
  484 -- Comments, Options and Metadata
  485 --
  486 
  487 specialLine :: PandocMonad m => OrgParser m (F Blocks)
  488 specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine
  489 
  490 -- | Include the content of a file.
  491 include :: PandocMonad m => OrgParser m (F Blocks)
  492 include = try $ do
  493   metaLineStart <* stringAnyCase "include:" <* skipSpaces
  494   filename <- includeTarget
  495   includeArgs <- many (try $ skipSpaces *> many1 alphaNum)
  496   params <- keyValues
  497   blocksParser <- case includeArgs of
  498       ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw
  499       ["export"] -> return . returnF $ B.fromList []
  500       ["export", format] -> return $ pure . B.rawBlock format <$> parseRaw
  501       ("src" : rest) -> do
  502         let attr = case rest of
  503                      [lang] -> (mempty, [lang], mempty)
  504                      _ -> nullAttr
  505         return $ pure . B.codeBlockWith attr <$> parseRaw
  506       _ -> return $ return . B.fromList . blockFilter params <$> blockList
  507   insertIncludedFileF blocksParser ["."] filename
  508  where
  509   includeTarget :: PandocMonad m => OrgParser m FilePath
  510   includeTarget = do
  511     char '"'
  512     manyTill (noneOf "\n\r\t") (char '"')
  513 
  514   parseRaw :: PandocMonad m => OrgParser m String
  515   parseRaw = many anyChar
  516 
  517   blockFilter :: [(String, String)] -> [Block] -> [Block]
  518   blockFilter params blks =
  519     let minlvl = lookup "minlevel" params
  520     in case (minlvl >>= safeRead :: Maybe Int) of
  521          Nothing -> blks
  522          Just lvl -> let levels = Walk.query headerLevel blks
  523                          -- CAVE: partial function in else
  524                          curMin = if null levels then 0 else minimum levels
  525                      in Walk.walk (shiftHeader (curMin - lvl)) blks
  526 
  527   headerLevel :: Block -> [Int]
  528   headerLevel (Header lvl _attr _content) = [lvl]
  529   headerLevel _ = []
  530 
  531   shiftHeader :: Int -> Block -> Block
  532   shiftHeader shift blk =
  533     case blk of
  534       (Header lvl attr content)
  535        | lvl - shift > 0  -> Header (lvl - shift) attr content
  536        | otherwise        -> Para content
  537       _ -> blk
  538 
  539 rawExportLine :: PandocMonad m => OrgParser m Blocks
  540 rawExportLine = try $ do
  541   metaLineStart
  542   key <- metaKey
  543   if key `elem` ["latex", "html", "texinfo", "beamer"]
  544     then B.rawBlock key <$> anyLine
  545     else mzero
  546 
  547 commentLine :: Monad m => OrgParser m Blocks
  548 commentLine = commentLineStart *> anyLine *> pure mempty
  549 
  550 
  551 --
  552 -- Tables
  553 --
  554 data ColumnProperty = ColumnProperty
  555   { columnAlignment :: Maybe Alignment
  556   , columnRelWidth  :: Maybe Int
  557   } deriving (Show, Eq)
  558 
  559 instance Default ColumnProperty where
  560   def = ColumnProperty Nothing Nothing
  561 
  562 data OrgTableRow = OrgContentRow (F [Blocks])
  563                  | OrgAlignRow [ColumnProperty]
  564                  | OrgHlineRow
  565 
  566 -- OrgTable is strongly related to the pandoc table ADT.  Using the same
  567 -- (i.e. pandoc-global) ADT would mean that the reader would break if the
  568 -- global structure was to be changed, which would be bad.  The final table
  569 -- should be generated using a builder function.
  570 data OrgTable = OrgTable
  571   { orgTableColumnProperties :: [ColumnProperty]
  572   , orgTableHeader           :: [Blocks]
  573   , orgTableRows             :: [[Blocks]]
  574   }
  575 
  576 table :: PandocMonad m => OrgParser m (F Blocks)
  577 table = gridTableWith blocks True <|> orgTable
  578 
  579 -- | A normal org table
  580 orgTable :: PandocMonad m => OrgParser m (F Blocks)
  581 orgTable = try $ do
  582   -- don't allow a table on the first line of a list item; org requires that
  583   -- tables start at first non-space character on the line
  584   let isFirstInListItem st = orgStateParserContext st == ListItemState &&
  585                              isNothing (orgStateLastPreCharPos st)
  586   guard =<< not . isFirstInListItem <$> getState
  587   blockAttrs <- blockAttributes
  588   lookAhead tableStart
  589   do
  590     rows <- tableRows
  591     let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs
  592     return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows
  593 
  594 orgToPandocTable :: OrgTable
  595                  -> Inlines
  596                  -> Blocks
  597 orgToPandocTable (OrgTable colProps heads lns) caption =
  598   let totalWidth = if any isJust (map columnRelWidth colProps)
  599                    then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps
  600                    else Nothing
  601   in B.table caption (map (convertColProp totalWidth) colProps) heads lns
  602  where
  603    convertColProp :: Maybe Int -> ColumnProperty -> (Alignment, Double)
  604    convertColProp totalWidth colProp =
  605      let
  606        align' = fromMaybe AlignDefault $ columnAlignment colProp
  607        width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t))
  608                               <$> columnRelWidth colProp
  609                               <*> totalWidth
  610      in (align', width')
  611 
  612 tableRows :: PandocMonad m => OrgParser m [OrgTableRow]
  613 tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
  614 
  615 tableContentRow :: PandocMonad m => OrgParser m OrgTableRow
  616 tableContentRow = try $
  617   OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
  618 
  619 tableContentCell :: PandocMonad m => OrgParser m (F Blocks)
  620 tableContentCell = try $
  621   fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
  622 
  623 tableAlignRow :: Monad m => OrgParser m OrgTableRow
  624 tableAlignRow = try $ do
  625   tableStart
  626   colProps <- many1Till columnPropertyCell newline
  627   -- Empty rows are regular (i.e. content) rows, not alignment rows.
  628   guard $ any (/= def) colProps
  629   return $ OrgAlignRow colProps
  630 
  631 columnPropertyCell :: Monad m => OrgParser m ColumnProperty
  632 columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
  633  where
  634    emptyCell = ColumnProperty Nothing Nothing <$ try (skipSpaces *> endOfCell)
  635    propCell = try $ ColumnProperty
  636                  <$> (skipSpaces
  637                       *> char '<'
  638                       *> optionMaybe tableAlignFromChar)
  639                  <*> (optionMaybe (many1 digit >>= safeRead)
  640                       <* char '>'
  641                       <* emptyCell)
  642 
  643 tableAlignFromChar :: Monad m => OrgParser m Alignment
  644 tableAlignFromChar = try $
  645   choice [ char 'l' *> return AlignLeft
  646          , char 'c' *> return AlignCenter
  647          , char 'r' *> return AlignRight
  648          ]
  649 
  650 tableHline :: Monad m => OrgParser m OrgTableRow
  651 tableHline = try $
  652   OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
  653 
  654 endOfCell :: Monad m => OrgParser m Char
  655 endOfCell = try $ char '|' <|> lookAhead newline
  656 
  657 rowsToTable :: [OrgTableRow]
  658             -> F OrgTable
  659 rowsToTable = foldM rowToContent emptyTable
  660  where emptyTable = OrgTable mempty mempty mempty
  661 
  662 normalizeTable :: OrgTable -> OrgTable
  663 normalizeTable (OrgTable colProps heads rows) =
  664   OrgTable colProps' heads rows
  665  where
  666    refRow = if heads /= mempty
  667             then heads
  668             else case rows of
  669                    (r:_) -> r
  670                    _     -> mempty
  671    cols = length refRow
  672    fillColumns base padding = take cols $ base ++ repeat padding
  673    colProps' = fillColumns colProps def
  674 
  675 -- One or more horizontal rules after the first content line mark the previous
  676 -- line as a header.  All other horizontal lines are discarded.
  677 rowToContent :: OrgTable
  678              -> OrgTableRow
  679              -> F OrgTable
  680 rowToContent tbl row =
  681   case row of
  682     OrgHlineRow       -> return singleRowPromotedToHeader
  683     OrgAlignRow props -> return . setProperties $ props
  684     OrgContentRow cs  -> appendToBody cs
  685  where
  686    singleRowPromotedToHeader :: OrgTable
  687    singleRowPromotedToHeader = case tbl of
  688      OrgTable{ orgTableHeader = [], orgTableRows = [b] } ->
  689             tbl{ orgTableHeader = b , orgTableRows = [] }
  690      _   -> tbl
  691 
  692    setProperties :: [ColumnProperty] -> OrgTable
  693    setProperties ps = tbl{ orgTableColumnProperties = ps }
  694 
  695    appendToBody :: F [Blocks] -> F OrgTable
  696    appendToBody frow = do
  697      newRow <- frow
  698      let oldRows = orgTableRows tbl
  699      -- NOTE: This is an inefficient O(n) operation.  This should be changed
  700      -- if performance ever becomes a problem.
  701      return tbl{ orgTableRows = oldRows ++ [newRow] }
  702 
  703 
  704 --
  705 -- LaTeX fragments
  706 --
  707 latexFragment :: Monad m => OrgParser m (F Blocks)
  708 latexFragment = try $ do
  709   envName <- latexEnvStart
  710   content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
  711   returnF $ B.rawBlock "latex" (content `inLatexEnv` envName)
  712  where
  713    c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
  714                               , c
  715                               , "\\end{", e, "}\n"
  716                               ]
  717 
  718 latexEnd :: Monad m => String -> OrgParser m ()
  719 latexEnd envName = try $
  720   () <$ skipSpaces
  721      <* string ("\\end{" ++ envName ++ "}")
  722      <* blankline
  723 
  724 
  725 --
  726 -- Footnote definitions
  727 --
  728 noteBlock :: PandocMonad m => OrgParser m (F Blocks)
  729 noteBlock = try $ do
  730   ref <- noteMarker <* skipSpaces <* updateLastPreCharPos
  731   content <- mconcat <$> many1Till block endOfFootnote
  732   addToNotesTable (ref, content)
  733   return mempty
  734  where
  735    endOfFootnote =  eof
  736                 <|> () <$ lookAhead noteMarker
  737                 <|> () <$ lookAhead headerStart
  738                 <|> () <$ lookAhead (try $ blankline *> blankline)
  739 
  740 -- Paragraphs or Plain text
  741 paraOrPlain :: PandocMonad m => OrgParser m (F Blocks)
  742 paraOrPlain = try $ do
  743   -- Make sure we are not looking at a headline
  744   notFollowedBy' headerStart
  745   ils <- inlines
  746   nl <- option False (newline *> return True)
  747   -- Read block as paragraph, except if we are in a list context and the block
  748   -- is directly followed by a list item, in which case the block is read as
  749   -- plain text.
  750   try (guard nl
  751        *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart))
  752        *> return (B.para <$> ils))
  753     <|>  return (B.plain <$> ils)
  754 
  755 
  756 --
  757 -- list blocks
  758 --
  759 
  760 list :: PandocMonad m => OrgParser m (F Blocks)
  761 list = choice [ definitionList, bulletList, orderedList ] <?> "list"
  762 
  763 definitionList :: PandocMonad m => OrgParser m (F Blocks)
  764 definitionList = try $ do
  765   indent <- lookAhead bulletListStart
  766   fmap (B.definitionList . compactifyDL) . sequence
  767     <$> many1 (definitionListItem (bulletListStart `indented` indent))
  768 
  769 bulletList :: PandocMonad m => OrgParser m (F Blocks)
  770 bulletList = try $ do
  771   indent <- lookAhead bulletListStart
  772   fmap (B.bulletList . compactify) . sequence
  773     <$> many1 (listItem (bulletListStart `indented` indent))
  774 
  775 indented :: Monad m => OrgParser m Int -> Int -> OrgParser m Int
  776 indented indentedMarker minIndent = try $ do
  777   n <- indentedMarker
  778   guard (minIndent <= n)
  779   return n
  780 
  781 orderedList :: PandocMonad m => OrgParser m (F Blocks)
  782 orderedList = try $ do
  783   indent <- lookAhead orderedListStart
  784   fmap (B.orderedList . compactify) . sequence
  785     <$> many1 (listItem (orderedListStart `indented` indent))
  786 
  787 definitionListItem :: PandocMonad m
  788                    => OrgParser m Int
  789                    -> OrgParser m (F (Inlines, [Blocks]))
  790 definitionListItem parseIndentedMarker = try $ do
  791   markerLength <- parseIndentedMarker
  792   term <- manyTill (noneOf "\n\r") (try definitionMarker)
  793   line1 <- anyLineNewline
  794   blank <- option "" ("\n" <$ blankline)
  795   cont <- concat <$> many (listContinuation markerLength)
  796   term' <- parseFromString inlines term
  797   contents' <- parseFromString blocks $ line1 ++ blank ++ cont
  798   return $ (,) <$> term' <*> fmap (:[]) contents'
  799  where
  800    definitionMarker =
  801      spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
  802 
  803 -- | parse raw text for one list item
  804 listItem :: PandocMonad m
  805          => OrgParser m Int
  806          -> OrgParser m (F Blocks)
  807 listItem parseIndentedMarker = try . withContext ListItemState $ do
  808   markerLength <- try parseIndentedMarker
  809   firstLine <- anyLineNewline
  810   blank <- option "" ("\n" <$ blankline)
  811   rest <- concat <$> many (listContinuation markerLength)
  812   parseFromString blocks $ firstLine ++ blank ++ rest
  813 
  814 -- continuation of a list item - indented and separated by blankline or endline.
  815 -- Note: nested lists are parsed as continuations.
  816 listContinuation :: Monad m => Int
  817                  -> OrgParser m String
  818 listContinuation markerLength = try $ do
  819   notFollowedBy' blankline
  820   mappend <$> (concat <$> many1 listLine)
  821           <*> many blankline
  822  where
  823    listLine = try $ indentWith markerLength *> anyLineNewline