"Fossies" - the Fresh Open Source Software Archive

Member "pandoc-2.7.3/src/Text/Pandoc/Readers/Ipynb.hs" (12 Jun 2019, 8704 Bytes) of package /linux/www/pandoc-2.7.3.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Haskell source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file. See also the last Fossies "Diffs" side-by-side code changes report for "Ipynb.hs": 2.7.1_vs_2.7.2.

    1 {-# LANGUAGE NoImplicitPrelude #-}
    2 {-# LANGUAGE DeriveGeneric #-}
    3 {-# LANGUAGE LambdaCase #-}
    4 {-# LANGUAGE OverloadedStrings #-}
    5 {-# LANGUAGE FlexibleContexts  #-}
    6 {-# LANGUAGE ScopedTypeVariables #-}
    7 {- |
    8    Module      : Text.Pandoc.Readers.Ipynb
    9    Copyright   : Copyright (C) 2019 John MacFarlane
   10    License     : GNU GPL, version 2 or above
   11 
   12    Maintainer  : John MacFarlane <jgm@berkeley.edu>
   13    Stability   : alpha
   14    Portability : portable
   15 
   16 Ipynb (Jupyter notebook JSON format) reader for pandoc.
   17 -}
   18 module Text.Pandoc.Readers.Ipynb ( readIpynb )
   19 where
   20 import Prelude
   21 import Data.Char (isDigit)
   22 import Data.List (isPrefixOf)
   23 import Data.Maybe (fromMaybe)
   24 import Data.Digest.Pure.SHA (sha1, showDigest)
   25 import Text.Pandoc.Options
   26 import qualified Data.Scientific as Scientific
   27 import qualified Text.Pandoc.Builder as B
   28 import Text.Pandoc.Logging
   29 import Text.Pandoc.Definition
   30 import Data.Ipynb as Ipynb
   31 import Text.Pandoc.Class
   32 import Text.Pandoc.MIME (extensionFromMimeType)
   33 import Text.Pandoc.UTF8
   34 import Text.Pandoc.Walk (walk)
   35 import Text.Pandoc.Error
   36 import Data.Text (Text)
   37 import qualified Data.Map as M
   38 import qualified Data.Text as T
   39 import qualified Data.Text.Encoding as TE
   40 import qualified Data.ByteString.Lazy as BL
   41 import Data.Aeson as Aeson
   42 import Control.Monad.Except (throwError)
   43 import Text.Pandoc.Readers.Markdown (readMarkdown)
   44 import qualified Text.Pandoc.UTF8 as UTF8
   45 
   46 readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
   47 readIpynb opts t = do
   48   let src = BL.fromStrict (TE.encodeUtf8 t)
   49   case eitherDecode src of
   50     Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
   51     Left _ ->
   52       case eitherDecode src of
   53         Right (notebook3 :: Notebook NbV3) -> notebookToPandoc opts notebook3
   54         Left err -> throwError $ PandocIpynbDecodingError err
   55 
   56 notebookToPandoc :: (PandocMonad m, FromJSON (Notebook a))
   57                  => ReaderOptions -> Notebook a -> m Pandoc
   58 notebookToPandoc opts notebook = do
   59   let cells = notebookCells notebook
   60   let (fmt,fmtminor) = notebookFormat notebook
   61   let m = M.insert "nbformat" (MetaString $ show fmt) $
   62           M.insert "nbformat_minor" (MetaString $ show fmtminor) $
   63           jsonMetaToMeta (notebookMetadata notebook)
   64   let lang = case M.lookup "kernelspec" m of
   65                    Just (MetaMap ks) ->
   66                       case M.lookup "language" ks of
   67                          Just (MetaString l) -> l
   68                          _ -> "python"
   69                    _ -> "python"
   70   bs <- mconcat <$> mapM (cellToBlocks opts lang) cells
   71   let Pandoc _ blocks = B.doc bs
   72   return $ Pandoc (Meta $ M.insert "jupyter" (MetaMap m) mempty) blocks
   73 
   74 cellToBlocks :: PandocMonad m
   75              => ReaderOptions -> String -> Cell a -> m B.Blocks
   76 cellToBlocks opts lang c = do
   77   let Source ts = cellSource c
   78   let source = mconcat ts
   79   let kvs = jsonMetaToPairs (cellMetadata c)
   80   let attachments = maybe mempty M.toList $ cellAttachments c
   81   mapM_ addAttachment attachments
   82   case cellType c of
   83     Ipynb.Markdown -> do
   84       Pandoc _ bs <- walk fixImage <$> readMarkdown opts source
   85       return $ B.divWith ("",["cell","markdown"],kvs)
   86              $ B.fromList bs
   87     Ipynb.Heading lev -> do
   88       Pandoc _ bs <- readMarkdown opts
   89         (T.replicate lev "#" <> " " <> source)
   90       return $ B.divWith ("",["cell","markdown"],kvs)
   91              $ B.fromList bs
   92     Ipynb.Raw -> do
   93       -- we use ipynb to indicate no format given (a wildcard in nbformat)
   94       let format = fromMaybe "ipynb" $ lookup "format" kvs
   95       let format' =
   96             case format of
   97               "text/html"       -> "html"
   98               "text/latex"      -> "latex"
   99               "application/pdf" -> "latex"
  100               "text/markdown"   -> "markdown"
  101               "text/x-rsrt"     -> "rst"
  102               _                 -> format
  103       return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format'
  104              $ T.unpack source
  105     Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do
  106       outputBlocks <- mconcat <$> mapM outputToBlock outputs
  107       let kvs' = maybe kvs (\x -> ("execution_count", show x):kvs) ec
  108       return $ B.divWith ("",["cell","code"],kvs') $
  109         B.codeBlockWith ("",[lang],[]) (T.unpack source)
  110         <> outputBlocks
  111 
  112 -- Remove attachment: prefix from images...
  113 fixImage :: Inline -> Inline
  114 fixImage (Image attr lab (src,tit))
  115   | "attachment:" `isPrefixOf` src = Image attr lab (drop 11 src, tit)
  116 fixImage x = x
  117 
  118 addAttachment :: PandocMonad m => (Text, MimeBundle) -> m ()
  119 addAttachment (fname, mimeBundle) = do
  120   let fp = T.unpack fname
  121   case M.toList (unMimeBundle mimeBundle) of
  122     (mimeType, BinaryData bs):_ ->
  123       insertMedia fp (Just $ T.unpack mimeType) (BL.fromStrict bs)
  124     (mimeType, TextualData t):_ ->
  125       insertMedia fp (Just $ T.unpack mimeType)
  126           (BL.fromStrict $ TE.encodeUtf8 t)
  127     (mimeType, JsonData v):_ ->
  128       insertMedia fp (Just $ T.unpack mimeType) (encode v)
  129     [] -> report $ CouldNotFetchResource fp "no attachment"
  130 
  131 outputToBlock :: PandocMonad m => Output a -> m B.Blocks
  132 outputToBlock Stream{ streamName = sName,
  133                       streamText = Source text } = do
  134   return $ B.divWith ("",["output","stream",T.unpack sName],[])
  135          $ B.codeBlock $ T.unpack . mconcat $ text
  136 outputToBlock DisplayData{ displayData = data',
  137                             displayMetadata = metadata' } =
  138   B.divWith ("",["output", "display_data"],[]) <$>
  139     handleData metadata' data'
  140 outputToBlock ExecuteResult{ executeCount = ec,
  141                               executeData = data',
  142                               executeMetadata = metadata' } =
  143   B.divWith ("",["output", "execute_result"],[("execution_count",show ec)])
  144     <$> handleData metadata' data'
  145 outputToBlock Err{ errName = ename,
  146                    errValue = evalue,
  147                    errTraceback = traceback } = do
  148   return $ B.divWith ("",["output","error"],
  149                          [("ename",T.unpack ename),
  150                           ("evalue",T.unpack evalue)])
  151          $ B.codeBlock $ T.unpack . T.unlines $ traceback
  152 
  153 -- We want to display the richest output possible given
  154 -- the output format.
  155 handleData :: PandocMonad m
  156            => JSONMeta -> MimeBundle -> m B.Blocks
  157 handleData metadata (MimeBundle mb) =
  158   mconcat <$> mapM dataBlock (M.toList mb)
  159 
  160   where
  161 
  162     dataBlock :: PandocMonad m => (MimeType, MimeData) -> m B.Blocks
  163     dataBlock (mt, BinaryData bs)
  164      | "image/" `T.isPrefixOf` mt
  165       = do
  166       -- normally metadata maps from mime types to key-value map;
  167       -- but not always...
  168       let meta = case M.lookup mt metadata of
  169                    Just v@(Object{}) ->
  170                      case fromJSON v of
  171                        Success m' -> m'
  172                        Error _   -> mempty
  173                    _ -> mempty
  174       let metaPairs = jsonMetaToPairs meta
  175       let bl = BL.fromStrict bs
  176       -- SHA1 hash for filename
  177       let mt' = T.unpack mt
  178       let fname = showDigest (sha1 bl) ++
  179             case extensionFromMimeType mt' of
  180               Nothing  -> ""
  181               Just ext -> '.':ext
  182       insertMedia fname (Just mt') bl
  183       return $ B.para $ B.imageWith ("",[],metaPairs) fname "" mempty
  184      | otherwise = return mempty
  185 
  186     dataBlock ("text/html", TextualData t)
  187       = return $ B.rawBlock "html" $ T.unpack t
  188 
  189     dataBlock ("text/latex", TextualData t)
  190       = return $ B.rawBlock "latex" $ T.unpack t
  191 
  192     dataBlock ("text/plain", TextualData t) =
  193       return $ B.codeBlock $ T.unpack t
  194 
  195     dataBlock (_, JsonData v) =
  196       return $ B.codeBlockWith ("",["json"],[]) $ toStringLazy $ encode v
  197 
  198     dataBlock _ = return mempty
  199 
  200 jsonMetaToMeta :: JSONMeta -> M.Map String MetaValue
  201 jsonMetaToMeta = M.mapKeys T.unpack . M.map valueToMetaValue
  202   where
  203     valueToMetaValue :: Value -> MetaValue
  204     valueToMetaValue x@(Object{}) =
  205       case fromJSON x of
  206         Error s -> MetaString s
  207         Success jm' -> MetaMap $ jsonMetaToMeta jm'
  208     valueToMetaValue x@(Array{}) =
  209       case fromJSON x of
  210         Error s -> MetaString s
  211         Success xs -> MetaList $ map valueToMetaValue xs
  212     valueToMetaValue (Bool b) = MetaBool b
  213     valueToMetaValue (String t) = MetaString (T.unpack t)
  214     valueToMetaValue (Number n)
  215       | Scientific.isInteger n = MetaString (show (floor n :: Integer))
  216       | otherwise              = MetaString (show n)
  217     valueToMetaValue Aeson.Null = MetaString ""
  218 
  219 jsonMetaToPairs :: JSONMeta -> [(String, String)]
  220 jsonMetaToPairs = M.toList . M.mapKeys T.unpack . M.map
  221   (\case
  222       String t
  223         | not (T.all isDigit t)
  224         , t /= "true"
  225         , t /= "false"
  226                  -> T.unpack t
  227       x          -> UTF8.toStringLazy $ Aeson.encode x)