Use Pandoc for document rendering, for now just Markdown

This commit is contained in:
fr33domlover 2016-04-17 17:55:23 +00:00
parent 25bb1e5b83
commit a0945bfd87
6 changed files with 411 additions and 21 deletions

View file

@ -0,0 +1,45 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
-- | File path utilities for 'Data.Text.Text'.
module Text.FilePath.Local
( -- * Types
FileName
, FileBaseName
, FileExtension
-- * Functions
, breakExt
)
where
import Prelude
import Control.Arrow ((***))
import Data.Text (Text)
import System.FilePath
import qualified Data.Text as T
type FileName = Text
type FileBaseName = Text
type FileExtension = Text
breakExt :: FileName -> (FileBaseName, FileExtension)
breakExt name =
case id *** T.uncons $ T.break isExtSeparator name of
(_, Nothing) -> (name, T.empty)
(p, Just (_, r)) -> (p, r)

View file

@ -64,10 +64,12 @@ import qualified Data.Set as S (member)
import qualified Data.Text.Lazy.Encoding as L (decodeUtf8With)
import Data.ByteString.Char8.Local (takeLine)
import Text.FilePath.Local (breakExt)
import Vervis.Form.Repo
import Vervis.Foundation
import Vervis.Git (timeAgo')
import Vervis.Path
import Vervis.MediaType (chooseMediaType)
import Vervis.Model
import Vervis.Readme
import Vervis.Render
@ -147,8 +149,8 @@ getRepoSource repository user repo ref dir = do
Nothing -> return Nothing
Just tree -> do
let dir' = map (entName . encodeUtf8) dir
mRootOid <- resolveTreePath git tree dir'
target <- case mRootOid of
mTargetOid <- resolveTreePath git tree dir'
target <- case mTargetOid of
Nothing -> return $ Right tree
Just oid -> do
obj <- getObject_ git (unObjId oid) True
@ -164,7 +166,7 @@ getRepoSource repository user repo ref dir = do
let r = case mreadme of
Nothing -> Nothing
Just (t, b) ->
Just (t, renderReadme t b)
Just (t, renderReadme dir t b)
return $ Right (v, r)
return $ Just (branches, tags, view)
else return Nothing
@ -177,7 +179,11 @@ getRepoSource repository user repo ref dir = do
)
display <- case view of
Left b -> return $ Left $
renderSource (unpack $ last dir) (blobGetContent b)
let name = last dir
parent = init dir
(base, ext) = breakExt name
mediaType = chooseMediaType parent base ext () ()
in renderSource mediaType (blobGetContent b)
Right (v, mr) -> return $ Right (map mkrow v, mr)
let parent = if null dir then [] else init dir
dirs = zip parent (tail $ inits parent)

121
src/Vervis/MediaType.hs Normal file
View file

@ -0,0 +1,121 @@
{- This file is part of Vervis.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
-- | File content types and tools for detecting them. The focus is on content
-- that gets special treatment in Vervis, and not general MIME type modeling or
-- detection (although that could be done in the future).
module Vervis.MediaType
( MediaType (..)
, FileName
, FileBaseName
, FileExtension
, WorkType
, SourceViewOptions
, chooseMediaType
)
where
import Prelude
import Data.Text (Text)
data MediaType
= PlainText
| XML
| JSON
| YAML
| HTML
| CSS
| Markdown
| CSource
| CHeader
| Haskell
| LiterateHaskell
| CabalPackageDescription
| PersistentTemplate
| YesodRouteTemplate
| Hamlet
| Cassius
deriving Show
type FileName = Text
type FileBaseName = Text
type FileExtension = Text
type WorkType = ()
type SourceViewOptions = ()
chooseMediaType
:: [FileName]
-> FileBaseName
-> FileExtension
-> WorkType -- project type
-> SourceViewOptions -- e.g. whether to see rendered pages or their sources
-> MediaType
chooseMediaType dir base ext wt opts =
case (dir, base, ext, wt) of
-- * Data interchange
(_, _, "xml" , _) -> PlainText
(_, _, "json" , _) -> PlainText
(_, _, "yml" , _) -> PlainText
(_, _, "yaml" , _) -> PlainText
-- * Documents
(_, _, "txt" , _) -> PlainText
(_, _, "md" , _) -> Markdown
(_, _, "mdwn" , _) -> Markdown
(_, _, "mkdn" , _) -> Markdown
(_, _, "markdown", _) -> Markdown
-- * Web page basics
(_, _, "html" , _) -> PlainText
(_, _, "xhtml" , _) -> PlainText
(_, _, "css" , _) -> PlainText
(_, _, "js" , _) -> PlainText
-- * Programming languages
-- ** C
(_, _, "c" , _) -> PlainText
(_, _, "h" , _) -> PlainText
-- ** C++
(_, _, "cc" , _) -> PlainText
(_, _, "cpp" , _) -> PlainText
(_, _, "cxx" , _) -> PlainText
(_, _, "hh" , _) -> PlainText
(_, _, "hpp" , _) -> PlainText
-- ** Haskell
(_, _, "hs" , _) -> Haskell
(_, _, "lhs" , _) -> PlainText
(_, _, "cabal" , _) -> PlainText
(_, _, "hamlet" , _) -> PlainText
(_, _, "cassius" , _) -> PlainText
-- ** Java
(_, _, "java" , _) -> PlainText
-- ** Lisp
(_, _, "cl" , _) -> PlainText
(_, _, "el" , _) -> PlainText
-- ** Lua
(_, _, "lua" , _) -> PlainText
-- ** Perl
(_, _, "pl" , _) -> PlainText
-- ** PHP
(_, _, "php" , _) -> PlainText
-- ** Python
(_, _, "py" , _) -> PlainText
-- ** Ruby
(_, _, "rb" , _) -> PlainText
-- ** Scheme
(_, _, "scm" , _) -> PlainText
(_, _, _ , _) -> PlainText

View file

@ -33,7 +33,9 @@ import Data.Text.Encoding.Error (strictDecode)
import System.FilePath (isExtSeparator)
import Vervis.Foundation (Widget)
import Vervis.MediaType (chooseMediaType)
import Vervis.Render (renderSource)
import Text.FilePath.Local (breakExt)
-- | Check if the given filename should be considered as README file. Assumes
-- a flat filename which doesn't contain a directory part.
@ -59,5 +61,8 @@ findReadme git tree = go $ treeGetEnts tree
else go es
-- | Render README content into a widget for inclusion in a page.
renderReadme :: Text -> ByteString -> Widget
renderReadme name content = renderSource (unpack name) content
renderReadme :: [Text] -> Text -> ByteString -> Widget
renderReadme dir name content =
let (base, ext) = breakExt name
mediaType = chooseMediaType dir base ext () ()
in renderSource mediaType content

View file

@ -13,47 +13,253 @@
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
{-# Language CPP #-}
-- | Tools for rendering repository file contents and other source files.
--
-- There are several ways to render a file:
--
-- (1) As a source file, plain text and with line numbers
-- (2) As a source file, syntax highlighted and with line numbers
-- (3) As a plain text document
-- (4) As a document rendered to HTML, e.g. Markdown is a popular format
-- (5) As a document rendered to a custom format, e.g. presentation
--
-- The difference between 3 and 5 is line numbers and font (3 would use regular
-- text font, while 5 would use monospaced font).
--
-- At the time of writing, not all rendering modes are implemented. The current
-- status, assuming I'm keeping it updated, is:
--
-- (1) Partially implemented: No line numbers
-- (2) Implemented, using line numbers generated by @highlighter2@ formatter
-- (3) Not implemented
-- (4) Not implemented
-- (5) Not implmented
module Vervis.Render
( renderPlain
, renderHighlight
, renderSource
( --renderPlain
--, renderHighlight
renderSource
)
where
import Prelude
import Control.Monad.Logger (logDebug, logWarn)
import Data.ByteString.Lazy (ByteString, toStrict)
import Data.Foldable (for_)
import Data.Maybe (fromMaybe)
import Data.Monoid ((<>))
import Data.Text (pack)
import Data.Text.Encoding.Error (lenientDecode)
import Data.Text.Lazy.Encoding (decodeUtf8With)
import Formatting hiding (format)
--import Formatting hiding (format)
import Text.Blaze.Html (preEscapedToMarkup)
import Text.Blaze.Html.Renderer.Text (renderHtml)
import Text.Highlighter (lexerFromFilename, runLexer, Lexer (lName))
import Text.Highlighter.Formatters.Html (format)
import Text.Highlighting.Kate.Styles (tango)
import Text.HTML.SanitizeXSS (sanitizeBalance)
import Text.Pandoc.Definition (Pandoc)
import Text.Pandoc.Options
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Writers.HTML
import Yesod.Core.Widget (whamlet, toWidget)
import Vervis.Foundation (Widget)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T
import qualified Data.Text.Encoding.Error as TE
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import qualified Text.Highlighter.Lexers.Haskell as L.Haskell
renderPlain :: ByteString -> Widget
import Vervis.Foundation (Widget)
import Vervis.MediaType (MediaType (..))
renderPlain :: BL.ByteString -> Widget
renderPlain content =
[whamlet|
<pre>
<code>#{decodeUtf8With lenientDecode content}
<code>#{TLE.decodeUtf8With TE.lenientDecode content}
|]
renderHighlight
:: FilePath -> ByteString -> Either (Maybe Lexer) (Lexer, Widget)
{-renderHighlight
:: FilePath -> BL.ByteString -> Either (Maybe Lexer) (Lexer, Widget)
renderHighlight name content =
case lexerFromFilename name of
Nothing -> Left Nothing
Just lexer ->
case runLexer lexer $ toStrict content of
case runLexer lexer $ BL.toStrict content of
Left err -> Left $ Just lexer
Right tokens -> Right (lexer, toWidget $ format True tokens)
-}
renderSource :: FilePath -> ByteString -> Widget
-- * File uploads and wiki attachments
-- * Wiki pages
-- * READMEs
-- * Source files which happen to be documents, e.g. Markdown, manpages,
-- OrgMode, LaTeX, and
-- * Literate Haskell files
--
-- For now, let's ignore the first two. Which source files, README or other, do
-- we want to offer to display as HTML rendering?
--
-- * [ ] native
-- * [ ] json
-- * [x] markdown
-- * [ ] markdown_strict
-- * [ ] markdown_phpextra
-- * [ ] markdown_github
-- * [ ] markdown_mmd
-- * [ ] commonmark
-- * [ ] rst
-- * [ ] mediawiki
-- * [ ] docbook
-- * [ ] opml
-- * [ ] org
-- * [ ] textile
-- * [ ] html
-- * [ ] latex
-- * [ ] haddock
-- * [ ] twiki
-- * [ ] docx
-- * [ ] odt
-- * [ ] t2t
-- * [ ] epub
--
-- Any others not in this list, maybe using other libraries?
--
-- * [ ] asciidoc
-- * [ ] groff manpage
renderHighlight :: Lexer -> BL.ByteString -> Maybe Widget
renderHighlight lexer content =
case runLexer lexer $ BL.toStrict content of
Left err -> Nothing
Right tokens -> Just $ toWidget $ format True tokens
renderCode :: Lexer -> BL.ByteString -> Widget
renderCode lexer content =
fromMaybe (renderPlain content) $ renderHighlight lexer content
readerOptions :: ReaderOptions
readerOptions = def
{ readerExtensions = pandocExtensions
, readerSmart = True
, readerStandalone = False
, readerParseRaw = True
, readerColumns = 80
, readerTabStop = 4
-- , readerOldDashes = False
-- , readerApplyMacros = True
-- , readerIndentedCodeClasses = []
-- , readerDefaultImageExtension = ""
, readerTrace =
#if DEVELOPMENT
True
#else
False
#endif
-- , readerTrackChanges = AcceptChanges
-- , readerFileScope = False
}
writerOptions :: WriterOptions
writerOptions = def
{ writerStandalone = False
-- , writerTemplate = ""
-- , writerVariables = []
, writerTabStop = 4
, writerTableOfContents = True
-- , writerSlideVariant = NoSlides
-- , writerIncremental = False
-- , writerHTMLMathMethod = PlainMath
-- , writerIgnoreNotes = False
-- , writerNumberSections = False
-- , writerNumberOffset = [0,0,0,0,0,0]
-- , writerSectionDivs = False
, writerExtensions = pandocExtensions
-- , writerReferenceLinks = False
-- , writerDpi = 96
, writerWrapText = WrapAuto
, writerColumns = 79
, writerEmailObfuscation = ReferenceObfuscation
-- , writerIdentifierPrefix = ""
-- , writerSourceURL = Nothing
-- , writerUserDataDir = Nothing
-- , writerCiteMethod = Citeproc
, writerHtml5 = True
-- , writerHtmlQTags = False
-- , writerBeamer = False
-- , writerSlideLevel = Nothing
-- , writerChapters = False
-- , writerListings = False
, writerHighlight = True
, writerHighlightStyle = tango
-- , writerSetextHeaders = True
-- , writerTeXLigatures = True
-- , writerEpubVersion = Nothing
-- , writerEpubMetadata = ""
-- , writerEpubStylesheet = Nothing
-- , writerEpubFonts = []
-- , writerEpubChapterLevel = 1
-- , writerTOCDepth = 3
-- , writerReferenceODT = Nothing
-- , writerReferenceDocx = Nothing
-- , writerMediaBag = mempty
, writerVerbose =
#if DEVELOPMENT
True
#else
False
#endif
-- , writerLaTeXArgs = []
}
renderPandoc :: Pandoc -> Widget
renderPandoc =
toWidget .
preEscapedToMarkup .
sanitizeBalance .
TL.toStrict .
renderHtml .
writeHtml writerOptions
renderSource :: MediaType -> BL.ByteString -> Widget
renderSource mt content =
let contentBL = content
contentTL = TLE.decodeUtf8With TE.lenientDecode contentBL
contentS = TL.unpack contentTL
mtName = T.pack $ show mt
failed e =
"Failed to parse " <> mtName <> "content: " <> T.pack (show e)
-- Plain text with line numbers
plain = renderPlain content
-- Syntax highlighted source code with line numbers
code l = renderCode l content
-- Rendered document from String source
docS r =
case r readerOptions contentS of
Left err -> $logWarn (failed err) >> plain
Right doc -> renderPandoc doc
-- Rendered document from String source, with warnings
docSW r =
case r readerOptions contentS of
Left err -> $logWarn (failed err) >> plain
Right (doc, warns) -> do
for_ warns $ \ warn ->
$logDebug $ mtName <> " reader warning: " <> T.pack warn
renderPandoc doc
in case mt of
-- * Documents
PlainText -> plain
Markdown -> docSW readMarkdownWithWarnings
-- * Programming languages
-- ** Haskell
Haskell -> code L.Haskell.lexer
-- * Misc
_ -> plain
{-renderSource :: FilePath -> BL.ByteString -> Widget
renderSource name content =
let plain = renderPlain content
in case renderHighlight name content of
@ -71,3 +277,4 @@ renderSource name content =
$logDebug $ sformat
("Lexed " % string % " with " % string) name (lName lexer)
widget
-}

View file

@ -39,6 +39,7 @@ library
Data.Char.Local
Data.List.Local
Network.SSH.Local
Text.FilePath.Local
Vervis.Application
Vervis.Field.Key
Vervis.Field.Person
@ -52,6 +53,7 @@ library
Vervis.Git
Vervis.Import
Vervis.Import.NoFoundation
Vervis.MediaType
Vervis.Model
Vervis.Readme
Vervis.Render
@ -107,6 +109,7 @@ library
, formatting
, hashable
, highlighter2
, highlighting-kate
, hit
, hit-graph >= 0.1
, hjsmin
@ -115,6 +118,8 @@ library
, http-types
, monad-control
, monad-logger
, pandoc
, pandoc-types
, persistent
, persistent-postgresql
, persistent-template
@ -130,6 +135,7 @@ library
, wai-extra
, wai-logger
, warp
, xss-sanitize
, yaml
, yesod
, yesod-auth