module Documentation.Haddock.Parser ( parseString, parseParas
, overIdentifier, toRegular, Identifier
) where
import Control.Applicative
import Control.Arrow (first)
import Control.Monad
import qualified Data.ByteString.Char8 as BS
import Data.Char (chr, isAsciiUpper)
import Data.List (stripPrefix, intercalate, unfoldr)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Documentation.Haddock.Doc
import Documentation.Haddock.Parser.Monad hiding (take, endOfLine)
import Documentation.Haddock.Parser.Util
import Documentation.Haddock.Types
import Documentation.Haddock.Utf8
import Prelude hiding (takeWhile)
type Identifier = (Char, String, Char)
toRegular :: DocH mod Identifier -> DocH mod String
toRegular = fmap (\(_, x, _) -> x)
overIdentifier :: (String -> Maybe a)
-> DocH mod Identifier
-> DocH mod a
overIdentifier f d = g d
where
g (DocIdentifier (o, x, e)) = case f x of
Nothing -> DocString $ o : x ++ [e]
Just x' -> DocIdentifier x'
g DocEmpty = DocEmpty
g (DocAppend x x') = DocAppend (g x) (g x')
g (DocString x) = DocString x
g (DocParagraph x) = DocParagraph $ g x
g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
g (DocModule x) = DocModule x
g (DocWarning x) = DocWarning $ g x
g (DocEmphasis x) = DocEmphasis $ g x
g (DocMonospaced x) = DocMonospaced $ g x
g (DocBold x) = DocBold $ g x
g (DocUnorderedList x) = DocUnorderedList $ fmap g x
g (DocOrderedList x) = DocOrderedList $ fmap g x
g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
g (DocCodeBlock x) = DocCodeBlock $ g x
g (DocHyperlink x) = DocHyperlink x
g (DocPic x) = DocPic x
g (DocMathInline x) = DocMathInline x
g (DocMathDisplay x) = DocMathDisplay x
g (DocAName x) = DocAName x
g (DocProperty x) = DocProperty x
g (DocExamples x) = DocExamples x
g (DocHeader (Header l x)) = DocHeader . Header l $ g x
parse :: Parser a -> BS.ByteString -> (ParserState, a)
parse p = either err id . parseOnly (p <* endOfInput)
where
err = error . ("Haddock.Parser.parse: " ++)
parseParas :: String
-> MetaDoc mod Identifier
parseParas input = case parseParasState input of
(state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state }
, _doc = a
}
parseParasState :: String -> (ParserState, DocH mod Identifier)
parseParasState =
parse (p <* skipSpace) . encodeUtf8 . (++ "\n") . filter (/= '\r')
where
p :: Parser (DocH mod Identifier)
p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
parseParagraphs :: String -> Parser (DocH mod Identifier)
parseParagraphs input = case parseParasState input of
(state, a) -> setParserState state >> return a
parseString :: String -> DocH mod Identifier
parseString = parseStringBS . encodeUtf8 . dropWhile isSpace . filter (/= '\r')
parseStringBS :: BS.ByteString -> DocH mod Identifier
parseStringBS = snd . parse p
where
p :: Parser (DocH mod Identifier)
p = docConcat <$> many (monospace <|> anchor <|> identifier <|> moduleName
<|> picture <|> mathDisplay <|> mathInline
<|> markdownImage
<|> hyperlink <|> bold
<|> emphasis <|> encodedChar <|> string'
<|> skipSpecialChar)
encodedChar :: Parser (DocH mod a)
encodedChar = "&#" *> c <* ";"
where
c = DocString . return . chr <$> num
num = hex <|> decimal
hex = ("x" <|> "X") *> hexadecimal
specialChar :: [Char]
specialChar = "_/<@\"&'`# "
string' :: Parser (DocH mod a)
string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
where
unescape "" = ""
unescape ('\\':x:xs) = x : unescape xs
unescape (x:xs) = x : unescape xs
skipSpecialChar :: Parser (DocH mod a)
skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
emphasis :: Parser (DocH mod Identifier)
emphasis = DocEmphasis . parseStringBS <$>
mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
bold :: Parser (DocH mod Identifier)
bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
disallowNewline = mfilter ('\n' `BS.notElem`)
takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
takeWhile_ p = scan False p_
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
takeWhile1_ = mfilter (not . BS.null) . takeWhile_
anchor :: Parser (DocH mod a)
anchor = DocAName . decodeUtf8 <$>
disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#")
monospace :: Parser (DocH mod Identifier)
monospace = DocMonospaced . parseStringBS
<$> ("@" *> takeWhile1_ (/= '@') <* "@")
moduleName :: Parser (DocH mod a)
moduleName = DocModule <$> (char '"' *> modid <* char '"')
where
modid = intercalate "." <$> conid `sepBy1` "."
conid = (:)
<$> satisfy isAsciiUpper
<*> (decodeUtf8 <$> takeWhile (`notElem` (" .&[{}(=*)+]!|@/;,^?\"\n"::String)))
picture :: Parser (DocH mod a)
picture = DocPic . makeLabeled Picture . decodeUtf8
<$> disallowNewline ("<<" *> takeUntil ">>")
mathInline :: Parser (DocH mod a)
mathInline = DocMathInline . decodeUtf8
<$> disallowNewline ("\\(" *> takeUntil "\\)")
mathDisplay :: Parser (DocH mod a)
mathDisplay = DocMathDisplay . decodeUtf8
<$> ("\\[" *> takeUntil "\\]")
markdownImage :: Parser (DocH mod a)
markdownImage = fromHyperlink <$> ("!" *> linkParser)
where
fromHyperlink (Hyperlink url label) = DocPic (Picture url label)
paragraph :: Parser (DocH mod Identifier)
paragraph = examples <|> do
indent <- takeIndent
choice
[ since
, unorderedList indent
, orderedList indent
, birdtracks
, codeblock
, property
, header
, textParagraphThatStartsWithMarkdownLink
, definitionList indent
, docParagraph <$> textParagraph
]
since :: Parser (DocH mod a)
since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty
where
version = decimal `sepBy1'` "."
header :: Parser (DocH mod Identifier)
header = do
let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
pser = foldl1 (<|>) psers
delim <- decodeUtf8 <$> pser
line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
rest <- paragraph <|> return DocEmpty
return $ DocHeader (Header (length delim) line) `docAppend` rest
textParagraph :: Parser (DocH mod Identifier)
textParagraph = parseString . intercalate "\n" <$> many1 nonEmptyLine
textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier)
textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph)
where
optionalTextParagraph :: Parser (DocH mod Identifier)
optionalTextParagraph = (docAppend <$> whitespace <*> textParagraph) <|> pure DocEmpty
whitespace :: Parser (DocH mod a)
whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n")
where
f :: BS.ByteString -> Maybe BS.ByteString -> String
f xs (fromMaybe "" -> x)
| BS.null (xs <> x) = ""
| otherwise = " "
unorderedList :: BS.ByteString -> Parser (DocH mod Identifier)
unorderedList indent = DocUnorderedList <$> p
where
p = ("*" <|> "-") *> innerList indent p
orderedList :: BS.ByteString -> Parser (DocH mod Identifier)
orderedList indent = DocOrderedList <$> p
where
p = (paren <|> dot) *> innerList indent p
dot = (decimal :: Parser Int) <* "."
paren = "(" *> decimal <* ")"
innerList :: BS.ByteString -> Parser [DocH mod Identifier]
-> Parser [DocH mod Identifier]
innerList indent item = do
c <- takeLine
(cs, items) <- more indent item
let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
return $ case items of
Left p -> [contents `docAppend` p]
Right i -> contents : i
definitionList :: BS.ByteString -> Parser (DocH mod Identifier)
definitionList indent = DocDefList <$> p
where
p = do
label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
c <- takeLine
(cs, items) <- more indent p
let contents = parseString . dropNLs . unlines $ c : cs
return $ case items of
Left x -> [(label, contents `docAppend` x)]
Right i -> (label, contents) : i
dropNLs :: String -> String
dropNLs = reverse . dropWhile (== '\n') . reverse
more :: Monoid a => BS.ByteString -> Parser a
-> Parser ([String], Either (DocH mod Identifier) a)
more indent item = innerParagraphs indent
<|> moreListItems indent item
<|> moreContent indent item
<|> pure ([], Right mempty)
innerParagraphs :: BS.ByteString
-> Parser ([String], Either (DocH mod Identifier) a)
innerParagraphs indent = (,) [] . Left <$> ("\n" *> indentedParagraphs indent)
moreListItems :: BS.ByteString -> Parser a
-> Parser ([String], Either (DocH mod Identifier) a)
moreListItems indent item = (,) [] . Right <$> indentedItem
where
indentedItem = string indent *> skipSpace *> item
moreContent :: Monoid a => BS.ByteString -> Parser a
-> Parser ([String], Either (DocH mod Identifier) a)
moreContent indent item = first . (:) <$> nonEmptyLine <*> more indent item
indentedParagraphs :: BS.ByteString -> Parser (DocH mod Identifier)
indentedParagraphs indent =
(concat <$> dropFrontOfPara indent') >>= parseParagraphs
where
indent' = string $ BS.append indent " "
dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
dropFrontOfPara sp = do
currentParagraph <- some (sp *> takeNonEmptyLine)
followingParagraphs <-
skipHorizontalSpace *> nextPar
<|> skipHorizontalSpace *> nlList
<|> endOfInput *> return []
return (currentParagraph ++ followingParagraphs)
where
nextPar = (++) <$> nlList <*> dropFrontOfPara sp
nlList = "\n" *> return ["\n"]
nonSpace :: BS.ByteString -> Parser BS.ByteString
nonSpace xs
| not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
| otherwise = return xs
takeNonEmptyLine :: Parser String
takeNonEmptyLine = do
(++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
takeIndent :: Parser BS.ByteString
takeIndent = do
indent <- takeHorizontalSpace
"\n" *> takeIndent <|> return indent
birdtracks :: Parser (DocH mod a)
birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
where
line = skipHorizontalSpace *> ">" *> takeLine
stripSpace :: [String] -> [String]
stripSpace = fromMaybe <*> mapM strip'
where
strip' (' ':xs') = Just xs'
strip' "" = Just ""
strip' _ = Nothing
examples :: Parser (DocH mod a)
examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
where
go :: Parser [Example]
go = do
prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
expr <- takeLine
(rs, es) <- resultAndMoreExamples
return (makeExample prefix expr rs : es)
where
resultAndMoreExamples :: Parser ([String], [Example])
resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
where
moreExamples :: Parser ([String], [Example])
moreExamples = (,) [] <$> go
result :: Parser ([String], [Example])
result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
makeExample :: String -> String -> [String] -> Example
makeExample prefix expression res =
Example (strip expression) result
where
result = map (substituteBlankLine . tryStripPrefix) res
tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
substituteBlankLine "<BLANKLINE>" = ""
substituteBlankLine xs = xs
nonEmptyLine :: Parser String
nonEmptyLine = mfilter (any (not . isSpace)) takeLine
takeLine :: Parser String
takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
endOfLine :: Parser ()
endOfLine = void "\n" <|> endOfInput
property :: Parser (DocH mod a)
property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
codeblock :: Parser (DocH mod Identifier)
codeblock =
DocCodeBlock . parseStringBS . dropSpaces
<$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
where
dropSpaces xs =
let rs = decodeUtf8 xs
in case splitByNl rs of
[] -> xs
ys -> case last ys of
' ':_ -> case mapM dropSpace ys of
Nothing -> xs
Just zs -> encodeUtf8 $ intercalate "\n" zs
_ -> xs
splitByNl = unfoldr (\x -> case x of
'\n':s -> Just (span (/= '\n') s)
_ -> Nothing)
. ('\n' :)
dropSpace "" = Just ""
dropSpace (' ':xs) = Just xs
dropSpace _ = Nothing
block' = scan False p
where
p isNewline c
| isNewline && c == '@' = Nothing
| isNewline && isSpace c = Just isNewline
| otherwise = Just $ c == '\n'
hyperlink :: Parser (DocH mod a)
hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
<$> disallowNewline ("<" *> takeUntil ">")
<|> autoUrl
<|> markdownLink
markdownLink :: Parser (DocH mod a)
markdownLink = DocHyperlink <$> linkParser
linkParser :: Parser Hyperlink
linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
where
label :: Parser (Maybe String)
label = Just . strip . decode <$> ("[" *> takeUntil "]")
whitespace :: Parser ()
whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace)
url :: Parser String
url = rejectWhitespace (decode <$> ("(" *> takeUntil ")"))
rejectWhitespace :: MonadPlus m => m String -> m String
rejectWhitespace = mfilter (all (not . isSpace))
decode :: BS.ByteString -> String
decode = removeEscapes . decodeUtf8
autoUrl :: Parser (DocH mod a)
autoUrl = mkLink <$> url
where
url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
mkLink :: BS.ByteString -> DocH mod a
mkLink s = case unsnoc s of
Just (xs, x) | x `elem` (",.!?" :: String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x]
_ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
parseValid :: Parser String
parseValid = p some
where
idChar = satisfy (`elem` ("_.!#$%&*+/<=>?@\\|-~:^"::String))
<|> digit <|> letter_ascii
p p' = do
vs' <- p' $ utf8String "⋆" <|> return <$> idChar
let vs = concat vs'
c <- peekChar'
case c of
'`' -> return vs
'\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs
_ -> fail "outofvalid"
utf8String :: String -> Parser String
utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
identifier :: Parser (DocH mod Identifier)
identifier = do
o <- idDelim
vid <- parseValid
e <- idDelim
return $ DocIdentifier (o, vid, e)
where
idDelim = char '\'' <|> char '`'