{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module Documentation.Haddock.Parser (
parseString,
parseParas,
overIdentifier,
toRegular,
Identifier
) where
import Control.Applicative
import Control.Arrow (first)
import Control.Monad
import Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace)
import Data.List (intercalate, unfoldr, elemIndex, notElem)
import Data.Maybe (fromMaybe, mapMaybe)
import Data.Monoid
import qualified Data.Set as Set
import Documentation.Haddock.Doc
import Documentation.Haddock.Parser.Monad
import Documentation.Haddock.Parser.Util
import Documentation.Haddock.Types
import Prelude hiding (takeWhile)
import qualified Prelude as P
import qualified Text.Parsec as Parsec
import Text.Parsec (try)
import qualified Data.Text as T
import Data.Text (Text)
#if MIN_VERSION_base(4,9,0)
import Text.Read.Lex (isSymbolChar)
#else
import Data.Char (GeneralCategory (..),
generalCategory)
#endif
#if !MIN_VERSION_base(4,9,0)
isSymbolChar :: Char -> Bool
isSymbolChar c = not (isPuncChar c) && case generalCategory c of
MathSymbol -> True
CurrencySymbol -> True
ModifierSymbol -> True
OtherSymbol -> True
DashPunctuation -> True
OtherPunctuation -> c `notElem` ("'\"" :: String)
ConnectorPunctuation -> c /= '_'
_ -> False
where
isPuncChar :: Char -> Bool
isPuncChar = (`elem` (",;()[]{}`" :: String))
#endif
type Identifier = (Char, String, Char)
toRegular :: DocH mod Identifier -> DocH mod String
toRegular = fmap (\(_, x, _) -> x)
overIdentifier :: (String -> Maybe a)
-> DocH mod Identifier
-> DocH mod a
overIdentifier f d = g d
where
g (DocIdentifier (o, x, e)) = case f x of
Nothing -> DocString $ o : x ++ [e]
Just x' -> DocIdentifier x'
g DocEmpty = DocEmpty
g (DocAppend x x') = DocAppend (g x) (g x')
g (DocString x) = DocString x
g (DocParagraph x) = DocParagraph $ g x
g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
g (DocModule x) = DocModule x
g (DocWarning x) = DocWarning $ g x
g (DocEmphasis x) = DocEmphasis $ g x
g (DocMonospaced x) = DocMonospaced $ g x
g (DocBold x) = DocBold $ g x
g (DocUnorderedList x) = DocUnorderedList $ fmap g x
g (DocOrderedList x) = DocOrderedList $ fmap g x
g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
g (DocCodeBlock x) = DocCodeBlock $ g x
g (DocHyperlink x) = DocHyperlink x
g (DocPic x) = DocPic x
g (DocMathInline x) = DocMathInline x
g (DocMathDisplay x) = DocMathDisplay x
g (DocAName x) = DocAName x
g (DocProperty x) = DocProperty x
g (DocExamples x) = DocExamples x
g (DocHeader (Header l x)) = DocHeader . Header l $ g x
g (DocTable (Table h b)) = DocTable (Table (map (fmap g) h) (map (fmap g) b))
choice' :: [Parser a] -> Parser a
choice' [] = empty
choice' [p] = p
choice' (p : ps) = try p <|> choice' ps
parse :: Parser a -> Text -> (ParserState, a)
parse p = either err id . parseOnly (p <* Parsec.eof)
where
err = error . ("Haddock.Parser.parse: " ++)
parseParas :: Maybe Package
-> String
-> MetaDoc mod Identifier
parseParas pkg input = case parseParasState input of
(state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state
, _package = pkg
}
, _doc = a
}
parseParasState :: String -> (ParserState, DocH mod Identifier)
parseParasState = parse (emptyLines *> p) . T.pack . (++ "\n") . filter (/= '\r')
where
p :: Parser (DocH mod Identifier)
p = docConcat <$> many (paragraph <* emptyLines)
emptyLines :: Parser ()
emptyLines = void $ many (try (skipHorizontalSpace *> "\n"))
parseParagraphs :: String -> Parser (DocH mod Identifier)
parseParagraphs input = case parseParasState input of
(state, a) -> Parsec.putState state *> pure a
parseString :: String -> DocH mod Identifier
parseString = parseText . T.pack
parseText :: Text -> DocH mod Identifier
parseText = parseParagraph . T.dropWhile isSpace . T.filter (/= '\r')
parseParagraph :: Text -> DocH mod Identifier
parseParagraph = snd . parse p
where
p :: Parser (DocH mod Identifier)
p = docConcat <$> many (choice' [ monospace
, anchor
, identifier
, moduleName
, picture
, mathDisplay
, mathInline
, markdownImage
, hyperlink
, bold
, emphasis
, encodedChar
, string'
, skipSpecialChar
])
encodedChar :: Parser (DocH mod a)
encodedChar = "&#" *> c <* ";"
where
c = DocString . return . chr <$> num
num = hex <|> decimal
hex = ("x" <|> "X") *> hexadecimal
specialChar :: [Char]
specialChar = "_/<@\"&'`# "
string' :: Parser (DocH mod a)
string' = DocString . unescape . T.unpack <$> takeWhile1_ (`notElem` specialChar)
where
unescape "" = ""
unescape ('\\':x:xs) = x : unescape xs
unescape (x:xs) = x : unescape xs
skipSpecialChar :: Parser (DocH mod a)
skipSpecialChar = DocString . return <$> Parsec.oneOf specialChar
emphasis :: Parser (DocH mod Identifier)
emphasis = DocEmphasis . parseParagraph <$>
disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
bold :: Parser (DocH mod Identifier)
bold = DocBold . parseParagraph <$> disallowNewline ("__" *> takeUntil "__")
disallowNewline :: Parser Text -> Parser Text
disallowNewline = mfilter (T.all (/= '\n'))
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
takeWhile1_ :: (Char -> Bool) -> Parser Text
takeWhile1_ = mfilter (not . T.null) . takeWhile_
anchor :: Parser (DocH mod a)
anchor = DocAName . T.unpack <$>
disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#")
monospace :: Parser (DocH mod Identifier)
monospace = DocMonospaced . parseParagraph
<$> ("@" *> takeWhile1_ (/= '@') <* "@")
moduleName :: Parser (DocH mod a)
moduleName = DocModule <$> ("\"" *> modid <* "\"")
where
modid = intercalate "." <$> conid `Parsec.sepBy1` "."
conid = (:)
<$> Parsec.satisfy (\c -> isAlpha c && isUpper c)
<*> many (conChar <|> Parsec.oneOf "\\#")
conChar = Parsec.alphaNum <|> Parsec.char '_'
picture :: Parser (DocH mod a)
picture = DocPic . makeLabeled Picture
<$> disallowNewline ("<<" *> takeUntil ">>")
mathInline :: Parser (DocH mod a)
mathInline = DocMathInline . T.unpack
<$> disallowNewline ("\\(" *> takeUntil "\\)")
mathDisplay :: Parser (DocH mod a)
mathDisplay = DocMathDisplay . T.unpack
<$> ("\\[" *> takeUntil "\\]")
markdownImage :: Parser (DocH mod a)
markdownImage = fromHyperlink <$> ("!" *> linkParser)
where
fromHyperlink (Hyperlink url label) = DocPic (Picture url label)
paragraph :: Parser (DocH mod Identifier)
paragraph = choice' [ examples
, table
, do indent <- takeIndent
choice' [ since
, unorderedList indent
, orderedList indent
, birdtracks
, codeblock
, property
, header
, textParagraphThatStartsWithMarkdownLink
, definitionList indent
, docParagraph <$> textParagraph
]
]
table :: Parser (DocH mod Identifier)
table = do
firstRow <- parseFirstRow
let len = T.length firstRow
restRows <- many (try (parseRestRows len))
DocTable <$> tableStepTwo len (firstRow : restRows)
where
parseFirstRow :: Parser Text
parseFirstRow = do
skipHorizontalSpace
c <- Parsec.char '+'
cs <- some (Parsec.char '-' <|> Parsec.char '+')
guard (last cs == '+')
skipHorizontalSpace
_ <- Parsec.newline
return (T.cons c $ T.pack cs)
parseRestRows :: Int -> Parser Text
parseRestRows l = do
skipHorizontalSpace
c <- Parsec.char '|' <|> Parsec.char '+'
bs <- scan predicate (l - 2)
c2 <- Parsec.char '|' <|> Parsec.char '+'
skipHorizontalSpace
_ <- Parsec.newline
return (T.cons c (T.snoc bs c2))
where
predicate n c
| n <= 0 = Nothing
| c == '\n' = Nothing
| otherwise = Just (n - 1)
tableStepTwo
:: Int
-> [Text]
-> Parser (Table (DocH mod Identifier))
tableStepTwo width = go 0 [] where
go _ left [] = tableStepThree width (reverse left) Nothing
go n left (r : rs)
| T.all (`elem` ['+', '=']) r =
tableStepThree width (reverse left ++ r' : rs) (Just n)
| otherwise =
go (n + 1) (r : left) rs
where
r' = T.map (\c -> if c == '=' then '-' else c) r
tableStepThree
:: Int
-> [Text]
-> Maybe Int
-> Parser (Table (DocH mod Identifier))
tableStepThree width rs hdrIndex = do
cells <- loop (Set.singleton (0, 0))
tableStepFour rs hdrIndex cells
where
height = length rs
loop :: Set.Set (Int, Int) -> Parser [TC]
loop queue = case Set.minView queue of
Nothing -> return []
Just ((y, x), queue')
| y + 1 >= height || x + 1 >= width -> loop queue'
| otherwise -> case scanRight x y of
Nothing -> loop queue'
Just (x2, y2) -> do
let tc = TC y x y2 x2
fmap (tc :) $ loop $ queue' `Set.union` Set.fromList
[(y, x2), (y2, x), (y2, x2)]
scanRight :: Int -> Int -> Maybe (Int, Int)
scanRight x y = go (x + 1) where
bs = rs !! y
go x' | x' >= width = fail "overflow right "
| T.index bs x' == '+' = scanDown x y x' <|> go (x' + 1)
| T.index bs x' == '-' = go (x' + 1)
| otherwise = fail $ "not a border (right) " ++ show (x,y,x')
scanDown :: Int -> Int -> Int -> Maybe (Int, Int)
scanDown x y x2 = go (y + 1) where
go y' | y' >= height = fail "overflow down"
| T.index (rs !! y') x2 == '+' = scanLeft x y x2 y' <|> go (y' + 1)
| T.index (rs !! y') x2 == '|' = go (y' + 1)
| otherwise = fail $ "not a border (down) " ++ show (x,y,x2,y')
scanLeft :: Int -> Int -> Int -> Int -> Maybe (Int, Int)
scanLeft x y x2 y2
| all (\x' -> T.index bs x' `elem` ['+', '-']) [x..x2] = scanUp x y x2 y2
| otherwise = fail $ "not a border (left) " ++ show (x,y,x2,y2)
where
bs = rs !! y2
scanUp :: Int -> Int -> Int -> Int -> Maybe (Int, Int)
scanUp x y x2 y2
| all (\y' -> T.index (rs !! y') x `elem` ['+', '|']) [y..y2] = return (x2, y2)
| otherwise = fail $ "not a border (up) " ++ show (x,y,x2,y2)
data TC = TC !Int !Int !Int !Int
deriving Show
tcXS :: TC -> [Int]
tcXS (TC _ x _ x2) = [x, x2]
tcYS :: TC -> [Int]
tcYS (TC y _ y2 _) = [y, y2]
tableStepFour :: [Text] -> Maybe Int -> [TC] -> Parser (Table (DocH mod Identifier))
tableStepFour rs hdrIndex cells = case hdrIndex of
Nothing -> return $ Table [] rowsDoc
Just i -> case elemIndex i yTabStops of
Nothing -> return $ Table [] rowsDoc
Just i' -> return $ uncurry Table $ splitAt i' rowsDoc
where
xTabStops = sortNub $ concatMap tcXS cells
yTabStops = sortNub $ concatMap tcYS cells
sortNub :: Ord a => [a] -> [a]
sortNub = Set.toList . Set.fromList
init' :: [a] -> [a]
init' [] = []
init' [_] = []
init' (x : xs) = x : init' xs
rowsDoc = (fmap . fmap) parseParagraph rows
rows = map makeRow (init' yTabStops)
where
makeRow y = TableRow $ mapMaybe (makeCell y) cells
makeCell y (TC y' x y2 x2)
| y /= y' = Nothing
| otherwise = Just $ TableCell xts yts (extract (x + 1) (y + 1) (x2 - 1) (y2 - 1))
where
xts = length $ P.takeWhile (< x2) $ dropWhile (< x) xTabStops
yts = length $ P.takeWhile (< y2) $ dropWhile (< y) yTabStops
extract :: Int -> Int -> Int -> Int -> Text
extract x y x2 y2 = T.intercalate "\n"
[ T.take (x2 - x + 1) $ T.drop x $ rs !! y'
| y' <- [y .. y2]
]
since :: Parser (DocH mod a)
since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty
where
version = decimal `Parsec.sepBy1` "."
header :: Parser (DocH mod Identifier)
header = do
let psers = map (string . flip T.replicate "=") [6, 5 .. 1]
pser = choice' psers
delim <- T.unpack <$> pser
line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseText
rest <- try paragraph <|> return DocEmpty
return $ DocHeader (Header (length delim) line) `docAppend` rest
textParagraph :: Parser (DocH mod Identifier)
textParagraph = parseText . T.intercalate "\n" <$> some nonEmptyLine
textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier)
textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph)
where
optionalTextParagraph :: Parser (DocH mod Identifier)
optionalTextParagraph = choice' [ docAppend <$> whitespace <*> textParagraph
, pure DocEmpty ]
whitespace :: Parser (DocH mod a)
whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n")
where
f :: Text -> Maybe Text -> String
f xs (fromMaybe "" -> x)
| T.null (xs <> x) = ""
| otherwise = " "
unorderedList :: Text -> Parser (DocH mod Identifier)
unorderedList indent = DocUnorderedList <$> p
where
p = ("*" <|> "-") *> innerList indent p
orderedList :: Text -> Parser (DocH mod Identifier)
orderedList indent = DocOrderedList <$> p
where
p = (paren <|> dot) *> innerList indent p
dot = (decimal :: Parser Int) <* "."
paren = "(" *> decimal <* ")"
innerList :: Text -> Parser [DocH mod Identifier]
-> Parser [DocH mod Identifier]
innerList indent item = do
c <- takeLine
(cs, items) <- more indent item
let contents = docParagraph . parseText . dropNLs . T.unlines $ c : cs
return $ case items of
Left p -> [contents `docAppend` p]
Right i -> contents : i
definitionList :: Text -> Parser (DocH mod Identifier)
definitionList indent = DocDefList <$> p
where
p = do
label <- "[" *> (parseParagraph <$> takeWhile1_ (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
c <- takeLine
(cs, items) <- more indent p
let contents = parseText . dropNLs . T.unlines $ c : cs
return $ case items of
Left x -> [(label, contents `docAppend` x)]
Right i -> (label, contents) : i
dropNLs :: Text -> Text
dropNLs = T.dropWhileEnd (== '\n')
more :: Monoid a => Text -> Parser a
-> Parser ([Text], Either (DocH mod Identifier) a)
more indent item = choice' [ innerParagraphs indent
, moreListItems indent item
, moreContent indent item
, pure ([], Right mempty)
]
innerParagraphs :: Text
-> Parser ([Text], Either (DocH mod Identifier) a)
innerParagraphs indent = (,) [] . Left <$> ("\n" *> indentedParagraphs indent)
moreListItems :: Text -> Parser a
-> Parser ([Text], Either (DocH mod Identifier) a)
moreListItems indent item = (,) [] . Right <$> indentedItem
where
indentedItem = string indent *> Parsec.spaces *> item
moreContent :: Monoid a => Text -> Parser a
-> Parser ([Text], Either (DocH mod Identifier) a)
moreContent indent item = first . (:) <$> nonEmptyLine <*> more indent item
indentedParagraphs :: Text -> Parser (DocH mod Identifier)
indentedParagraphs indent =
(T.unpack . T.concat <$> dropFrontOfPara indent') >>= parseParagraphs
where
indent' = string $ indent <> " "
dropFrontOfPara :: Parser Text -> Parser [Text]
dropFrontOfPara sp = do
currentParagraph <- some (try (sp *> takeNonEmptyLine))
followingParagraphs <-
choice' [ skipHorizontalSpace *> nextPar
, skipHorizontalSpace *> nlList
, Parsec.eof *> return []
]
return (currentParagraph ++ followingParagraphs)
where
nextPar = (++) <$> nlList <*> dropFrontOfPara sp
nlList = "\n" *> return ["\n"]
nonSpace :: Text -> Parser Text
nonSpace xs
| T.all isSpace xs = fail "empty line"
| otherwise = return xs
takeNonEmptyLine :: Parser Text
takeNonEmptyLine = do
l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace
_ <- "\n"
pure (l <> "\n")
takeIndent :: Parser Text
takeIndent = do
indent <- takeHorizontalSpace
choice' [ "\n" *> takeIndent
, return indent
]
birdtracks :: Parser (DocH mod a)
birdtracks = DocCodeBlock . DocString . T.unpack . T.intercalate "\n" . stripSpace <$> some line
where
line = try (skipHorizontalSpace *> ">" *> takeLine)
stripSpace :: [Text] -> [Text]
stripSpace = fromMaybe <*> mapM strip'
where
strip' t = case T.uncons t of
Nothing -> Just ""
Just (' ',t') -> Just t'
_ -> Nothing
examples :: Parser (DocH mod a)
examples = DocExamples <$> (many (try (skipHorizontalSpace *> "\n")) *> go)
where
go :: Parser [Example]
go = do
prefix <- takeHorizontalSpace <* ">>>"
expr <- takeLine
(rs, es) <- resultAndMoreExamples
return (makeExample prefix expr rs : es)
where
resultAndMoreExamples :: Parser ([Text], [Example])
resultAndMoreExamples = choice' [ moreExamples, result, pure ([], []) ]
where
moreExamples :: Parser ([Text], [Example])
moreExamples = (,) [] <$> go
result :: Parser ([Text], [Example])
result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
makeExample :: Text -> Text -> [Text] -> Example
makeExample prefix expression res =
Example (T.unpack (T.strip expression)) result
where
result = map (T.unpack . substituteBlankLine . tryStripPrefix) res
tryStripPrefix xs = fromMaybe xs (T.stripPrefix prefix xs)
substituteBlankLine "<BLANKLINE>" = ""
substituteBlankLine xs = xs
nonEmptyLine :: Parser Text
nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine)
takeLine :: Parser Text
takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine)
endOfLine :: Parser ()
endOfLine = void "\n" <|> Parsec.eof
property :: Parser (DocH mod a)
property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n"))
codeblock :: Parser (DocH mod Identifier)
codeblock =
DocCodeBlock . parseParagraph . dropSpaces
<$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
where
dropSpaces xs =
case splitByNl xs of
[] -> xs
ys -> case T.uncons (last ys) of
Just (' ',_) -> case mapM dropSpace ys of
Nothing -> xs
Just zs -> T.intercalate "\n" zs
_ -> xs
splitByNl = unfoldr (\x -> case T.uncons x of
Just ('\n',x') -> Just (T.span (/= '\n') x')
_ -> Nothing)
. ("\n" <>)
dropSpace t = case T.uncons t of
Nothing -> Just ""
Just (' ',t') -> Just t'
_ -> Nothing
block' = scan p False
where
p isNewline c
| isNewline && c == '@' = Nothing
| isNewline && isSpace c = Just isNewline
| otherwise = Just $ c == '\n'
hyperlink :: Parser (DocH mod a)
hyperlink = choice' [ angleBracketLink, markdownLink, autoUrl ]
angleBracketLink :: Parser (DocH mod a)
angleBracketLink =
DocHyperlink . makeLabeled Hyperlink
<$> disallowNewline ("<" *> takeUntil ">")
markdownLink :: Parser (DocH mod a)
markdownLink = DocHyperlink <$> linkParser
linkParser :: Parser Hyperlink
linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
where
label :: Parser (Maybe String)
label = Just . decode . T.strip <$> ("[" *> takeUntil "]")
whitespace :: Parser ()
whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace)
url :: Parser String
url = rejectWhitespace (decode <$> ("(" *> takeUntil ")"))
rejectWhitespace :: MonadPlus m => m String -> m String
rejectWhitespace = mfilter (all (not . isSpace))
decode :: Text -> String
decode = T.unpack . removeEscapes
autoUrl :: Parser (DocH mod a)
autoUrl = mkLink <$> url
where
url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace))
mkLink :: Text -> DocH mod a
mkLink s = case T.unsnoc s of
Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x]
_ -> DocHyperlink (mkHyperlink s)
mkHyperlink :: Text -> Hyperlink
mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing
parseValid :: Parser String
parseValid = p some
where
idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_')
p p' = do
vs <- p' idChar
c <- peekChar'
case c of
'`' -> return vs
'\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ]
_ -> fail "outofvalid"
identifier :: Parser (DocH mod Identifier)
identifier = do
o <- idDelim
vid <- parseValid
e <- idDelim
return $ DocIdentifier (o, vid, e)
where
idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`')