{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ViewPatterns #-}
module Documentation.Haddock.Parser.Identifier (
Identifier(..),
parseValid,
) where
import Documentation.Haddock.Types ( Namespace(..) )
import Documentation.Haddock.Parser.Monad
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos ( updatePosChar )
import Text.Parsec ( State(..)
, getParserState, setParserState )
import Data.Text (Text)
import qualified Data.Text as T
import Data.Char (isAlpha, isAlphaNum)
import Control.Monad (guard)
import Data.Functor (($>))
#if MIN_VERSION_base(4,9,0)
import Text.Read.Lex (isSymbolChar)
#else
import Data.Char (GeneralCategory (..),
generalCategory)
#endif
import Data.Maybe
data Identifier = Identifier !Namespace !Char String !Char
deriving (Show, Eq)
parseValid :: Parser Identifier
parseValid = do
s@State{ stateInput = inp, statePos = pos } <- getParserState
case takeIdentifier inp of
Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
Just (ns, op, ident, cl, inp') ->
let posOp = updatePosChar pos op
posIdent = T.foldl updatePosChar posOp ident
posCl = updatePosChar posIdent cl
s' = s{ stateInput = inp', statePos = posCl }
in setParserState s' $> Identifier ns op (T.unpack ident) cl
#if !MIN_VERSION_base(4,9,0)
isSymbolChar :: Char -> Bool
isSymbolChar c = not (isPuncChar c) && case generalCategory c of
MathSymbol -> True
CurrencySymbol -> True
ModifierSymbol -> True
OtherSymbol -> True
DashPunctuation -> True
OtherPunctuation -> c `notElem` "'\""
ConnectorPunctuation -> c /= '_'
_ -> False
where
isPuncChar :: Char -> Bool
isPuncChar = (`elem` (",;()[]{}`" :: String))
#endif
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier input = listToMaybe $ do
let (ns, input') = case T.uncons input of
Just ('v', i) -> (Value, i)
Just ('t', i) -> (Type, i)
_ -> (None, input)
(op, input'') <- maybeToList (T.uncons input')
guard (op == '\'' || op == '`')
(ident, input''') <- wrapped input''
(cl, input'''') <- maybeToList (T.uncons input''')
guard (cl == '\'' || cl == '`')
return (ns, op, ident, cl, input'''')
where
wrapped t = do
(c, t' ) <- maybeToList (T.uncons t)
case c of
'(' | Just (c', _) <- T.uncons t'
, c' == ',' || c' == ')'
-> do let (commas, t'') = T.span (== ',') t'
(')', t''') <- maybeToList (T.uncons t'')
return (T.take (T.length commas + 2) t, t''')
'(' -> do (n, t'' ) <- general False 0 [] t'
(')', t''') <- maybeToList (T.uncons t'')
return (T.take (n + 2) t, t''')
'`' -> do (n, t'' ) <- general False 0 [] t'
('`', t''') <- maybeToList (T.uncons t'')
return (T.take (n + 2) t, t''')
_ -> do (n, t'' ) <- general False 0 [] t
return (T.take n t, t'')
general :: Bool
-> Int
-> [(Int, Text)]
-> Text
-> [(Int, Text)]
general !identOnly !i acc t
| Just (n, rest) <- identLike t
= if T.null rest
then acc
else case T.head rest of
'`' -> (n + i, rest) : acc
')' -> (n + i, rest) : acc
'.' -> general False (n + i + 1) acc (T.tail rest)
'\'' -> let (m, rest') = quotes rest
in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (T.tail rest')
_ -> acc
| Just (n, rest) <- optr t
, not identOnly
= (n + i, rest) : acc
| otherwise
= acc
identLike t
| T.null t = Nothing
| isAlpha (T.head t) || '_' == T.head t
= let !(idt, rest) = T.span (\c -> isAlphaNum c || c == '_') t
!(octos, rest') = T.span (== '#') rest
in Just (T.length idt + T.length octos, rest')
| otherwise = Nothing
quotes :: Text -> (Int, Text)
quotes t = let !n = T.length (T.takeWhile (== '\'') t) - 1
in (n, T.drop n t)
optr t = let !(op, rest) = T.span isSymbolChar t
in if T.null op then Nothing else Just (T.length op, rest)