{-# LANGUAGE CPP #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Language.C.Types.Parse
(
TypeNames
, CParserContext(..)
, CIdentifier
, unCIdentifier
, cIdentifierFromString
, cCParserContext
, CParser
, runCParser
, quickCParser
, quickCParser_
, identifier_no_lex
, DeclarationSpecifier(..)
, declaration_specifiers
, StorageClassSpecifier(..)
, storage_class_specifier
, TypeSpecifier(..)
, type_specifier
, TypeQualifier(..)
, type_qualifier
, FunctionSpecifier(..)
, function_specifier
, Declarator(..)
, declarator
, DirectDeclarator(..)
, direct_declarator
, ArrayOrProto(..)
, array_or_proto
, ArrayType(..)
, array_type
, Pointer(..)
, pointer
, ParameterDeclaration(..)
, DeclaratorOrAbstractDeclarator(..)
, parameter_declaration
, parameter_list
, AbstractDeclarator(..)
, abstract_declarator
, DirectAbstractDeclarator(..)
, direct_abstract_declarator
, cIdentStart
, cIdentLetter
, cReservedWords
, isTypeName
) where
import Control.Applicative
import Control.Monad (msum, void, MonadPlus, unless, when)
import Control.Monad.Reader (MonadReader, runReaderT, ReaderT, asks, ask)
import Data.List (intersperse)
import Data.Functor.Identity (Identity)
import qualified Data.HashSet as HashSet
import Data.Hashable (Hashable)
import Data.Monoid ((<>))
import Data.String (IsString(..))
import Data.Typeable (Typeable)
import qualified Text.Parsec as Parsec
import Text.Parser.Char
import Text.Parser.Combinators
import Text.Parser.LookAhead
import Text.Parser.Token
import qualified Text.Parser.Token.Highlight as Highlight
import Text.PrettyPrint.ANSI.Leijen (Pretty(..), (<+>), Doc, hsep)
import qualified Text.PrettyPrint.ANSI.Leijen as PP
#if __GLASGOW_HASKELL__ < 710
import Data.Foldable (Foldable)
import Data.Traversable (Traversable)
#endif
type TypeNames = HashSet.HashSet CIdentifier
data CParserContext i = CParserContext
{ cpcIdentName :: String
, cpcTypeNames :: TypeNames
, cpcParseIdent :: forall m. CParser i m => m i
, cpcIdentToString :: i -> String
, cpcEnableCpp :: Bool
}
newtype CIdentifier = CIdentifier {unCIdentifier :: String}
deriving (Typeable, Eq, Ord, Show, Hashable)
cIdentifierFromString :: Bool -> String -> Either String CIdentifier
cIdentifierFromString useCpp s =
case Parsec.parse (identNoLex useCpp cIdentStyle <* eof) "cIdentifierFromString" s of
Left err -> Left $ show err
Right x -> Right $ CIdentifier x
instance IsString CIdentifier where
fromString s =
case cIdentifierFromString True s of
Left err -> error $ "CIdentifier fromString: invalid string " ++ show s ++ "\n" ++ err
Right x -> x
cCParserContext :: Bool -> TypeNames -> CParserContext CIdentifier
cCParserContext useCpp typeNames = CParserContext
{ cpcTypeNames = typeNames
, cpcParseIdent = cidentifier_no_lex
, cpcIdentToString = unCIdentifier
, cpcIdentName = "C identifier"
, cpcEnableCpp = useCpp
}
type CParser i m =
( Monad m
, Functor m
, Applicative m
, MonadPlus m
, Parsing m
, CharParsing m
, TokenParsing m
, LookAheadParsing m
, MonadReader (CParserContext i) m
#if (MIN_VERSION_base(4,13,0))
, MonadFail m
#endif
, Hashable i
)
runCParser
:: Parsec.Stream s Identity Char
=> CParserContext i
-> String
-> s
-> (ReaderT (CParserContext i) (Parsec.Parsec s ()) a)
-> Either Parsec.ParseError a
runCParser typeNames fn s p = Parsec.parse (runReaderT p typeNames) fn s
quickCParser
:: CParserContext i
-> String
-> (ReaderT (CParserContext i) (Parsec.Parsec String ()) a)
-> a
quickCParser typeNames s p = case runCParser typeNames "quickCParser" s p of
Left err -> error $ "quickCParser: " ++ show err
Right x -> x
quickCParser_
:: Bool
-> String
-> (ReaderT (CParserContext CIdentifier) (Parsec.Parsec String ()) a)
-> a
quickCParser_ useCpp = quickCParser (cCParserContext useCpp HashSet.empty)
cReservedWords :: HashSet.HashSet String
cReservedWords = HashSet.fromList
[ "auto", "else", "long", "switch"
, "break", "enum", "register", "typedef"
, "case", "extern", "return", "union"
, "char", "float", "short", "unsigned"
, "const", "for", "signed", "void"
, "continue", "goto", "sizeof", "volatile"
, "default", "if", "static", "while"
, "do", "int", "struct", "double"
]
cIdentStart :: [Char]
cIdentStart = ['a'..'z'] ++ ['A'..'Z'] ++ ['_']
cIdentLetter :: [Char]
cIdentLetter = ['a'..'z'] ++ ['A'..'Z'] ++ ['_'] ++ ['0'..'9']
cIdentStyle :: (TokenParsing m, Monad m) => IdentifierStyle m
cIdentStyle = IdentifierStyle
{ _styleName = "C identifier"
, _styleStart = oneOf cIdentStart
, _styleLetter = oneOf cIdentLetter
, _styleReserved = cReservedWords
, _styleHighlight = Highlight.Identifier
, _styleReservedHighlight = Highlight.ReservedIdentifier
}
data DeclarationSpecifier
= StorageClassSpecifier StorageClassSpecifier
| TypeSpecifier TypeSpecifier
| TypeQualifier TypeQualifier
| FunctionSpecifier FunctionSpecifier
deriving (Typeable, Eq, Show)
declaration_specifiers :: CParser i m => m [DeclarationSpecifier]
declaration_specifiers = many1 $ msum
[ StorageClassSpecifier <$> storage_class_specifier
, TypeSpecifier <$> type_specifier
, TypeQualifier <$> type_qualifier
, FunctionSpecifier <$> function_specifier
]
data StorageClassSpecifier
= TYPEDEF
| EXTERN
| STATIC
| AUTO
| REGISTER
deriving (Typeable, Eq, Show)
storage_class_specifier :: CParser i m => m StorageClassSpecifier
storage_class_specifier = msum
[ TYPEDEF <$ reserve cIdentStyle "typedef"
, EXTERN <$ reserve cIdentStyle "extern"
, STATIC <$ reserve cIdentStyle "static"
, AUTO <$ reserve cIdentStyle "auto"
, REGISTER <$ reserve cIdentStyle "register"
]
data TypeSpecifier
= VOID
| BOOL
| CHAR
| SHORT
| INT
| LONG
| FLOAT
| DOUBLE
| SIGNED
| UNSIGNED
| Struct CIdentifier
| Enum CIdentifier
| TypeName CIdentifier
| Template CIdentifier [TypeSpecifier]
| TemplateConst String
| TemplatePointer TypeSpecifier
deriving (Typeable, Eq, Show)
type_specifier :: CParser i m => m TypeSpecifier
type_specifier = msum
[ VOID <$ reserve cIdentStyle "void"
, BOOL <$ reserve cIdentStyle "bool"
, CHAR <$ reserve cIdentStyle "char"
, SHORT <$ reserve cIdentStyle "short"
, INT <$ reserve cIdentStyle "int"
, LONG <$ reserve cIdentStyle "long"
, FLOAT <$ reserve cIdentStyle "float"
, DOUBLE <$ reserve cIdentStyle "double"
, SIGNED <$ reserve cIdentStyle "signed"
, UNSIGNED <$ reserve cIdentStyle "unsigned"
, Struct <$> (reserve cIdentStyle "struct" >> cidentifier)
, Enum <$> (reserve cIdentStyle "enum" >> cidentifier)
, template_parser
, TypeName <$> type_name
]
identifier :: CParser i m => m i
identifier = token identifier_no_lex
isTypeName :: Bool -> TypeNames -> String -> Bool
isTypeName useCpp typeNames id_ =
case cIdentifierFromString useCpp id_ of
Left _err -> False
Right s -> HashSet.member s typeNames
identifier_no_lex :: CParser i m => m i
identifier_no_lex = try $ do
ctx <- ask
id_ <- cpcParseIdent ctx <?> cpcIdentName ctx
when (isTypeName (cpcEnableCpp ctx) (cpcTypeNames ctx) (cpcIdentToString ctx id_)) $
unexpected $ "type name " ++ cpcIdentToString ctx id_
return id_
cidentifier_raw :: (TokenParsing m, Monad m) => Bool -> m CIdentifier
cidentifier_raw useCpp = identNoLex useCpp cIdentStyle
cidentifier_no_lex :: CParser i m => m CIdentifier
cidentifier_no_lex = try $ do
ctx <- ask
s <- cidentifier_raw (cpcEnableCpp ctx)
typeNames <- asks cpcTypeNames
when (HashSet.member s typeNames) $
unexpected $ "type name " ++ unCIdentifier s
return s
cidentifier :: CParser i m => m CIdentifier
cidentifier = token cidentifier_no_lex
type_name :: CParser i m => m CIdentifier
type_name = try $ do
ctx <- ask
s <- ident' (cpcEnableCpp ctx) cIdentStyle <?> "type name"
typeNames <- asks cpcTypeNames
unless (HashSet.member s typeNames) $
unexpected $ "identifier " ++ unCIdentifier s
return s
templateParser :: (Monad m, CharParsing m, CParser i m) => IdentifierStyle m -> m TypeSpecifier
templateParser s = parse'
where
parse' = do
id' <- cidentParserWithNamespace
_ <- string "<"
args <- templateArgParser
_ <- string ">"
return $ Template (CIdentifier id') args
cidentParser = ((:) <$> _styleStart s <*> many (_styleLetter s) <?> _styleName s)
cidentParserWithNamespace =
try (concat <$> sequence [cidentParser, (string "::"), cidentParserWithNamespace]) <|>
cidentParser
templateArgType = try ((TemplatePointer <$> (type_specifier)) <* (string "*")) <|> try type_specifier <|> (TemplateConst <$> (many $ oneOf ['0'..'9']))
templateArgParser' = do
t <- templateArgType
_ <- string ","
tt <- templateArgParser
return $ t:tt
templateArgParser =
try (templateArgParser') <|> ((:) <$> templateArgType <*> return [])
template_parser :: CParser i m => m TypeSpecifier
template_parser = try $ templateParser cIdentStyle <?> "template name"
data TypeQualifier
= CONST
| RESTRICT
| VOLATILE
deriving (Typeable, Eq, Show)
type_qualifier :: CParser i m => m TypeQualifier
type_qualifier = msum
[ CONST <$ reserve cIdentStyle "const"
, RESTRICT <$ reserve cIdentStyle "restrict"
, VOLATILE <$ reserve cIdentStyle "volatile"
]
data FunctionSpecifier
= INLINE
deriving (Typeable, Eq, Show)
function_specifier :: CParser i m => m FunctionSpecifier
function_specifier = msum
[ INLINE <$ reserve cIdentStyle "inline"
]
data Declarator i = Declarator
{ declaratorPointers :: [Pointer]
, declaratorDirect :: (DirectDeclarator i)
} deriving (Typeable, Eq, Show, Functor, Foldable, Traversable)
declarator :: CParser i m => m (Declarator i)
declarator = (Declarator <$> many pointer <*> direct_declarator) <?> "declarator"
data DirectDeclarator i
= DeclaratorRoot i
| ArrayOrProto (DirectDeclarator i) (ArrayOrProto i)
| DeclaratorParens (Declarator i)
deriving (Typeable, Eq, Show, Functor, Foldable, Traversable)
data ArrayOrProto i
= Array (ArrayType i)
| Proto [ParameterDeclaration i]
deriving (Eq, Show, Typeable, Functor, Foldable, Traversable)
array_or_proto :: CParser i m => m (ArrayOrProto i)
array_or_proto = msum
[ Array <$> brackets array_type
, Proto <$> parens parameter_list
]
data ArrayType i
= VariablySized
| Unsized
| SizedByInteger Integer
| SizedByIdentifier i
deriving (Typeable, Eq, Show, Functor, Foldable, Traversable)
array_type :: CParser i m => m (ArrayType i)
array_type = msum
[ VariablySized <$ symbolic '*'
, SizedByInteger <$> natural
, SizedByIdentifier <$> identifier
, return Unsized
]
direct_declarator :: CParser i m => m (DirectDeclarator i)
direct_declarator = do
ddecltor <- msum
[ DeclaratorRoot <$> identifier
, DeclaratorParens <$> parens declarator
]
aops <- many array_or_proto
return $ foldl ArrayOrProto ddecltor aops
data Pointer
= Pointer [TypeQualifier]
deriving (Typeable, Eq, Show)
pointer :: CParser i m => m Pointer
pointer = do
void $ symbolic '*'
Pointer <$> many type_qualifier
parameter_list :: CParser i m => m [ParameterDeclaration i]
parameter_list =
sepBy parameter_declaration $ symbolic ','
data ParameterDeclaration i = ParameterDeclaration
{ parameterDeclarationSpecifiers :: [DeclarationSpecifier]
, parameterDeclarationDeclarator :: DeclaratorOrAbstractDeclarator i
} deriving (Eq, Show, Typeable, Functor, Foldable, Traversable)
data DeclaratorOrAbstractDeclarator i
= IsDeclarator (Declarator i)
| IsAbstractDeclarator (AbstractDeclarator i)
deriving (Eq, Show, Typeable, Functor, Foldable, Traversable)
parameter_declaration :: CParser i m => m (ParameterDeclaration i)
parameter_declaration =
ParameterDeclaration
<$> declaration_specifiers
<*> mbabstract
where
mbabstract =
IsDeclarator <$> try declarator <|>
IsAbstractDeclarator <$> try abstract_declarator <|>
return (IsAbstractDeclarator (AbstractDeclarator [] Nothing))
data AbstractDeclarator i = AbstractDeclarator
{ abstractDeclaratorPointers :: [Pointer]
, abstractDeclaratorDirect :: Maybe (DirectAbstractDeclarator i)
} deriving (Typeable, Eq, Show, Functor, Foldable, Traversable)
abstract_declarator :: CParser i m => m (AbstractDeclarator i)
abstract_declarator = do
ptrs <- many pointer
let p = if null ptrs
then Just <$> direct_abstract_declarator
else (Just <$> try direct_abstract_declarator) <|> return Nothing
AbstractDeclarator ptrs <$> p
data DirectAbstractDeclarator i
= ArrayOrProtoHere (ArrayOrProto i)
| ArrayOrProtoThere (DirectAbstractDeclarator i) (ArrayOrProto i)
| AbstractDeclaratorParens (AbstractDeclarator i)
deriving (Typeable, Eq, Show, Functor, Foldable, Traversable)
direct_abstract_declarator :: CParser i m => m (DirectAbstractDeclarator i)
direct_abstract_declarator = do
ddecltor <- msum
[ try (ArrayOrProtoHere <$> array_or_proto)
, AbstractDeclaratorParens <$> parens abstract_declarator
] <?> "array, prototype, or parenthesised abstract declarator"
aops <- many array_or_proto
return $ foldl ArrayOrProtoThere ddecltor aops
instance Pretty CIdentifier where
pretty = PP.text . unCIdentifier
instance Pretty DeclarationSpecifier where
pretty dspec = case dspec of
StorageClassSpecifier x -> pretty x
TypeSpecifier x -> pretty x
TypeQualifier x -> pretty x
FunctionSpecifier x -> pretty x
instance Pretty StorageClassSpecifier where
pretty storage = case storage of
TYPEDEF -> "typedef"
EXTERN -> "extern"
STATIC -> "static"
AUTO -> "auto"
REGISTER -> "register"
instance Pretty TypeSpecifier where
pretty tySpec = case tySpec of
VOID -> "void"
BOOL -> "bool"
CHAR -> "char"
SHORT -> "short"
INT -> "int"
LONG -> "long"
FLOAT -> "float"
DOUBLE -> "double"
SIGNED -> "signed"
UNSIGNED -> "unsigned"
Struct x -> "struct" <+> pretty x
Enum x -> "enum" <+> pretty x
TypeName x -> pretty x
Template x args -> pretty x <+> "<" <+> mconcat (intersperse "," (map pretty args)) <+> ">"
TemplateConst x -> pretty x
TemplatePointer x -> pretty x <+> "*"
instance Pretty TypeQualifier where
pretty tyQual = case tyQual of
CONST -> "const"
RESTRICT -> "restrict"
VOLATILE -> "volatile"
instance Pretty FunctionSpecifier where
pretty funSpec = case funSpec of
INLINE -> "inline"
instance Pretty i => Pretty (Declarator i) where
pretty (Declarator ptrs ddecltor) = case ptrs of
[] -> pretty ddecltor
_:_ -> prettyPointers ptrs <+> pretty ddecltor
prettyPointers :: [Pointer] -> Doc
prettyPointers [] = ""
prettyPointers (x : xs) = pretty x <> prettyPointers xs
instance Pretty Pointer where
pretty (Pointer tyQual) = "*" <> hsep (map pretty tyQual)
instance Pretty i => Pretty (DirectDeclarator i) where
pretty decltor = case decltor of
DeclaratorRoot x -> pretty x
DeclaratorParens x -> "(" <> pretty x <> ")"
ArrayOrProto ddecltor aorp -> pretty ddecltor <> pretty aorp
instance Pretty i => Pretty (ArrayOrProto i) where
pretty aorp = case aorp of
Array x -> "[" <> pretty x <> "]"
Proto x -> "(" <> prettyParams x <> ")"
prettyParams :: (Pretty a) => [a] -> Doc
prettyParams xs = case xs of
[] -> ""
[x] -> pretty x
x : xs'@(_:_) -> pretty x <> "," <+> prettyParams xs'
instance Pretty i => Pretty (ArrayType i) where
pretty at = case at of
VariablySized -> "*"
SizedByInteger n -> pretty n
SizedByIdentifier s -> pretty s
Unsized -> ""
instance Pretty i => Pretty (ParameterDeclaration i) where
pretty (ParameterDeclaration declSpecs decltor) = case declSpecs of
[] -> decltorDoc
_:_ -> hsep (map pretty declSpecs) <+> decltorDoc
where
decltorDoc = case decltor of
IsDeclarator x -> pretty x
IsAbstractDeclarator x -> pretty x
instance Pretty i => Pretty (AbstractDeclarator i) where
pretty (AbstractDeclarator ptrs mbDecltor) = case (ptrs, mbDecltor) of
(_, Nothing) -> prettyPointers ptrs
([], Just x) -> pretty x
(_:_, Just x) -> prettyPointers ptrs <+> pretty x
instance Pretty i => Pretty (DirectAbstractDeclarator i) where
pretty ddecltor = case ddecltor of
AbstractDeclaratorParens x -> "(" <> pretty x <> ")"
ArrayOrProtoHere aop -> pretty aop
ArrayOrProtoThere ddecltor' aop -> pretty ddecltor' <> pretty aop
many1 :: CParser i m => m a -> m [a]
many1 p = (:) <$> p <*> many p
cppIdentParser :: (Monad m, CharParsing m) => Bool -> IdentifierStyle m -> m [Char]
cppIdentParser useCpp s = cidentParserWithNamespace
where
cidentParser = ((:) <$> _styleStart s <*> many (_styleLetter s) <?> _styleName s)
cidentParserWithNamespace =
if useCpp
then
try (concat <$> sequence [cidentParser, (string "::"), cidentParserWithNamespace]) <|>
cidentParser
else
cidentParser
identNoLex :: (TokenParsing m, Monad m, IsString s) => Bool -> IdentifierStyle m -> m s
identNoLex useCpp s = fmap fromString $ try $ do
name <- highlight (_styleHighlight s) (cppIdentParser useCpp s)
when (HashSet.member name (_styleReserved s)) $ unexpected $ "reserved " ++ _styleName s ++ " " ++ show name
return name
ident' :: (TokenParsing m, Monad m, IsString s) => Bool -> IdentifierStyle m -> m s
ident' useCpp s = fmap fromString $ token $ try $ do
name <- highlight (_styleHighlight s) (cppIdentParser useCpp s)
when (HashSet.member name (_styleReserved s)) $ unexpected $ "reserved " ++ _styleName s ++ " " ++ show name
return name