{-# LANGUAGE TypeOperators, FlexibleInstances #-}
module GLL.Combinators.Interface (
term_parser, satisfy,
keychar, keyword, int_lit, float_lit, bool_lit, char_lit, string_lit, alt_id_lit, id_lit, token,
char,
(<**>),
(<||>),
(<$$>),
(<:=>),(<::=>),chooses,chooses_prec,
BNF, SymbExpr, AltExpr, AltExprs,
Token(..), Parseable(..), SubsumesToken(..), unlexTokens, unlexToken,
grammarOf, parse, printParseData, evaluatorWithParseData,
parseWithOptions, parseWithParseOptions, printParseDataWithOptions, evaluatorWithParseDataAndOptions, printGrammarData,
CombinatorOptions, CombinatorOption,
GLL.Combinators.Options.maximumErrors, throwErrors,
maximumPivot, maximumPivotAtNt,leftBiased,
fullSPPF, allNodes, packedNodesOnly, strictBinarisation,
GLL.Parser.noSelectTest,
parseWithOptionsAndError, parseWithParseOptionsAndError,
parseResult, parseResultWithOptions,ParseResult(..),
default_lexer,
lexer, lexerEither, LexerSettings(..), emptyLanguage,
mkNt,
(<$$), (**>), (<**),
optional, preferably, reluctantly, optionalWithDef,
multiple, multiple1, multipleSepBy, multipleSepBy1,
multipleSepBy2, within, parens, braces, brackets, angles,
foldr_multiple, foldr_multipleSepBy,
fromOpTable, opTableFromList, OpTable, Assoc(..), Fixity(..),
(<:=), (<::=),(<<<**>), (<**>>>), (<<**>), (<<<**), (**>>>), (<**>>),
longest_match,shortest_match,
many, many1, some, some1,
manySepBy, manySepBy1, manySepBy2,
someSepBy, someSepBy1,someSepBy2,
HasAlts(..), IsSymbExpr(..), IsAltExpr(..),
memo, newMemoTable, memClear, MemoTable, MemoRef, useMemoisation,
) where
import GLL.Combinators.Options
import GLL.Combinators.Visit.Join
import GLL.Combinators.Visit.Sem (emptyAncestors)
import GLL.Combinators.Memoisation
import GLL.Combinators.Lexer
import GLL.Types.Grammar
import GLL.Parser hiding (parse, parseWithOptions)
import qualified GLL.Parser as GLL
import Control.Monad (when)
import Control.Compose (OO(..))
import Control.Arrow
import qualified Data.Array as A
import qualified Data.IntMap as IM
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Text (pack)
import qualified Data.Text
import Data.IORef
import Data.Time.Clock
import System.IO.Unsafe
parse' :: (Show t, Parseable t, IsSymbExpr s) => ParseOptions ->
PCOptions -> s t a -> [t] -> (Grammar t, ParseResult t, Either String [a])
parse' popts opts p' input =
let SymbExpr (Nt start,vpa2,vpa3) = mkRule ("__Start" <:=> OO [id <$$> p'])
rules = vpa2 M.empty
as = vpa3 opts emptyAncestors sppf arr 0 m
grammar = (start, [ p | (_, alts) <- M.assocs rules, p <- alts ])
max_err = max_errors opts
parse_res = GLL.parseWithOptionsArray (popts++[GLL.maximumErrors max_err]) grammar arr
sppf = sppf_result parse_res
arr = mkInput input
m = length input
res_list = unsafePerformIO as
in (grammar, parse_res, if res_success parse_res && not (null res_list)
then Right $ res_list
else Left (error_message parse_res) )
printParseData :: (Parseable t, IsSymbExpr s, Show a) => s t a -> [t] -> IO ()
printParseData = printParseDataWithOptions [] []
printParseDataWithOptions :: (Parseable t, IsSymbExpr s, Show a) => ParseOptions -> CombinatorOptions -> s t a -> [t] -> IO ()
printParseDataWithOptions popts opts p' input =
let SymbExpr (Nt start,vpa2,vpa3) = mkRule ("__Start" <:=> OO [id <$$> p'])
rules = vpa2 M.empty
grammar = (start, [ p | (_, alts) <- M.assocs rules, p <- alts ])
parse_res = GLL.parseWithOptions (popts ++ [packedNodesOnly,strictBinarisation]) grammar input
arr = mkInput input
(_,m) = A.bounds arr
in do let (_,prods) = grammar
nt_set = S.fromList [ x | Prod x _ <- prods ]
putStrLn $ "#production: " ++ show (length prods)
putStrLn $ "#nonterminals: " ++ show (length nt_set)
putStrLn $ "largest nonterminal: " ++ show (
foldr (\x -> max (Data.Text.length x)) 0 nt_set)
startTime <- getCurrentTime
putStrLn $ "#tokens: " ++ (show m)
putStrLn $ "#successes: " ++ (show $ res_successes parse_res)
endTime <- getCurrentTime
putStrLn $ "recognition time: " ++ show (diffUTCTime endTime startTime)
startTime' <- getCurrentTime
putStrLn $ "#descriptors " ++ (show $ nr_descriptors parse_res)
putStrLn $ "#EPNs " ++ (show $ nr_packed_node_attempts parse_res)
endTime <- getCurrentTime
putStrLn $ "parse-data time: " ++ show (diffUTCTime endTime startTime')
putStrLn $ "total time: " ++ show (diffUTCTime endTime startTime)
evaluatorWithParseData :: (Parseable t, IsSymbExpr s, Show a) => s t a -> [t] -> [a]
evaluatorWithParseData = evaluatorWithParseDataAndOptions [] []
evaluatorWithParseDataAndOptions :: (Parseable t, IsSymbExpr s, Show a) => ParseOptions -> CombinatorOptions -> s t a -> [t] -> [a]
evaluatorWithParseDataAndOptions popts opts p' input =
let SymbExpr (Nt start,vpa2,vpa3) = mkRule ("__Start" <:=> OO [id <$$> p'])
rules = vpa2 M.empty
grammar = (start, [ p | (_, alts) <- M.assocs rules, p <- alts ])
parse_res = GLL.parseWithOptions (popts++[packedNodesOnly,strictBinarisation]) grammar input
arr = mkInput input
(_,m) = A.bounds arr
in unsafePerformIO $ do
let (_,prods) = grammar
nt_set = S.fromList [ x | Prod x _ <- prods ]
putStrLn $ "#production: " ++ show (length prods)
putStrLn $ "#nonterminals: " ++ show (length nt_set)
putStrLn $ "largest nonterminal: " ++ show (
foldr (\x -> max (Data.Text.length x)) 0 nt_set)
startTime <- getCurrentTime
putStrLn $ "#tokens: " ++ (show m)
putStrLn $ "#successes: " ++ (show $ res_successes parse_res)
endTime <- getCurrentTime
putStrLn $ "recognition time: " ++ show (diffUTCTime endTime startTime)
startTime' <- getCurrentTime
putStrLn $ "#descriptors " ++ (show $ nr_descriptors parse_res)
putStrLn $ "#EPNs " ++ (show $ nr_packed_node_attempts parse_res)
endTime <- getCurrentTime
putStrLn $ "parse-data time: " ++ show (diffUTCTime endTime startTime')
startTime' <- getCurrentTime
as <- vpa3 (runOptions opts) emptyAncestors (sppf_result parse_res) arr 0 m
when (not (null as)) (writeFile "/tmp/derivation" (show (head as)))
endTime <- getCurrentTime
putStrLn $ "semantic phase: " ++ show (diffUTCTime endTime startTime')
putStrLn $ "total time: " ++ show (diffUTCTime endTime startTime)
return as
grammarOf :: (Show t, Parseable t, IsSymbExpr s) => s t a -> Grammar t
grammarOf p = (\(f,_,_) -> f) (parse' defaultPOpts defaultOptions p [])
printGrammarData :: (Show t, Parseable t, IsSymbExpr s) => s t a -> IO ()
printGrammarData p = do
putStrLn $ "production: " ++ show (length prods)
putStrLn $ "nonterminals: " ++ show (length nt_set)
putStrLn $ "largest nonterminal: " ++ show (
foldr (\x -> max (Data.Text.length x)) 0 nt_set)
where (_,prods) = grammarOf p
nt_set = S.fromList [ x | Prod x _ <- prods ]
parse :: (Show t, Parseable t, IsSymbExpr s) => s t a -> [t] -> [a]
parse = parseWithOptions [throwErrors]
parseWithOptions :: (Show t, Parseable t, IsSymbExpr s) =>
CombinatorOptions -> s t a -> [t] -> [a]
parseWithOptions opts p ts = parseWithParseOptions defaultPOpts opts p ts
parseWithParseOptions :: (Show t, Parseable t, IsSymbExpr s) =>
ParseOptions -> CombinatorOptions -> s t a -> [t] -> [a]
parseWithParseOptions pcopts opts p ts =
case parseWithParseOptionsAndError pcopts opts p ts of
Left str | throw_errors opts' -> error str
| otherwise -> []
Right as -> as
where opts' = runOptions opts
parseWithOptionsAndError :: (Show t, Parseable t, IsSymbExpr s) =>
CombinatorOptions -> s t a -> [t] -> Either String [a]
parseWithOptionsAndError opts p = parseWithParseOptionsAndError defaultPOpts opts p
parseWithParseOptionsAndError :: (Show t, Parseable t, IsSymbExpr s) =>
ParseOptions -> CombinatorOptions -> s t a -> [t] -> Either String [a]
parseWithParseOptionsAndError popts opts p = (\(_,_,t) -> t) . parse' defaultPOpts (runOptions opts) p
parseResult :: (Show t, Parseable t, IsSymbExpr s) => s t a -> [t] -> ParseResult t
parseResult = parseResultWithOptions [] []
parseResultWithOptions :: (Show t, Parseable t, IsSymbExpr s) =>
ParseOptions -> CombinatorOptions -> s t a -> [t] -> ParseResult t
parseResultWithOptions popts opts p str =
(\(_,s,_) -> s) $ parse' popts (runOptions opts) p str
defaultPOpts = [strictBinarisation, packedNodesOnly]
infixl 2 <:=>
(<:=>) :: (Show t, Ord t, HasAlts b) => String -> b t a -> SymbExpr t a
x <:=> altPs = mkNtRule False False x altPs
infixl 2 <::=>
(<::=>) :: (Show t, Ord t, HasAlts b) => String -> b t a -> SymbExpr t a
x <::=> altPs = mkNtRule True False x altPs
chooses :: (Show t, Ord t, IsAltExpr alt) => String -> [alt t a] -> SymbExpr t a
chooses p alts | null alts = error "chooses cannot be given an empty list of alternatives"
| otherwise = (<::=>) p (OO (map toAlt alts))
chooses_prec :: (Show t, Ord t, IsAltExpr alt) => String -> [alt t a] -> SymbExpr t a
chooses_prec p alts | null alts = error "chooses cannot be given an empty list of alternatives"
| otherwise = (<::=) p (OO (map toAlt alts))
infixl 4 <$$>
(<$$>) :: (Show t, Ord t, IsSymbExpr s) => (a -> b) -> s t a -> AltExpr t b
f <$$> p' = join_apply f p'
infixl 4 <**>,<<<**>,<**>>>
(<**>) :: (Show t, Ord t, IsAltExpr i, IsSymbExpr s) =>
i t (a -> b) -> s t a -> AltExpr t b
pl' <**> pr' = join_seq [] pl' pr'
(<**>>>) :: (Show t, Ord t, IsAltExpr i, IsSymbExpr s) =>
i t (a -> b) -> s t a -> AltExpr t b
pl' <**>>> pr' = join_seq [maximumPivot] pl' pr'
(<<<**>) :: (Show t, Ord t, IsAltExpr i, IsSymbExpr s) =>
i t (a -> b) -> s t a -> AltExpr t b
pl' <<<**> pr' = join_seq [minimumPivot] pl' pr'
infixr 3 <||>
(<||>) :: (Show t, Ord t, IsAltExpr i, HasAlts b) => i t a -> b t a -> AltExprs t a
l' <||> r' = let l = toAlt l'
r = altsOf r'
in OO (l : r)
longest_match :: (Show t, Ord t, IsAltExpr alt) => alt t a -> AltExpr t a
longest_match isalt = AltExpr (v1,v2,\opts -> v3 (maximumPivot opts))
where AltExpr (v1,v2,v3) = toAlt isalt
shortest_match :: (Show t, Ord t, IsAltExpr alt) => alt t a -> AltExpr t a
shortest_match isalt = AltExpr (v1,v2,\opts -> v3 (minimumPivot opts))
where AltExpr (v1,v2,v3) = toAlt isalt
term_parser :: t -> (t -> a) -> SymbExpr t a
term_parser t f = SymbExpr (Term t, id,\_ _ _ arr l _ -> return [f (arr A.! l)])
char :: Char -> SymbExpr Char Char
char c = term_parser c id
keychar :: SubsumesToken t => Char -> SymbExpr t Char
keychar c = term_parser (upcast (Char c)) (const c)
keyword :: SubsumesToken t => String -> SymbExpr t String
keyword k = term_parser (upcast (Keyword k)) (const k)
int_lit :: SubsumesToken t => SymbExpr t Int
int_lit = term_parser (upcast (IntLit Nothing)) (unwrap . downcast)
where unwrap (Just (IntLit (Just i))) = i
unwrap _ = error "int_lit: downcast, or token without lexeme"
float_lit :: SubsumesToken t => SymbExpr t Double
float_lit = term_parser (upcast (FloatLit Nothing)) (unwrap . downcast)
where unwrap (Just (FloatLit (Just i))) = i
unwrap _ = error "float_lit: downcast, or token without lexeme"
bool_lit :: SubsumesToken t => SymbExpr t Bool
bool_lit = term_parser (upcast (BoolLit Nothing)) (unwrap . downcast)
where unwrap (Just (BoolLit (Just b))) = b
unwrap _ = error "bool_lit: downcast, or token without lexeme"
char_lit :: SubsumesToken t => SymbExpr t Char
char_lit = term_parser (upcast (CharLit Nothing)) (unwrap . downcast)
where unwrap (Just (CharLit (Just s))) = s
unwrap _ = error "char_lit: downcast, or token without lexeme"
string_lit :: SubsumesToken t => SymbExpr t String
string_lit = term_parser (upcast (StringLit Nothing)) (unwrap . downcast)
where unwrap (Just (StringLit (Just i))) = i
unwrap _ = error "string_lit: downcast, or token without lexeme"
id_lit :: SubsumesToken t => SymbExpr t String
id_lit = term_parser (upcast (IDLit Nothing)) (unwrap . downcast)
where unwrap (Just (IDLit (Just i))) = i
unwrap _ = error "id_lit: downcast, or token without lexeme"
alt_id_lit :: SubsumesToken t => SymbExpr t String
alt_id_lit = term_parser (upcast (AltIDLit Nothing)) (unwrap . downcast)
where unwrap (Just (AltIDLit (Just i))) = i
unwrap _ = error "alt_id_lit: downcast, or token without lexeme"
token :: SubsumesToken t => String -> SymbExpr t String
token name = term_parser (upcast (Token name Nothing)) (unwrap . downcast)
where unwrap (Just (Token name' (Just i))) | name == name' = i
unwrap _ = error "tokenT: downcast, or token without lexeme"
epsilon :: (Show t, Ord t) => AltExpr t ()
epsilon = AltExpr ([], M.insert (pack x) [Prod (pack x) []],\_ _ _ _ _ l r ->
if l == r then return [(l,())] else return [] )
where x = "__eps"
satisfy :: (Show t, Ord t ) => a -> AltExpr t a
satisfy a = a <$$ epsilon
memo :: (Ord t, Show t, IsSymbExpr s) => MemoRef [a] -> s t a -> SymbExpr t a
memo ref p' = let SymbExpr (sym,rules,sem) = toSymb p'
lhs_sem opts ctx sppf arr l r
| not (do_memo opts) = sem opts ctx sppf arr l r
| otherwise = do
tab <- readIORef ref
case memLookup (l,r) tab of
Just as -> return as
Nothing -> do as <- sem opts ctx sppf arr l r
modifyIORef ref (memInsert (l,r) as)
return as
in SymbExpr (sym, rules, lhs_sem)
mkNt :: (Show t, Ord t, IsSymbExpr s) => s t a -> String -> String
mkNt p str = let SymbExpr (myx,_,_) = mkRule p
in "_(" ++ show myx ++ ")" ++ str
(.$.) :: (Show t, Ord t, IsAltExpr i) => (a -> b) -> i t a -> AltExpr t b
f .$. i = let AltExpr (s,r,sem) = toAlt i
in AltExpr (s,r,\opts slot ctx sppf arr l r ->
do as <- sem opts slot ctx sppf arr l r
return $ map (id *** f) as )
(<$$) :: (Show t, Ord t, IsSymbExpr s) => b -> s t a -> AltExpr t b
f <$$ p = const f <$$> p
infixl 4 <$$
infixl 4 **>, <<**>, **>>>
(**>) :: (Show t, Ord t, IsAltExpr i, IsSymbExpr s) => i t a -> s t b -> AltExpr t b
l **> r = flip const .$. l <**> r
(**>>>) :: (Show t, Ord t, IsAltExpr i, IsSymbExpr s) => i t a -> s t b -> AltExpr t b
l **>>> r = flip const .$. l <**>>> r
(<<**>) :: (Show t, Ord t, IsAltExpr i, IsSymbExpr s) => i t a -> s t b -> AltExpr t b
l <<**>r = flip const .$. l <<<**> r
infixl 4 <**, <<<**, <**>>
(<**) :: (Show t, Ord t, IsAltExpr i, IsSymbExpr s) => i t a -> s t b -> AltExpr t a
l <** r = const .$. l <**> r
(<**>>) :: (Show t, Ord t, IsAltExpr i, IsSymbExpr s) => i t a -> s t b -> AltExpr t a
l <**>> r = const .$. l <**>>> r
(<<<**) :: (Show t, Ord t, IsAltExpr i, IsSymbExpr s) => i t a -> s t b -> AltExpr t a
l <<<** r = const .$. l <<<**> r
x <::= altPs = mkNtRule True True x altPs
infixl 2 <::=
x <:= altPs = mkNtRule False True x altPs
infixl 2 <:=
many :: (Show t, Ord t, IsSymbExpr s) => s t a -> SymbExpr t [a]
many = multiple_ (<<<**>)
many1 :: (Show t, Ord t, IsSymbExpr s) => s t a -> SymbExpr t [a]
many1 = multiple1_ (<<<**>)
some :: (Show t, Ord t, IsSymbExpr s) => s t a -> SymbExpr t [a]
some = multiple_ (<**>>>)
some1 :: (Show t, Ord t, IsSymbExpr s) => s t a -> SymbExpr t [a]
some1 = multiple1_ (<**>>>)
multiple :: (Show t, Ord t, IsSymbExpr s) => s t a -> SymbExpr t [a]
multiple = multiple_ (<**>)
multiple1 :: (Show t, Ord t, IsSymbExpr s) => s t a -> SymbExpr t [a]
multiple1 = multiple1_ (<**>)
multiple_ disa p = let fresh = mkNt p "*"
in fresh <::=> ((:) <$$> p) `disa` (multiple_ disa p) <||> satisfy []
multiple1_ disa p = let fresh = mkNt p "+"
in fresh <::=> ((:) <$$> p) `disa` (multiple_ disa p)
manySepBy :: (Show t, Ord t, IsSymbExpr s, IsSymbExpr s2, IsAltExpr s2) =>
s t a -> s2 t b -> SymbExpr t [a]
manySepBy = sepBy many
manySepBy1 :: (Show t, Ord t, IsSymbExpr s, IsSymbExpr s2, IsAltExpr s2) =>
s t a -> s2 t b -> SymbExpr t [a]
manySepBy1 = sepBy1 many
someSepBy :: (Show t, Ord t, IsSymbExpr s, IsSymbExpr s2, IsAltExpr s2) =>
s t a -> s2 t b -> SymbExpr t [a]
someSepBy = sepBy some
someSepBy1 :: (Show t, Ord t, IsSymbExpr s, IsSymbExpr s2, IsAltExpr s2) =>
s t a -> s2 t b -> SymbExpr t [a]
someSepBy1 = sepBy1 some
multipleSepBy :: (Show t, Ord t, IsSymbExpr s, IsSymbExpr s2, IsAltExpr s2) =>
s t a -> s2 t b -> SymbExpr t [a]
multipleSepBy = sepBy multiple
multipleSepBy1 :: (Show t, Ord t, IsSymbExpr s, IsSymbExpr s2, IsAltExpr s2) =>
s t a -> s2 t b -> SymbExpr t [a]
multipleSepBy1 = sepBy1 multiple
sepBy :: (Show t, Ord t, IsSymbExpr s1, IsSymbExpr s2, IsAltExpr s2) =>
(AltExpr t a -> SymbExpr t [a]) -> s1 t a -> s2 t b -> SymbExpr t [a]
sepBy mult p c = mkRule $ satisfy [] <||> (:) <$$> p <**> mult (c **> p)
sepBy1 :: (Show t, Ord t, IsSymbExpr s1, IsSymbExpr s2, IsAltExpr s2) =>
(AltExpr t a -> SymbExpr t [a]) -> s1 t a -> s2 t b -> SymbExpr t [a]
sepBy1 mult p c = mkRule $ (:) <$$> p <**> mult (c **> p)
multipleSepBy2 p s = mkRule $
(:) <$$> p <** s <**> multipleSepBy1 p s
someSepBy2 p s = mkRule $
(:) <$$> p <** s <**> someSepBy1 p s
manySepBy2 p s = mkRule $
(:) <$$> p <** s <**> manySepBy1 p s
optional :: (Show t, Ord t, IsSymbExpr s) => s t a -> SymbExpr t (Maybe a)
optional p = fresh
<:=> Just <$$> p
<||> satisfy Nothing
where fresh = mkNt p "?"
preferably :: (Show t, Ord t, IsSymbExpr s) => s t a -> SymbExpr t (Maybe a)
preferably p = fresh
<:= Just <$$> p
<||> satisfy Nothing
where fresh = mkNt p "?"
reluctantly :: (Show t, Ord t, IsSymbExpr s) => s t a -> SymbExpr t (Maybe a)
reluctantly p = fresh
<:= satisfy Nothing
<||> Just <$$> p
where fresh = mkNt p "?"
optionalWithDef :: (Show t, Ord t, IsSymbExpr s) => s t a -> a -> SymbExpr t a
optionalWithDef p def = mkNt p "?" <:=> id <$$> p <||> satisfy def
within :: (Show t, Ord t, IsSymbExpr s) => BNF t a -> s t b -> BNF t c -> BNF t b
within l p r = mkRule $ l **> toSymb p <** r
parens p = within (keychar '(') p (keychar ')')
braces p = within (keychar '{') p (keychar '}')
brackets p = within (keychar '[') p (keychar ']')
angles p = within (keychar '<') p (keychar '>')
quotes p = within (keychar '\'') p (keychar '\'')
dquotes p = within (keychar '"') p (keychar '"')
foldr_multiple :: (IsSymbExpr s, Parseable t) => s t (a -> a) -> a -> BNF t a
foldr_multiple comb def = mkNt comb "-foldr"
<::=> satisfy def
<||> ($) <$$> comb <<<**> foldr_multiple comb def
foldr_multipleSepBy :: (IsSymbExpr s, Parseable t) => s t (a -> a) -> s t b -> a -> BNF t a
foldr_multipleSepBy comb sep def = mkNt comb "-foldr"
<::=> satisfy def
<||> ($ def) <$$> comb
<||> ($) <$$> comb <** sep <<<**> foldr_multipleSepBy comb sep def
type OpTable e = M.Map Double [(String, Fixity e)]
data Fixity e = Prefix (String -> e -> e) | Infix (e -> String -> e -> e) Assoc
data Assoc = LAssoc | RAssoc | NA
opTableFromList :: [(Double, [(String, Fixity e)])] -> OpTable e
opTableFromList = M.fromList
fromOpTable :: (SubsumesToken t, Parseable t, IsSymbExpr s) => String -> OpTable e -> s t e -> BNF t e
fromOpTable nt ops rec = chooses_prec (nt ++ "-infix-prefix-exprs") $
[ mkNterm ix row
| (ix, row) <- zip [1..] (M.elems ops)
]
where mkNterm ix ops = chooses (ntName ix) $
[ mkAlt op fix | (op, fix) <- ops ]
where mkAlt op fix = case fix of
Prefix f -> f <$$> keyword op <**> rec
Infix f assoc -> case assoc of
LAssoc -> f <$$> rec <**> keyword op <**>>> rec
RAssoc -> f <$$> rec <**> keyword op <<<**> rec
_ -> f <$$> rec <**> keyword op <**> rec
ntName i = show i ++ nt ++ "-op-row"