{-# LANGUAGE TypeFamilies, FlexibleContexts, BangPatterns, ScopedTypeVariables
, OverloadedStrings, StandaloneDeriving, UndecidableInstances #-}
module Text.ANTLR.Allstar.ParserGenerator
( GrammarSymbol(..), ATNEnv(..)
, AST(..), ATNState(..), ATNEdge(..)
, ATNEdgeLabel(..), Label(..)
, parse, Tokenizer(..)
) where
import Data.List
import qualified Data.Set as DS
import Debug.Trace as D
import Data.Maybe (fromJust)
import Text.ANTLR.Parser (HasEOF(..), CanParse(..) )
import Text.ANTLR.Grammar (Ref(..))
import Text.ANTLR.Pretty (pshow', Prettify(..), pStr)
data GrammarSymbol nt t = NT nt | T t | EPS deriving (Eq, Ord, Show)
instance (Prettify nt, Prettify t) => Prettify (GrammarSymbol nt t) where
prettify (NT nt) = pStr "NT " >> prettify nt
prettify (T t) = pStr "T " >> prettify t
data ATNState nt =
Init nt
| Middle nt Int Int
| Final nt
deriving (Eq, Ord, Show)
instance (Prettify nt) => Prettify (ATNState nt) where
prettify (Init nt) = pStr "Init " >> prettify nt
prettify (Middle nt i0 i1) = pStr "Middle " >> prettify nt >> pStr " " >> prettify i0 >> pStr " " >> prettify i1
prettify (Final nt) = pStr "Final " >> prettify nt
type ATNEdge nt t = (ATNState nt, ATNEdgeLabel nt t, ATNState nt)
data ATNEdgeLabel nt t =
GS (GrammarSymbol nt t)
| PRED Bool
deriving (Eq, Ord, Show)
instance (Prettify nt, Prettify t) => Prettify (ATNEdgeLabel nt t) where
prettify (GS gs) = prettify gs
prettify (PRED b) = pStr "PRED " >> prettify b
type ATNEnv nt t = DS.Set (ATNEdge nt t)
isInit :: ATNState nt -> Bool
isInit (Init nt) = True
isInit _ = False
outgoingEdge :: (Eq nt, Prettify nt) => ATNState nt -> ATNEnv nt t -> ATNEdge nt t
outgoingEdge p atnEnv = let edges = outgoingEdges p atnEnv
in case edges of
[edge] -> edge
_ -> error "Multiple edges found"
outgoingEdges :: (Eq nt, Prettify nt) => ATNState nt -> ATNEnv nt t -> [ATNEdge nt t]
outgoingEdges p atnEnv = DS.toList (DS.filter (\(p',_,_) -> p' == p) atnEnv)
type ATNStack nt = [ATNState nt]
type ATNConfig nt = (ATNState nt, Int, ATNStack nt)
type DFA nt t = [DFAEdge nt t]
type DFAEdge nt t = (DFAState nt, t, DFAState nt)
data DFAState nt = Dinit [ATNConfig nt] | D [ATNConfig nt] | F Int | Derror deriving (Eq, Ord, Show)
type DFAEnv nt t = [(GrammarSymbol nt t, DFA nt t)]
instance (Prettify nt) => Prettify (DFAState nt) where
prettify (Dinit cfgs) = pStr "Dinit " >> prettify cfgs
prettify (D cfgs) = pStr "D " >> prettify cfgs
prettify (F i) = pStr "F " >> prettify i
prettify (Derror) = pStr "Derror"
getLabel :: (Ref v, HasEOF (Sym v), Prettify v) => v -> (StripEOF (Sym v))
getLabel v = (fromJust . stripEOF . getSymbol) v
type Label tok = StripEOF (Sym tok)
data AST nt tok = Node nt [GrammarSymbol nt (Label tok)] [AST nt tok] | Leaf tok
deriving instance (Eq nt, Eq tok, Eq (Label tok)) => Eq (AST nt tok)
deriving instance (Show nt, Show tok, Show (Label tok)) => Show (AST nt tok)
instance (Prettify nt, Prettify tok, Prettify (Label tok)) => Prettify (AST nt tok) where
prettify (Node nt ruleFired asts) = do
pStr "Node "
prettify nt
pStr " <"
prettify ruleFired
pStr "> "
prettify asts
prettify (Leaf tok) = prettify tok
emptyEnv = []
emptyStack = []
emptyDerivation = []
getConflictSetsPerLoc :: (Eq nt, Ord nt) => DFAState nt -> [[ATNConfig nt]]
getConflictSetsPerLoc q =
case q of
F _ -> error "final state passed to getConflictSetsPerLoc"
Derror -> error "error state passed to getConflictSetsPerLoc"
D configs -> let sortedConfigs = sortOn (\(p, i, gamma) -> (p, gamma)) configs
in groupBy (\(p, i, gamma) (p', j, gamma') ->
p == p' && i /= j && gamma == gamma')
sortedConfigs
getProdSetsPerState :: (Eq nt, Ord nt) => DFAState nt -> [[ATNConfig nt]]
getProdSetsPerState q =
case q of
F _ -> error "final state passed to getProdSetsPerState"
Derror -> error "error state passed to getProdSetsPerState"
D configs -> let sortedConfigs = sortOn (\(p, i, gamma) -> (p, gamma)) configs
in groupBy (\(p, _, _) (p', _, _) -> p == p')
sortedConfigs
dfaTrans :: (Eq nt, Eq t) => DFAState nt -> t -> DFA nt t -> Maybe (DFAEdge nt t)
dfaTrans d t dfa = find (\(d1, label, _) -> d1 == d && label == t) dfa
findInitialState :: DFA nt t -> Maybe (DFAState nt)
findInitialState dfa =
let isInit d = case d of
Dinit _ -> True
_ -> False
in case find (\(d1, _, _) -> isInit d1) dfa of
Just (d1, _, _) -> Just d1
Nothing -> Nothing
allEqual :: Eq a => [a] -> Bool
allEqual [] = True
allEqual (x : xs) = all (== x) xs
bind :: Eq a => a -> b -> [(a, b)] -> [(a, b)]
bind k v [] = [(k, v)]
bind k v ((k', v') : al') = if k == k' then (k, v) : al' else (k', v') : bind k v al'
type Tokenizer chr tok = [chr] -> [(tok, [chr])]
getMe :: (Ref tok, HasEOF (Sym tok), Prettify tok) => AST nt tok -> GrammarSymbol nt (Label tok)
getMe (Leaf tok) = T $ getLabel tok
getMe (Node nt _ _) = NT nt
parse :: forall chr tok nt. ( CanParse nt tok, Prettify chr ) =>
Tokenizer chr tok ->
[chr] -> GrammarSymbol nt (Label tok) -> ATNEnv nt (Label tok) -> Bool -> Either String (AST nt tok)
parse tokenizer input startSym atnEnv useCache =
let
parseLoop
:: (tok, [chr])
-> ATNState nt
-> [ATNState nt]
-> DFAEnv nt (Label tok)
-> [AST nt tok]
-> [[AST nt tok]]
-> Either String (AST nt tok)
parseLoop (t, chrs) currState stack dfaEnv subtrees astStack =
case (currState, startSym) of
(Final c, NT c') ->
if c == c' then
Right (Node c (map getMe subtrees) subtrees)
else
case (stack, astStack) of
(q : stack', leftSiblings : astStack') ->
parseLoop (t, chrs) q stack' dfaEnv (leftSiblings ++ [Node c (map getMe subtrees) subtrees]) astStack'
_ -> error ("Reached a final ATN state, but parse is incomplete " ++
"and there's no ATN state to return to")
(_, _) ->
case (outgoingEdge currState atnEnv) of
(p, term, q) ->
case (term, t) of
(GS (T b), t) -> if b == getLabel t then
case tokenizer chrs of
[] -> parseLoop (t, chrs) q stack dfaEnv (subtrees ++ [Leaf t]) astStack
((t', chrs'):ts') -> parseLoop (t', chrs') q stack dfaEnv (subtrees ++ [Leaf t]) astStack
else
Left ("remaining input: " ++ pshow' (t, chrs))
(GS (NT b), _) -> let stack' = q : stack
in case adaptivePredict (NT b) (t, chrs) stack' dfaEnv of
Nothing -> Left ("Couldn't find a path through ATN " ++ pshow' b ++
" with input " ++ pshow' (t, chrs))
Just (i, dfaEnv') -> parseLoop (t, chrs) (Middle b i 0) stack' dfaEnv' [] (subtrees : astStack)
(GS EPS, _) -> parseLoop (t, chrs) q stack dfaEnv subtrees astStack
(PRED _, _) -> error "not implemented"
initialDfaEnv = DS.toList (DS.foldr (\(p,_,_) ntNames ->
case p of Init ntName -> DS.insert (NT ntName, []) ntNames
_ -> ntNames)
DS.empty
atnEnv)
tokens = tokenizer input
in case tokens of
[] -> Left $ "Tokenizer returned nothing on input: " ++ pshow' input
((t, chrs):ts) ->
case startSym of
(NT c) -> case adaptivePredict startSym (t, chrs) emptyStack initialDfaEnv of
Nothing ->
Left ("Couldn't find a path through ATN " ++ pshow' c ++ " with input " ++ pshow' input)
Just (iStart, initialDfaEnv') ->
parseLoop (t, chrs) (Middle c iStart 0) emptyStack initialDfaEnv' [] emptyStack
_ -> Left "Start symbol must be a nonterminal"
where
adaptivePredict sym (t, chrs) stack dfaEnv =
case lookup sym dfaEnv of
Nothing -> error ("No DFA found for " ++ pshow' sym)
Just dfa -> let d0 = case findInitialState dfa of
Just d0 -> d0
Nothing -> startState sym emptyStack
in sllPredict sym (t, chrs) d0 stack dfaEnv
startState sym stack =
case sym of
NT ntName ->
let initEdges = outgoingEdges (Init ntName) atnEnv
loopOverAtnPaths initEdges =
case initEdges of
[] -> []
(Init _, GS EPS, q@(Middle _ i _)) : es ->
(closure [] (q, i, stack)) ++ loopOverAtnPaths es
_ -> error "ATN path must begin with an epsilon edge from Init to Choice"
in D (loopOverAtnPaths initEdges)
_ -> error "Symbol passed to startState must be a nonterminal"
closure busy currConfig =
if elem currConfig busy then
[]
else
let busy' = currConfig : busy
(p, i, gamma) = currConfig
pEdges = outgoingEdges p atnEnv
loopOverEdges es =
case es of
[] -> []
(_, GS (NT ntName), q) : es' ->
closure busy' (Init ntName, i, q : gamma) ++
loopOverEdges es'
(_, GS EPS, q) : es' ->
closure busy' (q, i, gamma) ++
loopOverEdges es'
(_, GS (T _), _) : es' ->
loopOverEdges es'
in case (p, gamma) of
(Final _, []) -> [currConfig]
(Final _, q : gamma') -> currConfig : closure busy' (q, i, gamma')
_ -> currConfig : loopOverEdges pEdges
sllPredict sym (t, chrs) d0 stack initialDfaEnv =
let predictionLoop d (t, chrs) dfaEnv =
let (d', dfaEnv') =
if useCache then
case lookup sym dfaEnv of
Nothing -> error ("No DFA found for nonterminal " ++ pshow' sym ++ pshow' dfaEnv)
Just dfa ->
case dfaTrans d (getLabel t) dfa of
Just (_, _, d2) -> (d2, dfaEnv)
Nothing -> let d' = target d t
in (d', bind sym ((d, getLabel t, d') : dfa) dfaEnv)
else
(target d t, dfaEnv)
in case d' of
Derror -> Nothing
F i -> Just (i, dfaEnv')
D atnConfigs ->
let conflictSets = getConflictSetsPerLoc d'
prodSets = getProdSetsPerState d'
stackSensitive =
any (\cSet -> length cSet > 1) conflictSets &&
not (any (\pSet -> length pSet == 1) prodSets)
in if stackSensitive then
Just (llPredict sym (t, chrs) stack, initialDfaEnv)
else
predictionLoop d' (case tokenizer chrs of
[] -> (t, chrs)
((t', chrs'):_) -> (t', chrs')) dfaEnv'
in predictionLoop d0 (t, chrs) initialDfaEnv
llPredict sym (t, chrs) stack =
let d0 = startState sym stack
predictionLoop d (t, chrs) =
let mv = move d (getLabel t)
d' = D (concat (map (closure []) mv))
in case d' of
D [] -> error ("empty DFA state in llPredict")
D atnConfigs ->
case nub (map (\(_, j, _) -> j) atnConfigs) of
[i] -> i
_ ->
let altSets = getConflictSetsPerLoc d'
in case altSets of
[] -> error ("No alt sets found")
a : as ->
if allEqual altSets && length a > 1 then
minimum (map (\(_, i, _) -> i) a)
else
predictionLoop d' (case tokenizer chrs of
[] -> error "Tokenizer can't do it."
((t', chrs'):_) -> (t', chrs'))
in predictionLoop d0 (t, chrs)
target d a =
let mv = move d (getLabel a)
d' = D (concat (map (closure []) mv))
in case d' of
D [] -> Derror
D atnConfigs ->
case nub (map (\(_, j, _) -> j) atnConfigs) of
[i] -> F i
_ -> d'
move q t =
case q of
D atnConfigs ->
let qsForP (p, i, gamma) =
let pOutgoingEdges = outgoingEdges p atnEnv
in foldr (\(p', label, q) acc ->
case label of
GS (T a) -> if t == a then
(q, i, gamma) : acc
else
acc
_ -> acc)
[]
pOutgoingEdges
in concat (map qsForP atnConfigs)