module PP.Rule
(
Rule(..)
, uniformize
, extend
, separate
, regexfy
, RuleSet
, ruleSet
, rule
, check
, FirstSet
, firstSet
, first
) where
import Control.Monad
import Data.Binary
import Data.Either
import Data.List
import qualified Data.Map.Strict as Map
import Data.Maybe
import PP.Lexer (IToken)
data Rule
= Rule String [Rule]
| NonTerm String
| Term IToken
| TermToken String
| Empty
| Concat [Rule]
| RegEx String
| RegExString String
deriving (Eq, Ord)
instance Show Rule where
show (Rule a xs) = a ++ " -> " ++ right xs
where
right [] = ""
right [x] = show x
right (x:xs) = show x ++ "," ++ right xs
show (NonTerm a) = a
show (Term c) = show c
show (TermToken t) = '%' : t
show Empty = "$"
show (Concat xs) = "Concat " ++ show xs
show (RegEx re) = '%' : show re
show (RegExString s) = show s
instance Binary Rule where
put (Rule a xs) = putWord8 0 >> put a >> put xs
put (NonTerm a) = putWord8 1 >> put a
put (Term c) = putWord8 2 >> put c
put (TermToken t) = putWord8 3 >> put t
put Empty = putWord8 4
put (Concat xs) = putWord8 5 >> put xs
put (RegEx re) = putWord8 6 >> put re
put (RegExString s) = putWord8 7 >> put s
get = do
tag <- getWord8
case tag of
0 -> liftM2 Rule get get
1 -> fmap NonTerm get
2 -> fmap Term get
3 -> fmap TermToken get
4 -> return Empty
5 -> fmap Concat get
6 -> fmap RegEx get
7 -> fmap RegExString get
uniformize :: [Rule] -> [Rule]
uniformize = sort . nub . concatMap (flatten . clean)
clean :: Rule -> Rule
clean (Rule s xs) = Rule s (cleaning xs)
where
cleaning [] = []
cleaning a@[Empty] = a
cleaning (Empty : xs) = cleaning xs
cleaning (Concat [] : xs) = cleaning xs
cleaning (Concat xs : ys) = cleaning (xs ++ ys)
cleaning (Rule s xs : ys) = Rule s (cleaning xs) : cleaning ys
cleaning (x : xs) = x : cleaning xs
flatten :: Rule -> [Rule]
flatten (Rule s xs) = Rule s (replace xs) : extract xs
where
replace [] = []
replace (Rule s _ : xs) = NonTerm s : replace xs
replace (x : xs) = x : replace xs
extract [] = []
extract (r@(Rule _ _) : xs) = flatten r ++ extract xs
extract (x : xs) = extract xs
extend :: [Rule] -> Either String [Rule]
extend xs = case start xs of
Left s -> Left $ "cannot extend, " ++ s
Right s -> Right $ Rule "__start" [NonTerm s, Empty] : xs
start :: [Rule] -> Either String String
start xs = let c = candidates xs in
case length c of
1 -> Right $ head c
_ -> Left $ "no start rule found (candidates: " ++ show c ++ ")"
candidates :: [Rule] -> [String]
candidates = map (fst . head) . filter (all snd) . grp . sortOn fst . evaluate
where
grp = groupBy (\(a, _) (b, _) -> a == b)
evaluate [] = []
evaluate (Rule s xs : ys) = (s, True) : evaluate xs ++ evaluate ys
evaluate (NonTerm s : xs) = (s, False) : evaluate xs
evaluate (_ : xs) = evaluate xs
type RuleSet = Map.Map String [[Rule]]
ruleSet :: [Rule] -> RuleSet
ruleSet xs = Map.fromList [(n, collect n xs) | n <- names xs]
where
names = nub . map (\(Rule s _) -> s)
collect n = map (\(Rule _ r) -> r) . filter (\(Rule s _) -> s == n)
rule :: String -> RuleSet -> [Rule]
rule name rs = case Map.lookup name rs of
Nothing -> []
Just xs -> map (Rule name) xs
check :: RuleSet -> ([String], [String])
check rs = (missing ++ leftRec, unused)
where
missing = ["missing non-terminal: " ++ n | n <- right, n `notElem` left]
leftRec = ["direct left-recusion: " ++ n | n <- left, hasLeftRec n]
unused = ["unused non-terminal: " ++ n
| n <- left
, n /= "__start"
, n `notElem` right]
hasLeftRec n = hasLeftRec' n /= []
hasLeftRec' n = [0 | (Rule _ (x:_)) <- rule n rs, hasLeftRec'' n x]
hasLeftRec'' n (NonTerm s) = n == s
hasLeftRec'' _ _ = False
left = Map.keys rs
right = nub $ concat [nonTerm xs | n <- left, (Rule _ xs) <- rule n rs]
nonTerm [] = []
nonTerm (NonTerm s : xs) = s : nonTerm xs
nonTerm (_:xs) = nonTerm xs
type FirstSet = Map.Map String [Rule]
firstSet :: RuleSet -> FirstSet
firstSet rs = Map.mapWithKey (\k _ -> find k rs) rs
where
find name rs = nub . sort $ concatMap compute $ noLeftRec $ rule name rs
noLeftRec = filter (\(Rule a (x:_)) -> case x of
NonTerm b -> a /= b
_ -> True)
compute (Rule _ [Empty]) = [Empty]
compute (Rule name (x:xs)) = case compute x of
[Empty] -> compute $ Rule name xs
a -> a
compute a@(Term _) = [a]
compute a@(TermToken _) = [a]
compute (NonTerm s) = find s rs
compute Empty = [Empty]
first :: Rule -> FirstSet -> [Rule]
first Empty _ = [Empty]
first a@(Term _) _ = [a]
first a@(TermToken _) _ = [a]
first (NonTerm s) fs = fromMaybe [Empty] (Map.lookup s fs)
first (Rule _ (x:_)) fs = first x fs
separate :: [Rule] -> ([Rule], [Rule])
separate rs = nonTermToToken (filter (not . hasRegex) rs, filter hasRegex rs)
where
hasRegex (Rule _ []) = False
hasRegex (Rule r (x:xs)) = hasRegex x || hasRegex (Rule r xs)
hasRegex (NonTerm _) = False
hasRegex (Term _) = False
hasRegex (TermToken _) = False
hasRegex Empty = False
hasRegex (Concat []) = False
hasRegex (Concat (x:xs)) = hasRegex x || hasRegex (Concat xs)
hasRegex (RegEx _) = True
hasRegex (RegExString _) = True
nonTermToToken :: ([Rule], [Rule]) -> ([Rule], [Rule])
nonTermToToken (rs, lrs) = (mappers rs, mappers lrs)
where
mappers = map (\(Rule r xs) -> Rule r $ map (replaceNonTerm tok) xs)
tok = map (\(Rule r _) -> r) lrs
replaceNonTerm [] r = r
replaceNonTerm (t:ts) r@(NonTerm nt) =
if nt == t then TermToken t else replaceNonTerm ts r
replaceNonTerm (_:ts) r = replaceNonTerm ts r
regexfy :: [Rule] -> [Rule]
regexfy lrs = concatMap replace lrs
where
replace (Rule r xs) = [Rule r $ bind [RegEx ""] $ concatMap replace xs]
replace (TermToken nt) = concatMap replace $ find nt
replace x = [x]
bind acc [Empty] = acc ++ [Empty]
bind (RegEx a:acc) (RegEx b:xs) = bind [RegEx $ a ++ b] xs
bind (RegEx a:acc) (RegExString b:xs) = bind [RegEx $ a ++ toRegex b] xs
find r = let (Rule _ xs:_) = rule r rs in init xs
toRegex s = '(' : concat [['[',c,']'] | c <- s] ++ ")"
rs = ruleSet lrs