{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE Safe #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Kleene.Internal.RE (
RE (..),
empty,
eps,
char,
charRange,
anyChar,
appends,
unions,
star,
string,
nullable,
derivate,
transitionMap,
leadingChars,
equivalent,
generate,
isEmpty,
nullableProof,
) where
import Prelude ()
import Prelude.Compat
import Data.Semigroup (Semigroup (..))
import Control.Applicative (liftA2)
import Data.Foldable (toList)
import Data.List (foldl')
import Data.Map (Map)
import Data.RangeSet.Map (RSet)
import Data.Set (Set)
import Data.String (IsString (..))
import qualified Data.Function.Step.Discrete.Closed as SF
import qualified Data.Map as Map
import qualified Data.RangeSet.Map as RSet
import qualified Data.Set as Set
import qualified Test.QuickCheck as QC
import qualified Test.QuickCheck.Gen as QC (unGen)
import qualified Test.QuickCheck.Random as QC (mkQCGen)
import qualified Kleene.Classes as C
import qualified Kleene.Internal.Partition as P
import Kleene.Internal.Pretty
data RE c
= REChars (RSet c)
| REAppend [RE c]
| REUnion (RSet c) (Set (RE c))
| REStar (RE c)
deriving (Eq, Ord, Show)
empty :: RE c
empty = REChars RSet.empty
everything :: Bounded c => RE c
everything = REStar (REChars RSet.full)
eps :: RE c
eps = REAppend []
char :: c -> RE c
char = REChars . RSet.singleton
charRange :: Ord c => c -> c -> RE c
charRange c c' = REChars $ RSet.singletonRange (c, c')
anyChar :: Bounded c => RE c
anyChar = REChars RSet.full
appends :: Eq c => [RE c] -> RE c
appends rs0
| elem empty rs1 = empty
| otherwise = case rs1 of
[r] -> r
rs -> REAppend rs
where
rs1 = concatMap f rs0
f (REAppend rs) = rs
f r = [r]
unions :: (Ord c, Enum c, Bounded c) => [RE c] -> RE c
unions = uncurry mk . foldMap f where
mk cs rss
| Set.null rss = REChars cs
| Set.member everything rss = everything
| RSet.null cs = case Set.toList rss of
[] -> empty
[r] -> r
_ -> REUnion cs rss
| otherwise = REUnion cs rss
f (REUnion cs rs) = (cs, rs)
f (REChars cs) = (cs, Set.empty)
f r = (mempty, Set.singleton r)
star :: Ord c => RE c -> RE c
star r = case r of
REStar _ -> r
REAppend [] -> eps
REChars cs | RSet.null cs -> eps
REUnion cs rs | Set.member eps rs -> case Set.toList rs' of
[] -> star (REChars cs)
[r'] | RSet.null cs -> star r'
_ -> REStar (REUnion cs rs')
where
rs' = Set.delete eps rs
_ -> REStar r
string :: [c] -> RE c
string [] = eps
string [c] = REChars (RSet.singleton c)
string cs = REAppend $ map (REChars . RSet.singleton) cs
instance (Ord c, Enum c, Bounded c) => C.Kleene (RE c) where
empty = empty
eps = eps
appends = appends
unions = unions
star = star
instance (Ord c, Enum c, Bounded c) => C.CharKleene c (RE c) where
char = char
instance (Ord c, Enum c, Bounded c) => C.FiniteKleene c (RE c) where
everything = everything
charRange = charRange
fromRSet = REChars
anyChar = anyChar
(\/) :: (Ord c, Enum c, Bounded c) => RE c -> RE c -> RE c
r \/ r' = unions [r, r']
nullable :: RE c -> Bool
nullable (REChars _) = False
nullable (REAppend rs) = all nullable rs
nullable (REUnion _cs rs) = any nullable rs
nullable (REStar _) = True
derivate :: (Ord c, Enum c, Bounded c) => c -> RE c -> RE c
derivate c (REChars cs) = derivateChars c cs
derivate c (REUnion cs rs) = unions $ derivateChars c cs : [ derivate c r | r <- toList rs]
derivate c (REAppend rs) = derivateAppend c rs
derivate c rs@(REStar r) = derivate c r <> rs
derivateAppend :: (Ord c, Enum c, Bounded c) => c -> [RE c] -> RE c
derivateAppend _ [] = empty
derivateAppend c [r] = derivate c r
derivateAppend c (r:rs)
| nullable r = unions [r' <> appends rs, rs']
| otherwise = r' <> appends rs
where
r' = derivate c r
rs' = derivateAppend c rs
derivateChars :: Ord c => c -> RSet c -> RE c
derivateChars c cs
| c `RSet.member` cs = eps
| otherwise = empty
instance (Ord c, Enum c, Bounded c) => C.Derivate c (RE c) where
nullable = nullable
derivate = derivate
instance (Ord c, Enum c, Bounded c) => C.Match c (RE c) where
match r = nullable . foldl' (flip derivate) r
nullableProof :: forall c. (Ord c, Enum c, Bounded c) => RE c -> Maybe (RE c)
nullableProof (REChars _) = Nothing
nullableProof (REAppend []) = Just empty
nullableProof (REAppend xs)
| Just ys <- traverse (\x -> (,) x <$> nullableProof x) xs = Just (go ys)
| otherwise = Nothing
where
go :: [(RE c, RE c)] -> RE c
go rs = unions $ map appends $ tail $ traverse (\(r,r') -> [r,r']) rs
nullableProof (REUnion cs rs)
| any nullable rs = Just $ REUnion cs $ Set.map (\r -> maybe r id $ nullableProof r) rs
| otherwise = Nothing
nullableProof (REStar r)
| Just r' <- nullableProof r = Just (r' <> REStar r')
| otherwise = Just (r <> REStar r)
isEmpty :: RE c -> Bool
isEmpty (REChars rs) = RSet.null rs
isEmpty _ = False
transitionMap
:: forall c. (Ord c, Enum c, Bounded c)
=> RE c
-> Map (RE c) (SF.SF c (RE c))
transitionMap re = go Map.empty [re] where
go :: Map (RE c) (SF.SF c (RE c))
-> [RE c]
-> Map (RE c) (SF.SF c (RE c))
go !acc [] = acc
go acc (r : rs)
| r `Map.member` acc = go acc rs
| otherwise = go (Map.insert r pm acc) (SF.values pm ++ rs)
where
pm = P.toSF (\c -> derivate c r) (leadingChars r)
instance (Ord c, Enum c, Bounded c) => C.TransitionMap c (RE c) where
transitionMap = transitionMap
leadingChars :: (Ord c, Enum c, Bounded c) => RE c -> P.Partition c
leadingChars (REChars cs) = P.fromRSet cs
leadingChars (REUnion cs rs) = P.fromRSet cs <> foldMap leadingChars rs
leadingChars (REStar r) = leadingChars r
leadingChars (REAppend rs) = leadingCharsAppend rs
leadingCharsAppend :: (Ord c, Enum c, Bounded c) => [RE c] -> P.Partition c
leadingCharsAppend [] = P.whole
leadingCharsAppend (r : rs)
| nullable r = leadingChars r <> leadingCharsAppend rs
| otherwise = leadingChars r
equivalent :: forall c. (Ord c, Enum c, Bounded c) => RE c -> RE c -> Bool
equivalent x0 y0 = go mempty [(x0, y0)] where
go :: Set (RE c, RE c) -> [(RE c, RE c)] -> Bool
go !_ [] = True
go acc (p@(x, y) : zs)
| p `Set.member` acc = go acc zs
| x == y = go (Set.insert p acc) zs
| all agree ps = go (Set.insert p acc) (ps ++ zs)
| otherwise = False
where
cs = toList $ P.examples $ leadingChars x `P.wedge` leadingChars y
ps = map (\c -> (derivate c x, derivate c y)) cs
agree :: (RE c, RE c) -> Bool
agree (x, y) = nullable x == nullable y
instance (Ord c, Enum c, Bounded c) => C.Equivalent c (RE c) where
equivalent = equivalent
generate
:: (c -> c -> QC.Gen c)
-> Int
-> RE c
-> [[c]]
generate c seed re
| isEmpty re = []
| otherwise = QC.unGen (QC.infiniteListOf (generator c re)) (QC.mkQCGen seed) 10
generator
:: (c -> c -> QC.Gen c)
-> RE c
-> QC.Gen [c]
generator c = go where
go (REChars cs) = goChars cs
go (REAppend rs) = concat <$> traverse go rs
go (REUnion cs rs)
| RSet.null cs = QC.oneof [ go r | r <- toList rs ]
| otherwise = QC.oneof $ goChars cs : [ go r | r <- toList rs ]
go (REStar r) = QC.sized $ \n -> do
n' <- QC.choose (0, n)
concat <$> sequence (replicate n' (go r))
goChars cs = pure <$> QC.oneof [ c x y | (x,y) <- RSet.toRangeList cs ]
instance Eq c => Semigroup (RE c) where
r <> r' = appends [r, r']
instance Eq c => Monoid (RE c) where
mempty = eps
mappend = (<>)
mconcat = appends
instance c ~ Char => IsString (RE c) where
fromString = string
instance (Ord c, Enum c, Bounded c, QC.Arbitrary c) => QC.Arbitrary (RE c) where
arbitrary = QC.sized arb where
c :: QC.Gen (RE c)
c = REChars . RSet.fromRangeList <$> QC.arbitrary
arb :: Int -> QC.Gen (RE c)
arb n | n <= 0 = QC.oneof [c, fmap char QC.arbitrary, pure eps]
| otherwise = QC.oneof
[ c
, pure eps
, fmap char QC.arbitrary
, liftA2 (<>) (arb n2) (arb n2)
, liftA2 (\/) (arb n2) (arb n2)
, fmap star (arb n2)
]
where
n2 = n `div` 2
shrink (REUnion _cs rs) = Set.toList rs
shrink (REAppend rs) = rs ++ map appends (QC.shrink rs)
shrink (REStar r) = r : map star (QC.shrink r)
shrink _ = []
instance (QC.CoArbitrary c) => QC.CoArbitrary (RE c) where
coarbitrary (REChars cs) = QC.variant (0 :: Int) . QC.coarbitrary (RSet.toRangeList cs)
coarbitrary (REAppend rs) = QC.variant (1 :: Int) . QC.coarbitrary rs
coarbitrary (REUnion cs rs) = QC.variant (2 :: Int) . QC.coarbitrary (RSet.toRangeList cs, Set.toList rs)
coarbitrary (REStar r) = QC.variant (3 :: Int) . QC.coarbitrary r
instance c ~ Char => Pretty (RE c) where
prettyS x = showChar '^' . go False x . showChar '$'
where
go :: Bool -> RE Char -> ShowS
go p (REStar a)
= parens p
$ go True a . showChar '*'
go p (REAppend rs)
= parens p $ goMany id rs
go p (REUnion cs rs)
| RSet.null cs = goUnion p rs
| Set.null rs = prettyS cs
| otherwise = goUnion p (Set.insert (REChars cs) rs)
go _ (REChars cs)
= prettyS cs
goUnion p rs
| Set.member eps rs = parens p $ goUnion' True . showChar '?'
| otherwise = goUnion' p
where
goUnion' p' = case Set.toList (Set.delete eps rs) of
[] -> go True empty
[r] -> go p' r
(r:rs') -> parens True $ goSome1 (showChar '|') r rs'
goMany :: ShowS -> [RE Char] -> ShowS
goMany sep = foldr (\a b -> go False a . sep . b) id
goSome1 :: ShowS -> RE Char -> [RE Char] -> ShowS
goSome1 sep r = foldl (\a b -> a . sep . go False b) (go False r)
parens :: Bool -> ShowS -> ShowS
parens True s = showString "(" . s . showChar ')'
parens False s = s
instance C.ToLatin1 RE where
toLatin1 (REChars rs) = C.fromRSet (C.toLatin1 rs)
toLatin1 (REAppend xs) = appends (map C.toLatin1 xs)
toLatin1 (REUnion rs xs) = C.fromRSet (C.toLatin1 rs) \/ unions (map C.toLatin1 (Set.toList xs))
toLatin1 (REStar r) = star (C.toLatin1 r)