{-# LANGUAGE CPP         #-}
#if __GLASGOW_HASKELL__ >=710
{-# LANGUAGE Safe        #-}
#elif __GLASGOW_HASKELL__ >=702
{-# LANGUAGE Trustworthy #-}
#endif
-- | Charactor classes.
module RERE.CharClasses (
    CharClasses,
    charClasses,
    classOfChar,
    ) where

import RERE.Type

import qualified Data.Set     as Set
import qualified RERE.CharSet as CS

-- | Character classes are represented by partition lower bounds.
type CharClasses = Set.Set Char

-- | Character classes.
--
-- We can partition 'Char' so characters in each part,
-- affect the given regular expression in the same way.
--
-- If we do some kind of memoising, we can map all characters
-- to 'classOfChar', making everything smaller.
--
charClasses :: RE a -> CharClasses
charClasses :: RE a -> CharClasses
charClasses = [CharSet] -> CharClasses
charsetClasses ([CharSet] -> CharClasses)
-> (RE a -> [CharSet]) -> RE a -> CharClasses
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set CharSet -> [CharSet]
forall a. Set a -> [a]
Set.toList (Set CharSet -> [CharSet])
-> (RE a -> Set CharSet) -> RE a -> [CharSet]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. RE a -> Set CharSet
forall a. RE a -> Set CharSet
collect

-- | Map char to the representer of a class.
classOfChar :: CharClasses -> Char -> Char
#if MIN_VERSION_containers(0,5,0)
classOfChar :: CharClasses -> Char -> Char
classOfChar CharClasses
cc Char
c = case Char -> CharClasses -> Maybe Char
forall a. Ord a => a -> Set a -> Maybe a
Set.lookupLE Char
c CharClasses
cc of
    Just Char
c' -> Char
c'
    Maybe Char
Nothing -> Char
'\NUL'
#else
-- old containers: slow path
classOfChar _ c = c
#endif

collect :: RE a -> Set.Set CS.CharSet
collect :: RE a -> Set CharSet
collect = RE a -> Set CharSet
forall a. RE a -> Set CharSet
go where
    go :: RE a -> Set.Set CS.CharSet
    go :: RE a -> Set CharSet
go RE a
Null        = Set CharSet
forall a. Set a
Set.empty
    go RE a
Full        = Set CharSet
forall a. Set a
Set.empty
    go RE a
Eps         = Set CharSet
forall a. Set a
Set.empty
    go (Ch CharSet
cs)     = CharSet -> Set CharSet
forall a. a -> Set a
Set.singleton CharSet
cs
    go (App RE a
r RE a
s)   = Set CharSet -> Set CharSet -> Set CharSet
forall a. Ord a => Set a -> Set a -> Set a
Set.union (RE a -> Set CharSet
forall a. RE a -> Set CharSet
go RE a
r) (RE a -> Set CharSet
forall a. RE a -> Set CharSet
go RE a
s)
    go (Alt RE a
r RE a
s)   = Set CharSet -> Set CharSet -> Set CharSet
forall a. Ord a => Set a -> Set a -> Set a
Set.union (RE a -> Set CharSet
forall a. RE a -> Set CharSet
go RE a
r) (RE a -> Set CharSet
forall a. RE a -> Set CharSet
go RE a
s)
    go (Star RE a
r)    = RE a -> Set CharSet
forall a. RE a -> Set CharSet
go RE a
r
#ifdef RERE_INTERSECTION
    go (And r s)   = Set.union (go r) (go s)
#endif
    go (Var a
_)     = Set CharSet
forall a. Set a
Set.empty
    go (Let Name
_ RE a
r RE (Var a)
s) = Set CharSet -> Set CharSet -> Set CharSet
forall a. Ord a => Set a -> Set a -> Set a
Set.union (RE a -> Set CharSet
forall a. RE a -> Set CharSet
go RE a
r) (RE (Var a) -> Set CharSet
forall a. RE a -> Set CharSet
go RE (Var a)
s)
    go (Fix Name
_ RE (Var a)
r)   = RE (Var a) -> Set CharSet
forall a. RE a -> Set CharSet
go RE (Var a)
r

charsetClasses :: [CS.CharSet] -> CharClasses
charsetClasses :: [CharSet] -> CharClasses
charsetClasses = CharClasses -> [CharSet] -> CharClasses
go (Char -> CharClasses
forall a. a -> Set a
Set.singleton Char
'\NUL') where
    go :: CharClasses -> [CharSet] -> CharClasses
go CharClasses
acc []       = CharClasses
acc
    go CharClasses
acc (CharSet
cs:[CharSet]
css) = CharClasses -> [CharSet] -> CharClasses
go
        (CharClasses -> CharClasses -> CharClasses
forall a. Ord a => Set a -> Set a -> Set a
Set.union CharClasses
acc (CharClasses -> CharClasses) -> CharClasses -> CharClasses
forall a b. (a -> b) -> a -> b
$ [Char] -> CharClasses
forall a. Ord a => [a] -> Set a
Set.fromList ([Char] -> CharClasses) -> [Char] -> CharClasses
forall a b. (a -> b) -> a -> b
$ ((Char, Char) -> [Char]) -> [(Char, Char)] -> [Char]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (Char, Char) -> [Char]
forall a. (Eq a, Bounded a, Enum a) => (a, a) -> [a]
bounds ([(Char, Char)] -> [Char]) -> [(Char, Char)] -> [Char]
forall a b. (a -> b) -> a -> b
$ CharSet -> [(Char, Char)]
CS.toIntervalList CharSet
cs)
        [CharSet]
css

    bounds :: (a, a) -> [a]
bounds (a
x, a
y) | a
y a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
forall a. Bounded a => a
maxBound = [a
x]
                  | Bool
otherwise     = [a
x, a -> a
forall a. Enum a => a -> a
succ a
y]