{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE MagicHash #-}
{-# OPTIONS_HADDOCK not-home #-}
module Regex.Internal.CharSet
( CharSet(..)
, empty
, singleton
, fromRange
, fromList
, fromRanges
, insert
, insertRange
, delete
, deleteRange
, map
, not
, union
, difference
, intersection
, member
, notMember
, elems
, ranges
, valid
) where
import Prelude hiding (not, map)
import qualified Prelude
import Data.Char
import Data.String
import qualified Data.Foldable as F
import qualified Data.IntMap.Strict as IM
import Data.Semigroup (Semigroup(..), stimesIdempotentMonoid)
import GHC.Exts (Int(..), Char(..), chr#)
newtype CharSet = CharSet { CharSet -> IntMap Char
unCharSet :: IM.IntMap Char } deriving CharSet -> CharSet -> Bool
(CharSet -> CharSet -> Bool)
-> (CharSet -> CharSet -> Bool) -> Eq CharSet
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: CharSet -> CharSet -> Bool
== :: CharSet -> CharSet -> Bool
$c/= :: CharSet -> CharSet -> Bool
/= :: CharSet -> CharSet -> Bool
Eq
instance Show CharSet where
showsPrec :: Int -> CharSet -> ShowS
showsPrec Int
p CharSet
cs = Bool -> ShowS -> ShowS
showParen (Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
10) (ShowS -> ShowS) -> ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$
String -> ShowS
showString String
"fromRanges " ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Char, Char)] -> ShowS
forall a. Show a => a -> ShowS
shows (CharSet -> [(Char, Char)]
ranges CharSet
cs)
instance IsString CharSet where
fromString :: String -> CharSet
fromString = String -> CharSet
fromList
instance Semigroup CharSet where
<> :: CharSet -> CharSet -> CharSet
(<>) = CharSet -> CharSet -> CharSet
union
sconcat :: NonEmpty CharSet -> CharSet
sconcat = (CharSet -> CharSet -> CharSet)
-> CharSet -> NonEmpty CharSet -> CharSet
forall b a. (b -> a -> b) -> b -> NonEmpty a -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
F.foldl' CharSet -> CharSet -> CharSet
union CharSet
empty
{-# INLINE sconcat #-}
stimes :: forall b. Integral b => b -> CharSet -> CharSet
stimes = b -> CharSet -> CharSet
forall b a. (Integral b, Monoid a) => b -> a -> a
stimesIdempotentMonoid
instance Monoid CharSet where
mempty :: CharSet
mempty = CharSet
empty
mconcat :: [CharSet] -> CharSet
mconcat = (CharSet -> CharSet -> CharSet) -> CharSet -> [CharSet] -> CharSet
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
F.foldl' CharSet -> CharSet -> CharSet
union CharSet
empty
{-# INLINE mconcat #-}
empty :: CharSet
empty :: CharSet
empty = IntMap Char -> CharSet
CharSet IntMap Char
forall a. IntMap a
IM.empty
singleton :: Char -> CharSet
singleton :: Char -> CharSet
singleton Char
c = IntMap Char -> CharSet
CharSet (Int -> Char -> IntMap Char
forall a. Int -> a -> IntMap a
IM.singleton (Char -> Int
ord Char
c) Char
c)
fromRange :: (Char, Char) -> CharSet
fromRange :: (Char, Char) -> CharSet
fromRange (Char
cl,Char
ch) | Char
cl Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
ch = CharSet
empty
fromRange (Char
cl,Char
ch) = IntMap Char -> CharSet
CharSet (Int -> Char -> IntMap Char
forall a. Int -> a -> IntMap a
IM.singleton (Char -> Int
ord Char
cl) Char
ch)
fromList :: [Char] -> CharSet
fromList :: String -> CharSet
fromList = (CharSet -> Char -> CharSet) -> CharSet -> String -> CharSet
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
F.foldl' ((Char -> CharSet -> CharSet) -> CharSet -> Char -> CharSet
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> CharSet -> CharSet
insert) CharSet
empty
{-# INLINE fromList #-}
fromRanges :: [(Char, Char)] -> CharSet
fromRanges :: [(Char, Char)] -> CharSet
fromRanges = (CharSet -> (Char, Char) -> CharSet)
-> CharSet -> [(Char, Char)] -> CharSet
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
F.foldl' (((Char, Char) -> CharSet -> CharSet)
-> CharSet -> (Char, Char) -> CharSet
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Char, Char) -> CharSet -> CharSet
insertRange) CharSet
empty
{-# INLINE fromRanges #-}
insert :: Char -> CharSet -> CharSet
insert :: Char -> CharSet -> CharSet
insert Char
c = (Char, Char) -> CharSet -> CharSet
insertRange (Char
c,Char
c)
insertRange :: (Char, Char) -> CharSet -> CharSet
insertRange :: (Char, Char) -> CharSet -> CharSet
insertRange (Char
cl,Char
ch) CharSet
cs | Char
cl Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
ch = CharSet
cs
insertRange (Char
cl,Char
ch) CharSet
cs = CharSet
l CharSet -> CharSet -> CharSet
`join` (Char, Char) -> CharSet
fromRange (Char
cl,Char
ch) CharSet -> CharSet -> CharSet
`join` CharSet
r
where
(CharSet
l,CharSet
mr) = Char -> CharSet -> (CharSet, CharSet)
split Char
cl CharSet
cs
(CharSet
_,CharSet
r) = Char -> CharSet -> (CharSet, CharSet)
split (Int -> Char
unsafeChr (Char -> Int
ord Char
ch Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) CharSet
mr
delete :: Char -> CharSet -> CharSet
delete :: Char -> CharSet -> CharSet
delete Char
c = (Char, Char) -> CharSet -> CharSet
deleteRange (Char
c,Char
c)
deleteRange :: (Char, Char) -> CharSet -> CharSet
deleteRange :: (Char, Char) -> CharSet -> CharSet
deleteRange (Char
cl,Char
ch) CharSet
cs | Char
cl Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
ch = CharSet
cs
deleteRange (Char
cl,Char
ch) CharSet
cs = CharSet
l CharSet -> CharSet -> CharSet
`join` CharSet
r
where
(CharSet
l,CharSet
mr) = Char -> CharSet -> (CharSet, CharSet)
split Char
cl CharSet
cs
(CharSet
_,CharSet
r) = Char -> CharSet -> (CharSet, CharSet)
split (Int -> Char
unsafeChr (Char -> Int
ord Char
ch Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) CharSet
mr
map :: (Char -> Char) -> CharSet -> CharSet
map :: (Char -> Char) -> CharSet -> CharSet
map Char -> Char
f = String -> CharSet
fromList (String -> CharSet) -> (CharSet -> String) -> CharSet -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> ShowS
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> Char
f ShowS -> (CharSet -> String) -> CharSet -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharSet -> String
elems
not :: CharSet -> CharSet
not :: CharSet -> CharSet
not = IntMap Char -> CharSet
CharSet (IntMap Char -> CharSet)
-> (CharSet -> IntMap Char) -> CharSet -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, Char)] -> IntMap Char
forall a. [(Int, a)] -> IntMap a
IM.fromDistinctAscList ([(Int, Char)] -> IntMap Char)
-> (CharSet -> [(Int, Char)]) -> CharSet -> IntMap Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Char, Char)] -> [(Int, Char)]
complementRanges ([(Char, Char)] -> [(Int, Char)])
-> (CharSet -> [(Char, Char)]) -> CharSet -> [(Int, Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharSet -> [(Char, Char)]
ranges
union :: CharSet -> CharSet -> CharSet
union :: CharSet -> CharSet -> CharSet
union = (CharSet -> Char -> Char -> CharSet)
-> CharSet -> CharSet -> CharSet
forall b. (b -> Char -> Char -> b) -> b -> CharSet -> b
foldlRanges' (\CharSet
cs Char
cl Char
ch -> (Char, Char) -> CharSet -> CharSet
insertRange (Char
cl,Char
ch) CharSet
cs)
difference :: CharSet -> CharSet -> CharSet
difference :: CharSet -> CharSet -> CharSet
difference = (CharSet -> Char -> Char -> CharSet)
-> CharSet -> CharSet -> CharSet
forall b. (b -> Char -> Char -> b) -> b -> CharSet -> b
foldlRanges' (\CharSet
cs Char
cl Char
ch -> (Char, Char) -> CharSet -> CharSet
deleteRange (Char
cl,Char
ch) CharSet
cs)
intersection :: CharSet -> CharSet -> CharSet
intersection :: CharSet -> CharSet -> CharSet
intersection CharSet
lcs CharSet
rcs = CharSet -> CharSet
not (CharSet -> CharSet
not CharSet
lcs CharSet -> CharSet -> CharSet
`union` CharSet -> CharSet
not CharSet
rcs)
member :: Char -> CharSet -> Bool
member :: Char -> CharSet -> Bool
member Char
c CharSet
cs = case Int -> IntMap Char -> Maybe (Int, Char)
forall a. Int -> IntMap a -> Maybe (Int, a)
IM.lookupLE (Char -> Int
ord Char
c) (CharSet -> IntMap Char
unCharSet CharSet
cs) of
Maybe (Int, Char)
Nothing -> Bool
False
Just (Int
_,Char
ch) -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
ch
notMember :: Char -> CharSet -> Bool
notMember :: Char -> CharSet -> Bool
notMember Char
c = Bool -> Bool
Prelude.not (Bool -> Bool) -> (CharSet -> Bool) -> CharSet -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> CharSet -> Bool
member Char
c
elems :: CharSet -> [Char]
elems :: CharSet -> String
elems CharSet
cs = CharSet -> [(Char, Char)]
ranges CharSet
cs [(Char, Char)] -> ((Char, Char) -> String) -> String
forall a b. [a] -> (a -> [b]) -> [b]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \(Char
cl,Char
ch) -> [Char
cl..Char
ch]
{-# INLINE elems #-}
ranges :: CharSet -> [(Char, Char)]
ranges :: CharSet -> [(Char, Char)]
ranges CharSet
cs = [(Int -> Char
unsafeChr Int
cl, Char
ch) | (Int
cl,Char
ch) <- IntMap Char -> [(Int, Char)]
forall a. IntMap a -> [(Int, a)]
IM.assocs (CharSet -> IntMap Char
unCharSet CharSet
cs)]
{-# INLINE ranges #-}
split :: Char -> CharSet -> (CharSet, CharSet)
split :: Char -> CharSet -> (CharSet, CharSet)
split !Char
c CharSet
cs = case Int -> IntMap Char -> (IntMap Char, Maybe Char, IntMap Char)
forall a. Int -> IntMap a -> (IntMap a, Maybe a, IntMap a)
IM.splitLookup (Char -> Int
ord Char
c) (CharSet -> IntMap Char
unCharSet CharSet
cs) of
(IntMap Char
l, Just Char
ch, IntMap Char
r) -> (IntMap Char -> CharSet
CharSet IntMap Char
l, IntMap Char -> CharSet
CharSet (IntMap Char -> CharSet) -> IntMap Char -> CharSet
forall a b. (a -> b) -> a -> b
$ Int -> Char -> IntMap Char -> IntMap Char
forall a. Int -> a -> IntMap a -> IntMap a
IM.insert (Char -> Int
ord Char
c) Char
ch IntMap Char
r)
(IntMap Char
l, Maybe Char
Nothing, IntMap Char
r) -> case IntMap Char -> Maybe ((Int, Char), IntMap Char)
forall a. IntMap a -> Maybe ((Int, a), IntMap a)
IM.maxViewWithKey IntMap Char
l of
Just ((Int
lgl,Char
lgh),IntMap Char
l1)
| Char
lgh Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
c -> ( IntMap Char -> CharSet
CharSet (IntMap Char -> CharSet) -> IntMap Char -> CharSet
forall a b. (a -> b) -> a -> b
$ Int -> Char -> IntMap Char -> IntMap Char
forall a. Int -> a -> IntMap a -> IntMap a
IM.insert Int
lgl (Int -> Char
unsafeChr (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) IntMap Char
l1
, IntMap Char -> CharSet
CharSet (IntMap Char -> CharSet) -> IntMap Char -> CharSet
forall a b. (a -> b) -> a -> b
$ Int -> Char -> IntMap Char -> IntMap Char
forall a. Int -> a -> IntMap a -> IntMap a
IM.insert (Char -> Int
ord Char
c) Char
lgh IntMap Char
r )
Maybe ((Int, Char), IntMap Char)
_ -> (IntMap Char -> CharSet
CharSet IntMap Char
l, IntMap Char -> CharSet
CharSet IntMap Char
r)
join :: CharSet -> CharSet -> CharSet
join :: CharSet -> CharSet -> CharSet
join CharSet
lcs CharSet
rcs = case ( IntMap Char -> Maybe ((Int, Char), IntMap Char)
forall a. IntMap a -> Maybe ((Int, a), IntMap a)
IM.maxViewWithKey (CharSet -> IntMap Char
unCharSet CharSet
lcs)
, IntMap Char -> Maybe ((Int, Char), IntMap Char)
forall a. IntMap a -> Maybe ((Int, a), IntMap a)
IM.minViewWithKey (CharSet -> IntMap Char
unCharSet CharSet
rcs) ) of
(Maybe ((Int, Char), IntMap Char)
Nothing, Maybe ((Int, Char), IntMap Char)
Nothing) -> CharSet
empty
(Maybe ((Int, Char), IntMap Char)
Nothing, Maybe ((Int, Char), IntMap Char)
_) -> CharSet
rcs
(Maybe ((Int, Char), IntMap Char)
_, Maybe ((Int, Char), IntMap Char)
Nothing) -> CharSet
lcs
(Just ((Int
lgl,Char
lgh),IntMap Char
l1), Just ((Int
rgl,Char
rgh),IntMap Char
r1))
| Char -> Int
ord Char
lgh Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
rgl Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 -> IntMap Char -> CharSet
CharSet (IntMap Char -> CharSet) -> IntMap Char -> CharSet
forall a b. (a -> b) -> a -> b
$ IntMap Char -> IntMap Char -> IntMap Char
forall a. IntMap a -> IntMap a -> IntMap a
IM.union IntMap Char
l1 (Int -> Char -> IntMap Char -> IntMap Char
forall a. Int -> a -> IntMap a -> IntMap a
IM.insert Int
lgl Char
rgh IntMap Char
r1)
| Bool
otherwise -> IntMap Char -> CharSet
CharSet (IntMap Char -> CharSet) -> IntMap Char -> CharSet
forall a b. (a -> b) -> a -> b
$ IntMap Char -> IntMap Char -> IntMap Char
forall a. IntMap a -> IntMap a -> IntMap a
IM.union (CharSet -> IntMap Char
unCharSet CharSet
lcs) (CharSet -> IntMap Char
unCharSet CharSet
rcs)
foldlRanges' :: (b -> Char -> Char -> b) -> b -> CharSet -> b
foldlRanges' :: forall b. (b -> Char -> Char -> b) -> b -> CharSet -> b
foldlRanges' = \b -> Char -> Char -> b
f b
z CharSet
cs ->
(b -> Int -> Char -> b) -> b -> IntMap Char -> b
forall a b. (a -> Int -> b -> a) -> a -> IntMap b -> a
IM.foldlWithKey' (\b
b Int
cl Char
ch -> b -> Char -> Char -> b
f b
b (Int -> Char
unsafeChr Int
cl) Char
ch) b
z (CharSet -> IntMap Char
unCharSet CharSet
cs)
{-# INLINE foldlRanges' #-}
complementRanges :: [(Char, Char)] -> [(Int, Char)]
complementRanges :: [(Char, Char)] -> [(Int, Char)]
complementRanges = [(Char, Char)] -> [(Int, Char)]
go
where
go :: [(Char, Char)] -> [(Int, Char)]
go [] = [(Char -> Int
ord Char
forall a. Bounded a => a
minBound, Char
forall a. Bounded a => a
maxBound)]
go ((Char
l,Char
h):[(Char, Char)]
xs)
| Char
l Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
forall a. Bounded a => a
minBound = Char -> [(Char, Char)] -> [(Int, Char)]
go1 Char
h [(Char, Char)]
xs
| Bool
otherwise = (Char -> Int
ord Char
forall a. Bounded a => a
minBound, Char -> Char
unsafePred Char
l) (Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
: Char -> [(Char, Char)] -> [(Int, Char)]
go1 Char
h [(Char, Char)]
xs
go1 :: Char -> [(Char, Char)] -> [(Int, Char)]
go1 !Char
ph []
| Char
ph Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
forall a. Bounded a => a
maxBound = []
| Bool
otherwise = [(Char -> Int
ord Char
ph Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, Char
forall a. Bounded a => a
maxBound)]
go1 Char
ph ((Char
l,Char
h):[(Char, Char)]
xs) = (Char -> Int
ord Char
ph Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, Char -> Char
unsafePred Char
l) (Int, Char) -> [(Int, Char)] -> [(Int, Char)]
forall a. a -> [a] -> [a]
: Char -> [(Char, Char)] -> [(Int, Char)]
go1 Char
h [(Char, Char)]
xs
unsafePred :: Char -> Char
unsafePred Char
c = Int -> Char
unsafeChr (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
unsafeChr :: Int -> Char
unsafeChr :: Int -> Char
unsafeChr (I# Int#
i#) = Char# -> Char
C# (Int# -> Char#
chr# Int#
i#)
valid :: CharSet -> Bool
valid :: CharSet -> Bool
valid CharSet
cs = Bool
noneEmpty Bool -> Bool -> Bool
&& Bool
noneAdjacent
where
([Int]
ls,[Int]
hs) = [(Int, Int)] -> ([Int], [Int])
forall a b. [(a, b)] -> ([a], [b])
unzip (((Int, Char) -> (Int, Int)) -> [(Int, Char)] -> [(Int, Int)]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Char -> Int) -> (Int, Char) -> (Int, Int)
forall a b. (a -> b) -> (Int, a) -> (Int, b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> Int
ord) (IntMap Char -> [(Int, Char)]
forall a. IntMap a -> [(Int, a)]
IM.assocs (CharSet -> IntMap Char
unCharSet CharSet
cs)))
noneEmpty :: Bool
noneEmpty = [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
and ((Int -> Int -> Bool) -> [Int] -> [Int] -> [Bool]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
(<=) [Int]
ls [Int]
hs)
noneAdjacent :: Bool
noneAdjacent = case [Int]
ls of
[] -> Bool
True
Int
_:[Int]
ls' -> (Int -> Bool) -> [Int] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
1) ((Int -> Int -> Int) -> [Int] -> [Int] -> [Int]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (-) [Int]
ls' [Int]
hs)