{-# OPTIONS_GHC -fspec-constr #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
module Data.CharSet
(
CharSet(..)
, (\\)
, null
, size
, member
, notMember
, overlaps, isSubsetOf
, isComplemented
, build
, empty
, singleton
, full
, insert
, delete
, complement
, range
, union
, intersection
, difference
, filter
, partition
, map
, fold
, toList
, fromList
, toAscList
, fromAscList
, fromDistinctAscList
, fromCharSet
, toCharSet
, toArray
) where
import Data.Array.Unboxed hiding (range)
import Data.Data
import Data.Function (on)
import Data.IntSet (IntSet)
import Data.CharSet.ByteSet (ByteSet)
import qualified Data.CharSet.ByteSet as ByteSet
import Data.Bits hiding (complement)
import Data.Word
import Data.ByteString.Internal (c2w)
import Data.Semigroup
import qualified Data.IntSet as I
import qualified Data.List as L
import Prelude hiding (filter, map, null)
import qualified Prelude as P
import Text.Read
data CharSet = CharSet !Bool {-# UNPACK #-} !ByteSet !IntSet
deriving Typeable
charSet :: Bool -> IntSet -> CharSet
charSet b s = CharSet b (ByteSet.fromList (fmap headByte (I.toAscList s))) s
headByte :: Int -> Word8
headByte i
| i <= 0x7f = toEnum i
| i <= 0x7ff = toEnum $ 0x80 + (i `shiftR` 6)
| i <= 0xffff = toEnum $ 0xe0 + (i `shiftR` 12)
| otherwise = toEnum $ 0xf0 + (i `shiftR` 18)
pos :: IntSet -> CharSet
pos = charSet True
neg :: IntSet -> CharSet
neg = charSet False
(\\) :: CharSet -> CharSet -> CharSet
(\\) = difference
build :: (Char -> Bool) -> CharSet
build p = fromDistinctAscList $ P.filter p [minBound .. maxBound]
{-# INLINE build #-}
map :: (Char -> Char) -> CharSet -> CharSet
map f (CharSet True _ i) = pos (I.map (fromEnum . f . toEnum) i)
map f (CharSet False _ i) = fromList $ P.map f $ P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh]
{-# INLINE map #-}
isComplemented :: CharSet -> Bool
isComplemented (CharSet True _ _) = False
isComplemented (CharSet False _ _) = True
{-# INLINE isComplemented #-}
toList :: CharSet -> String
toList (CharSet True _ i) = P.map toEnum (I.toList i)
toList (CharSet False _ i) = P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh]
{-# INLINE toList #-}
toAscList :: CharSet -> String
toAscList (CharSet True _ i) = P.map toEnum (I.toAscList i)
toAscList (CharSet False _ i) = P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh]
{-# INLINE toAscList #-}
empty :: CharSet
empty = pos I.empty
singleton :: Char -> CharSet
singleton = pos . I.singleton . fromEnum
{-# INLINE singleton #-}
full :: CharSet
full = neg I.empty
null :: CharSet -> Bool
null (CharSet True _ i) = I.null i
null (CharSet False _ i) = I.size i == numChars
{-# INLINE null #-}
size :: CharSet -> Int
size (CharSet True _ i) = I.size i
size (CharSet False _ i) = numChars - I.size i
{-# INLINE size #-}
insert :: Char -> CharSet -> CharSet
insert c (CharSet True _ i) = pos (I.insert (fromEnum c) i)
insert c (CharSet False _ i) = neg (I.delete (fromEnum c) i)
{-# INLINE insert #-}
range :: Char -> Char -> CharSet
range a b
| a <= b = fromDistinctAscList [a..b]
| otherwise = empty
delete :: Char -> CharSet -> CharSet
delete c (CharSet True _ i) = pos (I.delete (fromEnum c) i)
delete c (CharSet False _ i) = neg (I.insert (fromEnum c) i)
{-# INLINE delete #-}
complement :: CharSet -> CharSet
complement (CharSet True s i) = CharSet False s i
complement (CharSet False s i) = CharSet True s i
{-# INLINE complement #-}
union :: CharSet -> CharSet -> CharSet
union (CharSet True _ i) (CharSet True _ j) = pos (I.union i j)
union (CharSet True _ i) (CharSet False _ j) = neg (I.difference j i)
union (CharSet False _ i) (CharSet True _ j) = neg (I.difference i j)
union (CharSet False _ i) (CharSet False _ j) = neg (I.intersection i j)
{-# INLINE union #-}
intersection :: CharSet -> CharSet -> CharSet
intersection (CharSet True _ i) (CharSet True _ j) = pos (I.intersection i j)
intersection (CharSet True _ i) (CharSet False _ j) = pos (I.difference i j)
intersection (CharSet False _ i) (CharSet True _ j) = pos (I.difference j i)
intersection (CharSet False _ i) (CharSet False _ j) = neg (I.union i j)
{-# INLINE intersection #-}
difference :: CharSet -> CharSet -> CharSet
difference (CharSet True _ i) (CharSet True _ j) = pos (I.difference i j)
difference (CharSet True _ i) (CharSet False _ j) = pos (I.intersection i j)
difference (CharSet False _ i) (CharSet True _ j) = neg (I.union i j)
difference (CharSet False _ i) (CharSet False _ j) = pos (I.difference j i)
{-# INLINE difference #-}
member :: Char -> CharSet -> Bool
member c (CharSet True b i)
| c <= toEnum 0x7f = ByteSet.member (c2w c) b
| otherwise = I.member (fromEnum c) i
member c (CharSet False b i)
| c <= toEnum 0x7f = not (ByteSet.member (c2w c) b)
| otherwise = I.notMember (fromEnum c) i
{-# INLINE member #-}
notMember :: Char -> CharSet -> Bool
notMember c s = not (member c s)
{-# INLINE notMember #-}
fold :: (Char -> b -> b) -> b -> CharSet -> b
fold f z (CharSet True _ i) = I.fold (f . toEnum) z i
fold f z (CharSet False _ i) = foldr f z $ P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh]
{-# INLINE fold #-}
filter :: (Char -> Bool) -> CharSet -> CharSet
filter p (CharSet True _ i) = pos (I.filter (p . toEnum) i)
filter p (CharSet False _ i) = neg $ foldr (I.insert) i $ P.filter (\x -> (x `I.notMember` i) && not (p (toEnum x))) [ol..oh]
{-# INLINE filter #-}
partition :: (Char -> Bool) -> CharSet -> (CharSet, CharSet)
partition p (CharSet True _ i) = (pos l, pos r)
where (l,r) = I.partition (p . toEnum) i
partition p (CharSet False _ i) = (neg (foldr I.insert i l), neg (foldr I.insert i r))
where (l,r) = L.partition (p . toEnum) $ P.filter (\x -> x `I.notMember` i) [ol..oh]
{-# INLINE partition #-}
overlaps :: CharSet -> CharSet -> Bool
overlaps (CharSet True _ i) (CharSet True _ j) = not (I.null (I.intersection i j))
overlaps (CharSet True _ i) (CharSet False _ j) = not (I.isSubsetOf j i)
overlaps (CharSet False _ i) (CharSet True _ j) = not (I.isSubsetOf i j)
overlaps (CharSet False _ i) (CharSet False _ j) = any (\x -> I.notMember x i && I.notMember x j) [ol..oh]
{-# INLINE overlaps #-}
isSubsetOf :: CharSet -> CharSet -> Bool
isSubsetOf (CharSet True _ i) (CharSet True _ j) = I.isSubsetOf i j
isSubsetOf (CharSet True _ i) (CharSet False _ j) = I.null (I.intersection i j)
isSubsetOf (CharSet False _ i) (CharSet True _ j) = all (\x -> I.member x i && I.member x j) [ol..oh]
isSubsetOf (CharSet False _ i) (CharSet False _ j) = I.isSubsetOf j i
{-# INLINE isSubsetOf #-}
fromList :: String -> CharSet
fromList = pos . I.fromList . P.map fromEnum
{-# INLINE fromList #-}
fromAscList :: String -> CharSet
fromAscList = pos . I.fromAscList . P.map fromEnum
{-# INLINE fromAscList #-}
fromDistinctAscList :: String -> CharSet
fromDistinctAscList = pos . I.fromDistinctAscList . P.map fromEnum
{-# INLINE fromDistinctAscList #-}
ul, uh :: Char
ul = minBound
uh = maxBound
{-# INLINE ul #-}
{-# INLINE uh #-}
ol, oh :: Int
ol = fromEnum ul
oh = fromEnum uh
{-# INLINE ol #-}
{-# INLINE oh #-}
numChars :: Int
numChars = oh - ol + 1
{-# INLINE numChars #-}
instance Data CharSet where
gfoldl k z set
| isComplemented set = z complement `k` complement set
| otherwise = z fromList `k` toList set
toConstr set
| isComplemented set = complementConstr
| otherwise = fromListConstr
dataTypeOf _ = charSetDataType
gunfold k z c = case constrIndex c of
1 -> k (z fromList)
2 -> k (z complement)
_ -> error "gunfold"
fromListConstr :: Constr
fromListConstr = mkConstr charSetDataType "fromList" [] Prefix
{-# NOINLINE fromListConstr #-}
complementConstr :: Constr
complementConstr = mkConstr charSetDataType "complement" [] Prefix
{-# NOINLINE complementConstr #-}
charSetDataType :: DataType
charSetDataType = mkDataType "Data.CharSet.CharSet" [fromListConstr, complementConstr]
{-# NOINLINE charSetDataType #-}
fromCharSet :: CharSet -> (Bool, IntSet)
fromCharSet (CharSet b _ i) = (b, i)
{-# INLINE fromCharSet #-}
toCharSet :: IntSet -> CharSet
toCharSet = pos
{-# INLINE toCharSet #-}
instance Eq CharSet where
(==) = (==) `on` toAscList
instance Ord CharSet where
compare = compare `on` toAscList
instance Bounded CharSet where
minBound = empty
maxBound = full
toArray :: CharSet -> UArray Char Bool
toArray set = array (minBound, maxBound) $ fmap (\x -> (x, x `member` set)) [minBound .. maxBound]
instance Show CharSet where
showsPrec d i
| isComplemented i = showParen (d > 10) $ showString "complement " . showsPrec 11 (complement i)
| otherwise = showParen (d > 10) $ showString "fromDistinctAscList " . showsPrec 11 (toAscList i)
instance Read CharSet where
readPrec = parens $ complemented +++ normal
where
complemented = prec 10 $ do
Ident "complement" <- lexP
complement `fmap` step readPrec
normal = prec 10 $ do
Ident "fromDistinctAscList" <- lexP
fromDistinctAscList `fmap` step readPrec
instance Semigroup CharSet where
(<>) = union
instance Monoid CharSet where
mempty = empty
mappend = union