#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 702
#endif
#if MIN_VERSION_base(4,7,0)
#endif
module Data.CharSet
(
CharSet(..)
, (\\)
, null
, size
, member
, notMember
, overlaps, isSubsetOf
, isComplemented
, build
, empty
, singleton
, full
, insert
, delete
, complement
, range
, union
, intersection
, difference
, filter
, partition
, map
, fold
, toList
, fromList
, toAscList
, fromAscList
, fromDistinctAscList
, fromCharSet
, toCharSet
, toArray
) where
import Data.Array.Unboxed hiding (range)
import Data.Data
import Data.Function (on)
import Data.IntSet (IntSet)
import Data.CharSet.ByteSet (ByteSet)
import qualified Data.CharSet.ByteSet as ByteSet
import Data.Bits hiding (complement)
import Data.Word
import Data.ByteString.Internal (c2w)
import Data.Semigroup
import qualified Data.IntSet as I
import qualified Data.List as L
import Prelude hiding (filter, map, null)
import qualified Prelude as P
import Text.Read
data CharSet = CharSet !Bool !ByteSet !IntSet
charSet :: Bool -> IntSet -> CharSet
charSet b s = CharSet b (ByteSet.fromList (fmap headByte (I.toAscList s))) s
headByte :: Int -> Word8
headByte i
| i <= 0x7f = toEnum i
| i <= 0x7ff = toEnum $ 0x80 + (i `shiftR` 6)
| i <= 0xffff = toEnum $ 0xe0 + (i `shiftR` 12)
| otherwise = toEnum $ 0xf0 + (i `shiftR` 18)
pos :: IntSet -> CharSet
pos = charSet True
neg :: IntSet -> CharSet
neg = charSet False
(\\) :: CharSet -> CharSet -> CharSet
(\\) = difference
build :: (Char -> Bool) -> CharSet
build p = fromDistinctAscList $ P.filter p [minBound .. maxBound]
map :: (Char -> Char) -> CharSet -> CharSet
map f (CharSet True _ i) = pos (I.map (fromEnum . f . toEnum) i)
map f (CharSet False _ i) = fromList $ P.map f $ P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh]
isComplemented :: CharSet -> Bool
isComplemented (CharSet True _ _) = False
isComplemented (CharSet False _ _) = True
toList :: CharSet -> String
toList (CharSet True _ i) = P.map toEnum (I.toList i)
toList (CharSet False _ i) = P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh]
toAscList :: CharSet -> String
toAscList (CharSet True _ i) = P.map toEnum (I.toAscList i)
toAscList (CharSet False _ i) = P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh]
empty :: CharSet
empty = pos I.empty
singleton :: Char -> CharSet
singleton = pos . I.singleton . fromEnum
full :: CharSet
full = neg I.empty
null :: CharSet -> Bool
null (CharSet True _ i) = I.null i
null (CharSet False _ i) = I.size i == numChars
size :: CharSet -> Int
size (CharSet True _ i) = I.size i
size (CharSet False _ i) = numChars I.size i
insert :: Char -> CharSet -> CharSet
insert c (CharSet True _ i) = pos (I.insert (fromEnum c) i)
insert c (CharSet False _ i) = neg (I.delete (fromEnum c) i)
range :: Char -> Char -> CharSet
range a b
| a <= b = fromDistinctAscList [a..b]
| otherwise = empty
delete :: Char -> CharSet -> CharSet
delete c (CharSet True _ i) = pos (I.delete (fromEnum c) i)
delete c (CharSet False _ i) = neg (I.insert (fromEnum c) i)
complement :: CharSet -> CharSet
complement (CharSet True s i) = CharSet False s i
complement (CharSet False s i) = CharSet True s i
union :: CharSet -> CharSet -> CharSet
union (CharSet True _ i) (CharSet True _ j) = pos (I.union i j)
union (CharSet True _ i) (CharSet False _ j) = neg (I.difference j i)
union (CharSet False _ i) (CharSet True _ j) = neg (I.difference i j)
union (CharSet False _ i) (CharSet False _ j) = neg (I.intersection i j)
intersection :: CharSet -> CharSet -> CharSet
intersection (CharSet True _ i) (CharSet True _ j) = pos (I.intersection i j)
intersection (CharSet True _ i) (CharSet False _ j) = pos (I.difference i j)
intersection (CharSet False _ i) (CharSet True _ j) = pos (I.difference j i)
intersection (CharSet False _ i) (CharSet False _ j) = neg (I.union i j)
difference :: CharSet -> CharSet -> CharSet
difference (CharSet True _ i) (CharSet True _ j) = pos (I.difference i j)
difference (CharSet True _ i) (CharSet False _ j) = pos (I.intersection i j)
difference (CharSet False _ i) (CharSet True _ j) = neg (I.union i j)
difference (CharSet False _ i) (CharSet False _ j) = pos (I.difference j i)
member :: Char -> CharSet -> Bool
member c (CharSet True b i)
| c <= toEnum 0x7f = ByteSet.member (c2w c) b
| otherwise = I.member (fromEnum c) i
member c (CharSet False b i)
| c <= toEnum 0x7f = not (ByteSet.member (c2w c) b)
| otherwise = I.notMember (fromEnum c) i
notMember :: Char -> CharSet -> Bool
notMember c s = not (member c s)
fold :: (Char -> b -> b) -> b -> CharSet -> b
fold f z (CharSet True _ i) = I.fold (f . toEnum) z i
fold f z (CharSet False _ i) = foldr f z $ P.filter (\x -> fromEnum x `I.notMember` i) [ul..uh]
filter :: (Char -> Bool) -> CharSet -> CharSet
filter p (CharSet True _ i) = pos (I.filter (p . toEnum) i)
filter p (CharSet False _ i) = neg $ foldr (I.insert) i $ P.filter (\x -> (x `I.notMember` i) && not (p (toEnum x))) [ol..oh]
partition :: (Char -> Bool) -> CharSet -> (CharSet, CharSet)
partition p (CharSet True _ i) = (pos l, pos r)
where (l,r) = I.partition (p . toEnum) i
partition p (CharSet False _ i) = (neg (foldr I.insert i l), neg (foldr I.insert i r))
where (l,r) = L.partition (p . toEnum) $ P.filter (\x -> x `I.notMember` i) [ol..oh]
overlaps :: CharSet -> CharSet -> Bool
overlaps (CharSet True _ i) (CharSet True _ j) = not (I.null (I.intersection i j))
overlaps (CharSet True _ i) (CharSet False _ j) = not (I.isSubsetOf j i)
overlaps (CharSet False _ i) (CharSet True _ j) = not (I.isSubsetOf i j)
overlaps (CharSet False _ i) (CharSet False _ j) = any (\x -> I.notMember x i && I.notMember x j) [ol..oh]
isSubsetOf :: CharSet -> CharSet -> Bool
isSubsetOf (CharSet True _ i) (CharSet True _ j) = I.isSubsetOf i j
isSubsetOf (CharSet True _ i) (CharSet False _ j) = I.null (I.intersection i j)
isSubsetOf (CharSet False _ i) (CharSet True _ j) = all (\x -> I.member x i && I.member x j) [ol..oh]
isSubsetOf (CharSet False _ i) (CharSet False _ j) = I.isSubsetOf j i
fromList :: String -> CharSet
fromList = pos . I.fromList . P.map fromEnum
fromAscList :: String -> CharSet
fromAscList = pos . I.fromAscList . P.map fromEnum
fromDistinctAscList :: String -> CharSet
fromDistinctAscList = pos . I.fromDistinctAscList . P.map fromEnum
ul, uh :: Char
ul = minBound
uh = maxBound
ol, oh :: Int
ol = fromEnum ul
oh = fromEnum uh
numChars :: Int
numChars = oh ol + 1
#if MIN_VERSION_base(4,7,0)
deriving instance Typeable CharSet
#else
instance Typeable CharSet where
typeOf _ = mkTyConApp charSetTyCon []
charSetTyCon :: TyCon
#if MIN_VERSION_base(4,4,0)
charSetTyCon = mkTyCon3 "charset" "Data.CharSet" "CharSet"
#else
charSetTyCon = mkTyCon "Data.CharSet.CharSet"
#endif
#endif
instance Data CharSet where
gfoldl k z set
| isComplemented set = z complement `k` complement set
| otherwise = z fromList `k` toList set
toConstr set
| isComplemented set = complementConstr
| otherwise = fromListConstr
dataTypeOf _ = charSetDataType
gunfold k z c = case constrIndex c of
1 -> k (z fromList)
2 -> k (z complement)
_ -> error "gunfold"
fromListConstr :: Constr
fromListConstr = mkConstr charSetDataType "fromList" [] Prefix
complementConstr :: Constr
complementConstr = mkConstr charSetDataType "complement" [] Prefix
charSetDataType :: DataType
charSetDataType = mkDataType "Data.CharSet.CharSet" [fromListConstr, complementConstr]
fromCharSet :: CharSet -> (Bool, IntSet)
fromCharSet (CharSet b _ i) = (b, i)
toCharSet :: IntSet -> CharSet
toCharSet = pos
instance Eq CharSet where
(==) = (==) `on` toAscList
instance Ord CharSet where
compare = compare `on` toAscList
instance Bounded CharSet where
minBound = empty
maxBound = full
toArray :: CharSet -> UArray Char Bool
toArray set = array (minBound, maxBound) $ fmap (\x -> (x, x `member` set)) [minBound .. maxBound]
instance Show CharSet where
showsPrec d i
| isComplemented i = showParen (d > 10) $ showString "complement " . showsPrec 11 (complement i)
| otherwise = showParen (d > 10) $ showString "fromDistinctAscList " . showsPrec 11 (toAscList i)
instance Read CharSet where
readPrec = parens $ complemented +++ normal
where
complemented = prec 10 $ do
Ident "complement" <- lexP
complement `fmap` step readPrec
normal = prec 10 $ do
Ident "fromDistinctAscList" <- lexP
fromDistinctAscList `fmap` step readPrec
instance Semigroup CharSet where
(<>) = union
instance Monoid CharSet where
mempty = empty
mappend = union