module Language.Lexer.Tlex.Plugin.Encoding.UTF8 (
charSetPUtf8,
) where
import Language.Lexer.Tlex.Prelude
import qualified Data.CharSet as CharSet
import qualified Data.EnumMap.Strict as EnumMap
import qualified Data.EnumSet as EnumSet
import qualified Data.IntSet as IntSet
import qualified Language.Lexer.Tlex.Data.NonEmptyEnumStringSet as NonEmptyEnumStringSet
import qualified Language.Lexer.Tlex.Plugin.Encoding.CharSetP as CharSetP
import qualified Language.Lexer.Tlex.Syntax as Tlex
charSetPUtf8 :: CharSetP.CharSetEncoder m => CharSetP.CharSetP m
charSetPUtf8 :: forall (m :: * -> *). CharSetEncoder m => CharSetP m
charSetPUtf8 = CharSetP.CharSetP
{ $sel:charSetEncodingP:CharSetP :: CharSet -> m Pattern
CharSetP.charSetEncodingP = \case
CharSet.CharSet Bool
True ByteSet
_ IntSet
is -> forall {m :: * -> *}. CharSetEncoder m => IntSet -> m Pattern
goStraight IntSet
is
CharSet.CharSet Bool
False ByteSet
_ IntSet
is -> forall {m :: * -> *}. CharSetEncoder m => IntSet -> m Pattern
goComplement IntSet
is
}
where
goStraight :: IntSet -> m Pattern
goStraight IntSet
is = do
NonEmptyEnumStringSet Word8
bsSet <- forall (m :: * -> *).
CharSetEncoder m =>
IntSet -> m (NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 IntSet
is
forall (f :: * -> *) a. Applicative f => a -> f a
pure do forall {k}. Enum k => NonEmptyEnumStringSet k -> Pattern k
straightP NonEmptyEnumStringSet Word8
bsSet
straightP :: NonEmptyEnumStringSet k -> Pattern k
straightP NonEmptyEnumStringSet k
s =
let singleByteP :: Pattern k
singleByteP = forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP do
forall a. NonEmptyEnumStringSet a -> EnumSet a
NonEmptyEnumStringSet.singleEnums NonEmptyEnumStringSet k
s
in forall e. Enum e => [Pattern e] -> Pattern e
Tlex.orP do
Pattern k
singleBytePforall a. a -> [a] -> [a]
:
[ forall e. Enum e => [e] -> Pattern e
Tlex.enumsP [k
c] forall a. Semigroup a => a -> a -> a
<> NonEmptyEnumStringSet k -> Pattern k
straightP NonEmptyEnumStringSet k
s'
| (k
c, NonEmptyEnumStringSet k
s') <- forall k a. Enum k => EnumMap k a -> [(k, a)]
EnumMap.assocs do
forall a.
NonEmptyEnumStringSet a -> EnumMap a (NonEmptyEnumStringSet a)
NonEmptyEnumStringSet.enumStrings NonEmptyEnumStringSet k
s
]
goComplement :: IntSet -> m Pattern
goComplement IntSet
is = do
NonEmptyEnumStringSet Word8
bsSet <- forall (m :: * -> *).
CharSetEncoder m =>
IntSet -> m (NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 IntSet
is
forall (f :: * -> *) a. Applicative f => a -> f a
pure do NonEmptyEnumStringSet Word8 -> Pattern
complementPFromEnumStrings NonEmptyEnumStringSet Word8
bsSet
charSetToByteStringSetUtf8 :: CharSetP.CharSetEncoder m
=> IntSet.IntSet -> m (NonEmptyEnumStringSet.NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 :: forall (m :: * -> *).
CharSetEncoder m =>
IntSet -> m (NonEmptyEnumStringSet Word8)
charSetToByteStringSetUtf8 IntSet
is = forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM
do \NonEmptyEnumStringSet Word8
s Int
c -> forall {f :: * -> *}.
CharSetEncoder f =>
NonEmptyEnumStringSet Word8
-> Int -> f (NonEmptyEnumStringSet Word8)
foldStep NonEmptyEnumStringSet Word8
s Int
c
do forall a. Enum a => NonEmptyEnumStringSet a
NonEmptyEnumStringSet.empty
do IntSet -> [Int]
IntSet.toAscList IntSet
is
where
foldStep :: NonEmptyEnumStringSet Word8
-> Int -> f (NonEmptyEnumStringSet Word8)
foldStep NonEmptyEnumStringSet Word8
s Int
c = if
| Int
c forall a. Ord a => a -> a -> Bool
<= Int
0x7F -> forall (f :: * -> *) a. Applicative f => a -> f a
pure do
forall a.
Enum a =>
a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insertSingleByte
do forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
c
do NonEmptyEnumStringSet Word8
s
| Int
c forall a. Ord a => a -> a -> Bool
<= Int
0x7FF ->
let (Word8
c', [Word8]
l) = Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
1
in forall (f :: * -> *) a. Applicative f => a -> f a
pure do
forall a.
Enum a =>
NonEmpty a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insert
do (Word8
0xC0 forall a. Num a => a -> a -> a
+ Word8
c') forall a. a -> [a] -> NonEmpty a
:| [Word8]
l
do NonEmptyEnumStringSet Word8
s
| Int
0xD800 forall a. Ord a => a -> a -> Bool
<= Int
c Bool -> Bool -> Bool
&& Int
c forall a. Ord a => a -> a -> Bool
<= Int
0xDFFF -> do
forall (m :: * -> *). CharSetEncoder m => EncodeWarning -> m ()
CharSetP.reportEncodeWarning
do Char -> EncodeWarning
CharSetP.NotSupportedChar do forall a. Enum a => Int -> a
toEnum Int
c
forall (f :: * -> *) a. Applicative f => a -> f a
pure NonEmptyEnumStringSet Word8
s
| Int
c forall a. Ord a => a -> a -> Bool
<= Int
0xFFFF ->
let (Word8
c', [Word8]
l) = Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
2
in forall (f :: * -> *) a. Applicative f => a -> f a
pure do
forall a.
Enum a =>
NonEmpty a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insert
do (Word8
0xE0 forall a. Num a => a -> a -> a
+ Word8
c') forall a. a -> [a] -> NonEmpty a
:| [Word8]
l
do NonEmptyEnumStringSet Word8
s
| Bool
otherwise ->
let (Word8
c', [Word8]
l) = Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
3
in forall (f :: * -> *) a. Applicative f => a -> f a
pure do
forall a.
Enum a =>
NonEmpty a -> NonEmptyEnumStringSet a -> NonEmptyEnumStringSet a
NonEmptyEnumStringSet.insert
do (Word8
0xF0 forall a. Num a => a -> a -> a
+ Word8
c') forall a. a -> [a] -> NonEmpty a
:| [Word8]
l
do NonEmptyEnumStringSet Word8
s
stringTails :: Int -> Int -> (Word8, [Word8])
stringTails :: Int -> Int -> (Word8, [Word8])
stringTails Int
c Int
n = forall {t} {t} {a} {a}.
(Integral t, Num t, Num a, Num a, Eq t) =>
t -> [a] -> t -> (a, [a])
stringTails' Int
c [] Int
n
stringTails' :: t -> [a] -> t -> (a, [a])
stringTails' t
c [a]
l = \case
t
0 -> (forall a b. (Integral a, Num b) => a -> b
fromIntegral t
c, [a]
l)
t
n ->
let (t
c', t
x) = forall a. Integral a => a -> a -> (a, a)
quotRem t
c t
0x40
x' :: a
x' = forall a b. (Integral a, Num b) => a -> b
fromIntegral do t
0x80 forall a. Num a => a -> a -> a
+ t
x
in t -> [a] -> t -> (a, [a])
stringTails' t
c'
do a
x' forall a. a -> [a] -> [a]
: [a]
l
do t
n forall a. Num a => a -> a -> a
- t
1
complementPFromEnumStrings
:: NonEmptyEnumStringSet.NonEmptyEnumStringSet Word8 -> Tlex.Pattern Word8
NonEmptyEnumStringSet Word8
ess0 = forall e. Enum e => [Pattern e] -> Pattern e
Tlex.orP
[ [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr1es] []
, [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr2es, EnumSet Word8
seqes] [Pattern
seqesP]
, [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p1o1es, EnumSet Word8
pr3p1o2es, EnumSet Word8
seqes]
[ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p1o1es
, forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p1o2es
, Pattern
seqesP
]
, [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p2es, EnumSet Word8
seqes, EnumSet Word8
seqes]
[ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p2es
, Pattern
seqesP
, Pattern
seqesP
]
, [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p3o1es, EnumSet Word8
pr3p3o2es, EnumSet Word8
seqes]
[ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p3o1es
, forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p3o2es
, Pattern
seqesP
]
, [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr3p4es, EnumSet Word8
seqes, EnumSet Word8
seqes]
[ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr3p4es
, Pattern
seqesP
, Pattern
seqesP
]
, [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr4p1o1es, EnumSet Word8
pr4p1o2es, EnumSet Word8
seqes, EnumSet Word8
seqes]
[ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p1o1es
, forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p1o2es
, Pattern
seqesP
, Pattern
seqesP
]
, [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr4p2es, EnumSet Word8
seqes, EnumSet Word8
seqes, EnumSet Word8
seqes]
[ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p1o1es
, Pattern
seqesP
, Pattern
seqesP
, Pattern
seqesP
]
, [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8
pr4p3o1es, EnumSet Word8
pr4p3o2es, EnumSet Word8
seqes, EnumSet Word8
seqes]
[ forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p3o1es
, forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
pr4p3o2es
, Pattern
seqesP
, Pattern
seqesP
]
]
where
seqes :: EnumSet Word8
seqes = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x80..Word8
0xBF]
seqesP :: Pattern
seqesP = forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet Word8
seqes
pr1es :: EnumSet Word8
pr1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x00..Word8
0x7F]
pr2es :: EnumSet Word8
pr2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xC2..Word8
0xDF]
pr3p1o1es :: EnumSet Word8
pr3p1o1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xE0]
pr3p1o2es :: EnumSet Word8
pr3p1o2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xA0..Word8
0xBF]
pr3p2es :: EnumSet Word8
pr3p2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xE1..Word8
0xEC]
pr3p3o1es :: EnumSet Word8
pr3p3o1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xED]
pr3p3o2es :: EnumSet Word8
pr3p3o2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x80..Word8
0x9F]
pr3p4es :: EnumSet Word8
pr3p4es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xEE..Word8
0xEF]
pr4p1o1es :: EnumSet Word8
pr4p1o1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xF0]
pr4p1o2es :: EnumSet Word8
pr4p1o2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x90..Word8
0xBF]
pr4p2es :: EnumSet Word8
pr4p2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xF1..Word8
0xF3]
pr4p3o1es :: EnumSet Word8
pr4p3o1es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0xF4]
pr4p3o2es :: EnumSet Word8
pr4p3o2es = forall k. Enum k => [k] -> EnumSet k
EnumSet.fromList @Word8 [Word8
0x80..Word8
0x8F]
go :: [EnumSet Word8] -> [Pattern] -> Pattern
go [EnumSet Word8]
bess [Pattern]
restPs = forall {k}.
Enum k =>
[EnumSet k] -> [Pattern k] -> NonEmptyEnumStringSet k -> Pattern k
go' [EnumSet Word8]
bess [Pattern]
restPs NonEmptyEnumStringSet Word8
ess0
go' :: [EnumSet k] -> [Pattern k] -> NonEmptyEnumStringSet k -> Pattern k
go' [EnumSet k]
bess [Pattern k]
restPs NonEmptyEnumStringSet k
ess = case [EnumSet k]
bess of
[] -> forall a. Monoid a => a
mempty
[EnumSet k
bes] -> forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP
do EnumSet k
bes forall k. EnumSet k -> EnumSet k -> EnumSet k
`EnumSet.difference` forall a. NonEmptyEnumStringSet a -> EnumSet a
NonEmptyEnumStringSet.singleEnums NonEmptyEnumStringSet k
ess
EnumSet k
bes:[EnumSet k]
bess2 ->
let mess :: EnumMap k (NonEmptyEnumStringSet k)
mess = forall a.
NonEmptyEnumStringSet a -> EnumMap a (NonEmptyEnumStringSet a)
NonEmptyEnumStringSet.enumStrings NonEmptyEnumStringSet k
ess
(EnumSet k
nes, EnumSet k
ces) = forall k.
Enum k =>
(k -> Bool) -> EnumSet k -> (EnumSet k, EnumSet k)
EnumSet.partition
do \k
be -> forall k a. Enum k => k -> EnumMap k a -> Bool
EnumMap.member k
be EnumMap k (NonEmptyEnumStringSet k)
mess
EnumSet k
bes
cesP :: Pattern k
cesP = forall e. Enum e => EnumSet e -> Pattern e
Tlex.straightEnumSetP EnumSet k
ces forall a. Semigroup a => a -> a -> a
<> forall a. Monoid a => [a] -> a
mconcat [Pattern k]
restPs
in forall e. Enum e => [Pattern e] -> Pattern e
Tlex.orP do
Pattern k
cesPforall a. a -> [a] -> [a]
:
[ [EnumSet k] -> [Pattern k] -> NonEmptyEnumStringSet k -> Pattern k
go' [EnumSet k]
bess2 [Pattern k]
nrestPs NonEmptyEnumStringSet k
ness
| k
ne <- forall k. Enum k => EnumSet k -> [k]
EnumSet.toList EnumSet k
nes
, let ness :: NonEmptyEnumStringSet k
ness = case forall k a. Enum k => k -> EnumMap k a -> Maybe a
EnumMap.lookup k
ne EnumMap k (NonEmptyEnumStringSet k)
mess of
Just NonEmptyEnumStringSet k
x -> NonEmptyEnumStringSet k
x
Maybe (NonEmptyEnumStringSet k)
Nothing -> forall a. HasCallStack => [Char] -> a
error [Char]
"unreachable"
, let nrestPs :: [Pattern k]
nrestPs = case [Pattern k]
restPs of
[] -> []
Pattern k
_:[Pattern k]
xs -> [Pattern k]
xs
]