module UCD.Parser.Common (
readCodePoint,
readCodePointM,
UnicodeRange (..),
parseRange,
pattern Comma,
pattern HashTag,
pattern NewLine,
pattern Period,
pattern SemiColon,
pattern Slash,
) where
import Data.ByteString qualified as B
import Data.ByteString.Char8 qualified as B8
import Data.Char (chr)
import Data.Word (Word8)
readCodePoint ∷ B.ByteString → Char
readCodePoint :: ByteString -> Char
readCodePoint = Int -> Char
chr (Int -> Char) -> (ByteString -> Int) -> ByteString -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Int
forall a. Read a => String -> a
read (String -> Int) -> (ByteString -> String) -> ByteString -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
B8.unpack (ByteString -> String)
-> (ByteString -> ByteString) -> ByteString -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString
"0x" <>)
readCodePointM ∷ B.ByteString → Maybe Char
readCodePointM :: ByteString -> Maybe Char
readCodePointM ByteString
raw
| ByteString -> Bool
B.null ByteString
raw = Maybe Char
forall a. Maybe a
Nothing
| Bool
otherwise = Char -> Maybe Char
forall a. a -> Maybe a
Just (ByteString -> Char
readCodePoint ByteString
raw)
data UnicodeRange a
= SingleChar
{ forall a. UnicodeRange a -> Char
_first ∷ !Char
}
| CharRange
{ _first ∷ !Char
, forall a. UnicodeRange a -> Char
_last ∷ !Char
, forall a. UnicodeRange a -> a
_rangeName ∷ !a
}
deriving (UnicodeRange a -> UnicodeRange a -> Bool
(UnicodeRange a -> UnicodeRange a -> Bool)
-> (UnicodeRange a -> UnicodeRange a -> Bool)
-> Eq (UnicodeRange a)
forall a. Eq a => UnicodeRange a -> UnicodeRange a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall a. Eq a => UnicodeRange a -> UnicodeRange a -> Bool
== :: UnicodeRange a -> UnicodeRange a -> Bool
$c/= :: forall a. Eq a => UnicodeRange a -> UnicodeRange a -> Bool
/= :: UnicodeRange a -> UnicodeRange a -> Bool
Eq, Int -> UnicodeRange a -> ShowS
[UnicodeRange a] -> ShowS
UnicodeRange a -> String
(Int -> UnicodeRange a -> ShowS)
-> (UnicodeRange a -> String)
-> ([UnicodeRange a] -> ShowS)
-> Show (UnicodeRange a)
forall a. Show a => Int -> UnicodeRange a -> ShowS
forall a. Show a => [UnicodeRange a] -> ShowS
forall a. Show a => UnicodeRange a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Show a => Int -> UnicodeRange a -> ShowS
showsPrec :: Int -> UnicodeRange a -> ShowS
$cshow :: forall a. Show a => UnicodeRange a -> String
show :: UnicodeRange a -> String
$cshowList :: forall a. Show a => [UnicodeRange a] -> ShowS
showList :: [UnicodeRange a] -> ShowS
Show)
parseRange ∷ B.ByteString → UnicodeRange ()
parseRange :: ByteString -> UnicodeRange ()
parseRange ByteString
raw = case (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
B.span (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
Period) ByteString
raw of
(ByteString -> Char
readCodePoint → Char
ch1, ByteString
rest)
| ByteString -> Bool
B.null ByteString
rest → Char -> UnicodeRange ()
forall a. Char -> UnicodeRange a
SingleChar Char
ch1
| Bool
otherwise → Char -> Char -> () -> UnicodeRange ()
forall a. Char -> Char -> a -> UnicodeRange a
CharRange Char
ch1 (ByteString -> Char
readCodePoint (Int -> ByteString -> ByteString
B.drop Int
2 ByteString
rest)) ()
pattern NewLine ∷ Word8
pattern $mNewLine :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bNewLine :: Word8
NewLine = 0x0a
pattern HashTag ∷ Word8
pattern $mHashTag :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bHashTag :: Word8
HashTag = 0x23
pattern Comma ∷ Word8
pattern $mComma :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bComma :: Word8
Comma = 0x2c
pattern Period ∷ Word8
pattern $mPeriod :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bPeriod :: Word8
Period = 0x2e
pattern Slash ∷ Word8
pattern $mSlash :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bSlash :: Word8
Slash = 0x2f
pattern SemiColon ∷ Word8
pattern $mSemiColon :: forall {r}. Word8 -> ((# #) -> r) -> ((# #) -> r) -> r
$bSemiColon :: Word8
SemiColon = 0x3b