{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
module Text.Collate.UnicodeData
( UChar(..),
GeneralCategory(..),
BidiClass(..),
DecompositionType(..),
parseUnicodeData,
toCanonicalCombiningClassMap,
toCanonicalDecompositionMap,
genCanonicalCombiningClassMap,
genCanonicalDecompositionMap,
readCodePoints,
)
where
import qualified Data.ByteString as B
import qualified Data.IntMap as M
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Read as TR
import GHC.Generics (Generic)
import Instances.TH.Lift ()
import Language.Haskell.TH
import Language.Haskell.TH.Syntax (qAddDependentFile)
unicodeDataPath :: FilePath
unicodeDataPath :: FilePath
unicodeDataPath = FilePath
"data/UnicodeData.txt"
readUtf8Text :: FilePath -> IO Text
readUtf8Text :: FilePath -> IO Text
readUtf8Text FilePath
fp = ByteString -> Text
TE.decodeUtf8 (ByteString -> Text) -> IO ByteString -> IO Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath -> IO ByteString
B.readFile FilePath
fp
genCanonicalCombiningClassMap :: Q Exp
genCanonicalCombiningClassMap :: Q Exp
genCanonicalCombiningClassMap = do
FilePath -> Q ()
forall (m :: * -> *). Quasi m => FilePath -> m ()
qAddDependentFile FilePath
unicodeDataPath
IntMap Int
cccmap <-
IntMap UChar -> IntMap Int
toCanonicalCombiningClassMap (IntMap UChar -> IntMap Int)
-> (Text -> IntMap UChar) -> Text -> IntMap Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IntMap UChar
parseUnicodeData
(Text -> IntMap Int) -> Q Text -> Q (IntMap Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO Text -> Q Text
forall a. IO a -> Q a
runIO (FilePath -> IO Text
readUtf8Text FilePath
unicodeDataPath)
[|cccmap|]
genCanonicalDecompositionMap :: Q Exp
genCanonicalDecompositionMap :: Q Exp
genCanonicalDecompositionMap = do
FilePath -> Q ()
forall (m :: * -> *). Quasi m => FilePath -> m ()
qAddDependentFile FilePath
unicodeDataPath
IntMap [Int]
dmap <-
IntMap UChar -> IntMap [Int]
toCanonicalDecompositionMap (IntMap UChar -> IntMap [Int])
-> (Text -> IntMap UChar) -> Text -> IntMap [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IntMap UChar
parseUnicodeData
(Text -> IntMap [Int]) -> Q Text -> Q (IntMap [Int])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO Text -> Q Text
forall a. IO a -> Q a
runIO (FilePath -> IO Text
readUtf8Text FilePath
unicodeDataPath)
[|dmap|]
parseUnicodeData :: Text -> M.IntMap UChar
parseUnicodeData :: Text -> IntMap UChar
parseUnicodeData = (Text -> IntMap UChar -> IntMap UChar)
-> IntMap UChar -> [Text] -> IntMap UChar
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Text -> IntMap UChar -> IntMap UChar
parseLine IntMap UChar
forall a. Monoid a => a
mempty ([Text] -> IntMap UChar)
-> (Text -> [Text]) -> Text -> IntMap UChar
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines
toCanonicalCombiningClassMap :: M.IntMap UChar -> M.IntMap Int
toCanonicalCombiningClassMap :: IntMap UChar -> IntMap Int
toCanonicalCombiningClassMap =
(UChar -> Int) -> IntMap UChar -> IntMap Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UChar -> Int
uCanonicalCombiningClass (IntMap UChar -> IntMap Int)
-> (IntMap UChar -> IntMap UChar) -> IntMap UChar -> IntMap Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (UChar -> Bool) -> IntMap UChar -> IntMap UChar
forall a. (a -> Bool) -> IntMap a -> IntMap a
M.filter ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0) (Int -> Bool) -> (UChar -> Int) -> UChar -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UChar -> Int
uCanonicalCombiningClass)
toCanonicalDecompositionMap :: M.IntMap UChar -> M.IntMap [Int]
toCanonicalDecompositionMap :: IntMap UChar -> IntMap [Int]
toCanonicalDecompositionMap =
(UChar -> [Int]) -> IntMap UChar -> IntMap [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UChar -> [Int]
uDecompositionMapping
(IntMap UChar -> IntMap [Int])
-> (IntMap UChar -> IntMap UChar) -> IntMap UChar -> IntMap [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (UChar -> Bool) -> IntMap UChar -> IntMap UChar
forall a. (a -> Bool) -> IntMap a -> IntMap a
M.filter
( \UChar
x ->
UChar -> DecompositionType
uDecompositionType UChar
x DecompositionType -> DecompositionType -> Bool
forall a. Eq a => a -> a -> Bool
== DecompositionType
Canonical
Bool -> Bool -> Bool
&& Bool -> Bool
not ([Int] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (UChar -> [Int]
uDecompositionMapping UChar
x))
)
data GeneralCategory
= Lu
| Ll
| Lt
| Lm
| Lo
| Mn
| Mc
| Me
| Nd
| Nl
| No
| Pc
| Pd
| Ps
| Pe
| Pi
| Pf
| Po
| Sm
| Sc
| Sk
| So
| Zs
| Zl
| Zp
| Cc
| Cf
| Cs
| Co
| Cn
deriving (Int -> GeneralCategory -> ShowS
[GeneralCategory] -> ShowS
GeneralCategory -> FilePath
(Int -> GeneralCategory -> ShowS)
-> (GeneralCategory -> FilePath)
-> ([GeneralCategory] -> ShowS)
-> Show GeneralCategory
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [GeneralCategory] -> ShowS
$cshowList :: [GeneralCategory] -> ShowS
show :: GeneralCategory -> FilePath
$cshow :: GeneralCategory -> FilePath
showsPrec :: Int -> GeneralCategory -> ShowS
$cshowsPrec :: Int -> GeneralCategory -> ShowS
Show, ReadPrec [GeneralCategory]
ReadPrec GeneralCategory
Int -> ReadS GeneralCategory
ReadS [GeneralCategory]
(Int -> ReadS GeneralCategory)
-> ReadS [GeneralCategory]
-> ReadPrec GeneralCategory
-> ReadPrec [GeneralCategory]
-> Read GeneralCategory
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [GeneralCategory]
$creadListPrec :: ReadPrec [GeneralCategory]
readPrec :: ReadPrec GeneralCategory
$creadPrec :: ReadPrec GeneralCategory
readList :: ReadS [GeneralCategory]
$creadList :: ReadS [GeneralCategory]
readsPrec :: Int -> ReadS GeneralCategory
$creadsPrec :: Int -> ReadS GeneralCategory
Read, GeneralCategory -> GeneralCategory -> Bool
(GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> Eq GeneralCategory
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: GeneralCategory -> GeneralCategory -> Bool
$c/= :: GeneralCategory -> GeneralCategory -> Bool
== :: GeneralCategory -> GeneralCategory -> Bool
$c== :: GeneralCategory -> GeneralCategory -> Bool
Eq, Eq GeneralCategory
Eq GeneralCategory
-> (GeneralCategory -> GeneralCategory -> Ordering)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> Bool)
-> (GeneralCategory -> GeneralCategory -> GeneralCategory)
-> (GeneralCategory -> GeneralCategory -> GeneralCategory)
-> Ord GeneralCategory
GeneralCategory -> GeneralCategory -> Bool
GeneralCategory -> GeneralCategory -> Ordering
GeneralCategory -> GeneralCategory -> GeneralCategory
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: GeneralCategory -> GeneralCategory -> GeneralCategory
$cmin :: GeneralCategory -> GeneralCategory -> GeneralCategory
max :: GeneralCategory -> GeneralCategory -> GeneralCategory
$cmax :: GeneralCategory -> GeneralCategory -> GeneralCategory
>= :: GeneralCategory -> GeneralCategory -> Bool
$c>= :: GeneralCategory -> GeneralCategory -> Bool
> :: GeneralCategory -> GeneralCategory -> Bool
$c> :: GeneralCategory -> GeneralCategory -> Bool
<= :: GeneralCategory -> GeneralCategory -> Bool
$c<= :: GeneralCategory -> GeneralCategory -> Bool
< :: GeneralCategory -> GeneralCategory -> Bool
$c< :: GeneralCategory -> GeneralCategory -> Bool
compare :: GeneralCategory -> GeneralCategory -> Ordering
$ccompare :: GeneralCategory -> GeneralCategory -> Ordering
$cp1Ord :: Eq GeneralCategory
Ord, Int -> GeneralCategory
GeneralCategory -> Int
GeneralCategory -> [GeneralCategory]
GeneralCategory -> GeneralCategory
GeneralCategory -> GeneralCategory -> [GeneralCategory]
GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory]
(GeneralCategory -> GeneralCategory)
-> (GeneralCategory -> GeneralCategory)
-> (Int -> GeneralCategory)
-> (GeneralCategory -> Int)
-> (GeneralCategory -> [GeneralCategory])
-> (GeneralCategory -> GeneralCategory -> [GeneralCategory])
-> (GeneralCategory -> GeneralCategory -> [GeneralCategory])
-> (GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory])
-> Enum GeneralCategory
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory]
$cenumFromThenTo :: GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory]
enumFromTo :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
$cenumFromTo :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
enumFromThen :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
$cenumFromThen :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
enumFrom :: GeneralCategory -> [GeneralCategory]
$cenumFrom :: GeneralCategory -> [GeneralCategory]
fromEnum :: GeneralCategory -> Int
$cfromEnum :: GeneralCategory -> Int
toEnum :: Int -> GeneralCategory
$ctoEnum :: Int -> GeneralCategory
pred :: GeneralCategory -> GeneralCategory
$cpred :: GeneralCategory -> GeneralCategory
succ :: GeneralCategory -> GeneralCategory
$csucc :: GeneralCategory -> GeneralCategory
Enum, (forall x. GeneralCategory -> Rep GeneralCategory x)
-> (forall x. Rep GeneralCategory x -> GeneralCategory)
-> Generic GeneralCategory
forall x. Rep GeneralCategory x -> GeneralCategory
forall x. GeneralCategory -> Rep GeneralCategory x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep GeneralCategory x -> GeneralCategory
$cfrom :: forall x. GeneralCategory -> Rep GeneralCategory x
Generic)
data BidiClass
= L
| LRE
| LRO
| R
| AL
| RLE
| RLO
| PDF
| EN
| ES
| ET
| AN
| CS
| NSM
| BN
| B
| S
| WS
| ON
| LRI
| RLI
| FSI
| PDI
deriving (Int -> BidiClass -> ShowS
[BidiClass] -> ShowS
BidiClass -> FilePath
(Int -> BidiClass -> ShowS)
-> (BidiClass -> FilePath)
-> ([BidiClass] -> ShowS)
-> Show BidiClass
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [BidiClass] -> ShowS
$cshowList :: [BidiClass] -> ShowS
show :: BidiClass -> FilePath
$cshow :: BidiClass -> FilePath
showsPrec :: Int -> BidiClass -> ShowS
$cshowsPrec :: Int -> BidiClass -> ShowS
Show, ReadPrec [BidiClass]
ReadPrec BidiClass
Int -> ReadS BidiClass
ReadS [BidiClass]
(Int -> ReadS BidiClass)
-> ReadS [BidiClass]
-> ReadPrec BidiClass
-> ReadPrec [BidiClass]
-> Read BidiClass
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [BidiClass]
$creadListPrec :: ReadPrec [BidiClass]
readPrec :: ReadPrec BidiClass
$creadPrec :: ReadPrec BidiClass
readList :: ReadS [BidiClass]
$creadList :: ReadS [BidiClass]
readsPrec :: Int -> ReadS BidiClass
$creadsPrec :: Int -> ReadS BidiClass
Read, BidiClass -> BidiClass -> Bool
(BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> Bool) -> Eq BidiClass
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BidiClass -> BidiClass -> Bool
$c/= :: BidiClass -> BidiClass -> Bool
== :: BidiClass -> BidiClass -> Bool
$c== :: BidiClass -> BidiClass -> Bool
Eq, Eq BidiClass
Eq BidiClass
-> (BidiClass -> BidiClass -> Ordering)
-> (BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> Bool)
-> (BidiClass -> BidiClass -> BidiClass)
-> (BidiClass -> BidiClass -> BidiClass)
-> Ord BidiClass
BidiClass -> BidiClass -> Bool
BidiClass -> BidiClass -> Ordering
BidiClass -> BidiClass -> BidiClass
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: BidiClass -> BidiClass -> BidiClass
$cmin :: BidiClass -> BidiClass -> BidiClass
max :: BidiClass -> BidiClass -> BidiClass
$cmax :: BidiClass -> BidiClass -> BidiClass
>= :: BidiClass -> BidiClass -> Bool
$c>= :: BidiClass -> BidiClass -> Bool
> :: BidiClass -> BidiClass -> Bool
$c> :: BidiClass -> BidiClass -> Bool
<= :: BidiClass -> BidiClass -> Bool
$c<= :: BidiClass -> BidiClass -> Bool
< :: BidiClass -> BidiClass -> Bool
$c< :: BidiClass -> BidiClass -> Bool
compare :: BidiClass -> BidiClass -> Ordering
$ccompare :: BidiClass -> BidiClass -> Ordering
$cp1Ord :: Eq BidiClass
Ord, Int -> BidiClass
BidiClass -> Int
BidiClass -> [BidiClass]
BidiClass -> BidiClass
BidiClass -> BidiClass -> [BidiClass]
BidiClass -> BidiClass -> BidiClass -> [BidiClass]
(BidiClass -> BidiClass)
-> (BidiClass -> BidiClass)
-> (Int -> BidiClass)
-> (BidiClass -> Int)
-> (BidiClass -> [BidiClass])
-> (BidiClass -> BidiClass -> [BidiClass])
-> (BidiClass -> BidiClass -> [BidiClass])
-> (BidiClass -> BidiClass -> BidiClass -> [BidiClass])
-> Enum BidiClass
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: BidiClass -> BidiClass -> BidiClass -> [BidiClass]
$cenumFromThenTo :: BidiClass -> BidiClass -> BidiClass -> [BidiClass]
enumFromTo :: BidiClass -> BidiClass -> [BidiClass]
$cenumFromTo :: BidiClass -> BidiClass -> [BidiClass]
enumFromThen :: BidiClass -> BidiClass -> [BidiClass]
$cenumFromThen :: BidiClass -> BidiClass -> [BidiClass]
enumFrom :: BidiClass -> [BidiClass]
$cenumFrom :: BidiClass -> [BidiClass]
fromEnum :: BidiClass -> Int
$cfromEnum :: BidiClass -> Int
toEnum :: Int -> BidiClass
$ctoEnum :: Int -> BidiClass
pred :: BidiClass -> BidiClass
$cpred :: BidiClass -> BidiClass
succ :: BidiClass -> BidiClass
$csucc :: BidiClass -> BidiClass
Enum, (forall x. BidiClass -> Rep BidiClass x)
-> (forall x. Rep BidiClass x -> BidiClass) -> Generic BidiClass
forall x. Rep BidiClass x -> BidiClass
forall x. BidiClass -> Rep BidiClass x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep BidiClass x -> BidiClass
$cfrom :: forall x. BidiClass -> Rep BidiClass x
Generic)
data DecompositionType
= Font
| NoBreak
| Initial
| Medial
| Final
| Isolated
| Circle
| Super
| Sub
| Vertical
| Wide
| Narrow
| Small
| Square
| Fraction
| Compat
| Canonical
deriving (Int -> DecompositionType -> ShowS
[DecompositionType] -> ShowS
DecompositionType -> FilePath
(Int -> DecompositionType -> ShowS)
-> (DecompositionType -> FilePath)
-> ([DecompositionType] -> ShowS)
-> Show DecompositionType
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [DecompositionType] -> ShowS
$cshowList :: [DecompositionType] -> ShowS
show :: DecompositionType -> FilePath
$cshow :: DecompositionType -> FilePath
showsPrec :: Int -> DecompositionType -> ShowS
$cshowsPrec :: Int -> DecompositionType -> ShowS
Show, ReadPrec [DecompositionType]
ReadPrec DecompositionType
Int -> ReadS DecompositionType
ReadS [DecompositionType]
(Int -> ReadS DecompositionType)
-> ReadS [DecompositionType]
-> ReadPrec DecompositionType
-> ReadPrec [DecompositionType]
-> Read DecompositionType
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [DecompositionType]
$creadListPrec :: ReadPrec [DecompositionType]
readPrec :: ReadPrec DecompositionType
$creadPrec :: ReadPrec DecompositionType
readList :: ReadS [DecompositionType]
$creadList :: ReadS [DecompositionType]
readsPrec :: Int -> ReadS DecompositionType
$creadsPrec :: Int -> ReadS DecompositionType
Read, DecompositionType -> DecompositionType -> Bool
(DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> Bool)
-> Eq DecompositionType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DecompositionType -> DecompositionType -> Bool
$c/= :: DecompositionType -> DecompositionType -> Bool
== :: DecompositionType -> DecompositionType -> Bool
$c== :: DecompositionType -> DecompositionType -> Bool
Eq, Eq DecompositionType
Eq DecompositionType
-> (DecompositionType -> DecompositionType -> Ordering)
-> (DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> Bool)
-> (DecompositionType -> DecompositionType -> DecompositionType)
-> (DecompositionType -> DecompositionType -> DecompositionType)
-> Ord DecompositionType
DecompositionType -> DecompositionType -> Bool
DecompositionType -> DecompositionType -> Ordering
DecompositionType -> DecompositionType -> DecompositionType
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: DecompositionType -> DecompositionType -> DecompositionType
$cmin :: DecompositionType -> DecompositionType -> DecompositionType
max :: DecompositionType -> DecompositionType -> DecompositionType
$cmax :: DecompositionType -> DecompositionType -> DecompositionType
>= :: DecompositionType -> DecompositionType -> Bool
$c>= :: DecompositionType -> DecompositionType -> Bool
> :: DecompositionType -> DecompositionType -> Bool
$c> :: DecompositionType -> DecompositionType -> Bool
<= :: DecompositionType -> DecompositionType -> Bool
$c<= :: DecompositionType -> DecompositionType -> Bool
< :: DecompositionType -> DecompositionType -> Bool
$c< :: DecompositionType -> DecompositionType -> Bool
compare :: DecompositionType -> DecompositionType -> Ordering
$ccompare :: DecompositionType -> DecompositionType -> Ordering
$cp1Ord :: Eq DecompositionType
Ord, Int -> DecompositionType
DecompositionType -> Int
DecompositionType -> [DecompositionType]
DecompositionType -> DecompositionType
DecompositionType -> DecompositionType -> [DecompositionType]
DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType]
(DecompositionType -> DecompositionType)
-> (DecompositionType -> DecompositionType)
-> (Int -> DecompositionType)
-> (DecompositionType -> Int)
-> (DecompositionType -> [DecompositionType])
-> (DecompositionType -> DecompositionType -> [DecompositionType])
-> (DecompositionType -> DecompositionType -> [DecompositionType])
-> (DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType])
-> Enum DecompositionType
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType]
$cenumFromThenTo :: DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType]
enumFromTo :: DecompositionType -> DecompositionType -> [DecompositionType]
$cenumFromTo :: DecompositionType -> DecompositionType -> [DecompositionType]
enumFromThen :: DecompositionType -> DecompositionType -> [DecompositionType]
$cenumFromThen :: DecompositionType -> DecompositionType -> [DecompositionType]
enumFrom :: DecompositionType -> [DecompositionType]
$cenumFrom :: DecompositionType -> [DecompositionType]
fromEnum :: DecompositionType -> Int
$cfromEnum :: DecompositionType -> Int
toEnum :: Int -> DecompositionType
$ctoEnum :: Int -> DecompositionType
pred :: DecompositionType -> DecompositionType
$cpred :: DecompositionType -> DecompositionType
succ :: DecompositionType -> DecompositionType
$csucc :: DecompositionType -> DecompositionType
Enum, (forall x. DecompositionType -> Rep DecompositionType x)
-> (forall x. Rep DecompositionType x -> DecompositionType)
-> Generic DecompositionType
forall x. Rep DecompositionType x -> DecompositionType
forall x. DecompositionType -> Rep DecompositionType x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep DecompositionType x -> DecompositionType
$cfrom :: forall x. DecompositionType -> Rep DecompositionType x
Generic)
data UChar = UChar
{ UChar -> Int
uCodePoint :: Int,
UChar -> Text
uName :: Text,
UChar -> GeneralCategory
uGeneralCategory :: GeneralCategory,
UChar -> Int
uCanonicalCombiningClass :: Int,
UChar -> BidiClass
uBidiClass :: BidiClass,
UChar -> DecompositionType
uDecompositionType :: DecompositionType,
UChar -> [Int]
uDecompositionMapping :: [Int],
UChar -> (Maybe Int, Maybe Int, Maybe Int)
uNumericTypeAndValue :: (Maybe Int, Maybe Int, Maybe Int),
UChar -> Bool
uBidiMirrored :: Bool,
UChar -> Text
uUnicode1Name :: Text,
:: Text,
UChar -> Int
uSimpleUppercaseMapping :: Int,
UChar -> Int
uSimpleLowercaseMapping :: Int,
UChar -> Int
uSimpleTitlecaseMappping :: Int
}
deriving (Int -> UChar -> ShowS
[UChar] -> ShowS
UChar -> FilePath
(Int -> UChar -> ShowS)
-> (UChar -> FilePath) -> ([UChar] -> ShowS) -> Show UChar
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [UChar] -> ShowS
$cshowList :: [UChar] -> ShowS
show :: UChar -> FilePath
$cshow :: UChar -> FilePath
showsPrec :: Int -> UChar -> ShowS
$cshowsPrec :: Int -> UChar -> ShowS
Show, UChar -> UChar -> Bool
(UChar -> UChar -> Bool) -> (UChar -> UChar -> Bool) -> Eq UChar
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UChar -> UChar -> Bool
$c/= :: UChar -> UChar -> Bool
== :: UChar -> UChar -> Bool
$c== :: UChar -> UChar -> Bool
Eq, Eq UChar
Eq UChar
-> (UChar -> UChar -> Ordering)
-> (UChar -> UChar -> Bool)
-> (UChar -> UChar -> Bool)
-> (UChar -> UChar -> Bool)
-> (UChar -> UChar -> Bool)
-> (UChar -> UChar -> UChar)
-> (UChar -> UChar -> UChar)
-> Ord UChar
UChar -> UChar -> Bool
UChar -> UChar -> Ordering
UChar -> UChar -> UChar
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: UChar -> UChar -> UChar
$cmin :: UChar -> UChar -> UChar
max :: UChar -> UChar -> UChar
$cmax :: UChar -> UChar -> UChar
>= :: UChar -> UChar -> Bool
$c>= :: UChar -> UChar -> Bool
> :: UChar -> UChar -> Bool
$c> :: UChar -> UChar -> Bool
<= :: UChar -> UChar -> Bool
$c<= :: UChar -> UChar -> Bool
< :: UChar -> UChar -> Bool
$c< :: UChar -> UChar -> Bool
compare :: UChar -> UChar -> Ordering
$ccompare :: UChar -> UChar -> Ordering
$cp1Ord :: Eq UChar
Ord, (forall x. UChar -> Rep UChar x)
-> (forall x. Rep UChar x -> UChar) -> Generic UChar
forall x. Rep UChar x -> UChar
forall x. UChar -> Rep UChar x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep UChar x -> UChar
$cfrom :: forall x. UChar -> Rep UChar x
Generic)
readCodePoint :: Text -> Int
readCodePoint :: Text -> Int
readCodePoint Text
t =
case Reader Int
forall a. Integral a => Reader a
TR.hexadecimal Text
t of
Left FilePath
e -> FilePath -> Int
forall a. HasCallStack => FilePath -> a
error FilePath
e
Right (Int
codepoint, Text
_) -> Int
codepoint
readCodePoints :: Text -> ([Int], Text)
readCodePoints :: Text -> ([Int], Text)
readCodePoints Text
t =
case Reader Int
forall a. Integral a => Reader a
TR.hexadecimal Text
t of
Left FilePath
_ -> ([], Text
t)
Right (Int
codepoint, Text
rest) ->
let ([Int]
cps, Text
t') = Text -> ([Int], Text)
readCodePoints ((Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ') Text
rest)
in (Int
codepoint Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: [Int]
cps, Text
t')
parseDecomp :: Text -> (DecompositionType, [Int])
parseDecomp :: Text -> (DecompositionType, [Int])
parseDecomp Text
bs =
case Text -> Maybe (Char, Text)
T.uncons Text
bs of
Just (Char
'<', Text
rest) -> (DecompositionType
ty, [Int]
xs)
where
xs :: [Int]
xs = ([Int], Text) -> [Int]
forall a b. (a, b) -> a
fst (([Int], Text) -> [Int]) -> ([Int], Text) -> [Int]
forall a b. (a -> b) -> a -> b
$ Text -> ([Int], Text)
readCodePoints Text
cps
(Text
x, Text
y) = (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>') Text
rest
cps :: Text
cps = (Char -> Bool) -> Text -> Text
T.dropWhile (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ') Text
y
ty :: DecompositionType
ty = case Text
x of
Text
"font" -> DecompositionType
Font
Text
"noBreak" -> DecompositionType
NoBreak
Text
"initial" -> DecompositionType
Initial
Text
"medial" -> DecompositionType
Medial
Text
"final" -> DecompositionType
Final
Text
"isolate" -> DecompositionType
Isolated
Text
"circle" -> DecompositionType
Circle
Text
"super" -> DecompositionType
Super
Text
"sub" -> DecompositionType
Sub
Text
"vertical" -> DecompositionType
Vertical
Text
"wide" -> DecompositionType
Wide
Text
"narrow" -> DecompositionType
Narrow
Text
"small" -> DecompositionType
Small
Text
"square" -> DecompositionType
Square
Text
"fraction" -> DecompositionType
Fraction
Text
"compat" -> DecompositionType
Compat
Text
_ -> DecompositionType
Compat
Maybe (Char, Text)
_ -> (DecompositionType
Canonical,) ([Int] -> (DecompositionType, [Int]))
-> (([Int], Text) -> [Int])
-> ([Int], Text)
-> (DecompositionType, [Int])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Int], Text) -> [Int]
forall a b. (a, b) -> a
fst (([Int], Text) -> (DecompositionType, [Int]))
-> ([Int], Text) -> (DecompositionType, [Int])
forall a b. (a -> b) -> a -> b
$ Text -> ([Int], Text)
readCodePoints Text
bs
parseLine :: Text -> M.IntMap UChar -> M.IntMap UChar
parseLine :: Text -> IntMap UChar -> IntMap UChar
parseLine Text
t =
case Text -> Text -> [Text]
T.splitOn Text
";" Text
t of
[Text
f0, Text
f1, Text
f2, Text
f3, Text
f4, Text
f5, Text
f6, Text
f7, Text
f8, Text
f9, Text
f10, Text
f11, Text
f12, Text
f13, Text
f14] ->
Int -> UChar -> IntMap UChar -> IntMap UChar
forall a. Int -> a -> IntMap a -> IntMap a
M.insert Int
codepoint UChar
uchar
where
codepoint :: Int
codepoint = Text -> Int
readCodePoint Text
f0
(DecompositionType
decompType, [Int]
decompMapping) = Text -> (DecompositionType, [Int])
parseDecomp Text
f5
readNumericValue :: Text -> Maybe a
readNumericValue Text
x =
case Reader a
forall a. Integral a => Reader a
TR.decimal Text
x of
Left FilePath
_ -> Maybe a
forall a. Maybe a
Nothing
Right (a
v, Text
_) -> a -> Maybe a
forall a. a -> Maybe a
Just a
v
uchar :: UChar
uchar =
UChar :: Int
-> Text
-> GeneralCategory
-> Int
-> BidiClass
-> DecompositionType
-> [Int]
-> (Maybe Int, Maybe Int, Maybe Int)
-> Bool
-> Text
-> Text
-> Int
-> Int
-> Int
-> UChar
UChar
{ uCodePoint :: Int
uCodePoint = Int
codepoint,
uName :: Text
uName = Text
f1,
uGeneralCategory :: GeneralCategory
uGeneralCategory = FilePath -> GeneralCategory
forall a. Read a => FilePath -> a
read (Text -> FilePath
T.unpack Text
f2),
uCanonicalCombiningClass :: Int
uCanonicalCombiningClass = (FilePath -> Int)
-> ((Int, Text) -> Int) -> Either FilePath (Int, Text) -> Int
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Int -> FilePath -> Int
forall a b. a -> b -> a
const Int
0) (Int, Text) -> Int
forall a b. (a, b) -> a
fst (Reader Int
forall a. Integral a => Reader a
TR.decimal Text
f3),
uBidiClass :: BidiClass
uBidiClass = FilePath -> BidiClass
forall a. Read a => FilePath -> a
read (Text -> FilePath
T.unpack Text
f4),
uDecompositionType :: DecompositionType
uDecompositionType = DecompositionType
decompType,
uDecompositionMapping :: [Int]
uDecompositionMapping = [Int]
decompMapping,
uNumericTypeAndValue :: (Maybe Int, Maybe Int, Maybe Int)
uNumericTypeAndValue =
( Text -> Maybe Int
forall a. Integral a => Text -> Maybe a
readNumericValue Text
f6,
Text -> Maybe Int
forall a. Integral a => Text -> Maybe a
readNumericValue Text
f7,
Text -> Maybe Int
forall a. Integral a => Text -> Maybe a
readNumericValue Text
f8
),
uBidiMirrored :: Bool
uBidiMirrored = Text
f9 Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"Y",
uUnicode1Name :: Text
uUnicode1Name = Text
f10,
uISOComment :: Text
uISOComment = Text
f11,
uSimpleUppercaseMapping :: Int
uSimpleUppercaseMapping = Text -> Int
readCodePoint Text
f12,
uSimpleLowercaseMapping :: Int
uSimpleLowercaseMapping = Text -> Int
readCodePoint Text
f13,
uSimpleTitlecaseMappping :: Int
uSimpleTitlecaseMappping = Text -> Int
readCodePoint Text
f14
}
[Text]
_ -> FilePath -> IntMap UChar -> IntMap UChar
forall a. HasCallStack => FilePath -> a
error (FilePath -> IntMap UChar -> IntMap UChar)
-> FilePath -> IntMap UChar -> IntMap UChar
forall a b. (a -> b) -> a -> b
$ FilePath
"Wrong number of fields in record:\n" FilePath -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> FilePath
forall a. Show a => a -> FilePath
show Text
t