{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
module Text.Collate.UnicodeData
( UChar(..),
GeneralCategory(..),
BidiClass(..),
DecompositionType(..),
parseUnicodeData,
toCanonicalCombiningClassMap,
toCanonicalDecompositionMap,
genCanonicalCombiningClassMap,
genCanonicalDecompositionMap,
readCodePoints,
)
where
import qualified Data.ByteString as B
import qualified Data.IntMap as M
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Read as TR
import GHC.Generics (Generic)
import Instances.TH.Lift ()
import Language.Haskell.TH
import Language.Haskell.TH.Syntax (qAddDependentFile)
-- | Relative path of the Unicode Character Database file that the
-- Template Haskell generators in this module read and register as a
-- compile-time dependency.
unicodeDataPath :: FilePath
unicodeDataPath = "data/UnicodeData.txt"
-- | Read the whole file as a strict 'B.ByteString' and decode it as UTF-8.
--
-- NOTE(review): 'TE.decodeUtf8' is expected to fail on malformed UTF-8
-- rather than substitute characters -- confirm against the @text@ docs.
readUtf8Text :: FilePath -> IO Text
readUtf8Text path = do
  bytes <- B.readFile path
  pure (TE.decodeUtf8 bytes)
-- | Template Haskell expression for the canonical combining class map.
--
-- The splice reads 'unicodeDataPath', parses it with 'parseUnicodeData',
-- reduces it with 'toCanonicalCombiningClassMap' and lifts the resulting
-- map into the generated code.  The data file is registered with
-- 'qAddDependentFile' before it is read.
genCanonicalCombiningClassMap :: Q Exp
genCanonicalCombiningClassMap = do
  qAddDependentFile unicodeDataPath
  rawData <- runIO (readUtf8Text unicodeDataPath)
  let cccmap = toCanonicalCombiningClassMap (parseUnicodeData rawData)
  [|cccmap|]
-- | Template Haskell expression for the canonical decomposition map.
--
-- The splice reads 'unicodeDataPath', parses it with 'parseUnicodeData',
-- reduces it with 'toCanonicalDecompositionMap' and lifts the resulting
-- map into the generated code.  The data file is registered with
-- 'qAddDependentFile' before it is read.
genCanonicalDecompositionMap :: Q Exp
genCanonicalDecompositionMap = do
  qAddDependentFile unicodeDataPath
  rawData <- runIO (readUtf8Text unicodeDataPath)
  let dmap = toCanonicalDecompositionMap (parseUnicodeData rawData)
  [|dmap|]
-- | Parse the contents of a @UnicodeData.txt@ file into a map from code
-- point to 'UChar', feeding every line of the input to 'parseLine'.
--
-- The lines are folded from the right, so when a code point appears on
-- more than one line the earliest line is inserted last and wins.
parseUnicodeData :: Text -> M.IntMap UChar
parseUnicodeData contents = foldr parseLine M.empty (T.lines contents)
-- | Map each code point to its canonical combining class, keeping only
-- the code points whose class is greater than zero.
toCanonicalCombiningClassMap :: M.IntMap UChar -> M.IntMap Int
toCanonicalCombiningClassMap table =
  M.map uCanonicalCombiningClass (M.filter hasNonZeroClass table)
  where
    hasNonZeroClass uchar = uCanonicalCombiningClass uchar > 0
-- | Map each code point to its decomposition mapping, keeping only the
-- code points whose decomposition type is 'Canonical' and whose mapping
-- is not empty.
toCanonicalDecompositionMap :: M.IntMap UChar -> M.IntMap [Int]
toCanonicalDecompositionMap table =
  M.map uDecompositionMapping (M.filter isCanonical table)
  where
    isCanonical uchar
      | uDecompositionType uchar /= Canonical = False
      | otherwise = not (null (uDecompositionMapping uchar))
-- | Unicode general category, spelled with the two-letter abbreviations
-- so that the derived 'Read' instance can parse the category field of a
-- data file record directly.  Constructor order determines the derived
-- 'Ord' and 'Enum' instances.
data GeneralCategory
  = Lu | Ll | Lt | Lm | Lo -- letters
  | Mn | Mc | Me -- marks
  | Nd | Nl | No -- numbers
  | Pc | Pd | Ps | Pe | Pi | Pf | Po -- punctuation
  | Sm | Sc | Sk | So -- symbols
  | Zs | Zl | Zp -- separators
  | Cc | Cf | Cs | Co | Cn -- control, format, surrogate, private use, unassigned
  deriving (Show, Read, Eq, Ord, Enum, Generic)
-- | Unicode bidirectional class, spelled with the abbreviations used in
-- the data file so that the derived 'Read' instance can parse the field
-- directly.  Constructor order determines the derived 'Ord' and 'Enum'
-- instances.
data BidiClass
  = L | LRE | LRO -- left-to-right: strong, embedding, override
  | R | AL | RLE | RLO -- right-to-left: strong, Arabic letter, embedding, override
  | PDF -- pop directional format
  | EN | ES | ET | AN | CS | NSM | BN -- numbers, separators, nonspacing mark, boundary neutral
  | B | S | WS | ON -- paragraph/segment separator, whitespace, other neutral
  | LRI | RLI | FSI | PDI -- isolates and pop directional isolate
  deriving (Show, Read, Eq, Ord, Enum, Generic)
-- | Kind of decomposition recorded for a character.  All constructors
-- except 'Canonical' correspond to a @\<tag\>@ prefix in the decomposition
-- field; 'Canonical' is what 'parseDecomp' reports when the field carries
-- no tag.  Constructor order determines the derived 'Ord' and 'Enum'
-- instances.
data DecompositionType
  = Font | NoBreak
  | Initial | Medial | Final | Isolated
  | Circle | Super | Sub | Vertical
  | Wide | Narrow | Small | Square
  | Fraction | Compat
  | Canonical
  deriving (Show, Read, Eq, Ord, Enum, Generic)
-- | One record of the Unicode character data file, as built by
-- 'parseLine' from the fifteen semicolon-separated fields of a line.
--
-- Fields are deliberately lazy: 'parseLine' stores unevaluated parses, so
-- a field is only parsed when it is first inspected.
data UChar = UChar
  { uCodePoint :: Int
    -- ^ Code point, parsed from the hexadecimal field 0.
  , uName :: Text
    -- ^ Character name (field 1), kept verbatim.
  , uGeneralCategory :: GeneralCategory
    -- ^ General category (field 2).
  , uCanonicalCombiningClass :: Int
    -- ^ Canonical combining class (field 3); 0 when the field is not a
    -- decimal number.
  , uBidiClass :: BidiClass
    -- ^ Bidirectional class (field 4).
  , uDecompositionType :: DecompositionType
    -- ^ Decomposition type derived from field 5 by 'parseDecomp'.
  , uDecompositionMapping :: [Int]
    -- ^ Code points of the decomposition mapping from field 5; empty when
    -- the character has no decomposition.
  , uNumericTypeAndValue :: (Maybe Int, Maybe Int, Maybe Int)
    -- ^ Fields 6, 7 and 8, each read as a leading decimal integer;
    -- 'Nothing' where the field does not start with a digit.
  , uBidiMirrored :: Bool
    -- ^ 'True' exactly when field 9 is @Y@.
  , uUnicode1Name :: Text
    -- ^ Field 10, kept verbatim.
  , uISOComment :: Text
    -- ^ Field 11, kept verbatim.
  , uSimpleUppercaseMapping :: Int
    -- ^ Code point parsed from field 12.
  , uSimpleLowercaseMapping :: Int
    -- ^ Code point parsed from field 13.
  , uSimpleTitlecaseMappping :: Int
    -- ^ Code point parsed from field 14.  The spelling of this accessor
    -- is part of the exported interface and is kept as is.
  }
  deriving (Show, Eq, Ord, Generic)
-- | Parse the hexadecimal number at the start of the text and return it;
-- whatever follows the number is discarded.
--
-- Partial: calls 'error' with the parser's message when the text does not
-- start with a hexadecimal number.
readCodePoint :: Text -> Int
readCodePoint = either error fst . TR.hexadecimal
-- | Read as many hexadecimal code points as possible from the start of
-- the text.  Spaces following each code point are skipped.  Returns the
-- code points in order together with the text at which parsing stopped;
-- if the text does not start with a hexadecimal number the result is the
-- empty list and the input unchanged.
readCodePoints :: Text -> ([Int], Text)
readCodePoints input =
  either (const ([], input)) consume (TR.hexadecimal input)
  where
    -- One code point was read: continue after any separating spaces.
    consume (cp, afterCp) =
      let (more, leftover) = readCodePoints (T.dropWhile (== ' ') afterCp)
       in (cp : more, leftover)
-- | Parse the decomposition field of a data file record.
--
-- A field of the form @\<tag\> XXXX YYYY ...@ yields the decomposition type
-- named by the tag and the code points that follow it.  A field without a
-- leading @\<@ is a canonical decomposition: it yields 'Canonical' and
-- whatever code points the field contains (none for an empty field).
--
-- Tags that are not recognised are reported as 'Compat'.
parseDecomp :: Text -> (DecompositionType, [Int])
parseDecomp bs =
  case T.uncons bs of
    Just ('<', rest) -> (ty, xs)
      where
        -- Split "tag> XXXX ..." at the closing bracket.
        (tag, afterTag) = T.break (== '>') rest
        -- Drop the bracket and the spaces separating it from the mapping.
        cps = T.dropWhile (\c -> c == '>' || c == ' ') afterTag
        xs = fst (readCodePoints cps)
        ty = case tag of
          "font" -> Font
          "noBreak" -> NoBreak
          "initial" -> Initial
          "medial" -> Medial
          "final" -> Final
          -- The data file spells this tag "isolated"; matching on
          -- "isolate" never succeeded and such entries became 'Compat'.
          "isolated" -> Isolated
          "circle" -> Circle
          "super" -> Super
          "sub" -> Sub
          "vertical" -> Vertical
          "wide" -> Wide
          "narrow" -> Narrow
          "small" -> Small
          "square" -> Square
          "fraction" -> Fraction
          "compat" -> Compat
          _ -> Compat
    _ -> (Canonical, fst (readCodePoints bs))
-- | Parse one line of the data file and insert the resulting 'UChar' into
-- the map under its code point.
--
-- The line must consist of exactly fifteen semicolon-separated fields;
-- otherwise 'error' is called with a message quoting the line.  The code
-- point field is parsed with 'readCodePoint' and the general category and
-- bidi class with 'read', so malformed values in those fields also raise
-- an error when they are evaluated.
parseLine :: Text -> M.IntMap UChar -> M.IntMap UChar
parseLine t =
  case T.splitOn ";" t of
    [f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14] ->
      M.insert codepoint uchar
      where
        codepoint :: Int
        codepoint = readCodePoint f0

        (decompType, decompMapping) = parseDecomp f5

        -- Leading decimal integer of the field, if there is one.  Text
        -- after the digits is discarded, so a value such as "1/4" is
        -- read as 1.
        readNumericValue :: Text -> Maybe Int
        readNumericValue = either (const Nothing) (Just . fst) . TR.decimal

        -- Simple case mappings are left empty in the data file when the
        -- character maps to itself.  Fall back to the character's own
        -- code point instead of failing on the empty field, mirroring how
        -- the combining class defaults below.
        readCaseMapping :: Text -> Int
        readCaseMapping field =
          either (const codepoint) fst (TR.hexadecimal field)

        uchar :: UChar
        uchar =
          UChar
            { uCodePoint = codepoint,
              uName = f1,
              uGeneralCategory = read (T.unpack f2),
              uCanonicalCombiningClass = either (const 0) fst (TR.decimal f3),
              uBidiClass = read (T.unpack f4),
              uDecompositionType = decompType,
              uDecompositionMapping = decompMapping,
              uNumericTypeAndValue =
                ( readNumericValue f6,
                  readNumericValue f7,
                  readNumericValue f8
                ),
              uBidiMirrored = f9 == "Y",
              uUnicode1Name = f10,
              uISOComment = f11,
              uSimpleUppercaseMapping = readCaseMapping f12,
              uSimpleLowercaseMapping = readCaseMapping f13,
              uSimpleTitlecaseMappping = readCaseMapping f14
            }
    _ -> error $ "Wrong number of fields in record:\n" ++ show t