{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}

-- | Data types for unicode character data and functions for
-- extracting it from @UnicodeData.txt@.
module Text.Collate.UnicodeData
  ( UChar(..),
    GeneralCategory(..),
    BidiClass(..),
    DecompositionType(..),
    parseUnicodeData,
    toCanonicalCombiningClassMap,
    toCanonicalDecompositionMap,
    genCanonicalCombiningClassMap,
    genCanonicalDecompositionMap,
    readCodePoints,
  )
where

import qualified Data.ByteString as B
import qualified Data.IntMap as M
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Read as TR
import GHC.Generics (Generic)
import Instances.TH.Lift ()
import Language.Haskell.TH
import Language.Haskell.TH.Syntax (qAddDependentFile)

-- | Path to @UnicodeData.txt@.
unicodeDataPath :: FilePath
unicodeDataPath :: [Char]
unicodeDataPath = [Char]
"data/UnicodeData.txt"

readUtf8Text :: FilePath -> IO Text
readUtf8Text :: [Char] -> IO Text
readUtf8Text [Char]
fp = ByteString -> Text
TE.decodeUtf8 forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> IO ByteString
B.readFile [Char]
fp

-- | Generate map of code points to canonical combining class,
-- from @UnicodeData.txt@.
genCanonicalCombiningClassMap :: Q Exp
genCanonicalCombiningClassMap :: Q Exp
genCanonicalCombiningClassMap = do
  forall (m :: * -> *). Quasi m => [Char] -> m ()
qAddDependentFile [Char]
unicodeDataPath
  IntMap Int
cccmap <-
    IntMap UChar -> IntMap Int
toCanonicalCombiningClassMap forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IntMap UChar
parseUnicodeData
      forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. IO a -> Q a
runIO ([Char] -> IO Text
readUtf8Text [Char]
unicodeDataPath)
  [|cccmap|]

-- | Generate map of code points to canonical decompositions,
-- from @UnicodeData.txt@.
genCanonicalDecompositionMap :: Q Exp
genCanonicalDecompositionMap :: Q Exp
genCanonicalDecompositionMap = do
  forall (m :: * -> *). Quasi m => [Char] -> m ()
qAddDependentFile [Char]
unicodeDataPath
  IntMap [Int]
dmap <-
    IntMap UChar -> IntMap [Int]
toCanonicalDecompositionMap forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> IntMap UChar
parseUnicodeData
      forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. IO a -> Q a
runIO ([Char] -> IO Text
readUtf8Text [Char]
unicodeDataPath)
  [|dmap|]

-- | Parse @UnicodeData.txt@ into a map of 'UChar' records.
parseUnicodeData :: Text -> M.IntMap UChar
parseUnicodeData :: Text -> IntMap UChar
parseUnicodeData = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Text -> IntMap UChar -> IntMap UChar
parseLine forall a. Monoid a => a
mempty forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines

-- | Convert unicode data to a map from code points to canonical combining
-- classes.
toCanonicalCombiningClassMap :: M.IntMap UChar -> M.IntMap Int
toCanonicalCombiningClassMap :: IntMap UChar -> IntMap Int
toCanonicalCombiningClassMap =
  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UChar -> Int
uCanonicalCombiningClass forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> IntMap a -> IntMap a
M.filter ((forall a. Ord a => a -> a -> Bool
> Int
0) forall b c a. (b -> c) -> (a -> b) -> a -> c
. UChar -> Int
uCanonicalCombiningClass)

-- | Convert unicode data to a map from code points to canonical decompositions.
toCanonicalDecompositionMap :: M.IntMap UChar -> M.IntMap [Int]
toCanonicalDecompositionMap :: IntMap UChar -> IntMap [Int]
toCanonicalDecompositionMap =
  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap UChar -> [Int]
uDecompositionMapping
    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> IntMap a -> IntMap a
M.filter
      ( \UChar
x ->
          UChar -> DecompositionType
uDecompositionType UChar
x forall a. Eq a => a -> a -> Bool
== DecompositionType
Canonical
            Bool -> Bool -> Bool
&& Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => t a -> Bool
null (UChar -> [Int]
uDecompositionMapping UChar
x))
      )

data GeneralCategory
  = Lu
  | Ll
  | Lt
  | Lm
  | Lo
  | Mn
  | Mc
  | Me
  | Nd
  | Nl
  | No
  | Pc
  | Pd
  | Ps
  | Pe
  | Pi
  | Pf
  | Po
  | Sm
  | Sc
  | Sk
  | So
  | Zs
  | Zl
  | Zp
  | Cc
  | Cf
  | Cs
  | Co
  | Cn
  deriving (Int -> GeneralCategory -> ShowS
[GeneralCategory] -> ShowS
GeneralCategory -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [GeneralCategory] -> ShowS
$cshowList :: [GeneralCategory] -> ShowS
show :: GeneralCategory -> [Char]
$cshow :: GeneralCategory -> [Char]
showsPrec :: Int -> GeneralCategory -> ShowS
$cshowsPrec :: Int -> GeneralCategory -> ShowS
Show, ReadPrec [GeneralCategory]
ReadPrec GeneralCategory
Int -> ReadS GeneralCategory
ReadS [GeneralCategory]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [GeneralCategory]
$creadListPrec :: ReadPrec [GeneralCategory]
readPrec :: ReadPrec GeneralCategory
$creadPrec :: ReadPrec GeneralCategory
readList :: ReadS [GeneralCategory]
$creadList :: ReadS [GeneralCategory]
readsPrec :: Int -> ReadS GeneralCategory
$creadsPrec :: Int -> ReadS GeneralCategory
Read, GeneralCategory -> GeneralCategory -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: GeneralCategory -> GeneralCategory -> Bool
$c/= :: GeneralCategory -> GeneralCategory -> Bool
== :: GeneralCategory -> GeneralCategory -> Bool
$c== :: GeneralCategory -> GeneralCategory -> Bool
Eq, Eq GeneralCategory
GeneralCategory -> GeneralCategory -> Bool
GeneralCategory -> GeneralCategory -> Ordering
GeneralCategory -> GeneralCategory -> GeneralCategory
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: GeneralCategory -> GeneralCategory -> GeneralCategory
$cmin :: GeneralCategory -> GeneralCategory -> GeneralCategory
max :: GeneralCategory -> GeneralCategory -> GeneralCategory
$cmax :: GeneralCategory -> GeneralCategory -> GeneralCategory
>= :: GeneralCategory -> GeneralCategory -> Bool
$c>= :: GeneralCategory -> GeneralCategory -> Bool
> :: GeneralCategory -> GeneralCategory -> Bool
$c> :: GeneralCategory -> GeneralCategory -> Bool
<= :: GeneralCategory -> GeneralCategory -> Bool
$c<= :: GeneralCategory -> GeneralCategory -> Bool
< :: GeneralCategory -> GeneralCategory -> Bool
$c< :: GeneralCategory -> GeneralCategory -> Bool
compare :: GeneralCategory -> GeneralCategory -> Ordering
$ccompare :: GeneralCategory -> GeneralCategory -> Ordering
Ord, Int -> GeneralCategory
GeneralCategory -> Int
GeneralCategory -> [GeneralCategory]
GeneralCategory -> GeneralCategory
GeneralCategory -> GeneralCategory -> [GeneralCategory]
GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory]
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory]
$cenumFromThenTo :: GeneralCategory
-> GeneralCategory -> GeneralCategory -> [GeneralCategory]
enumFromTo :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
$cenumFromTo :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
enumFromThen :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
$cenumFromThen :: GeneralCategory -> GeneralCategory -> [GeneralCategory]
enumFrom :: GeneralCategory -> [GeneralCategory]
$cenumFrom :: GeneralCategory -> [GeneralCategory]
fromEnum :: GeneralCategory -> Int
$cfromEnum :: GeneralCategory -> Int
toEnum :: Int -> GeneralCategory
$ctoEnum :: Int -> GeneralCategory
pred :: GeneralCategory -> GeneralCategory
$cpred :: GeneralCategory -> GeneralCategory
succ :: GeneralCategory -> GeneralCategory
$csucc :: GeneralCategory -> GeneralCategory
Enum, forall x. Rep GeneralCategory x -> GeneralCategory
forall x. GeneralCategory -> Rep GeneralCategory x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep GeneralCategory x -> GeneralCategory
$cfrom :: forall x. GeneralCategory -> Rep GeneralCategory x
Generic)

data BidiClass
  = L
  | LRE
  | LRO
  | R
  | AL
  | RLE
  | RLO
  | PDF
  | EN
  | ES
  | ET
  | AN
  | CS
  | NSM
  | BN
  | B
  | S
  | WS
  | ON
  | LRI
  | RLI
  | FSI
  | PDI
  deriving (Int -> BidiClass -> ShowS
[BidiClass] -> ShowS
BidiClass -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [BidiClass] -> ShowS
$cshowList :: [BidiClass] -> ShowS
show :: BidiClass -> [Char]
$cshow :: BidiClass -> [Char]
showsPrec :: Int -> BidiClass -> ShowS
$cshowsPrec :: Int -> BidiClass -> ShowS
Show, ReadPrec [BidiClass]
ReadPrec BidiClass
Int -> ReadS BidiClass
ReadS [BidiClass]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [BidiClass]
$creadListPrec :: ReadPrec [BidiClass]
readPrec :: ReadPrec BidiClass
$creadPrec :: ReadPrec BidiClass
readList :: ReadS [BidiClass]
$creadList :: ReadS [BidiClass]
readsPrec :: Int -> ReadS BidiClass
$creadsPrec :: Int -> ReadS BidiClass
Read, BidiClass -> BidiClass -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: BidiClass -> BidiClass -> Bool
$c/= :: BidiClass -> BidiClass -> Bool
== :: BidiClass -> BidiClass -> Bool
$c== :: BidiClass -> BidiClass -> Bool
Eq, Eq BidiClass
BidiClass -> BidiClass -> Bool
BidiClass -> BidiClass -> Ordering
BidiClass -> BidiClass -> BidiClass
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: BidiClass -> BidiClass -> BidiClass
$cmin :: BidiClass -> BidiClass -> BidiClass
max :: BidiClass -> BidiClass -> BidiClass
$cmax :: BidiClass -> BidiClass -> BidiClass
>= :: BidiClass -> BidiClass -> Bool
$c>= :: BidiClass -> BidiClass -> Bool
> :: BidiClass -> BidiClass -> Bool
$c> :: BidiClass -> BidiClass -> Bool
<= :: BidiClass -> BidiClass -> Bool
$c<= :: BidiClass -> BidiClass -> Bool
< :: BidiClass -> BidiClass -> Bool
$c< :: BidiClass -> BidiClass -> Bool
compare :: BidiClass -> BidiClass -> Ordering
$ccompare :: BidiClass -> BidiClass -> Ordering
Ord, Int -> BidiClass
BidiClass -> Int
BidiClass -> [BidiClass]
BidiClass -> BidiClass
BidiClass -> BidiClass -> [BidiClass]
BidiClass -> BidiClass -> BidiClass -> [BidiClass]
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: BidiClass -> BidiClass -> BidiClass -> [BidiClass]
$cenumFromThenTo :: BidiClass -> BidiClass -> BidiClass -> [BidiClass]
enumFromTo :: BidiClass -> BidiClass -> [BidiClass]
$cenumFromTo :: BidiClass -> BidiClass -> [BidiClass]
enumFromThen :: BidiClass -> BidiClass -> [BidiClass]
$cenumFromThen :: BidiClass -> BidiClass -> [BidiClass]
enumFrom :: BidiClass -> [BidiClass]
$cenumFrom :: BidiClass -> [BidiClass]
fromEnum :: BidiClass -> Int
$cfromEnum :: BidiClass -> Int
toEnum :: Int -> BidiClass
$ctoEnum :: Int -> BidiClass
pred :: BidiClass -> BidiClass
$cpred :: BidiClass -> BidiClass
succ :: BidiClass -> BidiClass
$csucc :: BidiClass -> BidiClass
Enum, forall x. Rep BidiClass x -> BidiClass
forall x. BidiClass -> Rep BidiClass x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep BidiClass x -> BidiClass
$cfrom :: forall x. BidiClass -> Rep BidiClass x
Generic)

data DecompositionType
  = Font
  | NoBreak
  | Initial
  | Medial
  | Final
  | Isolated
  | Circle
  | Super
  | Sub
  | Vertical
  | Wide
  | Narrow
  | Small
  | Square
  | Fraction
  | Compat
  | Canonical
  deriving (Int -> DecompositionType -> ShowS
[DecompositionType] -> ShowS
DecompositionType -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [DecompositionType] -> ShowS
$cshowList :: [DecompositionType] -> ShowS
show :: DecompositionType -> [Char]
$cshow :: DecompositionType -> [Char]
showsPrec :: Int -> DecompositionType -> ShowS
$cshowsPrec :: Int -> DecompositionType -> ShowS
Show, ReadPrec [DecompositionType]
ReadPrec DecompositionType
Int -> ReadS DecompositionType
ReadS [DecompositionType]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [DecompositionType]
$creadListPrec :: ReadPrec [DecompositionType]
readPrec :: ReadPrec DecompositionType
$creadPrec :: ReadPrec DecompositionType
readList :: ReadS [DecompositionType]
$creadList :: ReadS [DecompositionType]
readsPrec :: Int -> ReadS DecompositionType
$creadsPrec :: Int -> ReadS DecompositionType
Read, DecompositionType -> DecompositionType -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DecompositionType -> DecompositionType -> Bool
$c/= :: DecompositionType -> DecompositionType -> Bool
== :: DecompositionType -> DecompositionType -> Bool
$c== :: DecompositionType -> DecompositionType -> Bool
Eq, Eq DecompositionType
DecompositionType -> DecompositionType -> Bool
DecompositionType -> DecompositionType -> Ordering
DecompositionType -> DecompositionType -> DecompositionType
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: DecompositionType -> DecompositionType -> DecompositionType
$cmin :: DecompositionType -> DecompositionType -> DecompositionType
max :: DecompositionType -> DecompositionType -> DecompositionType
$cmax :: DecompositionType -> DecompositionType -> DecompositionType
>= :: DecompositionType -> DecompositionType -> Bool
$c>= :: DecompositionType -> DecompositionType -> Bool
> :: DecompositionType -> DecompositionType -> Bool
$c> :: DecompositionType -> DecompositionType -> Bool
<= :: DecompositionType -> DecompositionType -> Bool
$c<= :: DecompositionType -> DecompositionType -> Bool
< :: DecompositionType -> DecompositionType -> Bool
$c< :: DecompositionType -> DecompositionType -> Bool
compare :: DecompositionType -> DecompositionType -> Ordering
$ccompare :: DecompositionType -> DecompositionType -> Ordering
Ord, Int -> DecompositionType
DecompositionType -> Int
DecompositionType -> [DecompositionType]
DecompositionType -> DecompositionType
DecompositionType -> DecompositionType -> [DecompositionType]
DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType]
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType]
$cenumFromThenTo :: DecompositionType
-> DecompositionType -> DecompositionType -> [DecompositionType]
enumFromTo :: DecompositionType -> DecompositionType -> [DecompositionType]
$cenumFromTo :: DecompositionType -> DecompositionType -> [DecompositionType]
enumFromThen :: DecompositionType -> DecompositionType -> [DecompositionType]
$cenumFromThen :: DecompositionType -> DecompositionType -> [DecompositionType]
enumFrom :: DecompositionType -> [DecompositionType]
$cenumFrom :: DecompositionType -> [DecompositionType]
fromEnum :: DecompositionType -> Int
$cfromEnum :: DecompositionType -> Int
toEnum :: Int -> DecompositionType
$ctoEnum :: Int -> DecompositionType
pred :: DecompositionType -> DecompositionType
$cpred :: DecompositionType -> DecompositionType
succ :: DecompositionType -> DecompositionType
$csucc :: DecompositionType -> DecompositionType
Enum, forall x. Rep DecompositionType x -> DecompositionType
forall x. DecompositionType -> Rep DecompositionType x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep DecompositionType x -> DecompositionType
$cfrom :: forall x. DecompositionType -> Rep DecompositionType x
Generic)

-- | A 'UChar' encodes the data in one line of @UnicodeData.txt@.
data UChar = UChar
  { UChar -> Int
uCodePoint :: Int,
    UChar -> Text
uName :: Text,
    UChar -> GeneralCategory
uGeneralCategory :: GeneralCategory,
    UChar -> Int
uCanonicalCombiningClass :: Int,
    UChar -> BidiClass
uBidiClass :: BidiClass,
    UChar -> DecompositionType
uDecompositionType :: DecompositionType,
    UChar -> [Int]
uDecompositionMapping :: [Int],
    UChar -> (Maybe Int, Maybe Int, Maybe Int)
uNumericTypeAndValue :: (Maybe Int, Maybe Int, Maybe Int),
    UChar -> Bool
uBidiMirrored :: Bool,
    UChar -> Text
uUnicode1Name :: Text,
    UChar -> Text
uISOComment :: Text,
    UChar -> Int
uSimpleUppercaseMapping :: Int,
    UChar -> Int
uSimpleLowercaseMapping :: Int,
    UChar -> Int
uSimpleTitlecaseMappping :: Int
  }
  deriving (Int -> UChar -> ShowS
[UChar] -> ShowS
UChar -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [UChar] -> ShowS
$cshowList :: [UChar] -> ShowS
show :: UChar -> [Char]
$cshow :: UChar -> [Char]
showsPrec :: Int -> UChar -> ShowS
$cshowsPrec :: Int -> UChar -> ShowS
Show, UChar -> UChar -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UChar -> UChar -> Bool
$c/= :: UChar -> UChar -> Bool
== :: UChar -> UChar -> Bool
$c== :: UChar -> UChar -> Bool
Eq, Eq UChar
UChar -> UChar -> Bool
UChar -> UChar -> Ordering
UChar -> UChar -> UChar
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: UChar -> UChar -> UChar
$cmin :: UChar -> UChar -> UChar
max :: UChar -> UChar -> UChar
$cmax :: UChar -> UChar -> UChar
>= :: UChar -> UChar -> Bool
$c>= :: UChar -> UChar -> Bool
> :: UChar -> UChar -> Bool
$c> :: UChar -> UChar -> Bool
<= :: UChar -> UChar -> Bool
$c<= :: UChar -> UChar -> Bool
< :: UChar -> UChar -> Bool
$c< :: UChar -> UChar -> Bool
compare :: UChar -> UChar -> Ordering
$ccompare :: UChar -> UChar -> Ordering
Ord, forall x. Rep UChar x -> UChar
forall x. UChar -> Rep UChar x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep UChar x -> UChar
$cfrom :: forall x. UChar -> Rep UChar x
Generic)

readCodePoint :: Text -> Int
readCodePoint :: Text -> Int
readCodePoint Text
t =
  case forall a. Integral a => Reader a
TR.hexadecimal Text
t of
    Left [Char]
e -> forall a. HasCallStack => [Char] -> a
error [Char]
e -- ok to error at compile-time
    Right (Int
codepoint, Text
_) -> Int
codepoint

-- | Read a sequence of space-separated hex numbers.
readCodePoints :: Text -> ([Int], Text)
readCodePoints :: Text -> ([Int], Text)
readCodePoints Text
t =
  case forall a. Integral a => Reader a
TR.hexadecimal Text
t of
    Left [Char]
_ -> ([], Text
t)
    Right (Int
codepoint, Text
rest) ->
      let ([Int]
cps, Text
t') = Text -> ([Int], Text)
readCodePoints ((Char -> Bool) -> Text -> Text
T.dropWhile (forall a. Eq a => a -> a -> Bool
== Char
' ') Text
rest)
       in (Int
codepoint forall a. a -> [a] -> [a]
: [Int]
cps, Text
t')

parseDecomp :: Text -> (DecompositionType, [Int])
parseDecomp :: Text -> (DecompositionType, [Int])
parseDecomp Text
bs =
  case Text -> Maybe (Char, Text)
T.uncons Text
bs of
    Just (Char
'<', Text
rest) -> (DecompositionType
ty, [Int]
xs)
      where
        xs :: [Int]
xs = forall a b. (a, b) -> a
fst forall a b. (a -> b) -> a -> b
$ Text -> ([Int], Text)
readCodePoints Text
cps
        (Text
x, Text
y) = (Char -> Bool) -> Text -> (Text, Text)
T.break (forall a. Eq a => a -> a -> Bool
== Char
'>') Text
rest
        cps :: Text
cps = (Char -> Bool) -> Text -> Text
T.dropWhile (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
'>' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
' ') Text
y
        ty :: DecompositionType
ty = case Text
x of
          Text
"font" -> DecompositionType
Font
          Text
"noBreak" -> DecompositionType
NoBreak
          Text
"initial" -> DecompositionType
Initial
          Text
"medial" -> DecompositionType
Medial
          Text
"final" -> DecompositionType
Final
          Text
"isolate" -> DecompositionType
Isolated
          Text
"circle" -> DecompositionType
Circle
          Text
"super" -> DecompositionType
Super
          Text
"sub" -> DecompositionType
Sub
          Text
"vertical" -> DecompositionType
Vertical
          Text
"wide" -> DecompositionType
Wide
          Text
"narrow" -> DecompositionType
Narrow
          Text
"small" -> DecompositionType
Small
          Text
"square" -> DecompositionType
Square
          Text
"fraction" -> DecompositionType
Fraction
          Text
"compat" -> DecompositionType
Compat
          Text
_ -> DecompositionType
Compat
    Maybe (Char, Text)
_ -> (DecompositionType
Canonical,) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall a b. (a -> b) -> a -> b
$ Text -> ([Int], Text)
readCodePoints Text
bs

parseLine :: Text -> M.IntMap UChar -> M.IntMap UChar
parseLine :: Text -> IntMap UChar -> IntMap UChar
parseLine Text
t =
  case Text -> Text -> [Text]
T.splitOn Text
";" Text
t of
    [Text
f0, Text
f1, Text
f2, Text
f3, Text
f4, Text
f5, Text
f6, Text
f7, Text
f8, Text
f9, Text
f10, Text
f11, Text
f12, Text
f13, Text
f14] ->
      forall a. Int -> a -> IntMap a -> IntMap a
M.insert Int
codepoint UChar
uchar
      where
        codepoint :: Int
codepoint = Text -> Int
readCodePoint Text
f0
        (DecompositionType
decompType, [Int]
decompMapping) = Text -> (DecompositionType, [Int])
parseDecomp Text
f5
        readNumericValue :: Text -> Maybe a
readNumericValue Text
x =
          case forall a. Integral a => Reader a
TR.decimal Text
x of
            Left [Char]
_ -> forall a. Maybe a
Nothing
            Right (a
v, Text
_) -> forall a. a -> Maybe a
Just a
v
        uchar :: UChar
uchar =
          UChar
            { uCodePoint :: Int
uCodePoint = Int
codepoint,
              uName :: Text
uName = Text
f1,
              uGeneralCategory :: GeneralCategory
uGeneralCategory = forall a. Read a => [Char] -> a
read (Text -> [Char]
T.unpack Text
f2),
              uCanonicalCombiningClass :: Int
uCanonicalCombiningClass = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const Int
0) forall a b. (a, b) -> a
fst (forall a. Integral a => Reader a
TR.decimal Text
f3),
              uBidiClass :: BidiClass
uBidiClass = forall a. Read a => [Char] -> a
read (Text -> [Char]
T.unpack Text
f4),
              uDecompositionType :: DecompositionType
uDecompositionType = DecompositionType
decompType,
              uDecompositionMapping :: [Int]
uDecompositionMapping = [Int]
decompMapping,
              uNumericTypeAndValue :: (Maybe Int, Maybe Int, Maybe Int)
uNumericTypeAndValue =
                ( forall {a}. Integral a => Text -> Maybe a
readNumericValue Text
f6,
                  forall {a}. Integral a => Text -> Maybe a
readNumericValue Text
f7,
                  forall {a}. Integral a => Text -> Maybe a
readNumericValue Text
f8
                ),
              uBidiMirrored :: Bool
uBidiMirrored = Text
f9 forall a. Eq a => a -> a -> Bool
== Text
"Y",
              uUnicode1Name :: Text
uUnicode1Name = Text
f10,
              uISOComment :: Text
uISOComment = Text
f11,
              uSimpleUppercaseMapping :: Int
uSimpleUppercaseMapping = Text -> Int
readCodePoint Text
f12,
              uSimpleLowercaseMapping :: Int
uSimpleLowercaseMapping = Text -> Int
readCodePoint Text
f13,
              uSimpleTitlecaseMappping :: Int
uSimpleTitlecaseMappping = Text -> Int
readCodePoint Text
f14
            }
    [Text]
_ -> forall a. HasCallStack => [Char] -> a
error forall a b. (a -> b) -> a -> b
$ [Char]
"Wrong number of fields in record:\n" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> [Char]
show Text
t