{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Collate.Normalize
( toNFD
)
where
import qualified Data.IntMap as M
import Text.Collate.UnicodeData (genCanonicalDecompositionMap)
import Text.Collate.CanonicalCombiningClass (canonicalCombiningClass)
import Data.List (sortOn)
canonicalDecompositionMap :: M.IntMap [Int]
canonicalDecompositionMap :: IntMap [Int]
canonicalDecompositionMap = $(Int
[Int]
[(Int, [Int])]
[(Int, [Int])] -> IntMap [Int]
forall a. [(Int, a)] -> IntMap a
genCanonicalDecompositionMap)
toNFD :: [Int] -> [Int]
toNFD :: [Int] -> [Int]
toNFD = [Int] -> [Int]
rearrangeCombiningMarks ([Int] -> [Int]) -> ([Int] -> [Int]) -> [Int] -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> [Int]
recursivelyDecompose
rearrangeCombiningMarks :: [Int] -> [Int]
rearrangeCombiningMarks :: [Int] -> [Int]
rearrangeCombiningMarks = [Int] -> [Int]
go
where
go :: [Int] -> [Int]
go [] = []
go (Int
c:[Int]
cs) =
if Int -> Int
canonicalCombiningClass Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
then
Int
c Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: case [Int] -> ([Int], [Int])
reorderMarks [Int]
cs of
([], [Int]
rest) -> [Int] -> [Int]
go [Int]
rest
([Int]
marks, [Int]
rest) -> (Int -> [Int] -> [Int]) -> [Int] -> [Int] -> [Int]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (:) ([Int] -> [Int]
go [Int]
rest) [Int]
marks
else
case [Int] -> ([Int], [Int])
reorderMarks (Int
cInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[Int]
cs) of
([], [Int]
rest) -> [Int] -> [Int]
go [Int]
rest
([Int]
marks, [Int]
rest) -> (Int -> [Int] -> [Int]) -> [Int] -> [Int] -> [Int]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (:) ([Int] -> [Int]
go [Int]
rest) [Int]
marks
reorderMarks :: [Int] -> ([Int], [Int])
reorderMarks [Int]
zs =
case (Int -> Bool) -> [Int] -> ([Int], [Int])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (\Int
z -> Int -> Int
canonicalCombiningClass Int
z Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0) [Int]
zs of
([], [Int]
ys) -> ([], [Int]
ys)
([Int]
xs, [Int]
ys) -> ((Int -> Int) -> [Int] -> [Int]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn Int -> Int
canonicalCombiningClass [Int]
xs, [Int]
ys)
recursivelyDecompose :: [Int] -> [Int]
recursivelyDecompose :: [Int] -> [Int]
recursivelyDecompose = (Int -> [Int] -> [Int]) -> [Int] -> [Int] -> [Int]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Int -> [Int] -> [Int]
go [Int]
forall a. Monoid a => a
mempty
where go :: Int -> [Int] -> [Int]
go Int
c
| Int
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xc0 = (Int
c Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
| Int -> Bool
isHangulSyllable Int
c = Int -> [Int] -> [Int]
decomposeHangulSyllable Int
c
| Bool
otherwise =
case Int -> IntMap [Int] -> Maybe [Int]
forall a. Int -> IntMap a -> Maybe a
M.lookup Int
c IntMap [Int]
canonicalDecompositionMap of
Maybe [Int]
Nothing -> (Int
c Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
Just [Int]
ds -> (\[Int]
xs -> (Int -> [Int] -> [Int]) -> [Int] -> [Int] -> [Int]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Int -> [Int] -> [Int]
go [Int]
xs [Int]
ds)
isHangulSyllable :: Int -> Bool
isHangulSyllable :: Int -> Bool
isHangulSyllable Int
cp = Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0xAC00 Bool -> Bool -> Bool
&& Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xD7A3
decomposeHangulSyllable :: Int -> ([Int] -> [Int])
decomposeHangulSyllable :: Int -> [Int] -> [Int]
decomposeHangulSyllable !Int
c =
if Int
sindex Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| Int
sindex Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
scount
then (Int
cInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
else
let l :: Int
l = Int
lbase Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
sindex Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
ncount)
v :: Int
v = Int
vbase Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ((Int
sindex Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
ncount) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
tcount)
t :: Int
t = Int
tbase Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
sindex Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
tcount)
in if Int
t Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
tbase
then (Int
lInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:) ([Int] -> [Int]) -> ([Int] -> [Int]) -> [Int] -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
vInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:) ([Int] -> [Int]) -> ([Int] -> [Int]) -> [Int] -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
tInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
else (Int
lInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:) ([Int] -> [Int]) -> ([Int] -> [Int]) -> [Int] -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
vInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
where
!sindex :: Int
sindex = Int
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
sbase
!sbase :: Int
sbase = Int
0xAC00
!lbase :: Int
lbase = Int
0x1100
!vbase :: Int
vbase = Int
0x1161
!tbase :: Int
tbase = Int
0x11A7
!tcount :: Int
tcount = Int
28
!ncount :: Int
ncount = Int
588
!scount :: Int
scount = Int
11172