{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Collate.Normalize
( toNFD
)
where
import qualified Data.IntMap as M
import Text.Collate.UnicodeData (genCanonicalDecompositionMap)
import Text.Collate.CanonicalCombiningClass (canonicalCombiningClass)
import Data.List (sortOn)
canonicalDecompositionMap :: M.IntMap [Int]
canonicalDecompositionMap :: IntMap [Int]
canonicalDecompositionMap = $(genCanonicalDecompositionMap)
toNFD :: [Int] -> [Int]
toNFD :: [Int] -> [Int]
toNFD = [Int] -> [Int]
rearrangeCombiningMarks forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> [Int]
recursivelyDecompose
rearrangeCombiningMarks :: [Int] -> [Int]
rearrangeCombiningMarks :: [Int] -> [Int]
rearrangeCombiningMarks = [Int] -> [Int]
go
where
go :: [Int] -> [Int]
go [] = []
go (Int
c:[Int]
cs) =
if Int -> Int
canonicalCombiningClass Int
c forall a. Eq a => a -> a -> Bool
== Int
0
then Int
c forall a. a -> [a] -> [a]
: [Int] -> [Int]
reorderMarks [Int]
cs
else [Int] -> [Int]
reorderMarks (Int
cforall a. a -> [a] -> [a]
:[Int]
cs)
reorderMarks :: [Int] -> [Int]
reorderMarks [Int]
zs =
case forall a. (a -> Bool) -> [a] -> ([a], [a])
break (\Int
z -> Int -> Int
canonicalCombiningClass Int
z forall a. Eq a => a -> a -> Bool
== Int
0) [Int]
zs of
([], [Int]
ys) -> [Int] -> [Int]
go [Int]
ys
([Int
x], [Int]
ys) -> Int
x forall a. a -> [a] -> [a]
: [Int] -> [Int]
go [Int]
ys
([Int
x1,Int
x2], [Int]
ys)
| Int -> Int
canonicalCombiningClass Int
x1 forall a. Ord a => a -> a -> Bool
<= Int -> Int
canonicalCombiningClass Int
x2
-> Int
x1 forall a. a -> [a] -> [a]
: Int
x2 forall a. a -> [a] -> [a]
: [Int] -> [Int]
go [Int]
ys
| Bool
otherwise -> Int
x2 forall a. a -> [a] -> [a]
: Int
x1 forall a. a -> [a] -> [a]
: [Int] -> [Int]
go [Int]
ys
([Int]
xs, [Int]
ys) -> forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn Int -> Int
canonicalCombiningClass [Int]
xs forall a. [a] -> [a] -> [a]
++ [Int] -> [Int]
go [Int]
ys
recursivelyDecompose :: [Int] -> [Int]
recursivelyDecompose :: [Int] -> [Int]
recursivelyDecompose = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Int -> [Int] -> [Int]
go forall a. Monoid a => a
mempty
where go :: Int -> [Int] -> [Int]
go Int
c
| Int
c forall a. Ord a => a -> a -> Bool
< Int
0xc0 = (Int
c forall a. a -> [a] -> [a]
:)
| Int -> Bool
isHangulSyllable Int
c = Int -> [Int] -> [Int]
decomposeHangulSyllable Int
c
| Bool
otherwise =
case forall a. Int -> IntMap a -> Maybe a
M.lookup Int
c IntMap [Int]
canonicalDecompositionMap of
Maybe [Int]
Nothing -> (Int
c forall a. a -> [a] -> [a]
:)
Just [Int]
ds -> (\[Int]
xs -> forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Int -> [Int] -> [Int]
go [Int]
xs [Int]
ds)
isHangulSyllable :: Int -> Bool
isHangulSyllable :: Int -> Bool
isHangulSyllable Int
cp = Int
cp forall a. Ord a => a -> a -> Bool
>= Int
0xAC00 Bool -> Bool -> Bool
&& Int
cp forall a. Ord a => a -> a -> Bool
<= Int
0xD7A3
decomposeHangulSyllable :: Int -> ([Int] -> [Int])
decomposeHangulSyllable :: Int -> [Int] -> [Int]
decomposeHangulSyllable !Int
c =
if Int
sindex forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| Int
sindex forall a. Ord a => a -> a -> Bool
>= Int
scount
then (Int
cforall a. a -> [a] -> [a]
:)
else
let l :: Int
l = Int
lbase forall a. Num a => a -> a -> a
+ (Int
sindex forall a. Integral a => a -> a -> a
`div` Int
ncount)
v :: Int
v = Int
vbase forall a. Num a => a -> a -> a
+ ((Int
sindex forall a. Integral a => a -> a -> a
`mod` Int
ncount) forall a. Integral a => a -> a -> a
`div` Int
tcount)
t :: Int
t = Int
tbase forall a. Num a => a -> a -> a
+ (Int
sindex forall a. Integral a => a -> a -> a
`mod` Int
tcount)
in if Int
t forall a. Eq a => a -> a -> Bool
/= Int
tbase
then (Int
lforall a. a -> [a] -> [a]
:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
vforall a. a -> [a] -> [a]
:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
tforall a. a -> [a] -> [a]
:)
else (Int
lforall a. a -> [a] -> [a]
:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
vforall a. a -> [a] -> [a]
:)
where
!sindex :: Int
sindex = Int
c forall a. Num a => a -> a -> a
- Int
sbase
!sbase :: Int
sbase = Int
0xAC00
!lbase :: Int
lbase = Int
0x1100
!vbase :: Int
vbase = Int
0x1161
!tbase :: Int
tbase = Int
0x11A7
!tcount :: Int
tcount = Int
28
!ncount :: Int
ncount = Int
588
!scount :: Int
scount = Int
11172