module Data.RFC5051 (compareUnicode)
where
import Data.RFC5051.UnicodeData (decompositionMap)
import qualified Data.Map as M
import Data.Char (ord, toTitle)
import Data.Text (Text)
import qualified Data.Text as T

-- | Compare two strings using @i;unicode-casemap@,
-- the simple unicode collation algorithm described in RFC 5051.
--
-- Every character of each input is passed through 'canonicalize' and the
-- resulting code point lists are concatenated, so the ordering is decided
-- on the whole transformed string rather than one source character at a
-- time.  This matters whenever a single character expands to several code
-- points: if a character canonicalizes to @[a, b]@, it compares 'EQ' to the
-- two-character string whose characters canonicalize to @[a]@ and @[b]@.
--
-- RFC 5051 orders the transformed strings by their UTF-8 octets; comparing
-- the code points directly yields the same order.
--
-- A string that is a proper prefix (after transformation) of the other
-- sorts first; two empty strings are 'EQ'.
compareUnicode :: Text -> Text -> Ordering
compareUnicode x y = compare (codePoints x) (codePoints y)
  where
    -- The list is produced lazily and list 'compare' stops at the first
    -- differing element, so inputs are only expanded as far as needed.
    codePoints :: Text -> [Int]
    codePoints = concatMap canonicalize . T.unpack

-- | Turn one character into the list of code points used for comparison:
-- the character is title-cased with 'toTitle', converted to its code point
-- with 'ord', and that code point is then expanded by 'decompose'.
canonicalize :: Char -> [Int]
canonicalize ch = decompose (ord (toTitle ch))

-- | Fully expand a code point.
--
-- If @decompose'@ has no entry for the code point it is returned unchanged
-- as a singleton list; otherwise each code point of the entry is expanded
-- in turn and the results are concatenated in order.
--
-- NOTE(review): the recursion only terminates if the underlying table has
-- no cycles -- presumably guaranteed by the data, confirm against
-- "Data.RFC5051.UnicodeData".
decompose :: Int -> [Int]
decompose cp = maybe [cp] (concatMap decompose) (decompose' cp)

-- | Single-step lookup of a code point in 'decompositionMap'.
--
-- Returns 'Nothing' when the table has no entry for the code point, and
-- 'Just' the mapped code points otherwise.  No recursion happens here; see
-- 'decompose' for the full expansion.
decompose' :: Int -> Maybe [Int]
decompose' = flip M.lookup decompositionMap