{- |
   Module      : Text.Pandoc.Asciify
   Copyright   : Copyright (C) 2013-2024 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Function to convert accented latin letters to their unaccented
ascii equivalents (used in constructing HTML identifiers).
-}
module Text.Pandoc.Asciify (toAsciiChar, toAsciiText)
where
import Data.Char (isAscii, isMark)
import qualified Data.Text.Normalize as TN
import Data.Text (Text)
import qualified Data.Text as T

toAsciiText :: Text -> Text
toAsciiText :: Text -> Text
toAsciiText = (Char -> Bool) -> Text -> Text
T.filter Char -> Bool
isAscii (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> Text -> Text
T.map Char -> Char
specialCase (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NormalizationMode -> Text -> Text
TN.normalize (NormalizationMode
TN.NFD)
 where
  specialCase :: Char -> Char
specialCase Char
'\x131' = Char
'i' -- Turkish undotted i
  specialCase Char
c = Char
c

toAsciiChar :: Char -> Maybe Char
toAsciiChar :: Char -> Maybe Char
toAsciiChar Char
c = case Text -> String
T.unpack (NormalizationMode -> Text -> Text
TN.normalize NormalizationMode
TN.NFD (Char -> Text
T.singleton Char
c)) of
                  (Char
x:String
xs) | Char -> Bool
isAscii Char
x
                         , (Char -> Bool) -> String -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isMark String
xs
                         -> Char -> Maybe Char
forall a. a -> Maybe a
Just Char
x
                  [Char
'\x131'] -> Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'i'  -- Turkish undotted i
                  String
_      -> Maybe Char
forall a. Maybe a
Nothing