module NLP.Romkan
(
toKatakana
, toHiragana
, toKana
, toRoma
, kunreiToHepburn
, hepburnToKunrei
) where
import qualified Data.Text as T
import Data.Attoparsec.Text as A
import Control.Applicative ((<|>), pure)
import NLP.Romkan.Internal
toKatakana :: T.Text -> T.Text
toKatakana = convertRoma romKanSubs
toHiragana :: T.Text -> T.Text
toHiragana = convertRoma romKanSubs_H
toKana :: T.Text -> T.Text
toKana = toKatakana
toRoma :: T.Text -> T.Text
toRoma = doParse (processText normalizeN') . doParse (processText kanRomSubs)
kunreiToHepburn :: T.Text -> T.Text
kunreiToHepburn = convertRoma kunreiToHepburnSubs
hepburnToKunrei :: T.Text -> T.Text
hepburnToKunrei = convertRoma hepburnToKunreiSubs
convertRoma :: Parser T.Text -> T.Text -> T.Text
convertRoma p t = doParse (processText p) (normalizeRoma t)
doParse :: Parser T.Text -> T.Text -> T.Text
doParse p t =
let res = parseOnly p t
in case res of
Left err -> error $ "Internal Error, parser should not be able to fail: " ++ err
Right kat -> kat
normalizeNN :: Parser T.Text
normalizeNN = do
_ <- string "nn"
cM <- peekChar
case cM of
Nothing -> return "n"
Just c -> if (isEndOfLine c || notInClass "aiueoyn" c)
then return "n"
else return "n'"
:: Parser T.Text
normalizeN' :: Parser T.Text
normalizeN' = do
_ <- string "n'"
cM <- peekChar
case cM of
Nothing -> return "n"
Just c -> if (isEndOfLine c || notInClass "aiueoyn" c)
then return "n"
else return "n'"
normalizeN :: Parser T.Text
normalizeN = processText (normalizeNN <|> normalizeN')
normalizeRoma :: T.Text -> T.Text
normalizeRoma s = doParse normalizeN $ T.toLower s
processText :: Parser T.Text -> Parser T.Text
processText p = do
ts <- many' (p <|> A.take 1)
return $ T.concat ts
sub :: (T.Text, T.Text) -> Parser T.Text
sub (src, dst) = src .*> pure dst
romKanSubs :: Parser T.Text
romKanSubs = choice $ map sub romKanAList
romKanSubs_H :: Parser T.Text
romKanSubs_H = choice $ map sub romKanAList_H
kanRomSubs :: Parser T.Text
kanRomSubs = choice $ map sub $ kanRomAList ++ kanRomAList_H
kunreiToHepburnSubs :: Parser T.Text
kunreiToHepburnSubs = choice $ map sub kunreiToHepburnAList
hepburnToKunreiSubs :: Parser T.Text
hepburnToKunreiSubs = choice $ map sub hepburnToKunreiAList