{-# LANGUAGE OverloadedStrings #-}
module Text.TeXMath.TeX (TeX(..),
renderTeX,
isControlSeq,
escapeLaTeX)
where
import Data.Char (isLetter, isAlphaNum, isAscii)
import Data.Semigroup ((<>))
import qualified Data.Text as T
data TeX = ControlSeq T.Text
| Token Char
| Literal T.Text
| Grouped [TeX]
| Space
deriving (Show, Eq)
renderTeX :: TeX -> T.Text -> T.Text
renderTeX (Token c) cs = T.cons c cs
renderTeX (Literal s) cs
| endsWith (not . isLetter) s = s <> cs
| startsWith isLetter cs = s <> T.cons ' ' cs
| otherwise = s <> cs
renderTeX (ControlSeq s) cs
| s == "\\ " = s <> cs
| startsWith (\c -> isAlphaNum c || not (isAscii c)) cs
= s <> T.cons ' ' cs
| otherwise = s <> cs
renderTeX (Grouped [Grouped xs]) cs = renderTeX (Grouped xs) cs
renderTeX (Grouped xs) cs =
"{" <> foldr renderTeX "" (trimSpaces xs) <> "}" <> cs
renderTeX Space cs
| cs == "" = ""
| any (`T.isPrefixOf` cs) ps = cs
| otherwise = T.cons ' ' cs
where
ps = [ "^", "_", " ", "\\limits" ]
trimSpaces :: [TeX] -> [TeX]
trimSpaces = reverse . go . reverse . go
where go = dropWhile (== Space)
startsWith :: (Char -> Bool) -> T.Text -> Bool
startsWith p t = case T.uncons t of
Just (c, _) -> p c
Nothing -> False
endsWith :: (Char -> Bool) -> T.Text -> Bool
endsWith p t = case T.unsnoc t of
Just (_, c) -> p c
Nothing -> False
isControlSeq :: T.Text -> Bool
isControlSeq t = case T.uncons t of
Just ('\\', xs) -> T.length xs == 1 && xs /= " "
|| T.all isLetter xs
_ -> False
escapeLaTeX :: Char -> TeX
escapeLaTeX c =
case c of
'~' -> ControlSeq "\\textasciitilde"
'^' -> Literal "\\textasciicircum"
'\\' -> ControlSeq "\\textbackslash"
'\x200B' -> Literal "\\!"
'\x200A' -> Literal "\\,"
'\x2006' -> Literal "\\,"
'\xA0' -> Literal "~"
'\x2005' -> Literal "\\:"
'\x2004' -> Literal "\\;"
'\x2001' -> ControlSeq "\\quad"
'\x2003' -> ControlSeq "\\quad"
'\x2032' -> Literal "'"
'\x2033' -> Literal "''"
'\x2034' -> Literal "'''"
_ | T.any (== c) "#$%&_{} " -> Literal ("\\" <> T.singleton c)
| otherwise -> Token c