module Toml.Parser.String
( textP
, basicStringP
, literalStringP
) where
import Control.Applicative (Alternative (..))
import Control.Applicative.Combinators (count, manyTill, optional)
import Data.Char (chr, isControl)
import Data.Semigroup ((<>))
import Data.Text (Text)
import Toml.Parser.Core (Parser, anySingle, char, eol, hexDigitChar, lexeme, satisfy, space, string,
tab, try, (<?>))
import qualified Data.Text as Text
textP :: Parser Text
textP = (multilineBasicStringP <?> "multiline basic string")
<|> (multilineLiteralStringP <?> "multiline literal string")
<|> (literalStringP <?> "literal string")
<|> (basicStringP <?> "basic string")
<?> "text"
nonControlCharP :: Parser Text
nonControlCharP = Text.singleton <$> satisfy (not . isControl) <?> "non-control char"
escapeSequenceP :: Parser Text
escapeSequenceP = char '\\' *> anySingle >>= \case
'b' -> pure "\b"
't' -> pure "\t"
'n' -> pure "\n"
'f' -> pure "\f"
'r' -> pure "\r"
'"' -> pure "\""
'\\' -> pure "\\"
'u' -> hexUnicodeP 4
'U' -> hexUnicodeP 8
c -> fail $ "Invalid escape sequence: " <> "\\" <> [c]
where
hexUnicodeP :: Int -> Parser Text
hexUnicodeP n = count n hexDigitChar >>= \x -> case toUnicode $ hexToInt x of
Just c -> pure (Text.singleton c)
Nothing -> fail $ "Invalid unicode character: \\"
<> (if n == 4 then "u" else "U")
<> x
where
hexToInt :: String -> Int
hexToInt xs = read $ "0x" ++ xs
toUnicode :: Int -> Maybe Char
toUnicode x
| x >= 0 && x <= 0xD7FF = Just (chr x)
| x >= 0xE000 && x <= 0x10FFFF = Just (chr x)
| otherwise = Nothing
basicStringP :: Parser Text
basicStringP = lexeme $ mconcat <$> (char '"' *> charP `manyTill` char '"')
where
charP :: Parser Text
charP = escapeSequenceP <|> nonControlCharP
literalStringP :: Parser Text
literalStringP = lexeme $ Text.pack <$> (char '\'' *> nonEolCharP `manyTill` char '\'')
where
nonEolCharP :: Parser Char
nonEolCharP = satisfy (\c -> c /= '\n' && c /= '\r')
multilineP :: Parser Text -> Parser Text -> Parser Text
multilineP quotesP allowedCharP = lexeme $ mconcat <$>
(quotesP *> optional eol *> allowedCharP `manyTill` quotesP)
multilineBasicStringP :: Parser Text
multilineBasicStringP = multilineP quotesP allowedCharP
where
quotesP :: Parser Text
quotesP = string "\"\"\""
allowedCharP :: Parser Text
allowedCharP = lineEndingBackslashP <|> escapeSequenceP <|> nonControlCharP <|> eol
lineEndingBackslashP :: Parser Text
lineEndingBackslashP = Text.empty <$ try (char '\\' >> eol >> space)
multilineLiteralStringP :: Parser Text
multilineLiteralStringP = multilineP quotesP allowedCharP
where
quotesP :: Parser Text
quotesP = string "'''"
allowedCharP :: Parser Text
allowedCharP = nonControlCharP <|> eol <|> Text.singleton <$> tab