module Toml.Parser
( ParseException (..)
, parse
, arrayP
, boolP
, dateTimeP
, doubleP
, integerP
, keyP
, keyValP
, textP
, tableHeaderP
, tomlP
) where
import Control.Applicative (Alternative (..))
import Control.Applicative.Combinators (between, count, manyTill, optional, sepEndBy, skipMany)
import Control.Monad (void)
import Data.Char (chr, digitToInt, isControl)
import Data.Fixed (Pico)
import Data.List (foldl')
import Data.Semigroup ((<>))
import Data.Text (Text)
import Data.Time (LocalTime (..), ZonedTime (..), fromGregorianValid, makeTimeOfDayValid,
minutesToTimeZone)
import Data.Void (Void)
import Text.Megaparsec (Parsec, parseErrorPretty', try)
import Text.Megaparsec.Char (alphaNumChar, anyChar, char, digitChar, eol, hexDigitChar, oneOf,
satisfy, space, space1, string, tab)
import Toml.PrefixTree (Key (..), Piece (..), fromList)
import Toml.Type (AnyValue, DateTime (..), TOML (..), UValue (..), typeCheck)
import qualified Control.Applicative.Combinators.NonEmpty as NC
import qualified Data.HashMap.Lazy as HashMap
import qualified Data.Text as Text
import qualified Text.Megaparsec as Mega (parse)
import qualified Text.Megaparsec.Char.Lexer as L
type Parser = Parsec Void Text
sc :: Parser ()
sc = L.space space1 lineComment blockComment
where
lineComment = L.skipLineComment "#"
blockComment = empty
lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc
text :: Text -> Parser Text
text = L.symbol sc
text_ :: Text -> Parser ()
text_ = void . text
textP :: Parser Text
textP = multilineBasicStringP <|> multilineLiteralStringP <|> literalStringP <|> basicStringP
nonControlCharP :: Parser Text
nonControlCharP = Text.singleton <$> satisfy (not . isControl)
escapeSequenceP :: Parser Text
escapeSequenceP = char '\\' >> anyChar >>= \case
'b' -> pure "\b"
't' -> pure "\t"
'n' -> pure "\n"
'f' -> pure "\f"
'r' -> pure "\r"
'"' -> pure "\""
'\\' -> pure "\\"
'u' -> hexUnicodeP 4
'U' -> hexUnicodeP 8
c -> fail $ "Invalid escape sequence: " <> "\\" <> [c]
where
hexUnicodeP :: Int -> Parser Text
hexUnicodeP n = count n hexDigitChar
>>= \x -> case toUnicode $ hexToInt x of
Just c -> pure (Text.singleton c)
Nothing -> fail $ "Invalid unicode character: " <> "\\" <> (if n == 4 then "u" else "U") <> x
where
hexToInt :: String -> Int
hexToInt xs = read $ "0x" ++ xs
toUnicode :: Int -> Maybe Char
toUnicode x
| x >= 0 && x <= 0xD7FF = Just (chr x)
| x >= 0xE000 && x <= 0x10FFFF = Just (chr x)
| otherwise = Nothing
basicStringP :: Parser Text
basicStringP = lexeme $ mconcat <$> (char '"' *> (escapeSequenceP <|> nonControlCharP) `manyTill` char '"')
literalStringP :: Parser Text
literalStringP = lexeme $ Text.pack <$> (char '\'' *> nonEolCharP `manyTill` char '\'')
where
nonEolCharP :: Parser Char
nonEolCharP = satisfy (\c -> c /= '\n' && c /= '\r')
multilineP :: Parser Text -> Parser Text -> Parser Text
multilineP quotesP allowedCharP = lexeme $ fmap mconcat $ quotesP >> optional eol >> allowedCharP `manyTill` quotesP
multilineBasicStringP :: Parser Text
multilineBasicStringP = multilineP quotesP allowedCharP
where
quotesP = string "\"\"\""
allowedCharP :: Parser Text
allowedCharP = lineEndingBackslashP <|> escapeSequenceP <|> nonControlCharP <|> eol
lineEndingBackslashP :: Parser Text
lineEndingBackslashP = Text.empty <$ try (char '\\' >> eol >> space)
multilineLiteralStringP :: Parser Text
multilineLiteralStringP = multilineP quotesP allowedCharP
where
quotesP = string "'''"
allowedCharP :: Parser Text
allowedCharP = nonControlCharP <|> eol <|> Text.singleton <$> tab
bareKeyP :: Parser Text
bareKeyP = lexeme $ Text.pack <$> bareStrP
where
bareStrP :: Parser String
bareStrP = some $ alphaNumChar <|> char '_' <|> char '-'
quote :: Text -> Text -> Text
quote q t = q <> t <> q
keyComponentP :: Parser Piece
keyComponentP = Piece <$> (bareKeyP <|> (quote "\"" <$> basicStringP) <|> (quote "'" <$> literalStringP))
keyP :: Parser Key
keyP = Key <$> NC.sepBy1 keyComponentP (char '.')
tableNameP :: Parser Key
tableNameP = lexeme $ between (char '[') (char ']') keyP
integerP :: Parser Integer
integerP = lexeme $ binary <|> octal <|> hexadecimal <|> decimal
where
decimal = L.signed sc L.decimal
binary = try (char '0' >> char 'b') >> mkNum 2 <$> (some binDigitChar)
octal = try (char '0' >> char 'o') >> L.octal
hexadecimal = try (char '0' >> char 'x') >> L.hexadecimal
binDigitChar = oneOf ['0', '1']
mkNum b = foldl' (step b) 0
step b a c = a * b + fromIntegral (digitToInt c)
doubleP :: Parser Double
doubleP = lexeme $ L.signed sc (num <|> inf <|> nan)
where
num, inf, nan :: Parser Double
num = L.float
inf = 1 / 0 <$ string "inf"
nan = 0 / 0 <$ string "nan"
boolP :: Parser Bool
boolP = False <$ text "false"
<|> True <$ text "true"
dateTimeP :: Parser DateTime
dateTimeP = lexeme $ try hoursP <|> dayLocalZoned
where
dayLocalZoned :: Parser DateTime
dayLocalZoned = do
let makeLocal (Day day) (Hours hours) = Local $ LocalTime day hours
makeLocal _ _ = error "Invalid arguments, unable to construct `Local`"
makeZoned (Local localTime) mins = Zoned $ ZonedTime localTime (minutesToTimeZone mins)
makeZoned _ _ = error "Invalid arguments, unable to construct `Zoned`"
day <- try dayP
maybeHours <- optional (try $ (char 'T' <|> char ' ') *> hoursP)
case maybeHours of
Nothing -> return day
Just hours -> do
maybeOffset <- optional (try timeOffsetP)
case maybeOffset of
Nothing -> return (makeLocal day hours)
Just offset -> return (makeZoned (makeLocal day hours) offset)
timeOffsetP :: Parser Int
timeOffsetP = z <|> numOffset
where
z = pure 0 <* char 'Z'
numOffset = do
sign <- char '+' <|> char '-'
hours <- int2DigitsP
_ <- char ':'
minutes <- int2DigitsP
let totalMinutes = hours * 60 + minutes
if sign == '+'
then return totalMinutes
else return (negate totalMinutes)
hoursP :: Parser DateTime
hoursP = do
hours <- int2DigitsP
_ <- char ':'
minutes <- int2DigitsP
_ <- char ':'
seconds <- picoTruncated
case makeTimeOfDayValid hours minutes seconds of
Just time -> return (Hours time)
Nothing -> fail $ "Invalid time of day: " <> show hours <> ":" <> show minutes <> ":" <> show seconds
dayP :: Parser DateTime
dayP = do
year <- integer4DigitsP
_ <- char '-'
month <- int2DigitsP
_ <- char '-'
day <- int2DigitsP
case fromGregorianValid year month day of
Just date -> return (Day date)
Nothing -> fail $ "Invalid date: " <> show year <> "-" <> show month <> "-" <> show day
integer4DigitsP = (read :: String -> Integer) <$> count 4 digitChar
int2DigitsP = (read :: String -> Int) <$> count 2 digitChar
picoTruncated = do
let rdPico = read :: String -> Pico
int <- count 2 digitChar
frc <- optional (char '.' >> take 12 <$> some digitChar)
case frc of
Nothing -> return (rdPico int)
Just frc' -> return (rdPico $ int ++ "." ++ frc')
arrayP :: Parser [UValue]
arrayP = lexeme $ between (char '[' *> sc) (char ']') elements
where
elements :: Parser [UValue]
elements = valueP `sepEndBy` spComma <* skipMany (text ",")
spComma :: Parser ()
spComma = char ',' *> sc
valueP :: Parser UValue
valueP = UBool <$> boolP
<|> UDate <$> dateTimeP
<|> UDouble <$> try doubleP
<|> UInteger <$> integerP
<|> UText <$> textP
<|> UArray <$> arrayP
keyValP :: Parser (Key, AnyValue)
keyValP = do
k <- keyP
text_ "="
uval <- valueP
case typeCheck uval of
Left err -> fail $ show err
Right v -> pure (k, v)
tableHeaderP :: Parser (Key, TOML)
tableHeaderP = do
k <- tableNameP
toml <- makeToml <$> many keyValP
pure (k, toml)
where
makeToml :: [(Key, AnyValue)] -> TOML
makeToml kv = TOML (HashMap.fromList kv) mempty
tomlP :: Parser TOML
tomlP = do
sc
kvs <- many keyValP
tables <- many tableHeaderP
pure TOML { tomlPairs = HashMap.fromList kvs
, tomlTables = fromList tables
}
newtype ParseException = ParseException Text
deriving (Show, Eq)
parse :: Text -> Either ParseException TOML
parse t = case Mega.parse tomlP "" t of
Left err -> Left $ ParseException $ Text.pack $ parseErrorPretty' t err
Right toml -> Right toml