{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE PostfixOperators #-}
{-# LANGUAGE Safe #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module Data.YAML.Token
( tokenize
, Token(..)
, Code(..)
) where
import qualified Data.ByteString.Lazy.Char8 as BLC
import qualified Data.DList as D
import Prelude hiding ((*), (+), (-), (/), (^))
import qualified Prelude
import Data.YAML.Token.Encoding (Encoding (..), decode)
import Util hiding (empty)
import qualified Util
infixl 6 .+
(.+) :: Int -> Int -> Int
(.+) = (Prelude.+)
infixl 6 .-
(.-) :: Int -> Int -> Int
(.-) = (Prelude.-)
infixl 8 ^.
(^.) :: record -> (record -> value) -> value
record ^. field = field record
data Code = Bom
| Text
| Meta
| Break
| LineFeed
| LineFold
| Indicator
| White
| Indent
| DirectivesEnd
| DocumentEnd
| BeginEscape
| EndEscape
| BeginComment
| EndComment
| BeginDirective
| EndDirective
| BeginTag
| EndTag
| BeginHandle
| EndHandle
| BeginAnchor
| EndAnchor
| BeginProperties
| EndProperties
| BeginAlias
| EndAlias
| BeginScalar
| EndScalar
| BeginSequence
| EndSequence
| BeginMapping
| EndMapping
| BeginPair
| EndPair
| BeginNode
| EndNode
| BeginDocument
| EndDocument
| BeginStream
| EndStream
| Error
| Unparsed
| Detected
deriving (Show,Eq)
data Token = Token {
tByteOffset :: !Int,
tCharOffset :: !Int,
tLine :: !Int,
tLineChar :: !Int,
tCode :: !Code,
tText :: !String
} deriving Show
newtype Parser result = Parser (State -> Reply result)
applyParser :: Parser result -> State -> Reply result
applyParser (Parser p) s = p s
data Result result = Failed String
| Result result
| More (Parser result)
instance (Show result) => Show (Result result) where
show result = case result of
Failed message -> "Failed " ++ message
Result result -> "Result " ++ (show result)
More _ -> "More"
data Reply result = Reply {
rResult :: !(Result result),
rTokens :: !(D.DList Token),
rCommit :: !(Maybe Decision),
rState :: !State
}
instance (Show result) => Show (Reply result) where
show reply = "Result: " ++ (show $ reply^.rResult)
++ ", Tokens: " ++ (show $ D.toList $ reply^.rTokens)
++ ", Commit: " ++ (show $ reply^.rCommit)
++ ", State: { " ++ (show $ reply^.rState) ++ "}"
type Pattern = Parser ()
data State = State {
sEncoding :: !Encoding,
sDecision :: !Decision,
sLimit :: !Int,
sForbidden :: !(Maybe Pattern),
sIsPeek :: !Bool,
sIsSol :: !Bool,
sChars :: ![Char],
sCharsByteOffset :: !Int,
sCharsCharOffset :: !Int,
sCharsLine :: !Int,
sCharsLineChar :: !Int,
sByteOffset :: !Int,
sCharOffset :: !Int,
sLine :: !Int,
sLineChar :: !Int,
sCode :: !Code,
sLast :: !Char,
sInput :: ![(Int, Char)]
}
instance Show State where
show state = "Encoding: " ++ (show $ state^.sEncoding)
++ ", Decision: " ++ (show $ state^.sDecision)
++ ", Limit: " ++ (show $ state^.sLimit)
++ ", IsPeek: " ++ (show $ state^.sIsPeek)
++ ", IsSol: " ++ (show $ state^.sIsSol)
++ ", Chars: >>>" ++ (reverse $ state^.sChars) ++ "<<<"
++ ", CharsByteOffset: " ++ (show $ state^.sCharsByteOffset)
++ ", CharsCharOffset: " ++ (show $ state^.sCharsCharOffset)
++ ", CharsLine: " ++ (show $ state^.sCharsLine)
++ ", CharsLineChar: " ++ (show $ state^.sCharsLineChar)
++ ", ByteOffset: " ++ (show $ state^.sByteOffset)
++ ", CharOffset: " ++ (show $ state^.sCharOffset)
++ ", Line: " ++ (show $ state^.sLine)
++ ", LineChar: " ++ (show $ state^.sLineChar)
++ ", Code: " ++ (show $ state^.sCode)
++ ", Last: " ++ (show $ state^.sLast)
initialState :: BLC.ByteString -> State
initialState input
= State { sEncoding = encoding
, sDecision = DeNone
, sLimit = -1
, sForbidden = Nothing
, sIsPeek = False
, sIsSol = True
, sChars = []
, sCharsByteOffset = -1
, sCharsCharOffset = -1
, sCharsLine = -1
, sCharsLineChar = -1
, sByteOffset = 0
, sCharOffset = 0
, sLine = 1
, sLineChar = 0
, sCode = Unparsed
, sLast = ' '
, sInput = decoded
}
where
(encoding, decoded) = decode input
setLimit :: Int -> State -> State
setLimit limit state = state { sLimit = limit }
setForbidden :: Maybe Pattern -> State -> State
setForbidden forbidden state = state { sForbidden = forbidden }
setCode :: Code -> State -> State
setCode code state = state { sCode = code }
class Match parameter result | parameter -> result where
match :: parameter -> Parser result
instance Match (Parser result) result where
match = id
instance Match Char () where
match code = nextIf (== code)
instance Match (Char, Char) () where
match (low, high) = nextIf $ \ code -> low <= code && code <= high
instance Match String () where
match = foldr (&) empty
returnReply :: State -> result -> Reply result
returnReply state result = Reply { rResult = Result result,
rTokens = D.empty,
rCommit = Nothing,
rState = state }
tokenReply :: State -> Token -> Reply ()
tokenReply state token = Reply { rResult = Result (),
rTokens = D.singleton token,
rCommit = Nothing,
rState = state { sCharsByteOffset = -1,
sCharsCharOffset = -1,
sCharsLine = -1,
sCharsLineChar = -1,
sChars = [] } }
failReply :: State -> String -> Reply result
failReply state message = Reply { rResult = Failed message,
rTokens = D.empty,
rCommit = Nothing,
rState = state }
unexpectedReply :: State -> Reply result
unexpectedReply state = case state^.sInput of
((_, char):_) -> failReply state $ "Unexpected '" ++ [char] ++ "'"
[] -> failReply state "Unexpected end of input"
instance Functor Parser where
fmap g f = Parser $ \state ->
let reply = applyParser f state
in case reply^.rResult of
Failed message -> reply { rResult = Failed message }
Result x -> reply { rResult = Result (g x) }
More parser -> reply { rResult = More $ fmap g parser }
instance Applicative Parser where
pure result = Parser $ \state -> returnReply state result
(<*>) = ap
left *> right = Parser $ \state ->
let reply = applyParser left state
in case reply^.rResult of
Failed message -> reply { rResult = Failed message }
Result _ -> reply { rResult = More right }
More parser -> reply { rResult = More $ parser *> right }
instance Monad Parser where
return = pure
left >>= right = Parser $ \state ->
let reply = applyParser left state
in case reply^.rResult of
Failed message -> reply { rResult = Failed message }
Result value -> reply { rResult = More $ right value }
More parser -> reply { rResult = More $ parser >>= right }
(>>) = (*>)
fail message = Parser $ \state -> failReply state message
infix 3 ^
infix 3 %
infix 3 <%
infix 3 !
infix 3 ?!
infixl 3 -
infixr 2 &
infixr 1 /
infix 0 ?
infix 0 *
infix 0 +
infix 0 <?
infix 0 >?
infix 0 >!
(%) :: (Match match result) => match -> Int -> Pattern
parser % n
| n <= 0 = empty
| n > 0 = parser' *> (parser' % n .- 1)
where
parser' = match parser
(<%) :: (Match match result) => match -> Int -> Pattern
parser <% n
| n < 1 = fail "Fewer than 0 repetitions"
| n == 1 = reject parser Nothing
| n > 1 = DeLess ^ ( ((parser ! DeLess) *> (parser <% n .- 1)) <|> empty )
data Decision = DeNone
| DeStar
| DeLess
| DeDirective
| DeDoc
| DeEscape
| DeEscaped
| DeFold
| DeKey
| DeHeader
| DeMore
| DeNode
| DePair
deriving (Show,Eq)
(^) :: (Match match result) => Decision -> match -> Parser result
decision ^ parser = choice decision $ match parser
(!) :: (Match match result) => match -> Decision -> Pattern
parser ! decision = match parser *> commit decision
(?!) :: (Match match result) => match -> Decision -> Pattern
parser ?! decision = peek parser *> commit decision
(<?) :: (Match match result) => match -> Parser result
(<?) lookbehind = prev lookbehind
(>?) :: (Match match result) => match -> Parser result
(>?) lookahead = peek lookahead
(>!) :: (Match match result) => match -> Pattern
(>!) lookahead = reject lookahead Nothing
(-) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result1
parser - rejected = reject rejected Nothing *> match parser
(&) :: (Match match1 result1, Match match2 result2) => match1 -> match2 -> Parser result2
before & after = match before *> match after
(/) :: (Match match1 result, Match match2 result) => match1 -> match2 -> Parser result
first / second = Parser $ applyParser (match first <|> match second)
(?) :: (Match match result) => match -> Pattern
(?) optional = (match optional *> empty) <|> empty
(*) :: (Match match result) => match -> Pattern
(*) parser = DeStar ^ zomParser
where
zomParser = ((parser ! DeStar) *> match zomParser) <|> empty
(+) :: (Match match result) => match -> Pattern
(+) parser = match parser *> (parser *)
instance Alternative Parser where
empty = fail "empty"
left <|> right = Parser $ \state -> decideParser state D.empty left right state
where
decideParser point tokens left right state =
let reply = applyParser left state
tokens' = D.append tokens $ reply^.rTokens
in case (reply^.rResult, reply^.rCommit) of
(Failed _, _) -> Reply { rState = point,
rTokens = D.empty,
rResult = More right,
rCommit = Nothing }
(Result _, _) -> reply { rTokens = tokens' }
(More _, Just _) -> reply { rTokens = tokens' }
(More left', Nothing) -> decideParser point tokens' left' right (reply^.rState)
choice :: Decision -> Parser result -> Parser result
choice decision parser = Parser $ \ state ->
applyParser (choiceParser (state^.sDecision) decision parser) state { sDecision = decision }
where choiceParser parentDecision makingDecision parser = Parser $ \ state ->
let reply = applyParser parser state
commit' = case reply^.rCommit of
Nothing -> Nothing
Just decision | decision == makingDecision -> Nothing
| otherwise -> reply^.rCommit
reply' = case reply^.rResult of
More parser' -> reply { rCommit = commit',
rResult = More $ choiceParser parentDecision makingDecision parser' }
_ -> reply { rCommit = commit',
rState = (reply^.rState) { sDecision = parentDecision } }
in reply'
recovery :: (Match match1 result) => match1 -> Parser result -> Parser result
recovery pattern recover =
Parser $ \ state ->
let reply = applyParser (match pattern) state
in if state^.sIsPeek
then reply
else case reply^.rResult of
Result _ -> reply
More more -> reply { rResult = More $ more `recovery` recover }
Failed message -> reply { rResult = More $ fake Error message *> unparsed *> recover }
where unparsed = Parser $ \ state -> applyParser (match finishToken) $ state { sCode = Unparsed }
prev :: (Match match result) => match -> Parser result
prev parser = Parser $ \ state ->
prevParser state (match parser) state { sIsPeek = True, sInput = (-1, state^.sLast) : state^.sInput }
where prevParser point parser state =
let reply = applyParser parser state
in case reply^.rResult of
Failed message -> failReply point message
Result value -> returnReply point value
More parser' -> prevParser point parser' $ reply^.rState
peek :: (Match match result) => match -> Parser result
peek parser = Parser $ \ state ->
peekParser state (match parser) state { sIsPeek = True }
where peekParser point parser state =
let reply = applyParser parser state
in case reply^.rResult of
Failed message -> failReply point message
Result value -> returnReply point value
More parser' -> peekParser point parser' $ reply^.rState
reject :: (Match match result) => match -> Maybe String -> Pattern
reject parser name = Parser $ \ state ->
rejectParser state name (match parser) state { sIsPeek = True }
where
rejectParser point name parser state =
let reply = applyParser parser state
in case reply^.rResult of
Failed _message -> returnReply point ()
Result _value -> case name of
Nothing -> unexpectedReply point
Just text -> failReply point $ "Unexpected " ++ text
More parser' -> rejectParser point name parser' $ reply^.rState
upto :: Pattern -> Pattern
upto parser = ( ( parser >!) *> nextIf (const True) *)
nonEmpty :: (Match match result) => match -> Parser result
nonEmpty parser = Parser $ \ state ->
applyParser (nonEmptyParser (state^.sCharOffset) (match parser)) state
where
nonEmptyParser offset parser = Parser $ \ state ->
let reply = applyParser parser state
state' = reply^.rState
in case reply^.rResult of
Failed _message -> reply
Result _value -> if state'^.sCharOffset > offset
then reply
else failReply state' "Matched empty pattern"
More parser' -> reply { rResult = More $ nonEmptyParser offset parser' }
empty :: Pattern
empty = return ()
eof :: Pattern
eof = Parser $ \ state ->
if state^.sInput == []
then returnReply state ()
else unexpectedReply state
sol :: Pattern
sol = Parser $ \ state ->
if state^.sIsSol
then returnReply state ()
else failReply state "Expected start of line"
commit :: Decision -> Pattern
commit decision = Parser $ \ state ->
Reply { rState = state,
rTokens = D.empty,
rResult = Result (),
rCommit = Just decision }
nextLine :: Pattern
nextLine = Parser $ \ state ->
returnReply state { sIsSol = True,
sLine = state^.sLine .+ 1,
sLineChar = 0 }
()
with :: (value -> State -> State) -> (State -> value) -> value -> Parser result -> Parser result
with setField getField value parser = Parser $ \ state ->
let value' = getField state
Parser parser' = value' `seq` withParser value' parser
in parser' $ setField value state
where
withParser parentValue parser = Parser $ \ state ->
let reply = applyParser parser state
in case reply^.rResult of
Failed _ -> reply { rState = setField parentValue $ reply^.rState }
Result _ -> reply { rState = setField parentValue $ reply^.rState }
More parser' -> reply { rResult = More $ withParser parentValue parser' }
forbidding :: (Match match1 result1) => match1 -> Parser result1 -> Parser result1
forbidding parser forbidden = with setForbidden sForbidden (Just $ forbidden *> empty) (match parser)
limitedTo :: (Match match result) => match -> Int -> Parser result
limitedTo parser limit = with setLimit sLimit limit (match parser)
nextIf :: (Char -> Bool) -> Pattern
nextIf test = Parser $ \ state ->
case state^.sForbidden of
Nothing -> limitedNextIf state
Just parser -> let reply = applyParser (reject parser $ Just "forbidden pattern") state { sForbidden = Nothing }
in case reply^.rResult of
Failed _ -> reply
Result _ -> limitedNextIf state
where
limitedNextIf state =
case state^.sLimit of
-1 -> consumeNextIf state
0 -> failReply state "Lookahead limit reached"
_limit -> consumeNextIf state { sLimit = state^.sLimit .- 1 }
consumeNextIf state =
case state^.sInput of
((offset, char):rest) | test char -> let chars = if state^.sIsPeek then [] else char:(state^.sChars)
byte_offset = charsOf sByteOffset sCharsByteOffset
char_offset = charsOf sCharOffset sCharsCharOffset
line = charsOf sLine sCharsLine
line_char = charsOf sLineChar sCharsLineChar
is_sol = char == '\xFEFF' && state^.sIsSol
state' = state { sInput = rest,
sLast = char,
sChars = chars,
sCharsByteOffset = byte_offset,
sCharsCharOffset = char_offset,
sCharsLine = line,
sCharsLineChar = line_char,
sIsSol = is_sol,
sByteOffset = offset,
sCharOffset = state^.sCharOffset .+ 1,
sLineChar = state^.sLineChar .+ 1 }
in returnReply state' ()
| otherwise -> unexpectedReply state
[] -> unexpectedReply state
where
charsOf field charsField
| state^.sIsPeek = -1
| state^.sChars == [] = state^.field
| otherwise = state^.charsField
finishToken :: Pattern
finishToken = Parser $ \ state ->
let state' = state { sChars = [],
sCharsByteOffset = -1,
sCharsCharOffset = -1,
sCharsLine = -1,
sCharsLineChar = -1 }
in if state^.sIsPeek
then returnReply state' ()
else case state^.sChars of
[] -> returnReply state' ()
chars@(_:_) -> tokenReply state' Token { tByteOffset = state^.sCharsByteOffset,
tCharOffset = state^.sCharsCharOffset,
tLine = state^.sCharsLine,
tLineChar = state^.sCharsLineChar,
tCode = state^.sCode,
tText = reverse chars }
wrap :: (Match match result) => match -> Parser result
wrap parser = do result <- match parser
finishToken
eof
return result
token :: (Match match result) => Code -> match -> Pattern
token code parser = finishToken & with setCode sCode code (parser & finishToken)
fake :: Code -> String -> Pattern
fake code text = Parser $ \ state ->
if state^.sIsPeek
then returnReply state ()
else tokenReply state Token { tByteOffset = value state sByteOffset sCharsByteOffset,
tCharOffset = value state sCharOffset sCharsCharOffset,
tLine = value state sLine sCharsLine,
tLineChar = value state sLineChar sCharsLineChar,
tCode = code,
tText = text }
where value state field1 field2 =
if field2 state == -1
then field1 state
else field2 state
meta :: (Match match result) => match -> Pattern
meta parser = token Meta parser
indicator :: (Match match result) => match -> Pattern
indicator parser = token Indicator $ parser
text :: (Match match result) => match -> Pattern
text parser = token Text parser
emptyToken :: Code -> Pattern
emptyToken code = finishToken & parser code
where parser code = Parser $ \ state ->
if state^.sIsPeek
then returnReply state ()
else tokenReply state Token { tByteOffset = state^.sByteOffset,
tCharOffset = state^.sCharOffset,
tLine = state^.sLine,
tLineChar = state^.sLineChar,
tCode = code,
tText = "" }
wrapTokens :: Code -> Code -> Pattern -> Pattern
wrapTokens beginCode endCode pattern = emptyToken beginCode
& prefixErrorWith pattern (emptyToken endCode)
& emptyToken endCode
prefixErrorWith :: (Match match result) => match -> Pattern -> Parser result
prefixErrorWith pattern prefix =
Parser $ \ state ->
let reply = applyParser (match pattern) state
in case reply^.rResult of
Result _ -> reply
More more -> reply { rResult = More $ prefixErrorWith more prefix }
Failed message -> reply { rResult = More $ prefix & (fail message :: Parser result) }
data Context = BlockOut
| BlockIn
| FlowOut
| FlowIn
| BlockKey
| FlowKey
instance Show Context where
show context = case context of
BlockOut -> "block-out"
BlockIn -> "block-in"
FlowOut -> "flow-out"
FlowIn -> "flow-in"
BlockKey -> "block-key"
FlowKey -> "flow-key"
instance Read Context where
readsPrec _ text = [ ((r word), tail) | (word, tail) <- lex text ]
where r word = case word of
"block_out" -> BlockOut
"block_in" -> BlockIn
"flow_out" -> FlowOut
"flow_in" -> FlowIn
"block_key" -> BlockKey
"flow_key" -> FlowKey
_ -> error $ "unknown context: " ++ word
data Chomp = Strip
| Clip
| Keep
instance Show Chomp where
show chomp = case chomp of
Strip -> "strip"
Clip -> "clip"
Keep -> "keep"
instance Read Chomp where
readsPrec _ text = [ ((r word), tail) | (word, tail) <- lex text ]
where r word = case word of
"strip" -> Strip
"clip" -> Clip
"keep" -> Keep
_ -> error $ "unknown chomp: " ++ word
type Tokenizer = BLC.ByteString -> Bool -> [Token]
patternTokenizer :: Pattern -> Tokenizer
patternTokenizer pattern input withFollowing =
D.toList $ patternParser (wrap pattern) (initialState input)
where
patternParser parser state =
let reply = applyParser parser state
tokens = commitBugs reply
state' = reply^.rState
in case reply^.rResult of
Failed message -> errorTokens tokens state' message withFollowing
Result _ -> tokens
More parser' -> D.append tokens $ patternParser parser' state'
errorTokens :: D.DList Token -> State -> String -> Bool -> D.DList Token
errorTokens tokens state message withFollowing =
let tokens' = D.append tokens $ D.singleton Token { tByteOffset = state^.sByteOffset,
tCharOffset = state^.sCharOffset,
tLine = state^.sLine,
tLineChar = state^.sLineChar,
tCode = Error,
tText = message }
in if withFollowing && state^.sInput /= []
then D.append tokens' $ D.singleton Token { tByteOffset = state^.sByteOffset,
tCharOffset = state^.sCharOffset,
tLine = state^.sLine,
tLineChar = state^.sLineChar,
tCode = Unparsed,
tText = map snd $ state^.sInput }
else tokens'
commitBugs :: Reply result -> D.DList Token
commitBugs reply =
let tokens = reply^.rTokens
state = reply^.rState
in case reply^.rCommit of
Nothing -> tokens
Just commit -> D.append tokens $ D.singleton Token { tByteOffset = state^.sByteOffset,
tCharOffset = state^.sCharOffset,
tLine = state^.sLine,
tLineChar = state^.sLineChar,
tCode = Error,
tText = "Commit to " ++ show commit ++ " was made outside it" }
tokenize :: BLC.ByteString -> Bool -> [Token]
tokenize = patternTokenizer l_yaml_stream
bom :: Match match1 result1 => match1 -> Parser ()
bom code = code
& (Parser $ \ state -> let text = case state^.sEncoding of
UTF8 -> "TF-8"
UTF16LE -> "TF-16LE"
UTF16BE -> "TF-16BE"
UTF32LE -> "TF-32LE"
UTF32BE -> "TF-32BE"
in applyParser (fake Bom text) state)
na :: Int
na = error "Accessing non-applicable indentation"
asInteger :: Parser Int
asInteger = Parser $ \ state -> returnReply state $ ord (state^.sLast) .- 48
result :: result -> Parser result
result = return
c_printable = '\x9' / '\xA' / '\xD' / ('\x20', '\x7E')
/ '\x85' / ('\xA0', '\xD7FF') / ('\xE000', '\xFFFD')
/ ('\x10000', '\x10FFFF')
nb_json = '\x9' / ('\x20', '\x10FFFF')
c_byte_order_mark = bom '\xFEFF'
c_sequence_entry = indicator '-'
c_mapping_key = indicator '?'
c_mapping_value = indicator ':'
c_collect_entry = indicator ','
c_sequence_start = indicator '['
c_sequence_end = indicator ']'
c_mapping_start = indicator '{'
c_mapping_end = indicator '}'
c_comment = indicator '#'
c_anchor = indicator '&'
c_alias = indicator '*'
c_tag = indicator '!'
c_literal = indicator '|'
c_folded = indicator '>'
c_single_quote = indicator '\''
c_double_quote = indicator '"'
c_directive = indicator '%'
c_reserved = indicator ( '@' / '`' )
c_indicator = c_sequence_entry / c_mapping_key / c_mapping_value / c_collect_entry
/ c_sequence_start / c_sequence_end / c_mapping_start / c_mapping_end
/ c_comment / c_anchor / c_alias / c_tag
/ c_literal / c_folded / c_single_quote / c_double_quote
/ c_directive / c_reserved
c_flow_indicator = c_collect_entry / c_sequence_start / c_sequence_end / c_mapping_start / c_mapping_end
b_line_feed = '\xA'
b_carriage_return = '\xD'
b_char = b_line_feed / b_carriage_return
nb_char = c_printable - b_char - c_byte_order_mark
b_break = ( b_carriage_return & b_line_feed
/ b_carriage_return
/ b_line_feed )
& nextLine
b_as_line_feed = token LineFeed b_break
b_non_content = token Break b_break
s_space = '\x20'
s_tab = '\x9'
s_white = s_space / s_tab
ns_char = nb_char - s_white
ns_dec_digit = ('\x30', '\x39')
ns_hex_digit = ns_dec_digit
/ ('\x41', '\x46') / ('\x61', '\x66')
ns_ascii_letter = ('\x41', '\x5A') / ('\x61', '\x7A')
ns_word_char = ns_dec_digit / ns_ascii_letter / '-'
ns_uri_char = DeEscape
^ ( '%' ! DeEscape & ns_hex_digit & ns_hex_digit / ns_word_char / '#'
/ ';' / '/' / '?' / ':' / '@' / '&' / '=' / '+' / '$' / ','
/ '_' / '.' / '!' / '~' / '*' / '\'' / '(' / ')' / '[' / ']' )
ns_tag_char = ns_uri_char - c_tag - c_flow_indicator
c_escape = indicator '\\'
ns_esc_null = meta '0'
ns_esc_bell = meta 'a'
ns_esc_backspace = meta 'b'
ns_esc_horizontal_tab = meta ( 't' / '\x9' )
ns_esc_line_feed = meta 'n'
ns_esc_vertical_tab = meta 'v'
ns_esc_form_feed = meta 'f'
ns_esc_carriage_return = meta 'r'
ns_esc_escape = meta 'e'
ns_esc_space = meta '\x20'
ns_esc_double_quote = meta '"'
ns_esc_slash = meta '/'
ns_esc_backslash = meta '\\'
ns_esc_next_line = meta 'N'
ns_esc_non_breaking_space = meta '_'
ns_esc_line_separator = meta 'L'
ns_esc_paragraph_separator = meta 'P'
ns_esc_8_bit = indicator 'x' ! DeEscaped & meta ( ns_hex_digit % 2 )
ns_esc_16_bit = indicator 'u' ! DeEscaped & meta ( ns_hex_digit % 4 )
ns_esc_32_bit = indicator 'U' ! DeEscaped & meta ( ns_hex_digit % 8 )
c_ns_esc_char = wrapTokens BeginEscape EndEscape
$ c_escape ! DeEscape
& DeEscaped
^ ( ns_esc_null / ns_esc_bell / ns_esc_backspace
/ ns_esc_horizontal_tab / ns_esc_line_feed
/ ns_esc_vertical_tab / ns_esc_form_feed
/ ns_esc_carriage_return / ns_esc_escape / ns_esc_space
/ ns_esc_double_quote / ns_esc_slash / ns_esc_backslash
/ ns_esc_next_line / ns_esc_non_breaking_space
/ ns_esc_line_separator / ns_esc_paragraph_separator
/ ns_esc_8_bit / ns_esc_16_bit / ns_esc_32_bit )
s_indent n = token Indent ( s_space % n )
s_indent_lt n = token Indent ( s_space <% n )
s_indent_le n = token Indent ( s_space <% (n .+ 1) )
s_separate_in_line = token White ( s_white +) / sol
s_line_prefix n c = case c of
BlockOut -> s_block_line_prefix n
BlockIn -> s_block_line_prefix n
FlowOut -> s_flow_line_prefix n
FlowIn -> s_flow_line_prefix n
s_block_line_prefix n = s_indent n
s_flow_line_prefix n = s_indent n & ( s_separate_in_line ?)
l_empty n c = ( s_line_prefix n c / s_indent_lt n )
& b_as_line_feed
b_l_trimmed n c = b_non_content & ( l_empty n c +)
b_as_space = token LineFold b_break
b_l_folded n c = b_l_trimmed n c / b_as_space
s_flow_folded n = ( s_separate_in_line ?) & b_l_folded n FlowIn
& s_flow_line_prefix n
c_nb_comment_text = wrapTokens BeginComment EndComment
$ c_comment & meta ( nb_char *)
b_comment = b_non_content / eof
s_b_comment = ( s_separate_in_line & ( c_nb_comment_text ?) ?)
& b_comment
l_comment = s_separate_in_line & ( c_nb_comment_text ?) & b_comment
s_l_comments = ( s_b_comment / sol )
& ( nonEmpty l_comment *)
s_separate n c = case c of
BlockOut -> s_separate_lines n
BlockIn -> s_separate_lines n
FlowOut -> s_separate_lines n
FlowIn -> s_separate_lines n
BlockKey -> s_separate_in_line
FlowKey -> s_separate_in_line
s_separate_lines n = s_l_comments & s_flow_line_prefix n
/ s_separate_in_line
l_directive = ( wrapTokens BeginDirective EndDirective
$ c_directive ! DeDoc
& DeDirective
^ ( ns_yaml_directive
/ ns_tag_directive
/ ns_reserved_directive ) )
& s_l_comments
ns_reserved_directive = ns_directive_name
& ( s_separate_in_line & ns_directive_parameter *)
ns_directive_name = meta ( ns_char +)
ns_directive_parameter = meta ( ns_char +)
ns_yaml_directive = meta [ 'Y', 'A', 'M', 'L' ] ! DeDirective
& s_separate_in_line & ns_yaml_version
ns_yaml_version = meta ( ( ns_dec_digit +) & '.' & ( ns_dec_digit +) )
ns_tag_directive = meta [ 'T', 'A', 'G' ] ! DeDirective
& s_separate_in_line & c_tag_handle
& s_separate_in_line & ns_tag_prefix
c_tag_handle = c_named_tag_handle
/ c_secondary_tag_handle
/ c_primary_tag_handle
c_primary_tag_handle = wrapTokens BeginHandle EndHandle
$ c_tag
c_secondary_tag_handle = wrapTokens BeginHandle EndHandle
$ c_tag & c_tag
c_named_tag_handle = wrapTokens BeginHandle EndHandle
$ c_tag & meta ( ns_word_char +) & c_tag
ns_tag_prefix = wrapTokens BeginTag EndTag
$ ( c_ns_local_tag_prefix / ns_global_tag_prefix )
c_ns_local_tag_prefix = c_tag & meta ( ns_uri_char *)
ns_global_tag_prefix = meta ( ns_tag_char & ( ns_uri_char *) )
c_ns_properties n c = wrapTokens BeginProperties EndProperties
$ ( c_ns_tag_property
& ( s_separate n c & c_ns_anchor_property ?) )
/ ( c_ns_anchor_property
& ( s_separate n c & c_ns_tag_property ?) )
c_ns_tag_property = wrapTokens BeginTag EndTag
$ c_verbatim_tag
/ c_ns_shorthand_tag
/ c_non_specific_tag
c_verbatim_tag = c_tag & indicator '<' & meta ( ns_uri_char +) & indicator '>'
c_ns_shorthand_tag = c_tag_handle & meta ( ns_tag_char +)
c_non_specific_tag = c_tag
c_ns_anchor_property = wrapTokens BeginAnchor EndAnchor
$ c_anchor & ns_anchor_name
ns_anchor_char = ns_char - c_flow_indicator
ns_anchor_name = meta ( ns_anchor_char +)
c_ns_alias_node = wrapTokens BeginAlias EndAlias
$ c_alias ! DeNode & ns_anchor_name
e_scalar = wrapTokens BeginScalar EndScalar empty
e_node = wrapTokens BeginNode EndNode e_scalar
nb_double_char = DeEscape ^ ( c_ns_esc_char / ( nb_json - c_escape - c_double_quote ) )
ns_double_char = nb_double_char - s_white
c_double_quoted n c = wrapTokens BeginScalar EndScalar
$ c_double_quote ! DeNode & text ( nb_double_text n c ) & c_double_quote
nb_double_text n c = case c of
FlowOut -> nb_double_multi_line n
FlowIn -> nb_double_multi_line n
BlockKey -> nb_double_one_line
FlowKey -> nb_double_one_line
nb_double_one_line = ( nb_double_char *)
s_double_escaped n = ( s_white *)
& wrapTokens BeginEscape EndEscape ( c_escape ! DeEscape & b_non_content )
& ( l_empty n FlowIn *)
& s_flow_line_prefix n
s_double_break n = DeEscape ^ ( s_double_escaped n / s_flow_folded n )
nb_ns_double_in_line = ( ( s_white *) & ns_double_char *)
s_double_next_line n = s_double_break n
& ( ns_double_char & nb_ns_double_in_line
& ( s_double_next_line n / ( s_white *) ) ?)
nb_double_multi_line n = nb_ns_double_in_line
& ( s_double_next_line n / ( s_white *) )
c_quoted_quote = wrapTokens BeginEscape EndEscape
$ c_single_quote ! DeEscape & meta '\''
nb_single_char = DeEscape ^ ( c_quoted_quote / ( nb_json - c_single_quote ) )
ns_single_char = nb_single_char - s_white
c_single_quoted n c = wrapTokens BeginScalar EndScalar
$ c_single_quote ! DeNode & text ( nb_single_text n c ) & c_single_quote
nb_single_text n c = case c of
FlowOut -> nb_single_multi_line n
FlowIn -> nb_single_multi_line n
BlockKey -> nb_single_one_line
FlowKey -> nb_single_one_line
nb_single_one_line = ( nb_single_char *)
nb_ns_single_in_line = ( ( s_white *) & ns_single_char *)
s_single_next_line n = s_flow_folded n
& ( ns_single_char & nb_ns_single_in_line
& ( s_single_next_line n / ( s_white *) ) ?)
nb_single_multi_line n = nb_ns_single_in_line
& ( s_single_next_line n / ( s_white *) )
ns_plain_first _c = ns_char - c_indicator
/ ( ':' / '?' / '-' ) & ( ns_char >?)
ns_plain_safe c = case c of
FlowOut -> ns_plain_safe_out
FlowIn -> ns_plain_safe_in
BlockKey -> ns_plain_safe_out
FlowKey -> ns_plain_safe_in
ns_plain_safe_out = ns_char - c_mapping_value - c_comment
ns_plain_safe_in = ns_plain_safe_out - c_flow_indicator
ns_plain_char c = ns_plain_safe c
/ ( ns_char <?) & '#'
/ ':' & ( ns_char >?)
ns_plain n c = wrapTokens BeginScalar EndScalar
$ text (case c of
FlowOut -> ns_plain_multi_line n c
FlowIn -> ns_plain_multi_line n c
BlockKey -> ns_plain_one_line c
FlowKey -> ns_plain_one_line c)
nb_ns_plain_in_line c = ( ( s_white *) & ns_plain_char c *)
ns_plain_one_line c = ns_plain_first c ! DeNode & nb_ns_plain_in_line c
s_ns_plain_next_line n c = s_flow_folded n
& ns_plain_char c & nb_ns_plain_in_line c
ns_plain_multi_line n c = ns_plain_one_line c
& ( s_ns_plain_next_line n c *)
in_flow c = case c of
FlowOut -> FlowIn
FlowIn -> FlowIn
BlockKey -> FlowKey
FlowKey -> FlowKey
c_flow_sequence n c = wrapTokens BeginSequence EndSequence
$ c_sequence_start ! DeNode & ( s_separate n c ?)
& ( ns_s_flow_seq_entries n (in_flow c) ?) & c_sequence_end
ns_s_flow_seq_entries n c = ns_flow_seq_entry n c & ( s_separate n c ?)
& ( c_collect_entry & ( s_separate n c ?)
& ( ns_s_flow_seq_entries n c ?) ?)
ns_flow_seq_entry n c = DePair ^ ( ns_flow_pair n c / DeNode ^ ns_flow_node n c )
c_flow_mapping n c = wrapTokens BeginMapping EndMapping
$ c_mapping_start ! DeNode & ( s_separate n c ?)
& ( ns_s_flow_map_entries n (in_flow c) ?) & c_mapping_end
ns_s_flow_map_entries n c = ns_flow_map_entry n c & ( s_separate n c ?)
& ( c_collect_entry & ( s_separate n c ?)
& ( ns_s_flow_map_entries n c ?) ?)
ns_flow_map_entry n c = wrapTokens BeginPair EndPair
$ DeKey ^ ( ( c_mapping_key ! DeKey & s_separate n c
& ns_flow_map_explicit_entry n c )
/ ns_flow_map_implicit_entry n c )
ns_flow_map_explicit_entry n c = ns_flow_map_implicit_entry n c
/ ( e_node
& e_node )
ns_flow_map_implicit_entry n c = DePair
^ ( ns_flow_map_yaml_key_entry n c
/ c_ns_flow_map_empty_key_entry n c
/ c_ns_flow_map_json_key_entry n c )
ns_flow_map_yaml_key_entry n c = ( DeNode ^ ns_flow_yaml_node n c ) ! DePair
& ( ( ( s_separate n c ?)
& c_ns_flow_map_separate_value n c )
/ e_node )
c_ns_flow_map_empty_key_entry n c = e_node
& c_ns_flow_map_separate_value n c
c_ns_flow_map_separate_value n c = c_mapping_value & ( ns_char >!) ! DePair
& ( ( s_separate n c & ns_flow_node n c )
/ e_node )
c_ns_flow_map_json_key_entry n c = ( DeNode ^ c_flow_json_node n c ) ! DePair
& ( ( ( s_separate n c ?)
& c_ns_flow_map_adjacent_value n c )
/ e_node )
c_ns_flow_map_adjacent_value n c = c_mapping_value ! DePair
& ( ( ( s_separate n c ?)
& ns_flow_node n c )
/ e_node )
ns_flow_pair n c = wrapTokens BeginMapping EndMapping
$ wrapTokens BeginPair EndPair
$ ( ( c_mapping_key ! DePair & s_separate n c
& ns_flow_map_explicit_entry n c )
/ ns_flow_pair_entry n c )
ns_flow_pair_entry n c = ( ns_flow_pair_yaml_key_entry n c
/ c_ns_flow_map_empty_key_entry n c
/ c_ns_flow_pair_json_key_entry n c )
ns_flow_pair_yaml_key_entry n c = ns_s_implicit_yaml_key FlowKey
& c_ns_flow_map_separate_value n c
c_ns_flow_pair_json_key_entry n c = c_s_implicit_json_key FlowKey
& c_ns_flow_map_adjacent_value n c
ns_s_implicit_yaml_key c = ( DeNode ^ ( ns_flow_yaml_node na c ) & ( s_separate_in_line ?) )
`limitedTo` 1024
c_s_implicit_json_key c = ( DeNode ^ ( c_flow_json_node na c ) & ( s_separate_in_line ?) )
`limitedTo` 1024
ns_flow_yaml_content n c = ns_plain n c
c_flow_json_content n c = c_flow_sequence n c / c_flow_mapping n c
/ c_single_quoted n c / c_double_quoted n c
ns_flow_content n c = ns_flow_yaml_content n c / c_flow_json_content n c
ns_flow_yaml_node n c = wrapTokens BeginNode EndNode
$ c_ns_alias_node
/ ns_flow_yaml_content n c
/ ( c_ns_properties n c
& ( ( s_separate n c & ns_flow_yaml_content n c )
/ e_scalar ) )
c_flow_json_node n c = wrapTokens BeginNode EndNode
$ ( c_ns_properties n c & s_separate n c ?)
& c_flow_json_content n c
ns_flow_node n c = wrapTokens BeginNode EndNode
$ c_ns_alias_node
/ ns_flow_content n c
/ ( c_ns_properties n c
& ( ( s_separate n c & ns_flow_content n c )
/ e_scalar ) )
c_b_block_header n = DeHeader
^ ( do m <- c_indentation_indicator n
t <- c_chomping_indicator
( s_white / b_char ) ?! DeHeader
s_b_comment
result (m, t)
/ do t <- c_chomping_indicator
m <- c_indentation_indicator n
s_b_comment
result (m, t) )
c_indentation_indicator n = fmap fixup (indicator ( ns_dec_digit - '0' ) & asInteger)
/ detect_scalar_indentation n
where
fixup | n == -1 = (.+ 1)
| otherwise = id
detect_scalar_indentation n = peek $ ( nb_char *)
& ( b_break & ( (s_space *) & b_break *) ?)
& count_spaces (-n)
count_spaces n = (s_space & count_spaces (n .+ 1))
/ result (max 1 n)
c_chomping_indicator = indicator '-' & result Strip
/ indicator '+' & result Keep
/ result Clip
end_block_scalar t = case t of
Strip -> emptyToken EndScalar
Clip -> emptyToken EndScalar
Keep -> empty
b_chomped_last t = case t of
Strip -> emptyToken EndScalar & b_non_content
Clip -> b_as_line_feed & emptyToken EndScalar
Keep -> b_as_line_feed
l_chomped_empty n t = case t of
Strip -> l_strip_empty n
Clip -> l_strip_empty n
Keep -> l_keep_empty n
l_strip_empty n = ( s_indent_le n & b_non_content *)
& ( l_trail_comments n ?)
l_keep_empty n = ( l_empty n BlockIn *)
& emptyToken EndScalar
& ( l_trail_comments n ?)
l_trail_comments n = s_indent_lt n & c_nb_comment_text & b_comment
& ( nonEmpty l_comment *)
c_l__literal n = do emptyToken BeginScalar
c_literal ! DeNode
(m, t) <- c_b_block_header n `prefixErrorWith` emptyToken EndScalar
text ( l_literal_content (n .+ m) t )
l_nb_literal_text n = ( l_empty n BlockIn *)
& s_indent n & ( nb_char +)
b_nb_literal_next n = b_as_line_feed
& l_nb_literal_text n
l_literal_content n t = ( ( l_nb_literal_text n & ( b_nb_literal_next n *) & b_chomped_last t )
/ end_block_scalar t )
& l_chomped_empty n t
c_l__folded n = do emptyToken BeginScalar
c_folded ! DeNode
(m, t) <- c_b_block_header n `prefixErrorWith` emptyToken EndScalar
text ( l_folded_content (n .+ m) t )
s_nb_folded_text n = s_indent n & ns_char ! DeFold & ( nb_char *)
l_nb_folded_lines n = s_nb_folded_text n
& ( b_l_folded n BlockIn & s_nb_folded_text n *)
s_nb_spaced_text n = s_indent n & s_white ! DeFold & ( nb_char *)
b_l_spaced n = b_as_line_feed
& ( l_empty n BlockIn *)
l_nb_spaced_lines n = s_nb_spaced_text n
& ( b_l_spaced n & s_nb_spaced_text n *)
l_nb_same_lines n = ( l_empty n BlockIn *)
& DeFold ^ ( l_nb_folded_lines n / l_nb_spaced_lines n )
l_nb_diff_lines n = l_nb_same_lines n
& ( b_as_line_feed & l_nb_same_lines n *)
l_folded_content n t = ( ( l_nb_diff_lines n & b_chomped_last t )
/ end_block_scalar t )
& l_chomped_empty n t
detect_collection_indentation n = peek $ ( nonEmpty l_comment* ) & count_spaces (-n)
detect_inline_indentation = peek $ count_spaces 0
l__block_sequence n = do m <- detect_collection_indentation n
wrapTokens BeginSequence EndSequence $ ( s_indent (n .+ m) & c_l_block_seq_entry (n .+ m) +)
c_l_block_seq_entry n = c_sequence_entry & ( ns_char >!) ! DeNode
& s_l__block_indented n BlockIn
s_l__block_indented n c = do m <- detect_inline_indentation
DeNode ^ ( ( s_indent m
& ( ns_l_in_line_sequence (n .+ 1 .+ m)
/ ns_l_in_line_mapping (n .+ 1 .+ m) ) )
/ s_l__block_node n c
/ ( e_node & ( s_l_comments ?) & unparsed (n .+ 1) ) ) `recovery` unparsed (n .+ 1)
ns_l_in_line_sequence n = wrapTokens BeginNode EndNode
$ wrapTokens BeginSequence EndSequence
$ c_l_block_seq_entry n
& ( s_indent n & c_l_block_seq_entry n *)
l__block_mapping n = do m <- detect_collection_indentation n
wrapTokens BeginMapping EndMapping $ ( s_indent (n .+ m) & ns_l_block_map_entry (n .+ m) +)
ns_l_block_map_entry n = wrapTokens BeginPair EndPair
$ c_l_block_map_explicit_entry n
/ ns_l_block_map_implicit_entry n
c_l_block_map_explicit_entry n = c_l_block_map_explicit_key n
& ( l_block_map_explicit_value n
/ e_node )
c_l_block_map_explicit_key n = c_mapping_key ! DeNode & s_l__block_indented n BlockOut
l_block_map_explicit_value n = s_indent n & c_mapping_value & s_l__block_indented n BlockOut
ns_l_block_map_implicit_entry n = ( ns_s_block_map_implicit_key
/ e_node )
& c_l_block_map_implicit_value n
ns_s_block_map_implicit_key = c_s_implicit_json_key BlockKey
/ ns_s_implicit_yaml_key BlockKey
c_l_block_map_implicit_value n = c_mapping_value ! DeNode
& ( ( s_l__block_node n BlockOut
/ ( e_node & ( s_l_comments ?) & unparsed (n .+ 1) ) ) `recovery` unparsed (n .+ 1) )
ns_l_in_line_mapping n = wrapTokens BeginNode EndNode
$ wrapTokens BeginMapping EndMapping
$ ns_l_block_map_entry n
& ( s_indent n & ns_l_block_map_entry n *)
unparsed n = ( sol / unparsed_text & unparsed_break )
& ( nonEmpty ( unparsed_indent n & unparsed_text & unparsed_break ) *)
unparsed_indent n = token Unparsed ( s_space % n )
unparsed_text = token Unparsed ( upto ( eof / c_forbidden / b_break ) )
unparsed_break = eof / peek c_forbidden / token Unparsed b_break / empty
s_l__block_node n c = s_l__block_in_block n c / s_l__flow_in_block n
s_l__flow_in_block n = s_separate (n .+ 1) FlowOut
& ns_flow_node (n .+ 1) FlowOut & s_l_comments
s_l__block_in_block n c = wrapTokens BeginNode EndNode
$ ( s_l__block_scalar n c / s_l__block_collection n c )
s_l__block_scalar n c = s_separate (n .+ 1) c
& ( c_ns_properties (n .+ 1) c & s_separate (n .+ 1) c ?)
& ( c_l__literal n / c_l__folded n )
s_l__block_collection n c = ( s_separate (n .+ 1) c & c_ns_properties (n .+ 1) c & ( s_l_comments >?) ?)
& s_l_comments
& ( l__block_sequence (seq_spaces n c)
/ l__block_mapping n )
seq_spaces n c = case c of
BlockOut -> n .- 1
BlockIn -> n
l_document_prefix = ( c_byte_order_mark ?) & ( nonEmpty l_comment *)
c_directives_end = token DirectivesEnd [ '-', '-', '-' ]
c_document_end = token DocumentEnd [ '.', '.', '.' ]
l_document_suffix = c_document_end & s_l_comments
c_forbidden = sol
& ( c_directives_end / c_document_end )
& ( b_char / s_white / eof )
l_bare_document = DeNode ^ s_l__block_node (-1) BlockIn
`forbidding` c_forbidden
l_explicit_document = ( c_directives_end & ( b_char / s_white / eof >?)) ! DeDoc
& ( ( l_bare_document
/ e_node & ( s_l_comments ?) & unparsed 0 ) `recovery` unparsed 0 )
l_directives_document = ( l_directive +)
& l_explicit_document
l_any_document = wrapTokens BeginDocument EndDocument
$ DeDoc ^ ( l_directives_document
/ l_explicit_document
/ l_bare_document ) `recovery` unparsed 0
l_yaml_stream = ( nonEmpty l_document_prefix *)
& ( eof / ( c_document_end & ( b_char / s_white / eof ) >?) / l_any_document )
& ( nonEmpty ( DeMore ^ ( ( l_document_suffix ! DeMore +) & ( nonEmpty l_document_prefix *) & ( eof / l_any_document )
/ ( nonEmpty l_document_prefix *) & DeDoc ^ ( wrapTokens BeginDocument EndDocument l_explicit_document ?) ) ) *)