{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TupleSections #-}

{-
    BNF Converter: C flex generator
    Copyright (C) 2004  Author:  Michael Pellauer
    Copyright (C) 2020  Andreas Abel

    Description   : This module generates the Flex file. It is
                    similar to JLex but with a few peculiarities.

    Author        : Michael Pellauer
    Created       : 5 August, 2003
-}

module BNFC.Backend.C.CFtoFlexC
  ( cf2flex
  , preludeForBuffer  -- C code defining a buffer for lexing string literals.
  , cMacros           -- Lexer definitions.
  , commentStates     -- Stream of names for lexer states for comments.
  , lexComments       -- Lexing rules for comments.
  , lexStrings        -- Lexing rules for string literals.
  , lexChars          -- Lexing rules for character literals.
  ) where

import Prelude hiding ((<>))
import Data.Bifunctor (first)
import Data.List  (isInfixOf)
import Data.Maybe (fromMaybe)
import qualified Data.Map as Map

import BNFC.CF
import BNFC.Backend.C.RegToFlex
import BNFC.Backend.Common.NamedVariables
import BNFC.PrettyPrint
import BNFC.Utils (cstring, unless, when)

-- | Entrypoint: generate the Flex lexer specification for a grammar.
--
-- The first argument is the name used as prefix for the generated C symbols,
-- the second is the grammar.  The result is the text of the Flex file paired
-- with the symbol environment; the environment is reused by the parser
-- generator.
--
-- The environment numbers the grammar symbols and reserved words from 0
-- (as @_SYMB_0@, @_SYMB_1@, ...) and continues that numbering for the
-- token names returned by 'tokenNames'.
cf2flex :: String -> CF -> (String, SymMap)
cf2flex name cf = (flexSource, env)
  where
    flexSource :: String
    flexSource = unlines
      [ prelude stringLiterals name
      , cMacros cf
      , lexSymbols env0
      , restOfFlex cf env
      ]

    -- Complete environment: keywords/symbols plus token types.
    env :: SymMap
    env = Map.fromList env1

    -- Symbols and reserved words, numbered from 0.
    env0 :: KeywordEnv
    env0 = makeSymEnv (cfgSymbols cf ++ reservedWords cf) [0 :: Int ..]

    -- The entries of 'env0' tagged as keywords, followed by the token names
    -- whose numbering continues where 'env0' stops.
    env1 :: [(SymKey, String)]
    env1 = map (first Keyword) env0
        ++ makeSymEnv (map Tokentype $ tokenNames cf) [length env0 ..]

    -- Pair each item with the token name @_SYMB_n@ for its number @n@.
    makeSymEnv :: [a] -> [Int] -> [(a, String)]
    makeSymEnv = zipWith $ \ s n -> (s, "_SYMB_" ++ show n)

    -- Is the builtin string token category used by the grammar?
    stringLiterals :: Bool
    stringLiterals = isUsedCat cf (TokenCat catString)

-- | The definitions section of the Flex file up to and including the closing
-- @%}@ of the C prologue.
--
-- The first argument tells whether string literals are lexed; if so, the
-- string buffer interface from 'preludeForBuffer' is included.  The second
-- argument is the prefix used to rename @yylval@, @yylloc@ and @init_lexer@.
prelude :: Bool -> String -> String
prelude stringLiterals name = unlines $ concat
  [ [ "/* -*- c -*- This FLex file was machine-generated by the BNF converter */"
    -- noinput and nounput are most often unused
    -- https://stackoverflow.com/questions/39075510/option-noinput-nounput-what-are-they-for
    , "%option noyywrap noinput nounput"
    , "%top{"
    , "/* strdup was not in the ISO C standard before 6/2019 (C2x), but in POSIX 1003.1."
    , " * See: https://en.cppreference.com/w/c/experimental/dynamic/strdup"
    , " * Setting _POSIX_C_SOURCE to 200809L activates strdup in string.h."
    , " */"
    -- The following #define needs to be at the top before the automatic #include <stdlib.h>
    , "#define _POSIX_C_SOURCE 200809L"
    , "}"
    , "%{"
    , "#define yylval " ++ name ++ "lval"
    , "#define yylloc " ++ name ++ "lloc"
    , "#define init_lexer " ++ name ++ "_init_lexer"
    , "#include \"Parser.h\""
    , ""
    ]
  , when stringLiterals $ preludeForBuffer "Buffer.h"
    -- https://www.gnu.org/software/bison/manual/html_node/Token-Locations.html
    -- Flex is responsible for keeping tracking of the yylloc for Bison.
    -- Flex also doesn't do this automatically so we need this function
    -- https://stackoverflow.com/a/22125500/425756
  , [ "static void update_loc(YYLTYPE* loc, char* text)"
    , "{"
      -- The counter is declared before any statement: both @for (int i...)@
      -- and declarations following statements are only allowed from C99 on.
    , "  int i = 0;"
    , "  loc->first_line = loc->last_line;"
    , "  loc->first_column = loc->last_column;"
    , "  for (; text[i] != '\\0'; ++i) {"
    , "      if (text[i] == '\\n') {"
    , "          ++loc->last_line;"
    , "          loc->last_column = 0; "
    , "      } else {"
    , "          ++loc->last_column; "
    , "      }"
    , "  }"
    , "}"
    , "#define YY_USER_ACTION update_loc(&yylloc, yytext);"
    , ""
    , "%}"
    ]
  ]

-- | Part of the lexer prelude needed when string literals are to be lexed.
--   Defines an interface to the Buffer.
--
--   The argument is the name of the header file to @#include@ for the
--   buffer implementation; the result is a list of lines of C code.
preludeForBuffer :: String -> [String]
preludeForBuffer bufferH =
    [ "/* BEGIN extensible string buffer */"
    , ""
    , "#include \"" ++ bufferH ++ "\""
    , ""
    , "/* The initial size of the buffer to lex string literals. */"
    , "#define LITERAL_BUFFER_INITIAL_SIZE 1024"
    , ""
    , "/* The pointer to the literal buffer. */"
    , "static Buffer literal_buffer = NULL;"
    , ""
    , "/* Initialize the literal buffer. */"
    , "#define LITERAL_BUFFER_CREATE() literal_buffer = newBuffer(LITERAL_BUFFER_INITIAL_SIZE)"
    , ""
    , "/* Append characters at the end of the buffer. */"
    , "#define LITERAL_BUFFER_APPEND(s) bufferAppendString(literal_buffer, s)"
    , ""
    , "/* Append a character at the end of the buffer. */"
    , "#define LITERAL_BUFFER_APPEND_CHAR(c) bufferAppendChar(literal_buffer, c)"
    , ""
    , "/* Release the buffer, returning a pointer to its content. */"
    , "#define LITERAL_BUFFER_HARVEST() releaseBuffer(literal_buffer)"
    , ""
    , "/* In exceptional cases, e.g. when reaching EOF, we have to free the buffer. */"
    , "#define LITERAL_BUFFER_FREE() freeBuffer(literal_buffer)"
    , ""
    , "/* END extensible string buffer */"
    , ""
    ]

-- | Lexer definitions: character class macros and the declaration of the
-- start conditions, terminated by the @%%@ that opens the rules section.
--
-- One comment state from 'commentStates' is declared per block comment form
-- of the grammar.
--
-- For now all categories are included.
-- Optimally only the ones that are used should be generated.
cMacros :: CF -> String
cMacros cf = unlines
  [ "LETTER [a-zA-Z]"
  , "CAPITAL [A-Z]"
  , "SMALL [a-z]"
  , "DIGIT [0-9]"
  , "IDENT [a-zA-Z0-9'_]"
  , unwords $ concat
      [ [ "%START YYINITIAL CHAR CHARESC CHAREND STRING ESCAPED" ]
      , take (numberOfBlockCommentForms cf) commentStates
      ]
  , ""
  , "%%  /* Rules. */"
  ]

-- | Lexing rules for symbols and keywords: one rule per entry of the
-- environment, matching the (escaped) literal text in state @YYINITIAL@ and
-- returning the token name it is paired with.
lexSymbols :: KeywordEnv -> String
lexSymbols = concatMap transSym
  where
    -- The rule for a single (literal, token name) pair.
    transSym :: (String, String) -> String
    transSym (s, r) =
      "<YYINITIAL>\"" ++ escapeChars s ++ "\"      \t return " ++ r ++ ";\n"

-- | The rules following the keyword rules, and the user code section.
--
-- In order: comment rules, rules for user-defined tokens, rules for those
-- builtin token categories that the grammar uses, the whitespace and
-- catch-all error rules, and finally the definition of @init_lexer@.
--
-- The symbol environment is consulted for the token names of the
-- user-defined tokens.
restOfFlex :: CF -> SymMap -> String
restOfFlex cf env = unlines $ concat
  [ [ render $ lexComments Nothing (comments cf)
    , ""
    ]
  , userDefTokens
  , ifC catString  $ lexStrings "yylval" "_STRING_" "_ERROR_"
  , ifC catChar    $ lexChars   "yylval" "_CHAR_"
  , ifC catDouble  [ "<YYINITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)?      \t yylval._double = atof(yytext); return _DOUBLE_;" ]
  , ifC catInteger [ "<YYINITIAL>{DIGIT}+      \t yylval._int = atoi(yytext); return _INTEGER_;" ]
  , ifC catIdent   [ "<YYINITIAL>{LETTER}{IDENT}*      \t yylval._string = strdup(yytext); return _IDENT_;" ]
  , [ "<YYINITIAL>[ \\t\\r\\n\\f]      \t /* ignore white space. */;"
    , "<YYINITIAL>.      \t return _ERROR_;"
    , ""
    , "%%  /* Initialization code. */"
    , ""
    ]
  , footer
  ]
  where
  -- Keep the given rules only if the named token category is used.
  ifC :: String -> [String] -> [String]
  ifC cat rules = if isUsedCat cf (TokenCat cat) then rules else []

  -- One rule per token pragma of the grammar.
  userDefTokens :: [String]
  userDefTokens =
    [ "<YYINITIAL>" ++ printRegFlex regex ++
       "    \t yylval._string = strdup(yytext); return " ++ sName name ++ ";"
    | (name, regex) <- tokenPragmas cf
    ]
    where
    -- Token name from the environment, or the given name if there is none.
    sName :: String -> String
    sName n = fromMaybe n $ Map.lookup (Tokentype n) env

  -- Definition of the lexer initialization function.
  footer :: [String]
  footer =
    [ "void init_lexer(FILE *inp)"
    , "{"
    , "  yyrestart(inp);"
    , "  yylloc.first_line   = 1;"
    , "  yylloc.first_column = 1;"
    , "  yylloc.last_line    = 1;"
    , "  yylloc.last_column  = 1;"
    , "  BEGIN YYINITIAL;"
    , "}"
    ]

-- | Lexing of strings, converting escaped characters.
--
-- The arguments are, in order: the name of the semantic value variable
-- (whose @_string@ member receives the literal), the token to return for a
-- complete string literal, and the token to return when the end of input
-- is reached inside a string literal.
lexStrings :: String -> String -> String -> [String]
lexStrings yylval stringToken errorToken =
    [ "<YYINITIAL>\"\\\"\"        \t LITERAL_BUFFER_CREATE(); BEGIN STRING;"
    , "<STRING>\\\\             \t BEGIN ESCAPED;"
    , "<STRING>\\\"             \t " ++ yylval ++ "._string = LITERAL_BUFFER_HARVEST(); BEGIN YYINITIAL; return " ++ stringToken ++ ";"
    , "<STRING>.              \t LITERAL_BUFFER_APPEND_CHAR(yytext[0]);"
    , "<ESCAPED>n             \t LITERAL_BUFFER_APPEND_CHAR('\\n'); BEGIN STRING;"
    , "<ESCAPED>\\\"            \t LITERAL_BUFFER_APPEND_CHAR('\"');  BEGIN STRING;"
    , "<ESCAPED>\\\\            \t LITERAL_BUFFER_APPEND_CHAR('\\\\'); BEGIN STRING;"
    , "<ESCAPED>t             \t LITERAL_BUFFER_APPEND_CHAR('\\t'); BEGIN STRING;"
    , "<ESCAPED>.             \t LITERAL_BUFFER_APPEND(yytext);    BEGIN STRING;"
    , "<STRING,ESCAPED><<EOF>>\t LITERAL_BUFFER_FREE(); return " ++ errorToken ++ ";"
    ]

-- | Lexing of characters, converting escaped characters.
--
-- The first argument is the name of the semantic value variable (whose
-- @_char@ member receives the character), the second the token to return
-- for a character literal.
lexChars :: String -> String -> [String]
lexChars yylval charToken =
    [ "<YYINITIAL>\"'\" \tBEGIN CHAR;"
    , "<CHAR>\\\\      \t BEGIN CHARESC;"
    , "<CHAR>[^']      \t BEGIN CHAREND; " ++ yylval ++ "._char = yytext[0]; return " ++ charToken ++ ";"
    , "<CHARESC>n      \t BEGIN CHAREND; " ++ yylval ++ "._char = '\\n';     return " ++ charToken ++ ";"
    , "<CHARESC>t      \t BEGIN CHAREND; " ++ yylval ++ "._char = '\\t';     return " ++ charToken ++ ";"
    , "<CHARESC>.      \t BEGIN CHAREND; " ++ yylval ++ "._char = yytext[0]; return " ++ charToken ++ ";"
    , "<CHAREND>\"'\"      \t BEGIN YYINITIAL;"
    ]

-- ---------------------------------------------------------------------------
-- Comments

-- | Create flex rules for single-line and multi-lines comments.
-- The first argument is an optional namespace (for C++), which is currently
-- ignored; the second argument is the set of comment delimiters as returned
-- by BNFC.CF.comments.
--
-- This function is only compiling the results of applying either
-- lexSingleComment or lexMultiComment on each comment delimiter or pair of
-- delimiters.  Each pair of block comment delimiters gets its own lexer
-- state from 'commentStates'.
--
-- >>> lexComments (Just "myns.") ([("{-","-}")],["--"])
-- <YYINITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
-- <YYINITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <COMMENT>.    /* skip */;
-- <COMMENT>[\n] /* skip */;
lexComments :: Maybe String -> ([(String, String)], [String]) -> Doc
lexComments _ (multi, single) = vcat $ concat
  [ map     lexSingleComment single
  , zipWith lexMultiComment  multi commentStates
  ]

-- | If we have several block comments, we need different COMMENT lexing states.
--
-- This is the infinite stream @COMMENT@, @COMMENT1@, @COMMENT2@, ...
commentStates :: [String]
commentStates = map ("COMMENT" ++) $ "" : map show [1 :: Integer ..]

-- | Create a lexer rule for single-line comments.
-- The argument is the delimiter that marks the beginning of the comment.
--
-- The rule skips everything from the delimiter up to (excluding) the end of
-- the line.  An explanatory C comment naming the delimiter is appended to
-- the rule, unless the delimiter itself contains a C comment marker.
--
-- >>> lexSingleComment "--"
-- <YYINITIAL>"--"[^\n]* /* skip */; /* BNFC: comment "--" */
--
-- >>> lexSingleComment "\""
-- <YYINITIAL>"\""[^\n]* /* skip */; /* BNFC: comment "\"" */
lexSingleComment :: String -> Doc
lexSingleComment c =
    text "<YYINITIAL>" <> cstring c <> text "[^\\n]*"
    <+> text "/* skip */;"
    <+> unless (containsCCommentMarker c)
          (text "/* BNFC: comment" <+> cstring c <+> text "*/")

-- | Does the string contain the opening (@/*@) or the closing (@*/@)
-- delimiter of a C block comment?
containsCCommentMarker :: String -> Bool
containsCCommentMarker s = "/*" `isInfixOf` s || "*/" `isInfixOf` s

-- | Create a lexer rule for multi-lines comments.
-- The first argument is the pair of delimiters for the multi-lines comment:
-- start delimiter and end delimiter.
-- The second argument is the name of the lexer state to use while inside
-- the comment.
--
-- An explanatory C comment naming the delimiters is appended to the first
-- rule, unless one of the delimiters contains a C comment marker.
--
-- There might be a possible bug here if a language includes 2 multi-line
-- comments. They could possibly start a comment with one character and end it
-- with another.  However this seems rare.
--
-- >>> lexMultiComment ("{-", "-}") "COMMENT"
-- <YYINITIAL>"{-" BEGIN COMMENT; /* BNFC: block comment "{-" "-}" */
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <COMMENT>.    /* skip */;
-- <COMMENT>[\n] /* skip */;
--
-- >>> lexMultiComment ("\"'", "'\"") "COMMENT"
-- <YYINITIAL>"\"'" BEGIN COMMENT; /* BNFC: block comment "\"'" "'\"" */
-- <COMMENT>"'\"" BEGIN YYINITIAL;
-- <COMMENT>.    /* skip */;
-- <COMMENT>[\n] /* skip */;
lexMultiComment :: (String, String) -> String -> Doc
lexMultiComment (b, e) comment = vcat
    [ text "<YYINITIAL>" <> cstring b <+> text "BEGIN" <+> text comment <> text ";"
      <+> unless (containsCCommentMarker b || containsCCommentMarker e)
          (text "/* BNFC: block comment" <+> cstring b <+> cstring e <+> text "*/")
    , commentTag <> cstring e <+> text "BEGIN YYINITIAL;"
    , commentTag <> text ".    /* skip */;"
    , commentTag <> text "[\\n] /* skip */;"
    ]
  where
  -- The start condition prefix, e.g. @<COMMENT>@.
  commentTag :: Doc
  commentTag = text $ "<" ++ comment ++ ">"

-- | Helper function that escapes characters in strings:
-- each backslash and each double quote is preceded by a backslash;
-- all other characters are left unchanged.
escapeChars :: String -> String
escapeChars = concatMap escape
  where
    escape :: Char -> String
    escape '\\' = "\\\\"
    escape '"'  = "\\\""
    escape c    = [c]