module PropaFP.Parsers.Lisp.Parser ( tokenize , parse , parseSequence , analyzeExpression , analyzeExpressionSequence , isScientificNumber) where import PropaFP.Parsers.Lisp.DataTypes import Prelude import GHC.Utils.Misc (readRational) import qualified Data.Scientific as S import qualified Data.List as L -- Constants. symbolCharacters :: String symbolCharacters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_!?-+*/%<>#.=^" numberCharacters :: String numberCharacters = "0123456789." isSymbolCharacter :: Char -> Bool isSymbolCharacter ch = elem ch symbolCharacters isNumberCharacter :: Char -> Bool isNumberCharacter ch = elem ch numberCharacters isSymbol :: String -> Bool isSymbol = all isSymbolCharacter -- Fixed issue with parsing -.1, multiple decimal points isNumber :: String -> Bool isNumber [] = True isNumber [c] = elem c "0123456789" isNumber ('-' : cs) = isNumber cs isNumber (c : cs) = elem c "0123456789" && all isNumberCharacter cs && L.length (L.filter ('.' ==) cs) <= 1 isScientificNumber :: String -> Bool isScientificNumber [] = True isScientificNumber [_c] = False isScientificNumber ('-' : cs) = isScientificNumber cs isScientificNumber (c : cs) = elem c "0123456789" && case L.break (== 'e') cs of (_, []) -> False -- e is not in cs (beforeE, _e : afterE) -> all isNumberCharacter beforeE && case afterE of [] -> False ('-' : ecs) -> all (`elem` "0123456789") ecs ecs -> all (`elem` "0123456789") ecs && L.length (L.filter ('.' ==) cs) <= 1 -- The "tokenize" function is the first phase of converting the source code of -- a Lisp program into an abstract syntax tree. It performs lexical analysis on -- a String representation of a Lisp program by extracting a list of tokens. tokenize :: String -> [String] tokenize [] = [] tokenize (x:xs) | x == ';' = tokenize $ dropWhile (/= '\n') xs -- Remove comments | x == '(' = [x] : tokenize xs | x == ')' = [x] : tokenize xs | isNumberCharacter x = tokenizeNumber (x:xs) "" False | isSymbolCharacter x = tokenizeSymbol (x:xs) "" | otherwise = tokenize xs tokenizeNumber :: String -> String -> Bool -> [String] tokenizeNumber [] number foundE = [number] tokenizeNumber (x:xs) number foundE | isNumberCharacter x = tokenizeNumber xs (number ++ [x]) foundE | ('e' == x) && not foundE = tokenizeNumber xs (number ++ [x]) True -- Support scientific numbers | otherwise = number : tokenize (x:xs) tokenizeSymbol :: String -> String -> [String] tokenizeSymbol [] symbol = [symbol] tokenizeSymbol (x:xs) number | isSymbolCharacter x = tokenizeSymbol xs (number ++ [x]) | otherwise = number : tokenize (x:xs) -- The "parse" function is the second phase of converting the source code of -- a Lisp program into an abstract syntax tree. It takes the list of tokens -- generated by "tokenize" and scans it until a valid Expression is created. -- The newly built expression along with the remaining tokens are returned. -- The Expressions that are returned are "primitive". They are entirely -- comprised of elements such as boolean and numeric constants with the only -- compound element being a "pair". parse :: [String] -> (Expression, [String]) parse [] = (Null, []) parse (x:xs) | x == "(" = parseList xs | "#t" == x = ((Boolean True), xs) | "#f" == x = ((Boolean False), xs) | "null" == x = ((Null), xs) | isScientificNumber x = ((Number (toRational (read x :: S.Scientific))), xs) | isNumber x = ((Number (readRational x)), xs) | isSymbol x = ((Variable x), xs) | otherwise = (Null, []) -- A helper function to parse a list. A list is defined as either -- Null or a Pair who's second element is a list. parseList :: [String] -> (Expression, [String]) parseList [] = (Null, []) parseList tokens@(x:xs) | x == ")" = (Null, xs) | otherwise = ((Pair expr1 expr2), rest2) where (expr1, rest1) = parse tokens (expr2, rest2) = parseList rest1 -- A helper function that takes the list of tokens generated by "tokenize" -- and continues parsing until all the constituent Expressions are extracted. parseSequence :: [String] -> [Expression] parseSequence [] = [] parseSequence tokens = expr : parseSequence rest where (expr, rest) = parse tokens -- The "analyzeExpression" function implements the third and final phase of -- converting the source code of a Lisp program into an abstract syntax tree. -- It takes a "primitive" Expression as input and converts it into a more -- sophisticated abstract syntax tree Expression such as Lambda, Application, -- If, Define, etc. analyzeExpression :: Expression -> Expression analyzeExpression Null = Null analyzeExpression (Number number) = (Number number) analyzeExpression (Boolean bool) = (Boolean bool) analyzeExpression (Variable variable) = (Variable variable) analyzeExpression pair@(Pair first second) | isIfExpression pair = buildIfExpression pair | isLambdaExpression pair = buildLambdaExpression pair | isDefinitionExpression pair = buildDefinitionExpression pair | isCondExpression pair = buildCondExpression pair -- New special forms to be added here. | otherwise = buildApplicationExpression pair isIfExpression :: Expression -> Bool isIfExpression (Pair (Variable value) _) = value == "if" isIfExpression _ = False buildIfExpression :: Expression -> Expression buildIfExpression (Pair _ (Pair predicate (Pair thenClause (Pair elseClause Null)))) = If (analyzeExpression predicate) (analyzeExpression thenClause) (analyzeExpression elseClause) isLambdaExpression :: Expression -> Bool isLambdaExpression (Pair (Variable value) _) = value == "lambda" isLambdaExpression _ = False buildLambdaExpression :: Expression -> Expression buildLambdaExpression (Pair _ (Pair parameters (Pair body Null))) = Lambda (pairToList parameters) (analyzeExpression body) isDefinitionExpression :: Expression -> Bool isDefinitionExpression (Pair (Variable value) _) = value == "define" isDefinitionExpression _ = False buildDefinitionExpression :: Expression -> Expression buildDefinitionExpression (Pair _ (Pair variable (Pair value Null))) = Definition variable (analyzeExpression value) isCondExpression :: Expression -> Bool isCondExpression (Pair (Variable value) _) = value == "cond" isCondExpression _ = False buildCondExpression :: Expression -> Expression buildCondExpression (Pair _ second) = buildCondExpressionHelper second buildCondExpressionHelper :: Expression -> Expression buildCondExpressionHelper (Null) = (Cond []) buildCondExpressionHelper (Pair (Pair predicate (Pair expression Null)) other) = (Cond ((analyzeExpression predicate, analyzeExpression expression) : cases)) where (Cond cases) = buildCondExpressionHelper other buildApplicationExpression :: Expression -> Expression buildApplicationExpression (Pair operator operands) = Application (analyzeExpression operator) (map analyzeExpression (pairToList operands)) analyzeExpressionSequence :: [Expression] -> [Expression] analyzeExpressionSequence = map analyzeExpression